Example #1
def next_any_word_except(banned) -> Parse:
    """parser constructor that matches any next word except banned.
    Matching against the banned words is done up to synonym."""
    bansyn = [synonymize(lexer.singularize(b)) for b in banned]
    def p(tok):
        return tok.value not in bansyn
    return next_any_word().if_test(p)
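To see the normalize-then-filter step in isolation, here is a minimal, self-contained sketch of the predicate above. The SYNONYM table and the singularize/synonymize helpers are naive hypothetical stand-ins for the module's synonym dictionary and lexer.singularize:

SYNONYM = {'proof': 'demonstration proof', 'demonstration': 'demonstration proof'}

def singularize(word: str) -> str:
    # naive stand-in: strip a trailing 's' (not the real lexer rule)
    return word[:-1] if word.endswith('s') else word

def synonymize(word: str) -> str:
    # map a word to the canonical form of its synonym group, if any
    return SYNONYM.get(word, word)

bansyn = [synonymize(singularize(b)) for b in ['proof']]

# 'demonstrations' is rejected: it normalizes to the same canonical
# form as the banned word 'proof'; 'lemma' passes the filter.
assert synonymize(singularize('demonstrations')) in bansyn
assert synonymize(singularize('lemma')) not in bansyn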
Example #2
def next_word(s:str) -> Parse: #was_next_word_syn
    """parser constructor that matches next word s, up to synonym"""
    syn = synonymize(lexer.singularize(s))
    return next_any_word().if_value(syn).expect(s)
Example #3
def synonym_add(ts):
    """add synonym list to dictionary"""
    #XX Debug: should check that at most one variant in ts is defined anywhere.
    for s in ts:
        if len(s.split(' ')) > 1:
            return msg.error(f'synonym entries must be single words: {s}')
        if lexer.singularize(s) in synonym:
            return msg.error(f'synonym already declared: {s}')
        # len restriction prevents VAR from being added to dict.
        if len(s) < MIN_LEN_SYNONYM:
            return msg.error(f'synonyms must have at least {MIN_LEN_SYNONYM} chars: {s}')
        if not s.isalpha():
            return msg.error(f'synonyms must be words: {s}')
    ls = [lexer.singularize(s) for s in ts]
    ls.sort()
    js = ' '.join(ls)
    for s in ls:
        synonym[s] = js
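To make the resulting dictionary state concrete, here is a standalone sketch of the grouping step. The local dict and singularizer are stand-ins for the module-level synonym table and lexer.singularize:

synonym = {}

def singularize(word: str) -> str:
    # naive stand-in for lexer.singularize
    return word[:-1] if word.endswith('s') else word

def synonym_add_sketch(ts):
    ls = sorted(singularize(s) for s in ts)
    js = ' '.join(ls)      # canonical form: sorted, space-joined group
    for s in ls:
        synonym[s] = js

synonym_add_sketch(['maps', 'function'])
# every member of the group maps to the same canonical form
assert synonym['map'] == synonym['function'] == 'function map'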
Example #4
def next_word(s:str) -> Parse: #was_next_word_syn
    """parser constructor that matches next word s, 
    up to synonym, singularization, and case.
    
    >>> pstream(next_word('trial'),'Trials x')
    LexToken(WORD,'trial',1,0)
    """
    syn = synonymize(lexer.singularize(s.lower()))
    def p(tok):
        return syn == synonymize(tok.value.lower())
    return next_any_word().if_test(p).name(s).setsample(sample.if_value(s))
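The fluent chain if_test(...).name(...).setsample(...) builds the parser by successive refinement. Below is a toy sketch of that pattern; ToyParse is hypothetical and far simpler than the real Parse class, which consumes token streams rather than testing single tokens:

class ToyParse:
    def __init__(self, fn, label=''):
        self.fn = fn            # token -> bool acceptance test
        self.label = label
    def if_test(self, p):
        # refine: accept only tokens passing both tests
        return ToyParse(lambda tok: self.fn(tok) and p(tok), self.label)
    def name(self, label):
        # attach a human-readable label, e.g. for error messages
        return ToyParse(self.fn, label)

any_word = ToyParse(lambda tok: True)
trial = any_word.if_test(lambda tok: tok == 'trial').name('trial')
assert trial.fn('trial') and not trial.fn('lemma')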
Example #5
def synonym_add(ts):
    """add synonym list to dictionary.
    All the words in the list are singularized, then made synonymous.
    The canonical form of the group of synonyms is created."""
    #XX Debug: should check that at most one variant in ts is defined anywhere.
    for s in ts:
        if len(s.split(' ')) > 1:
            raise DataProcess(f'synonym entries must be single words: {s}')
        if lexer.singularize(s) in synonym:
            raise DataProcess(f'synonym already declared: {s}')
        # len restriction prevents VAR from being added to dict.
        if len(s) < MIN_LEN_SYNONYM:
            raise DataProcess(f'synonyms must have at least {MIN_LEN_SYNONYM} chars: {s}')
        if not s.isalpha():
            raise DataProcess(f'synonyms must be words: {s}')
    # build the canonical form
    ls = [lexer.singularize(s) for s in ts]
    ls.sort()
    canonical_form = ' '.join(ls)
    # map every word in the group to its canonical form
    for s in ls:
        synonym[s] = canonical_form
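Once a group is recorded, synonym-insensitive comparison reduces to comparing canonical forms, roughly as follows (synonymize here is a stand-in built on a plain dict):

def synonymize(word, table):
    # resolve a word to its canonical form; unknown words map to themselves
    return table.get(word, word)

table = {'map': 'function map', 'function': 'function map'}
assert synonymize('map', table) == synonymize('function', table)
assert synonymize('map', table) != synonymize('lemma', table)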
Example #6
def next_any_word_except(banned) -> Parse:
    """parser constructor that matches any next WORD token except banned.
    Matching is up to synonym, singularization, and case.
    
    >>> try:
    ...     pstream(next_any_word_except(['trial']),'Trials x')
    ... except ParseError:
    ...     print('exception')
    exception
    
    >>> pstream(next_any_word_except(['trail']),'Trials x')
    LexToken(WORD,'trial',1,0)
    """
    bansyn = [synonymize(lexer.singularize(b.lower())) for b in banned]
    def p(tok):
        return synonymize(tok.value.lower()) not in bansyn
    return Parse.next_token().if_types(['WORD']).if_test(p).setsample(sample.if_types(['WORD']))
Example #7
def test_singular():
    """check lexer.singularize against each entry of the singular fixture"""
    for key in singular:
        assert_true('singular.' + key, lexer.singularize(key) == singular[key])
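The test assumes a fixture singular mapping each input word to its expected singular form, something like the following (hypothetical data; the real table lives with the tests):

singular = {
    'equations': 'equation',
    'classes': 'class',
    'fish': 'fish',
}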
Example #8
def not_key(s):
    """True if s, singularized and lowercased, is not a reserved pattern key."""
    return lexer.singularize(s.lower()) not in pattern_key
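A self-contained sketch of not_key with stand-ins; the contents of pattern_key and the singularizer are hypothetical:

pattern_key = {'proof', 'theorem'}   # hypothetical reserved keys

def singularize(word: str) -> str:
    # naive stand-in for lexer.singularize
    return word[:-1] if word.endswith('s') else word

def not_key(s):
    return singularize(s.lower()) not in pattern_key

assert not_key('lemma')            # ordinary word
assert not not_key('Theorems')     # normalizes to the reserved key 'theorem'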