def test_person():
    # Extract a Person fact (position + full name) with gender-number-case agreement.
    Name = fact(
        'Name',
        ['first', 'last'],
    )
    Person = fact(
        'Person',
        ['position', 'name']
    )

    LAST = and_(
        gram('Surn'),
        not_(gram('Abbr')),
    )
    FIRST = and_(
        gram('Name'),
        not_(gram('Abbr')),
    )
    POSITION = morph_pipeline([
        'управляющий директор',
        'вице-мэр'
    ])

    gnc = gnc_relation()

    NAME = rule(
        FIRST.interpretation(
            Name.first
        ).match(gnc),
        LAST.interpretation(
            Name.last
        ).match(gnc)
    ).interpretation(
        Name
    )
    PERSON = rule(
        POSITION.interpretation(
            Person.position
        ).match(gnc),
        NAME.interpretation(
            Person.name
        )
    ).interpretation(
        Person
    )

    parser = Parser(PERSON)
    match = parser.match('управляющий директор Иван Ульянов')
    assert match
    assert match.fact == Person(
        position='управляющий директор',
        name=Name(
            first='Иван',
            last='Ульянов'
        )
    )
def test_checks():
    # Activating a predicate against a Context validates its arguments.
    tokenizer = MorphTokenizer()
    context = Context(tokenizer)

    # Unknown grammeme is rejected.
    with pytest.raises(ValueError):
        gram('UNK').activate(context)

    # Unknown token type is rejected.
    with pytest.raises(ValueError):
        custom(lambda _: True, types='UNK').activate(context)
def test_predicate():
    tokenizer = MorphTokenizer()
    predicate = or_(
        normalized('московским'),
        and_(
            gram('NOUN'),
            not_(gram('femn'))
        )
    )
    context = Context(tokenizer)
    predicate = predicate.activate(context)

    tokens = tokenizer('московский зоопарк')
    values = [predicate(_) for _ in tokens]
    assert values == [True, True]

    tokens = tokenizer('московская погода')
    values = [predicate(_) for _ in tokens]
    assert values == [True, False]
def test_main():
    # The agreement relation is checked against the token marked with main().
    relation = and_(
        number_relation(),
        gender_relation()
    )
    A = rule(gram('Surn'), main(gram('Name'))).match(relation)
    B = gram('VERB').match(relation)
    AB = rule(A, B)

    parser = Parser(AB)
    match = parser.match('иванов иван стал')
    assert match

    # Plural verb disagrees in number with the main token 'иван'.
    match = parser.match('иванов иван стали')
    assert not match

    # The surname's number is irrelevant, only the main token is checked.
    match = parser.match('ивановы иван стал')
    assert match
def test_name():
    Name = fact('Name', ['first', 'last'])
    gnc = gnc_relation()

    FIRST = gram('Name').interpretation(Name.first.inflected()).match(gnc)
    LAST = gram('Surn').interpretation(Name.last.inflected()).match(gnc)
    NAME = rule(FIRST, LAST).interpretation(Name)

    parser = Parser(NAME)

    # inflected() brings the matched words to their base form: 'саше иванову' -> 'саша иванов'.
    match = parser.match('саше иванову')
    assert match.fact == Name(first='саша', last='иванов')

    # gnc agreement resolves the ambiguous 'иванову' as feminine here.
    match = parser.match('сашу иванову')
    assert match.fact == Name(first='саша', last='иванова')

    # The words disagree in case, so the rule does not match.
    match = parser.match('сашу ивановой')
    assert not match
def test_activate():
    from parser.parser import pipeline
    from parser.parser import gram
    from parser.parser import MorphTokenizer
    from parser.parser import Context

    tokenizer = MorphTokenizer()
    context = Context(tokenizer)

    # Activation binds a named rule to the context; assert_bnf checks its BNF form.
    A = pipeline(['a']).named('A')
    B = A.activate(context)
    assert_bnf(B, 'A -> pipeline')

    A = rule(gram('NOUN')).named('A')
    B = A.activate(context)
    assert_bnf(B, "A -> gram('NOUN')")