def test_name():
    """Check the ``Name`` facts produced for first-name/surname pairs.

    Both parts of the rule are interpreted through ``.inflected()``
    attributes and attached to a single ``gnc_relation()`` instance.
    Two inputs must yield the facts listed below; the input
    'сашу ивановой' must produce a falsy match.
    """
    Name = fact('Name', ['first', 'last'])
    agreement = gnc_relation()

    # Build each part step by step: predicate -> interpretation -> relation.
    first_name = gram('Name').interpretation(Name.first.inflected())
    first_name = first_name.match(agreement)
    surname = gram('Surn').interpretation(Name.last.inflected())
    surname = surname.match(agreement)
    full_name = rule(first_name, surname).interpretation(Name)

    parser = Parser(full_name)

    # (input text, expected ``first``, expected ``last``)
    accepted = (
        ('саше иванову', 'саша', 'иванов'),
        ('сашу иванову', 'саша', 'иванова'),
    )
    for text, first, last in accepted:
        assert parser.match(text).fact == Name(first=first, last=last)

    assert not parser.match('сашу ивановой')
def test_person():
    """Parse a position followed by a first name and surname into ``Person``.

    The position, first-name and surname parts are all attached to the
    same ``gnc_relation()`` instance.  The test expects a truthy match for
    'управляющий директор Иван Ульянов' whose fact equals the ``Person``
    asserted at the end.
    """
    Name = fact('Name', ['first', 'last'])
    Person = fact('Person', ['position', 'name'])

    # Each predicate is the grammeme check combined with not_(gram('Abbr')).
    surname = and_(gram('Surn'), not_(gram('Abbr')))
    first_name = and_(gram('Name'), not_(gram('Abbr')))
    position = morph_pipeline(['управляющий директор', 'вице-мэр'])

    agreement = gnc_relation()

    first_part = first_name.interpretation(Name.first).match(agreement)
    last_part = surname.interpretation(Name.last).match(agreement)
    name = rule(first_part, last_part).interpretation(Name)

    position_part = position.interpretation(Person.position).match(agreement)
    name_part = name.interpretation(Person.name)
    person = rule(position_part, name_part).interpretation(Person)

    match = Parser(person).match('управляющий директор Иван Ульянов')
    assert match
    assert match.fact == Person(
        position='управляющий директор',
        name=Name(first='Иван', last='Ульянов'),
    )
def test_bnf():
    """Pass several rule constructions to ``assert_bnf`` with expected lines.

    Covers a named rule with a fact interpretation, nested attribute/fact
    interpretations, a rule attached to a relation, and the two orderings
    of ``.repeatable()`` and ``.interpretation()``.
    """
    from yargy.interpretation import fact
    from yargy.relations import gnc_relation

    F = fact('F', ['a'])
    relation = gnc_relation()

    # Named rule interpreted as the fact itself.
    grammar = rule('a').named('A').interpretation(F)
    assert_bnf(grammar, "F -> 'a'")

    # Attribute interpretation wrapped in the fact interpretation.
    grammar = rule('a').interpretation(F.a).interpretation(F)
    assert_bnf(grammar, 'F -> F.a', "F.a -> 'a'")

    # Relation applied before the attribute interpretation.
    grammar = rule('a').match(relation).interpretation(F.a)
    assert_bnf(grammar, "F.a^gnc -> 'a'")

    # Interpretation first, then repetition.
    grammar = rule('a').interpretation(F.a).repeatable()
    assert_bnf(grammar, 'R0 -> F.a | F.a R0', "F.a -> 'a'")

    # Repetition first, then interpretation.
    grammar = rule('a').repeatable().interpretation(F.a)
    assert_bnf(grammar, 'F.a -> R1', "R1 -> 'a' | 'a' R1")
def test_bnf():
    """Pass several rule constructions to ``assert_bnf`` with expected lines.

    Covers a named rule with a fact interpretation, nested attribute/fact
    interpretations, a rule attached to a relation, both orderings of
    ``.repeatable()`` and ``.interpretation()``, and finally one base rule
    reused under two different names inside a third named rule.
    """
    from yargy.interpretation import fact
    from yargy.relations import gnc_relation

    F = fact('F', ['a'])
    relation = gnc_relation()

    # Named rule interpreted as the fact itself.
    grammar = rule('a').named('A').interpretation(F)
    assert_bnf(grammar, "F -> 'a'")

    # Attribute interpretation wrapped in the fact interpretation.
    grammar = rule('a').interpretation(F.a).interpretation(F)
    assert_bnf(grammar, 'F -> F.a', "F.a -> 'a'")

    # Relation applied before the attribute interpretation.
    grammar = rule('a').match(relation).interpretation(F.a)
    assert_bnf(grammar, "F.a^gnc -> 'a'")

    # Interpretation first, then repetition.
    grammar = rule('a').interpretation(F.a).repeatable()
    assert_bnf(grammar, 'R0 -> F.a R0 | F.a', "F.a -> 'a'")

    # Repetition first, then interpretation.
    grammar = rule('a').repeatable().interpretation(F.a)
    assert_bnf(grammar, 'F.a -> R1', "R1 -> 'a' R1 | 'a'")

    # The same base rule exposed under two names and combined in a third.
    base = rule('a')
    left = base.named('B')
    right = base.named('C')
    pair = rule(left, right).named('D')
    assert_bnf(
        pair,
        'D -> B C',
        'B -> R0',
        'C -> R0',
        "R0 -> 'a'",
    )
def test_person():
    """Parse a position plus first name and surname into a ``Person`` fact.

    The position tokens are matched via ``gram('Position')``; a local
    ``MorphPipeline`` subclass declaring that grammeme and the position
    keys is handed to the parser.  Exactly one match is expected for
    'управляющий директор Иван Ульянов', with the fact asserted at the end.
    """
    Name = fact('Name', ['first', 'last'])
    Person = fact('Person', ['position', 'name'])

    # Each predicate is the grammeme check combined with not_(gram('Abbr')).
    surname = and_(gram('Surn'), not_(gram('Abbr')))
    first_name = and_(gram('Name'), not_(gram('Abbr')))

    class PositionPipeline(MorphPipeline):
        # Grammeme set and key phrases declared for this pipeline.
        grammemes = {'Position'}
        keys = ['управляющий директор', 'вице-мэр']

    position = gram('Position')
    agreement = gnc_relation()

    # Only the two name parts are attached to the relation.
    first_part = first_name.match(agreement).interpretation(Name.first)
    last_part = surname.match(agreement).interpretation(Name.last)
    name = rule(first_part, last_part).interpretation(Name)

    position_part = position.interpretation(Person.position)
    name_part = name.interpretation(Person.name)
    person = rule(position_part, name_part).interpretation(Person)

    parser = Parser(person, pipelines=[PositionPipeline()])
    found = list(parser.match('управляющий директор Иван Ульянов'))

    assert len(found) == 1
    assert found[0].fact == Person(
        position='управляющий директор',
        name=Name(first='Иван', last='Ульянов'),
    )
# coding: utf-8
from __future__ import unicode_literals

from yargy import (
    rule,
    and_,
    or_,
    not_,
    fact,
)
from yargy.predicates import (
    caseless,
    normalized,
    eq,
    length_eq,
    gram,
    dictionary,
    is_single,
    is_title,
)
from yargy.relations import gnc_relation


# Fact type with a single attribute, ``name``.
Location = fact('Location', ['name'])

# Relation instance shared by both tokens of REGION below.
gnc = gnc_relation()

# Two-token rule: an 'ADJF' token followed by one of the listed words,
# both attached to ``gnc``; the whole rule is interpreted as the inflected
# ``Location.name``.
REGION = rule(
    gram('ADJF').match(gnc),
    dictionary({
        'край',
        'район',
        'область',
        'губерния',
        'уезд',
    }).match(gnc),
).interpretation(Location.name.inflected())

# Two further relation instances; their use is not visible in this fragment.
gnc1 = gnc_relation()
gnc2 = gnc_relation()
tagger = CrfTagger(NAME_MODEL, get_name_features) super(nameExtractor, self).__init__(NAME, tagger=tagger) ### ### Dictionaries IN_FIRST = dictionary(set(load_dict('first.txt'))) IN_MAYBE_FIRST = dictionary(set(load_dict('maybe_first.txt'))) IN_LAST = dictionary(set(load_dict('last.txt'))) ### Name = fact('Name', ['first', 'middle', 'last', 'nick']) ################# UNMODIFIED NATASHA ################# gnc = gnc_relation() ######## # # FIRST # ######## TITLE = is_capitalized() NOUN = gram('NOUN') NAME_CRF = tag('I') ABBR = gram('Abbr') SURN = gram('Surn') NAME = and_(gram('Name'), not_(ABBR))
from yargy.predicates import ( caseless, normalized, eq, length_eq, gram, dictionary, is_single, is_title ) from yargy.relations import gnc_relation Location = fact( 'Location', ['name'], ) gnc = gnc_relation() REGION = rule( gram('ADJF').match(gnc), dictionary({ 'край', 'район', 'область', 'губерния', 'уезд', }).match(gnc), ).interpretation(Location.name.inflected()) gnc = gnc_relation() FEDERAL_DISTRICT = rule(