def test_person():
    """Parse a position followed by a first and last name into a nested fact.

    Builds a ``Name`` fact (first, last) nested inside a ``Person`` fact
    (position, name), registers a small position pipeline, and checks that
    exactly one match is produced for the sample phrase.
    """
    Name = fact('Name', ['first', 'last'])
    Person = fact('Person', ['position', 'name'])

    # Surname / given-name tokens that are not tagged as abbreviations.
    LAST = and_(gram('Surn'), not_(gram('Abbr')))
    FIRST = and_(gram('Name'), not_(gram('Abbr')))

    class PositionPipeline(MorphPipeline):
        # Tokens matched by this pipeline receive the 'Position' grammeme.
        grammemes = {'Position'}
        keys = ['управляющий директор', 'вице-мэр']

    POSITION = gram('Position')

    # Both name parts are matched against the same relation object.
    gnc = gnc_relation()
    first_part = FIRST.match(gnc).interpretation(Name.first)
    last_part = LAST.match(gnc).interpretation(Name.last)
    NAME = rule(first_part, last_part).interpretation(Name)

    PERSON = rule(
        POSITION.interpretation(Person.position),
        NAME.interpretation(Person.name),
    ).interpretation(Person)

    parser = Parser(PERSON, pipelines=[PositionPipeline()])
    matches = list(parser.match('управляющий директор Иван Ульянов'))

    assert len(matches) == 1
    expected = Person(
        position='управляющий директор',
        name=Name(first='Иван', last='Ульянов'),
    )
    assert matches[0].fact == expected
# coding: utf-8
from __future__ import unicode_literals

from yargy import (
    rule,
    and_,
    or_,
    fact,
)
from yargy.predicates import (
    eq,
    gte,
    lte,
    dictionary,
    normalized,
)


# Fact filled in by the date grammar.
Date = fact('Date', ['year', 'month', 'day'])

# Month name -> month number (1..12), built from the names in calendar order.
MONTHS = dict(
    (name, number)
    for number, name in enumerate(
        (
            'январь',
            'февраль',
            'март',
            'апрель',
            'май',
            'июнь',
            'июль',
            'август',
            'сентябрь',
            'октябрь',
            'ноябрь',
            'декабрь',
        ),
        1,
    )
)

# A month-name token, interpreted as the normalized ``Date.month`` attribute.
MONTH_NAME = dictionary(MONTHS).interpretation(
    Date.month.normalized()
)
# coding: utf-8
from __future__ import unicode_literals

from yargy import (
    rule,
    fact,
    not_,
    and_,
    or_,
    attribute,
)
from yargy.predicates import (
    gram,
    caseless,
    normalized,
    is_title,
    dictionary,
    custom,
)
# from yargy.relations import (gnc_relation, case_relation,)


# 1 - Fact definition
DateRelative = fact('DateRelative', ['name'])

from natasha.dictionaries.daterelative import DATERELATIVE_DICT


# 2 - Grammeme predicates and helper rules (pymorphy2)
ADJF, NOUN, INT = (gram(tag) for tag in ('ADJF', 'NOUN', 'INT'))
TITLE = is_title()


# First-ring rules
# A rule over the imported dictionary, marked as repeatable.
R1_SIMPLE = rule(DATERELATIVE_DICT).repeatable()


# Second-ring rules
# coding: utf-8
from __future__ import unicode_literals

from yargy import (
    rule,
    and_,
    or_,
    not_,
    fact,
)
from yargy.predicates import (
    caseless,
    normalized,
    eq,
    length_eq,
    gram,
    dictionary,
    is_single,
    is_title,
)
from yargy.relations import gnc_relation


# Fact filled in by the location grammar.
Location = fact('Location', ['name'])

gnc = gnc_relation()

# An adjective followed by a region-type noun; both tokens are matched
# against the same relation, and the phrase becomes the inflected
# ``Location.name``.
REGION = rule(
    gram('ADJF').match(gnc),
    dictionary({
        'край',
        'район',
        'область',
        'губерния',
        'уезд',
    }).match(gnc),
).interpretation(
    Location.name.inflected()
)

# Two further independent relation objects.
gnc1, gnc2 = gnc_relation(), gnc_relation()
# coding: utf-8
from __future__ import unicode_literals

from yargy import (
    rule,
    and_,
    or_,
    fact,
)
from yargy.predicates import (
    eq,
    in_,
    gram,
    normalized,
    caseless,
)


# Fact filled in by the money grammar.
Money = fact('Money', ['amount', 'currency'])

# Currency tokens
EURO = normalized('евро')
DOLLARS = or_(
    normalized('доллар'),
    eq('$'),
)
# Either the full word, or the short forms 'руб' / 'р' with an optional dot.
RUBLES = or_(
    rule(normalized('рубль')),
    rule(
        or_(caseless('руб'), caseless('р')),
        eq('.').optional(),
    ),
)
CURRENCY = or_(
    rule(EURO),
    rule(DOLLARS),
    RUBLES,
).interpretation(Money.currency)

# Amounts: one to three integer tokens in a row, or integer tokens joined
# by '.' (presumably digit groups such as "1 000 000" / "1.000.000").
INT = gram('INT')
AMOUNT_ = or_(
    rule(INT),
    rule(INT, INT),
    rule(INT, INT, INT),
    rule(INT, '.', INT),
    rule(INT, '.', INT, '.', INT),
)
# An amount followed by ',' or '.' and one more integer token.
FRACTION_AMOUN = rule(
    AMOUNT_,
    in_({',', '.'}),
    INT,
)
# coding: utf-8
from __future__ import unicode_literals

from yargy import Parser, rule, fact
from yargy.predicates import gram, dictionary


# Fact shared by the tests in this module.
Money = fact('Money', ['count', 'base', 'currency'])


def test_constant_attribute():
    """Check that ``.const(...)`` stores a fixed value in the parsed fact.

    The word 'тысяча' is interpreted as ``Money.base`` with the constant
    ``10**3``, so the resulting fact is expected to carry ``base=1000``.
    """
    count = gram('INT').interpretation(Money.count)
    base = dictionary({'тысяча'}).interpretation(Money.base.const(10**3))
    currency = dictionary({'рубль', 'доллар'}).interpretation(Money.currency)
    MONEY_RULE = rule(count, base, currency).interpretation(Money)

    parser = Parser(MONEY_RULE)
    matches = list(parser.match('1 тысяча рублей'))

    parsed = matches[0].fact
    expected = Money(count=1, base=1000, currency='рублей')
    assert parsed == expected
normalized, is_capitalized, ) from yargy.relations import ( gnc_relation, case_relation, ) from yargy.pipelines import MorphPipeline from natasha.grammars.name import NAME_ from natasha.grammars.person import PERSON_ Organisation = fact('Organisation', ['name']) class OrganisationTypePipeline(MorphPipeline): grammemes = {'OrganisationType'} keys = [ 'АО', 'ОАО', 'ООО', 'ЗАО', 'ПАО', # TODO Check abbrs # 'ик', # 'нк', # 'хк',
# coding: utf-8
from __future__ import unicode_literals

from yargy import (
    rule,
    fact,
    not_,
    and_,
    or_,
    attribute,
)
from yargy.predicates import (
    gram,
    caseless,
    normalized,
    is_title,
    dictionary,
    custom,
)
# from yargy.relations import (gnc_relation, case_relation,)


# 1 - Fact definition
Enum = fact('Enum', ['name'])

from natasha.dictionaries.enum import ENUM_DICT


# 2 - Grammeme predicates and helper rules (pymorphy2)
ADJF, NOUN, INT = (gram(tag) for tag in ('ADJF', 'NOUN', 'INT'))
TITLE = is_title()


# First-ring rules
# A rule over the imported dictionary, marked as repeatable.
R1_SIMPLE = rule(ENUM_DICT).repeatable()


# Second-ring rules
# coding: utf-8
from __future__ import unicode_literals

from yargy import (
    rule,
    fact,
    attribute,
    or_,
    and_,
)
from yargy.predicates import (
    eq,
    lte,
    gte,
    gram,
    length_eq,
    in_,
    in_caseless,
    dictionary,
    normalized,
    caseless,
    is_title,
)
from yargy.pipelines import (
    MorphPipeline,
)


# Facts: an address is a repeatable list of parts; the remaining facts
# describe the individual kinds of part.
Address = fact('Address', [attribute('parts').repeatable()])
Index = fact('Index', ['value'])
Country = fact('Country', ['name'])
Region = fact('Region', ['name', 'type'])
Settlement = fact('Settlement', ['name', 'type'])
Street = fact('Street', ['name', 'type'])
Building = fact('Building', ['number', 'type'])
Room = fact('Room', ['number', 'type'])

# Punctuation predicates
DASH = eq('-')
DOT = eq('.')

# Grammeme / shape predicates
ADJF = gram('ADJF')
NOUN = gram('NOUN')
INT = gram('INT')
TITLE = is_title()

# Integer, optional dash, then one of the listed endings
# (presumably ordinals written with digits, e.g. "1-я").
ANUM = rule(
    INT,
    DASH.optional(),
    in_caseless({
        'я',
        'й',
        'е',
        'ое',
        'ая',
        'ий',
        'ой',
    }),
)


#########
#
#   COUNTRY
# coding: utf-8 from __future__ import unicode_literals from yargy import (rule, fact, or_) from yargy.predicates import gram from yargy.pipelines import MorphPipeline from .name import (Name, FIRST_LAST, LAST_FIRST, TITLE_FIRST_LAST, TITLE_LAST_FIRST, ABBR_FIRST_LAST, LAST_ABBR_FIRST, ABBR_FIRST_MIDDLE_LAST, LAST_ABBR_FIRST_MIDDLE, TITLE_FIRST_MIDDLE, TITLE_FIRST_MIDDLE_LAST, TITLE_LAST_FIRST_MIDDLE, JUST_FIRST, JUST_LAST) Person = fact('Person', ['position', 'name']) class PositionsPipeline(MorphPipeline): grammemes = {'Position'} keys = [ 'святой', 'патриарх', 'митрополит', 'царь', 'король', 'царица', 'император', 'императрица', 'принц', 'принцесса', 'князь', 'граф',
# coding: utf-8
from __future__ import unicode_literals

from yargy import (
    rule,
    and_,
    or_,
    not_,
    fact,
)
from yargy.predicates import (
    eq,
    length_eq,
    gram,
    dictionary,
    is_single,
    is_capitalized,
)
from yargy.relations import gnc_relation

from natasha.data import load_lines


# Fact filled in by the name grammar.
Name = fact('Name', ['first', 'last', 'middle', 'nick'])

# Word lists loaded from the package data files.
FIRST_DICT = set(load_lines('first.txt'))
MAYBE_FIRST_DICT = set(load_lines('maybe_first.txt'))
LAST_DICT = set(load_lines('last.txt'))
MAYBE_LAST_DICT = set(load_lines('maybe_last.txt'))


##########
#
#   COMPONENTS
#
###########


# Token listed in the first-name dictionary.
IS_FIRST = dictionary(FIRST_DICT)

# Either a 'Name'-tagged token that is not an abbreviation,
# or a token from the "maybe first name" dictionary.
MAYBE_FIRST = or_(
    and_(
        gram('Name'),
        not_(gram('Abbr')),  # e.g. "A. Leonidov"
    ),
    dictionary(MAYBE_FIRST_DICT),
)
# coding: utf-8
from __future__ import unicode_literals

from yargy import (
    rule,
    fact,
    not_,
    and_,
    or_,
    attribute,
)
from yargy.predicates import (
    gram,
    caseless,
    normalized,
    is_title,
    dictionary,
    custom,
)
# from yargy.relations import (gnc_relation, case_relation,)


# 1 - Fact definition
Country = fact('Country', ['name'])

from natasha.dictionaries.country import COUNTRY_DICT


# 2 - Grammeme predicates and helper rules (pymorphy2)
ADJF, NOUN, INT = (gram(tag) for tag in ('ADJF', 'NOUN', 'INT'))
TITLE = is_title()


# First-ring rules
# A rule over the imported dictionary, marked as repeatable.
R1_SIMPLE = rule(COUNTRY_DICT).repeatable()


# Second-ring rules