def test_type_errors():
    """Mixing attribute and custom() interpretations must raise TypeError."""
    # Case 1: an attribute interpretation stacked on custom(int).
    F = fact('F', ['a'])
    RULE = rule(
        'a',
        eq('1').interpretation(custom(int)),
    ).interpretation(F.a)
    parser = Parser(RULE)
    match = parser.match('a 1')
    with pytest.raises(TypeError):
        match.fact

    # Case 2: custom(str) stacked on custom(int) is rejected the same way.
    F = fact('F', ['a'])
    RULE = rule(
        'a',
        eq('1').interpretation(custom(int)),
    ).interpretation(custom(str))
    parser = Parser(RULE)
    match = parser.match('a 1')
    with pytest.raises(TypeError):
        match.fact
def get_rules():
    """Build the final yargy rule that matches city mentions.

    Returns a rule that accepts either an optional city word followed by a
    geo-tagged token, or the special multi-token spelling of
    'санкт-петербург'.

    Note: the original body also constructed RU/INT/NONE/NOUN/ADJF/NAME/
    PREP/NAME_OR_NOUN/CITY_EXEP/CITY_NOT, none of which contributed to the
    returned rule; those dead locals are removed here.
    """
    # Geo-tagged token, excluding words the morph analyzer mislabels
    # as Geox (false positives observed in the data).
    GEO = rule(
        and_(
            gram('Geox'),
            not_(
                or_(
                    eq('артема'),
                    eq('фармана'),
                    eq('оскол'),
                    eq('мунарева'),
                )
            )
        )
    )
    # Optional leading city word, e.g. 'город', 'Нижний'.
    CITY = morph_pipeline(['город', 'Нижний', 'новгород'])
    # 'санкт-петербург' is tokenized into three tokens.
    CITY_PITER = rule(eq('санкт'), eq('-'), eq('петербург'))
    COMPLICATED_CITY = or_(rule(CITY.optional(), GEO), CITY_PITER)
    FINAL_CITY = or_(COMPLICATED_CITY)
    return FINAL_CITY
def test_type_errors():
    """Accessing .fact must fail when interpretations are stacked illegally."""
    F = fact('F', ['a'])
    # Attribute interpretation over a custom(int) result.
    bad_attr = rule(
        'a',
        eq('1').interpretation(custom(int)),
    ).interpretation(F.a)
    m = Parser(bad_attr).match('a 1')
    with pytest.raises(TypeError):
        m.fact

    F = fact('F', ['a'])
    # custom(str) over a custom(int) result.
    bad_custom = rule(
        'a',
        eq('1').interpretation(custom(int)),
    ).interpretation(custom(str))
    m = Parser(bad_custom).match('a 1')
    with pytest.raises(TypeError):
        m.fact
def test_repeatable():
    """A repeatable attribute collects every matched token into a list."""
    F = fact('F', [attribute('a').repeatable()])
    RULE = rule(
        eq('a').interpretation(F.a),
        eq('b').interpretation(F.a),
    ).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a b')
    record = match.fact
    assert record == F(a=['a', 'b'])
    assert record.spans == [(0, 1), (2, 3)]
    assert record.as_json == {'a': ['a', 'b']}
def test_merge_facts():
    """Two partial facts produced by sub-rules merge into one record."""
    F = fact('F', ['a', 'b'])
    A = rule(eq('a').interpretation(F.a)).interpretation(F)
    B = rule(eq('b').interpretation(F.b)).interpretation(F)
    RULE = rule(A, B).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a b')
    record = match.fact
    assert record == F(a='a', b='b')
    assert record.spans == [(0, 1), (2, 3)]
    assert record.as_json == {'a': 'a', 'b': 'b'}
def get_rules():
    """Build the final yargy rule for matching street names.

    Combines suffix-driven patterns ('улица', 'проспект', ...), name and
    adjective combinations, and a fixed list of known street names.

    Note: the original body also constructed RU, NONE and
    EXCEPTIONAL_STREET_CONST, which never fed the returned rule; those
    dead locals are removed here.
    """
    INT = type('INT')
    NOUN = gram('NOUN')
    ADJF = gram('ADJF')
    GEO = gram('Geox')
    PREP = gram('PREP')
    CONJ = gram('CONJ')
    # A personal name that is neither a preposition nor a geo token.
    NAME = rule(and_(gram('Name'), not_(PREP), not_(GEO)))
    NOUN_NOT_CONJ = rule(and_(NOUN, not_(CONJ)))
    STREET_SUFFIXS = morph_pipeline([
        'улица', 'тракт', 'бульвар', 'проспект', 'микрорайон',
        'проезд', 'шоссе', 'парк'
    ])
    SPECIAL_STREET_SUFFIXS = morph_pipeline(['шоссе', 'тракт'])
    # Known street names matched directly (morph-normalized).
    SIMPLE_STREETS_FROM_ARRAY = morph_pipeline([
        'краснопресненская', 'республике', 'маршала захарова', 'доватора',
        'мичурина', 'зеленые аллеи', 'бехтеева', 'октябрьская',
        'новогиреевская', 'югорская', 'артема', 'парковая', 'зеленые аллеи',
        'алтуфьевское', 'горького', 'Кавказский', 'хамовнический вал',
        'Кусковская', 'марьинский парк', 'московская', 'береговая',
        'антонова овсиенко', 'школьная', 'юнтоловский', 'гагарина'
    ])
    # Token that must not start a house/apartment part.
    NOUN_NOT_APPART = rule(not_(or_(eq('дом'), eq('квартира'), INT, CONJ)))
    COMPLICATED_STREETS = or_(
        rule(STREET_SUFFIXS, INT, NOUN, NOUN),
        rule(STREET_SUFFIXS, INT, ADJF, NOUN),
        rule(STREET_SUFFIXS, NOUN_NOT_CONJ, NOUN_NOT_APPART, NAME.optional()),
        rule(NAME, NOUN_NOT_APPART),
        rule(ADJF, NAME),
        rule(STREET_SUFFIXS, ADJF, NOUN_NOT_APPART),
        rule(STREET_SUFFIXS, CONJ, NOUN, NOUN))
    SIMPLE_STREETS_WITH_STREET_SUFFIX = rule(STREET_SUFFIXS, NOUN_NOT_APPART)
    SPECIAL_SIMPLE_STREETS_WITH_STREET_SUFFIX = rule(
        ADJF, SPECIAL_STREET_SUFFIXS)
    SIMPLE_STREETS = or_(
        SPECIAL_SIMPLE_STREETS_WITH_STREET_SUFFIX,
        SIMPLE_STREETS_WITH_STREET_SUFFIX,
        SIMPLE_STREETS_FROM_ARRAY)
    FINAL_STREET = or_(COMPLICATED_STREETS, SIMPLE_STREETS)
    return FINAL_STREET
def test_repeatable():
    """Repeated assignments to a repeatable attribute accumulate in order."""
    F = fact('F', [attribute('a').repeatable()])
    grammar = rule(
        eq('a').interpretation(F.a),
        eq('b').interpretation(F.a),
    ).interpretation(F)
    result = Parser(grammar).match('a b').fact
    assert result == F(a=['a', 'b'])
    assert result.spans == [(0, 1), (2, 3)]
    assert result.as_json == {'a': ['a', 'b']}
def req_preposition(preposition: str = None):
    """Build a predicate requiring a specific preposition, or none at all.

    When no preposition is requested, any token is accepted (``y.empty()``).
    Otherwise a PREP token must equal *preposition*, while non-PREP tokens
    pass through untouched.

    Bug fix: the default argument is the ``None`` object, but the original
    compared against the string ``"None"``, so the ``empty()`` branch was
    unreachable for default calls. Both spellings are now accepted.
    """
    if preposition is None or preposition == "None":
        return y.empty()
    return y.or_(
        y.and_(yp.gram("PREP"), yp.eq(preposition)),
        y.not_(yp.gram("PREP")),
    )
def test_merge_facts():
    """Facts from nested interpreted rules merge field-by-field."""
    F = fact('F', ['a', 'b'])
    left = rule(eq('a').interpretation(F.a)).interpretation(F)
    right = rule(eq('b').interpretation(F.b)).interpretation(F)
    grammar = rule(left, right).interpretation(F)
    result = Parser(grammar).match('a b').fact
    assert result == F(a='a', b='b')
    assert result.spans == [(0, 1), (2, 3)]
    assert result.as_json == {'a': 'a', 'b': 'b'}
def test_predicate_attribute():
    """Interpreting a predicate as F.a fills the attribute with the token."""
    F = fact('F', ['a'])
    RULE = rule(
        eq('a').interpretation(F.a)
    ).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a')
    record = match.fact
    assert record == F(a='a')
    assert record.spans == [(0, 1)]
    assert record.as_json == {'a': 'a'}
def test_insted_attributes():
    """A later attribute interpretation overrides the earlier one (a -> b)."""
    F = fact('F', ['a', 'b'])
    RULE = rule(
        eq('a').interpretation(F.a)
    ).interpretation(F.b).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a')
    record = match.fact
    assert record == F(a=None, b='a')
    assert record.spans == [(0, 1)]
    assert record.as_json == {'b': 'a'}
def get_rules():
    """Build the final yargy rule for matching house/building numbers.

    Note: the original body also constructed RU, NONE, NOUN, CONJ, NAME,
    PREP, NPRO, GEO, NAME_OR_NOUN, CITY_EXEP and DOUBLED, none of which
    contributed to the returned rule; those dead locals are removed here.
    """
    INT = type('INT')
    ADJF = gram('ADJF')
    # House-part marker words ('дом', 'корпус', ...).
    HOUSE = morph_pipeline(['дом', 'корпус', 'квартира', 'строение', 'ст'])
    # Reject adjectives inside a house number.
    HOUSE_NOT = rule(and_(not_(ADJF)))
    # Building-letter / separator tokens (latin and cyrillic 'а', '/', 'б').
    HOUSE1 = morph_pipeline(['a', 'а', '/', 'б'])
    UNIT1 = or_(
        rule(
            and_(INT, not_(eq('3'))),
            HOUSE1.optional(),
            HOUSE_NOT.optional(),
            INT.optional()))
    UNIT = or_(rule(HOUSE.optional(), UNIT1))
    COMPLICATED_HOUSE = rule(UNIT.repeatable())
    FINAL_HOUSE = or_(COMPLICATED_HOUSE)
    return FINAL_HOUSE
def test_nested_facts():
    """A fact may be stored as the value of another fact's attribute."""
    F = fact('F', ['a'])
    G = fact('G', ['b'])
    RULE = rule(
        eq('a').interpretation(F.a)
    ).interpretation(F).interpretation(G.b).interpretation(G)
    parser = Parser(RULE)
    match = parser.match('a')
    record = match.fact
    assert record == G(b=F(a='a'))
    assert record.spans == [(0, 1)]
    assert record.as_json == {'b': {'a': 'a'}}
def test_predicate_attribute():
    """A single-token match lands in the interpreted attribute."""
    F = fact('F', ['a'])
    grammar = rule(eq('a').interpretation(F.a)).interpretation(F)
    result = Parser(grammar).match('a').fact
    assert result == F(a='a')
    assert result.spans == [(0, 1)]
    assert result.as_json == {'a': 'a'}
def get_hyperonyms(main_word):
    """Print token sequences matching hyperonym patterns for *main_word*.

    Fetches the word's Wikipedia summary, strips parenthesized asides,
    and scans it with hyperonym grammar templates built around the word.
    Relies on module-level rule parts (ATAKJE, START, MID, END, START_S,
    KAK, INCLUDING) defined elsewhere in the file.
    """
    HYPONYM = eq(utils.deaccent(main_word))
    grammar = or_(
        rule(HYPONYM, ATAKJE, START, MID, END),
        rule(HYPONYM, MID, END),
        rule(START_S, END, KAK, HYPONYM),
        rule(END, INCLUDING, HYPONYM),
    )
    parser = Parser(grammar)
    text = utils.deaccent(wikipedia.summary(main_word))
    print(text)
    # Drop parenthesized asides, then known noisy boilerplate.
    text = re.sub(r'\(.+?\)', '', text)
    text = text.lower().replace('* сергии радонежскии* ', '')
    for idx, found in enumerate(parser.findall(text.lower())):
        values = [token.value for token in found.tokens]
        print(values)
def test_insted_attributes():
    """Re-interpreting F.a as F.b moves the value: a stays None, b is set."""
    F = fact('F', ['a', 'b'])
    grammar = (
        rule(eq('a').interpretation(F.a))
        .interpretation(F.b)
        .interpretation(F)
    )
    result = Parser(grammar).match('a').fact
    assert result == F(a=None, b='a')
    assert result.spans == [(0, 1)]
    assert result.as_json == {'b': 'a'}
def _abbreviate(word: str, abbrs: List[str], opt=False):
    """Rule matching *word*, its dashed spellings, or its abbreviations.

    Abbreviations containing a dash become token-by-token dashed rules;
    the rest may be followed by an optional dot. Whatever variant matched,
    the interpretation constant is always the full word. With ``opt=True``
    the whole rule is optional.
    """
    plain, dashed = partition(lambda abbr: '-' in abbr, abbrs)
    dashed_rules = (
        rule(*map(caseless, intersperse('-', abbr.split('-'))))
        for abbr in dashed
    )
    word_rule = rule(normalized(word))
    dashed_rule = rule(or_(*dashed_rules))
    dotted_abbr = rule(
        or_(*map(caseless, plain)),
        eq('.').optional(),
    )
    combined = or_(word_rule, dashed_rule, dotted_abbr).interpretation(
        interpretation.const(word))
    return combined.optional() if opt else combined
def test_nested_facts():
    """Chained interpretations wrap fact F inside attribute b of fact G."""
    F = fact('F', ['a'])
    G = fact('G', ['b'])
    grammar = (
        rule(eq('a').interpretation(F.a))
        .interpretation(F)
        .interpretation(G.b)
        .interpretation(G)
    )
    result = Parser(grammar).match('a').fact
    assert result == G(b=F(a='a'))
    assert result.spans == [(0, 1)]
    assert result.as_json == {'b': {'a': 'a'}}
pidxs = json.loads(prestr(val[2])) concp = [el.split(",")[0] for el in json.loads(prestr(val[3]))] idx2syns.update(dict(zip(pidxs, concp))) except: print(prestr(val[2])) print(prestr(val[3])) # In[ ]: # In[4]: START = rule( or_(rule(gram('ADJF')), rule(gram('NOUN'))).optional(), gram('NOUN')) START_S = or_( eq('такой'), eq('такие'), ) KAK = eq('как') INCLUDING = or_( or_( eq('в'), eq('том'), eq('числе'), ), eq('включающий'), or_( eq('включающий'), eq('в'), eq('себя'),
# coding: utf-8
from __future__ import unicode_literals

from yargy import (rule, and_, or_, fact)
from yargy.predicates import (eq, in_, gram, normalized, caseless)

# Money fact: numeric amount plus a currency marker.
Money = fact('Money', ['amount', 'currency'])

EURO = normalized('евро')
DOLLARS = or_(normalized('доллар'), eq('$'))
RUBLES = or_(
    rule(normalized('рубль')),
    rule(or_(caseless('руб'), caseless('р')), eq('.').optional()),
)
CURRENCY = or_(
    rule(EURO),
    rule(DOLLARS),
    RUBLES,
).interpretation(Money.currency)

INT = gram('INT')
# Integer amounts, possibly split into groups ('1 000 000', '1.000.000').
AMOUNT_ = or_(
    rule(INT),
    rule(INT, INT),
    rule(INT, INT, INT),
    rule(INT, '.', INT),
    rule(INT, '.', INT, '.', INT),
)
# Amount with a decimal fraction, e.g. '12,5'.
FRACTION_AMOUN = rule(AMOUNT_, in_({',', '.'}), INT)
from yargy import rule, and_, or_ from yargy.interpretation import (fact, const, attribute) from yargy.predicates import (eq, length_eq, in_, in_caseless, type, normalized, caseless, dictionary) Part = fact('Part', ['part']) Money = fact('Money', [ 'integer_min', attribute('integer_max', -1), attribute('currency', '-'), attribute('multiplier', -1), attribute('period', '-') ]) DOT = eq('.') INT = type('INT') ######## # # CURRENCY # ########## EURO = or_(normalized('евро'), normalized('euro'), eq('€'), caseless('EUR')).interpretation(const('EUR')) DOLLARS = or_(normalized('доллар'), normalized('дол'), normalized('dollar'), eq('$'), caseless('USD')).interpretation(const('USD')) RUBLES = or_(
max.amount *= 1000 if not min.currency: min.currency = max.currency # if (min.currency is not None) and (min.currency != 'RUB') and (max.currency is not None): # max.currency elif min.currency != max.currency: min.currency = max.currency # для рублевых вилок типа 150-250 без указания тысяч домножаем на тысячу if (max.amount < 1000) and (min.amount < 1000) and (max.currency == 'RUB'): min.amount *= 1000 max.amount *= 1000 return dsl.Range(min, max) DOT = eq('.') INT = type('INT') ######## # # CURRENCY # ########## # EURO = or_( # normalized('евро'), # #in_(['€', 'EUR']) # eq('€'), # #eq('EUR') # ).interpretation( # const(dsl.EURO)
rule(caseless('м'), '.'), rule(normalized('метро')), ) __quotes = "„“”‚‘’'\"" LEFT_QUOTE = in_("«" + __quotes) RIGHT_QUOTE = in_("»" + __quotes) STATION = rule( STATION_WORD.optional(), METRO_WORD.optional(), LEFT_QUOTE.optional(), STATION_TITLE.interpretation( meaning.custom(lambda p: p.value)).interpretation(Station.name), rule( eq('-').optional(), LIST_OF_NUMERALS.interpretation(Station.num), ).optional(), RIGHT_QUOTE.optional(), ).interpretation(Station) LIST_OF_STATIONS = rule( STATION.means(Array.element), rule( in_caseless('и,-'), STATION.means(Array.element), ).repeatable().optional(), ).interpretation(Array).interpretation(meaning.custom(lambda p: p.element)) FROM_STATION_TO_STATION = rule( or_(caseless('с'), caseless('со')),
'девяносто': 90, 'сто': 100, 'двести': 200, 'триста': 300, 'четыреста': 400, 'пятьсот': 500, 'шестьсот': 600, 'семьсот': 700, 'восемьсот': 800, 'девятьсот': 900, 'тысяча': 10**3, 'миллион': 10**6, 'миллиард': 10**9, 'триллион': 10**12, } DOT = eq('.') INT = type('INT') THOUSANDTH = rule(caseless_pipeline(['тысячных', 'тысячная'])).interpretation(const(10**-3)) HUNDREDTH = rule(caseless_pipeline(['сотых', 'сотая'])).interpretation(const(10**-2)) TENTH = rule(caseless_pipeline(['десятых', 'десятая'])).interpretation(const(10**-1)) THOUSAND = or_( rule(caseless('т'), DOT), rule(caseless('тыс'), DOT.optional()), rule(normalized('тысяча')), rule(normalized('тыща')) ).interpretation(const(10**3)) MILLION = or_( rule(caseless('млн'), DOT.optional()), rule(normalized('миллион')) ).interpretation(const(10**6)) MILLIARD = or_(
'август': 8, 'сентябрь': 9, 'октябрь': 10, 'ноябрь': 11, 'декабрь': 12, } MONTH_NAME = dictionary(MONTHS).interpretation(Date.month.normalized()) MONTH = and_(gte(1), lte(12)).interpretation(Date.month) DAY = and_(gte(1), lte(31)).interpretation(Date.day) YEAR_WORD = or_( rule('г', eq('.').optional()), rule(normalized('г.'), eq('.').optional()), rule(normalized('год')), rule(normalized('гг')), rule(')'), ) YEAR_PREFIX = or_( rule('в '), rule('c '), rule(', '), rule('('), ) YEAR_POSTFIX = or_( rule('е'),
Range = fact('Range', ['min', 'max']) class Range(Range, Normalizable): @property def normalized(self): min = self.min.normalized max = self.max.normalized if not min.currency: min.currency = max.currency return dsl.Range(min, max) DOT = eq('.') INT = type('INT') ######## # # CURRENCY # ########## EURO = or_(normalized('евро'), eq('€')).interpretation(const(dsl.EURO)) DOLLARS = or_(normalized('доллар'), eq('$')).interpretation(const(dsl.DOLLARS)) RUBLES = or_( rule(normalized('рубль')), rule(or_(caseless('руб'), caseless('р'), eq('₽')),
MONTH = and_( gte(1), lte(12) ).interpretation( Date.month.custom(int) ) DAY = and_( gte(1), lte(31) ).interpretation( Date.day.custom(int) ) YEAR_WORD = or_( rule('г', eq('.').optional()), rule(normalized('год')) ) YEAR = and_( gte(1000), lte(2100) ).interpretation( Date.year.custom(int) ) YEAR_SHORT = and_( length_eq(2), gte(0), lte(99) ).interpretation(
) Street = fact( 'Street', ['name', 'type'] ) Building = fact( 'Building', ['number', 'type'] ) Room = fact( 'Room', ['number', 'type'] ) DASH = eq('-') DOT = eq('.') ADJF = gram('ADJF') NOUN = gram('NOUN') INT = type('INT') TITLE = is_title() ANUM = rule( INT, DASH.optional(), in_caseless({ 'я', 'й', 'е', 'ое', 'ая', 'ий', 'ой' }) )
'Range', ['min', 'max'] ) class Range(Range, Normalizable): @property def normalized(self): min = self.min.normalized max = self.max.normalized if not min.currency: min.currency = max.currency return dsl.Range(min, max) DOT = eq('.') INT = type('INT') ######## # # CURRENCY # ########## EURO = or_( normalized('евро'), eq('€') ).interpretation( const(dsl.EURO)
'июль': 7, 'август': 8, 'сентябрь': 9, 'октябрь': 10, 'ноябрь': 11, 'декабрь': 12, } MONTH_NAME = dictionary(MONTHS).interpretation(Date.month.normalized().custom( MONTHS.__getitem__)) MONTH = and_(gte(1), lte(12)).interpretation(Date.month.custom(int)) DAY = and_(gte(1), lte(31)).interpretation(Date.day.custom(int)) YEAR_WORD = or_(rule('г', eq('.').optional()), rule(normalized('год'))) YEAR = and_(gte(1000), lte(2100)).interpretation(Date.year.custom(int)) YEAR_SHORT = and_(gte(0), lte(99)).interpretation( Date.year.custom(lambda _: 1900 + int(_))) ERA_YEAR = and_(gte(1), lte(100000)).interpretation(Date.year.custom(int)) ERA_WORD = rule( eq('до'), or_(rule('н', eq('.'), 'э', eq('.').optional()), rule(normalized('наша'), normalized('эра')))).interpretation(Date.current_era.const(False))
from yargy import rule, and_, or_, not_
from yargy.predicates import eq, type as _type, normalized, custom
from yargy.pipelines import morph_pipeline
from yargy.interpretation import fact

# City mention: optional 'город' prefix plus a known city title.
CityFact = fact('city', ['prefix', 'title'])

CityTitle = morph_pipeline({
    'липецк',
    'сургут',
    'нальчик',
    'москва',
    'санкт-петербург',
    'питер',
    'нижний новгород',
    'видное',
}).interpretation(CityFact.title.normalized())

CityRule = rule(
    normalized('город').optional().interpretation(CityFact.prefix),
    CityTitle,
    eq(';').optional(),
).interpretation(CityFact)
) COD = fact( 'Codex', ['n0', 'point', 'n1', 'subpoint', 'n2', 'part', 'n3', 'article', 'n4', 'par', 'n5', 'subsection', 'n6', 'section', 'n7', 'chapter', 'n8', 'type', 'codex'] ) COURT_ = fact( 'Court', ['smth', 'type', 'court', 'rf'] ) NUM = and_(gte(1), lte(10000)) NUMBERS = rule(NUM, rule(eq('.').optional(), NUM).repeatable().optional()) CODEX = rule( or_(rule(normalized('пункт')), rule('п', eq('.').optional()) ).repeatable().optional().interpretation(COD.point), NUMBERS.repeatable().optional().interpretation(COD.n1), or_(rule(normalized('подпункт')), rule('пп', eq('.').optional()) ).repeatable().optional().interpretation(COD.subpoint), NUMBERS.repeatable().optional().interpretation(COD.n2),
'нотариальная контора', 'букмекерская контора', 'авиазавод', 'автозавод', 'винзавод', 'подстанция', 'гидроэлектростанция', ]) gnc = gnc_relation() ADJF_PREFIX = rule( or_( rule(gram('ADJF').match(gnc)), # международное rule( # историко-просветительское true(), eq('-'), gram('ADJF').match(gnc), ), ), or_(caseless('и'), eq(',')).optional(), ).repeatable() case = case_relation() GENT_GROUP = rule( gram('gent').match(case) ).repeatable().optional() QUOTED = rule( TYPE, in_(QUOTES), not_(in_(QUOTES)).repeatable(),
'data scientist', 'data engineer', 'engineer', 'analyst', 'data analyst', 'data manager', 'scientist', 'researcher', "developer", "intern" ]), rule(dictionary(['DS', 'DE']), is_capitalized()), morph_pipeline(["аналитик", "разработчик", "стажер"])).interpretation(Position.name.inflected())) FIELD = rule( caseless_pipeline([ 'ML', 'DL', 'CV', 'computer vision', 'NLP', 'bi', 'machine learning', 'deep learning', 'software', 'research', 'big data', 'python', 'c++', "scala", "java", 'ios', "android", 'devops', "backend", 'frontend' ]).interpretation(Position.field)) HEAD = rule( caseless('head').interpretation(Position.level), eq('of'), caseless_pipeline(['analytics', 'predictive analytics', 'data science']).interpretation(Position.field)) POSITION = or_( rule(LEVEL.optional(), FIELD.optional(), eq('-').optional(), NAME), HEAD).interpretation(Position) # TODO: нужен метод extract с фильтрацией. Например, разбирать возможные ложные срабатывания ("аналитика"), # фильтровать по длине (чем больше полей заполнено, тем предпочтительнее) # можно выдавать "аналитик" только тогда, когда ничего более конкретного не нашлось class PositionExtractor(Extractor): def __init__(self): super(PositionExtractor, self).__init__(POSITION)
from yargy import rule, and_, not_, or_
from yargy.interpretation import fact
from yargy.predicates import gram, eq, type, in_
from yargy.relations import gnc_relation
from yargy.pipelines import morph_pipeline

from .data import NCONTRACT

INT = type('INT')
DOT = eq('.')
LEFT = eq('<')
RIGHT = rule(in_('>.'))

# Contract date: day, month (name or number), year.
Datecont = fact('Datecont', ['day', 'month', 'year'])

OT = rule(eq('от'))
BEFOREDATE = or_(NCONTRACT, OT)

DAY = rule(INT).interpretation(Datecont.day.custom(int))
MONTH = or_(
    morph_pipeline([
        'январь', 'февраль', 'март', 'апрель', 'май', 'июнь',
        'июль', 'август', 'сентябрь', 'октябрь', 'ноябрь', 'декабрь',
    ]),
    rule(INT),
).interpretation(Datecont.month)
YEAR = rule(INT).interpretation(Datecont.year.custom(int))

# Rule for the document date.
DATECONT = rule(
    BEFOREDATE,
    LEFT.optional(),
    DAY,
    RIGHT.optional(),
    MONTH,
    DOT.optional(),
    YEAR,
).interpretation(Datecont)
RULE_DAY_TIME = rule( rule("в").optional(), or_( rule( HOUR_OF_A_DAY.interpretation(DayTime.hour.normalized().custom( lambda val: int(WORDS_HOUR_OF_A_DAY.get(val, val)))), normalized("час").optional(), AM_PM.optional().interpretation( DayTime.am_pm.normalized().custom(normalize_am_pm))), rule( and_( gte(0), lte(23), ).interpretation(DayTime.hour.custom(int)), eq(":").interpretation( DayTime.strict_format.custom(lambda _: True)), and_(gte(0), lte(59)).interpretation( DayTime.minute.normalized().custom(int)), rule( ":", and_(gte(0), lte(59)).interpretation(DayTime.second.custom( int))).optional()))).interpretation(DayTime) RULE_RELATIVE_DAY = rule( rule("в").optional(), RELATIVE_DAY.interpretation( RelativeDay.relative_day.normalized().custom(normalize_relative_day)), RULE_DAY_TIME.optional().interpretation( RelativeDay.day_time)).interpretation(RelativeDay) RULE_DAY_OF_THE_WEEK = rule(
from imaplib import Months
from yargy import Parser, rule, and_, or_, not_
from yargy.pipelines import morph_pipeline
from yargy.interpretation import fact
from IPython.display import display
from yargy.predicates import (gram, eq, lte, gte, in_, is_capitalized,
                              dictionary, normalized, caseless,
                              type as typ)
from yargy.predicates.bank import tokenize
from yargy.tokenizer import INT, MorphTokenizer

# tokenizer = MorphTokenizer()

NUMINT = typ('INT')
# Decimal separator: dot or comma.
DOT = or_(eq('.'), eq(','))

# A decimal number: integer part, separator, fractional part.
FLOAT = rule(NUMINT, DOT, NUMINT)
NUM = or_(rule(NUMINT), FLOAT)

# Temperature fact: a range (min/max) or a single value.
Temperature = fact(
    'Temperature',
    ['min', 'max', 'singular'],
)
'апрель': 4, 'май': 5, 'июнь': 6, 'июль': 7, 'август': 8, 'сентябрь': 9, 'октябрь': 10, 'ноябрь': 11, 'декабрь': 12, } MONTH_NAME = dictionary(MONTHS).interpretation(Date.month.normalized()) MONTH = and_(gte(1), lte(12)).interpretation(Date.month) DAY = and_(gte(1), lte(31)).interpretation(Date.day) YEAR_WORD = or_(rule('г', eq('.').optional()), rule(normalized('год'))) YEAR = and_(gte(1900), lte(2100)).interpretation(Date.year) YEAR_SHORT = and_(gte(0), lte(99)).interpretation(Date.year) DATE = or_( rule(DAY, '.', MONTH, '.', or_(YEAR, YEAR_SHORT), YEAR_WORD.optional()), rule(YEAR, YEAR_WORD), rule(DAY, MONTH_NAME), rule(MONTH_NAME, YEAR, YEAR_WORD.optional()), rule(DAY, MONTH_NAME, YEAR, YEAR_WORD.optional()), ).interpretation(Date)
or_( or_( gram('PREP'), gram('Vpre'), gram('CONJ'), gram('PRCL'), gram('INTJ'), ), gram('POST'), ).optional()) case = case_relation() GENT_GROUP = rule(gram('gent').match(case)).repeatable().optional() #ADJF ADJF_PREFIX_COUNTABLE = rule(or_(caseless('и'), eq(',')).optional(), ) ADJF_PREFIX_ADJF = and_(ADJF, TITLE).repeatable() ADJF_NORM = rule( and_(ADJF, custom(lambda s: EDUORG_DICT_REGEXP.search(s), types=(str)))).repeatable() ADJF_PREFIX = rule( ADJF_PREFIX_ADJF, ADJF.optional(), #Киевском государственном университете ADJF_PREFIX_COUNTABLE).repeatable() # ### ### 1-ST RING RULES