R1_ADJF = rule(ADJF_NORM, ).repeatable() ### ### 2-ST RING RULES #Кембриджском университете #Казанский и Московский университеты R2_SIMPLE_W_ADJF = rule( ADJF_PREFIX, R1_SIMPLE, ).repeatable() R2_QUOTED = or_( rule( EDUORG_DICT, in_(QUOTES), not_(or_(in_(QUOTES), #gram('END-OF-LINE'), )).repeatable(), in_(QUOTES)), rule( in_(QUOTES), ADJF.optional(), EDUORG_DICT, not_(or_(in_(QUOTES), #gram('END-OF-LINE'), )).repeatable(), in_(QUOTES), )) R2_KNOWN = rule(
from yargy import rule, and_, not_, or_
from yargy.interpretation import fact
from yargy.predicates import gram, eq, type, in_
from yargy.relations import gnc_relation
from yargy.pipelines import morph_pipeline
from .data import NCONTRACT

# Fact produced by the grammar: the date that follows a contract reference.
Datecont = fact(
    'Datecont',
    ['day', 'month', 'year']
)

# Elementary token predicates.
INT = type('INT')
DOT = eq('.')
LEFT = eq('<')
# Token whose value is contained in the string '>.'.
RIGHT = rule(in_('>.'))

# What may precede the date: the imported NCONTRACT rule or the word 'от'.
OT = rule(eq('от'))
BEFOREDATE = or_(NCONTRACT, OT)

# Month names handed to the morphological pipeline.
_MONTH_NAMES = [
    'январь',
    'февраль',
    'март',
    'апрель',
    'май',
    'июнь',
    'июль',
    'август',
    'сентябрь',
    'октябрь',
    'ноябрь',
    'декабрь'
]

# Day and year are integer tokens converted with int(); the month is either
# a month name or an integer token and is stored without conversion.
DAY = rule(INT).interpretation(
    Datecont.day.custom(int)
)
MONTH = or_(
    morph_pipeline(_MONTH_NAMES),
    rule(INT)
).interpretation(
    Datecont.month
)
YEAR = rule(INT).interpretation(
    Datecont.year.custom(int)
)

# Rule for the document date.
DATECONT = rule(
    BEFOREDATE,
    LEFT.optional(),
    DAY,
    RIGHT.optional(),
    MONTH,
    DOT.optional(),
    YEAR
).interpretation(
    Datecont
)
'Vice City Stories', ]).interpretation(Game.name), morph_pipeline([]).interpretation(Game.release).optional()) megaten = rule( morph_pipeline([ 'Shin Megami Tensei', 'Megami Tensei', ]).interpretation(Game.series), morph_pipeline([ 'Persona', 'Devil Summoner', 'Digital Devil Saga', 'Devil Children Shiro', ]).interpretation(Game.name), in_('234').interpretation(Game.release).optional(), ) assassin = rule( morph_pipeline([ "Assassin's Creed", 'Ассассин', 'Ассассин Крид', 'Ассассинс Крид', ]).interpretation(Game.series.const("Assassin's Creed")), morph_pipeline([ '2', '3', '4', 'II', 'III',
eq, lte, gte, gram, type, tag, length_eq, in_, in_caseless, dictionary, normalized, caseless, is_title ) from yargy.tokenizer import QUOTES INT = type('INT') DOT = eq('.') ADJF = gram('ADJF') NOUN = gram('NOUN') TITLE = is_title() DASH = eq('-') SLASH = eq('/') QUOTE = in_(QUOTES) ANUM = rule( INT, DASH.optional(), in_caseless({ 'я', 'й', 'е', 'ое', 'ая', 'ий', 'ой' }) ) def value(key): @property def field(self): return getattr(self, key) return field
# caseless('rur'), caseless('р'), eq('₽')), DOT.optional())).interpretation(const("RUB")) CURRENCY = or_(EURO, DOLLARS, RUBLES).interpretation(Money.currency) ############ # # MULTIPLIER # ########## MILLION = or_(rule(caseless('млн'), DOT.optional()), rule(normalized('миллион')), rule(in_('мМmM'))).interpretation(const(10**6)) THOUSAND = or_(rule(caseless('т'), DOT), rule(caseless('к')), rule(caseless('k')), rule(caseless('тыс'), DOT.optional()), rule(normalized('тысяча'))).interpretation(const(10**3)) MULTIPLIER = or_(MILLION, THOUSAND).interpretation(Money.multiplier) ####### # # AMOUNT # ######## def normalize_integer(value):
rule(BULVAR_NAME, BULVAR_WORDS) ).interpretation( Street ) ############## # # ADDRESS VALUE # ############# LETTER = in_caseless(set('абвгдежзиклмнопрстуфхшщэюя')) QUOTE = in_(QUOTES) LETTER = or_( rule(LETTER), rule(QUOTE, LETTER, QUOTE) ) VALUE = rule( INT, LETTER.optional() ) SEP = in_(r'/\-') VALUE = or_( rule(VALUE),
from natasha.grammars import date, addr from .helpers import select_span_tokens, ID_TOKENIZER, show_matches, TOKENIZER, load_named_entities INT = type('INT') Socdem = fact('Socdem', ['name', 'gender', 'date_of_birth', 'age', 'location']) GENDERS_DICT = { 'Женщина': 'female', 'женщина': 'female', 'мужчина': 'male', 'Мужчина': 'male' } GENDER = rule(in_(GENDERS_DICT)).interpretation( Socdem.gender.custom(GENDERS_DICT.get)) AGE = rule(INT, normalized('год')).interpretation(Socdem.age) LOCATION = rule(or_(addr.GOROD, addr.DEREVNYA, addr.SELO, addr.POSELOK)).interpretation(Socdem.location) def update_rules(name): NAME = pipeline(name).interpretation(Socdem.name) SOCDEM_ELEMS = rule(or_(NAME, GENDER, date.DATE, AGE, LOCATION)) SOCDEM = rule( NAME, GENDER.optional(),
from .station_title import STATION_TITLE

# Fact for a recognised station: its name plus a 'num' attribute
# that defaults to an empty list.
Station = fact(
    'Station',
    ['name', attribute('num', default=[])]
)

# "ст." or any form of "станция".
STATION_WORD = or_(
    rule(caseless('ст'), '.'),
    rule(normalized('станция')),
)

# "м." or any form of "метро".
METRO_WORD = or_(
    rule(caseless('м'), '.'),
    rule(normalized('метро')),
)

# Quote characters accepted on both sides; the guillemets are side-specific.
__quotes = "„“”‚‘’'\""
LEFT_QUOTE = in_("«" + __quotes)
RIGHT_QUOTE = in_("»" + __quotes)

# Station title, optionally preceded by the station/metro words and an
# opening quote, optionally followed by "-"? + numerals and a closing quote.
STATION = rule(
    STATION_WORD.optional(),
    METRO_WORD.optional(),
    LEFT_QUOTE.optional(),
    STATION_TITLE.interpretation(
        meaning.custom(lambda p: p.value)
    ).interpretation(
        Station.name
    ),
    rule(
        eq('-').optional(),
        LIST_OF_NUMERALS.interpretation(Station.num),
    ).optional(),
    RIGHT_QUOTE.optional(),
).interpretation(
    Station
)
# DATA tires_vendors_path = 'tires.vendors.json' # xls_path = '06.07.18 ДАКАР Уфа.xls' xls_path = 'Прайс_Колобокс_Шины_2018-07-07 (XLS).xls' data_list = xls_to_list(xls_path) # FACTS Tire = fact('Tire', [ 'vendor', 'width', 'profile', 'diameter', 'max_speed_index', 'max_load_index', 'season', 'spikes' ]) Vendor = fact('Vendor', ['id', 'name']) # HELPERS SEP = in_({'-', '/', '|', ':', ';', '.'}) NUM = type_('INT') INT = NUM.interpretation(interp.custom(int)) FLOAT = rule(NUM.repeatable(), in_({',', '.'}), NUM, NUM.optional()).interpretation(interp.custom(to_float)) # TIRE_VENDORS VENDORS_NAME, VENDORS_ID = get_vendor_dict(tires_vendors_path) VENDOR = rule( caseless_pipeline(VENDORS_NAME).interpretation( Vendor.name.normalized().custom( VENDORS_NAME.get))).interpretation(Vendor) # TIRE_HELPERS DIAMETER_WITH_LETTER = rule(NUM, or_(eq('С'), eq('C')).optional()) STRUCTURE = or_(
##############
#
#  ADDRESS VALUE
#
#############


# A single Cyrillic letter from the listed set, matched case-insensitively.
LETTER = in_caseless(set('абвгдежзиклмнопрстуфхшщэюя'))
QUOTE = gram('QUOTE')

# From here on LETTER is a rule: the bare letter or the letter in quotes.
LETTER = or_(
    rule(LETTER),
    rule(QUOTE, LETTER, QUOTE)
)

# Number with an optional letter suffix.
VALUE = rule(
    INT,
    LETTER.optional()
)

SEP = in_({'/', '\\', '-'})

# From here on VALUE also accepts "value SEP value" and "value SEP letter".
VALUE = or_(
    rule(VALUE),
    rule(VALUE, SEP, VALUE),
    rule(VALUE, SEP, LETTER)
)

ADDRESS_VALUE = rule(
    eq('№').optional(),
    VALUE
)


############
#
#    DOM
#
#############


# Any form of "дом", or "д" with an optional dot; the type is fixed to 'дом'.
DOM_WORDS = or_(
    rule(normalized('дом')),
    rule(
        caseless('д'),
        DOT.optional()
    )
).interpretation(
    Building.type.const('дом')
)
) INT = gram('INT') AMWORD = rule( or_( dictionary({ 'тысяча', 'миллион' }), eq('т') ), eq('.').optional() ) SEP = in_({',', '.'}) AMOUNT_ = or_( rule(INT), rule(INT, INT), rule(INT, INT, INT), rule(INT, SEP, INT), rule(INT, SEP, INT, SEP, INT), ) FRACTION_AMOUN = rule( AMOUNT_, SEP, INT )
rule(gram('ADJF').match(gnc)), # международное rule( # историко-просветительское true(), eq('-'), gram('ADJF').match(gnc), ), ), or_(caseless('и'), eq(',')).optional(), ).repeatable() case = case_relation() GENT_GROUP = rule(gram('gent').match(case)).repeatable().optional() QUOTED = rule( TYPE, in_(QUOTES), not_(in_(QUOTES)).repeatable(), in_(QUOTES), ) TRIPLE_QUOTED = rule( TYPE, in_(QUOTES), not_(in_(QUOTES)).repeatable(), in_(QUOTES), not_(in_(QUOTES)).repeatable(), in_(QUOTES), ) QUOTED_WITH_ADJF_PREFIX = rule( ADJF_PREFIX,
true(), eq('-'), gram('ADJF').match(gnc), ), ), or_(caseless('и'), eq(',')).optional(), ).repeatable() case = case_relation() GENT_GROUP = rule( gram('gent').match(case) ).repeatable().optional() QUOTED = rule( TYPE, in_(QUOTES), not_(in_(QUOTES)).repeatable(), in_(QUOTES), ) TRIPLE_QUOTED = rule( TYPE, in_(QUOTES), not_(in_(QUOTES)).repeatable(), in_(QUOTES), not_(in_(QUOTES)).repeatable(), in_(QUOTES), ) QUOTED_WITH_ADJF_PREFIX = rule( ADJF_PREFIX,
#   NUMERAL
#
#######


NUMR = or_(
    gram('NUMR'),
    # https://github.com/OpenCorpora/opencorpora/issues/818
    dictionary({'ноль', 'один'}),
)

MODIFIER = in_caseless({'целых', 'сотых', 'десятых'})

# One element of a spelled-out amount: a number token, a numeral word,
# a fraction modifier, a multiplier or a currency.
PART = or_(
    rule(or_(INT, NUMR, MODIFIER)),
    MILLIARD,
    MILLION,
    THOUSAND,
    CURRENCY,
    COINS_CURRENCY
)

# Opening/closing delimiter of the spelled-out amount.
BOUND = in_('()//')

NUMERAL = rule(BOUND, PART.repeatable(), BOUND)


#######
#
#   AMOUNT
#
########


def normalize_integer(value):
    """Return *value* as an int after removing whitespace, dots and commas.

    Every run of whitespace, '.' and ',' characters is deleted before the
    conversion. ``int()`` raises ``ValueError`` if anything but an integer
    literal remains.
    """
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    integer = re.sub(r'[\s.,]+', '', value)
    return int(integer)
# One element of a spelled-out amount: a number token, a numeral word,
# a fraction modifier, a multiplier or a currency.
PART = or_(
    rule(or_(INT, NUMR, MODIFIER)),
    MILLIARD,
    MILLION,
    THOUSAND,
    CURRENCY,
    COINS_CURRENCY,
)

# Opening/closing delimiter of the spelled-out amount.
BOUND = in_('()//')

# BOUND, one or more PARTs, BOUND.
NUMERAL = rule(BOUND, PART.repeatable(), BOUND)


#######
#
#   AMOUNT
#
########
LETTER = in_caseless(set('абвгдежзиклмнопрстуфхшщэюя')) TYPE_CITY = dictionary({'город'}).interpretation(City.typeCity) STRUCTURE_TYPE = dictionary({'строение', 'ст'}).interpretation(Structure.structureType) TYPE_APPART = dictionary({'квартира'}).interpretation(Appart.typeAppart) BUILDING_TYPE = dictionary({'дом', 'шоссе', 'проспект', 'улица'}).interpretation(Building.buildingType) VALUE = rule(INT, LETTER.optional()) SEP = in_(r'/\-') BUILDING_VALUE = or_(rule(VALUE, LETTER), rule(VALUE)).interpretation(Building.buildingName) STREET_VALUE = dictionary({ 'комсомольский', 'катукова', 'доватора', 'бехтеева', 'артема', 'алтуфьевское', 'миттова', 'школьная', 'рабочая', 'юнтоловский', 'школьная', 'меркулова' }) COMPLEX_STREET = morph_pipeline(['юрия гагарина']) COMPLEX = morph_pipeline([ 'санкт-петербург', 'нижний новгород', 'н.новгород',
Time = fact('Time', ['hours', 'minutes']) hour_re = re.compile(r'([01]\d|\d|2[0-3])') is_hour = custom(hour_re.fullmatch).activate(TOKENIZER) minute_re = re.compile(r'[0-5]\d') is_minute = custom(minute_re.fullmatch).activate(TOKENIZER) HOUR_UNIT = rule(morph_pipeline(['ч', 'час', 'часы']), eq('.').optional()) MINUTE_UNIT = rule(morph_pipeline(['м', 'мин', 'минуты']), eq('.').optional()) # 17:02 ч. TIME_DIGITAL = rule( is_hour.means(Time.hours), in_(':-.'), is_minute.means(Time.minutes), or_(HOUR_UNIT, MINUTE_UNIT).optional(), ).means(Time) # 17ч 02 TIME_HUMAN = rule( is_hour.means(Time.hours), HOUR_UNIT, is_minute.means(Time.minutes), MINUTE_UNIT.optional(), ).means(Time) # в 15:00 TIME = rule(caseless('в').optional(), or_( TIME_DIGITAL,
COINS_CURRENCY = or_(KOPEIKA, rule(CENT), rule(EUROCENT)) ############ # # MULTIPLIER # ########## # TODO: можно выпилить, чтобы шустрее работало MILLIARD = or_(rule(caseless('млрд'), DOT.optional()), rule(normalized('миллиард'))).interpretation(const(10**9)) MILLION = or_(rule(caseless('млн'), DOT.optional()), rule(normalized('миллион')), rule(in_('мМmM'))).interpretation(const(10**6)) THOUSAND = or_(rule(caseless('т'), DOT), rule(caseless('к')), rule(caseless('k')), rule(caseless('тыс'), DOT.optional()), rule(normalized('тысяча'))).interpretation(const(10**3)) MULTIPLIER = or_(MILLIARD, MILLION, THOUSAND).interpretation(Money.multiplier) ######## # # NUMERAL # ####### NUMR = or_( gram('NUMR'),
rule(BULVAR_NAME, BULVAR_WORDS) ).interpretation( Street ) ############## # # ADDR VALUE # ############# LETTER = in_caseless(set('абвгдежзиклмнопрстуфхшщэюя')) QUOTE = in_(QUOTES) LETTER = or_( rule(LETTER), rule(QUOTE, LETTER, QUOTE) ) VALUE = rule( INT, LETTER.optional() ) SEP = in_(r'/\-') VALUE = or_( rule(VALUE),
from yargy.predicates import (eq, in_, gram, normalized, caseless)

# Fact produced by the grammar: an amount and its currency.
Money = fact(
    'Money',
    ['amount', 'currency']
)


# --- currency ---------------------------------------------------------

EURO = normalized('евро')

DOLLARS = or_(
    normalized('доллар'),
    eq('$')
)

# Any form of "рубль", or "руб"/"р" with an optional trailing dot.
RUBLES = or_(
    rule(normalized('рубль')),
    rule(
        or_(
            caseless('руб'),
            caseless('р')
        ),
        eq('.').optional()
    )
)

CURRENCY = or_(
    rule(EURO),
    rule(DOLLARS),
    RUBLES
).interpretation(
    Money.currency
)


# --- amount -----------------------------------------------------------

INT = gram('INT')

# One to three integer tokens in a row, or integers separated by dots.
AMOUNT_ = or_(
    rule(INT),
    rule(INT, INT),
    rule(INT, INT, INT),
    rule(INT, '.', INT),
    rule(INT, '.', INT, '.', INT),
)

# An amount followed by ',' or '.' and one more integer token.
FRACTION_AMOUN = rule(
    AMOUNT_,
    in_({',', '.'}),
    INT
)

AMOUNT = or_(
    AMOUNT_,
    FRACTION_AMOUN
).interpretation(
    Money.amount
)


# --- money ------------------------------------------------------------

MONEY = rule(
    AMOUNT,
    CURRENCY
).interpretation(
    Money
)
PERSON_, ), ).interpretation(Organisation.name) LATIN = rule( gram('OrganisationType'), or_( rule( and_( gram('LATN'), is_capitalized(), ) ), rule( gram('LATN'), in_({'&', '/', '.'}), gram('LATN'), ) ).repeatable() ).interpretation(Organisation.name) KNOWN = rule( gram('Orgn'), GENT_GROUP, ).interpretation(Organisation.name) ORGANISATION_ = or_( QUOTED, QUOTED_WITH_ADJF_PREFIX, BASIC, NAMED,
def __init__(self):
    """Build the predicates, lookup tables and rule lists of the extractor.

    Sets ``last_request``, ``last_result`` and ``name``, then fills:

    * ``_foreignWords`` - list of words (floor and month names);
    * ``_Counts`` - token -> number of people;
    * ``_ToNormalHours`` - token -> "HH:MM" start/end time;
    * ``_ToNormalDelta`` - token -> "HH:MM" duration;
    * ``_rulesFromTO``, ``_rulesFromOn``, ``_rulesOnFrom``, ``_rulesFrom``,
      ``_rulesTo``, ``_rulesCommon``, ``_rulesCount``, ``_rulesTime``,
      ``_rulesPeriod``, ``_rulesCountPeople`` - lists of yargy rules.
    """
    super(NerTimeCount, self).__init__()
    self.last_request = None
    self.last_result = [None, None, None]
    self.name = 'TimeCount'

    # numbers 1..59 converted to words
    n2t60 = [n2t(i) for i in range(1, 60)]

    # accepts numerals that are not collective ones, plus the word 'один'
    def not_coll_numbers(x):
        # parse once and reuse the tag string for both checks
        tag = str(morph.parse(x)[0].tag)
        return ('NUMR' in tag and 'Coll' not in tag) or x == 'один'

    # hours written as words
    hours_t = and_(dictionary(n2t60[:24] + ["полтора", "полдень"]),
                   custom(not_coll_numbers))
    # minutes written as words
    # NOTE(review): not referenced by any rule below - confirm it is needed
    minutes_t = dictionary(n2t60)
    coll_numbers_dic = dictionary(
        ["двое", "трое", "четверо", "пятеро", "шестеро", "семеро"])
    # zero-padded values; "07" was missing from the original set
    list_0n = {
        "00", "01", "02", "03", "04", "05", "06", "07", "08", "09"
    }
    # hours written as digits
    hours_n = or_(and_(gte(1), lte(23)), in_(list_0n))
    # minutes written as digits
    minutes_n = or_(and_(gte(1), lte(59)), in_(list_0n))
    # separators inside hh_mm
    two_points = dictionary([":"])
    separator = dictionary([":", "."])

    # prepositions
    pr_v = rule("в")
    pr_ok = rule("около")
    pr_vrayone = morph_pipeline(["В районе"])
    pr_k = rule("к")
    pr_na = rule("на")
    pr_c = rule("с")
    start_prepositions = or_(pr_ok, pr_v, pr_k, pr_na, pr_c, pr_vrayone)
    pr_vtech = morph_pipeline(["в течение"])
    pr_do = rule("до")
    pr_po = rule("по")
    duration_prepositions = or_(pr_vtech, pr_do, pr_po)

    # parts of the day
    day_periods = or_(rule(normalized("утро")), rule(normalized("день")),
                      rule(normalized("вечер")))
    # "час" is a special case: on its own it denotes a definite time or a
    # duration (the same goes for "человек")
    hour = rule(normalized("час"))
    people = rule(normalized("человек"))

    # words that precede the start time
    start_syn = dictionary([
        "начало", "старт", "встреча", "переговорную", "переговорку",
        "пропуск"
    ])
    start_verbs = dictionary([
        "начать", "прийти", "заказать", "забронировать", "выделить",
        "состоится"
    ])
    # words that precede the duration
    duration_verbs = dictionary(["займёт", "продлится"])
    # words that precede the end time
    end_verbs = dictionary(["закончить", "уйдём", "завершим"])
    end_syn = dictionary(["конец", "окончание", "завершение"])

    # start time introduced with ':' or '-'
    start_with_separator = or_(rule("начало"), rule("старт"),
                               rule("время"),
                               morph_pipeline(["начало встречи"]),
                               morph_pipeline(["старт встречи"]),
                               morph_pipeline(["время встречи"]))
    duration_with_separator = or_(
        rule("продолжительность"),
        morph_pipeline(["продолжительность встречи"]))
    end_with_separator = or_(rule("конец"), rule("окончание"),
                             rule("завершение"),
                             morph_pipeline(["конец встречи"]),
                             morph_pipeline(["окончание встречи"]),
                             morph_pipeline(["завершение встречи"]))

    # day pointers relative to today
    day_pointer = or_(rule("понедельник"), morph_pipeline(["пн."]),
                      rule("пн"), rule("вторник"), morph_pipeline(["вт."]),
                      rule("вт"), rule("среда"), rule("среду"),
                      morph_pipeline(["ср."]), rule("ср"), rule("четверг"),
                      morph_pipeline(["чт."]), rule("чт"), rule("пятница"),
                      rule("пятницу"), morph_pipeline(["пт."]), rule("пт"),
                      rule("суббота"), rule("субботу"),
                      morph_pipeline(["сб."]), rule("сб"),
                      rule("воскресение"), rule("воскресенье"),
                      morph_pipeline(["вс."]), rule("вс"), rule("завтра"),
                      rule("послезавтра"), rule("сегодня"))

    # foreign words
    self._foreignWords = [
        "этаж", "январь", "февраль", "март", "апрель", "май", "июнь",
        "июль", "август", "сентябрь", "октябрь", "ноябрь", "декабрь"
    ]

    # cardinal/collective numerals -> numbers
    # ("8", "9" and "10" used to map to 7, unlike their word forms)
    self._Counts = {
        "человек": 1,
        "1": 1,
        "один": 1,
        "два": 2,
        "2": 2,
        "двое": 2,
        "вдвоём": 2,
        "трое": 3,
        "три": 3,
        "3": 3,
        "втроём": 3,
        "четверо": 4,
        "четыре": 4,
        "4": 4,
        "вчетвером": 4,
        "5": 5,
        "пять": 5,
        "пятеро": 5,
        "впятером": 5,
        "6": 6,
        "шесть": 6,
        "шестеро": 6,
        "7": 7,
        "семь": 7,
        "семеро": 7,
        "8": 8,
        "восемь": 8,
        "9": 9,
        "девять": 9,
        "10": 10,
        "десять": 10
    }

    # time -> normal form
    self._ToNormalHours = {
        "08.00": "08:00",
        "8": "08:00",
        "восемь": "08:00",
        "09.00": "09:00",
        "9": "09:00",
        "девять": "09:00",
        "10.00": "10:00",
        "10": "10:00",
        "десять": "10:00",
        "11.00": "11:00",
        "11": "11:00",
        "одиннадцать": "11:00",
        "12.00": "12:00",
        "12": "12:00",
        "двенадцать": "12:00",
        "полдень": "12:00",
        "13.00": "13:00",
        "1": "13:00",
        "13": "13:00",
        "один": "13:00",
        "час": "13:00",
        "часу": "13:00",
        "14.00": "14:00",
        "2": "14:00",
        "14": "14:00",
        "два": "14:00",
        "15.00": "15:00",
        "3": "15:00",
        "15": "15:00",
        "три": "15:00",
        "16.00": "16:00",
        "4": "16:00",
        "16": "16:00",
        "четыре": "16:00",
        "17.00": "17:00",
        "5": "17:00",
        "17": "17:00",
        "пять": "17:00",
        "18.00": "18:00",
        "6": "18:00",
        "18": "18:00",
        "шесть": "18:00",
        "19.00": "19:00",
        "7": "19:00",
        "19": "19:00",
        "семь": "19:00"
    }

    # duration -> normal form
    # ("7" used to map to the unpadded "7:00", unlike every other value)
    self._ToNormalDelta = {
        "1": "01:00",
        "один": "01:00",
        "час": "01:00",
        "1:5": "01:30",
        "полтора": "01:30",
        "2": "02:00",
        "два": "02:00",
        "3": "03:00",
        "три": "03:00",
        "4": "04:00",
        "четыре": "04:00",
        "5": "05:00",
        "пять": "05:00",
        "6": "06:00",
        "шесть": "06:00",
        "7": "07:00",
        "семь": "07:00"
    }

    # rules for "from time to time"
    self._rulesFromTO = [
        # from time to time
        rule(start_prepositions,
             or_(hour, rule(or_(hours_t, hours_n))),
             separator.optional(), minutes_n.optional(),
             or_(day_periods, hour).optional(), duration_prepositions,
             or_(hours_t, hours_n), separator.optional(),
             minutes_n.optional()),
        # hh:mm - hh:mm
        rule(hours_n, separator, minutes_n, "-", hours_n, separator,
             minutes_n),
        # day time to time
        rule(day_pointer, rule(or_(hours_t, hours_n)),
             separator.optional(), minutes_n.optional(),
             or_(day_periods, hour).optional(), duration_prepositions,
             or_(hours_t, hours_n), separator.optional(),
             minutes_n.optional())
    ]

    # rules for "from time on time"
    self._rulesFromOn = [
        # from time on n hour
        rule(start_prepositions, or_(hours_t, hours_n),
             separator.optional(), minutes_n.optional(),
             or_(day_periods, hour).optional(), pr_na,
             or_(hours_t, hours_n), hour.optional()),
        # from time on hour
        rule(start_prepositions, or_(hours_t, hours_n),
             separator.optional(), minutes_n.optional(),
             or_(day_periods, hour).optional(), pr_na, hour)
    ]

    # rules for "on time from time"
    self._rulesOnFrom = [
        # on n hour from time
        rule(pr_na, or_(hours_t, hours_n), hour, start_prepositions,
             or_(hours_t, hours_n), separator.optional(),
             minutes_n.optional(), or_(day_periods, hour).optional()),
        # on hour from time
        rule(pr_na, hour, start_prepositions, or_(hours_t, hours_n),
             separator.optional(), minutes_n.optional(),
             or_(day_periods, hour).optional())
    ]

    # rules for "from time"
    self._rulesFrom = [
        # day or start or start verb in time
        rule(or_(day_pointer, rule(start_syn), rule(start_verbs)),
             start_prepositions, or_(hours_t, hours_n),
             separator.optional(), minutes_n.optional()),
        # start with separator
        rule(start_with_separator, two_points,
             or_(rule(hours_t), rule(hours_n)), separator.optional(),
             minutes_n.optional()),
        # since time day or hour
        rule(pr_c, or_(rule(hours_t), rule(hours_n)),
             separator.optional(), minutes_n.optional(),
             or_(day_periods, hour)),
        # since hour
        rule(pr_c, hour),
        # on n hours day
        rule(pr_na, or_(hours_t, hours_n), hour.optional(), day_periods),
        # on hour day
        rule(pr_na, hour, day_periods)
    ]

    # rules for the end time and the duration
    self._rulesTo = [
        # end or end verb in time
        rule(or_(end_syn, end_verbs), start_prepositions,
             or_(rule(hours_t), rule(hours_n), hour),
             separator.optional(), minutes_n.optional()),
        # duration verb time-time
        rule(duration_verbs, hours_n.optional(),
             dictionary(["."]).optional(), minutes_n.optional(), "-",
             hours_n.optional(), dictionary(["."]).optional(), hour),
        # duration verb time
        rule(duration_verbs, or_(hours_t, hours_n),
             dictionary(["."]).optional(), minutes_n.optional(), hour),
        # end with separation
        rule(end_with_separator, two_points,
             or_(rule(hours_t), rule(hours_n)), separator.optional(),
             minutes_n.optional()),
        # duration with separation
        rule(duration_with_separator, two_points,
             or_(rule(hours_t), rule(hours_n)), separator.optional(),
             minutes_n.optional())
    ]

    # rules shared by start time, end time and duration
    self._rulesCommon = [
        # in time + hour or day period
        rule(or_(pr_v, pr_vrayone, pr_k), or_(hours_t, hours_n),
             or_(hour, day_periods)),
        # on time + day period
        rule(pr_na, or_(hours_t, hours_n), or_(day_periods)),
        # in hh:mm
        rule(pr_v, hours_n, separator, minutes_n),
        # hh:mm
        rule(hours_n, two_points, minutes_n,
             or_(day_periods, hour).optional()),
        # on n hour
        rule(pr_na, or_(hours_t, hours_n), hour),
        # on hour
        rule(pr_na, hour)
    ]

    # rules for the number of people
    self._rulesCount = [
        # coll number
        rule(coll_numbers_dic),
        # n people
        # NOTE(review): unlike _rulesCountPeople this rule has no `people`
        # item and its only item is optional - confirm this is intended
        rule(or_(hours_t, hours_n).optional())
    ]

    # rules used for follow-up requests
    self._rulesTime = [
        # every supported time format
        rule(or_(rule(hours_t), rule(hours_n), hour),
             separator.optional(), minutes_n.optional())
    ]
    self._rulesPeriod = [
        # every supported time interval
        rule(or_(rule(hours_t), rule(hours_n), hour),
             dictionary(["."]).optional(), minutes_n.optional())
    ]
    self._rulesCountPeople = [
        # collective numerals
        rule(coll_numbers_dic),
        # n people
        rule(or_(hours_t, hours_n).optional(), people)
    ]
LANE, SQUARE, HIGHWAY, EMBANKMENT, BOULEVARD, TRACT, STREET ).interpretation( Address.Street ) # # # # # # # # # От дома до квартиры # # # # # # # # # # # # Считаем номер "дома"/"квартиры" LETTER = in_caseless(set('абвгдежзиклмнопрстуфхшщэюя')) QUOTE = in_(QUOTES) # Буква в номере LETTER = or_( rule(LETTER), rule(QUOTE, LETTER, QUOTE) ) # Разделители SEP = in_({'-', '/', '\\'}) # Разные кейсы номера дома №1 VALUE = or_( rule( INT, SPACEBAR,