def initialize(lang: 'MorphLang' = None) -> None:
    """Initialize the processing service.

    Every analyzer has to be initialized separately. When Sdk.Initialize()
    is called instead, it initializes both the service and all analyzers.

    The call is a no-op once the service has been marked as initialized.

    Args:
        lang(MorphLang): required languages (Russian and English by default)
    """
    from pullenti.ner.core.internal.NumberExHelper import NumberExHelper
    from pullenti.ner.core.internal.BlockLine import BlockLine
    from pullenti.ner.core.internal.NounPhraseItem import NounPhraseItem
    from pullenti.ner.core.PrepositionHelper import PrepositionHelper
    from pullenti.ner.core.ConjunctionHelper import ConjunctionHelper
    if ProcessorService.__m_inited:
        return
    ProcessorService.__m_inited = True
    MorphologyService.initialize(lang)
    DerivateService.initialize(lang)
    # The flag is global state on Termin: make sure it is switched back off
    # even if one of the helper initializers raises.
    Termin.ASSIGN_ALL_TEXTS_AS_NORMAL = True
    try:
        PrepositionHelper._initialize()
        ConjunctionHelper._initialize()
        NounPhraseItem._initialize()
        NumberHelper._initialize()
        NumberExHelper._initialize()
        BlockLine.initialize()
    finally:
        Termin.ASSIGN_ALL_TEXTS_AS_NORMAL = False
def create_noun_group(gr : 'SemGraph', npt : 'NounPhraseToken') -> 'SemObject':
    """Build a semantic object for the noun of a noun phrase and add it to the graph.

    The morphology of the new object is filled in depending on what the noun
    is: a multi-token noun, a plain text token, a referent token or a number
    token. Adjectives of the phrase and its internal noun (if any) are linked
    to the object with DETAIL links.

    Args:
        gr: graph that receives the new object and its links
        npt: noun phrase whose noun is converted

    Returns:
        The created object, or None when the noun is a referent token
        without a referent.
    """
    head = npt.noun
    noun = head.begin_token
    sem = SemObject(gr)
    sem.tokens.append(head)

    # Object kind: pronouns are distinguished from ordinary nouns.
    if head.morph.class0_.is_personal_pronoun:
        sem.typ = SemObjectType.PERSONALPRONOUN
    elif head.morph.class0_.is_pronoun:
        sem.typ = SemObjectType.PRONOUN
    else:
        sem.typ = SemObjectType.NOUN

    if npt.noun.begin_token != npt.noun.end_token:
        # The noun spans several tokens: take normalized texts from the noun itself.
        sem.morph.normal_case = npt.noun.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
        sem.morph.normal_full = npt.noun.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
        sem.morph.class0_ = MorphClass.NOUN
        sem.morph.number = npt.morph.number
        sem.morph.gender = npt.morph.gender
        sem.morph.case_ = npt.morph.case_
    elif isinstance(noun, TextToken):
        # Use the first word form that agrees with the phrase morphology.
        for form in noun.morph.items:
            if form.check_accord(npt.morph, False, False) and isinstance(form, MorphWordForm):
                CreateHelper._set_morph(sem, Utils.asObjectOrNull(form, MorphWordForm))
                break
        if sem.morph.normal_case is None:
            sem.morph.normal_case = noun.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
            sem.morph.normal_full = noun.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
        groups = DerivateService.find_derivates(sem.morph.normal_full, True, None)
        if groups is not None and len(groups) > 0:
            sem.concept = groups[0]
    elif isinstance(noun, ReferentToken):
        referent = noun.referent
        if referent is None:
            return None
        sem.morph.normal_case = str(referent)
        sem.morph.normal_full = sem.morph.normal_case
        sem.concept = referent
    elif isinstance(noun, NumberToken):
        number = Utils.asObjectOrNull(noun, NumberToken)
        sem.morph.gender = noun.morph.gender
        sem.morph.number = noun.morph.number
        if number.int_value is None:
            # No integer value: fall back to the upper-cased source text.
            sem.morph.normal_case = noun.get_source_text().upper()
            sem.morph.normal_full = sem.morph.normal_case
        else:
            sem.morph.normal_case = NumberHelper.get_number_adjective(number.int_value, noun.morph.gender, noun.morph.number)
            sem.morph.normal_full = NumberHelper.get_number_adjective(number.int_value, MorphGender.MASCULINE, MorphNumber.SINGULAR)

    noun.tag = sem

    if len(npt.adjectives) > 0:
        for adjective in npt.adjectives:
            # For a multi-noun phrase only the first adjective is attached.
            if npt.multi_nouns and adjective != npt.adjectives[0]:
                break
            adj_sem = CreateHelper.create_npt_adj(gr, npt, adjective)
            if adj_sem is not None:
                gr.add_link(SemLinkType.DETAIL, sem, adj_sem, "какой", False, None)

    if npt.internal_noun is not None:
        inner_sem = CreateHelper.create_noun_group(gr, npt.internal_noun)
        if inner_sem is not None:
            gr.add_link(SemLinkType.DETAIL, sem, inner_sem, None, False, None)

    gr.objects.append(sem)
    return sem
def percent(self, value) -> float:
    """Store the percent slot and return the value passed in.

    A positive value is written as text produced by
    NumberHelper.doubleToString; any other value writes None into the slot.
    """
    text = NumberHelper.doubleToString(value) if value > 0 else None
    self.addSlot(FundsReferent.ATTR_PERCENT, text, True, 0)
    return value
def create_referent_with_register(self, ad: 'AnalyzerData') -> 'UnitReferent':
    """Create the unit referent for this token together with its chain of base units.

    The referent comes from self.unit when it is set, otherwise a new
    "unknown" referent is made from self.unknown_name, otherwise
    self.ext_onto is used. A power other than 1 is stored in the ATTR_POW slot.

    Args:
        ad: analyzer data; when not None every referent of the chain is
            passed through ad.register_referent, starting from the last
            base unit of the chain.

    Returns:
        The first referent of the chain (the one for this token).
    """
    if self.unit is not None:
        root = UnitToken.__create_referent(self.unit)
    elif self.unknown_name is not None:
        root = UnitReferent()
        root.add_slot(UnitReferent.ATTR_NAME, self.unknown_name, False, 0)
        root.is_unknown = True
    else:
        root = self.ext_onto
    if self.pow0_ != 1:
        root.add_slot(UnitReferent.ATTR_POW, str(self.pow0_), False, 0)

    # chain[0] is this unit, followed by its base unit, the base of the base, ...
    chain = [root]
    if self.unit is not None:
        base = self.unit.base_unit
        while base is not None:
            chain.append(UnitToken.__create_referent(base))
            base = base.base_unit

    # Walk from the end of the chain so that a base referent is registered
    # before it is linked from the unit derived from it.
    for idx in reversed(range(len(chain))):
        if ad is not None:
            chain[idx] = Utils.asObjectOrNull(ad.register_referent(chain[idx]), UnitReferent)
        if idx == 0:
            continue
        derived = chain[idx - 1]
        derived.add_slot(UnitReferent.ATTR_BASEUNIT, chain[idx], False, 0)
        multiplier = derived.tag.base_multiplier
        if multiplier != 0:
            derived.add_slot(UnitReferent.ATTR_BASEFACTOR, NumberHelper.double_to_string(multiplier), False, 0)
    return chain[0]
def tryAttach(t: 'Token', can_be_pure_number: bool = False, typ: 'OrgItemTypeToken' = None) -> 'OrgItemNumberToken':
    """Try to recognize an organization number starting at token t.

    Recognized forms, in order:
      * a number prefix found by MiscHelper.checkNumberPrefix followed by a number;
      * a hyphen glued to a number that is not an age expression;
      * a number glued to a preceding hyphen;
      * for military-unit / colony types: a number optionally extended with
        "-number", "/number", or a single letter;
      * for the same types: a single letter followed (optionally through a
        hyphen) by a number.

    Args:
        t: first token to analyze
        can_be_pure_number: not used by this implementation
        typ: organization type token; its ``typ`` text enables the
            military-unit / colony forms

    Returns:
        The number token, or None when nothing was recognized.
    """
    if t is None:
        return None

    def _is_numbered_kind() -> bool:
        # Organization kinds whose numbers get the extended treatment.
        if typ is None or typ.typ is None:
            return False
        kind = typ.typ
        if kind == "войсковая часть" or kind == "військова частина":
            return True
        return "колония" in kind or "колонія" in kind

    as_text = Utils.asObjectOrNull(t, TextToken)
    if as_text is not None:
        after_prefix = MiscHelper.checkNumberPrefix(as_text)
        if isinstance(after_prefix, NumberToken) and not after_prefix.is_newline_before:
            return OrgItemNumberToken._new1704(as_text, after_prefix, str(after_prefix.value))

    glued_hyphen = (t.is_hiphen and isinstance(t.next0_, NumberToken)
                    and not t.is_whitespace_before and not t.is_whitespace_after)
    if glued_hyphen and NumberHelper.tryParseAge(t.next0_) is None:
        return OrgItemNumberToken._new1704(t, t.next0_, str(t.next0_.value))

    if isinstance(t, NumberToken):
        if not t.is_whitespace_before and t.previous is not None and t.previous.is_hiphen:
            return OrgItemNumberToken._new1704(t, t, str(t.value))
        # NOTE(review): "length >= 4 or length <= 6" holds for every length;
        # possibly "and" was intended - confirm before changing, short
        # numbers currently pass this test.
        if _is_numbered_kind() and (t.length_char >= 4 or t.length_char <= 6):
            res = OrgItemNumberToken._new1704(t, t, str(t.value))
            follower = t.next0_
            if (follower is not None and (follower.is_hiphen or follower.isCharOf("\\/"))
                    and not follower.is_whitespace_after):
                tail = follower.next0_
                if isinstance(tail, NumberToken) and (t.length_char + tail.length_char) < 9:
                    res.end_token = tail
                    res.number = "{0}-{1}".format(res.number, res.end_token.value)
                elif isinstance(tail, TextToken) and tail.length_char == 1 and tail.chars.is_letter:
                    res.end_token = tail
                    res.number = "{0}{1}".format(res.number, res.end_token.term)
            elif isinstance(follower, TextToken) and follower.length_char == 1 and follower.chars.is_letter:
                res.end_token = follower
                res.number = "{0}{1}".format(res.number, res.end_token.term)
            return res

    single_letter = isinstance(t, TextToken) and t.length_char == 1 and t.chars.is_letter
    if single_letter and not t.is_whitespace_after and _is_numbered_kind():
        number_tok = t.next0_
        if number_tok is not None and number_tok.is_hiphen:
            number_tok = number_tok.next0_
        if isinstance(number_tok, NumberToken) and not number_tok.is_whitespace_before:
            res = OrgItemNumberToken(t, number_tok)
            res.number = "{0}{1}".format(t.term, number_tok.value)
            return res
    return None
def _addFioIdentity(self, last_name : 'PersonMorphCollection', first_name : 'PersonMorphCollection', middle_name : object) -> None:
    """Add last name, first name and middle name variants to the referent slots.

    Args:
        last_name: surname variants; when its ``number`` is positive the
            number is stored as a nickname instead (in Roman form if
            NumberHelper.getNumberRoman returns one, else as decimal text)
        first_name: first name variants
        middle_name: either a ready string or a PersonMorphCollection
    """
    from pullenti.ner.person.internal.PersonMorphCollection import PersonMorphCollection

    if last_name is not None:
        if last_name.number > 0:
            nickname = NumberHelper.getNumberRoman(last_name.number)
            if nickname is None:
                nickname = str(last_name.number)
            self.addSlot(PersonReferent.ATTR_NICKNAME, nickname, False, 0)
        else:
            last_name.correct()
            self.__m_surname_occurs.append(last_name)
            for surname in last_name.values:
                self.addSlot(PersonReferent.ATTR_LASTNAME, surname, False, 0)

    if first_name is not None:
        first_name.correct()
        # Only heads longer than two characters are remembered as occurrences.
        if first_name.head is not None and len(first_name.head) > 2:
            self.__m_name_occurs.append(first_name)
        for name in first_name.values:
            self.addSlot(PersonReferent.ATTR_FIRSTNAME, name, False, 0)

    if isinstance(middle_name, str):
        self.addSlot(PersonReferent.ATTR_MIDDLENAME, middle_name, False, 0)
    elif isinstance(middle_name, PersonMorphCollection):
        middle = Utils.asObjectOrNull(middle_name, PersonMorphCollection)
        if middle.head is not None and len(middle.head) > 2:
            self.__m_sec_occurs.append(middle)
        for name in middle.values:
            self.addSlot(PersonReferent.ATTR_MIDDLENAME, name, False, 0)

    self._correctData()
def initialize(lang: 'MorphLang' = None) -> None:
    """Initialize the processing service.

    Attention! After this call the analyzers have to be initialized as well
    (see the documentation).

    The call is a no-op once the service has been marked as initialized.

    Args:
        lang(MorphLang): required languages (Russian and English by default)
    """
    from pullenti.ner.core.internal.NumberExHelper import NumberExHelper
    from pullenti.ner.core.internal.NounPhraseItem import NounPhraseItem
    if ProcessorService.__m_inited:
        return
    ProcessorService.__m_inited = True
    Morphology.initialize(lang)
    Explanatory.initialize(lang)
    # The flag is global state on Termin: make sure it is switched back off
    # even if one of the helper initializers raises.
    Termin.ASSIGN_ALL_TEXTS_AS_NORMAL = True
    try:
        NounPhraseItem._initialize()
        NumberHelper._initialize()
        NumberExHelper._initialize()
        BlockLine.initialize()
    finally:
        Termin.ASSIGN_ALL_TEXTS_AS_NORMAL = False
def percent(self) -> float:
    """Percent of the total amount.

    Returns 0 when the percent slot is empty or its text cannot be
    converted by NumberHelper.stringToDouble.
    """
    raw = self.getStringValue(FundsReferent.ATTR_PERCENT)
    parsed = None if raw is None else NumberHelper.stringToDouble(raw)
    return 0 if parsed is None else parsed
def real_value(self, value_) -> float:
    """Store a real amount as an integer part (ATTR_VALUE) and a rest (ATTR_REST).

    The integer part is the text of the number up to the decimal point.
    The rest is computed from the difference between value_ and self.value,
    which is read after ATTR_VALUE has been written, multiplied by 100.

    Returns:
        The value passed in.
    """
    text = NumberHelper.doubleToString(value_)
    dot = text.find('.')
    whole = text[:dot] if dot > 0 else text
    self.addSlot(MoneyReferent.ATTR_VALUE, whole, True, 0)
    hundredths = (value_ - self.value) * 100
    # The small epsilon guards floor() against values like 49.9999999.
    self.addSlot(MoneyReferent.ATTR_REST, str(math.floor(hundredths + .0001)), True, 0)
    return value_
def __attach_spec_postfix(t: 'Token') -> 'NumberExToken':
    """Recognize a one-token special postfix: the percent sign or a money character.

    Returns:
        A NumberExToken covering t with an empty value, or None when t is
        None or is neither "%" nor a money character.
    """
    if t is None:
        return None
    if t.is_char_of("%"):
        return NumberExToken(t, t, "", NumberSpellingType.DIGIT, NumberExType.PERCENT)
    currency = NumberHelper._is_money_char(t)
    if currency is None:
        return None
    return NumberExToken._new415(t, t, "", NumberSpellingType.DIGIT, NumberExType.MONEY, currency)
def __attachSpecPostfix(t: 'Token') -> 'NumberExToken':
    """Recognize a one-token special postfix: the percent sign or a money character.

    Returns:
        A NumberExToken covering t with an empty value, or None when t is
        None or is neither "%" nor a money character.
    """
    if t is None:
        return None
    if t.isCharOf("%"):
        return NumberExToken(t, t, "", NumberSpellingType.DIGIT, NumberExType.PERCENT)
    currency = NumberHelper._isMoneyChar(t)
    if currency is None:
        return None
    return NumberExToken._new478(t, t, "", NumberSpellingType.DIGIT, NumberExType.MONEY, currency)
def value(self, value_) -> str:
    """Set the textual value and refresh the cached integer and real values.

    The stored text is normalized: None becomes "", a trailing ".0" is
    dropped and leading zeros are stripped (a zero right before a decimal
    point is kept). The integer cache becomes None when the text is not an
    integer; the real cache becomes NaN when the text cannot be converted.

    Returns:
        The value passed in (not the normalized text).
    """
    from pullenti.ner.core.NumberHelper import NumberHelper

    self.__m_value = Utils.ifNotNull(value_, "")
    if len(self.__m_value) > 2 and self.__m_value.endswith(".0"):
        self.__m_value = self.__m_value[:-2]
    while len(self.__m_value) > 1 and self.__m_value[0] == '0' and self.__m_value[1] != '.':
        self.__m_value = self.__m_value[1:]

    # tryParseInt reports success through its result and the number through the wrapper.
    parsed_int = RefOutArgWrapper(0)
    self.__m_int_val = parsed_int.value if Utils.tryParseInt(self.__m_value, parsed_int) else None

    parsed_real = NumberHelper.stringToDouble(self.__m_value)
    self.__m_real_val = math.nan if parsed_real is None else parsed_real
    return value_
def create(t: 'Token', names: 'TerminCollection') -> 'BlockLine':
    """Analyze one text line starting at token t and describe it as a BlockLine.

    The result covers the tokens from t up to the last token before the next
    line break. The function skips a leading numbering prefix, tries to
    recognize a block keyword through the BlockLine ontology, optionally a
    known name from ``names``, collects simple statistics (words, non-words,
    upper case, verbs) and finally applies noun-phrase heuristics to classify
    still undefined lines as INTRO, CONSLUSION or LITERATURE.

    Args:
        t: first token of the line
        names: optional collection of known block names (may be None)

    Returns:
        The BlockLine, or None when t is None or when the line consists only
        of a "list / index / conclusion"-like noun phrase.
    """
    if (t is None):
        return None
    res = BlockLine(t, t)
    # Extend the line up to the last token before the next line break.
    tt = t
    while tt is not None:
        if (tt != t and tt.is_newline_before):
            break
        else:
            res.end_token = tt
        tt = tt.next0_
    # Skip a leading numbering prefix: arabic or roman numbers, each followed
    # by a dot or by a text token that is not all lower case.
    nums = 0
    while t is not None and t.next0_ is not None and t.end_char <= res.end_char:
        if (isinstance(t, NumberToken)):
            pass
        else:
            rom = NumberHelper.tryParseRoman(t)
            if (rom is not None and rom.end_token.next0_ is not None):
                t = rom.end_token
            else:
                break
        if (t.next0_.isChar('.')):
            pass
        elif ((isinstance(t.next0_, TextToken)) and not t.next0_.chars.is_all_lower):
            pass
        else:
            break
        res.number_end = t
        t = t.next0_
        if (t.isChar('.') and t.next0_ is not None):
            res.number_end = t
            t = t.next0_
        if (t.is_newline_before):
            return res
        nums += 1
    # Look for a block keyword, directly or at the noun of a multi-token noun phrase.
    tok = BlockLine.__m_ontology.tryParse(t, TerminParseAttr.NO)
    if (tok is None):
        npt1 = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
        if (npt1 is not None and npt1.end_token != npt1.begin_token):
            tok = BlockLine.__m_ontology.tryParse(npt1.noun.begin_token, TerminParseAttr.NO)
    if (tok is not None):
        # A keyword right after a colon is rejected.
        if (t.previous is not None and t.previous.isChar(':')):
            tok = (None)
    if (tok is not None):
        typ_ = Utils.valToEnum(tok.termin.tag, BlkTyps)
        if (typ_ == BlkTyps.CONSLUSION):
            # Accepted only at the end of the line or when followed by
            # "<preposition> <CHAPTER keyword>".
            if (t.is_newline_after):
                pass
            elif (t.next0_ is not None and t.next0_.morph.class0_.is_preposition and t.next0_.next0_ is not None):
                tok2 = BlockLine.__m_ontology.tryParse(t.next0_.next0_, TerminParseAttr.NO)
                if (tok2 is not None and (Utils.valToEnum(tok2.termin.tag, BlkTyps)) == BlkTyps.CHAPTER):
                    pass
                else:
                    tok = (None)
            else:
                tok = (None)
        # The keyword must be in the base language of the text.
        if (t.kit.base_language != t.morph.language):
            tok = (None)
        if (typ_ == BlkTyps.INDEX and not t.isValue("ОГЛАВЛЕНИЕ", None)):
            if (not t.is_newline_after and t.next0_ is not None):
                npt = NounPhraseHelper.tryParse(t.next0_, NounPhraseParseAttr.NO, 0)
                if (npt is not None and npt.is_newline_after and npt.morph.case_.is_genitive):
                    tok = (None)
                elif (npt is None):
                    tok = (None)
        if ((typ_ == BlkTyps.INTRO and tok is not None and not tok.is_newline_after) and t.isValue("ВВЕДЕНИЕ", None)):
            # The word followed by a genitive noun phrase is not treated as a heading.
            npt = NounPhraseHelper.tryParse(t.next0_, NounPhraseParseAttr.NO, 0)
            if (npt is not None and npt.morph.case_.is_genitive):
                tok = (None)
        if (tok is not None):
            if (res.number_end is None):
                res.number_end = tok.end_token
                if (res.number_end.end_char > res.end_char):
                    res.end_token = res.number_end
            res.typ = typ_
            t = tok.end_token
            if (t.next0_ is not None and t.next0_.isCharOf(":.")):
                t = t.next0_
                res.end_token = t
            if (t.is_newline_after or t.next0_ is None):
                return res
            t = t.next0_
    # A paragraph sign followed by a number marks a chapter.
    if (t.isChar('§') and (isinstance(t.next0_, NumberToken))):
        res.typ = BlkTyps.CHAPTER
        res.number_end = t
        t = t.next0_
    if (names is not None):
        # A known name that ends the line makes the line a named block.
        tok2 = names.tryParse(t, TerminParseAttr.NO)
        if (tok2 is not None and tok2.end_token.is_newline_after):
            res.end_token = tok2.end_token
            res.is_exist_name = True
            if (res.typ == BlkTyps.UNDEFINED):
                # Re-analyze the part after the number without names to pick a type.
                li2 = BlockLine.create((None if res.number_end is None else res.number_end.next0_), None)
                if (li2 is not None and ((li2.typ == BlkTyps.LITERATURE or li2.typ == BlkTyps.INTRO or li2.typ == BlkTyps.CONSLUSION))):
                    res.typ = li2.typ
                else:
                    res.typ = BlkTyps.CHAPTER
            return res
    # Detect a dotted tail before a trailing number or dot
    # (presumably a table-of-contents leader - confirm) and step back over the dots.
    t1 = res.end_token
    if ((((isinstance(t1, NumberToken)) or t1.isChar('.'))) and t1.previous is not None):
        t1 = t1.previous
        if (t1.isChar('.')):
            res.has_content_item_tail = True
            while t1 is not None and t1.begin_char > res.begin_char:
                if (not t1.isChar('.')):
                    break
                t1 = t1.previous
    # Collect word statistics for the rest of the line.
    res.is_all_upper = True
    while t is not None and t.end_char <= t1.end_char:
        if (not ((isinstance(t, TextToken))) or not t.chars.is_letter):
            res.not_words += 1
        else:
            mc = t.getMorphClassInDictionary()
            if (mc.is_undefined):
                res.not_words += 1
            elif (t.length_char > 2):
                res.words += 1
            if (not t.chars.is_all_upper):
                res.is_all_upper = False
            if ((t).is_pure_verb):
                # Words ending with "ING" are not counted as verbs.
                if (not (t).term.endswith("ING")):
                    res.has_verb = True
        t = t.next0_
    if (res.typ == BlkTyps.UNDEFINED):
        npt = NounPhraseHelper.tryParse((res.begin_token if res.number_end is None else res.number_end.next0_), NounPhraseParseAttr.NO, 0)
        if (npt is not None):
            if (npt.noun.isValue("ХАРАКТЕРИСТИКА", None) or npt.noun.isValue("СОДЕРЖАНИЕ", "ЗМІСТ")):
                # INTRO candidate: the rest of the line must consist of genitive noun phrases.
                ok = True
                tt = npt.end_token.next0_
                first_pass2779 = True
                while True:
                    if first_pass2779:
                        first_pass2779 = False
                    else:
                        tt = tt.next0_
                    if (not (tt is not None and tt.end_char <= res.end_char)):
                        break
                    if (tt.isChar('.')):
                        continue
                    npt2 = NounPhraseHelper.tryParse(tt, NounPhraseParseAttr.NO, 0)
                    if (npt2 is None or not npt2.morph.case_.is_genitive):
                        ok = False
                        break
                    tt = npt2.end_token
                    if (tt.end_char > res.end_char):
                        # The phrase runs past the line: extend the line to the next line break.
                        res.end_token = tt
                        if (not tt.is_newline_after):
                            while res.end_token.next0_ is not None:
                                if (res.end_token.is_newline_after):
                                    break
                                res.end_token = res.end_token.next0_
                if (ok):
                    res.typ = BlkTyps.INTRO
                    res.is_exist_name = True
            elif (npt.noun.isValue("ВЫВОД", "ВИСНОВОК") or npt.noun.isValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")):
                # CONSLUSION candidate: only commas, dots, "and" and a fixed set of nouns may follow.
                ok = True
                tt = npt.end_token.next0_
                first_pass2780 = True
                while True:
                    if first_pass2780:
                        first_pass2780 = False
                    else:
                        tt = tt.next0_
                    if (not (tt is not None and tt.end_char <= res.end_char)):
                        break
                    if (tt.isCharOf(",.") or tt.is_and):
                        continue
                    npt1 = NounPhraseHelper.tryParse(tt, NounPhraseParseAttr.NO, 0)
                    if (npt1 is not None):
                        if (npt1.noun.isValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ") or npt1.noun.isValue("РЕКОМЕНДАЦИЯ", "РЕКОМЕНДАЦІЯ") or npt1.noun.isValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")):
                            tt = npt1.end_token
                            if (tt.end_char > res.end_char):
                                res.end_token = tt
                                if (not tt.is_newline_after):
                                    while res.end_token.next0_ is not None:
                                        if (res.end_token.is_newline_after):
                                            break
                                        res.end_token = res.end_token.next0_
                            continue
                    ok = False
                    break
                if (ok):
                    res.typ = BlkTyps.CONSLUSION
                    res.is_exist_name = True
        if (res.typ == BlkTyps.UNDEFINED and npt is not None and npt.end_char <= res.end_char):
            # LITERATURE candidate: a publication noun, or a "list / index / ..." noun
            # followed by publication-like nouns.
            ok = False
            publ = 0
            if (BlockLine.__isPub(npt)):
                ok = True
                publ = 1
            elif ((npt.noun.isValue("СПИСОК", None) or npt.noun.isValue("УКАЗАТЕЛЬ", "ПОКАЖЧИК") or npt.noun.isValue("ПОЛОЖЕНИЕ", "ПОЛОЖЕННЯ")) or npt.noun.isValue("ВЫВОД", "ВИСНОВОК") or npt.noun.isValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")):
                # Such a phrase alone on the line rejects the whole line.
                if (npt.end_char == res.end_char):
                    return None
                ok = True
            if (ok):
                if (npt.begin_token == npt.end_token and npt.noun.isValue("СПИСОК", None) and npt.end_char == res.end_char):
                    ok = False
                tt = npt.end_token.next0_
                first_pass2781 = True
                while True:
                    if first_pass2781:
                        first_pass2781 = False
                    else:
                        tt = tt.next0_
                    if (not (tt is not None and tt.end_char <= res.end_char)):
                        break
                    if (tt.isCharOf(",.:") or tt.is_and or tt.morph.class0_.is_preposition):
                        continue
                    if (tt.isValue("ОТРАЖЕНЫ", "ВІДОБРАЖЕНІ")):
                        continue
                    npt = NounPhraseHelper.tryParse(tt, NounPhraseParseAttr.NO, 0)
                    if (npt is None):
                        ok = False
                        break
                    if (((BlockLine.__isPub(npt) or npt.noun.isValue("РАБОТА", "РОБОТА") or npt.noun.isValue("ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")) or npt.noun.isValue("АВТОР", None) or npt.noun.isValue("ТРУД", "ПРАЦЯ")) or npt.noun.isValue("ТЕМА", None) or npt.noun.isValue("ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ")):
                        tt = npt.end_token
                        if (BlockLine.__isPub(npt)):
                            publ += 1
                        if (tt.end_char > res.end_char):
                            res.end_token = tt
                            if (not tt.is_newline_after):
                                while res.end_token.next0_ is not None:
                                    if (res.end_token.is_newline_after):
                                        break
                                    res.end_token = res.end_token.next0_
                        continue
                    ok = False
                    break
                if (ok):
                    res.typ = BlkTyps.LITERATURE
                    res.is_exist_name = True
                    # Without any publication word, a line in the first two thirds
                    # of the text is downgraded.
                    if (publ == 0 and (res.end_char < ((math.floor((len(res.kit.sofa.text) * 2) / 3))))):
                        if (res.number_end is not None):
                            res.typ = BlkTyps.MISC
                        else:
                            res.typ = BlkTyps.UNDEFINED
    return res
def __parse_number(t : 'Token', res : 'InstrToken1', prev : 'InstrToken1') -> None:
    """Parse an item number starting at t and store it in res.

    Four forms are tried in order: digit numbers (optionally a range and up
    to four levels such as "1.2.3"), numbers written in words (only for
    container types), roman numbers (optionally followed by one or two digit
    levels) and single letters. The function writes res.numbers,
    res.num_typ, res.num_begin_token, res.num_end_token, res.end_token,
    res.num_suffix and res.min_number; it returns nothing.

    Args:
        t: token where the number is expected
        res: the token being filled in
        prev: previous item (may be None); only its last_number is used, to
            check that a letter continues the sequence
    """
    if (((isinstance(t, NumberToken)) and t.int_value is not None and t.typ == NumberSpellingType.DIGIT) and (t.int_value < 3000)):
        if (len(res.numbers) >= 4):
            pass
        if (t.morph.class0_.is_adjective and res.typ_container_rank == 0):
            return
        # A number with a measure postfix glued to it is not an item number.
        nwp = NumberHelper.try_parse_number_with_postfix(t)
        if (nwp is not None):
            if (nwp.end_token.is_whitespace_before):
                pass
            else:
                return
        # The number is followed closely by a lower-case word.
        if ((t.next0_ is not None and (t.whitespaces_after_count < 3) and t.next0_.chars.is_letter) and t.next0_.chars.is_all_lower):
            if (not t.is_whitespace_after and t.next0_.length_char == 1):
                pass
            elif (len(res.numbers) == 0):
                res.num_typ = NumberTypes.DIGIT
                res.numbers.append(str(t.value))
                res.end_token = t
                res.num_end_token = res.end_token
                res.num_begin_token = res.num_end_token
                return
            else:
                return
        if (res.num_typ == NumberTypes.UNDEFINED):
            res.num_typ = NumberTypes.DIGIT
        else:
            res.num_typ = NumberTypes.COMBO
        if (len(res.numbers) > 0 and t.is_whitespace_before):
            return
        if (len(res.numbers) == 0):
            res.num_begin_token = t
        # Range "N-M" or "N)-M" with M > N: remember N as the minimum and continue from M.
        if ((t.next0_ is not None and t.next0_.is_hiphen and (isinstance(t.next0_.next0_, NumberToken))) and t.next0_.next0_.int_value is not None and t.next0_.next0_.int_value > t.int_value):
            res.min_number = str(t.value)
            t = t.next0_.next0_
        elif (((t.next0_ is not None and t.next0_.is_char_of(")") and t.next0_.next0_ is not None) and t.next0_.next0_.is_hiphen and (isinstance(t.next0_.next0_.next0_, NumberToken))) and t.next0_.next0_.next0_.int_value is not None and t.next0_.next0_.next0_.int_value > t.int_value):
            res.min_number = str(t.value)
            t = t.next0_.next0_.next0_
        res.numbers.append(str(t.value))
        res.num_end_token = t
        res.end_token = res.num_end_token
        res.num_suffix = (None)
        # Collect further levels, at most four numbers in total.
        ttt = t.next0_
        first_pass3281 = True
        while True:
            if first_pass3281:
                first_pass3281 = False
            else:
                ttt = ttt.next0_
            if (not (ttt is not None and (len(res.numbers) < 4))):
                break
            ok1 = False
            ok2 = False
            # ok1: ".N" or "_N"; ok2: "(N)" or "<N>".
            if ((ttt.is_char_of("._") and not ttt.is_whitespace_after and (isinstance(ttt.next0_, NumberToken))) and ((ttt.next0_.typ == NumberSpellingType.DIGIT or (((ttt.next0_.typ == NumberSpellingType.WORDS)) and ttt.next0_.chars.is_latin_letter and not ttt.is_whitespace_after)))):
                ok1 = True
            elif ((ttt.is_char_of("(<") and (isinstance(ttt.next0_, NumberToken)) and ttt.next0_.next0_ is not None) and ttt.next0_.next0_.is_char_of(")>")):
                ok2 = True
            if (ok1 or ok2):
                ttt = ttt.next0_
                res.numbers.append(str(ttt.value))
                res.num_typ = (NumberTypes.TWODIGITS if len(res.numbers) == 2 else ((NumberTypes.THREEDIGITS if len(res.numbers) == 3 else NumberTypes.FOURDIGITS)))
                if ((ttt.next0_ is not None and ttt.next0_.is_char_of(")>") and ttt.next0_.next0_ is not None) and ttt.next0_.next0_.is_char('.')):
                    ttt = ttt.next0_
                elif (ok2):
                    ttt = ttt.next0_
                res.num_end_token = ttt
                res.end_token = res.num_end_token
                t = res.end_token
                continue
            # A single letter glued to the first number, e.g. "5a".
            if (((isinstance(ttt, TextToken)) and ttt.length_char == 1 and ttt.chars.is_letter) and not ttt.is_whitespace_before and len(res.numbers) == 1):
                res.numbers.append(ttt.term)
                res.num_typ = NumberTypes.COMBO
                res.num_end_token = ttt
                res.end_token = res.num_end_token
                t = res.end_token
                continue
            break
        # Trailing ")" or "." becomes the suffix.
        if (t.next0_ is not None and t.next0_.is_char_of(").")):
            res.num_suffix = t.next0_.get_source_text()
            res.num_end_token = t.next0_
            res.end_token = res.num_end_token
            t = res.end_token
        return
    # A number written in words is accepted only for container types and as the first number.
    if (((isinstance(t, NumberToken)) and t.typ == NumberSpellingType.WORDS and res.typ_container_rank > 0) and len(res.numbers) == 0):
        res.numbers.append(str(t.value))
        res.num_typ = NumberTypes.DIGIT
        res.num_begin_token = t
        if (t.next0_ is not None and t.next0_.is_char('.')):
            t = t.next0_
            res.num_suffix = "."
        res.num_end_token = t
        res.end_token = res.num_end_token
        return
    nt = NumberHelper.try_parse_roman(t)
    # Roman 10 before ")" and roman 100 are rejected
    # (presumably because they are the letters X and C - confirm).
    if ((nt is not None and nt.value == "10" and t.next0_ is not None) and t.next0_.is_char(')')):
        nt = (None)
    if (nt is not None and nt.value == "100"):
        nt = (None)
    if (nt is not None and nt.typ == NumberSpellingType.ROMAN):
        if (res.num_typ == NumberTypes.UNDEFINED):
            res.num_typ = NumberTypes.ROMAN
        else:
            res.num_typ = NumberTypes.COMBO
        if (len(res.numbers) > 0 and t.is_whitespace_before):
            return
        if (len(res.numbers) == 0):
            res.num_begin_token = t
        res.numbers.append(str(nt.value))
        res.num_end_token = nt.end_token
        res.end_token = res.num_end_token
        t = res.end_token
        # For chapters, sections and lines a roman number may be followed by one or two digit levels.
        if (res.num_typ == NumberTypes.ROMAN and ((res.typ == InstrToken1.Types.CHAPTER or res.typ == InstrToken1.Types.SECTION or res.typ == InstrToken1.Types.LINE))):
            if ((t.next0_ is not None and t.next0_.is_char_of("._<") and (isinstance(t.next0_.next0_, NumberToken))) and t.next0_.next0_.typ == NumberSpellingType.DIGIT):
                t = t.next0_.next0_
                res.numbers.append(str(t.value))
                res.num_typ = NumberTypes.TWODIGITS
                if (t.next0_ is not None and t.next0_.is_char('>')):
                    t = t.next0_
                res.num_end_token = t
                res.end_token = res.num_end_token
                if ((t.next0_ is not None and t.next0_.is_char_of("._<") and (isinstance(t.next0_.next0_, NumberToken))) and t.next0_.next0_.typ == NumberSpellingType.DIGIT):
                    t = t.next0_.next0_
                    res.numbers.append(str(t.value))
                    res.num_typ = NumberTypes.THREEDIGITS
                    if (t.next0_ is not None and t.next0_.is_char('>')):
                        t = t.next0_
                    res.num_end_token = t
                    res.end_token = res.num_end_token
        if (t.next0_ is not None and t.next0_.is_char_of(").")):
            res.num_suffix = t.next0_.get_source_text()
            res.num_end_token = t.next0_
            res.end_token = res.num_end_token
            t = res.end_token
        return
    # A single letter at the very beginning of the item.
    if (((isinstance(t, TextToken)) and t.length_char == 1 and t.chars.is_letter) and t == res.begin_token):
        # "<letter><number>." - the letter goes into the suffix.
        if ((not t.is_whitespace_after and (isinstance(t.next0_, NumberToken)) and t.next0_.next0_ is not None) and t.next0_.next0_.is_char('.')):
            res.num_begin_token = t
            res.num_typ = NumberTypes.DIGIT
            res.numbers.append(str(t.next0_.value))
            res.num_suffix = (t.term + ".")
            res.num_end_token = t.next0_.next0_
            res.end_token = res.num_end_token
            t = res.end_token
            return
        if (t.next0_ is not None and t.next0_.is_char_of(".)")):
            # "<letter>.<number>)" written without spaces - a two-level number.
            if (((t.next0_.is_char('.') and (isinstance(t.next0_.next0_, NumberToken)) and t.next0_.next0_.next0_ is not None) and t.next0_.next0_.next0_.is_char(')') and not t.next0_.is_whitespace_after) and not t.next0_.next0_.is_whitespace_after):
                res.num_typ = NumberTypes.TWODIGITS
                res.numbers.append(t.term)
                res.numbers.append(str(t.next0_.next0_.value))
                res.num_suffix = ")"
                res.num_begin_token = t
                res.num_end_token = t.next0_.next0_.next0_
                res.end_token = res.num_end_token
                t = res.end_token
                return
            # An upper-case letter with a dot, or a letter with a dot before a number, is not a letter item.
            if (t.next0_.is_char('.') and ((t.chars.is_all_upper or (isinstance(t.next0_.next0_, NumberToken))))):
                pass
            else:
                # A temporary token is used to read the letter's ordinal through last_number.
                tmp1 = InstrToken1(t, t.next0_)
                tmp1.numbers.append(t.term)
                # With a dot, a non-first letter is accepted only if it continues the previous item.
                if (tmp1.last_number > 1 and t.next0_.is_char_of(".") and ((prev is None or (prev.last_number + 1) != tmp1.last_number))):
                    pass
                else:
                    if (len(res.numbers) == 0):
                        res.num_begin_token = t
                    res.num_typ = NumberTypes.LETTER
                    res.numbers.append(t.term)
                    res.num_begin_token = t
                    res.num_end_token = t.next0_
                    res.end_token = res.num_end_token
                    t = res.end_token
                    res.num_suffix = t.get_source_text()
                    return
def tryParseNumberWithPostfix(t: 'Token') -> 'NumberExToken':
    """ Extract a number followed by a standard measure postfix, e.g. "10 кв.м."

    Besides plain "<number> <postfix>" the function handles a leading money
    character, a bracketed repetition of the value (in digits, in words, as
    "NN/100" or as a fraction in words), glued adjectives made of a number
    word and a postfix, and a few special dimension terms.

    Args:
        t: first token of the expression

    Returns:
        The recognized NumberExToken or None.
    """
    if (t is None):
        return None
    t0 = t
    # A one-character money sign may precede the number.
    is_dollar = None
    if (t.length_char == 1 and t.next0_ is not None):
        is_dollar = NumberHelper._isMoneyChar(t)
        if ((is_dollar) is not None):
            t = t.next0_
    nt = Utils.asObjectOrNull(t, NumberToken)
    if (nt is None):
        # "(<number>) <money postfix>" not preceded by another number.
        if ((not ((isinstance(t.previous, NumberToken))) and t.isChar('(') and (isinstance(t.next0_, NumberToken))) and t.next0_.next0_ is not None and t.next0_.next0_.isChar(')')):
            toks1 = NumberExHelper._m_postfixes.tryParse(t.next0_.next0_.next0_, TerminParseAttr.NO)
            if (toks1 is not None and (Utils.valToEnum(toks1.termin.tag, NumberExType)) == NumberExType.MONEY):
                nt0 = Utils.asObjectOrNull(t.next0_, NumberToken)
                res = NumberExToken._new471(t, toks1.end_token, nt0.value, nt0.typ, NumberExType.MONEY, nt0.real_value, toks1.begin_token.morph)
                return NumberExHelper.__correctMoney(res, toks1.begin_token)
        # An adjective glued from a number word and a postfix: try every split position.
        tt = Utils.asObjectOrNull(t, TextToken)
        if (tt is None or not tt.morph.class0_.is_adjective):
            return None
        val = tt.term
        i = 4
        first_pass2785 = True
        while True:
            if first_pass2785:
                first_pass2785 = False
            else:
                i += 1
            if (not (i < (len(val) - 5))):
                break
            v = val[0:0 + i]
            li = NumberHelper._m_nums.tryAttachStr(v, tt.morph.language)
            if (li is None):
                continue
            vv = val[i:]
            lii = NumberExHelper._m_postfixes.tryAttachStr(vv, tt.morph.language)
            if (lii is not None and len(lii) > 0):
                re = NumberExToken._new472(t, t, str((li[0].tag)), NumberSpellingType.WORDS, Utils.valToEnum(lii[0].tag, NumberExType), t.morph)
                NumberExHelper.__correctExtTypes(re)
                return re
            # Only the first prefix that is a number word is examined.
            break
        return None
    if (t.next0_ is None and is_dollar is None):
        return None
    f = nt.real_value
    t1 = nt.next0_
    # The number may continue as a real number ("1,5", "1 000").
    if (((t1 is not None and t1.isCharOf(",."))) or (((isinstance(t1, NumberToken)) and (t1.whitespaces_before_count < 3)))):
        tt11 = NumberHelper.tryParseRealNumber(nt, False)
        if (tt11 is not None):
            t1 = tt11.end_token.next0_
            f = tt11.real_value
    if (t1 is None):
        if (is_dollar is None):
            return None
    elif ((t1.next0_ is not None and t1.next0_.isValue("С", "З") and t1.next0_.next0_ is not None) and t1.next0_.next0_.isValue("ПОЛОВИНА", None)):
        # "<number> <word> с половиной" adds one half.
        f += .5
        t1 = t1.next0_.next0_
    if (t1 is not None and t1.is_hiphen and t1.next0_ is not None):
        t1 = t1.next0_
    # det: the value was confirmed by a bracketed repetition; altf: the alternative value.
    det = False
    altf = f
    # Skip a two-character zero number after a hyphen ("-00").
    if (((isinstance(t1, NumberToken)) and t1.previous is not None and t1.previous.is_hiphen) and (t1).int_value == 0 and t1.length_char == 2):
        t1 = t1.next0_
    if ((t1 is not None and t1.next0_ is not None and t1.isChar('(')) and (((isinstance(t1.next0_, NumberToken)) or t1.next0_.isValue("НОЛЬ", None))) and t1.next0_.next0_ is not None):
        nt1 = Utils.asObjectOrNull(t1.next0_, NumberToken)
        val = 0
        if (nt1 is not None):
            val = nt1.real_value
        if (math.floor(f) == math.floor(val)):
            # The bracket repeats the integer part of the value.
            ttt = t1.next0_.next0_
            if (ttt.isChar(')')):
                t1 = ttt.next0_
                det = True
                if ((isinstance(t1, NumberToken)) and (t1).int_value is not None and (t1).int_value == 0):
                    t1 = t1.next0_
            elif (((((isinstance(ttt, NumberToken)) and ((ttt).real_value < 100) and ttt.next0_ is not None) and ttt.next0_.isChar('/') and ttt.next0_.next0_ is not None) and ttt.next0_.next0_.getSourceText() == "100" and ttt.next0_.next0_.next0_ is not None) and ttt.next0_.next0_.next0_.isChar(')')):
                # "(<int> NN/100)": the hundredths must match the rest of the value.
                rest = NumberExHelper.__getDecimalRest100(f)
                if ((ttt).int_value is not None and rest == (ttt).int_value):
                    t1 = ttt.next0_.next0_.next0_.next0_
                    det = True
            elif ((ttt.isValue("ЦЕЛЫХ", None) and (isinstance(ttt.next0_, NumberToken)) and ttt.next0_.next0_ is not None) and ttt.next0_.next0_.next0_ is not None and ttt.next0_.next0_.next0_.isChar(')')):
                # "(<int> целых <num> <fraction word>)": build the alternative value from the fraction.
                num2 = Utils.asObjectOrNull(ttt.next0_, NumberToken)
                altf = num2.real_value
                if (ttt.next0_.next0_.isValue("ДЕСЯТЫЙ", None)):
                    altf /= (10)
                elif (ttt.next0_.next0_.isValue("СОТЫЙ", None)):
                    altf /= (100)
                elif (ttt.next0_.next0_.isValue("ТЫСЯЧНЫЙ", None)):
                    altf /= (1000)
                elif (ttt.next0_.next0_.isValue("ДЕСЯТИТЫСЯЧНЫЙ", None)):
                    altf /= (10000)
                elif (ttt.next0_.next0_.isValue("СТОТЫСЯЧНЫЙ", None)):
                    altf /= (100000)
                elif (ttt.next0_.next0_.isValue("МИЛЛИОННЫЙ", None)):
                    altf /= (1000000)
                if (altf < 1):
                    altf += val
                t1 = ttt.next0_.next0_.next0_.next0_
                det = True
            else:
                # "(<number> <money postfix>)".
                toks1 = NumberExHelper._m_postfixes.tryParse(ttt, TerminParseAttr.NO)
                if (toks1 is not None):
                    if ((Utils.valToEnum(toks1.termin.tag, NumberExType)) == NumberExType.MONEY):
                        if (toks1.end_token.next0_ is not None and toks1.end_token.next0_.isChar(')')):
                            res = NumberExToken._new473(t, toks1.end_token.next0_, nt.value, nt.typ, NumberExType.MONEY, f, altf, toks1.begin_token.morph)
                            return NumberExHelper.__correctMoney(res, toks1.begin_token)
                # Otherwise parse the bracket content recursively as a full expression.
                res2 = NumberExHelper.tryParseNumberWithPostfix(t1.next0_)
                if (res2 is not None and res2.end_token.next0_ is not None and res2.end_token.next0_.isChar(')')):
                    if (res2.int_value is not None):
                        res2.begin_token = t
                        res2.end_token = res2.end_token.next0_
                        res2.alt_real_value = res2.real_value
                        res2.real_value = f
                        NumberExHelper.__correctExtTypes(res2)
                        if (res2.whitespaces_after_count < 2):
                            toks2 = NumberExHelper._m_postfixes.tryParse(res2.end_token.next0_, TerminParseAttr.NO)
                            if (toks2 is not None):
                                if ((Utils.valToEnum(toks2.termin.tag, NumberExType)) == NumberExType.MONEY):
                                    res2.end_token = toks2.end_token
                        return res2
        elif (nt1 is not None and nt1.typ == NumberSpellingType.WORDS and nt.typ == NumberSpellingType.DIGIT):
            # Digits followed by a different value in words: keep the words as the alternative.
            altf = nt1.real_value
            ttt = t1.next0_.next0_
            if (ttt.isChar(')')):
                t1 = ttt.next0_
                det = True
            if (not det):
                altf = f
    # Skip a bracketed remark starting with "СУММА".
    if ((t1 is not None and t1.isChar('(') and t1.next0_ is not None) and t1.next0_.isValue("СУММА", None)):
        br = BracketHelper.tryParse(t1, BracketParseAttr.NO, 100)
        if (br is not None):
            t1 = br.end_token.next0_
    if (is_dollar is not None):
        # The currency is already known from the leading sign; find the last token of the number.
        te = None
        if (t1 is not None):
            te = t1.previous
        else:
            t1 = t0
            while t1 is not None:
                if (t1.next0_ is None):
                    te = t1
                t1 = t1.next0_
        if (te is None):
            return None
        # Multipliers: "-миллионный", "-миллиардный", or glued "M" / "BN".
        if (te.is_hiphen and te.next0_ is not None):
            if (te.next0_.isValue("МИЛЛИОННЫЙ", None)):
                f *= (1000000)
                altf *= (1000000)
                te = te.next0_
            elif (te.next0_.isValue("МИЛЛИАРДНЫЙ", None)):
                f *= (1000000000)
                altf *= (1000000000)
                te = te.next0_
        if (not te.is_whitespace_after and (isinstance(te.next0_, TextToken))):
            if (te.next0_.isValue("M", None)):
                f *= (1000000)
                altf *= (1000000)
                te = te.next0_
            elif (te.next0_.isValue("BN", None)):
                f *= (1000000000)
                altf *= (1000000000)
                te = te.next0_
        return NumberExToken._new474(t0, te, "", nt.typ, NumberExType.MONEY, f, altf, is_dollar)
    if (t1 is None or ((t1.is_newline_before and not det))):
        return None
    toks = NumberExHelper._m_postfixes.tryParse(t1, TerminParseAttr.NO)
    # After a confirmed value a lone "0" may stand before the postfix.
    if ((toks is None and det and (isinstance(t1, NumberToken))) and (t1).value == "0"):
        toks = NumberExHelper._m_postfixes.tryParse(t1.next0_, TerminParseAttr.NO)
    if (toks is not None):
        t1 = toks.end_token
        # Include a trailing dot unless the postfix is a full canonical word or not a letter token.
        if (not t1.isChar('.') and t1.next0_ is not None and t1.next0_.isChar('.')):
            if ((isinstance(t1, TextToken)) and t1.isValue(toks.termin.terms[0].canonical_text, None)):
                pass
            elif (not t1.chars.is_letter):
                pass
            else:
                t1 = t1.next0_
        if (toks.termin.canonic_text == "LTL"):
            return None
        # A one-token postfix that is a free-standing preposition or conjunction is rejected.
        if (toks.begin_token == t1):
            if (t1.morph.class0_.is_preposition or t1.morph.class0_.is_conjunction):
                if (t1.is_whitespace_before and t1.is_whitespace_after):
                    return None
        ty = Utils.valToEnum(toks.termin.tag, NumberExType)
        res = NumberExToken._new473(t, t1, nt.value, nt.typ, ty, f, altf, toks.begin_token.morph)
        if (ty != NumberExType.MONEY):
            NumberExHelper.__correctExtTypes(res)
            return res
        return NumberExHelper.__correctMoney(res, toks.begin_token)
    # "%" or a money character right after the number.
    pfx = NumberExHelper.__attachSpecPostfix(t1)
    if (pfx is not None):
        pfx.begin_token = t
        pfx.value = nt.value
        pfx.typ = nt.typ
        pfx.real_value = f
        pfx.alt_real_value = altf
        return pfx
    # "<number> <preposition/conjunction> <number with postfix>": borrow the type
    # from the second expression; the result covers only the first number.
    if (t1.next0_ is not None and ((t1.morph.class0_.is_preposition or t1.morph.class0_.is_conjunction))):
        if (t1.isValue("НА", None)):
            pass
        else:
            nn = NumberExHelper.tryParseNumberWithPostfix(t1.next0_)
            if (nn is not None):
                return NumberExToken._new476(t, t, nt.value, nt.typ, nn.ex_typ, f, altf, nn.ex_typ2, nn.ex_typ_param)
    # Dimension terms glued to the next number; each is listed in two spellings
    # (apparently Cyrillic and Latin look-alikes - confirm).
    if (not t1.is_whitespace_after and (isinstance(t1.next0_, NumberToken)) and (isinstance(t1, TextToken))):
        term = (t1).term
        ty = NumberExType.UNDEFINED
        if (term == "СМХ" or term == "CMX"):
            ty = NumberExType.SANTIMETER
        elif (term == "MX" or term == "МХ"):
            ty = NumberExType.METER
        elif (term == "MMX" or term == "ММХ"):
            ty = NumberExType.MILLIMETER
        if (ty != NumberExType.UNDEFINED):
            return NumberExToken._new477(t, t1, nt.value, nt.typ, ty, f, altf, True)
    return None
def try_parse(t : 'Token') -> 'ReferentToken':
    """ Try to recognise a money amount starting at token t.

    Handles a number with a currency postfix (optionally followed by a
    repetition of the same sum, e.g. in brackets) and the form
    "<number> <hyphen or preposition> <money>", where the first number
    takes the currency of the second one.

    Args:
        t(Token): first token of the candidate; None is accepted

    Returns:
        ReferentToken: a token wrapping a new MoneyReferent, or None if no money amount starts at t
    """
    if (t is None):
        return None
    if (not (isinstance(t, NumberToken)) and t.length_char != 1):
        return None
    nex = NumberHelper.try_parse_number_with_postfix(t)
    if (nex is None or nex.ex_typ != NumberExType.MONEY):
        # No money right at t: try "<number> <hyphen|preposition> <money>".
        if ((isinstance(t, NumberToken)) and (isinstance(t.next0_, TextToken)) and (isinstance(t.next0_.next0_, NumberToken))):
            if (t.next0_.is_hiphen or t.next0_.morph.class0_.is_preposition):
                res1 = NumberHelper.try_parse_number_with_postfix(t.next0_.next0_)
                if (res1 is not None and res1.ex_typ == NumberExType.MONEY):
                    res0 = MoneyReferent()
                    if ((t.next0_.is_hiphen and res1.real_value == 0 and res1.end_token.next0_ is not None) and res1.end_token.next0_.is_char('(')):
                        # "<groups>-00 <currency> (<full sum>)": the bracketed sum must
                        # confirm the digit groups standing before the hyphen.
                        nex2 = NumberHelper.try_parse_number_with_postfix(res1.end_token.next0_.next0_)
                        if ((nex2 is not None and nex2.ex_typ_param == res1.ex_typ_param and nex2.end_token.next0_ is not None) and nex2.end_token.next0_.is_char(')')):
                            if (nex2.value == t.value):
                                res0.currency = nex2.ex_typ_param
                                res0.add_slot(MoneyReferent.ATTR_VALUE, nex2.value, True, 0)
                                return ReferentToken(res0, t, nex2.end_token.next0_)
                            if (isinstance(t.previous, NumberToken)):
                                # Compare numerically: `value` is a string, so the former
                                # `real_value * 1000 + t.value` raised TypeError.
                                if (nex2.real_value == ((t.previous.real_value * (1000)) + t.real_value)):
                                    res0.currency = nex2.ex_typ_param
                                    res0.add_slot(MoneyReferent.ATTR_VALUE, nex2.value, True, 0)
                                    return ReferentToken(res0, t.previous, nex2.end_token.next0_)
                                elif (isinstance(t.previous.previous, NumberToken)):
                                    if (nex2.real_value == (((t.previous.previous.real_value * (1000000)) + (t.previous.real_value * (1000)) + t.real_value))):
                                        res0.currency = nex2.ex_typ_param
                                        res0.add_slot(MoneyReferent.ATTR_VALUE, nex2.value, True, 0)
                                        return ReferentToken(res0, t.previous.previous, nex2.end_token.next0_)
                    # Plain range/enumeration: the first number inherits the currency.
                    res0.currency = res1.ex_typ_param
                    res0.add_slot(MoneyReferent.ATTR_VALUE, t.value, False, 0)
                    return ReferentToken(res0, t, t)
        return None
    res = MoneyReferent()
    res.currency = nex.ex_typ_param
    # The integer part goes to VALUE, the fractional part (in hundredths) to REST.
    val = nex.value
    if (val.find('.') > 0):
        val = val[0:0+val.find('.')]
    res.add_slot(MoneyReferent.ATTR_VALUE, val, True, 0)
    cents = math.floor(round(((nex.real_value - res.value)) * (100), 6))
    if (cents != 0):
        res.add_slot(MoneyReferent.ATTR_REST, str(cents), True, 0)
    if (nex.real_value != nex.alt_real_value):
        if (math.floor(res.value) != math.floor(nex.alt_real_value)):
            val = NumberHelper.double_to_string(nex.alt_real_value)
            if (val.find('.') > 0):
                val = val[0:0+val.find('.')]
            res.add_slot(MoneyReferent.ATTR_ALTVALUE, val, True, 0)
        cents = (math.floor(round(((nex.alt_real_value - (math.floor(nex.alt_real_value)))) * (100), 6)))
        if (cents != res.rest and cents != 0):
            res.add_slot(MoneyReferent.ATTR_ALTREST, str(cents), True, 0)
    if (nex.alt_rest_money > 0):
        res.add_slot(MoneyReferent.ATTR_ALTREST, str(nex.alt_rest_money), True, 0)
    t1 = nex.end_token
    # Swallow a repetition of the same sum that follows, preferably "( ... )".
    # NOTE(review): the `else` is attached to the inner `if`; nesting was
    # reconstructed from a whitespace-collapsed source - confirm.
    if (t1.next0_ is not None and t1.next0_.is_char('(')):
        rt = MoneyAnalyzer.try_parse(t1.next0_.next0_)
        if ((rt is not None and rt.referent.can_be_equals(res, ReferentsEqualType.WITHINONETEXT) and rt.end_token.next0_ is not None) and rt.end_token.next0_.is_char(')')):
            t1 = rt.end_token.next0_
        else:
            rt = MoneyAnalyzer.try_parse(t1.next0_)
            if (rt is not None and rt.referent.can_be_equals(res, ReferentsEqualType.WITHINONETEXT)):
                t1 = rt.end_token
    if (res.alt_value is not None and res.alt_value > res.value):
        if (t.whitespaces_before_count == 1 and (isinstance(t.previous, NumberToken))):
            # The alternative value differs by whole thousands/millions, i.e. the
            # preceding number is a leading digit group: take it into the token.
            delt = math.floor((res.alt_value - res.value))
            if ((((res.value < 1000) and ((delt % 1000)) == 0)) or (((res.value < 1000000) and ((delt % 1000000)) == 0))):
                t = t.previous
                res.add_slot(MoneyReferent.ATTR_VALUE, res.get_string_value(MoneyReferent.ATTR_ALTVALUE), True, 0)
                res.add_slot(MoneyReferent.ATTR_ALTVALUE, None, True, 0)
    return ReferentToken(res, t, t1)
def __try_parse(t: 'Token', prev: 'PersonIdToken') -> 'PersonIdToken':
    """ Try to parse one element of an identity-document description at t.

    Recognised elements: a document keyword (optionally extended by a
    country), "number"/"series"/"code"/"address" prefixes with their
    values, bare numbers and series, and date / organization / address
    referents.

    Args:
        t(Token): token to start from; None yields None
        prev(PersonIdToken): previously parsed element, or None for the first one
            (only a keyword is accepted as the first element)

    Returns:
        PersonIdToken: the parsed element, or None
    """
    # The recursive calls below pass `next0_` tokens that are None at the end of
    # the text; without this guard the first `t.is_value` raised AttributeError.
    if (t is None):
        return None
    if (t.is_value("СВИДЕТЕЛЬСТВО", None)):
        # A registration certificate of an individual entrepreneur: collect the
        # following words until something foreign appears.
        tt1 = t
        ip = False
        reg = False
        tt = t.next0_
        first_pass3372 = True
        while True:
            if first_pass3372: first_pass3372 = False
            else: tt = tt.next0_
            if (not (tt is not None)): break
            if (tt.is_comma_and or tt.morph.class0_.is_preposition):
                continue
            if (tt.is_value("РЕГИСТРАЦИЯ", None) or tt.is_value("РЕЕСТР", None) or tt.is_value("ЗАРЕГИСТРИРОВАТЬ", None)):
                reg = True
                tt1 = tt
            elif (tt.is_value("ИНДИВИДУАЛЬНЫЙ", None) or tt.is_value("ИП", None)):
                ip = True
                tt1 = tt
            elif ((tt.is_value("ВНЕСЕНИЕ", None) or tt.is_value("ГОСУДАРСТВЕННЫЙ", None) or tt.is_value("ЕДИНЫЙ", None)) or tt.is_value("ЗАПИСЬ", None) or tt.is_value("ПРЕДПРИНИМАТЕЛЬ", None)):
                tt1 = tt
            elif (tt.get_referent() is not None and tt.get_referent().type_name == "DATERANGE"):
                tt1 = tt
            else:
                break
        if (reg and ip):
            return PersonIdToken._new2505(
                t, tt1, PersonIdToken.Typs.KEYWORD,
                "СВИДЕТЕЛЬСТВО О ГОСУДАРСТВЕННОЙ РЕГИСТРАЦИИ ФИЗИЧЕСКОГО ЛИЦА В КАЧЕСТВЕ ИНДИВИДУАЛЬНОГО ПРЕДПРИНИМАТЕЛЯ"
            )
    tok = PersonIdToken.M_ONTOLOGY.try_parse(t, TerminParseAttr.NO)
    if (tok is not None):
        ty = Utils.valToEnum(tok.termin.tag, PersonIdToken.Typs)
        res = PersonIdToken._new2505(tok.begin_token, tok.end_token, ty, tok.termin.canonic_text)
        if (prev is None):
            # First element of a chain: only a document keyword is acceptable;
            # extend it with the country of the document.
            if (ty != PersonIdToken.Typs.KEYWORD):
                return None
            t = tok.end_token.next0_
            first_pass3373 = True
            while True:
                if first_pass3373: first_pass3373 = False
                else: t = t.next0_
                if (not (t is not None)): break
                r = t.get_referent()
                if (r is not None and (isinstance(r, GeoReferent))):
                    res.referent = r
                    res.end_token = t
                    continue
                if (t.is_value("ГРАЖДАНИН", None) and t.next0_ is not None and (isinstance(t.next0_.get_referent(), GeoReferent))):
                    res.referent = t.next0_.get_referent()
                    res.end_token = t.next0_
                    t = res.end_token
                    continue
                if (r is not None):
                    break
                ait = PersonAttrToken.try_attach(
                    t, None, PersonAttrToken.PersonAttrAttachAttrs.NO)
                if (ait is not None):
                    if (ait.referent is not None):
                        for s in ait.referent.slots:
                            if (s.type_name == PersonPropertyReferent.ATTR_REF and (isinstance(s.value, GeoReferent))):
                                res.referent = (Utils.asObjectOrNull(
                                    s.value, Referent))
                    # NOTE(review): placement of these two statements (after the
                    # slot scan, leaving the token loop) was reconstructed from a
                    # whitespace-collapsed source - confirm.
                    res.end_token = ait.end_token
                    break
                if (t.is_value("ДАННЫЙ", None)):
                    res.end_token = t
                    continue
                break
            # Only a state is kept as the document's country.
            if ((isinstance(res.referent, GeoReferent)) and not res.referent.is_state):
                res.referent = (None)
            return res
        if (ty == PersonIdToken.Typs.NUMBER):
            # "number[:] <digits...>" - glue consecutive number tokens on the line.
            tmp = io.StringIO()
            tt = tok.end_token.next0_
            if (tt is not None and tt.is_char(':')):
                tt = tt.next0_
            while tt is not None:
                if (tt.is_newline_before):
                    break
                if (not (isinstance(tt, NumberToken))):
                    break
                print(tt.get_source_text(), end="", file=tmp)
                res.end_token = tt
                tt = tt.next0_
            if (tmp.tell() < 1):
                return None
            res.value = Utils.toStringStringIO(tmp)
            res.has_prefix = True
            return res
        if (ty == PersonIdToken.Typs.SERIA):
            # "series[:] <digits / roman numeral / two capital letters ...>"
            tmp = io.StringIO()
            tt = tok.end_token.next0_
            if (tt is not None and tt.is_char(':')):
                tt = tt.next0_
            next_num = False
            first_pass3374 = True
            while True:
                if first_pass3374: first_pass3374 = False
                else: tt = tt.next0_
                if (not (tt is not None)): break
                if (tt.is_newline_before):
                    break
                if (MiscHelper.check_number_prefix(tt) is not None):
                    next_num = True
                    break
                if (not (isinstance(tt, NumberToken))):
                    if (not (isinstance(tt, TextToken))):
                        break
                    if (not tt.chars.is_all_upper):
                        break
                    nu = NumberHelper.try_parse_roman(tt)
                    if (nu is not None):
                        print(nu.get_source_text(), end="", file=tmp)
                        tt = nu.end_token
                    elif (tt.length_char != 2):
                        break
                    else:
                        # NOTE(review): `res.end_token = tt` is kept inside this
                        # branch; nesting reconstructed - confirm.
                        print(tt.term, end="", file=tmp)
                        res.end_token = tt
                    if (tt.next0_ is not None and tt.next0_.is_hiphen):
                        tt = tt.next0_
                    continue
                if (tmp.tell() >= 4):
                    break
                print(tt.get_source_text(), end="", file=tmp)
                res.end_token = tt
            if (tmp.tell() < 4):
                # A short series is accepted only when a number follows it.
                if (tmp.tell() < 2):
                    return None
                tt1 = res.end_token.next0_
                if (tt1 is not None and tt1.is_comma):
                    tt1 = tt1.next0_
                next0__ = PersonIdToken.__try_parse(tt1, res)
                if (next0__ is not None and next0__.typ == PersonIdToken.Typs.NUMBER):
                    pass
                else:
                    return None
            res.value = Utils.toStringStringIO(tmp)
            res.has_prefix = True
            return res
        if (ty == PersonIdToken.Typs.CODE):
            # Extend over ":" / hyphen / number tokens that follow the prefix.
            tt = res.end_token.next0_
            first_pass3375 = True
            while True:
                if first_pass3375: first_pass3375 = False
                else: tt = tt.next0_
                if (not (tt is not None)): break
                if (tt.is_char_of(":") or tt.is_hiphen):
                    continue
                if (isinstance(tt, NumberToken)):
                    res.end_token = tt
                    continue
                break
        if (ty == PersonIdToken.Typs.ADDRESS):
            if (isinstance(t.get_referent(), AddressReferent)):
                res.referent = t.get_referent()
                res.end_token = t
                return res
            tt = res.end_token.next0_
            first_pass3376 = True
            while True:
                if first_pass3376: first_pass3376 = False
                else: tt = tt.next0_
                if (not (tt is not None)): break
                if (tt.is_char_of(":") or tt.is_hiphen or tt.morph.class0_.is_preposition):
                    continue
                if (isinstance(tt.get_referent(), AddressReferent)):
                    res.referent = tt.get_referent()
                    res.end_token = tt
                # Stop at the first token that is not a skippable separator.
                break
            if (res.referent is None):
                return None
        return res
    elif (prev is None):
        return None
    # No ontology term here: a bare number / series / referent after a keyword.
    t0 = t
    t1 = MiscHelper.check_number_prefix(t0)
    if (t1 is not None):
        t = t1
    if (isinstance(t, NumberToken)):
        tmp = io.StringIO()
        res = PersonIdToken._new2507(t0, t, PersonIdToken.Typs.NUMBER)
        tt = t
        while tt is not None:
            if (tt.is_newline_before or not (isinstance(tt, NumberToken))):
                break
            print(tt.get_source_text(), end="", file=tmp)
            res.end_token = tt
            tt = tt.next0_
        if (tmp.tell() < 4):
            # Too short for a number: it is a series if a number follows.
            if (tmp.tell() < 2):
                return None
            if (prev is None or prev.typ != PersonIdToken.Typs.KEYWORD):
                return None
            ne = PersonIdToken.__try_parse(res.end_token.next0_, prev)
            if (ne is not None and ne.typ == PersonIdToken.Typs.NUMBER):
                res.typ = PersonIdToken.Typs.SERIA
            else:
                return None
        res.value = Utils.toStringStringIO(tmp)
        if (t0 != t):
            res.has_prefix = True
        return res
    if (isinstance(t, ReferentToken)):
        r = t.get_referent()
        if (r is not None):
            if (r.type_name == "DATE"):
                return PersonIdToken._new2508(t, t, PersonIdToken.Typs.DATE, r)
            if (r.type_name == "ORGANIZATION"):
                return PersonIdToken._new2508(t, t, PersonIdToken.Typs.ORG, r)
            if (r.type_name == "ADDRESS"):
                return PersonIdToken._new2508(t, t, PersonIdToken.Typs.ADDRESS, r)
    if ((prev is not None and prev.typ == PersonIdToken.Typs.KEYWORD and (isinstance(t, TextToken))) and not t.chars.is_all_lower and t.chars.is_letter):
        # A letter token right after the keyword is a series if a number follows.
        rr = PersonIdToken.__try_parse(t.next0_, prev)
        if (rr is not None and rr.typ == PersonIdToken.Typs.NUMBER):
            return PersonIdToken._new2505(t, t, PersonIdToken.Typs.SERIA, t.term)
    if ((t is not None and t.is_value("ОТ", "ВІД") and (isinstance(t.next0_, ReferentToken))) and t.next0_.get_referent().type_name == "DATE"):
        return PersonIdToken._new2508(t, t.next0_, PersonIdToken.Typs.DATE, t.next0_.get_referent())
    return None
def real_value(self, value_) -> float:
    """ Store a numeric value: its string form is written to self.value.

    Args:
        value_: the number to store

    Returns:
        float: the value that was passed in
    """
    from pullenti.ner.core.NumberHelper import NumberHelper
    as_text = NumberHelper.double_to_string(value_)
    self.value = as_text
    return value_
def __try_parse(t : 'Token', prev : 'WeaponItemToken', after_conj : bool, attach_high : bool=False) -> 'WeaponItemToken':
    """ Try to parse one element of a weapon description starting at t.

    Possible elements: noun (kind of weapon), brand, model, name, number,
    caliber and developer.

    Args:
        t(Token): token to start from; None yields None
        prev(WeaponItemToken): previously parsed element (used to decide whether
            a capitalised word may be a name/brand), or None
        after_conj(bool): only passed through to the recursive calls
        attach_high(bool): only passed through to the bracketed recursive call

    Returns:
        WeaponItemToken: the parsed element, or None
    """
    if (t is None):
        return None
    if (BracketHelper.is_bracket(t, True)):
        # An element in brackets/quotes: parse the inside and widen the token.
        wit = WeaponItemToken.__try_parse(t.next0_, prev, after_conj, attach_high)
        if (wit is not None):
            if (wit.end_token.next0_ is None):
                wit.begin_token = t
                return wit
            if (BracketHelper.is_bracket(wit.end_token.next0_, True)):
                wit.begin_token = t
                wit.end_token = wit.end_token.next0_
                return wit
    tok = WeaponItemToken.M_ONTOLOGY.try_parse(t, TerminParseAttr.NO)
    if (tok is not None):
        res = WeaponItemToken(t, tok.end_token)
        res.typ = (Utils.valToEnum(tok.termin.tag, WeaponItemToken.Typs))
        if (res.typ == WeaponItemToken.Typs.NOUN):
            res.value = tok.termin.canonic_text
            if (tok.termin.tag2 is not None):
                res.is_doubt = True
            # Absorb brands and adjectives that directly follow the noun.
            tt = res.end_token.next0_
            first_pass3426 = True
            while True:
                if first_pass3426: first_pass3426 = False
                else: tt = tt.next0_
                if (not (tt is not None)): break
                if (tt.whitespaces_before_count > 2):
                    break
                wit = WeaponItemToken.__try_parse(tt, None, False, False)
                if (wit is not None):
                    if (wit.typ == WeaponItemToken.Typs.BRAND):
                        res.__inner_tokens.append(wit)
                        tt = wit.end_token
                        res.end_token = tt
                        continue
                    break
                if (not (isinstance(tt, TextToken))):
                    break
                mc = tt.get_morph_class_in_dictionary()
                if (mc == MorphClass.ADJECTIVE):
                    # alt_value accumulates "<adjectives> <noun>".
                    if (res.alt_value is None):
                        res.alt_value = res.value
                    if (res.alt_value.endswith(res.value)):
                        res.alt_value = res.alt_value[0:0+len(res.alt_value) - len(res.value)]
                    res.alt_value = "{0}{1} {2}".format(res.alt_value, tt.term, res.value)
                    res.end_token = tt
                    continue
                break
            return res
        if (res.typ == WeaponItemToken.Typs.BRAND or res.typ == WeaponItemToken.Typs.NAME):
            res.value = tok.termin.canonic_text
            return res
        if (res.typ == WeaponItemToken.Typs.MODEL):
            res.value = tok.termin.canonic_text
            if (isinstance(tok.termin.tag2, list)):
                # The model term carries related termins; attach them as inner tokens.
                li = Utils.asObjectOrNull(tok.termin.tag2, list)
                for to in li:
                    wit = WeaponItemToken._new2758(t, tok.end_token, Utils.valToEnum(to.tag, WeaponItemToken.Typs), to.canonic_text, tok.begin_token == tok.end_token)
                    res.__inner_tokens.append(wit)
                    if (to.additional_vars is not None and len(to.additional_vars) > 0):
                        wit.alt_value = to.additional_vars[0].canonic_text
            res.__correct_model()
            return res
    nnn = MiscHelper.check_number_prefix(t)
    if (nnn is not None):
        tit = TransItemToken._attach_number(nnn, True)
        if (tit is not None):
            res = WeaponItemToken._new2759(t, tit.end_token, WeaponItemToken.Typs.NUMBER)
            res.value = tit.value
            res.alt_value = tit.alt_value
            return res
    if (((isinstance(t, TextToken)) and t.chars.is_letter and t.chars.is_all_upper) and (t.length_char < 4)):
        # Short upper-case abbreviation followed by a number: a model designation.
        if ((t.next0_ is not None and ((t.next0_.is_hiphen or t.next0_.is_char('.'))) and (t.next0_.whitespaces_after_count < 2)) and (isinstance(t.next0_.next0_, NumberToken))):
            res = WeaponItemToken._new2760(t, t.next0_, WeaponItemToken.Typs.MODEL, True)
            res.value = t.term
            res.__correct_model()
            return res
        if ((isinstance(t.next0_, NumberToken)) and not t.is_whitespace_after):
            res = WeaponItemToken._new2760(t, t, WeaponItemToken.Typs.MODEL, True)
            res.value = t.term
            res.__correct_model()
            return res
        if (t.term == "СП" and (t.whitespaces_after_count < 3) and (isinstance(t.next0_, TextToken))):
            pp = WeaponItemToken.__try_parse(t.next0_, None, False, False)
            if (pp is not None and ((pp.typ == WeaponItemToken.Typs.MODEL or pp.typ == WeaponItemToken.Typs.BRAND))):
                res = WeaponItemToken._new2759(t, t, WeaponItemToken.Typs.NOUN)
                res.value = "ПИСТОЛЕТ"
                res.alt_value = "СЛУЖЕБНЫЙ ПИСТОЛЕТ"
                return res
    if (((isinstance(t, TextToken)) and t.chars.is_letter and not t.chars.is_all_lower) and t.length_char > 2):
        # A capitalised word is a name only in a suitable context.
        ok = False
        if (prev is not None and ((prev.typ == WeaponItemToken.Typs.NOUN or prev.typ == WeaponItemToken.Typs.MODEL or prev.typ == WeaponItemToken.Typs.BRAND))):
            ok = True
        elif (prev is None and t.previous is not None and t.previous.is_comma_and):
            ok = True
        if (ok):
            res = WeaponItemToken._new2760(t, t, WeaponItemToken.Typs.NAME, True)
            res.value = t.term
            if ((t.next0_ is not None and t.next0_.is_hiphen and (isinstance(t.next0_.next0_, TextToken))) and t.next0_.next0_.chars == t.chars):
                res.value = "{0}-{1}".format(res.value, t.next0_.next0_.term)
                res.end_token = t.next0_.next0_
            if (prev is not None and prev.typ == WeaponItemToken.Typs.NOUN):
                res.typ = WeaponItemToken.Typs.BRAND
            if (res.end_token.next0_ is not None and res.end_token.next0_.is_hiphen and (isinstance(res.end_token.next0_.next0_, NumberToken))):
                res.typ = WeaponItemToken.Typs.MODEL
                res.__correct_model()
            elif (not res.end_token.is_whitespace_after and (isinstance(res.end_token.next0_, NumberToken))):
                res.typ = WeaponItemToken.Typs.MODEL
                res.__correct_model()
            return res
    if (t.is_value("МАРКА", None)):
        res = WeaponItemToken.__try_parse(t.next0_, prev, after_conj, False)
        if (res is not None and res.typ == WeaponItemToken.Typs.BRAND):
            res.begin_token = t
            return res
        if (BracketHelper.can_be_start_of_sequence(t.next0_, True, False)):
            br = BracketHelper.try_parse(t.next0_, BracketParseAttr.NO, 100)
            if (br is not None):
                return WeaponItemToken._new2764(t, br.end_token, WeaponItemToken.Typs.BRAND, MiscHelper.get_text_value(br.begin_token, br.end_token, GetTextAttr.NO))
        if (((isinstance(t, TextToken)) and (isinstance(t.next0_, TextToken)) and t.next0_.length_char > 1) and not t.next0_.chars.is_all_lower):
            # The brand is the word AFTER the keyword; the former `t.term` stored
            # the keyword "МАРКА" itself as the brand value.
            return WeaponItemToken._new2764(t, t.next0_, WeaponItemToken.Typs.BRAND, t.next0_.term)
    if (t.is_value("КАЛИБР", "КАЛІБР")):
        tt1 = t.next0_
        if (tt1 is not None and ((tt1.is_hiphen or tt1.is_char(':')))):
            tt1 = tt1.next0_
        num = NumbersWithUnitToken.try_parse(tt1, None, False, False, False, False)
        if (num is not None and num.single_val is not None):
            return WeaponItemToken._new2764(t, num.end_token, WeaponItemToken.Typs.CALIBER, NumberHelper.double_to_string(num.single_val))
    if (isinstance(t, NumberToken)):
        # "<N> мм" or "<N> калибр".
        num = NumbersWithUnitToken.try_parse(t, None, False, False, False, False)
        if (num is not None and num.single_val is not None):
            if (len(num.units) == 1 and num.units[0].unit is not None and num.units[0].unit.name_cyr == "мм"):
                return WeaponItemToken._new2764(t, num.end_token, WeaponItemToken.Typs.CALIBER, NumberHelper.double_to_string(num.single_val))
            if (num.end_token.next0_ is not None and num.end_token.next0_.is_value("КАЛИБР", "КАЛІБР")):
                return WeaponItemToken._new2764(t, num.end_token.next0_, WeaponItemToken.Typs.CALIBER, NumberHelper.double_to_string(num.single_val))
    if (t.is_value("ПРОИЗВОДСТВО", "ВИРОБНИЦТВО")):
        tt1 = t.next0_
        if (tt1 is not None and ((tt1.is_hiphen or tt1.is_char(':')))):
            tt1 = tt1.next0_
        if (isinstance(tt1, ReferentToken)):
            if ((isinstance(tt1.get_referent(), OrganizationReferent)) or (isinstance(tt1.get_referent(), GeoReferent))):
                return WeaponItemToken._new2769(t, tt1, WeaponItemToken.Typs.DEVELOPER, tt1.get_referent())
    return None
def _to_string(self, short_variant: bool, lang: 'MorphLang', lev: int, from_range: int) -> str:
    """ Build the textual form of the date.

    Args:
        short_variant(bool): short form (e.g. "г" instead of "года"; relative
            details are not appended)
        lang(MorphLang): output language; None means Russian
        lev(int): not used by this method
        from_range(int): 0 - standalone date, 1 - start of a range
            ("с" / "з" / "from"), 2 - end of a range ("по" / "to")

    Returns:
        str: the text, or "?" when nothing could be printed
    """
    from pullenti.ner.date.internal.DateRelHelper import DateRelHelper
    res = io.StringIO()
    p = self.pointer
    if (lang is None):
        lang = MorphLang.RU
    if (self.is_relative):
        if (self.pointer == DatePointerType.TODAY):
            print("сейчас".format(), end="", file=res, flush=True)
            if (not short_variant):
                DateRelHelper.append_to_string(self, res)
            return Utils.toStringStringIO(res)
        word = None
        val = 0
        back = False
        is_local_rel = self.get_string_value(
            DateReferent.ATTR_ISRELATIVE) == "true"
        # `word` and `val` keep the unit and the offset of the last slot seen.
        for s in self.slots:
            if (s.type_name == DateReferent.ATTR_CENTURY):
                word = "век"
                wrapval784 = RefOutArgWrapper(0)
                Utils.tryParseInt(Utils.asObjectOrNull(s.value, str), wrapval784)
                val = wrapval784.value
            elif (s.type_name == DateReferent.ATTR_YEAR):
                word = "год"
                wrapval785 = RefOutArgWrapper(0)
                Utils.tryParseInt(Utils.asObjectOrNull(s.value, str), wrapval785)
                val = wrapval785.value
            elif (s.type_name == DateReferent.ATTR_MONTH):
                word = "месяц"
                wrapval786 = RefOutArgWrapper(0)
                Utils.tryParseInt(Utils.asObjectOrNull(s.value, str), wrapval786)
                val = wrapval786.value
                if (not is_local_rel and val >= 1 and val <= 12):
                    print(DateReferent.__m_month0[val - 1], end="", file=res)
            elif (s.type_name == DateReferent.ATTR_DAY):
                word = "день"
                wrapval787 = RefOutArgWrapper(0)
                Utils.tryParseInt(Utils.asObjectOrNull(s.value, str), wrapval787)
                val = wrapval787.value
                if ((not is_local_rel and self.month > 0 and self.month <= 12) and self.higher is not None and self.higher.get_string_value(
                        DateReferent.ATTR_ISRELATIVE) != "true"):
                    print("{0} {1}".format(
                        val, DateReferent.__m_month[self.month - 1]), end="", file=res, flush=True)
                elif (not is_local_rel):
                    print("{0} число".format(val), end="", file=res, flush=True)
            elif (s.type_name == DateReferent.ATTR_QUARTAL):
                word = "квартал"
                wrapval788 = RefOutArgWrapper(0)
                Utils.tryParseInt(Utils.asObjectOrNull(s.value, str), wrapval788)
                val = wrapval788.value
            elif (s.type_name == DateReferent.ATTR_WEEK):
                word = "неделя"
                wrapval789 = RefOutArgWrapper(0)
                Utils.tryParseInt(Utils.asObjectOrNull(s.value, str), wrapval789)
                val = wrapval789.value
            elif (s.type_name == DateReferent.ATTR_HOUR):
                word = "час"
                wrapval790 = RefOutArgWrapper(0)
                Utils.tryParseInt(Utils.asObjectOrNull(s.value, str), wrapval790)
                val = wrapval790.value
                if (not is_local_rel):
                    print("{0}:{1}".format("{:02d}".format(val), "{:02d}".format(self.minute)), end="", file=res, flush=True)
            elif (s.type_name == DateReferent.ATTR_MINUTE):
                word = "минута"
                wrapval791 = RefOutArgWrapper(0)
                Utils.tryParseInt(Utils.asObjectOrNull(s.value, str), wrapval791)
                val = wrapval791.value
            elif (s.type_name == DateReferent.ATTR_DAYOFWEEK):
                wrapval792 = RefOutArgWrapper(0)
                Utils.tryParseInt(Utils.asObjectOrNull(s.value, str), wrapval792)
                val = wrapval792.value
                if (not is_local_rel):
                    print((DateReferent.__m_week_day_ex[val - 1] if val >= 1 and val <= 7 else "?"), end="", file=res)
                else:
                    if (val < 0):
                        val = (-val)
                        back = True
                    # NOTE(review): val == 0 passes this test and indexes [-1]
                    # (the last week day) - confirm this is intended.
                    if (val >= 0 and val <= 7):
                        print("{0} {1}".format(
                            ((("прошлое" if back else "будущее")) if val == 7 else ((("прошлая" if back else "будущая")) if (val == 3 or val == 6) else (("прошлый" if back else "будущий")))),
                            DateReferent.__m_week_day_ex[val - 1]), end="", file=res, flush=True)
                        break
        if (word is not None and is_local_rel):
            if (val == 0):
                print("{0} {1}".format(
                    ("текущая" if word == "неделя" or word == "минута" else "текущий"), word), end="", file=res, flush=True)
            elif (val > 0 and not back):
                print("{0} {1} вперёд".format(
                    val, MiscHelper.get_text_morph_var_by_case_and_number_ex(
                        word, None, MorphNumber.UNDEFINED, str(val))), end="", file=res, flush=True)
            else:
                val = (-val)
                print("{0} {1} назад".format(
                    val, MiscHelper.get_text_morph_var_by_case_and_number_ex(
                        word, None, MorphNumber.UNDEFINED, str(val))), end="", file=res, flush=True)
        elif (not is_local_rel and res.tell() == 0):
            print("{0} {1}".format(
                val, MiscHelper.get_text_morph_var_by_case_and_number_ex(
                    word, None, MorphNumber.UNDEFINED, str(val))), end="", file=res, flush=True)
        if (not short_variant):
            DateRelHelper.append_to_string(self, res)
        if (from_range == 1):
            Utils.insertStringIO(
                res, 0, "{0} ".format(("з" if lang.is_ua else ("from" if lang.is_en else "с"))))
        elif (from_range == 2):
            Utils.insertStringIO(res, 0, ("to " if lang.is_en else "по "))
        return Utils.toStringStringIO(res)
    # ---- absolute date ----
    if (from_range == 1):
        print("{0} ".format(("з" if lang.is_ua else ("from" if lang.is_en else "с"))), end="", file=res, flush=True)
    elif (from_range == 2):
        print(("to " if lang.is_en else "по ").format(), end="", file=res, flush=True)
    if (p != DatePointerType.NO):
        val = MetaDate.POINTER.convert_inner_value_to_outer_value(
            Utils.enumToString(p), lang)
        # Inside a range the pointer word takes the case required by the preposition.
        if (from_range == 0 or lang.is_en):
            pass
        elif (from_range == 1):
            if (p == DatePointerType.BEGIN):
                val = ("початку" if lang.is_ua else "начала")
            elif (p == DatePointerType.CENTER):
                val = ("середини" if lang.is_ua else "середины")
            elif (p == DatePointerType.END):
                val = ("кінця" if lang.is_ua else "конца")
            elif (p == DatePointerType.TODAY):
                val = ("цього часу" if lang.is_ua else "настоящего времени")
        elif (from_range == 2):
            if (p == DatePointerType.BEGIN):
                val = ("початок" if lang.is_ua else "начало")
            elif (p == DatePointerType.CENTER):
                val = ("середину" if lang.is_ua else "середину")
            elif (p == DatePointerType.END):
                val = ("кінець" if lang.is_ua else "конец")
            elif (p == DatePointerType.TODAY):
                val = ("теперішній час" if lang.is_ua else "настоящее время")
        print("{0} ".format(val), end="", file=res, flush=True)
    if (self.day_of_week > 0):
        if (lang.is_en):
            print("{0}, ".format(
                DateReferent.__m_week_day_en[self.day_of_week - 1]), end="", file=res, flush=True)
        else:
            print("{0}, ".format(
                DateReferent.__m_week_day[self.day_of_week - 1]), end="", file=res, flush=True)
    y = self.year
    m = self.month
    d = self.day
    cent = self.century
    if (y == 0 and cent != 0):
        is_bc = cent < 0
        if (cent < 0):
            cent = (-cent)
        print(NumberHelper.get_number_roman(cent), end="", file=res)
        # Fix: the English word used to be selected by `lang.is_ua`, which made
        # the Ukrainian alternatives below unreachable and gave English output
        # Russian words.
        if (lang.is_en):
            print(" century", end="", file=res)
        elif (m > 0 or p != DatePointerType.NO or from_range == 1):
            print((" віка" if lang.is_ua else " века"), end="", file=res)
        else:
            print((" вік" if lang.is_ua else " век"), end="", file=res)
        if (is_bc):
            print((" до н.е." if lang.is_ua else (" BC" if lang.is_en else " до н.э.")), end="", file=res)
        return Utils.toStringStringIO(res)
    if (d > 0):
        print(d, end="", file=res)
    if (m > 0 and m <= 12):
        if (res.tell() > 0 and Utils.getCharAtStringIO(res, res.tell() - 1) != ' '):
            print(' ', end="", file=res)
        # One month table is used next to a day / pointer / range word,
        # the "0" table otherwise.
        if (lang.is_ua):
            print((DateReferent.__m_monthua[m - 1] if d > 0 or p != DatePointerType.NO or from_range != 0 else DateReferent.__m_month0ua[m - 1]), end="", file=res)
        elif (lang.is_en):
            print(DateReferent.__m_monthen[m - 1], end="", file=res)
        else:
            print((DateReferent.__m_month[m - 1] if d > 0 or p != DatePointerType.NO or from_range != 0 else DateReferent.__m_month0[m - 1]), end="", file=res)
    if (y != 0):
        is_bc = y < 0
        if (y < 0):
            y = (-y)
        if (res.tell() > 0 and Utils.getCharAtStringIO(res, res.tell() - 1) != ' '):
            print(' ', end="", file=res)
        if (lang is not None and lang.is_en):
            print("{0}".format(y), end="", file=res, flush=True)
        elif (short_variant):
            print("{0}{1}".format(y, ("р" if lang.is_ua else "г")), end="", file=res, flush=True)
        elif (m > 0 or p != DatePointerType.NO or from_range == 1):
            print("{0} {1}".format(y, ("року" if lang.is_ua else "года")), end="", file=res, flush=True)
        else:
            print("{0} {1}".format(y, ("рік" if lang.is_ua else "год")), end="", file=res, flush=True)
        if (is_bc):
            print((" до н.е." if lang.is_ua else ("BC" if lang.is_en else " до н.э.")), end="", file=res)
    h = self.hour
    mi = self.minute
    se = self.second
    if (h >= 0 and mi >= 0):
        if (res.tell() > 0):
            print(' ', end="", file=res)
        print("{0}:{1}".format("{:02d}".format(h), "{:02d}".format(mi)), end="", file=res, flush=True)
        if (se >= 0):
            print(":{0}".format("{:02d}".format(se)), end="", file=res, flush=True)
    if (res.tell() == 0):
        if (self.quartal != 0):
            print("{0}-й квартал".format(self.quartal), end="", file=res, flush=True)
    if (res.tell() == 0):
        return "?"
    # Drop trailing spaces and commas left by the pieces above.
    while Utils.getCharAtStringIO(
            res, res.tell() - 1) == ' ' or Utils.getCharAtStringIO(
                res, res.tell() - 1) == ',':
        Utils.setLengthStringIO(res, res.tell() - 1)
    if (not short_variant and self.is_relative):
        DateRelHelper.append_to_string(self, res)
    return Utils.toStringStringIO(res).strip()
def tryParse(t: 'Token',
             add_units: 'TerminCollection',
             can_be_set: bool = True,
             can_units_absent: bool = False) -> 'MeasureToken':
    """ Extract a measure together with its name.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; the code tokens are unchanged - confirm the indentation upstream.

    Args:
        t(Token): first token of the name; must be a TextToken
        add_units(TerminCollection): passed through to the unit/number parsers
        can_be_set(bool): when False, the final attempt to join a second value
            with different units into a set is skipped
        can_units_absent(bool): accept a value for which no units were found

    Returns:
        MeasureToken: the parsed measure, or None
    """
    if (not ((isinstance(t, TextToken)))):
        return None
    if (t.is_table_control_char):
        return None
    t0 = t
    whd = None
    # minmax is filled by _isMinOrMax; below, < 0 turns a single value into a
    # lower bound and > 0 into an upper bound.
    minmax = 0
    wrapminmax1516 = RefOutArgWrapper(minmax)
    tt = NumbersWithUnitToken._isMinOrMax(t0, wrapminmax1516)
    minmax = wrapminmax1516.value
    if (tt is not None):
        t = tt.next0_
    npt = NounPhraseHelper.tryParse(
        t,
        Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) |
                        (NounPhraseParseAttr.IGNOREBRACKETS),
                        NounPhraseParseAttr), 0)
    if (npt is None):
        # No noun phrase: accept the result of _tryParseWHL, the abbreviation
        # "КПД", or a word longer than 3 characters that is not in the dictionary.
        whd = NumbersWithUnitToken._tryParseWHL(t)
        if (whd is not None):
            npt = NounPhraseToken(t0, whd.end_token)
        elif (t0.isValue("КПД", None)):
            npt = NounPhraseToken(t0, t0)
        elif ((isinstance(t0, TextToken)) and t0.length_char > 3 and
              t0.getMorphClassInDictionary().is_undefined):
            npt = NounPhraseToken(t0, t0)
        else:
            return None
    elif (NumberHelper.tryParseRealNumber(t, True) is not None):
        return None
    else:
        dtok = DateItemToken.tryAttach(t, None)
        if (dtok is not None):
            return None
    t1 = npt.end_token
    t = npt.end_token
    name_ = MetaToken._new561(npt.begin_token, npt.end_token, npt.morph)
    units = None
    units2 = None  # assigned below but not read anywhere in this function
    internals_ = list()
    not0_ = False
    # Phase 1: walk the tokens between the name and the numeric value.
    tt = t1.next0_
    first_pass3037 = True
    while True:
        if first_pass3037: first_pass3037 = False
        else: tt = tt.next0_
        if (not (tt is not None)): break
        if (tt.is_newline_before):
            break
        if (tt.is_table_control_char):
            break
        wrapminmax1510 = RefOutArgWrapper(minmax)
        tt2 = NumbersWithUnitToken._isMinOrMax(tt, wrapminmax1510)
        minmax = wrapminmax1510.value
        if (tt2 is not None):
            tt = tt2
            t = tt
            t1 = t
            continue
        # Link verbs; a preceding "НЕ" sets the flag later passed to tryParseMulti.
        if ((tt.isValue("БЫТЬ", None) or tt.isValue("ДОЛЖЕН", None) or
             tt.isValue("ДОЛЖНЫЙ", None)) or tt.isValue("МОЖЕТ", None) or
                ((tt.isValue("СОСТАВЛЯТЬ", None) and
                  not tt.getMorphClassInDictionary().is_adjective))):
            t = tt
            t1 = t
            if (tt.previous.isValue("НЕ", None)):
                not0_ = True
            continue
        www = NumbersWithUnitToken._tryParseWHL(tt)
        if (www is not None):
            whd = www
            tt = www.end_token
            t = tt
            t1 = t
            continue
        if (len(internals_) > 0 and tt.is_comma_and):
            continue
        if (tt.isValue("ПРИ", None) or len(internals_) > 0):
            # Nested measure ("... при <condition>"); kept only when reliable.
            mt1 = MeasureToken.tryParse(tt.next0_, add_units, False, False)
            if (mt1 is not None and mt1.reliable):
                internals_.append(mt1)
                tt = mt1.end_token
                t = tt
                t1 = t
                continue
        if ((isinstance(tt, NumberToken)) and
                (tt).typ == NumberSpellingType.WORDS):
            npt3 = NounPhraseHelper.tryParse(
                tt, NounPhraseParseAttr.PARSENUMERICASADJECTIVE, 0)
            if (npt3 is not None):
                tt = npt3.end_token
                t1 = tt
                if (len(internals_) == 0):
                    name_.end_token = t1
                continue
        # A number with units starts here: the name part is over.
        mt0 = NumbersWithUnitToken.tryParse(tt, add_units, False, False)
        if (mt0 is not None):
            break
        if (((tt.is_comma or tt.isChar('('))) and tt.next0_ is not None):
            www = NumbersWithUnitToken._tryParseWHL(tt.next0_)
            if (www is not None):
                whd = www
                tt = www.end_token
                t = tt
                t1 = t
                if (tt.next0_ is not None and tt.next0_.is_comma):
                    tt = tt.next0_
                    t1 = tt
                if (tt.next0_ is not None and tt.next0_.isChar(')')):
                    tt = tt.next0_
                    t1 = tt
                    continue
            # Units given right after the name: ", <units>" or "(<units>)".
            uu = UnitToken.tryParseList(tt.next0_, add_units, False)
            if (uu is not None):
                t = uu[len(uu) - 1].end_token
                t1 = t
                units = uu
                if (tt.isChar('(') and t1.next0_ is not None and
                        t1.next0_.isChar(')')):
                    tt = t1.next0_
                    t = tt
                    t1 = t
                    continue
                elif (t1.next0_ is not None and t1.next0_.isChar('(')):
                    uu = UnitToken.tryParseList(t1.next0_.next0_, add_units,
                                                False)
                    if (uu is not None and
                            uu[len(uu) - 1].end_token.next0_ is not None and
                            uu[len(uu) - 1].end_token.next0_.isChar(')')):
                        units2 = uu
                        tt = uu[len(uu) - 1].end_token.next0_
                        t = tt
                        t1 = t
                        continue
                if (uu is not None and len(uu) > 0 and not uu[0].is_doubt):
                    break
        if (BracketHelper.canBeStartOfSequence(tt, False, False)):
            br = BracketHelper.tryParse(tt, BracketParseAttr.NO, 100)
            if (br is not None):
                tt = br.end_token
                t = tt
                t1 = t
                continue
        if (tt.isValue("НЕ", None) and tt.next0_ is not None):
            mc = tt.next0_.getMorphClassInDictionary()
            if (mc.is_adverb or mc.is_misc):
                break
            continue
        if (tt.isValue("ЯМЗ", None)):
            pass  # no effect
        npt2 = NounPhraseHelper.tryParse(
            tt,
            Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) |
                            (NounPhraseParseAttr.IGNOREBRACKETS),
                            NounPhraseParseAttr), 0)
        if (npt2 is None):
            if (tt.morph.class0_.is_preposition or
                    tt.morph.class0_.is_conjunction):
                to = NumbersWithUnitToken.M_TERMINS.tryParse(
                    tt, TerminParseAttr.NO)
                if (to is not None):
                    if ((isinstance(to.end_token.next0_, TextToken)) and
                            to.end_token.next0_.is_letters):
                        pass
                    else:
                        break
                t1 = tt
                continue
            mc = tt.getMorphClassInDictionary()
            if (((isinstance(tt, TextToken)) and tt.chars.is_letter and
                 tt.length_char > 1) and
                    (((tt.chars.is_all_upper or mc.is_adverb or
                       mc.is_undefined) or mc.is_adjective))):
                uu = UnitToken.tryParseList(tt, add_units, False)
                if (uu is not None):
                    if (uu[0].length_char > 2 or len(uu) > 1):
                        units = uu
                        t = uu[len(uu) - 1].end_token
                        t1 = t
                        break
                t = tt
                t1 = t
                if (len(internals_) == 0):
                    name_.end_token = tt
                continue
            if (tt.is_comma):
                continue
            if (tt.isChar('.')):
                if (not MiscHelper.canBeStartOfSentence(tt.next0_)):
                    continue
                uu = UnitToken.tryParseList(tt.next0_, add_units, False)
                if (uu is not None):
                    if (uu[0].length_char > 2 or len(uu) > 1):
                        units = uu
                        t = uu[len(uu) - 1].end_token
                        t1 = t
                        break
            break
        # Another noun phrase: normally it extends the name.
        tt = npt2.end_token
        t = tt
        t1 = t
        if (len(internals_) > 0):
            pass
        elif (t.isValue("ПРЕДЕЛ", None) or t.isValue("ГРАНИЦА", None) or
              t.isValue("ДИАПАЗОН", None)):
            pass
        elif (t.chars.is_letter):
            name_.end_token = t1
    # Phase 2: skip separators standing between the name and the value.
    t1 = t1.next0_
    first_pass3038 = True
    while True:
        if first_pass3038: first_pass3038 = False
        else: t1 = t1.next0_
        if (not (t1 is not None)): break
        if (t1.is_table_control_char):
            pass
        elif (t1.isCharOf(":,_")):
            www = NumbersWithUnitToken._tryParseWHL(t1.next0_)
            if (www is not None):
                whd = www
                t = www.end_token
                t1 = t
                continue
        elif (t1.is_hiphen and t1.is_whitespace_after and
              t1.is_whitespace_before):
            pass
        else:
            break
    if (t1 is None):
        return None
    # Phase 3: the numeric value(s).
    mts = NumbersWithUnitToken.tryParseMulti(t1, add_units, False, not0_)
    if (mts is None):
        return None
    mt = mts[0]
    if (name_.begin_token.morph.class0_.is_preposition):
        name_.begin_token = name_.begin_token.next0_
    if (len(mts) > 1 and len(internals_) == 0):
        # Several values: return a set with one internal token per value.
        if (len(mt.units) == 0):
            if (units is not None):
                for m in mts:
                    m.units = units
        res1 = MeasureToken._new1511(t0, mts[len(mts) - 1].end_token,
                                     name_.morph, True)
        res1.name = MiscHelper.getTextValueOfMetaToken(
            name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
        k = 0
        while k < len(mts):
            ttt = MeasureToken._new1506(mts[k].begin_token, mts[k].end_token,
                                        mts[k])
            if (whd is not None):
                # whd.tag is read as a list of names, one per value.
                nams = Utils.asObjectOrNull(whd.tag, list)
                if (k < len(nams)):
                    ttt.name = nams[k]
            res1.internals.append(ttt)
            k += 1
        tt1 = res1.end_token.next0_
        if (tt1 is not None and tt1.isChar('±')):
            nn = NumbersWithUnitToken._tryParse(tt1, add_units, True, False)
            if (nn is not None and nn.plus_minus_percent):
                res1.end_token = nn.end_token
                res1.nums = nn
        return res1
    if (not mt.is_whitespace_before):
        # A value glued to the previous token is accepted only after ":", ")",
        # "," or a table control character.
        if (mt.begin_token.previous is None):
            return None
        if (mt.begin_token.previous.isCharOf(":),") or
                mt.begin_token.previous.is_table_control_char):
            pass
        else:
            return None
    if (len(mt.units) == 0 and units is not None):
        mt.units = units
        if (mt.div_num is not None and len(units) > 1 and
                len(mt.div_num.units) == 0):
            # Units from the first one with power -1 onwards go to the divisor.
            i = 1
            while i < len(units):
                if (units[i].pow0_ == -1):
                    j = i
                    while j < len(units):
                        mt.div_num.units.append(units[j])
                        units[j].pow0_ = (-units[j].pow0_)
                        j += 1
                    del mt.units[i:i + len(units) - i]
                    break
                i += 1
    if ((minmax < 0) and mt.single_val is not None):
        mt.from_val = mt.single_val
        mt.from_include = True
        mt.single_val = (None)
    if (minmax > 0 and mt.single_val is not None):
        mt.to_val = mt.single_val
        mt.to_include = True
        mt.single_val = (None)
    if (len(mt.units) == 0):
        units = UnitToken.tryParseList(mt.end_token.next0_, add_units, True)
        if (units is None):
            if (can_units_absent):
                pass
            else:
                return None
        else:
            mt.units = units
    res = MeasureToken._new1513(t0, mt.end_token, name_.morph, internals_)
    if (((not t0.is_whitespace_before and t0.previous is not None and
          t0 == name_.begin_token) and t0.previous.is_hiphen and
         not t0.previous.is_whitespace_before) and
            (isinstance(t0.previous.previous, TextToken))):
        # The name is the second half of a hyphenated word: include the first half.
        name_.begin_token = res.begin_token = name_.begin_token.previous.previous
    res.name = MiscHelper.getTextValueOfMetaToken(
        name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
    res.nums = mt
    for u in res.nums.units:
        if (u.keyword is not None):
            if (u.keyword.begin_char >= res.begin_char):
                res.reliable = True
    res.__parseInternals(add_units)
    if (len(res.internals) > 0 or not can_be_set):
        return res
    # A second value with different units right after: make a set of the two.
    t1 = res.end_token.next0_
    if (t1 is not None and t1.is_comma_and):
        t1 = t1.next0_
    mts1 = NumbersWithUnitToken.tryParseMulti(t1, add_units, False, False)
    if ((mts1 is not None and len(mts1) == 1 and
         (t1.whitespaces_before_count < 3)) and len(mts1[0].units) > 0 and
            not UnitToken.canBeEquals(mts[0].units, mts1[0].units)):
        res.is_set = True
        res.nums = (None)
        res.internals.append(
            MeasureToken._new1506(mt.begin_token, mt.end_token, mt))
        res.internals.append(
            MeasureToken._new1506(mts1[0].begin_token, mts1[0].end_token,
                                  mts1[0]))
        res.end_token = mts1[0].end_token
    return res
def __try_attach(t0: 'Token') -> 'PhoneItemToken':
    """Try to read a single phone-number element that starts at ``t0``.

    The checks run in a fixed order: a digit number, a ``.`` or hyphen
    delimiter, a ``+`` country code, a U+2011 hyphen in front of a
    two-character number, a parenthesised city code or prefix term, a
    three-character number between slashes, and finally a textual prefix
    (``Т/Р``, ``Т/М``, a term from ``PhoneItemToken.M_PHONE_TERMINS`` or
    the word ``НОМЕР`` followed by a prefix).

    Args:
        t0: token to start from; may be None.

    Returns:
        A new PhoneItemToken for the recognised element, or None when no
        element starts at ``t0``.
    """
    if t0 is None:
        return None
    kinds = PhoneItemToken.PhoneItemType

    if isinstance(t0, NumberToken):
        # A number that also parses as "number with postfix" and has no
        # whitespace after it is kept only if a "PHONE" referent can be
        # processed from the following token.
        glued_postfix = (NumberHelper.try_parse_number_with_postfix(t0) is not None
                         and not t0.is_whitespace_after)
        if glued_postfix and t0.kit.process_referent("PHONE", t0.next0_) is None:
            return None
        if t0.typ == NumberSpellingType.DIGIT and not t0.morph.class0_.is_adjective:
            return PhoneItemToken._new2625(t0, t0, kinds.NUMBER, t0.get_source_text())
        return None

    if t0.is_char('.'):
        return PhoneItemToken._new2625(t0, t0, kinds.DELIM, ".")
    if t0.is_hiphen:
        return PhoneItemToken._new2625(t0, t0, kinds.DELIM, "-")

    if t0.is_char('+'):
        following = t0.next0_
        if not isinstance(following, NumberToken) or following.typ != NumberSpellingType.DIGIT:
            return None
        # Leading zeros are dropped; a code made only of zeros is rejected.
        code = following.get_source_text().lstrip('0')
        if not code:
            return None
        return PhoneItemToken._new2625(t0, following, kinds.COUNTRYCODE, code)

    if (t0.is_char(chr(0x2011)) and isinstance(t0.next0_, NumberToken)
            and t0.next0_.length_char == 2):
        return PhoneItemToken._new2625(t0, t0, kinds.DELIM, "-")

    if t0.is_char_of("("):
        if isinstance(t0.next0_, NumberToken):
            # "(digits[-.]digits...)": collect the digits up to the ')'.
            digits = io.StringIO()
            cur = t0.next0_
            while cur is not None and not cur.is_char(')'):
                if isinstance(cur, NumberToken) and cur.typ == NumberSpellingType.DIGIT:
                    print(cur.get_source_text(), end="", file=digits)
                elif not cur.is_hiphen and not cur.is_char('.'):
                    return None
                cur = cur.next0_
            if cur is None or digits.tell() == 0:
                return None
            return PhoneItemToken._new2630(t0, cur, kinds.CITYCODE,
                                           Utils.toStringStringIO(digits), True)
        # "(term)": an untagged term from the phone dictionary closed by ')'.
        in_brackets = PhoneItemToken.M_PHONE_TERMINS.try_parse(t0.next0_, TerminParseAttr.NO)
        if in_brackets is None or in_brackets.termin.tag is not None:
            return None
        closing = in_brackets.end_token.next0_
        if closing is None or not closing.is_char(')'):
            return None
        return PhoneItemToken._new2631(t0, closing, kinds.PREFIX, True, "")

    if (t0.is_char('/') and isinstance(t0.next0_, NumberToken)
            and t0.next0_.next0_ is not None
            and t0.next0_.next0_.is_char('/')
            and t0.next0_.length_char == 3):
        return PhoneItemToken._new2630(t0, t0.next0_.next0_, kinds.CITYCODE,
                                       str(t0.next0_.value), True)

    # Everything below recognises a textual prefix.
    kind = PhoneKind.UNDEFINED
    if (t0.is_value("Т", None) and t0.next0_ is not None and t0.next0_.is_char_of("\\/")
            and t0.next0_.next0_ is not None
            and (t0.next0_.next0_.is_value("Р", None) or t0.next0_.next0_.is_value("М", None))):
        last = t0.next0_.next0_
        kind = PhoneKind.WORK if last.is_value("Р", None) else PhoneKind.MOBILE
    else:
        found = PhoneItemToken.M_PHONE_TERMINS.try_parse(t0, TerminParseAttr.NO)
        if found is None or found.termin.tag is not None:
            if t0.is_value("НОМЕР", None):
                nested = PhoneItemToken.__try_attach(t0.next0_)
                if nested is not None and nested.item_type == kinds.PREFIX:
                    nested.begin_token = t0
                    return nested
            return None
        if isinstance(found.termin.tag2, PhoneKind):
            kind = Utils.valToEnum(found.termin.tag2, PhoneKind)
        last = found.end_token

    res = PhoneItemToken._new2633(t0, last, kinds.PREFIX, "", kind)
    # Absorb trailing '.' / ':' into the prefix; table control characters
    # are stepped over without extending the result.
    while True:
        follower = last.next0_
        if follower is not None and follower.is_char_of(".:"):
            last = follower
            res.end_token = last
        elif follower is not None and follower.is_table_control_char:
            last = follower
        else:
            break
    # A bare one-token prefix that is a single character or all upper case
    # must be followed by whitespace.
    if t0 == last and (t0.begin_char == t0.end_char or t0.chars.is_all_upper):
        if not t0.is_whitespace_after:
            return None
    return res
def __correctTailAttributes(p : 'PersonReferent', t0 : 'Token') -> 'Token':
    """Consume birth/death details that follow a person and store them on ``p``.

    Handles a leading comma, the verbs/words for "born" and "died", a
    following DATE referent (optionally followed by ``Р`` / ``РОЖДЕНИЕ``),
    an age after a death date, and a bracketed DATE or DATERANGE.

    Args:
        p: person referent that receives the BORN / DIE / AGE slots; when
            None the tokens are still consumed but nothing is stored.
        t0: first token after the person.

    Returns:
        The last token consumed; ``t0`` itself when nothing was recognised.
    """
    last = t0
    cur = t0
    if cur is not None and cur.isChar(','):
        cur = cur.next0_

    is_birth = False
    is_death = False
    if cur is not None:
        if cur.isValue("РОДИТЬСЯ", "НАРОДИТИСЯ") or cur.isValue("BORN", None):
            cur = cur.next0_
            is_birth = True
        elif (cur.isValue("УМЕРЕТЬ", "ПОМЕРТИ") or cur.isValue("СКОНЧАТЬСЯ", None)
              or cur.isValue("DIED", None)):
            cur = cur.next0_
            is_death = True
        elif (cur.isValue("ДАТА", None) and cur.next0_ is not None
              and cur.next0_.isValue("РОЖДЕНИЕ", "НАРОДЖЕННЯ")):
            cur = cur.next0_.next0_
            is_birth = True

    # Skip prepositions, hyphens and colons before the date.
    while cur is not None and (cur.morph.class0_.is_preposition or cur.is_hiphen
                               or cur.isChar(':')):
        cur = cur.next0_

    if cur is not None and cur.getReferent() is not None:
        ref = cur.getReferent()
        if ref.type_name == "DATE":
            date_end = cur
            follower = cur.next0_
            if follower is not None and (follower.isValue("Р", None)
                                         or follower.isValue("РОЖДЕНИЕ", "НАРОДЖЕННЯ")):
                # "<date> р." / "<date> рождения" marks a birth date.
                is_birth = True
                date_end = follower
                if date_end.next0_ is not None and date_end.next0_.isChar('.'):
                    date_end = date_end.next0_
            # Birth takes precedence over death when both flags are set.
            if is_birth or is_death:
                if p is not None:
                    slot_name = (PersonReferent.ATTR_BORN if is_birth
                                 else PersonReferent.ATTR_DIE)
                    p.addSlot(slot_name, ref, False, 0)
                last = date_end
                cur = date_end

    if is_death and cur is not None:
        age = NumberHelper.tryParseAge(cur.next0_)
        if age is not None:
            if p is not None:
                p.addSlot(PersonReferent.ATTR_AGE, str(age.value), False, 0)
            cur = age.end_token.next0_
            last = age.end_token

    if cur is None or not cur.isChar('('):
        return last
    br = BracketHelper.tryParse(cur, BracketParseAttr.NO, 100)
    if br is None:
        return last

    # "(род. <date>)" or "(<date range>)": the referent must fill the brackets.
    inner = cur.next0_
    if inner.isValue("РОД", None):
        inner = inner.next0_
        if inner is not None and inner.isChar('.'):
            inner = inner.next0_
    if not isinstance(inner, ReferentToken):
        return last

    ref = inner.getReferent()
    if ref.type_name == "DATERANGE" and inner.next0_ == br.end_token:
        born_date = Utils.asObjectOrNull(ref.getSlotValue("FROM"), Referent)
        death_date = Utils.asObjectOrNull(ref.getSlotValue("TO"), Referent)
        if born_date is not None and death_date is not None:
            if p is not None:
                p.addSlot(PersonReferent.ATTR_BORN, born_date, False, 0)
                p.addSlot(PersonReferent.ATTR_DIE, death_date, False, 0)
            last = br.end_token
    elif ref.type_name == "DATE" and inner.next0_ == br.end_token:
        if p is not None:
            p.addSlot(PersonReferent.ATTR_BORN, ref, False, 0)
        last = br.end_token
    return last
def tryParse(t: 'Token', prev: 'FundsItemToken' = None) -> 'FundsItemToken':
    """Parse one element of a funds description starting at token ``t``.

    Leading prepositions, adverbs and a few filler words are skipped; a
    price word switches the type used for a following money referent.
    Possible results: an organization (ORG), a money sum (SUM or PRICE),
    a noun for shares / securities / authorised capital (NOUN), a
    percentage (PERCENT) or a plain count (COUNT).

    Args:
        t: token to start from; may be None.
        prev: previously parsed item; when it is a COUNT, a bare share-type
            adjective can be accepted as a share noun.

    Returns:
        The parsed FundsItemToken, or None when nothing matches.
    """
    if t is None:
        return None

    def _chain(start, link):
        # Yield `start` and every token reachable through attribute `link`.
        node = start
        while node is not None:
            yield node
            node = getattr(node, link)

    item_typ = FundsItemTyp.UNDEFINED
    for tt in _chain(t, "next0_"):
        if tt.morph.class0_.is_preposition or tt.morph.class0_.is_adverb:
            continue
        if any(tt.isValue(w, None) for w in ("СУММА", "ОКОЛО", "БОЛЕЕ", "МЕНЕЕ", "СВЫШЕ")):
            continue
        if any(tt.isValue(w, None) for w in ("НОМИНАЛ", "ЦЕНА", "СТОИМОСТЬ", "СТОИТЬ")):
            item_typ = FundsItemTyp.PRICE
            continue
        if any(tt.isValue(w, None) for w in ("НОМИНАЛЬНАЯ", "ОБЩАЯ", "СОСТАВЛЯТЬ")):
            continue

        ref = tt.getReferent()
        if isinstance(ref, OrganizationReferent):
            return FundsItemToken._new428(t, tt, FundsItemTyp.ORG, ref)
        if isinstance(ref, MoneyReferent):
            if item_typ == FundsItemTyp.UNDEFINED:
                item_typ = FundsItemTyp.SUM
            nxt = tt.next0_
            # "<money> за акцию" is a price per share.
            if (nxt is not None and nxt.isValue("ЗА", None) and nxt.next0_ is not None
                    and (nxt.next0_.isValue("АКЦИЯ", None)
                         or nxt.next0_.isValue("АКЦІЯ", None))):
                item_typ = FundsItemTyp.PRICE
            return FundsItemToken._new428(t, tt, item_typ, ref)
        if ref is not None:
            break

        npt = NounPhraseHelper.tryParse(tt, NounPhraseParseAttr.NO, 0)
        if npt is not None and npt.noun.isValue("ПАКЕТ", None):
            # "пакет <...>": look at the noun phrase after the word "пакет".
            npt = NounPhraseHelper.tryParse(npt.end_token.next0_, NounPhraseParseAttr.NO, 0)
        if npt is not None:
            found = None
            noun = npt.noun
            if noun.isValue("АКЦІЯ", None) or noun.isValue("АКЦИЯ", None):
                found = FundsItemToken._new430(t, npt.end_token, FundsItemTyp.NOUN,
                                               FundsKind.STOCK)
                if len(npt.adjectives) > 0:
                    for act in FundsItemToken.__m_act_types:
                        if npt.adjectives[0].isValue(act, None):
                            found.string_val = npt.getNormalCaseText(
                                None, True, MorphGender.UNDEFINED, False).lower()
                            if found.string_val == "голосовавшая акция":
                                found.string_val = "голосующая акция"
                            break
            elif ((noun.isValue("БУМАГА", None) or noun.isValue("ПАПІР", None))
                  and npt.end_token.previous is not None
                  and (npt.end_token.previous.isValue("ЦЕННЫЙ", None)
                       or npt.end_token.previous.isValue("ЦІННИЙ", None))):
                found = FundsItemToken._new431(t, npt.end_token, FundsItemTyp.NOUN,
                                               FundsKind.STOCK, "ценные бумаги")
            elif ((noun.isValue("КАПИТАЛ", None) or noun.isValue("КАПІТАЛ", None))
                  and len(npt.adjectives) > 0
                  and (npt.adjectives[0].isValue("УСТАВНОЙ", None)
                       or npt.adjectives[0].isValue("УСТАВНЫЙ", None)
                       or npt.adjectives[0].isValue("СТАТУТНИЙ", None))):
                found = FundsItemToken._new430(t, npt.end_token, FundsItemTyp.NOUN,
                                               FundsKind.CAPITAL)
            if found is not None:
                # Attach an organization that directly follows the noun.
                rt = found.kit.processReferent(OrganizationAnalyzer.ANALYZER_NAME,
                                               found.end_token.next0_)
                if rt is not None:
                    found.ref = rt.referent
                    found.end_token = rt.end_token
                return found

        if prev is not None and prev.typ == FundsItemTyp.COUNT:
            act_word = next(
                (a for a in FundsItemToken.__m_act_types if tt.isValue(a, None)), None)
            if act_word is not None:
                # A bare share-type adjective after a count is accepted only
                # when shares are mentioned within 100 tokens: a funds
                # referent before it, or a STOCK item after it.
                confirmed = False
                for steps, back in enumerate(_chain(tt.previous, "previous"), 1):
                    if steps > 100:
                        break
                    refs = back.getReferents()
                    if refs is None:
                        continue
                    if any(isinstance(r, FundsReferent) for r in refs):
                        confirmed = True
                        break
                if not confirmed:
                    for steps, ahead in enumerate(_chain(tt.next0_, "next0_"), 1):
                        if steps > 100:
                            break
                        fi = FundsItemToken.tryParse(ahead, None)
                        if fi is not None and fi.kind == FundsKind.STOCK:
                            confirmed = True
                            break
                if confirmed:
                    res = FundsItemToken._new433(t, tt, FundsKind.STOCK, FundsItemTyp.NOUN)
                    res.string_val = "{0}ая акция".format(act_word[:-2].lower())
                    return res

        if isinstance(tt, NumberToken):
            num = NumberHelper.tryParseNumberWithPostfix(tt)
            if num is None:
                # Plain number: a count, optionally followed by "штука".
                count_end = tt
                if count_end.next0_ is not None and count_end.next0_.isValue("ШТУКА", None):
                    count_end = count_end.next0_
                return FundsItemToken._new434(t, count_end, FundsItemTyp.COUNT,
                                              Utils.asObjectOrNull(tt, NumberToken))
            if tt.previous is not None and tt.previous.isValue("НА", None):
                break
            if num.ex_typ == NumberExType.PERCENT:
                res = FundsItemToken._new434(t, num.end_token, FundsItemTyp.PERCENT, num)
                after = num.end_token.next0_
                # "<percent> + <number>" / "<percent> плюс <number>"
                if (after is not None
                        and (after.isChar('+') or after.isValue("ПЛЮС", None))
                        and isinstance(after.next0_, NumberToken)):
                    res.end_token = after.next0_
                    after = res.end_token.next0_
                # Skip a hyphenated lower-case suffix glued to the number.
                if (after is not None and after.is_hiphen and after.next0_ is not None
                        and after.next0_.chars.is_all_lower
                        and not after.is_whitespace_after):
                    after = after.next0_.next0_
                if after is not None and (after.isValue("ДОЛЯ", None)
                                          or after.isValue("ЧАСТКА", None)):
                    res.end_token = after
                return res
            break
        break
    return None
def get_normal_case_text(self, mc: 'MorphClass' = None, num: 'MorphNumber' = MorphNumber.UNDEFINED, gender: 'MorphGender' = MorphGender.UNDEFINED, keep_chars: bool = False) -> str:
    """Return the normalized text of this item.

    A single referent token delegates to that token. Otherwise the
    NounPhraseItemTextVar variants in ``self.morph.items`` are scanned for
    the best normal form; if none is usable, the text is rebuilt from the
    underlying tokens.

    Args:
        mc: requested morphological class; adjective handling depends on it.
        num: requested number (SINGULAR selects the single-number forms).
        gender: requested gender, used when re-inflecting adjectives.
        keep_chars: passed through to the character-case correction and to
            the token text helpers (KEEPREGISTER when True).

    Returns:
        The normalized string, or "?" when nothing could be produced.
    """
    first = self.begin_token
    if isinstance(first, ReferentToken) and first == self.end_token:
        return first.get_normal_case_text(mc, num, gender, keep_chars)

    want_single = num == MorphNumber.SINGULAR
    best = None
    top_undef = 0
    best_score = -1
    for item in self.morph.items:
        var = Utils.asObjectOrNull(item, NounPhraseItemTextVar)
        if var is None:
            continue
        # Skip an uncertain variant if it is weaker than one already seen
        # or if a certain variant has already been scored.
        if var.undef_coef > 0 and (var.undef_coef < top_undef or best_score >= 0):
            continue

        if want_single and var.single_number_value is not None:
            if (mc is not None
                    and (gender == MorphGender.NEUTER or gender == MorphGender.FEMINIE)
                    and mc.is_adjective):
                # Re-inflect the singular form to the requested gender.
                info = MorphBaseInfo._new401(MorphClass._new53(mc.value), gender,
                                             MorphNumber.SINGULAR, MorphCase.NOMINATIVE,
                                             self.morph.language)
                form = MorphologyService.get_wordform(var.single_number_value, info)
                if form is not None:
                    best = form
            else:
                best = var.single_number_value
            if var.undef_coef == 0:
                break
            top_undef = var.undef_coef
            continue

        if Utils.isNullOrEmpty(var.normal_value):
            continue

        if str.isdigit(var.normal_value[0]) and mc is not None and mc.is_adjective:
            # A numeric value requested as an adjective: spell it out.
            parsed = RefOutArgWrapper(0)
            parsed_ok = Utils.tryParseInt(var.normal_value, parsed)
            value = parsed.value
            if parsed_ok:
                form = NumberHelper.get_number_adjective(
                    value, gender,
                    (MorphNumber.SINGULAR if want_single or value == 1
                     else MorphNumber.PLURAL))
                if form is not None:
                    best = form
                    if var.undef_coef == 0:
                        break
                    top_undef = var.undef_coef
                    continue

        text = item.normal_value
        if want_single:
            # Suppletive plurals get their singular counterpart.
            if text == "ДЕТИ":
                text = "РЕБЕНОК"
            elif text == "ЛЮДИ":
                text = "ЧЕЛОВЕК"
        top_undef = var.undef_coef
        if var.undef_coef > 0:
            best = text
            continue

        score = 0
        adjective_exact = mc is not None and mc.is_adjective and var.undef_coef == 0
        if (not adjective_exact
                and isinstance(self.begin_token, TextToken)
                and text == self.begin_token.term
                and item.case_.is_nominative
                and item.number == MorphNumber.SINGULAR):
            score = 1
        if (num == MorphNumber.PLURAL
                and ((var.number) & (MorphNumber.PLURAL)) == (MorphNumber.PLURAL)):
            score += 3
        if best is None or score > best_score:
            best = text
            best_score = score
        if score > 0:
            break

    if best is not None:
        return self.__corr_chars(best, keep_chars)

    # No usable variant: fall back to the tokens themselves.
    single_token = self.begin_token == self.end_token
    best = self.begin_token.get_normal_case_text(mc, num, gender, keep_chars)
    if not single_token:
        text_attr = GetTextAttr.KEEPREGISTER if keep_chars else GetTextAttr.NO
        if best is None:
            best = MiscHelper.get_text_value_of_meta_token(self, text_attr)
        else:
            best = "{0} {1}".format(
                best,
                MiscHelper.get_text_value(self.begin_token.next0_, self.end_token, text_attr))
    return Utils.ifNotNull(best, "?")
def __try_attach(t: 'Token', prev: typing.List['DateItemToken'], detail_regime: bool) -> 'DateItemToken':
    """Try to build one date component (DateItemToken) starting at token ``t``.

    Two paths exist. If ``t`` is a number (optionally in brackets), the
    number is classified as NUMBER, YEAR, HOUR, MINUTE, SECOND, CENTURY,
    QUARTAL or HALFYEAR depending on the surrounding words. Otherwise ``t``
    must be a text token and is tried as a Roman numeral with a period word,
    a pointer word (about / beginning / middle / end / now / season), a
    relative quarter or half-year, a delimiter, or a month name.

    Args:
        t: token to start from; may be None.
        prev: items already parsed in the current sequence (may be None);
            the last items influence year/minute detection.
        detail_regime: when True, relative "previous/next" phrases are
            examined even if the noun is not a quarter or half-year.

    Returns:
        The recognised DateItemToken, or None.
    """
    from pullenti.ner.measure.internal.MeasureToken import MeasureToken
    if (t is None):
        return None
    nt = Utils.asObjectOrNull(t, NumberToken)
    begin = t
    end = t
    is_in_brack = False
    # A number enclosed in brackets is treated like the number itself, with
    # the result spanning both brackets.
    if ((BracketHelper.can_be_start_of_sequence(t, False, False) and t.next0_ is not None and (isinstance(t.next0_, NumberToken))) and BracketHelper.can_be_end_of_sequence(t.next0_.next0_, False, None, False)):
        nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
        end = t.next0_.next0_
        is_in_brack = True
    if ((t.is_newline_before and BracketHelper.is_bracket(t, False) and (isinstance(t.next0_, NumberToken))) and BracketHelper.is_bracket(t.next0_.next0_, False)):
        nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
        end = t.next0_.next0_
        is_in_brack = True
    if (nt is not None):
        if (nt.int_value is None):
            return None
        if (nt.typ == NumberSpellingType.WORDS):
            # A number written in words that is a noun but not an adjective
            # is accepted only before a quarter / half-year word.
            if (nt.morph.class0_.is_noun and not nt.morph.class0_.is_adjective):
                if (t.next0_ is not None and ((t.next0_.is_value("КВАРТАЛ", None) or t.next0_.is_value("ПОЛУГОДИЕ", None) or t.next0_.is_value("ПІВРІЧЧЯ", None)))):
                    pass
                else:
                    return None
        if (NumberHelper.try_parse_age(nt) is not None):
            return None
        tt = None
        res = DateItemToken._new628(begin, end, DateItemToken.DateItemType.NUMBER, nt.int_value, nt.morph)
        # "20 NN" after a month: glue the two numbers into the year 20NN
        # (only for results below 2030).
        if ((res.int_value == 20 and (isinstance(nt.next0_, NumberToken)) and nt.next0_.int_value is not None) and nt.next0_.length_char == 2 and prev is not None):
            num = 2000 + nt.next0_.int_value
            if ((num < 2030) and len(prev) > 0 and prev[len(prev) - 1].typ == DateItemToken.DateItemType.MONTH):
                ok = False
                if (nt.whitespaces_after_count == 1):
                    ok = True
                # NOTE(review): both operands are the same test; the second
                # was possibly meant to check something else - confirm.
                elif (nt.is_newline_after and nt.is_newline_after):
                    ok = True
                if (ok):
                    nt = (Utils.asObjectOrNull(nt.next0_, NumberToken))
                    res.end_token = nt
                    res.int_value = num
        # "20__" / "201_" followed by a year word: a year placeholder,
        # returned as YEAR with value 0.
        if (res.int_value == 20 or res.int_value == 201):
            tt = t.next0_
            if (tt is not None and tt.is_char('_')):
                while tt is not None:
                    if (not tt.is_char('_')):
                        break
                    tt = tt.next0_
                tt = DateItemToken.__test_year_rus_word(tt, False)
                if (tt is not None):
                    res.int_value = 0
                    res.end_token = tt
                    res.typ = DateItemToken.DateItemType.YEAR
                    return res
        # Small numbers close to the next token may be hours ("5 часов",
        # "7 вечера", ...).
        if (res.int_value <= 12 and t.next0_ is not None and (t.whitespaces_after_count < 3)):
            tt = t.next0_
            if (tt.is_value("ЧАС", None)):
                # A number glued to a preceding "<number><non-letter>" is not
                # turned into an hour here.
                if (((isinstance(t.previous, TextToken)) and not t.previous.chars.is_letter and not t.is_whitespace_before) and (isinstance(t.previous.previous, NumberToken)) and not t.previous.is_whitespace_before):
                    pass
                else:
                    res.typ = DateItemToken.DateItemType.HOUR
                    res.end_token = tt
                    tt = tt.next0_
                    if (tt is not None and tt.is_char('.')):
                        res.end_token = tt
                        tt = tt.next0_
            # Look for a part-of-day word, skipping commas and adverbs.
            first_pass3072 = True
            while True:
                if first_pass3072:
                    first_pass3072 = False
                else:
                    tt = tt.next0_
                if (not (tt is not None)):
                    break
                if (tt.is_value("УТРО", "РАНОК")):
                    res.end_token = tt
                    res.typ = DateItemToken.DateItemType.HOUR
                    return res
                if (tt.is_value("ВЕЧЕР", "ВЕЧІР")):
                    res.end_token = tt
                    res.int_value += 12
                    res.typ = DateItemToken.DateItemType.HOUR
                    return res
                if (tt.is_value("ДЕНЬ", None)):
                    res.end_token = tt
                    if (res.int_value < 10):
                        res.int_value += 12
                    res.typ = DateItemToken.DateItemType.HOUR
                    return res
                if (tt.is_value("НОЧЬ", "НІЧ")):
                    res.end_token = tt
                    if (res.int_value == 12):
                        res.int_value = 0
                    elif (res.int_value > 9):
                        res.int_value += 12
                    res.typ = DateItemToken.DateItemType.HOUR
                    return res
                if (tt.is_comma or tt.morph.class0_.is_adverb):
                    continue
                break
            if (res.typ == DateItemToken.DateItemType.HOUR):
                return res
        # Year detection by a following year word.
        can_be_year_ = True
        if (prev is not None and len(prev) > 0 and prev[len(prev) - 1].typ == DateItemToken.DateItemType.MONTH):
            pass
        elif ((prev is not None and len(prev) >= 4 and prev[len(prev) - 1].typ == DateItemToken.DateItemType.DELIM) and prev[len(prev) - 2].can_by_month):
            pass
        elif (nt.next0_ is not None and ((nt.next0_.is_value("ГОД", None) or nt.next0_.is_value("РІК", None)))):
            if (res.int_value < 1000):
                can_be_year_ = False
        tt = DateItemToken.__test_year_rus_word(nt.next0_, False)
        if (tt is not None and DateItemToken.__is_new_age(tt.next0_)):
            res.typ = DateItemToken.DateItemType.YEAR
            res.end_token = tt
        elif (can_be_year_):
            if (res.can_be_year or res.typ == DateItemToken.DateItemType.NUMBER):
                tt = DateItemToken.__test_year_rus_word(nt.next0_, res.is_newline_before)
                if ((tt) is not None):
                    # Exclusions: "корпус N г" and "а/я N г" are not years.
                    if ((tt.is_value("Г", None) and not tt.is_whitespace_before and t.previous is not None) and ((t.previous.is_value("КОРПУС", None) or t.previous.is_value("КОРП", None)))):
                        pass
                    elif ((((nt.next0_.is_value("Г", None) and (t.whitespaces_before_count < 3) and t.previous is not None) and t.previous.is_value("Я", None) and t.previous.previous is not None) and t.previous.previous.is_char_of("\\/") and t.previous.previous.previous is not None) and t.previous.previous.previous.is_value("А", None)):
                        return None
                    elif (nt.next0_.length_char == 1 and not res.can_be_year and ((prev is None or ((len(prev) > 0 and prev[len(prev) - 1].typ != DateItemToken.DateItemType.DELIM))))):
                        pass
                    else:
                        res.end_token = tt
                        res.typ = DateItemToken.DateItemType.YEAR
                        res.lang = tt.morph.language
            elif (tt is not None and (nt.whitespaces_after_count < 2) and (nt.end_char - nt.begin_char) == 1):
                res.end_token = tt
                res.typ = DateItemToken.DateItemType.YEAR
                res.lang = tt.morph.language
        # Year detection by a preceding preposition.
        if (nt.previous is not None):
            if (nt.previous.is_value("В", "У") or nt.previous.is_value("К", None) or nt.previous.is_value("ДО", None)):
                tt = DateItemToken.__test_year_rus_word(nt.next0_, False)
                if ((tt) is not None):
                    ok = False
                    if ((res.int_value < 100) and (isinstance(tt, TextToken)) and ((tt.term == "ГОДА" or tt.term == "РОКИ"))):
                        pass
                    else:
                        ok = True
                        # "до N г": reject when one of the up to 10 tokens
                        # before "до" starts a measure that extends past
                        # this number.
                        if (nt.previous.is_value("ДО", None) and nt.next0_.is_value("Г", None)):
                            cou = 0
                            ttt = nt.previous.previous
                            while ttt is not None and (cou < 10):
                                mt = MeasureToken.try_parse(ttt, None, False, False, False, False)
                                if (mt is not None and mt.end_char > nt.end_char):
                                    ok = False
                                    break
                                ttt = ttt.previous
                                cou += 1
                    if (ok):
                        res.end_token = tt
                        res.typ = DateItemToken.DateItemType.YEAR
                        res.lang = tt.morph.language
                        res.begin_token = nt.previous
            elif (((nt.previous.is_value("IN", None) or nt.previous.is_value("SINCE", None))) and res.can_be_year):
                # "in N <unit>" is a measure, not a year.
                uu = (NumbersWithUnitToken.try_parse(nt, None, False, False, False, False) if nt.previous.is_value("IN", None) else None)
                if (uu is not None and len(uu.units) > 0):
                    pass
                else:
                    res.typ = DateItemToken.DateItemType.YEAR
                    res.begin_token = nt.previous
            elif (nt.previous.is_value("NEL", None) or nt.previous.is_value("DEL", None)):
                if (res.can_be_year):
                    res.typ = DateItemToken.DateItemType.YEAR
                    res.lang = MorphLang.IT
                    res.begin_token = nt.previous
            elif (nt.previous.is_value("IL", None) and res.can_be_day):
                res.lang = MorphLang.IT
                res.begin_token = nt.previous
        # Unit word right after the number: hour, minute, second, century,
        # quarter, half-year.
        t1 = res.end_token.next0_
        if (t1 is not None):
            if (t1.is_value("ЧАС", "ГОДИНА") or t1.is_value("HOUR", None)):
                # NOTE(review): "not prev[1].is_whitespace_after" appears
                # twice; one was possibly meant to be another check - confirm.
                if ((((prev is not None and len(prev) == 2 and prev[0].can_be_hour) and prev[1].typ == DateItemToken.DateItemType.DELIM and not prev[1].is_whitespace_after) and not prev[1].is_whitespace_after and res.int_value >= 0) and (res.int_value < 59)):
                    prev[0].typ = DateItemToken.DateItemType.HOUR
                    res.typ = DateItemToken.DateItemType.MINUTE
                    res.end_token = t1
                elif (res.int_value < 24):
                    if (t1.next0_ is not None and t1.next0_.is_char('.')):
                        t1 = t1.next0_
                    res.typ = DateItemToken.DateItemType.HOUR
                    res.end_token = t1
            # Fixed: "MINUTE" is now tested on t1 (the token after the
            # number), like every other unit word in this chain; it used to
            # be tested on t, the number token itself.
            elif ((res.int_value < 60) and ((t1.is_value("МИНУТА", "ХВИЛИНА") or t1.is_value("МИН", None) or t1.is_value("MINUTE", None)))):
                if (t1.next0_ is not None and t1.next0_.is_char('.')):
                    t1 = t1.next0_
                res.typ = DateItemToken.DateItemType.MINUTE
                res.end_token = t1
            elif ((res.int_value < 60) and ((t1.is_value("СЕКУНДА", None) or t1.is_value("СЕК", None) or t1.is_value("SECOND", None)))):
                if (t1.next0_ is not None and t1.next0_.is_char('.')):
                    t1 = t1.next0_
                res.typ = DateItemToken.DateItemType.SECOND
                res.end_token = t1
            elif ((res.int_value < 30) and ((t1.is_value("ВЕК", "ВІК") or t1.is_value("СТОЛЕТИЕ", "СТОЛІТТЯ")))):
                res.typ = DateItemToken.DateItemType.CENTURY
                res.end_token = t1
            elif (res.int_value <= 4 and t1.is_value("КВАРТАЛ", None)):
                res.typ = DateItemToken.DateItemType.QUARTAL
                res.end_token = t1
            elif (res.int_value <= 2 and ((t1.is_value("ПОЛУГОДИЕ", None) or t1.is_value("ПІВРІЧЧЯ", None)))):
                res.typ = DateItemToken.DateItemType.HALFYEAR
                res.end_token = t1
        return res
    # ---- non-numeric path: t must be a text token ----
    t0 = Utils.asObjectOrNull(t, TextToken)
    if (t0 is None):
        return None
    txt = t0.get_source_text()
    # Roman numeral followed by quarter / half-year / century.
    if ((txt[0] == 'I' or txt[0] == 'X' or txt[0] == 'Х') or txt[0] == 'V'):
        lat = NumberHelper.try_parse_roman(t)
        if (lat is not None and lat.end_token.next0_ is not None and lat.int_value is not None):
            val = lat.int_value
            tt = lat.end_token.next0_
            if (tt.is_value("КВАРТАЛ", None) and val > 0 and val <= 4):
                return DateItemToken._new629(t, tt, DateItemToken.DateItemType.QUARTAL, val)
            if (tt.is_value("ПОЛУГОДИЕ", "ПІВРІЧЧЯ") and val > 0 and val <= 2):
                return DateItemToken._new629(t, lat.end_token.next0_, DateItemToken.DateItemType.HALFYEAR, val)
            if (tt.is_value("ВЕК", "ВІК") or tt.is_value("СТОЛЕТИЕ", "СТОЛІТТЯ")):
                return DateItemToken._new629(t, lat.end_token.next0_, DateItemToken.DateItemType.CENTURY, val)
            # "<roman> в." is a century after a pointer or before an era mark.
            if (tt.is_value("В", None) and tt.next0_ is not None and tt.next0_.is_char('.')):
                if (prev is not None and len(prev) > 0 and prev[len(prev) - 1].typ == DateItemToken.DateItemType.POINTER):
                    return DateItemToken._new629(t, tt.next0_, DateItemToken.DateItemType.CENTURY, val)
                if (DateItemToken.__is_new_age(tt.next0_.next0_)):
                    return DateItemToken._new629(t, tt.next0_, DateItemToken.DateItemType.CENTURY, val)
            # "<roman>-<roman> century": the first numeral is returned as a
            # century, taking new_age from the parse of the second numeral.
            if (tt.is_hiphen):
                lat2 = NumberHelper.try_parse_roman(tt.next0_)
                if (lat2 is not None and lat2.int_value is not None and lat2.end_token.next0_ is not None):
                    if (lat2.end_token.next0_.is_value("ВЕК", "ВІК") or lat2.end_token.next0_.is_value("СТОЛЕТИЕ", "СТОЛІТТЯ")):
                        ddd = DateItemToken.try_attach(tt.next0_, None, False)
                        return DateItemToken._new634(t, lat.end_token, DateItemToken.DateItemType.CENTURY, val, ((ddd.new_age if ddd is not None else 0)))
    # Single-word pointers.
    if (t is not None and t.is_value("НАПРИКІНЦІ", None)):
        return DateItemToken._new635(t, t, DateItemToken.DateItemType.POINTER, "конец")
    if (t is not None and t.is_value("ДОНЕДАВНА", None)):
        return DateItemToken._new635(t, t, DateItemToken.DateItemType.POINTER, "сегодня")
    if (prev is None):
        if (t is not None):
            if (t.is_value("ОКОЛО", "БІЛЯ") or t.is_value("ПРИМЕРНО", "ПРИБЛИЗНО") or t.is_value("ABOUT", None)):
                return DateItemToken._new635(t, t, DateItemToken.DateItemType.POINTER, "около")
        if (t.is_value("ОК", None) or t.is_value("OK", None)):
            if (t.next0_ is not None and t.next0_.is_char('.')):
                return DateItemToken._new635(t, t.next0_, DateItemToken.DateItemType.POINTER, "около")
            return DateItemToken._new635(t, t, DateItemToken.DateItemType.POINTER, "около")
    # Seasons. For Russian text a SUMMER match is kept only for the exact
    # forms "ЛЕТОМ", "ЛЕТА", "ЛЕТО".
    tok = DateItemToken.M_SEASONS.try_parse(t, TerminParseAttr.NO)
    if ((tok is not None and (Utils.valToEnum(tok.termin.tag, DatePointerType)) == DatePointerType.SUMMER and t.morph.language.is_ru) and (isinstance(t, TextToken))):
        str0_ = t.term
        if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
            tok = (None)
    if (tok is not None):
        return DateItemToken._new629(t, tok.end_token, DateItemToken.DateItemType.POINTER, Utils.valToEnum(tok.termin.tag, DatePointerType))
    npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
    if (npt is not None):
        tok = DateItemToken.M_SEASONS.try_parse(npt.end_token, TerminParseAttr.NO)
        if ((tok is not None and (Utils.valToEnum(tok.termin.tag, DatePointerType)) == DatePointerType.SUMMER and t.morph.language.is_ru) and (isinstance(t, TextToken))):
            str0_ = t.term
            if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
                tok = (None)
        if (tok is not None):
            return DateItemToken._new629(t, tok.end_token, DateItemToken.DateItemType.POINTER, Utils.valToEnum(tok.termin.tag, DatePointerType))
        typ_ = DateItemToken.DateItemType.NUMBER
        if (npt.noun.is_value("КВАРТАЛ", None)):
            typ_ = DateItemToken.DateItemType.QUARTAL
        elif (npt.end_token.is_value("ПОЛУГОДИЕ", None) or npt.end_token.is_value("ПІВРІЧЧЯ", None)):
            typ_ = DateItemToken.DateItemType.HALFYEAR
        elif (npt.end_token.is_value("НАЧАЛО", None) or npt.end_token.is_value("ПОЧАТОК", None)):
            return DateItemToken._new635(t, npt.end_token, DateItemToken.DateItemType.POINTER, "начало")
        elif (npt.end_token.is_value("СЕРЕДИНА", None)):
            return DateItemToken._new635(t, npt.end_token, DateItemToken.DateItemType.POINTER, "середина")
        elif (npt.end_token.is_value("КОНЕЦ", None) or npt.end_token.is_value("КІНЕЦЬ", None) or npt.end_token.is_value("НАПРИКІНЕЦЬ", None)):
            return DateItemToken._new635(t, npt.end_token, DateItemToken.DateItemType.POINTER, "конец")
        elif (npt.end_token.is_value("ВРЕМЯ", None) and len(npt.adjectives) > 0 and npt.end_token.previous.is_value("НАСТОЯЩЕЕ", None)):
            return DateItemToken._new635(t, npt.end_token, DateItemToken.DateItemType.POINTER, "сегодня")
        elif (npt.end_token.is_value("ЧАС", None) and len(npt.adjectives) > 0 and npt.end_token.previous.is_value("ДАНИЙ", None)):
            return DateItemToken._new635(t, npt.end_token, DateItemToken.DateItemType.POINTER, "сегодня")
        if (typ_ != DateItemToken.DateItemType.NUMBER or detail_regime):
            # Relative quarter / half-year: "last" is absolute (4 or 2);
            # "previous" / "next" shift the number taken from an earlier
            # date-range referent.
            delta = 0
            if (len(npt.adjectives) > 0):
                if (npt.adjectives[0].is_value("ПОСЛЕДНИЙ", "ОСТАННІЙ")):
                    return DateItemToken._new629(t0, npt.end_token, typ_, (4 if typ_ == DateItemToken.DateItemType.QUARTAL else 2))
                if (npt.adjectives[0].is_value("ПРЕДЫДУЩИЙ", "ПОПЕРЕДНІЙ") or npt.adjectives[0].is_value("ПРОШЛЫЙ", None)):
                    delta = -1
                elif (npt.adjectives[0].is_value("СЛЕДУЮЩИЙ", None) or npt.adjectives[0].is_value("ПОСЛЕДУЮЩИЙ", None) or npt.adjectives[0].is_value("НАСТУПНИЙ", None)):
                    delta = 1
                else:
                    return None
            cou = 0
            tt = t.previous
            first_pass3073 = True
            while True:
                if first_pass3073:
                    first_pass3073 = False
                else:
                    tt = tt.previous
                if (not (tt is not None)):
                    break
                # NOTE(review): cou is never incremented, so this guard never
                # fires and the scan runs back to the first token. Left
                # unchanged to keep results identical - confirm intent.
                if (cou > 200):
                    break
                dr = Utils.asObjectOrNull(tt.get_referent(), DateRangeReferent)
                if (dr is None):
                    continue
                if (typ_ == DateItemToken.DateItemType.QUARTAL):
                    ii = dr.quarter_number
                    if (ii < 1):
                        continue
                    ii += delta
                    if ((ii < 1) or ii > 4):
                        continue
                    return DateItemToken._new629(t0, npt.end_token, typ_, ii)
                if (typ_ == DateItemToken.DateItemType.HALFYEAR):
                    ii = dr.halfyear_number
                    if (ii < 1):
                        continue
                    ii += delta
                    if ((ii < 1) or ii > 2):
                        continue
                    return DateItemToken._new629(t0, npt.end_token, typ_, ii)
    term = t0.term
    # Punctuation: only date delimiters are accepted.
    if (not str.isalnum(term[0])):
        if (t0.is_char_of(".\\/:") or t0.is_hiphen):
            return DateItemToken._new635(t0, t0, DateItemToken.DateItemType.DELIM, term)
        elif (t0.is_char(',')):
            return DateItemToken._new635(t0, t0, DateItemToken.DateItemType.DELIM, term)
        else:
            return None
    # Latin or Cyrillic letter "O" glued to a one-character number is
    # returned as a NUMBER with that number's value.
    if (term == "O" or term == "О"):
        if ((isinstance(t.next0_, NumberToken)) and not t.is_whitespace_after and len(t.next0_.value) == 1):
            return DateItemToken._new629(t, t.next0_, DateItemToken.DateItemType.NUMBER, t.next0_.int_value)
    # Month names; an untagged match is skipped and the next token is tried.
    if (str.isalpha(term[0])):
        inf = DateItemToken.M_MONTHES.try_parse(t, TerminParseAttr.NO)
        if (inf is not None and inf.termin.tag is None):
            inf = DateItemToken.M_MONTHES.try_parse(inf.end_token.next0_, TerminParseAttr.NO)
        if (inf is not None and (isinstance(inf.termin.tag, int))):
            return DateItemToken._new653(inf.begin_token, inf.end_token, DateItemToken.DateItemType.MONTH, inf.termin.tag, inf.termin.lang)
    return None
def try_parse(t : 'Token', add_units : 'TerminCollection', can_be_set : bool=True, can_units_absent : bool=False, is_resctriction : bool=False, is_subval : bool=False) -> 'MeasureToken':
    """Try to parse a named measurement (a name followed by value(s) with units) at ``t``.

    The function works in three stages: it finds the name (a noun phrase or one
    of several fallbacks), walks forward extending the name and collecting
    nested measurements / units / dimension headers, and finally parses the
    numeric part with ``NumbersWithUnitToken.try_parse_multi``.

    Args:
        t: start token; anything that is not a ``TextToken`` (or is a table
            control char) is rejected immediately.
        add_units: passed through unchanged to every unit / number parser.
        can_be_set: when False the final "second value with different units"
            check is skipped and the single result is returned as is.
        can_units_absent: when True a value for which no units were found is
            still accepted.
        is_resctriction: forwarded to ``try_parse_multi``; additionally a
            ':', ',' or '_' right after the name makes the parse fail.
        is_subval: allows a plain run of letter tokens to serve as the name
            and keeps the name text unnormalised (``GetTextAttr.NO``).

    Returns:
        A ``MeasureToken`` or ``None`` when nothing could be parsed.
    """
    if (not (isinstance(t, TextToken))):
        return None
    if (t.is_table_control_char):
        return None
    t0 = t
    whd = None
    # minmax is an in/out value filled by _is_min_or_max through the wrapper;
    # its sign is used near the end to turn a single value into a from/to bound.
    minmax = 0
    wrapminmax1625 = RefOutArgWrapper(minmax)
    tt = NumbersWithUnitToken._is_min_or_max(t0, wrapminmax1625)
    minmax = wrapminmax1625.value
    if (tt is not None):
        t = tt.next0_
    # Stage 1: locate the name of the measured property.
    npt = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS), NounPhraseParseAttr), 0, None)
    if (npt is None):
        # No noun phrase: fall back to a _try_parsewhl header, a few literal
        # words, an unknown long word, or (for sub-values) a run of letter tokens.
        whd = NumbersWithUnitToken._try_parsewhl(t)
        if (whd is not None):
            npt = NounPhraseToken(t0, whd.end_token)
        elif (t0.is_value("КПД", None)):
            npt = NounPhraseToken(t0, t0)
        elif ((isinstance(t0, TextToken)) and t0.length_char > 3 and t0.get_morph_class_in_dictionary().is_undefined):
            npt = NounPhraseToken(t0, t0)
        elif (t0.is_value("T", None) and t0.chars.is_all_lower):
            npt = NounPhraseToken(t0, t0)
            t = t0
            if (t.next0_ is not None and t.next0_.is_char('=')):
                npt.end_token = t.next0_
        elif ((isinstance(t0, TextToken)) and t0.chars.is_letter and is_subval):
            if (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None):
                return None
            npt = NounPhraseToken(t0, t0)
            t = t0.next0_
            # Extend the name over following letter tokens and bracketed
            # sequences until a gap, a non-text token or a number-with-unit.
            while t is not None:
                if (t.whitespaces_before_count > 2):
                    break
                elif (not (isinstance(t, TextToken))):
                    break
                elif (not t.chars.is_letter):
                    br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
                    if (br is not None):
                        t = br.end_token
                        npt.end_token = t
                    else:
                        break
                elif (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None):
                    break
                else:
                    npt.end_token = t
                t = t.next0_
        else:
            return None
    elif (NumberHelper.try_parse_real_number(t, True, False) is not None):
        # The text starts with a number, so it cannot be a name.
        return None
    else:
        # The text starts with a date item, so it cannot be a name.
        dtok = DateItemToken.try_attach(t, None, False)
        if (dtok is not None):
            return None
    t1 = npt.end_token
    t = npt.end_token
    name_ = MetaToken._new509(npt.begin_token, npt.end_token, npt.morph)
    units = None
    units2 = None
    internals_ = list()
    not0_ = False
    # Stage 2: walk forward from the end of the name. Each recognised
    # construct advances tt / t / t1 together and continues; anything that
    # looks like the start of the value breaks out of the loop.
    tt = t1.next0_
    first_pass3305 = True
    while True:
        if first_pass3305: first_pass3305 = False
        else: tt = tt.next0_
        if (not (tt is not None)): break
        if (tt.is_newline_before):
            break
        if (tt.is_table_control_char):
            break
        wrapminmax1617 = RefOutArgWrapper(minmax)
        tt2 = NumbersWithUnitToken._is_min_or_max(tt, wrapminmax1617)
        minmax = wrapminmax1617.value
        if (tt2 is not None):
            tt = tt2
            t = tt
            t1 = t
            continue
        # Linking verbs are skipped; a preceding "НЕ" sets the negation flag
        # that is later handed to try_parse_multi.
        if ((tt.is_value("БЫТЬ", None) or tt.is_value("ДОЛЖЕН", None) or tt.is_value("ДОЛЖНЫЙ", None)) or tt.is_value("МОЖЕТ", None) or ((tt.is_value("СОСТАВЛЯТЬ", None) and not tt.get_morph_class_in_dictionary().is_adjective))):
            t = tt
            t1 = t
            if (tt.previous.is_value("НЕ", None)):
                not0_ = True
            continue
        www = NumbersWithUnitToken._try_parsewhl(tt)
        if (www is not None):
            whd = www
            tt = www.end_token
            t = tt
            t1 = t
            continue
        # "ПРИ <measure>" introduces a nested measurement stored in internals_.
        if (tt.is_value("ПРИ", None)):
            mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False)
            if (mt1 is not None):
                internals_.append(mt1)
                tt = mt1.end_token
                t = tt
                t1 = t
                continue
            n1 = NumbersWithUnitToken.try_parse(tt.next0_, add_units, False, False, False, False)
            if (n1 is not None and len(n1.units) > 0):
                mt1 = MeasureToken._new1612(n1.begin_token, n1.end_token, n1)
                internals_.append(mt1)
                tt = mt1.end_token
                t = tt
                t1 = t
                continue
        if (tt.is_value("ПО", None) and tt.next0_ is not None and tt.next0_.is_value("U", None)):
            tt = tt.next0_
            t = tt
            t1 = t
            continue
        if (len(internals_) > 0):
            if (tt.is_char(':')):
                break
            mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False)
            if (mt1 is not None and mt1.reliable):
                internals_.append(mt1)
                tt = mt1.end_token
                t = tt
                t1 = t
                continue
        # The next few branches absorb numbers that belong to the name itself
        # (spelled-out numerals, "ABC-12", "ABC12", "12-word", "USB 3.0" ...).
        # The name end is only moved while no nested measurement was seen.
        if ((isinstance(tt, NumberToken)) and tt.typ == NumberSpellingType.WORDS):
            npt3 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.PARSENUMERICASADJECTIVE, 0, None)
            if (npt3 is not None):
                tt = npt3.end_token
                t1 = tt
                if (len(internals_) == 0):
                    name_.end_token = t1
                continue
        if (((tt.is_hiphen and not tt.is_whitespace_before and not tt.is_whitespace_after) and (isinstance(tt.next0_, NumberToken)) and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper):
            t = tt.next0_
            tt = t
            t1 = tt
            if (len(internals_) == 0):
                name_.end_token = t1
            continue
        if (((isinstance(tt, NumberToken)) and not tt.is_whitespace_before and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper):
            t = tt
            t1 = t
            if (len(internals_) == 0):
                name_.end_token = t1
            continue
        if ((((isinstance(tt, NumberToken)) and not tt.is_whitespace_after and tt.next0_.is_hiphen) and not tt.next0_.is_whitespace_after and (isinstance(tt.next0_.next0_, TextToken))) and tt.next0_.next0_.length_char > 2):
            tt = tt.next0_.next0_
            t = tt
            t1 = t
            npt1 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
            if (npt1 is not None and npt1.end_char > tt.end_char):
                tt = npt1.end_token
                t = tt
                t1 = t
            if (len(internals_) == 0):
                name_.end_token = t1
            continue
        if ((isinstance(tt, NumberToken)) and tt.previous is not None):
            if (tt.previous.is_value("USB", None)):
                t = tt
                t1 = t
                if (len(internals_) == 0):
                    name_.end_token = t1
                # Glue everything written without spaces after the number,
                # stopping at ',' or ':'.
                ttt = tt.next0_
                while ttt is not None:
                    if (ttt.is_whitespace_before):
                        break
                    if (ttt.is_char_of(",:")):
                        break
                    tt = ttt
                    t = tt
                    t1 = t
                    if (len(internals_) == 0):
                        name_.end_token = t1
                    ttt = ttt.next0_
                continue
        # A number-with-unit here normally starts the value, unless a longer
        # noun phrase begins at the same token.
        mt0 = NumbersWithUnitToken.try_parse(tt, add_units, False, False, False, False)
        if (mt0 is not None):
            npt1 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSENUMERICASADJECTIVE) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), 0, None)
            if (npt1 is not None and npt1.end_char > mt0.end_char):
                tt = npt1.end_token
                t = tt
                t1 = t
                if (len(internals_) == 0):
                    name_.end_token = t1
                continue
            break
        # After ',' or '(' there may be a _try_parsewhl header and/or units.
        if (((tt.is_comma or tt.is_char('('))) and tt.next0_ is not None):
            www = NumbersWithUnitToken._try_parsewhl(tt.next0_)
            if (www is not None):
                whd = www
                tt = www.end_token
                t = tt
                t1 = t
                if (tt.next0_ is not None and tt.next0_.is_comma):
                    tt = tt.next0_
                    t1 = tt
                if (tt.next0_ is not None and tt.next0_.is_char(')')):
                    tt = tt.next0_
                    t1 = tt
                    continue
            uu = UnitToken.try_parse_list(tt.next0_, add_units, False)
            if (uu is not None):
                t = uu[len(uu) - 1].end_token
                t1 = t
                units = uu
                if (tt.is_char('(') and t1.next0_ is not None and t1.next0_.is_char(')')):
                    tt = t1.next0_
                    t = tt
                    t1 = t
                    continue
                elif (t1.next0_ is not None and t1.next0_.is_char('(')):
                    uu = UnitToken.try_parse_list(t1.next0_.next0_, add_units, False)
                    if (uu is not None and uu[len(uu) - 1].end_token.next0_ is not None and uu[len(uu) - 1].end_token.next0_.is_char(')')):
                        units2 = uu
                        tt = uu[len(uu) - 1].end_token.next0_
                        t = tt
                        t1 = t
                        continue
                    www = NumbersWithUnitToken._try_parsewhl(t1.next0_)
                    if (www is not None):
                        whd = www
                        tt = www.end_token
                        t = tt
                        t1 = t
                        continue
                if (uu is not None and len(uu) > 0 and not uu[0].is_doubt):
                    break
                if (t1.next0_ is not None):
                    if (t1.next0_.is_table_control_char or t1.is_newline_after):
                        break
                # Doubtful units that are not followed by a cell/line end are discarded.
                units = (None)
        if (BracketHelper.can_be_start_of_sequence(tt, False, False) and not (isinstance(tt.next0_, NumberToken))):
            br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
            if (br is not None):
                tt = br.end_token
                t = tt
                t1 = t
                continue
        if (tt.is_value("НЕ", None) and tt.next0_ is not None):
            mc = tt.next0_.get_morph_class_in_dictionary()
            if (mc.is_adverb or mc.is_misc):
                break
            continue
        # NOTE(review): no-op branch, looks like a leftover debugging anchor.
        if (tt.is_value("ЯМЗ", None)):
            pass
        npt2 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS) | (NounPhraseParseAttr.PARSEPRONOUNS), NounPhraseParseAttr), 0, None)
        if (npt2 is None):
            if (tt.morph.class0_.is_preposition or tt.morph.class0_.is_conjunction):
                to = NumbersWithUnitToken.M_TERMINS.try_parse(tt, TerminParseAttr.NO)
                if (to is not None):
                    if ((isinstance(to.end_token.next0_, TextToken)) and to.end_token.next0_.is_letters):
                        pass
                    else:
                        break
                t1 = tt
                continue
            mc = tt.get_morph_class_in_dictionary()
            if (((isinstance(tt, TextToken)) and tt.chars.is_letter and tt.length_char > 1) and (((tt.chars.is_all_upper or mc.is_adverb or mc.is_undefined) or mc.is_adjective))):
                uu = UnitToken.try_parse_list(tt, add_units, False)
                if (uu is not None):
                    if (uu[0].length_char > 1 or len(uu) > 1):
                        units = uu
                        t = uu[len(uu) - 1].end_token
                        t1 = t
                        break
                t = tt
                t1 = t
                if (len(internals_) == 0):
                    name_.end_token = tt
                continue
            if (tt.is_comma):
                continue
            if (tt.is_char('.')):
                if (not MiscHelper.can_be_start_of_sentence(tt.next0_)):
                    continue
                uu = UnitToken.try_parse_list(tt.next0_, add_units, False)
                if (uu is not None):
                    if (uu[0].length_char > 2 or len(uu) > 1):
                        units = uu
                        t = uu[len(uu) - 1].end_token
                        t1 = t
                        break
            break
        # A further noun phrase: move past it and, unless it is a range word
        # or nested measurements were already seen, make it part of the name.
        tt = npt2.end_token
        t = tt
        t1 = t
        if (len(internals_) > 0):
            pass
        elif (t.is_value("ПРЕДЕЛ", None) or t.is_value("ГРАНИЦА", None) or t.is_value("ДИАПАЗОН", None)):
            pass
        elif (t.chars.is_letter):
            name_.end_token = t1
    # Stage 3: skip separators between the name and the value. t11 remembers
    # the last token of stage 2.
    t11 = t1
    t1 = t1.next0_
    first_pass3306 = True
    while True:
        if first_pass3306: first_pass3306 = False
        else: t1 = t1.next0_
        if (not (t1 is not None)): break
        if (t1.is_table_control_char):
            pass
        elif (t1.is_char_of(":,_")):
            if (is_resctriction):
                return None
            www = NumbersWithUnitToken._try_parsewhl(t1.next0_)
            if (www is not None):
                whd = www
                t = www.end_token
                t1 = t
                continue
            uu = UnitToken.try_parse_list(t1.next0_, add_units, False)
            if (uu is not None):
                if (uu[0].length_char > 1 or len(uu) > 1):
                    units = uu
                    t = uu[len(uu) - 1].end_token
                    t1 = t
                    continue
            if (t1.is_char(':')):
                # "name: sub1 v1; sub2 v2; ..." - collect sub-value measurements.
                li = list()
                ttt = t1.next0_
                first_pass3307 = True
                while True:
                    if first_pass3307: first_pass3307 = False
                    else: ttt = ttt.next0_
                    if (not (ttt is not None)): break
                    if (ttt.is_hiphen or ttt.is_table_control_char):
                        continue
                    if ((isinstance(ttt, TextToken)) and not ttt.chars.is_letter):
                        continue
                    mt1 = MeasureToken.try_parse(ttt, add_units, True, True, False, True)
                    if (mt1 is None):
                        break
                    li.append(mt1)
                    ttt = mt1.end_token
                    if (ttt.next0_ is not None and ttt.next0_.is_char(';')):
                        ttt = ttt.next0_
                    if (ttt.is_char(';')):
                        pass
                    elif (ttt.is_newline_after and mt1.is_newline_before):
                        pass
                    else:
                        break
                if (len(li) > 1):
                    res0 = MeasureToken._new1618(t0, li[len(li) - 1].end_token, li, True)
                    if (internals_ is not None and len(internals_) > 0):
                        res0.internal_ex = internals_[0]
                    nam = MiscHelper.get_text_value_of_meta_token(name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
                    li[0].begin_token = t0
                    # Each sub-value is renamed to "<name> (<own name>)" and
                    # inherits the common units when it has none.
                    for v in li:
                        v.name = "{0} ({1})".format(nam, Utils.ifNotNull(v.name, "")).strip()
                        if (v.nums is not None and len(v.nums.units) == 0 and units is not None):
                            v.nums.units = units
                    return res0
        elif (t1.is_hiphen and t1.is_whitespace_after and t1.is_whitespace_before):
            pass
        elif (t1.is_hiphen and t1.next0_ is not None and t1.next0_.is_char('(')):
            pass
        else:
            break
    if (t1 is None):
        return None
    mts = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, not0_, True, is_resctriction)
    if (mts is None):
        # No numeric value but units are known: build a NaN placeholder value.
        if (units is not None and len(units) > 0):
            # NOTE(review): t1 cannot be None here because of the return above,
            # so the "t1 is None" paths below look unreachable.
            if (t1 is None or t1.previous.is_char(':')):
                mts = list()
                if (t1 is None):
                    t1 = t11
                    while t1 is not None and t1.next0_ is not None:
                        pass
                        t1 = t1.next0_
                else:
                    t1 = t1.previous
                mts.append(NumbersWithUnitToken._new1619(t0, t1, math.nan))
        if (mts is None):
            return None
    mt = mts[0]
    if (mt.begin_token == mt.end_token and not (isinstance(mt.begin_token, NumberToken))):
        return None
    if (not is_subval and name_.begin_token.morph.class0_.is_preposition):
        name_.begin_token = name_.begin_token.next0_
    if (mt.whl is not None):
        whd = mt.whl
    # Trim a trailing header / unit list off the end of the name; repeated
    # (at most 10 times) because one trim can expose the other.
    for kk in range(10):
        if (whd is not None and whd.end_token == name_.end_token):
            name_.end_token = whd.begin_token.previous
            continue
        if (units is not None):
            if (units[len(units) - 1].end_token == name_.end_token):
                name_.end_token = units[0].begin_token.previous
                continue
        break
    if (len(mts) > 1 and len(internals_) == 0):
        # Several values: return a composite token with one internal per value.
        if (len(mt.units) == 0):
            if (units is not None):
                for m in mts:
                    m.units = units
        res1 = MeasureToken._new1620(t0, mts[len(mts) - 1].end_token, name_.morph, True)
        res1.name = MiscHelper.get_text_value_of_meta_token(name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
        k = 0
        while k < len(mts):
            ttt = MeasureToken._new1612(mts[k].begin_token, mts[k].end_token, mts[k])
            if (whd is not None):
                # NOTE(review): assumes whd.tag is a list; asObjectOrNull
                # presumably yields None otherwise and len() would fail - confirm.
                nams = Utils.asObjectOrNull(whd.tag, list)
                if (k < len(nams)):
                    ttt.name = nams[k]
            res1.internals.append(ttt)
            k += 1
        tt1 = res1.end_token.next0_
        if (tt1 is not None and tt1.is_char('±')):
            nn = NumbersWithUnitToken._try_parse(tt1, add_units, True, False, False)
            if (nn is not None and nn.plus_minus_percent):
                res1.end_token = nn.end_token
                res1.nums = nn
                if (len(nn.units) > 0 and units is None and len(mt.units) == 0):
                    for m in mts:
                        m.units = nn.units
        return res1
    if (not mt.is_whitespace_before):
        # A value glued to the preceding token is accepted only in a few cases.
        if (mt.begin_token.previous is None):
            return None
        if (mt.begin_token.previous.is_char_of(":),") or mt.begin_token.previous.is_table_control_char or mt.begin_token.previous.is_value("IP", None)):
            pass
        elif (mt.begin_token.is_hiphen and len(mt.units) > 0 and not mt.units[0].is_doubt):
            pass
        else:
            return None
    if (len(mt.units) == 0 and units is not None):
        mt.units = units
        if (mt.div_num is not None and len(units) > 1 and len(mt.div_num.units) == 0):
            # Move the units from the first one with power -1 onwards to the
            # divisor value, flipping the sign of their power.
            i = 1
            while i < len(units):
                if (units[i].pow0_ == -1):
                    j = i
                    while j < len(units):
                        mt.div_num.units.append(units[j])
                        units[j].pow0_ = (- units[j].pow0_)
                        j += 1
                    del mt.units[i:i+len(units) - i]
                    break
                i += 1
    # A min/max marker turns a single value into a lower or upper bound.
    if ((minmax < 0) and mt.single_val is not None):
        mt.from_val = mt.single_val
        mt.from_include = True
        mt.single_val = (None)
    if (minmax > 0 and mt.single_val is not None):
        mt.to_val = mt.single_val
        mt.to_include = True
        mt.single_val = (None)
    if (len(mt.units) == 0):
        units = UnitToken.try_parse_list(mt.end_token.next0_, add_units, True)
        if (units is None):
            if (can_units_absent):
                pass
            else:
                return None
        else:
            mt.units = units
    res = MeasureToken._new1622(t0, mt.end_token, name_.morph, internals_)
    # "word-name" written without spaces: pull the word before the hyphen into the name.
    if (((not t0.is_whitespace_before and t0.previous is not None and t0 == name_.begin_token) and t0.previous.is_hiphen and not t0.previous.is_whitespace_before) and (isinstance(t0.previous.previous, TextToken))):
        name_.begin_token = res.begin_token = name_.begin_token.previous.previous
    res.name = MiscHelper.get_text_value_of_meta_token(name_, (GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE if not is_subval else GetTextAttr.NO))
    res.nums = mt
    # The result is marked reliable when a unit keyword lies inside it.
    for u in res.nums.units:
        if (u.keyword is not None):
            if (u.keyword.begin_char >= res.begin_char):
                res.reliable = True
    res.__parse_internals(add_units)
    if (len(res.internals) > 0 or not can_be_set):
        return res
    # A directly following single value with different units turns the result into a set.
    t1 = res.end_token.next0_
    if (t1 is not None and t1.is_comma_and):
        t1 = t1.next0_
    mts1 = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, False, False, False)
    if ((mts1 is not None and len(mts1) == 1 and (t1.whitespaces_before_count < 3)) and len(mts1[0].units) > 0 and not UnitToken.can_be_equals(mts[0].units, mts1[0].units)):
        res.is_set = True
        res.nums = (None)
        res.internals.append(MeasureToken._new1612(mt.begin_token, mt.end_token, mt))
        res.internals.append(MeasureToken._new1612(mts1[0].begin_token, mts1[0].end_token, mts1[0]))
        res.end_token = mts1[0].end_token
    return res
def __init__(self, sofa_ : 'SourceOfAnalysis'=None, only_tokenizing : bool=False, lang : 'MorphLang'=None, progress : EventHandler=None) -> None:
    """Initialise the container and, when a source is given, tokenize its text.

    With ``sofa_`` omitted only the attributes are reset. Otherwise the text
    is run through ``Morphology.process``, the resulting tokens are linked
    into a chain starting at ``self.first_token``, the optional clean-up
    passes requested by ``sofa_`` are applied, numbers are embedded as number
    tokens and (unless ``only_tokenizing``) out-of-dictionary words are
    disambiguated against a neighbouring word before statistics are built.

    Args:
        sofa_: the source being analysed; ``None`` creates an empty container.
        only_tokenizing: stop right after number tokens are embedded.
        lang: language passed to the morphology calls.
        progress: not used by this constructor.
    """
    self._start_date = datetime.datetime(1, 1, 1, 0, 0, 0)
    self.corrected_tokens = None
    self.first_token = None
    self.__m_entities = list()
    self.ontology = None
    self.base_language = MorphLang()
    self.__m_sofa = None
    self.statistics = None
    self.__m_datas = dict()
    self.misc_data = dict()
    self.processor = None
    self.recurse_level = 0
    self._m_analyzer_stack = list()
    if sofa_ is None:
        return
    self.__m_sofa = sofa_
    self._start_date = datetime.datetime.now()

    # Build the linked chain of text tokens, applying the per-word correction
    # dictionary when the source supplies one.
    tokens = Morphology.process(sofa_.text, lang, None)
    last = None
    if tokens is not None:
        for mt in tokens:
            tt = TextToken(mt, self)
            if sofa_.correction_dict is not None:
                wrapcorw = RefOutArgWrapper(None)
                found = Utils.tryGetValue(sofa_.correction_dict, mt.term, wrapcorw)
                corw = wrapcorw.value
                if found:
                    ccc = Morphology.process(corw, lang, None)
                    # Only a correction that is itself a single token is applied.
                    if ccc is not None and len(ccc) == 1:
                        fixed = TextToken._new538(ccc[0], self, tt.term)
                        fixed.begin_char = tt.begin_char
                        fixed.end_char = tt.end_char
                        fixed.chars = tt.chars
                        tt = fixed
                        if self.corrected_tokens is None:
                            self.corrected_tokens = dict()
                        self.corrected_tokens[tt] = tt.getSourceText()
            if last is None:
                self.first_token = tt
            else:
                last.next0_ = tt
            last = tt

    if sofa_.clear_dust:
        self.__clearDust()
    if sofa_.do_words_merging_by_morph:
        self.__correctWordsByMerging(lang)
    if sofa_.do_word_correction_by_morph:
        self.__correctWordsByMorph(lang)
    self.__mergeLetters()
    self.__defineBaseLanguage()

    # Replace number-like token runs by number tokens; scanning resumes after
    # the embedded token.
    t = self.first_token
    while t is not None:
        nt = NumberHelper._tryParseNumber(t)
        if nt is not None:
            self.embedToken(nt)
            t = nt
        t = t.next0_
    if only_tokenizing:
        return

    def _agreeing_neighbour(word, tail, forward):
        # Look one step away from ``word`` (two when the nearest token is a
        # comma/"and", preposition or conjunction) for a dictionary word longer
        # than 4 chars that shares a morph class bit and the same two-character
        # text slice ending at its end_char.
        cand = word.next0_ if forward else word.previous
        if cand is not None and (cand.is_comma_and or cand.morph.class0_.is_preposition or cand.morph.class0_.is_conjunction):
            cand = cand.next0_ if forward else cand.previous
        if cand is None or cand.getMorphClassInDictionary().is_undefined:
            return None
        if (cand.morph.class0_.value & word.morph.class0_.value) == 0:
            return None
        if cand.length_char <= 4:
            return None
        if sofa_.text[cand.end_char - 1:cand.end_char + 1] != tail:
            return None
        return cand

    # Narrow the morphology of unknown Cyrillic words using such a neighbour,
    # trying the previous side first and then the next side.
    t = self.first_token
    while t is not None:
        if not t.morph.class0_.is_preposition:
            mc = t.getMorphClassInDictionary()
            if mc.is_undefined and t.chars.is_cyrillic_letter and t.length_char > 4:
                tail = sofa_.text[t.end_char - 1:t.end_char + 1]
                tte = _agreeing_neighbour(t, tail, False)
                if tte is None:
                    tte = _agreeing_neighbour(t, tail, True)
                if tte is not None:
                    t.morph.removeItemsEx(tte.morph, tte.getMorphClassInDictionary())
        t = t.next0_
    self.__createStatistics()
def try_attach_list(t: 'Token', max_count: int = 20) -> typing.List['DateItemToken']:
    """Collect the run of date items that starts at ``t``.

    Items are attached one after another with ``DateItemToken.try_attach``;
    words from ``M_EMPTY_WORDS`` and adjective/pronoun tokens between items
    are skipped. The collected list is then cleaned of trailing delimiters
    and of a trailing number that actually belongs to something else.

    Args:
        t: token at which the first item must start.
        max_count: stop collecting once this many items were gathered
            (a non-positive value disables the limit).

    Returns:
        The list of items, or ``None`` when the first item is missing, is a
        delimiter, or nothing usable remains after the clean-up.
    """
    kinds = DateItemToken.DateItemType
    current = DateItemToken.try_attach(t, None, False)
    if current is None or current.typ == kinds.DELIM:
        return None
    res = [current]
    tt = current.end_token.next0_
    while tt is not None:
        if isinstance(tt, TextToken) and tt.check_value(DateItemToken.M_EMPTY_WORDS) is not None:
            tt = tt.next0_
            continue
        following = DateItemToken.try_attach(tt, res, False)
        if following is None:
            # Only same-line, non-Latin adjective/pronoun tokens may be skipped.
            if tt.is_newline_before or tt.chars.is_latin_letter:
                break
            if tt.morph is None or not tt.morph.check((MorphClass.ADJECTIVE) | MorphClass.PRONOUN):
                break
            tt = tt.next0_
            continue
        if tt.is_newline_before:
            # Across a line break only "month / year" and "day / month" may continue.
            if not ((current.typ == kinds.MONTH and following.can_be_year)
                    or (current.typ == kinds.NUMBER and current.can_be_day
                        and following.typ == kinds.MONTH)):
                break
        if following.can_be_year and following.typ == kinds.NUMBER:
            # A number after a half-year/quarter (or after a pointer, when above 1990) is a year.
            if (current.typ == kinds.HALFYEAR or current.typ == kinds.QUARTAL
                    or (current.typ == kinds.POINTER and following.int_value > 1990)):
                following.typ = kinds.YEAR
        current = following
        res.append(current)
        if max_count > 0 and len(res) >= max_count:
            break
        tt = current.end_token.next0_

    # Drop trailing delimiters.
    while res and res[-1].typ == kinds.DELIM:
        res.pop()

    # A trailing number carrying a non-hour postfix is not part of the date,
    # unless it follows a ':' delimiter in a list of more than three items.
    if res and res[-1].typ == kinds.NUMBER:
        nex = NumberHelper.try_parse_number_with_postfix(res[-1].begin_token)
        if nex is not None and nex.ex_typ != NumberExType.HOUR:
            if not (len(res) > 3 and res[-2].typ == kinds.DELIM and res[-2].string_value == ":"):
                del res[-1]
    if not res:
        return None

    # "<month>, <year>": remove the comma of a three-item list.
    for i in range(1, len(res) - 1):
        if res[i].typ == kinds.DELIM and res[i].begin_token.is_comma:
            if (i == 1 and res[i - 1].typ == kinds.MONTH and res[i + 1].can_be_year) and (i + 1) == (len(res) - 1):
                del res[i]

    # A trailing number that opens a longer noun phrase belongs to that phrase.
    if res[-1].typ == kinds.NUMBER:
        tail_item = res[-1]
        npt = NounPhraseHelper.try_parse(tail_item.begin_token, NounPhraseParseAttr.NO, 0, None)
        if npt is not None and npt.end_char > tail_item.end_char:
            del res[-1]
            if res and res[-1].typ == kinds.DELIM:
                del res[-1]
    if not res:
        return None

    # Two items glued to each other and to their surroundings are rejected.
    if len(res) == 2 and not res[0].is_whitespace_after:
        if not res[0].is_whitespace_before and not res[1].is_whitespace_after:
            return None
    return res
def try_attach(t: 'Token', must_has_prefix: bool = False) -> 'OrgItemEponymToken':
    """Try to parse an eponym (what an organization is named after) at ``t``.

    For a non-text token only two shapes are accepted: a DATE referent whose
    text is one of four fixed holiday dates, or an "age" number followed by a
    capitalised Cyrillic token. For a text token the eponym must be introduced
    by "ИМЕНИ"/"ІМЕНІ", by "ИМ"/"ІМ", or directly follow "ФОНД", "ХРАМ" or
    "ЦЕРКОВЬ"; after the prefix a date, a person, an anniversary, a geo
    referent or a sequence of person items is expected.

    Args:
        t: token at which parsing starts.
        must_has_prefix: when True, only the "ИМЕНИ"/"ИМ" forms are accepted.

    Returns:
        An ``OrgItemEponymToken`` with its ``eponyms`` filled, or ``None``.
    """
    from pullenti.ner.org.internal.OrgItemNameToken import OrgItemNameToken
    tt = Utils.asObjectOrNull(t, TextToken)
    if tt is None:
        if t is None:
            return None
        r1 = t.get_referent()
        if r1 is not None and r1.type_name == "DATE":
            date_text = str(r1).upper()
            if date_text in ("1 МАЯ", "7 ОКТЯБРЯ", "9 МАЯ", "8 МАРТА"):
                dt = OrgItemEponymToken._new1797(t, t, list())
                dt.eponyms.append(date_text)
                return dt
        age = NumberHelper.try_parse_age(t)
        if age is not None:
            after = age.end_token.next0_
            if (isinstance(after, (TextToken, ReferentToken))
                    and age.whitespaces_after_count < 3
                    and not after.chars.is_all_lower
                    and after.chars.is_cyrillic_letter):
                dt = OrgItemEponymToken._new1797(t, after, list())
                dt.eponyms.append("{0} {1}".format(age.value, dt.end_token.get_source_text().upper()))
                return dt
        return None

    # t1 - first token of the eponym proper; full - explicit "named after"
    # form; has_name - an "ИМЕНИ"/"ИМ" prefix was seen.
    t1 = None
    full = False
    has_name = False
    if tt.term == "ИМЕНИ" or tt.term == "ІМЕНІ":
        t1 = t.next0_
        full = True
        has_name = True
    elif (tt.term == "ИМ" or tt.term == "ІМ") and tt.next0_ is not None:
        if tt.next0_.is_char('.'):
            t1 = tt.next0_.next0_
            full = True
        elif isinstance(tt.next0_, TextToken) and tt.chars.is_all_lower and not tt.next0_.chars.is_all_lower:
            t1 = tt.next0_
        has_name = True
    elif tt.previous is not None and (tt.previous.is_value("ФОНД", None) or tt.previous.is_value("ХРАМ", None) or tt.previous.is_value("ЦЕРКОВЬ", "ЦЕРКВА")):
        # No prefix: the word right after one of these keywords may itself be
        # the eponym, provided it looks like a proper name.
        if (not tt.chars.is_cyrillic_letter or tt.morph.class0_.is_preposition or tt.morph.class0_.is_conjunction) or not tt.chars.is_letter:
            return None
        if tt.whitespaces_before_count != 1:
            return None
        if tt.chars.is_all_lower:
            return None
        if tt.morph.class0_.is_adjective:
            npt = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
            if npt is not None and npt.begin_token != npt.end_token:
                return None
        na = OrgItemNameToken.try_attach(tt, None, False, True)
        if na is not None:
            if na.is_empty_word or na.is_std_name or na.is_std_tail:
                return None
        t1 = tt
    if t1 is None or (t1.is_newline_before and not full):
        return None
    if tt.previous is not None and tt.previous.morph.class0_.is_preposition:
        return None
    if must_has_prefix and not has_name:
        return None

    # "named after <day month>" - a DATE referent with a day but without a year.
    r = t1.get_referent()
    if (r is not None and r.type_name == "DATE" and full) and r.find_slot("DAY", None, True) is not None and r.find_slot("YEAR", None, True) is None:
        dt = OrgItemEponymToken._new1797(t, t1, list())
        dt.eponyms.append(str(r).upper())
        return dt

    # Optional "saint" marker, possibly abbreviated with a dot.
    holy = False
    if (t1.is_value("СВЯТОЙ", None) or t1.is_value("СВЯТИЙ", None) or t1.is_value("СВ", None)) or t1.is_value("СВЯТ", None):
        t1 = t1.next0_
        holy = True
        if t1 is not None and t1.is_char('.'):
            t1 = t1.next0_
    if t1 is None:
        return None

    # A multi-token PERSON whose last token is the surname.
    cl = t1.get_morph_class_in_dictionary()
    if cl.is_noun or cl.is_adjective:
        rt = t1.kit.process_referent("PERSON", t1)
        if rt is not None and rt.referent.type_name == "PERSON" and rt.begin_token != rt.end_token:
            lastname = rt.referent.get_string_value("LASTNAME")
            if lastname is not None:
                if rt.end_token.is_value(lastname, None):
                    result = OrgItemEponymToken(t, rt.end_token)
                    result.eponyms.append(rt.end_token.get_source_text())
                    return result

    # "<N> years of <noun phrase>".
    nt = NumberHelper.try_parse_anniversary(t1)
    if nt is not None and nt.typ == NumberSpellingType.AGE:
        npt = NounPhraseHelper.try_parse(nt.end_token.next0_, NounPhraseParseAttr.NO, 0, None)
        if npt is not None:
            s = "{0}-{1} {2}".format(nt.value, ("РОКІВ" if t.kit.base_language.is_ua else "ЛЕТ"), MiscHelper.get_text_value(npt.begin_token, npt.end_token, GetTextAttr.NO))
            res = OrgItemEponymToken(t, npt.end_token)
            res.eponyms.append(s)
            return res

    its = OrgItemEponymToken.PersonItemToken.try_attach(t1)
    if its is None:
        if isinstance(t1, ReferentToken) and isinstance(t1.get_referent(), GeoReferent):
            s = MiscHelper.get_text_value(t1, t1, GetTextAttr.NO)
            result = OrgItemEponymToken(t, t1)
            result.eponyms.append(s)
            return result
        return None

    kinds = OrgItemEponymToken.PersonItemType
    eponims = list()
    i = 0
    if its[i].typ == kinds.LOCASEWORD:
        i += 1
    if i >= len(its):
        return None
    if not full:
        if its[i].begin_token.morph.class0_.is_adjective and not its[i].begin_token.morph.class0_.is_proper_surname:
            return None
    if its[i].typ == kinds.INITIAL:
        # "I. [I.] Surname [and I. [I.] Surname ...]"
        i += 1
        while True:
            if i < len(its) and its[i].typ == kinds.INITIAL:
                i += 1
            if i >= len(its) or (its[i].typ != kinds.SURNAME and its[i].typ != kinds.NAME):
                break
            eponims.append(its[i].value)
            t1 = its[i].end_token
            if (i + 2) >= len(its) or its[i + 1].typ != kinds.AND or its[i + 2].typ != kinds.INITIAL:
                break
            i += 3
    elif (i + 1) < len(its) and its[i].typ == kinds.NAME and its[i + 1].typ == kinds.SURNAME:
        # "Name Surname [and Name Surname]"
        eponims.append(its[i + 1].value)
        t1 = its[i + 1].end_token
        i += 2
        if ((i + 2) < len(its) and its[i].typ == kinds.AND and its[i + 1].typ == kinds.NAME) and its[i + 2].typ == kinds.SURNAME:
            eponims.append(its[i + 2].value)
            t1 = its[i + 2].end_token
    elif its[i].typ == kinds.SURNAME:
        # Two trailing items in the same character case are merged into one surname.
        if len(its) == (i + 2) and its[i].chars == its[i + 1].chars:
            its[i].value += (" " + its[i + 1].value)
            its[i].end_token = its[i + 1].end_token
            del its[i + 1]
        eponims.append(its[i].value)
        if (i + 1) < len(its) and its[i + 1].typ == kinds.NAME:
            if (i + 2) == len(its):
                i += 1
            elif its[i + 2].typ != kinds.SURNAME:
                i += 1
        elif (i + 1) < len(its) and its[i + 1].typ == kinds.INITIAL:
            if (i + 2) == len(its):
                i += 1
            elif its[i + 2].typ == kinds.INITIAL and (i + 3) == len(its):
                i += 2
        elif (i + 2) < len(its) and its[i + 1].typ == kinds.AND and its[i + 2].typ == kinds.SURNAME:
            # The second surname is taken unless its noun phrase has a
            # defined, non-genitive case.
            ok = True
            npt = NounPhraseHelper.try_parse(its[i + 2].begin_token, NounPhraseParseAttr.NO, 0, None)
            if npt is not None and not npt.morph.case_.is_genitive and not npt.morph.case_.is_undefined:
                ok = False
            if ok:
                eponims.append(its[i + 2].value)
                i += 2
        t1 = its[i].end_token
    elif its[i].typ == kinds.NAME and holy:
        t1 = its[i].end_token
        sec = False
        if (i + 1) < len(its) and its[i].chars == its[i + 1].chars and its[i + 1].typ != kinds.INITIAL:
            sec = True
            t1 = its[i + 1].end_token
        if sec:
            eponims.append("СВЯТ.{0} {1}".format(its[i].value, its[i + 1].value))
        else:
            eponims.append("СВЯТ.{0}".format(its[i].value))
    elif full and (i + 1) == len(its) and (its[i].typ == kinds.NAME or its[i].typ == kinds.SURNAME):
        t1 = its[i].end_token
        eponims.append(its[i].value)
    elif ((its[i].typ == kinds.NAME and len(its) == 3 and (i + 2) < len(its)
           and its[i + 1].typ == kinds.NAME) and its[i + 2].typ == kinds.SURNAME):
        # The (i + 2) bound check matters when a leading LOCASEWORD item moved
        # i to 1: without it its[i + 2] would be read past the end of a
        # three-item list and raise IndexError.
        t1 = its[i + 2].end_token
        eponims.append("{0} {1} {2}".format(its[i].value, its[i + 1].value, its[i + 2].value))
        i += 2
    if len(eponims) == 0:
        return None
    return OrgItemEponymToken._new1797(t, t1, eponims)