def __str__(self) -> str: return "{0} {1}".format(Utils.enumToString(self.typ), (self.string_value if self.int_value == 0 else str(self.int_value)))
def can_be_equals(self, obj: 'Referent', typ: 'ReferentsEqualType') -> bool:
    """Equality check with another referent.

    True only when *obj* is a UriReferent whose value matches self.value
    under a case-insensitive string comparison; *typ* is not consulted here.
    """
    other = Utils.asObjectOrNull(obj, UriReferent)
    if other is None:
        return False
    return Utils.compareStrings(self.value, other.value, True) == 0
def parent_referent(self) -> 'Referent':
    """Base unit this unit is derived from (ATTR_BASEUNIT slot), or None."""
    base = self.get_slot_value(UnitReferent.ATTR_BASEUNIT)
    return Utils.asObjectOrNull(base, Referent)
# NOTE(review): this method reached review with its original line breaks and
# indentation collapsed into the two long lines below.  The statements are
# kept byte-identical rather than re-indented by hand, because several
# `else:` attachments cannot be re-derived unambiguously from the flattened
# text and a wrong guess would silently move statements between branches.
# Restore the formatting from the upstream generator before editing logic.
#
# Purpose (inferred from the code -- TODO confirm): computes the agreement
# coefficient between a noun group (self.from0_ / self.from_morph) and a
# verb group (self.to_verb), writing the score into self.coef and returning
# it.  Conventions visible in the code: self.coef = -1 marks "link not
# possible" (a preposition is present, verb morphology missing, imperative
# mood, case/number/person mismatch); passive voice and reversive verbs get
# dedicated case handling; SemanticService.PARAMS supplies tunable weights
# (transitive_coef, verb_plural, morph_accord), sometimes halved/quartered
# for weaker matches; `noplural` tightens the plural-number checks.
def __calc_agent(self, noplural: bool) -> float: if (not Utils.isNullOrEmpty(self.from_prep)): self.coef = -1 return self.coef vf = self.to_verb.first_verb.verb_morph if (vf is None): self.coef = -1 return self.coef vf2 = self.to_verb.last_verb.verb_morph if (vf2 is None): self.coef = -1 return self.coef if (vf.misc.mood == MorphMood.IMPERATIVE): self.coef = -1 return self.coef morph_ = self.from_morph if (vf2.misc.voice == MorphVoice.PASSIVE or self.to_verb.last_verb.morph.contains_attr( "страд.з.", None)): if (not morph_.case_.is_undefined): if (morph_.case_.is_instrumental): self.coef = SemanticService.PARAMS.transitive_coef if (vf2.case_.is_instrumental): self.coef /= (2) return self.coef self.coef = -1 return self.coef self.coef = 0 return self.coef if ("инф." in vf.misc.attrs): self.coef = -1 return self.coef if (NGLink.__is_rev_verb(vf2)): ag_case = MorphCase.UNDEFINED grs = DerivateService.find_derivates( Utils.ifNotNull(vf2.normal_full, vf2.normal_case), True, None) if (grs is not None): for gr in grs: if (gr.cm_rev.agent is not None): ag_case = gr.cm_rev.agent.case_ break if (not morph_.case_.is_undefined): if (ag_case.is_dative): if (morph_.case_.is_dative): self.coef = SemanticService.PARAMS.transitive_coef if (morph_.case_.is_genitive): self.coef /= (2) return self.coef self.coef = -1 return self.coef if (ag_case.is_instrumental): if (morph_.case_.is_instrumental): if (morph_.case_.is_nominative): self.coef = 0 return self.coef self.coef = SemanticService.PARAMS.transitive_coef return self.coef self.coef = -1 return self.coef if (not morph_.case_.is_nominative): self.coef = -1 return self.coef else: self.coef = 0 return self.coef if (vf.number == MorphNumber.PLURAL): if (not morph_.case_.is_undefined): if (vf.case_.is_undefined): if (not morph_.case_.is_nominative): self.coef = -1 return self.coef elif (((vf.case_) & morph_.case_).is_undefined): self.coef = -1 return self.coef if (noplural): if (self.from_is_plural): pass elif (((morph_.number) & 
(MorphNumber.PLURAL)) == (MorphNumber.UNDEFINED)): self.coef = -1 return self.coef elif (not NGLink.__check_morph_accord(morph_, False, vf)): self.coef = -1 return self.coef elif (len(morph_.items) > 0 and not vf.case_.is_undefined): ok = False for it in morph_.items: if (((it.number) & (MorphNumber.PLURAL)) == (MorphNumber.PLURAL)): if (not it.case_.is_undefined and ((it.case_) & vf.case_).is_undefined): continue ok = True break if (not ok): self.coef = -1 return self.coef self.plural = 1 self.coef = SemanticService.PARAMS.verb_plural if (vf2.normal_case == "БЫТЬ"): if (morph_.case_.is_undefined and self.from0_.source.begin_token.begin_char > self.to_verb.end_char): self.coef /= (2) else: if (vf.number == MorphNumber.SINGULAR): self.plural = 0 if (self.from_is_plural): self.coef = -1 return self.coef if (not NGLink.__check_morph_accord(morph_, False, vf)): self.coef = -1 return self.coef if (not morph_.case_.is_undefined): if (not morph_.case_.is_nominative): if (self.to_verb.first_verb.is_participle): pass else: self.coef = -1 return self.coef if (vf.misc.person != MorphPerson.UNDEFINED): if (((vf.misc.person) & (MorphPerson.THIRD)) == (MorphPerson.UNDEFINED)): if (((vf.misc.person) & (MorphPerson.FIRST)) == (MorphPerson.FIRST)): if (not morph_.contains_attr("1 л.", None)): self.coef = -1 return self.coef if (((vf.misc.person) & (MorphPerson.SECOND)) == (MorphPerson.SECOND)): if (not morph_.contains_attr("2 л.", None)): self.coef = -1 return self.coef self.coef = SemanticService.PARAMS.morph_accord if (morph_.case_.is_undefined): self.coef /= (4) return self.coef
def from_prep(self) -> str:
    """Preposition attached to the source node, or '' when none is set."""
    prep = self.from0_.source.prep
    return Utils.ifNotNull(prep, "")
# NOTE(review): source arrived flattened (original newlines/indentation
# lost); the code is kept byte-identical below because the manual
# loop-continue emulation (`first_pass...` flags) makes hand re-nesting
# error-prone.  Restore formatting from the upstream generator first.
#
# Purpose (inferred from the code): for an "agreement" business-fact item,
# find the two contracting parties.  It scans backwards from the item
# (bounded by a sentence/newline budget, widened to 2 lines after a
# "СТОРОНА ... МЕЖДУ/МІЖ" pattern) for OrganizationReferent-s, possibly a
# pair joined by "и"/"К"; a preceding BusinessFactReferent with who+who2/whom
# is reused directly.  If only one party is found, it scans forward (up to
# '.' or a new line) for the second.  On success it builds a
# BusinessFactReferent of bfi.base_kind with who and whom (LAWSUIT) or who2
# (otherwise), attaches date and sum via __findDate/__findSum, and returns
# the covering ReferentToken(t0, t1); returns None when a party is missing.
def __analizeAgreement(self, bfi: 'BusinessFactItem') -> 'ReferentToken': first = None second = None t0 = bfi.begin_token t1 = bfi.end_token max_lines = 1 t = bfi.begin_token.previous first_pass2775 = True while True: if first_pass2775: first_pass2775 = False else: t = t.previous if (not (t is not None)): break if (t.isChar('.') or t.is_newline_after): max_lines -= 1 if ((max_lines) == 0): break continue if (t.isValue("СТОРОНА", None) and t.previous is not None and ((t.previous.isValue("МЕЖДУ", None) or t.previous.isValue("МІЖ", None)))): max_lines = 2 t = t.previous t0 = t continue r = t.getReferent() if (isinstance(r, BusinessFactReferent)): b = Utils.asObjectOrNull(r, BusinessFactReferent) if (b.who is not None and ((b.who2 is not None or b.whom is not None))): first = b.who second = (Utils.ifNotNull(b.who2, b.whom)) break if (not ((isinstance(r, OrganizationReferent)))): continue if ((t.previous is not None and ((t.previous.is_and or t.previous.isValue("К", None))) and t.previous.previous is not None) and (isinstance(t.previous.previous.getReferent(), OrganizationReferent))): t0 = t.previous.previous first = t0.getReferent() second = r break else: t0 = t first = r break if (second is None): t = bfi.end_token.next0_ first_pass2776 = True while True: if first_pass2776: first_pass2776 = False else: t = t.next0_ if (not (t is not None)): break if (t.isChar('.')): break if (t.is_newline_before): break r = t.getReferent() if (not ((isinstance(r, OrganizationReferent)))): continue if ((t.next0_ is not None and ((t.next0_.is_and or t.next0_.isValue("К", None))) and t.next0_.next0_ is not None) and (isinstance(t.next0_.next0_.getReferent(), OrganizationReferent))): t1 = t.next0_.next0_ first = r second = t1.getReferent() break else: t1 = t second = r break if (first is None or second is None): return None bf = BusinessFactReferent._new436(bfi.base_kind) bf.who = first if (bfi.base_kind == BusinessFactKind.LAWSUIT): bf.whom = second else: bf.who2 = second 
self.__findDate(bf, bfi.begin_token) self.__findSum(bf, bfi.begin_token) return ReferentToken(bf, t0, t1)
# NOTE(review): source arrived flattened (original newlines/indentation
# lost); kept byte-identical below -- the deep nesting and the
# `first_pass...` loop-continue emulation make manual re-indentation too
# risky.  Restore formatting from the upstream generator before editing.
#
# Purpose (inferred from the code): recursive-descent recognizer for one
# element of a bibliographic reference, returning a BookLinkToken or None.
# `lev` bounds the recursion depth (lev > 3 -> None).  Recognized elements
# include: bracketed [..] items and quoted numbers; author / geo / date /
# press-organization / URL referents; termin-dictionary hits (type tails,
# electronic-resource markers); '/' and '//' delimiters; bare digit numbers
# (1930..2030 treated as years, or page counts when followed by СТР/P/S...);
# page ranges after СТР/ТОМ/VOL/...; publication places (М., СПб, К.);
# translation markers (ПЕР. С ...); "ТАМ ЖЕ" (ibid.) and "СМ." (see)
# markers; numbered prefixes; volume references ("В 2 Т."); and
# parenthesized years, including a sanity cap at the current year.
def __tryParse(t : 'Token', lev : int) -> 'BookLinkToken': if (t is None or lev > 3): return None if (t.isChar('[')): re = BookLinkToken.__tryParse(t.next0_, lev + 1) if (re is not None and re.end_token.next0_ is not None and re.end_token.next0_.isChar(']')): re.begin_token = t re.end_token = re.end_token.next0_ return re if (re is not None and re.end_token.isChar(']')): re.begin_token = t return re if (re is not None): if (re.typ == BookLinkTyp.SOSTAVITEL or re.typ == BookLinkTyp.EDITORS): return re br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100) if (br is not None): if ((isinstance(br.end_token.previous, NumberToken)) and (br.length_char < 30)): return BookLinkToken._new346(t, br.end_token, BookLinkTyp.NUMBER, MiscHelper.getTextValue(br.begin_token.next0_, br.end_token.previous, GetTextAttr.NO)) t0 = t if (isinstance(t, ReferentToken)): if (isinstance(t.getReferent(), PersonReferent)): return BookLinkToken.tryParseAuthor(t, FioTemplateType.UNDEFINED) if (isinstance(t.getReferent(), GeoReferent)): return BookLinkToken._new343(t, t, BookLinkTyp.GEO, t.getReferent()) if (isinstance(t.getReferent(), DateReferent)): dr = Utils.asObjectOrNull(t.getReferent(), DateReferent) if (len(dr.slots) == 1 and dr.year > 0): return BookLinkToken._new346(t, t, BookLinkTyp.YEAR, str(dr.year)) if (dr.year > 0 and t.previous is not None and t.previous.is_comma): return BookLinkToken._new346(t, t, BookLinkTyp.YEAR, str(dr.year)) if (isinstance(t.getReferent(), OrganizationReferent)): org0_ = Utils.asObjectOrNull(t.getReferent(), OrganizationReferent) if (org0_.kind == OrganizationKind.PRESS): return BookLinkToken._new343(t, t, BookLinkTyp.PRESS, org0_) if (isinstance(t.getReferent(), UriReferent)): uri = Utils.asObjectOrNull(t.getReferent(), UriReferent) if ((uri.scheme == "http" or uri.scheme == "https" or uri.scheme == "ftp") or uri.scheme is None): return BookLinkToken._new343(t, t, BookLinkTyp.URL, uri) tok_ = BookLinkToken.__m_termins.tryParse(t, TerminParseAttr.NO) if 
(tok_ is not None): typ_ = Utils.valToEnum(tok_.termin.tag, BookLinkTyp) ok = True if (typ_ == BookLinkTyp.TYPE or typ_ == BookLinkTyp.NAMETAIL or typ_ == BookLinkTyp.ELECTRONRES): if (t.previous is not None and ((t.previous.isCharOf(".:[") or t.previous.is_hiphen))): pass else: ok = False if (ok): return BookLinkToken._new346(t, tok_.end_token, typ_, tok_.termin.canonic_text) if (typ_ == BookLinkTyp.ELECTRONRES): tt = tok_.end_token.next0_ first_pass2751 = True while True: if first_pass2751: first_pass2751 = False else: tt = tt.next0_ if (not (tt is not None)): break if ((isinstance(tt, TextToken)) and not tt.chars.is_letter): continue if (isinstance(tt.getReferent(), UriReferent)): return BookLinkToken._new343(t, tt, BookLinkTyp.ELECTRONRES, tt.getReferent()) break if (t.isChar('/')): res = BookLinkToken._new346(t, t, BookLinkTyp.DELIMETER, "/") if (t.next0_ is not None and t.next0_.isChar('/')): res.end_token = t.next0_ res.value = "//" if (not t.is_whitespace_before and not t.is_whitespace_after): coo = 3 no = True tt = t.next0_ while tt is not None and coo > 0: vvv = BookLinkToken.tryParse(tt, lev + 1) if (vvv is not None and vvv.typ != BookLinkTyp.NUMBER): no = False break tt = tt.next0_; coo -= 1 if (no): return None return res if ((isinstance(t, NumberToken)) and (t).int_value is not None and (t).typ == NumberSpellingType.DIGIT): res = BookLinkToken._new346(t, t, BookLinkTyp.NUMBER, str((t).value)) val = (t).int_value if (val >= 1930 and (val < 2030)): res.typ = BookLinkTyp.YEAR if (t.next0_ is not None and t.next0_.isChar('.')): res.end_token = t.next0_ elif ((t.next0_ is not None and t.next0_.length_char == 1 and not t.next0_.chars.is_letter) and t.next0_.is_whitespace_after): res.end_token = t.next0_ elif (isinstance(t.next0_, TextToken)): term = (t.next0_).term if (((term == "СТР" or term == "C" or term == "С") or term == "P" or term == "S") or term == "PAGES"): res.end_token = t.next0_ res.typ = BookLinkTyp.PAGES res.value = str((t).value) return res 
if (isinstance(t, TextToken)): term = (t).term if (((((((term == "СТР" or term == "C" or term == "С") or term == "ТОМ" or term == "T") or term == "Т" or term == "P") or term == "PP" or term == "V") or term == "VOL" or term == "S") or term == "СТОР" or t.isValue("PAGE", None)) or t.isValue("СТРАНИЦА", "СТОРІНКА")): tt = t.next0_ while tt is not None: if (tt.isCharOf(".:~")): tt = tt.next0_ else: break if (isinstance(tt, NumberToken)): res = BookLinkToken._new345(t, tt, BookLinkTyp.PAGERANGE) tt0 = tt tt1 = tt tt = tt.next0_ first_pass2752 = True while True: if first_pass2752: first_pass2752 = False else: tt = tt.next0_ if (not (tt is not None)): break if (tt.isCharOf(",") or tt.is_hiphen): if (isinstance(tt.next0_, NumberToken)): tt = tt.next0_ res.end_token = tt tt1 = tt continue break res.value = MiscHelper.getTextValue(tt0, tt1, GetTextAttr.NO) return res if ((term == "M" or term == "М" or term == "СПБ") or term == "K" or term == "К"): if (t.next0_ is not None and t.next0_.isCharOf(":;")): re = BookLinkToken._new345(t, t.next0_, BookLinkTyp.GEO) return re if (t.next0_ is not None and t.next0_.isCharOf(".")): res = BookLinkToken._new345(t, t.next0_, BookLinkTyp.GEO) if (t.next0_.next0_ is not None and t.next0_.next0_.isCharOf(":;")): res.end_token = t.next0_.next0_ elif (t.next0_.next0_ is not None and (isinstance(t.next0_.next0_, NumberToken))): pass elif (t.next0_.next0_ is not None and t.next0_.next0_.is_comma and (isinstance(t.next0_.next0_.next0_, NumberToken))): pass else: return None return res if (term == "ПЕР" or term == "ПЕРЕВ" or term == "ПЕРЕВОД"): tt = t if (tt.next0_ is not None and tt.next0_.isChar('.')): tt = tt.next0_ if (tt.next0_ is not None and ((tt.next0_.isValue("C", None) or tt.next0_.isValue("С", None)))): tt = tt.next0_ if (tt.next0_ is None or tt.whitespaces_after_count > 2): return None re = BookLinkToken._new345(t, tt.next0_, BookLinkTyp.TRANSLATE) return re if (term == "ТАМ" or term == "ТАМЖЕ"): res = BookLinkToken._new345(t, t, 
BookLinkTyp.TAMZE) if (t.next0_ is not None and t.next0_.isValue("ЖЕ", None)): res.end_token = t.next0_ return res if (((term == "СМ" or term == "CM" or term == "НАПР") or term == "НАПРИМЕР" or term == "SEE") or term == "ПОДРОБНЕЕ" or term == "ПОДРОБНО"): res = BookLinkToken._new345(t, t, BookLinkTyp.SEE) t = t.next0_ first_pass2753 = True while True: if first_pass2753: first_pass2753 = False else: t = t.next0_ if (not (t is not None)): break if (t.isCharOf(".:") or t.isValue("ALSO", None)): res.end_token = t continue if (t.isValue("В", None) or t.isValue("IN", None)): res.end_token = t continue vvv = BookLinkToken.__tryParse(t, lev + 1) if (vvv is not None and vvv.typ == BookLinkTyp.SEE): res.end_token = vvv.end_token break break return res if (term == "БОЛЕЕ"): vvv = BookLinkToken.__tryParse(t.next0_, lev + 1) if (vvv is not None and vvv.typ == BookLinkTyp.SEE): vvv.begin_token = t return vvv no = MiscHelper.checkNumberPrefix(t) if (isinstance(no, NumberToken)): return BookLinkToken._new345(t, no, BookLinkTyp.N) if (((term == "B" or term == "В")) and (isinstance(t.next0_, NumberToken)) and (isinstance(t.next0_.next0_, TextToken))): term2 = (t.next0_.next0_).term if (((term2 == "Т" or term2 == "T" or term2.startswith("ТОМ")) or term2 == "TT" or term2 == "ТТ") or term2 == "КН" or term2.startswith("КНИГ")): return BookLinkToken._new345(t, t.next0_.next0_, BookLinkTyp.VOLUME) if (t.isChar('(')): if (((isinstance(t.next0_, NumberToken)) and (t.next0_).int_value is not None and t.next0_.next0_ is not None) and t.next0_.next0_.isChar(')')): num = (t.next0_).int_value if (num > 1900 and num <= 2040): if (num <= datetime.datetime.now().year): return BookLinkToken._new346(t, t.next0_.next0_, BookLinkTyp.YEAR, str(num)) if (((isinstance(t.next0_, ReferentToken)) and (isinstance(t.next0_.getReferent(), DateReferent)) and t.next0_.next0_ is not None) and t.next0_.next0_.isChar(')')): num = (t.next0_.getReferent()).year if (num > 0): return BookLinkToken._new346(t, 
t.next0_.next0_, BookLinkTyp.YEAR, str(num)) return None
def __str__(self) -> str: return "{0}{1} {2}: {3}".format( ("(1) " if self.must_be_first_line else ""), self.lev, Utils.enumToString(self.typ), self.getSourceText())
def __str__(self) -> str: return "{0}: {1}".format(Utils.enumToString(self.typ), Utils.ifNotNull(self.value, ""))
# NOTE(review): source arrived with its original newlines and indentation
# collapsed into the long lines below; the code is kept byte-identical
# because this pipeline is far too order-sensitive to re-nest by hand.
# Restore formatting from the upstream generator before editing logic.
#
# Purpose: morphological analysis of `text` (the embedded docstring is in
# Russian: "Произвести морфологический анализ текста" -- "perform
# morphological analysis of the text"); returns the list of MorphToken,
# or None for empty input.  Phases visible in the code:
#  1. tokenize via TextWrapper, collecting unique lexemes (uni_lex) with
#     per-word language detection and RU/UA/BY/KZ "pure"/"total" counters;
#  2. pick the dominant language (def_lang) from those counters, with a
#     second pass over up-to-100 lexemes when UA/BY/KZ engines compete
#     with RU, then resolve word forms per lexeme (__process_one_word);
#  3. when good_text is False, repair typography/OCR artifacts: 'Ъ'
#     rendered as a quote between Б/Т and Е/Я/Ё, Ukrainian apostrophes,
#     Cyrillic З/Ч typed as digits 3/4, and words hyphenated across line
#     breaks (merged when the joined form is in the lexicon/dictionary);
#  4. fill per-token CharsInfo (letter/latin/cyrillic, all-upper,
#     capital-upper, last-lower flags) and add a noun fallback form for
#     capitalized tokens whose normal forms don't match their prefix;
#  5. unless good_text/only_tokenizing: transliterate lone Latin C/A/P
#     surrounded by Cyrillic tokens, add surname variants for
#     upper/capitalized Cyrillic words, default normal_case to the term,
#     merge A"BC quoted-initial patterns and adjacent double hyphens.
# NOTE(review): `if (i == 733860): pass` and the `debug_token` assignment
# look like leftover debugging breakpoint hooks -- candidates for removal
# once the file is restored to editable form.
def run(self, text: str, only_tokenizing: bool, dlang: 'MorphLang', progress: EventHandler, good_text: bool) -> typing.List['MorphToken']: """ Произвести морфологический анализ текста Args: text(str): исходный текст lang: язык (если null, то попробует определить) Returns: typing.List[MorphToken]: последовательность результирующих морфем """ if (Utils.isNullOrEmpty(text)): return None twr = TextWrapper(text, good_text) twrch = twr.chars res = list() uni_lex = dict() term0 = None pure_rus_words = 0 pure_ukr_words = 0 pure_by_words = 0 pure_kz_words = 0 tot_rus_words = 0 tot_ukr_words = 0 tot_by_words = 0 tot_kz_words = 0 i = 0 first_pass2884 = True while True: if first_pass2884: first_pass2884 = False else: i += 1 if (not (i < twr.length)): break ty = InnerMorphology._get_char_typ(twrch[i]) if (ty == 0): continue if (ty > 2): j = (i + 1) else: j = (i + 1) while j < twr.length: if (InnerMorphology._get_char_typ(twrch[j]) != ty): break j += 1 wstr = text[i:i + j - i] term = None if (good_text): term = wstr else: trstr = LanguageHelper.transliteral_correction( wstr, term0, False) term = LanguageHelper.correct_word(trstr) if (Utils.isNullOrEmpty(term)): i = (j - 1) continue lang = InnerMorphology.__detect_lang(twr, i, j - 1, term) if (lang == MorphLang.UA): pure_ukr_words += 1 elif (lang == MorphLang.RU): pure_rus_words += 1 elif (lang == MorphLang.BY): pure_by_words += 1 elif (lang == MorphLang.KZ): pure_kz_words += 1 if ((((lang) & MorphLang.RU)) != MorphLang.UNKNOWN): tot_rus_words += 1 if ((((lang) & MorphLang.UA)) != MorphLang.UNKNOWN): tot_ukr_words += 1 if ((((lang) & MorphLang.BY)) != MorphLang.UNKNOWN): tot_by_words += 1 if ((((lang) & MorphLang.KZ)) != MorphLang.UNKNOWN): tot_kz_words += 1 if (ty == 1): term0 = term lemmas = None if (ty == 1 and not only_tokenizing): wraplemmas12 = RefOutArgWrapper(None) inoutres13 = Utils.tryGetValue(uni_lex, term, wraplemmas12) lemmas = wraplemmas12.value if (not inoutres13): lemmas = InnerMorphology.UniLexWrap._new11(lang) 
uni_lex[term] = lemmas tok = MorphToken() tok.term = term tok.begin_char = i if (i == 733860): pass tok.end_char = (j - 1) tok.tag = (lemmas) res.append(tok) i = (j - 1) def_lang = MorphLang(dlang) if (pure_rus_words > pure_ukr_words and pure_rus_words > pure_by_words and pure_rus_words > pure_kz_words): def_lang = MorphLang.RU elif (tot_rus_words > tot_ukr_words and tot_rus_words > tot_by_words and tot_rus_words > tot_kz_words): def_lang = MorphLang.RU elif (pure_ukr_words > pure_rus_words and pure_ukr_words > pure_by_words and pure_ukr_words > pure_kz_words): def_lang = MorphLang.UA elif (tot_ukr_words > tot_rus_words and tot_ukr_words > tot_by_words and tot_ukr_words > tot_kz_words): def_lang = MorphLang.UA elif (pure_kz_words > pure_rus_words and pure_kz_words > pure_ukr_words and pure_kz_words > pure_by_words): def_lang = MorphLang.KZ elif (tot_kz_words > tot_rus_words and tot_kz_words > tot_ukr_words and tot_kz_words > tot_by_words): def_lang = MorphLang.KZ elif (pure_by_words > pure_rus_words and pure_by_words > pure_ukr_words and pure_by_words > pure_kz_words): def_lang = MorphLang.BY elif (tot_by_words > tot_rus_words and tot_by_words > tot_ukr_words and tot_by_words > tot_kz_words): if (tot_rus_words > 10 and tot_by_words > (tot_rus_words + 20)): def_lang = MorphLang.BY elif (tot_rus_words == 0 or tot_by_words >= (tot_rus_words * 2)): def_lang = MorphLang.BY if (((def_lang.is_undefined or def_lang.is_ua)) and tot_rus_words > 0): if (((tot_ukr_words > tot_rus_words and InnerMorphology.M_ENGINE_UA.language.is_ua)) or ((tot_by_words > tot_rus_words and InnerMorphology.M_ENGINE_BY.language.is_by)) or ((tot_kz_words > tot_rus_words and InnerMorphology.M_ENGINE_KZ.language.is_kz))): cou0 = 0 tot_kz_words = 0 tot_ukr_words = tot_kz_words tot_by_words = tot_ukr_words tot_rus_words = tot_by_words for kp in uni_lex.items(): lang = MorphLang() wraplang14 = RefOutArgWrapper(lang) kp[1].word_forms = self.__process_one_word( kp[0], wraplang14) lang = wraplang14.value 
if (kp[1].word_forms is not None): for wf in kp[1].word_forms: lang |= wf.language kp[1].lang = lang if (lang.is_ru): tot_rus_words += 1 if (lang.is_ua): tot_ukr_words += 1 if (lang.is_by): tot_by_words += 1 if (lang.is_kz): tot_kz_words += 1 if (lang.is_cyrillic): cou0 += 1 if (cou0 >= 100): break if (tot_rus_words > ((math.floor(tot_by_words / 2))) and tot_rus_words > ((math.floor(tot_ukr_words / 2)))): def_lang = MorphLang.RU elif (tot_ukr_words > ((math.floor(tot_rus_words / 2))) and tot_ukr_words > ((math.floor(tot_by_words / 2)))): def_lang = MorphLang.UA elif (tot_by_words > ((math.floor(tot_rus_words / 2))) and tot_by_words > ((math.floor(tot_ukr_words / 2)))): def_lang = MorphLang.BY elif (def_lang.is_undefined): def_lang = MorphLang.RU cou = 0 tot_kz_words = 0 tot_ukr_words = tot_kz_words tot_by_words = tot_ukr_words tot_rus_words = tot_by_words for kp in uni_lex.items(): lang = def_lang if (lang.is_undefined): if (tot_rus_words > tot_by_words and tot_rus_words > tot_ukr_words and tot_rus_words > tot_kz_words): lang = MorphLang.RU elif (tot_ukr_words > tot_rus_words and tot_ukr_words > tot_by_words and tot_ukr_words > tot_kz_words): lang = MorphLang.UA elif (tot_by_words > tot_rus_words and tot_by_words > tot_ukr_words and tot_by_words > tot_kz_words): lang = MorphLang.BY elif (tot_kz_words > tot_rus_words and tot_kz_words > tot_ukr_words and tot_kz_words > tot_by_words): lang = MorphLang.KZ wraplang15 = RefOutArgWrapper(lang) kp[1].word_forms = self.__process_one_word(kp[0], wraplang15) lang = wraplang15.value kp[1].lang = lang if ((((lang) & MorphLang.RU)) != MorphLang.UNKNOWN): tot_rus_words += 1 if ((((lang) & MorphLang.UA)) != MorphLang.UNKNOWN): tot_ukr_words += 1 if ((((lang) & MorphLang.BY)) != MorphLang.UNKNOWN): tot_by_words += 1 if ((((lang) & MorphLang.KZ)) != MorphLang.UNKNOWN): tot_kz_words += 1 if (progress is not None): self.__on_progress(cou, len(uni_lex), progress) cou += 1 debug_token = None empty_list = None for r in res: uni = 
Utils.asObjectOrNull(r.tag, InnerMorphology.UniLexWrap) r.tag = None if (uni is None or uni.word_forms is None or len(uni.word_forms) == 0): if (empty_list is None): empty_list = list() r.word_forms = empty_list if (uni is not None): r.language = uni.lang else: r.word_forms = uni.word_forms if (r.begin_char == 733860): debug_token = r if (not good_text): i = 0 first_pass2885 = True while True: if first_pass2885: first_pass2885 = False else: i += 1 if (not (i < (len(res) - 2))): break ui0 = twrch[res[i].begin_char] ui1 = twrch[res[i + 1].begin_char] ui2 = twrch[res[i + 2].begin_char] if (ui1.is_quot): p = res[i + 1].begin_char if ((p >= 2 and "БбТт".find(text[p - 1]) >= 0 and ((p + 3) < len(text))) and "ЕеЯяЁё".find(text[p + 1]) >= 0): wstr = LanguageHelper.transliteral_correction( LanguageHelper.correct_word("{0}Ъ{1}".format( res[i].get_source_text(text), res[i + 2].get_source_text(text))), None, False) li = self.__process_one_word0(wstr) if (li is not None and len(li) > 0 and li[0].is_in_dictionary): res[i].end_char = res[i + 2].end_char res[i].term = wstr res[i].word_forms = li del res[i + 1:i + 1 + 2] elif ((ui1.is_apos and p > 0 and str.isalpha(text[p - 1])) and ((p + 1) < len(text)) and str.isalpha(text[p + 1])): if (def_lang == MorphLang.UA or (((res[i].language) & MorphLang.UA)) != MorphLang.UNKNOWN or (((res[i + 2].language) & MorphLang.UA)) != MorphLang.UNKNOWN): wstr = LanguageHelper.transliteral_correction( LanguageHelper.correct_word("{0}{1}".format( res[i].get_source_text(text), res[i + 2].get_source_text(text))), None, False) li = self.__process_one_word0(wstr) okk = True if (okk): res[i].end_char = res[i + 2].end_char res[i].term = wstr if (li is None): li = list() res[i].word_forms = li if (li is not None and len(li) > 0): res[i].language = li[0].language del res[i + 1:i + 1 + 2] elif (((ui1.uni_char == '3' or ui1.uni_char == '4')) and res[i + 1].length == 1): src = ("З" if ui1.uni_char == '3' else "Ч") i0 = i + 1 if ((res[i].end_char + 1) == res[i 
+ 1].begin_char and ui0.is_cyrillic): i0 -= 1 src = (res[i0].get_source_text(text) + src) i1 = i + 1 if ((res[i + 1].end_char + 1) == res[i + 2].begin_char and ui2.is_cyrillic): i1 += 1 src += res[i1].get_source_text(text) if (len(src) > 2): wstr = LanguageHelper.transliteral_correction( LanguageHelper.correct_word(src), None, False) li = self.__process_one_word0(wstr) if (li is not None and len(li) > 0 and li[0].is_in_dictionary): res[i0].end_char = res[i1].end_char res[i0].term = wstr res[i0].word_forms = li del res[i0 + 1:i0 + 1 + i1 - i0] elif ((ui1.is_hiphen and ui0.is_letter and ui2.is_letter) and res[i].end_char > res[i].begin_char and res[i + 2].end_char > res[i + 2].begin_char): newline = False sps = 0 j = (res[i + 1].end_char + 1) while j < res[i + 2].begin_char: if (text[j] == '\r' or text[j] == '\n'): newline = True sps += 1 elif (not Utils.isWhitespace(text[j])): break else: sps += 1 j += 1 full_word = LanguageHelper.correct_word( res[i].get_source_text(text) + res[i + 2].get_source_text(text)) if (not newline): if (full_word in uni_lex or full_word == "ИЗЗА"): newline = True elif (text[res[i + 1].begin_char] == (chr(0x00AD))): newline = True elif (LanguageHelper.ends_with_ex( res[i].get_source_text(text), "О", "о", None, None) and len(res[i + 2].word_forms) > 0 and res[i + 2].word_forms[0].is_in_dictionary): if (text[res[i + 1].begin_char] == '¬'): li = self.__process_one_word0(full_word) if (li is not None and len(li) > 0 and li[0].is_in_dictionary): newline = True elif ((res[i].end_char + 2) == res[i + 2].begin_char): if (not str.isupper(text[res[i + 2].begin_char]) and (sps < 2) and len(full_word) > 4): newline = True if ((i + 3) < len(res)): ui3 = twrch[res[i + 3].begin_char] if (ui3.is_hiphen): newline = False elif (((res[i].end_char + 1) == res[i + 1].begin_char and sps > 0 and (sps < 3)) and len(full_word) > 4): newline = True if (newline): li = self.__process_one_word0(full_word) if (li is not None and len(li) > 0 and ((li[0].is_in_dictionary 
or full_word in uni_lex))): res[i].end_char = res[i + 2].end_char res[i].term = full_word res[i].word_forms = li del res[i + 1:i + 1 + 2] else: pass elif ((ui1.is_letter and ui0.is_letter and res[i].length > 2) and res[i + 1].length > 1): if (ui0.is_upper != ui1.is_upper): continue if (not ui0.is_cyrillic or not ui1.is_cyrillic): continue newline = False j = (res[i].end_char + 1) while j < res[i + 1].begin_char: if (twrch[j].code == 0xD or twrch[j].code == 0xA): newline = True break j += 1 if (not newline): continue full_word = LanguageHelper.correct_word( res[i].get_source_text(text) + res[i + 1].get_source_text(text)) if (not full_word in uni_lex): continue li = self.__process_one_word0(full_word) if (li is not None and len(li) > 0 and li[0].is_in_dictionary): res[i].end_char = res[i + 1].end_char res[i].term = full_word res[i].word_forms = li del res[i + 1] i = 0 first_pass2886 = True while True: if first_pass2886: first_pass2886 = False else: i += 1 if (not (i < len(res))): break mt = res[i] mt.char_info = CharsInfo() ui0 = twrch[mt.begin_char] ui00 = UnicodeInfo.ALL_CHARS[ord((res[i].term[0]))] j = (mt.begin_char + 1) while j <= mt.end_char: if (ui0.is_letter): break ui0 = twrch[j] j += 1 if (ui0.is_letter): res[i].char_info.is_letter = True if (ui00.is_latin): res[i].char_info.is_latin_letter = True elif (ui00.is_cyrillic): res[i].char_info.is_cyrillic_letter = True if (res[i].language == MorphLang.UNKNOWN): if (LanguageHelper.is_cyrillic(mt.term)): res[i].language = (MorphLang.RU if def_lang.is_undefined else def_lang) if (good_text): continue all_up = True all_lo = True j = mt.begin_char while j <= mt.end_char: if (twrch[j].is_upper or twrch[j].is_digit): all_lo = False else: all_up = False j += 1 if (all_up): mt.char_info.is_all_upper = True elif (all_lo): mt.char_info.is_all_lower = True elif (((ui0.is_upper or twrch[mt.begin_char].is_digit)) and mt.end_char > mt.begin_char): all_lo = True j = (mt.begin_char + 1) while j <= mt.end_char: if 
(twrch[j].is_upper or twrch[j].is_digit): all_lo = False break j += 1 if (all_lo): mt.char_info.is_capital_upper = True elif (twrch[mt.end_char].is_lower and (mt.end_char - mt.begin_char) > 1): all_up = True j = mt.begin_char while j < mt.end_char: if (twrch[j].is_lower): all_up = False break j += 1 if (all_up): mt.char_info.is_last_lower = True if (mt.char_info.is_last_lower and mt.length > 2 and mt.char_info.is_cyrillic_letter): pref = text[mt.begin_char:mt.begin_char + mt.end_char - mt.begin_char] ok = False for wf in mt.word_forms: if (wf.normal_case == pref or wf.normal_full == pref): ok = True break if (not ok): mt.word_forms = list(mt.word_forms) mt.word_forms.insert( 0, MorphWordForm._new16(pref, MorphClass.NOUN, 1)) if (good_text or only_tokenizing): return res i = 0 first_pass2887 = True while True: if first_pass2887: first_pass2887 = False else: i += 1 if (not (i < len(res))): break if (res[i].length == 1 and res[i].char_info.is_latin_letter): ch = res[i].term[0] if (ch == 'C' or ch == 'A' or ch == 'P'): pass else: continue is_rus = False for ii in range(i - 1, -1, -1): if ((res[ii].end_char + 1) != res[ii + 1].begin_char): break elif (res[ii].char_info.is_letter): is_rus = res[ii].char_info.is_cyrillic_letter break if (not is_rus): ii = i + 1 while ii < len(res): if ((res[ii - 1].end_char + 1) != res[ii].begin_char): break elif (res[ii].char_info.is_letter): is_rus = res[ii].char_info.is_cyrillic_letter break ii += 1 if (is_rus): res[i].term = LanguageHelper.transliteral_correction( res[i].term, None, True) res[i].char_info.is_cyrillic_letter = True res[i].char_info.is_latin_letter = True for r in res: if (r.char_info.is_all_upper or r.char_info.is_capital_upper): if (r.language.is_cyrillic): ok = False for wf in r.word_forms: if (wf.class0_.is_proper_surname): ok = True break if (not ok): r.word_forms = list(r.word_forms) InnerMorphology.M_ENGINE_RU.process_surname_variants( r.term, r.word_forms) for r in res: for mv in r.word_forms: if (mv.normal_case 
is None): mv.normal_case = r.term i = 0 while i < (len(res) - 2): if (res[i].char_info.is_latin_letter and res[i].char_info.is_all_upper and res[i].length == 1): if (twrch[res[i + 1].begin_char].is_quot and res[i + 2].char_info.is_latin_letter and res[i + 2].length > 2): if ((res[i].end_char + 1) == res[i + 1].begin_char and (res[i + 1].end_char + 1) == res[i + 2].begin_char): wstr = "{0}{1}".format(res[i].term, res[i + 2].term) li = self.__process_one_word0(wstr) if (li is not None): res[i].word_forms = li res[i].end_char = res[i + 2].end_char res[i].term = wstr if (res[i + 2].char_info.is_all_lower): res[i].char_info.is_all_upper = False res[i].char_info.is_capital_upper = True elif (not res[i + 2].char_info.is_all_upper): res[i].char_info.is_all_upper = False del res[i + 1:i + 1 + 2] i += 1 i = 0 first_pass2888 = True while True: if first_pass2888: first_pass2888 = False else: i += 1 if (not (i < (len(res) - 1))): break if (not res[i].char_info.is_letter and not res[i + 1].char_info.is_letter and (res[i].end_char + 1) == res[i + 1].begin_char): if (twrch[res[i].begin_char].is_hiphen and twrch[res[i + 1].begin_char].is_hiphen): if (i == 0 or not twrch[res[i - 1].begin_char].is_hiphen): pass else: continue if ((i + 2) == len(res) or not twrch[res[i + 2].begin_char].is_hiphen): pass else: continue res[i].end_char = res[i + 1].end_char del res[i + 1] return res
def is_real_from(self) -> bool:
    """True when the begin token is the literal word FROM (or Russian 'ОТ')."""
    text_tok = Utils.asObjectOrNull(self.begin_token, TextToken)
    if text_tok is None:
        return False
    return text_tok.term in ("FROM", "ОТ")
def address(self) -> 'Referent':
    """Registration address (translated from the original Russian docstring
    'Адрес регистрации'), taken from the ATTR_ADDRESS slot; None if absent."""
    slot = self.get_slot_value(PersonIdentityReferent.ATTR_ADDRESS)
    return Utils.asObjectOrNull(slot, Referent)
def state(self) -> 'Referent':
    """Issuing state/country (translated from the original Russian docstring
    'Государство'), taken from the ATTR_STATE slot; None if absent."""
    slot = self.get_slot_value(PersonIdentityReferent.ATTR_STATE)
    return Utils.asObjectOrNull(slot, Referent)
def __str__(self) -> str: return "{0} {1} ({2})".format(self.number, Utils.ifNotNull(self.noun, "?"), Utils.ifNotNull(self.nouns_genetive, "?"))
def __analizeGet(self, bfi: 'BusinessFactItem') -> 'ReferentToken':
    """Try to build a get/sell/own business fact around the marker item *bfi*.

    Looks for a referent before the marker (the acting side) and for a
    funds/organization/person mention after it, then combines them into a
    BusinessFactReferent.  Returns a ReferentToken covering the fact, or
    None when no fact can be assembled.
    """
    # The acting side must be mentioned just before the marker.
    bef = self.__FindRefBefore(bfi.begin_token.previous)
    if (bef is None):
        return None
    t1 = bfi.end_token.next0_
    if (t1 is None):
        return None
    # Skip adverbs and the "право распоряжаться" wording after the marker.
    first_pass2772 = True
    while True:
        if first_pass2772: first_pass2772 = False
        else: t1 = t1.next0_
        if (not (t1 is not None)): break
        if (t1.morph.class0_.is_adverb):
            continue
        if (t1.isValue("ПРАВО", None) or t1.isValue("РАСПОРЯЖАТЬСЯ", None) or t1.isValue("РОЗПОРЯДЖАТИСЯ", None)):
            continue
        break
    if (t1 is None):
        return None
    # Case 1: funds mentioned after the marker, acting side before it.
    if ((isinstance(t1.getReferent(), FundsReferent)) and not ((isinstance(bef.referent, FundsReferent)))):
        fr = Utils.asObjectOrNull(t1.getReferent(), FundsReferent)
        bfr = BusinessFactReferent._new436(bfi.base_kind)
        bfr.who = bef.referent
        bef2 = self.__FindSecRefBefore(bef)
        if (bef2 is not None):
            bfr.addSlot(BusinessFactReferent.ATTR_WHO, bef2.referent, False, 0)
            bef = bef2
        # If the funds' source coincides with the actor, look one referent further back.
        if (fr.source == bef.referent and bef2 is None):
            bef2 = self.__FindRefBefore(bef.begin_token.previous)
            if (bef2 is not None):
                bef = bef2
                bfr.who = bef.referent
        if (fr.source == bef.referent):
            # Scan backwards (up to 100 tokens) for another organization to
            # serve as the funds' source.
            cou = 0
            tt = bef.begin_token.previous
            first_pass2773 = True
            while True:
                if first_pass2773: first_pass2773 = False
                else: tt = tt.previous
                if (not (tt is not None)): break
                cou += 1
                if ((cou) > 100): break
                refs = tt.getReferents()
                if (refs is None):
                    continue
                for r in refs:
                    if ((isinstance(r, OrganizationReferent)) and r != bef.referent):
                        cou = 1000  # forces the outer while to stop
                        fr.source = Utils.asObjectOrNull(r, OrganizationReferent)
                        break
        bfr._addWhat(fr)
        bfr.typ = ("покупка ценных бумаг" if bfi.base_kind == BusinessFactKind.GET else (("продажа ценных бумаг" if bfi.base_kind == BusinessFactKind.SELL else "владение ценными бумагами")))
        self.__findDate(bfr, bef.begin_token)
        self.__findSum(bfr, bef.end_token)
        return ReferentToken(bfr, bef.begin_token, t1)
    # Case 2: organization/person after the marker.
    if ((bfi.morph.class0_.is_noun and ((bfi.base_kind == BusinessFactKind.GET or bfi.base_kind == BusinessFactKind.SELL)) and (isinstance(t1.getReferent(), OrganizationReferent))) or (isinstance(t1.getReferent(), PersonReferent))):
        if ((isinstance(bef.referent, FundsReferent)) or (isinstance(bef.referent, OrganizationReferent))):
            bfr = BusinessFactReferent._new436(bfi.base_kind)
            if (bfi.base_kind == BusinessFactKind.GET):
                bfr.typ = ("покупка ценных бумаг" if isinstance(bef.referent, FundsReferent) else "покупка компании")
            elif (bfi.base_kind == BusinessFactKind.SELL):
                bfr.typ = ("продажа ценных бумаг" if isinstance(bef.referent, FundsReferent) else "продажа компании")
            bfr.who = t1.getReferent()
            bfr._addWhat(bef.referent)
            self.__findDate(bfr, bef.begin_token)
            self.__findSum(bfr, bef.end_token)
            t1 = BusinessAnalyzer.__addWhosList(t1, bfr)
            return ReferentToken(bfr, bef.begin_token, t1)
        if ((isinstance(bef.referent, OrganizationReferent)) or (isinstance(bef.referent, PersonReferent))):
            tt = t1
            if (tt is not None and tt.morph.class0_.is_preposition):
                tt = tt.next0_
            # 'slav' is the counterparty mentioned after the (optional) preposition.
            slav = (None if tt is None else tt.getReferent())
            if ((((isinstance(slav, PersonReferent)) or (isinstance(slav, OrganizationReferent)))) and tt.next0_ is not None and (isinstance(tt.next0_.getReferent(), FundsReferent))):
                bfr = BusinessFactReferent._new436(bfi.base_kind)
                bfr.typ = ("покупка ценных бумаг" if bfi.base_kind == BusinessFactKind.GET else "продажа ценных бумаг")
                bfr.who = bef.referent
                bef2 = self.__FindSecRefBefore(bef)
                if (bef2 is not None):
                    bfr.addSlot(BusinessFactReferent.ATTR_WHO, bef2.referent, False, 0)
                    bef = bef2
                bfr.whom = slav
                bfr._addWhat(tt.next0_.getReferent())
                self.__findDate(bfr, bef.begin_token)
                self.__findSum(bfr, bef.end_token)
                return ReferentToken(bfr, bef.begin_token, tt.next0_)
            elif (isinstance(slav, OrganizationReferent)):
                bfr = BusinessFactReferent._new436(bfi.base_kind)
                bfr.typ = ("покупка компании" if bfi.base_kind == BusinessFactKind.GET else "продажа компании")
                bfr.who = bef.referent
                bef2 = self.__FindSecRefBefore(bef)
                if (bef2 is not None):
                    bfr.addSlot(BusinessFactReferent.ATTR_WHO, bef2.referent, False, 0)
                    bef = bef2
                bfr._addWhat(slav)
                self.__findDate(bfr, bef.begin_token)
                self.__findSum(bfr, bef.end_token)
                return ReferentToken(bfr, bef.begin_token, tt.next0_)
    # Case 3: funds before the marker, organization/person after it.
    if ((isinstance(bef.referent, FundsReferent)) and (((isinstance(t1.getReferent(), OrganizationReferent)) or (isinstance(t1.getReferent(), PersonReferent))))):
        bfr = BusinessFactReferent._new436(bfi.base_kind)
        bfr.typ = ("покупка ценных бумаг" if bfi.base_kind == BusinessFactKind.GET else (("продажа ценных бумаг" if bfi.base_kind == BusinessFactKind.SELL else "владение ценными бумагами")))
        bfr.who = t1.getReferent()
        bfr._addWhat(bef.referent)
        self.__findDate(bfr, bef.begin_token)
        self.__findSum(bfr, bef.end_token)
        return ReferentToken(bfr, bef.begin_token, t1)
    return None
def try_attach(t: 'Token') -> 'TitleItemToken':
    """Try to recognize a title element starting at token *t*.

    Recognizes themes ("ТЕМА:"), translations ("ПЕРЕВОД С ..."), sections,
    specialities and typed titles (via M_TERMINS), including the combined
    TYPANDTHEME form.  Returns the matched TitleItemToken or None.
    """
    tt = Utils.asObjectOrNull(t, TextToken)
    if (tt is not None):
        t1 = tt
        if (tt.term == "ТЕМА"):
            # "ТЕМА" followed by a type token yields a combined TYPANDTHEME item.
            tit = TitleItemToken.try_attach(tt.next0_)
            if (tit is not None and tit.typ == TitleItemToken.Types.TYP):
                t1 = tit.end_token
                if (t1.next0_ is not None and t1.next0_.is_char(':')):
                    t1 = t1.next0_
                return TitleItemToken._new2655(t, t1, TitleItemToken.Types.TYPANDTHEME, tit.value)
            if (tt.next0_ is not None and tt.next0_.is_char(':')):
                t1 = tt.next0_
            return TitleItemToken(tt, t1, TitleItemToken.Types.THEME)
        if (tt.term == "ПО" or tt.term == "НА"):
            if (tt.next0_ is not None and tt.next0_.is_value("ТЕМА", None)):
                t1 = tt.next0_
                if (t1.next0_ is not None and t1.next0_.is_char(':')):
                    t1 = t1.next0_
                return TitleItemToken(tt, t1, TitleItemToken.Types.THEME)
        if (tt.term == "ПЕРЕВОД" or tt.term == "ПЕР"):
            # "перевод с <language>" — a translation note.
            tt2 = tt.next0_
            if (tt2 is not None and tt2.is_char('.')):
                tt2 = tt2.next0_
            if (isinstance(tt2, TextToken)):
                if (tt2.term == "C" or tt2.term == "С"):
                    tt2 = tt2.next0_
                    if (isinstance(tt2, TextToken)):
                        return TitleItemToken(t, tt2, TitleItemToken.Types.TRANSLATE)
        if (tt.term == "СЕКЦИЯ" or tt.term == "SECTION" or tt.term == "СЕКЦІЯ"):
            t1 = tt.next0_
            if (t1 is not None and t1.is_char(':')):
                t1 = t1.next0_
            br = BracketHelper.try_parse(t1, BracketParseAttr.NO, 100)
            if (br is not None):
                t1 = br.end_token
            elif (t1 != tt.next0_):
                # No bracketed name — consume up to the end of the line.
                while t1 is not None:
                    if (t1.is_newline_after):
                        break
                    t1 = t1.next0_
                if (t1 is None):
                    return None
            if (t1 != tt.next0_):
                return TitleItemToken(tt, t1, TitleItemToken.Types.DUST)
        t1 = (None)
        if (tt.is_value("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ")):
            t1 = tt.next0_
        elif (tt.morph.class0_.is_preposition and tt.next0_ is not None and tt.next0_.is_value("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ")):
            t1 = tt.next0_.next0_
        elif (tt.is_char('/') and tt.is_newline_before):
            t1 = tt.next0_
        if (t1 is not None):
            if (t1.is_char_of(":") or t1.is_hiphen):
                t1 = t1.next0_
            spec = TitleItemToken.__try_attach_speciality(t1, True)
            if (spec is not None):
                spec.begin_token = t
                return spec
    sss = TitleItemToken.__try_attach_speciality(t, False)
    if (sss is not None):
        return sss
    if (isinstance(t, ReferentToken)):
        return None
    npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
    if (npt is not None):
        s = npt.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
        tok = TitleItemToken.M_TERMINS.try_parse(npt.end_token, TerminParseAttr.NO)
        if (tok is not None):
            ty = Utils.valToEnum(tok.termin.tag, TitleItemToken.Types)
            if (ty == TitleItemToken.Types.TYP):
                tit = TitleItemToken.try_attach(tok.end_token.next0_)
                if (tit is not None and tit.typ == TitleItemToken.Types.THEME):
                    return TitleItemToken._new2655(npt.begin_token, tit.end_token, TitleItemToken.Types.TYPANDTHEME, s)
                if (s == "РАБОТА" or s == "РОБОТА" or s == "ПРОЕКТ"):
                    return None
                t1 = tok.end_token
                if (s == "ДИССЕРТАЦИЯ" or s == "ДИСЕРТАЦІЯ"):
                    # Refine a dissertation title into doctoral/candidate/master form.
                    err = 0
                    ttt = t1.next0_
                    first_pass3394 = True
                    while True:
                        if first_pass3394: first_pass3394 = False
                        else: ttt = ttt.next0_
                        if (not (ttt is not None)): break
                        if (ttt.morph.class0_.is_preposition):
                            continue
                        if (ttt.is_value("СОИСКАНИЕ", "")):
                            continue
                        npt1 = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                        if (npt1 is not None and npt1.noun.is_value("СТЕПЕНЬ", "СТУПІНЬ")):
                            ttt = npt1.end_token
                            t1 = ttt
                            continue
                        rt = t1.kit.process_referent("PERSON", ttt)
                        if (rt is not None and (isinstance(rt.referent, PersonPropertyReferent))):
                            ppr = Utils.asObjectOrNull(rt.referent, PersonPropertyReferent)
                            if (ppr.name == "доктор наук"):
                                t1 = rt.end_token
                                s = "ДОКТОРСКАЯ ДИССЕРТАЦИЯ"
                                break
                            elif (ppr.name == "кандидат наук"):
                                t1 = rt.end_token
                                s = "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"
                                break
                            elif (ppr.name == "магистр"):
                                t1 = rt.end_token
                                s = "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ"
                                break
                        if (ttt.is_value("ДОКТОР", None) or ttt.is_value("КАНДИДАТ", None) or ttt.is_value("МАГИСТР", "МАГІСТР")):
                            t1 = ttt
                            npt1 = NounPhraseHelper.try_parse(ttt.next0_, NounPhraseParseAttr.NO, 0, None)
                            if (npt1 is not None and npt1.end_token.is_value("НАУК", None)):
                                t1 = npt1.end_token
                                s = ("МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ" if ttt.is_value("МАГИСТР", "МАГІСТР") else ("ДОКТОРСКАЯ ДИССЕРТАЦИЯ" if ttt.is_value("ДОКТОР", None) else "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"))
                            break
                        err += 1
                        if (err > 3):
                            break
                if (t1.next0_ is not None and t1.next0_.is_char('.')):
                    t1 = t1.next0_
                if (s.endswith("ОТЧЕТ") and t1.next0_ is not None and t1.next0_.is_value("О", None)):
                    npt1 = NounPhraseHelper.try_parse(t1.next0_, NounPhraseParseAttr.PARSEPREPOSITION, 0, None)
                    if (npt1 is not None and npt1.morph.case_.is_prepositional):
                        t1 = npt1.end_token
                return TitleItemToken._new2655(npt.begin_token, t1, ty, s)
    tok1 = TitleItemToken.M_TERMINS.try_parse(t, TerminParseAttr.NO)
    if (tok1 is not None):
        t1 = tok1.end_token
        re = TitleItemToken(tok1.begin_token, t1, Utils.valToEnum(tok1.termin.tag, TitleItemToken.Types))
        return re
    # A known term wrapped in brackets/quotes.
    if (BracketHelper.can_be_start_of_sequence(t, False, False)):
        tok1 = TitleItemToken.M_TERMINS.try_parse(t.next0_, TerminParseAttr.NO)
        if (tok1 is not None and BracketHelper.can_be_end_of_sequence(tok1.end_token.next0_, False, None, False)):
            t1 = tok1.end_token.next0_
            return TitleItemToken(tok1.begin_token, t1, Utils.valToEnum(tok1.termin.tag, TitleItemToken.Types))
    return None
def __analizeProfit(self, bfi: 'BusinessFactItem') -> 'ReferentToken':
    """Build a profit-style business fact around the marker item *bfi*.

    Collects an organization (after the marker, or scanning backwards),
    a money sum (required) and an optional date/date range, and wraps them
    into a BusinessFactReferent.  Returns the ReferentToken or None.
    """
    if (bfi.end_token.next0_ is None):
        return None
    t0 = bfi.begin_token
    t1 = bfi.end_token
    # The fact type is the marker's normalized text.
    typ = t1.getNormalCaseText(None, True, MorphGender.UNDEFINED, False).lower()
    org0_ = None
    org0_ = (Utils.asObjectOrNull(t1.next0_.getReferent(), OrganizationReferent))
    t = t1
    if (org0_ is not None):
        t = t.next0_
    else:
        # No ready referent — ask the organization analyzer directly.
        rt = t.kit.processReferent(OrganizationAnalyzer.ANALYZER_NAME, t.next0_)
        if (rt is not None):
            org0_ = (Utils.asObjectOrNull(rt.referent, OrganizationReferent))
            t = rt.end_token
    dt = None
    sum0_ = None
    t = t.next0_
    # Scan forward until the sentence end collecting sum, date and organization.
    first_pass2774 = True
    while True:
        if first_pass2774: first_pass2774 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.isChar('.')):
            break
        if (t.isChar('(')):
            br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
            if (br is not None):
                t = br.end_token
                continue
        if ((((t.morph.class0_.is_verb or t.isValue("ДО", None) or t.is_hiphen) or t.isValue("РАЗМЕР", None) or t.isValue("РОЗМІР", None))) and t.next0_ is not None and (isinstance(t.next0_.getReferent(), MoneyReferent))):
            if (sum0_ is not None):
                break
            sum0_ = (Utils.asObjectOrNull(t.next0_.getReferent(), MoneyReferent))
            t = t.next0_
            t1 = t
            continue
        r = t.getReferent()
        if ((isinstance(r, DateRangeReferent)) or (isinstance(r, DateReferent))):
            if (dt is None):
                dt = r
                t1 = t
        elif ((isinstance(r, OrganizationReferent)) and org0_ is None):
            org0_ = (Utils.asObjectOrNull(r, OrganizationReferent))
            t1 = t
    if (sum0_ is None):
        return None
    if (org0_ is None):
        # Still no organization — look backwards, possibly borrowing it from
        # a previous business fact.
        tt = t0.previous
        while tt is not None:
            if (tt.isChar('.')):
                break
            b0 = Utils.asObjectOrNull(tt.getReferent(), BusinessFactReferent)
            if (b0 is not None):
                org0_ = (Utils.asObjectOrNull(b0.who, OrganizationReferent))
                break
            org0_ = Utils.asObjectOrNull(tt.getReferent(), OrganizationReferent)
            if ((org0_) is not None):
                break
            tt = tt.previous
    if (org0_ is None):
        return None
    bfr = BusinessFactReferent._new436(bfi.base_kind)
    bfr.who = org0_
    bfr.typ = typ
    bfr.addSlot(BusinessFactReferent.ATTR_MISC, sum0_, False, 0)
    if (dt is not None):
        bfr.when = dt
    else:
        self.__findDate(bfr, bfi.begin_token)
    return ReferentToken(bfr, t0, t1)
def __serialize_morph_misc_info(res: io.IOBase, mi: 'MorphMiscInfo') -> None:
    """Serialize *mi* to *res*: packed short value, each attr string, then a 0xFF terminator byte."""
    MorphSerializeHelper.__serialize_short(res, mi._m_value)
    for attr in mi.attrs:
        MorphSerializeHelper.__serialize_string(res, attr)
    Utils.writeByteIO(res, 0xFF)
def __analizeLikelihoods(self, rt: 'ReferentToken') -> typing.List['ReferentToken']:
    """Expand a single funds-fact into per-holder facts.

    Given a business fact *rt* whose only "what" is a FundsReferent, scans
    the following tokens for pairs of holders (organizations/persons) and
    funds items, and produces one BusinessFactReferent per pair.  Returns
    the list of ReferentToken, or None when the pattern does not hold.
    """
    bfr0 = Utils.asObjectOrNull(rt.referent, BusinessFactReferent)
    if (bfr0 is None or len(bfr0.whats) != 1 or not ((isinstance(bfr0.whats[0], FundsReferent)))):
        return None
    funds0 = Utils.asObjectOrNull(bfr0.whats[0], FundsReferent)
    whos = list()
    funds = list()
    t = rt.end_token.next0_
    first_pass2778 = True
    while True:
        if first_pass2778: first_pass2778 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.is_newline_before or t.isChar('.')):
            break
        # Skip connective material between the enumerated pairs.
        if (t.morph.class0_.is_adverb):
            continue
        if (t.is_hiphen or t.is_comma_and):
            continue
        if (t.morph.class0_.is_conjunction or t.morph.class0_.is_preposition or t.morph.class0_.is_misc):
            continue
        r = t.getReferent()
        if ((isinstance(r, OrganizationReferent)) or (isinstance(r, PersonReferent))):
            whos.append(Utils.asObjectOrNull(t, ReferentToken))
            continue
        if (isinstance(r, FundsReferent)):
            funds0 = (Utils.asObjectOrNull(r, FundsReferent))
            funds.append(funds0)
            continue
        it = FundsItemToken.tryParse(t, None)
        if (it is None):
            break
        # A bare funds item (percent/count/sum) — clone the template funds
        # referent and fill in the parsed value.
        fu = Utils.asObjectOrNull(funds0.clone(), FundsReferent)
        fu.occurrence.clear()
        fu.addOccurenceOfRefTok(ReferentToken(fu, it.begin_token, it.end_token))
        if (it.typ == FundsItemTyp.PERCENT and it.num_val is not None):
            fu.percent = it.num_val.real_value
        elif (it.typ == FundsItemTyp.COUNT and it.num_val is not None and it.num_val.int_value is not None):
            fu.count = it.num_val.int_value
        elif (it.typ == FundsItemTyp.SUM):
            fu.sum0_ = Utils.asObjectOrNull(it.ref, MoneyReferent)
        else:
            break
        funds.append(fu)
        t = it.end_token
    # Holders and funds must pair off one-to-one.
    if (len(whos) == 0 or len(whos) != len(funds)):
        return None
    res = list()
    i = 0
    while i < len(whos):
        bfr = BusinessFactReferent._new447(bfr0.kind, bfr0.typ)
        bfr.who = whos[i].referent
        bfr._addWhat(funds[i])
        # Copy misc/date slots from the source fact.
        for s in bfr0.slots:
            if (s.type_name == BusinessFactReferent.ATTR_MISC or s.type_name == BusinessFactReferent.ATTR_WHEN):
                bfr.addSlot(s.type_name, s.value, False, 0)
        res.append(ReferentToken(bfr, whos[i].begin_token, whos[i].end_token))
        i += 1
    return res
def __serialize_byte(res: io.IOBase, val: int) -> None:
    """Write the single byte *val* to the stream *res*."""
    Utils.writeByteIO(res, val)
def __calc_genetive(self) -> None:
    """Score a genitive-style dependency link from *from0_* to *to*.

    Updates self.coef (leaves it untouched when the link is impossible).
    Handles special cases for formulas and numbers-with-units before the
    generic case-based scoring.
    """
    if (not self.from0_.source.can_be_noun):
        return
    # Formula on the dependent side: only a noun head is acceptable.
    if (self.from0_.source.typ == SentItemType.FORMULA):
        if (self.to.source.typ != SentItemType.NOUN):
            return
        self.coef = SemanticService.PARAMS.transitive_coef
        return
    frmorph = self.from_morph
    # Formula on the head side.
    if (self.to.source.typ == SentItemType.FORMULA):
        if (self.from0_.source.typ != SentItemType.NOUN):
            return
        if (frmorph.case_.is_genitive):
            self.coef = SemanticService.PARAMS.transitive_coef
        elif (frmorph.case_.is_undefined):
            self.coef = (0)
        return
    # Number-with-unit on the dependent side: must be adjacent, and the unit
    # kind should agree with the head keyword.
    if (isinstance(self.from0_.source.source, NumbersWithUnitToken)):
        if (self.from0_.order != (self.to.order + 1)):
            return
        num = Utils.asObjectOrNull(self.from0_.source.source, NumbersWithUnitToken)
        ki = UnitToken.calc_kind(num.units)
        if (ki != MeasureKind.UNDEFINED):
            if (UnitsHelper.check_keyword(ki, self.to.source.source)):
                self.coef = (SemanticService.PARAMS.next_model * (3))
                return
        if (isinstance(self.to.source.source, NumbersWithUnitToken)):
            return
    non_gen_text = False
    if (Utils.isNullOrEmpty(self.from_prep) and not (isinstance(self.from0_.source.source, VerbPhraseToken))):
        if (self.from0_.order != (self.to.order + 1)):
            non_gen_text = True
    # Check the head's derivational groups for transitivity / control models.
    if (self.to.source.dr_groups is not None):
        for gr in self.to.source.dr_groups:
            if (gr.cm.transitive and Utils.isNullOrEmpty(self.from_prep)):
                ok = False
                if (isinstance(self.to.source.source, VerbPhraseToken)):
                    if (frmorph.case_.is_accusative):
                        ok = True
                        self.can_be_pacient = True
                elif (frmorph.case_.is_genitive and self.from0_.order == (self.to.order + 1)):
                    ok = True
                if (ok):
                    self.coef = SemanticService.PARAMS.transitive_coef
                    return
            if ((((gr.cm.questions) & (QuestionType.WHATTODO))) != (QuestionType.UNDEFINED) and (isinstance(self.from0_.source.source, VerbPhraseToken))):
                self.coef = SemanticService.PARAMS.transitive_coef
                return
            if (gr.cm.nexts is not None):
                if (self.from_prep in gr.cm.nexts):
                    cas = gr.cm.nexts[self.from_prep]
                    if (not ((cas) & frmorph.case_).is_undefined):
                        if (Utils.isNullOrEmpty(self.from_prep) and self.from0_.order != (self.to.order + 1) and ((cas) & frmorph.case_).is_genitive):
                            pass
                        else:
                            self.coef = SemanticService.PARAMS.next_model
                            return
    if (non_gen_text or not Utils.isNullOrEmpty(self.from_prep)):
        return
    # Generic scoring by the dependent's case.
    cas0 = frmorph.case_
    if (cas0.is_genitive or cas0.is_instrumental or cas0.is_dative):
        if ((isinstance(self.to.source.source, NumbersWithUnitToken)) and cas0.is_genitive):
            self.coef = SemanticService.PARAMS.transitive_coef
        else:
            self.coef = SemanticService.PARAMS.ng_link
            if (cas0.is_nominative or self.from0_.source.typ == SentItemType.PARTBEFORE):
                self.coef /= (2)
            if (not cas0.is_genitive):
                self.coef /= (2)
    elif (isinstance(self.from0_.source.source, VerbPhraseToken)):
        self.coef = 0.1
    # Comparative "чем" after a number strengthens the link.
    if ((isinstance(self.to.source.source, NumbersWithUnitToken)) and self.to.source.end_token.is_value("ЧЕМ", None)):
        self.coef = (SemanticService.PARAMS.transitive_coef * (2))
def __serialize_int(res: io.IOBase, val: int) -> None:
    """Write *val* to *res* as four bytes, low-order byte first.

    Each call passes the shifted value to Utils.writeByteIO, which is
    presumably responsible for truncating to a single byte — TODO confirm.
    """
    for shift in (0, 8, 16, 24):
        Utils.writeByteIO(res, val >> shift)
def __calc_pacient(self, noplural: bool) -> float:
    """Score the link where the noun group is the patient of *to_verb*.

    Sets self.coef where a score is found and returns it; returns -1 when
    the link is impossible.  *noplural* relaxes the plural-agreement check
    when the noun group itself is plural.
    """
    # A preposition before the group rules out a direct patient link.
    if (not Utils.isNullOrEmpty(self.from_prep)):
        self.coef = -1
        return self.coef
    vf = self.to_verb.first_verb.verb_morph
    if (vf is None):
        return -1
    vf2 = self.to_verb.last_verb.verb_morph
    if (vf2 is None):
        return -1
    morph_ = self.from_morph
    # Passive voice: the patient agrees with the verb like a subject.
    if (vf2.misc.voice == MorphVoice.PASSIVE or self.to_verb.last_verb.morph.contains_attr("страд.з.", None)):
        if (vf.number == MorphNumber.PLURAL):
            if (noplural):
                if (self.from_is_plural):
                    pass
                elif (not NGLink.__check_morph_accord(morph_, False, vf)):
                    return -1
            elif (len(morph_.items) > 0 and not vf.case_.is_undefined):
                # At least one plural variant must be case-compatible with the verb.
                ok = False
                for it in morph_.items:
                    if (((it.number) & (MorphNumber.PLURAL)) == (MorphNumber.PLURAL)):
                        if (not it.case_.is_undefined and ((it.case_) & vf.case_).is_undefined):
                            continue
                        ok = True
                        break
                if (not ok):
                    self.coef = -1
                    return self.coef
            self.coef = SemanticService.PARAMS.verb_plural
            self.plural = 1
        else:
            if (vf.number == MorphNumber.SINGULAR):
                self.plural = 0
                if (self.from_is_plural):
                    return -1
            if (not NGLink.__check_morph_accord(morph_, False, vf)):
                return -1
            self.coef = SemanticService.PARAMS.morph_accord
        return self.coef
    # Active voice: inspect the verb's derivational model.
    is_trans = False
    is_ref_dative = False
    grs = DerivateService.find_derivates(Utils.ifNotNull(vf2.normal_full, vf2.normal_case), True, None)
    if (grs is not None):
        for gr in grs:
            if (gr.cm.transitive):
                is_trans = True
            if (gr.cm_rev.agent is not None and not gr.cm_rev.agent.case_.is_nominative):
                is_ref_dative = True
    if (NGLink.__is_rev_verb(vf2)):
        if (not Utils.isNullOrEmpty(self.from_prep)):
            return -1
        if (not morph_.case_.is_undefined):
            if (is_ref_dative):
                if (morph_.case_.is_nominative):
                    self.coef = SemanticService.PARAMS.transitive_coef
                    return self.coef
                elif (morph_.case_.is_instrumental):
                    self.coef = SemanticService.PARAMS.transitive_coef
                    return self.coef
            return -1
        self.coef = 0
        return self.coef
    # For compound verbs, also consider the first verb's derivates.
    if (vf2 != vf and not is_trans):
        grs = DerivateService.find_derivates(Utils.ifNotNull(vf.normal_full, vf.normal_case), True, None)
        if (grs is not None):
            for gr in grs:
                if (gr.cm.transitive):
                    is_trans = True
    if (is_trans):
        if (not Utils.isNullOrEmpty(self.from_prep)):
            return -1
        if (not morph_.case_.is_undefined):
            if (morph_.case_.is_accusative):
                self.coef = SemanticService.PARAMS.transitive_coef
                # Ambiguous case readings weaken the score.
                if (morph_.case_.is_dative):
                    self.coef /= (2)
                if (morph_.case_.is_genitive):
                    self.coef /= (2)
                if (morph_.case_.is_instrumental):
                    self.coef /= (2)
                return self.coef
            else:
                return -1
    # Copula "быть" accepts an instrumental or nominative complement.
    if (vf2.normal_case == "БЫТЬ"):
        if (not Utils.isNullOrEmpty(self.from_prep)):
            return -1
        if (morph_.case_.is_instrumental):
            self.coef = SemanticService.PARAMS.transitive_coef
            return self.coef
        if (morph_.case_.is_nominative):
            if (self.from0_.source.begin_token.begin_char > self.to_verb.end_char):
                self.coef = SemanticService.PARAMS.transitive_coef
                return self.coef
            else:
                self.coef = SemanticService.PARAMS.transitive_coef / (2)
                return self.coef
        if (morph_.case_.is_undefined):
            self.coef = SemanticService.PARAMS.transitive_coef / (2)
            return self.coef
    return -1
def __str__(self) -> str:
    """Render as '№ <number>', with '?' when the number is missing."""
    number = Utils.ifNotNull(self.number, "?")
    return "№ {0}".format(number)
def __str__(self) -> str:
    """Render as '<typ> <value> <source text>' (value falls back to '')."""
    typ_name = Utils.enumToString(self.typ)
    value = Utils.ifNotNull(self.value, "")
    return "{0} {1} {2}".format(typ_name, value, self.getSourceText())
def tryParse(t: 'Token', items: typing.List['NounPhraseItem'], attrs: 'NounPhraseParseAttr') -> 'NounPhraseItem':
    """Try to parse one noun-phrase element (adjective and/or noun candidate) at *t*.

    *items* holds elements already collected for the current phrase (used
    for agreement checks); *attrs* tweaks parsing (participles, numerics,
    referents).  Returns a NounPhraseItem with adj_morph/noun_morph variants
    filled in, or None when *t* cannot start/continue a noun phrase.
    """
    if (t is None):
        return None
    t0 = t
    _can_be_surname = False
    _is_doubt_adj = False
    rt = Utils.asObjectOrNull(t, ReferentToken)
    # A one-token referent is re-parsed through its inner token.
    if (rt is not None and rt.begin_token == rt.end_token):
        res = NounPhraseItem.tryParse(rt.begin_token, items, attrs)
        if (res is not None):
            res.begin_token = res.end_token = t
            return res
    # A referent after existing items can only act as the noun.
    if (rt is not None and items is not None and len(items) > 0):
        res = NounPhraseItem(t, t)
        for m in t.morph.items:
            v = NounPhraseItemTextVar(m, None)
            v.normal_value = str(t.getReferent())
            res.noun_morph.append(v)
        res.can_be_noun = True
        return res
    if (isinstance(t, NumberToken)):
        pass
    has_legal_verb = False
    if (isinstance(t, TextToken)):
        if (not t.chars.is_letter):
            return None
        str0_ = (t).term
        # Words ending in А/О: filter verb/adverb homonyms.
        if (str0_[len(str0_) - 1] == 'А' or str0_[len(str0_) - 1] == 'О'):
            for wf in t.morph.items:
                if ((isinstance(wf, MorphWordForm)) and (wf).is_in_dictionary):
                    if (wf.class0_.is_verb):
                        mc = t.getMorphClassInDictionary()
                        if (not mc.is_noun and (((attrs) & (NounPhraseParseAttr.IGNOREPARTICIPLES))) == (NounPhraseParseAttr.NO)):
                            if (not LanguageHelper.endsWithEx(str0_, "ОГО", "ЕГО", None, None)):
                                return None
                        has_legal_verb = True
                    if (wf.class0_.is_adverb):
                        if (t.next0_ is None or not t.next0_.is_hiphen):
                            if ((str0_ == "ВСЕГО" or str0_ == "ДОМА" or str0_ == "НЕСКОЛЬКО") or str0_ == "МНОГО" or str0_ == "ПОРЯДКА"):
                                pass
                            else:
                                return None
                    if (wf.class0_.is_adjective):
                        if (wf.containsAttr("к.ф.", None)):
                            if (t.getMorphClassInDictionary() == MorphClass.ADJECTIVE):
                                pass
                            else:
                                _is_doubt_adj = True
        mc0 = t.morph.class0_
        # Capitalized proper-surname candidates.
        if (mc0.is_proper_surname and not t.chars.is_all_lower):
            for wf in t.morph.items:
                if (wf.class0_.is_proper_surname and wf.number != MorphNumber.PLURAL):
                    wff = Utils.asObjectOrNull(wf, MorphWordForm)
                    if (wff is None):
                        continue
                    s = Utils.ifNotNull((Utils.ifNotNull(wff.normal_full, wff.normal_case)), "")
                    if (LanguageHelper.endsWithEx(s, "ИН", "ЕН", "ЫН", None)):
                        if (not wff.is_in_dictionary):
                            _can_be_surname = True
                        else:
                            return None
                    if (wff.is_in_dictionary and LanguageHelper.endsWith(s, "ОВ")):
                        _can_be_surname = True
        # Capitalized proper-name candidates.
        if (mc0.is_proper_name and not t.chars.is_all_lower):
            for wff in t.morph.items:
                wf = Utils.asObjectOrNull(wff, MorphWordForm)
                if (wf is None):
                    continue
                if (wf.normal_case == "ГОР"):
                    continue
                if (wf.class0_.is_proper_name and wf.is_in_dictionary):
                    if (wf.normal_case is None or not wf.normal_case.startswith("ЛЮБ")):
                        if (mc0.is_adjective and t.morph.containsAttr("неизм.", None)):
                            pass
                        elif ((((attrs) & (NounPhraseParseAttr.REFERENTCANBENOUN))) == (NounPhraseParseAttr.REFERENTCANBENOUN)):
                            pass
                        else:
                            if (items is None or (len(items) < 1)):
                                return None
                            if (not items[0].is_std_adjective):
                                return None
        if (mc0.is_adjective and t.morph.items_count == 1):
            if (t.morph.getIndexerItem(0).containsAttr("в.ср.ст.", None)):
                return None
        mc1 = t.getMorphClassInDictionary()
        if (mc1 == MorphClass.VERB):
            return None
        if (((((attrs) & (NounPhraseParseAttr.IGNOREPARTICIPLES))) == (NounPhraseParseAttr.IGNOREPARTICIPLES) and t.morph.class0_.is_verb and not t.morph.class0_.is_noun) and not t.morph.class0_.is_proper):
            for wf in t.morph.items:
                if (wf.class0_.is_verb):
                    if (wf.containsAttr("дейст.з.", None)):
                        if (LanguageHelper.endsWith((t).term, "СЯ")):
                            pass
                        else:
                            return None
    t1 = None
    # Two passes: k == 0 plain, k == 1 re-parses hyphenated compounds.
    for k in range(2):
        t = (Utils.ifNotNull(t1, t0))
        if (k == 0):
            # Handle a hyphenated compound word (e.g. 'X-Y' written tightly).
            if ((((isinstance(t0, TextToken))) and t0.next0_ is not None and t0.next0_.is_hiphen) and t0.next0_.next0_ is not None):
                if (not t0.is_whitespace_after and not t0.morph.class0_.is_pronoun):
                    if (not t0.next0_.is_whitespace_after):
                        t = t0.next0_.next0_
                    elif (t0.next0_.next0_.chars.is_all_lower and LanguageHelper.endsWith((t0).term, "О")):
                        t = t0.next0_.next0_
        it = NounPhraseItem._new470(t0, t, _can_be_surname)
        if (t0 == t and (isinstance(t0, ReferentToken))):
            it.can_be_noun = True
            it.morph = MorphCollection(t0.morph)
        can_be_prepos = False
        # Classify each morphological variant as adjective-like and/or noun-like.
        for v in t.morph.items:
            wf = Utils.asObjectOrNull(v, MorphWordForm)
            if (v.class0_.is_preposition):
                can_be_prepos = True
            if (v.class0_.is_adjective or ((v.class0_.is_pronoun and not v.class0_.is_personal_pronoun)) or ((v.class0_.is_noun and (isinstance(t, NumberToken))))):
                if (NounPhraseItem.tryAccordVariant(items, (0 if items is None else len(items)), v)):
                    is_doub = False
                    if (v.containsAttr("к.ф.", None)):
                        continue
                    if (v.containsAttr("собир.", None) and not ((isinstance(t, NumberToken)))):
                        if (wf is not None and wf.is_in_dictionary):
                            return None
                        continue
                    if (v.containsAttr("сравн.", None)):
                        continue
                    ok = True
                    if (isinstance(t, TextToken)):
                        s = (t).term
                        if (s == "ПРАВО" or s == "ПРАВА"):
                            ok = False
                        elif (LanguageHelper.endsWith(s, "ОВ") and t.getMorphClassInDictionary().is_noun):
                            ok = False
                        elif (wf is not None and ((wf.normal_case == "САМ" or wf.normal_case == "ТО"))):
                            ok = False
                    elif (isinstance(t, NumberToken)):
                        if (v.class0_.is_noun and t.morph.class0_.is_adjective):
                            ok = False
                        elif (t.morph.class0_.is_noun and ((((attrs) & (NounPhraseParseAttr.PARSENUMERICASADJECTIVE))) == (NounPhraseParseAttr.NO))):
                            ok = False
                    if (ok):
                        it.adj_morph.append(NounPhraseItemTextVar(v, t))
                        it.can_be_adj = True
                        if (_is_doubt_adj and t0 == t):
                            it.is_doubt_adjective = True
                        if (has_legal_verb and wf is not None and wf.is_in_dictionary):
                            it.can_be_noun = True
            can_be_noun_ = False
            if (isinstance(t, NumberToken)):
                pass
            elif (v.class0_.is_noun or ((wf is not None and wf.normal_case == "САМ"))):
                can_be_noun_ = True
            elif (v.class0_.is_personal_pronoun):
                if (items is None or len(items) == 0):
                    can_be_noun_ = True
                else:
                    for it1 in items:
                        if (it1.is_verb):
                            return None
                    if (len(items) == 1):
                        if (items[0].can_be_adj_for_personal_pronoun):
                            can_be_noun_ = True
            elif ((v.class0_.is_pronoun and ((items is None or len(items) == 0 or ((len(items) == 1 and items[0].can_be_adj_for_personal_pronoun)))) and wf is not None) and ((((wf.normal_case == "ТОТ" or wf.normal_full == "ТО" or wf.normal_case == "ТО") or wf.normal_case == "ЭТО" or wf.normal_case == "ВСЕ") or wf.normal_case == "ЧТО" or wf.normal_case == "КТО"))):
                if (wf.normal_case == "ВСЕ"):
                    if (t.next0_ is not None and t.next0_.isValue("РАВНО", None)):
                        return None
                can_be_noun_ = True
            elif (wf is not None and ((Utils.ifNotNull(wf.normal_full, wf.normal_case))) == "КОТОРЫЙ"):
                return None
            elif (v.class0_.is_proper and (isinstance(t, TextToken))):
                if (t.length_char > 4 or v.class0_.is_proper_name):
                    can_be_noun_ = True
            if (can_be_noun_):
                if (NounPhraseItem.tryAccordVariant(items, (0 if items is None else len(items)), v)):
                    it.noun_morph.append(NounPhraseItemTextVar(v, t))
                    it.can_be_noun = True
        if (t0 != t):
            for v in it.adj_morph:
                v.correctPrefix(Utils.asObjectOrNull(t0, TextToken), False)
            for v in it.noun_morph:
                v.correctPrefix(Utils.asObjectOrNull(t0, TextToken), True)
        # Second pass for a noun-only compound: glue the tail into normal values.
        if (k == 1 and it.can_be_noun and not it.can_be_adj):
            if (t1 is not None):
                it.end_token = t1
            else:
                it.end_token = t0.next0_.next0_
            for v in it.noun_morph:
                if (v.normal_value is not None and (v.normal_value.find('-') < 0)):
                    v.normal_value = "{0}-{1}".format(v.normal_value, it.end_token.getNormalCaseText(None, False, MorphGender.UNDEFINED, False))
        if (it.can_be_adj):
            if (NounPhraseItem.__m_std_adjectives.tryParse(it.begin_token, TerminParseAttr.NO) is not None):
                it.is_std_adjective = True
        # A preposition homonym: make sure a larger phrase does not fit better.
        if (can_be_prepos and it.can_be_noun):
            if (items is not None and len(items) > 0):
                npt1 = NounPhraseHelper.tryParse(t, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.PARSEPRONOUNS) | (NounPhraseParseAttr.PARSEVERBS), NounPhraseParseAttr), 0)
                if (npt1 is not None and npt1.end_char > t.end_char):
                    return None
            else:
                npt1 = NounPhraseHelper.tryParse(t.next0_, Utils.valToEnum((NounPhraseParseAttr.PARSEPRONOUNS) | (NounPhraseParseAttr.PARSEVERBS), NounPhraseParseAttr), 0)
                if (npt1 is not None):
                    mc = LanguageHelper.getCaseAfterPreposition((t).lemma)
                    if (not ((mc) & npt1.morph.case_).is_undefined):
                        return None
        if (it.can_be_noun or it.can_be_adj or k == 1):
            # Attach pronoun particles (же/бы/ли/нибудь/либо/-то) to the item.
            if (it.begin_token.morph.class0_.is_pronoun):
                tt2 = it.end_token.next0_
                if ((tt2 is not None and tt2.is_hiphen and not tt2.is_whitespace_after) and not tt2.is_whitespace_before):
                    tt2 = tt2.next0_
                if (isinstance(tt2, TextToken)):
                    ss = (tt2).term
                    if ((ss == "ЖЕ" or ss == "БЫ" or ss == "ЛИ") or ss == "Ж"):
                        it.end_token = tt2
                    elif (ss == "НИБУДЬ" or ss == "ЛИБО" or (((ss == "ТО" and tt2.previous.is_hiphen)) and it.can_be_adj)):
                        it.end_token = tt2
                        for m in it.adj_morph:
                            m.normal_value = "{0}-{1}".format(m.normal_value, ss)
                            if (m.single_number_value is not None):
                                m.single_number_value = "{0}-{1}".format(m.single_number_value, ss)
            return it
        if (t0 == t):
            # "БИЗНЕС <word>" compound — retry with the extended span.
            if (t0.isValue("БИЗНЕС", None) and t0.next0_ is not None and t0.next0_.chars == t0.chars):
                t1 = t0.next0_
                continue
            return it
    return None
# Parse one candidate structure/heading line starting at token t into a BlockLine.
# Steps visible below: (1) extend res.end_token to the end of the physical line
# (up to the next newline-before token); (2) strip a leading item number — an
# arabic NumberToken or a roman numeral via NumberHelper.try_parse_roman — with
# an optional trailing '.', recording it in res.number_end; (3) classify the rest
# against BlockLine.__m_ontology (chapter / introduction / conclusion / index /
# literature ...), with several veto rules (preceding ':', language mismatch,
# genitive noun phrase after "ОГЛАВЛЕНИЕ"/"ВВЕДЕНИЕ" lookalikes); (4) handle a
# '§ <number>' prefix as CHAPTER; (5) if `names` (a TerminCollection of known
# section names, may be None) matches to end of line, mark res.is_exist_name and
# classify via a recursive create() on the remainder; (6) otherwise count
# words / non-words / verbs and apply noun-phrase heuristics for INTRO,
# CONSLUSION (spelling as in BlkTyps) and LITERATURE, including the dotted
# "content item tail" detection.  Returns None when t is None or when a
# list-heading pattern ends exactly at end of line; otherwise a BlockLine whose
# typ may remain BlkTyps.UNDEFINED.
# NOTE(review): transpiled, whitespace-mangled code — the statement text below is
# preserved byte-identical; do not reflow without consulting the C# original.
def create(t: 'Token', names: 'TerminCollection') -> 'BlockLine': if (t is None): return None res = BlockLine(t, t) tt = t while tt is not None: if (tt != t and tt.is_newline_before): break else: res.end_token = tt tt = tt.next0_ nums = 0 while t is not None and t.next0_ is not None and t.end_char <= res.end_char: if (isinstance(t, NumberToken)): pass else: rom = NumberHelper.try_parse_roman(t) if (rom is not None and rom.end_token.next0_ is not None): t = rom.end_token else: break if (t.next0_.is_char('.')): pass elif ((isinstance(t.next0_, TextToken)) and not t.next0_.chars.is_all_lower): pass else: break res.number_end = t t = t.next0_ if (t.is_char('.') and t.next0_ is not None): res.number_end = t t = t.next0_ if (t.is_newline_before): return res nums += 1 tok = BlockLine.__m_ontology.try_parse(t, TerminParseAttr.NO) if (tok is None): npt1 = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None) if (npt1 is not None and npt1.end_token != npt1.begin_token): tok = BlockLine.__m_ontology.try_parse(npt1.noun.begin_token, TerminParseAttr.NO) if (tok is not None): if (t.previous is not None and t.previous.is_char(':')): tok = (None) if (tok is not None): typ_ = Utils.valToEnum(tok.termin.tag, BlkTyps) if (typ_ == BlkTyps.CONSLUSION): if (t.is_newline_after): pass elif (t.next0_ is not None and t.next0_.morph.class0_.is_preposition and t.next0_.next0_ is not None): tok2 = BlockLine.__m_ontology.try_parse( t.next0_.next0_, TerminParseAttr.NO) if (tok2 is not None and (Utils.valToEnum( tok2.termin.tag, BlkTyps)) == BlkTyps.CHAPTER): pass else: tok = (None) else: tok = (None) if (t.kit.base_language != t.morph.language): tok = (None) if (typ_ == BlkTyps.INDEX and not t.is_value("ОГЛАВЛЕНИЕ", None)): if (not t.is_newline_after and t.next0_ is not None): npt = NounPhraseHelper.try_parse(t.next0_, NounPhraseParseAttr.NO, 0, None) if (npt is not None and npt.is_newline_after and npt.morph.case_.is_genitive): tok = (None) elif (npt is None): tok = (None) if ((typ_ == 
BlkTyps.INTRO and tok is not None and not tok.is_newline_after) and t.is_value("ВВЕДЕНИЕ", None)): npt = NounPhraseHelper.try_parse(t.next0_, NounPhraseParseAttr.NO, 0, None) if (npt is not None and npt.morph.case_.is_genitive): tok = (None) if (tok is not None): if (res.number_end is None): res.number_end = tok.end_token if (res.number_end.end_char > res.end_char): res.end_token = res.number_end res.typ = typ_ t = tok.end_token if (t.next0_ is not None and t.next0_.is_char_of(":.")): t = t.next0_ res.end_token = t if (t.is_newline_after or t.next0_ is None): return res t = t.next0_ if (t.is_char('§') and (isinstance(t.next0_, NumberToken))): res.typ = BlkTyps.CHAPTER res.number_end = t t = t.next0_ if (names is not None): tok2 = names.try_parse(t, TerminParseAttr.NO) if (tok2 is not None and tok2.end_token.is_newline_after): res.end_token = tok2.end_token res.is_exist_name = True if (res.typ == BlkTyps.UNDEFINED): li2 = BlockLine.create((None if res.number_end is None else res.number_end.next0_), None) if (li2 is not None and ((li2.typ == BlkTyps.LITERATURE or li2.typ == BlkTyps.INTRO or li2.typ == BlkTyps.CONSLUSION))): res.typ = li2.typ else: res.typ = BlkTyps.CHAPTER return res t1 = res.end_token if ((((isinstance(t1, NumberToken)) or t1.is_char('.'))) and t1.previous is not None): t1 = t1.previous if (t1.is_char('.')): res.has_content_item_tail = True while t1 is not None and t1.begin_char > res.begin_char: if (not t1.is_char('.')): break t1 = t1.previous res.is_all_upper = True while t is not None and t.end_char <= t1.end_char: if (not (isinstance(t, TextToken)) or not t.chars.is_letter): res.not_words += 1 else: mc = t.get_morph_class_in_dictionary() if (mc.is_undefined): res.not_words += 1 elif (t.length_char > 2): res.words += 1 if (not t.chars.is_all_upper): res.is_all_upper = False if (t.is_pure_verb): if (not t.term.endswith("ING")): res.has_verb = True t = t.next0_ if (res.typ == BlkTyps.UNDEFINED): npt = NounPhraseHelper.try_parse( (res.begin_token if 
res.number_end is None else res.number_end.next0_), NounPhraseParseAttr.NO, 0, None) if (npt is not None): if (npt.noun.is_value("ХАРАКТЕРИСТИКА", None) or npt.noun.is_value("СОДЕРЖАНИЕ", "ЗМІСТ")): ok = True tt = npt.end_token.next0_ first_pass3032 = True while True: if first_pass3032: first_pass3032 = False else: tt = tt.next0_ if (not (tt is not None and tt.end_char <= res.end_char)): break if (tt.is_char('.')): continue npt2 = NounPhraseHelper.try_parse( tt, NounPhraseParseAttr.NO, 0, None) if (npt2 is None or not npt2.morph.case_.is_genitive): ok = False break tt = npt2.end_token if (tt.end_char > res.end_char): res.end_token = tt if (not tt.is_newline_after): while res.end_token.next0_ is not None: if (res.end_token.is_newline_after): break res.end_token = res.end_token.next0_ if (ok): res.typ = BlkTyps.INTRO res.is_exist_name = True elif (npt.noun.is_value("ВЫВОД", "ВИСНОВОК") or npt.noun.is_value("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")): ok = True tt = npt.end_token.next0_ first_pass3033 = True while True: if first_pass3033: first_pass3033 = False else: tt = tt.next0_ if (not (tt is not None and tt.end_char <= res.end_char)): break if (tt.is_char_of(",.") or tt.is_and): continue npt1 = NounPhraseHelper.try_parse( tt, NounPhraseParseAttr.NO, 0, None) if (npt1 is not None): if (npt1.noun.is_value("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ") or npt1.noun.is_value( "РЕКОМЕНДАЦИЯ", "РЕКОМЕНДАЦІЯ") or npt1.noun.is_value( "ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")): tt = npt1.end_token if (tt.end_char > res.end_char): res.end_token = tt if (not tt.is_newline_after): while res.end_token.next0_ is not None: if (res.end_token.is_newline_after ): break res.end_token = res.end_token.next0_ continue ok = False break if (ok): res.typ = BlkTyps.CONSLUSION res.is_exist_name = True if (res.typ == BlkTyps.UNDEFINED and npt is not None and npt.end_char <= res.end_char): ok = False publ = 0 if (BlockLine.__is_pub(npt)): ok = True publ = 1 elif ((npt.noun.is_value("СПИСОК", None) or npt.noun.is_value("УКАЗАТЕЛЬ", 
"ПОКАЖЧИК") or npt.noun.is_value("ПОЛОЖЕНИЕ", "ПОЛОЖЕННЯ")) or npt.noun.is_value("ВЫВОД", "ВИСНОВОК") or npt.noun.is_value("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")): if (npt.end_char == res.end_char): return None ok = True if (ok): if (npt.begin_token == npt.end_token and npt.noun.is_value("СПИСОК", None) and npt.end_char == res.end_char): ok = False tt = npt.end_token.next0_ first_pass3034 = True while True: if first_pass3034: first_pass3034 = False else: tt = tt.next0_ if (not (tt is not None and tt.end_char <= res.end_char)): break if (tt.is_char_of(",.:") or tt.is_and or tt.morph.class0_.is_preposition): continue if (tt.is_value("ОТРАЖЕНЫ", "ВІДОБРАЖЕНІ")): continue npt = NounPhraseHelper.try_parse( tt, NounPhraseParseAttr.NO, 0, None) if (npt is None): ok = False break if (((BlockLine.__is_pub(npt) or npt.noun.is_value( "РАБОТА", "РОБОТА") or npt.noun.is_value( "ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")) or npt.noun.is_value("АВТОР", None) or npt.noun.is_value("ТРУД", "ПРАЦЯ")) or npt.noun.is_value("ТЕМА", None) or npt.noun.is_value( "ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ")): tt = npt.end_token if (BlockLine.__is_pub(npt)): publ += 1 if (tt.end_char > res.end_char): res.end_token = tt if (not tt.is_newline_after): while res.end_token.next0_ is not None: if (res.end_token.is_newline_after ): break res.end_token = res.end_token.next0_ continue ok = False break if (ok): res.typ = BlkTyps.LITERATURE res.is_exist_name = True if (publ == 0 and (res.end_char < ((math.floor( (len(res.kit.sofa.text) * 2) / 3))))): if (res.number_end is not None): res.typ = BlkTyps.MISC else: res.typ = BlkTyps.UNDEFINED return res
# Build the normal-case (dictionary-form) text of this noun-phrase token.
# Scans the morphological variants in self.morph.items (NounPhraseItemTextVar),
# keeping the candidate with the best undef_coef (0 = exact dictionary hit, which
# short-circuits the loop).  Special handling visible below:
#  * single_number=True uses v.single_number_value, re-inflecting it through
#    Morphology.getWordform for neuter/feminine adjectives (MorphGender.FEMINIE is
#    the library's own spelling of the enum member);
#  * a numeric normal_value with an adjective target class is converted via
#    NumberHelper.getNumberAdjective ("2" -> ordinal-style adjective);
#  * plural lemmas "ДЕТИ"/"ЛЮДИ" are mapped to "РЕБЕНОК"/"ЧЕЛОВЕК" when a
#    singular form is requested;
#  * a variant equal to the first token's own term in nominative singular gets
#    preference (def_co = 1).
# Falls back to the single underlying token's normal text, else "?".  The result
# passes through self.__corrChars to honor keep_chars (original character case).
# mc: requested morphological class or None; gender/single_number steer inflection.
# NOTE(review): transpiled, whitespace-mangled code — statement text preserved
# byte-identical, including the line break inside "def_coef = def_co".
def getNormalCaseText(self, mc: 'MorphClass' = None, single_number: bool = False, gender: 'MorphGender' = MorphGender.UNDEFINED, keep_chars: bool = False) -> str: if ((isinstance(self.begin_token, ReferentToken)) and self.begin_token == self.end_token): return self.begin_token.getNormalCaseText(mc, single_number, gender, keep_chars) res = None max_coef = 0 def_coef = -1 for it in self.morph.items: v = Utils.asObjectOrNull(it, NounPhraseItemTextVar) if (v.undef_coef > 0 and (((v.undef_coef < max_coef) or def_coef >= 0))): continue if (single_number and v.single_number_value is not None): if (mc is not None and ((gender == MorphGender.NEUTER or gender == MorphGender.FEMINIE)) and mc.is_adjective): bi = MorphBaseInfo._new467(MorphClass(mc), gender, MorphNumber.SINGULAR, MorphCase.NOMINATIVE, self.morph.language) str0_ = Morphology.getWordform(v.single_number_value, bi) if (str0_ is not None): res = str0_ else: res = v.single_number_value if (v.undef_coef == 0): break max_coef = v.undef_coef continue if (Utils.isNullOrEmpty(v.normal_value)): continue if (str.isdigit(v.normal_value[0]) and mc is not None and mc.is_adjective): wrapval468 = RefOutArgWrapper(0) inoutres469 = Utils.tryParseInt(v.normal_value, wrapval468) val = wrapval468.value if (inoutres469): str0_ = NumberHelper.getNumberAdjective( val, gender, (MorphNumber.SINGULAR if single_number or val == 1 else MorphNumber.PLURAL)) if (str0_ is not None): res = str0_ if (v.undef_coef == 0): break max_coef = v.undef_coef continue res1 = (it).normal_value if (single_number): if (res1 == "ДЕТИ"): res1 = "РЕБЕНОК" elif (res1 == "ЛЮДИ"): res1 = "ЧЕЛОВЕК" max_coef = v.undef_coef if (v.undef_coef > 0): res = res1 continue def_co = 0 if (mc is not None and mc.is_adjective and v.undef_coef == 0): pass elif (((isinstance(self.begin_token, TextToken)) and res1 == (self.begin_token).term and it.case_.is_nominative) and it.number == MorphNumber.SINGULAR): def_co = 1 if (res is None or def_co > def_coef): res = res1 def_coef = 
def_co if (def_co > 0): break if (res is not None): return self.__corrChars(res, keep_chars) if (res is None and self.begin_token == self.end_token): res = self.begin_token.getNormalCaseText(mc, single_number, gender, keep_chars) return Utils.ifNotNull(res, "?")
# Decide whether this PersonPropertyReferent can be merged with `obj`.
# Checks performed below, in order:
#  1. obj must itself be a PersonPropertyReferent and both must have a name;
#  2. differing names are tolerated only when they pair across the
#     __m_bosses0/__m_bosses1 synonym sets or one name extends the other with
#     a space-separated suffix (eq_bosses then relaxes the final check);
#     EqualType.DIFFERENTTEXTS always demands exact name equality;
#  3. neither referent may equal an ancestor of the other — both `higher`
#     chains are walked with the class-level __tmp_stack counter capped at 20
#     as a recursion/cycle guard (the increment/decrement pairs keep it balanced);
#  4. when both have a `higher`, those must be equal too;
#  5. a "@GENERAL" slot on either side forces a full string comparison;
#  6. ATTR_REF slots are cross-matched in both directions (by containment or by
#     recursive canBeEquals for Referent values), and the eq/noeq outcome plus
#     eq_bosses decides the verdict.
# Returns True only when every applicable check passes.
# NOTE(review): transpiled, whitespace-mangled code — statement text preserved
# byte-identical, including the line break inside "noeq = False".
def canBeEquals(self, obj: 'Referent', typ: 'EqualType') -> bool: pr = Utils.asObjectOrNull(obj, PersonPropertyReferent) if (pr is None): return False n1 = self.name n2 = pr.name if (n1 is None or n2 is None): return False eq_bosses = False if (n1 != n2): if (typ == Referent.EqualType.DIFFERENTTEXTS): return False if (n1 in PersonPropertyReferent.__m_bosses0 and n2 in PersonPropertyReferent.__m_bosses1): eq_bosses = True elif (n1 in PersonPropertyReferent.__m_bosses1 and n2 in PersonPropertyReferent.__m_bosses0): eq_bosses = True else: if (not n1.startswith(n2 + " ") and not n2.startswith(n1 + " ")): return False eq_bosses = True hi = self.higher while hi is not None: PersonPropertyReferent.__tmp_stack += 1 if ((PersonPropertyReferent.__tmp_stack) > 20): pass elif (hi.canBeEquals(pr, typ)): PersonPropertyReferent.__tmp_stack -= 1 return False PersonPropertyReferent.__tmp_stack -= 1 hi = hi.higher hi = pr.higher while hi is not None: PersonPropertyReferent.__tmp_stack += 1 if ((PersonPropertyReferent.__tmp_stack) > 20): pass elif (hi.canBeEquals(self, typ)): PersonPropertyReferent.__tmp_stack -= 1 return False PersonPropertyReferent.__tmp_stack -= 1 hi = hi.higher if (self.higher is not None and pr.higher is not None): PersonPropertyReferent.__tmp_stack += 1 if ((PersonPropertyReferent.__tmp_stack) > 20): pass elif (not self.higher.canBeEquals(pr.higher, typ)): PersonPropertyReferent.__tmp_stack -= 1 return False PersonPropertyReferent.__tmp_stack -= 1 if (self.findSlot("@GENERAL", None, True) is not None or pr.findSlot("@GENERAL", None, True) is not None): return str(self) == str(pr) if (self.findSlot(PersonPropertyReferent.ATTR_REF, None, True) is not None or pr.findSlot(PersonPropertyReferent.ATTR_REF, None, True) is not None): refs1 = list() refs2 = list() for s in self.slots: if (s.type_name == PersonPropertyReferent.ATTR_REF): refs1.append(s.value) for s in pr.slots: if (s.type_name == PersonPropertyReferent.ATTR_REF): refs2.append(s.value) eq = False noeq = 
False i = 0 first_pass3113 = True while True: if first_pass3113: first_pass3113 = False else: i += 1 if (not (i < len(refs1))): break if (refs1[i] in refs2): eq = True continue noeq = True if (isinstance(refs1[i], Referent)): for rr in refs2: if (isinstance(rr, Referent)): if ((rr).canBeEquals( Utils.asObjectOrNull(refs1[i], Referent), typ)): noeq = False eq = True break i = 0 first_pass3114 = True while True: if first_pass3114: first_pass3114 = False else: i += 1 if (not (i < len(refs2))): break if (refs2[i] in refs1): eq = True continue noeq = True if (isinstance(refs2[i], Referent)): for rr in refs1: if (isinstance(rr, Referent)): if ((rr).canBeEquals( Utils.asObjectOrNull(refs2[i], Referent), typ)): noeq = False eq = True break if (eq and not noeq): pass elif (noeq and ((eq or len(refs1) == 0 or len(refs2) == 0))): if (typ == Referent.EqualType.DIFFERENTTEXTS or n1 != n2): return False if (self.higher is not None or pr.higher is not None): return False else: return False elif (not eq_bosses and n1 != n2): return False return True
def __TryAttach(t: 'Token', prev: typing.List['DateItemToken']) -> 'DateItemToken': from pullenti.ner.measure.internal.MeasureToken import MeasureToken if (t is None): return None nt = Utils.asObjectOrNull(t, NumberToken) begin = t end = t is_in_brack = False if ((BracketHelper.canBeStartOfSequence(t, False, False) and t.next0_ is not None and (isinstance(t.next0_, NumberToken))) and BracketHelper.canBeEndOfSequence(t.next0_.next0_, False, None, False)): nt = (Utils.asObjectOrNull(t.next0_, NumberToken)) end = t.next0_.next0_ is_in_brack = True if ((t.is_newline_before and BracketHelper.isBracket(t, False) and (isinstance(t.next0_, NumberToken))) and BracketHelper.isBracket(t.next0_.next0_, False)): nt = (Utils.asObjectOrNull(t.next0_, NumberToken)) end = t.next0_.next0_ is_in_brack = True if (nt is not None): if (nt.int_value is None): return None if (nt.typ == NumberSpellingType.WORDS): if (nt.morph.class0_.is_noun and not nt.morph.class0_.is_adjective): if (t.next0_ is not None and ((t.next0_.isValue("КВАРТАЛ", None) or t.next0_.isValue("ПОЛУГОДИЕ", None) or t.next0_.isValue("ПІВРІЧЧЯ", None)))): pass else: return None if (NumberHelper.tryParseAge(nt) is not None): return None res = DateItemToken._new653(begin, end, DateItemToken.DateItemType.NUMBER, nt.int_value, nt.morph) if ((res.int_value == 20 and (isinstance(nt.next0_, NumberToken)) and (nt.next0_).int_value is not None) and nt.next0_.length_char == 2 and prev is not None): num = 2000 + (nt.next0_).int_value if ((num < 2030) and len(prev) > 0 and prev[len(prev) - 1].typ == DateItemToken.DateItemType.MONTH): ok = False if (nt.whitespaces_after_count == 1): ok = True elif (nt.is_newline_after and nt.is_newline_after): ok = True if (ok): nt = (Utils.asObjectOrNull(nt.next0_, NumberToken)) res.end_token = nt res.int_value = num if (res.int_value == 20 or res.int_value == 201): tt = t.next0_ if (tt is not None and tt.isChar('_')): while tt is not None: if (not tt.isChar('_')): break tt = tt.next0_ tt = 
DateItemToken.__testYearRusWord(tt, False) if (tt is not None): res.int_value = 0 res.end_token = tt res.typ = DateItemToken.DateItemType.YEAR return res if (res.int_value <= 12 and t.next0_ is not None and (t.whitespaces_after_count < 3)): tt = t.next0_ if (tt.isValue("ЧАС", None)): if (((isinstance(t.previous, TextToken)) and not t.previous.chars.is_letter and not t.is_whitespace_before) and (isinstance(t.previous.previous, NumberToken)) and not t.previous.is_whitespace_before): pass else: res.typ = DateItemToken.DateItemType.HOUR res.end_token = tt tt = tt.next0_ if (tt is not None and tt.isChar('.')): res.end_token = tt tt = tt.next0_ first_pass2816 = True while True: if first_pass2816: first_pass2816 = False else: tt = tt.next0_ if (not (tt is not None)): break if (tt.isValue("УТРО", "РАНОК")): res.end_token = tt res.typ = DateItemToken.DateItemType.HOUR return res if (tt.isValue("ВЕЧЕР", "ВЕЧІР")): res.end_token = tt res.int_value += 12 res.typ = DateItemToken.DateItemType.HOUR return res if (tt.isValue("ДЕНЬ", None)): res.end_token = tt if (res.int_value < 10): res.int_value += 12 res.typ = DateItemToken.DateItemType.HOUR return res if (tt.isValue("НОЧЬ", "НІЧ")): res.end_token = tt if (res.int_value == 12): res.int_value = 0 elif (res.int_value > 9): res.int_value += 12 res.typ = DateItemToken.DateItemType.HOUR return res if (tt.is_comma or tt.morph.class0_.is_adverb): continue break if (res.typ == DateItemToken.DateItemType.HOUR): return res can_be_year_ = True if (prev is not None and len(prev) > 0 and prev[len(prev) - 1].typ == DateItemToken.DateItemType.MONTH): pass elif ((prev is not None and len(prev) >= 4 and prev[len(prev) - 1].typ == DateItemToken.DateItemType.DELIM) and prev[len(prev) - 2].can_by_month): pass elif (nt.next0_ is not None and ((nt.next0_.isValue("ГОД", None) or nt.next0_.isValue("РІК", None)))): if (res.int_value < 1000): can_be_year_ = False tt = DateItemToken.__testYearRusWord(nt.next0_, False) if (tt is not None and 
DateItemToken.__isNewAge(tt.next0_)): res.typ = DateItemToken.DateItemType.YEAR res.end_token = tt elif (can_be_year_): if (res.can_be_year): tt = DateItemToken.__testYearRusWord( nt.next0_, res.is_newline_before) if ((tt) is not None): if ((tt.isValue("Г", None) and not tt.is_whitespace_before and t.previous is not None) and ((t.previous.isValue("КОРПУС", None) or t.previous.isValue("КОРП", None)))): pass elif ( (((nt.next0_.isValue("Г", None) and (t.whitespaces_before_count < 3) and t.previous is not None) and t.previous.isValue("Я", None) and t.previous.previous is not None) and t.previous.previous.isCharOf("\\/") and t.previous.previous.previous is not None) and t.previous.previous.previous.isValue( "А", None)): return None else: res.end_token = tt res.typ = DateItemToken.DateItemType.YEAR res.lang = tt.morph.language elif (tt is not None and (nt.whitespaces_after_count < 2) and (nt.end_char - nt.begin_char) == 1): res.end_token = tt res.typ = DateItemToken.DateItemType.YEAR res.lang = tt.morph.language if (nt.previous is not None): if (nt.previous.isValue("В", "У") or nt.previous.isValue("К", None) or nt.previous.isValue("ДО", None)): tt = DateItemToken.__testYearRusWord(nt.next0_, False) if ((tt) is not None): ok = False if ((res.int_value < 100) and (isinstance(tt, TextToken)) and (((tt).term == "ГОДА" or (tt).term == "РОКИ"))): pass else: ok = True if (nt.previous.isValue("ДО", None) and nt.next0_.isValue("Г", None)): cou = 0 ttt = nt.previous.previous while ttt is not None and (cou < 10): mt = MeasureToken.tryParse( ttt, None, False, False) if (mt is not None and mt.end_char > nt.end_char): ok = False break ttt = ttt.previous cou += 1 if (ok): res.end_token = tt res.typ = DateItemToken.DateItemType.YEAR res.lang = tt.morph.language res.begin_token = nt.previous elif (((nt.previous.isValue("IN", None) or nt.previous.isValue("SINCE", None))) and res.can_be_year): res.typ = DateItemToken.DateItemType.YEAR res.begin_token = nt.previous elif 
(nt.previous.isValue("NEL", None) or nt.previous.isValue("DEL", None)): if (res.can_be_year): res.typ = DateItemToken.DateItemType.YEAR res.lang = MorphLang.IT res.begin_token = nt.previous elif (nt.previous.isValue("IL", None) and res.can_be_day): res.lang = MorphLang.IT res.begin_token = nt.previous t1 = res.end_token.next0_ if (t1 is not None): if ((t1.isValue("ЧАС", None) or t1.isValue("ГОДИНА", None))): if ((((prev is not None and len(prev) == 2 and prev[0].can_be_hour) and prev[1].typ == DateItemToken.DateItemType.DELIM and not prev[1].is_whitespace_after) and not prev[1].is_whitespace_after and res.int_value >= 0) and (res.int_value < 59)): prev[0].typ = DateItemToken.DateItemType.HOUR res.typ = DateItemToken.DateItemType.MINUTE res.end_token = t1 elif (res.int_value < 24): if (t1.next0_ is not None and t1.next0_.isChar('.')): t1 = t1.next0_ res.typ = DateItemToken.DateItemType.HOUR res.end_token = t1 elif ((res.int_value < 60) and ((t1.isValue("МИНУТА", None) or t1.isValue("МИН", None) or t.isValue("ХВИЛИНА", None)))): if (t1.next0_ is not None and t1.next0_.isChar('.')): t1 = t1.next0_ res.typ = DateItemToken.DateItemType.MINUTE res.end_token = t1 elif ((res.int_value < 60) and ((t1.isValue("СЕКУНДА", None) or t1.isValue("СЕК", None)))): if (t1.next0_ is not None and t1.next0_.isChar('.')): t1 = t1.next0_ res.typ = DateItemToken.DateItemType.SECOND res.end_token = t1 elif ((res.int_value < 30) and ((t1.isValue("ВЕК", "ВІК") or t1.isValue("СТОЛЕТИЕ", "СТОЛІТТЯ")))): res.typ = DateItemToken.DateItemType.CENTURY res.end_token = t1 elif (res.int_value <= 4 and t1.isValue("КВАРТАЛ", None)): res.typ = DateItemToken.DateItemType.QUARTAL res.end_token = t1 elif (res.int_value <= 2 and ((t1.isValue("ПОЛУГОДИЕ", None) or t1.isValue("ПІВРІЧЧЯ", None)))): res.typ = DateItemToken.DateItemType.HALFYEAR res.end_token = t1 return res t0 = Utils.asObjectOrNull(t, TextToken) if (t0 is None): return None txt = t0.getSourceText() if ((txt[0] == 'I' or txt[0] == 'X' or txt[0] 
== 'Х') or txt[0] == 'V'): lat = NumberHelper.tryParseRoman(t) if (lat is not None and lat.end_token.next0_ is not None and lat.int_value is not None): val = lat.int_value tt = lat.end_token.next0_ if (tt.isValue("КВАРТАЛ", None) and val > 0 and val <= 4): return DateItemToken._new654( t, tt, DateItemToken.DateItemType.QUARTAL, val) if (tt.isValue("ПОЛУГОДИЕ", "ПІВРІЧЧЯ") and val > 0 and val <= 2): return DateItemToken._new654( t, lat.end_token.next0_, DateItemToken.DateItemType.HALFYEAR, val) if (tt.isValue("ВЕК", "ВІК") or tt.isValue("СТОЛЕТИЕ", "СТОЛІТТЯ")): return DateItemToken._new654( t, lat.end_token.next0_, DateItemToken.DateItemType.CENTURY, val) if (tt.isValue("В", None) and tt.next0_ is not None and tt.next0_.isChar('.')): if (prev is not None and len(prev) > 0 and prev[len(prev) - 1].typ == DateItemToken.DateItemType.POINTER): return DateItemToken._new654( t, tt.next0_, DateItemToken.DateItemType.CENTURY, val) if (DateItemToken.__isNewAge(tt.next0_.next0_)): return DateItemToken._new654( t, tt.next0_, DateItemToken.DateItemType.CENTURY, val) if (tt.is_hiphen): lat2 = NumberHelper.tryParseRoman(tt.next0_) if ((lat2 is not None and lat2.int_value is not None and lat2.int_value > val) and lat2.end_token.next0_ is not None): if (lat2.end_token.next0_.isValue("ВЕК", "ВІК") or lat2.end_token.next0_.isValue( "СТОЛЕТИЕ", "СТОЛІТТЯ")): return DateItemToken._new654( t, lat.end_token, DateItemToken.DateItemType.CENTURY, val) if (t is not None and t.isValue("НАПРИКІНЦІ", None)): return DateItemToken._new660(t, t, DateItemToken.DateItemType.POINTER, "конец") if (t is not None and t.isValue("ДОНЕДАВНА", None)): return DateItemToken._new660(t, t, DateItemToken.DateItemType.POINTER, "сегодня") tok = DateItemToken.M_SEASONS.tryParse(t, TerminParseAttr.NO) if ((tok is not None and (Utils.valToEnum(tok.termin.tag, DatePointerType)) == DatePointerType.SUMMER and t.morph.language.is_ru) and (isinstance(t, TextToken))): str0_ = (t).term if (str0_ != "ЛЕТОМ" and str0_ != 
"ЛЕТА" and str0_ != "ЛЕТО"): tok = (None) if (tok is not None): return DateItemToken._new654( t, tok.end_token, DateItemToken.DateItemType.POINTER, Utils.valToEnum(tok.termin.tag, DatePointerType)) npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0) if (npt is not None): tok = DateItemToken.M_SEASONS.tryParse(npt.end_token, TerminParseAttr.NO) if ((tok is not None and (Utils.valToEnum(tok.termin.tag, DatePointerType)) == DatePointerType.SUMMER and t.morph.language.is_ru) and (isinstance(t, TextToken))): str0_ = (t).term if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"): tok = (None) if (tok is not None): return DateItemToken._new654( t, tok.end_token, DateItemToken.DateItemType.POINTER, Utils.valToEnum(tok.termin.tag, DatePointerType)) typ_ = DateItemToken.DateItemType.NUMBER if (npt.noun.isValue("КВАРТАЛ", None)): typ_ = DateItemToken.DateItemType.QUARTAL elif (npt.end_token.isValue("ПОЛУГОДИЕ", None) or npt.end_token.isValue("ПІВРІЧЧЯ", None)): typ_ = DateItemToken.DateItemType.HALFYEAR elif (npt.end_token.isValue("НАЧАЛО", None) or npt.end_token.isValue("ПОЧАТОК", None)): return DateItemToken._new660( t, npt.end_token, DateItemToken.DateItemType.POINTER, "начало") elif (npt.end_token.isValue("СЕРЕДИНА", None)): return DateItemToken._new660( t, npt.end_token, DateItemToken.DateItemType.POINTER, "середина") elif (npt.end_token.isValue("КОНЕЦ", None) or npt.end_token.isValue("КІНЕЦЬ", None) or npt.end_token.isValue("НАПРИКІНЕЦЬ", None)): return DateItemToken._new660( t, npt.end_token, DateItemToken.DateItemType.POINTER, "конец") elif (npt.end_token.isValue("ВРЕМЯ", None) and len(npt.adjectives) > 0 and npt.end_token.previous.isValue("НАСТОЯЩЕЕ", None)): return DateItemToken._new660( t, npt.end_token, DateItemToken.DateItemType.POINTER, "сегодня") elif (npt.end_token.isValue("ЧАС", None) and len(npt.adjectives) > 0 and npt.end_token.previous.isValue("ДАНИЙ", None)): return DateItemToken._new660( t, npt.end_token, 
DateItemToken.DateItemType.POINTER, "сегодня") if (typ_ != DateItemToken.DateItemType.NUMBER): delta = 0 if (len(npt.adjectives) > 0): if (npt.adjectives[0].isValue("ПОСЛЕДНИЙ", None) or npt.adjectives[0].isValue("ОСТАННІЙ", None)): return DateItemToken._new654( t0, npt.end_token, typ_, (4 if typ_ == DateItemToken.DateItemType.QUARTAL else 2)) if (npt.adjectives[0].isValue("ПРЕДЫДУЩИЙ", None) or npt.adjectives[0].isValue("ПОПЕРЕДНІЙ", None)): delta = -1 elif (npt.adjectives[0].isValue("СЛЕДУЮЩИЙ", None) or npt.adjectives[0].isValue("ПОСЛЕДУЮЩИЙ", None) or npt.adjectives[0].isValue("НАСТУПНИЙ", None)): delta = 1 else: return None cou = 0 tt = t.previous first_pass2817 = True while True: if first_pass2817: first_pass2817 = False else: tt = tt.previous if (not (tt is not None)): break if (cou > 200): break dr = Utils.asObjectOrNull(tt.getReferent(), DateRangeReferent) if (dr is None): continue if (typ_ == DateItemToken.DateItemType.QUARTAL): ii = dr.quarter_number if (ii < 1): continue ii += delta if ((ii < 1) or ii > 4): continue return DateItemToken._new654(t0, npt.end_token, typ_, ii) if (typ_ == DateItemToken.DateItemType.HALFYEAR): ii = dr.halfyear_number if (ii < 1): continue ii += delta if ((ii < 1) or ii > 2): continue return DateItemToken._new654(t0, npt.end_token, typ_, ii) term = t0.term if (not str.isalnum(term[0])): if (t0.isCharOf(".\\/:") or t0.is_hiphen): return DateItemToken._new660(t0, t0, DateItemToken.DateItemType.DELIM, term) elif (t0.isChar(',')): return DateItemToken._new660(t0, t0, DateItemToken.DateItemType.DELIM, term) else: return None if (term == "O" or term == "О"): if ((isinstance(t.next0_, NumberToken)) and not t.is_whitespace_after and len( (t.next0_).value) == 1): return DateItemToken._new654(t, t.next0_, DateItemToken.DateItemType.NUMBER, (t.next0_).int_value) if (str.isalpha(term[0])): inf = DateItemToken.M_MONTHES.tryParse(t, TerminParseAttr.NO) if (inf is not None and inf.termin.tag is None): inf = 
DateItemToken.M_MONTHES.tryParse(inf.end_token.next0_, TerminParseAttr.NO) if (inf is not None and (isinstance(inf.termin.tag, int))): return DateItemToken._new675(inf.begin_token, inf.end_token, DateItemToken.DateItemType.MONTH, inf.termin.tag, inf.termin.lang) return None