def __str__(self) -> str:
    """Return the set case flags as '|'-separated Russian abbreviations.

    Every truthy ``is_*`` flag contributes its abbreviation, always in the
    same fixed order. An empty string is returned when no flag is set.
    """
    flagged_labels = (
        (self.is_nominative, "именит."),
        (self.is_genitive, "родит."),
        (self.is_dative, "дател."),
        (self.is_accusative, "винит."),
        (self.is_instrumental, "творит."),
        (self.is_prepositional, "предлож."),
        (self.is_vocative, "зват."),
        (self.is_partial, "частич."),
        (self.is_common, "общ."),
        (self.is_possessive, "притяж."),
    )
    # str.join puts the separator only between items, so no trailing '|'
    # has to be trimmed afterwards.
    return "|".join(label for is_set, label in flagged_labels if is_set)
def _union(self, kw1: 'KeywordReferent', kw2: 'KeywordReferent', word2: str) -> None:
    """Fill this referent as the combination of two keyword referents.

    Args:
        kw1: first keyword; its type is copied and each of its values is
            extended with ``word2``.
        kw2: second keyword; only its normal forms (or values) are used.
        word2: text appended, after a space, to every value of ``kw1``.

    Side effects:
        Sets ``self.typ`` and adds ATTR_VALUE, ATTR_NORMAL and ATTR_REF slots.
    """
    self.typ = kw1.typ
    for value in kw1.get_string_values(KeywordReferent.ATTR_VALUE):
        self.add_slot(KeywordReferent.ATTR_VALUE, "{0} {1}".format(value, word2), False, 0)

    # A single-word keyword without explicit normal forms falls back to
    # its plain values.
    norms1 = kw1.get_string_values(KeywordReferent.ATTR_NORMAL)
    if (len(norms1) == 0 and kw1.child_words == 1):
        norms1 = kw1.get_string_values(KeywordReferent.ATTR_VALUE)
    norms2 = kw2.get_string_values(KeywordReferent.ATTR_NORMAL)
    if (len(norms2) == 0 and kw2.child_words == 1):
        norms2 = kw2.get_string_values(KeywordReferent.ATTR_VALUE)

    # One normal form per (n1, n2) pair: the sorted, de-duplicated union of
    # their space-separated words.
    for n1 in norms1:
        for n2 in norms2:
            words = list(Utils.splitString(n1, ' ', False))
            for word in Utils.splitString(n2, ' ', False):
                if word not in words:
                    words.append(word)
            words.sort()
            self.add_slot(KeywordReferent.ATTR_NORMAL, " ".join(words), False, 0)

    self.add_slot(KeywordReferent.ATTR_REF, kw1, False, 0)
    self.add_slot(KeywordReferent.ATTR_REF, kw2, False, 0)
def to_string_morph_number(number: 'MorphNumber') -> str:
    """Render the flags set in ``number`` as '|'-separated Russian labels.

    Args:
        number: bit combination of ``MorphNumber`` values.

    Returns:
        Labels of the set flags in fixed order (singular, plural), or an
        empty string when neither bit is set.
    """
    flag_labels = (
        (MorphNumber.SINGULAR, "единств."),
        (MorphNumber.PLURAL, "множеств."),
    )
    return "|".join(
        label
        for flag, label in flag_labels
        if ((number) & (flag)) != (MorphNumber.UNDEFINED)
    )
def to_string_morph_form(form: 'MorphForm') -> str:
    """Render the flags set in ``form`` as '|'-separated Russian labels.

    Args:
        form: bit combination of ``MorphForm`` values.

    Returns:
        Labels of the set flags in fixed order (short, synonym), or an
        empty string when neither bit is set.
    """
    flag_labels = (
        (MorphForm.SHORT, "кратк."),
        (MorphForm.SYNONYM, "синонимич."),
    )
    return "|".join(
        label
        for flag, label in flag_labels
        if ((form) & (flag)) != (MorphForm.UNDEFINED)
    )
def __mergeLetters(self) -> None:
    """Merge runs of one-letter tokens into a single dictionary word token.

    Walks the token chain from ``self.first_token``. A run starts at a
    one-character letter token preceded by enough whitespace and continues
    over following one-character tokens whose ``whitespaces_before_count``
    is exactly 1. When the run is long enough and ``Morphology.process``
    returns exactly one token with at least one in-dictionary word form,
    the run is replaced in the chain by a new ``TextToken``.
    """
    after_word = False  # True right after a successful merge
    buf = io.StringIO()
    cur = self.first_token
    started = False
    while True:
        # Emulates a C-style for loop: advance on every pass but the first,
        # including passes that were ended by `continue`.
        if started:
            cur = cur.next0_
        started = True
        if cur is None:
            break

        letter = Utils.asObjectOrNull(cur, TextToken)
        if not letter.chars.is_letter or letter.length_char != 1:
            after_word = False
            continue

        gap = cur.whitespaces_before_count
        if not (gap > 2 or (gap == 2 and after_word)):
            after_word = False
            continue

        # Collect the run of single-character tokens into the buffer.
        Utils.setLengthStringIO(buf, 0)
        print(letter.getSourceText(), end="", file=buf)
        extra = 0
        last = cur
        while last.next0_ is not None:
            letter = Utils.asObjectOrNull(last.next0_, TextToken)
            if letter.length_char != 1 or letter.whitespaces_before_count != 1:
                break
            extra += 1
            print(letter.getSourceText(), end="", file=buf)
            last = last.next0_

        # A shorter run is accepted when it directly follows a merged word.
        long_enough = extra > 3 or (extra > 1 and after_word)
        after_word = False
        if not long_enough:
            continue

        forms = Morphology.process(Utils.toStringStringIO(buf), None, None)
        if forms is None or len(forms) != 1:
            cur = last
            continue

        after_word = any(wf.is_in_dictionary for wf in forms[0].word_forms)
        if not after_word:
            cur = last
            continue

        # Splice the merged token into the chain in place of cur..last.
        merged = TextToken(forms[0], self)
        if cur == self.first_token:
            self.first_token = merged
        else:
            merged.previous = cur.previous
        merged.next0_ = last.next0_
        merged.begin_char = cur.begin_char
        merged.end_char = last.end_char
        cur = merged
def to_string_morph_aspect(aspect: 'MorphAspect') -> str:
    """Render the flags set in ``aspect`` as '|'-separated Russian labels.

    Args:
        aspect: bit combination of ``MorphAspect`` values.

    Returns:
        Labels of the set flags in fixed order (imperfective, perfective),
        or an empty string when neither bit is set.
    """
    flag_labels = (
        (MorphAspect.IMPERFECTIVE, "несоверш."),
        (MorphAspect.PERFECTIVE, "соверш."),
    )
    return "|".join(
        label
        for flag, label in flag_labels
        if ((aspect) & (flag)) != (MorphAspect.UNDEFINED)
    )
def to_string_morph_tense(tense: 'MorphTense') -> str:
    """Render the flags set in ``tense`` as '|'-separated Russian labels.

    Args:
        tense: bit combination of ``MorphTense`` values.

    Returns:
        Labels of the set flags in fixed order (past, present, future), or
        an empty string when no bit is set.
    """
    flag_labels = (
        (MorphTense.PAST, "прошедшее"),
        (MorphTense.PRESENT, "настоящее"),
        (MorphTense.FUTURE, "будущее"),
    )
    return "|".join(
        label
        for flag, label in flag_labels
        if ((tense) & (flag)) != (MorphTense.UNDEFINED)
    )
def to_string_morph_person(person: 'MorphPerson') -> str:
    """Render the flags set in ``person`` as '|'-separated Russian labels.

    Args:
        person: bit combination of ``MorphPerson`` values.

    Returns:
        Labels of the set flags in fixed order (first, second, third), or
        an empty string when no bit is set.
    """
    flag_labels = (
        (MorphPerson.FIRST, "1лицо"),
        (MorphPerson.SECOND, "2лицо"),
        (MorphPerson.THIRD, "3лицо"),
    )
    return "|".join(
        label
        for flag, label in flag_labels
        if ((person) & (flag)) != (MorphPerson.UNDEFINED)
    )
def to_string_morph_gender(gender: 'MorphGender') -> str:
    """Render the flags set in ``gender`` as '|'-separated Russian labels.

    Args:
        gender: bit combination of ``MorphGender`` values.

    Returns:
        Labels of the set flags in fixed order (masculine, feminine,
        neuter), or an empty string when no bit is set.
    """
    flag_labels = (
        (MorphGender.MASCULINE, "муж."),
        # `FEMINIE` is the member name as spelled by the enum itself.
        (MorphGender.FEMINIE, "жен."),
        (MorphGender.NEUTER, "средн."),
    )
    return "|".join(
        label
        for flag, label in flag_labels
        if ((gender) & (flag)) != (MorphGender.UNDEFINED)
    )
def to_string_morph_voice(voice: 'MorphVoice') -> str:
    """Render the flags set in ``voice`` as '|'-separated Russian labels.

    Args:
        voice: bit combination of ``MorphVoice`` values.

    Returns:
        Labels of the set flags in fixed order (active, passive, middle),
        or an empty string when no bit is set.
    """
    flag_labels = (
        (MorphVoice.ACTIVE, "действит."),
        (MorphVoice.PASSIVE, "страдат."),
        (MorphVoice.MIDDLE, "средн."),
    )
    return "|".join(
        label
        for flag, label in flag_labels
        if ((voice) & (flag)) != (MorphVoice.UNDEFINED)
    )
def to_string_morph_mood(mood: 'MorphMood') -> str:
    """Render the flags set in ``mood`` as '|'-separated Russian labels.

    Args:
        mood: bit combination of ``MorphMood`` values.

    Returns:
        Labels of the set flags in fixed order (indicative, imperative,
        subjunctive), or an empty string when no bit is set.
    """
    flag_labels = (
        (MorphMood.INDICATIVE, "изъявит."),
        (MorphMood.IMPERATIVE, "повелит."),
        (MorphMood.SUBJUNCTIVE, "условн."),
    )
    return "|".join(
        label
        for flag, label in flag_labels
        if ((mood) & (flag)) != (MorphMood.UNDEFINED)
    )
def to_string_morph_finite(finit: 'MorphFinite') -> str:
    """Render the flags set in ``finit`` as a '|'-separated label list.

    Args:
        finit: bit combination of ``MorphFinite`` values.

    Returns:
        Labels of the set flags in fixed order (finite, gerund, infinitive,
        participle), or an empty string when no bit is set. The labels are
        a mix of English and Russian, exactly as the original emits them.
    """
    flag_labels = (
        (MorphFinite.FINITE, "finite"),
        (MorphFinite.GERUND, "gerund"),
        (MorphFinite.INFINITIVE, "инфинитив"),
        (MorphFinite.PARTICIPLE, "participle"),
    )
    return "|".join(
        label
        for flag, label in flag_labels
        if ((finit) & (flag)) != (MorphFinite.UNDEFINED)
    )
def __str__(self) -> str:
    """Return the set language flags as ';'-separated two-letter codes.

    Every truthy ``is_*`` flag contributes its code, always in the order
    RU, UA, BY, EN, IT, KZ. An empty string is returned when none is set.
    """
    flagged_codes = (
        (self.is_ru, "RU"),
        (self.is_ua, "UA"),
        (self.is_by, "BY"),
        (self.is_en, "EN"),
        (self.is_it, "IT"),
        (self.is_kz, "KZ"),
    )
    # str.join puts the separator only between items, so no trailing ';'
    # has to be trimmed afterwards.
    return ";".join(code for is_set, code in flagged_codes if is_set)
def attachbbk(t0: 'Token') -> 'UriItemToken':
    """Try to read a code made of digits and attached symbols from ``t0``.

    Tokens are consumed until a line break (other than before ``t0``), a
    table control character, a non-digit or non-undefined-class number, a
    comma, a '(' not followed by a number, a letter-initial token preceded
    by whitespace, or a token that is neither a number nor text.

    Args:
        t0: first token of the candidate code.

    Returns:
        A ``UriItemToken`` spanning the consumed tokens, or ``None`` when
        fewer than 3 characters or fewer than 2 digit characters were
        collected. A trailing '.' is dropped from both the value and the span.
    """
    pieces = []
    digit_count = 0
    last = t0
    cur = t0
    while cur is not None:
        if (cur.is_newline_before and cur != t0) or cur.is_table_control_char:
            break
        if isinstance(cur, NumberToken):
            num = Utils.asObjectOrNull(cur, NumberToken)
            if num.typ != NumberSpellingType.DIGIT or not num.morph.class0_.is_undefined:
                break
            chunk = num.get_source_text()
            digit_count += len(chunk)
        else:
            word = Utils.asObjectOrNull(cur, TextToken)
            if word is None or word.is_char(','):
                break
            if word.is_char('(') and not isinstance(word.next0_, NumberToken):
                break
            chunk = word.get_source_text()
            # A letter may continue the code only when glued to what precedes it.
            if str.isalpha(chunk[0]) and word.is_whitespace_before:
                break
        pieces.append(chunk)
        last = cur
        cur = cur.next0_

    text = "".join(pieces)
    if len(text) < 3 or digit_count < 2:
        return None
    if text[-1] == '.':
        # The final dot is treated as punctuation, not as part of the code.
        text = text[:-1]
        last = last.previous
    return UriItemToken._new2706(t0, last, text)
def __str__(self) -> str:
    """Return the set word-class flags as '|'-separated Russian labels.

    Every truthy ``is_*`` flag contributes its label in a fixed order. The
    "misc" label is suppressed when the conjunction, preposition or proper
    flag is also set. An empty string is returned when no flag is set.
    """
    labels = []
    if self.is_noun:
        labels.append("существ.")
    if self.is_adjective:
        labels.append("прилаг.")
    if self.is_verb:
        labels.append("глагол")
    if self.is_adverb:
        labels.append("наречие")
    if self.is_pronoun:
        labels.append("местоим.")
    # "misc" is reported only when no more specific label below covers it.
    if self.is_misc and not (self.is_conjunction or self.is_preposition or self.is_proper):
        labels.append("разное")
    if self.is_preposition:
        labels.append("предлог")
    if self.is_conjunction:
        labels.append("союз")
    if self.is_proper:
        labels.append("собств.")
    if self.is_proper_surname:
        labels.append("фамилия")
    if self.is_proper_name:
        labels.append("имя")
    if self.is_proper_secname:
        labels.append("отч.")
    if self.is_proper_geo:
        labels.append("геогр.")
    if self.is_personal_pronoun:
        labels.append("личн.местоим.")
    return "|".join(labels)
def correct_word_by_morph(self, word: str) -> str:
    """Try to correct a one-character misspelling of ``word``.

    Candidate patterns are checked with ``self.__check_corr_var`` against
    ``self.m_root`` in three stages. A later stage runs only when the
    previous ones produced nothing:

    1. substitution - one character (never the first) replaced by ``'*'``;
    2. insertion - ``'*'`` inserted before a character (never the first);
    3. deletion - one inner character (neither first nor last) removed.

    Args:
        word: the word to correct.

    Returns:
        The single distinct correction found, or ``None`` when there is no
        correction or more than one (ambiguous) - despite the ``str``
        annotation, which is kept for interface compatibility.
    """
    def collect(patterns):
        # Distinct non-None results, in first-seen order.
        found = []
        for pattern in patterns:
            var = self.__check_corr_var(pattern, self.m_root, 0)
            if var is not None and var not in found:
                found.append(var)
        return found

    size = len(word)
    variants = collect(word[:k] + '*' + word[k + 1:] for k in range(1, size))
    if not variants:
        variants = collect(word[:k] + '*' + word[k:] for k in range(1, size))
    if not variants:
        variants = collect(word[:k] + word[k + 1:] for k in range(1, size - 1))

    if len(variants) != 1:
        return None
    return variants[0]
# Scans the token chain of `kit` (kit.first_token, following next0_) and, for
# every URI-like item it recognizes, registers a UriReferent in this analyzer's
# data (kit.getAnalyzerData(self)) and embeds a ReferentToken via
# kit.embedToken(), continuing the scan from the embedded token.
# Per token, checked in this order:
#   * a scheme term found by UriAnalyzer.__m_schemes.tryParse, dispatched on the
#     term's tag `i`:
#       0   - scheme, then ':' or '|', optional slashes, then attachUriContent(t1, False)
#       10  - scheme, then ':', optional slashes, optional "WWW.", then
#             attachUriContent(tt, True); values shorter than 4 chars are ignored
#       2   - term followed by '.', content registered under scheme "http"
#       1   - coded identifiers: ISBN, RFC/ISO/ОКФС/ОКОПФ, ГОСТ, ТУ, anything
#             else through attachBBK; for schemes starting with "ОК" further
#             comma/and-separated numbers are embedded as separate referents
#       3   - value parsed by attachSkype (scheme "skype" or the term's own text)
#       4   - value parsed by attachIcqContent, scheme "ICQ"
#       5/6 - digit-based values (ИНН, БИК, ОГРН, СНИЛС, ОКПО, Л/С, IBAN, and
#             pairs written as "X/Y 111/222")
#   * the '@' character - mail addresses, registered under scheme "mailto"
#   * non-Cyrillic tokens - attachUrl results, content preceded by whatever
#     __siteBefore recognizes, and __TryAttachLotus results
# NOTE(review): the code is kept byte-identical; the exact order of `continue`
# statements and of re-binding `t` drives where the scan resumes.
def process(self, kit : 'AnalysisKit') -> None: """ Main entity-extraction function. Args: kit: analysis kit whose token chain is scanned; recognized items are embedded back into it. """ ad = kit.getAnalyzerData(self) t = kit.first_token first_pass3149 = True while True: if first_pass3149: first_pass3149 = False else: t = t.next0_ if (not (t is not None)): break tt = t tok = UriAnalyzer.__m_schemes.tryParse(t, TerminParseAttr.NO) if (tok is not None): i = (tok.termin.tag) tt = tok.end_token if (tt.next0_ is not None and tt.next0_.isChar('(')): tok1 = UriAnalyzer.__m_schemes.tryParse(tt.next0_.next0_, TerminParseAttr.NO) if ((tok1 is not None and tok1.termin.canonic_text == tok.termin.canonic_text and tok1.end_token.next0_ is not None) and tok1.end_token.next0_.isChar(')')): tt = tok1.end_token.next0_ if (i == 0): if ((tt.next0_ is None or ((not tt.next0_.isCharOf(":|") and not tt.is_table_control_char)) or tt.next0_.is_whitespace_before) or tt.next0_.whitespaces_after_count > 2): continue t1 = tt.next0_.next0_ while t1 is not None and t1.isCharOf("/\\"): t1 = t1.next0_ if (t1 is None or t1.whitespaces_before_count > 2): continue ut = UriItemToken.attachUriContent(t1, False) if (ut is None): continue ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557(tok.termin.canonic_text.lower(), ut.value)), UriReferent) rt = ReferentToken(ad.registerReferent(ur), t, ut.end_token) rt.begin_token = Utils.ifNotNull(UriAnalyzer.__siteBefore(t.previous), t) if (rt.end_token.next0_ is not None and rt.end_token.next0_.isCharOf("/\\")): rt.end_token = rt.end_token.next0_ kit.embedToken(rt) t = (rt) continue if (i == 10): tt = tt.next0_ if (tt is None or not tt.isChar(':')): continue tt = tt.next0_ while tt is not None: if (tt.isCharOf("/\\")): pass else: break tt = tt.next0_ if (tt is None): continue if (tt.isValue("WWW", None) and tt.next0_ is not None and tt.next0_.isChar('.')): tt = tt.next0_.next0_ if (tt is None or tt.is_newline_before): continue ut = UriItemToken.attachUriContent(tt, True) if (ut is None): continue if 
(len(ut.value) < 4): continue ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557(tok.termin.canonic_text.lower(), ut.value)), UriReferent) rt = ReferentToken(ad.registerReferent(ur), t, ut.end_token) rt.begin_token = Utils.ifNotNull(UriAnalyzer.__siteBefore(t.previous), t) if (rt.end_token.next0_ is not None and rt.end_token.next0_.isCharOf("/\\")): rt.end_token = rt.end_token.next0_ kit.embedToken(rt) t = (rt) continue if (i == 2): if (tt.next0_ is None or not tt.next0_.isChar('.') or tt.next0_.is_whitespace_before): continue if (tt.next0_.is_whitespace_after and tok.termin.canonic_text != "WWW"): continue ut = UriItemToken.attachUriContent(tt.next0_.next0_, True) if (ut is None): continue ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557("http", ut.value)), UriReferent) rt = ReferentToken(ur, t, ut.end_token) rt.begin_token = Utils.ifNotNull(UriAnalyzer.__siteBefore(t.previous), t) if (rt.end_token.next0_ is not None and rt.end_token.next0_.isCharOf("/\\")): rt.end_token = rt.end_token.next0_ kit.embedToken(rt) t = (rt) continue if (i == 1): sch = tok.termin.canonic_text ut = None if (sch == "ISBN"): ut = UriItemToken.attachISBN(tt.next0_) if ((ut is None and t.previous is not None and t.previous.isChar('(')) and t.next0_ is not None and t.next0_.isChar(')')): tt0 = t.previous.previous while tt0 is not None: if (tt0.whitespaces_after_count > 2): break if (tt0.is_whitespace_before): ut = UriItemToken.attachISBN(tt0) if (ut is not None and ut.end_token.next0_ != t.previous): ut = (None) break tt0 = tt0.previous elif ((sch == "RFC" or sch == "ISO" or sch == "ОКФС") or sch == "ОКОПФ"): ut = UriItemToken.attachISOContent(tt.next0_, ":") elif (sch == "ГОСТ"): ut = UriItemToken.attachISOContent(tt.next0_, "-.") elif (sch == "ТУ"): if (tok.chars.is_all_upper): ut = UriItemToken.attachISOContent(tt.next0_, "-.") if (ut is not None and (ut.length_char < 10)): ut = (None) else: ut = UriItemToken.attachBBK(tt.next0_) if (ut is None): continue 
ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560(ut.value, sch)), UriReferent) if (ut.begin_char < t.begin_char): rt = ReferentToken(ur, ut.begin_token, t) if (t.next0_ is not None and t.next0_.isChar(')')): rt.end_token = t.next0_ else: rt = ReferentToken(ur, t, ut.end_token) if (t.previous is not None and t.previous.isValue("КОД", None)): rt.begin_token = t.previous if (ur.scheme.startswith("ОК")): UriAnalyzer.__checkDetail(rt) kit.embedToken(rt) t = (rt) if (ur.scheme.startswith("ОК")): while t.next0_ is not None: if (t.next0_.is_comma_and and (isinstance(t.next0_.next0_, NumberToken))): pass else: break ut = UriItemToken.attachBBK(t.next0_.next0_) if (ut is None): break ur = (Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560(ut.value, sch)), UriReferent)) rt = ReferentToken(ur, t.next0_.next0_, ut.end_token) UriAnalyzer.__checkDetail(rt) kit.embedToken(rt) t = (rt) continue if (i == 3): t0 = tt.next0_ while t0 is not None: if (t0.isCharOf(":|") or t0.is_table_control_char or t0.is_hiphen): t0 = t0.next0_ else: break if (t0 is None): continue ut = UriItemToken.attachSkype(t0) if (ut is None): continue ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560(ut.value.lower(), ("skype" if tok.termin.canonic_text == "SKYPE" else tok.termin.canonic_text))), UriReferent) rt = ReferentToken(ur, t, ut.end_token) kit.embedToken(rt) t = (rt) continue if (i == 4): t0 = tt.next0_ if (t0 is not None and ((t0.isChar(':') or t0.is_hiphen))): t0 = t0.next0_ if (t0 is None): continue ut = UriItemToken.attachIcqContent(t0) if (ut is None): continue ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560(ut.value, "ICQ")), UriReferent) rt = ReferentToken(ur, t, t0) kit.embedToken(rt) t = (rt) continue if (i == 5 or i == 6): t0 = tt.next0_ has_tab_cel = False is_iban = False first_pass3150 = True while True: if first_pass3150: first_pass3150 = False else: t0 = t0.next0_ if (not (t0 is not None)): break if ((((t0.isValue("БАНК", 
None) or t0.morph.class0_.is_preposition or t0.is_hiphen) or t0.isCharOf(".:") or t0.isValue("РУБЛЬ", None)) or t0.isValue("РУБ", None) or t0.isValue("ДОЛЛАР", None)) or t0.isValue("№", None) or t0.isValue("N", None)): pass elif (t0.is_table_control_char): has_tab_cel = True elif (t0.isCharOf("\\/") and t0.next0_ is not None and t0.next0_.isValue("IBAN", None)): is_iban = True t0 = t0.next0_ elif (t0.isValue("IBAN", None)): is_iban = True elif (isinstance(t0, TextToken)): npt = NounPhraseHelper.tryParse(t0, NounPhraseParseAttr.NO, 0) if (npt is not None and npt.morph.case_.is_genitive): t0 = npt.end_token continue break else: break if (t0 is None): continue ur2 = None ur2begin = None ur2end = None t00 = t0 val = t0.getSourceText() if (str.isdigit(val[0]) and ((((i == 6 or tok.termin.canonic_text == "ИНН" or tok.termin.canonic_text == "БИК") or tok.termin.canonic_text == "ОГРН" or tok.termin.canonic_text == "СНИЛС") or tok.termin.canonic_text == "ОКПО"))): if (t0.chars.is_letter): continue if (Utils.isNullOrEmpty(val) or not str.isdigit(val[0])): continue if (t0.length_char < 9): tmp = io.StringIO() print(val, end="", file=tmp) ttt = t0.next0_ first_pass3151 = True while True: if first_pass3151: first_pass3151 = False else: ttt = ttt.next0_ if (not (ttt is not None)): break if (ttt.whitespaces_before_count > 1): break if (isinstance(ttt, NumberToken)): print(ttt.getSourceText(), end="", file=tmp) t0 = ttt continue if (ttt.is_hiphen or ttt.isChar('.')): if (ttt.next0_ is None or not ((isinstance(ttt.next0_, NumberToken)))): break if (ttt.is_whitespace_after or ttt.is_whitespace_before): break continue break val = (None) if (tmp.tell() == 20): val = Utils.toStringStringIO(tmp) elif (tmp.tell() == 9 and tok.termin.canonic_text == "БИК"): val = Utils.toStringStringIO(tmp) elif (((tmp.tell() == 10 or tmp.tell() == 12)) and tok.termin.canonic_text == "ИНН"): val = Utils.toStringStringIO(tmp) elif (tmp.tell() >= 15 and tok.termin.canonic_text == "Л/С"): val = 
Utils.toStringStringIO(tmp) elif (tmp.tell() >= 11 and ((tok.termin.canonic_text == "ОГРН" or tok.termin.canonic_text == "СНИЛС"))): val = Utils.toStringStringIO(tmp) elif (tok.termin.canonic_text == "ОКПО"): val = Utils.toStringStringIO(tmp) if (val is None): continue elif (not ((isinstance(t0, NumberToken)))): if ((isinstance(t0, TextToken)) and is_iban): tmp1 = io.StringIO() t1 = None ttt = t0 first_pass3152 = True while True: if first_pass3152: first_pass3152 = False else: ttt = ttt.next0_ if (not (ttt is not None)): break if (ttt.is_newline_before and ttt != t0): break if (ttt.is_hiphen): continue if (not ((isinstance(ttt, NumberToken)))): if (not ((isinstance(ttt, TextToken))) or not ttt.chars.is_latin_letter): break print(ttt.getSourceText(), end="", file=tmp1) t1 = ttt if (tmp1.tell() >= 34): break if (tmp1.tell() < 10): continue ur1 = UriReferent._new2560(Utils.toStringStringIO(tmp1), tok.termin.canonic_text) ur1.addSlot(UriReferent.ATTR_DETAIL, "IBAN", False, 0) rt1 = ReferentToken(ad.registerReferent(ur1), t, t1) kit.embedToken(rt1) t = (rt1) continue if (not t0.isCharOf("/\\") or t0.next0_ is None): continue tok2 = UriAnalyzer.__m_schemes.tryParse(t0.next0_, TerminParseAttr.NO) if (tok2 is None or not ((isinstance(tok2.termin.tag, int))) or (tok2.termin.tag) != i): continue t0 = tok2.end_token.next0_ while t0 is not None: if (t0.isCharOf(":N№")): t0 = t0.next0_ elif (t0.is_table_control_char): t0 = t0.next0_ t00 = t0 has_tab_cel = True else: break if (not ((isinstance(t0, NumberToken)))): continue tmp = io.StringIO() while t0 is not None: if (not ((isinstance(t0, NumberToken)))): break print(t0.getSourceText(), end="", file=tmp) t0 = t0.next0_ if (t0 is None or not t0.isCharOf("/\\,") or not ((isinstance(t0.next0_, NumberToken)))): continue val = Utils.toStringStringIO(tmp) Utils.setLengthStringIO(tmp, 0) ur2begin = t0.next0_ t0 = t0.next0_ while t0 is not None: if (not ((isinstance(t0, NumberToken)))): break if (t0.whitespaces_before_count > 4 and 
tmp.tell() > 0): break print(t0.getSourceText(), end="", file=tmp) ur2end = t0 t0 = t0.next0_ ur2 = (Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557(tok2.termin.canonic_text, Utils.toStringStringIO(tmp))), UriReferent)) if (len(val) < 5): continue ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560(val, tok.termin.canonic_text)), UriReferent) rt = ReferentToken(ur, t, (t0 if ur2begin is None else ur2begin.previous)) if (has_tab_cel): rt.begin_token = t00 if (ur.scheme.startswith("ОК")): UriAnalyzer.__checkDetail(rt) ttt = t.previous first_pass3153 = True while True: if first_pass3153: first_pass3153 = False else: ttt = ttt.previous if (not (ttt is not None)): break if (ttt.is_table_control_char): break if (ttt.morph.class0_.is_preposition): continue if (ttt.isValue("ОРГАНИЗАЦИЯ", None)): continue if (ttt.isValue("НОМЕР", None) or ttt.isValue("КОД", None)): rt.begin_token = ttt t = rt.begin_token break kit.embedToken(rt) t = (rt) if (ur2 is not None): rt2 = ReferentToken(ur2, ur2begin, ur2end) kit.embedToken(rt2) t = (rt2) continue continue if (t.isChar('@')): u1s = UriItemToken.attachMailUsers(t.previous) if (u1s is None): continue u2 = UriItemToken.attachDomainName(t.next0_, False, True) if (u2 is None): continue for ii in range(len(u1s) - 1, -1, -1): ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560("{0}@{1}".format(u1s[ii].value, u2.value).lower(), "mailto")), UriReferent) b = u1s[ii].begin_token t0 = b.previous if (t0 is not None and t0.isChar(':')): t0 = t0.previous if (t0 is not None and ii == 0): br = False ttt = t0 first_pass3154 = True while True: if first_pass3154: first_pass3154 = False else: ttt = ttt.previous if (not (ttt is not None)): break if (not ((isinstance(ttt, TextToken)))): break if (ttt != t0 and ttt.whitespaces_after_count > 1): break if (ttt.isChar(')')): br = True continue if (ttt.isChar('(')): if (not br): break br = False continue if (ttt.isValue("EMAIL", None) or ttt.isValue("MAILTO", 
None)): b = ttt break if (ttt.isValue("MAIL", None)): b = ttt if ((ttt.previous is not None and ttt.previous.is_hiphen and ttt.previous.previous is not None) and ((ttt.previous.previous.isValue("E", None) or ttt.previous.previous.isValue("Е", None)))): b = ttt.previous.previous break if (ttt.isValue("ПОЧТА", None) or ttt.isValue("АДРЕС", None)): b = t0 ttt = ttt.previous if (ttt is not None and ttt.isChar('.')): ttt = ttt.previous if (ttt is not None and ((t0.isValue("ЭЛ", None) or ttt.isValue("ЭЛЕКТРОННЫЙ", None)))): b = ttt if (b.previous is not None and b.previous.isValue("АДРЕС", None)): b = b.previous break if (ttt.morph.class0_.is_preposition): continue rt = ReferentToken(ur, b, (u2.end_token if ii == (len(u1s) - 1) else u1s[ii].end_token)) kit.embedToken(rt) t = (rt) continue if (not t.morph.language.is_cyrillic): if (t.is_whitespace_before or ((t.previous is not None and t.previous.isCharOf(",(")))): u1 = UriItemToken.attachUrl(t) if (u1 is not None): if (u1.is_whitespace_after or u1.end_token.next0_ is None or not u1.end_token.next0_.isChar('@')): ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557("http", u1.value)), UriReferent) rt = ReferentToken(ur, u1.begin_token, u1.end_token) rt.begin_token = Utils.ifNotNull(UriAnalyzer.__siteBefore(u1.begin_token.previous), u1.begin_token) kit.embedToken(rt) t = (rt) continue if ((isinstance(t, TextToken)) and not t.is_whitespace_after and t.length_char > 2): if (UriAnalyzer.__siteBefore(t.previous) is not None): ut = UriItemToken.attachUriContent(t, True) if (ut is None or ut.value.find('.') <= 0 or ut.value.find('@') > 0): continue ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557("http", ut.value)), UriReferent) rt = ReferentToken(ur, t, ut.end_token) rt.begin_token = UriAnalyzer.__siteBefore(t.previous) if (rt.end_token.next0_ is not None and rt.end_token.next0_.isCharOf("/\\")): rt.end_token = rt.end_token.next0_ kit.embedToken(rt) t = (rt) continue if ((t.chars.is_latin_letter 
and not t.chars.is_all_lower and t.next0_ is not None) and not t.is_whitespace_after): if (t.next0_.isChar('/')): rt = UriAnalyzer.__TryAttachLotus(Utils.asObjectOrNull(t, TextToken)) if (rt is not None): rt.referent = ad.registerReferent(rt.referent) kit.embedToken(rt) t = (rt) continue
def get_variants(rus_or_lat: str) -> typing.List[str]:
    """List every transliteration of a word into the opposite alphabet.

    The word is upper-cased; its first character decides the direction
    (Cyrillic to Latin or the reverse). It is then cut left to right into
    segments, each matched by the longest applicable accord prefixes, and
    every combination of the segments' counterparts is emitted.

    Args:
        rus_or_lat: word written in Cyrillic or Latin letters.

    Returns:
        All variants, ordered with the last segment varying fastest. The
        list is empty for empty input or when some position matches no
        accord. When the result is Cyrillic, variants with 'Й' at the start
        or after a non-vowel are dropped.
    """
    variants = []
    if Utils.isNullOrEmpty(rus_or_lat):
        return variants
    rus_or_lat = rus_or_lat.upper()
    is_rus = LanguageHelper.is_cyrillic_char(rus_or_lat[0])
    total = len(rus_or_lat)

    # Greedy segmentation: at each position keep all accords whose source
    # side is the longest matching prefix.
    segments = []
    pos = 0
    while pos < total:
        best = []
        best_len = 0
        for acc in RusLatAccord.__get_accords():
            prefix = acc.rus if is_rus else acc.lat
            if len(prefix) == 0 or len(prefix) < best_len:
                continue
            if not RusLatAccord.__is_pref(rus_or_lat, pos, prefix):
                continue
            # Tail-only accords must reach the end of the word.
            if acc.on_tail and (len(prefix) + pos) < total:
                continue
            if len(prefix) > best_len:
                best_len = len(prefix)
                best = []
            best.append(acc)
        if not best or best_len == 0:
            return variants
        segments.append(best)
        pos += best_len
    if not segments:
        return variants

    # Cartesian product of the per-segment choices, last segment fastest.
    combos = [[]]
    for options in segments:
        combos = [head + [opt] for head in combos for opt in options]

    for combo in combos:
        text = "".join((acc.lat if is_rus else acc.rus) for acc in combo)
        ok = True
        if not is_rus:
            for idx, ch in enumerate(text):
                if ch != 'Й':
                    continue
                if idx == 0 or not LanguageHelper.is_cyrillic_vowel(text[idx - 1]):
                    ok = False
                    break
        if ok:
            variants.append(text)
    return variants
# Try to assemble a street address item from a list of StreetItemToken objects.
#
# Parameters:
#   sli            -- street item tokens. The list and its items may be modified
#                     in place (items are deleted with `del`, `begin_token` and
#                     `is_number_km` of items are reassigned).
#   ext_onto_regim -- forwarded to the recursive call and to __tryDetectNonNoun;
#                     it also lets a plain NUMBER item after the noun count
#                     towards `after`.
#   for_metro      -- when true the type slot is set to "метро" instead of the
#                     noun's canonical text; it is also switched on locally for
#                     some "МЕТРО" noun layouts.
#
# Returns an AddressItemToken of ItemType.STREET wrapping a new StreetReferent,
# or None when `sli` is None/empty or one of the many plausibility checks fails.
#
# Outline (nesting is not visible in this flattened text, so treat the outline
# as a reading aid, not a specification):
#   1. Locate index `i` of the governing NOUN item. A leading FIX item that is
#      not followed by a NOUN is delegated to __tryParseFix; if the loop runs
#      past the end of `sli`, the work is delegated to __tryDetectNonNoun.
#   2. Count name-like items before and after the noun (`before` / `after`) to
#      choose the index range n0..n1 that holds the name parts.
#   3. Walk sli[n0..n1] collecting `name`, `number`, `sec_number`, `age`, `adj`
#      and the list `rli` of items that belong to the street.
#   4. Create the StreetReferent, fill ATTR_TYP / number / sec_number / ATTR_NAME
#      slots (including alternative name forms), and span the result token over
#      every item in `rli`.
#   5. Adjust `res.is_doubt` from context (house number after, geo object
#      before, kilometre markers, an earlier StreetReferent) and, for "КВАРТАЛ"
#      without a number, absorb a following NUMBER address item.
#
# NOTE(review): in the `while j < i` counting loop the NUMBER branch tests
#   `sli[i].number_has_prefix` (the noun's index) while the matching loop after
#   the noun tests `sli[j].number_has_prefix` -- possibly a typo; confirm.
# NOTE(review): that same loop reads `sli[j].number.morph` without the
#   `sli[j].number is not None` check used in the loop after the noun.
# NOTE(review): `if (for_metro and noun is not None)` formats
#   `alt_noun.termin.canonic_text`; `alt_noun` starts as None and is assigned in
#   only two branches, so this looks like it can dereference None -- confirm.
# NOTE(review): `sli[2].begin_token.previous.is_comma` is not guarded against
#   `previous` being None, unlike the `sli[0].begin_token.previous` test above it.
# NOTE(review): `tt0.isCharOf(",,")` lists the same character twice.
def _tryParseStreet(sli : typing.List['StreetItemToken'], ext_onto_regim : bool=False, for_metro : bool=False) -> 'AddressItemToken': if (sli is None or len(sli) == 0): return None i = 0 while i < len(sli): if (i == 0 and sli[i].typ == StreetItemType.FIX and ((len(sli) == 1 or sli[1].typ != StreetItemType.NOUN))): return StreetDefineHelper.__tryParseFix(sli) elif (sli[i].typ == StreetItemType.NOUN): if ((i == 0 and sli[i].termin.canonic_text == "УЛИЦА" and ((i + 2) < len(sli))) and sli[i + 1].typ == StreetItemType.NOUN and sli[i + 1].termin.canonic_text == "МИКРОРАЙОН"): sli[i + 1].begin_token = sli[i].begin_token del sli[i] if (sli[i].termin.canonic_text == "МЕТРО"): if ((i + 1) < len(sli)): sli1 = list() ii = i + 1 while ii < len(sli): sli1.append(sli[ii]) ii += 1 str1 = StreetDefineHelper._tryParseStreet(sli1, ext_onto_regim, True) if (str1 is not None): str1.begin_token = sli[i].begin_token str1.is_doubt = sli[i].is_abridge if (sli[i + 1].is_in_brackets): str1.is_doubt = False return str1 elif (i == 1 and sli[0].typ == StreetItemType.NAME): for_metro = True break if (i == 0 and len(sli) > 0): for_metro = True break return None if (i == 0 and (i + 1) >= len(sli) and ((sli[i].termin.canonic_text == "ВОЕННЫЙ ГОРОДОК" or sli[i].termin.canonic_text == "ПРОМЗОНА"))): stri0 = StreetReferent() stri0.addSlot(StreetReferent.ATTR_TYP, "микрорайон", False, 0) stri0.addSlot(StreetReferent.ATTR_NAME, sli[i].termin.canonic_text, False, 0) return AddressItemToken._new85(AddressItemToken.ItemType.STREET, sli[0].begin_token, sli[0].end_token, stri0, True) if (i == 0 and (i + 1) >= len(sli) and sli[i].termin.canonic_text == "МИКРОРАЙОН"): stri0 = StreetReferent() stri0.addSlot(StreetReferent.ATTR_TYP, sli[i].termin.canonic_text.lower(), False, 0) return AddressItemToken._new85(AddressItemToken.ItemType.STREET, sli[0].begin_token, sli[0].end_token, stri0, True) if (sli[i].termin.canonic_text == "ПЛОЩАДЬ" or sli[i].termin.canonic_text == "ПЛОЩА"): tt = sli[i].end_token.next0_ if 
(tt is not None and ((tt.is_hiphen or tt.isChar(':')))): tt = tt.next0_ nex = NumberHelper.tryParseNumberWithPostfix(tt) if (nex is not None): return None break i += 1 if (i >= len(sli)): return StreetDefineHelper.__tryDetectNonNoun(sli, ext_onto_regim, for_metro) name = None number = None age = None adj = None noun = sli[i] alt_noun = None is_micro_raion = (noun.termin.canonic_text == "МИКРОРАЙОН" or noun.termin.canonic_text == "МІКРОРАЙОН" or noun.termin.canonic_text == "КВАРТАЛ") or LanguageHelper.endsWith(noun.termin.canonic_text, "ГОРОДОК") before = 0 after = 0 j = 0 while j < i: if ((sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX) or sli[j].typ == StreetItemType.STDADJECTIVE or sli[j].typ == StreetItemType.STDPARTOFNAME): before += 1 elif (sli[j].typ == StreetItemType.NUMBER): if (sli[j].is_newline_after): return None if (sli[j].number.morph.class0_.is_adjective): before += 1 elif (is_micro_raion): before += 1 elif (sli[i].number_has_prefix): before += 1 else: before += 1 j += 1 j = (i + 1) while j < len(sli): if ((sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX) or sli[j].typ == StreetItemType.STDADJECTIVE or sli[j].typ == StreetItemType.STDPARTOFNAME): after += 1 elif (sli[j].typ == StreetItemType.NUMBER): if (sli[j].number is not None and sli[j].number.morph.class0_.is_adjective): after += 1 elif (is_micro_raion): after += 1 elif (sli[j].number_has_prefix): after += 1 elif (ext_onto_regim): after += 1 elif (sli[j].typ == StreetItemType.NOUN): break else: after += 1 j += 1 rli = list() if (before > after): if (noun.termin.canonic_text == "МЕТРО"): return None tt = sli[0].begin_token if (tt == sli[0].end_token and noun.begin_token == sli[0].end_token.next0_): if (not tt.morph.class0_.is_adjective and not ((isinstance(tt, NumberToken)))): if ((sli[0].is_newline_before or not MiscLocationHelper.checkGeoObjectBefore(sli[0].begin_token) or 
noun.morph.case_.is_genitive) or noun.morph.case_.is_instrumental): ok = False if (AddressItemToken.checkHouseAfter(noun.end_token.next0_, False, True)): ok = True elif (noun.end_token.next0_ is None): ok = True elif (noun.is_newline_after and MiscLocationHelper.checkGeoObjectBefore(sli[0].begin_token)): ok = True if (not ok): if ((noun.chars.is_latin_letter and noun.chars.is_capital_upper and sli[0].chars.is_latin_letter) and sli[0].chars.is_capital_upper): ok = True if (not ok): return None n0 = 0 n1 = (i - 1) elif (i == 1 and sli[0].typ == StreetItemType.NUMBER): if (not sli[0].is_whitespace_after): return None number = (sli[0].value if sli[0].number is None else str(sli[0].number.int_value)) if (sli[0].is_number_km): number += "км" n0 = (i + 1) n1 = (len(sli) - 1) rli.append(sli[0]) rli.append(sli[i]) elif (after > before): n0 = (i + 1) n1 = (len(sli) - 1) rli.append(sli[i]) elif (after == 0): return None elif ((len(sli) > 2 and ((sli[0].typ == StreetItemType.NAME or sli[0].typ == StreetItemType.STDADJECTIVE or sli[0].typ == StreetItemType.STDNAME)) and sli[1].typ == StreetItemType.NOUN) and sli[2].typ == StreetItemType.NUMBER): n0 = 0 n1 = 0 num = False tt2 = sli[2].end_token.next0_ if (sli[2].is_number_km): num = True elif (sli[0].begin_token.previous is not None and sli[0].begin_token.previous.isValue("КИЛОМЕТР", None)): sli[2].is_number_km = True num = True elif (sli[2].begin_token.previous.is_comma): pass elif (sli[2].begin_token != sli[2].end_token): num = True elif (AddressItemToken.checkHouseAfter(sli[2].end_token.next0_, False, True)): num = True elif (sli[2].morph.class0_.is_adjective and (sli[2].whitespaces_before_count < 2)): if (sli[2].end_token.next0_ is None or sli[2].end_token.is_comma or sli[2].is_newline_after): num = True if (num): number = (sli[2].value if sli[2].number is None else str(sli[2].number.int_value)) if (sli[2].is_number_km): number += "км" rli.append(sli[2]) else: del sli[2:2+len(sli) - 2] else: return None sec_number = None j = 
n0 first_pass2732 = True while True: if first_pass2732: first_pass2732 = False else: j += 1 if (not (j <= n1)): break if (sli[j].typ == StreetItemType.NUMBER): if (age is not None or ((sli[j].is_newline_before and j > 0))): break if (number is not None): if (name is not None and name.typ == StreetItemType.STDNAME): sec_number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value)) if (sli[j].is_number_km): sec_number += "км" rli.append(sli[j]) continue if (((j + 1) < len(sli)) and sli[j + 1].typ == StreetItemType.STDNAME): sec_number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value)) if (sli[j].is_number_km): sec_number += "км" rli.append(sli[j]) continue break if (sli[j].number is not None and sli[j].number.typ == NumberSpellingType.DIGIT and not sli[j].number.morph.class0_.is_adjective): if (sli[j].whitespaces_before_count > 2 and j > 0): break if (sli[j].number is not None and sli[j].number.int_value > 20): if (j > n0): if (((j + 1) < len(sli)) and sli[j + 1].typ == StreetItemType.NOUN): pass else: break if (j == n0 and n0 > 0): pass elif (j == n0 and n0 == 0 and sli[j].whitespaces_after_count == 1): pass elif (sli[j].number_has_prefix): pass elif (j == n1 and ((n1 + 1) < len(sli)) and sli[n1 + 1].typ == StreetItemType.NOUN): pass else: break number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value)) if (sli[j].is_number_km): number += "км" rli.append(sli[j]) elif (sli[j].typ == StreetItemType.AGE): if (number is not None or age is not None): break age = str(sli[j].number.int_value) rli.append(sli[j]) elif (sli[j].typ == StreetItemType.STDADJECTIVE): if (adj is not None): return None adj = sli[j] rli.append(sli[j]) elif (sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX): if (name is not None): if (j > 1 and sli[j - 2].typ == StreetItemType.NOUN): break elif (i < j): break else: return None name = sli[j] rli.append(sli[j]) elif 
(sli[j].typ == StreetItemType.STDPARTOFNAME and j == n1): if (name is not None): break name = sli[j] rli.append(sli[j]) elif (sli[j].typ == StreetItemType.NOUN): if ((sli[0] == noun and ((noun.termin.canonic_text == "УЛИЦА" or noun.termin.canonic_text == "ВУЛИЦЯ")) and j > 0) and name is None): alt_noun = noun noun = sli[j] rli.append(sli[j]) else: break if (((n1 < i) and number is None and ((i + 1) < len(sli))) and sli[i + 1].typ == StreetItemType.NUMBER and sli[i + 1].number_has_prefix): number = (sli[i + 1].value if sli[i + 1].number is None else str(sli[i + 1].number.int_value)) rli.append(sli[i + 1]) elif ((((i < n0) and ((name is not None or adj is not None)) and (j < len(sli))) and sli[j].typ == StreetItemType.NOUN and ((noun.termin.canonic_text == "УЛИЦА" or noun.termin.canonic_text == "ВУЛИЦЯ"))) and (((sli[j].termin.canonic_text == "ПЛОЩАДЬ" or sli[j].termin.canonic_text == "БУЛЬВАР" or sli[j].termin.canonic_text == "ПЛОЩА") or sli[j].termin.canonic_text == "МАЙДАН" or (j + 1) == len(sli)))): alt_noun = noun noun = sli[j] rli.append(sli[j]) if (name is None): if (number is None and adj is None): return None if (noun.is_abridge): if (is_micro_raion): pass elif (noun.termin is not None and ((noun.termin.canonic_text == "ПРОЕЗД" or noun.termin.canonic_text == "ПРОЇЗД"))): pass elif (adj is None or adj.is_abridge): return None if (adj is not None and adj.is_abridge): return None if (not sli[i] in rli): rli.append(sli[i]) street = StreetReferent() if (not for_metro): street.addSlot(StreetReferent.ATTR_TYP, noun.termin.canonic_text.lower(), False, 0) if (noun.alt_termin is not None): if (noun.alt_termin.canonic_text == "ПРОСПЕКТ" and number is not None): pass else: street.addSlot(StreetReferent.ATTR_TYP, noun.alt_termin.canonic_text.lower(), False, 0) else: street.addSlot(StreetReferent.ATTR_TYP, "метро", False, 0) res = AddressItemToken._new82(AddressItemToken.ItemType.STREET, rli[0].begin_token, rli[0].end_token, street) for r in rli: if (res.begin_char > 
r.begin_char): res.begin_token = r.begin_token if (res.end_char < r.end_char): res.end_token = r.end_token if (for_metro and noun in rli and noun.termin.canonic_text == "МЕТРО"): rli.remove(noun) if (noun.is_abridge and (noun.length_char < 4)): res.is_doubt = True elif (noun.noun_is_doubt_coef > 0): res.is_doubt = True if ((name is not None and name.end_char > noun.end_char and noun.chars.is_all_lower) and not name.chars.is_all_lower and not ((isinstance(name.begin_token, ReferentToken)))): npt2 = NounPhraseHelper.tryParse(name.begin_token, NounPhraseParseAttr.NO, 0) if (npt2 is not None and npt2.end_char > name.end_char): pass elif (AddressItemToken.checkHouseAfter(res.end_token.next0_, False, False)): res.is_doubt = False elif (name.chars.is_capital_upper and noun.noun_is_doubt_coef == 1): res.is_doubt = False name_base = io.StringIO() name_alt = io.StringIO() name_alt2 = None gen = noun.termin.gender adj_gen = MorphGender.UNDEFINED if (number is not None): street.number = number if (sec_number is not None): street.sec_number = sec_number if (age is not None): if (street.number is None): street.number = age else: street.sec_number = age if (name is not None and name.value is not None): if (street.kind == StreetKind.ROAD): for r in rli: if (r.typ == StreetItemType.NAME and r != name): print(r.value, end="", file=name_alt) break if (name.alt_value is not None and name_alt.tell() == 0): print("{0} {1}".format(Utils.toStringStringIO(name_base), name.alt_value), end="", file=name_alt, flush=True) print(" {0}".format(name.value), end="", file=name_base, flush=True) elif (name is not None): is_adj = False if (isinstance(name.end_token, TextToken)): for wf in name.end_token.morph.items: if ((isinstance(wf, MorphWordForm)) and (wf).is_in_dictionary): is_adj = (wf.class0_.is_adjective | wf.class0_.is_proper_geo) adj_gen = wf.gender break elif (wf.class0_.is_adjective | wf.class0_.is_proper_geo): is_adj = True if (is_adj): tmp = io.StringIO() vars0_ = list() t = 
name.begin_token while t is not None: tt = Utils.asObjectOrNull(t, TextToken) if (tt is None): break if (tmp.tell() > 0): print(' ', end="", file=tmp) if (t == name.end_token): is_padez = False if (not noun.is_abridge): if (not noun.morph.case_.is_undefined and not noun.morph.case_.is_nominative): is_padez = True elif (noun.termin.canonic_text == "ШОССЕ" or noun.termin.canonic_text == "ШОСЕ"): is_padez = True if (res.begin_token.previous is not None and res.begin_token.previous.morph.class0_.is_preposition): is_padez = True if (not is_padez): print(tt.term, end="", file=tmp) break for wf in tt.morph.items: if (((wf.class0_.is_adjective or wf.class0_.is_proper_geo)) and (((wf.gender) & (gen))) != (MorphGender.UNDEFINED)): if (noun.morph.case_.is_undefined or not ((wf.case_) & noun.morph.case_).is_undefined): wff = Utils.asObjectOrNull(wf, MorphWordForm) if (wff is None): continue if (gen == MorphGender.MASCULINE and "ОЙ" in wff.normal_case): continue if (not wff.normal_case in vars0_): vars0_.append(wff.normal_case) if (not tt.term in vars0_ and Utils.indexOfList(sli, name, 0) > Utils.indexOfList(sli, noun, 0)): vars0_.append(tt.term) if (len(vars0_) == 0): vars0_.append(tt.term) break if (not tt.is_hiphen): print(tt.term, end="", file=tmp) t = t.next0_ if (len(vars0_) == 0): print(" {0}".format(Utils.toStringStringIO(tmp)), end="", file=name_base, flush=True) else: head = Utils.toStringStringIO(name_base) print(" {0}{1}".format(Utils.toStringStringIO(tmp), vars0_[0]), end="", file=name_base, flush=True) if (len(vars0_) > 1): Utils.setLengthStringIO(name_alt, 0) print("{0} {1}{2}".format(head, Utils.toStringStringIO(tmp), vars0_[1]), end="", file=name_alt, flush=True) if (len(vars0_) > 2): name_alt2 = "{0} {1}{2}".format(head, Utils.toStringStringIO(tmp), vars0_[2]) else: str_nam = None nits = list() has_adj = False has_proper_name = False t = name.begin_token while t is not None: if (t.morph.class0_.is_adjective or t.morph.class0_.is_conjunction): has_adj = True if 
((isinstance(t, TextToken)) and not t.is_hiphen): if (name.termin is not None): nits.append(name.termin.canonic_text) break elif (not t.chars.is_letter and len(nits) > 0): nits[len(nits) - 1] += (t).term else: nits.append((t).term) if (t == name.begin_token and t.getMorphClassInDictionary().is_proper_name): has_proper_name = True elif ((isinstance(t, ReferentToken)) and name.termin is None): nits.append(t.getSourceText().upper()) if (t == name.end_token): break t = t.next0_ if (not has_adj and not has_proper_name): nits.sort() str_nam = Utils.joinStrings(" ", list(nits)) if (has_proper_name and len(nits) == 2): Utils.setLengthStringIO(name_alt, 0) print("{0} {1}".format(Utils.toStringStringIO(name_base), nits[1]), end="", file=name_alt, flush=True) print(" {0}".format(str_nam), end="", file=name_base, flush=True) adj_str = None adj_can_be_initial = False if (adj is not None): if (adj_gen == MorphGender.UNDEFINED and name is not None and (((name.morph.number) & (MorphNumber.PLURAL))) == (MorphNumber.UNDEFINED)): if (name.morph.gender == MorphGender.FEMINIE or name.morph.gender == MorphGender.MASCULINE or name.morph.gender == MorphGender.NEUTER): adj_gen = name.morph.gender if (name is not None and (((name.morph.number) & (MorphNumber.PLURAL))) != (MorphNumber.UNDEFINED)): s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new209(MorphClass.ADJECTIVE, MorphNumber.PLURAL)) elif (adj_gen != MorphGender.UNDEFINED): s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, adj_gen)) elif ((((adj.morph.gender) & (gen))) == (MorphGender.UNDEFINED)): s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, adj.morph.gender)) else: s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, gen)) adj_str = s if (name is not None and (Utils.indexOfList(sli, adj, 0) < Utils.indexOfList(sli, name, 0))): if (adj.end_token.isChar('.') and 
adj.length_char <= 3 and not adj.begin_token.chars.is_all_lower): adj_can_be_initial = True s1 = Utils.toStringStringIO(name_base).strip() s2 = Utils.toStringStringIO(name_alt).strip() if (len(s1) < 3): if (street.number is not None): if (adj_str is not None): if (adj.is_abridge): return None street.addSlot(StreetReferent.ATTR_NAME, adj_str, False, 0) elif (adj_str is None): if (len(s1) < 1): return None if (is_micro_raion): street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0) if (not Utils.isNullOrEmpty(s2)): street.addSlot(StreetReferent.ATTR_NAME, s2, False, 0) else: return None else: if (adj.is_abridge): return None street.addSlot(StreetReferent.ATTR_NAME, adj_str, False, 0) elif (adj_can_be_initial): street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0) street.addSlot(StreetReferent.ATTR_NAME, MiscHelper.getTextValue(adj.begin_token, name.end_token, GetTextAttr.NO), False, 0) street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0) elif (adj_str is None): street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0) else: street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0) if (name_alt.tell() > 0): s1 = Utils.toStringStringIO(name_alt).strip() if (adj_str is None): street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0) else: street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0) if (name_alt2 is not None): if (adj_str is None): if (for_metro and noun is not None): street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(alt_noun.termin.canonic_text, name_alt2.strip()), False, 0) else: street.addSlot(StreetReferent.ATTR_NAME, name_alt2.strip(), False, 0) else: street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, name_alt2.strip()), False, 0) if (name is not None and name.alt_value2 is not None): street.addSlot(StreetReferent.ATTR_NAME, name.alt_value2, False, 0) if ((name is not None and adj is None and name.exist_street is not None) and not for_metro): for n in 
name.exist_street.names: street.addSlot(StreetReferent.ATTR_NAME, n, False, 0) if (alt_noun is not None and not for_metro): street.addSlot(StreetReferent.ATTR_TYP, alt_noun.termin.canonic_text.lower(), False, 0) if (noun.termin.canonic_text == "ПЛОЩАДЬ" or noun.termin.canonic_text == "КВАРТАЛ" or noun.termin.canonic_text == "ПЛОЩА"): res.is_doubt = True if (name is not None and name.is_in_dictionary): res.is_doubt = False elif (alt_noun is not None or for_metro): res.is_doubt = False elif (res.begin_token.previous is None or MiscLocationHelper.checkGeoObjectBefore(res.begin_token.previous)): if (res.end_token.next0_ is None or AddressItemToken.checkHouseAfter(res.end_token.next0_, False, True)): res.is_doubt = False if (LanguageHelper.endsWith(noun.termin.canonic_text, "ГОРОДОК")): for s in street.slots: if (s.type_name == StreetReferent.ATTR_TYP): street.uploadSlot(s, "микрорайон") elif (s.type_name == StreetReferent.ATTR_NAME): street.uploadSlot(s, "{0} {1}".format(noun.termin.canonic_text, s.value)) if (street.findSlot(StreetReferent.ATTR_NAME, None, True) is None): street.addSlot(StreetReferent.ATTR_NAME, noun.termin.canonic_text, False, 0) t1 = res.end_token.next0_ if (t1 is not None and t1.is_comma): t1 = t1.next0_ non = StreetItemToken.tryParse(t1, None, False, None, False) if (non is not None and non.typ == StreetItemType.NOUN and len(street.typs) > 0): if (AddressItemToken.checkHouseAfter(non.end_token.next0_, False, True)): street._correct() nams = street.names for t in street.typs: for n in nams: street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(t.upper(), n), False, 0) street.addSlot(StreetReferent.ATTR_TYP, non.termin.canonic_text.lower(), False, 0) res.end_token = non.end_token if (res.is_doubt): if (noun.is_road): if (street.number is not None and Utils.endsWithString(street.number, "КМ", True)): res.is_doubt = False elif (AddressItemToken.checkKmAfter(res.end_token.next0_)): res.is_doubt = False elif 
(AddressItemToken.checkKmBefore(res.begin_token.previous)): res.is_doubt = False elif (noun.termin.canonic_text == "ПРОЕЗД" and street.findSlot(StreetReferent.ATTR_NAME, "ПРОЕКТИРУЕМЫЙ", True) is not None): res.is_doubt = False tt0 = res.begin_token.previous first_pass2733 = True while True: if first_pass2733: first_pass2733 = False else: tt0 = tt0.previous if (not (tt0 is not None)): break if (tt0.isCharOf(",,") or tt0.is_comma_and): continue str0 = Utils.asObjectOrNull(tt0.getReferent(), StreetReferent) if (str0 is not None): res.is_doubt = False break if (noun.termin.canonic_text == "КВАРТАЛ" and (res.whitespaces_after_count < 2) and number is None): ait = AddressItemToken.tryParse(res.end_token.next0_, None, False, True, None) if (ait is not None and ait.typ == AddressItemToken.ItemType.NUMBER and ait.value is not None): street.addSlot(StreetReferent.ATTR_NUMBER, ait.value, False, 0) res.end_token = ait.end_token return res
# Try to build a phone number referent from phone item tokens starting at pli[ind].
#
# Parameters:
#   pli             -- list of PhoneItemToken; may be truncated in place (`del`)
#                      when the leading items match the previous phone's template.
#   ind             -- index in `pli` where parsing starts.
#   is_phone_before -- relaxes several plausibility checks below; it is also
#                      switched on locally when a bracketed PREFIX item follows
#                      the digits.
#   prev_phone      -- previously recognised PhoneReferent (or None); its
#                      `_m_template`, `number` and `country_code` are used to
#                      validate and to complete a shortened number.
#   lev             -- recursion depth; the method returns None when lev > 4.
#
# Returns a ReferentToken wrapping a new PhoneReferent, or None when the items
# do not form an acceptable phone number.
#
# Outline (nesting is not visible in this flattened text, so treat the outline
# as a reading aid, not a specification):
#   1. Optionally cut `pli` after the span whose length/delimiter template
#      equals prev_phone._m_template.
#   2. Read the country code (COUNTRYCODE item, a possible country prefix
#      checked through a recursive call, or a leading 8/7) and the city code.
#   3. Accumulate digits in `num`, the group lengths in `part_length` and the
#      lengths-plus-delimiters template in `templ`; pick up an ADDNUMBER item
#      as `additional`.
#   4. Decide `ok` from the digit count, the group pattern, the context flags
#      and, for 6-7 digit numbers, the next phone found by __get_next_phone.
#   5. Normalise country/city code and number, reject implausible results
#      (all zeros, wrong length, preceded by "ГОСТ"/"ТУ", followed by '+', '='
#      or a hyphen), then fill the PhoneReferent and compute the token span.
#
# NOTE(review): the branch `elif (country_code is None and prev_phone is not None)`
#   calls len(prev_phone.number) without the `prev_phone.number is not None`
#   check used earlier in this method -- confirm number can never be None there.
# NOTE(review): `pli[0].end_token.next0_.is_table_control_char` near the end is
#   not guarded against next0_ being None.
# NOTE(review): `pli[j].value[0]` presumably relies on NUMBER items never having
#   an empty value -- verify against PhoneItemToken.
def __try_attach_(self, pli : typing.List['PhoneItemToken'], ind : int, is_phone_before : bool, prev_phone : 'PhoneReferent', lev : int=0) -> 'ReferentToken': if (ind >= len(pli) or lev > 4): return None country_code = None city_code = None j = ind if (prev_phone is not None and prev_phone._m_template is not None and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER): tmp = io.StringIO() jj = j first_pass3391 = True while True: if first_pass3391: first_pass3391 = False else: jj += 1 if (not (jj < len(pli))): break if (pli[jj].item_type == PhoneItemToken.PhoneItemType.NUMBER): print(len(pli[jj].value), end="", file=tmp) elif (pli[jj].item_type == PhoneItemToken.PhoneItemType.DELIM): if (pli[jj].value == " "): break print(pli[jj].value, end="", file=tmp) continue else: break templ0 = Utils.toStringStringIO(tmp) if (templ0 == prev_phone._m_template): if ((jj + 1) < len(pli)): if (pli[jj + 1].item_type == PhoneItemToken.PhoneItemType.PREFIX and (jj + 2) == len(pli)): pass else: del pli[jj + 1:jj + 1+len(pli) - jj - 1] break if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.COUNTRYCODE): country_code = pli[j].value if (country_code != "8"): cc = PhoneHelper.get_country_prefix(country_code) if (cc is not None and (len(cc) < len(country_code))): city_code = country_code[len(cc):] country_code = cc j += 1 elif ((j < len(pli)) and pli[j].can_be_country_prefix): k = j + 1 if ((k < len(pli)) and pli[k].item_type == PhoneItemToken.PhoneItemType.DELIM): k += 1 rrt = self.__try_attach_(pli, k, is_phone_before, None, lev + 1) if (rrt is not None): if ((((is_phone_before and pli[j + 1].item_type == PhoneItemToken.PhoneItemType.DELIM and pli[j + 1].begin_token.is_hiphen) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and len(pli[j].value) == 3) and ((j + 2) < len(pli)) and pli[j + 2].item_type == PhoneItemToken.PhoneItemType.NUMBER) and len(pli[j + 2].value) == 3): pass else: country_code = pli[j].value j += 1 if (((j < len(pli)) and 
pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and ((pli[j].value[0] == '8' or pli[j].value[0] == '7'))) and country_code is None): if (len(pli[j].value) == 1): country_code = pli[j].value j += 1 elif (len(pli[j].value) == 4): country_code = pli[j].value[0:0+1] if (city_code is None): city_code = pli[j].value[1:] else: city_code += pli[j].value[1:] j += 1 elif (len(pli[j].value) == 11 and j == (len(pli) - 1) and is_phone_before): ph0 = PhoneReferent() if (pli[j].value[0] != '8'): ph0.country_code = pli[j].value[0:0+1] ph0.number = pli[j].value[1:1+3] + pli[j].value[4:] return ReferentToken(ph0, pli[0].begin_token, pli[j].end_token) elif (city_code is None and len(pli[j].value) > 3 and ((j + 1) < len(pli))): sum0_ = 0 for it in pli: if (it.item_type == PhoneItemToken.PhoneItemType.NUMBER): sum0_ += len(it.value) if (sum0_ == 11): city_code = pli[j].value[1:] j += 1 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.CITYCODE): if (city_code is None): city_code = pli[j].value else: city_code += pli[j].value j += 1 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM): j += 1 if ((country_code == "8" and city_code is None and ((j + 3) < len(pli))) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER): if (len(pli[j].value) == 3 or len(pli[j].value) == 4): city_code = pli[j].value j += 1 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM): j += 1 normal_num_len = 0 if (country_code == "421"): normal_num_len = 9 num = io.StringIO() templ = io.StringIO() part_length = list() delim = None ok = False additional = None std = False if (country_code is not None and ((j + 4) < len(pli)) and j > 0): if (((((pli[j - 1].value == "-" or pli[j - 1].item_type == PhoneItemToken.PhoneItemType.COUNTRYCODE)) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and pli[j + 1].item_type == PhoneItemToken.PhoneItemType.DELIM) and pli[j + 2].item_type == PhoneItemToken.PhoneItemType.NUMBER and 
pli[j + 3].item_type == PhoneItemToken.PhoneItemType.DELIM) and pli[j + 4].item_type == PhoneItemToken.PhoneItemType.NUMBER): if ((((len(pli[j].value) + len(pli[j + 2].value)) == 6 or ((len(pli[j].value) == 4 and len(pli[j + 2].value) == 5)))) and ((len(pli[j + 4].value) == 4 or len(pli[j + 4].value) == 1))): print(pli[j].value, end="", file=num) print(pli[j + 2].value, end="", file=num) print(pli[j + 4].value, end="", file=num) print("{0}{1}{2}{3}{4}".format(len(pli[j].value), pli[j + 1].value, len(pli[j + 2].value), pli[j + 3].value, len(pli[j + 4].value)), end="", file=templ, flush=True) std = True ok = True j += 5 first_pass3392 = True while True: if first_pass3392: first_pass3392 = False else: j += 1 if (not (j < len(pli))): break if (std): break if (pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM): if (pli[j].is_in_brackets): continue if (j > 0 and pli[j - 1].is_in_brackets): continue if (templ.tell() > 0): print(pli[j].value, end="", file=templ) if (delim is None): delim = pli[j].value elif (pli[j].value != delim): if ((len(part_length) == 2 and ((part_length[0] == 3 or part_length[0] == 4)) and city_code is None) and part_length[1] == 3): city_code = Utils.toStringStringIO(num)[0:0+part_length[0]] Utils.removeStringIO(num, 0, part_length[0]) del part_length[0] delim = pli[j].value continue if (is_phone_before and ((j + 1) < len(pli)) and pli[j + 1].item_type == PhoneItemToken.PhoneItemType.NUMBER): if (num.tell() < 6): continue if (normal_num_len > 0 and (num.tell() + len(pli[j + 1].value)) == normal_num_len): continue break else: continue ok = False elif (pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER): if (num.tell() == 0 and pli[j].begin_token.previous is not None and pli[j].begin_token.previous.is_table_control_char): tt = pli[len(pli) - 1].end_token.next0_ if (tt is not None and tt.is_char_of(",.")): tt = tt.next0_ if (isinstance(tt, NumberToken)): return None if ((num.tell() + len(pli[j].value)) > 13): if (j > 0 and pli[j - 
1].item_type == PhoneItemToken.PhoneItemType.DELIM): j -= 1 ok = True break print(pli[j].value, end="", file=num) part_length.append(len(pli[j].value)) print(len(pli[j].value), end="", file=templ) ok = True if (num.tell() > 10): j += 1 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.ADDNUMBER): additional = pli[j].value j += 1 break elif (pli[j].item_type == PhoneItemToken.PhoneItemType.ADDNUMBER): additional = pli[j].value j += 1 break else: break if ((j == (len(pli) - 1) and pli[j].is_in_brackets and ((len(pli[j].value) == 3 or len(pli[j].value) == 4))) and additional is None): additional = pli[j].value j += 1 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.PREFIX and pli[j].is_in_brackets): is_phone_before = True j += 1 if ((country_code is None and city_code is not None and len(city_code) > 3) and (num.tell() < 8) and city_code[0] != '8'): if ((len(city_code) + num.tell()) == 10): pass else: cc = PhoneHelper.get_country_prefix(city_code) if (cc is not None): if (len(cc) > 1 and (len(city_code) - len(cc)) > 1): country_code = cc city_code = city_code[len(cc):] if (country_code is None and city_code is not None and city_code.startswith("00")): cc = PhoneHelper.get_country_prefix(city_code[2:]) if (cc is not None): if (len(city_code) > (len(cc) + 3)): country_code = cc city_code = city_code[len(cc) + 2:] if (num.tell() == 0 and city_code is not None): if (len(city_code) == 10): print(city_code[3:], end="", file=num) part_length.append(num.tell()) city_code = city_code[0:0+3] ok = True elif (((len(city_code) == 9 or len(city_code) == 11 or len(city_code) == 8)) and ((is_phone_before or country_code is not None))): print(city_code, end="", file=num) part_length.append(num.tell()) city_code = (None) ok = True if (num.tell() < 4): ok = False if (num.tell() < 7): if (city_code is not None and (len(city_code) + num.tell()) > 7): if (not is_phone_before and len(city_code) == 3): ii = 0 ii = 0 while ii < len(part_length): if 
(part_length[ii] == 3): pass elif (part_length[ii] > 3): break elif ((ii < (len(part_length) - 1)) or (part_length[ii] < 2)): break ii += 1 if (ii >= len(part_length)): if (country_code == "61"): pass else: ok = False elif (((num.tell() == 6 or num.tell() == 5)) and ((len(part_length) >= 1 and len(part_length) <= 3)) and is_phone_before): if (pli[0].item_type == PhoneItemToken.PhoneItemType.PREFIX and pli[0].kind == PhoneKind.HOME): ok = False elif (prev_phone is not None and prev_phone.number is not None and ((len(prev_phone.number) == num.tell() or len(prev_phone.number) == (num.tell() + 3) or len(prev_phone.number) == (num.tell() + 4)))): pass elif (num.tell() > 4 and prev_phone is not None and Utils.toStringStringIO(templ) == prev_phone._m_template): ok = True else: ok = False if (delim == "." and country_code is None and city_code is None): ok = False if ((is_phone_before and country_code is None and city_code is None) and num.tell() > 10): cc = PhoneHelper.get_country_prefix(Utils.toStringStringIO(num)) if (cc is not None): if ((num.tell() - len(cc)) == 9): country_code = cc Utils.removeStringIO(num, 0, len(cc)) ok = True if (ok): if (std): pass elif (prev_phone is not None and prev_phone.number is not None and (((len(prev_phone.number) == num.tell() or len(prev_phone.number) == (num.tell() + 3) or len(prev_phone.number) == (num.tell() + 4)) or prev_phone._m_template == Utils.toStringStringIO(templ)))): pass elif ((len(part_length) == 3 and part_length[0] == 3 and part_length[1] == 2) and part_length[2] == 2): pass elif (len(part_length) == 3 and is_phone_before): pass elif ((len(part_length) == 4 and (((part_length[0] + part_length[1]) == 3)) and part_length[2] == 2) and part_length[3] == 2): pass elif ((len(part_length) == 4 and part_length[0] == 3 and part_length[1] == 3) and part_length[2] == 2 and part_length[3] == 2): pass elif (len(part_length) == 5 and (part_length[1] + part_length[2]) == 4 and (part_length[3] + part_length[4]) == 4): pass elif 
(len(part_length) > 4): ok = False elif (len(part_length) > 3 and city_code is not None): ok = False elif ((is_phone_before or city_code is not None or country_code is not None) or additional is not None): ok = True else: ok = False if (((num.tell() == 6 or num.tell() == 7)) and (len(part_length) < 4) and j > 0): next_ph = self.__get_next_phone(pli[j - 1].end_token.next0_, lev + 1) if (next_ph is not None): d = len(next_ph.number) - num.tell() if (d == 0 or d == 3 or d == 4): ok = True end = (pli[j - 1].end_token if j > 0 else None) if (end is None): ok = False if ((ok and city_code is None and country_code is None) and prev_phone is None and not is_phone_before): if (not end.is_whitespace_after and end.next0_ is not None): tt = end.next0_ if (tt.is_char_of(".,)") and tt.next0_ is not None): tt = tt.next0_ if (not tt.is_whitespace_before): ok = False if (not ok): return None if (templ.tell() > 0 and not str.isdigit(Utils.getCharAtStringIO(templ, templ.tell() - 1))): Utils.setLengthStringIO(templ, templ.tell() - 1) if ((country_code is None and city_code is not None and len(city_code) > 3) and num.tell() > 6): cc = PhoneHelper.get_country_prefix(city_code) if (cc is not None and ((len(cc) + 1) < len(city_code))): country_code = cc city_code = city_code[len(cc):] if (pli[0].begin_token.previous is not None): if (pli[0].begin_token.previous.is_value("ГОСТ", None) or pli[0].begin_token.previous.is_value("ТУ", None)): return None ph = PhoneReferent() if (country_code is not None): ph.country_code = country_code number = Utils.toStringStringIO(num) if ((city_code is None and num.tell() > 7 and len(part_length) > 0) and (part_length[0] < 5)): city_code = number[0:0+part_length[0]] number = number[part_length[0]:] if (city_code is None and num.tell() == 11 and Utils.getCharAtStringIO(num, 0) == '8'): city_code = number[1:1+3] number = number[4:] if (city_code is None and num.tell() == 10): city_code = number[0:0+3] number = number[3:] if (city_code is not None): number = 
(city_code + number) elif (country_code is None and prev_phone is not None): ok1 = False if (len(prev_phone.number) >= (len(number) + 2)): ok1 = True elif (templ.tell() > 0 and prev_phone._m_template is not None and LanguageHelper.ends_with(prev_phone._m_template, Utils.toStringStringIO(templ))): ok1 = True if (ok1 and len(prev_phone.number) > len(number)): number = (prev_phone.number[0:0+len(prev_phone.number) - len(number)] + number) if (ph.country_code is None and prev_phone is not None and prev_phone.country_code is not None): if (len(prev_phone.number) == len(number)): ph.country_code = prev_phone.country_code ok = False for d in number: if (d != '0'): ok = True break if (not ok): return None if (country_code is not None): if (len(number) < 7): return None else: s = PhoneHelper.get_country_prefix(number) if (s is not None): num2 = number[len(s):] if (len(num2) >= 10 and len(num2) <= 11): number = num2 if (s != "7"): ph.country_code = s if (len(number) == 8 and prev_phone is None): return None if (len(number) > 11): if ((len(number) < 14) and ((country_code == "1" or country_code == "43"))): pass else: return None ph.number = number if (additional is not None): ph.add_slot(PhoneReferent.ATTR_ADDNUMBER, additional, True, 0) if (not is_phone_before and end.next0_ is not None and not end.is_newline_after): if (end.next0_.is_char_of("+=") or end.next0_.is_hiphen): return None if (country_code is not None and country_code == "7"): if (len(number) != 10): return None ph._m_template = Utils.toStringStringIO(templ) if (j == (len(pli) - 1) and pli[j].item_type == PhoneItemToken.PhoneItemType.PREFIX and not pli[j].is_newline_before): end = pli[j].end_token if (pli[j].kind != PhoneKind.UNDEFINED): ph.kind = pli[j].kind res = ReferentToken(ph, pli[0].begin_token, end) if (pli[0].item_type == PhoneItemToken.PhoneItemType.PREFIX and pli[0].end_token.next0_.is_table_control_char): res.begin_token = pli[1].begin_token return res
def __toFullString(self, last_name_first : bool, lang : 'MorphLang') -> str:
    """Build the display form of this person's name.

    Resolution order:
      1. if any ATTR_IDENTITY slot exists, the longest identity text is used;
      2. otherwise the last name is combined with the first / middle names
         that ``__findForSurname`` returns for it, optionally followed by a
         bracketed variant built from a last-name slot that starts with a
         Latin character;
      3. otherwise the first name (plus middle name), prefixed by the value
         of ``__findShortestKingTitul`` and suffixed by the nickname;
      4. otherwise ``"?"``.

    Args:
        last_name_first: put the last name before the other name parts.
            Forced to True when the "NAMETYPE" value is "china".
        lang: not used by this method.

    Returns:
        The formatted name, or "?" when neither a last name nor a first name
        is stored.
    """
    # max() keeps the first of several equally long identities, exactly like
    # a "strictly longer wins" scan.
    identity = max(
        (str(slot.value) for slot in self.slots
         if slot.type_name == PersonReferent.ATTR_IDENTITY),
        key=len, default=None)
    if identity is not None:
        return MiscHelper.convertFirstCharUpperAndOtherLower(identity)

    if self.getStringValue("NAMETYPE") == "china":
        last_name_first = True

    def _name_part(part) -> str:
        # A part accepted by __isInitial is closed with '.', any other with ' '.
        return "{0}{1}".format(part, '.' if PersonReferent.__isInitial(part) else ' ')

    last_name = self.getStringValue(PersonReferent.ATTR_LASTNAME)
    if last_name is not None:
        pieces = []
        if last_name_first:
            pieces.append("{0} ".format(last_name))
        first = self.__findForSurname(PersonReferent.ATTR_FIRSTNAME, last_name, False)
        if first is not None:
            pieces.append(_name_part(first))
            # The middle name is only looked up when a first name was found.
            middle = self.__findForSurname(PersonReferent.ATTR_MIDDLENAME, last_name, False)
            if middle is not None:
                pieces.append(_name_part(middle))
        text = "".join(pieces)
        if not last_name_first:
            text += last_name
        elif text.endswith(' '):
            # Drop the separator left behind by the last emitted part.
            text = text[:-1]

        # Guard against an empty last name before looking at its first char.
        if last_name and LanguageHelper.isCyrillicChar(last_name[0]):
            latin_last = None
            for slot in self.slots:
                if slot.type_name != PersonReferent.ATTR_LASTNAME:
                    continue
                candidate = Utils.asObjectOrNull(slot.value, str)
                # candidate may be None (non-string slot value) or empty;
                # both are skipped instead of raising.
                if candidate and LanguageHelper.isLatinChar(candidate[0]):
                    latin_last = candidate
                    break
            if latin_last is not None:
                latin_first = self.__findForSurname(PersonReferent.ATTR_FIRSTNAME, latin_last, False)
                if latin_first is None:
                    text += " ({0})".format(latin_last)
                elif PersonReferent.SHOW_LASTNAME_ON_FIRST_POSITION:
                    text += " ({0} {1})".format(latin_last, latin_first)
                else:
                    text += " ({0} {1})".format(latin_first, latin_last)
        return MiscHelper.convertFirstCharUpperAndOtherLower(text)

    name = self.getStringValue(PersonReferent.ATTR_FIRSTNAME)
    if name is None:
        return "?"
    middle = self.__findForSurname(PersonReferent.ATTR_MIDDLENAME, name, False)
    if middle is not None:
        name = "{0} {1}".format(name, middle)
    # Only the name itself is re-cased; title and nickname are added verbatim.
    name = MiscHelper.convertFirstCharUpperAndOtherLower(name)
    nickname = self.getStringValue(PersonReferent.ATTR_NICKNAME)
    title = self.__findShortestKingTitul(False)
    if title is not None:
        name = "{0} {1}".format(title, name)
    if nickname is not None:
        name = "{0} {1}".format(name, nickname)
    return name
def toString(self, short_variant: bool, lang: 'MorphLang' = None, lev: int = 0) -> str:
    """Render the address as a single line of text.

    The pieces are emitted in a fixed order: bracketed detail, streets (or
    metro when there are no streets), kilometer, house, corpus, building,
    the simple numbered parts, FIAS GUIDs, BTI, geo objects and finally zip.

    Args:
        short_variant: not used by this method.
        lang: passed on to the nested toString calls and to the detail
            value conversion.
        lev: nesting level; geo objects are rendered with ``lev + 1``.

    Returns:
        The assembled text with surrounding whitespace stripped.
    """
    res = io.StringIO()
    emit = res.write

    # "[detail, param]" prefix.
    detail = self.getStringValue(AddressReferent.ATTR_DETAIL)
    if detail is not None:
        detail = Utils.asObjectOrNull(
            MetaAddress._global_meta.detail_feature.convertInnerValueToOuterValue(detail, lang), str)
    if detail is not None:
        emit("[{0}".format(detail.lower()))
        detail_param = self.getStringValue(AddressReferent.ATTR_DETAILPARAM)
        if detail_param is not None:
            emit(", {0}".format(detail_param))
        emit(']')

    # Streets, or the metro when no street is present.
    street_list = self.streets
    if len(street_list) > 0:
        if res.tell() > 0:
            emit(' ')
        emit(", ".join(str(street.toString(True, lang, 0)) for street in street_list))
    elif self.metro is not None:
        if res.tell() > 0:
            emit(' ')
        emit(str(Utils.ifNotNull(self.metro, "")))

    if self.kilometer is not None:
        emit(" {0}км.".format(self.kilometer))

    # A stored "0" is shown as "Б/Н" for house, corpus and building.
    if self.house is not None:
        house_kind = self.house_type
        emit(" влад." if house_kind == AddressHouseType.ESTATE
             else " домовл." if house_kind == AddressHouseType.HOUSEESTATE
             else " д.")
        emit(str("Б/Н" if self.house == "0" else self.house))
    if self.corpus is not None:
        emit(" корп.{0}".format("Б/Н" if self.corpus == "0" else self.corpus))
    if self.building is not None:
        building_kind = self.building_type
        emit(" сооруж." if building_kind == AddressBuildingType.CONSTRUCTION
             else " лит." if building_kind == AddressBuildingType.LITER
             else " стр.")
        emit(str("Б/Н" if self.building == "0" else self.building))

    # Parts that are just "<label><value>", in output order.
    labelled_parts = (
        (" под.{0}", "potch"),
        (" эт.{0}", "floor0_"),
        (" кв.{0}", "flat"),
        (" корп.(кв.?){0}", "corpus_or_flat"),
        (" оф.{0}", "office"),
        (" блок {0}", "block"),
        (" уч.{0}", "plot"),
        (" бокс {0}", "box"),
        (" а\\я{0}", "post_office_box"),
        (" ГСП-{0}", "csp"),
    )
    for template, attr_name in labelled_parts:
        part_value = getattr(self, attr_name)
        if part_value is not None:
            emit(template.format(part_value))

    # FIAS: the primary referent first, then every other FIAS referent slot.
    fias = self.getSlotValue(AddressReferent.ATTR_FIAS)
    if isinstance(fias, Referent):
        emit(" (ФИАС: {0}".format(Utils.ifNotNull(fias.getStringValue("GUID"), "?")))
        for slot in self.slots:
            if (slot.type_name == AddressReferent.ATTR_FIAS
                    and isinstance(slot.value, Referent) and slot.value != fias):
                emit(", {0}".format(Utils.ifNotNull(slot.value.getStringValue("GUID"), "?")))
        emit(')')

    bti_value = self.getStringValue(AddressReferent.ATTR_BTI)
    if bti_value is not None:
        emit(" (БТИ {0})".format(bti_value))

    for geo in self.geos:
        # Remove one trailing blank, then separate with ';' unless the text
        # so far is empty or ends with the closing bracket of the detail.
        if res.tell() > 0 and Utils.getCharAtStringIO(res, res.tell() - 1) == ' ':
            Utils.setLengthStringIO(res, res.tell() - 1)
        if res.tell() > 0 and Utils.getCharAtStringIO(res, res.tell() - 1) != ']':
            emit(';')
        emit(" {0}".format(geo.toString(True, lang, lev + 1)))

    if self.zip0_ is not None:
        emit("; {0}".format(self.zip0_))
    return Utils.toStringStringIO(res).strip()
def process(self, kit : 'AnalysisKit') -> None:
    """Main analysis entry point: walk the tokens of ``kit`` and embed keyword referents.

    Steps, in order:
      1. make sure a DenominationAnalyzer has run (one is created and run
         here when no non-ignored instance is registered on the processor);
      2. count the tokens (``max0_``), used together with the running index
         ``cur`` when ranking keywords;
      3. first pass: tokens that already carry a referent go to
         ``__add_referents``; verbs without a noun phrase become PREDICATE
         keywords; each suitable word of a noun phrase becomes an OBJECT
         keyword, and phrases with more than one such word additionally get
         a combined OBJECT keyword;
      4. second pass: join neighbouring OBJECT keywords ("X OF [article] Y"
         or a following genitive) through ``KeywordReferent._union``;
      5. optionally sort the referents by rank and register an annotation.

    Args:
        kit: analysis container; its token chain is modified in place by
            ``kit.embed_token``.
    """
    # Main extraction function (the original comment said "phones", but the
    # code below builds KeywordReferent objects).
    ad = kit.get_analyzer_data(self)
    has_denoms = False
    for a in kit.processor.analyzers:
        if ((isinstance(a, DenominationAnalyzer)) and not a.ignore_this_analyzer):
            has_denoms = True
    if (not has_denoms):
        a = DenominationAnalyzer()
        a.process(kit)
    li = list()  # keywords collected for the current noun phrase
    tmp = io.StringIO()  # scratch buffer, reset before each use
    tmp2 = list()  # normalized parts of the current phrase
    # Total number of tokens.
    max0_ = 0
    t = kit.first_token
    while t is not None:
        max0_ += 1
        t = t.next0_
    cur = 0
    t = kit.first_token
    # --- first pass: single-word and noun-phrase keywords ---
    first_pass3292 = True
    while True:
        # Flag-driven loop header so that every `continue` still advances t and cur.
        if first_pass3292: first_pass3292 = False
        else: t = t.next0_; cur += 1
        if (not (t is not None)): break
        r = t.get_referent()
        if (r is not None):
            # Token already holds a referent: delegate, continue from the returned token.
            t = self.__add_referents(ad, t, cur, max0_)
            continue
        if (not (isinstance(t, TextToken))):
            continue
        # Only letter tokens of at least 3 characters are considered.
        if (not t.chars.is_letter or (t.length_char < 3)):
            continue
        term = t.term
        if (term == "ЕСТЬ"):
            # Kept only when the previous text token has a verb morph class.
            if ((isinstance(t.previous, TextToken)) and t.previous.morph.class0_.is_verb):
                pass
            else:
                continue
        npt = None
        npt = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.ADJECTIVECANBELAST) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), 0, None)
        if (npt is None):
            # No noun phrase here: a dictionary verb may become a PREDICATE keyword.
            mc = t.get_morph_class_in_dictionary()
            if (mc.is_verb and not mc.is_preposition):
                if (t.is_verb_be):
                    continue
                if (t.is_value("МОЧЬ", None) or t.is_value("WOULD", None)):
                    continue
                kref = KeywordReferent._new1595(KeywordType.PREDICATE)
                norm = t.get_normal_case_text(MorphClass.VERB, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                if (norm is None):
                    norm = t.lemma
                # Cut the last two characters of a form ending in "ЬСЯ".
                if (norm.endswith("ЬСЯ")):
                    norm = norm[0:0+len(norm) - 2]
                kref.add_slot(KeywordReferent.ATTR_VALUE, norm, False, 0)
                drv = DerivateService.find_derivates(norm, True, t.morph.language)
                KeywordAnalyzer.__add_normals(kref, drv, norm)
                kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
                KeywordAnalyzer.__set_rank(kref, cur, max0_)
                # NOTE(review): kref is passed through register_referent a second time here.
                rt1 = ReferentToken._new734(ad.register_referent(kref), t, t, t.morph)
                kit.embed_token(rt1)
                t = (rt1)
                continue
            continue
        if (npt.internal_noun is not None):
            continue
        # Skip phrases ending in "ЦЕЛОМ" / "ЧАСТНОСТИ" when they carry a preposition.
        if (npt.end_token.is_value("ЦЕЛОМ", None) or npt.end_token.is_value("ЧАСТНОСТИ", None)):
            if (npt.preposition is not None):
                t = npt.end_token
                continue
        # Skip phrases ending in "СТОРОНЫ" with the preposition "С".
        if (npt.end_token.is_value("СТОРОНЫ", None) and npt.preposition is not None and npt.preposition.normal == "С"):
            t = npt.end_token
            continue
        if (npt.begin_token == npt.end_token):
            # One-token phrase: drop prepositions and the adverb "ПОТОМ".
            mc = t.get_morph_class_in_dictionary()
            if (mc.is_preposition):
                continue
            elif (mc.is_adverb):
                if (t.is_value("ПОТОМ", None)):
                    continue
            else:
                pass
        li.clear()
        t0 = t  # first token of the span covered by the combined keyword
        tt = t
        first_pass3293 = True
        while True:
            # Walk the tokens of the noun phrase (bounded by npt.end_char).
            if first_pass3293: first_pass3293 = False
            else: tt = tt.next0_
            if (not (tt is not None and tt.end_char <= npt.end_char)): break
            if (not (isinstance(tt, TextToken))):
                continue
            # No-op branch; has no effect on the result.
            if (tt.is_value("NATURAL", None)):
                pass
            if ((tt.length_char < 3) or not tt.chars.is_letter):
                continue
            mc = tt.get_morph_class_in_dictionary()
            # Function words are skipped, except "ОТНОШЕНИЕ".
            if ((mc.is_preposition or mc.is_pronoun or mc.is_personal_pronoun) or mc.is_conjunction):
                if (tt.is_value("ОТНОШЕНИЕ", None)):
                    pass
                else:
                    continue
            if (mc.is_misc):
                if (MiscHelper.is_eng_article(tt)):
                    continue
            kref = KeywordReferent._new1595(KeywordType.OBJECT)
            norm = tt.lemma
            kref.add_slot(KeywordReferent.ATTR_VALUE, norm, False, 0)
            if (norm != "ЕСТЬ"):
                drv = DerivateService.find_derivates(norm, True, tt.morph.language)
                KeywordAnalyzer.__add_normals(kref, drv, norm)
            kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
            KeywordAnalyzer.__set_rank(kref, cur, max0_)
            rt1 = ReferentToken._new734(kref, tt, tt, tt.morph)
            kit.embed_token(rt1)
            # When the very first phrase token was wrapped, the combined span
            # must start at the wrapper rather than at the wrapped token.
            if (tt == t and len(li) == 0):
                t0 = (rt1)
            t = (rt1)
            li.append(kref)
        if (len(li) > 1):
            # Several keywords in one phrase: add a combined OBJECT keyword
            # that references each of them.
            kref = KeywordReferent._new1595(KeywordType.OBJECT)
            Utils.setLengthStringIO(tmp, 0)
            tmp2.clear()
            has_norm = False  # assigned below but never read in this method
            for kw in li:
                s = kw.get_string_value(KeywordReferent.ATTR_VALUE)
                if (tmp.tell() > 0):
                    print(' ', end="", file=tmp)
                print(s, end="", file=tmp)
                n = kw.get_string_value(KeywordReferent.ATTR_NORMAL)
                if (n is not None):
                    has_norm = True
                    tmp2.append(n)
                else:
                    tmp2.append(s)
                kref.add_slot(KeywordReferent.ATTR_REF, kw, False, 0)
            val = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
            kref.add_slot(KeywordReferent.ATTR_VALUE, val, False, 0)
            # The normal form is the sorted, space-joined list of part normals.
            Utils.setLengthStringIO(tmp, 0)
            tmp2.sort()
            for s in tmp2:
                if (tmp.tell() > 0):
                    print(' ', end="", file=tmp)
                print(s, end="", file=tmp)
            norm = Utils.toStringStringIO(tmp)
            if (norm != val):
                kref.add_slot(KeywordReferent.ATTR_NORMAL, norm, False, 0)
            kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
            KeywordAnalyzer.__set_rank(kref, cur, max0_)
            rt1 = ReferentToken._new734(kref, t0, t, npt.morph)
            kit.embed_token(rt1)
            t = (rt1)
    # --- second pass: unite neighbouring OBJECT keywords ---
    cur = 0
    t = kit.first_token
    first_pass3294 = True
    while True:
        if first_pass3294: first_pass3294 = False
        else: t = t.next0_; cur += 1
        if (not (t is not None)): break
        kw = Utils.asObjectOrNull(t.get_referent(), KeywordReferent)
        if (kw is None or kw.typ != KeywordType.OBJECT):
            continue
        if (t.next0_ is None or kw.child_words > 2):
            continue
        t1 = t.next0_
        if (t1.is_value("OF", None) and (t1.whitespaces_after_count < 3) and t1.next0_ is not None):
            # "X OF [article] Y": step over "OF" and one optional English article.
            t1 = t1.next0_
            if ((isinstance(t1, TextToken)) and MiscHelper.is_eng_article(t1) and t1.next0_ is not None):
                t1 = t1.next0_
        elif (not t1.morph.case_.is_genitive or t.whitespaces_after_count > 1):
            # Otherwise the next token must be genitive and at most one whitespace away.
            continue
        kw2 = Utils.asObjectOrNull(t1.get_referent(), KeywordReferent)
        if (kw2 is None):
            continue
        if (kw == kw2):
            continue
        # The united keyword may span at most 3 child words.
        if (kw2.typ != KeywordType.OBJECT or (kw.child_words + kw2.child_words) > 3):
            continue
        kw_un = KeywordReferent()
        kw_un._union(kw, kw2, MiscHelper.get_text_value(t1, t1, GetTextAttr.NO))
        kw_un = (Utils.asObjectOrNull(ad.register_referent(kw_un), KeywordReferent))
        KeywordAnalyzer.__set_rank(kw_un, cur, max0_)
        rt1 = ReferentToken._new734(kw_un, t, t1, t.morph)
        kit.embed_token(rt1)
        t = (rt1)
    if (KeywordAnalyzer.SORT_KEYWORDS_BY_RANK):
        # Highest rank first.
        all0_ = list(ad.referents)
        all0_.sort(key=operator.attrgetter('rank'), reverse=True)
        ad.referents = all0_
    if (KeywordAnalyzer.ANNOTATION_MAX_SENTENCES > 0):
        ano = AutoannoSentToken.create_annotation(kit, KeywordAnalyzer.ANNOTATION_MAX_SENTENCES)
        if (ano is not None):
            ad.register_referent(ano)
def _to_string(self, short_variant: bool, lang: 'MorphLang', lev: int, from_range: int) -> str:
    """Render this date as text.

    Relative dates (``self.is_relative``) are described through their slots
    ("текущий год", "3 день назад", ...) and returned early. Absolute dates
    are built as ``[range prefix][pointer] [weekday, ][day] [month] [year]
    [time]``; a date that has only a century is returned as a Roman numeral
    with a century word.

    Args:
        short_variant: use the short year suffix; when False the
            DateRelHelper suffix is appended for relative dates.
        lang: output language; ``None`` is treated as ``MorphLang.RU``.
        lev: not used by this method.
        from_range: 0 - stand-alone date, 1 - start of a range ("с"/"з"/
            "from" prefix), 2 - end of a range ("по"/"to" prefix).

    Returns:
        The textual form, or "?" when nothing could be rendered.
    """
    from pullenti.ner.date.internal.DateRelHelper import DateRelHelper

    res = io.StringIO()

    def emit(text) -> None:
        # Append to the buffer the way print(..., end="") does (str() applied).
        print(text, end="", file=res)

    def slot_int(slot) -> int:
        # Utils.tryParseInt reports through the wrapper, which is pre-set to 0.
        parsed = RefOutArgWrapper(0)
        Utils.tryParseInt(Utils.asObjectOrNull(slot.value, str), parsed)
        return parsed.value

    p = self.pointer
    if lang is None:
        lang = MorphLang.RU

    if self.is_relative:
        if self.pointer == DatePointerType.TODAY:
            emit("сейчас")
            if not short_variant:
                DateRelHelper.append_to_string(self, res)
            return Utils.toStringStringIO(res)
        word = None
        val = 0
        back = False
        is_local_rel = self.get_string_value(DateReferent.ATTR_ISRELATIVE) == "true"
        for s in self.slots:
            kind = s.type_name
            if kind == DateReferent.ATTR_CENTURY:
                word = "век"
                val = slot_int(s)
            elif kind == DateReferent.ATTR_YEAR:
                word = "год"
                val = slot_int(s)
            elif kind == DateReferent.ATTR_MONTH:
                word = "месяц"
                val = slot_int(s)
                if not is_local_rel and val >= 1 and val <= 12:
                    emit(DateReferent.__m_month0[val - 1])
            elif kind == DateReferent.ATTR_DAY:
                word = "день"
                val = slot_int(s)
                if ((not is_local_rel and self.month > 0 and self.month <= 12)
                        and self.higher is not None
                        and self.higher.get_string_value(DateReferent.ATTR_ISRELATIVE) != "true"):
                    emit("{0} {1}".format(val, DateReferent.__m_month[self.month - 1]))
                elif not is_local_rel:
                    emit("{0} число".format(val))
            elif kind == DateReferent.ATTR_QUARTAL:
                word = "квартал"
                val = slot_int(s)
            elif kind == DateReferent.ATTR_WEEK:
                word = "неделя"
                val = slot_int(s)
            elif kind == DateReferent.ATTR_HOUR:
                word = "час"
                val = slot_int(s)
                if not is_local_rel:
                    emit("{0}:{1}".format("{:02d}".format(val), "{:02d}".format(self.minute)))
            elif kind == DateReferent.ATTR_MINUTE:
                word = "минута"
                val = slot_int(s)
            elif kind == DateReferent.ATTR_DAYOFWEEK:
                val = slot_int(s)
                if not is_local_rel:
                    emit(DateReferent.__m_week_day_ex[val - 1] if val >= 1 and val <= 7 else "?")
                else:
                    if val < 0:
                        val = -val
                        back = True
                    # NOTE(review): val == 0 passes this check and indexes
                    # [-1], i.e. the last table entry - confirm that 0 cannot
                    # occur here before tightening the range.
                    if val >= 0 and val <= 7:
                        if val == 7:
                            adjective = "прошлое" if back else "будущее"
                        elif val == 3 or val == 6:
                            adjective = "прошлая" if back else "будущая"
                        else:
                            adjective = "прошлый" if back else "будущий"
                        emit("{0} {1}".format(adjective, DateReferent.__m_week_day_ex[val - 1]))
                        break
        if word is not None and is_local_rel:
            if val == 0:
                emit("{0} {1}".format(
                    ("текущая" if word == "неделя" or word == "минута" else "текущий"), word))
            elif val > 0 and not back:
                emit("{0} {1} вперёд".format(
                    val, MiscHelper.get_text_morph_var_by_case_and_number_ex(
                        word, None, MorphNumber.UNDEFINED, str(val))))
            else:
                val = -val
                emit("{0} {1} назад".format(
                    val, MiscHelper.get_text_morph_var_by_case_and_number_ex(
                        word, None, MorphNumber.UNDEFINED, str(val))))
        elif not is_local_rel and res.tell() == 0:
            emit("{0} {1}".format(
                val, MiscHelper.get_text_morph_var_by_case_and_number_ex(
                    word, None, MorphNumber.UNDEFINED, str(val))))
        if not short_variant:
            DateRelHelper.append_to_string(self, res)
        # For relative dates the range prefix is inserted in front afterwards.
        if from_range == 1:
            Utils.insertStringIO(
                res, 0, "{0} ".format(("з" if lang.is_ua else ("from" if lang.is_en else "с"))))
        elif from_range == 2:
            Utils.insertStringIO(res, 0, ("to " if lang.is_en else "по "))
        return Utils.toStringStringIO(res)

    if from_range == 1:
        emit("{0} ".format(("з" if lang.is_ua else ("from" if lang.is_en else "с"))))
    elif from_range == 2:
        emit("to " if lang.is_en else "по ")

    if p != DatePointerType.NO:
        val = MetaDate.POINTER.convert_inner_value_to_outer_value(Utils.enumToString(p), lang)
        # Inside a non-English range the pointer word is replaced by an
        # inflected form that fits the range prefix.
        if from_range == 0 or lang.is_en:
            pass
        elif from_range == 1:
            if p == DatePointerType.BEGIN:
                val = ("початку" if lang.is_ua else "начала")
            elif p == DatePointerType.CENTER:
                val = ("середини" if lang.is_ua else "середины")
            elif p == DatePointerType.END:
                val = ("кінця" if lang.is_ua else "конца")
            elif p == DatePointerType.TODAY:
                val = ("цього часу" if lang.is_ua else "настоящего времени")
        elif from_range == 2:
            if p == DatePointerType.BEGIN:
                val = ("початок" if lang.is_ua else "начало")
            elif p == DatePointerType.CENTER:
                val = ("середину" if lang.is_ua else "середину")
            elif p == DatePointerType.END:
                val = ("кінець" if lang.is_ua else "конец")
            elif p == DatePointerType.TODAY:
                val = ("теперішній час" if lang.is_ua else "настоящее время")
        emit("{0} ".format(val))

    if self.day_of_week > 0:
        if lang.is_en:
            emit("{0}, ".format(DateReferent.__m_week_day_en[self.day_of_week - 1]))
        else:
            emit("{0}, ".format(DateReferent.__m_week_day[self.day_of_week - 1]))

    y = self.year
    m = self.month
    d = self.day
    cent = self.century

    if y == 0 and cent != 0:
        is_bc = cent < 0
        if cent < 0:
            cent = -cent
        emit(NumberHelper.get_number_roman(cent))
        # Fix: the English word used to be selected by lang.is_ua, which made
        # the Ukrainian variants below unreachable and gave English callers
        # the Russian word.
        if lang.is_en:
            emit(" century")
        elif m > 0 or p != DatePointerType.NO or from_range == 1:
            emit(" віка" if lang.is_ua else " века")
        else:
            emit(" вік" if lang.is_ua else " век")
        if is_bc:
            emit(" до н.е." if lang.is_ua else (" BC" if lang.is_en else " до н.э."))
        return Utils.toStringStringIO(res)

    if d > 0:
        emit(d)
    if m > 0 and m <= 12:
        if res.tell() > 0 and Utils.getCharAtStringIO(res, res.tell() - 1) != ' ':
            emit(' ')
        # The *0 tables are used only for a bare month (no day, pointer or range).
        bare_month = not (d > 0 or p != DatePointerType.NO or from_range != 0)
        if lang.is_ua:
            emit(DateReferent.__m_month0ua[m - 1] if bare_month else DateReferent.__m_monthua[m - 1])
        elif lang.is_en:
            emit(DateReferent.__m_monthen[m - 1])
        else:
            emit(DateReferent.__m_month0[m - 1] if bare_month else DateReferent.__m_month[m - 1])
    if y != 0:
        is_bc = y < 0
        if y < 0:
            y = -y
        if res.tell() > 0 and Utils.getCharAtStringIO(res, res.tell() - 1) != ' ':
            emit(' ')
        if lang.is_en:
            emit("{0}".format(y))
        elif short_variant:
            emit("{0}{1}".format(y, ("р" if lang.is_ua else "г")))
        elif m > 0 or p != DatePointerType.NO or from_range == 1:
            emit("{0} {1}".format(y, ("року" if lang.is_ua else "года")))
        else:
            emit("{0} {1}".format(y, ("рік" if lang.is_ua else "год")))
        if is_bc:
            # NOTE(review): the English suffix has no leading blank ("44BC");
            # left unchanged because callers may depend on the exact text.
            emit(" до н.е." if lang.is_ua else ("BC" if lang.is_en else " до н.э."))

    h = self.hour
    mi = self.minute
    se = self.second
    if h >= 0 and mi >= 0:
        if res.tell() > 0:
            emit(' ')
        emit("{0}:{1}".format("{:02d}".format(h), "{:02d}".format(mi)))
        if se >= 0:
            emit(":{0}".format("{:02d}".format(se)))

    if res.tell() == 0:
        if self.quartal != 0:
            emit("{0}-й квартал".format(self.quartal))
    if res.tell() == 0:
        return "?"
    # Strip trailing blanks / commas left by the prefix and weekday parts.
    while (Utils.getCharAtStringIO(res, res.tell() - 1) == ' '
           or Utils.getCharAtStringIO(res, res.tell() - 1) == ','):
        Utils.setLengthStringIO(res, res.tell() - 1)
    # NOTE(review): the relative branch above always returns, so this looks
    # unreachable unless is_relative changes between the two reads.
    if not short_variant and self.is_relative:
        DateRelHelper.append_to_string(self, res)
    return Utils.toStringStringIO(res).strip()
def attach_domain_name(t0: 'Token', check_: bool, can_be_whitspaces: bool) -> 'UriItemToken':
    """Try to read a domain name (or dotted IP-like sequence) starting at ``t0``.

    Number tokens and text tokens made of letters or one of ``. - _`` are
    concatenated (lower-cased). The collected text must contain at least one
    '.', must not start with '.', and must not contain two dots in a row; a
    single trailing '.' is cut off.

    Args:
        t0: first token of the candidate.
        check_: when True the result is accepted only if it is IP-like
            (exactly four DIGIT number tokens in 0..255, with '.' as the only
            other tokens) or its last token follows a '.' and is recognized
            by ``__m_std_groups``.
        can_be_whitspaces: allow a whitespace gap inside the name when the
            look-ahead after the gap reaches an entry of ``__m_std_groups``.

    Returns:
        A UriItemToken spanning the consumed tokens, or None.
    """
    def _chain(tok):
        # Follow next0_ lazily; the successor is read only when requested.
        while tok is not None:
            yield tok
            tok = tok.next0_

    def _gap_can_be_bridged(first) -> bool:
        # Look ahead from the token after a whitespace gap: succeed as soon
        # as a token is recognized by __m_std_groups, fail on anything that
        # cannot belong to a domain name.
        for look in _chain(first):
            if look.is_char('.') or look.is_hiphen:
                continue
            if look.is_whitespace_before:
                if look.is_newline_before:
                    return False
                before = look.previous
                # A gap is tolerated only directly after '.' or a hyphen.
                if not (before is not None and (before.is_char('.') or before.is_hiphen)):
                    return False
            if not isinstance(look, TextToken):
                return False
            if UriItemToken.__m_std_groups.try_parse(look, TerminParseAttr.NO) is not None:
                return True
            if not look.chars.is_latin_letter:
                return False
        return False

    txt = io.StringIO()
    t1 = t0  # last token accepted into the name
    ip_count = 0
    is_ip = True
    for t in _chain(t0):
        if t.is_whitespace_before and t != t0:
            if (t.is_newline_before or not can_be_whitspaces
                    or not _gap_can_be_bridged(t)):
                break
        if isinstance(t, NumberToken):
            if t.int_value is None:
                break
            print(t.get_source_text(), end="", file=txt)
            t1 = t
            if (t.typ == NumberSpellingType.DIGIT and t.int_value >= 0
                    and t.int_value < 256):
                ip_count += 1
            else:
                is_ip = False
            continue
        if not isinstance(t, TextToken):
            break
        src = t.term
        ch = src[0]
        if ch.isalpha():
            is_ip = False
        else:
            if ch not in ".-_":
                break
            if ch != '.':
                is_ip = False
            # Special case: stop at a hyphen that directly follows "vk.com".
            if (ch == '-' and Utils.compareStrings(
                    Utils.toStringStringIO(txt), "vk.com", True) == 0):
                return UriItemToken._new2706(t0, t1, Utils.toStringStringIO(txt).lower())
        print(src.lower(), end="", file=txt)
        t1 = t

    if txt.tell() == 0:
        return None
    if ip_count != 4:
        is_ip = False

    points = 0
    # The buffer is shortened only right before the break, so the length
    # taken once for range() stays valid for every index that is visited.
    for i in range(txt.tell()):
        if Utils.getCharAtStringIO(txt, i) != '.':
            continue
        if i == 0:
            return None
        if i >= txt.tell() - 1:
            # Trailing dot: drop it and step the end token back.
            Utils.setLengthStringIO(txt, txt.tell() - 1)
            t1 = t1.previous
            break
        if (Utils.getCharAtStringIO(txt, i - 1) == '.'
                or Utils.getCharAtStringIO(txt, i + 1) == '.'):
            return None
        points += 1
    if points == 0:
        return None

    if check_:
        ok = is_ip
        # NOTE(review): a text without dots was rejected above, so this
        # "localhost" comparison cannot succeed here.
        if not is_ip and Utils.toStringStringIO(txt) == "localhost":
            ok = True
        if not ok and t1.previous is not None and t1.previous.is_char('.'):
            if UriItemToken.__m_std_groups.try_parse(t1, TerminParseAttr.NO) is not None:
                ok = True
        if not ok:
            return None
    return UriItemToken._new2706(t0, t1, Utils.toStringStringIO(txt).lower())
def __name_vars(self) -> typing.List[str]:
    """Return the cached list of spelling variants of ``self.value``.

    The value is split into runs of digits, runs of letters and single other
    characters. ``__addVars`` supplies the variants of every run, and an
    optional "-" is offered between a letter run and a following digit run.
    All combinations are generated in order (the last part varies fastest)
    until the list holds more than 20 entries.

    Returns:
        The variant list; the original value comes first. Empty when
        ``self.value`` is None. The same list object is returned on later
        calls.
    """
    import itertools

    if self.__m_names is not None:
        return self.__m_names
    self.__m_names = list()
    nam = self.value
    if nam is None:
        return self.__m_names
    self.__m_names.append(nam)

    # Character classes: 0 - end of string, 1 - digit, 2 - letter, 3 - other.
    items = []
    run_start = 0
    prev_kind = 0
    for i in range(len(nam) + 1):
        kind = 0
        if i < len(nam):
            ch = nam[i]
            kind = 1 if ch.isdigit() else (2 if ch.isalpha() else 3)
        # A run ends when the class changes; "other" characters never merge.
        if kind != prev_kind or kind == 3:
            if i > run_start:
                variants = []
                DenominationReferent.__addVars(nam[run_start:i], variants)
                items.append(variants)
                if kind == 1 and prev_kind == 2:
                    # letters followed by digits: allow "A1" and "A-1"
                    items.append(["", "-"])
            run_start = i
            prev_kind = kind

    # itertools.product walks the combinations in the same order as a manual
    # index odometer that advances the last position first. If some part has
    # no variants it simply yields nothing instead of raising IndexError.
    for combo in itertools.product(*items):
        candidate = "".join(str(part) for part in combo)
        if candidate not in self.__m_names:
            self.__m_names.append(candidate)
        if len(self.__m_names) > 20:
            break
    return self.__m_names
def __attach_uri_content(t0: 'Token', chars_: str, can_be_whitespaces: bool = False) -> 'UriItemToken':
    """Try to read URI content (optional domain name plus the rest) starting at ``t0``.

    Args:
        t0: first token of the candidate.
        chars_: the non-letter characters that are allowed inside the content.
        can_be_whitespaces: allow whitespace gaps (only after a domain name
            was recognized, never across a newline).

    Returns:
        A UriItemToken for the collected text; the bare domain token when
        nothing else (or nothing alphanumeric) follows it; None when the
        domain value is shorter than 3 characters or the text is just "WWW".
    """
    txt = io.StringIO()
    t1 = t0  # last token accepted into the result
    dom = UriItemToken.attach_domain_name(t0, True, can_be_whitespaces)
    if (dom is not None):
        if (len(dom.value) < 3):
            return None
    open_char = chr(0)  # currently open '(' or '[', chr(0) when none
    t = t0
    # With a recognized domain, collecting continues right after it.
    if (dom is not None):
        t = dom.end_token.next0_
    first_pass3411 = True
    while True:
        # Flag-driven loop header so that `continue` still advances t.
        if first_pass3411: first_pass3411 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t != t0 and t.is_whitespace_before):
            # A whitespace gap is acceptable only in a few situations.
            if (t.is_newline_before or not can_be_whitespaces):
                break
            if (dom is None):
                break
            if (t.previous.is_hiphen):
                pass
            elif (t.previous.is_char_of(",;")):
                break
            elif (t.previous.is_char('.') and t.chars.is_letter and t.length_char == 2):
                pass
            else:
                # Look ahead (without crossing another gap) for a slash or a
                # known page extension that justifies bridging the gap.
                ok = False
                tt1 = t
                if (t.is_char_of("\\/")):
                    tt1 = t.next0_
                tt0 = tt1
                first_pass3412 = True
                while True:
                    if first_pass3412: first_pass3412 = False
                    else: tt1 = tt1.next0_
                    if (not (tt1 is not None)): break
                    if (tt1 != tt0 and tt1.is_whitespace_before):
                        break
                    if (isinstance(tt1, NumberToken)):
                        continue
                    if (not (isinstance(tt1, TextToken))):
                        break
                    term1 = tt1.term
                    if (((term1 == "HTM" or term1 == "HTML" or term1 == "SHTML") or term1 == "ASP" or term1 == "ASPX") or term1 == "JSP"):
                        ok = True
                        break
                    if (not tt1.chars.is_letter):
                        if (tt1.is_char_of("\\/")):
                            ok = True
                            break
                        if (not tt1.is_char_of(chars_)):
                            break
                    elif (not tt1.chars.is_latin_letter):
                        break
                if (not ok):
                    break
        if (isinstance(t, NumberToken)):
            nt = Utils.asObjectOrNull(t, NumberToken)
            print(nt.get_source_text(), end="", file=txt)
            t1 = t
            continue
        tt = Utils.asObjectOrNull(t, TextToken)
        if (tt is None):
            # Not a text token: a referent token is accepted in two cases.
            rt = Utils.asObjectOrNull(t, ReferentToken)
            # "РФ" directly after a dot in the collected text.
            if (rt is not None and rt.begin_token.is_value("РФ", None)):
                if (txt.tell() > 0 and Utils.getCharAtStringIO(txt, txt.tell() - 1) == '.'):
                    print(rt.begin_token.get_source_text(), end="", file=txt)
                    t1 = t
                    continue
            # A single-token referent written in Latin letters.
            if (rt is not None and rt.chars.is_latin_letter and rt.begin_token == rt.end_token):
                print(rt.begin_token.get_source_text(), end="", file=txt)
                t1 = t
                continue
            break
        src = tt.get_source_text()
        ch = src[0]
        if (not str.isalpha(ch)):
            if (chars_.find(ch) < 0):
                break
            # Track brackets: a closing bracket must match the open one.
            if (ch == '(' or ch == '['):
                open_char = ch
            elif (ch == ')'):
                if (open_char != '('):
                    break
                open_char = (chr(0))
            elif (ch == ']'):
                if (open_char != '['):
                    break
                open_char = (chr(0))
        print(src, end="", file=txt)
        t1 = t
    if (txt.tell() == 0):
        return dom
    # Find the first alphanumeric character; without one only the domain counts.
    i = 0
    i = 0
    while i < txt.tell():
        if (str.isalnum(Utils.getCharAtStringIO(txt, i))):
            break
        i += 1
    if (i >= txt.tell()):
        return dom
    # Cut one trailing '.' or '/' and step the end token back.
    if (Utils.getCharAtStringIO(txt, txt.tell() - 1) == '.' or Utils.getCharAtStringIO(txt, txt.tell() - 1) == '/'):
        Utils.setLengthStringIO(txt, txt.tell() - 1)
        t1 = t1.previous
    if (dom is not None):
        Utils.insertStringIO(txt, 0, dom.value)
    tmp = Utils.toStringStringIO(txt)
    # A text starting with two backslashes gets them replaced by "//".
    if (tmp.startswith("\\\\")):
        Utils.replaceStringIO(txt, "\\\\", "//")
        tmp = Utils.toStringStringIO(txt)
    # The "WWW" check ignores a leading "//"; the returned value keeps it.
    if (tmp.startswith("//")):
        tmp = tmp[2:]
    if (Utils.compareStrings(tmp, "WWW", True) == 0):
        return None
    res = UriItemToken._new2706(t0, t1, Utils.toStringStringIO(txt))
    return res
def _ToString(self, short_variant : bool, lang : 'MorphLang', lev : int, from_range : int) -> str:
    """Render this date as text.

    The text is built as ``[range prefix][pointer] [weekday, ][day] [month]
    [year] [time]``; a date that has only a century is returned as a Roman
    numeral with a century word.

    Args:
        short_variant: use the short year suffix ("г" / "р").
        lang: output language; ``None`` is treated as ``MorphLang.RU``.
        lev: not used by this method.
        from_range: 0 - stand-alone date, 1 - start of a range ("с"/"з"/
            "from" prefix), 2 - end of a range ("по"/"to" prefix).

    Returns:
        The textual form, or "?" when nothing could be rendered.
    """
    res = io.StringIO()

    def emit(text) -> None:
        # Append to the buffer the way print(..., end="") does (str() applied).
        print(text, end="", file=res)

    p = self.pointer
    if lang is None:
        lang = MorphLang.RU

    if from_range == 1:
        emit("{0} ".format(("з" if lang.is_ua else ("from" if lang.is_en else "с"))))
    elif from_range == 2:
        emit("to " if lang.is_en else "по ")

    if p != DatePointerType.NO:
        val = MetaDate.POINTER.convertInnerValueToOuterValue(Utils.enumToString(p), lang)
        # Inside a non-English range the pointer word is replaced by an
        # inflected form that fits the range prefix.
        if from_range == 0 or lang.is_en:
            pass
        elif from_range == 1:
            if p == DatePointerType.BEGIN:
                val = ("початку" if lang.is_ua else "начала")
            elif p == DatePointerType.CENTER:
                val = ("середини" if lang.is_ua else "середины")
            elif p == DatePointerType.END:
                val = ("кінця" if lang.is_ua else "конца")
            elif p == DatePointerType.TODAY:
                val = ("цього часу" if lang.is_ua else "настоящего времени")
        elif from_range == 2:
            if p == DatePointerType.BEGIN:
                val = ("початок" if lang.is_ua else "начало")
            elif p == DatePointerType.CENTER:
                val = ("середину" if lang.is_ua else "середину")
            elif p == DatePointerType.END:
                val = ("кінець" if lang.is_ua else "конец")
            elif p == DatePointerType.TODAY:
                val = ("теперішній час" if lang.is_ua else "настоящее время")
        emit("{0} ".format(val))

    if self.day_of_week > 0:
        if lang.is_en:
            emit("{0}, ".format(DateReferent.__m_week_day_en[self.day_of_week - 1]))
        else:
            emit("{0}, ".format(DateReferent.__m_week_day[self.day_of_week - 1]))

    y = self.year
    m = self.month
    d = self.day
    cent = self.century

    if y == 0 and cent != 0:
        is_bc = cent < 0
        if cent < 0:
            cent = -cent
        emit(NumberHelper.getNumberRoman(cent))
        # Fix: the English word used to be selected by lang.is_ua, which made
        # the Ukrainian variants below unreachable and gave English callers
        # the Russian word.
        if lang.is_en:
            emit(" century")
        elif m > 0 or p != DatePointerType.NO or from_range == 1:
            emit(" віка" if lang.is_ua else " века")
        else:
            emit(" вік" if lang.is_ua else " век")
        if is_bc:
            emit(" до н.е." if lang.is_ua else (" BC" if lang.is_en else " до н.э."))
        return Utils.toStringStringIO(res)

    if d > 0:
        emit(d)
    if m > 0 and m <= 12:
        if res.tell() > 0 and Utils.getCharAtStringIO(res, res.tell() - 1) != ' ':
            emit(' ')
        # The *0 tables are used only for a bare month (no day, pointer or range).
        bare_month = not (d > 0 or p != DatePointerType.NO or from_range != 0)
        if lang.is_ua:
            emit(DateReferent.__m_month0ua[m - 1] if bare_month else DateReferent.__m_monthua[m - 1])
        elif lang.is_en:
            emit(DateReferent.__m_monthen[m - 1])
        else:
            emit(DateReferent.__m_month0[m - 1] if bare_month else DateReferent.__m_month[m - 1])
    if y != 0:
        is_bc = y < 0
        if y < 0:
            y = -y
        if res.tell() > 0 and Utils.getCharAtStringIO(res, res.tell() - 1) != ' ':
            emit(' ')
        if lang.is_en:
            emit("{0}".format(y))
        elif short_variant:
            emit("{0}{1}".format(y, ("р" if lang.is_ua else "г")))
        elif m > 0 or p != DatePointerType.NO or from_range == 1:
            emit("{0} {1}".format(y, ("року" if lang.is_ua else "года")))
        else:
            emit("{0} {1}".format(y, ("рік" if lang.is_ua else "год")))
        if is_bc:
            # NOTE(review): the English suffix has no leading blank ("44BC");
            # left unchanged because callers may depend on the exact text.
            emit(" до н.е." if lang.is_ua else ("BC" if lang.is_en else " до н.э."))

    h = self.hour
    mi = self.minute
    se = self.second
    if h >= 0 and mi >= 0:
        if res.tell() > 0:
            emit(' ')
        emit("{0}:{1}".format("{:02d}".format(h), "{:02d}".format(mi)))
        if se >= 0:
            emit(":{0}".format("{:02d}".format(se)))

    if res.tell() == 0:
        return "?"
    # Strip trailing blanks / commas left by the prefix and weekday parts.
    while (Utils.getCharAtStringIO(res, res.tell() - 1) == ' '
           or Utils.getCharAtStringIO(res, res.tell() - 1) == ','):
        Utils.setLengthStringIO(res, res.tell() - 1)
    return Utils.toStringStringIO(res).strip()