コード例 #1
0
ファイル: MorphCase.py プロジェクト: pullenti/PullentiPython
 def __str__(self) -> str:
     """Return the abbreviations of all set case flags, separated by ``|``.

     Every ``is_*`` case flag that is truthy on this object contributes its
     Russian abbreviation, in the fixed order listed below.

     Returns:
         The ``|``-joined abbreviations, or an empty string when no flag
         is set.
     """
     # (flag value, abbreviation) pairs in output order.
     labelled_flags = (
         (self.is_nominative, "именит."),
         (self.is_genitive, "родит."),
         (self.is_dative, "дател."),
         (self.is_accusative, "винит."),
         (self.is_instrumental, "творит."),
         (self.is_prepositional, "предлож."),
         (self.is_vocative, "зват."),
         (self.is_partial, "частич."),
         (self.is_common, "общ."),
         (self.is_possessive, "притяж."),
     )
     # Joining avoids emitting a trailing separator and trimming it afterwards.
     return "|".join(label for is_set, label in labelled_flags if is_set)
コード例 #2
0
 def _union(self, kw1: 'KeywordReferent', kw2: 'KeywordReferent',
            word2: str) -> None:
     """Fill this referent with slots combined from ``kw1`` and ``kw2``.

     * ``typ`` is copied from ``kw1``.
     * For every ``ATTR_VALUE`` string of ``kw1`` an ``ATTR_VALUE`` slot
       ``"<value> <word2>"`` is added.
     * For every pair of normal forms (one from each keyword) an
       ``ATTR_NORMAL`` slot is added: the space-separated words of the first
       form plus those words of the second form not already present, sorted
       and joined by a single space.
     * Both keywords are added as ``ATTR_REF`` slots.

     When a keyword has no ``ATTR_NORMAL`` values and ``child_words == 1``,
     its ``ATTR_VALUE`` strings are used as normal forms instead.

     Args:
         kw1: First keyword; supplies the type and the value prefix.
         kw2: Second keyword.
         word2: Text appended (after a space) to each value of ``kw1``.
     """
     self.typ = kw1.typ
     for value in kw1.get_string_values(KeywordReferent.ATTR_VALUE):
         self.add_slot(KeywordReferent.ATTR_VALUE,
                       "{0} {1}".format(value, word2), False, 0)

     norms1 = kw1.get_string_values(KeywordReferent.ATTR_NORMAL)
     if not norms1 and kw1.child_words == 1:
         norms1 = kw1.get_string_values(KeywordReferent.ATTR_VALUE)
     norms2 = kw2.get_string_values(KeywordReferent.ATTR_NORMAL)
     if not norms2 and kw2.child_words == 1:
         norms2 = kw2.get_string_values(KeywordReferent.ATTR_VALUE)

     for n1 in norms1:
         for n2 in norms2:
             # Words of n1 are taken as they are; words of n2 are added
             # only when not already present.
             words = list(Utils.splitString(n1, ' ', False))
             for word in Utils.splitString(n2, ' ', False):
                 if word not in words:
                     words.append(word)
             words.sort()
             self.add_slot(KeywordReferent.ATTR_NORMAL,
                           " ".join(words), False, 0)

     self.add_slot(KeywordReferent.ATTR_REF, kw1, False, 0)
     self.add_slot(KeywordReferent.ATTR_REF, kw2, False, 0)
コード例 #3
0
 def to_string_morph_number(number: 'MorphNumber') -> str:
     """Render a ``MorphNumber`` bit set as ``|``-separated Russian labels.

     Args:
         number: Value tested with ``&`` against each ``MorphNumber`` member
             listed below.

     Returns:
         The labels of all members whose intersection with ``number``
         differs from ``MorphNumber.UNDEFINED``, in the order listed; an
         empty string when none matches.
     """
     labelled_bits = (
         (MorphNumber.SINGULAR, "единств."),
         (MorphNumber.PLURAL, "множеств."),
     )
     return "|".join(label for bit, label in labelled_bits
                     if (number & bit) != MorphNumber.UNDEFINED)
コード例 #4
0
 def to_string_morph_form(form: 'MorphForm') -> str:
     """Render a ``MorphForm`` bit set as ``|``-separated Russian labels.

     Args:
         form: Value tested with ``&`` against each ``MorphForm`` member
             listed below.

     Returns:
         The labels of all members whose intersection with ``form`` differs
         from ``MorphForm.UNDEFINED``, in the order listed; an empty string
         when none matches.
     """
     labelled_bits = (
         (MorphForm.SHORT, "кратк."),
         (MorphForm.SYNONYM, "синонимич."),
     )
     return "|".join(label for bit, label in labelled_bits
                     if (form & bit) != MorphForm.UNDEFINED)
コード例 #5
0
 def __mergeLetters(self) -> None:
     """Replace runs of spaced-out single-character tokens with one token.

     Walks the token chain from ``self.first_token`` via ``next0_``. A run
     starts at a one-character letter token preceded by enough whitespace
     and continues over following one-character tokens that are each
     preceded by exactly one whitespace. The concatenated source text of the
     run is passed to ``Morphology.process``; if that yields exactly one
     result with at least one word form flagged ``is_in_dictionary``, a new
     ``TextToken`` built from that result is linked into the chain in place
     of the run.
     """
     # True only right after a run was merged into a dictionary word; it
     # relaxes both thresholds below for the next candidate run.
     before_word = False
     tmp = io.StringIO()
     t = self.first_token
     # Emulated for-loop: the first pass skips the increment, every later
     # pass (including those reached via ``continue``) advances ``t``.
     first_pass2800 = True
     while True:
         if first_pass2800: first_pass2800 = False
         else: t = t.next0_
         if (not (t is not None)): break
         # NOTE(review): presumably asObjectOrNull yields None when ``t`` is
         # not a TextToken, in which case the next line would raise -
         # confirm only TextToken instances occur at this stage.
         tt = Utils.asObjectOrNull(t, TextToken)
         # Only a single-character letter token can start a run.
         if (not tt.chars.is_letter or tt.length_char != 1): 
             before_word = False
             continue
         i = t.whitespaces_before_count
         # Require more than 2 whitespaces before the run, or exactly 2 when
         # the previous run was merged into a word.
         if (i > 2 or ((i == 2 and before_word))): 
             pass
         else: 
             before_word = False
             continue
         # From here on ``i`` counts the tokens appended after the first one.
         i = 0
         Utils.setLengthStringIO(tmp, 0)
         print(tt.getSourceText(), end="", file=tmp)
         t1 = t
         # Extend the run while the next token is one character long and is
         # preceded by exactly one whitespace; ``t1`` ends on the last token.
         while t1.next0_ is not None: 
             tt = (Utils.asObjectOrNull(t1.next0_, TextToken))
             if (tt.length_char != 1 or tt.whitespaces_before_count != 1): 
                 break
             i += 1
             print(tt.getSourceText(), end="", file=tmp)
             t1 = t1.next0_
         # Require more than 3 appended tokens, or more than 1 when the
         # previous run was merged into a word.
         if (i > 3 or ((i > 1 and before_word))): 
             pass
         else: 
             before_word = False
             continue
         before_word = False
         mt = Morphology.process(Utils.toStringStringIO(tmp), None, None)
         # Anything other than exactly one result: skip past the whole run.
         if (mt is None or len(mt) != 1): 
             t = t1
             continue
         # Accept the merge only if some word form is in the dictionary.
         for wf in mt[0].word_forms: 
             if (wf.is_in_dictionary): 
                 before_word = True
                 break
         if (not before_word): 
             t = t1
             continue
         # Build the merged token and link it where the run [t .. t1] was.
         tt = TextToken(mt[0], self)
         if (t == self.first_token): 
             self.first_token = (tt)
         else: 
             tt.previous = t.previous
         tt.next0_ = t1.next0_
         tt.begin_char = t.begin_char
         tt.end_char = t1.end_char
         # Continue the walk from the merged token.
         t = (tt)
コード例 #6
0
 def to_string_morph_aspect(aspect: 'MorphAspect') -> str:
     """Render a ``MorphAspect`` bit set as ``|``-separated Russian labels.

     Args:
         aspect: Value tested with ``&`` against each ``MorphAspect`` member
             listed below.

     Returns:
         The labels of all members whose intersection with ``aspect``
         differs from ``MorphAspect.UNDEFINED``, in the order listed; an
         empty string when none matches.
     """
     labelled_bits = (
         (MorphAspect.IMPERFECTIVE, "несоверш."),
         (MorphAspect.PERFECTIVE, "соверш."),
     )
     return "|".join(label for bit, label in labelled_bits
                     if (aspect & bit) != MorphAspect.UNDEFINED)
コード例 #7
0
 def to_string_morph_tense(tense: 'MorphTense') -> str:
     """Render a ``MorphTense`` bit set as ``|``-separated Russian labels.

     Args:
         tense: Value tested with ``&`` against each ``MorphTense`` member
             listed below.

     Returns:
         The labels of all members whose intersection with ``tense`` differs
         from ``MorphTense.UNDEFINED``, in the order listed; an empty string
         when none matches.
     """
     labelled_bits = (
         (MorphTense.PAST, "прошедшее"),
         (MorphTense.PRESENT, "настоящее"),
         (MorphTense.FUTURE, "будущее"),
     )
     return "|".join(label for bit, label in labelled_bits
                     if (tense & bit) != MorphTense.UNDEFINED)
コード例 #8
0
 def to_string_morph_person(person: 'MorphPerson') -> str:
     """Render a ``MorphPerson`` bit set as ``|``-separated labels.

     Args:
         person: Value tested with ``&`` against each ``MorphPerson`` member
             listed below.

     Returns:
         The labels of all members whose intersection with ``person``
         differs from ``MorphPerson.UNDEFINED``, in the order listed; an
         empty string when none matches.
     """
     labelled_bits = (
         (MorphPerson.FIRST, "1лицо"),
         (MorphPerson.SECOND, "2лицо"),
         (MorphPerson.THIRD, "3лицо"),
     )
     return "|".join(label for bit, label in labelled_bits
                     if (person & bit) != MorphPerson.UNDEFINED)
コード例 #9
0
 def to_string_morph_gender(gender: 'MorphGender') -> str:
     """Render a ``MorphGender`` bit set as ``|``-separated Russian labels.

     Args:
         gender: Value tested with ``&`` against each ``MorphGender`` member
             listed below.

     Returns:
         The labels of all members whose intersection with ``gender``
         differs from ``MorphGender.UNDEFINED``, in the order listed; an
         empty string when none matches.
     """
     labelled_bits = (
         (MorphGender.MASCULINE, "муж."),
         # ``FEMINIE`` is the member name as spelled by the enum itself.
         (MorphGender.FEMINIE, "жен."),
         (MorphGender.NEUTER, "средн."),
     )
     return "|".join(label for bit, label in labelled_bits
                     if (gender & bit) != MorphGender.UNDEFINED)
コード例 #10
0
 def to_string_morph_voice(voice: 'MorphVoice') -> str:
     """Render a ``MorphVoice`` bit set as ``|``-separated Russian labels.

     Args:
         voice: Value tested with ``&`` against each ``MorphVoice`` member
             listed below.

     Returns:
         The labels of all members whose intersection with ``voice`` differs
         from ``MorphVoice.UNDEFINED``, in the order listed; an empty string
         when none matches.
     """
     labelled_bits = (
         (MorphVoice.ACTIVE, "действит."),
         (MorphVoice.PASSIVE, "страдат."),
         (MorphVoice.MIDDLE, "средн."),
     )
     return "|".join(label for bit, label in labelled_bits
                     if (voice & bit) != MorphVoice.UNDEFINED)
コード例 #11
0
 def to_string_morph_mood(mood: 'MorphMood') -> str:
     """Render a ``MorphMood`` bit set as ``|``-separated Russian labels.

     Args:
         mood: Value tested with ``&`` against each ``MorphMood`` member
             listed below.

     Returns:
         The labels of all members whose intersection with ``mood`` differs
         from ``MorphMood.UNDEFINED``, in the order listed; an empty string
         when none matches.
     """
     labelled_bits = (
         (MorphMood.INDICATIVE, "изъявит."),
         (MorphMood.IMPERATIVE, "повелит."),
         (MorphMood.SUBJUNCTIVE, "условн."),
     )
     return "|".join(label for bit, label in labelled_bits
                     if (mood & bit) != MorphMood.UNDEFINED)
コード例 #12
0
 def to_string_morph_finite(finit: 'MorphFinite') -> str:
     """Render a ``MorphFinite`` bit set as ``|``-separated labels.

     Args:
         finit: Value tested with ``&`` against each ``MorphFinite`` member
             listed below.

     Returns:
         The labels of all members whose intersection with ``finit`` differs
         from ``MorphFinite.UNDEFINED``, in the order listed; an empty
         string when none matches.
     """
     # The labels mix English and Russian; they are runtime output and are
     # kept exactly as they were.
     labelled_bits = (
         (MorphFinite.FINITE, "finite"),
         (MorphFinite.GERUND, "gerund"),
         (MorphFinite.INFINITIVE, "инфинитив"),
         (MorphFinite.PARTICIPLE, "participle"),
     )
     return "|".join(label for bit, label in labelled_bits
                     if (finit & bit) != MorphFinite.UNDEFINED)
コード例 #13
0
 def __str__(self) -> str:
     """Return the codes of all set language flags, separated by ``;``.

     Every ``is_*`` language flag that is truthy on this object contributes
     its two-letter code, in the fixed order listed below.

     Returns:
         The ``;``-joined codes, or an empty string when no flag is set.
     """
     # (flag value, code) pairs in output order.
     labelled_flags = (
         (self.is_ru, "RU"),
         (self.is_ua, "UA"),
         (self.is_by, "BY"),
         (self.is_en, "EN"),
         (self.is_it, "IT"),
         (self.is_kz, "KZ"),
     )
     # Joining avoids emitting a trailing separator and trimming it afterwards.
     return ";".join(code for is_set, code in labelled_flags if is_set)
コード例 #14
0
 def attachbbk(t0: 'Token') -> 'UriItemToken':
     """Collect a code made of digit and text tokens starting at ``t0``.

     Tokens are consumed via ``next0_`` and their source text concatenated
     until one of the stop conditions is met:

     * a token (other than ``t0``) that starts a new line;
     * a table control character;
     * a ``NumberToken`` that is not spelled with digits or whose
       ``morph.class0_`` is not undefined;
     * a token that is neither a ``NumberToken`` nor a ``TextToken``;
     * a comma;
     * an opening parenthesis not followed by a ``NumberToken``;
     * a text token starting with a letter and preceded by whitespace.

     Args:
         t0: First token of the candidate code.

     Returns:
         A ``UriItemToken`` spanning ``t0`` to the last consumed token with
         the collected text as value, or ``None`` when fewer than 3
         characters or fewer than 2 digit characters were collected. A
         trailing ``.`` is dropped from the value and the end token is moved
         one token back.
     """
     pieces = []
     digit_count = 0
     last = t0
     t = t0
     while t is not None:
         if t.is_newline_before and t != t0:
             break
         if t.is_table_control_char:
             break
         if isinstance(t, NumberToken):
             num = Utils.asObjectOrNull(t, NumberToken)
             if (num.typ != NumberSpellingType.DIGIT
                     or not num.morph.class0_.is_undefined):
                 break
             digits = num.get_source_text()
             pieces.append(digits)
             digit_count += len(digits)
         else:
             text_tok = Utils.asObjectOrNull(t, TextToken)
             if text_tok is None:
                 break
             if text_tok.is_char(','):
                 break
             if (text_tok.is_char('(')
                     and not isinstance(text_tok.next0_, NumberToken)):
                 break
             source = text_tok.get_source_text()
             if str.isalpha(source[0]) and text_tok.is_whitespace_before:
                 break
             pieces.append(source)
         last = t
         t = t.next0_

     text = "".join(pieces)
     if len(text) < 3 or digit_count < 2:
         return None
     if text.endswith('.'):
         # Drop the trailing dot and step the end token back by one.
         text = text[:-1]
         last = last.previous
     return UriItemToken._new2706(t0, last, text)
コード例 #15
0
ファイル: MorphClass.py プロジェクト: pullenti/PullentiPython
 def __str__(self) -> str:
     """Return the labels of all set word-class flags, separated by ``|``.

     Every ``is_*`` flag that is truthy on this object contributes its
     Russian label, in the fixed order listed below. The ``is_misc`` label
     is emitted only when none of ``is_conjunction``, ``is_preposition``
     and ``is_proper`` is set.

     Returns:
         The ``|``-joined labels, or an empty string when no flag is set.
     """
     # (condition, label) pairs in output order.
     labelled_flags = (
         (self.is_noun, "существ."),
         (self.is_adjective, "прилаг."),
         (self.is_verb, "глагол"),
         (self.is_adverb, "наречие"),
         (self.is_pronoun, "местоим."),
         (self.is_misc and not (self.is_conjunction
                                or self.is_preposition
                                or self.is_proper), "разное"),
         (self.is_preposition, "предлог"),
         (self.is_conjunction, "союз"),
         (self.is_proper, "собств."),
         (self.is_proper_surname, "фамилия"),
         (self.is_proper_name, "имя"),
         (self.is_proper_secname, "отч."),
         (self.is_proper_geo, "геогр."),
         (self.is_personal_pronoun, "личн.местоим."),
     )
     # Joining avoids emitting a trailing separator and trimming it afterwards.
     return "|".join(label for is_set, label in labelled_flags if is_set)
コード例 #16
0
ファイル: MorphEngine.py プロジェクト: AAA1911/PullentiPython
 def correct_word_by_morph(self, word: str) -> str:
     """Look for a single unambiguous correction of ``word``.

     Three kinds of one-position edits are tried in turn, each only if the
     previous kind produced no candidate. Judging by the helper names
     (to be confirmed against ``Utils``), they are:

     1. setting the character at a position to ``*``;
     2. inserting ``*`` at a position;
     3. removing one character at a position.

     Positions start at 1; for the first two kinds they run up to
     ``len(word) - 1``, for the third up to ``len(word) - 2``. Each edited
     string is passed to ``__check_corr_var`` together with ``self.m_root``,
     and distinct non-``None`` results are collected.

     Args:
         word: The word to correct.

     Returns:
         The candidate if exactly one distinct candidate was found by the
         first kind of edit that produced any; otherwise ``None``.
     """
     buf = Utils.newStringIO(len(word))

     def collect(stop, edit):
         # Apply ``edit`` at every position in [1, stop) to a fresh copy of
         # ``word`` in the shared buffer and gather distinct candidates.
         found = []
         for pos in range(1, stop):
             Utils.setLengthStringIO(buf, 0)
             print(word, end="", file=buf)
             edit(pos)
             candidate = self.__check_corr_var(Utils.toStringStringIO(buf),
                                               self.m_root, 0)
             if candidate is not None and candidate not in found:
                 found.append(candidate)
         return found

     size = len(word)
     variants = collect(
         size, lambda pos: Utils.setCharAtStringIO(buf, pos, '*'))
     if not variants:
         variants = collect(
             size, lambda pos: Utils.insertStringIO(buf, pos, '*'))
     if not variants:
         variants = collect(
             size - 1, lambda pos: Utils.removeStringIO(buf, pos, 1))

     if len(variants) != 1:
         return None
     return variants[0]
コード例 #17
0
ファイル: UriAnalyzer.py プロジェクト: MihaJjDa/APCLtask
 def process(self, kit : 'AnalysisKit') -> None:
     """ Основная функция выделения объектов
     
     Args:
         container: 
         lastStage: 
     
     """
     ad = kit.getAnalyzerData(self)
     t = kit.first_token
     first_pass3149 = True
     while True:
         if first_pass3149: first_pass3149 = False
         else: t = t.next0_
         if (not (t is not None)): break
         tt = t
         tok = UriAnalyzer.__m_schemes.tryParse(t, TerminParseAttr.NO)
         if (tok is not None): 
             i = (tok.termin.tag)
             tt = tok.end_token
             if (tt.next0_ is not None and tt.next0_.isChar('(')): 
                 tok1 = UriAnalyzer.__m_schemes.tryParse(tt.next0_.next0_, TerminParseAttr.NO)
                 if ((tok1 is not None and tok1.termin.canonic_text == tok.termin.canonic_text and tok1.end_token.next0_ is not None) and tok1.end_token.next0_.isChar(')')): 
                     tt = tok1.end_token.next0_
             if (i == 0): 
                 if ((tt.next0_ is None or ((not tt.next0_.isCharOf(":|") and not tt.is_table_control_char)) or tt.next0_.is_whitespace_before) or tt.next0_.whitespaces_after_count > 2): 
                     continue
                 t1 = tt.next0_.next0_
                 while t1 is not None and t1.isCharOf("/\\"):
                     t1 = t1.next0_
                 if (t1 is None or t1.whitespaces_before_count > 2): 
                     continue
                 ut = UriItemToken.attachUriContent(t1, False)
                 if (ut is None): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557(tok.termin.canonic_text.lower(), ut.value)), UriReferent)
                 rt = ReferentToken(ad.registerReferent(ur), t, ut.end_token)
                 rt.begin_token = Utils.ifNotNull(UriAnalyzer.__siteBefore(t.previous), t)
                 if (rt.end_token.next0_ is not None and rt.end_token.next0_.isCharOf("/\\")): 
                     rt.end_token = rt.end_token.next0_
                 kit.embedToken(rt)
                 t = (rt)
                 continue
             if (i == 10): 
                 tt = tt.next0_
                 if (tt is None or not tt.isChar(':')): 
                     continue
                 tt = tt.next0_
                 while tt is not None: 
                     if (tt.isCharOf("/\\")): 
                         pass
                     else: 
                         break
                     tt = tt.next0_
                 if (tt is None): 
                     continue
                 if (tt.isValue("WWW", None) and tt.next0_ is not None and tt.next0_.isChar('.')): 
                     tt = tt.next0_.next0_
                 if (tt is None or tt.is_newline_before): 
                     continue
                 ut = UriItemToken.attachUriContent(tt, True)
                 if (ut is None): 
                     continue
                 if (len(ut.value) < 4): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557(tok.termin.canonic_text.lower(), ut.value)), UriReferent)
                 rt = ReferentToken(ad.registerReferent(ur), t, ut.end_token)
                 rt.begin_token = Utils.ifNotNull(UriAnalyzer.__siteBefore(t.previous), t)
                 if (rt.end_token.next0_ is not None and rt.end_token.next0_.isCharOf("/\\")): 
                     rt.end_token = rt.end_token.next0_
                 kit.embedToken(rt)
                 t = (rt)
                 continue
             if (i == 2): 
                 if (tt.next0_ is None or not tt.next0_.isChar('.') or tt.next0_.is_whitespace_before): 
                     continue
                 if (tt.next0_.is_whitespace_after and tok.termin.canonic_text != "WWW"): 
                     continue
                 ut = UriItemToken.attachUriContent(tt.next0_.next0_, True)
                 if (ut is None): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557("http", ut.value)), UriReferent)
                 rt = ReferentToken(ur, t, ut.end_token)
                 rt.begin_token = Utils.ifNotNull(UriAnalyzer.__siteBefore(t.previous), t)
                 if (rt.end_token.next0_ is not None and rt.end_token.next0_.isCharOf("/\\")): 
                     rt.end_token = rt.end_token.next0_
                 kit.embedToken(rt)
                 t = (rt)
                 continue
             if (i == 1): 
                 sch = tok.termin.canonic_text
                 ut = None
                 if (sch == "ISBN"): 
                     ut = UriItemToken.attachISBN(tt.next0_)
                     if ((ut is None and t.previous is not None and t.previous.isChar('(')) and t.next0_ is not None and t.next0_.isChar(')')): 
                         tt0 = t.previous.previous
                         while tt0 is not None: 
                             if (tt0.whitespaces_after_count > 2): 
                                 break
                             if (tt0.is_whitespace_before): 
                                 ut = UriItemToken.attachISBN(tt0)
                                 if (ut is not None and ut.end_token.next0_ != t.previous): 
                                     ut = (None)
                                 break
                             tt0 = tt0.previous
                 elif ((sch == "RFC" or sch == "ISO" or sch == "ОКФС") or sch == "ОКОПФ"): 
                     ut = UriItemToken.attachISOContent(tt.next0_, ":")
                 elif (sch == "ГОСТ"): 
                     ut = UriItemToken.attachISOContent(tt.next0_, "-.")
                 elif (sch == "ТУ"): 
                     if (tok.chars.is_all_upper): 
                         ut = UriItemToken.attachISOContent(tt.next0_, "-.")
                         if (ut is not None and (ut.length_char < 10)): 
                             ut = (None)
                 else: 
                     ut = UriItemToken.attachBBK(tt.next0_)
                 if (ut is None): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560(ut.value, sch)), UriReferent)
                 if (ut.begin_char < t.begin_char): 
                     rt = ReferentToken(ur, ut.begin_token, t)
                     if (t.next0_ is not None and t.next0_.isChar(')')): 
                         rt.end_token = t.next0_
                 else: 
                     rt = ReferentToken(ur, t, ut.end_token)
                 if (t.previous is not None and t.previous.isValue("КОД", None)): 
                     rt.begin_token = t.previous
                 if (ur.scheme.startswith("ОК")): 
                     UriAnalyzer.__checkDetail(rt)
                 kit.embedToken(rt)
                 t = (rt)
                 if (ur.scheme.startswith("ОК")): 
                     while t.next0_ is not None:
                         if (t.next0_.is_comma_and and (isinstance(t.next0_.next0_, NumberToken))): 
                             pass
                         else: 
                             break
                         ut = UriItemToken.attachBBK(t.next0_.next0_)
                         if (ut is None): 
                             break
                         ur = (Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560(ut.value, sch)), UriReferent))
                         rt = ReferentToken(ur, t.next0_.next0_, ut.end_token)
                         UriAnalyzer.__checkDetail(rt)
                         kit.embedToken(rt)
                         t = (rt)
                 continue
             if (i == 3): 
                 t0 = tt.next0_
                 while t0 is not None:
                     if (t0.isCharOf(":|") or t0.is_table_control_char or t0.is_hiphen): 
                         t0 = t0.next0_
                     else: 
                         break
                 if (t0 is None): 
                     continue
                 ut = UriItemToken.attachSkype(t0)
                 if (ut is None): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560(ut.value.lower(), ("skype" if tok.termin.canonic_text == "SKYPE" else tok.termin.canonic_text))), UriReferent)
                 rt = ReferentToken(ur, t, ut.end_token)
                 kit.embedToken(rt)
                 t = (rt)
                 continue
             if (i == 4): 
                 t0 = tt.next0_
                 if (t0 is not None and ((t0.isChar(':') or t0.is_hiphen))): 
                     t0 = t0.next0_
                 if (t0 is None): 
                     continue
                 ut = UriItemToken.attachIcqContent(t0)
                 if (ut is None): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560(ut.value, "ICQ")), UriReferent)
                 rt = ReferentToken(ur, t, t0)
                 kit.embedToken(rt)
                 t = (rt)
                 continue
             if (i == 5 or i == 6): 
                 t0 = tt.next0_
                 has_tab_cel = False
                 is_iban = False
                 first_pass3150 = True
                 while True:
                     if first_pass3150: first_pass3150 = False
                     else: t0 = t0.next0_
                     if (not (t0 is not None)): break
                     if ((((t0.isValue("БАНК", None) or t0.morph.class0_.is_preposition or t0.is_hiphen) or t0.isCharOf(".:") or t0.isValue("РУБЛЬ", None)) or t0.isValue("РУБ", None) or t0.isValue("ДОЛЛАР", None)) or t0.isValue("№", None) or t0.isValue("N", None)): 
                         pass
                     elif (t0.is_table_control_char): 
                         has_tab_cel = True
                     elif (t0.isCharOf("\\/") and t0.next0_ is not None and t0.next0_.isValue("IBAN", None)): 
                         is_iban = True
                         t0 = t0.next0_
                     elif (t0.isValue("IBAN", None)): 
                         is_iban = True
                     elif (isinstance(t0, TextToken)): 
                         npt = NounPhraseHelper.tryParse(t0, NounPhraseParseAttr.NO, 0)
                         if (npt is not None and npt.morph.case_.is_genitive): 
                             t0 = npt.end_token
                             continue
                         break
                     else: 
                         break
                 if (t0 is None): 
                     continue
                 ur2 = None
                 ur2begin = None
                 ur2end = None
                 t00 = t0
                 val = t0.getSourceText()
                 if (str.isdigit(val[0]) and ((((i == 6 or tok.termin.canonic_text == "ИНН" or tok.termin.canonic_text == "БИК") or tok.termin.canonic_text == "ОГРН" or tok.termin.canonic_text == "СНИЛС") or tok.termin.canonic_text == "ОКПО"))): 
                     if (t0.chars.is_letter): 
                         continue
                     if (Utils.isNullOrEmpty(val) or not str.isdigit(val[0])): 
                         continue
                     if (t0.length_char < 9): 
                         tmp = io.StringIO()
                         print(val, end="", file=tmp)
                         ttt = t0.next0_
                         first_pass3151 = True
                         while True:
                             if first_pass3151: first_pass3151 = False
                             else: ttt = ttt.next0_
                             if (not (ttt is not None)): break
                             if (ttt.whitespaces_before_count > 1): 
                                 break
                             if (isinstance(ttt, NumberToken)): 
                                 print(ttt.getSourceText(), end="", file=tmp)
                                 t0 = ttt
                                 continue
                             if (ttt.is_hiphen or ttt.isChar('.')): 
                                 if (ttt.next0_ is None or not ((isinstance(ttt.next0_, NumberToken)))): 
                                     break
                                 if (ttt.is_whitespace_after or ttt.is_whitespace_before): 
                                     break
                                 continue
                             break
                         val = (None)
                         if (tmp.tell() == 20): 
                             val = Utils.toStringStringIO(tmp)
                         elif (tmp.tell() == 9 and tok.termin.canonic_text == "БИК"): 
                             val = Utils.toStringStringIO(tmp)
                         elif (((tmp.tell() == 10 or tmp.tell() == 12)) and tok.termin.canonic_text == "ИНН"): 
                             val = Utils.toStringStringIO(tmp)
                         elif (tmp.tell() >= 15 and tok.termin.canonic_text == "Л/С"): 
                             val = Utils.toStringStringIO(tmp)
                         elif (tmp.tell() >= 11 and ((tok.termin.canonic_text == "ОГРН" or tok.termin.canonic_text == "СНИЛС"))): 
                             val = Utils.toStringStringIO(tmp)
                         elif (tok.termin.canonic_text == "ОКПО"): 
                             val = Utils.toStringStringIO(tmp)
                     if (val is None): 
                         continue
                 elif (not ((isinstance(t0, NumberToken)))): 
                     if ((isinstance(t0, TextToken)) and is_iban): 
                         tmp1 = io.StringIO()
                         t1 = None
                         ttt = t0
                         first_pass3152 = True
                         while True:
                             if first_pass3152: first_pass3152 = False
                             else: ttt = ttt.next0_
                             if (not (ttt is not None)): break
                             if (ttt.is_newline_before and ttt != t0): 
                                 break
                             if (ttt.is_hiphen): 
                                 continue
                             if (not ((isinstance(ttt, NumberToken)))): 
                                 if (not ((isinstance(ttt, TextToken))) or not ttt.chars.is_latin_letter): 
                                     break
                             print(ttt.getSourceText(), end="", file=tmp1)
                             t1 = ttt
                             if (tmp1.tell() >= 34): 
                                 break
                         if (tmp1.tell() < 10): 
                             continue
                         ur1 = UriReferent._new2560(Utils.toStringStringIO(tmp1), tok.termin.canonic_text)
                         ur1.addSlot(UriReferent.ATTR_DETAIL, "IBAN", False, 0)
                         rt1 = ReferentToken(ad.registerReferent(ur1), t, t1)
                         kit.embedToken(rt1)
                         t = (rt1)
                         continue
                     if (not t0.isCharOf("/\\") or t0.next0_ is None): 
                         continue
                     tok2 = UriAnalyzer.__m_schemes.tryParse(t0.next0_, TerminParseAttr.NO)
                     if (tok2 is None or not ((isinstance(tok2.termin.tag, int))) or (tok2.termin.tag) != i): 
                         continue
                     t0 = tok2.end_token.next0_
                     while t0 is not None:
                         if (t0.isCharOf(":N№")): 
                             t0 = t0.next0_
                         elif (t0.is_table_control_char): 
                             t0 = t0.next0_
                             t00 = t0
                             has_tab_cel = True
                         else: 
                             break
                     if (not ((isinstance(t0, NumberToken)))): 
                         continue
                     tmp = io.StringIO()
                     while t0 is not None: 
                         if (not ((isinstance(t0, NumberToken)))): 
                             break
                         print(t0.getSourceText(), end="", file=tmp)
                         t0 = t0.next0_
                     if (t0 is None or not t0.isCharOf("/\\,") or not ((isinstance(t0.next0_, NumberToken)))): 
                         continue
                     val = Utils.toStringStringIO(tmp)
                     Utils.setLengthStringIO(tmp, 0)
                     ur2begin = t0.next0_
                     t0 = t0.next0_
                     while t0 is not None: 
                         if (not ((isinstance(t0, NumberToken)))): 
                             break
                         if (t0.whitespaces_before_count > 4 and tmp.tell() > 0): 
                             break
                         print(t0.getSourceText(), end="", file=tmp)
                         ur2end = t0
                         t0 = t0.next0_
                     ur2 = (Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557(tok2.termin.canonic_text, Utils.toStringStringIO(tmp))), UriReferent))
                 if (len(val) < 5): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560(val, tok.termin.canonic_text)), UriReferent)
                 rt = ReferentToken(ur, t, (t0 if ur2begin is None else ur2begin.previous))
                 if (has_tab_cel): 
                     rt.begin_token = t00
                 if (ur.scheme.startswith("ОК")): 
                     UriAnalyzer.__checkDetail(rt)
                 ttt = t.previous
                 first_pass3153 = True
                 while True:
                     if first_pass3153: first_pass3153 = False
                     else: ttt = ttt.previous
                     if (not (ttt is not None)): break
                     if (ttt.is_table_control_char): 
                         break
                     if (ttt.morph.class0_.is_preposition): 
                         continue
                     if (ttt.isValue("ОРГАНИЗАЦИЯ", None)): 
                         continue
                     if (ttt.isValue("НОМЕР", None) or ttt.isValue("КОД", None)): 
                         rt.begin_token = ttt
                         t = rt.begin_token
                     break
                 kit.embedToken(rt)
                 t = (rt)
                 if (ur2 is not None): 
                     rt2 = ReferentToken(ur2, ur2begin, ur2end)
                     kit.embedToken(rt2)
                     t = (rt2)
                 continue
             continue
         if (t.isChar('@')): 
             u1s = UriItemToken.attachMailUsers(t.previous)
             if (u1s is None): 
                 continue
             u2 = UriItemToken.attachDomainName(t.next0_, False, True)
             if (u2 is None): 
                 continue
             for ii in range(len(u1s) - 1, -1, -1):
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560("{0}@{1}".format(u1s[ii].value, u2.value).lower(), "mailto")), UriReferent)
                 b = u1s[ii].begin_token
                 t0 = b.previous
                 if (t0 is not None and t0.isChar(':')): 
                     t0 = t0.previous
                 if (t0 is not None and ii == 0): 
                     br = False
                     ttt = t0
                     first_pass3154 = True
                     while True:
                         if first_pass3154: first_pass3154 = False
                         else: ttt = ttt.previous
                         if (not (ttt is not None)): break
                         if (not ((isinstance(ttt, TextToken)))): 
                             break
                         if (ttt != t0 and ttt.whitespaces_after_count > 1): 
                             break
                         if (ttt.isChar(')')): 
                             br = True
                             continue
                         if (ttt.isChar('(')): 
                             if (not br): 
                                 break
                             br = False
                             continue
                         if (ttt.isValue("EMAIL", None) or ttt.isValue("MAILTO", None)): 
                             b = ttt
                             break
                         if (ttt.isValue("MAIL", None)): 
                             b = ttt
                             if ((ttt.previous is not None and ttt.previous.is_hiphen and ttt.previous.previous is not None) and ((ttt.previous.previous.isValue("E", None) or ttt.previous.previous.isValue("Е", None)))): 
                                 b = ttt.previous.previous
                             break
                         if (ttt.isValue("ПОЧТА", None) or ttt.isValue("АДРЕС", None)): 
                             b = t0
                             ttt = ttt.previous
                             if (ttt is not None and ttt.isChar('.')): 
                                 ttt = ttt.previous
                             if (ttt is not None and ((t0.isValue("ЭЛ", None) or ttt.isValue("ЭЛЕКТРОННЫЙ", None)))): 
                                 b = ttt
                             if (b.previous is not None and b.previous.isValue("АДРЕС", None)): 
                                 b = b.previous
                             break
                         if (ttt.morph.class0_.is_preposition): 
                             continue
                 rt = ReferentToken(ur, b, (u2.end_token if ii == (len(u1s) - 1) else u1s[ii].end_token))
                 kit.embedToken(rt)
                 t = (rt)
             continue
         if (not t.morph.language.is_cyrillic): 
             if (t.is_whitespace_before or ((t.previous is not None and t.previous.isCharOf(",(")))): 
                 u1 = UriItemToken.attachUrl(t)
                 if (u1 is not None): 
                     if (u1.is_whitespace_after or u1.end_token.next0_ is None or not u1.end_token.next0_.isChar('@')): 
                         ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557("http", u1.value)), UriReferent)
                         rt = ReferentToken(ur, u1.begin_token, u1.end_token)
                         rt.begin_token = Utils.ifNotNull(UriAnalyzer.__siteBefore(u1.begin_token.previous), u1.begin_token)
                         kit.embedToken(rt)
                         t = (rt)
                         continue
         if ((isinstance(t, TextToken)) and not t.is_whitespace_after and t.length_char > 2): 
             if (UriAnalyzer.__siteBefore(t.previous) is not None): 
                 ut = UriItemToken.attachUriContent(t, True)
                 if (ut is None or ut.value.find('.') <= 0 or ut.value.find('@') > 0): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557("http", ut.value)), UriReferent)
                 rt = ReferentToken(ur, t, ut.end_token)
                 rt.begin_token = UriAnalyzer.__siteBefore(t.previous)
                 if (rt.end_token.next0_ is not None and rt.end_token.next0_.isCharOf("/\\")): 
                     rt.end_token = rt.end_token.next0_
                 kit.embedToken(rt)
                 t = (rt)
                 continue
         if ((t.chars.is_latin_letter and not t.chars.is_all_lower and t.next0_ is not None) and not t.is_whitespace_after): 
             if (t.next0_.isChar('/')): 
                 rt = UriAnalyzer.__TryAttachLotus(Utils.asObjectOrNull(t, TextToken))
                 if (rt is not None): 
                     rt.referent = ad.registerReferent(rt.referent)
                     kit.embedToken(rt)
                     t = (rt)
                     continue
コード例 #18
0
 def get_variants(rus_or_lat: str) -> typing.List[str]:
     """Return all transliteration variants of a word in the opposite alphabet.

     The word is upper-cased and the direction is chosen from its first
     character: when ``LanguageHelper.is_cyrillic_char`` accepts it, the
     ``rus`` side of each accord is matched and the ``lat`` side is emitted;
     otherwise the roles are swapped.

     Args:
         rus_or_lat: the word to transliterate.

     Returns:
         Every combination of the matched accords, with the choice for the
         last segment varying fastest.  The list is empty when
         ``Utils.isNullOrEmpty`` rejects the input or when some position of
         the word is not covered by any accord.
     """
     variants = []
     if Utils.isNullOrEmpty(rus_or_lat):
         return variants
     rus_or_lat = rus_or_lat.upper()
     is_rus = LanguageHelper.is_cyrillic_char(rus_or_lat[0])
     word_len = len(rus_or_lat)

     # Stage 1: cut the word into segments.  For each position keep only the
     # accords whose source-side key is the longest one accepted by
     # ``__is_pref`` (presumably "key occurs in the word at this offset").
     layers = []
     pos = 0
     while pos < word_len:
         longest = 0
         candidates = []
         for entry in RusLatAccord.__get_accords():
             if is_rus and len(entry.rus) > 0:
                 key = entry.rus
             elif not is_rus and len(entry.lat) > 0:
                 key = entry.lat
             else:
                 continue
             key_len = len(key)
             if key_len < longest:
                 continue
             if not RusLatAccord.__is_pref(rus_or_lat, pos, key):
                 continue
             # Tail-only accords are usable only when they reach the word end.
             if entry.on_tail and (key_len + pos) < word_len:
                 continue
             if key_len > longest:
                 # A strictly longer key supersedes everything found so far.
                 longest = key_len
                 candidates = []
             candidates.append(entry)
         if longest == 0 or not candidates:
             return variants
         layers.append(candidates)
         pos += longest
     if not layers:
         return variants

     # Stage 2: walk through every combination of per-segment choices,
     # odometer style (the rightmost counter is incremented first).
     cursor = [0] * len(layers)
     while True:
         chosen = [layer[pick] for layer, pick in zip(layers, cursor)]
         text = "".join((entry.lat if is_rus else entry.rus) for entry in chosen)

         accepted = True
         if not is_rus:
             # Cyrillic output: drop variants where 'Й' opens the word or
             # follows a character that is not a Cyrillic vowel.
             for k, ch in enumerate(text):
                 if ch != 'Й':
                     continue
                 if k == 0 or not LanguageHelper.is_cyrillic_vowel(text[k - 1]):
                     accepted = False
                     break
         if accepted:
             variants.append(text)

         slot = len(cursor) - 1
         while slot >= 0:
             cursor[slot] += 1
             if cursor[slot] < len(layers[slot]):
                 break
             cursor[slot] = 0
             slot -= 1
         if slot < 0:
             # Every counter wrapped around: all combinations were produced.
             break
     return variants
コード例 #19
0
 def _tryParseStreet(sli : typing.List['StreetItemToken'], ext_onto_regim : bool=False, for_metro : bool=False) -> 'AddressItemToken':
     if (sli is None or len(sli) == 0): 
         return None
     i = 0
     while i < len(sli): 
         if (i == 0 and sli[i].typ == StreetItemType.FIX and ((len(sli) == 1 or sli[1].typ != StreetItemType.NOUN))): 
             return StreetDefineHelper.__tryParseFix(sli)
         elif (sli[i].typ == StreetItemType.NOUN): 
             if ((i == 0 and sli[i].termin.canonic_text == "УЛИЦА" and ((i + 2) < len(sli))) and sli[i + 1].typ == StreetItemType.NOUN and sli[i + 1].termin.canonic_text == "МИКРОРАЙОН"): 
                 sli[i + 1].begin_token = sli[i].begin_token
                 del sli[i]
             if (sli[i].termin.canonic_text == "МЕТРО"): 
                 if ((i + 1) < len(sli)): 
                     sli1 = list()
                     ii = i + 1
                     while ii < len(sli): 
                         sli1.append(sli[ii])
                         ii += 1
                     str1 = StreetDefineHelper._tryParseStreet(sli1, ext_onto_regim, True)
                     if (str1 is not None): 
                         str1.begin_token = sli[i].begin_token
                         str1.is_doubt = sli[i].is_abridge
                         if (sli[i + 1].is_in_brackets): 
                             str1.is_doubt = False
                         return str1
                 elif (i == 1 and sli[0].typ == StreetItemType.NAME): 
                     for_metro = True
                     break
                 if (i == 0 and len(sli) > 0): 
                     for_metro = True
                     break
                 return None
             if (i == 0 and (i + 1) >= len(sli) and ((sli[i].termin.canonic_text == "ВОЕННЫЙ ГОРОДОК" or sli[i].termin.canonic_text == "ПРОМЗОНА"))): 
                 stri0 = StreetReferent()
                 stri0.addSlot(StreetReferent.ATTR_TYP, "микрорайон", False, 0)
                 stri0.addSlot(StreetReferent.ATTR_NAME, sli[i].termin.canonic_text, False, 0)
                 return AddressItemToken._new85(AddressItemToken.ItemType.STREET, sli[0].begin_token, sli[0].end_token, stri0, True)
             if (i == 0 and (i + 1) >= len(sli) and sli[i].termin.canonic_text == "МИКРОРАЙОН"): 
                 stri0 = StreetReferent()
                 stri0.addSlot(StreetReferent.ATTR_TYP, sli[i].termin.canonic_text.lower(), False, 0)
                 return AddressItemToken._new85(AddressItemToken.ItemType.STREET, sli[0].begin_token, sli[0].end_token, stri0, True)
             if (sli[i].termin.canonic_text == "ПЛОЩАДЬ" or sli[i].termin.canonic_text == "ПЛОЩА"): 
                 tt = sli[i].end_token.next0_
                 if (tt is not None and ((tt.is_hiphen or tt.isChar(':')))): 
                     tt = tt.next0_
                 nex = NumberHelper.tryParseNumberWithPostfix(tt)
                 if (nex is not None): 
                     return None
             break
         i += 1
     if (i >= len(sli)): 
         return StreetDefineHelper.__tryDetectNonNoun(sli, ext_onto_regim, for_metro)
     name = None
     number = None
     age = None
     adj = None
     noun = sli[i]
     alt_noun = None
     is_micro_raion = (noun.termin.canonic_text == "МИКРОРАЙОН" or noun.termin.canonic_text == "МІКРОРАЙОН" or noun.termin.canonic_text == "КВАРТАЛ") or LanguageHelper.endsWith(noun.termin.canonic_text, "ГОРОДОК")
     before = 0
     after = 0
     j = 0
     while j < i: 
         if ((sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX) or sli[j].typ == StreetItemType.STDADJECTIVE or sli[j].typ == StreetItemType.STDPARTOFNAME): 
             before += 1
         elif (sli[j].typ == StreetItemType.NUMBER): 
             if (sli[j].is_newline_after): 
                 return None
             if (sli[j].number.morph.class0_.is_adjective): 
                 before += 1
             elif (is_micro_raion): 
                 before += 1
             elif (sli[i].number_has_prefix): 
                 before += 1
         else: 
             before += 1
         j += 1
     j = (i + 1)
     while j < len(sli): 
         if ((sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX) or sli[j].typ == StreetItemType.STDADJECTIVE or sli[j].typ == StreetItemType.STDPARTOFNAME): 
             after += 1
         elif (sli[j].typ == StreetItemType.NUMBER): 
             if (sli[j].number is not None and sli[j].number.morph.class0_.is_adjective): 
                 after += 1
             elif (is_micro_raion): 
                 after += 1
             elif (sli[j].number_has_prefix): 
                 after += 1
             elif (ext_onto_regim): 
                 after += 1
         elif (sli[j].typ == StreetItemType.NOUN): 
             break
         else: 
             after += 1
         j += 1
     rli = list()
     if (before > after): 
         if (noun.termin.canonic_text == "МЕТРО"): 
             return None
         tt = sli[0].begin_token
         if (tt == sli[0].end_token and noun.begin_token == sli[0].end_token.next0_): 
             if (not tt.morph.class0_.is_adjective and not ((isinstance(tt, NumberToken)))): 
                 if ((sli[0].is_newline_before or not MiscLocationHelper.checkGeoObjectBefore(sli[0].begin_token) or noun.morph.case_.is_genitive) or noun.morph.case_.is_instrumental): 
                     ok = False
                     if (AddressItemToken.checkHouseAfter(noun.end_token.next0_, False, True)): 
                         ok = True
                     elif (noun.end_token.next0_ is None): 
                         ok = True
                     elif (noun.is_newline_after and MiscLocationHelper.checkGeoObjectBefore(sli[0].begin_token)): 
                         ok = True
                     if (not ok): 
                         if ((noun.chars.is_latin_letter and noun.chars.is_capital_upper and sli[0].chars.is_latin_letter) and sli[0].chars.is_capital_upper): 
                             ok = True
                     if (not ok): 
                         return None
         n0 = 0
         n1 = (i - 1)
     elif (i == 1 and sli[0].typ == StreetItemType.NUMBER): 
         if (not sli[0].is_whitespace_after): 
             return None
         number = (sli[0].value if sli[0].number is None else str(sli[0].number.int_value))
         if (sli[0].is_number_km): 
             number += "км"
         n0 = (i + 1)
         n1 = (len(sli) - 1)
         rli.append(sli[0])
         rli.append(sli[i])
     elif (after > before): 
         n0 = (i + 1)
         n1 = (len(sli) - 1)
         rli.append(sli[i])
     elif (after == 0): 
         return None
     elif ((len(sli) > 2 and ((sli[0].typ == StreetItemType.NAME or sli[0].typ == StreetItemType.STDADJECTIVE or sli[0].typ == StreetItemType.STDNAME)) and sli[1].typ == StreetItemType.NOUN) and sli[2].typ == StreetItemType.NUMBER): 
         n0 = 0
         n1 = 0
         num = False
         tt2 = sli[2].end_token.next0_
         if (sli[2].is_number_km): 
             num = True
         elif (sli[0].begin_token.previous is not None and sli[0].begin_token.previous.isValue("КИЛОМЕТР", None)): 
             sli[2].is_number_km = True
             num = True
         elif (sli[2].begin_token.previous.is_comma): 
             pass
         elif (sli[2].begin_token != sli[2].end_token): 
             num = True
         elif (AddressItemToken.checkHouseAfter(sli[2].end_token.next0_, False, True)): 
             num = True
         elif (sli[2].morph.class0_.is_adjective and (sli[2].whitespaces_before_count < 2)): 
             if (sli[2].end_token.next0_ is None or sli[2].end_token.is_comma or sli[2].is_newline_after): 
                 num = True
         if (num): 
             number = (sli[2].value if sli[2].number is None else str(sli[2].number.int_value))
             if (sli[2].is_number_km): 
                 number += "км"
             rli.append(sli[2])
         else: 
             del sli[2:2+len(sli) - 2]
     else: 
         return None
     sec_number = None
     j = n0
     first_pass2732 = True
     while True:
         if first_pass2732: first_pass2732 = False
         else: j += 1
         if (not (j <= n1)): break
         if (sli[j].typ == StreetItemType.NUMBER): 
             if (age is not None or ((sli[j].is_newline_before and j > 0))): 
                 break
             if (number is not None): 
                 if (name is not None and name.typ == StreetItemType.STDNAME): 
                     sec_number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value))
                     if (sli[j].is_number_km): 
                         sec_number += "км"
                     rli.append(sli[j])
                     continue
                 if (((j + 1) < len(sli)) and sli[j + 1].typ == StreetItemType.STDNAME): 
                     sec_number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value))
                     if (sli[j].is_number_km): 
                         sec_number += "км"
                     rli.append(sli[j])
                     continue
                 break
             if (sli[j].number is not None and sli[j].number.typ == NumberSpellingType.DIGIT and not sli[j].number.morph.class0_.is_adjective): 
                 if (sli[j].whitespaces_before_count > 2 and j > 0): 
                     break
                 if (sli[j].number is not None and sli[j].number.int_value > 20): 
                     if (j > n0): 
                         if (((j + 1) < len(sli)) and sli[j + 1].typ == StreetItemType.NOUN): 
                             pass
                         else: 
                             break
                 if (j == n0 and n0 > 0): 
                     pass
                 elif (j == n0 and n0 == 0 and sli[j].whitespaces_after_count == 1): 
                     pass
                 elif (sli[j].number_has_prefix): 
                     pass
                 elif (j == n1 and ((n1 + 1) < len(sli)) and sli[n1 + 1].typ == StreetItemType.NOUN): 
                     pass
                 else: 
                     break
             number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value))
             if (sli[j].is_number_km): 
                 number += "км"
             rli.append(sli[j])
         elif (sli[j].typ == StreetItemType.AGE): 
             if (number is not None or age is not None): 
                 break
             age = str(sli[j].number.int_value)
             rli.append(sli[j])
         elif (sli[j].typ == StreetItemType.STDADJECTIVE): 
             if (adj is not None): 
                 return None
             adj = sli[j]
             rli.append(sli[j])
         elif (sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX): 
             if (name is not None): 
                 if (j > 1 and sli[j - 2].typ == StreetItemType.NOUN): 
                     break
                 elif (i < j): 
                     break
                 else: 
                     return None
             name = sli[j]
             rli.append(sli[j])
         elif (sli[j].typ == StreetItemType.STDPARTOFNAME and j == n1): 
             if (name is not None): 
                 break
             name = sli[j]
             rli.append(sli[j])
         elif (sli[j].typ == StreetItemType.NOUN): 
             if ((sli[0] == noun and ((noun.termin.canonic_text == "УЛИЦА" or noun.termin.canonic_text == "ВУЛИЦЯ")) and j > 0) and name is None): 
                 alt_noun = noun
                 noun = sli[j]
                 rli.append(sli[j])
             else: 
                 break
     if (((n1 < i) and number is None and ((i + 1) < len(sli))) and sli[i + 1].typ == StreetItemType.NUMBER and sli[i + 1].number_has_prefix): 
         number = (sli[i + 1].value if sli[i + 1].number is None else str(sli[i + 1].number.int_value))
         rli.append(sli[i + 1])
     elif ((((i < n0) and ((name is not None or adj is not None)) and (j < len(sli))) and sli[j].typ == StreetItemType.NOUN and ((noun.termin.canonic_text == "УЛИЦА" or noun.termin.canonic_text == "ВУЛИЦЯ"))) and (((sli[j].termin.canonic_text == "ПЛОЩАДЬ" or sli[j].termin.canonic_text == "БУЛЬВАР" or sli[j].termin.canonic_text == "ПЛОЩА") or sli[j].termin.canonic_text == "МАЙДАН" or (j + 1) == len(sli)))): 
         alt_noun = noun
         noun = sli[j]
         rli.append(sli[j])
     if (name is None): 
         if (number is None and adj is None): 
             return None
         if (noun.is_abridge): 
             if (is_micro_raion): 
                 pass
             elif (noun.termin is not None and ((noun.termin.canonic_text == "ПРОЕЗД" or noun.termin.canonic_text == "ПРОЇЗД"))): 
                 pass
             elif (adj is None or adj.is_abridge): 
                 return None
         if (adj is not None and adj.is_abridge): 
             return None
     if (not sli[i] in rli): 
         rli.append(sli[i])
     street = StreetReferent()
     if (not for_metro): 
         street.addSlot(StreetReferent.ATTR_TYP, noun.termin.canonic_text.lower(), False, 0)
         if (noun.alt_termin is not None): 
             if (noun.alt_termin.canonic_text == "ПРОСПЕКТ" and number is not None): 
                 pass
             else: 
                 street.addSlot(StreetReferent.ATTR_TYP, noun.alt_termin.canonic_text.lower(), False, 0)
     else: 
         street.addSlot(StreetReferent.ATTR_TYP, "метро", False, 0)
     res = AddressItemToken._new82(AddressItemToken.ItemType.STREET, rli[0].begin_token, rli[0].end_token, street)
     for r in rli: 
         if (res.begin_char > r.begin_char): 
             res.begin_token = r.begin_token
         if (res.end_char < r.end_char): 
             res.end_token = r.end_token
     if (for_metro and noun in rli and noun.termin.canonic_text == "МЕТРО"): 
         rli.remove(noun)
     if (noun.is_abridge and (noun.length_char < 4)): 
         res.is_doubt = True
     elif (noun.noun_is_doubt_coef > 0): 
         res.is_doubt = True
         if ((name is not None and name.end_char > noun.end_char and noun.chars.is_all_lower) and not name.chars.is_all_lower and not ((isinstance(name.begin_token, ReferentToken)))): 
             npt2 = NounPhraseHelper.tryParse(name.begin_token, NounPhraseParseAttr.NO, 0)
             if (npt2 is not None and npt2.end_char > name.end_char): 
                 pass
             elif (AddressItemToken.checkHouseAfter(res.end_token.next0_, False, False)): 
                 res.is_doubt = False
             elif (name.chars.is_capital_upper and noun.noun_is_doubt_coef == 1): 
                 res.is_doubt = False
     name_base = io.StringIO()
     name_alt = io.StringIO()
     name_alt2 = None
     gen = noun.termin.gender
     adj_gen = MorphGender.UNDEFINED
     if (number is not None): 
         street.number = number
         if (sec_number is not None): 
             street.sec_number = sec_number
     if (age is not None): 
         if (street.number is None): 
             street.number = age
         else: 
             street.sec_number = age
     if (name is not None and name.value is not None): 
         if (street.kind == StreetKind.ROAD): 
             for r in rli: 
                 if (r.typ == StreetItemType.NAME and r != name): 
                     print(r.value, end="", file=name_alt)
                     break
         if (name.alt_value is not None and name_alt.tell() == 0): 
             print("{0} {1}".format(Utils.toStringStringIO(name_base), name.alt_value), end="", file=name_alt, flush=True)
         print(" {0}".format(name.value), end="", file=name_base, flush=True)
     elif (name is not None): 
         is_adj = False
         if (isinstance(name.end_token, TextToken)): 
             for wf in name.end_token.morph.items: 
                 if ((isinstance(wf, MorphWordForm)) and (wf).is_in_dictionary): 
                     is_adj = (wf.class0_.is_adjective | wf.class0_.is_proper_geo)
                     adj_gen = wf.gender
                     break
                 elif (wf.class0_.is_adjective | wf.class0_.is_proper_geo): 
                     is_adj = True
         if (is_adj): 
             tmp = io.StringIO()
             vars0_ = list()
             t = name.begin_token
             while t is not None: 
                 tt = Utils.asObjectOrNull(t, TextToken)
                 if (tt is None): 
                     break
                 if (tmp.tell() > 0): 
                     print(' ', end="", file=tmp)
                 if (t == name.end_token): 
                     is_padez = False
                     if (not noun.is_abridge): 
                         if (not noun.morph.case_.is_undefined and not noun.morph.case_.is_nominative): 
                             is_padez = True
                         elif (noun.termin.canonic_text == "ШОССЕ" or noun.termin.canonic_text == "ШОСЕ"): 
                             is_padez = True
                     if (res.begin_token.previous is not None and res.begin_token.previous.morph.class0_.is_preposition): 
                         is_padez = True
                     if (not is_padez): 
                         print(tt.term, end="", file=tmp)
                         break
                     for wf in tt.morph.items: 
                         if (((wf.class0_.is_adjective or wf.class0_.is_proper_geo)) and (((wf.gender) & (gen))) != (MorphGender.UNDEFINED)): 
                             if (noun.morph.case_.is_undefined or not ((wf.case_) & noun.morph.case_).is_undefined): 
                                 wff = Utils.asObjectOrNull(wf, MorphWordForm)
                                 if (wff is None): 
                                     continue
                                 if (gen == MorphGender.MASCULINE and "ОЙ" in wff.normal_case): 
                                     continue
                                 if (not wff.normal_case in vars0_): 
                                     vars0_.append(wff.normal_case)
                     if (not tt.term in vars0_ and Utils.indexOfList(sli, name, 0) > Utils.indexOfList(sli, noun, 0)): 
                         vars0_.append(tt.term)
                     if (len(vars0_) == 0): 
                         vars0_.append(tt.term)
                     break
                 if (not tt.is_hiphen): 
                     print(tt.term, end="", file=tmp)
                 t = t.next0_
             if (len(vars0_) == 0): 
                 print(" {0}".format(Utils.toStringStringIO(tmp)), end="", file=name_base, flush=True)
             else: 
                 head = Utils.toStringStringIO(name_base)
                 print(" {0}{1}".format(Utils.toStringStringIO(tmp), vars0_[0]), end="", file=name_base, flush=True)
                 if (len(vars0_) > 1): 
                     Utils.setLengthStringIO(name_alt, 0)
                     print("{0} {1}{2}".format(head, Utils.toStringStringIO(tmp), vars0_[1]), end="", file=name_alt, flush=True)
                 if (len(vars0_) > 2): 
                     name_alt2 = "{0} {1}{2}".format(head, Utils.toStringStringIO(tmp), vars0_[2])
         else: 
             str_nam = None
             nits = list()
             has_adj = False
             has_proper_name = False
             t = name.begin_token
             while t is not None: 
                 if (t.morph.class0_.is_adjective or t.morph.class0_.is_conjunction): 
                     has_adj = True
                 if ((isinstance(t, TextToken)) and not t.is_hiphen): 
                     if (name.termin is not None): 
                         nits.append(name.termin.canonic_text)
                         break
                     elif (not t.chars.is_letter and len(nits) > 0): 
                         nits[len(nits) - 1] += (t).term
                     else: 
                         nits.append((t).term)
                         if (t == name.begin_token and t.getMorphClassInDictionary().is_proper_name): 
                             has_proper_name = True
                 elif ((isinstance(t, ReferentToken)) and name.termin is None): 
                     nits.append(t.getSourceText().upper())
                 if (t == name.end_token): 
                     break
                 t = t.next0_
             if (not has_adj and not has_proper_name): 
                 nits.sort()
             str_nam = Utils.joinStrings(" ", list(nits))
             if (has_proper_name and len(nits) == 2): 
                 Utils.setLengthStringIO(name_alt, 0)
                 print("{0} {1}".format(Utils.toStringStringIO(name_base), nits[1]), end="", file=name_alt, flush=True)
             print(" {0}".format(str_nam), end="", file=name_base, flush=True)
     adj_str = None
     adj_can_be_initial = False
     if (adj is not None): 
         if (adj_gen == MorphGender.UNDEFINED and name is not None and (((name.morph.number) & (MorphNumber.PLURAL))) == (MorphNumber.UNDEFINED)): 
             if (name.morph.gender == MorphGender.FEMINIE or name.morph.gender == MorphGender.MASCULINE or name.morph.gender == MorphGender.NEUTER): 
                 adj_gen = name.morph.gender
         if (name is not None and (((name.morph.number) & (MorphNumber.PLURAL))) != (MorphNumber.UNDEFINED)): 
             s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new209(MorphClass.ADJECTIVE, MorphNumber.PLURAL))
         elif (adj_gen != MorphGender.UNDEFINED): 
             s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, adj_gen))
         elif ((((adj.morph.gender) & (gen))) == (MorphGender.UNDEFINED)): 
             s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, adj.morph.gender))
         else: 
             s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, gen))
         adj_str = s
         if (name is not None and (Utils.indexOfList(sli, adj, 0) < Utils.indexOfList(sli, name, 0))): 
             if (adj.end_token.isChar('.') and adj.length_char <= 3 and not adj.begin_token.chars.is_all_lower): 
                 adj_can_be_initial = True
     s1 = Utils.toStringStringIO(name_base).strip()
     s2 = Utils.toStringStringIO(name_alt).strip()
     if (len(s1) < 3): 
         if (street.number is not None): 
             if (adj_str is not None): 
                 if (adj.is_abridge): 
                     return None
                 street.addSlot(StreetReferent.ATTR_NAME, adj_str, False, 0)
         elif (adj_str is None): 
             if (len(s1) < 1): 
                 return None
             if (is_micro_raion): 
                 street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
                 if (not Utils.isNullOrEmpty(s2)): 
                     street.addSlot(StreetReferent.ATTR_NAME, s2, False, 0)
             else: 
                 return None
         else: 
             if (adj.is_abridge): 
                 return None
             street.addSlot(StreetReferent.ATTR_NAME, adj_str, False, 0)
     elif (adj_can_be_initial): 
         street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
         street.addSlot(StreetReferent.ATTR_NAME, MiscHelper.getTextValue(adj.begin_token, name.end_token, GetTextAttr.NO), False, 0)
         street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0)
     elif (adj_str is None): 
         street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
     else: 
         street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0)
     if (name_alt.tell() > 0): 
         s1 = Utils.toStringStringIO(name_alt).strip()
         if (adj_str is None): 
             street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
         else: 
             street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0)
     if (name_alt2 is not None): 
         if (adj_str is None): 
             if (for_metro and noun is not None): 
                 street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(alt_noun.termin.canonic_text, name_alt2.strip()), False, 0)
             else: 
                 street.addSlot(StreetReferent.ATTR_NAME, name_alt2.strip(), False, 0)
         else: 
             street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, name_alt2.strip()), False, 0)
     if (name is not None and name.alt_value2 is not None): 
         street.addSlot(StreetReferent.ATTR_NAME, name.alt_value2, False, 0)
     if ((name is not None and adj is None and name.exist_street is not None) and not for_metro): 
         for n in name.exist_street.names: 
             street.addSlot(StreetReferent.ATTR_NAME, n, False, 0)
     if (alt_noun is not None and not for_metro): 
         street.addSlot(StreetReferent.ATTR_TYP, alt_noun.termin.canonic_text.lower(), False, 0)
     if (noun.termin.canonic_text == "ПЛОЩАДЬ" or noun.termin.canonic_text == "КВАРТАЛ" or noun.termin.canonic_text == "ПЛОЩА"): 
         res.is_doubt = True
         if (name is not None and name.is_in_dictionary): 
             res.is_doubt = False
         elif (alt_noun is not None or for_metro): 
             res.is_doubt = False
         elif (res.begin_token.previous is None or MiscLocationHelper.checkGeoObjectBefore(res.begin_token.previous)): 
             if (res.end_token.next0_ is None or AddressItemToken.checkHouseAfter(res.end_token.next0_, False, True)): 
                 res.is_doubt = False
     if (LanguageHelper.endsWith(noun.termin.canonic_text, "ГОРОДОК")): 
         for s in street.slots: 
             if (s.type_name == StreetReferent.ATTR_TYP): 
                 street.uploadSlot(s, "микрорайон")
             elif (s.type_name == StreetReferent.ATTR_NAME): 
                 street.uploadSlot(s, "{0} {1}".format(noun.termin.canonic_text, s.value))
         if (street.findSlot(StreetReferent.ATTR_NAME, None, True) is None): 
             street.addSlot(StreetReferent.ATTR_NAME, noun.termin.canonic_text, False, 0)
     t1 = res.end_token.next0_
     if (t1 is not None and t1.is_comma): 
         t1 = t1.next0_
     non = StreetItemToken.tryParse(t1, None, False, None, False)
     if (non is not None and non.typ == StreetItemType.NOUN and len(street.typs) > 0): 
         if (AddressItemToken.checkHouseAfter(non.end_token.next0_, False, True)): 
             street._correct()
             nams = street.names
             for t in street.typs: 
                 for n in nams: 
                     street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(t.upper(), n), False, 0)
             street.addSlot(StreetReferent.ATTR_TYP, non.termin.canonic_text.lower(), False, 0)
             res.end_token = non.end_token
     if (res.is_doubt): 
         if (noun.is_road): 
             if (street.number is not None and Utils.endsWithString(street.number, "КМ", True)): 
                 res.is_doubt = False
             elif (AddressItemToken.checkKmAfter(res.end_token.next0_)): 
                 res.is_doubt = False
             elif (AddressItemToken.checkKmBefore(res.begin_token.previous)): 
                 res.is_doubt = False
         elif (noun.termin.canonic_text == "ПРОЕЗД" and street.findSlot(StreetReferent.ATTR_NAME, "ПРОЕКТИРУЕМЫЙ", True) is not None): 
             res.is_doubt = False
         tt0 = res.begin_token.previous
         first_pass2733 = True
         while True:
             if first_pass2733: first_pass2733 = False
             else: tt0 = tt0.previous
             if (not (tt0 is not None)): break
             if (tt0.isCharOf(",,") or tt0.is_comma_and): 
                 continue
             str0 = Utils.asObjectOrNull(tt0.getReferent(), StreetReferent)
             if (str0 is not None): 
                 res.is_doubt = False
             break
     if (noun.termin.canonic_text == "КВАРТАЛ" and (res.whitespaces_after_count < 2) and number is None): 
         ait = AddressItemToken.tryParse(res.end_token.next0_, None, False, True, None)
         if (ait is not None and ait.typ == AddressItemToken.ItemType.NUMBER and ait.value is not None): 
             street.addSlot(StreetReferent.ATTR_NUMBER, ait.value, False, 0)
             res.end_token = ait.end_token
     return res
コード例 #20
0
 def __try_attach_(self, pli : typing.List['PhoneItemToken'], ind : int, is_phone_before : bool, prev_phone : 'PhoneReferent', lev : int=0) -> 'ReferentToken':
     """Try to assemble a phone referent from the items of ``pli`` starting at ``ind``.

     The method walks the items, separates an optional country code and
     city code, concatenates the digits of the remaining NUMBER items and
     records a "template" (digit-group lengths joined by the delimiter
     values). The collected result then goes through a series of
     plausibility checks before a ``PhoneReferent`` is created.

     Args:
         pli: phone items to analyse. NOTE: the list can be truncated in
             place when its leading items reproduce ``prev_phone``'s template.
         ind: index in ``pli`` of the first item to look at.
         is_phone_before: flag that relaxes several of the checks below
             (presumably set when a phone/prefix precedes - TODO confirm
             against the callers).
         prev_phone: previously built phone or None; its ``number``,
             ``country_code`` and ``_m_template`` are used for comparison and
             to complete a shortened number.
         lev: recursion depth; the method gives up when it exceeds 4.

     Returns:
         A ``ReferentToken`` wrapping the new ``PhoneReferent``, or None when
         the items are rejected.
     """
     if (ind >= len(pli) or lev > 4): 
         return None
     country_code = None
     city_code = None
     j = ind
     # If the previous phone has a template, rebuild the same kind of template
     # from the leading items; on a match everything after the matching item is
     # cut off (unless the only thing left is one trailing PREFIX item).
     if (prev_phone is not None and prev_phone._m_template is not None and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER): 
         tmp = io.StringIO()
         jj = j
         first_pass3391 = True
         while True:
             if first_pass3391: first_pass3391 = False
             else: jj += 1
             if (not (jj < len(pli))): break
             if (pli[jj].item_type == PhoneItemToken.PhoneItemType.NUMBER): 
                 print(len(pli[jj].value), end="", file=tmp)
             elif (pli[jj].item_type == PhoneItemToken.PhoneItemType.DELIM): 
                 # A single-space delimiter ends the template scan.
                 if (pli[jj].value == " "): 
                     break
                 print(pli[jj].value, end="", file=tmp)
                 continue
             else: 
                 break
             # Reached only after a NUMBER item: compare the template built so far.
             templ0 = Utils.toStringStringIO(tmp)
             if (templ0 == prev_phone._m_template): 
                 if ((jj + 1) < len(pli)): 
                     if (pli[jj + 1].item_type == PhoneItemToken.PhoneItemType.PREFIX and (jj + 2) == len(pli)): 
                         pass
                     else: 
                         # Equivalent to deleting pli[jj + 1:].
                         del pli[jj + 1:jj + 1+len(pli) - jj - 1]
                 break
     # Country code: either an explicit COUNTRYCODE item ...
     if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.COUNTRYCODE): 
         country_code = pli[j].value
         if (country_code != "8"): 
             # When the known prefix is shorter than the item value, the rest
             # of the value becomes the beginning of the city code.
             cc = PhoneHelper.get_country_prefix(country_code)
             if (cc is not None and (len(cc) < len(country_code))): 
                 city_code = country_code[len(cc):]
                 country_code = cc
         j += 1
     # ... or an item flagged as a possible country prefix, accepted only when
     # the items after it (skipping one delimiter) parse as a phone by themselves.
     elif ((j < len(pli)) and pli[j].can_be_country_prefix): 
         k = j + 1
         if ((k < len(pli)) and pli[k].item_type == PhoneItemToken.PhoneItemType.DELIM): 
             k += 1
         rrt = self.__try_attach_(pli, k, is_phone_before, None, lev + 1)
         if (rrt is not None): 
             # Exception: with is_phone_before set, a 3-digit number, a hyphen
             # and another 3-digit number are left as part of the number itself.
             if ((((is_phone_before and pli[j + 1].item_type == PhoneItemToken.PhoneItemType.DELIM and pli[j + 1].begin_token.is_hiphen) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and len(pli[j].value) == 3) and ((j + 2) < len(pli)) and pli[j + 2].item_type == PhoneItemToken.PhoneItemType.NUMBER) and len(pli[j + 2].value) == 3): 
                 pass
             else: 
                 country_code = pli[j].value
                 j += 1
     # No country code yet and the next number starts with '8' or '7'.
     if (((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and ((pli[j].value[0] == '8' or pli[j].value[0] == '7'))) and country_code is None): 
         if (len(pli[j].value) == 1): 
             # The lone digit is the country code.
             country_code = pli[j].value
             j += 1
         elif (len(pli[j].value) == 4): 
             # First digit is the country code, the other three extend the city code.
             country_code = pli[j].value[0:0+1]
             if (city_code is None): 
                 city_code = pli[j].value[1:]
             else: 
                 city_code += pli[j].value[1:]
             j += 1
         elif (len(pli[j].value) == 11 and j == (len(pli) - 1) and is_phone_before): 
             # The whole number sits in the last item: return it right away.
             # A leading '8' is not stored as the country code.
             ph0 = PhoneReferent()
             if (pli[j].value[0] != '8'): 
                 ph0.country_code = pli[j].value[0:0+1]
             ph0.number = pli[j].value[1:1+3] + pli[j].value[4:]
             return ReferentToken(ph0, pli[0].begin_token, pli[j].end_token)
         elif (city_code is None and len(pli[j].value) > 3 and ((j + 1) < len(pli))): 
             # If all NUMBER items of the list give 11 digits in total, the
             # leading digit is dropped and the rest of this item is the city code.
             sum0_ = 0
             for it in pli: 
                 if (it.item_type == PhoneItemToken.PhoneItemType.NUMBER): 
                     sum0_ += len(it.value)
             if (sum0_ == 11): 
                 city_code = pli[j].value[1:]
                 j += 1
     # Explicit CITYCODE item (appended to whatever was collected above).
     if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.CITYCODE): 
         if (city_code is None): 
             city_code = pli[j].value
         else: 
             city_code += pli[j].value
         j += 1
     if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM): 
         j += 1
     # After country code "8", a 3- or 4-digit number followed by at least three
     # more items is taken as the city code.
     if ((country_code == "8" and city_code is None and ((j + 3) < len(pli))) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER): 
         if (len(pli[j].value) == 3 or len(pli[j].value) == 4): 
             city_code = pli[j].value
             j += 1
             if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM): 
                 j += 1
     # Expected digit count, known only for country code "421"; 0 means unknown.
     normal_num_len = 0
     if (country_code == "421"): 
         normal_num_len = 9
     # num - collected digits, templ - template text, part_length - lengths of
     # the collected digit groups, delim - first delimiter value seen.
     num = io.StringIO()
     templ = io.StringIO()
     part_length = list()
     delim = None
     ok = False
     additional = None
     std = False
     # Fixed shape NUMBER DELIM NUMBER DELIM NUMBER directly after "-" or a
     # COUNTRYCODE item, with the specific group lengths checked below: taken
     # as a whole and the generic loop is skipped (std = True).
     if (country_code is not None and ((j + 4) < len(pli)) and j > 0): 
         if (((((pli[j - 1].value == "-" or pli[j - 1].item_type == PhoneItemToken.PhoneItemType.COUNTRYCODE)) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and pli[j + 1].item_type == PhoneItemToken.PhoneItemType.DELIM) and pli[j + 2].item_type == PhoneItemToken.PhoneItemType.NUMBER and pli[j + 3].item_type == PhoneItemToken.PhoneItemType.DELIM) and pli[j + 4].item_type == PhoneItemToken.PhoneItemType.NUMBER): 
             if ((((len(pli[j].value) + len(pli[j + 2].value)) == 6 or ((len(pli[j].value) == 4 and len(pli[j + 2].value) == 5)))) and ((len(pli[j + 4].value) == 4 or len(pli[j + 4].value) == 1))): 
                 print(pli[j].value, end="", file=num)
                 print(pli[j + 2].value, end="", file=num)
                 print(pli[j + 4].value, end="", file=num)
                 print("{0}{1}{2}{3}{4}".format(len(pli[j].value), pli[j + 1].value, len(pli[j + 2].value), pli[j + 3].value, len(pli[j + 4].value)), end="", file=templ, flush=True)
                 std = True
                 ok = True
                 j += 5
     # Generic collection loop over the remaining items (j advances by one on
     # every pass except the first).
     first_pass3392 = True
     while True:
         if first_pass3392: first_pass3392 = False
         else: j += 1
         if (not (j < len(pli))): break
         if (std): 
             break
         if (pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM): 
             # Delimiters in brackets, or right after a bracketed item, are ignored.
             if (pli[j].is_in_brackets): 
                 continue
             if (j > 0 and pli[j - 1].is_in_brackets): 
                 continue
             if (templ.tell() > 0): 
                 print(pli[j].value, end="", file=templ)
             if (delim is None): 
                 delim = pli[j].value
             elif (pli[j].value != delim): 
                 # The delimiter changed. If exactly two groups (3 or 4 digits,
                 # then 3 digits) were collected and no city code is known, the
                 # first group is moved out of num into city_code.
                 if ((len(part_length) == 2 and ((part_length[0] == 3 or part_length[0] == 4)) and city_code is None) and part_length[1] == 3): 
                     city_code = Utils.toStringStringIO(num)[0:0+part_length[0]]
                     Utils.removeStringIO(num, 0, part_length[0])
                     del part_length[0]
                     delim = pli[j].value
                     continue
                 # With is_phone_before set, keep going while fewer than 6 digits
                 # are collected or the next number would complete normal_num_len.
                 if (is_phone_before and ((j + 1) < len(pli)) and pli[j + 1].item_type == PhoneItemToken.PhoneItemType.NUMBER): 
                     if (num.tell() < 6): 
                         continue
                     if (normal_num_len > 0 and (num.tell() + len(pli[j + 1].value)) == normal_num_len): 
                         continue
                 break
             else: 
                 continue
             # Reached only when this item has just become the first delimiter.
             ok = False
         elif (pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER): 
             # First number located right after a table control char: reject if
             # the token after the last item (skipping one ',' or '.') is a number.
             if (num.tell() == 0 and pli[j].begin_token.previous is not None and pli[j].begin_token.previous.is_table_control_char): 
                 tt = pli[len(pli) - 1].end_token.next0_
                 if (tt is not None and tt.is_char_of(",.")): 
                     tt = tt.next0_
                 if (isinstance(tt, NumberToken)): 
                     return None
             # Never collect more than 13 digits; stop before this item (and
             # before the delimiter that precedes it).
             if ((num.tell() + len(pli[j].value)) > 13): 
                 if (j > 0 and pli[j - 1].item_type == PhoneItemToken.PhoneItemType.DELIM): 
                     j -= 1
                 ok = True
                 break
             print(pli[j].value, end="", file=num)
             part_length.append(len(pli[j].value))
             print(len(pli[j].value), end="", file=templ)
             ok = True
             # More than 10 digits: stop, picking up an ADDNUMBER item if it follows.
             if (num.tell() > 10): 
                 j += 1
                 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.ADDNUMBER): 
                     additional = pli[j].value
                     j += 1
                 break
         elif (pli[j].item_type == PhoneItemToken.PhoneItemType.ADDNUMBER): 
             additional = pli[j].value
             j += 1
             break
         else: 
             break
     # A last bracketed item with a 3- or 4-character value is taken as the
     # additional number.
     if ((j == (len(pli) - 1) and pli[j].is_in_brackets and ((len(pli[j].value) == 3 or len(pli[j].value) == 4))) and additional is None): 
         additional = pli[j].value
         j += 1
     # A bracketed PREFIX item after the number switches is_phone_before on.
     if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.PREFIX and pli[j].is_in_brackets): 
         is_phone_before = True
         j += 1
     # Try to split a long city code into country prefix + city code
     # (skipped when city code + number give exactly 10 digits).
     if ((country_code is None and city_code is not None and len(city_code) > 3) and (num.tell() < 8) and city_code[0] != '8'): 
         if ((len(city_code) + num.tell()) == 10): 
             pass
         else: 
             cc = PhoneHelper.get_country_prefix(city_code)
             if (cc is not None): 
                 if (len(cc) > 1 and (len(city_code) - len(cc)) > 1): 
                     country_code = cc
                     city_code = city_code[len(cc):]
     # City code written with a leading "00": look the country prefix up after it.
     if (country_code is None and city_code is not None and city_code.startswith("00")): 
         cc = PhoneHelper.get_country_prefix(city_code[2:])
         if (cc is not None): 
             if (len(city_code) > (len(cc) + 3)): 
                 country_code = cc
                 city_code = city_code[len(cc) + 2:]
     # No digits collected at all: the "city code" may actually hold the number.
     if (num.tell() == 0 and city_code is not None): 
         if (len(city_code) == 10): 
             print(city_code[3:], end="", file=num)
             part_length.append(num.tell())
             city_code = city_code[0:0+3]
             ok = True
         elif (((len(city_code) == 9 or len(city_code) == 11 or len(city_code) == 8)) and ((is_phone_before or country_code is not None))): 
             print(city_code, end="", file=num)
             part_length.append(num.tell())
             city_code = (None)
             ok = True
     # Plausibility checks for short numbers.
     if (num.tell() < 4): 
         ok = False
     if (num.tell() < 7): 
         if (city_code is not None and (len(city_code) + num.tell()) > 7): 
             if (not is_phone_before and len(city_code) == 3): 
                 # NOTE(review): ii is initialised twice; the second assignment is redundant.
                 ii = 0
                 ii = 0
                 # Scan stops early on a group longer than 3, or on a shorter
                 # group that is not the last one or has fewer than 2 digits.
                 while ii < len(part_length): 
                     if (part_length[ii] == 3): 
                         pass
                     elif (part_length[ii] > 3): 
                         break
                     elif ((ii < (len(part_length) - 1)) or (part_length[ii] < 2)): 
                         break
                     ii += 1
                 # Scan completed without a break: rejected unless country code is "61".
                 if (ii >= len(part_length)): 
                     if (country_code == "61"): 
                         pass
                     else: 
                         ok = False
         elif (((num.tell() == 6 or num.tell() == 5)) and ((len(part_length) >= 1 and len(part_length) <= 3)) and is_phone_before): 
             if (pli[0].item_type == PhoneItemToken.PhoneItemType.PREFIX and pli[0].kind == PhoneKind.HOME): 
                 ok = False
         elif (prev_phone is not None and prev_phone.number is not None and ((len(prev_phone.number) == num.tell() or len(prev_phone.number) == (num.tell() + 3) or len(prev_phone.number) == (num.tell() + 4)))): 
             pass
         elif (num.tell() > 4 and prev_phone is not None and Utils.toStringStringIO(templ) == prev_phone._m_template): 
             ok = True
         else: 
             ok = False
     # A dot-delimited number without any code is rejected.
     if (delim == "." and country_code is None and city_code is None): 
         ok = False
     # Long number without codes: split a country prefix off when exactly
     # 9 digits remain after it.
     if ((is_phone_before and country_code is None and city_code is None) and num.tell() > 10): 
         cc = PhoneHelper.get_country_prefix(Utils.toStringStringIO(num))
         if (cc is not None): 
             if ((num.tell() - len(cc)) == 9): 
                 country_code = cc
                 Utils.removeStringIO(num, 0, len(cc))
                 ok = True
     # Check the grouping: the listed group-length patterns are accepted as is,
     # too many groups are rejected, the rest needs some supporting context.
     if (ok): 
         if (std): 
             pass
         elif (prev_phone is not None and prev_phone.number is not None and (((len(prev_phone.number) == num.tell() or len(prev_phone.number) == (num.tell() + 3) or len(prev_phone.number) == (num.tell() + 4)) or prev_phone._m_template == Utils.toStringStringIO(templ)))): 
             pass
         elif ((len(part_length) == 3 and part_length[0] == 3 and part_length[1] == 2) and part_length[2] == 2): 
             pass
         elif (len(part_length) == 3 and is_phone_before): 
             pass
         elif ((len(part_length) == 4 and (((part_length[0] + part_length[1]) == 3)) and part_length[2] == 2) and part_length[3] == 2): 
             pass
         elif ((len(part_length) == 4 and part_length[0] == 3 and part_length[1] == 3) and part_length[2] == 2 and part_length[3] == 2): 
             pass
         elif (len(part_length) == 5 and (part_length[1] + part_length[2]) == 4 and (part_length[3] + part_length[4]) == 4): 
             pass
         elif (len(part_length) > 4): 
             ok = False
         elif (len(part_length) > 3 and city_code is not None): 
             ok = False
         elif ((is_phone_before or city_code is not None or country_code is not None) or additional is not None): 
             ok = True
         else: 
             ok = False
             # Last chance for a bare 6/7-digit number: a phone found right
             # after it whose number is 0, 3 or 4 digits longer.
             if (((num.tell() == 6 or num.tell() == 7)) and (len(part_length) < 4) and j > 0): 
                 next_ph = self.__get_next_phone(pli[j - 1].end_token.next0_, lev + 1)
                 if (next_ph is not None): 
                     # NOTE(review): next_ph.number is not None-checked here - confirm it is always set.
                     d = len(next_ph.number) - num.tell()
                     if (d == 0 or d == 3 or d == 4): 
                         ok = True
     # End token of the last consumed item; nothing consumed means failure.
     end = (pli[j - 1].end_token if j > 0 else None)
     if (end is None): 
         ok = False
     # A bare number without any context must not be glued to the following text.
     if ((ok and city_code is None and country_code is None) and prev_phone is None and not is_phone_before): 
         if (not end.is_whitespace_after and end.next0_ is not None): 
             tt = end.next0_
             if (tt.is_char_of(".,)") and tt.next0_ is not None): 
                 tt = tt.next0_
             if (not tt.is_whitespace_before): 
                 ok = False
     if (not ok): 
         return None
     # Drop a trailing non-digit (delimiter) from the template.
     if (templ.tell() > 0 and not str.isdigit(Utils.getCharAtStringIO(templ, templ.tell() - 1))): 
         Utils.setLengthStringIO(templ, templ.tell() - 1)
     if ((country_code is None and city_code is not None and len(city_code) > 3) and num.tell() > 6): 
         cc = PhoneHelper.get_country_prefix(city_code)
         if (cc is not None and ((len(cc) + 1) < len(city_code))): 
             country_code = cc
             city_code = city_code[len(cc):]
     # Numbers directly preceded by "ГОСТ" or "ТУ" are rejected.
     if (pli[0].begin_token.previous is not None): 
         if (pli[0].begin_token.previous.is_value("ГОСТ", None) or pli[0].begin_token.previous.is_value("ТУ", None)): 
             return None
     ph = PhoneReferent()
     if (country_code is not None): 
         ph.country_code = country_code
     number = Utils.toStringStringIO(num)
     # Derive a missing city code from the leading digits of the number.
     if ((city_code is None and num.tell() > 7 and len(part_length) > 0) and (part_length[0] < 5)): 
         city_code = number[0:0+part_length[0]]
         number = number[part_length[0]:]
     if (city_code is None and num.tell() == 11 and Utils.getCharAtStringIO(num, 0) == '8'): 
         city_code = number[1:1+3]
         number = number[4:]
     if (city_code is None and num.tell() == 10): 
         city_code = number[0:0+3]
         number = number[3:]
     # The stored number is the city code followed by the local digits.
     if (city_code is not None): 
         number = (city_code + number)
     elif (country_code is None and prev_phone is not None): 
         # Shortened number: borrow the missing leading digits from the previous phone.
         # NOTE(review): prev_phone.number is used without the None check that
         # the earlier comparisons apply - confirm it cannot be None here.
         ok1 = False
         if (len(prev_phone.number) >= (len(number) + 2)): 
             ok1 = True
         elif (templ.tell() > 0 and prev_phone._m_template is not None and LanguageHelper.ends_with(prev_phone._m_template, Utils.toStringStringIO(templ))): 
             ok1 = True
         if (ok1 and len(prev_phone.number) > len(number)): 
             number = (prev_phone.number[0:0+len(prev_phone.number) - len(number)] + number)
     # Inherit the country code of the previous phone when the numbers have equal length.
     if (ph.country_code is None and prev_phone is not None and prev_phone.country_code is not None): 
         if (len(prev_phone.number) == len(number)): 
             ph.country_code = prev_phone.country_code
     # A number consisting only of '0' characters (or an empty one) is rejected.
     ok = False
     for d in number: 
         if (d != '0'): 
             ok = True
             break
     if (not ok): 
         return None
     if (country_code is not None): 
         if (len(number) < 7): 
             return None
     else: 
         # No country code: the number itself may start with a country prefix.
         s = PhoneHelper.get_country_prefix(number)
         if (s is not None): 
             num2 = number[len(s):]
             if (len(num2) >= 10 and len(num2) <= 11): 
                 number = num2
                 if (s != "7"): 
                     ph.country_code = s
         if (len(number) == 8 and prev_phone is None): 
             return None
     # More than 11 digits is allowed only (up to 13) for country codes "1" and "43".
     if (len(number) > 11): 
         if ((len(number) < 14) and ((country_code == "1" or country_code == "43"))): 
             pass
         else: 
             return None
     ph.number = number
     if (additional is not None): 
         ph.add_slot(PhoneReferent.ATTR_ADDNUMBER, additional, True, 0)
     # Without is_phone_before, a following '+', '=' or hyphen on the same line
     # rejects the result.
     if (not is_phone_before and end.next0_ is not None and not end.is_newline_after): 
         if (end.next0_.is_char_of("+=") or end.next0_.is_hiphen): 
             return None
     # With country code "7" the number must have exactly 10 digits.
     if (country_code is not None and country_code == "7"): 
         if (len(number) != 10): 
             return None
     ph._m_template = Utils.toStringStringIO(templ)
     # A trailing PREFIX item on the same line is included in the span and may set the kind.
     if (j == (len(pli) - 1) and pli[j].item_type == PhoneItemToken.PhoneItemType.PREFIX and not pli[j].is_newline_before): 
         end = pli[j].end_token
         if (pli[j].kind != PhoneKind.UNDEFINED): 
             ph.kind = pli[j].kind
     res = ReferentToken(ph, pli[0].begin_token, end)
     # A leading PREFIX followed by a table control char is left out of the span.
     if (pli[0].item_type == PhoneItemToken.PhoneItemType.PREFIX and pli[0].end_token.next0_.is_table_control_char): 
         res.begin_token = pli[1].begin_token
     return res
コード例 #21
0
 def __toFullString(self, last_name_first : bool, lang : 'MorphLang') -> str:
     """Build the full display name of the person.

     The longest ATTR_IDENTITY slot value, when there is one, is returned on
     its own. Otherwise the name is composed from the last name plus the
     first and middle names found for it (an initial is followed by '.',
     a full name by a space). For a Cyrillic last name, a Latin-script last
     name stored in the slots is appended in parentheses. Without a last
     name, the first name is combined with the middle name, the value of
     ``__findShortestKingTitul`` and the nickname.

     Args:
         last_name_first: put the last name before the first/middle names;
             forced to True when the "NAMETYPE" value is "china".
         lang: not read by this method.

     Returns:
         The formatted name, or "?" when neither a last name nor a first
         name is set.
     """
     # An explicit identity overrides everything else; the longest one wins.
     identity = None
     for slot in self.slots:
         if (slot.type_name == PersonReferent.ATTR_IDENTITY):
             text = str(slot.value)
             if (identity is None or len(text) > len(identity)):
                 identity = text
     if (identity is not None):
         return MiscHelper.convertFirstCharUpperAndOtherLower(identity)
     if (self.getStringValue("NAMETYPE") == "china"):
         last_name_first = True
     n = self.getStringValue(PersonReferent.ATTR_LASTNAME)
     if (n is not None):
         res = io.StringIO()
         if (last_name_first):
             print("{0} ".format(n), end="", file=res)
         s = self.__findForSurname(PersonReferent.ATTR_FIRSTNAME, n, False)
         if (s is not None):
             print("{0}".format(s), end="", file=res)
             print('.' if PersonReferent.__isInitial(s) else ' ', end="", file=res)
             s = self.__findForSurname(PersonReferent.ATTR_MIDDLENAME, n, False)
             if (s is not None):
                 print("{0}".format(s), end="", file=res)
                 print('.' if PersonReferent.__isInitial(s) else ' ', end="", file=res)
         if (not last_name_first):
             print(n, end="", file=res)
         elif (Utils.getCharAtStringIO(res, res.tell() - 1) == ' '):
             # The last name came first: drop the separator left at the end.
             Utils.setLengthStringIO(res, res.tell() - 1)
         # Guard against an empty last name before looking at its first char.
         if (len(n) > 0 and LanguageHelper.isCyrillicChar(n[0])):
             nl = None
             for sl in self.slots:
                 if (sl.type_name != PersonReferent.ATTR_LASTNAME):
                     continue
                 ss = Utils.asObjectOrNull(sl.value, str)
                 # asObjectOrNull yields None for a non-string value; skip it
                 # instead of failing on len(None).
                 if (ss is not None and len(ss) > 0 and LanguageHelper.isLatinChar(ss[0])):
                     nl = ss
                     break
             if (nl is not None):
                 nal = self.__findForSurname(PersonReferent.ATTR_FIRSTNAME, nl, False)
                 if (nal is None):
                     print(" ({0})".format(nl), end="", file=res)
                 elif (PersonReferent.SHOW_LASTNAME_ON_FIRST_POSITION):
                     print(" ({0} {1})".format(nl, nal), end="", file=res)
                 else:
                     print(" ({0} {1})".format(nal, nl), end="", file=res)
         return MiscHelper.convertFirstCharUpperAndOtherLower(Utils.toStringStringIO(res))
     else:
         n = self.getStringValue(PersonReferent.ATTR_FIRSTNAME)
         if (n is not None):
             s = self.__findForSurname(PersonReferent.ATTR_MIDDLENAME, n, False)
             if (s is not None):
                 n = "{0} {1}".format(n, s)
             n = MiscHelper.convertFirstCharUpperAndOtherLower(n)
             nik = self.getStringValue(PersonReferent.ATTR_NICKNAME)
             tit = self.__findShortestKingTitul(False)
             # The title goes before the name, the nickname after it.
             if (tit is not None):
                 n = "{0} {1}".format(tit, n)
             if (nik is not None):
                 n = "{0} {1}".format(n, nik)
             return n
     return "?"
コード例 #22
0
ファイル: AddressReferent.py プロジェクト: MihaJjDa/APCLtask
 def toString(self,
              short_variant: bool,
              lang: 'MorphLang' = None,
              lev: int = 0) -> str:
     """Render the address as a single line of text.

     The parts are written in a fixed order: the optional "[detail, param]"
     block, the streets (or the metro when there are no streets), kilometer,
     house, corpus, building, the remaining numbered parts, FIAS GUIDs, BTI,
     the geo objects and finally the zip code.

     Args:
         short_variant: not read by this method.
         lang: passed to the detail-value conversion and to the nested
             ``toString`` calls.
         lev: nesting level; geo objects are rendered with ``lev + 1``.

     Returns:
         The assembled text with surrounding whitespace stripped.
     """
     out = io.StringIO()

     def emit(text) -> None:
         # Append to the buffer without a trailing newline.
         print(text, end="", file=out)

     def shown(value):
         # A stored "0" is rendered as "Б/Н".
         return "Б/Н" if value == "0" else value

     # Optional detail block in square brackets.
     detail = self.getStringValue(AddressReferent.ATTR_DETAIL)
     if detail is not None:
         converted = MetaAddress._global_meta.detail_feature.convertInnerValueToOuterValue(detail, lang)
         detail = Utils.asObjectOrNull(converted, str)
     if detail is not None:
         emit("[{0}".format(detail.lower()))
         detail_param = self.getStringValue(AddressReferent.ATTR_DETAILPARAM)
         if detail_param is not None:
             emit(", {0}".format(detail_param))
         emit(']')

     # Streets, comma separated; the metro is used only when there are none.
     street_list = self.streets
     if len(street_list) != 0:
         if out.tell() > 0:
             emit(' ')
         for pos, street in enumerate(street_list):
             if pos > 0:
                 emit(", ")
             emit(street.toString(True, lang, 0))
     elif self.metro is not None:
         if out.tell() > 0:
             emit(' ')
         emit(Utils.ifNotNull(self.metro, ""))

     km = self.kilometer
     if km is not None:
         emit(" {0}км.".format(km))

     house = self.house
     if house is not None:
         house_kind = self.house_type
         if house_kind == AddressHouseType.ESTATE:
             emit(" влад.")
         elif house_kind == AddressHouseType.HOUSEESTATE:
             emit(" домовл.")
         else:
             emit(" д.")
         emit(shown(house))

     corpus = self.corpus
     if corpus is not None:
         emit(" корп.{0}".format(shown(corpus)))

     building = self.building
     if building is not None:
         building_kind = self.building_type
         if building_kind == AddressBuildingType.CONSTRUCTION:
             emit(" сооруж.")
         elif building_kind == AddressBuildingType.LITER:
             emit(" лит.")
         else:
             emit(" стр.")
         emit(shown(building))

     # Plain "label + value" parts, in output order.
     labelled_parts = (
         ("potch", " под.{0}"),
         ("floor0_", " эт.{0}"),
         ("flat", " кв.{0}"),
         ("corpus_or_flat", " корп.(кв.?){0}"),
         ("office", " оф.{0}"),
         ("block", " блок {0}"),
         ("plot", " уч.{0}"),
         ("box", " бокс {0}"),
         ("post_office_box", " а\\я{0}"),
         ("csp", " ГСП-{0}"),
     )
     for attr_name, template in labelled_parts:
         part = getattr(self, attr_name)
         if part is not None:
             emit(template.format(part))

     # FIAS: the GUID of the primary referent, then those of the other FIAS slots.
     kladr = self.getSlotValue(AddressReferent.ATTR_FIAS)
     if isinstance(kladr, Referent):
         emit(" (ФИАС: {0}".format(Utils.ifNotNull(kladr.getStringValue("GUID"), "?")))
         for slot in self.slots:
             if slot.type_name != AddressReferent.ATTR_FIAS:
                 continue
             if isinstance(slot.value, Referent) and slot.value != kladr:
                 emit(", {0}".format(Utils.ifNotNull(slot.value.getStringValue("GUID"), "?")))
         emit(')')

     bti = self.getStringValue(AddressReferent.ATTR_BTI)
     if bti is not None:
         emit(" (БТИ {0})".format(bti))

     # Geo objects: separated by ';' except directly after the closing ']'.
     for geo in self.geos:
         if out.tell() > 0 and Utils.getCharAtStringIO(out, out.tell() - 1) == ' ':
             Utils.setLengthStringIO(out, out.tell() - 1)
         if out.tell() > 0 and Utils.getCharAtStringIO(out, out.tell() - 1) != ']':
             emit(';')
         emit(" {0}".format(geo.toString(True, lang, lev + 1)))

     zip_code = self.zip0_
     if zip_code is not None:
         emit("; {0}".format(zip_code))
     return Utils.toStringStringIO(out).strip()
コード例 #23
0
 def process(self, kit : 'AnalysisKit') -> None:
     """Extract keyword referents from the token chain of ``kit``.

     NOTE(review): the original (Russian) comment on this method read
     "main function for extracting phones", which does not match the body:
     everything below creates KeywordReferent objects. Presumably a
     copy-paste leftover - confirm.

     Stages, in the order they appear below:
       1. run a DenominationAnalyzer over ``kit`` unless the processor
          already has one that is not flagged ``ignore_this_analyzer``;
       2. count the tokens of the chain (the count is passed to
          ``__set_rank`` together with the current position);
       3. first walk: tokens that already carry a referent go to
          ``__add_referents``; a verb that does not start a noun phrase
          becomes a PREDICATE keyword; every suitable word of a noun phrase
          becomes an OBJECT keyword, and a phrase with more than one such
          word additionally gets a combined OBJECT keyword;
       4. second walk: an OBJECT keyword followed by another OBJECT keyword
          (linked by "OF" or by the genitive case) is joined via ``_union``;
       5. optionally sort the collected referents by ``rank`` and register
          an annotation referent.

     Args:
         kit: the analysis container; new ReferentToken objects are passed
             to ``kit.embed_token``.
     """
     ad = kit.get_analyzer_data(self)
     # Stage 1: make sure a DenominationAnalyzer has been applied.
     has_denoms = False
     for a in kit.processor.analyzers: 
         if ((isinstance(a, DenominationAnalyzer)) and not a.ignore_this_analyzer): 
             has_denoms = True
     if (not has_denoms): 
         a = DenominationAnalyzer()
         a.process(kit)
     li = list()  # KeywordReferent objects of the noun phrase being processed
     tmp = io.StringIO()  # scratch buffer reused for every multi-word phrase
     tmp2 = list()  # normalized (or plain) values of the phrase parts
     # Stage 2: total number of tokens in the chain.
     max0_ = 0
     t = kit.first_token
     while t is not None: 
         max0_ += 1
         t = t.next0_
     # Stage 3: first walk over the chain; ``cur`` counts the advance steps.
     cur = 0
     t = kit.first_token
     first_pass3292 = True
     while True:
         # Emulates a C-style ``for`` loop: the advance step is skipped on
         # the first iteration and executed before every later one, so each
         # ``continue`` below still moves to the next token.
         if first_pass3292: first_pass3292 = False
         else: t = t.next0_; cur += 1
         if (not (t is not None)): break
         r = t.get_referent()
         if (r is not None): 
             # Token already carries a referent: delegate and resume from
             # whatever token the helper returns.
             t = self.__add_referents(ad, t, cur, max0_)
             continue
         if (not (isinstance(t, TextToken))): 
             continue
         # Only letter tokens of at least 3 characters are considered.
         if (not t.chars.is_letter or (t.length_char < 3)): 
             continue
         term = t.term
         if (term == "ЕСТЬ"): 
             # "ЕСТЬ" is kept only when the previous text token is a verb.
             if ((isinstance(t.previous, TextToken)) and t.previous.morph.class0_.is_verb): 
                 pass
             else: 
                 continue
         npt = None
         npt = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.ADJECTIVECANBELAST) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), 0, None)
         if (npt is None): 
             # No noun phrase starts here: a dictionary verb (that is not
             # also a preposition) becomes a PREDICATE keyword.
             mc = t.get_morph_class_in_dictionary()
             if (mc.is_verb and not mc.is_preposition): 
                 if (t.is_verb_be): 
                     continue
                 if (t.is_value("МОЧЬ", None) or t.is_value("WOULD", None)): 
                     continue
                 kref = KeywordReferent._new1595(KeywordType.PREDICATE)
                 norm = t.get_normal_case_text(MorphClass.VERB, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                 if (norm is None): 
                     norm = t.lemma
                 if (norm.endswith("ЬСЯ")): 
                     # Drops the last two characters ("СЯ"), keeping "...Ь".
                     norm = norm[0:0+len(norm) - 2]
                 kref.add_slot(KeywordReferent.ATTR_VALUE, norm, False, 0)
                 drv = DerivateService.find_derivates(norm, True, t.morph.language)
                 KeywordAnalyzer.__add_normals(kref, drv, norm)
                 kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
                 KeywordAnalyzer.__set_rank(kref, cur, max0_)
                 # NOTE(review): register_referent is called a second time on
                 # the already registered referent - confirm this is intended.
                 rt1 = ReferentToken._new734(ad.register_referent(kref), t, t, t.morph)
                 kit.embed_token(rt1)
                 t = (rt1)
                 continue
             continue
         if (npt.internal_noun is not None): 
             continue
         # Phrases ending in "ЦЕЛОМ"/"ЧАСТНОСТИ" after a preposition are
         # skipped as a whole.
         if (npt.end_token.is_value("ЦЕЛОМ", None) or npt.end_token.is_value("ЧАСТНОСТИ", None)): 
             if (npt.preposition is not None): 
                 t = npt.end_token
                 continue
         # Same for phrases ending in "СТОРОНЫ" with the preposition "С".
         if (npt.end_token.is_value("СТОРОНЫ", None) and npt.preposition is not None and npt.preposition.normal == "С"): 
             t = npt.end_token
             continue
         if (npt.begin_token == npt.end_token): 
             # Single-token phrase: skip prepositions and the adverb "ПОТОМ".
             mc = t.get_morph_class_in_dictionary()
             if (mc.is_preposition): 
                 continue
             elif (mc.is_adverb): 
                 if (t.is_value("ПОТОМ", None)): 
                     continue
         else: 
             pass
         li.clear()
         t0 = t  # first token of the span for the combined keyword
         tt = t
         first_pass3293 = True
         while True:
             # Inner C-style ``for`` loop over the tokens of the noun phrase.
             if first_pass3293: first_pass3293 = False
             else: tt = tt.next0_
             if (not (tt is not None and tt.end_char <= npt.end_char)): break
             if (not (isinstance(tt, TextToken))): 
                 continue
             # No-op branch (has no effect on the result).
             if (tt.is_value("NATURAL", None)): 
                 pass
             if ((tt.length_char < 3) or not tt.chars.is_letter): 
                 continue
             mc = tt.get_morph_class_in_dictionary()
             # Function words are skipped, except the word "ОТНОШЕНИЕ".
             if ((mc.is_preposition or mc.is_pronoun or mc.is_personal_pronoun) or mc.is_conjunction): 
                 if (tt.is_value("ОТНОШЕНИЕ", None)): 
                     pass
                 else: 
                     continue
             if (mc.is_misc): 
                 if (MiscHelper.is_eng_article(tt)): 
                     continue
             kref = KeywordReferent._new1595(KeywordType.OBJECT)
             norm = tt.lemma
             kref.add_slot(KeywordReferent.ATTR_VALUE, norm, False, 0)
             if (norm != "ЕСТЬ"): 
                 drv = DerivateService.find_derivates(norm, True, tt.morph.language)
                 KeywordAnalyzer.__add_normals(kref, drv, norm)
             kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
             KeywordAnalyzer.__set_rank(kref, cur, max0_)
             rt1 = ReferentToken._new734(kref, tt, tt, tt.morph)
             kit.embed_token(rt1)
             # If the very first phrase token was wrapped, the combined span
             # must start at the wrapper rather than at the raw token.
             if (tt == t and len(li) == 0): 
                 t0 = (rt1)
             # The outer loop continues after the last embedded token.
             t = (rt1)
             li.append(kref)
         if (len(li) > 1): 
             # Several keywords in one phrase: build a combined OBJECT
             # keyword that references each part through ATTR_REF.
             kref = KeywordReferent._new1595(KeywordType.OBJECT)
             Utils.setLengthStringIO(tmp, 0)
             tmp2.clear()
             has_norm = False  # assigned below but never read in this method
             for kw in li: 
                 s = kw.get_string_value(KeywordReferent.ATTR_VALUE)
                 if (tmp.tell() > 0): 
                     print(' ', end="", file=tmp)
                 print(s, end="", file=tmp)
                 n = kw.get_string_value(KeywordReferent.ATTR_NORMAL)
                 if (n is not None): 
                     has_norm = True
                     tmp2.append(n)
                 else: 
                     tmp2.append(s)
                 kref.add_slot(KeywordReferent.ATTR_REF, kw, False, 0)
             val = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
             kref.add_slot(KeywordReferent.ATTR_VALUE, val, False, 0)
             # The text accumulated in ``tmp`` above is discarded here; the
             # buffer is refilled with the sorted parts joined by spaces.
             Utils.setLengthStringIO(tmp, 0)
             tmp2.sort()
             for s in tmp2: 
                 if (tmp.tell() > 0): 
                     print(' ', end="", file=tmp)
                 print(s, end="", file=tmp)
             norm = Utils.toStringStringIO(tmp)
             if (norm != val): 
                 kref.add_slot(KeywordReferent.ATTR_NORMAL, norm, False, 0)
             kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
             KeywordAnalyzer.__set_rank(kref, cur, max0_)
             rt1 = ReferentToken._new734(kref, t0, t, npt.morph)
             kit.embed_token(rt1)
             t = (rt1)
     # Stage 4: second walk - join neighbouring OBJECT keywords.
     cur = 0
     t = kit.first_token
     first_pass3294 = True
     while True:
         if first_pass3294: first_pass3294 = False
         else: t = t.next0_; cur += 1
         if (not (t is not None)): break
         kw = Utils.asObjectOrNull(t.get_referent(), KeywordReferent)
         if (kw is None or kw.typ != KeywordType.OBJECT): 
             continue
         if (t.next0_ is None or kw.child_words > 2): 
             continue
         t1 = t.next0_
         if (t1.is_value("OF", None) and (t1.whitespaces_after_count < 3) and t1.next0_ is not None): 
             # "X of [article] Y": step over "OF" and an optional article.
             t1 = t1.next0_
             if ((isinstance(t1, TextToken)) and MiscHelper.is_eng_article(t1) and t1.next0_ is not None): 
                 t1 = t1.next0_
         elif (not t1.morph.case_.is_genitive or t.whitespaces_after_count > 1): 
             # Otherwise the next token must be genitive and separated by at
             # most one whitespace.
             continue
         kw2 = Utils.asObjectOrNull(t1.get_referent(), KeywordReferent)
         if (kw2 is None): 
             continue
         if (kw == kw2): 
             continue
         # The joined keyword may span at most 3 child words in total.
         if (kw2.typ != KeywordType.OBJECT or (kw.child_words + kw2.child_words) > 3): 
             continue
         kw_un = KeywordReferent()
         kw_un._union(kw, kw2, MiscHelper.get_text_value(t1, t1, GetTextAttr.NO))
         kw_un = (Utils.asObjectOrNull(ad.register_referent(kw_un), KeywordReferent))
         KeywordAnalyzer.__set_rank(kw_un, cur, max0_)
         rt1 = ReferentToken._new734(kw_un, t, t1, t.morph)
         kit.embed_token(rt1)
         t = (rt1)
     # Stage 5: optional post-processing controlled by class-level settings.
     if (KeywordAnalyzer.SORT_KEYWORDS_BY_RANK): 
         all0_ = list(ad.referents)
         all0_.sort(key=operator.attrgetter('rank'), reverse=True)
         ad.referents = all0_
     if (KeywordAnalyzer.ANNOTATION_MAX_SENTENCES > 0): 
         ano = AutoannoSentToken.create_annotation(kit, KeywordAnalyzer.ANNOTATION_MAX_SENTENCES)
         if (ano is not None): 
             ad.register_referent(ano)
コード例 #24
0
 def _to_string(self, short_variant: bool, lang: 'MorphLang', lev: int,
                from_range: int) -> str:
     """Build the textual representation of this date referent.

     Args:
         short_variant: when False, ``DateRelHelper.append_to_string`` is
             called for relative dates; for absolute dates it selects the
             short year suffix ("г"/"р") instead of the full word.
         lang: output language; ``MorphLang.RU`` is used when None.
         lev: not used by this method.
         from_range: 1 adds a "from" preposition ("с"/"з"/"from"), 2 adds a
             "to" preposition ("по"/"to"), any other value adds nothing.

     Returns:
         The formatted date, or "?" when no component produced any text.
     """
     from pullenti.ner.date.internal.DateRelHelper import DateRelHelper
     res = io.StringIO()
     p = self.pointer
     if (lang is None):
         lang = MorphLang.RU
     if (self.is_relative):
         # ---- relative dates ("now", "2 days ago", "next Monday", ...) ----
         if (self.pointer == DatePointerType.TODAY):
             print("сейчас".format(), end="", file=res, flush=True)
             if (not short_variant):
                 DateRelHelper.append_to_string(self, res)
             return Utils.toStringStringIO(res)
         word = None  # unit name of the last unit slot seen
         val = 0  # numeric value of the last slot seen
         back = False  # True when a negative day-of-week offset was found
         is_local_rel = self.get_string_value(
             DateReferent.ATTR_ISRELATIVE) == "true"
         for s in self.slots:
             if (s.type_name == DateReferent.ATTR_CENTURY):
                 word = "век"
                 wrapval784 = RefOutArgWrapper(0)
                 Utils.tryParseInt(Utils.asObjectOrNull(s.value, str),
                                   wrapval784)
                 val = wrapval784.value
             elif (s.type_name == DateReferent.ATTR_YEAR):
                 word = "год"
                 wrapval785 = RefOutArgWrapper(0)
                 Utils.tryParseInt(Utils.asObjectOrNull(s.value, str),
                                   wrapval785)
                 val = wrapval785.value
             elif (s.type_name == DateReferent.ATTR_MONTH):
                 word = "месяц"
                 wrapval786 = RefOutArgWrapper(0)
                 Utils.tryParseInt(Utils.asObjectOrNull(s.value, str),
                                   wrapval786)
                 val = wrapval786.value
                 if (not is_local_rel and val >= 1 and val <= 12):
                     print(DateReferent.__m_month0[val - 1],
                           end="",
                           file=res)
             elif (s.type_name == DateReferent.ATTR_DAY):
                 word = "день"
                 wrapval787 = RefOutArgWrapper(0)
                 Utils.tryParseInt(Utils.asObjectOrNull(s.value, str),
                                   wrapval787)
                 val = wrapval787.value
                 if ((not is_local_rel and self.month > 0
                      and self.month <= 12) and self.higher is not None
                         and self.higher.get_string_value(
                             DateReferent.ATTR_ISRELATIVE) != "true"):
                     print("{0} {1}".format(
                         val, DateReferent.__m_month[self.month - 1]),
                           end="",
                           file=res,
                           flush=True)
                 elif (not is_local_rel):
                     print("{0} число".format(val),
                           end="",
                           file=res,
                           flush=True)
             elif (s.type_name == DateReferent.ATTR_QUARTAL):
                 word = "квартал"
                 wrapval788 = RefOutArgWrapper(0)
                 Utils.tryParseInt(Utils.asObjectOrNull(s.value, str),
                                   wrapval788)
                 val = wrapval788.value
             elif (s.type_name == DateReferent.ATTR_WEEK):
                 word = "неделя"
                 wrapval789 = RefOutArgWrapper(0)
                 Utils.tryParseInt(Utils.asObjectOrNull(s.value, str),
                                   wrapval789)
                 val = wrapval789.value
             elif (s.type_name == DateReferent.ATTR_HOUR):
                 word = "час"
                 wrapval790 = RefOutArgWrapper(0)
                 Utils.tryParseInt(Utils.asObjectOrNull(s.value, str),
                                   wrapval790)
                 val = wrapval790.value
                 if (not is_local_rel):
                     print("{0}:{1}".format("{:02d}".format(val),
                                            "{:02d}".format(self.minute)),
                           end="",
                           file=res,
                           flush=True)
             elif (s.type_name == DateReferent.ATTR_MINUTE):
                 word = "минута"
                 wrapval791 = RefOutArgWrapper(0)
                 Utils.tryParseInt(Utils.asObjectOrNull(s.value, str),
                                   wrapval791)
                 val = wrapval791.value
             elif (s.type_name == DateReferent.ATTR_DAYOFWEEK):
                 wrapval792 = RefOutArgWrapper(0)
                 Utils.tryParseInt(Utils.asObjectOrNull(s.value, str),
                                   wrapval792)
                 val = wrapval792.value
                 if (not is_local_rel):
                     print((DateReferent.__m_week_day_ex[val - 1]
                            if val >= 1 and val <= 7 else "?"),
                           end="",
                           file=res)
                 else:
                     # A negative value means a week day in the past.
                     if (val < 0):
                         val = (-val)
                         back = True
                     # NOTE(review): val == 0 passes this check and indexes
                     # [-1], i.e. the last list entry - confirm intended.
                     if (val >= 0 and val <= 7):
                         # The adjective form depends on the day number:
                         # one form for 7, another for 3 and 6, a third
                         # for the remaining days.
                         print("{0} {1}".format(
                             ((("прошлое" if back else "будущее"))
                              if val == 7 else
                              ((("прошлая" if back else "будущая")) if
                               (val == 3 or val == 6) else
                               (("прошлый" if back else "будущий")))),
                             DateReferent.__m_week_day_ex[val - 1]),
                               end="",
                               file=res,
                               flush=True)
                         break
         if (word is not None and is_local_rel):
             if (val == 0):
                 print("{0} {1}".format(
                     ("текущая" if word == "неделя" or word == "минута" else
                      "текущий"), word),
                       end="",
                       file=res,
                       flush=True)
             elif (val > 0 and not back):
                 print("{0} {1} вперёд".format(
                     val,
                     MiscHelper.get_text_morph_var_by_case_and_number_ex(
                         word, None, MorphNumber.UNDEFINED, str(val))),
                       end="",
                       file=res,
                       flush=True)
             else:
                 val = (-val)
                 print("{0} {1} назад".format(
                     val,
                     MiscHelper.get_text_morph_var_by_case_and_number_ex(
                         word, None, MorphNumber.UNDEFINED, str(val))),
                       end="",
                       file=res,
                       flush=True)
         elif (not is_local_rel and res.tell() == 0):
             print("{0} {1}".format(
                 val,
                 MiscHelper.get_text_morph_var_by_case_and_number_ex(
                     word, None, MorphNumber.UNDEFINED, str(val))),
                   end="",
                   file=res,
                   flush=True)
         if (not short_variant):
             DateRelHelper.append_to_string(self, res)
         # For relative dates the range preposition is inserted in front of
         # the text that has already been written.
         if (from_range == 1):
             Utils.insertStringIO(
                 res, 0, "{0} ".format(("з" if lang.is_ua else
                                        ("from" if lang.is_en else "с"))))
         elif (from_range == 2):
             Utils.insertStringIO(res, 0, ("to " if lang.is_en else "по "))
         return Utils.toStringStringIO(res)
     # ---- absolute dates ----
     if (from_range == 1):
         print("{0} ".format(("з" if lang.is_ua else
                              ("from" if lang.is_en else "с"))),
               end="",
               file=res,
               flush=True)
     elif (from_range == 2):
         print(("to " if lang.is_en else "по ").format(),
               end="",
               file=res,
               flush=True)
     if (p != DatePointerType.NO):
         val = MetaDate.POINTER.convert_inner_value_to_outer_value(
             Utils.enumToString(p), lang)
         # Inside a non-English range the pointer word is replaced by a
         # hard-coded form chosen per range side.
         if (from_range == 0 or lang.is_en):
             pass
         elif (from_range == 1):
             if (p == DatePointerType.BEGIN):
                 val = ("початку" if lang.is_ua else "начала")
             elif (p == DatePointerType.CENTER):
                 val = ("середини" if lang.is_ua else "середины")
             elif (p == DatePointerType.END):
                 val = ("кінця" if lang.is_ua else "конца")
             elif (p == DatePointerType.TODAY):
                 val = ("цього часу"
                        if lang.is_ua else "настоящего времени")
         elif (from_range == 2):
             if (p == DatePointerType.BEGIN):
                 val = ("початок" if lang.is_ua else "начало")
             elif (p == DatePointerType.CENTER):
                 val = ("середину" if lang.is_ua else "середину")
             elif (p == DatePointerType.END):
                 val = ("кінець" if lang.is_ua else "конец")
             elif (p == DatePointerType.TODAY):
                 val = ("теперішній час"
                        if lang.is_ua else "настоящее время")
         print("{0} ".format(val), end="", file=res, flush=True)
     if (self.day_of_week > 0):
         if (lang.is_en):
             print("{0}, ".format(
                 DateReferent.__m_week_day_en[self.day_of_week - 1]),
                   end="",
                   file=res,
                   flush=True)
         else:
             print("{0}, ".format(
                 DateReferent.__m_week_day[self.day_of_week - 1]),
                   end="",
                   file=res,
                   flush=True)
     y = self.year
     m = self.month
     d = self.day
     cent = self.century
     if (y == 0 and cent != 0):
         # Century-only date: Roman numeral plus the word for "century".
         is_bc = cent < 0
         if (cent < 0):
             cent = (-cent)
         print(NumberHelper.get_number_roman(cent), end="", file=res)
         # FIX: this branch used to test ``lang.is_ua``, so Ukrainian output
         # received the English word and the Ukrainian alternatives in the
         # two branches below could never be selected.
         if (lang.is_en):
             print(" century", end="", file=res)
         elif (m > 0 or p != DatePointerType.NO or from_range == 1):
             print((" віка" if lang.is_ua else " века"), end="", file=res)
         else:
             print((" вік" if lang.is_ua else " век"), end="", file=res)
         if (is_bc):
             # NOTE(review): unlike the year branch below there is no
             # English variant of the era suffix here - confirm.
             print((" до н.е." if lang.is_ua else " до н.э."),
                   end="",
                   file=res)
         return Utils.toStringStringIO(res)
     if (d > 0):
         print(d, end="", file=res)
     if (m > 0 and m <= 12):
         if (res.tell() > 0
                 and Utils.getCharAtStringIO(res,
                                             res.tell() - 1) != ' '):
             print(' ', end="", file=res)
         # Two month tables per language: one is used next to a day, a
         # pointer or inside a range, the other for a stand-alone month.
         if (lang.is_ua):
             print((DateReferent.__m_monthua[m - 1]
                    if d > 0 or p != DatePointerType.NO or from_range != 0
                    else DateReferent.__m_month0ua[m - 1]),
                   end="",
                   file=res)
         elif (lang.is_en):
             print(DateReferent.__m_monthen[m - 1], end="", file=res)
         else:
             print((DateReferent.__m_month[m - 1]
                    if d > 0 or p != DatePointerType.NO or from_range != 0
                    else DateReferent.__m_month0[m - 1]),
                   end="",
                   file=res)
     if (y != 0):
         # A negative year denotes a date before the common era.
         is_bc = y < 0
         if (y < 0):
             y = (-y)
         if (res.tell() > 0
                 and Utils.getCharAtStringIO(res,
                                             res.tell() - 1) != ' '):
             print(' ', end="", file=res)
         if (lang is not None and lang.is_en):
             print("{0}".format(y), end="", file=res, flush=True)
         elif (short_variant):
             print("{0}{1}".format(y, ("р" if lang.is_ua else "г")),
                   end="",
                   file=res,
                   flush=True)
         elif (m > 0 or p != DatePointerType.NO or from_range == 1):
             print("{0} {1}".format(y, ("року" if lang.is_ua else "года")),
                   end="",
                   file=res,
                   flush=True)
         else:
             print("{0} {1}".format(y, ("рік" if lang.is_ua else "год")),
                   end="",
                   file=res,
                   flush=True)
         if (is_bc):
             print((" до н.е." if lang.is_ua else
                    ("BC" if lang.is_en else " до н.э.")),
                   end="",
                   file=res)
     h = self.hour
     mi = self.minute
     se = self.second
     # The time part is written only when both hour and minute are >= 0.
     if (h >= 0 and mi >= 0):
         if (res.tell() > 0):
             print(' ', end="", file=res)
         print("{0}:{1}".format("{:02d}".format(h), "{:02d}".format(mi)),
               end="",
               file=res,
               flush=True)
         if (se >= 0):
             print(":{0}".format("{:02d}".format(se)),
                   end="",
                   file=res,
                   flush=True)
     # The quarter is used only as a fallback when nothing else was written.
     if (res.tell() == 0):
         if (self.quartal != 0):
             print("{0}-й квартал".format(self.quartal),
                   end="",
                   file=res,
                   flush=True)
     if (res.tell() == 0):
         return "?"
     # Drop trailing spaces and commas left by the components above.
     while Utils.getCharAtStringIO(
             res,
             res.tell() - 1) == ' ' or Utils.getCharAtStringIO(
                 res,
                 res.tell() - 1) == ',':
         Utils.setLengthStringIO(res, res.tell() - 1)
     # NOTE(review): the relative case already returned above, so this is
     # reached only if ``is_relative`` changed in between - confirm.
     if (not short_variant and self.is_relative):
         DateRelHelper.append_to_string(self, res)
     return Utils.toStringStringIO(res).strip()
コード例 #25
0
 def attach_domain_name(t0: 'Token', check_: bool,
                        can_be_whitspaces: bool) -> 'UriItemToken':
     """Try to read a dotted host name or IP-like sequence starting at ``t0``.

     Tokens are appended to a buffer (text tokens lower-cased, number tokens
     as written) until a token that cannot belong to the name is met.

     Args:
         t0: first token of the candidate.
         check_: when True the result must have 4 numeric parts in 0..255
             or end, after a dot, with a token that ``__m_std_groups``
             recognises.
         can_be_whitspaces: when True a whitespace gap on the same line is
             tolerated if a token recognised by ``__m_std_groups`` follows
             further on.

     Returns:
         A UriItemToken spanning the accepted tokens, or None.
     """
     buf = io.StringIO()
     last_tok = t0
     octets = 0
     looks_like_ip = True
     cur = t0
     while cur is not None:
         if (cur.is_whitespace_before and cur != t0):
             # Whitespace inside the candidate: look ahead to decide
             # whether the name may continue across the gap.
             gap_ok = False
             if (can_be_whitspaces and not cur.is_newline_before):
                 probe = cur
                 while probe is not None:
                     # Dots and hyphens are stepped over without checks.
                     if (not (probe.is_char('.') or probe.is_hiphen)):
                         if (probe.is_whitespace_before):
                             if (probe.is_newline_before):
                                 break
                             before = probe.previous
                             if (before is None or not (
                                     before.is_char('.')
                                     or before.is_hiphen)):
                                 break
                         if (not isinstance(probe, TextToken)):
                             break
                         if (UriItemToken.__m_std_groups.try_parse(
                                 probe, TerminParseAttr.NO) is not None):
                             gap_ok = True
                             break
                         if (not probe.chars.is_latin_letter):
                             break
                     probe = probe.next0_
             if (not gap_ok):
                 break
         if (isinstance(cur, NumberToken)):
             num = Utils.asObjectOrNull(cur, NumberToken)
             if (num.int_value is None):
                 break
             print(num.get_source_text(), end="", file=buf)
             last_tok = cur
             # Only digit-written numbers in 0..255 count as IP parts.
             if (num.typ == NumberSpellingType.DIGIT
                     and 0 <= num.int_value < 256):
                 octets += 1
             else:
                 looks_like_ip = False
         else:
             word = Utils.asObjectOrNull(cur, TextToken)
             if (word is None):
                 break
             piece = word.term
             lead = piece[0]
             if (lead.isalpha()):
                 looks_like_ip = False
             else:
                 # Of the non-letters only '.', '-' and '_' are allowed.
                 if (lead not in ".-_"):
                     break
                 if (lead != '.'):
                     looks_like_ip = False
                 # A hyphen right after "vk.com" ends the name at once.
                 if (lead == '-' and Utils.compareStrings(
                         Utils.toStringStringIO(buf), "vk.com", True) == 0):
                     return UriItemToken._new2706(
                         t0, last_tok,
                         Utils.toStringStringIO(buf).lower())
             print(piece.lower(), end="", file=buf)
             last_tok = cur
         cur = cur.next0_
     if (buf.tell() == 0):
         return None
     if (octets != 4):
         looks_like_ip = False
     # Validate the dots: none at the start, no two in a row; one trailing
     # dot is cut off together with its token.
     dots = 0
     for pos in range(buf.tell()):
         if (Utils.getCharAtStringIO(buf, pos) != '.'):
             continue
         if (pos == 0):
             return None
         if (pos >= (buf.tell() - 1)):
             Utils.setLengthStringIO(buf, buf.tell() - 1)
             last_tok = last_tok.previous
             break
         if (Utils.getCharAtStringIO(buf, pos - 1) == '.'
                 or Utils.getCharAtStringIO(buf, pos + 1) == '.'):
             return None
         dots += 1
     if (dots == 0):
         return None
     host = Utils.toStringStringIO(buf)
     if (check_):
         accepted = looks_like_ip or host == "localhost"
         if (not accepted and last_tok.previous is not None
                 and last_tok.previous.is_char('.')):
             if (UriItemToken.__m_std_groups.try_parse(
                     last_tok, TerminParseAttr.NO) is not None):
                 accepted = True
         if (not accepted):
             return None
     return UriItemToken._new2706(t0, last_tok, host.lower())
コード例 #26
0
 def __name_vars(self) -> typing.List[str]:
     """Return the cached list of spelling variants of ``self.value``.

     The list is built on the first call and kept in ``self.__m_names``.
     It starts with the value itself; the value is then cut into runs of
     digits, runs of letters and single other characters, each run is
     expanded by ``DenominationReferent.__addVars``, and the variants of
     all runs are combined. Building stops once more than 20 names exist.
     """
     if (self.__m_names is not None):
         return self.__m_names
     self.__m_names = list()
     text = self.value
     if (text is None):
         return self.__m_names
     self.__m_names.append(text)
     # Character kinds: 0 - end of text, 1 - digit, 2 - letter, 3 - other.
     segments = list()
     prev_kind = 0
     run_start = 0
     for pos in range(len(text) + 1):
         if (pos >= len(text)):
             kind = 0
         elif (text[pos].isdigit()):
             kind = 1
         elif (text[pos].isalpha()):
             kind = 2
         else:
             kind = 3
         # A run continues while the kind stays the same; characters of
         # kind 3 always form a run of their own.
         if (kind == prev_kind and kind != 3):
             continue
         if (pos > run_start):
             run_variants = list()
             DenominationReferent.__addVars(text[run_start:pos],
                                            run_variants)
             segments.append(run_variants)
             # Between a letter run and a following digit run an optional
             # hyphen is allowed.
             if (kind == 1 and prev_kind == 2):
                 segments.append(["", "-"])
         run_start = pos
         prev_kind = kind
     # ``cursor[k]`` is the index of the variant currently chosen for
     # segment k; it is advanced like an odometer, last position fastest.
     cursor = Utils.newArray(len(segments), 0)
     for k in range(len(cursor)):
         cursor[k] = 0
     buf = io.StringIO()
     while True:
         Utils.setLengthStringIO(buf, 0)
         for variants, choice in zip(segments, cursor):
             print(variants[choice], end="", file=buf)
         candidate = Utils.toStringStringIO(buf)
         if (candidate not in self.__m_names):
             self.__m_names.append(candidate)
         if (len(self.__m_names) > 20):
             break
         # Increment from the right until a position does not overflow.
         k = len(cursor) - 1
         while k >= 0:
             cursor[k] += 1
             if (cursor[k] < len(segments[k])):
                 break
             k -= 1
         if (k < 0):
             # Every position overflowed: all combinations were produced.
             break
         for j in range(k + 1, len(cursor)):
             cursor[j] = 0
     return self.__m_names
コード例 #27
0
 def __attach_uri_content(
         t0: 'Token',
         chars_: str,
         can_be_whitespaces: bool = False) -> 'UriItemToken':
     """Collect the text of a URI-like item starting at ``t0``.

     A domain name is tried first through ``attach_domain_name``; the
     tokens after it (or from ``t0`` when no domain was found) are then
     appended while they are numbers, letters or characters listed in
     ``chars_``.

     Args:
         t0: first token of the candidate.
         chars_: the non-letter characters allowed inside the content.
         can_be_whitespaces: when True (and a domain was found) a
             whitespace gap on the same line may be bridged.

     Returns:
         A UriItemToken with the collected text; the domain token alone
         when nothing usable follows it; or None.
     """
     buf = io.StringIO()
     last_tok = t0
     dom = UriItemToken.attach_domain_name(t0, True, can_be_whitespaces)
     if (dom is not None and len(dom.value) < 3):
         return None
     pending_open = chr(0)  # currently open bracket, chr(0) when none
     cur = t0 if dom is None else dom.end_token.next0_
     while cur is not None:
         if (cur != t0 and cur.is_whitespace_before):
             if (cur.is_newline_before or not can_be_whitespaces):
                 break
             if (dom is None):
                 break
             prev = cur.previous
             # A gap after a hyphen is always bridged.
             if (not prev.is_hiphen):
                 if (prev.is_char_of(",;")):
                     break
                 # So is a gap after a dot followed by a 2-letter token.
                 if (not (prev.is_char('.') and cur.chars.is_letter
                          and cur.length_char == 2)):
                     # Otherwise look ahead (to the next whitespace) for a
                     # known page extension or a slash.
                     probe = cur.next0_ if cur.is_char_of("\\/") else cur
                     first_probe = probe
                     found = False
                     while probe is not None:
                         if (probe != first_probe
                                 and probe.is_whitespace_before):
                             break
                         if (not isinstance(probe, NumberToken)):
                             if (not isinstance(probe, TextToken)):
                                 break
                             if (probe.term in ("HTM", "HTML", "SHTML",
                                                "ASP", "ASPX", "JSP")):
                                 found = True
                                 break
                             if (probe.chars.is_letter):
                                 if (not probe.chars.is_latin_letter):
                                     break
                             else:
                                 if (probe.is_char_of("\\/")):
                                     found = True
                                     break
                                 if (not probe.is_char_of(chars_)):
                                     break
                         probe = probe.next0_
                     if (not found):
                         break
         if (isinstance(cur, NumberToken)):
             num = Utils.asObjectOrNull(cur, NumberToken)
             print(num.get_source_text(), end="", file=buf)
             last_tok = cur
         else:
             word = Utils.asObjectOrNull(cur, TextToken)
             if (word is not None):
                 piece = word.get_source_text()
                 lead = piece[0]
                 if (not lead.isalpha()):
                     if (lead not in chars_):
                         break
                     # Brackets must pair up with the last opened one.
                     if (lead in ('(', '[')):
                         pending_open = lead
                     elif (lead == ')' or lead == ']'):
                         expected = '(' if lead == ')' else '['
                         if (pending_open != expected):
                             break
                         pending_open = chr(0)
                 print(piece, end="", file=buf)
                 last_tok = cur
             else:
                 # Not a text token: a referent token is accepted when it
                 # starts with "РФ" right after a dot, or when it is a
                 # single latin-letter token.
                 ref_tok = Utils.asObjectOrNull(cur, ReferentToken)
                 take = False
                 if (ref_tok is not None):
                     if (ref_tok.begin_token.is_value("РФ", None)
                             and buf.tell() > 0
                             and Utils.getCharAtStringIO(
                                 buf, buf.tell() - 1) == '.'):
                         take = True
                     elif (ref_tok.chars.is_latin_letter
                           and ref_tok.begin_token == ref_tok.end_token):
                         take = True
                 if (not take):
                     break
                 print(ref_tok.begin_token.get_source_text(),
                       end="",
                       file=buf)
                 last_tok = cur
         cur = cur.next0_
     if (buf.tell() == 0):
         return dom
     # Content without a single letter or digit adds nothing to the domain.
     has_alnum = False
     for pos in range(buf.tell()):
         if (Utils.getCharAtStringIO(buf, pos).isalnum()):
             has_alnum = True
             break
     if (not has_alnum):
         return dom
     # One trailing '.' or '/' is cut off together with its token.
     tail = Utils.getCharAtStringIO(buf, buf.tell() - 1)
     if (tail == '.' or tail == '/'):
         Utils.setLengthStringIO(buf, buf.tell() - 1)
         last_tok = last_tok.previous
     if (dom is not None):
         Utils.insertStringIO(buf, 0, dom.value)
     text = Utils.toStringStringIO(buf)
     if (text.startswith("\\\\")):
         Utils.replaceStringIO(buf, "\\\\", "//")
         text = Utils.toStringStringIO(buf)
     if (text.startswith("//")):
         text = text[2:]
     # A bare "www" is not a usable result.
     if (Utils.compareStrings(text, "WWW", True) == 0):
         return None
     return UriItemToken._new2706(t0, last_tok,
                                  Utils.toStringStringIO(buf))
コード例 #28
0
ファイル: DateReferent.py プロジェクト: MihaJjDa/APCLtask
 def _ToString(self, short_variant : bool, lang : 'MorphLang', lev : int, from_range : int) -> str:
     """Render this date as human-readable text.

     Args:
         short_variant: when true, a non-English year is followed by the
             one-letter suffix ("г" / "р") instead of the full word.
         lang: output language; ``None`` falls back to ``MorphLang.RU``.
             Only its ``is_ua`` / ``is_en`` flags are consulted here.
         lev: not used by this method.
         from_range: 0 for a standalone date; 1 adds the range-start
             prefix ("с" / "з" / "from"); 2 adds the range-end prefix
             ("по" / "to"). The value also selects the word forms used
             for the pointer, month, year and century.

     Returns:
         The formatted text, or "?" when nothing at all was written.
     """
     res = io.StringIO()
     p = self.pointer
     if lang is None:
         lang = MorphLang.RU

     # Range prefix.
     if from_range == 1:
         print("{0} ".format("з" if lang.is_ua else ("from" if lang.is_en else "с")), end="", file=res)
     elif from_range == 2:
         print("to " if lang.is_en else "по ", end="", file=res)

     # Pointer ("beginning of", "end of", ...). Inside a range the
     # non-English wording is replaced by a form matching the prefix.
     if p != DatePointerType.NO:
         val = MetaDate.POINTER.convertInnerValueToOuterValue(Utils.enumToString(p), lang)
         if from_range == 0 or lang.is_en:
             pass
         elif from_range == 1:
             if p == DatePointerType.BEGIN:
                 val = "початку" if lang.is_ua else "начала"
             elif p == DatePointerType.CENTER:
                 val = "середини" if lang.is_ua else "середины"
             elif p == DatePointerType.END:
                 val = "кінця" if lang.is_ua else "конца"
             elif p == DatePointerType.TODAY:
                 val = "цього часу" if lang.is_ua else "настоящего времени"
         elif from_range == 2:
             if p == DatePointerType.BEGIN:
                 val = "початок" if lang.is_ua else "начало"
             elif p == DatePointerType.CENTER:
                 # Same spelling in both Ukrainian and Russian.
                 val = "середину"
             elif p == DatePointerType.END:
                 val = "кінець" if lang.is_ua else "конец"
             elif p == DatePointerType.TODAY:
                 val = "теперішній час" if lang.is_ua else "настоящее время"
         print("{0} ".format(val), end="", file=res)

     # Day of week (1-based index into the name tables).
     if self.day_of_week > 0:
         week_days = DateReferent.__m_week_day_en if lang.is_en else DateReferent.__m_week_day
         print("{0}, ".format(week_days[self.day_of_week - 1]), end="", file=res)

     y = self.year
     m = self.month
     d = self.day
     cent = self.century

     # Century-only date: Roman numeral plus the word for "century".
     if y == 0 and cent != 0:
         is_bc = cent < 0
         cent = abs(cent)
         print(NumberHelper.getNumberRoman(cent), end="", file=res)
         # The English word belongs to the English branch; testing is_ua
         # here would give Ukrainian dates an English word and make the
         # Ukrainian forms below unreachable.
         if lang.is_en:
             print(" century", end="", file=res)
         elif m > 0 or p != DatePointerType.NO or from_range == 1:
             print(" віка" if lang.is_ua else " века", end="", file=res)
         else:
             print(" вік" if lang.is_ua else " век", end="", file=res)
         if is_bc:
             print(" до н.е." if lang.is_ua else (" BC" if lang.is_en else " до н.э."), end="", file=res)
         return Utils.toStringStringIO(res)

     if d > 0:
         print(d, end="", file=res)

     if 0 < m <= 12:
         if res.tell() > 0 and Utils.getCharAtStringIO(res, res.tell() - 1) != ' ':
             print(' ', end="", file=res)
         # The *0 tables are used only for a bare month: no day, no
         # pointer and not part of a range.
         bare_month = not (d > 0 or p != DatePointerType.NO or from_range != 0)
         if lang.is_ua:
             print(DateReferent.__m_month0ua[m - 1] if bare_month else DateReferent.__m_monthua[m - 1], end="", file=res)
         elif lang.is_en:
             print(DateReferent.__m_monthen[m - 1], end="", file=res)
         else:
             print(DateReferent.__m_month0[m - 1] if bare_month else DateReferent.__m_month[m - 1], end="", file=res)

     if y != 0:
         is_bc = y < 0
         y = abs(y)
         if res.tell() > 0 and Utils.getCharAtStringIO(res, res.tell() - 1) != ' ':
             print(' ', end="", file=res)
         if lang.is_en:
             print(y, end="", file=res)
         elif short_variant:
             print("{0}{1}".format(y, "р" if lang.is_ua else "г"), end="", file=res)
         elif m > 0 or p != DatePointerType.NO or from_range == 1:
             print("{0} {1}".format(y, "року" if lang.is_ua else "года"), end="", file=res)
         else:
             print("{0} {1}".format(y, "рік" if lang.is_ua else "год"), end="", file=res)
         if is_bc:
             # NOTE(review): the English suffix has no leading space here
             # (yields e.g. "100BC"); kept as-is for compatibility - confirm
             # whether " BC" was intended.
             print(" до н.е." if lang.is_ua else ("BC" if lang.is_en else " до н.э."), end="", file=res)

     # Time of day; seconds are appended only when non-negative.
     h = self.hour
     mi = self.minute
     se = self.second
     if h >= 0 and mi >= 0:
         if res.tell() > 0:
             print(' ', end="", file=res)
         print("{:02d}:{:02d}".format(h, mi), end="", file=res)
         if se >= 0:
             print(":{:02d}".format(se), end="", file=res)

     if res.tell() == 0:
         return "?"
     # Drop trailing separators left by the prefix / weekday pieces, then
     # trim surrounding whitespace.
     return Utils.toStringStringIO(res).rstrip(" ,").strip()