Example #1
0
 def __init__(self, v: 'MorphRuleVariant' = None, word: str = None) -> None:
     """Create a word form, optionally filled in from a rule variant.

     Args:
         v(MorphRuleVariant): variant whose data is copied into this form;
             when None only the default field values are set
         word(str): the source word; when given, the normalised forms are
             built from it by swapping the variant's tail for the variant's
             normal tails

     """
     super().__init__(None)
     self.normal_full = None
     self.normal_case = None
     self.misc = None
     self.undef_coef = 0
     self.tag = None
     if v is None:
         return
     v.copyTo(self)
     self.misc = v.misc_info
     self.tag = v

     def with_tail(new_tail: str) -> str:
         # Cut the variant's own tail off the word (only when the word really
         # ends with it) and append the replacement tail if it is non-empty.
         stem = word
         if LanguageHelper.endsWith(word, v.tail):
             stem = word[:len(word) - len(v.tail)]
         return stem + new_tail if len(new_tail) > 0 else stem

     if v.normal_tail is not None and word is not None:
         self.normal_case = with_tail(v.normal_tail)
     if v.full_normal_tail is not None and word is not None:
         self.normal_full = with_tail(v.full_normal_tail)
Example #2
0
 def _DelSurnameEnd(s : str) -> str:
     """Strip a trailing ending from a surname string.

     The endings are compared against upper-case Cyrillic literals.

     Args:
         s(str): the surname; strings shorter than 3 characters are
             returned unchanged

     Returns:
         str: ``s`` without a final "А"/"У"/"Е", without a final "ОМ"/"ЫМ",
             or with a final "Я"/"Ю" replaced by "Ь" when the preceding
             letter is "Н" or "Л"; otherwise ``s`` itself

     """
     size = len(s)
     if size < 3:
         return s
     if LanguageHelper.endsWithEx(s, "А", "У", "Е", None):
         return s[:size - 1]
     for ending in ("ОМ", "ЫМ"):
         if LanguageHelper.endsWith(s, ending):
             return s[:size - 2]
     if (LanguageHelper.endsWithEx(s, "Я", "Ю", None, None)
             and s[size - 2] in ("Н", "Л")):
         return s[:size - 1] + "Ь"
     return s
Example #3
0
 def __tryAttachMoscowAO(li: typing.List['TerrItemToken'],
                         ad: 'AnalyzerData') -> 'ReferentToken':
     """Build a geo referent for a Moscow administrative district.

     Only the first item of ``li`` is examined.

     Args:
         li(typing.List[TerrItemToken]): territory items; ``li[0]`` must
             carry a termin flagged ``is_moscow_region``
         ad(AnalyzerData): not used by this function

     Returns:
         ReferentToken: a token spanning ``li[0]``, or None when the item is
             not a Moscow region or is doubtful and neither a city nor a
             street follows it

     """
     head = li[0]
     if head.termin_item is None or not head.termin_item.is_moscow_region:
         return None
     if head.is_doubt:
         # A doubtful item is accepted only when followed by a city or by an
         # address list that starts with a street.
         after = head.end_token.next0_
         if not CityAttachHelper.checkCityAfter(after):
             addr_items = AddressItemToken.tryParseList(after, None, 2)
             if (addr_items is None or len(addr_items) == 0
                     or not (addr_items[0].typ
                             == AddressItemToken.ItemType.STREET)):
                 return None
     district_type = "АДМИНИСТРАТИВНЫЙ ОКРУГ"
     reg = GeoReferent()
     reg._addTyp(district_type)
     name = head.termin_item.canonic_text
     if LanguageHelper.endsWith(name, district_type):
         # Drops the type words plus the one character in front of them.
         # NOTE(review): if the canonic text equals the type exactly, the end
         # index becomes -1 and only the last letter is cut - confirm whether
         # such a termin can occur.
         name = name[:len(name) - len(district_type) - 1].strip()
     reg._addName(name)
     return ReferentToken(reg, head.begin_token, head.end_token)
Example #4
0
 def canHasRef(self, r: 'Referent') -> bool:
     """ Check whether the given referent may serve as ATTR_REF of this one

     Args:
         r(Referent): candidate referent

     Returns:
         bool: False when this object's name or ``r`` is None; otherwise the
             outcome of the name-based rules for geo referents and for
             referents whose type name is "ORGANIZATION" (any other
             referent kind yields False)

     """
     title = self.name
     if title is None or r is None:
         return False
     if isinstance(r, GeoReferent):
         geo = Utils.asObjectOrNull(r, GeoReferent)
         if LanguageHelper.endsWithEx(title, "президент", "губернатор", None,
                                      None):
             return geo.is_state or geo.is_region
         if title in ("мэр", "градоначальник"):
             return geo.is_city
         return title == "глава"
     if r.type_name != "ORGANIZATION":
         return False
     # These titles are accepted for geo referents only (see above).
     if (LanguageHelper.endsWith(title, "губернатор")
             or title in ("мэр", "градоначальник", "президент")):
         return False
     if ("министр" in title
             and r.findSlot(None, "министерство", True) is None):
         return False
     if (title.endswith("директор")
             and r.findSlot(None, "суд", True) is not None):
         return False
     return True
Example #5
0
 def getDocTypes(name: str, name2: str) -> typing.List[str]:
     """Return the contract types in which a participant role can appear.

     Args:
         name(str): role name (upper-case Russian, e.g. "АРЕНДАТОР")
         name2(str): optional second role; when given, only the contract
             types shared by both roles are kept

     Returns:
         typing.List[str]: contract type names; empty when ``name`` is None
             or the role is not recognised

     """
     if name is None:
         return list()
     rent = "ДОГОВОР АРЕНДЫ"
     sublease = "ДОГОВОР СУБАРЕНДЫ"
     docs_by_role = {
         "АРЕНДОДАТЕЛЬ": (rent, sublease),
         "АРЕНДАТОР": (rent,),
         "СУБАРЕНДАТОР": (sublease,),
         "НАЙМОДАТЕЛЬ": ("ДОГОВОР НАЙМА",),
         "НАНИМАТЕЛЬ": ("ДОГОВОР НАЙМА",),
         "АГЕНТ": ("АГЕНТСКИЙ ДОГОВОР",),
         "ПРИНЦИПАЛ": ("АГЕНТСКИЙ ДОГОВОР",),
         "ПРОДАВЕЦ": ("ДОГОВОР КУПЛИ-ПРОДАЖИ",),
         "ПОКУПАТЕЛЬ": ("ДОГОВОР КУПЛИ-ПРОДАЖИ",),
         "ЗАКАЗЧИК": ("ДОГОВОР УСЛУГ",),
         "ИСПОЛНИТЕЛЬ": ("ДОГОВОР УСЛУГ",),
         "ПОСТАВЩИК": ("ДОГОВОР ПОСТАВКИ",),
         "ЛИЦЕНЗИАР": ("ЛИЦЕНЗИОННЫЙ ДОГОВОР",),
         "ЛИЦЕНЗИАТ": ("ЛИЦЕНЗИОННЫЙ ДОГОВОР",),
         "СТРАХОВЩИК": ("ДОГОВОР СТРАХОВАНИЯ",),
         "СТРАХОВАТЕЛЬ": ("ДОГОВОР СТРАХОВАНИЯ",),
     }
     if name in docs_by_role:
         res = list(docs_by_role[name])
     elif LanguageHelper.endsWith(name, "ПОДРЯДЧИК"):
         # Any "...ПОДРЯДЧИК" role is treated like ЗАКАЗЧИК / ИСПОЛНИТЕЛЬ.
         res = ["ДОГОВОР УСЛУГ"]
     else:
         res = list()
     if name2 is None:
         return res
     allowed = ParticipantToken.getDocTypes(name2, None)
     return [doc for doc in res if doc in allowed]
Example #6
0
 def mergeSlots(self,
                obj: 'Referent',
                merge_statistic: bool = True) -> None:
     """Take over the country code and the longer number from another phone.

     Args:
         obj(Referent): referent to merge from; ignored unless it is a
             PhoneReferent
         merge_statistic(bool): not used by this implementation

     """
     other = Utils.asObjectOrNull(obj, PhoneReferent)
     if other is None:
         return
     other_code = other.country_code
     if other_code is not None and self.country_code is None:
         self.country_code = other_code
     other_number = other.number
     # The other number replaces ours when ours is its ending.
     if (other_number is not None
             and LanguageHelper.endsWith(other_number, self.number)):
         self.number = other_number
Example #7
0
 def lemma(self) -> str:
     """ Lemma (a variant of morphological normalisation)

     Returns:
         str: the stored lemma when one is set; otherwise a normal form
             chosen from the word forms (with a few suffix fix-ups), or the
             term itself ("?" when the term is None) if no form is usable

     """
     if self.__m_lemma is not None:
         return self.__m_lemma
     forms = self.word_forms
     res = None
     if forms is not None and len(forms) > 0:
         if len(forms) == 1:
             res = Utils.ifNotNull(forms[0].normal_full,
                                   forms[0].normal_case)
         if res is None and not self.char_info.is_all_lower:
             # Capitalised word: prefer a surname in -ОВ/-ЕВ, or return a
             # dictionary first name right away.
             for form in forms:
                 if form.class0_.is_proper_surname:
                     candidate = Utils.ifNotNull(
                         form.normal_full,
                         Utils.ifNotNull(form.normal_case, ""))
                     if LanguageHelper.endsWithEx(candidate, "ОВ", "ЕВ",
                                                  None, None):
                         res = candidate
                         break
                 elif form.class0_.is_proper_name and form.is_in_dictionary:
                     return form.normal_case
         if res is None:
             best = None
             for form in forms:
                 if best is None or self.__compareForms(best, form) > 0:
                     best = form
             res = Utils.ifNotNull(best.normal_full, best.normal_case)
     if res is None:
         return Utils.ifNotNull(self.term, "?")
     if LanguageHelper.endsWithEx(res, "АНЫЙ", "ЕНЫЙ", None, None):
         return res[:len(res) - 3] + "ННЫЙ"
     if LanguageHelper.endsWith(res, "ЙСЯ"):
         return res[:len(res) - 2]
     if LanguageHelper.endsWith(res, "АНИЙ") and res == self.term:
         # Keep the form as is only when some variant is a dictionary word.
         if any(wf.is_in_dictionary for wf in forms):
             return res
         return res[:len(res) - 1] + "Е"
     return res
Example #8
0
 def _mergeSlots2(self, obj : 'Referent', lang : 'MorphLang') -> None:
     """Merge the slots of another referent, filtering names by language.

     Args:
         obj(Referent): referent whose slots are copied into this one
         lang(MorphLang): for an English language only NAME/TYPE values
             starting with a Latin character are copied, otherwise only
             the values that do not start with one

     """
     name_attr = GeoReferent.ATTR_NAME
     type_attr = GeoReferent.ATTR_TYPE

     def copy_slot(slot) -> None:
         # Slot statistics are always merged here.
         self.addSlot(slot.type_name, slot.value, False, slot.count)

     for slot in obj.slots:
         if slot.type_name in (name_attr, type_attr):
             text = slot.value
             # Skip values whose script does not match the language.
             if bool(LanguageHelper.isLatinChar(text[0])) != bool(lang.is_en):
                 continue
             if LanguageHelper.endsWith(text, " ССР"):
                 continue
         copy_slot(slot)

     # If the filter left this referent without any name (or any type),
     # take all of them from the other referent regardless of language.
     for attr in (name_attr, type_attr):
         if (self.findSlot(attr, None, True) is None
                 and obj.findSlot(attr, None, True) is not None):
             for slot in obj.slots:
                 if slot.type_name == attr:
                     copy_slot(slot)

     if self.is_territory:
         state_like = self.alpha2 is not None or any(
             self.findSlot(type_attr, kind, True) is not None
             for kind in ("государство", "держава", "империя", "імперія",
                          "state"))
         if state_like:
             generic = self.findSlot(type_attr, "территория", True)
             if generic is not None:
                 self.slots.remove(generic)

     if self.is_state:
         # Drop the first "region" type slot of a state.
         for slot in self.slots:
             if (slot.type_name == type_attr
                     and str(slot.value) in ("регион", "регіон", "region")):
                 self.slots.remove(slot)
                 break

     if self.is_city:
         by_ru = self.findSlot(type_attr, "город", True)
         by_uk = self.findSlot(type_attr, "місто", True)
         by_en = self.findSlot(type_attr, "city", True)
         generic = Utils.ifNotNull(by_ru, Utils.ifNotNull(by_uk, by_en))
         if generic is not None:
             # The generic "city" type is dropped when another city type
             # is present.
             for other in self.slots:
                 if (other.type_name == type_attr and other != generic
                         and GeoReferent.__isCity(other.value)):
                     self.slots.remove(generic)
                     break

     # Keep only the first HIGHER slot.
     higher_positions = [
         pos for pos, slot in enumerate(self.slots)
         if slot.type_name == GeoReferent.ATTR_HIGHER
     ]
     for pos in reversed(higher_positions[1:]):
         del self.slots[pos]
     self._mergeExtReferents(obj)
Example #9
0
 def canBeGeneralFor(self, obj: 'Referent') -> bool:
     """Tell whether this phone is a more general form of another phone.

     Args:
         obj(Referent): the phone referent to compare with

     Returns:
         bool: True when the two can be equal within one text and this one
             lacks an additional number the other has, or this number is
             the ending of the other number

     """
     if not self.__canBeEqual(obj, Referent.EqualType.WITHINONETEXT, True):
         return False
     other = Utils.asObjectOrNull(obj, PhoneReferent)
     if self.country_code is not None and other.country_code is None:
         return False
     own_ext = self.add_number
     their_ext = other.add_number
     if own_ext is None and their_ext is not None:
         return True
     if own_ext is not None and their_ext is None:
         return False
     return True if LanguageHelper.endsWith(other.number,
                                            self.number) else False
Example #10
0
 def __canBeEqual(self, obj: 'Referent', typ: 'EqualType',
                  ignore_add_number: bool) -> bool:
     """Compare this phone with another referent.

     Args:
         obj(Referent): referent to compare with; anything that is not a
             PhoneReferent never matches
         typ(EqualType): for DIFFERENTTEXTS the numbers must be identical,
             otherwise one number may be the ending of the other
         ignore_add_number(bool): when set, additional numbers conflict
             only if both sides have one; otherwise they must be equal

     Returns:
         bool: True when the two phones can denote the same number

     """
     other = Utils.asObjectOrNull(obj, PhoneReferent)
     if other is None:
         return False
     their_code = other.country_code
     own_code = self.country_code
     if (their_code is not None and own_code is not None
             and their_code != own_code):
         return False
     own_ext = self.add_number
     their_ext = other.add_number
     if ignore_add_number:
         if (own_ext is not None and their_ext is not None
                 and their_ext != own_ext):
             return False
     elif ((own_ext is not None or their_ext is not None)
           and own_ext != their_ext):
         return False
     own_number = self.number
     their_number = other.number
     if own_number is None or their_number is None:
         return False
     if own_number == their_number:
         return True
     if typ != Referent.EqualType.DIFFERENTTEXTS:
         for longer, shorter in ((own_number, their_number),
                                 (their_number, own_number)):
             if LanguageHelper.endsWith(longer, shorter):
                 return True
     return False
Example #11
0
 def process(self, word : str) -> typing.List['MorphWordForm']:
     """ Morphological analysis of a single word

     The word is first matched against the forward rule tree. When that
     result is missing or looks unreliable, extra candidate forms are guessed
     from the word ending through the reverse tree.

     Args:
         word(str): the word, expected in upper case

     Returns:
         typing.List[MorphWordForm]: the word forms found, or None when the
             word is empty, is longer than one letter and has no vowel, or
             nothing was found

     """
     if (Utils.isNullOrEmpty(word)):
         return None
     res = None
     # A multi-letter word without a single vowel is not analysed at all.
     if (len(word) > 1 and not any(LanguageHelper.isCyrillicVowel(ch) or LanguageHelper.isLatinVowel(ch) for ch in word)):
         return None
     # Walk the forward tree letter by letter; at every node that carries
     # rules, try the rest of the word as an ending.
     tn = self.m_root
     i = 0
     while i <= len(word):
         if (tn.lazy_pos > 0):
             self.__loadTreeNode(tn)
         if (tn.rules is not None):
             word_begin = None
             word_end = word[i:]
             if (res is None):
                 res = list()
             for r in tn.rules:
                 wrapmvs14 = RefOutArgWrapper(None)
                 inoutres15 = Utils.tryGetValue(r.variants, word_end, wrapmvs14)
                 mvs = wrapmvs14.value
                 if (inoutres15):
                     if (word_begin is None):
                         word_begin = word[:i]
                     r.processResult(res, word_begin, mvs)
         if (tn.nodes is None or i >= len(word)):
             break
         ch = ord(word[i])
         wraptn16 = RefOutArgWrapper(None)
         inoutres17 = Utils.tryGetValue(tn.nodes, ch, wraptn16)
         tn = wraptn16.value
         if (not inoutres17):
             break
         i += 1
     # Decide whether the forms found so far make guessing unnecessary.
     need_test_unknown_vars = True
     if (res is not None):
         for r in res:
             if ((r.class0_.is_pronoun or r.class0_.is_noun or r.class0_.is_adjective) or (r.class0_.is_misc and r.class0_.is_conjunction) or r.class0_.is_preposition):
                 need_test_unknown_vars = False
             elif (r.class0_.is_adverb and r.normal_case is not None):
                 if (not LanguageHelper.endsWithEx(r.normal_case, "О", "А", None, None)):
                     need_test_unknown_vars = False
                 elif (r.normal_case == "МНОГО"):
                     need_test_unknown_vars = False
             elif (r.class0_.is_verb and len(res) > 1):
                 ok = False
                 for rr in res:
                     if (rr != r and rr.class0_ != r.class0_):
                         ok = True
                         break
                 if (ok and not LanguageHelper.endsWith(word, "ИМ")):
                     need_test_unknown_vars = False
     if (need_test_unknown_vars and LanguageHelper.isCyrillicChar(word[0])):
         # Cyrillic words need at least two vowels and two other characters
         # for guessing to be attempted.
         gl = sum(1 for ch in word if LanguageHelper.isCyrillicVowel(ch))
         sog = len(word) - gl
         if ((gl < 2) or (sog < 2)):
             need_test_unknown_vars = False
     if (need_test_unknown_vars and res is not None and len(res) == 1):
         if (res[0].class0_.is_verb):
             if ("н.вр." in res[0].misc.attrs and "нес.в." in res[0].misc.attrs and not "страд.з." in res[0].misc.attrs):
                 need_test_unknown_vars = False
             elif ("б.вр." in res[0].misc.attrs and "сов.в." in res[0].misc.attrs):
                 need_test_unknown_vars = False
             elif (res[0].normal_case is not None and LanguageHelper.endsWith(res[0].normal_case, "СЯ")):
                 need_test_unknown_vars = False
         if (res[0].class0_.is_undefined and "прдктв." in res[0].misc.attrs):
             need_test_unknown_vars = False
     if (need_test_unknown_vars):
         if (self.m_root_reverce is None):
             return res
         # Walk the reverse tree from the last letter backwards until a node
         # with reverse variants is reached.
         tn = self.m_root_reverce
         tn0 = None
         for i in range(len(word) - 1, -1, -1):
             if (tn.lazy_pos > 0):
                 self.__loadTreeNode(tn)
             ch = ord(word[i])
             if (tn.nodes is None):
                 break
             wrapnext18 = RefOutArgWrapper(None)
             inoutres19 = Utils.tryGetValue(tn.nodes, ch, wrapnext18)
             next0_ = wrapnext18.value
             if (not inoutres19):
                 break
             tn = next0_
             if (tn.lazy_pos > 0):
                 self.__loadTreeNode(tn)
             if (tn.reverce_variants is not None):
                 tn0 = tn
                 break
         if (tn0 is not None):
             # i is the index where the matched ending starts; the part from
             # there back to the start must contain a vowel unless i < 4.
             glas = i < 4
             while i >= 0:
                 if (LanguageHelper.isCyrillicVowel(word[i]) or LanguageHelper.isLatinVowel(word[i])):
                     glas = True
                     break
                 i -= 1
             if (glas):
                 # The forward pass may have matched no rule at all, leaving
                 # res unset; create the list before it is iterated below.
                 if (res is None):
                     res = list()
                 for mv in tn0.reverce_variants:
                     if (((not mv.class0_.is_verb and not mv.class0_.is_adjective and not mv.class0_.is_noun) and not mv.class0_.is_proper_surname and not mv.class0_.is_proper_geo) and not mv.class0_.is_proper_secname):
                         continue
                     # Do not guess a form that a dictionary form already covers.
                     ok = False
                     for rr in res:
                         if (rr.is_in_dictionary):
                             if (rr.class0_ == mv.class0_ or rr.class0_.is_noun):
                                 ok = True
                                 break
                             if (not mv.class0_.is_adjective and rr.class0_.is_verb):
                                 ok = True
                                 break
                     if (ok):
                         continue
                     if (len(mv.tail) > 0 and not LanguageHelper.endsWith(word, mv.tail)):
                         continue
                     r = MorphWordForm(mv, word)
                     if (not MorphWordForm._hasMorphEquals(res, r)):
                         r.undef_coef = mv.coef
                         res.append(r)
     if (word == "ПРИ" and res is not None):
         # "ПРИ" is never kept as a geographic proper name.
         for i in range(len(res) - 1, -1, -1):
             if (res[i].class0_.is_proper_geo):
                 del res[i]
     if (res is None or len(res) == 0):
         return None
     MorphEngine.__sort(res, word)
     for v in res:
         if (v.normal_case is None):
             v.normal_case = word
         if (v.class0_.is_verb):
             if (v.normal_full is None and LanguageHelper.endsWith(v.normal_case, "ТЬСЯ")):
                 v.normal_full = v.normal_case[0:len(v.normal_case) - 2]
         v.language = self.language
         if (v.class0_.is_preposition):
             v.normal_case = LanguageHelper.normalizePreposition(v.normal_case)
     # Drop guessed short-form / invariable adjectives when other forms exist
     # and collect the classes of the dictionary forms.
     mc = MorphClass()
     for i in range(len(res) - 1, -1, -1):
         if (not res[i].is_in_dictionary and res[i].class0_.is_adjective and len(res) > 1):
             if ("к.ф." in res[i].misc.attrs or "неизм." in res[i].misc.attrs):
                 del res[i]
                 continue
         if (res[i].is_in_dictionary):
             mc.value |= res[i].class0_.value
     if (mc == MorphClass.VERB and len(res) > 1):
         for r in res:
             if (r.undef_coef > (100) and r.class0_ == MorphClass.ADJECTIVE):
                 r.undef_coef = (0)
     if (len(res) == 0):
         return None
     return res
Example #12
0
 def __tryNounName(li: typing.List['CityItemToken'], oi: 'IntOntologyItem',
                   always: bool) -> 'ReferentToken':
     """Try to attach a city written as a type noun followed by its name.

     The list must start with a NOUN or MISC item. On success ``li`` is
     modified in place: items after the name item are removed, and so is a
     leading MISC item.

     Args:
         li(typing.List[CityItemToken]): parsed city items, at least two
         oi(IntOntologyItem): output holder; its ``value`` is reset to None
             and, in the "city after ГОРОД/МІСТО/MISC" branch, set to the
             ontology item of the name token
         always(bool): when False, an unconfirmed candidate is kept only if
             MiscLocationHelper.checkNearBefore finds something before it

     Returns:
         ReferentToken: token wrapping the new (or cloned ontology)
             GeoReferent, or None when the sequence is rejected

     """
     oi.value = (None)
     if (li is None or (len(li) < 2)
             or ((li[0].typ != CityItemToken.ItemType.NOUN
                  and li[0].typ != CityItemToken.ItemType.MISC))):
         return None
     ok = not li[0].doubtful
     if (ok and li[0].typ == CityItemToken.ItemType.MISC):
         ok = False
     typ = (None
            if li[0].typ == CityItemToken.ItemType.MISC else li[0].value)
     typ2 = (None if li[0].typ == CityItemToken.ItemType.MISC else
             li[0].alt_value)
     prob_adj = None
     i1 = 1
     org0_ = None
     # Two nouns in a row after a village-like type: the second noun becomes
     # an extra type and the name is expected one item further.
     if ((typ is not None and li[i1].typ == CityItemToken.ItemType.NOUN and
          ((i1 + 1) < len(li))) and li[0].whitespaces_after_count <= 1 and
         (((LanguageHelper.endsWith(typ, "ПОСЕЛОК")
            or LanguageHelper.endsWith(typ, "СЕЛИЩЕ") or typ == "ДЕРЕВНЯ")
           or typ == "СЕЛО"))):
         if (li[i1].begin_token == li[i1].end_token):
             ooo = AddressItemToken.tryAttachOrg(li[i1].begin_token)
             if (ooo is not None and ooo.ref_token is not None):
                 return None
         typ2 = li[i1].value
         if (typ2 == "СТАНЦИЯ" and li[i1].begin_token.isValue("СТ", None)
                 and ((i1 + 1) < len(li))):
             # "СТ" may also stand for an adjective; prepare that variant
             # in the gender/number of the name.
             m = li[i1 + 1].morph
             if (m.number == MorphNumber.PLURAL):
                 prob_adj = "СТАРЫЕ"
             elif (m.gender == MorphGender.FEMINIE):
                 prob_adj = "СТАРАЯ"
             elif (m.gender == MorphGender.MASCULINE):
                 prob_adj = "СТАРЫЙ"
             else:
                 prob_adj = "СТАРОЕ"
         i1 += 1
     name = Utils.ifNotNull(li[i1].value,
                            ((None if li[i1].onto_item is None else
                              li[i1].onto_item.canonic_text)))
     alt_name = li[i1].alt_value
     if (name is None):
         return None
     mc = li[0].morph
     if (i1 == 1 and li[i1].typ == CityItemToken.ItemType.CITY
             and ((li[0].value == "ГОРОД" or li[0].value == "МІСТО"
                   or li[0].typ == CityItemToken.ItemType.MISC))):
         if (typ is None and ((i1 + 1) < len(li))
                 and li[i1 + 1].typ == CityItemToken.ItemType.NOUN):
             return None
         oi.value = li[i1].onto_item
         if (oi.value is not None):
             name = oi.value.canonic_text
         # oi.value may be None here (it is checked just above and below),
         # so guard it before reading misc_attr.
         if (len(name) > 2
                 or (oi.value is not None
                     and oi.value.misc_attr is not None)):
             if (not li[1].doubtful
                     or ((oi.value is not None
                          and oi.value.misc_attr is not None))):
                 ok = True
             elif (not ok and not li[1].is_newline_before):
                 if (li[0].geo_object_before or li[1].geo_object_after):
                     ok = True
                 elif (StreetDefineHelper.checkStreetAfter(
                         li[1].end_token.next0_)):
                     ok = True
                 elif (li[1].end_token.next0_ is not None
                       and (isinstance(li[1].end_token.next0_.getReferent(),
                                       DateReferent))):
                     ok = True
                 elif ((li[1].whitespaces_before_count < 2)
                       and li[1].onto_item is not None):
                     if (li[1].is_newline_after):
                         ok = True
             if (li[1].doubtful and li[1].end_token.next0_ is not None and
                     li[1].end_token.chars == li[1].end_token.next0_.chars):
                 ok = False
             if (li[0].begin_token.previous is not None
                     and li[0].begin_token.previous.isValue("В", None)):
                 ok = True
         if (not ok):
             ok = CityAttachHelper.checkYearAfter(li[1].end_token.next0_)
         if (not ok):
             ok = CityAttachHelper.checkCityAfter(li[1].end_token.next0_)
     elif ((li[i1].typ == CityItemToken.ItemType.PROPERNAME
            or li[i1].typ == CityItemToken.ItemType.CITY)):
         if (((li[0].value == "АДМИНИСТРАЦИЯ"
               or li[0].value == "АДМІНІСТРАЦІЯ")) and i1 == 1):
             return None
         if (li[i1].is_newline_before):
             if (len(li) != 2):
                 return None
         if (not li[0].doubtful):
             ok = True
             if (len(name) < 2):
                 ok = False
             elif ((len(name) < 3)
                   and li[0].morph.number != MorphNumber.SINGULAR):
                 ok = False
             if (li[i1].doubtful and not li[i1].geo_object_after
                     and not li[0].geo_object_before):
                 if (li[i1].morph.case_.is_genitive):
                     # A doubtful genitive name is kept only when geo
                     # context (or a house number) surrounds the pair.
                     if (((li[0].begin_token.previous is None
                           or MiscLocationHelper.checkGeoObjectBefore(
                               li[0].begin_token))) and
                         ((li[i1].end_token.next0_ is None
                           or MiscLocationHelper.checkGeoObjectAfter(
                               li[i1].end_token.next0_)
                           or AddressItemToken.checkHouseAfter(
                               li[i1].end_token.next0_, False, True)))):
                         pass
                     else:
                         ok = False
                 else:
                     # Reject when a person property precedes the type noun
                     # and a person can be parsed from the name.
                     rt0 = li[i1].kit.processReferent(
                         "PERSONPROPERTY", li[0].begin_token.previous)
                     if (rt0 is not None):
                         rt1 = li[i1].kit.processReferent(
                             "PERSON", li[i1].begin_token)
                         if (rt1 is not None):
                             ok = False
             npt = NounPhraseHelper.tryParse(li[i1].begin_token,
                                             NounPhraseParseAttr.NO, 0)
             if (npt is not None):
                 if (npt.end_token.end_char > li[i1].end_char
                         and len(npt.adjectives) > 0 and
                         not npt.adjectives[0].end_token.next0_.is_comma):
                     ok = False
                 elif (TerrItemToken._m_unknown_regions.tryParse(
                         npt.end_token, TerminParseAttr.FULLWORDSONLY)
                       is not None):
                     # The phrase ends with an unknown-region word: require
                     # a non-city geo referent right before or after it.
                     ok1 = False
                     if (li[0].begin_token.previous is not None):
                         ttt = li[0].begin_token.previous
                         if (ttt.is_comma and ttt.previous is not None):
                             ttt = ttt.previous
                         geo_ = Utils.asObjectOrNull(
                             ttt.getReferent(), GeoReferent)
                         if (geo_ is not None and not geo_.is_city):
                             ok1 = True
                     if (npt.end_token.next0_ is not None):
                         ttt = npt.end_token.next0_
                         if (ttt.is_comma and ttt.next0_ is not None):
                             ttt = ttt.next0_
                         geo_ = Utils.asObjectOrNull(
                             ttt.getReferent(), GeoReferent)
                         if (geo_ is not None and not geo_.is_city):
                             ok1 = True
                     if (not ok1):
                         return None
             if (li[0].value == "ПОРТ"):
                 if (li[i1].chars.is_all_upper
                         or li[i1].chars.is_latin_letter):
                     return None
         elif (li[0].geo_object_before):
             ok = True
         elif (li[i1].geo_object_after and not li[i1].is_newline_after):
             ok = True
         else:
             ok = CityAttachHelper.checkYearAfter(li[i1].end_token.next0_)
         if (not ok):
             ok = CityAttachHelper.checkStreetAfter(li[i1].end_token.next0_)
         if (not ok and li[0].begin_token.previous is not None
                 and li[0].begin_token.previous.isValue("В", None)):
             ok = True
     else:
         return None
     if (not ok and not always):
         if (MiscLocationHelper.checkNearBefore(li[0].begin_token.previous)
                 is None):
             return None
     # Everything after the name item is dropped from the caller's list.
     if (len(li) > (i1 + 1)):
         del li[i1 + 1:]
     city = GeoReferent()
     if (oi.value is not None and oi.value.referent is not None):
         city = (Utils.asObjectOrNull(oi.value.referent.clone(),
                                      GeoReferent))
         city.occurrence.clear()
     if (not li[0].morph.case_.is_undefined
             and li[0].morph.gender != MorphGender.UNDEFINED):
         # A one-token adjective name is re-derived in the case and gender
         # of the type noun.
         if (li[i1].end_token.morph.class0_.is_adjective
                 and li[i1].begin_token == li[i1].end_token):
             nam = ProperNameHelper.getNameEx(
                 li[i1].begin_token, li[i1].end_token, MorphClass.ADJECTIVE,
                 li[0].morph.case_, li[0].morph.gender, False, False)
             if (nam is not None and nam != name):
                 name = nam
     if (li[0].morph.case_.is_nominative):
         if (alt_name is not None):
             city._addName(alt_name)
         alt_name = (None)
     city._addName(name)
     if (prob_adj is not None):
         city._addName(prob_adj + " " + name)
     if (alt_name is not None):
         city._addName(alt_name)
         if (prob_adj is not None):
             city._addName(prob_adj + " " + alt_name)
     if (typ is not None):
         city._addTyp(typ)
     elif (not city.is_city):
         city._addTypCity(li[0].kit.base_language)
     if (typ2 is not None):
         city._addTyp(typ2.lower())
     if (li[0].higher_geo is not None
             and GeoOwnerHelper.canBeHigher(li[0].higher_geo, city)):
         city.higher = li[0].higher_geo
     if (li[0].typ == CityItemToken.ItemType.MISC):
         del li[0]
     res = ReferentToken._new719(city, li[0].begin_token,
                                 li[len(li) - 1].end_token, mc)
     # A "-<number below 50>" suffix right after the name is made part of
     # every name of the city.
     if (res.end_token.next0_ is not None and res.end_token.next0_.is_hiphen
             and (isinstance(res.end_token.next0_.next0_, NumberToken))):
         num = Utils.asObjectOrNull(res.end_token.next0_.next0_,
                                    NumberToken)
         if ((num.typ == NumberSpellingType.DIGIT
              and not num.morph.class0_.is_adjective
              and num.int_value is not None) and (num.int_value < 50)):
             for s in city.slots:
                 if (s.type_name == GeoReferent.ATTR_NAME):
                     city.uploadSlot(s,
                                     "{0}-{1}".format(s.value, num.value))
             res.end_token = num
     if (li[0].begin_token == li[0].end_token
             and li[0].begin_token.isValue("ГОРОДОК", None)):
         if (AddressItemToken.checkHouseAfter(res.end_token.next0_, True,
                                              False)):
             return None
     return res
Example #13
0
 def __TryAttach_(self,
                  pli: typing.List['PhoneItemToken'],
                  ind: int,
                  is_phone_before: bool,
                  prev_phone: 'PhoneReferent',
                  lev: int = 0) -> 'ReferentToken':
     """Try to assemble a phone referent from the items ``pli[ind:]``.

     Optional country-code and city-code items are consumed first. Then
     NUMBER items (separated by DELIM items) are concatenated into the
     number while a "template" string is recorded: the length of every
     digit group interleaved with the delimiter values. A chain of
     heuristics on the collected digits, the group lengths and the
     previous phone finally decides whether the result is accepted.

     Args:
         pli: parsed phone items. May be truncated in place: when the
             leading items reproduce ``prev_phone``'s template, the items
             after the matched part are deleted.
         ind: index in ``pli`` at which parsing starts.
         is_phone_before: relaxes several checks below; presumably means
             that a phone marker or another phone precedes the items
             (confirm against the callers).
         prev_phone: previously recognised phone whose number, template
             and country code serve as hints, or None.
         lev: recursion depth; the method gives up once it exceeds 4.

     Returns:
         A ReferentToken wrapping a new PhoneReferent, normally spanning
         from ``pli[0].begin_token`` to the end of the last consumed item,
         or None when the items are rejected.
     """
     # Nothing left to parse, or the recursion went too deep.
     if (ind >= len(pli) or lev > 4):
         return None
     country_code = None
     city_code = None
     j = ind
     # The previous phone has a template: build the template of the leading
     # items step by step and, as soon as it equals the previous one, cut
     # off everything that follows the matched part.
     if (prev_phone is not None and prev_phone._m_template is not None
             and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER):
         tmp = io.StringIO()
         jj = j
         # The first_pass flag emulates a C-style "for" loop: the index is
         # advanced at the top so that "continue" still increments it.
         first_pass3119 = True
         while True:
             if first_pass3119: first_pass3119 = False
             else: jj += 1
             if (not (jj < len(pli))): break
             if (pli[jj].item_type == PhoneItemToken.PhoneItemType.NUMBER):
                 # The template stores the group length, not the digits.
                 print(len(pli[jj].value), end="", file=tmp)
             elif (pli[jj].item_type == PhoneItemToken.PhoneItemType.DELIM):
                 # A space delimiter ends the template comparison.
                 if (pli[jj].value == " "):
                     break
                 print(pli[jj].value, end="", file=tmp)
                 continue
             else:
                 break
             templ0 = Utils.toStringStringIO(tmp)
             if (templ0 == prev_phone._m_template):
                 if ((jj + 1) < len(pli)):
                     # A single trailing PREFIX item is kept.
                     if (pli[jj + 1].item_type
                             == PhoneItemToken.PhoneItemType.PREFIX
                             and (jj + 2) == len(pli)):
                         pass
                     else:
                         # The slice end simplifies to len(pli), i.e. this
                         # removes every item after index jj.
                         del pli[jj + 1:jj + 1 + len(pli) - jj - 1]
                 break
     # Explicit country-code item. Unless it is "8", a known country prefix
     # shorter than the item is split off and the remainder starts the
     # city code.
     if ((j < len(pli)) and pli[j].item_type
             == PhoneItemToken.PhoneItemType.COUNTRYCODE):
         country_code = pli[j].value
         if (country_code != "8"):
             cc = PhoneHelper.getCountryPrefix(country_code)
             if (cc is not None and (len(cc) < len(country_code))):
                 city_code = country_code[len(cc):]
                 country_code = cc
         j += 1
     # Item that merely can be a country prefix: it is accepted only if the
     # remaining items parse as a phone on their own (recursive call).
     elif ((j < len(pli)) and pli[j].can_be_country_prefix):
         k = j + 1
         if ((k < len(pli)) and pli[k].item_type
                 == PhoneItemToken.PhoneItemType.DELIM):
             k += 1
         rrt = self.__TryAttach_(pli, k, is_phone_before, None, lev + 1)
         if (rrt is not None):
             # Exception: with is_phone_before set, "NNN-NNN" (3 digits,
             # hyphen, 3 digits) keeps the first group inside the number.
             if ((((is_phone_before and pli[j + 1].item_type
                    == PhoneItemToken.PhoneItemType.DELIM
                    and pli[j + 1].begin_token.is_hiphen) and
                   pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER
                   and len(pli[j].value) == 3) and
                  ((j + 2) < len(pli)) and pli[j + 2].item_type
                  == PhoneItemToken.PhoneItemType.NUMBER)
                     and len(pli[j + 2].value) == 3):
                 pass
             else:
                 country_code = pli[j].value
                 j += 1
     # No country code yet and the next NUMBER starts with '8' or '7'.
     if (((j < len(pli))
          and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and
          ((pli[j].value[0] == '8' or pli[j].value[0] == '7')))
             and country_code is None):
         if (len(pli[j].value) == 1):
             # A lone digit is the country code itself.
             country_code = pli[j].value
             j += 1
         elif (len(pli[j].value) == 4):
             # Four digits: the first is the country code, the other three
             # go to the city code.
             country_code = pli[j].value[0:0 + 1]
             if (city_code is None):
                 city_code = pli[j].value[1:]
             else:
                 city_code += pli[j].value[1:]
             j += 1
         elif (len(pli[j].value) == 11 and j == (len(pli) - 1)
               and is_phone_before):
             # The last item holds all 11 digits: return it directly. The
             # number is the value without its first digit, and a leading
             # '8' is not stored as country code.
             ph0 = PhoneReferent()
             if (pli[j].value[0] != '8'):
                 ph0.country_code = pli[j].value[0:0 + 1]
             ph0.number = pli[j].value[1:1 + 3] + pli[j].value[4:]
             return ReferentToken(ph0, pli[0].begin_token, pli[j].end_token)
         elif (city_code is None and len(pli[j].value) > 3
               and ((j + 1) < len(pli))):
             # If all NUMBER items together hold exactly 11 digits, the rest
             # of this item (after the leading digit) becomes the city code.
             sum0_ = 0
             for it in pli:
                 if (it.item_type == PhoneItemToken.PhoneItemType.NUMBER):
                     sum0_ += len(it.value)
             if (sum0_ == 11):
                 city_code = pli[j].value[1:]
                 j += 1
     # Explicit city-code item, appended to whatever was collected above.
     if ((j < len(pli))
             and pli[j].item_type == PhoneItemToken.PhoneItemType.CITYCODE):
         if (city_code is None):
             city_code = pli[j].value
         else:
             city_code += pli[j].value
         j += 1
     # Skip one delimiter between the codes and the number.
     if ((j < len(pli))
             and pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM):
         j += 1
     # Country code "8" without a city code: a 3- or 4-digit group that is
     # followed by at least three more items is taken as the city code.
     if ((country_code == "8" and city_code is None and
          ((j + 3) < len(pli)))
             and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER):
         if (len(pli[j].value) == 3 or len(pli[j].value) == 4):
             city_code = pli[j].value
             j += 1
             if ((j < len(pli)) and pli[j].item_type
                     == PhoneItemToken.PhoneItemType.DELIM):
                 j += 1
     # Expected number length; only set for country code "421".
     normal_num_len = 0
     if (country_code == "421"):
         normal_num_len = 9
     # Accumulators:
     #   num         - digits collected so far (num.tell() is used as their
     #                 count)
     #   templ       - template: group lengths interleaved with delimiters
     #   part_length - length of every digit group
     #   delim       - value of the first delimiter seen
     #   ok          - current accept/reject verdict
     #   additional  - value of an ADDNUMBER item (or bracketed tail)
     #   std         - set when the fixed three-group pattern below matched
     num = io.StringIO()
     templ = io.StringIO()
     part_length = list()
     delim = None
     ok = False
     additional = None
     std = False
     # Fast path right after a country code (or a "-"):
     # NUMBER DELIM NUMBER DELIM NUMBER where the first two groups have
     # 6 digits in total (or are 4 + 5) and the third has 4 or 1 digits.
     if (country_code is not None and ((j + 4) < len(pli)) and j > 0):
         if (((((pli[j - 1].value == "-" or pli[j - 1].item_type
                 == PhoneItemToken.PhoneItemType.COUNTRYCODE)) and
               pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and
               pli[j + 1].item_type == PhoneItemToken.PhoneItemType.DELIM)
              and pli[j + 2].item_type
              == PhoneItemToken.PhoneItemType.NUMBER and
              pli[j + 3].item_type == PhoneItemToken.PhoneItemType.DELIM)
                 and pli[j + 4].item_type
                 == PhoneItemToken.PhoneItemType.NUMBER):
             if ((((len(pli[j].value) + len(pli[j + 2].value)) == 6 or (
                 (len(pli[j].value) == 4 and len(pli[j + 2].value) == 5))))
                     and ((len(pli[j + 4].value) == 4
                           or len(pli[j + 4].value) == 1))):
                 print(pli[j].value, end="", file=num)
                 print(pli[j + 2].value, end="", file=num)
                 print(pli[j + 4].value, end="", file=num)
                 print("{0}{1}{2}{3}{4}".format(len(pli[j].value),
                                                pli[j + 1].value,
                                                len(pli[j + 2].value),
                                                pli[j + 3].value,
                                                len(pli[j + 4].value)),
                       end="",
                       file=templ,
                       flush=True)
                 std = True
                 ok = True
                 j += 5
     # Main loop: collect digit groups and delimiters (same emulated
     # "for" loop as above, so "continue" advances j).
     first_pass3120 = True
     while True:
         if first_pass3120: first_pass3120 = False
         else: j += 1
         if (not (j < len(pli))): break
         # The fast path already consumed the whole number.
         if (std):
             break
         if (pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM):
             # Delimiters that are in brackets, or that follow a bracketed
             # item, are skipped without being recorded.
             if (pli[j].is_in_brackets):
                 continue
             if (j > 0 and pli[j - 1].is_in_brackets):
                 continue
             # A delimiter is recorded only once the template has started.
             if (templ.tell() > 0):
                 print(pli[j].value, end="", file=templ)
             if (delim is None):
                 delim = pli[j].value
             elif (pli[j].value != delim):
                 # The delimiter changed. With exactly two groups so far
                 # (3 or 4 digits, then 3) and no city code, the first
                 # group is moved from the number into the city code.
                 if ((len(part_length) == 2 and
                      ((part_length[0] == 3 or part_length[0] == 4))
                      and city_code is None) and part_length[1] == 3):
                     city_code = Utils.toStringStringIO(num)[0:0 +
                                                             part_length[0]]
                     Utils.removeStringIO(num, 0, part_length[0])
                     del part_length[0]
                     delim = pli[j].value
                     continue
                 # With is_phone_before set, keep going past the changed
                 # delimiter while the number is still short or the next
                 # group would complete the expected length.
                 if (is_phone_before and ((j + 1) < len(pli))
                         and pli[j + 1].item_type
                         == PhoneItemToken.PhoneItemType.NUMBER):
                     if (num.tell() < 6):
                         continue
                     if (normal_num_len > 0
                             and (num.tell() + len(pli[j + 1].value))
                             == normal_num_len):
                         continue
                 break
             else:
                 continue
             # Reached only when the first delimiter was just recorded: every
             # other branch above ends with continue or break.
             ok = False
         elif (pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER):
             # Adding this group would exceed 13 digits: stop in front of it
             # (and in front of the delimiter preceding it).
             if ((num.tell() + len(pli[j].value)) > 13):
                 if (j > 0 and pli[j - 1].item_type
                         == PhoneItemToken.PhoneItemType.DELIM):
                     j -= 1
                 ok = True
                 break
             print(pli[j].value, end="", file=num)
             part_length.append(len(pli[j].value))
             print(len(pli[j].value), end="", file=templ)
             ok = True
             # More than 10 digits collected: stop, optionally taking a
             # directly following ADDNUMBER item.
             if (num.tell() > 10):
                 j += 1
                 if ((j < len(pli)) and pli[j].item_type
                         == PhoneItemToken.PhoneItemType.ADDNUMBER):
                     additional = pli[j].value
                     j += 1
                 break
         elif (pli[j].item_type == PhoneItemToken.PhoneItemType.ADDNUMBER):
             additional = pli[j].value
             j += 1
             break
         else:
             break
     # A last item in brackets with a 3- or 4-character value is taken as
     # the additional number.
     if ((j == (len(pli) - 1) and pli[j].is_in_brackets and
          ((len(pli[j].value) == 3 or len(pli[j].value) == 4)))
             and additional is None):
         additional = pli[j].value
         j += 1
     # A bracketed PREFIX after the number has the same effect as
     # is_phone_before.
     if ((j < len(pli))
             and pli[j].item_type == PhoneItemToken.PhoneItemType.PREFIX
             and pli[j].is_in_brackets):
         is_phone_before = True
         j += 1
     # Long city code (not starting with '8') and a short number: try to
     # split a country prefix off the city code, unless city code plus
     # number already make exactly 10 digits.
     if ((country_code is None and city_code is not None
          and len(city_code) > 3) and (num.tell() < 8)
             and city_code[0] != '8'):
         if ((len(city_code) + num.tell()) == 10):
             pass
         else:
             cc = PhoneHelper.getCountryPrefix(city_code)
             if (cc is not None):
                 if (len(cc) > 1 and (len(city_code) - len(cc)) > 1):
                     country_code = cc
                     city_code = city_code[len(cc):]
     # City code starting with "00": look the country prefix up after the
     # two zeros and strip both from the city code.
     if (country_code is None and city_code is not None
             and city_code.startswith("00")):
         cc = PhoneHelper.getCountryPrefix(city_code[2:])
         if (cc is not None):
             if (len(city_code) > (len(cc) + 3)):
                 country_code = cc
                 city_code = city_code[len(cc) + 2:]
     # No digits collected, so the "city code" holds the whole number.
     if (num.tell() == 0 and city_code is not None):
         if (len(city_code) == 10):
             # The first 3 digits stay as city code, the rest is the number.
             print(city_code[3:], end="", file=num)
             part_length.append(num.tell())
             city_code = city_code[0:0 + 3]
             ok = True
         elif (((len(city_code) == 9 or len(city_code) == 11
                 or len(city_code) == 8))
               and ((is_phone_before or country_code is not None))):
             # Everything becomes the number.
             print(city_code, end="", file=num)
             part_length.append(num.tell())
             city_code = (None)
             ok = True
     # Fewer than 4 digits are never accepted.
     if (num.tell() < 4):
         ok = False
     # Short numbers (fewer than 7 digits) need additional evidence.
     if (num.tell() < 7):
         if (city_code is not None and (len(city_code) + num.tell()) > 7):
             if (not is_phone_before and len(city_code) == 3):
                 # Walk over the group lengths: the loop runs to the end
                 # only when every group has 3 digits (the last one may
                 # have 2). Such input is rejected, except for country
                 # code "61".
                 ii = 0
                 while ii < len(part_length):
                     if (part_length[ii] == 3):
                         pass
                     elif (part_length[ii] > 3):
                         break
                     elif ((ii < (len(part_length) - 1))
                           or (part_length[ii] < 2)):
                         break
                     ii += 1
                 if (ii >= len(part_length)):
                     if (country_code == "61"):
                         pass
                     else:
                         ok = False
         elif (((num.tell() == 6 or num.tell() == 5))
               and ((len(part_length) >= 1 and len(part_length) <= 3))
               and is_phone_before):
             # 5-6 digits with is_phone_before set: rejected only when the
             # first item is a PREFIX of kind HOME.
             if (pli[0].item_type == PhoneItemToken.PhoneItemType.PREFIX
                     and pli[0].kind == PhoneKind.HOME):
                 ok = False
         elif (prev_phone is not None and prev_phone.number is not None
               and ((len(prev_phone.number) == num.tell()
                     or len(prev_phone.number) == (num.tell() + 3)
                     or len(prev_phone.number) == (num.tell() + 4)))):
             # Same length as the previous number, or 3 or 4 digits shorter:
             # the current verdict is kept.
             pass
         elif (num.tell() > 4 and prev_phone is not None
               and Utils.toStringStringIO(templ) == prev_phone._m_template):
             ok = True
         else:
             ok = False
     # Dot-delimited groups without any code are rejected.
     if (delim == "." and country_code is None and city_code is None):
         ok = False
     # More than 10 digits without any code and is_phone_before set: try to
     # peel off a country prefix that leaves exactly 9 digits.
     if ((is_phone_before and country_code is None and city_code is None)
             and num.tell() > 10):
         cc = PhoneHelper.getCountryPrefix(Utils.toStringStringIO(num))
         if (cc is not None):
             if ((num.tell() - len(cc)) == 9):
                 country_code = cc
                 Utils.removeStringIO(num, 0, len(cc))
                 ok = True
     # Shape checks on the digit groups. The first matching branch wins;
     # "pass" keeps the positive verdict.
     if (ok):
         if (std):
             pass
         elif (prev_phone is not None and prev_phone.number is not None and
               (((len(prev_phone.number) == num.tell()
                  or len(prev_phone.number) == (num.tell() + 3)
                  or len(prev_phone.number) == (num.tell() + 4)) or
                 prev_phone._m_template == Utils.toStringStringIO(templ)))):
             pass
         elif ((len(part_length) == 3 and part_length[0] == 3
                and part_length[1] == 2) and part_length[2] == 2):
             pass
         elif (len(part_length) == 3 and is_phone_before):
             pass
         elif ((len(part_length) == 4 and
                (((part_length[0] + part_length[1]) == 3))
                and part_length[2] == 2) and part_length[3] == 2):
             pass
         elif ((len(part_length) == 4 and part_length[0] == 3
                and part_length[1] == 3) and part_length[2] == 2
               and part_length[3] == 2):
             pass
         elif (len(part_length) == 5
               and (part_length[1] + part_length[2]) == 4
               and (part_length[3] + part_length[4]) == 4):
             pass
         elif (len(part_length) > 4):
             ok = False
         elif (len(part_length) > 3 and city_code is not None):
             ok = False
         elif ((is_phone_before or city_code is not None
                or country_code is not None) or additional is not None):
             ok = True
         else:
             ok = False
             # Last resort for 6-7 digit numbers: look at the phone that
             # follows; accept if its number has the same length or is 3 or
             # 4 digits longer.
             if (((num.tell() == 6 or num.tell() == 7))
                     and (len(part_length) < 4) and j > 0):
                 next_ph = self.__getNextPhone(pli[j - 1].end_token.next0_,
                                               lev + 1)
                 if (next_ph is not None):
                     d = len(next_ph.number) - num.tell()
                     if (d == 0 or d == 3 or d == 4):
                         ok = True
     # End token of the result: end of the last consumed item.
     end = (pli[j - 1].end_token if j > 0 else None)
     if (end is None):
         ok = False
     # A bare number without any supporting context must not be glued to
     # the following text (one of ".,)" in between is tolerated).
     if ((ok and city_code is None and country_code is None)
             and prev_phone is None and not is_phone_before):
         if (not end.is_whitespace_after and end.next0_ is not None):
             tt = end.next0_
             if (tt.isCharOf(".,)") and tt.next0_ is not None):
                 tt = tt.next0_
             if (not tt.is_whitespace_before):
                 ok = False
     if (not ok):
         return None
     # Drop a trailing non-digit (delimiter) from the template.
     if (templ.tell() > 0 and not str.isdigit(
             Utils.getCharAtStringIO(templ,
                                     templ.tell() - 1))):
         Utils.setLengthStringIO(templ, templ.tell() - 1)
     # Second attempt to split a country prefix off a long city code, now
     # for numbers longer than 6 digits.
     if ((country_code is None and city_code is not None
          and len(city_code) > 3) and num.tell() > 6):
         cc = PhoneHelper.getCountryPrefix(city_code)
         if (cc is not None and ((len(cc) + 1) < len(city_code))):
             country_code = cc
             city_code = city_code[len(cc):]
     # Build the referent; "8" is never stored as a country code.
     ph = PhoneReferent()
     if (country_code != "8" and country_code is not None):
         ph.country_code = country_code
     number = Utils.toStringStringIO(num)
     # Derive a missing city code: from the first group (shorter than 5
     # digits) of a number longer than 7 digits ...
     if ((city_code is None and num.tell() > 7 and len(part_length) > 0)
             and (part_length[0] < 5)):
         city_code = number[0:0 + part_length[0]]
         number = number[part_length[0]:]
     # ... from digits 1-3 of an 11-digit number starting with '8' ...
     if (city_code is None and num.tell() == 11
             and Utils.getCharAtStringIO(num, 0) == '8'):
         city_code = number[1:1 + 3]
         number = number[4:]
     # ... or from the first 3 digits of a 10-digit number.
     if (city_code is None and num.tell() == 10):
         city_code = number[0:0 + 3]
         number = number[3:]
     if (city_code is not None):
         # The stored number includes the city code.
         number = (city_code + number)
     elif (country_code is None and prev_phone is not None):
         # No codes at all: borrow the missing leading digits from the
         # previous phone when it is at least 2 digits longer or its
         # template ends with the current one.
         # NOTE(review): prev_phone.number is used here (and in the
         # country-code inheritance below) without the "is not None" check
         # that the earlier branches of this method perform - confirm it
         # can never be None at this point.
         ok1 = False
         if (len(prev_phone.number) >= (len(number) + 2)):
             ok1 = True
         elif (templ.tell() > 0 and prev_phone._m_template is not None
               and LanguageHelper.endsWith(prev_phone._m_template,
                                           Utils.toStringStringIO(templ))):
             ok1 = True
         if (ok1 and len(prev_phone.number) > len(number)):
             number = (prev_phone.number[0:0 + len(prev_phone.number) -
                                         len(number)] + number)
     # Inherit the country code of the previous phone when both numbers
     # have the same length.
     if (ph.country_code is None and prev_phone is not None
             and prev_phone.country_code is not None):
         if (len(prev_phone.number) == len(number)):
             ph.country_code = prev_phone.country_code
     # Reject numbers that contain no character other than '0' (this also
     # covers an empty number).
     ok = False
     for d in number:
         if (d != '0'):
             ok = True
             break
     if (not ok):
         return None
     if (country_code is not None):
         # With a country code at least 7 digits are required.
         if (len(number) < 7):
             return None
     else:
         # Without one, try to detect a country prefix inside the number;
         # it is split off only if 10-11 digits remain, and "7" is not
         # stored as a country code.
         s = PhoneHelper.getCountryPrefix(number)
         if (s is not None):
             num2 = number[len(s):]
             if (len(num2) >= 10 and len(num2) <= 11):
                 number = num2
                 if (s != "7"):
                     ph.country_code = s
         # 8-digit numbers are accepted only next to a previous phone.
         if (len(number) == 8 and prev_phone is None):
             return None
     # More than 11 digits are allowed only for country codes "1" and "43"
     # (and then fewer than 14).
     if (len(number) > 11):
         if ((len(number) < 14)
                 and ((country_code == "1" or country_code == "43"))):
             pass
         else:
             return None
     ph.number = number
     if (additional is not None):
         ph.addSlot(PhoneReferent.ATTR_ADDNUMBER, additional, True, 0)
     # Without is_phone_before, a number followed on the same line by '+',
     # '=' or a hyphen is rejected.
     if (not is_phone_before and end.next0_ is not None
             and not end.is_newline_after):
         if (end.next0_.isCharOf("+=") or end.next0_.is_hiphen):
             return None
     # Country code "7" requires exactly 10 digits.
     if (country_code is not None and country_code == "7"):
         if (len(number) != 10):
             return None
     ph._m_template = Utils.toStringStringIO(templ)
     # A final PREFIX item that does not start a new line is included in
     # the span and may supply the phone kind.
     if (j == (len(pli) - 1)
             and pli[j].item_type == PhoneItemToken.PhoneItemType.PREFIX
             and not pli[j].is_newline_before):
         end = pli[j].end_token
         if (pli[j].kind != PhoneKind.UNDEFINED):
             ph.kind = pli[j].kind
     res = ReferentToken(ph, pli[0].begin_token, end)
     # A leading PREFIX followed by a table control character is left out
     # of the resulting span.
     if (pli[0].item_type == PhoneItemToken.PhoneItemType.PREFIX
             and pli[0].end_token.next0_.is_table_control_char):
         res.begin_token = pli[1].begin_token
     return res
Example #14
0
 def getWordform(self, word : str, cla : 'MorphClass', gender : 'MorphGender', cas : 'MorphCase', num : 'MorphNumber', add_info : 'MorphWordForm') -> str:
     """Produce the form of ``word`` for the requested class, gender, case and number.

     Three strategies are tried in order:

     1. Walk the direct tree from ``self.m_root`` along the characters of
        ``word``. At every node that carries rules, ``word`` is split at the
        current position and each rule containing the remaining ending is
        scanned for a compatible variant; the best-scoring candidate
        (``calcEqCoef`` against ``add_info``, 0 when ``add_info`` is None) wins.
     2. Walk the reverse tree from ``self.m_root_reverce`` along the characters
        of ``word`` taken from the end, stop at the first node that has
        ``reverce_variants`` and return the first compatible variant found
        through the rules of those variants.
     3. For proper surnames requested in feminine gender, build the form from
        hard-coded endings.

     Returns the resulting string, or None when nothing applies.
     """
     # Strategy 1: direct tree.
     node = self.m_root
     matched = False
     best = None
     best_coef = -10
     for split_at in range(len(word) + 1):
         if (node.lazy_pos > 0):
             self.__loadTreeNode(node)
         if (node.rules is not None):
             stem = word[:split_at]
             ending = word[split_at:]
             for rule in node.rules:
                 if (ending not in rule.variants):
                     continue
                 for group in rule.variants_list:
                     for var in group:
                         if ((cla.value & var.class0_.value) == 0 or var.normal_tail is None):
                             continue
                         # With no case requested only nominative or
                         # case-less variants qualify; otherwise the cases
                         # have to intersect.
                         if (cas.is_undefined):
                             if (not (var.case_.is_nominative or var.case_.is_undefined)):
                                 continue
                         elif ((var.case_ & cas).is_undefined):
                             continue
                         # Surname-ness of request and variant must agree.
                         wants_surname = cla.is_proper_surname
                         is_surname = var.class0_.is_proper_surname
                         if ((wants_surname or is_surname) and wants_surname != is_surname):
                             continue
                         # From here on the reverse tree is not consulted
                         # any more, even if gender/number rule the variant out.
                         matched = True
                         if (gender != MorphGender.UNDEFINED and (gender & var.gender) == MorphGender.UNDEFINED):
                             continue
                         if (num != MorphNumber.UNDEFINED and (num & var.number) == MorphNumber.UNDEFINED):
                             continue
                         candidate = stem + var.tail
                         coef = 0 if add_info is None else var.calcEqCoef(add_info)
                         if (best is None or coef > best_coef):
                             best = candidate
                             best_coef = coef
                         # Shortcut: best score is 0 and this variant's
                         # normal form reproduces the input word.
                         if (best_coef == 0 and (stem + var.normal_tail) == word):
                             return candidate
         if (node.nodes is None or split_at >= len(word)):
             break
         child = RefOutArgWrapper(None)
         found = Utils.tryGetValue(node.nodes, ord(word[split_at]), child)
         node = child.value
         if (not found):
             break
     if (matched):
         return best

     # Strategy 2: reverse tree, consuming the word from its last character.
     node = self.m_root_reverce
     hit = None
     for letter in reversed(word):
         if (node.lazy_pos > 0):
             self.__loadTreeNode(node)
         if (node.nodes is None):
             break
         child = RefOutArgWrapper(None)
         if (not Utils.tryGetValue(node.nodes, ord(letter), child)):
             break
         node = child.value
         if (node.lazy_pos > 0):
             self.__loadTreeNode(node)
         if (node.reverce_variants is not None):
             hit = node
             break
     if (hit is None):
         return None
     for rev in hit.reverce_variants:
         if ((rev.class0_.value & cla.value) == 0 or rev.rule is None):
             continue
         if (len(rev.tail) > 0 and not LanguageHelper.endsWith(word, rev.tail)):
             continue
         stem = word[0:len(word) - len(rev.tail)]
         for group in rev.rule.variants_list:
             for var in group:
                 if ((var.class0_.value & cla.value) == 0):
                     continue
                 wants_surname = cla.is_proper_surname
                 is_surname = var.class0_.is_proper_surname
                 if ((wants_surname or is_surname) and wants_surname != is_surname):
                     continue
                 # Here undefined attributes of the variant are compatible
                 # with any request.
                 if (not cas.is_undefined and (cas & var.case_).is_undefined and not var.case_.is_undefined):
                     continue
                 if (num != MorphNumber.UNDEFINED and var.number != MorphNumber.UNDEFINED
                         and (var.number & num) == MorphNumber.UNDEFINED):
                     continue
                 if (gender != MorphGender.UNDEFINED and var.gender != MorphGender.UNDEFINED
                         and (var.gender & gender) == MorphGender.UNDEFINED):
                     continue
                 # The first compatible variant decides.
                 candidate = stem + var.tail
                 return word if candidate == word else candidate

     # Strategy 3: feminine surnames built from fixed endings.
     if (cla.is_proper_surname and gender == MorphGender.FEMINIE):
         if (not cas.is_undefined and not cas.is_nominative and word.endswith(("ВА", "НА"))):
             return word[:-1] + ("У" if cas.is_accusative else "ОЙ")
         last = word[-1]
         if (last in ('А', 'Я', 'О')):
             return word
         if (LanguageHelper.isCyrillicVowel(last)):
             return word[:-1] + "А"
         if (last == 'Й'):
             return word[:-2] + "АЯ"
         return word + "А"
     return best
Example #15
0
 def _tryParseStreet(sli : typing.List['StreetItemToken'], ext_onto_regim : bool=False, for_metro : bool=False) -> 'AddressItemToken':
     if (sli is None or len(sli) == 0): 
         return None
     i = 0
     while i < len(sli): 
         if (i == 0 and sli[i].typ == StreetItemType.FIX and ((len(sli) == 1 or sli[1].typ != StreetItemType.NOUN))): 
             return StreetDefineHelper.__tryParseFix(sli)
         elif (sli[i].typ == StreetItemType.NOUN): 
             if ((i == 0 and sli[i].termin.canonic_text == "УЛИЦА" and ((i + 2) < len(sli))) and sli[i + 1].typ == StreetItemType.NOUN and sli[i + 1].termin.canonic_text == "МИКРОРАЙОН"): 
                 sli[i + 1].begin_token = sli[i].begin_token
                 del sli[i]
             if (sli[i].termin.canonic_text == "МЕТРО"): 
                 if ((i + 1) < len(sli)): 
                     sli1 = list()
                     ii = i + 1
                     while ii < len(sli): 
                         sli1.append(sli[ii])
                         ii += 1
                     str1 = StreetDefineHelper._tryParseStreet(sli1, ext_onto_regim, True)
                     if (str1 is not None): 
                         str1.begin_token = sli[i].begin_token
                         str1.is_doubt = sli[i].is_abridge
                         if (sli[i + 1].is_in_brackets): 
                             str1.is_doubt = False
                         return str1
                 elif (i == 1 and sli[0].typ == StreetItemType.NAME): 
                     for_metro = True
                     break
                 if (i == 0 and len(sli) > 0): 
                     for_metro = True
                     break
                 return None
             if (i == 0 and (i + 1) >= len(sli) and ((sli[i].termin.canonic_text == "ВОЕННЫЙ ГОРОДОК" or sli[i].termin.canonic_text == "ПРОМЗОНА"))): 
                 stri0 = StreetReferent()
                 stri0.addSlot(StreetReferent.ATTR_TYP, "микрорайон", False, 0)
                 stri0.addSlot(StreetReferent.ATTR_NAME, sli[i].termin.canonic_text, False, 0)
                 return AddressItemToken._new85(AddressItemToken.ItemType.STREET, sli[0].begin_token, sli[0].end_token, stri0, True)
             if (i == 0 and (i + 1) >= len(sli) and sli[i].termin.canonic_text == "МИКРОРАЙОН"): 
                 stri0 = StreetReferent()
                 stri0.addSlot(StreetReferent.ATTR_TYP, sli[i].termin.canonic_text.lower(), False, 0)
                 return AddressItemToken._new85(AddressItemToken.ItemType.STREET, sli[0].begin_token, sli[0].end_token, stri0, True)
             if (sli[i].termin.canonic_text == "ПЛОЩАДЬ" or sli[i].termin.canonic_text == "ПЛОЩА"): 
                 tt = sli[i].end_token.next0_
                 if (tt is not None and ((tt.is_hiphen or tt.isChar(':')))): 
                     tt = tt.next0_
                 nex = NumberHelper.tryParseNumberWithPostfix(tt)
                 if (nex is not None): 
                     return None
             break
         i += 1
     if (i >= len(sli)): 
         return StreetDefineHelper.__tryDetectNonNoun(sli, ext_onto_regim, for_metro)
     name = None
     number = None
     age = None
     adj = None
     noun = sli[i]
     alt_noun = None
     is_micro_raion = (noun.termin.canonic_text == "МИКРОРАЙОН" or noun.termin.canonic_text == "МІКРОРАЙОН" or noun.termin.canonic_text == "КВАРТАЛ") or LanguageHelper.endsWith(noun.termin.canonic_text, "ГОРОДОК")
     before = 0
     after = 0
     j = 0
     while j < i: 
         if ((sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX) or sli[j].typ == StreetItemType.STDADJECTIVE or sli[j].typ == StreetItemType.STDPARTOFNAME): 
             before += 1
         elif (sli[j].typ == StreetItemType.NUMBER): 
             if (sli[j].is_newline_after): 
                 return None
             if (sli[j].number.morph.class0_.is_adjective): 
                 before += 1
             elif (is_micro_raion): 
                 before += 1
             elif (sli[i].number_has_prefix): 
                 before += 1
         else: 
             before += 1
         j += 1
     j = (i + 1)
     while j < len(sli): 
         if ((sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX) or sli[j].typ == StreetItemType.STDADJECTIVE or sli[j].typ == StreetItemType.STDPARTOFNAME): 
             after += 1
         elif (sli[j].typ == StreetItemType.NUMBER): 
             if (sli[j].number is not None and sli[j].number.morph.class0_.is_adjective): 
                 after += 1
             elif (is_micro_raion): 
                 after += 1
             elif (sli[j].number_has_prefix): 
                 after += 1
             elif (ext_onto_regim): 
                 after += 1
         elif (sli[j].typ == StreetItemType.NOUN): 
             break
         else: 
             after += 1
         j += 1
     rli = list()
     if (before > after): 
         if (noun.termin.canonic_text == "МЕТРО"): 
             return None
         tt = sli[0].begin_token
         if (tt == sli[0].end_token and noun.begin_token == sli[0].end_token.next0_): 
             if (not tt.morph.class0_.is_adjective and not ((isinstance(tt, NumberToken)))): 
                 if ((sli[0].is_newline_before or not MiscLocationHelper.checkGeoObjectBefore(sli[0].begin_token) or noun.morph.case_.is_genitive) or noun.morph.case_.is_instrumental): 
                     ok = False
                     if (AddressItemToken.checkHouseAfter(noun.end_token.next0_, False, True)): 
                         ok = True
                     elif (noun.end_token.next0_ is None): 
                         ok = True
                     elif (noun.is_newline_after and MiscLocationHelper.checkGeoObjectBefore(sli[0].begin_token)): 
                         ok = True
                     if (not ok): 
                         if ((noun.chars.is_latin_letter and noun.chars.is_capital_upper and sli[0].chars.is_latin_letter) and sli[0].chars.is_capital_upper): 
                             ok = True
                     if (not ok): 
                         return None
         n0 = 0
         n1 = (i - 1)
     elif (i == 1 and sli[0].typ == StreetItemType.NUMBER): 
         if (not sli[0].is_whitespace_after): 
             return None
         number = (sli[0].value if sli[0].number is None else str(sli[0].number.int_value))
         if (sli[0].is_number_km): 
             number += "км"
         n0 = (i + 1)
         n1 = (len(sli) - 1)
         rli.append(sli[0])
         rli.append(sli[i])
     elif (after > before): 
         n0 = (i + 1)
         n1 = (len(sli) - 1)
         rli.append(sli[i])
     elif (after == 0): 
         return None
     elif ((len(sli) > 2 and ((sli[0].typ == StreetItemType.NAME or sli[0].typ == StreetItemType.STDADJECTIVE or sli[0].typ == StreetItemType.STDNAME)) and sli[1].typ == StreetItemType.NOUN) and sli[2].typ == StreetItemType.NUMBER): 
         n0 = 0
         n1 = 0
         num = False
         tt2 = sli[2].end_token.next0_
         if (sli[2].is_number_km): 
             num = True
         elif (sli[0].begin_token.previous is not None and sli[0].begin_token.previous.isValue("КИЛОМЕТР", None)): 
             sli[2].is_number_km = True
             num = True
         elif (sli[2].begin_token.previous.is_comma): 
             pass
         elif (sli[2].begin_token != sli[2].end_token): 
             num = True
         elif (AddressItemToken.checkHouseAfter(sli[2].end_token.next0_, False, True)): 
             num = True
         elif (sli[2].morph.class0_.is_adjective and (sli[2].whitespaces_before_count < 2)): 
             if (sli[2].end_token.next0_ is None or sli[2].end_token.is_comma or sli[2].is_newline_after): 
                 num = True
         if (num): 
             number = (sli[2].value if sli[2].number is None else str(sli[2].number.int_value))
             if (sli[2].is_number_km): 
                 number += "км"
             rli.append(sli[2])
         else: 
             del sli[2:2+len(sli) - 2]
     else: 
         return None
     sec_number = None
     j = n0
     first_pass2732 = True
     while True:
         if first_pass2732: first_pass2732 = False
         else: j += 1
         if (not (j <= n1)): break
         if (sli[j].typ == StreetItemType.NUMBER): 
             if (age is not None or ((sli[j].is_newline_before and j > 0))): 
                 break
             if (number is not None): 
                 if (name is not None and name.typ == StreetItemType.STDNAME): 
                     sec_number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value))
                     if (sli[j].is_number_km): 
                         sec_number += "км"
                     rli.append(sli[j])
                     continue
                 if (((j + 1) < len(sli)) and sli[j + 1].typ == StreetItemType.STDNAME): 
                     sec_number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value))
                     if (sli[j].is_number_km): 
                         sec_number += "км"
                     rli.append(sli[j])
                     continue
                 break
             if (sli[j].number is not None and sli[j].number.typ == NumberSpellingType.DIGIT and not sli[j].number.morph.class0_.is_adjective): 
                 if (sli[j].whitespaces_before_count > 2 and j > 0): 
                     break
                 if (sli[j].number is not None and sli[j].number.int_value > 20): 
                     if (j > n0): 
                         if (((j + 1) < len(sli)) and sli[j + 1].typ == StreetItemType.NOUN): 
                             pass
                         else: 
                             break
                 if (j == n0 and n0 > 0): 
                     pass
                 elif (j == n0 and n0 == 0 and sli[j].whitespaces_after_count == 1): 
                     pass
                 elif (sli[j].number_has_prefix): 
                     pass
                 elif (j == n1 and ((n1 + 1) < len(sli)) and sli[n1 + 1].typ == StreetItemType.NOUN): 
                     pass
                 else: 
                     break
             number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value))
             if (sli[j].is_number_km): 
                 number += "км"
             rli.append(sli[j])
         elif (sli[j].typ == StreetItemType.AGE): 
             if (number is not None or age is not None): 
                 break
             age = str(sli[j].number.int_value)
             rli.append(sli[j])
         elif (sli[j].typ == StreetItemType.STDADJECTIVE): 
             if (adj is not None): 
                 return None
             adj = sli[j]
             rli.append(sli[j])
         elif (sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX): 
             if (name is not None): 
                 if (j > 1 and sli[j - 2].typ == StreetItemType.NOUN): 
                     break
                 elif (i < j): 
                     break
                 else: 
                     return None
             name = sli[j]
             rli.append(sli[j])
         elif (sli[j].typ == StreetItemType.STDPARTOFNAME and j == n1): 
             if (name is not None): 
                 break
             name = sli[j]
             rli.append(sli[j])
         elif (sli[j].typ == StreetItemType.NOUN): 
             if ((sli[0] == noun and ((noun.termin.canonic_text == "УЛИЦА" or noun.termin.canonic_text == "ВУЛИЦЯ")) and j > 0) and name is None): 
                 alt_noun = noun
                 noun = sli[j]
                 rli.append(sli[j])
             else: 
                 break
     if (((n1 < i) and number is None and ((i + 1) < len(sli))) and sli[i + 1].typ == StreetItemType.NUMBER and sli[i + 1].number_has_prefix): 
         number = (sli[i + 1].value if sli[i + 1].number is None else str(sli[i + 1].number.int_value))
         rli.append(sli[i + 1])
     elif ((((i < n0) and ((name is not None or adj is not None)) and (j < len(sli))) and sli[j].typ == StreetItemType.NOUN and ((noun.termin.canonic_text == "УЛИЦА" or noun.termin.canonic_text == "ВУЛИЦЯ"))) and (((sli[j].termin.canonic_text == "ПЛОЩАДЬ" or sli[j].termin.canonic_text == "БУЛЬВАР" or sli[j].termin.canonic_text == "ПЛОЩА") or sli[j].termin.canonic_text == "МАЙДАН" or (j + 1) == len(sli)))): 
         alt_noun = noun
         noun = sli[j]
         rli.append(sli[j])
     if (name is None): 
         if (number is None and adj is None): 
             return None
         if (noun.is_abridge): 
             if (is_micro_raion): 
                 pass
             elif (noun.termin is not None and ((noun.termin.canonic_text == "ПРОЕЗД" or noun.termin.canonic_text == "ПРОЇЗД"))): 
                 pass
             elif (adj is None or adj.is_abridge): 
                 return None
         if (adj is not None and adj.is_abridge): 
             return None
     if (not sli[i] in rli): 
         rli.append(sli[i])
     street = StreetReferent()
     if (not for_metro): 
         street.addSlot(StreetReferent.ATTR_TYP, noun.termin.canonic_text.lower(), False, 0)
         if (noun.alt_termin is not None): 
             if (noun.alt_termin.canonic_text == "ПРОСПЕКТ" and number is not None): 
                 pass
             else: 
                 street.addSlot(StreetReferent.ATTR_TYP, noun.alt_termin.canonic_text.lower(), False, 0)
     else: 
         street.addSlot(StreetReferent.ATTR_TYP, "метро", False, 0)
     res = AddressItemToken._new82(AddressItemToken.ItemType.STREET, rli[0].begin_token, rli[0].end_token, street)
     for r in rli: 
         if (res.begin_char > r.begin_char): 
             res.begin_token = r.begin_token
         if (res.end_char < r.end_char): 
             res.end_token = r.end_token
     if (for_metro and noun in rli and noun.termin.canonic_text == "МЕТРО"): 
         rli.remove(noun)
     if (noun.is_abridge and (noun.length_char < 4)): 
         res.is_doubt = True
     elif (noun.noun_is_doubt_coef > 0): 
         res.is_doubt = True
         if ((name is not None and name.end_char > noun.end_char and noun.chars.is_all_lower) and not name.chars.is_all_lower and not ((isinstance(name.begin_token, ReferentToken)))): 
             npt2 = NounPhraseHelper.tryParse(name.begin_token, NounPhraseParseAttr.NO, 0)
             if (npt2 is not None and npt2.end_char > name.end_char): 
                 pass
             elif (AddressItemToken.checkHouseAfter(res.end_token.next0_, False, False)): 
                 res.is_doubt = False
             elif (name.chars.is_capital_upper and noun.noun_is_doubt_coef == 1): 
                 res.is_doubt = False
     name_base = io.StringIO()
     name_alt = io.StringIO()
     name_alt2 = None
     gen = noun.termin.gender
     adj_gen = MorphGender.UNDEFINED
     if (number is not None): 
         street.number = number
         if (sec_number is not None): 
             street.sec_number = sec_number
     if (age is not None): 
         if (street.number is None): 
             street.number = age
         else: 
             street.sec_number = age
     if (name is not None and name.value is not None): 
         if (street.kind == StreetKind.ROAD): 
             for r in rli: 
                 if (r.typ == StreetItemType.NAME and r != name): 
                     print(r.value, end="", file=name_alt)
                     break
         if (name.alt_value is not None and name_alt.tell() == 0): 
             print("{0} {1}".format(Utils.toStringStringIO(name_base), name.alt_value), end="", file=name_alt, flush=True)
         print(" {0}".format(name.value), end="", file=name_base, flush=True)
     elif (name is not None): 
         is_adj = False
         if (isinstance(name.end_token, TextToken)): 
             for wf in name.end_token.morph.items: 
                 if ((isinstance(wf, MorphWordForm)) and (wf).is_in_dictionary): 
                     is_adj = (wf.class0_.is_adjective | wf.class0_.is_proper_geo)
                     adj_gen = wf.gender
                     break
                 elif (wf.class0_.is_adjective | wf.class0_.is_proper_geo): 
                     is_adj = True
         if (is_adj): 
             tmp = io.StringIO()
             vars0_ = list()
             t = name.begin_token
             while t is not None: 
                 tt = Utils.asObjectOrNull(t, TextToken)
                 if (tt is None): 
                     break
                 if (tmp.tell() > 0): 
                     print(' ', end="", file=tmp)
                 if (t == name.end_token): 
                     is_padez = False
                     if (not noun.is_abridge): 
                         if (not noun.morph.case_.is_undefined and not noun.morph.case_.is_nominative): 
                             is_padez = True
                         elif (noun.termin.canonic_text == "ШОССЕ" or noun.termin.canonic_text == "ШОСЕ"): 
                             is_padez = True
                     if (res.begin_token.previous is not None and res.begin_token.previous.morph.class0_.is_preposition): 
                         is_padez = True
                     if (not is_padez): 
                         print(tt.term, end="", file=tmp)
                         break
                     for wf in tt.morph.items: 
                         if (((wf.class0_.is_adjective or wf.class0_.is_proper_geo)) and (((wf.gender) & (gen))) != (MorphGender.UNDEFINED)): 
                             if (noun.morph.case_.is_undefined or not ((wf.case_) & noun.morph.case_).is_undefined): 
                                 wff = Utils.asObjectOrNull(wf, MorphWordForm)
                                 if (wff is None): 
                                     continue
                                 if (gen == MorphGender.MASCULINE and "ОЙ" in wff.normal_case): 
                                     continue
                                 if (not wff.normal_case in vars0_): 
                                     vars0_.append(wff.normal_case)
                     if (not tt.term in vars0_ and Utils.indexOfList(sli, name, 0) > Utils.indexOfList(sli, noun, 0)): 
                         vars0_.append(tt.term)
                     if (len(vars0_) == 0): 
                         vars0_.append(tt.term)
                     break
                 if (not tt.is_hiphen): 
                     print(tt.term, end="", file=tmp)
                 t = t.next0_
             if (len(vars0_) == 0): 
                 print(" {0}".format(Utils.toStringStringIO(tmp)), end="", file=name_base, flush=True)
             else: 
                 head = Utils.toStringStringIO(name_base)
                 print(" {0}{1}".format(Utils.toStringStringIO(tmp), vars0_[0]), end="", file=name_base, flush=True)
                 if (len(vars0_) > 1): 
                     Utils.setLengthStringIO(name_alt, 0)
                     print("{0} {1}{2}".format(head, Utils.toStringStringIO(tmp), vars0_[1]), end="", file=name_alt, flush=True)
                 if (len(vars0_) > 2): 
                     name_alt2 = "{0} {1}{2}".format(head, Utils.toStringStringIO(tmp), vars0_[2])
         else: 
             str_nam = None
             nits = list()
             has_adj = False
             has_proper_name = False
             t = name.begin_token
             while t is not None: 
                 if (t.morph.class0_.is_adjective or t.morph.class0_.is_conjunction): 
                     has_adj = True
                 if ((isinstance(t, TextToken)) and not t.is_hiphen): 
                     if (name.termin is not None): 
                         nits.append(name.termin.canonic_text)
                         break
                     elif (not t.chars.is_letter and len(nits) > 0): 
                         nits[len(nits) - 1] += (t).term
                     else: 
                         nits.append((t).term)
                         if (t == name.begin_token and t.getMorphClassInDictionary().is_proper_name): 
                             has_proper_name = True
                 elif ((isinstance(t, ReferentToken)) and name.termin is None): 
                     nits.append(t.getSourceText().upper())
                 if (t == name.end_token): 
                     break
                 t = t.next0_
             if (not has_adj and not has_proper_name): 
                 nits.sort()
             str_nam = Utils.joinStrings(" ", list(nits))
             if (has_proper_name and len(nits) == 2): 
                 Utils.setLengthStringIO(name_alt, 0)
                 print("{0} {1}".format(Utils.toStringStringIO(name_base), nits[1]), end="", file=name_alt, flush=True)
             print(" {0}".format(str_nam), end="", file=name_base, flush=True)
     adj_str = None
     adj_can_be_initial = False
     if (adj is not None): 
         if (adj_gen == MorphGender.UNDEFINED and name is not None and (((name.morph.number) & (MorphNumber.PLURAL))) == (MorphNumber.UNDEFINED)): 
             if (name.morph.gender == MorphGender.FEMINIE or name.morph.gender == MorphGender.MASCULINE or name.morph.gender == MorphGender.NEUTER): 
                 adj_gen = name.morph.gender
         if (name is not None and (((name.morph.number) & (MorphNumber.PLURAL))) != (MorphNumber.UNDEFINED)): 
             s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new209(MorphClass.ADJECTIVE, MorphNumber.PLURAL))
         elif (adj_gen != MorphGender.UNDEFINED): 
             s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, adj_gen))
         elif ((((adj.morph.gender) & (gen))) == (MorphGender.UNDEFINED)): 
             s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, adj.morph.gender))
         else: 
             s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, gen))
         adj_str = s
         if (name is not None and (Utils.indexOfList(sli, adj, 0) < Utils.indexOfList(sli, name, 0))): 
             if (adj.end_token.isChar('.') and adj.length_char <= 3 and not adj.begin_token.chars.is_all_lower): 
                 adj_can_be_initial = True
     s1 = Utils.toStringStringIO(name_base).strip()
     s2 = Utils.toStringStringIO(name_alt).strip()
     if (len(s1) < 3): 
         if (street.number is not None): 
             if (adj_str is not None): 
                 if (adj.is_abridge): 
                     return None
                 street.addSlot(StreetReferent.ATTR_NAME, adj_str, False, 0)
         elif (adj_str is None): 
             if (len(s1) < 1): 
                 return None
             if (is_micro_raion): 
                 street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
                 if (not Utils.isNullOrEmpty(s2)): 
                     street.addSlot(StreetReferent.ATTR_NAME, s2, False, 0)
             else: 
                 return None
         else: 
             if (adj.is_abridge): 
                 return None
             street.addSlot(StreetReferent.ATTR_NAME, adj_str, False, 0)
     elif (adj_can_be_initial): 
         street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
         street.addSlot(StreetReferent.ATTR_NAME, MiscHelper.getTextValue(adj.begin_token, name.end_token, GetTextAttr.NO), False, 0)
         street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0)
     elif (adj_str is None): 
         street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
     else: 
         street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0)
     if (name_alt.tell() > 0): 
         s1 = Utils.toStringStringIO(name_alt).strip()
         if (adj_str is None): 
             street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
         else: 
             street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0)
     if (name_alt2 is not None): 
         if (adj_str is None): 
             if (for_metro and noun is not None): 
                 street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(alt_noun.termin.canonic_text, name_alt2.strip()), False, 0)
             else: 
                 street.addSlot(StreetReferent.ATTR_NAME, name_alt2.strip(), False, 0)
         else: 
             street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, name_alt2.strip()), False, 0)
     if (name is not None and name.alt_value2 is not None): 
         street.addSlot(StreetReferent.ATTR_NAME, name.alt_value2, False, 0)
     if ((name is not None and adj is None and name.exist_street is not None) and not for_metro): 
         for n in name.exist_street.names: 
             street.addSlot(StreetReferent.ATTR_NAME, n, False, 0)
     if (alt_noun is not None and not for_metro): 
         street.addSlot(StreetReferent.ATTR_TYP, alt_noun.termin.canonic_text.lower(), False, 0)
     if (noun.termin.canonic_text == "ПЛОЩАДЬ" or noun.termin.canonic_text == "КВАРТАЛ" or noun.termin.canonic_text == "ПЛОЩА"): 
         res.is_doubt = True
         if (name is not None and name.is_in_dictionary): 
             res.is_doubt = False
         elif (alt_noun is not None or for_metro): 
             res.is_doubt = False
         elif (res.begin_token.previous is None or MiscLocationHelper.checkGeoObjectBefore(res.begin_token.previous)): 
             if (res.end_token.next0_ is None or AddressItemToken.checkHouseAfter(res.end_token.next0_, False, True)): 
                 res.is_doubt = False
     if (LanguageHelper.endsWith(noun.termin.canonic_text, "ГОРОДОК")): 
         for s in street.slots: 
             if (s.type_name == StreetReferent.ATTR_TYP): 
                 street.uploadSlot(s, "микрорайон")
             elif (s.type_name == StreetReferent.ATTR_NAME): 
                 street.uploadSlot(s, "{0} {1}".format(noun.termin.canonic_text, s.value))
         if (street.findSlot(StreetReferent.ATTR_NAME, None, True) is None): 
             street.addSlot(StreetReferent.ATTR_NAME, noun.termin.canonic_text, False, 0)
     t1 = res.end_token.next0_
     if (t1 is not None and t1.is_comma): 
         t1 = t1.next0_
     non = StreetItemToken.tryParse(t1, None, False, None, False)
     if (non is not None and non.typ == StreetItemType.NOUN and len(street.typs) > 0): 
         if (AddressItemToken.checkHouseAfter(non.end_token.next0_, False, True)): 
             street._correct()
             nams = street.names
             for t in street.typs: 
                 for n in nams: 
                     street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(t.upper(), n), False, 0)
             street.addSlot(StreetReferent.ATTR_TYP, non.termin.canonic_text.lower(), False, 0)
             res.end_token = non.end_token
     if (res.is_doubt): 
         if (noun.is_road): 
             if (street.number is not None and Utils.endsWithString(street.number, "КМ", True)): 
                 res.is_doubt = False
             elif (AddressItemToken.checkKmAfter(res.end_token.next0_)): 
                 res.is_doubt = False
             elif (AddressItemToken.checkKmBefore(res.begin_token.previous)): 
                 res.is_doubt = False
         elif (noun.termin.canonic_text == "ПРОЕЗД" and street.findSlot(StreetReferent.ATTR_NAME, "ПРОЕКТИРУЕМЫЙ", True) is not None): 
             res.is_doubt = False
         tt0 = res.begin_token.previous
         first_pass2733 = True
         while True:
             if first_pass2733: first_pass2733 = False
             else: tt0 = tt0.previous
             if (not (tt0 is not None)): break
             if (tt0.isCharOf(",,") or tt0.is_comma_and): 
                 continue
             str0 = Utils.asObjectOrNull(tt0.getReferent(), StreetReferent)
             if (str0 is not None): 
                 res.is_doubt = False
             break
     if (noun.termin.canonic_text == "КВАРТАЛ" and (res.whitespaces_after_count < 2) and number is None): 
         ait = AddressItemToken.tryParse(res.end_token.next0_, None, False, True, None)
         if (ait is not None and ait.typ == AddressItemToken.ItemType.NUMBER and ait.value is not None): 
             street.addSlot(StreetReferent.ATTR_NUMBER, ait.value, False, 0)
             res.end_token = ait.end_token
     return res
Example #16
0
 def tryParse(t: 'Token',
              typ: 'BracketParseAttr' = BracketParseAttr.NO,
              max_tokens: int = 100) -> 'BracketSequenceToken':
     """ Попробовать восстановить последовательность, обрамляемую кавычками
     
     Args:
         t(Token): 
         typ(BracketParseAttr): параметры выделения
         max_tokens(int): максимальное число токенов (вдруг забыли закрывающую кавычку)
     
     """
     t0 = t
     cou = 0
     if (not BracketHelper.canBeStartOfSequence(t0, False, False)):
         return None
     br_list = list()
     br_list.append(BracketHelper.Bracket(t0))
     cou = 0
     crlf = 0
     last = None
     lev = 1
     is_assim = br_list[
         0].char0_ != '«' and BracketHelper.M_ASSYMOPEN_CHARS.find(
             br_list[0].char0_) >= 0
     t = t0.next0_
     first_pass2802 = True
     while True:
         if first_pass2802: first_pass2802 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char):
             break
         last = t
         if (t.isCharOf(BracketHelper.M_OPEN_CHARS)
                 or t.isCharOf(BracketHelper.M_CLOSE_CHARS)):
             if (t.is_newline_before
                     and (((typ) & (BracketParseAttr.CANBEMANYLINES)))
                     == (BracketParseAttr.NO)):
                 if (t.whitespaces_before_count > 10
                         or BracketHelper.canBeStartOfSequence(
                             t, False, False)):
                     if (t.isChar('(') and not t0.isChar('(')):
                         pass
                     else:
                         last = t.previous
                         break
             bb = BracketHelper.Bracket(t)
             br_list.append(bb)
             if (len(br_list) > 20):
                 break
             if ((len(br_list) == 3 and br_list[1].can_be_open
                  and bb.can_be_close) and BracketHelper.__mustBeCloseChar(
                      bb.char0_, br_list[1].char0_)
                     and BracketHelper.__mustBeCloseChar(
                         bb.char0_, br_list[0].char0_)):
                 ok = False
                 tt = t.next0_
                 while tt is not None:
                     if (tt.is_newline_before):
                         break
                     if (tt.isChar(',')):
                         break
                     if (tt.isChar('.')):
                         tt = tt.next0_
                         while tt is not None:
                             if (tt.is_newline_before):
                                 break
                             elif (tt.isCharOf(BracketHelper.M_OPEN_CHARS)
                                   or tt.isCharOf(
                                       BracketHelper.M_CLOSE_CHARS)):
                                 bb2 = BracketHelper.Bracket(tt)
                                 if (BracketHelper.canBeEndOfSequence(
                                         tt, False, None, False)
                                         and BracketHelper.__canBeCloseChar(
                                             bb2.char0_,
                                             br_list[0].char0_)):
                                     ok = True
                                 break
                             tt = tt.next0_
                         break
                     if (t.isCharOf(BracketHelper.M_OPEN_CHARS)
                             or t.isCharOf(BracketHelper.M_CLOSE_CHARS)):
                         ok = True
                         break
                     tt = tt.next0_
                 if (not ok):
                     break
             if (is_assim):
                 if (bb.can_be_open and not bb.can_be_close
                         and bb.char0_ == br_list[0].char0_):
                     lev += 1
                 elif (bb.can_be_close and not bb.can_be_open and
                       BracketHelper.M_OPEN_CHARS.find(br_list[0].char0_)
                       == BracketHelper.M_CLOSE_CHARS.find(bb.char0_)):
                     lev -= 1
                     if (lev == 0):
                         break
         else:
             cou += 1
             if ((cou) > max_tokens):
                 break
             if ((((typ) & (BracketParseAttr.CANCONTAINSVERBS))) == (
                     BracketParseAttr.NO)):
                 if (t.morph.language.is_cyrillic):
                     if (t.getMorphClassInDictionary() == MorphClass.VERB):
                         if (not t.morph.class0_.is_adjective
                                 and not t.morph.containsAttr(
                                     "страд.з.", None)):
                             if (t.chars.is_all_lower):
                                 norm = t.getNormalCaseText(
                                     None, False, MorphGender.UNDEFINED,
                                     False)
                                 if (not LanguageHelper.endsWith(
                                         norm, "СЯ")):
                                     if (len(br_list) > 1):
                                         break
                                     if (br_list[0].char0_ != '('):
                                         break
                 elif (t.morph.language.is_en):
                     if (t.morph.class0_ == MorphClass.VERB
                             and t.chars.is_all_lower):
                         break
                 r = t.getReferent()
                 if (r is not None and r.type_name == "ADDRESS"):
                     if (not t0.isChar('(')):
                         break
         if ((((typ) & (BracketParseAttr.CANBEMANYLINES))) !=
             (BracketParseAttr.NO)):
             if (t.is_newline_before):
                 if (t.newlines_before_count > 1):
                     break
                 crlf += 1
             continue
         if (t.is_newline_before):
             if (t.whitespaces_before_count > 15):
                 break
             crlf += 1
             if (not t.chars.is_all_lower):
                 if (t.previous is not None and t.previous.isChar('.')):
                     break
             if ((isinstance(t.previous, MetaToken))
                     and BracketHelper.canBeEndOfSequence(
                         (t.previous).end_token, False, None, False)):
                 break
         if (crlf > 1):
             if (len(br_list) > 1):
                 break
             if (crlf > 10):
                 break
         if (t.isChar(';') and t.is_newline_after):
             break
     if ((len(br_list) == 1 and br_list[0].can_be_open and
          (isinstance(last, MetaToken))) and last.is_newline_after):
         if (BracketHelper.canBeEndOfSequence((last).end_token, False, None,
                                              False)):
             return BracketSequenceToken(t0, last)
     if (len(br_list) < 1):
         return None
     i = 1
     while i < (len(br_list) - 1):
         if (br_list[i].char0_ == '<' and br_list[i + 1].char0_ == '>'):
             br_list[i].can_be_open = True
             br_list[i + 1].can_be_close = True
         i += 1
     internals = None
     while len(br_list) > 3:
         i = len(br_list) - 1
         if ((br_list[i].can_be_close and br_list[i - 1].can_be_open
              and not BracketHelper.__canBeCloseChar(
                  br_list[i].char0_, br_list[0].char0_))
                 and BracketHelper.__canBeCloseChar(br_list[i].char0_,
                                                    br_list[i - 1].char0_)):
             del br_list[len(br_list) - 2:len(br_list) - 2 + 2]
             continue
         break
     while len(br_list) >= 4:
         changed = False
         i = 1
         while i < (len(br_list) - 2):
             if ((br_list[i].can_be_open and not br_list[i].can_be_close
                  and br_list[i + 1].can_be_close)
                     and not br_list[i + 1].can_be_open):
                 ok = False
                 if (BracketHelper.__mustBeCloseChar(
                         br_list[i + 1].char0_, br_list[i].char0_)
                         or br_list[i].char0_ != br_list[0].char0_):
                     ok = True
                     if ((i == 1 and ((i + 2) < len(br_list))
                          and br_list[i + 2].char0_ == ')')
                             and br_list[i + 1].char0_ != ')'
                             and BracketHelper.__canBeCloseChar(
                                 br_list[i + 1].char0_,
                                 br_list[i - 1].char0_)):
                         br_list[i + 2] = br_list[i + 1]
                 elif (i > 1 and ((i + 2) < len(br_list))
                       and BracketHelper.__mustBeCloseChar(
                           br_list[i + 2].char0_, br_list[i - 1].char0_)):
                     ok = True
                 if (ok):
                     if (internals is None):
                         internals = list()
                     internals.append(
                         BracketSequenceToken(br_list[i].source,
                                              br_list[i + 1].source))
                     del br_list[i:i + 2]
                     changed = True
                     break
             i += 1
         if (not changed):
             break
     res = None
     if ((len(br_list) >= 4 and br_list[1].can_be_open
          and br_list[2].can_be_close) and br_list[3].can_be_close
             and not br_list[3].can_be_open):
         if (BracketHelper.__canBeCloseChar(br_list[3].char0_,
                                            br_list[0].char0_)):
             res = BracketSequenceToken(br_list[0].source,
                                        br_list[3].source)
             if (br_list[0].source.next0_ != br_list[1].source
                     or br_list[2].source.next0_ != br_list[3].source):
                 res.internal.append(
                     BracketSequenceToken(br_list[1].source,
                                          br_list[2].source))
             if (internals is not None):
                 res.internal.extend(internals)
     if ((res is None and len(br_list) >= 3 and br_list[2].can_be_close)
             and not br_list[2].can_be_open):
         if ((((typ) & (BracketParseAttr.NEARCLOSEBRACKET))) !=
             (BracketParseAttr.NO)):
             if (BracketHelper.__canBeCloseChar(br_list[1].char0_,
                                                br_list[0].char0_)):
                 return BracketSequenceToken(br_list[0].source,
                                             br_list[1].source)
         ok = True
         if (BracketHelper.__canBeCloseChar(br_list[2].char0_,
                                            br_list[0].char0_)
                 and BracketHelper.__canBeCloseChar(br_list[1].char0_,
                                                    br_list[0].char0_)
                 and br_list[1].can_be_close):
             t = br_list[1].source
             while t != br_list[2].source and t is not None:
                 if (t.is_newline_before):
                     ok = False
                     break
                 if (t.chars.is_letter and t.chars.is_all_lower):
                     ok = False
                     break
                 npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO,
                                                 0)
                 if (npt is not None):
                     t = npt.end_token
                 t = t.next0_
             if (ok):
                 t = br_list[0].source.next0_
                 while t != br_list[1].source and t is not None:
                     if (t.is_newline_before):
                         return BracketSequenceToken(
                             br_list[0].source, t.previous)
                     t = t.next0_
             lev1 = 0
             tt = br_list[0].source.previous
             first_pass2803 = True
             while True:
                 if first_pass2803: first_pass2803 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (tt.is_newline_after or tt.is_table_control_char):
                     break
                 if (not ((isinstance(tt, TextToken)))):
                     continue
                 if (tt.chars.is_letter or tt.length_char > 1):
                     continue
                 ch = (tt).term[0]
                 if (BracketHelper.__canBeCloseChar(ch, br_list[0].char0_)):
                     lev1 += 1
                 elif (BracketHelper.__canBeCloseChar(
                         br_list[1].char0_, ch)):
                     lev1 -= 1
                     if (lev1 < 0):
                         return BracketSequenceToken(
                             br_list[0].source, br_list[1].source)
         if (ok and BracketHelper.__canBeCloseChar(br_list[2].char0_,
                                                   br_list[0].char0_)):
             intern = BracketSequenceToken(br_list[1].source,
                                           br_list[2].source)
             res = BracketSequenceToken(br_list[0].source,
                                        br_list[2].source)
             res.internal.append(intern)
         elif (ok and BracketHelper.__canBeCloseChar(
                 br_list[2].char0_, br_list[1].char0_)
               and br_list[0].can_be_open):
             if (BracketHelper.__canBeCloseChar(br_list[2].char0_,
                                                br_list[0].char0_)):
                 intern = BracketSequenceToken(br_list[1].source,
                                               br_list[2].source)
                 res = BracketSequenceToken(br_list[0].source,
                                            br_list[2].source)
                 res.internal.append(intern)
             elif (len(br_list) == 3):
                 return None
     if (res is None and len(br_list) > 1 and br_list[1].can_be_close):
         res = BracketSequenceToken(br_list[0].source, br_list[1].source)
     if (res is None
             and len(br_list) > 1 and BracketHelper.__canBeCloseChar(
                 br_list[1].char0_, br_list[0].char0_)):
         res = BracketSequenceToken(br_list[0].source, br_list[1].source)
     if (res is None and len(br_list) == 2
             and br_list[0].char0_ == br_list[1].char0_):
         res = BracketSequenceToken(br_list[0].source, br_list[1].source)
     if (res is not None and internals is not None):
         for i in internals:
             if (i.begin_char < res.end_char):
                 res.internal.append(i)
     if (res is None):
         cou = 0
         tt = t0.next0_
         first_pass2804 = True
         while True:
             if first_pass2804: first_pass2804 = False
             else:
                 tt = tt.next0_
                 cou += 1
             if (not (tt is not None)): break
             if (tt.is_table_control_char):
                 break
             if (MiscHelper.canBeStartOfSentence(tt)):
                 break
             if (max_tokens > 0 and cou > max_tokens):
                 break
             mt = Utils.asObjectOrNull(tt, MetaToken)
             if (mt is None):
                 continue
             if (isinstance(mt.end_token, TextToken)):
                 if ((mt.end_token).isCharOf(BracketHelper.M_CLOSE_CHARS)):
                     bb = BracketHelper.Bracket(
                         Utils.asObjectOrNull(mt.end_token, TextToken))
                     if (bb.can_be_close and BracketHelper.__canBeCloseChar(
                             bb.char0_, br_list[0].char0_)):
                         return BracketSequenceToken(t0, tt)
     return res
Example #17
0
 def __compareForms(self, x: 'MorphWordForm', y: 'MorphWordForm') -> int:
     """Compare two word forms and return -1, 0 or 1.

     Each form is represented by its key: ``normal_full`` with a fallback
     to ``normal_case``.  Equal keys give 0; an empty or missing key gives
     1 when it belongs to ``x`` and -1 when it belongs to ``y``.  Otherwise
     a cascade of part-of-speech heuristics decides, and the last resort
     is a comparison of key lengths (shorter key gives -1).

     NOTE(review): presumably used as a sort comparator in which -1 ranks
     ``x`` ahead of ``y`` - confirm against the caller.
     """
     key_x = Utils.ifNotNull(x.normal_full, x.normal_case)
     key_y = Utils.ifNotNull(y.normal_full, y.normal_case)
     if key_x == key_y:
         return 0
     if Utils.isNullOrEmpty(key_x):
         return 1
     if Utils.isNullOrEmpty(key_y):
         return -1

     len_x, len_y = len(key_x), len(key_y)
     # -1 when x's key is shorter, 1 when it is longer, 0 on equal length.
     by_length = (len_x > len_y) - (len_x < len_y)
     cls_x, cls_y = x.class0_, y.class0_
     surname_tails = ("ОВ", "ЕВ", "ИН", None)

     # Surnames with a typical ending on a word that is not all lower case.
     if (cls_x.is_proper_surname and not self.char_info.is_all_lower
             and LanguageHelper.endsWithEx(key_x, *surname_tails)
             and not cls_y.is_proper_surname):
         return -1
     if (cls_y.is_proper_surname and not self.char_info.is_all_lower
             and LanguageHelper.endsWithEx(key_y, *surname_tails)):
         if not cls_x.is_proper_surname:
             return 1
         # Both are surnames here: the longer key gets -1.
         return -by_length

     if cls_x == cls_y:
         if cls_x.is_adjective:
             x_ends_j = key_x[-1] == 'Й'
             y_ends_j = key_y[-1] == 'Й'
             if x_ends_j != y_ends_j:
                 return -1 if x_ends_j else 1
             x_ends_oj = LanguageHelper.endsWith(key_x, "ОЙ")
             y_ends_oj = LanguageHelper.endsWith(key_y, "ОЙ")
             if x_ends_oj != y_ends_oj:
                 return 1 if x_ends_oj else -1
         if cls_x.is_noun:
             if (x.number == MorphNumber.SINGULAR
                     and y.number == MorphNumber.PLURAL
                     and len_x <= len_y + 1):
                 return -1
             if (x.number == MorphNumber.PLURAL
                     and y.number == MorphNumber.SINGULAR
                     and len_x >= len_y - 1):
                 return 1
         return by_length

     # From here on the two classes differ; decide by x's class first.
     if cls_x.is_adverb:
         return 1
     if cls_x.is_noun and x.is_in_dictionary:
         y_is_full_dict_adjective = (cls_y.is_adjective
                                     and y.is_in_dictionary
                                     and "к.ф." not in y.misc.attrs)
         return 1 if y_is_full_dict_adjective else -1
     if cls_x.is_adjective:
         y_is_dict_noun_only = (not x.is_in_dictionary and cls_y.is_noun
                                and y.is_in_dictionary)
         return 1 if y_is_dict_noun_only else -1
     if cls_x.is_verb:
         y_is_nominal = (cls_y.is_noun or cls_y.is_adjective
                         or cls_y.is_preposition)
         return 1 if y_is_nominal else -1

     # x's class gave no verdict; decide by y's class.
     if cls_y.is_adverb:
         return -1
     if cls_y.is_noun and y.is_in_dictionary:
         return 1
     if cls_y.is_adjective:
         if ((cls_x.is_noun or cls_x.is_proper_secname)
                 and x.is_in_dictionary):
             return -1
         if cls_x.is_noun and not y.is_in_dictionary and len_x < len_y:
             return -1
         return 1
     if cls_y.is_verb:
         if (cls_x.is_noun or cls_x.is_adjective or cls_x.is_preposition
                 or cls_x.is_proper):
             return -1
         return 1
     return by_length
Example #18
0
 def canBeEquals(self, obj : 'Referent', typ : 'EqualType') -> bool:
     """Tell whether ``obj`` may denote the same geo object as ``self``.

     Args:
         obj: referent to compare with; anything that is not a
             ``GeoReferent`` gives ``False``.
         typ: comparison mode; it is only forwarded to the recursive
             comparison of the ``higher`` objects.

     Returns:
         ``True`` when the two referents are considered compatible,
         otherwise ``False``.
     """
     other = Utils.asObjectOrNull(obj, GeoReferent)
     if other is None:
         return False
     # A matching alpha2 value settles the question immediately.
     if other.alpha2 is not None and other.alpha2 == self.alpha2:
         return True
     if self.is_city != other.is_city or self.is_union != other.is_union:
         return False

     if self.is_union:
         # Unions are equal when their ATTR_REF slots match both ways.
         def refs_covered(src, dst):
             return all(
                 dst.findSlot(GeoReferent.ATTR_REF, slot.value, True)
                 is not None
                 for slot in src.slots
                 if slot.type_name == GeoReferent.ATTR_REF)

         return refs_covered(self, obj) and refs_covered(obj, self)

     own_ref = Utils.asObjectOrNull(
         self.getSlotValue(GeoReferent.ATTR_REF), Referent)
     other_ref = Utils.asObjectOrNull(
         other.getSlotValue(GeoReferent.ATTR_REF), Referent)
     if (own_ref is not None and other_ref is not None
             and own_ref != other_ref):
         return False

     # NOTE(review): a former is_territory comparison at this point led to
     # False on both of its outcomes, so the mismatch alone decides.
     if ((self.is_region or self.is_state)
             != (other.is_region or other.is_state)):
         return False

     # At least one ATTR_NAME value has to be present in both referents.
     shares_name = any(
         slot.type_name == GeoReferent.ATTR_NAME
         and other.findSlot(slot.type_name, slot.value, True) is not None
         for slot in self.slots)
     if not shares_name:
         return False

     if self.is_region and other.is_region:
         own_types = self.typs
         other_types = other.typs
         # Types are compatible when equal or when one ends with the other.
         types_compatible = any(
             mine in other_types
             or any(
                 LanguageHelper.endsWith(theirs, mine)
                 or LanguageHelper.endsWith(mine, theirs)
                 for theirs in other_types)
             for mine in own_types)
         if not types_compatible:
             return False

     own_up = self.higher
     other_up = other.higher
     if own_up is None or other_up is None:
         return True
     if (GeoReferent.__checkRoundDep(self)
             or GeoReferent.__checkRoundDep(other)):
         return False
     # The parents must match directly or with one level skipped on
     # either side.
     if own_up.canBeEquals(other_up, typ):
         return True
     if (other_up.higher is not None
             and own_up.canBeEquals(other_up.higher, typ)):
         return True
     if (own_up.higher is not None
             and own_up.higher.canBeEquals(other_up, typ)):
         return True
     return False
Example #19
0
 def tryParse(t: 'Token', items: typing.List['NounPhraseItem'],
              attrs: 'NounPhraseParseAttr') -> 'NounPhraseItem':
     """Try to parse the token ``t`` as one element of a noun phrase.

     The morphological variants of the token are examined and sorted into
     adjective-like candidates (``adj_morph``) and noun-like candidates
     (``noun_morph``) of a new ``NounPhraseItem``.  Every candidate has to
     pass ``tryAccordVariant`` against the already collected ``items``.

     Args:
         t: start token; ``None`` gives ``None``.
         items: items collected so far, or ``None``.
         attrs: parse flags; the IGNOREPARTICIPLES, REFERENTCANBENOUN and
             PARSENUMERICASADJECTIVE bits are inspected here.

     Returns:
         The parsed item, or ``None`` when the token is rejected.  The item
         returned by the final ``t0 == t`` branch may have neither
         ``can_be_noun`` nor ``can_be_adj`` set.
     """
     if (t is None):
         return None
     t0 = t
     _can_be_surname = False
     _is_doubt_adj = False
     rt = Utils.asObjectOrNull(t, ReferentToken)
     # A referent token that wraps exactly one token: parse the wrapped
     # token and stretch the result over the wrapper itself.
     if (rt is not None and rt.begin_token == rt.end_token):
         res = NounPhraseItem.tryParse(rt.begin_token, items, attrs)
         if (res is not None):
             res.begin_token = res.end_token = t
             return res
     # A referent token that follows already collected items is taken as a
     # noun whose normal value is the string form of the referent.
     if (rt is not None and items is not None and len(items) > 0):
         res = NounPhraseItem(t, t)
         for m in t.morph.items:
             v = NounPhraseItemTextVar(m, None)
             v.normal_value = str(t.getReferent())
             res.noun_morph.append(v)
         res.can_be_noun = True
         return res
     # Number tokens need no pre-filtering (intentional no-op).
     if (isinstance(t, NumberToken)):
         pass
     has_legal_verb = False
     # Pre-filters that apply to plain text tokens only.
     if (isinstance(t, TextToken)):
         if (not t.chars.is_letter):
             return None
         str0_ = (t).term
         # Words ending in 'А' or 'О' get extra checks on their dictionary
         # word forms (verb, adverb and adjective readings).
         if (str0_[len(str0_) - 1] == 'А' or str0_[len(str0_) - 1] == 'О'):
             for wf in t.morph.items:
                 if ((isinstance(wf, MorphWordForm))
                         and (wf).is_in_dictionary):
                     if (wf.class0_.is_verb):
                         mc = t.getMorphClassInDictionary()
                         # Reject a dictionary verb that is not also a noun
                         # unless IGNOREPARTICIPLES is set or the word ends
                         # in "ОГО"/"ЕГО".
                         if (not mc.is_noun and
                             (((attrs) &
                               (NounPhraseParseAttr.IGNOREPARTICIPLES)))
                                 == (NounPhraseParseAttr.NO)):
                             if (not LanguageHelper.endsWithEx(
                                     str0_, "ОГО", "ЕГО", None, None)):
                                 return None
                         has_legal_verb = True
                     if (wf.class0_.is_adverb):
                         # An adverb not followed by a hyphen is rejected,
                         # except for the listed words.
                         if (t.next0_ is None or not t.next0_.is_hiphen):
                             if ((str0_ == "ВСЕГО" or str0_ == "ДОМА"
                                  or str0_ == "НЕСКОЛЬКО")
                                     or str0_ == "МНОГО"
                                     or str0_ == "ПОРЯДКА"):
                                 pass
                             else:
                                 return None
                     if (wf.class0_.is_adjective):
                         # "к.ф." is presumably the short-form marker
                         # (TODO confirm); such an adjective is doubtful
                         # unless the dictionary class is exactly ADJECTIVE.
                         if (wf.containsAttr("к.ф.", None)):
                             if (t.getMorphClassInDictionary() ==
                                     MorphClass.ADJECTIVE):
                                 pass
                             else:
                                 _is_doubt_adj = True
         mc0 = t.morph.class0_
         # Surname heuristics for words that are not all lower case.
         if (mc0.is_proper_surname and not t.chars.is_all_lower):
             for wf in t.morph.items:
                 if (wf.class0_.is_proper_surname
                         and wf.number != MorphNumber.PLURAL):
                     wff = Utils.asObjectOrNull(wf, MorphWordForm)
                     if (wff is None):
                         continue
                     s = Utils.ifNotNull((Utils.ifNotNull(
                         wff.normal_full, wff.normal_case)), "")
                     # "ИН"/"ЕН"/"ЫН" endings: an out-of-dictionary form
                     # only marks a possible surname, a dictionary form
                     # rejects the token.
                     if (LanguageHelper.endsWithEx(s, "ИН", "ЕН", "ЫН",
                                                   None)):
                         if (not wff.is_in_dictionary):
                             _can_be_surname = True
                         else:
                             return None
                     if (wff.is_in_dictionary
                             and LanguageHelper.endsWith(s, "ОВ")):
                         _can_be_surname = True
         # Proper-name heuristics for words that are not all lower case.
         if (mc0.is_proper_name and not t.chars.is_all_lower):
             for wff in t.morph.items:
                 wf = Utils.asObjectOrNull(wff, MorphWordForm)
                 if (wf is None):
                     continue
                 if (wf.normal_case == "ГОР"):
                     continue
                 if (wf.class0_.is_proper_name and wf.is_in_dictionary):
                     if (wf.normal_case is None
                             or not wf.normal_case.startswith("ЛЮБ")):
                         # A dictionary proper name is kept only for an
                         # adjective with the "неизм." attribute, with
                         # REFERENTCANBENOUN set, or right after a standard
                         # adjective in items[0].
                         if (mc0.is_adjective
                                 and t.morph.containsAttr("неизм.", None)):
                             pass
                         elif (
                             (((attrs) &
                               (NounPhraseParseAttr.REFERENTCANBENOUN))
                              ) == (NounPhraseParseAttr.REFERENTCANBENOUN)):
                             pass
                         else:
                             if (items is None or (len(items) < 1)):
                                 return None
                             if (not items[0].is_std_adjective):
                                 return None
         # A single adjective variant carrying "в.ср.ст." is rejected
         # (presumably a comparative-degree marker - TODO confirm).
         if (mc0.is_adjective and t.morph.items_count == 1):
             if (t.morph.getIndexerItem(0).containsAttr("в.ср.ст.", None)):
                 return None
         mc1 = t.getMorphClassInDictionary()
         # A word whose dictionary class is exactly VERB is rejected.
         if (mc1 == MorphClass.VERB):
             return None
         # With IGNOREPARTICIPLES set, a verb (not noun, not proper) having
         # a "дейст.з." variant is rejected unless the term ends in "СЯ".
         if (((((attrs) & (NounPhraseParseAttr.IGNOREPARTICIPLES)))
              == (NounPhraseParseAttr.IGNOREPARTICIPLES)
              and t.morph.class0_.is_verb and not t.morph.class0_.is_noun)
                 and not t.morph.class0_.is_proper):
             for wf in t.morph.items:
                 if (wf.class0_.is_verb):
                     if (wf.containsAttr("дейст.з.", None)):
                         if (LanguageHelper.endsWith((t).term, "СЯ")):
                             pass
                         else:
                             return None
     t1 = None
     # Up to two passes.  Pass 0 may analyse the word after a hyphen instead
     # of t0; pass 1 runs only when pass 0 fell through, using t1 when it
     # was set and t0 otherwise.
     for k in range(2):
         t = (Utils.ifNotNull(t1, t0))
         if (k == 0):
             # "word-word" compound: move the analysed token to the part
             # after the hyphen when the spacing conditions below hold.
             if ((((isinstance(t0, TextToken))) and t0.next0_ is not None
                  and t0.next0_.is_hiphen)
                     and t0.next0_.next0_ is not None):
                 if (not t0.is_whitespace_after
                         and not t0.morph.class0_.is_pronoun):
                     if (not t0.next0_.is_whitespace_after):
                         t = t0.next0_.next0_
                     elif (t0.next0_.next0_.chars.is_all_lower
                           and LanguageHelper.endsWith((t0).term, "О")):
                         t = t0.next0_.next0_
         it = NounPhraseItem._new470(t0, t, _can_be_surname)
         if (t0 == t and (isinstance(t0, ReferentToken))):
             it.can_be_noun = True
             it.morph = MorphCollection(t0.morph)
         can_be_prepos = False
         # Classify every morphological variant of the analysed token.
         for v in t.morph.items:
             wf = Utils.asObjectOrNull(v, MorphWordForm)
             if (v.class0_.is_preposition):
                 can_be_prepos = True
             # --- adjective-like candidates: adjectives, non-personal
             # pronouns, and noun readings of number tokens ---
             if (v.class0_.is_adjective
                     or ((v.class0_.is_pronoun
                          and not v.class0_.is_personal_pronoun)) or
                 ((v.class0_.is_noun and (isinstance(t, NumberToken))))):
                 if (NounPhraseItem.tryAccordVariant(
                         items, (0 if items is None else len(items)), v)):
                     # NOTE(review): is_doub is assigned but never read in
                     # this function.
                     is_doub = False
                     if (v.containsAttr("к.ф.", None)):
                         continue
                     if (v.containsAttr("собир.", None)
                             and not ((isinstance(t, NumberToken)))):
                         if (wf is not None and wf.is_in_dictionary):
                             return None
                         continue
                     if (v.containsAttr("сравн.", None)):
                         continue
                     ok = True
                     if (isinstance(t, TextToken)):
                         s = (t).term
                         # Words that must not be taken as adjectives.
                         if (s == "ПРАВО" or s == "ПРАВА"):
                             ok = False
                         elif (LanguageHelper.endsWith(s, "ОВ")
                               and t.getMorphClassInDictionary().is_noun):
                             ok = False
                         elif (wf is not None
                               and ((wf.normal_case == "САМ"
                                     or wf.normal_case == "ТО"))):
                             ok = False
                     elif (isinstance(t, NumberToken)):
                         if (v.class0_.is_noun
                                 and t.morph.class0_.is_adjective):
                             ok = False
                         elif (t.morph.class0_.is_noun and ((
                             (attrs) &
                             (NounPhraseParseAttr.PARSENUMERICASADJECTIVE)))
                               == (NounPhraseParseAttr.NO)):
                             ok = False
                     if (ok):
                         it.adj_morph.append(NounPhraseItemTextVar(v, t))
                         it.can_be_adj = True
                         if (_is_doubt_adj and t0 == t):
                             it.is_doubt_adjective = True
                         if (has_legal_verb and wf is not None
                                 and wf.is_in_dictionary):
                             it.can_be_noun = True
             # --- noun-like candidates ---
             can_be_noun_ = False
             if (isinstance(t, NumberToken)):
                 pass
             elif (v.class0_.is_noun
                   or ((wf is not None and wf.normal_case == "САМ"))):
                 can_be_noun_ = True
             elif (v.class0_.is_personal_pronoun):
                 # A personal pronoun is a noun candidate at the start of
                 # the phrase, or after a single item flagged
                 # can_be_adj_for_personal_pronoun; any preceding verb item
                 # rejects the token.
                 if (items is None or len(items) == 0):
                     can_be_noun_ = True
                 else:
                     for it1 in items:
                         if (it1.is_verb):
                             return None
                     if (len(items) == 1):
                         if (items[0].can_be_adj_for_personal_pronoun):
                             can_be_noun_ = True
             elif ((v.class0_.is_pronoun and
                    ((items is None or len(items) == 0 or
                      ((len(items) == 1
                        and items[0].can_be_adj_for_personal_pronoun))))
                    and wf is not None) and
                   ((((wf.normal_case == "ТОТ" or wf.normal_full == "ТО"
                       or wf.normal_case == "ТО") or wf.normal_case == "ЭТО"
                      or wf.normal_case == "ВСЕ") or wf.normal_case == "ЧТО"
                     or wf.normal_case == "КТО"))):
                 # "ВСЕ" directly followed by "РАВНО" rejects the token.
                 if (wf.normal_case == "ВСЕ"):
                     if (t.next0_ is not None
                             and t.next0_.isValue("РАВНО", None)):
                         return None
                 can_be_noun_ = True
             elif (wf is not None and ((Utils.ifNotNull(
                     wf.normal_full, wf.normal_case))) == "КОТОРЫЙ"):
                 return None
             elif (v.class0_.is_proper and (isinstance(t, TextToken))):
                 if (t.length_char > 4 or v.class0_.is_proper_name):
                     can_be_noun_ = True
             if (can_be_noun_):
                 if (NounPhraseItem.tryAccordVariant(
                         items, (0 if items is None else len(items)), v)):
                     it.noun_morph.append(NounPhraseItemTextVar(v, t))
                     it.can_be_noun = True
         # For a hyphenated compound, let every variant account for the
         # first part t0 as a prefix.
         if (t0 != t):
             for v in it.adj_morph:
                 v.correctPrefix(Utils.asObjectOrNull(t0, TextToken), False)
             for v in it.noun_morph:
                 v.correctPrefix(Utils.asObjectOrNull(t0, TextToken), True)
         # Second pass found a pure noun: extend the item over the following
         # word and append that word's normal form after a hyphen.
         if (k == 1 and it.can_be_noun and not it.can_be_adj):
             if (t1 is not None):
                 it.end_token = t1
             else:
                 it.end_token = t0.next0_.next0_
             for v in it.noun_morph:
                 if (v.normal_value is not None
                         and (v.normal_value.find('-') < 0)):
                     v.normal_value = "{0}-{1}".format(
                         v.normal_value,
                         it.end_token.getNormalCaseText(
                             None, False, MorphGender.UNDEFINED, False))
         if (it.can_be_adj):
             if (NounPhraseItem.__m_std_adjectives.tryParse(
                     it.begin_token, TerminParseAttr.NO) is not None):
                 it.is_std_adjective = True
         # The token may also be a preposition: reject the noun reading
         # when a noun phrase parsed from here (or from the next token)
         # supports the prepositional reading.
         if (can_be_prepos and it.can_be_noun):
             if (items is not None and len(items) > 0):
                 npt1 = NounPhraseHelper.tryParse(
                     t,
                     Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION)
                                     | (NounPhraseParseAttr.PARSEPRONOUNS) |
                                     (NounPhraseParseAttr.PARSEVERBS),
                                     NounPhraseParseAttr), 0)
                 if (npt1 is not None and npt1.end_char > t.end_char):
                     return None
             else:
                 npt1 = NounPhraseHelper.tryParse(
                     t.next0_,
                     Utils.valToEnum((NounPhraseParseAttr.PARSEPRONOUNS) |
                                     (NounPhraseParseAttr.PARSEVERBS),
                                     NounPhraseParseAttr), 0)
                 if (npt1 is not None):
                     # Reject when the case required after this preposition
                     # intersects the case of the following phrase.
                     mc = LanguageHelper.getCaseAfterPreposition((t).lemma)
                     if (not ((mc) & npt1.morph.case_).is_undefined):
                         return None
         if (it.can_be_noun or it.can_be_adj or k == 1):
             # Pronouns may absorb a trailing particle, optionally attached
             # with a hyphen.
             if (it.begin_token.morph.class0_.is_pronoun):
                 tt2 = it.end_token.next0_
                 if ((tt2 is not None and tt2.is_hiphen
                      and not tt2.is_whitespace_after)
                         and not tt2.is_whitespace_before):
                     tt2 = tt2.next0_
                 if (isinstance(tt2, TextToken)):
                     ss = (tt2).term
                     if ((ss == "ЖЕ" or ss == "БЫ" or ss == "ЛИ")
                             or ss == "Ж"):
                         it.end_token = tt2
                     elif (ss == "НИБУДЬ" or ss == "ЛИБО"
                           or (((ss == "ТО" and tt2.previous.is_hiphen))
                               and it.can_be_adj)):
                         # These particles also become part of the normal
                         # value of every adjective variant.
                         it.end_token = tt2
                         for m in it.adj_morph:
                             m.normal_value = "{0}-{1}".format(
                                 m.normal_value, ss)
                             if (m.single_number_value is not None):
                                 m.single_number_value = "{0}-{1}".format(
                                     m.single_number_value, ss)
             return it
         if (t0 == t):
             # Special case: "БИЗНЕС" followed by a token with the same
             # character class is retried on the next pass with that token.
             if (t0.isValue("БИЗНЕС", None) and t0.next0_ is not None
                     and t0.next0_.chars == t0.chars):
                 t1 = t0.next0_
                 continue
             return it
     return None
Example #20
0
 def __try1(li: typing.List['CityItemToken'], oi: 'IntOntologyItem',
            ad: 'AnalyzerDataWithOntology') -> 'ReferentToken':
     oi.value = (None)
     if (li is None or (len(li) < 1)):
         return None
     elif (li[0].typ != CityItemToken.ItemType.CITY):
         if (len(li) != 2 or li[0].typ != CityItemToken.ItemType.PROPERNAME
                 or li[1].typ != CityItemToken.ItemType.NOUN):
             return None
     i = 1
     oi.value = li[0].onto_item
     ok = not li[0].doubtful
     if ((ok and li[0].onto_item is not None
          and li[0].onto_item.misc_attr is None) and ad is not None):
         if (li[0].onto_item.owner != ad.local_ontology
                 and not li[0].onto_item.owner.is_ext_ontology):
             if (li[0].begin_token.previous is not None
                     and li[0].begin_token.previous.isValue("В", None)):
                 pass
             else:
                 ok = False
     if (len(li) == 1 and li[0].begin_token.morph.class0_.is_adjective):
         sits = StreetItemToken.tryParseList(li[0].begin_token, None, 3)
         if (sits is not None and len(sits) == 2
                 and sits[1].typ == StreetItemType.NOUN):
             return None
     typ = None
     alttyp = None
     mc = li[0].morph
     if (i < len(li)):
         if (li[i].typ == CityItemToken.ItemType.NOUN):
             at = None
             if (not li[i].chars.is_all_lower
                     and (li[i].whitespaces_after_count < 2)):
                 sit = StreetItemToken.tryParse(li[i].end_token.next0_,
                                                None, False, None, False)
                 if (sit is not None and sit.typ == StreetItemType.NOUN):
                     at = AddressItemToken.tryParse(li[i].begin_token, None,
                                                    False, False, None)
                     if (at is not None):
                         at2 = AddressItemToken.tryParse(
                             li[i].end_token.next0_, None, False, False,
                             None)
                         if (at2 is not None and at2.typ
                                 == AddressItemToken.ItemType.STREET):
                             at = (None)
             if (at is None):
                 typ = li[i].value
                 alttyp = li[i].alt_value
                 if (li[i].begin_token.isValue("СТ", None)
                         and li[i].begin_token.chars.is_all_upper):
                     return None
                 if ((i + 1) == len(li)):
                     ok = True
                     if (not li[i].morph.case_.is_undefined):
                         mc = li[i].morph
                     i += 1
                 elif (ok):
                     i += 1
                 else:
                     tt0 = li[0].begin_token.previous
                     if ((isinstance(tt0, TextToken))
                             and (tt0.whitespaces_after_count < 3)):
                         if (tt0.isValue("МЭР", "МЕР")
                                 or tt0.isValue("ГЛАВА", None)
                                 or tt0.isValue("ГРАДОНАЧАЛЬНИК", None)):
                             ok = True
                             i += 1
     if (not ok and oi.value is not None
             and (len(oi.value.canonic_text) < 4)):
         return None
     if (not ok and li[0].begin_token.morph.class0_.is_proper_name):
         return None
     if (not ok):
         if (not MiscHelper.isExistsInDictionary(
                 li[0].begin_token, li[0].end_token, (MorphClass.ADJECTIVE)
                 | MorphClass.NOUN | MorphClass.PRONOUN)):
             ok = (li[0].geo_object_before or li[i - 1].geo_object_after)
             if (ok and li[0].begin_token == li[0].end_token):
                 mcc = li[0].begin_token.getMorphClassInDictionary()
                 if (mcc.is_proper_name or mcc.is_proper_surname):
                     ok = False
                 elif (li[0].geo_object_before
                       and (li[0].whitespaces_after_count < 2)):
                     ad1 = AddressItemToken.tryParse(
                         li[0].begin_token, None, False, False, None)
                     if (ad1 is not None and ad1.typ
                             == AddressItemToken.ItemType.STREET):
                         ad2 = AddressItemToken.tryParse(
                             li[0].end_token.next0_, None, False, False,
                             None)
                         if (ad2 is None or ad2.typ !=
                                 AddressItemToken.ItemType.STREET):
                             ok = False
                     elif (AddressItemToken.tryAttachOrg(li[0].begin_token)
                           is not None):
                         ok = False
         if (ok):
             if (li[0].kit.processReferent("PERSON", li[0].begin_token)
                     is not None):
                 ok = False
     if (not ok):
         ok = CityAttachHelper.checkYearAfter(li[0].end_token.next0_)
     if (not ok and ((not li[0].begin_token.morph.class0_.is_adjective
                      or li[0].begin_token != li[0].end_token))):
         ok = CityAttachHelper.checkCityAfter(li[0].end_token.next0_)
     if (not ok):
         return None
     if (i < len(li)):
         del li[i:i + len(li) - i]
     rt = None
     if (oi.value is None):
         if (li[0].value is not None and li[0].higher_geo is not None):
             cap = GeoReferent()
             cap._addName(li[0].value)
             cap._addTypCity(li[0].kit.base_language)
             cap.higher = li[0].higher_geo
             if (typ is not None):
                 cap._addTyp(typ)
             if (alttyp is not None):
                 cap._addTyp(alttyp)
             rt = ReferentToken(cap, li[0].begin_token, li[0].end_token)
         else:
             if (li[0].value is None):
                 return None
             if (typ is None):
                 if ((len(li) == 1
                      and li[0].begin_token.previous is not None
                      and li[0].begin_token.previous.is_hiphen) and
                     (isinstance(li[0].begin_token.previous.previous,
                                 ReferentToken)) and
                     (isinstance(
                         li[0].begin_token.previous.previous.getReferent(),
                         GeoReferent))):
                     pass
                 else:
                     return None
             else:
                 if (not LanguageHelper.endsWithEx(typ, "ПУНКТ",
                                                   "ПОСЕЛЕНИЕ", "ПОСЕЛЕННЯ",
                                                   "ПОСЕЛОК")):
                     if (not LanguageHelper.endsWith(typ, "CITY")):
                         if (typ == "СТАНЦИЯ" and
                             ((MiscLocationHelper.checkGeoObjectBefore(
                                 li[0].begin_token)))):
                             pass
                         elif (len(li) > 1
                               and li[1].typ == CityItemToken.ItemType.NOUN
                               and li[0].typ
                               == CityItemToken.ItemType.CITY):
                             pass
                         else:
                             return None
                 if (li[0].begin_token.morph.class0_.is_adjective):
                     li[0].value = ProperNameHelper.getNameEx(
                         li[0].begin_token, li[0].end_token,
                         MorphClass.ADJECTIVE, li[1].morph.case_,
                         li[1].morph.gender, False, False)
     elif (isinstance(oi.value.referent, GeoReferent)):
         rt = ReferentToken._new719(
             Utils.asObjectOrNull(oi.value.referent, GeoReferent),
             li[0].begin_token, li[len(li) - 1].end_token, mc)
     elif (typ is None):
         typ = oi.value.typ
     if (rt is None):
         city = GeoReferent()
         city._addName(
             (li[0].value if oi.value is None else oi.value.canonic_text))
         if (typ is not None):
             city._addTyp(typ)
         else:
             city._addTypCity(li[0].kit.base_language)
         if (alttyp is not None):
             city._addTyp(alttyp)
         rt = ReferentToken._new719(city, li[0].begin_token,
                                    li[len(li) - 1].end_token, mc)
     if ((isinstance(rt.referent, GeoReferent)) and len(li) == 1
             and (rt.referent).is_city):
         if (rt.begin_token.previous is not None
                 and rt.begin_token.previous.isValue("Г", None)):
             rt.begin_token = rt.begin_token.previous
         elif ((rt.begin_token.previous is not None
                and rt.begin_token.previous.isChar('.')
                and rt.begin_token.previous.previous is not None)
               and rt.begin_token.previous.previous.isValue("Г", None)):
             rt.begin_token = rt.begin_token.previous.previous
         elif (rt.end_token.next0_ is not None
               and (rt.whitespaces_after_count < 2)
               and rt.end_token.next0_.isValue("Г", None)):
             rt.end_token = rt.end_token.next0_
             if (rt.end_token.next0_ is not None
                     and rt.end_token.next0_.isChar('.')):
                 rt.end_token = rt.end_token.next0_
     return rt
Example #21
0
 def tryAttachAlternate(
         t0: 'Token', ph0: 'PhoneReferent',
         pli: typing.List['PhoneItemToken']) -> 'PhoneItemToken':
     """Try to build an ``ALT`` phone item starting at token ``t0``.

     Four token patterns are tested in order; the first match wins:

     1. ``t0`` is ``\\`` or ``/`` followed by a ``NumberToken`` with
        ``end_char - begin_char <= 1``.  The text from that number on is
        parsed with ``PhoneItemToken.tryAttachAll``.  When the parsed
        items (minus a trailing ``DELIM``) line up one-to-one with the
        last items of ``pli`` (same item types, only ``NUMBER``/``DELIM``,
        equal ``length_char`` for numbers), the result ends at the last
        parsed item and carries the concatenated number values.
        Otherwise the result covers just ``t0`` and the number, with the
        number's source text as value.
     2. ``t0`` is a hyphen followed by such a short number, and the token
        after the number is absent, starts a new line, or is ``,``/``.``.
     3. ``t0`` is ``(``, then a number with ``end_char - begin_char == 1``,
        then ``)``.
     4. ``t0`` is ``/`` or ``-`` followed by a number, and
        ``ph0._m_template`` ends with the decimal text of
        ``end_char - begin_char + 1``.

     Args:
         t0: First token to inspect; may be None.
         ph0: Phone referent whose ``_m_template`` is consulted by
             pattern 4 only.
         pli: Previously parsed phone items; consulted by pattern 1 only.

     Returns:
         The item created by ``PhoneItemToken._new2466`` (arguments are
         presumably begin token, end token, item type, value -- confirm
         against its definition), or None when no pattern matches.
     """
     if t0 is None:
         return None

     nxt = t0.next0_

     def _make_alt(last_token, text):
         # Every successful pattern yields an ALT item that starts at t0.
         return PhoneItemToken._new2466(
             t0, last_token, PhoneItemToken.PhoneItemType.ALT, text)

     # Pattern 1: slash or backslash followed by a short number.
     if (t0.isCharOf("\\/") and isinstance(nxt, NumberToken)
             and nxt.end_char - nxt.begin_char <= 1):
         parsed = PhoneItemToken.tryAttachAll(nxt)
         if parsed is not None and len(parsed) > 1:
             if (parsed[-1].item_type ==
                     PhoneItemToken.PhoneItemType.DELIM):
                 del parsed[-1]
             # Offset that aligns `parsed` with the tail of `pli`.
             shift = len(pli) - len(parsed)
             if shift >= 0:
                 digits = ""
                 for pos, cand in enumerate(parsed):
                     ref = pli[shift + pos]
                     kind = cand.item_type
                     if kind != ref.item_type:
                         break
                     if kind == PhoneItemToken.PhoneItemType.NUMBER:
                         if cand.length_char != ref.length_char:
                             break
                         digits += cand.value
                     elif kind != PhoneItemToken.PhoneItemType.DELIM:
                         break
                 else:
                     # Loop ran to completion: every item matched.
                     return _make_alt(parsed[-1].end_token, digits)
         return _make_alt(nxt, nxt.getSourceText())

     # Pattern 2: hyphen + short number at the end of a phrase.
     if (t0.is_hiphen and isinstance(nxt, NumberToken)
             and nxt.end_char - nxt.begin_char <= 1):
         follower = nxt.next0_
         if (follower is None or follower.is_newline_before
                 or follower.isCharOf(",.")):
             return _make_alt(nxt, nxt.getSourceText())

     # Pattern 3: number wrapped in parentheses.
     if (t0.isChar('(') and isinstance(nxt, NumberToken)
             and nxt.end_char - nxt.begin_char == 1
             and nxt.next0_ is not None and nxt.next0_.isChar(')')):
         return _make_alt(nxt.next0_, nxt.getSourceText())

     # Pattern 4: number whose width matches the end of the template.
     if (t0.isCharOf("/-") and isinstance(nxt, NumberToken)
             and ph0._m_template is not None
             and LanguageHelper.endsWith(
                 ph0._m_template,
                 str(nxt.end_char - nxt.begin_char + 1))):
         return _make_alt(nxt, nxt.getSourceText())

     return None