def __init__(self, v: 'MorphRuleVariant' = None, word: str = None) -> None:
    """Create a word form, optionally filled in from a rule variant.

    Args:
        v(MorphRuleVariant): rule variant to copy from; when None the
            instance is left with its default (empty) fields.
        word(str): the analysed word; when given together with ``v`` it is
            used to build ``normal_case`` / ``normal_full`` by replacing the
            variant's tail with the corresponding normal tail.
    """
    super().__init__(None)
    self.normal_full = None
    self.normal_case = None
    self.misc = None
    self.undef_coef = 0
    self.tag = None
    if v is None:
        return

    v.copyTo(self)
    self.misc = v.misc_info
    self.tag = v
    if word is None:
        return

    def stem() -> str:
        # The word without the variant's tail, or the whole word when the
        # tail does not match.
        if LanguageHelper.endsWith(word, v.tail):
            return word[:len(word) - len(v.tail)]
        return word

    if v.normal_tail is not None:
        base = stem()
        self.normal_case = (base + v.normal_tail) if len(v.normal_tail) > 0 else base
    if v.full_normal_tail is not None:
        base = stem()
        self.normal_full = (base + v.full_normal_tail) if len(v.full_normal_tail) > 0 else base
def _DelSurnameEnd(s : str) -> str:
    """Trim a trailing inflection ending from ``s``.

    Strings shorter than three characters are returned untouched. Otherwise:
    a final "А", "У" or "Е" is dropped; a final "ОМ" or "ЫМ" is dropped; a
    final "Я" or "Ю" preceded by "Н" or "Л" is replaced with "Ь". Any other
    string is returned as is.

    Args:
        s(str): source string (the literals compared against are upper-case
            Cyrillic).

    Returns:
        str: the trimmed string.
    """
    size = len(s)
    if size < 3:
        return s
    if LanguageHelper.endsWithEx(s, "А", "У", "Е", None):
        return s[:size - 1]
    if LanguageHelper.endsWith(s, "ОМ") or LanguageHelper.endsWith(s, "ЫМ"):
        return s[:size - 2]
    if LanguageHelper.endsWithEx(s, "Я", "Ю", None, None):
        before_last = s[size - 2]
        if before_last == 'Н' or before_last == 'Л':
            return s[:size - 1] + "Ь"
    return s
def __tryAttachMoscowAO(li: typing.List['TerrItemToken'], ad: 'AnalyzerData') -> 'ReferentToken':
    """Try to turn the first territory item into an "АДМИНИСТРАТИВНЫЙ ОКРУГ" referent.

    Args:
        li: territory item tokens; only ``li[0]`` is inspected.
        ad: analyzer data (not used by this function).

    Returns:
        ReferentToken spanning ``li[0]`` and wrapping a new GeoReferent, or
        None when ``li[0]`` has no termin item flagged ``is_moscow_region``,
        or when it is doubtful and neither a city nor a street follows it.
    """
    head = li[0]
    if head.termin_item is None or not head.termin_item.is_moscow_region:
        return None

    if head.is_doubt:
        # A doubtful item needs confirmation from what follows it.
        confirmed = False
        if CityAttachHelper.checkCityAfter(head.end_token.next0_):
            confirmed = True
        else:
            following = AddressItemToken.tryParseList(head.end_token.next0_, None, 2)
            if (following is not None and len(following) > 0
                    and following[0].typ == AddressItemToken.ItemType.STREET):
                confirmed = True
        if not confirmed:
            return None

    reg = GeoReferent()
    typ = "АДМИНИСТРАТИВНЫЙ ОКРУГ"
    reg._addTyp(typ)

    name = head.termin_item.canonic_text
    if LanguageHelper.endsWith(name, typ):
        # Cut the type words plus the one character in front of them.
        name = name[:len(name) - len(typ) - 1].strip()
    reg._addName(name)
    return ReferentToken(reg, head.begin_token, head.end_token)
def canHasRef(self, r: 'Referent') -> bool:
    """Check whether referent ``r`` may serve as this object's ATTR_REF.

    Args:
        r(Referent): candidate referent.

    Returns:
        bool: False when ``self.name`` or ``r`` is None. For a GeoReferent
        the answer depends on the name ("президент"/"губернатор" endings
        need a state or region, "мэр"/"градоначальник" need a city, "глава"
        is always accepted). For a referent whose ``type_name`` is
        "ORGANIZATION" several name/slot combinations are rejected. Any
        other referent is rejected.
    """
    title = self.name
    if title is None or r is None:
        return False

    if isinstance(r, GeoReferent):
        geo = Utils.asObjectOrNull(r, GeoReferent)
        if LanguageHelper.endsWithEx(title, "президент", "губернатор", None, None):
            return geo.is_state or geo.is_region
        if title == "мэр" or title == "градоначальник":
            return geo.is_city
        return title == "глава"

    if r.type_name != "ORGANIZATION":
        return False

    if (LanguageHelper.endsWith(title, "губернатор") or title == "мэр"
            or title == "градоначальник" or title == "президент"):
        return False
    if "министр" in title and r.findSlot(None, "министерство", True) is None:
        return False
    if title.endswith("директор") and r.findSlot(None, "суд", True) is not None:
        return False
    return True
def getDocTypes(name: str, name2: str) -> typing.List[str]:
    """Return the contract types in which a participant role can occur.

    Args:
        name(str): upper-case role name (e.g. "АРЕНДАТОР"); None gives an
            empty list.
        name2(str): optional second role; when given, only the contract
            types that are also returned for ``name2`` are kept.

    Returns:
        typing.List[str]: contract type names, possibly empty.
    """
    if name is None:
        return list()

    by_role = {
        "АРЕНДОДАТЕЛЬ": ("ДОГОВОР АРЕНДЫ", "ДОГОВОР СУБАРЕНДЫ"),
        "АРЕНДАТОР": ("ДОГОВОР АРЕНДЫ",),
        "СУБАРЕНДАТОР": ("ДОГОВОР СУБАРЕНДЫ",),
        "НАЙМОДАТЕЛЬ": ("ДОГОВОР НАЙМА",),
        "НАНИМАТЕЛЬ": ("ДОГОВОР НАЙМА",),
        "АГЕНТ": ("АГЕНТСКИЙ ДОГОВОР",),
        "ПРИНЦИПАЛ": ("АГЕНТСКИЙ ДОГОВОР",),
        "ПРОДАВЕЦ": ("ДОГОВОР КУПЛИ-ПРОДАЖИ",),
        "ПОКУПАТЕЛЬ": ("ДОГОВОР КУПЛИ-ПРОДАЖИ",),
        "ЗАКАЗЧИК": ("ДОГОВОР УСЛУГ",),
        "ИСПОЛНИТЕЛЬ": ("ДОГОВОР УСЛУГ",),
        "ПОСТАВЩИК": ("ДОГОВОР ПОСТАВКИ",),
        "ЛИЦЕНЗИАР": ("ЛИЦЕНЗИОННЫЙ ДОГОВОР",),
        "ЛИЦЕНЗИАТ": ("ЛИЦЕНЗИОННЫЙ ДОГОВОР",),
        "СТРАХОВЩИК": ("ДОГОВОР СТРАХОВАНИЯ",),
        "СТРАХОВАТЕЛЬ": ("ДОГОВОР СТРАХОВАНИЯ",),
    }
    if name in by_role:
        res = list(by_role[name])
    elif LanguageHelper.endsWith(name, "ПОДРЯДЧИК"):
        # Any "...ПОДРЯДЧИК" role counts as a services contract party.
        res = ["ДОГОВОР УСЛУГ"]
    else:
        res = list()

    if name2 is None:
        return res
    allowed = ParticipantToken.getDocTypes(name2, None)
    return [doc for doc in res if doc in allowed]
def mergeSlots(self, obj: 'Referent', merge_statistic: bool = True) -> None:
    """Take over missing or more complete data from another phone referent.

    Args:
        obj(Referent): referent to merge from; ignored unless it is a
            PhoneReferent.
        merge_statistic(bool): accepted for interface compatibility; not
            used by this implementation.
    """
    other = Utils.asObjectOrNull(obj, PhoneReferent)
    if other is None:
        return

    their_code = other.country_code
    if their_code is not None and self.country_code is None:
        self.country_code = their_code

    their_number = other.number
    # Adopt the other number when ours is its suffix.
    if their_number is not None and LanguageHelper.endsWith(their_number, self.number):
        self.number = their_number
def lemma(self) -> str:
    """Lemma of the token (one variant of morphological normalisation).

    Returns:
        str: the explicitly stored lemma if there is one; otherwise a value
        derived from ``word_forms`` with a few ending corrections; otherwise
        ``term``, or "?" when ``term`` is None.
    """
    if self.__m_lemma is not None:
        return self.__m_lemma

    res = None
    forms = self.word_forms
    if forms is not None and len(forms) > 0:
        if len(forms) == 1:
            only = forms[0]
            res = Utils.ifNotNull(only.normal_full, only.normal_case)
        if res is None and not self.char_info.is_all_lower:
            for form in forms:
                if form.class0_.is_proper_surname:
                    candidate = Utils.ifNotNull(form.normal_full, Utils.ifNotNull(form.normal_case, ""))
                    if LanguageHelper.endsWithEx(candidate, "ОВ", "ЕВ", None, None):
                        res = candidate
                        break
                elif form.class0_.is_proper_name and form.is_in_dictionary:
                    return form.normal_case
        if res is None:
            # Fall back to the form that __compareForms ranks first.
            best = None
            for form in forms:
                if best is None or self.__compareForms(best, form) > 0:
                    best = form
            res = Utils.ifNotNull(best.normal_full, best.normal_case)

    if res is None:
        return Utils.ifNotNull(self.term, "?")

    if LanguageHelper.endsWithEx(res, "АНЫЙ", "ЕНЫЙ", None, None):
        return res[:len(res) - 3] + "ННЫЙ"
    if LanguageHelper.endsWith(res, "ЙСЯ"):
        return res[:len(res) - 2]
    if LanguageHelper.endsWith(res, "АНИЙ") and res == self.term:
        if any(wf.is_in_dictionary for wf in self.word_forms):
            return res
        return res[:len(res) - 1] + "Е"
    return res
def _mergeSlots2(self, obj : 'Referent', lang : 'MorphLang') -> None: """ Merge slots of obj into this referent with language filtering, then prune redundant type slots. Args: obj(Referent): source referent whose slots are read; lang(MorphLang): language filter for ATTR_NAME / ATTR_TYPE values - a value starting with a Latin character is skipped unless lang.is_en, any other value is skipped when lang.is_en, and values ending in " ССР" are skipped. If this referent then has no ATTR_NAME (or ATTR_TYPE) slot while obj has one, all of obj's slots of that kind are added without filtering. Afterwards: the "территория" type is removed from a territory that has alpha2 or a state-like type; one "регион"/"регіон"/"region" type is removed when is_state; the generic "город"/"місто"/"city" type is removed when is_city and another type slot satisfies GeoReferent.__isCity; only the first ATTR_HIGHER slot is kept; finally _mergeExtReferents(obj) is called. NOTE(review): indentation is lost in this listing, so it cannot be confirmed here whether the first loop also copies slots of other types - verify against the formatted source. """ merge_statistic = True for s in obj.slots: if (s.type_name == GeoReferent.ATTR_NAME or s.type_name == GeoReferent.ATTR_TYPE): nam = s.value if (LanguageHelper.isLatinChar(nam[0])): if (not lang.is_en): continue elif (lang.is_en): continue if (LanguageHelper.endsWith(nam, " ССР")): continue self.addSlot(s.type_name, s.value, False, (s.count if merge_statistic else 0)) if (self.findSlot(GeoReferent.ATTR_NAME, None, True) is None and obj.findSlot(GeoReferent.ATTR_NAME, None, True) is not None): for s in obj.slots: if (s.type_name == GeoReferent.ATTR_NAME): self.addSlot(s.type_name, s.value, False, (s.count if merge_statistic else 0)) if (self.findSlot(GeoReferent.ATTR_TYPE, None, True) is None and obj.findSlot(GeoReferent.ATTR_TYPE, None, True) is not None): for s in obj.slots: if (s.type_name == GeoReferent.ATTR_TYPE): self.addSlot(s.type_name, s.value, False, (s.count if merge_statistic else 0)) if (self.is_territory): if (((self.alpha2 is not None or self.findSlot(GeoReferent.ATTR_TYPE, "государство", True) is not None or self.findSlot(GeoReferent.ATTR_TYPE, "держава", True) is not None) or self.findSlot(GeoReferent.ATTR_TYPE, "империя", True) is not None or self.findSlot(GeoReferent.ATTR_TYPE, "імперія", True) is not None) or self.findSlot(GeoReferent.ATTR_TYPE, "state", True) is not None): s = self.findSlot(GeoReferent.ATTR_TYPE, "территория", True) if (s is not None): self.slots.remove(s) if (self.is_state): for s in self.slots: if (s.type_name == GeoReferent.ATTR_TYPE and ((str(s.value) == "регион" or str(s.value) == "регіон" or str(s.value) == "region"))): self.slots.remove(s) break if (self.is_city): s = Utils.ifNotNull(self.findSlot(GeoReferent.ATTR_TYPE, "город", True), Utils.ifNotNull(self.findSlot(GeoReferent.ATTR_TYPE, "місто", True), self.findSlot(GeoReferent.ATTR_TYPE, "city", True))) if (s is not None): for ss in self.slots: if (ss.type_name == GeoReferent.ATTR_TYPE and ss != s and 
GeoReferent.__isCity(ss.value)): self.slots.remove(s) break has = False i = 0 while i < len(self.slots): if (self.slots[i].type_name == GeoReferent.ATTR_HIGHER): if (not has): has = True else: del self.slots[i] i -= 1 i += 1 self._mergeExtReferents(obj)
def canBeGeneralFor(self, obj: 'Referent') -> bool:
    """Tell whether this phone is a more general (shorter) form of ``obj``.

    Args:
        obj(Referent): the candidate more specific referent.

    Returns:
        bool: False unless ``__canBeEqual`` accepts ``obj`` within one text
        (additional numbers ignored). False when this phone has a country
        code and ``obj`` has none. True when only ``obj`` has an additional
        number, False when only this phone has one. Otherwise True exactly
        when ``obj``'s number ends with this phone's number.
    """
    if not self.__canBeEqual(obj, Referent.EqualType.WITHINONETEXT, True):
        return False

    other = Utils.asObjectOrNull(obj, PhoneReferent)
    if self.country_code is not None and other.country_code is None:
        return False

    own_ext = self.add_number is not None
    other_ext = other.add_number is not None
    if own_ext != other_ext:
        # Exactly one side has an additional number: only the side without
        # it can be the general one.
        return other_ext

    return True if LanguageHelper.endsWith(other.number, self.number) else False
def __canBeEqual(self, obj: 'Referent', typ: 'EqualType', ignore_add_number: bool) -> bool:
    """Compare this phone with another referent.

    Args:
        obj(Referent): referent to compare with; anything that is not a
            PhoneReferent is never equal.
        typ(EqualType): comparison mode; unless it is DIFFERENTTEXTS, one
            number being a suffix of the other also counts as equal.
        ignore_add_number(bool): when True, additional numbers only matter
            if both sides have one; when False they must match whenever
            either side has one.

    Returns:
        bool: True when the phones may denote the same number.
    """
    other = Utils.asObjectOrNull(obj, PhoneReferent)
    if other is None:
        return False

    if (other.country_code is not None and self.country_code is not None
            and other.country_code != self.country_code):
        return False

    if ignore_add_number:
        if (self.add_number is not None and other.add_number is not None
                and other.add_number != self.add_number):
            return False
    elif ((self.add_number is not None or other.add_number is not None)
            and self.add_number != other.add_number):
        return False

    if self.number is None or other.number is None:
        return False
    if self.number == other.number:
        return True
    if typ == Referent.EqualType.DIFFERENTTEXTS:
        return False
    either_is_suffix = (LanguageHelper.endsWith(self.number, other.number)
                        or LanguageHelper.endsWith(other.number, self.number))
    return True if either_is_suffix else False
def process(self, word : str) -> typing.List['MorphWordForm']: """ Process a single word and return its morphological variants. Args: word(str): the word; it must be in upper case. Returns: a list of MorphWordForm, or None when the word is null/empty, when a word longer than one character contains no Cyrillic or Latin vowel, or when no variant is found. The word is first matched against the tree rooted at self.m_root (endings looked up in each rule's variants); if the checks on the collected variants leave need_test_unknown_vars set, the reverse tree self.m_root_reverce is walked from the end of the word and variants built from it get undef_coef = mv.coef. The result is sorted with MorphEngine.__sort; then normal_case defaults to the word, language is set and prepositions are normalised. NOTE(review): the reverse-tree part iterates 'for rr in res' although a later line still tests 'res is None' - confirm res cannot be None at that point. """ if (Utils.isNullOrEmpty(word)): return None res = None if (len(word) > 1): i = 0 while i < len(word): ch = word[i] if (LanguageHelper.isCyrillicVowel(ch) or LanguageHelper.isLatinVowel(ch)): break i += 1 if (i >= len(word)): return res mvs = [ ] tn = self.m_root i = 0 while i <= len(word): if (tn.lazy_pos > 0): self.__loadTreeNode(tn) if (tn.rules is not None): word_begin = None word_end = None if (i == 0): word_end = word elif (i < len(word)): word_end = word[i:] else: word_end = "" if (res is None): res = list() for r in tn.rules: wrapmvs14 = RefOutArgWrapper(None) inoutres15 = Utils.tryGetValue(r.variants, word_end, wrapmvs14) mvs = wrapmvs14.value if (inoutres15): if (word_begin is None): if (i == len(word)): word_begin = word elif (i > 0): word_begin = word[0:0+i] else: word_begin = "" r.processResult(res, word_begin, mvs) if (tn.nodes is None or i >= len(word)): break ch = ord(word[i]) wraptn16 = RefOutArgWrapper(None) inoutres17 = Utils.tryGetValue(tn.nodes, ch, wraptn16) tn = wraptn16.value if (not inoutres17): break i += 1 need_test_unknown_vars = True if (res is not None): for r in res: if ((r.class0_.is_pronoun or r.class0_.is_noun or r.class0_.is_adjective) or (r.class0_.is_misc and r.class0_.is_conjunction) or r.class0_.is_preposition): need_test_unknown_vars = False elif (r.class0_.is_adverb and r.normal_case is not None): if (not LanguageHelper.endsWithEx(r.normal_case, "О", "А", None, None)): need_test_unknown_vars = False elif (r.normal_case == "МНОГО"): need_test_unknown_vars = False elif (r.class0_.is_verb and len(res) > 1): ok = False for rr in res: if (rr != r and rr.class0_ != r.class0_): ok = True break if (ok and not LanguageHelper.endsWith(word, "ИМ")): need_test_unknown_vars = False if (need_test_unknown_vars and LanguageHelper.isCyrillicChar(word[0])): gl = 0 sog = 0 j = 0 while j < len(word): 
if (LanguageHelper.isCyrillicVowel(word[j])): gl += 1 else: sog += 1 j += 1 if ((gl < 2) or (sog < 2)): need_test_unknown_vars = False if (need_test_unknown_vars and res is not None and len(res) == 1): if (res[0].class0_.is_verb): if ("н.вр." in res[0].misc.attrs and "нес.в." in res[0].misc.attrs and not "страд.з." in res[0].misc.attrs): need_test_unknown_vars = False elif ("б.вр." in res[0].misc.attrs and "сов.в." in res[0].misc.attrs): need_test_unknown_vars = False elif (res[0].normal_case is not None and LanguageHelper.endsWith(res[0].normal_case, "СЯ")): need_test_unknown_vars = False if (res[0].class0_.is_undefined and "прдктв." in res[0].misc.attrs): need_test_unknown_vars = False if (need_test_unknown_vars): if (self.m_root_reverce is None): return res tn = self.m_root_reverce tn0 = None for i in range(len(word) - 1, -1, -1): if (tn.lazy_pos > 0): self.__loadTreeNode(tn) ch = ord(word[i]) if (tn.nodes is None): break wrapnext18 = RefOutArgWrapper(None) inoutres19 = Utils.tryGetValue(tn.nodes, ch, wrapnext18) next0_ = wrapnext18.value if (not inoutres19): break tn = next0_ if (tn.lazy_pos > 0): self.__loadTreeNode(tn) if (tn.reverce_variants is not None): tn0 = tn break else: i = -1 if (tn0 is not None): glas = i < 4 while i >= 0: if (LanguageHelper.isCyrillicVowel(word[i]) or LanguageHelper.isLatinVowel(word[i])): glas = True break i -= 1 if (glas): for mv in tn0.reverce_variants: if (((not mv.class0_.is_verb and not mv.class0_.is_adjective and not mv.class0_.is_noun) and not mv.class0_.is_proper_surname and not mv.class0_.is_proper_geo) and not mv.class0_.is_proper_secname): continue ok = False for rr in res: if (rr.is_in_dictionary): if (rr.class0_ == mv.class0_ or rr.class0_.is_noun): ok = True break if (not mv.class0_.is_adjective and rr.class0_.is_verb): ok = True break if (ok): continue if (len(mv.tail) > 0 and not LanguageHelper.endsWith(word, mv.tail)): continue r = MorphWordForm(mv, word) if (not MorphWordForm._hasMorphEquals(res, r)): r.undef_coef 
= mv.coef if (res is None): res = list() res.append(r) if (word == "ПРИ" and res is not None): for i in range(len(res) - 1, -1, -1): if (res[i].class0_.is_proper_geo): del res[i] else: i = -1 if (res is None or len(res) == 0): return None MorphEngine.__sort(res, word) for v in res: if (v.normal_case is None): v.normal_case = word if (v.class0_.is_verb): if (v.normal_full is None and LanguageHelper.endsWith(v.normal_case, "ТЬСЯ")): v.normal_full = v.normal_case[0:0+len(v.normal_case) - 2] v.language = self.language if (v.class0_.is_preposition): v.normal_case = LanguageHelper.normalizePreposition(v.normal_case) mc = MorphClass() for i in range(len(res) - 1, -1, -1): if (not res[i].is_in_dictionary and res[i].class0_.is_adjective and len(res) > 1): if ("к.ф." in res[i].misc.attrs or "неизм." in res[i].misc.attrs): del res[i] continue if (res[i].is_in_dictionary): mc.value |= res[i].class0_.value else: i = -1 if (mc == MorphClass.VERB and len(res) > 1): for r in res: if (r.undef_coef > (100) and r.class0_ == MorphClass.ADJECTIVE): r.undef_coef = (0) if (len(res) == 0): return None return res
def __tryNounName(li: typing.List['CityItemToken'], oi: 'IntOntologyItem', always: bool) -> 'ReferentToken': """ Try to build a city referent from a type item (NOUN or MISC) followed by a name item. Args: li: city item tokens; None is returned unless there are at least two and li[0] is of type NOUN or MISC. The list may be modified in place: items after the name item are deleted, and li[0] is deleted when it is MISC. oi: holder whose value is reset to None on entry and set to the name item's onto_item when a known CITY item follows "ГОРОД"/"МІСТО" or a MISC item. always: when True, an unconfirmed candidate is still accepted; when False it is additionally required that MiscLocationHelper.checkNearBefore finds something before li[0]. Returns: a ReferentToken wrapping a GeoReferent (a clone of the ontology referent when oi.value has one, otherwise a new one) with the collected names and types, or None when the candidate is rejected. A following '-<number>' (digit value below 50) is appended to every name and included in the token. """ oi.value = (None) if (li is None or (len(li) < 2) or ((li[0].typ != CityItemToken.ItemType.NOUN and li[0].typ != CityItemToken.ItemType.MISC))): return None ok = not li[0].doubtful if (ok and li[0].typ == CityItemToken.ItemType.MISC): ok = False typ = (None if li[0].typ == CityItemToken.ItemType.MISC else li[0].value) typ2 = (None if li[0].typ == CityItemToken.ItemType.MISC else li[0].alt_value) prob_adj = None i1 = 1 org0_ = None if ((typ is not None and li[i1].typ == CityItemToken.ItemType.NOUN and ((i1 + 1) < len(li))) and li[0].whitespaces_after_count <= 1 and (((LanguageHelper.endsWith(typ, "ПОСЕЛОК") or LanguageHelper.endsWith(typ, "СЕЛИЩЕ") or typ == "ДЕРЕВНЯ") or typ == "СЕЛО"))): if (li[i1].begin_token == li[i1].end_token): ooo = AddressItemToken.tryAttachOrg(li[i1].begin_token) if (ooo is not None and ooo.ref_token is not None): return None typ2 = li[i1].value if (typ2 == "СТАНЦИЯ" and li[i1].begin_token.isValue("СТ", None) and ((i1 + 1) < len(li))): m = li[i1 + 1].morph if (m.number == MorphNumber.PLURAL): prob_adj = "СТАРЫЕ" elif (m.gender == MorphGender.FEMINIE): prob_adj = "СТАРАЯ" elif (m.gender == MorphGender.MASCULINE): prob_adj = "СТАРЫЙ" else: prob_adj = "СТАРОЕ" i1 += 1 name = Utils.ifNotNull(li[i1].value, ((None if li[i1].onto_item is None else li[i1].onto_item.canonic_text))) alt_name = li[i1].alt_value if (name is None): return None mc = li[0].morph if (i1 == 1 and li[i1].typ == CityItemToken.ItemType.CITY and ((li[0].value == "ГОРОД" or li[0].value == "МІСТО" or li[0].typ == CityItemToken.ItemType.MISC))): if (typ is None and ((i1 + 1) < len(li)) and li[i1 + 1].typ == CityItemToken.ItemType.NOUN): return None oi.value = li[i1].onto_item if (oi.value is not None): name = oi.value.canonic_text if (len(name) > 2 or oi.value.misc_attr is not None): if (not li[1].doubtful or ((oi.value is not None and oi.value.misc_attr is not None))): ok 
= True elif (not ok and not li[1].is_newline_before): if (li[0].geo_object_before or li[1].geo_object_after): ok = True elif (StreetDefineHelper.checkStreetAfter( li[1].end_token.next0_)): ok = True elif (li[1].end_token.next0_ is not None and (isinstance(li[1].end_token.next0_.getReferent(), DateReferent))): ok = True elif ((li[1].whitespaces_before_count < 2) and li[1].onto_item is not None): if (li[1].is_newline_after): ok = True if (li[1].doubtful and li[1].end_token.next0_ is not None and li[1].end_token.chars == li[1].end_token.next0_.chars): ok = False if (li[0].begin_token.previous is not None and li[0].begin_token.previous.isValue("В", None)): ok = True if (not ok): ok = CityAttachHelper.checkYearAfter(li[1].end_token.next0_) if (not ok): ok = CityAttachHelper.checkCityAfter(li[1].end_token.next0_) elif ((li[i1].typ == CityItemToken.ItemType.PROPERNAME or li[i1].typ == CityItemToken.ItemType.CITY)): if (((li[0].value == "АДМИНИСТРАЦИЯ" or li[0].value == "АДМІНІСТРАЦІЯ")) and i1 == 1): return None if (li[i1].is_newline_before): if (len(li) != 2): return None if (not li[0].doubtful): ok = True if (len(name) < 2): ok = False elif ((len(name) < 3) and li[0].morph.number != MorphNumber.SINGULAR): ok = False if (li[i1].doubtful and not li[i1].geo_object_after and not li[0].geo_object_before): if (li[i1].morph.case_.is_genitive): if (((li[0].begin_token.previous is None or MiscLocationHelper.checkGeoObjectBefore( li[0].begin_token))) and ((li[i1].end_token.next0_ is None or MiscLocationHelper.checkGeoObjectAfter( li[i1].end_token.next0_) or AddressItemToken.checkHouseAfter( li[i1].end_token.next0_, False, True)))): pass else: ok = False else: rt0 = li[i1].kit.processReferent( "PERSONPROPERTY", li[0].begin_token.previous) if (rt0 is not None): rt1 = li[i1].kit.processReferent( "PERSON", li[i1].begin_token) if (rt1 is not None): ok = False npt = NounPhraseHelper.tryParse(li[i1].begin_token, NounPhraseParseAttr.NO, 0) if (npt is not None): if (npt.end_token.end_char 
> li[i1].end_char and len(npt.adjectives) > 0 and not npt.adjectives[0].end_token.next0_.is_comma): ok = False elif (TerrItemToken._m_unknown_regions.tryParse( npt.end_token, TerminParseAttr.FULLWORDSONLY) is not None): ok1 = False if (li[0].begin_token.previous is not None): ttt = li[0].begin_token.previous if (ttt.is_comma and ttt.previous is not None): ttt = ttt.previous geo_ = Utils.asObjectOrNull( ttt.getReferent(), GeoReferent) if (geo_ is not None and not geo_.is_city): ok1 = True if (npt.end_token.next0_ is not None): ttt = npt.end_token.next0_ if (ttt.is_comma and ttt.next0_ is not None): ttt = ttt.next0_ geo_ = Utils.asObjectOrNull( ttt.getReferent(), GeoReferent) if (geo_ is not None and not geo_.is_city): ok1 = True if (not ok1): return None if (li[0].value == "ПОРТ"): if (li[i1].chars.is_all_upper or li[i1].chars.is_latin_letter): return None elif (li[0].geo_object_before): ok = True elif (li[i1].geo_object_after and not li[i1].is_newline_after): ok = True else: ok = CityAttachHelper.checkYearAfter(li[i1].end_token.next0_) if (not ok): ok = CityAttachHelper.checkStreetAfter(li[i1].end_token.next0_) if (not ok and li[0].begin_token.previous is not None and li[0].begin_token.previous.isValue("В", None)): ok = True else: return None if (not ok and not always): if (MiscLocationHelper.checkNearBefore(li[0].begin_token.previous) is None): return None if (len(li) > (i1 + 1)): del li[i1 + 1:i1 + 1 + len(li) - i1 - 1] city = GeoReferent() if (oi.value is not None and oi.value.referent is not None): city = (Utils.asObjectOrNull(oi.value.referent.clone(), GeoReferent)) city.occurrence.clear() if (not li[0].morph.case_.is_undefined and li[0].morph.gender != MorphGender.UNDEFINED): if (li[i1].end_token.morph.class0_.is_adjective and li[i1].begin_token == li[i1].end_token): nam = ProperNameHelper.getNameEx( li[i1].begin_token, li[i1].end_token, MorphClass.ADJECTIVE, li[0].morph.case_, li[0].morph.gender, False, False) if (nam is not None and nam != name): name = nam 
if (li[0].morph.case_.is_nominative): if (alt_name is not None): city._addName(alt_name) alt_name = (None) city._addName(name) if (prob_adj is not None): city._addName(prob_adj + " " + name) if (alt_name is not None): city._addName(alt_name) if (prob_adj is not None): city._addName(prob_adj + " " + alt_name) if (typ is not None): city._addTyp(typ) elif (not city.is_city): city._addTypCity(li[0].kit.base_language) if (typ2 is not None): city._addTyp(typ2.lower()) if (li[0].higher_geo is not None and GeoOwnerHelper.canBeHigher(li[0].higher_geo, city)): city.higher = li[0].higher_geo if (li[0].typ == CityItemToken.ItemType.MISC): del li[0] res = ReferentToken._new719(city, li[0].begin_token, li[len(li) - 1].end_token, mc) if (res.end_token.next0_ is not None and res.end_token.next0_.is_hiphen and (isinstance(res.end_token.next0_.next0_, NumberToken))): num = Utils.asObjectOrNull(res.end_token.next0_.next0_, NumberToken) if ((num.typ == NumberSpellingType.DIGIT and not num.morph.class0_.is_adjective and num.int_value is not None) and (num.int_value < 50)): for s in city.slots: if (s.type_name == GeoReferent.ATTR_NAME): city.uploadSlot(s, "{0}-{1}".format(s.value, num.value)) res.end_token = num if (li[0].begin_token == li[0].end_token and li[0].begin_token.isValue("ГОРОДОК", None)): if (AddressItemToken.checkHouseAfter(res.end_token.next0_, True, False)): return None return res
def __TryAttach_(self, pli: typing.List['PhoneItemToken'], ind: int, is_phone_before: bool, prev_phone: 'PhoneReferent', lev: int = 0) -> 'ReferentToken': """ Try to assemble a phone number from phone item tokens starting at index ind. Args: pli: phone item tokens; the list may be truncated in place when the items match the template of prev_phone. ind: index of the first item to use. is_phone_before: flag that relaxes several checks below (presumably a phone or phone prefix precedes the items - confirm with callers); it is also set locally when a bracketed PREFIX item follows the number. prev_phone: previously recognised phone or None; its _m_template and number length are compared with the current candidate and its leading digits may be prepended to a shorter number. lev: recursion depth; the method calls itself with lev + 1 and returns None once lev > 4. Returns: a ReferentToken wrapping a new PhoneReferent (number, optional country_code, optional ATTR_ADDNUMBER slot, _m_template, and kind taken from a trailing PREFIX item), or None when ind is out of range or the collected digits are rejected. A country code equal to "8" is not stored; a number made only of '0' digits is rejected; with country code "7" the number must have exactly 10 digits. """ if (ind >= len(pli) or lev > 4): return None country_code = None city_code = None j = ind if (prev_phone is not None and prev_phone._m_template is not None and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER): tmp = io.StringIO() jj = j first_pass3119 = True while True: if first_pass3119: first_pass3119 = False else: jj += 1 if (not (jj < len(pli))): break if (pli[jj].item_type == PhoneItemToken.PhoneItemType.NUMBER): print(len(pli[jj].value), end="", file=tmp) elif (pli[jj].item_type == PhoneItemToken.PhoneItemType.DELIM): if (pli[jj].value == " "): break print(pli[jj].value, end="", file=tmp) continue else: break templ0 = Utils.toStringStringIO(tmp) if (templ0 == prev_phone._m_template): if ((jj + 1) < len(pli)): if (pli[jj + 1].item_type == PhoneItemToken.PhoneItemType.PREFIX and (jj + 2) == len(pli)): pass else: del pli[jj + 1:jj + 1 + len(pli) - jj - 1] break if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.COUNTRYCODE): country_code = pli[j].value if (country_code != "8"): cc = PhoneHelper.getCountryPrefix(country_code) if (cc is not None and (len(cc) < len(country_code))): city_code = country_code[len(cc):] country_code = cc j += 1 elif ((j < len(pli)) and pli[j].can_be_country_prefix): k = j + 1 if ((k < len(pli)) and pli[k].item_type == PhoneItemToken.PhoneItemType.DELIM): k += 1 rrt = self.__TryAttach_(pli, k, is_phone_before, None, lev + 1) if (rrt is not None): if ((((is_phone_before and pli[j + 1].item_type == PhoneItemToken.PhoneItemType.DELIM and pli[j + 1].begin_token.is_hiphen) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and len(pli[j].value) == 3) and ((j + 2) < len(pli)) and pli[j + 2].item_type == PhoneItemToken.PhoneItemType.NUMBER) and len(pli[j + 2].value) == 3): pass else: country_code = pli[j].value j += 1 if (((j < len(pli)) and 
pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and ((pli[j].value[0] == '8' or pli[j].value[0] == '7'))) and country_code is None): if (len(pli[j].value) == 1): country_code = pli[j].value j += 1 elif (len(pli[j].value) == 4): country_code = pli[j].value[0:0 + 1] if (city_code is None): city_code = pli[j].value[1:] else: city_code += pli[j].value[1:] j += 1 elif (len(pli[j].value) == 11 and j == (len(pli) - 1) and is_phone_before): ph0 = PhoneReferent() if (pli[j].value[0] != '8'): ph0.country_code = pli[j].value[0:0 + 1] ph0.number = pli[j].value[1:1 + 3] + pli[j].value[4:] return ReferentToken(ph0, pli[0].begin_token, pli[j].end_token) elif (city_code is None and len(pli[j].value) > 3 and ((j + 1) < len(pli))): sum0_ = 0 for it in pli: if (it.item_type == PhoneItemToken.PhoneItemType.NUMBER): sum0_ += len(it.value) if (sum0_ == 11): city_code = pli[j].value[1:] j += 1 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.CITYCODE): if (city_code is None): city_code = pli[j].value else: city_code += pli[j].value j += 1 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM): j += 1 if ((country_code == "8" and city_code is None and ((j + 3) < len(pli))) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER): if (len(pli[j].value) == 3 or len(pli[j].value) == 4): city_code = pli[j].value j += 1 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM): j += 1 normal_num_len = 0 if (country_code == "421"): normal_num_len = 9 num = io.StringIO() templ = io.StringIO() part_length = list() delim = None ok = False additional = None std = False if (country_code is not None and ((j + 4) < len(pli)) and j > 0): if (((((pli[j - 1].value == "-" or pli[j - 1].item_type == PhoneItemToken.PhoneItemType.COUNTRYCODE)) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and pli[j + 1].item_type == PhoneItemToken.PhoneItemType.DELIM) and pli[j + 2].item_type == PhoneItemToken.PhoneItemType.NUMBER 
and pli[j + 3].item_type == PhoneItemToken.PhoneItemType.DELIM) and pli[j + 4].item_type == PhoneItemToken.PhoneItemType.NUMBER): if ((((len(pli[j].value) + len(pli[j + 2].value)) == 6 or ( (len(pli[j].value) == 4 and len(pli[j + 2].value) == 5)))) and ((len(pli[j + 4].value) == 4 or len(pli[j + 4].value) == 1))): print(pli[j].value, end="", file=num) print(pli[j + 2].value, end="", file=num) print(pli[j + 4].value, end="", file=num) print("{0}{1}{2}{3}{4}".format(len(pli[j].value), pli[j + 1].value, len(pli[j + 2].value), pli[j + 3].value, len(pli[j + 4].value)), end="", file=templ, flush=True) std = True ok = True j += 5 first_pass3120 = True while True: if first_pass3120: first_pass3120 = False else: j += 1 if (not (j < len(pli))): break if (std): break if (pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM): if (pli[j].is_in_brackets): continue if (j > 0 and pli[j - 1].is_in_brackets): continue if (templ.tell() > 0): print(pli[j].value, end="", file=templ) if (delim is None): delim = pli[j].value elif (pli[j].value != delim): if ((len(part_length) == 2 and ((part_length[0] == 3 or part_length[0] == 4)) and city_code is None) and part_length[1] == 3): city_code = Utils.toStringStringIO(num)[0:0 + part_length[0]] Utils.removeStringIO(num, 0, part_length[0]) del part_length[0] delim = pli[j].value continue if (is_phone_before and ((j + 1) < len(pli)) and pli[j + 1].item_type == PhoneItemToken.PhoneItemType.NUMBER): if (num.tell() < 6): continue if (normal_num_len > 0 and (num.tell() + len(pli[j + 1].value)) == normal_num_len): continue break else: continue ok = False elif (pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER): if ((num.tell() + len(pli[j].value)) > 13): if (j > 0 and pli[j - 1].item_type == PhoneItemToken.PhoneItemType.DELIM): j -= 1 ok = True break print(pli[j].value, end="", file=num) part_length.append(len(pli[j].value)) print(len(pli[j].value), end="", file=templ) ok = True if (num.tell() > 10): j += 1 if ((j < len(pli)) and 
pli[j].item_type == PhoneItemToken.PhoneItemType.ADDNUMBER): additional = pli[j].value j += 1 break elif (pli[j].item_type == PhoneItemToken.PhoneItemType.ADDNUMBER): additional = pli[j].value j += 1 break else: break if ((j == (len(pli) - 1) and pli[j].is_in_brackets and ((len(pli[j].value) == 3 or len(pli[j].value) == 4))) and additional is None): additional = pli[j].value j += 1 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.PREFIX and pli[j].is_in_brackets): is_phone_before = True j += 1 if ((country_code is None and city_code is not None and len(city_code) > 3) and (num.tell() < 8) and city_code[0] != '8'): if ((len(city_code) + num.tell()) == 10): pass else: cc = PhoneHelper.getCountryPrefix(city_code) if (cc is not None): if (len(cc) > 1 and (len(city_code) - len(cc)) > 1): country_code = cc city_code = city_code[len(cc):] if (country_code is None and city_code is not None and city_code.startswith("00")): cc = PhoneHelper.getCountryPrefix(city_code[2:]) if (cc is not None): if (len(city_code) > (len(cc) + 3)): country_code = cc city_code = city_code[len(cc) + 2:] if (num.tell() == 0 and city_code is not None): if (len(city_code) == 10): print(city_code[3:], end="", file=num) part_length.append(num.tell()) city_code = city_code[0:0 + 3] ok = True elif (((len(city_code) == 9 or len(city_code) == 11 or len(city_code) == 8)) and ((is_phone_before or country_code is not None))): print(city_code, end="", file=num) part_length.append(num.tell()) city_code = (None) ok = True if (num.tell() < 4): ok = False if (num.tell() < 7): if (city_code is not None and (len(city_code) + num.tell()) > 7): if (not is_phone_before and len(city_code) == 3): ii = 0 while ii < len(part_length): if (part_length[ii] == 3): pass elif (part_length[ii] > 3): break elif ((ii < (len(part_length) - 1)) or (part_length[ii] < 2)): break ii += 1 if (ii >= len(part_length)): if (country_code == "61"): pass else: ok = False elif (((num.tell() == 6 or num.tell() == 5)) and 
((len(part_length) >= 1 and len(part_length) <= 3)) and is_phone_before): if (pli[0].item_type == PhoneItemToken.PhoneItemType.PREFIX and pli[0].kind == PhoneKind.HOME): ok = False elif (prev_phone is not None and prev_phone.number is not None and ((len(prev_phone.number) == num.tell() or len(prev_phone.number) == (num.tell() + 3) or len(prev_phone.number) == (num.tell() + 4)))): pass elif (num.tell() > 4 and prev_phone is not None and Utils.toStringStringIO(templ) == prev_phone._m_template): ok = True else: ok = False if (delim == "." and country_code is None and city_code is None): ok = False if ((is_phone_before and country_code is None and city_code is None) and num.tell() > 10): cc = PhoneHelper.getCountryPrefix(Utils.toStringStringIO(num)) if (cc is not None): if ((num.tell() - len(cc)) == 9): country_code = cc Utils.removeStringIO(num, 0, len(cc)) ok = True if (ok): if (std): pass elif (prev_phone is not None and prev_phone.number is not None and (((len(prev_phone.number) == num.tell() or len(prev_phone.number) == (num.tell() + 3) or len(prev_phone.number) == (num.tell() + 4)) or prev_phone._m_template == Utils.toStringStringIO(templ)))): pass elif ((len(part_length) == 3 and part_length[0] == 3 and part_length[1] == 2) and part_length[2] == 2): pass elif (len(part_length) == 3 and is_phone_before): pass elif ((len(part_length) == 4 and (((part_length[0] + part_length[1]) == 3)) and part_length[2] == 2) and part_length[3] == 2): pass elif ((len(part_length) == 4 and part_length[0] == 3 and part_length[1] == 3) and part_length[2] == 2 and part_length[3] == 2): pass elif (len(part_length) == 5 and (part_length[1] + part_length[2]) == 4 and (part_length[3] + part_length[4]) == 4): pass elif (len(part_length) > 4): ok = False elif (len(part_length) > 3 and city_code is not None): ok = False elif ((is_phone_before or city_code is not None or country_code is not None) or additional is not None): ok = True else: ok = False if (((num.tell() == 6 or num.tell() == 7)) 
and (len(part_length) < 4) and j > 0): next_ph = self.__getNextPhone(pli[j - 1].end_token.next0_, lev + 1) if (next_ph is not None): d = len(next_ph.number) - num.tell() if (d == 0 or d == 3 or d == 4): ok = True end = (pli[j - 1].end_token if j > 0 else None) if (end is None): ok = False if ((ok and city_code is None and country_code is None) and prev_phone is None and not is_phone_before): if (not end.is_whitespace_after and end.next0_ is not None): tt = end.next0_ if (tt.isCharOf(".,)") and tt.next0_ is not None): tt = tt.next0_ if (not tt.is_whitespace_before): ok = False if (not ok): return None if (templ.tell() > 0 and not str.isdigit( Utils.getCharAtStringIO(templ, templ.tell() - 1))): Utils.setLengthStringIO(templ, templ.tell() - 1) if ((country_code is None and city_code is not None and len(city_code) > 3) and num.tell() > 6): cc = PhoneHelper.getCountryPrefix(city_code) if (cc is not None and ((len(cc) + 1) < len(city_code))): country_code = cc city_code = city_code[len(cc):] ph = PhoneReferent() if (country_code != "8" and country_code is not None): ph.country_code = country_code number = Utils.toStringStringIO(num) if ((city_code is None and num.tell() > 7 and len(part_length) > 0) and (part_length[0] < 5)): city_code = number[0:0 + part_length[0]] number = number[part_length[0]:] if (city_code is None and num.tell() == 11 and Utils.getCharAtStringIO(num, 0) == '8'): city_code = number[1:1 + 3] number = number[4:] if (city_code is None and num.tell() == 10): city_code = number[0:0 + 3] number = number[3:] if (city_code is not None): number = (city_code + number) elif (country_code is None and prev_phone is not None): ok1 = False if (len(prev_phone.number) >= (len(number) + 2)): ok1 = True elif (templ.tell() > 0 and prev_phone._m_template is not None and LanguageHelper.endsWith(prev_phone._m_template, Utils.toStringStringIO(templ))): ok1 = True if (ok1 and len(prev_phone.number) > len(number)): number = (prev_phone.number[0:0 + len(prev_phone.number) - 
len(number)] + number) if (ph.country_code is None and prev_phone is not None and prev_phone.country_code is not None): if (len(prev_phone.number) == len(number)): ph.country_code = prev_phone.country_code ok = False for d in number: if (d != '0'): ok = True break if (not ok): return None if (country_code is not None): if (len(number) < 7): return None else: s = PhoneHelper.getCountryPrefix(number) if (s is not None): num2 = number[len(s):] if (len(num2) >= 10 and len(num2) <= 11): number = num2 if (s != "7"): ph.country_code = s if (len(number) == 8 and prev_phone is None): return None if (len(number) > 11): if ((len(number) < 14) and ((country_code == "1" or country_code == "43"))): pass else: return None ph.number = number if (additional is not None): ph.addSlot(PhoneReferent.ATTR_ADDNUMBER, additional, True, 0) if (not is_phone_before and end.next0_ is not None and not end.is_newline_after): if (end.next0_.isCharOf("+=") or end.next0_.is_hiphen): return None if (country_code is not None and country_code == "7"): if (len(number) != 10): return None ph._m_template = Utils.toStringStringIO(templ) if (j == (len(pli) - 1) and pli[j].item_type == PhoneItemToken.PhoneItemType.PREFIX and not pli[j].is_newline_before): end = pli[j].end_token if (pli[j].kind != PhoneKind.UNDEFINED): ph.kind = pli[j].kind res = ReferentToken(ph, pli[0].begin_token, end) if (pli[0].item_type == PhoneItemToken.PhoneItemType.PREFIX and pli[0].end_token.next0_.is_table_control_char): res.begin_token = pli[1].begin_token return res
def getWordform(self, word : str, cla : 'MorphClass', gender : 'MorphGender', cas : 'MorphCase', num : 'MorphNumber', add_info : 'MorphWordForm') -> str:
    """Produce a form of ``word`` matching the requested class, gender, case and number.

    Three strategies are tried in order:

    1. Walk the forward tree (``self.m_root``) along the characters of
       ``word`` and, at every node that has rules, look for variants whose
       key equals the remaining ending of the word.
    2. If nothing matched, walk the reverse tree (``self.m_root_reverce``)
       from the last character backwards until a node with
       ``reverce_variants`` is found, and use the first compatible variant.
    3. For proper surnames, fall back to hard-coded feminine endings.

    Args:
        word: the source word.
        cla: required morphological class (compared through ``.value`` bits).
        gender: required gender, or ``MorphGender.UNDEFINED`` for any.
        cas: required case; when undefined only nominative or undefined
            variants are accepted in the first strategy.
        num: required number, or ``MorphNumber.UNDEFINED`` for any.
        add_info: optional word form used to score candidates through
            ``calcEqCoef``; may be None.

    Returns:
        The resulting word form, or None when nothing suitable was found.
    """
    node = self.m_root
    matched = False
    best = None
    best_coef = -10
    total = len(word)

    # Strategy 1: forward tree, one step per character (plus the end position).
    for pos in range(total + 1):
        if node.lazy_pos > 0:
            self.__loadTreeNode(node)
        if node.rules is not None:
            stem = word[:pos]
            ending = word[pos:]
            for rule in node.rules:
                if ending not in rule.variants:
                    continue
                for group in rule.variants_list:
                    for var in group:
                        if (cla.value & var.class0_.value) == 0 or var.normal_tail is None:
                            continue
                        if cas.is_undefined:
                            if not (var.case_.is_nominative or var.case_.is_undefined):
                                continue
                        elif (var.case_ & cas).is_undefined:
                            continue
                        want_surname = cla.is_proper_surname
                        is_surname = var.class0_.is_proper_surname
                        if (want_surname or is_surname) and want_surname != is_surname:
                            continue
                        # The flag is raised before the gender/number filters,
                        # so a class+case match alone suppresses strategies 2 and 3.
                        matched = True
                        if gender != MorphGender.UNDEFINED and (gender & var.gender) == MorphGender.UNDEFINED:
                            continue
                        if num != MorphNumber.UNDEFINED and (num & var.number) == MorphNumber.UNDEFINED:
                            continue
                        candidate = stem + var.tail
                        coef = var.calcEqCoef(add_info) if add_info is not None else 0
                        if best is None or coef > best_coef:
                            best = candidate
                            best_coef = coef
                        if best_coef == 0 and (stem + var.normal_tail) == word:
                            return candidate
        if node.nodes is None or pos >= total:
            break
        slot = RefOutArgWrapper(None)
        present = Utils.tryGetValue(node.nodes, ord(word[pos]), slot)
        node = slot.value
        if not present:
            break
    if matched:
        return best

    # Strategy 2: reverse tree, walking the word from its last character.
    node = self.m_root_reverce
    tail_node = None
    for pos in range(total - 1, -1, -1):
        if node.lazy_pos > 0:
            self.__loadTreeNode(node)
        code = ord(word[pos])
        if node.nodes is None:
            break
        slot = RefOutArgWrapper(None)
        present = Utils.tryGetValue(node.nodes, code, slot)
        child = slot.value
        if not present:
            break
        node = child
        if node.lazy_pos > 0:
            self.__loadTreeNode(node)
        if node.reverce_variants is not None:
            tail_node = node
            break
    if tail_node is None:
        return None

    for rev in tail_node.reverce_variants:
        if (rev.class0_.value & cla.value) == 0 or rev.rule is None:
            continue
        if len(rev.tail) > 0 and not LanguageHelper.endsWith(word, rev.tail):
            continue
        stem = word[:total - len(rev.tail)]
        for group in rev.rule.variants_list:
            for var in group:
                if (var.class0_.value & cla.value) == 0:
                    continue
                want_surname = cla.is_proper_surname
                is_surname = var.class0_.is_proper_surname
                if (want_surname or is_surname) and want_surname != is_surname:
                    continue
                if not cas.is_undefined:
                    if (cas & var.case_).is_undefined and not var.case_.is_undefined:
                        continue
                if num != MorphNumber.UNDEFINED and var.number != MorphNumber.UNDEFINED:
                    if (var.number & num) == MorphNumber.UNDEFINED:
                        continue
                if gender != MorphGender.UNDEFINED and var.gender != MorphGender.UNDEFINED:
                    if (var.gender & gender) == MorphGender.UNDEFINED:
                        continue
                # The first compatible variant wins.
                return stem + var.tail

    # Strategy 3: hard-coded feminine surname endings.
    if cla.is_proper_surname:
        feminine = gender == MorphGender.FEMINIE
        if feminine and not cas.is_undefined and not cas.is_nominative:
            if word.endswith(("ВА", "НА")):
                suffix = "У" if cas.is_accusative else "ОЙ"
                return word[:total - 1] + suffix
        if feminine:
            last = word[total - 1]
            if last in ('А', 'Я', 'О'):
                return word
            if LanguageHelper.isCyrillicVowel(last):
                return word[:total - 1] + "А"
            if last == 'Й':
                return word[:total - 2] + "АЯ"
            return word + "А"
    return best
def _tryParseStreet(sli : typing.List['StreetItemToken'], ext_onto_regim : bool=False, for_metro : bool=False) -> 'AddressItemToken':
    """Try to assemble a STREET address item from a list of street item tokens.

    The function locates the first NOUN item (the street type word), decides
    whether the name parts stand before or after it, collects name / number /
    age / adjective items, and fills a ``StreetReferent`` with type, name and
    number slots.

    Args:
        sli: parsed street items. The list may be modified in place (items
            are deleted in two places and ``is_number_km`` may be set).
        ext_onto_regim: forwarded to the recursive call and to
            ``__tryDetectNonNoun``; also makes any NUMBER item after the noun
            count as a name part.
        for_metro: when True the type slot is written as "метро" instead of
            the noun's own type; it is also switched on internally when a
            "МЕТРО" noun is met.

    Returns:
        An ``AddressItemToken`` of type STREET, the result of
        ``__tryParseFix`` / ``__tryDetectNonNoun``, or None when the items do
        not form a street.
    """
    # NOTE(review): the source of this function was whitespace-collapsed; the
    # nesting below was reconstructed from statement order - confirm against
    # the upstream file before relying on exact branch structure.
    if (sli is None or len(sli) == 0):
        return None
    # --- Step 1: find the index i of the street-type noun -------------------
    i = 0
    while i < len(sli):
        if (i == 0 and sli[i].typ == StreetItemType.FIX and ((len(sli) == 1 or sli[1].typ != StreetItemType.NOUN))):
            return StreetDefineHelper.__tryParseFix(sli)
        elif (sli[i].typ == StreetItemType.NOUN):
            # "УЛИЦА" directly followed by "МИКРОРАЙОН": merge the two nouns,
            # keeping the second one but extending it to start at the first.
            if ((i == 0 and sli[i].termin.canonic_text == "УЛИЦА" and ((i + 2) < len(sli))) and sli[i + 1].typ == StreetItemType.NOUN and sli[i + 1].termin.canonic_text == "МИКРОРАЙОН"):
                sli[i + 1].begin_token = sli[i].begin_token
                del sli[i]
            if (sli[i].termin.canonic_text == "МЕТРО"):
                if ((i + 1) < len(sli)):
                    # Parse everything after "МЕТРО" recursively in metro mode.
                    sli1 = list()
                    ii = i + 1
                    while ii < len(sli):
                        sli1.append(sli[ii])
                        ii += 1
                    str1 = StreetDefineHelper._tryParseStreet(sli1, ext_onto_regim, True)
                    if (str1 is not None):
                        str1.begin_token = sli[i].begin_token
                        str1.is_doubt = sli[i].is_abridge
                        if (sli[i + 1].is_in_brackets):
                            str1.is_doubt = False
                        return str1
                elif (i == 1 and sli[0].typ == StreetItemType.NAME):
                    for_metro = True
                    break
                if (i == 0 and len(sli) > 0):
                    for_metro = True
                    break
                return None
            # A lone "ВОЕННЫЙ ГОРОДОК" / "ПРОМЗОНА" becomes a named "микрорайон".
            if (i == 0 and (i + 1) >= len(sli) and ((sli[i].termin.canonic_text == "ВОЕННЫЙ ГОРОДОК" or sli[i].termin.canonic_text == "ПРОМЗОНА"))):
                stri0 = StreetReferent()
                stri0.addSlot(StreetReferent.ATTR_TYP, "микрорайон", False, 0)
                stri0.addSlot(StreetReferent.ATTR_NAME, sli[i].termin.canonic_text, False, 0)
                return AddressItemToken._new85(AddressItemToken.ItemType.STREET, sli[0].begin_token, sli[0].end_token, stri0, True)
            # A lone "МИКРОРАЙОН" yields a street with a type but no name.
            if (i == 0 and (i + 1) >= len(sli) and sli[i].termin.canonic_text == "МИКРОРАЙОН"):
                stri0 = StreetReferent()
                stri0.addSlot(StreetReferent.ATTR_TYP, sli[i].termin.canonic_text.lower(), False, 0)
                return AddressItemToken._new85(AddressItemToken.ItemType.STREET, sli[0].begin_token, sli[0].end_token, stri0, True)
            # "ПЛОЩАДЬ"/"ПЛОЩА" followed by a number with a postfix is rejected.
            if (sli[i].termin.canonic_text == "ПЛОЩАДЬ" or sli[i].termin.canonic_text == "ПЛОЩА"):
                tt = sli[i].end_token.next0_
                if (tt is not None and ((tt.is_hiphen or tt.isChar(':')))):
                    tt = tt.next0_
                nex = NumberHelper.tryParseNumberWithPostfix(tt)
                if (nex is not None):
                    return None
            break
        i += 1
    # No noun at all: delegate to the noun-less detector.
    if (i >= len(sli)):
        return StreetDefineHelper.__tryDetectNonNoun(sli, ext_onto_regim, for_metro)
    name = None
    number = None
    age = None
    adj = None
    noun = sli[i]
    alt_noun = None
    is_micro_raion = (noun.termin.canonic_text == "МИКРОРАЙОН" or noun.termin.canonic_text == "МІКРОРАЙОН" or noun.termin.canonic_text == "КВАРТАЛ") or LanguageHelper.endsWith(noun.termin.canonic_text, "ГОРОДОК")
    # --- Step 2: count candidate name parts before and after the noun -------
    before = 0
    after = 0
    j = 0
    while j < i:
        if ((sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX) or sli[j].typ == StreetItemType.STDADJECTIVE or sli[j].typ == StreetItemType.STDPARTOFNAME):
            before += 1
        elif (sli[j].typ == StreetItemType.NUMBER):
            if (sli[j].is_newline_after):
                return None
            if (sli[j].number.morph.class0_.is_adjective):
                before += 1
            elif (is_micro_raion):
                before += 1
            # NOTE(review): this tests sli[i] (the noun) while the mirrored
            # loop below tests sli[j] - possibly a typo; confirm intent.
            elif (sli[i].number_has_prefix):
                before += 1
        else:
            before += 1
        j += 1
    j = (i + 1)
    while j < len(sli):
        if ((sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX) or sli[j].typ == StreetItemType.STDADJECTIVE or sli[j].typ == StreetItemType.STDPARTOFNAME):
            after += 1
        elif (sli[j].typ == StreetItemType.NUMBER):
            if (sli[j].number is not None and sli[j].number.morph.class0_.is_adjective):
                after += 1
            elif (is_micro_raion):
                after += 1
            elif (sli[j].number_has_prefix):
                after += 1
            elif (ext_onto_regim):
                after += 1
        elif (sli[j].typ == StreetItemType.NOUN):
            # A second noun ends the "after" region.
            break
        else:
            after += 1
        j += 1
    # --- Step 3: choose the index range [n0, n1] holding the name parts -----
    # rli accumulates the items that make up the resulting street token.
    rli = list()
    if (before > after):
        if (noun.termin.canonic_text == "МЕТРО"):
            return None
        tt = sli[0].begin_token
        # Single-token, non-adjective, non-numeric name right before the
        # noun: require supporting context, otherwise reject.
        if (tt == sli[0].end_token and noun.begin_token == sli[0].end_token.next0_):
            if (not tt.morph.class0_.is_adjective and not ((isinstance(tt, NumberToken)))):
                if ((sli[0].is_newline_before or not MiscLocationHelper.checkGeoObjectBefore(sli[0].begin_token) or noun.morph.case_.is_genitive) or noun.morph.case_.is_instrumental):
                    ok = False
                    if (AddressItemToken.checkHouseAfter(noun.end_token.next0_, False, True)):
                        ok = True
                    elif (noun.end_token.next0_ is None):
                        ok = True
                    elif (noun.is_newline_after and MiscLocationHelper.checkGeoObjectBefore(sli[0].begin_token)):
                        ok = True
                    if (not ok):
                        if ((noun.chars.is_latin_letter and noun.chars.is_capital_upper and sli[0].chars.is_latin_letter) and sli[0].chars.is_capital_upper):
                            ok = True
                    if (not ok):
                        return None
        n0 = 0
        n1 = (i - 1)
    elif (i == 1 and sli[0].typ == StreetItemType.NUMBER):
        # "<number> <noun> ...": the leading number is the street number.
        if (not sli[0].is_whitespace_after):
            return None
        number = (sli[0].value if sli[0].number is None else str(sli[0].number.int_value))
        if (sli[0].is_number_km):
            number += "км"
        n0 = (i + 1)
        n1 = (len(sli) - 1)
        rli.append(sli[0])
        rli.append(sli[i])
    elif (after > before):
        n0 = (i + 1)
        n1 = (len(sli) - 1)
        rli.append(sli[i])
    elif (after == 0):
        return None
    elif ((len(sli) > 2 and ((sli[0].typ == StreetItemType.NAME or sli[0].typ == StreetItemType.STDADJECTIVE or sli[0].typ == StreetItemType.STDNAME)) and sli[1].typ == StreetItemType.NOUN) and sli[2].typ == StreetItemType.NUMBER):
        # "<name> <noun> <number>": decide whether the number belongs to the
        # street (num == True) or should be dropped from the list.
        n0 = 0
        n1 = 0
        num = False
        tt2 = sli[2].end_token.next0_  # not used further in this function
        if (sli[2].is_number_km):
            num = True
        elif (sli[0].begin_token.previous is not None and sli[0].begin_token.previous.isValue("КИЛОМЕТР", None)):
            sli[2].is_number_km = True
            num = True
        elif (sli[2].begin_token.previous.is_comma):
            pass
        elif (sli[2].begin_token != sli[2].end_token):
            num = True
        elif (AddressItemToken.checkHouseAfter(sli[2].end_token.next0_, False, True)):
            num = True
        elif (sli[2].morph.class0_.is_adjective and (sli[2].whitespaces_before_count < 2)):
            if (sli[2].end_token.next0_ is None or sli[2].end_token.is_comma or sli[2].is_newline_after):
                num = True
        if (num):
            number = (sli[2].value if sli[2].number is None else str(sli[2].number.int_value))
            if (sli[2].is_number_km):
                number += "км"
            rli.append(sli[2])
        else:
            # Slice end is 2 + len(sli) - 2 == len(sli): drops items 2..end.
            del sli[2:2+len(sli) - 2]
    else:
        return None
    # --- Step 4: walk items n0..n1 and pick name / number / age / adjective -
    sec_number = None
    j = n0
    # first_pass flag emulates a C-style for loop: j is advanced at the top of
    # every iteration except the first, so `continue` still increments j.
    first_pass2732 = True
    while True:
        if first_pass2732:
            first_pass2732 = False
        else:
            j += 1
        if (not (j <= n1)):
            break
        if (sli[j].typ == StreetItemType.NUMBER):
            if (age is not None or ((sli[j].is_newline_before and j > 0))):
                break
            if (number is not None):
                # A second number is accepted only next to a STDNAME item.
                if (name is not None and name.typ == StreetItemType.STDNAME):
                    sec_number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value))
                    if (sli[j].is_number_km):
                        sec_number += "км"
                    rli.append(sli[j])
                    continue
                if (((j + 1) < len(sli)) and sli[j + 1].typ == StreetItemType.STDNAME):
                    sec_number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value))
                    if (sli[j].is_number_km):
                        sec_number += "км"
                    rli.append(sli[j])
                    continue
                break
            # Plain digits (not an ordinal adjective) need positional support.
            if (sli[j].number is not None and sli[j].number.typ == NumberSpellingType.DIGIT and not sli[j].number.morph.class0_.is_adjective):
                if (sli[j].whitespaces_before_count > 2 and j > 0):
                    break
                if (sli[j].number is not None and sli[j].number.int_value > 20):
                    if (j > n0):
                        if (((j + 1) < len(sli)) and sli[j + 1].typ == StreetItemType.NOUN):
                            pass
                        else:
                            break
                if (j == n0 and n0 > 0):
                    pass
                elif (j == n0 and n0 == 0 and sli[j].whitespaces_after_count == 1):
                    pass
                elif (sli[j].number_has_prefix):
                    pass
                elif (j == n1 and ((n1 + 1) < len(sli)) and sli[n1 + 1].typ == StreetItemType.NOUN):
                    pass
                else:
                    break
            number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value))
            if (sli[j].is_number_km):
                number += "км"
            rli.append(sli[j])
        elif (sli[j].typ == StreetItemType.AGE):
            if (number is not None or age is not None):
                break
            age = str(sli[j].number.int_value)
            rli.append(sli[j])
        elif (sli[j].typ == StreetItemType.STDADJECTIVE):
            # Two standard adjectives are not allowed.
            if (adj is not None):
                return None
            adj = sli[j]
            rli.append(sli[j])
        elif (sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX):
            if (name is not None):
                if (j > 1 and sli[j - 2].typ == StreetItemType.NOUN):
                    break
                elif (i < j):
                    break
                else:
                    return None
            name = sli[j]
            rli.append(sli[j])
        elif (sli[j].typ == StreetItemType.STDPARTOFNAME and j == n1):
            if (name is not None):
                break
            name = sli[j]
            rli.append(sli[j])
        elif (sli[j].typ == StreetItemType.NOUN):
            # "УЛИЦА <noun2> ...": the second noun becomes the main type and
            # the first one is kept as an alternative type.
            if ((sli[0] == noun and ((noun.termin.canonic_text == "УЛИЦА" or noun.termin.canonic_text == "ВУЛИЦЯ")) and j > 0) and name is None):
                alt_noun = noun
                noun = sli[j]
                rli.append(sli[j])
            else:
                break
    # Post-loop fix-ups: a prefixed number right after the noun, or a trailing
    # second noun after "УЛИЦА"/"ВУЛИЦЯ" (j is the index where the loop stopped).
    if (((n1 < i) and number is None and ((i + 1) < len(sli))) and sli[i + 1].typ == StreetItemType.NUMBER and sli[i + 1].number_has_prefix):
        number = (sli[i + 1].value if sli[i + 1].number is None else str(sli[i + 1].number.int_value))
        rli.append(sli[i + 1])
    elif ((((i < n0) and ((name is not None or adj is not None)) and (j < len(sli))) and sli[j].typ == StreetItemType.NOUN and ((noun.termin.canonic_text == "УЛИЦА" or noun.termin.canonic_text == "ВУЛИЦЯ"))) and (((sli[j].termin.canonic_text == "ПЛОЩАДЬ" or sli[j].termin.canonic_text == "БУЛЬВАР" or sli[j].termin.canonic_text == "ПЛОЩА") or sli[j].termin.canonic_text == "МАЙДАН" or (j + 1) == len(sli)))):
        alt_noun = noun
        noun = sli[j]
        rli.append(sli[j])
    # Without a name there must be at least a number or a full adjective.
    if (name is None):
        if (number is None and adj is None):
            return None
        if (noun.is_abridge):
            if (is_micro_raion):
                pass
            elif (noun.termin is not None and ((noun.termin.canonic_text == "ПРОЕЗД" or noun.termin.canonic_text == "ПРОЇЗД"))):
                pass
            elif (adj is None or adj.is_abridge):
                return None
        if (adj is not None and adj.is_abridge):
            return None
    if (not sli[i] in rli):
        rli.append(sli[i])
    # --- Step 5: create the referent and the covering token -----------------
    street = StreetReferent()
    if (not for_metro):
        street.addSlot(StreetReferent.ATTR_TYP, noun.termin.canonic_text.lower(), False, 0)
        if (noun.alt_termin is not None):
            if (noun.alt_termin.canonic_text == "ПРОСПЕКТ" and number is not None):
                pass
            else:
                street.addSlot(StreetReferent.ATTR_TYP, noun.alt_termin.canonic_text.lower(), False, 0)
    else:
        street.addSlot(StreetReferent.ATTR_TYP, "метро", False, 0)
    res = AddressItemToken._new82(AddressItemToken.ItemType.STREET, rli[0].begin_token, rli[0].end_token, street)
    # Stretch the result over the earliest begin and latest end among rli.
    for r in rli:
        if (res.begin_char > r.begin_char):
            res.begin_token = r.begin_token
        if (res.end_char < r.end_char):
            res.end_token = r.end_token
    if (for_metro and noun in rli and noun.termin.canonic_text == "МЕТРО"):
        rli.remove(noun)
    if (noun.is_abridge and (noun.length_char < 4)):
        res.is_doubt = True
    elif (noun.noun_is_doubt_coef > 0):
        res.is_doubt = True
        if ((name is not None and name.end_char > noun.end_char and noun.chars.is_all_lower) and not name.chars.is_all_lower and not ((isinstance(name.begin_token, ReferentToken)))):
            npt2 = NounPhraseHelper.tryParse(name.begin_token, NounPhraseParseAttr.NO, 0)
            if (npt2 is not None and npt2.end_char > name.end_char):
                pass
            elif (AddressItemToken.checkHouseAfter(res.end_token.next0_, False, False)):
                res.is_doubt = False
            elif (name.chars.is_capital_upper and noun.noun_is_doubt_coef == 1):
                res.is_doubt = False
    # --- Step 6: build the name strings --------------------------------------
    # name_base holds the main name, name_alt / name_alt2 hold alternatives.
    name_base = io.StringIO()
    name_alt = io.StringIO()
    name_alt2 = None
    gen = noun.termin.gender
    adj_gen = MorphGender.UNDEFINED
    if (number is not None):
        street.number = number
        if (sec_number is not None):
            street.sec_number = sec_number
    if (age is not None):
        if (street.number is None):
            street.number = age
        else:
            street.sec_number = age
    if (name is not None and name.value is not None):
        # The name item already carries a ready-made value.
        if (street.kind == StreetKind.ROAD):
            for r in rli:
                if (r.typ == StreetItemType.NAME and r != name):
                    print(r.value, end="", file=name_alt)
                    break
        if (name.alt_value is not None and name_alt.tell() == 0):
            print("{0} {1}".format(Utils.toStringStringIO(name_base), name.alt_value), end="", file=name_alt, flush=True)
        print(" {0}".format(name.value), end="", file=name_base, flush=True)
    elif (name is not None):
        # No ready value: derive it from the tokens of the name item.
        is_adj = False
        if (isinstance(name.end_token, TextToken)):
            for wf in name.end_token.morph.items:
                if ((isinstance(wf, MorphWordForm)) and (wf).is_in_dictionary):
                    is_adj = (wf.class0_.is_adjective | wf.class0_.is_proper_geo)
                    adj_gen = wf.gender
                    break
                elif (wf.class0_.is_adjective | wf.class0_.is_proper_geo):
                    is_adj = True
        if (is_adj):
            # Adjective-like name: tmp collects the leading tokens, vars0_
            # collects normalized variants of the last token.
            tmp = io.StringIO()
            vars0_ = list()
            t = name.begin_token
            while t is not None:
                tt = Utils.asObjectOrNull(t, TextToken)
                if (tt is None):
                    break
                if (tmp.tell() > 0):
                    print(' ', end="", file=tmp)
                if (t == name.end_token):
                    # is_padez ("padezh" = grammatical case): True when the
                    # last token should be normalized rather than copied as is.
                    is_padez = False
                    if (not noun.is_abridge):
                        if (not noun.morph.case_.is_undefined and not noun.morph.case_.is_nominative):
                            is_padez = True
                        elif (noun.termin.canonic_text == "ШОССЕ" or noun.termin.canonic_text == "ШОСЕ"):
                            is_padez = True
                    if (res.begin_token.previous is not None and res.begin_token.previous.morph.class0_.is_preposition):
                        is_padez = True
                    if (not is_padez):
                        print(tt.term, end="", file=tmp)
                        break
                    for wf in tt.morph.items:
                        if (((wf.class0_.is_adjective or wf.class0_.is_proper_geo)) and (((wf.gender) & (gen))) != (MorphGender.UNDEFINED)):
                            if (noun.morph.case_.is_undefined or not ((wf.case_) & noun.morph.case_).is_undefined):
                                wff = Utils.asObjectOrNull(wf, MorphWordForm)
                                if (wff is None):
                                    continue
                                if (gen == MorphGender.MASCULINE and "ОЙ" in wff.normal_case):
                                    continue
                                if (not wff.normal_case in vars0_):
                                    vars0_.append(wff.normal_case)
                    if (not tt.term in vars0_ and Utils.indexOfList(sli, name, 0) > Utils.indexOfList(sli, noun, 0)):
                        vars0_.append(tt.term)
                    if (len(vars0_) == 0):
                        vars0_.append(tt.term)
                    break
                if (not tt.is_hiphen):
                    print(tt.term, end="", file=tmp)
                t = t.next0_
            if (len(vars0_) == 0):
                print(" {0}".format(Utils.toStringStringIO(tmp)), end="", file=name_base, flush=True)
            else:
                # First variant is the main name, the 2nd and 3rd are alternatives.
                head = Utils.toStringStringIO(name_base)
                print(" {0}{1}".format(Utils.toStringStringIO(tmp), vars0_[0]), end="", file=name_base, flush=True)
                if (len(vars0_) > 1):
                    Utils.setLengthStringIO(name_alt, 0)
                    print("{0} {1}{2}".format(head, Utils.toStringStringIO(tmp), vars0_[1]), end="", file=name_alt, flush=True)
                if (len(vars0_) > 2):
                    name_alt2 = "{0} {1}{2}".format(head, Utils.toStringStringIO(tmp), vars0_[2])
        else:
            # Non-adjective name: collect the words of the name into nits.
            str_nam = None
            nits = list()
            has_adj = False
            has_proper_name = False
            t = name.begin_token
            while t is not None:
                if (t.morph.class0_.is_adjective or t.morph.class0_.is_conjunction):
                    has_adj = True
                if ((isinstance(t, TextToken)) and not t.is_hiphen):
                    if (name.termin is not None):
                        nits.append(name.termin.canonic_text)
                        break
                    elif (not t.chars.is_letter and len(nits) > 0):
                        # Non-letter token is glued to the previous word.
                        nits[len(nits) - 1] += (t).term
                    else:
                        nits.append((t).term)
                        if (t == name.begin_token and t.getMorphClassInDictionary().is_proper_name):
                            has_proper_name = True
                elif ((isinstance(t, ReferentToken)) and name.termin is None):
                    nits.append(t.getSourceText().upper())
                if (t == name.end_token):
                    break
                t = t.next0_
            # Words are sorted unless an adjective/conjunction or a leading
            # proper name was seen.
            if (not has_adj and not has_proper_name):
                nits.sort()
            str_nam = Utils.joinStrings(" ", list(nits))
            if (has_proper_name and len(nits) == 2):
                Utils.setLengthStringIO(name_alt, 0)
                print("{0} {1}".format(Utils.toStringStringIO(name_base), nits[1]), end="", file=name_alt, flush=True)
            print(" {0}".format(str_nam), end="", file=name_base, flush=True)
    # --- Step 7: inflect the standard adjective to agree with the name ------
    adj_str = None
    adj_can_be_initial = False
    if (adj is not None):
        if (adj_gen == MorphGender.UNDEFINED and name is not None and (((name.morph.number) & (MorphNumber.PLURAL))) == (MorphNumber.UNDEFINED)):
            if (name.morph.gender == MorphGender.FEMINIE or name.morph.gender == MorphGender.MASCULINE or name.morph.gender == MorphGender.NEUTER):
                adj_gen = name.morph.gender
        if (name is not None and (((name.morph.number) & (MorphNumber.PLURAL))) != (MorphNumber.UNDEFINED)):
            s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new209(MorphClass.ADJECTIVE, MorphNumber.PLURAL))
        elif (adj_gen != MorphGender.UNDEFINED):
            s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, adj_gen))
        elif ((((adj.morph.gender) & (gen))) == (MorphGender.UNDEFINED)):
            s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, adj.morph.gender))
        else:
            s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, gen))
        adj_str = s
        # A short dotted, non-lowercase "adjective" before the name is flagged
        # so that several name variants are stored below.
        if (name is not None and (Utils.indexOfList(sli, adj, 0) < Utils.indexOfList(sli, name, 0))):
            if (adj.end_token.isChar('.') and adj.length_char <= 3 and not adj.begin_token.chars.is_all_lower):
                adj_can_be_initial = True
    # --- Step 8: store the name slots ---------------------------------------
    s1 = Utils.toStringStringIO(name_base).strip()
    s2 = Utils.toStringStringIO(name_alt).strip()
    if (len(s1) < 3):
        # Very short or empty name: fall back to the adjective when allowed.
        if (street.number is not None):
            if (adj_str is not None):
                if (adj.is_abridge):
                    return None
                street.addSlot(StreetReferent.ATTR_NAME, adj_str, False, 0)
        elif (adj_str is None):
            if (len(s1) < 1):
                return None
            if (is_micro_raion):
                street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
                if (not Utils.isNullOrEmpty(s2)):
                    street.addSlot(StreetReferent.ATTR_NAME, s2, False, 0)
            else:
                return None
        else:
            if (adj.is_abridge):
                return None
            street.addSlot(StreetReferent.ATTR_NAME, adj_str, False, 0)
    elif (adj_can_be_initial):
        street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
        street.addSlot(StreetReferent.ATTR_NAME, MiscHelper.getTextValue(adj.begin_token, name.end_token, GetTextAttr.NO), False, 0)
        street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0)
    elif (adj_str is None):
        street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
    else:
        street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0)
    if (name_alt.tell() > 0):
        s1 = Utils.toStringStringIO(name_alt).strip()
        if (adj_str is None):
            street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
        else:
            street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0)
    if (name_alt2 is not None):
        if (adj_str is None):
            # NOTE(review): the guard checks `noun` but the body dereferences
            # `alt_noun`, which may still be None here - confirm whether
            # `alt_noun is not None` was intended.
            if (for_metro and noun is not None):
                street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(alt_noun.termin.canonic_text, name_alt2.strip()), False, 0)
            else:
                street.addSlot(StreetReferent.ATTR_NAME, name_alt2.strip(), False, 0)
        else:
            street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, name_alt2.strip()), False, 0)
    if (name is not None and name.alt_value2 is not None):
        street.addSlot(StreetReferent.ATTR_NAME, name.alt_value2, False, 0)
    # Copy the names of an already known street attached to the name item.
    if ((name is not None and adj is None and name.exist_street is not None) and not for_metro):
        for n in name.exist_street.names:
            street.addSlot(StreetReferent.ATTR_NAME, n, False, 0)
    if (alt_noun is not None and not for_metro):
        street.addSlot(StreetReferent.ATTR_TYP, alt_noun.termin.canonic_text.lower(), False, 0)
    # --- Step 9: doubt handling and final adjustments -----------------------
    # These noun types are doubtful unless supported by the context.
    if (noun.termin.canonic_text == "ПЛОЩАДЬ" or noun.termin.canonic_text == "КВАРТАЛ" or noun.termin.canonic_text == "ПЛОЩА"):
        res.is_doubt = True
        if (name is not None and name.is_in_dictionary):
            res.is_doubt = False
        elif (alt_noun is not None or for_metro):
            res.is_doubt = False
        elif (res.begin_token.previous is None or MiscLocationHelper.checkGeoObjectBefore(res.begin_token.previous)):
            if (res.end_token.next0_ is None or AddressItemToken.checkHouseAfter(res.end_token.next0_, False, True)):
                res.is_doubt = False
    # "...ГОРОДОК" nouns are stored as type "микрорайон" with the noun text
    # prepended to every name.
    if (LanguageHelper.endsWith(noun.termin.canonic_text, "ГОРОДОК")):
        for s in street.slots:
            if (s.type_name == StreetReferent.ATTR_TYP):
                street.uploadSlot(s, "микрорайон")
            elif (s.type_name == StreetReferent.ATTR_NAME):
                street.uploadSlot(s, "{0} {1}".format(noun.termin.canonic_text, s.value))
        if (street.findSlot(StreetReferent.ATTR_NAME, None, True) is None):
            street.addSlot(StreetReferent.ATTR_NAME, noun.termin.canonic_text, False, 0)
    # A further noun right after the result (optionally after a comma) that is
    # followed by a house: absorb it as an additional type.
    t1 = res.end_token.next0_
    if (t1 is not None and t1.is_comma):
        t1 = t1.next0_
    non = StreetItemToken.tryParse(t1, None, False, None, False)
    if (non is not None and non.typ == StreetItemType.NOUN and len(street.typs) > 0):
        if (AddressItemToken.checkHouseAfter(non.end_token.next0_, False, True)):
            street._correct()
            nams = street.names
            for t in street.typs:
                for n in nams:
                    street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(t.upper(), n), False, 0)
            street.addSlot(StreetReferent.ATTR_TYP, non.termin.canonic_text.lower(), False, 0)
            res.end_token = non.end_token
    if (res.is_doubt):
        if (noun.is_road):
            # Roads are confirmed by a kilometre mark in or around the item.
            if (street.number is not None and Utils.endsWithString(street.number, "КМ", True)):
                res.is_doubt = False
            elif (AddressItemToken.checkKmAfter(res.end_token.next0_)):
                res.is_doubt = False
            elif (AddressItemToken.checkKmBefore(res.begin_token.previous)):
                res.is_doubt = False
        elif (noun.termin.canonic_text == "ПРОЕЗД" and street.findSlot(StreetReferent.ATTR_NAME, "ПРОЕКТИРУЕМЫЙ", True) is not None):
            res.is_doubt = False
        # Look back over commas / "and" for a preceding street referent.
        tt0 = res.begin_token.previous
        first_pass2733 = True
        while True:
            if first_pass2733:
                first_pass2733 = False
            else:
                tt0 = tt0.previous
            if (not (tt0 is not None)):
                break
            # NOTE(review): ",," repeats the same character - possibly a
            # different second character was intended; confirm.
            if (tt0.isCharOf(",,") or tt0.is_comma_and):
                continue
            str0 = Utils.asObjectOrNull(tt0.getReferent(), StreetReferent)
            if (str0 is not None):
                res.is_doubt = False
            break
    # "КВАРТАЛ" without a number: take a directly following number item.
    if (noun.termin.canonic_text == "КВАРТАЛ" and (res.whitespaces_after_count < 2) and number is None):
        ait = AddressItemToken.tryParse(res.end_token.next0_, None, False, True, None)
        if (ait is not None and ait.typ == AddressItemToken.ItemType.NUMBER and ait.value is not None):
            street.addSlot(StreetReferent.ATTR_NUMBER, ait.value, False, 0)
            res.end_token = ait.end_token
    return res
def tryParse(t: 'Token', typ: 'BracketParseAttr' = BracketParseAttr.NO, max_tokens: int = 100) -> 'BracketSequenceToken':
    """ Try to recover a sequence enclosed in quotes or brackets.

    Starting from the opening bracket at ``t``, the function scans forward,
    collects every bracket-like token into a list, and then resolves that
    list into an outer sequence with optional nested (internal) sequences.

    Args:
        t(Token): the token expected to open the sequence
        typ(BracketParseAttr): extraction parameters (bit flags)
        max_tokens(int): maximum number of non-bracket tokens to scan
            (in case the closing quote was forgotten)

    Returns:
        A ``BracketSequenceToken``, or None when ``t`` cannot start a
        sequence or no acceptable closing bracket is found.
    """
    # NOTE(review): the source of this function was whitespace-collapsed; the
    # nesting below was reconstructed from statement order - confirm against
    # the upstream file before relying on exact branch structure.
    t0 = t
    cou = 0
    if (not BracketHelper.canBeStartOfSequence(t0, False, False)):
        return None
    # br_list: all bracket tokens met so far, the opening one first.
    br_list = list()
    br_list.append(BracketHelper.Bracket(t0))
    cou = 0   # number of non-bracket tokens scanned
    crlf = 0  # number of line breaks crossed
    last = None
    lev = 1   # nesting level, tracked only when is_assim is True
    # True when the opening char is listed in M_ASSYMOPEN_CHARS and is not '«'.
    is_assim = br_list[0].char0_ != '«' and BracketHelper.M_ASSYMOPEN_CHARS.find(br_list[0].char0_) >= 0
    # --- Phase 1: scan forward and collect brackets --------------------------
    t = t0.next0_
    # first_pass flag emulates a C-style for loop so that `continue` still
    # advances t.
    first_pass2802 = True
    while True:
        if first_pass2802:
            first_pass2802 = False
        else:
            t = t.next0_
        if (not (t is not None)):
            break
        if (t.is_table_control_char):
            break
        last = t
        if (t.isCharOf(BracketHelper.M_OPEN_CHARS) or t.isCharOf(BracketHelper.M_CLOSE_CHARS)):
            # A bracket on a new line (single-line mode) that is far indented
            # or can itself start a sequence ends the scan, except for '('
            # when the sequence did not start with '('.
            if (t.is_newline_before and (((typ) & (BracketParseAttr.CANBEMANYLINES))) == (BracketParseAttr.NO)):
                if (t.whitespaces_before_count > 10 or BracketHelper.canBeStartOfSequence(t, False, False)):
                    if (t.isChar('(') and not t0.isChar('(')):
                        pass
                    else:
                        last = t.previous
                        break
            bb = BracketHelper.Bracket(t)
            br_list.append(bb)
            if (len(br_list) > 20):
                break
            # Third bracket closes both previous ones: look ahead on the same
            # line to decide whether scanning should go on.
            if ((len(br_list) == 3 and br_list[1].can_be_open and bb.can_be_close) and BracketHelper.__mustBeCloseChar(bb.char0_, br_list[1].char0_) and BracketHelper.__mustBeCloseChar(bb.char0_, br_list[0].char0_)):
                ok = False
                tt = t.next0_
                while tt is not None:
                    if (tt.is_newline_before):
                        break
                    if (tt.isChar(',')):
                        break
                    if (tt.isChar('.')):
                        tt = tt.next0_
                        while tt is not None:
                            if (tt.is_newline_before):
                                break
                            elif (tt.isCharOf(BracketHelper.M_OPEN_CHARS) or tt.isCharOf(BracketHelper.M_CLOSE_CHARS)):
                                bb2 = BracketHelper.Bracket(tt)
                                if (BracketHelper.canBeEndOfSequence(tt, False, None, False) and BracketHelper.__canBeCloseChar(bb2.char0_, br_list[0].char0_)):
                                    ok = True
                                break
                            tt = tt.next0_
                        break
                    # NOTE(review): this tests `t` (the current bracket, for
                    # which the condition already holds) rather than the
                    # look-ahead token `tt` - possibly a typo; confirm intent.
                    if (t.isCharOf(BracketHelper.M_OPEN_CHARS) or t.isCharOf(BracketHelper.M_CLOSE_CHARS)):
                        ok = True
                        break
                    tt = tt.next0_
                if (not ok):
                    break
            if (is_assim):
                # Track nesting depth of the opening bracket kind.
                if (bb.can_be_open and not bb.can_be_close and bb.char0_ == br_list[0].char0_):
                    lev += 1
                elif (bb.can_be_close and not bb.can_be_open and BracketHelper.M_OPEN_CHARS.find(br_list[0].char0_) == BracketHelper.M_CLOSE_CHARS.find(bb.char0_)):
                    lev -= 1
                    if (lev == 0):
                        break
        else:
            cou += 1
            if ((cou) > max_tokens):
                break
            if ((((typ) & (BracketParseAttr.CANCONTAINSVERBS))) == (BracketParseAttr.NO)):
                # Unless verbs are allowed, certain lowercase verbs stop the scan.
                if (t.morph.language.is_cyrillic):
                    if (t.getMorphClassInDictionary() == MorphClass.VERB):
                        if (not t.morph.class0_.is_adjective and not t.morph.containsAttr("страд.з.", None)):
                            if (t.chars.is_all_lower):
                                norm = t.getNormalCaseText(None, False, MorphGender.UNDEFINED, False)
                                if (not LanguageHelper.endsWith(norm, "СЯ")):
                                    if (len(br_list) > 1):
                                        break
                                    if (br_list[0].char0_ != '('):
                                        break
                elif (t.morph.language.is_en):
                    if (t.morph.class0_ == MorphClass.VERB and t.chars.is_all_lower):
                        break
                # An ADDRESS referent stops the scan unless it started with '('.
                r = t.getReferent()
                if (r is not None and r.type_name == "ADDRESS"):
                    if (not t0.isChar('(')):
                        break
        if ((((typ) & (BracketParseAttr.CANBEMANYLINES))) != (BracketParseAttr.NO)):
            # Multi-line mode: only an empty line (more than one newline) stops.
            if (t.is_newline_before):
                if (t.newlines_before_count > 1):
                    break
                crlf += 1
            continue
        if (t.is_newline_before):
            if (t.whitespaces_before_count > 15):
                break
            crlf += 1
            if (not t.chars.is_all_lower):
                if (t.previous is not None and t.previous.isChar('.')):
                    break
            if ((isinstance(t.previous, MetaToken)) and BracketHelper.canBeEndOfSequence((t.previous).end_token, False, None, False)):
                break
        if (crlf > 1):
            if (len(br_list) > 1):
                break
            if (crlf > 10):
                break
        if (t.isChar(';') and t.is_newline_after):
            break
    # Only the opening bracket was seen, but the last scanned token is a
    # MetaToken whose final token can close a sequence.
    if ((len(br_list) == 1 and br_list[0].can_be_open and (isinstance(last, MetaToken))) and last.is_newline_after):
        if (BracketHelper.canBeEndOfSequence((last).end_token, False, None, False)):
            return BracketSequenceToken(t0, last)
    if (len(br_list) < 1):
        return None
    # --- Phase 2: normalise the collected bracket list -----------------------
    # Adjacent '<' '>' pairs are marked as an open/close pair.
    i = 1
    while i < (len(br_list) - 1):
        if (br_list[i].char0_ == '<' and br_list[i + 1].char0_ == '>'):
            br_list[i].can_be_open = True
            br_list[i + 1].can_be_close = True
        i += 1
    internals = None
    # Drop trailing open/close pairs that cannot close the first bracket.
    while len(br_list) > 3:
        i = len(br_list) - 1
        if ((br_list[i].can_be_close and br_list[i - 1].can_be_open and not BracketHelper.__canBeCloseChar(br_list[i].char0_, br_list[0].char0_)) and BracketHelper.__canBeCloseChar(br_list[i].char0_, br_list[i - 1].char0_)):
            del br_list[len(br_list) - 2:len(br_list) - 2 + 2]
            continue
        break
    # Repeatedly extract an inner strict open/close pair into `internals`.
    while len(br_list) >= 4:
        changed = False
        i = 1
        while i < (len(br_list) - 2):
            if ((br_list[i].can_be_open and not br_list[i].can_be_close and br_list[i + 1].can_be_close) and not br_list[i + 1].can_be_open):
                ok = False
                if (BracketHelper.__mustBeCloseChar(br_list[i + 1].char0_, br_list[i].char0_) or br_list[i].char0_ != br_list[0].char0_):
                    ok = True
                    if ((i == 1 and ((i + 2) < len(br_list)) and br_list[i + 2].char0_ == ')') and br_list[i + 1].char0_ != ')' and BracketHelper.__canBeCloseChar(br_list[i + 1].char0_, br_list[i - 1].char0_)):
                        br_list[i + 2] = br_list[i + 1]
                elif (i > 1 and ((i + 2) < len(br_list)) and BracketHelper.__mustBeCloseChar(br_list[i + 2].char0_, br_list[i - 1].char0_)):
                    ok = True
                if (ok):
                    if (internals is None):
                        internals = list()
                    internals.append(BracketSequenceToken(br_list[i].source, br_list[i + 1].source))
                    del br_list[i:i + 2]
                    changed = True
                    break
            i += 1
        if (not changed):
            break
    # --- Phase 3: build the result from the remaining brackets ---------------
    res = None
    # Four brackets: outer pair [0]..[3] with an inner pair [1]..[2].
    if ((len(br_list) >= 4 and br_list[1].can_be_open and br_list[2].can_be_close) and br_list[3].can_be_close and not br_list[3].can_be_open):
        if (BracketHelper.__canBeCloseChar(br_list[3].char0_, br_list[0].char0_)):
            res = BracketSequenceToken(br_list[0].source, br_list[3].source)
            if (br_list[0].source.next0_ != br_list[1].source or br_list[2].source.next0_ != br_list[3].source):
                res.internal.append(BracketSequenceToken(br_list[1].source, br_list[2].source))
            if (internals is not None):
                res.internal.extend(internals)
    # Three brackets: decide between [0]..[1] and [0]..[2].
    if ((res is None and len(br_list) >= 3 and br_list[2].can_be_close) and not br_list[2].can_be_open):
        if ((((typ) & (BracketParseAttr.NEARCLOSEBRACKET))) != (BracketParseAttr.NO)):
            if (BracketHelper.__canBeCloseChar(br_list[1].char0_, br_list[0].char0_)):
                return BracketSequenceToken(br_list[0].source, br_list[1].source)
        ok = True
        if (BracketHelper.__canBeCloseChar(br_list[2].char0_, br_list[0].char0_) and BracketHelper.__canBeCloseChar(br_list[1].char0_, br_list[0].char0_) and br_list[1].can_be_close):
            # Both [1] and [2] could close [0]: inspect the text between them.
            t = br_list[1].source
            while t != br_list[2].source and t is not None:
                if (t.is_newline_before):
                    ok = False
                    break
                if (t.chars.is_letter and t.chars.is_all_lower):
                    ok = False
                    break
                npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
                if (npt is not None):
                    t = npt.end_token
                t = t.next0_
            if (ok):
                # A line break between [0] and [1] cuts the sequence there.
                t = br_list[0].source.next0_
                while t != br_list[1].source and t is not None:
                    if (t.is_newline_before):
                        return BracketSequenceToken(br_list[0].source, t.previous)
                    t = t.next0_
            # Scan backwards on the same line, counting single-character
            # tokens that pair with [0] / [1]; a negative balance selects
            # the short sequence [0]..[1].
            lev1 = 0
            tt = br_list[0].source.previous
            first_pass2803 = True
            while True:
                if first_pass2803:
                    first_pass2803 = False
                else:
                    tt = tt.previous
                if (not (tt is not None)):
                    break
                if (tt.is_newline_after or tt.is_table_control_char):
                    break
                if (not ((isinstance(tt, TextToken)))):
                    continue
                if (tt.chars.is_letter or tt.length_char > 1):
                    continue
                ch = (tt).term[0]
                if (BracketHelper.__canBeCloseChar(ch, br_list[0].char0_)):
                    lev1 += 1
                elif (BracketHelper.__canBeCloseChar(br_list[1].char0_, ch)):
                    lev1 -= 1
                    if (lev1 < 0):
                        return BracketSequenceToken(br_list[0].source, br_list[1].source)
        if (ok and BracketHelper.__canBeCloseChar(br_list[2].char0_, br_list[0].char0_)):
            intern = BracketSequenceToken(br_list[1].source, br_list[2].source)
            res = BracketSequenceToken(br_list[0].source, br_list[2].source)
            res.internal.append(intern)
        elif (ok and BracketHelper.__canBeCloseChar(br_list[2].char0_, br_list[1].char0_) and br_list[0].can_be_open):
            if (BracketHelper.__canBeCloseChar(br_list[2].char0_, br_list[0].char0_)):
                intern = BracketSequenceToken(br_list[1].source, br_list[2].source)
                res = BracketSequenceToken(br_list[0].source, br_list[2].source)
                res.internal.append(intern)
            elif (len(br_list) == 3):
                return None
    # Two-bracket fallbacks, from the strictest to the loosest test.
    if (res is None and len(br_list) > 1 and br_list[1].can_be_close):
        res = BracketSequenceToken(br_list[0].source, br_list[1].source)
    if (res is None and len(br_list) > 1 and BracketHelper.__canBeCloseChar(br_list[1].char0_, br_list[0].char0_)):
        res = BracketSequenceToken(br_list[0].source, br_list[1].source)
    if (res is None and len(br_list) == 2 and br_list[0].char0_ == br_list[1].char0_):
        res = BracketSequenceToken(br_list[0].source, br_list[1].source)
    # Attach extracted inner pairs that start before the end of the result.
    if (res is not None and internals is not None):
        for i in internals:
            if (i.begin_char < res.end_char):
                res.internal.append(i)
    # --- Phase 4: last resort - closing bracket hidden inside a MetaToken ----
    if (res is None):
        cou = 0
        tt = t0.next0_
        first_pass2804 = True
        while True:
            if first_pass2804:
                first_pass2804 = False
            else:
                tt = tt.next0_
                cou += 1
            if (not (tt is not None)):
                break
            if (tt.is_table_control_char):
                break
            if (MiscHelper.canBeStartOfSentence(tt)):
                break
            if (max_tokens > 0 and cou > max_tokens):
                break
            mt = Utils.asObjectOrNull(tt, MetaToken)
            if (mt is None):
                continue
            if (isinstance(mt.end_token, TextToken)):
                if ((mt.end_token).isCharOf(BracketHelper.M_CLOSE_CHARS)):
                    bb = BracketHelper.Bracket(Utils.asObjectOrNull(mt.end_token, TextToken))
                    if (bb.can_be_close and BracketHelper.__canBeCloseChar(bb.char0_, br_list[0].char0_)):
                        return BracketSequenceToken(t0, tt)
    return res
def __compareForms(self, x: 'MorphWordForm', y: 'MorphWordForm') -> int:
    """Three-way comparison of two word forms of this token.

    Each form is represented by its ``normal_full`` (falling back to
    ``normal_case``); the result depends on those strings, on the forms'
    morphological classes, on dictionary membership and on whether the
    token is written entirely in lower case.

    Returns -1, 0 or 1 in comparator style (presumably a negative value
    means ``x`` is ranked ahead of ``y`` -- confirm against the caller).
    """
    norm_x = Utils.ifNotNull(x.normal_full, x.normal_case)
    norm_y = Utils.ifNotNull(y.normal_full, y.normal_case)
    if (norm_x == norm_y):
        return 0
    # A form without a normal value always loses to one that has it.
    if (Utils.isNullOrEmpty(norm_x)):
        return 1
    if (Utils.isNullOrEmpty(norm_y)):
        return -1

    # -1 / 0 / +1 when x's normal form is shorter / equal / longer than y's.
    by_length = (len(norm_x) > len(norm_y)) - (len(norm_x) < len(norm_y))

    # Surname forms with typical endings, for tokens that are not all lower case.
    if (x.class0_.is_proper_surname and not self.char_info.is_all_lower
            and LanguageHelper.endsWithEx(norm_x, "ОВ", "ЕВ", "ИН", None)
            and not y.class0_.is_proper_surname):
        return -1
    if (y.class0_.is_proper_surname and not self.char_info.is_all_lower
            and LanguageHelper.endsWithEx(norm_y, "ОВ", "ЕВ", "ИН", None)):
        if (not x.class0_.is_proper_surname):
            return 1
        # Both are surnames: here the LONGER normal form goes first.
        return -by_length

    if (x.class0_ == y.class0_):
        if (x.class0_.is_adjective):
            x_final_j = norm_x[-1] == 'Й'
            y_final_j = norm_y[-1] == 'Й'
            if (x_final_j != y_final_j):
                return -1 if x_final_j else 1
            x_oj = LanguageHelper.endsWith(norm_x, "ОЙ")
            y_oj = LanguageHelper.endsWith(norm_y, "ОЙ")
            if (x_oj and not y_oj):
                return 1
            if (y_oj and not x_oj):
                return -1
        if (x.class0_.is_noun):
            if (x.number == MorphNumber.SINGULAR and y.number == MorphNumber.PLURAL
                    and len(norm_x) <= (len(norm_y) + 1)):
                return -1
            if (x.number == MorphNumber.PLURAL and y.number == MorphNumber.SINGULAR
                    and len(norm_x) >= (len(norm_y) - 1)):
                return 1
        # Same class and no special rule fired: the shorter form goes first.
        return by_length

    # From here on the two forms belong to different classes.
    if (x.class0_.is_adverb):
        return 1
    if (x.class0_.is_noun and x.is_in_dictionary):
        if (y.class0_.is_adjective and y.is_in_dictionary
                and "к.ф." not in y.misc.attrs):
            return 1
        return -1
    if (x.class0_.is_adjective):
        if (not x.is_in_dictionary and y.class0_.is_noun and y.is_in_dictionary):
            return 1
        return -1
    if (x.class0_.is_verb):
        y_is_nominal = (y.class0_.is_noun or y.class0_.is_adjective
                        or y.class0_.is_preposition)
        return 1 if y_is_nominal else -1

    if (y.class0_.is_adverb):
        return -1
    if (y.class0_.is_noun and y.is_in_dictionary):
        return 1
    if (y.class0_.is_adjective):
        if ((x.class0_.is_noun or x.class0_.is_proper_secname) and x.is_in_dictionary):
            return -1
        if (x.class0_.is_noun and not y.is_in_dictionary
                and len(norm_x) < len(norm_y)):
            return -1
        return 1
    if (y.class0_.is_verb):
        if (x.class0_.is_noun or x.class0_.is_adjective or x.class0_.is_preposition):
            return -1
        if (x.class0_.is_proper):
            return -1
        return 1

    return by_length
def canBeEquals(self, obj: 'Referent', typ: 'EqualType') -> bool:
    """Tell whether ``obj`` may be treated as equal to this geo referent.

    ``obj`` must be a ``GeoReferent``; anything else yields False.
    ``typ`` is not inspected here, it is only forwarded to the recursive
    comparison of the ``higher`` referents.
    """
    other = Utils.asObjectOrNull(obj, GeoReferent)
    if (other is None):
        return False
    # A matching alpha2 value settles the question immediately.
    if (other.alpha2 is not None and other.alpha2 == self.alpha2):
        return True
    if (self.is_city != other.is_city or self.is_union != other.is_union):
        return False

    if (self.is_union):
        # Unions are equal when their ATTR_REF slots match in both directions.
        ref_attr = GeoReferent.ATTR_REF
        for slot in self.slots:
            if (slot.type_name == ref_attr
                    and obj.findSlot(ref_attr, slot.value, True) is None):
                return False
        for slot in obj.slots:
            if (slot.type_name == ref_attr
                    and self.findSlot(ref_attr, slot.value, True) is None):
                return False
        return True

    own_ref = Utils.asObjectOrNull(self.getSlotValue(GeoReferent.ATTR_REF), Referent)
    other_ref = Utils.asObjectOrNull(other.getSlotValue(GeoReferent.ATTR_REF), Referent)
    if (own_ref is not None and other_ref is not None and own_ref != other_ref):
        return False

    own_is_area = self.is_region or self.is_state
    other_is_area = other.is_region or other.is_state
    if (own_is_area != other_is_area):
        # NOTE(review): both outcomes of the territory comparison reject the
        # match; the comparison is retained exactly as the original evaluates it.
        if (self.is_territory != other.is_territory):
            return False
        return False

    # At least one name of this referent must also be present on the other one.
    shares_name = any(
        other.findSlot(slot.type_name, slot.value, True) is not None
        for slot in self.slots
        if slot.type_name == GeoReferent.ATTR_NAME
    )
    if (not shares_name):
        return False

    if (self.is_region and other.is_region):
        own_typs = self.typs
        other_typs = other.typs
        compatible = False
        for own_typ in own_typs:
            if (own_typ in other_typs):
                compatible = True
                continue
            # Otherwise accept a type that is a suffix of the other one.
            for other_typ in other_typs:
                if (LanguageHelper.endsWith(other_typ, own_typ)
                        or LanguageHelper.endsWith(own_typ, other_typ)):
                    compatible = True
        if (not compatible):
            return False

    if (self.higher is not None and other.higher is not None):
        if (GeoReferent.__checkRoundDep(self) or GeoReferent.__checkRoundDep(other)):
            return False
        # Parents must agree directly, or with one level skipped on either side.
        parents_match = (
            self.higher.canBeEquals(other.higher, typ)
            or (other.higher.higher is not None
                and self.higher.canBeEquals(other.higher.higher, typ))
            or (self.higher.higher is not None
                and self.higher.higher.canBeEquals(other.higher, typ))
        )
        if (not parents_match):
            return False
    return True
def tryParse(t: 'Token', items: typing.List['NounPhraseItem'], attrs: 'NounPhraseParseAttr') -> 'NounPhraseItem':
    """Try to read one noun-phrase item (adjective and/or noun candidate) starting at ``t``.

    Args:
        t: first token of the candidate item; ``None`` yields ``None``.
        items: items already collected for the phrase (may be ``None`` or empty);
            used for agreement checks through ``tryAccordVariant``.
        attrs: ``NounPhraseParseAttr`` flags that switch individual checks on or off.

    Returns:
        A ``NounPhraseItem`` with ``adj_morph`` / ``noun_morph`` filled in,
        or ``None`` when the token is rejected.

    NOTE(review): the block nesting below was restored from a
    whitespace-collapsed listing -- confirm against the upstream file.
    """
    if (t is None):
        return None
    t0 = t
    _can_be_surname = False
    _is_doubt_adj = False
    rt = Utils.asObjectOrNull(t, ReferentToken)
    # A referent token that wraps a single token is parsed through that inner token,
    # then the result is re-anchored on the referent token itself.
    if (rt is not None and rt.begin_token == rt.end_token):
        res = NounPhraseItem.tryParse(rt.begin_token, items, attrs)
        if (res is not None):
            res.begin_token = res.end_token = t
            return res
    # A referent token that follows already collected items becomes a noun candidate
    # whose normal value is the string form of the referent.
    if (rt is not None and items is not None and len(items) > 0):
        res = NounPhraseItem(t, t)
        for m in t.morph.items:
            v = NounPhraseItemTextVar(m, None)
            v.normal_value = str(t.getReferent())
            res.noun_morph.append(v)
        res.can_be_noun = True
        return res
    if (isinstance(t, NumberToken)):
        pass
    has_legal_verb = False
    if (isinstance(t, TextToken)):
        if (not t.chars.is_letter):
            return None
        str0_ = (t).term
        # Extra screening for words ending in 'А' / 'О': dictionary verbs and adverbs
        # are rejected, with the exceptions listed below.
        if (str0_[len(str0_) - 1] == 'А' or str0_[len(str0_) - 1] == 'О'):
            for wf in t.morph.items:
                if ((isinstance(wf, MorphWordForm)) and (wf).is_in_dictionary):
                    if (wf.class0_.is_verb):
                        mc = t.getMorphClassInDictionary()
                        if (not mc.is_noun and (((attrs) & (NounPhraseParseAttr.IGNOREPARTICIPLES))) == (NounPhraseParseAttr.NO)):
                            if (not LanguageHelper.endsWithEx(str0_, "ОГО", "ЕГО", None, None)):
                                return None
                        has_legal_verb = True
                    if (wf.class0_.is_adverb):
                        if (t.next0_ is None or not t.next0_.is_hiphen):
                            # These adverb-like words are still allowed through.
                            if ((str0_ == "ВСЕГО" or str0_ == "ДОМА" or str0_ == "НЕСКОЛЬКО") or str0_ == "МНОГО" or str0_ == "ПОРЯДКА"):
                                pass
                            else:
                                return None
                    if (wf.class0_.is_adjective):
                        # "к.ф." attribute (presumably "short form" -- confirm): the adjective
                        # reading is marked doubtful unless the dictionary class is exactly ADJECTIVE.
                        if (wf.containsAttr("к.ф.", None)):
                            if (t.getMorphClassInDictionary() == MorphClass.ADJECTIVE):
                                pass
                            else:
                                _is_doubt_adj = True
        mc0 = t.morph.class0_
        # Surname screening for tokens that are not all lower case.
        if (mc0.is_proper_surname and not t.chars.is_all_lower):
            for wf in t.morph.items:
                if (wf.class0_.is_proper_surname and wf.number != MorphNumber.PLURAL):
                    wff = Utils.asObjectOrNull(wf, MorphWordForm)
                    if (wff is None):
                        continue
                    s = Utils.ifNotNull((Utils.ifNotNull(wff.normal_full, wff.normal_case)), "")
                    if (LanguageHelper.endsWithEx(s, "ИН", "ЕН", "ЫН", None)):
                        if (not wff.is_in_dictionary):
                            _can_be_surname = True
                        else:
                            return None
                    if (wff.is_in_dictionary and LanguageHelper.endsWith(s, "ОВ")):
                        _can_be_surname = True
        # Dictionary proper names are rejected unless one of the exceptions below applies.
        if (mc0.is_proper_name and not t.chars.is_all_lower):
            for wff in t.morph.items:
                wf = Utils.asObjectOrNull(wff, MorphWordForm)
                if (wf is None):
                    continue
                if (wf.normal_case == "ГОР"):
                    continue
                if (wf.class0_.is_proper_name and wf.is_in_dictionary):
                    if (wf.normal_case is None or not wf.normal_case.startswith("ЛЮБ")):
                        if (mc0.is_adjective and t.morph.containsAttr("неизм.", None)):
                            pass
                        elif ((((attrs) & (NounPhraseParseAttr.REFERENTCANBENOUN))) == (NounPhraseParseAttr.REFERENTCANBENOUN)):
                            pass
                        else:
                            # Only accepted right after a standard adjective.
                            if (items is None or (len(items) < 1)):
                                return None
                            if (not items[0].is_std_adjective):
                                return None
        # A single-variant adjective carrying the "в.ср.ст." attribute is rejected.
        if (mc0.is_adjective and t.morph.items_count == 1):
            if (t.morph.getIndexerItem(0).containsAttr("в.ср.ст.", None)):
                return None
        mc1 = t.getMorphClassInDictionary()
        if (mc1 == MorphClass.VERB):
            return None
        # With IGNOREPARTICIPLES set, verb forms carrying "дейст.з." are rejected
        # unless the term ends with "СЯ".
        if (((((attrs) & (NounPhraseParseAttr.IGNOREPARTICIPLES))) == (NounPhraseParseAttr.IGNOREPARTICIPLES) and t.morph.class0_.is_verb and not t.morph.class0_.is_noun) and not t.morph.class0_.is_proper):
            for wf in t.morph.items:
                if (wf.class0_.is_verb):
                    if (wf.containsAttr("дейст.з.", None)):
                        if (LanguageHelper.endsWith((t).term, "СЯ")):
                            pass
                        else:
                            return None
    t1 = None
    # Two attempts: k == 0 may jump over a hyphen to the second part of a compound;
    # k == 1 retries from t1 (set by the "БИЗНЕС" case at the bottom) or from t0.
    for k in range(2):
        t = (Utils.ifNotNull(t1, t0))
        if (k == 0):
            if ((((isinstance(t0, TextToken))) and t0.next0_ is not None and t0.next0_.is_hiphen) and t0.next0_.next0_ is not None):
                if (not t0.is_whitespace_after and not t0.morph.class0_.is_pronoun):
                    if (not t0.next0_.is_whitespace_after):
                        t = t0.next0_.next0_
                    elif (t0.next0_.next0_.chars.is_all_lower and LanguageHelper.endsWith((t0).term, "О")):
                        t = t0.next0_.next0_
        it = NounPhraseItem._new470(t0, t, _can_be_surname)
        if (t0 == t and (isinstance(t0, ReferentToken))):
            it.can_be_noun = True
            it.morph = MorphCollection(t0.morph)
        can_be_prepos = False
        # Classify every morphological variant of t as adjective and/or noun candidate.
        for v in t.morph.items:
            wf = Utils.asObjectOrNull(v, MorphWordForm)
            if (v.class0_.is_preposition):
                can_be_prepos = True
            # --- adjective candidate ---
            if (v.class0_.is_adjective or ((v.class0_.is_pronoun and not v.class0_.is_personal_pronoun)) or ((v.class0_.is_noun and (isinstance(t, NumberToken))))):
                if (NounPhraseItem.tryAccordVariant(items, (0 if items is None else len(items)), v)):
                    is_doub = False  # never read afterwards
                    if (v.containsAttr("к.ф.", None)):
                        continue
                    if (v.containsAttr("собир.", None) and not ((isinstance(t, NumberToken)))):
                        if (wf is not None and wf.is_in_dictionary):
                            return None
                        continue
                    if (v.containsAttr("сравн.", None)):
                        continue
                    ok = True
                    if (isinstance(t, TextToken)):
                        s = (t).term
                        if (s == "ПРАВО" or s == "ПРАВА"):
                            ok = False
                        elif (LanguageHelper.endsWith(s, "ОВ") and t.getMorphClassInDictionary().is_noun):
                            ok = False
                        elif (wf is not None and ((wf.normal_case == "САМ" or wf.normal_case == "ТО"))):
                            ok = False
                    elif (isinstance(t, NumberToken)):
                        if (v.class0_.is_noun and t.morph.class0_.is_adjective):
                            ok = False
                        elif (t.morph.class0_.is_noun and (((attrs) & (NounPhraseParseAttr.PARSENUMERICASADJECTIVE))) == (NounPhraseParseAttr.NO)):
                            ok = False
                    if (ok):
                        it.adj_morph.append(NounPhraseItemTextVar(v, t))
                        it.can_be_adj = True
                        if (_is_doubt_adj and t0 == t):
                            it.is_doubt_adjective = True
                        if (has_legal_verb and wf is not None and wf.is_in_dictionary):
                            it.can_be_noun = True
            # --- noun candidate ---
            can_be_noun_ = False
            if (isinstance(t, NumberToken)):
                pass
            elif (v.class0_.is_noun or ((wf is not None and wf.normal_case == "САМ"))):
                can_be_noun_ = True
            elif (v.class0_.is_personal_pronoun):
                if (items is None or len(items) == 0):
                    can_be_noun_ = True
                else:
                    # A personal pronoun after a verb item rejects the whole parse.
                    for it1 in items:
                        if (it1.is_verb):
                            return None
                    if (len(items) == 1):
                        if (items[0].can_be_adj_for_personal_pronoun):
                            can_be_noun_ = True
            elif ((v.class0_.is_pronoun and ((items is None or len(items) == 0 or ((len(items) == 1 and items[0].can_be_adj_for_personal_pronoun)))) and wf is not None) and ((((wf.normal_case == "ТОТ" or wf.normal_full == "ТО" or wf.normal_case == "ТО") or wf.normal_case == "ЭТО" or wf.normal_case == "ВСЕ") or wf.normal_case == "ЧТО" or wf.normal_case == "КТО"))):
                if (wf.normal_case == "ВСЕ"):
                    # "ВСЕ" followed by "РАВНО" is not treated as a noun phrase.
                    if (t.next0_ is not None and t.next0_.isValue("РАВНО", None)):
                        return None
                can_be_noun_ = True
            elif (wf is not None and ((Utils.ifNotNull(wf.normal_full, wf.normal_case))) == "КОТОРЫЙ"):
                return None
            elif (v.class0_.is_proper and (isinstance(t, TextToken))):
                if (t.length_char > 4 or v.class0_.is_proper_name):
                    can_be_noun_ = True
            if (can_be_noun_):
                if (NounPhraseItem.tryAccordVariant(items, (0 if items is None else len(items)), v)):
                    it.noun_morph.append(NounPhraseItemTextVar(v, t))
                    it.can_be_noun = True
        # When the hyphen jump happened, let every variant account for the prefix token t0.
        if (t0 != t):
            for v in it.adj_morph:
                v.correctPrefix(Utils.asObjectOrNull(t0, TextToken), False)
            for v in it.noun_morph:
                v.correctPrefix(Utils.asObjectOrNull(t0, TextToken), True)
        # Second pass, noun only: extend the item over the following part and
        # append that part's normal text to normal values that have no '-' yet.
        if (k == 1 and it.can_be_noun and not it.can_be_adj):
            if (t1 is not None):
                it.end_token = t1
            else:
                it.end_token = t0.next0_.next0_
            for v in it.noun_morph:
                if (v.normal_value is not None and (v.normal_value.find('-') < 0)):
                    v.normal_value = "{0}-{1}".format(v.normal_value, it.end_token.getNormalCaseText(None, False, MorphGender.UNDEFINED, False))
        if (it.can_be_adj):
            if (NounPhraseItem.__m_std_adjectives.tryParse(it.begin_token, TerminParseAttr.NO) is not None):
                it.is_std_adjective = True
        # The token may also be a preposition: reject the noun reading when a
        # noun phrase can be parsed around / after it (conditions below).
        if (can_be_prepos and it.can_be_noun):
            if (items is not None and len(items) > 0):
                npt1 = NounPhraseHelper.tryParse(t, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.PARSEPRONOUNS) | (NounPhraseParseAttr.PARSEVERBS), NounPhraseParseAttr), 0)
                if (npt1 is not None and npt1.end_char > t.end_char):
                    return None
            else:
                npt1 = NounPhraseHelper.tryParse(t.next0_, Utils.valToEnum((NounPhraseParseAttr.PARSEPRONOUNS) | (NounPhraseParseAttr.PARSEVERBS), NounPhraseParseAttr), 0)
                if (npt1 is not None):
                    mc = LanguageHelper.getCaseAfterPreposition((t).lemma)
                    if (not ((mc) & npt1.morph.case_).is_undefined):
                        return None
        if (it.can_be_noun or it.can_be_adj or k == 1):
            # Pronouns may absorb a following particle, optionally attached by a hyphen.
            if (it.begin_token.morph.class0_.is_pronoun):
                tt2 = it.end_token.next0_
                if ((tt2 is not None and tt2.is_hiphen and not tt2.is_whitespace_after) and not tt2.is_whitespace_before):
                    tt2 = tt2.next0_
                if (isinstance(tt2, TextToken)):
                    ss = (tt2).term
                    if ((ss == "ЖЕ" or ss == "БЫ" or ss == "ЛИ") or ss == "Ж"):
                        it.end_token = tt2
                    elif (ss == "НИБУДЬ" or ss == "ЛИБО" or (((ss == "ТО" and tt2.previous.is_hiphen)) and it.can_be_adj)):
                        it.end_token = tt2
                        # These particles also become part of the adjective normal values.
                        for m in it.adj_morph:
                            m.normal_value = "{0}-{1}".format(m.normal_value, ss)
                            if (m.single_number_value is not None):
                                m.single_number_value = "{0}-{1}".format(m.single_number_value, ss)
            return it
        if (t0 == t):
            # "БИЗНЕС" followed by a token with the same character class: retry with that token as t1.
            if (t0.isValue("БИЗНЕС", None) and t0.next0_ is not None and t0.next0_.chars == t0.chars):
                t1 = t0.next0_
                continue
            return it
    return None
def __try1(li: typing.List['CityItemToken'], oi: 'IntOntologyItem', ad: 'AnalyzerDataWithOntology') -> 'ReferentToken':
    """Try to build a city ``ReferentToken`` from the head of the item list ``li``.

    Args:
        li: parsed city items; must start with a CITY item, or be exactly
            PROPERNAME + NOUN. The list is truncated in place to the consumed items.
        oi: output holder; ``oi.value`` is reset and then set to ``li[0].onto_item``.
        ad: analyzer data; only ``ad.local_ontology`` is read here (may be ``None``).

    Returns:
        The resulting ``ReferentToken`` or ``None`` when the items are rejected.

    NOTE(review): the block nesting below was restored from a
    whitespace-collapsed listing -- confirm against the upstream file.
    """
    oi.value = (None)
    if (li is None or (len(li) < 1)):
        return None
    elif (li[0].typ != CityItemToken.ItemType.CITY):
        if (len(li) != 2 or li[0].typ != CityItemToken.ItemType.PROPERNAME or li[1].typ != CityItemToken.ItemType.NOUN):
            return None
    i = 1  # number of items of li consumed so far
    oi.value = li[0].onto_item
    ok = not li[0].doubtful
    # An ontology item owned by neither the local nor an external ontology is only
    # trusted when the preceding token is "В".
    if ((ok and li[0].onto_item is not None and li[0].onto_item.misc_attr is None) and ad is not None):
        if (li[0].onto_item.owner != ad.local_ontology and not li[0].onto_item.owner.is_ext_ontology):
            if (li[0].begin_token.previous is not None and li[0].begin_token.previous.isValue("В", None)):
                pass
            else:
                ok = False
    # A lone adjective that parses as "<something> + street noun" is not a city.
    if (len(li) == 1 and li[0].begin_token.morph.class0_.is_adjective):
        sits = StreetItemToken.tryParseList(li[0].begin_token, None, 3)
        if (sits is not None and len(sits) == 2 and sits[1].typ == StreetItemType.NOUN):
            return None
    typ = None
    alttyp = None
    mc = li[0].morph
    if (i < len(li)):
        if (li[i].typ == CityItemToken.ItemType.NOUN):
            # 'at' stays non-None when the noun item parses as an address item that is
            # not itself followed by a street; in that case the noun is not used as a type.
            at = None
            if (not li[i].chars.is_all_lower and (li[i].whitespaces_after_count < 2)):
                sit = StreetItemToken.tryParse(li[i].end_token.next0_, None, False, None, False)
                if (sit is not None and sit.typ == StreetItemType.NOUN):
                    at = AddressItemToken.tryParse(li[i].begin_token, None, False, False, None)
                    if (at is not None):
                        at2 = AddressItemToken.tryParse(li[i].end_token.next0_, None, False, False, None)
                        if (at2 is not None and at2.typ == AddressItemToken.ItemType.STREET):
                            at = (None)
            if (at is None):
                typ = li[i].value
                alttyp = li[i].alt_value
                if (li[i].begin_token.isValue("СТ", None) and li[i].begin_token.chars.is_all_upper):
                    return None
                if ((i + 1) == len(li)):
                    # The noun is the last item: accept and take its morphology when it has a case.
                    ok = True
                    if (not li[i].morph.case_.is_undefined):
                        mc = li[i].morph
                    i += 1
                elif (ok):
                    i += 1
                else:
                    # Accept when preceded by one of the listed words
                    # (presumably mayor / head-of-city titles -- confirm).
                    tt0 = li[0].begin_token.previous
                    if ((isinstance(tt0, TextToken)) and (tt0.whitespaces_after_count < 3)):
                        if (tt0.isValue("МЭР", "МЕР") or tt0.isValue("ГЛАВА", None) or tt0.isValue("ГРАДОНАЧАЛЬНИК", None)):
                            ok = True
                            i += 1
    if (not ok and oi.value is not None and (len(oi.value.canonic_text) < 4)):
        return None
    if (not ok and li[0].begin_token.morph.class0_.is_proper_name):
        return None
    if (not ok):
        # Words absent from the dictionary (as adjective / noun / pronoun) can be
        # accepted from surrounding geo context, minus several exclusions.
        if (not MiscHelper.isExistsInDictionary(li[0].begin_token, li[0].end_token, (MorphClass.ADJECTIVE) | MorphClass.NOUN | MorphClass.PRONOUN)):
            ok = (li[0].geo_object_before or li[i - 1].geo_object_after)
            if (ok and li[0].begin_token == li[0].end_token):
                mcc = li[0].begin_token.getMorphClassInDictionary()
                if (mcc.is_proper_name or mcc.is_proper_surname):
                    ok = False
                elif (li[0].geo_object_before and (li[0].whitespaces_after_count < 2)):
                    ad1 = AddressItemToken.tryParse(li[0].begin_token, None, False, False, None)
                    if (ad1 is not None and ad1.typ == AddressItemToken.ItemType.STREET):
                        ad2 = AddressItemToken.tryParse(li[0].end_token.next0_, None, False, False, None)
                        if (ad2 is None or ad2.typ != AddressItemToken.ItemType.STREET):
                            ok = False
                elif (AddressItemToken.tryAttachOrg(li[0].begin_token) is not None):
                    ok = False
        if (ok):
            # Reject when the same position is recognised as a PERSON referent.
            if (li[0].kit.processReferent("PERSON", li[0].begin_token) is not None):
                ok = False
    # Last chances: a year or another city right after the candidate.
    if (not ok):
        ok = CityAttachHelper.checkYearAfter(li[0].end_token.next0_)
    if (not ok and ((not li[0].begin_token.morph.class0_.is_adjective or li[0].begin_token != li[0].end_token))):
        ok = CityAttachHelper.checkCityAfter(li[0].end_token.next0_)
    if (not ok):
        return None
    # Drop the items that were not consumed.
    if (i < len(li)):
        del li[i:i + len(li) - i]
    rt = None
    if (oi.value is None):
        if (li[0].value is not None and li[0].higher_geo is not None):
            # No ontology item, but the name and its higher geo object are known.
            cap = GeoReferent()
            cap._addName(li[0].value)
            cap._addTypCity(li[0].kit.base_language)
            cap.higher = li[0].higher_geo
            if (typ is not None):
                cap._addTyp(typ)
            if (alttyp is not None):
                cap._addTyp(alttyp)
            rt = ReferentToken(cap, li[0].begin_token, li[0].end_token)
        else:
            if (li[0].value is None):
                return None
            if (typ is None):
                # Without a type, only "<geo referent> - <name>" is accepted.
                if ((len(li) == 1 and li[0].begin_token.previous is not None and li[0].begin_token.previous.is_hiphen) and (isinstance(li[0].begin_token.previous.previous, ReferentToken)) and (isinstance(li[0].begin_token.previous.previous.getReferent(), GeoReferent))):
                    pass
                else:
                    return None
            else:
                if (not LanguageHelper.endsWithEx(typ, "ПУНКТ", "ПОСЕЛЕНИЕ", "ПОСЕЛЕННЯ", "ПОСЕЛОК")):
                    if (not LanguageHelper.endsWith(typ, "CITY")):
                        if (typ == "СТАНЦИЯ" and ((MiscLocationHelper.checkGeoObjectBefore(li[0].begin_token)))):
                            pass
                        elif (len(li) > 1 and li[1].typ == CityItemToken.ItemType.NOUN and li[0].typ == CityItemToken.ItemType.CITY):
                            pass
                        else:
                            return None
                # Re-derive an adjectival name so that it agrees with the noun item.
                # NOTE(review): li may have been truncated to i items above; li[1] presumably
                # relies on the NOUN item having been consumed -- confirm it cannot raise IndexError.
                if (li[0].begin_token.morph.class0_.is_adjective):
                    li[0].value = ProperNameHelper.getNameEx(li[0].begin_token, li[0].end_token, MorphClass.ADJECTIVE, li[1].morph.case_, li[1].morph.gender, False, False)
    elif (isinstance(oi.value.referent, GeoReferent)):
        # The ontology item already carries a geo referent: reuse it.
        rt = ReferentToken._new719(Utils.asObjectOrNull(oi.value.referent, GeoReferent), li[0].begin_token, li[len(li) - 1].end_token, mc)
    elif (typ is None):
        typ = oi.value.typ
    if (rt is None):
        city = GeoReferent()
        city._addName((li[0].value if oi.value is None else oi.value.canonic_text))
        if (typ is not None):
            city._addTyp(typ)
        else:
            city._addTypCity(li[0].kit.base_language)
        if (alttyp is not None):
            city._addTyp(alttyp)
        rt = ReferentToken._new719(city, li[0].begin_token, li[len(li) - 1].end_token, mc)
    # For a single-item city, widen the token span over an adjacent "Г" / "Г." marker
    # (before the name, or after it).
    if ((isinstance(rt.referent, GeoReferent)) and len(li) == 1 and (rt.referent).is_city):
        if (rt.begin_token.previous is not None and rt.begin_token.previous.isValue("Г", None)):
            rt.begin_token = rt.begin_token.previous
        elif ((rt.begin_token.previous is not None and rt.begin_token.previous.isChar('.') and rt.begin_token.previous.previous is not None) and rt.begin_token.previous.previous.isValue("Г", None)):
            rt.begin_token = rt.begin_token.previous.previous
        elif (rt.end_token.next0_ is not None and (rt.whitespaces_after_count < 2) and rt.end_token.next0_.isValue("Г", None)):
            rt.end_token = rt.end_token.next0_
            if (rt.end_token.next0_ is not None and rt.end_token.next0_.isChar('.')):
                rt.end_token = rt.end_token.next0_
    return rt
def tryAttachAlternate(t0: 'Token', ph0: 'PhoneReferent', pli: typing.List['PhoneItemToken']) -> 'PhoneItemToken':
    """Try to read an alternative phone-number ending that starts at ``t0``.

    Recognised shapes, checked in this order:
      1. slash or backslash + short number, optionally a whole item sequence
         that mirrors the tail of ``pli`` (same item types, same digit counts);
      2. hyphen + short number followed by end of text, a new line, ',' or '.';
      3. a two-character number in parentheses;
      4. '/' or '-' + number whose length matches the end of ``ph0._m_template``.

    Returns an ALT ``PhoneItemToken`` or ``None`` when nothing matches.
    """
    if (t0 is None):
        return None

    def _followed_by_short_number() -> bool:
        # The next token is a number whose char span is at most 1.
        following = t0.next0_
        return (isinstance(following, NumberToken)
                and (following.end_char - following.begin_char) <= 1)

    def _alt(last_token: 'Token', value: str) -> 'PhoneItemToken':
        return PhoneItemToken._new2466(
            t0, last_token, PhoneItemToken.PhoneItemType.ALT, value)

    def _single_number_alt() -> 'PhoneItemToken':
        # ALT item covering t0 and the number right after it.
        return _alt(t0.next0_, t0.next0_.getSourceText())

    # 1. "/NN" or "\NN", possibly a longer sequence mirroring the tail of pli.
    if (t0.isCharOf("\\/") and _followed_by_short_number()):
        kinds = PhoneItemToken.PhoneItemType
        tail = PhoneItemToken.tryAttachAll(t0.next0_)
        if (tail is not None and len(tail) > 1):
            if (tail[-1].item_type == kinds.DELIM):
                del tail[-1]
            if (len(tail) <= len(pli)):
                digits = ""
                for alt_item, base_item in zip(tail, pli[len(pli) - len(tail):]):
                    kind = alt_item.item_type
                    if (kind != base_item.item_type):
                        break
                    if (kind != kinds.NUMBER and kind != kinds.DELIM):
                        break
                    if (kind == kinds.NUMBER):
                        if (alt_item.length_char != base_item.length_char):
                            break
                        digits += alt_item.value
                else:
                    # Every item lined up with the tail of the main number.
                    return _alt(tail[-1].end_token, digits)
        return _single_number_alt()

    # 2. "-NN" that ends the text or the line, or is followed by ',' / '.'.
    if (t0.is_hiphen and _followed_by_short_number()):
        after_number = t0.next0_.next0_
        if (after_number is None or after_number.is_newline_before
                or after_number.isCharOf(",.")):
            return _single_number_alt()

    # 3. "(NN)".
    if (t0.isChar('(') and isinstance(t0.next0_, NumberToken)
            and (t0.next0_.end_char - t0.next0_.begin_char) == 1
            and t0.next0_.next0_ is not None
            and t0.next0_.next0_.isChar(')')):
        return _alt(t0.next0_.next0_, t0.next0_.getSourceText())

    # 4. "/N..." or "-N..." whose digit count closes the template of ph0.
    if (t0.isCharOf("/-") and isinstance(t0.next0_, NumberToken)
            and ph0._m_template is not None
            and LanguageHelper.endsWith(
                ph0._m_template,
                str(((t0.next0_.end_char - t0.next0_.begin_char) + 1)))):
        return _single_number_alt()

    return None