def __init__(self, v : 'MorphRuleVariant'=None, word : str=None) -> None:
    """Create a word form, optionally initialised from a rule variant.

    Args:
        v: rule variant to copy from. When None, only the default field
            values are set.
        word: the source word. When given, ``normal_case`` and
            ``normal_full`` are rebuilt from it by replacing the variant's
            tail with ``v.normal_tail`` / ``v.full_normal_tail``.
    """
    super().__init__(None)
    self.normal_full = None
    self.normal_case = None
    self.misc = None
    self.undef_coef = 0
    self.tag = None
    if v is None:
        return

    v.copy_to(self)
    self.misc = v.misc_info
    self.tag = v

    def _swap_tail(new_tail: str) -> str:
        # Remove the variant's own tail from the word (only when the word
        # really ends with it) and append the requested normalised tail.
        stem = word
        if LanguageHelper.ends_with(word, v.tail):
            stem = word[:len(word) - len(v.tail)]
        if len(new_tail) > 0:
            return stem + new_tail
        return stem

    if v.normal_tail is not None and word is not None:
        self.normal_case = _swap_tail(v.normal_tail)
    if v.full_normal_tail is not None and word is not None:
        self.normal_full = _swap_tail(v.full_normal_tail)
def _del_surname_end(s: str) -> str:
    """Strip an inflected ending from a surname.

    Strings shorter than three characters are returned unchanged. Otherwise,
    using the suffix tests of ``LanguageHelper``:

    * ending "А", "У" or "Е" -> the last character is dropped;
    * ending "ОМ" or "ЫМ"    -> the last two characters are dropped;
    * ending "Я" or "Ю" preceded by "Н" or "Л" -> the last character is
      replaced with "Ь".

    Any other string is returned as is.
    """
    size = len(s)
    if size < 3:
        return s
    if LanguageHelper.ends_with_ex(s, "А", "У", "Е", None):
        return s[:size - 1]
    for ending in ("ОМ", "ЫМ"):
        if LanguageHelper.ends_with(s, ending):
            return s[:size - 2]
    if LanguageHelper.ends_with_ex(s, "Я", "Ю", None, None) and s[size - 2] in ('Н', 'Л'):
        return s[:size - 1] + "Ь"
    return s
def __try_attach_moscowao(li: typing.List['TerrItemToken'], ad: 'AnalyzerData') -> 'ReferentToken':
    """Try to build a Moscow administrative-okrug referent from the first item.

    Args:
        li: territory items; only ``li[0]`` is inspected.
        ad: analyzer data (not used by this function).

    Returns:
        A ReferentToken spanning ``li[0]`` with a new GeoReferent, or None when
        the item has no termin flagged ``is_moscow_region``, or when the item
        is doubtful and is followed neither by a city nor by a street.
    """
    head = li[0]
    termin = head.termin_item
    if termin is None or not termin.is_moscow_region:
        return None

    if head.is_doubt:
        # A doubtful item needs confirmation from what follows it.
        after = head.end_token.next0_
        confirmed = CityAttachHelper.check_city_after(after)
        if not confirmed:
            streets = AddressItemToken.try_parse_list(after, None, 2)
            confirmed = (streets is not None and len(streets) > 0
                         and streets[0].typ == AddressItemToken.ItemType.STREET)
        if not confirmed:
            return None

    okrug_type = "АДМИНИСТРАТИВНЫЙ ОКРУГ"
    region = GeoReferent()
    region._add_typ(okrug_type)

    title = termin.canonic_text
    if LanguageHelper.ends_with(title, okrug_type):
        # Cut the type plus the one character in front of it.
        # NOTE(review): if the text equals the type exactly, the end index
        # becomes -1 and only the last character is removed - confirm that
        # such a canonic text cannot occur.
        title = title[:len(title) - len(okrug_type) - 1].strip()
    region._add_name(title)
    return ReferentToken(region, head.begin_token, head.end_token)
def get_doc_types(name : str, name2 : str) -> typing.List[str]:
    """Return the contract types in which a participant role can appear.

    Args:
        name: role name (compared against upper-case Russian literals); None
            yields an empty list.
        name2: optional second role. When given, only the types that are also
            returned for ``name2`` are kept.

    Returns:
        A new list of contract type names; empty for an unknown role.
    """
    if name is None:
        return []

    lease = "ДОГОВОР АРЕНДЫ"
    sublease = "ДОГОВОР СУБАРЕНДЫ"
    hire = "ДОГОВОР НАЙМА"
    agency = "АГЕНТСКИЙ ДОГОВОР"
    sale = "ДОГОВОР КУПЛИ-ПРОДАЖИ"
    services = "ДОГОВОР УСЛУГ"
    licence = "ЛИЦЕНЗИОННЫЙ ДОГОВОР"
    insurance = "ДОГОВОР СТРАХОВАНИЯ"
    by_role = {
        "АРЕНДОДАТЕЛЬ": (lease, sublease),
        "АРЕНДАТОР": (lease,),
        "СУБАРЕНДАТОР": (sublease,),
        "НАЙМОДАТЕЛЬ": (hire,),
        "НАНИМАТЕЛЬ": (hire,),
        "АГЕНТ": (agency,),
        "ПРИНЦИПАЛ": (agency,),
        "ПРОДАВЕЦ": (sale,),
        "ПОКУПАТЕЛЬ": (sale,),
        "ЗАКАЗЧИК": (services,),
        "ИСПОЛНИТЕЛЬ": (services,),
        "ПОСТАВЩИК": ("ДОГОВОР ПОСТАВКИ",),
        "ЛИЦЕНЗИАР": (licence,),
        "ЛИЦЕНЗИАТ": (licence,),
        "СТРАХОВЩИК": (insurance,),
        "СТРАХОВАТЕЛЬ": (insurance,),
    }

    found = by_role.get(name)
    if found is None and LanguageHelper.ends_with(name, "ПОДРЯДЧИК"):
        # Any "...ПОДРЯДЧИК" role is treated like a service contractor.
        found = (services,)
    res = list(found) if found is not None else []

    if name2 is None:
        return res
    # Keep only the types shared with the second role, preserving order.
    other = ParticipantToken.get_doc_types(name2, None)
    return [doc for doc in res if doc in other]
def can_has_ref(self, r: 'Referent') -> bool:
    """Check whether referent ``r`` may act as the ATTR_REF of this object.

    The decision depends on ``self.name``:

    * for a GeoReferent: names ending with "президент"/"губернатор" need a
      state or a region, "мэр"/"градоначальник" need a city, "глава" accepts
      any geo object, every other name is rejected;
    * for a referent whose ``type_name`` is "ORGANIZATION": governors, mayors
      and presidents are rejected, a name containing "министр" needs a
      "министерство" slot, a name ending with "директор" must not have a
      "суд" slot; everything else is accepted;
    * any other referent (or a missing name / referent) is rejected.
    """
    title = self.name
    if title is None or r is None:
        return False

    if isinstance(r, GeoReferent):
        geo = Utils.asObjectOrNull(r, GeoReferent)
        if LanguageHelper.ends_with_ex(title, "президент", "губернатор", None, None):
            return geo.is_state or geo.is_region
        if title in ("мэр", "градоначальник"):
            return geo.is_city
        return title == "глава"

    if r.type_name != "ORGANIZATION":
        return False
    if LanguageHelper.ends_with(title, "губернатор") or title in ("мэр", "градоначальник", "президент"):
        return False
    if "министр" in title and r.find_slot(None, "министерство", True) is None:
        return False
    if title.endswith("директор") and r.find_slot(None, "суд", True) is not None:
        return False
    return True
def merge_slots(self, obj: 'Referent', merge_statistic: bool = True) -> None:
    """Merge data of another phone referent into this one.

    Nothing happens when ``obj`` is not a PhoneReferent. Otherwise the country
    code is taken over if this object has none, and the number is replaced by
    the other one when ``LanguageHelper.ends_with(other.number, self.number)``
    holds. ``merge_statistic`` is not used by this implementation.
    """
    other = Utils.asObjectOrNull(obj, PhoneReferent)
    if other is None:
        return

    if other.country_code is not None and self.country_code is None:
        self.country_code = other.country_code

    incoming = other.number
    if incoming is not None and LanguageHelper.ends_with(incoming, self.number):
        self.number = incoming
def get_lemma(self) -> str:
    """Return the lemma (a morphological normalisation variant) of the token.

    A stored ``__m_lemma`` wins. Otherwise the lemma is taken from the word
    forms: the single form if there is exactly one; for a token that is not
    all lower-case, a proper-surname form ending in "ОВ"/"ЕВ" (a dictionary
    proper name returns its ``normal_case`` immediately); failing that, the
    form preferred by ``__compare_forms``. The result is then adjusted for the
    endings "АНЫЙ"/"ЕНЫЙ", "ЙСЯ" and "АНИЙ". Without word forms the term
    itself (or "?") is returned.
    """
    if self.__m_lemma is not None:
        return self.__m_lemma

    lemma = None
    if self.word_forms is not None and len(self.word_forms) > 0:
        if len(self.word_forms) == 1:
            only = self.word_forms[0]
            lemma = Utils.ifNotNull(only.normal_full, only.normal_case)
        if lemma is None and not self.char_info.is_all_lower:
            for form in self.word_forms:
                if form.class0_.is_proper_surname:
                    text = Utils.ifNotNull(form.normal_full, Utils.ifNotNull(form.normal_case, ""))
                    if LanguageHelper.ends_with_ex(text, "ОВ", "ЕВ", None, None):
                        lemma = text
                        break
                elif form.class0_.is_proper_name and form.is_in_dictionary:
                    return form.normal_case
        if lemma is None:
            # Pick the preferred form: a later form replaces the current
            # choice when __compare_forms(current, later) is positive.
            best = None
            for form in self.word_forms:
                if best is None or self.__compare_forms(best, form) > 0:
                    best = form
            lemma = Utils.ifNotNull(best.normal_full, best.normal_case)

    if lemma is None:
        return Utils.ifNotNull(self.term, "?")

    if LanguageHelper.ends_with_ex(lemma, "АНЫЙ", "ЕНЫЙ", None, None):
        # "...АНЫЙ" / "...ЕНЫЙ" -> "...АННЫЙ" / "...ЕННЫЙ"
        return lemma[:len(lemma) - 3] + "ННЫЙ"
    if LanguageHelper.ends_with(lemma, "ЙСЯ"):
        return lemma[:len(lemma) - 2]
    if LanguageHelper.ends_with(lemma, "АНИЙ") and lemma == self.term:
        # Keep the text when any form is a dictionary word, else "...АНИЕ".
        if any(wf.is_in_dictionary for wf in self.word_forms):
            return lemma
        return lemma[:len(lemma) - 1] + "Е"
    return lemma
def __can_be_equal(self, obj: 'Referent', typ: 'ReferentsEqualType', ignore_add_number: bool) -> bool:
    """Decide whether this phone and ``obj`` can denote the same number.

    Args:
        obj: referent to compare with; anything but a PhoneReferent is unequal.
        typ: comparison mode. Unless it is ``DIFFERENTTEXTS``, one number being
            a suffix of the other (per ``LanguageHelper.ends_with``) is enough.
        ignore_add_number: when true, additional numbers only matter if both
            are present; when false they must be equal (or both absent).

    Returns:
        True when country codes (if both known), additional numbers and the
        main numbers are compatible.
    """
    other = Utils.asObjectOrNull(obj, PhoneReferent)
    if other is None:
        return False

    their_cc = other.country_code
    mine_cc = self.country_code
    if their_cc is not None and mine_cc is not None and their_cc != mine_cc:
        return False

    mine_ext = self.add_number
    their_ext = other.add_number
    if ignore_add_number:
        if mine_ext is not None and their_ext is not None and their_ext != mine_ext:
            return False
    elif mine_ext != their_ext:
        return False

    mine_num = self.number
    their_num = other.number
    if mine_num is None or their_num is None:
        return False
    if mine_num == their_num:
        return True
    if typ != ReferentsEqualType.DIFFERENTTEXTS:
        if LanguageHelper.ends_with(mine_num, their_num) or LanguageHelper.ends_with(their_num, mine_num):
            return True
    return False
def can_be_general_for(self, obj: 'Referent') -> bool:
    """Check whether this phone is a more general form of ``obj``.

    The two must first be compatible according to ``__can_be_equal`` (mode
    ``WITHINONETEXT``, additional numbers ignored). Then:

    * a country code here but none in ``obj`` -> False;
    * no additional number here but one in ``obj`` -> True;
    * an additional number here but none in ``obj`` -> False;
    * otherwise the result is whether ``obj``'s number ends with this number.
    """
    if not self.__can_be_equal(obj, ReferentsEqualType.WITHINONETEXT, True):
        return False

    other = Utils.asObjectOrNull(obj, PhoneReferent)
    if self.country_code is not None and other.country_code is None:
        return False

    mine_ext = self.add_number
    if mine_ext is None and other.add_number is not None:
        return True
    if mine_ext is not None and other.add_number is None:
        return False

    if LanguageHelper.ends_with(other.number, self.number):
        return True
    return False
# get_wordform: build the form of `word` that matches the requested class
# (cla), gender, case (cas) and number (num). Returns a string, or None when
# nothing could be produced.
#
# NOTE(review): the statement structure of this function has been flattened
# onto two physical lines, so the outline below follows the statement order
# only - confirm the nesting against a correctly indented copy.
#
# 1. Forward pass. Starting at self.m_root the tree is descended by the code
#    point of each character of `word` (nodes with lazy_pos > 0 are loaded
#    through self.__load_tree_node first). Where a node has rules, the word is
#    split at index i into word_begin / word_end, and the rules whose
#    `variants` contain word_end are scanned. A variant is considered when it
#    shares a class bit with `cla` and has a normal_tail. It is then filtered
#    by case (only nominative / undefined variants when `cas` is undefined),
#    by agreement of the is_proper_surname flag, by gender (a gender mismatch
#    is let through when num == MorphNumber.PLURAL) and by number.
#    `find` is set before the gender / number filters, so it can be True while
#    `res` is still None. The candidate is word_begin + v.tail; it replaces
#    `res` when `res` is None or when its v.calc_eq_coef(add_info) score
#    (0 without add_info) exceeds max_coef. When max_coef is 0 and
#    word_begin + v.normal_tail equals `word`, the candidate is returned at
#    once. After the pass `res` is returned if `find` is set.
# 2. Reverse pass. Starting at self.m_root_reverce the word is walked from its
#    last character backwards until a node with reverce_variants is reached;
#    None is returned when there is none. The first rule variant that passes
#    the class, surname-flag, case, number and gender checks and does not get
#    a negative v.calc_eq_coef(add_info) yields word_begin + v.tail, which is
#    returned immediately.
# 3. Surname fallback (cla.is_proper_surname). For MorphGender.FEMINIE with a
#    defined, non-nominative case and a word ending in "ВА" / "НА" the last
#    character is replaced by "У" (accusative) or "ОЙ" (other cases).
#    Otherwise, for the feminine gender: a word ending in 'А', 'Я' or 'О' is
#    returned unchanged; another vowel (LanguageHelper.is_cyrillic_vowel) is
#    replaced by "А"; a final 'Й' and the character before it are replaced by
#    "АЯ"; in all remaining cases "А" is appended.
# Finally `res` is returned.
def get_wordform(self, word: str, cla: 'MorphClass', gender: 'MorphGender', cas: 'MorphCase', num: 'MorphNumber', add_info: 'MorphWordForm') -> str: tn = self.m_root find = False res = None max_coef = -10 i = 0 while i <= len(word): if (tn.lazy_pos > 0): self.__load_tree_node(tn) if (tn.rules is not None): word_begin = "" word_end = "" if (i > 0): word_begin = word[0:0 + i] else: word_end = word if (i < len(word)): word_end = word[i:] else: word_begin = word for r in tn.rules: if (word_end in r.variants): for li in r.variants_list: for v in li: if ((((cla.value) & (v.class0_.value))) != 0 and v.normal_tail is not None): if (cas.is_undefined): if (v.case_.is_nominative or v.case_.is_undefined): pass else: continue elif (((v.case_) & cas).is_undefined): continue sur = cla.is_proper_surname sur0 = v.class0_.is_proper_surname if (sur or sur0): if (sur != sur0): continue find = True if (gender != MorphGender.UNDEFINED): if ((((gender) & (v.gender))) == ( MorphGender.UNDEFINED)): if (num is not None and num == MorphNumber.PLURAL): pass else: continue if (num != MorphNumber.UNDEFINED): if ((((num) & (v.number))) == ( MorphNumber.UNDEFINED)): continue re = word_begin + v.tail co = 0 if (add_info is not None): co = v.calc_eq_coef(add_info) if (res is None or co > max_coef): res = re max_coef = co if (max_coef == 0): if ((word_begin + v.normal_tail) == word): return re if (tn.nodes is None or i >= len(word)): break ch = ord(word[i]) wraptn28 = RefOutArgWrapper(None) inoutres29 = Utils.tryGetValue(tn.nodes, ch, wraptn28) tn = wraptn28.value if (not inoutres29): break i += 1 if (find): return res tn = self.m_root_reverce tn0 = None for i in range(len(word) - 1, -1, -1): if (tn.lazy_pos > 0): self.__load_tree_node(tn) ch = ord(word[i]) if (tn.nodes is None): break wrapnext30 = RefOutArgWrapper(None) inoutres31 = Utils.tryGetValue(tn.nodes, ch, wrapnext30) next0_ = wrapnext30.value if (not inoutres31): break tn = next0_ if (tn.lazy_pos > 0): self.__load_tree_node(tn) if 
(tn.reverce_variants is not None): tn0 = tn break else: i = -1 if (tn0 is None): return None for mv in tn0.reverce_variants: if ((((mv.class0_.value) & (cla.value))) != 0 and mv.rule is not None): if (len(mv.tail) > 0 and not LanguageHelper.ends_with(word, mv.tail)): continue word_begin = word[0:0 + len(word) - len(mv.tail)] for liv in mv.rule.variants_list: for v in liv: if ((((v.class0_.value) & (cla.value))) != 0): sur = cla.is_proper_surname sur0 = v.class0_.is_proper_surname if (sur or sur0): if (sur != sur0): continue if (not cas.is_undefined): if (((cas) & v.case_).is_undefined and not v.case_.is_undefined): continue if (num != MorphNumber.UNDEFINED): if (v.number != MorphNumber.UNDEFINED): if ((((v.number) & (num))) == (MorphNumber.UNDEFINED)): continue if (gender != MorphGender.UNDEFINED): if (v.gender != MorphGender.UNDEFINED): if ((((v.gender) & (gender))) == ( MorphGender.UNDEFINED)): continue if (add_info is not None): if (v.calc_eq_coef(add_info) < 0): continue res = (word_begin + v.tail) if (res == word): return word return res if (cla.is_proper_surname): if ((gender == MorphGender.FEMINIE and cla.is_proper_surname and not cas.is_undefined) and not cas.is_nominative): if (word.endswith("ВА") or word.endswith("НА")): if (cas.is_accusative): return word[0:0 + len(word) - 1] + "У" return word[0:0 + len(word) - 1] + "ОЙ" if (gender == MorphGender.FEMINIE): last = word[len(word) - 1] if (last == 'А' or last == 'Я' or last == 'О'): return word if (LanguageHelper.is_cyrillic_vowel(last)): return word[0:0 + len(word) - 1] + "А" elif (last == 'Й'): return word[0:0 + len(word) - 2] + "АЯ" else: return word + "А" return res
# __try_attach_: try to assemble a phone number (a PhoneReferent wrapped in a
# ReferentToken) from the phone items pli[ind:]. Returns None on failure.
#
#   pli             - phone items; NOTE: the list may be truncated in place
#                     (see step 1).
#   ind             - index of the first item to use.
#   is_phone_before - flag that relaxes several plausibility checks below
#                     (presumably "a phone context precedes" - confirm with
#                     the caller); it is also switched on locally when a
#                     bracketed PREFIX item follows the number.
#   prev_phone      - previously recognised phone or None; its number and
#                     _m_template are used to match templates and to complete
#                     short numbers.
#   lev             - recursion depth; calls with lev > 4 return None.
#
# NOTE(review): the statement structure of this function has been flattened
# onto a few physical lines, so the outline below follows the statement order
# only - confirm the nesting against a correctly indented copy.
#
# 1. Template match. When prev_phone has a template and the first item is a
#    NUMBER, a template string is built from the lengths of NUMBER items and
#    the values of DELIM items (stopping at a " " delimiter). As soon as it
#    equals prev_phone._m_template, the items after the matched run are
#    deleted from pli, unless the only remaining item is a PREFIX.
# 2. Country code. Taken from a COUNTRYCODE item (any value except "8" is
#    split via PhoneHelper.get_country_prefix into country and city part),
#    from an item with can_be_country_prefix when a recursive call
#    (lev + 1) on the following items succeeds, or from a leading NUMBER that
#    starts with '8' or '7'. In that last case an 11-digit last item with
#    is_phone_before set is returned immediately as a complete phone.
# 3. City code. Taken from a CITYCODE item or, after country code "8", from a
#    3- or 4-digit NUMBER.
# 4. Number accumulation. NUMBER items are appended to `num`; `templ` receives
#    the length/delimiter template and `part_length` the group lengths.
#    Collection stops when adding a group would exceed 13 digits, once more
#    than 10 digits are collected, on an ADDNUMBER item (stored in
#    `additional`) or on any other item type. A fixed
#    NUMBER-DELIM-NUMBER-DELIM-NUMBER layout after a country code is taken as
#    a whole (`std`). The first_pass3391 / first_pass3392 flags make the loop
#    increment run on `continue` as well.
# 5. Plausibility checks on the collected digits update `ok`: fewer than 4
#    digits is rejected, as is a "." delimiter without country and city code;
#    group-length patterns, prev_phone and the following phone
#    (self.__get_next_phone) are consulted.
# 6. Result. The number is city_code + digits, or is completed from the head
#    of prev_phone.number. None is returned when the token before pli[0] is
#    "ГОСТ" or "ТУ", for an all-zero number, for fewer than 7 digits with a
#    country code, for 8 digits without prev_phone, for more than 11 digits
#    (up to 13 are allowed for country codes "1" and "43"), for country code
#    "7" with anything but 10 digits, and when (without is_phone_before) the
#    number is directly followed on the same line by '+', '=' or a hyphen.
#    A trailing PREFIX item extends the token and may set ph.kind; if pli[0]
#    is a PREFIX followed by a table control character, the token starts at
#    pli[1].
def __try_attach_(self, pli : typing.List['PhoneItemToken'], ind : int, is_phone_before : bool, prev_phone : 'PhoneReferent', lev : int=0) -> 'ReferentToken': if (ind >= len(pli) or lev > 4): return None country_code = None city_code = None j = ind if (prev_phone is not None and prev_phone._m_template is not None and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER): tmp = io.StringIO() jj = j first_pass3391 = True while True: if first_pass3391: first_pass3391 = False else: jj += 1 if (not (jj < len(pli))): break if (pli[jj].item_type == PhoneItemToken.PhoneItemType.NUMBER): print(len(pli[jj].value), end="", file=tmp) elif (pli[jj].item_type == PhoneItemToken.PhoneItemType.DELIM): if (pli[jj].value == " "): break print(pli[jj].value, end="", file=tmp) continue else: break templ0 = Utils.toStringStringIO(tmp) if (templ0 == prev_phone._m_template): if ((jj + 1) < len(pli)): if (pli[jj + 1].item_type == PhoneItemToken.PhoneItemType.PREFIX and (jj + 2) == len(pli)): pass else: del pli[jj + 1:jj + 1+len(pli) - jj - 1] break if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.COUNTRYCODE): country_code = pli[j].value if (country_code != "8"): cc = PhoneHelper.get_country_prefix(country_code) if (cc is not None and (len(cc) < len(country_code))): city_code = country_code[len(cc):] country_code = cc j += 1 elif ((j < len(pli)) and pli[j].can_be_country_prefix): k = j + 1 if ((k < len(pli)) and pli[k].item_type == PhoneItemToken.PhoneItemType.DELIM): k += 1 rrt = self.__try_attach_(pli, k, is_phone_before, None, lev + 1) if (rrt is not None): if ((((is_phone_before and pli[j + 1].item_type == PhoneItemToken.PhoneItemType.DELIM and pli[j + 1].begin_token.is_hiphen) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and len(pli[j].value) == 3) and ((j + 2) < len(pli)) and pli[j + 2].item_type == PhoneItemToken.PhoneItemType.NUMBER) and len(pli[j + 2].value) == 3): pass else: country_code = pli[j].value j += 1 if (((j < len(pli)) and 
pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and ((pli[j].value[0] == '8' or pli[j].value[0] == '7'))) and country_code is None): if (len(pli[j].value) == 1): country_code = pli[j].value j += 1 elif (len(pli[j].value) == 4): country_code = pli[j].value[0:0+1] if (city_code is None): city_code = pli[j].value[1:] else: city_code += pli[j].value[1:] j += 1 elif (len(pli[j].value) == 11 and j == (len(pli) - 1) and is_phone_before): ph0 = PhoneReferent() if (pli[j].value[0] != '8'): ph0.country_code = pli[j].value[0:0+1] ph0.number = pli[j].value[1:1+3] + pli[j].value[4:] return ReferentToken(ph0, pli[0].begin_token, pli[j].end_token) elif (city_code is None and len(pli[j].value) > 3 and ((j + 1) < len(pli))): sum0_ = 0 for it in pli: if (it.item_type == PhoneItemToken.PhoneItemType.NUMBER): sum0_ += len(it.value) if (sum0_ == 11): city_code = pli[j].value[1:] j += 1 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.CITYCODE): if (city_code is None): city_code = pli[j].value else: city_code += pli[j].value j += 1 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM): j += 1 if ((country_code == "8" and city_code is None and ((j + 3) < len(pli))) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER): if (len(pli[j].value) == 3 or len(pli[j].value) == 4): city_code = pli[j].value j += 1 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM): j += 1 normal_num_len = 0 if (country_code == "421"): normal_num_len = 9 num = io.StringIO() templ = io.StringIO() part_length = list() delim = None ok = False additional = None std = False if (country_code is not None and ((j + 4) < len(pli)) and j > 0): if (((((pli[j - 1].value == "-" or pli[j - 1].item_type == PhoneItemToken.PhoneItemType.COUNTRYCODE)) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and pli[j + 1].item_type == PhoneItemToken.PhoneItemType.DELIM) and pli[j + 2].item_type == PhoneItemToken.PhoneItemType.NUMBER and 
pli[j + 3].item_type == PhoneItemToken.PhoneItemType.DELIM) and pli[j + 4].item_type == PhoneItemToken.PhoneItemType.NUMBER): if ((((len(pli[j].value) + len(pli[j + 2].value)) == 6 or ((len(pli[j].value) == 4 and len(pli[j + 2].value) == 5)))) and ((len(pli[j + 4].value) == 4 or len(pli[j + 4].value) == 1))): print(pli[j].value, end="", file=num) print(pli[j + 2].value, end="", file=num) print(pli[j + 4].value, end="", file=num) print("{0}{1}{2}{3}{4}".format(len(pli[j].value), pli[j + 1].value, len(pli[j + 2].value), pli[j + 3].value, len(pli[j + 4].value)), end="", file=templ, flush=True) std = True ok = True j += 5 first_pass3392 = True while True: if first_pass3392: first_pass3392 = False else: j += 1 if (not (j < len(pli))): break if (std): break if (pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM): if (pli[j].is_in_brackets): continue if (j > 0 and pli[j - 1].is_in_brackets): continue if (templ.tell() > 0): print(pli[j].value, end="", file=templ) if (delim is None): delim = pli[j].value elif (pli[j].value != delim): if ((len(part_length) == 2 and ((part_length[0] == 3 or part_length[0] == 4)) and city_code is None) and part_length[1] == 3): city_code = Utils.toStringStringIO(num)[0:0+part_length[0]] Utils.removeStringIO(num, 0, part_length[0]) del part_length[0] delim = pli[j].value continue if (is_phone_before and ((j + 1) < len(pli)) and pli[j + 1].item_type == PhoneItemToken.PhoneItemType.NUMBER): if (num.tell() < 6): continue if (normal_num_len > 0 and (num.tell() + len(pli[j + 1].value)) == normal_num_len): continue break else: continue ok = False elif (pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER): if (num.tell() == 0 and pli[j].begin_token.previous is not None and pli[j].begin_token.previous.is_table_control_char): tt = pli[len(pli) - 1].end_token.next0_ if (tt is not None and tt.is_char_of(",.")): tt = tt.next0_ if (isinstance(tt, NumberToken)): return None if ((num.tell() + len(pli[j].value)) > 13): if (j > 0 and pli[j - 
1].item_type == PhoneItemToken.PhoneItemType.DELIM): j -= 1 ok = True break print(pli[j].value, end="", file=num) part_length.append(len(pli[j].value)) print(len(pli[j].value), end="", file=templ) ok = True if (num.tell() > 10): j += 1 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.ADDNUMBER): additional = pli[j].value j += 1 break elif (pli[j].item_type == PhoneItemToken.PhoneItemType.ADDNUMBER): additional = pli[j].value j += 1 break else: break if ((j == (len(pli) - 1) and pli[j].is_in_brackets and ((len(pli[j].value) == 3 or len(pli[j].value) == 4))) and additional is None): additional = pli[j].value j += 1 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.PREFIX and pli[j].is_in_brackets): is_phone_before = True j += 1 if ((country_code is None and city_code is not None and len(city_code) > 3) and (num.tell() < 8) and city_code[0] != '8'): if ((len(city_code) + num.tell()) == 10): pass else: cc = PhoneHelper.get_country_prefix(city_code) if (cc is not None): if (len(cc) > 1 and (len(city_code) - len(cc)) > 1): country_code = cc city_code = city_code[len(cc):] if (country_code is None and city_code is not None and city_code.startswith("00")): cc = PhoneHelper.get_country_prefix(city_code[2:]) if (cc is not None): if (len(city_code) > (len(cc) + 3)): country_code = cc city_code = city_code[len(cc) + 2:] if (num.tell() == 0 and city_code is not None): if (len(city_code) == 10): print(city_code[3:], end="", file=num) part_length.append(num.tell()) city_code = city_code[0:0+3] ok = True elif (((len(city_code) == 9 or len(city_code) == 11 or len(city_code) == 8)) and ((is_phone_before or country_code is not None))): print(city_code, end="", file=num) part_length.append(num.tell()) city_code = (None) ok = True if (num.tell() < 4): ok = False if (num.tell() < 7): if (city_code is not None and (len(city_code) + num.tell()) > 7): if (not is_phone_before and len(city_code) == 3): ii = 0 ii = 0 while ii < len(part_length): if 
(part_length[ii] == 3): pass elif (part_length[ii] > 3): break elif ((ii < (len(part_length) - 1)) or (part_length[ii] < 2)): break ii += 1 if (ii >= len(part_length)): if (country_code == "61"): pass else: ok = False elif (((num.tell() == 6 or num.tell() == 5)) and ((len(part_length) >= 1 and len(part_length) <= 3)) and is_phone_before): if (pli[0].item_type == PhoneItemToken.PhoneItemType.PREFIX and pli[0].kind == PhoneKind.HOME): ok = False elif (prev_phone is not None and prev_phone.number is not None and ((len(prev_phone.number) == num.tell() or len(prev_phone.number) == (num.tell() + 3) or len(prev_phone.number) == (num.tell() + 4)))): pass elif (num.tell() > 4 and prev_phone is not None and Utils.toStringStringIO(templ) == prev_phone._m_template): ok = True else: ok = False if (delim == "." and country_code is None and city_code is None): ok = False if ((is_phone_before and country_code is None and city_code is None) and num.tell() > 10): cc = PhoneHelper.get_country_prefix(Utils.toStringStringIO(num)) if (cc is not None): if ((num.tell() - len(cc)) == 9): country_code = cc Utils.removeStringIO(num, 0, len(cc)) ok = True if (ok): if (std): pass elif (prev_phone is not None and prev_phone.number is not None and (((len(prev_phone.number) == num.tell() or len(prev_phone.number) == (num.tell() + 3) or len(prev_phone.number) == (num.tell() + 4)) or prev_phone._m_template == Utils.toStringStringIO(templ)))): pass elif ((len(part_length) == 3 and part_length[0] == 3 and part_length[1] == 2) and part_length[2] == 2): pass elif (len(part_length) == 3 and is_phone_before): pass elif ((len(part_length) == 4 and (((part_length[0] + part_length[1]) == 3)) and part_length[2] == 2) and part_length[3] == 2): pass elif ((len(part_length) == 4 and part_length[0] == 3 and part_length[1] == 3) and part_length[2] == 2 and part_length[3] == 2): pass elif (len(part_length) == 5 and (part_length[1] + part_length[2]) == 4 and (part_length[3] + part_length[4]) == 4): pass elif 
(len(part_length) > 4): ok = False elif (len(part_length) > 3 and city_code is not None): ok = False elif ((is_phone_before or city_code is not None or country_code is not None) or additional is not None): ok = True else: ok = False if (((num.tell() == 6 or num.tell() == 7)) and (len(part_length) < 4) and j > 0): next_ph = self.__get_next_phone(pli[j - 1].end_token.next0_, lev + 1) if (next_ph is not None): d = len(next_ph.number) - num.tell() if (d == 0 or d == 3 or d == 4): ok = True end = (pli[j - 1].end_token if j > 0 else None) if (end is None): ok = False if ((ok and city_code is None and country_code is None) and prev_phone is None and not is_phone_before): if (not end.is_whitespace_after and end.next0_ is not None): tt = end.next0_ if (tt.is_char_of(".,)") and tt.next0_ is not None): tt = tt.next0_ if (not tt.is_whitespace_before): ok = False if (not ok): return None if (templ.tell() > 0 and not str.isdigit(Utils.getCharAtStringIO(templ, templ.tell() - 1))): Utils.setLengthStringIO(templ, templ.tell() - 1) if ((country_code is None and city_code is not None and len(city_code) > 3) and num.tell() > 6): cc = PhoneHelper.get_country_prefix(city_code) if (cc is not None and ((len(cc) + 1) < len(city_code))): country_code = cc city_code = city_code[len(cc):] if (pli[0].begin_token.previous is not None): if (pli[0].begin_token.previous.is_value("ГОСТ", None) or pli[0].begin_token.previous.is_value("ТУ", None)): return None ph = PhoneReferent() if (country_code is not None): ph.country_code = country_code number = Utils.toStringStringIO(num) if ((city_code is None and num.tell() > 7 and len(part_length) > 0) and (part_length[0] < 5)): city_code = number[0:0+part_length[0]] number = number[part_length[0]:] if (city_code is None and num.tell() == 11 and Utils.getCharAtStringIO(num, 0) == '8'): city_code = number[1:1+3] number = number[4:] if (city_code is None and num.tell() == 10): city_code = number[0:0+3] number = number[3:] if (city_code is not None): number = 
(city_code + number) elif (country_code is None and prev_phone is not None): ok1 = False if (len(prev_phone.number) >= (len(number) + 2)): ok1 = True elif (templ.tell() > 0 and prev_phone._m_template is not None and LanguageHelper.ends_with(prev_phone._m_template, Utils.toStringStringIO(templ))): ok1 = True if (ok1 and len(prev_phone.number) > len(number)): number = (prev_phone.number[0:0+len(prev_phone.number) - len(number)] + number) if (ph.country_code is None and prev_phone is not None and prev_phone.country_code is not None): if (len(prev_phone.number) == len(number)): ph.country_code = prev_phone.country_code ok = False for d in number: if (d != '0'): ok = True break if (not ok): return None if (country_code is not None): if (len(number) < 7): return None else: s = PhoneHelper.get_country_prefix(number) if (s is not None): num2 = number[len(s):] if (len(num2) >= 10 and len(num2) <= 11): number = num2 if (s != "7"): ph.country_code = s if (len(number) == 8 and prev_phone is None): return None if (len(number) > 11): if ((len(number) < 14) and ((country_code == "1" or country_code == "43"))): pass else: return None ph.number = number if (additional is not None): ph.add_slot(PhoneReferent.ATTR_ADDNUMBER, additional, True, 0) if (not is_phone_before and end.next0_ is not None and not end.is_newline_after): if (end.next0_.is_char_of("+=") or end.next0_.is_hiphen): return None if (country_code is not None and country_code == "7"): if (len(number) != 10): return None ph._m_template = Utils.toStringStringIO(templ) if (j == (len(pli) - 1) and pli[j].item_type == PhoneItemToken.PhoneItemType.PREFIX and not pli[j].is_newline_before): end = pli[j].end_token if (pli[j].kind != PhoneKind.UNDEFINED): ph.kind = pli[j].kind res = ReferentToken(ph, pli[0].begin_token, end) if (pli[0].item_type == PhoneItemToken.PhoneItemType.PREFIX and pli[0].end_token.next0_.is_table_control_char): res.begin_token = pli[1].begin_token return res
def try_attach_alternate(t0: 'Token', ph0: 'PhoneReferent', pli: typing.List['PhoneItemToken']) -> 'PhoneItemToken':
    """Try to read an "alternative ending" that follows a phone number.

    Args:
        t0: token right after the phone (a slash, hyphen or bracket is
            expected); None yields None.
        ph0: the phone just recognised; its ``_m_template`` is consulted.
        pli: phone items of that phone, compared with the alternative tail.

    Returns:
        A PhoneItemToken of type ALT, or None when no pattern matches. The
        patterns, tried in order, are:

        1. slash + short number: if the following items mirror the tail of
           ``pli`` (same item types, same digit-group lengths) the digits of
           the whole tail are the value, else the single number is;
        2. hyphen + short number at the end of text / line or before ',' '.';
        3. "(" + number + ")" where the number spans exactly two characters;
        4. '/' or '-' + number whose digit count is what the phone template
           ends with.
    """
    if t0 is None:
        return None

    kinds = PhoneItemToken.PhoneItemType
    follower = t0.next0_
    follower_is_number = isinstance(follower, NumberToken)

    if t0.is_char_of("\\/") and follower_is_number and (follower.end_char - follower.begin_char) <= 1:
        tail_items = PhoneItemToken.try_attach_all(follower, 15)
        if tail_items is not None and len(tail_items) > 1:
            if tail_items[-1].item_type == kinds.DELIM:
                del tail_items[-1]
            if len(tail_items) <= len(pli):
                # Align the alternative with the end of the original items.
                shift = len(pli) - len(tail_items)
                digits = ""
                for pos, cand in enumerate(tail_items):
                    ref = pli[shift + pos]
                    if cand.item_type != ref.item_type:
                        break
                    if cand.item_type == kinds.NUMBER:
                        if cand.length_char != ref.length_char:
                            break
                        digits += cand.value
                    elif cand.item_type != kinds.DELIM:
                        break
                else:
                    # Every item matched: the whole tail is the alternative.
                    return PhoneItemToken._new2625(t0, tail_items[-1].end_token, kinds.ALT, digits)
        return PhoneItemToken._new2625(t0, follower, kinds.ALT, follower.get_source_text())

    if t0.is_hiphen and follower_is_number and (follower.end_char - follower.begin_char) <= 1:
        after = follower.next0_
        if after is None or after.is_newline_before or after.is_char_of(",."):
            return PhoneItemToken._new2625(t0, follower, kinds.ALT, follower.get_source_text())

    if (t0.is_char('(') and follower_is_number
            and (follower.end_char - follower.begin_char) == 1
            and follower.next0_ is not None and follower.next0_.is_char(')')):
        return PhoneItemToken._new2625(t0, follower.next0_, kinds.ALT, follower.get_source_text())

    if t0.is_char_of("/-") and follower_is_number and ph0._m_template is not None:
        digit_count = str((follower.end_char - follower.begin_char) + 1)
        if LanguageHelper.ends_with(ph0._m_template, digit_count):
            return PhoneItemToken._new2625(t0, follower, kinds.ALT, follower.get_source_text())

    return None
def try_parse(t : 'Token', attrs : 'BracketParseAttr'=BracketParseAttr.NO, max_tokens : int=100) -> 'BracketSequenceToken':
    """ Try to recover a sequence framed by quotes or brackets.
    Nesting is supported, as well as a possibly missing closing element, etc.

    Args:
        t(Token): start token
        attrs(BracketParseAttr): extraction parameters
        max_tokens(int): maximum number of tokens (in case the closing quote was forgotten)

    Returns:
        BracketSequenceToken: the BracketSequenceToken metatoken, or None
    """
    t0 = t
    cou = 0
    if (not BracketHelper.can_be_start_of_sequence(t0, False, False)):
        return None
    # br_list collects every bracket-like token met during the scan, starting with the opener.
    br_list = list()
    br_list.append(BracketHelper.Bracket(t0))
    cou = 0
    crlf = 0
    last = None
    lev = 1
    # "Asymmetric" opener: listed in M_ASSYMOPEN_CHARS and not '«'; only then nesting depth (lev) is tracked.
    is_assim = br_list[0].char0_ != '«' and BracketHelper.M_ASSYMOPEN_CHARS.find(br_list[0].char0_) >= 0
    gen_case = False
    t = t0.next0_
    # Phase 1: scan forward, collecting brackets and deciding where the scan must stop.
    first_pass3057 = True
    while True:
        if first_pass3057: first_pass3057 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.is_table_control_char):
            break
        last = t
        if (t.is_char_of(BracketHelper.M_OPEN_CHARS) or t.is_char_of(BracketHelper.M_CLOSE_CHARS)):
            if (t.is_newline_before and (((attrs) & (BracketParseAttr.CANBEMANYLINES))) == (BracketParseAttr.NO)):
                if (t.whitespaces_before_count > 10 or BracketHelper.can_be_start_of_sequence(t, False, False)):
                    if (t.is_char('(') and not t0.is_char('(')):
                        pass
                    else:
                        last = t.previous
                        break
            bb = BracketHelper.Bracket(t)
            br_list.append(bb)
            # Hard cap on the number of collected brackets.
            if (len(br_list) > 20):
                break
            if ((len(br_list) == 3 and br_list[1].can_be_open and bb.can_be_close) and BracketHelper.__must_be_close_char(bb.char0_, br_list[1].char0_) and BracketHelper.__must_be_close_char(bb.char0_, br_list[0].char0_)):
                # Third bracket could close either of the two openers: look ahead on the same line.
                ok = False
                tt = t.next0_
                while tt is not None:
                    if (tt.is_newline_before):
                        break
                    if (tt.is_char(',')):
                        break
                    if (tt.is_char('.')):
                        tt = tt.next0_
                        while tt is not None:
                            if (tt.is_newline_before):
                                break
                            elif (tt.is_char_of(BracketHelper.M_OPEN_CHARS) or tt.is_char_of(BracketHelper.M_CLOSE_CHARS)):
                                bb2 = BracketHelper.Bracket(tt)
                                if (BracketHelper.can_be_end_of_sequence(tt, False, None, False) and BracketHelper.__can_be_close_char(bb2.char0_, br_list[0].char0_)):
                                    ok = True
                                break
                            tt = tt.next0_
                        break
                    # NOTE(review): this tests `t` (the current bracket, always true here), not `tt`;
                    # possibly intended to be `tt` - confirm against upstream before changing.
                    if (t.is_char_of(BracketHelper.M_OPEN_CHARS) or t.is_char_of(BracketHelper.M_CLOSE_CHARS)):
                        ok = True
                        break
                    tt = tt.next0_
                if (not ok):
                    break
            if (is_assim):
                if (bb.can_be_open and not bb.can_be_close and bb.char0_ == br_list[0].char0_):
                    lev += 1
                elif (bb.can_be_close and not bb.can_be_open and BracketHelper.M_OPEN_CHARS.find(br_list[0].char0_) == BracketHelper.M_CLOSE_CHARS.find(bb.char0_)):
                    lev -= 1
                    # Depth back to zero: the opener has been matched.
                    if (lev == 0):
                        break
        else:
            # Ordinary (non-bracket) token: count it against max_tokens.
            cou += 1
            if (cou > max_tokens):
                break
            if ((((attrs) & (BracketParseAttr.CANCONTAINSVERBS))) == (BracketParseAttr.NO)):
                if (t.morph.language.is_cyrillic):
                    if (t.get_morph_class_in_dictionary() == MorphClass.VERB):
                        if (not t.morph.class0_.is_adjective and not t.morph.contains_attr("страд.з.", None)):
                            if (t.chars.is_all_lower):
                                norm = t.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
                                if (not LanguageHelper.ends_with(norm, "СЯ")):
                                    if (len(br_list) > 1):
                                        break
                                    if (br_list[0].char0_ != '('):
                                        break
                elif (t.morph.language.is_en):
                    if (t.morph.class0_ == MorphClass.VERB and t.chars.is_all_lower):
                        break
                r = t.get_referent()
                if (r is not None and r.type_name == "ADDRESS"):
                    if (not t0.is_char('(')):
                        break
        if ((((attrs) & (BracketParseAttr.CANBEMANYLINES))) != (BracketParseAttr.NO)):
            # Multi-line mode: only a blank line (more than one newline) stops the scan.
            if (t.is_newline_before):
                if (t.newlines_before_count > 1):
                    break
                crlf += 1
            continue
        if (t.is_newline_before):
            if (t.whitespaces_before_count > 15):
                last = t.previous
                break
            crlf += 1
            if (not t.chars.is_all_lower):
                if (MiscHelper.can_be_start_of_sentence(t)):
                    # A new sentence starts here: continue only if a closing bracket follows on this line.
                    has = False
                    tt = t.next0_
                    while tt is not None:
                        if (tt.is_newline_before):
                            break
                        elif (tt.length_char == 1 and tt.is_char_of(BracketHelper.M_OPEN_CHARS) and tt.is_whitespace_before):
                            break
                        elif (tt.length_char == 1 and tt.is_char_of(BracketHelper.M_CLOSE_CHARS) and not tt.is_whitespace_before):
                            has = True
                            break
                        tt = tt.next0_
                    if (not has):
                        last = t.previous
                        break
            if ((isinstance(t.previous, MetaToken)) and BracketHelper.can_be_end_of_sequence(t.previous.end_token, False, None, False)):
                last = t.previous
                break
        if (crlf > 1):
            if (len(br_list) > 1):
                break
            if (crlf > 10):
                break
        if (t.is_char(';') and t.is_newline_after):
            break
        # Skip over a whole noun phrase at once.
        npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
        if (npt is not None):
            if (t.is_newline_before):
                gen_case = npt.morph.case_.is_genitive
            t = npt.end_token
            last = t
    # Phase 2: only the opener was found - try to close the sequence at `last`.
    if ((len(br_list) == 1 and br_list[0].can_be_open and (isinstance(last, MetaToken))) and last.is_newline_after):
        if (BracketHelper.can_be_end_of_sequence(last.end_token, False, None, False)):
            return BracketSequenceToken(t0, last)
    if ((len(br_list) == 1 and br_list[0].can_be_open and gen_case) and last.is_newline_after and crlf <= 2):
        return BracketSequenceToken(t0, last)
    if (len(br_list) < 1):
        return None
    # Treat an adjacent '<' ... '>' pair as open/close.
    i = 1
    while i < (len(br_list) - 1):
        if (br_list[i].char0_ == '<' and br_list[i + 1].char0_ == '>'):
            br_list[i].can_be_open = True
            br_list[i + 1].can_be_close = True
        i += 1
    internals = None
    # Drop trailing open/close pairs that match each other but cannot close the first bracket.
    while len(br_list) > 3:
        i = len(br_list) - 1
        if ((br_list[i].can_be_close and br_list[i - 1].can_be_open and not BracketHelper.__can_be_close_char(br_list[i].char0_, br_list[0].char0_)) and BracketHelper.__can_be_close_char(br_list[i].char0_, br_list[i - 1].char0_)):
            del br_list[len(br_list) - 2:len(br_list) - 2+2]
            continue
        break
    # Fold inner open/close pairs into `internals`, one pair per pass, until nothing changes.
    while len(br_list) >= 4:
        changed = False
        i = 1
        while i < (len(br_list) - 2):
            if ((br_list[i].can_be_open and not br_list[i].can_be_close and br_list[i + 1].can_be_close) and not br_list[i + 1].can_be_open):
                ok = False
                if (BracketHelper.__must_be_close_char(br_list[i + 1].char0_, br_list[i].char0_) or br_list[i].char0_ != br_list[0].char0_):
                    ok = True
                    if ((i == 1 and ((i + 2) < len(br_list)) and br_list[i + 2].char0_ == ')') and br_list[i + 1].char0_ != ')' and BracketHelper.__can_be_close_char(br_list[i + 1].char0_, br_list[i - 1].char0_)):
                        br_list[i + 2] = br_list[i + 1]
                elif (i > 1 and ((i + 2) < len(br_list)) and BracketHelper.__must_be_close_char(br_list[i + 2].char0_, br_list[i - 1].char0_)):
                    ok = True
                if (ok):
                    if (internals is None):
                        internals = list()
                    internals.append(BracketSequenceToken(br_list[i].source, br_list[i + 1].source))
                    del br_list[i:i+2]
                    changed = True
                    break
            i += 1
        if (not changed):
            break
    # Phase 3: choose the closing bracket among the remaining candidates.
    res = None
    if ((len(br_list) >= 4 and br_list[1].can_be_open and br_list[2].can_be_close) and br_list[3].can_be_close and not br_list[3].can_be_open):
        if (BracketHelper.__can_be_close_char(br_list[3].char0_, br_list[0].char0_)):
            res = BracketSequenceToken(br_list[0].source, br_list[3].source)
            if (br_list[0].source.next0_ != br_list[1].source or br_list[2].source.next0_ != br_list[3].source):
                res.internal.append(BracketSequenceToken(br_list[1].source, br_list[2].source))
            if (internals is not None):
                res.internal.extend(internals)
    if ((res is None and len(br_list) >= 3 and br_list[2].can_be_close) and not br_list[2].can_be_open):
        if ((((attrs) & (BracketParseAttr.NEARCLOSEBRACKET))) != (BracketParseAttr.NO)):
            if (BracketHelper.__can_be_close_char(br_list[1].char0_, br_list[0].char0_)):
                return BracketSequenceToken(br_list[0].source, br_list[1].source)
        ok = True
        if (BracketHelper.__can_be_close_char(br_list[2].char0_, br_list[0].char0_) and BracketHelper.__can_be_close_char(br_list[1].char0_, br_list[0].char0_) and br_list[1].can_be_close):
            # Both the 2nd and 3rd brackets could close the opener: inspect the text between them.
            t = br_list[1].source
            while t != br_list[2].source and t is not None:
                if (t.is_newline_before):
                    ok = False
                    break
                if (t.chars.is_letter and t.chars.is_all_lower):
                    ok = False
                    break
                npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
                if (npt is not None):
                    t = npt.end_token
                t = t.next0_
            if (ok):
                t = br_list[0].source.next0_
                while t != br_list[1].source and t is not None:
                    if (t.is_newline_before):
                        return BracketSequenceToken(br_list[0].source, t.previous)
                    t = t.next0_
            # Walk backwards on the same line counting single-character bracket tokens before the opener.
            lev1 = 0
            tt = br_list[0].source.previous
            first_pass3058 = True
            while True:
                if first_pass3058: first_pass3058 = False
                else: tt = tt.previous
                if (not (tt is not None)): break
                if (tt.is_newline_after or tt.is_table_control_char):
                    break
                if (not (isinstance(tt, TextToken))):
                    continue
                if (tt.chars.is_letter or tt.length_char > 1):
                    continue
                ch = tt.term[0]
                if (BracketHelper.__can_be_close_char(ch, br_list[0].char0_)):
                    lev1 += 1
                elif (BracketHelper.__can_be_close_char(br_list[1].char0_, ch)):
                    lev1 -= 1
                    if (lev1 < 0):
                        return BracketSequenceToken(br_list[0].source, br_list[1].source)
        if (ok and BracketHelper.__can_be_close_char(br_list[2].char0_, br_list[0].char0_)):
            intern = BracketSequenceToken(br_list[1].source, br_list[2].source)
            res = BracketSequenceToken(br_list[0].source, br_list[2].source)
            res.internal.append(intern)
        elif (ok and BracketHelper.__can_be_close_char(br_list[2].char0_, br_list[1].char0_) and br_list[0].can_be_open):
            if (BracketHelper.__can_be_close_char(br_list[2].char0_, br_list[0].char0_)):
                intern = BracketSequenceToken(br_list[1].source, br_list[2].source)
                res = BracketSequenceToken(br_list[0].source, br_list[2].source)
                res.internal.append(intern)
            elif (len(br_list) == 3):
                return None
    # Fallbacks: close at the second collected bracket.
    if (res is None and len(br_list) > 1 and br_list[1].can_be_close):
        res = BracketSequenceToken(br_list[0].source, br_list[1].source)
    if (res is None and len(br_list) > 1 and BracketHelper.__can_be_close_char(br_list[1].char0_, br_list[0].char0_)):
        res = BracketSequenceToken(br_list[0].source, br_list[1].source)
    if (res is None and len(br_list) == 2 and br_list[0].char0_ == br_list[1].char0_):
        res = BracketSequenceToken(br_list[0].source, br_list[1].source)
    # Attach folded inner sequences that begin before the end of the result.
    if (res is not None and internals is not None):
        for i in internals:
            if (i.begin_char < res.end_char):
                res.internal.append(i)
    if (res is None):
        # Last resort: look for a following MetaToken whose last token is a matching closing bracket.
        cou = 0
        tt = t0.next0_
        first_pass3059 = True
        while True:
            if first_pass3059: first_pass3059 = False
            else: tt = tt.next0_; cou += 1
            if (not (tt is not None)): break
            if (tt.is_table_control_char):
                break
            if (MiscHelper.can_be_start_of_sentence(tt)):
                break
            if (max_tokens > 0 and cou > max_tokens):
                break
            mt = Utils.asObjectOrNull(tt, MetaToken)
            if (mt is None):
                continue
            if (isinstance(mt.end_token, TextToken)):
                if (mt.end_token.is_char_of(BracketHelper.M_CLOSE_CHARS)):
                    bb = BracketHelper.Bracket(Utils.asObjectOrNull(mt.end_token, TextToken))
                    if (bb.can_be_close and BracketHelper.__can_be_close_char(bb.char0_, br_list[0].char0_)):
                        return BracketSequenceToken(t0, tt)
    return res
def process(self, word: str) -> typing.List['MorphWordForm']:
    """Run morphological analysis of a single word.

    The word is first looked up in the prefix tree rooted at ``self.m_root``.
    When the dictionary variants found there do not look conclusive, the
    reversed-suffix tree ``self.m_root_reverce`` is used to guess additional
    (non-dictionary) variants. The result is then sorted and normalised.

    Args:
        word(str): the word; it must be in upper case

    Returns:
        list of MorphWordForm, or None when the word is empty, has more than
        one letter but no vowel, or no variant survives. NOTE: when the
        reverse tree is missing, the raw (unsorted, possibly empty) list
        collected so far is returned as is.
    """
    if not word:
        return None
    res = None
    if len(word) > 1:
        # A multi-letter word without a single vowel is not analysed at all.
        if not any(LanguageHelper.is_cyrillic_vowel(ch) or LanguageHelper.is_latin_vowel(ch) for ch in word):
            return None

    # --- 1. Walk the prefix tree letter by letter, applying the rules met on the way ---
    tn = self.m_root
    i = 0
    while i <= len(word):
        if tn.lazy_pos > 0:
            self.__load_tree_node(tn)
        if tn.rules is not None:
            word_begin = word[:i]
            word_end = word[i:]
            if res is None:
                res = list()
            for rule in tn.rules:
                wrapmvs = RefOutArgWrapper(None)
                if Utils.tryGetValue(rule.variants, word_end, wrapmvs):
                    rule.process_result(res, word_begin, wrapmvs.value)
        if tn.nodes is None or i >= len(word):
            break
        wraptn = RefOutArgWrapper(None)
        found = Utils.tryGetValue(tn.nodes, ord(word[i]), wraptn)
        tn = wraptn.value
        if not found:
            break
        i += 1

    # --- 2. Decide whether unknown-word variants must also be tried ---
    need_test_unknown_vars = True
    if res is not None:
        for r in res:
            if ((r.class0_.is_pronoun or r.class0_.is_noun or r.class0_.is_adjective)
                    or (r.class0_.is_misc and r.class0_.is_conjunction) or r.class0_.is_preposition):
                need_test_unknown_vars = False
            elif r.class0_.is_adverb and r.normal_case is not None:
                if not LanguageHelper.ends_with_ex(r.normal_case, "О", "А", None, None):
                    need_test_unknown_vars = False
                elif r.normal_case == "МНОГО":
                    need_test_unknown_vars = False
            elif r.class0_.is_verb and len(res) > 1:
                ok = False
                for rr in res:
                    if rr != r and rr.class0_ != r.class0_:
                        ok = True
                        break
                if ok and not LanguageHelper.ends_with(word, "ИМ"):
                    need_test_unknown_vars = False
    if need_test_unknown_vars and LanguageHelper.is_cyrillic_char(word[0]):
        # Too few vowels or too few other characters: do not guess.
        gl = sum(1 for ch in word if LanguageHelper.is_cyrillic_vowel(ch))
        sog = len(word) - gl
        if (gl < 2) or (sog < 2):
            need_test_unknown_vars = False
    if need_test_unknown_vars and res is not None and len(res) == 1:
        only = res[0]
        if only.class0_.is_verb:
            attrs = only.misc.attrs
            if "н.вр." in attrs and "нес.в." in attrs and "страд.з." not in attrs:
                need_test_unknown_vars = False
            elif "б.вр." in attrs and "сов.в." in attrs:
                need_test_unknown_vars = False
            elif only.normal_case is not None and LanguageHelper.ends_with(only.normal_case, "СЯ"):
                need_test_unknown_vars = False
        if only.class0_.is_undefined and "прдктв." in only.misc.attrs:
            need_test_unknown_vars = False

    # --- 3. Guess variants from the reversed-suffix tree ---
    if need_test_unknown_vars:
        if self.m_root_reverce is None:
            return res
        tn = self.m_root_reverce
        tn0 = None
        for i in range(len(word) - 1, -1, -1):
            if tn.lazy_pos > 0:
                self.__load_tree_node(tn)
            ch = ord(word[i])
            if tn.nodes is None:
                break
            wrapnext = RefOutArgWrapper(None)
            if not Utils.tryGetValue(tn.nodes, ch, wrapnext):
                break
            tn = wrapnext.value
            if tn.lazy_pos > 0:
                self.__load_tree_node(tn)
            if tn.reverce_variants is not None:
                tn0 = tn
                break
        else:
            # Loop ran to completion: mirror the index a C-style loop would leave behind.
            i = -1
        if tn0 is not None:
            # The part of the word before the matched suffix must contain a vowel
            # (or the match must start within the first four letters).
            glas = i < 4
            while i >= 0:
                if LanguageHelper.is_cyrillic_vowel(word[i]) or LanguageHelper.is_latin_vowel(word[i]):
                    glas = True
                    break
                i -= 1
            if glas:
                for mv in tn0.reverce_variants:
                    if (((not mv.class0_.is_verb and not mv.class0_.is_adjective and not mv.class0_.is_noun)
                            and not mv.class0_.is_proper_surname and not mv.class0_.is_proper_geo)
                            and not mv.class0_.is_proper_secname):
                        continue
                    ok = False
                    # FIX: res may still be None here (no rules met in the prefix tree);
                    # iterating it directly raised TypeError.
                    for rr in (res or ()):
                        if rr.is_in_dictionary:
                            if rr.class0_ == mv.class0_ or rr.class0_.is_noun:
                                ok = True
                                break
                            if not mv.class0_.is_adjective and rr.class0_.is_verb:
                                ok = True
                                break
                    if ok:
                        continue
                    if len(mv.tail) > 0 and not LanguageHelper.ends_with(word, mv.tail):
                        continue
                    r = MorphWordForm(mv, word)
                    if not MorphWordForm._has_morph_equals(res, r):
                        r.undef_coef = mv.coef
                        if res is None:
                            res = list()
                        res.append(r)

    # --- 4. Post-processing ---
    if word == "ПРИ" and res is not None:
        # Special case: drop geographic proper-name readings of this word.
        for i in range(len(res) - 1, -1, -1):
            if res[i].class0_.is_proper_geo:
                del res[i]
    if res is None or len(res) == 0:
        return None
    MorphEngine.__sort(res, word)
    for v in res:
        if v.normal_case is None:
            v.normal_case = word
        if v.class0_.is_verb:
            if v.normal_full is None and LanguageHelper.ends_with(v.normal_case, "ТЬСЯ"):
                v.normal_full = v.normal_case[0:len(v.normal_case) - 2]
        v.language = self.language
        if v.class0_.is_preposition:
            v.normal_case = LanguageHelper.normalize_preposition(v.normal_case)
    # Remove guessed short-form / invariable adjectives and collect the classes of dictionary variants.
    mc = MorphClass()
    for i in range(len(res) - 1, -1, -1):
        if not res[i].is_in_dictionary and res[i].class0_.is_adjective and len(res) > 1:
            if "к.ф." in res[i].misc.attrs or "неизм." in res[i].misc.attrs:
                del res[i]
                continue
        if res[i].is_in_dictionary:
            mc.value |= res[i].class0_.value
    if mc == MorphClass.VERB and len(res) > 1:
        for r in res:
            if r.undef_coef > 100 and r.class0_ == MorphClass.ADJECTIVE:
                r.undef_coef = 0
    if len(res) == 0:
        return None
    return res
def __compare_forms(self, x: 'MorphWordForm', y: 'MorphWordForm') -> int:
    """Three-way comparator ordering two word forms by preference.

    Each form is represented by its normal form (``normal_full`` if set,
    otherwise ``normal_case``). Surname endings, morphological class,
    dictionary membership and the length of the normal form are then
    consulted in a fixed order.

    Args:
        x: first word form
        y: second word form

    Returns:
        -1 if x goes first, 1 if y goes first, 0 if they are equivalent.
    """
    norm_x = Utils.ifNotNull(x.normal_full, x.normal_case)
    norm_y = Utils.ifNotNull(y.normal_full, y.normal_case)
    if norm_x == norm_y:
        return 0
    # A form without a normal value always loses.
    if Utils.isNullOrEmpty(norm_x):
        return 1
    if Utils.isNullOrEmpty(norm_y):
        return -1

    len_x = len(norm_x)
    len_y = len(norm_y)
    cls_x = x.class0_
    cls_y = y.class0_

    # Surnames with typical endings win over non-surnames (unless the token is all lower case).
    if cls_x.is_proper_surname and not self.char_info.is_all_lower:
        if LanguageHelper.ends_with_ex(norm_x, "ОВ", "ЕВ", "ИН", None):
            if not cls_y.is_proper_surname:
                return -1
    if cls_y.is_proper_surname and not self.char_info.is_all_lower:
        if LanguageHelper.ends_with_ex(norm_y, "ОВ", "ЕВ", "ИН", None):
            if not cls_x.is_proper_surname:
                return 1
            # Both are surnames: the longer normal form goes first.
            return (len_x < len_y) - (len_x > len_y)

    if cls_x == cls_y:
        if cls_x.is_adjective:
            x_ends_j = norm_x[-1] == 'Й'
            y_ends_j = norm_y[-1] == 'Й'
            if x_ends_j and not y_ends_j:
                return -1
            if y_ends_j and not x_ends_j:
                return 1
            x_ends_oj = LanguageHelper.ends_with(norm_x, "ОЙ")
            y_ends_oj = LanguageHelper.ends_with(norm_y, "ОЙ")
            if y_ends_oj and not x_ends_oj:
                return -1
            if x_ends_oj and not y_ends_oj:
                return 1
        if cls_x.is_noun:
            if (x.number == MorphNumber.SINGULAR and y.number == MorphNumber.PLURAL
                    and len_x <= (len_y + 1)):
                return -1
            if (x.number == MorphNumber.PLURAL and y.number == MorphNumber.SINGULAR
                    and len_x >= (len_y - 1)):
                return 1
        # Same class, nothing else decided: the shorter normal form goes first.
        return (len_x > len_y) - (len_x < len_y)

    # Classes differ: decide by the class of x first ...
    if cls_x.is_adverb:
        return 1
    if cls_x.is_noun and x.is_in_dictionary:
        if cls_y.is_adjective and y.is_in_dictionary:
            if "к.ф." not in y.misc.attrs:
                return 1
        return -1
    if cls_x.is_adjective:
        if not x.is_in_dictionary and cls_y.is_noun and y.is_in_dictionary:
            return 1
        return -1
    if cls_x.is_verb:
        if cls_y.is_noun or cls_y.is_adjective or cls_y.is_preposition:
            return 1
        return -1

    # ... then by the class of y.
    if cls_y.is_adverb:
        return -1
    if cls_y.is_noun and y.is_in_dictionary:
        return 1
    if cls_y.is_adjective:
        if (cls_x.is_noun or cls_x.is_proper_secname) and x.is_in_dictionary:
            return -1
        if cls_x.is_noun and not y.is_in_dictionary:
            if len_x < len_y:
                return -1
        return 1
    if cls_y.is_verb:
        if cls_x.is_noun or cls_x.is_adjective or cls_x.is_preposition:
            return -1
        if cls_x.is_proper:
            return -1
        return 1

    return (len_x > len_y) - (len_x < len_y)
def try_parse(t: 'Token', items: typing.List['NounPhraseItem'], attrs: 'NounPhraseParseAttr') -> 'NounPhraseItem':
    """Try to build one noun-phrase item (a candidate adjective and/or noun) at token t.

    Args:
        t: token to analyse (None gives None)
        items: items already collected for the current phrase; may be None or empty
        attrs: NounPhraseParseAttr flags controlling the parse

    Returns:
        NounPhraseItem with can_be_adj / can_be_noun and the matching
        adj_morph / noun_morph variants filled in, or None when the token
        cannot take part in a noun phrase.
    """
    if (t is None):
        return None
    t0 = t
    _can_be_surname = False
    _is_doubt_adj = False
    rt = Utils.asObjectOrNull(t, ReferentToken)
    # A referent token wrapping a single text token: analyse the inner token, report the outer one.
    if (rt is not None and rt.begin_token == rt.end_token and (isinstance(rt.begin_token, TextToken))):
        res = NounPhraseItem.try_parse(rt.begin_token, items, attrs)
        if (res is not None):
            res.begin_token = res.end_token = t
            res.can_be_noun = True
            return res
    # Any other referent token is taken as a noun whose normal value is the referent's string form.
    if (rt is not None):
        res = NounPhraseItem(t, t)
        for m in t.morph.items:
            v = NounPhraseItemTextVar(m, None)
            v.normal_value = str(t.get_referent())
            res.noun_morph.append(v)
        res.can_be_noun = True
        return res
    if (isinstance(t, NumberToken)):
        pass
    has_legal_verb = False
    # Early rejections and hints that apply to plain text tokens only.
    if (isinstance(t, TextToken)):
        if (not t.chars.is_letter):
            return None
        str0_ = t.term
        # Words ending in 'А' / 'О': check dictionary verb, adverb and short-form adjective readings.
        if (str0_[len(str0_) - 1] == 'А' or str0_[len(str0_) - 1] == 'О'):
            for wf in t.morph.items:
                if ((isinstance(wf, MorphWordForm)) and wf.is_in_dictionary):
                    if (wf.class0_.is_verb):
                        mc = t.get_morph_class_in_dictionary()
                        if (not mc.is_noun and (((attrs) & (NounPhraseParseAttr.IGNOREPARTICIPLES))) == (NounPhraseParseAttr.NO)):
                            if (not LanguageHelper.ends_with_ex(str0_, "ОГО", "ЕГО", None, None)):
                                return None
                        has_legal_verb = True
                    if (wf.class0_.is_adverb):
                        if (t.next0_ is None or not t.next0_.is_hiphen):
                            # A fixed list of adverb-like words is still allowed.
                            if ((str0_ == "ВСЕГО" or str0_ == "ДОМА" or str0_ == "НЕСКОЛЬКО") or str0_ == "МНОГО" or str0_ == "ПОРЯДКА"):
                                pass
                            else:
                                return None
                    if (wf.class0_.is_adjective):
                        if (wf.contains_attr("к.ф.", None)):
                            if (t.get_morph_class_in_dictionary() == MorphClass.ADJECTIVE):
                                pass
                            else:
                                _is_doubt_adj = True
        mc0 = t.morph.class0_
        # Capitalised word with a surname reading.
        if (mc0.is_proper_surname and not t.chars.is_all_lower):
            for wf in t.morph.items:
                if (wf.class0_.is_proper_surname and wf.number != MorphNumber.PLURAL):
                    wff = Utils.asObjectOrNull(wf, MorphWordForm)
                    if (wff is None):
                        continue
                    s = Utils.ifNotNull((Utils.ifNotNull(wff.normal_full, wff.normal_case)), "")
                    if (LanguageHelper.ends_with_ex(s, "ИН", "ЕН", "ЫН", None)):
                        if (not wff.is_in_dictionary):
                            _can_be_surname = True
                        else:
                            return None
                    if (wff.is_in_dictionary and LanguageHelper.ends_with(s, "ОВ")):
                        _can_be_surname = True
        # Capitalised word with a first-name reading.
        if (mc0.is_proper_name and not t.chars.is_all_lower):
            for wff in t.morph.items:
                wf = Utils.asObjectOrNull(wff, MorphWordForm)
                if (wf is None):
                    continue
                if (wf.normal_case == "ГОР"):
                    continue
                if (wf.class0_.is_proper_name and wf.is_in_dictionary):
                    if (wf.normal_case is None or not wf.normal_case.startswith("ЛЮБ")):
                        if (mc0.is_adjective and t.morph.contains_attr("неизм.", None)):
                            pass
                        elif ((((attrs) & (NounPhraseParseAttr.REFERENTCANBENOUN))) == (NounPhraseParseAttr.REFERENTCANBENOUN)):
                            pass
                        else:
                            # Otherwise a dictionary first name is accepted only after a standard adjective.
                            if (items is None or (len(items) < 1)):
                                return None
                            if (not items[0].is_std_adjective):
                                return None
        if (mc0.is_adjective and t.morph.items_count == 1):
            if (t.morph.get_indexer_item(0).contains_attr("в.ср.ст.", None)):
                return None
        mc1 = t.get_morph_class_in_dictionary()
        if (mc1 == MorphClass.VERB and t.morph.case_.is_undefined):
            return None
        if (((((attrs) & (NounPhraseParseAttr.IGNOREPARTICIPLES))) == (NounPhraseParseAttr.IGNOREPARTICIPLES) and t.morph.class0_.is_verb and not t.morph.class0_.is_noun) and not t.morph.class0_.is_proper):
            for wf in t.morph.items:
                if (wf.class0_.is_verb):
                    if (wf.contains_attr("дейст.з.", None)):
                        if (LanguageHelper.ends_with(t.term, "СЯ")):
                            pass
                        else:
                            return None
    t1 = None
    # Up to two attempts: k == 0 may jump over a hyphen to the second part of a compound;
    # k == 1 retries from t1 (when set below) or from t0 itself.
    for k in range(2):
        t = (Utils.ifNotNull(t1, t0))
        if (k == 0):
            if (((isinstance(t0, TextToken)) and t0.next0_ is not None and t0.next0_.is_hiphen) and t0.next0_.next0_ is not None):
                if (not t0.is_whitespace_after and not t0.morph.class0_.is_pronoun and not (isinstance(t0.next0_.next0_, NumberToken))):
                    if (not t0.next0_.is_whitespace_after):
                        t = t0.next0_.next0_
                    elif (t0.next0_.next0_.chars.is_all_lower and LanguageHelper.ends_with(t0.term, "О")):
                        t = t0.next0_.next0_
        it = NounPhraseItem._new404(t0, t, _can_be_surname)
        if (t0 == t and (isinstance(t0, ReferentToken))):
            it.can_be_noun = True
            it.morph = MorphCollection(t0.morph)
        can_be_prepos = False
        # Examine every morphological variant of the (possibly shifted) token.
        for v in t.morph.items:
            wf = Utils.asObjectOrNull(v, MorphWordForm)
            # A verb variant that has a case is kept as an adjective candidate.
            if (v.class0_.is_verb and not v.case_.is_undefined):
                it.can_be_adj = True
                it.adj_morph.append(NounPhraseItemTextVar(v, t))
                continue
            if (v.class0_.is_preposition):
                can_be_prepos = True
            # --- adjective-like variants ---
            if (v.class0_.is_adjective or ((v.class0_.is_pronoun and not v.class0_.is_personal_pronoun and not v.contains_attr("неизм.", None))) or ((v.class0_.is_noun and (isinstance(t, NumberToken))))):
                if (NounPhraseItem.try_accord_variant(items, (0 if items is None else len(items)), v, False)):
                    # NOTE(review): is_doub is assigned but never read in this block.
                    is_doub = False
                    if (v.contains_attr("к.ф.", None)):
                        continue
                    if (v.contains_attr("собир.", None) and not (isinstance(t, NumberToken))):
                        if (wf is not None and wf.is_in_dictionary):
                            return None
                        continue
                    if (v.contains_attr("сравн.", None)):
                        continue
                    ok = True
                    if (isinstance(t, TextToken)):
                        s = t.term
                        if (s == "ПРАВО" or s == "ПРАВА"):
                            ok = False
                        elif (LanguageHelper.ends_with(s, "ОВ") and t.get_morph_class_in_dictionary().is_noun):
                            ok = False
                    elif (isinstance(t, NumberToken)):
                        if (v.class0_.is_noun and t.morph.class0_.is_adjective):
                            ok = False
                        elif (t.morph.class0_.is_noun and (((attrs) & (NounPhraseParseAttr.PARSENUMERICASADJECTIVE))) == (NounPhraseParseAttr.NO)):
                            ok = False
                    if (ok):
                        it.adj_morph.append(NounPhraseItemTextVar(v, t))
                        it.can_be_adj = True
                        if (_is_doubt_adj and t0 == t):
                            it.is_doubt_adjective = True
                        if (has_legal_verb and wf is not None and wf.is_in_dictionary):
                            it.can_be_noun = True
                        if (wf is not None and wf.class0_.is_pronoun):
                            it.can_be_noun = True
                            it.noun_morph.append(NounPhraseItemTextVar(v, t))
            # --- noun-like variants ---
            can_be_noun_ = False
            if (isinstance(t, NumberToken)):
                pass
            elif (v.class0_.is_noun or ((wf is not None and wf.normal_case == "САМ"))):
                can_be_noun_ = True
            elif (v.class0_.is_personal_pronoun):
                if (items is None or len(items) == 0):
                    can_be_noun_ = True
                else:
                    for it1 in items:
                        if (it1.is_verb):
                            if (len(items) == 1 and not v.case_.is_nominative):
                                can_be_noun_ = True
                            else:
                                return None
                    if (len(items) == 1):
                        if (items[0].can_be_adj_for_personal_pronoun):
                            can_be_noun_ = True
            elif ((v.class0_.is_pronoun and ((items is None or len(items) == 0 or ((len(items) == 1 and items[0].can_be_adj_for_personal_pronoun)))) and wf is not None) and (((((wf.normal_case == "ТОТ" or wf.normal_full == "ТО" or wf.normal_case == "ТО") or wf.normal_case == "ЭТО" or wf.normal_case == "ВСЕ") or wf.normal_case == "ЧТО" or wf.normal_case == "КТО") or wf.normal_full == "КОТОРЫЙ" or wf.normal_case == "КОТОРЫЙ"))):
                if (wf.normal_case == "ВСЕ"):
                    if (t.next0_ is not None and t.next0_.is_value("РАВНО", None)):
                        return None
                can_be_noun_ = True
            elif (wf is not None and ((Utils.ifNotNull(wf.normal_full, wf.normal_case))) == "КОТОРЫЙ" and (((attrs) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (NounPhraseParseAttr.NO)):
                return None
            elif (v.class0_.is_proper and (isinstance(t, TextToken))):
                if (t.length_char > 4 or v.class0_.is_proper_name):
                    can_be_noun_ = True
            if (can_be_noun_):
                added = False
                # MULTINOUNS: with several items already collected, all (after the first) joined by a conjunction.
                if (items is not None and len(items) > 1 and (((attrs) & (NounPhraseParseAttr.MULTINOUNS))) != (NounPhraseParseAttr.NO)):
                    ok1 = True
                    ii = 1
                    while ii < len(items):
                        if (not items[ii].conj_before):
                            ok1 = False
                            break
                        ii += 1
                    if (ok1):
                        if (NounPhraseItem.try_accord_variant(items, (0 if items is None else len(items)), v, True)):
                            it.noun_morph.append(NounPhraseItemTextVar(v, t))
                            it.can_be_noun = True
                            it.multi_nouns = True
                            added = True
                if (not added):
                    if (NounPhraseItem.try_accord_variant(items, (0 if items is None else len(items)), v, False)):
                        it.noun_morph.append(NounPhraseItemTextVar(v, t))
                        it.can_be_noun = True
                        if (v.class0_.is_personal_pronoun and t.morph.contains_attr("неизм.", None) and not it.can_be_adj):
                            # Invariable personal pronoun: also register it as an adjective for all cases.
                            itt = NounPhraseItemTextVar(v, t)
                            itt.case_ = MorphCase.ALL_CASES
                            itt.number = MorphNumber.UNDEFINED
                            if (itt.normal_value is None):
                                pass
                            it.adj_morph.append(itt)
                            it.can_be_adj = True
                    # NOTE(review): len(items) is evaluated here without a None check; presumably
                    # try_accord_variant never fails for items=None - verify.
                    elif ((len(items) > 0 and len(items[0].adj_morph) > 0 and items[0].adj_morph[0].number == MorphNumber.PLURAL) and not ((items[0].adj_morph[0].case_) & v.case_).is_undefined and not items[0].adj_morph[0].class0_.is_verb):
                        if (t.next0_ is not None and t.next0_.is_comma_and and (isinstance(t.next0_.next0_, TextToken))):
                            npt2 = NounPhraseHelper.try_parse(t.next0_.next0_, attrs, 0, None)
                            if (npt2 is not None and npt2.preposition is None and not ((npt2.morph.case_) & v.case_ & items[0].adj_morph[0].case_).is_undefined):
                                it.noun_morph.append(NounPhraseItemTextVar(v, t))
                                it.can_be_noun = True
        # The item spans "t0 - t": let every variant account for the prefix token t0.
        if (t0 != t):
            for v in it.adj_morph:
                v.correct_prefix(Utils.asObjectOrNull(t0, TextToken), False)
            for v in it.noun_morph:
                v.correct_prefix(Utils.asObjectOrNull(t0, TextToken), True)
        if (k == 1 and it.can_be_noun and not it.can_be_adj):
            if (t1 is not None):
                it.end_token = t1
            else:
                it.end_token = t0.next0_.next0_
            # Build a hyphenated normal value "<first>-<second>" unless it already has a hyphen.
            for v in it.noun_morph:
                if (v.normal_value is not None and (v.normal_value.find('-') < 0)):
                    v.normal_value = "{0}-{1}".format(v.normal_value, it.end_token.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False))
        if (it.can_be_adj):
            if (NounPhraseItem.__m_std_adjectives.try_parse(it.begin_token, TerminParseAttr.NO) is not None):
                it.is_std_adjective = True
        # The word may also be a preposition: reject it when a noun phrase follows that fits that reading.
        if (can_be_prepos and it.can_be_noun):
            if (items is not None and len(items) > 0):
                npt1 = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.PARSEPRONOUNS) | (NounPhraseParseAttr.PARSEVERBS), NounPhraseParseAttr), 0, None)
                if (npt1 is not None and npt1.end_char > t.end_char):
                    return None
            else:
                npt1 = NounPhraseHelper.try_parse(t.next0_, Utils.valToEnum((NounPhraseParseAttr.PARSEPRONOUNS) | (NounPhraseParseAttr.PARSEVERBS), NounPhraseParseAttr), 0, None)
                if (npt1 is not None):
                    mc = LanguageHelper.get_case_after_preposition(t.lemma)
                    if (not ((mc) & npt1.morph.case_).is_undefined):
                        return None
        if (it.can_be_noun or it.can_be_adj or k == 1):
            # Pronoun followed by a particle (optionally through a hyphen): extend the item over it.
            if (it.begin_token.morph.class0_.is_pronoun):
                tt2 = it.end_token.next0_
                if ((tt2 is not None and tt2.is_hiphen and not tt2.is_whitespace_after) and not tt2.is_whitespace_before):
                    tt2 = tt2.next0_
                if (isinstance(tt2, TextToken)):
                    ss = tt2.term
                    if ((ss == "ЖЕ" or ss == "БЫ" or ss == "ЛИ") or ss == "Ж"):
                        it.end_token = tt2
                    elif (ss == "НИБУДЬ" or ss == "ЛИБО" or (((ss == "ТО" and tt2.previous.is_hiphen)) and it.can_be_adj)):
                        it.end_token = tt2
                        for m in it.adj_morph:
                            m.normal_value = "{0}-{1}".format(m.normal_value, ss)
                            if (m.single_number_value is not None):
                                m.single_number_value = "{0}-{1}".format(m.single_number_value, ss)
            return it
        if (t0 == t):
            # Special case: "БИЗНЕС" followed by a token with the same character class -
            # retry on the second pass with that next token.
            if (t0.is_value("БИЗНЕС", None) and t0.next0_ is not None and t0.next0_.chars == t0.chars):
                t1 = t0.next0_
                continue
            return it
    return None
def __try_noun_name(li: typing.List['CityItemToken'], oi: 'IntOntologyItem', always: bool) -> 'ReferentToken':
    """Try to build a city referent from a "<type word> <name>" item sequence.

    ``li[0]`` must be a NOUN or MISC item and the name item ``li[i1]``
    (``li[1]``, or ``li[2]`` when a second type noun follows a
    settlement/village type) must be a CITY or PROPERNAME item; any other
    shape yields ``None``.

    Args:
        li: Parsed city items. Mutated in place: every item after the name
            item is deleted, and a leading MISC item is removed before the
            result token is created.
        oi: Holder object. ``oi.value`` is reset to ``None`` on entry and is
            set to the name item's ``onto_item`` in the
            "ГОРОД"/"МІСТО"/MISC + CITY branch.
        always: When true, the final ``check_near_before`` gate is skipped,
            so a candidate whose ``ok`` flag stayed false is still returned.

    Returns:
        A ``ReferentToken`` created by ``ReferentToken._new734`` around a
        ``GeoReferent``, or ``None`` when the sequence is rejected.

    NOTE(review): the block structure below was reconstructed from a
    whitespace-collapsed listing. The statement order is the original one,
    but the nesting depth of some trailing ``if`` statements is inferred
    from the logic -- confirm against the upstream file.
    """
    oi.value = (None)
    # At least two items are required and the first must be NOUN or MISC.
    if (li is None or (len(li) < 2)
            or ((li[0].typ != CityItemToken.ItemType.NOUN
                 and li[0].typ != CityItemToken.ItemType.MISC))):
        return None
    ok = not li[0].doubtful
    # A MISC head never counts as sufficient evidence on its own.
    if (ok and li[0].typ == CityItemToken.ItemType.MISC):
        ok = False
    typ = (None if li[0].typ == CityItemToken.ItemType.MISC else li[0].value)
    typ2 = (None if li[0].typ == CityItemToken.ItemType.MISC else li[0].alt_value)
    prob_adj = None
    i1 = 1  # index of the name item
    org0_ = None  # NOTE(review): not referenced again in this function
    # Settlement/village type followed by a second type noun and then the
    # name: the second noun becomes typ2 and the name index moves to 2.
    if ((typ is not None and li[i1].typ == CityItemToken.ItemType.NOUN
         and ((i1 + 1) < len(li)))
            and li[0].whitespaces_after_count <= 1
            and (((LanguageHelper.ends_with(typ, "ПОСЕЛОК")
                   or LanguageHelper.ends_with(typ, "СЕЛИЩЕ")
                   or typ == "ДЕРЕВНЯ") or typ == "СЕЛО"))):
        if (li[i1].begin_token == li[i1].end_token):
            ooo = AddressItemToken.try_attach_org(li[i1].begin_token)
            if (ooo is not None and ooo.ref_token is not None):
                return None
        typ2 = li[i1].value
        # The token "СТ" was read as "СТАНЦИЯ" (station); also keep an
        # "old" adjective ("СТАРЫЕ"/"СТАРАЯ"/"СТАРЫЙ"/"СТАРОЕ") agreeing with
        # the following item -- presumably because "СТ" can abbreviate it.
        if (typ2 == "СТАНЦИЯ" and li[i1].begin_token.is_value("СТ", None)
                and ((i1 + 1) < len(li))):
            m = li[i1 + 1].morph
            if (m.number == MorphNumber.PLURAL):
                prob_adj = "СТАРЫЕ"
            elif (m.gender == MorphGender.FEMINIE):
                prob_adj = "СТАРАЯ"
            elif (m.gender == MorphGender.MASCULINE):
                prob_adj = "СТАРЫЙ"
            else:
                prob_adj = "СТАРОЕ"
        i1 += 1
    # Name text: the item's own value, else the ontology item's canonic text.
    name = Utils.ifNotNull(li[i1].value,
                           ((None if li[i1].onto_item is None else li[i1].onto_item.canonic_text)))
    alt_name = li[i1].alt_value
    if (name is None):
        return None
    mc = li[0].morph
    # Branch 1: "ГОРОД"/"МІСТО" (or a MISC head) directly followed by a CITY item.
    if (i1 == 1 and li[i1].typ == CityItemToken.ItemType.CITY
            and ((li[0].value == "ГОРОД" or li[0].value == "МІСТО"
                  or li[0].typ == CityItemToken.ItemType.MISC))):
        if (typ is None and ((i1 + 1) < len(li))
                and li[i1 + 1].typ == CityItemToken.ItemType.NOUN):
            return None
        oi.value = li[i1].onto_item
        if (oi.value is not None):
            name = oi.value.canonic_text
        # NOTE(review): if oi.value is None and len(name) <= 2, the right
        # operand dereferences None -- confirm CITY items always carry an
        # onto_item, or that this test is meant to sit under the guard above.
        if (len(name) > 2 or oi.value.misc_attr is not None):
            if (not li[1].doubtful
                    or ((oi.value is not None and oi.value.misc_attr is not
                         None))):
                ok = True
            elif (not ok and not li[1].is_newline_before):
                # Doubtful name: accept it only with supporting context.
                if (li[0].geo_object_before or li[1].geo_object_after):
                    ok = True
                elif (StreetDefineHelper.check_street_after(
                        li[1].end_token.next0_)):
                    ok = True
                elif (li[1].end_token.next0_ is not None
                        and (isinstance(li[1].end_token.next0_.get_referent(), DateReferent))):
                    ok = True
                elif ((li[1].whitespaces_before_count < 2) and li[1].onto_item is not None):
                    # NOTE(review): both branches assign True.
                    if (li[1].is_newline_after):
                        ok = True
                    else:
                        ok = True
            # A doubtful name followed by a token with the same `chars`
            # value is rejected again.
            if (li[1].doubtful and li[1].end_token.next0_ is not None
                    and li[1].end_token.chars == li[1].end_token.next0_.chars):
                ok = False
            # A preceding "В" token overrides the rejection.
            if (li[0].begin_token.previous is not None
                    and li[0].begin_token.previous.is_value("В", None)):
                ok = True
        # Last-chance checks on the token that follows the name.
        if (not ok):
            ok = CityAttachHelper.check_year_after(li[1].end_token.next0_)
        if (not ok):
            ok = CityAttachHelper.check_city_after(li[1].end_token.next0_)
    # Branch 2: any other type word followed by a PROPERNAME or CITY item.
    elif ((li[i1].typ == CityItemToken.ItemType.PROPERNAME
           or li[i1].typ == CityItemToken.ItemType.CITY)):
        if (((li[0].value == "АДМИНИСТРАЦИЯ" or li[0].value == "АДМІНІСТРАЦІЯ"))
                and i1 == 1):
            return None
        # A name starting on a new line is accepted only for a two-item list.
        if (li[i1].is_newline_before):
            if (len(li) != 2):
                return None
        if (not li[0].doubtful):
            ok = True
            # Very short names are not trusted.
            if (len(name) < 2):
                ok = False
            elif ((len(name) < 3) and li[0].morph.number != MorphNumber.SINGULAR):
                ok = False
            if (li[i1].doubtful and not li[i1].geo_object_after
                    and not li[0].geo_object_before):
                if (li[i1].morph.case_.is_genitive):
                    if (li[i1].end_token.next0_ is None
                            or MiscLocationHelper.check_geo_object_after(
                                li[i1].end_token.next0_, False)
                            or AddressItemToken.check_house_after(
                                li[i1].end_token.next0_, False, True)):
                        pass
                    elif (li[0].begin_token.previous is None
                            or MiscLocationHelper.check_geo_object_before(
                                li[0].begin_token)):
                        pass
                    else:
                        ok = False
                # Reject when a PERSONPROPERTY referent can be processed
                # from the preceding token and a PERSON from the name token.
                if (ok):
                    rt0 = li[i1].kit.process_referent(
                        "PERSONPROPERTY", li[0].begin_token.previous)
                    if (rt0 is not None):
                        rt1 = li[i1].kit.process_referent(
                            "PERSON", li[i1].begin_token)
                        if (rt1 is not None):
                            ok = False
            npt = NounPhraseHelper.try_parse(li[i1].begin_token,
                                             NounPhraseParseAttr.NO, 0, None)
            if (npt is not None):
                # The name is an adjective of a longer noun phrase.
                if (npt.end_token.end_char > li[i1].end_char
                        and len(npt.adjectives) > 0
                        and not npt.adjectives[0].end_token.next0_.is_comma):
                    ok = False
                elif (TerrItemToken._m_unknown_regions.try_parse(
                        npt.end_token, TerminParseAttr.FULLWORDSONLY) is not None):
                    # The phrase ends with an "unknown region" term: require a
                    # non-city GeoReferent just before or just after
                    # (a single comma may sit in between).
                    ok1 = False
                    if (li[0].begin_token.previous is not None):
                        ttt = li[0].begin_token.previous
                        if (ttt.is_comma and ttt.previous is not None):
                            ttt = ttt.previous
                        geo_ = Utils.asObjectOrNull(
                            ttt.get_referent(), GeoReferent)
                        if (geo_ is not None and not geo_.is_city):
                            ok1 = True
                    if (npt.end_token.next0_ is not None):
                        ttt = npt.end_token.next0_
                        if (ttt.is_comma and ttt.next0_ is not None):
                            ttt = ttt.next0_
                        geo_ = Utils.asObjectOrNull(
                            ttt.get_referent(), GeoReferent)
                        if (geo_ is not None and not geo_.is_city):
                            ok1 = True
                    if (not ok1):
                        return None
            if (li[0].value == "ПОРТ"):
                if (li[i1].chars.is_all_upper or li[i1].chars.is_latin_letter):
                    return None
        elif (li[0].geo_object_before):
            ok = True
        elif (li[i1].geo_object_after and not li[i1].is_newline_after):
            ok = True
        else:
            ok = CityAttachHelper.check_year_after(li[i1].end_token.next0_)
        if (not ok):
            ok = CityAttachHelper.check_street_after(
                li[i1].end_token.next0_)
        if (not ok and li[0].begin_token.previous is not None
                and li[0].begin_token.previous.is_value("В", None)):
            ok = True
    else:
        return None
    # Unless forced by `always`, an unconfirmed candidate needs
    # check_near_before to return something for the preceding token.
    if (not ok and not always):
        if (MiscLocationHelper.check_near_before(
                li[0].begin_token.previous) is None):
            return None
    # Drop every item after the name (the upper bound simplifies to len(li)).
    if (len(li) > (i1 + 1)):
        del li[i1 + 1:i1 + 1 + len(li) - i1 - 1]
    city = GeoReferent()
    # Start from a clone of the ontology referent when there is one.
    if (oi.value is not None and oi.value.referent is not None):
        city = (Utils.asObjectOrNull(oi.value.referent.clone(), GeoReferent))
        city.occurrence.clear()
    # Re-derive a single-token adjective name using the case and gender of
    # the type word.
    if (not li[0].morph.case_.is_undefined
            and li[0].morph.gender != MorphGender.UNDEFINED):
        if (li[i1].end_token.morph.class0_.is_adjective
                and li[i1].begin_token == li[i1].end_token):
            nam = ProperNameHelper.get_name_ex(
                li[i1].begin_token, li[i1].end_token, MorphClass.ADJECTIVE,
                li[0].morph.case_, li[0].morph.gender, False, False)
            if (nam is not None and nam != name):
                name = nam
    # With a nominative type word the alternative name is added first and
    # then cleared, so it is not combined with prob_adj below.
    if (li[0].morph.case_.is_nominative):
        if (alt_name is not None):
            city._add_name(alt_name)
        alt_name = (None)
    city._add_name(name)
    if (prob_adj is not None):
        city._add_name(prob_adj + " " + name)
    if (alt_name is not None):
        city._add_name(alt_name)
        if (prob_adj is not None):
            city._add_name(prob_adj + " " + alt_name)
    if (typ is not None):
        city._add_typ(typ)
    elif (not city.is_city):
        city._add_typ_city(li[0].kit.base_language)
    if (typ2 is not None):
        city._add_typ(typ2.lower())
    if (li[0].higher_geo is not None
            and GeoOwnerHelper.can_be_higher(li[0].higher_geo, city)):
        city.higher = li[0].higher_geo
    # A MISC head is removed from the list, so the result starts at the
    # following item.
    if (li[0].typ == CityItemToken.ItemType.MISC):
        del li[0]
    res = ReferentToken._new734(city, li[0].begin_token, li[len(li) - 1].end_token, mc)
    # Absorb a "-<number>" suffix (digit spelling, non-adjective, value
    # below 50) into every name slot and into the token span.
    if (res.end_token.next0_ is not None and res.end_token.next0_.is_hiphen
            and (isinstance(res.end_token.next0_.next0_, NumberToken))):
        num = Utils.asObjectOrNull(res.end_token.next0_.next0_, NumberToken)
        if ((num.typ == NumberSpellingType.DIGIT
             and not num.morph.class0_.is_adjective
             and num.int_value is not None) and (num.int_value < 50)):
            for s in city.slots:
                if (s.type_name == GeoReferent.ATTR_NAME):
                    city.upload_slot(s, "{0}-{1}".format(s.value, num.value))
            res.end_token = num
    # A single-token "ГОРОДОК" head is rejected when check_house_after
    # accepts the token after the result.
    if (li[0].begin_token == li[0].end_token
            and li[0].begin_token.is_value("ГОРОДОК", None)):
        if (AddressItemToken.check_house_after(res.end_token.next0_, True, False)):
            return None
    return res
def __try1(li: typing.List['CityItemToken'], oi: 'IntOntologyItem', ad: 'AnalyzerDataWithOntology') -> 'ReferentToken':
    """Try to build a city referent from a list that starts with the name item.

    Accepted shapes are a list whose first item is a CITY item (optionally
    followed by a NOUN type item) or exactly ``[PROPERNAME, NOUN]``.

    Args:
        li: Parsed city items. Mutated in place: items from the first
            unused index onward are deleted, and ``li[0].value`` may be
            replaced by ``ProperNameHelper.get_name_ex`` output.
        oi: Holder object. ``oi.value`` is reset to ``None`` and then set to
            ``li[0].onto_item``.
        ad: Analyzer data, may be ``None``. When given, its
            ``local_ontology`` is compared with the owner of the ontology
            item.

    Returns:
        A ``ReferentToken`` around a ``GeoReferent``, or ``None`` when the
        candidate is rejected.

    NOTE(review): the block structure below was reconstructed from a
    whitespace-collapsed listing. The statement order is the original one,
    but the nesting depth of some statements is inferred from the logic --
    confirm against the upstream file.
    """
    oi.value = (None)
    if (li is None or (len(li) < 1)):
        return None
    elif (li[0].typ != CityItemToken.ItemType.CITY):
        # The only non-CITY shape accepted is exactly [PROPERNAME, NOUN].
        if (len(li) != 2 or li[0].typ != CityItemToken.ItemType.PROPERNAME
                or li[1].typ != CityItemToken.ItemType.NOUN):
            return None
    i = 1  # index of the first item not yet consumed
    oi.value = li[0].onto_item
    ok = not li[0].doubtful
    # An ontology item without misc_attr, owned by neither the local nor an
    # external ontology, is trusted only after a "В" token.
    if ((ok and li[0].onto_item is not None
         and li[0].onto_item.misc_attr is None) and ad is not None):
        if (li[0].onto_item.owner != ad.local_ontology
                and not li[0].onto_item.owner.is_ext_ontology):
            if (li[0].begin_token.previous is not None
                    and li[0].begin_token.previous.is_value("В", None)):
                pass
            else:
                ok = False
    # A lone adjective that parses as a two-item street whose second item
    # is a street NOUN is not treated as a city.
    if (len(li) == 1 and li[0].begin_token.morph.class0_.is_adjective):
        sits = StreetItemToken.try_parse_list(li[0].begin_token, None, 3)
        if (sits is not None and len(sits) == 2
                and sits[1].typ == StreetItemType.NOUN):
            return None
    typ = None
    alttyp = None
    mc = li[0].morph
    if (i < len(li)):
        if (li[i].typ == CityItemToken.ItemType.NOUN):
            # `at` stays non-None when the type noun parses as an address
            # item itself; it is cleared again when the text after it
            # parses as a STREET address item.
            at = None
            if (not li[i].chars.is_all_lower and (li[i].whitespaces_after_count < 2)):
                sit = StreetItemToken.try_parse(li[i].end_token.next0_, None, False, None, False)
                if (sit is not None and sit.typ == StreetItemType.NOUN):
                    at = AddressItemToken.try_parse(
                        li[i].begin_token, None, False, False, None)
                    if (at is not None):
                        at2 = AddressItemToken.try_parse(
                            li[i].end_token.next0_, None, False, False, None)
                        if (at2 is not None and at2.typ == AddressItemToken.ItemType.STREET):
                            at = (None)
            if (at is None):
                typ = li[i].value
                alttyp = li[i].alt_value
                # An upper-case "СТ" token is rejected as a type word.
                if (li[i].begin_token.is_value("СТ", None)
                        and li[i].begin_token.chars.is_all_upper):
                    return None
                if ((i + 1) == len(li)):
                    # The type noun is the last item: accept, and prefer its
                    # morphology when its case is defined.
                    ok = True
                    if (not li[i].morph.case_.is_undefined):
                        mc = li[i].morph
                    i += 1
                elif (ok):
                    i += 1
                else:
                    # Accept when preceded by "МЭР"/"МЕР", "ГЛАВА" or
                    # "ГРАДОНАЧАЛЬНИК" (Russian words for mayor / head /
                    # city governor).
                    tt0 = li[0].begin_token.previous
                    if ((isinstance(tt0, TextToken)) and (tt0.whitespaces_after_count < 3)):
                        if (tt0.is_value("МЭР", "МЕР") or tt0.is_value("ГЛАВА",
                                                                        None)
                                or tt0.is_value("ГРАДОНАЧАЛЬНИК", None)):
                            ok = True
                            i += 1
    # Unconfirmed candidates: short canonic names and proper-name tokens
    # are rejected outright.
    if (not ok and oi.value is not None and (len(oi.value.canonic_text) < 4)):
        return None
    if (not ok and li[0].begin_token.morph.class0_.is_proper_name):
        return None
    if (not ok):
        # Text for which is_exists_in_dictionary is false may be accepted
        # on geo context alone.
        if (not MiscHelper.is_exists_in_dictionary(
                li[0].begin_token, li[0].end_token,
                (MorphClass.ADJECTIVE) | MorphClass.NOUN | MorphClass.PRONOUN)):
            ok = (li[0].geo_object_before or li[i - 1].geo_object_after)
            if (ok and li[0].begin_token == li[0].end_token):
                mcc = li[0].begin_token.get_morph_class_in_dictionary()
                if (mcc.is_proper_name or mcc.is_proper_surname):
                    ok = False
                # NOTE(review): attaching this elif to the mcc test is
                # inferred; at either outer `if` the `ok = False`
                # assignments would have little or no effect.
                elif (li[0].geo_object_before and (li[0].whitespaces_after_count < 2)):
                    ad1 = AddressItemToken.try_parse(
                        li[0].begin_token, None, False, False, None)
                    if (ad1 is not None and ad1.typ == AddressItemToken.ItemType.STREET):
                        ad2 = AddressItemToken.try_parse(
                            li[0].end_token.next0_, None, False, False, None)
                        if (ad2 is None or ad2.typ != AddressItemToken.ItemType.STREET):
                            ok = False
                    elif (AddressItemToken.try_attach_org(
                            li[0].begin_token) is not None):
                        ok = False
        # Reject when a PERSON referent can be processed from the name token.
        if (ok):
            if (li[0].kit.process_referent("PERSON", li[0].begin_token) is not None):
                ok = False
    # Last-chance checks on the token that follows the name.
    if (not ok):
        ok = CityAttachHelper.check_year_after(li[0].end_token.next0_)
    if (not ok and ((not li[0].begin_token.morph.class0_.is_adjective
                     or li[0].begin_token != li[0].end_token))):
        ok = CityAttachHelper.check_city_after(li[0].end_token.next0_)
    if (not ok):
        return None
    # Drop the items that were not consumed (equivalent to `del li[i:]`).
    if (i < len(li)):
        del li[i:i + len(li) - i]
    rt = None
    if (oi.value is None):
        # No ontology item: build the referent from the item itself.
        if (li[0].value is not None and li[0].higher_geo is not None):
            cap = GeoReferent()
            cap._add_name(li[0].value)
            cap._add_typ_city(li[0].kit.base_language)
            cap.higher = li[0].higher_geo
            if (typ is not None):
                cap._add_typ(typ)
            if (alttyp is not None):
                cap._add_typ(alttyp)
            rt = ReferentToken(cap, li[0].begin_token, li[0].end_token)
        else:
            if (li[0].value is None):
                return None
            if (typ is None):
                # Without a type word, the name must follow
                # "<GeoReferent token>-".
                if ((len(li) == 1 and li[0].begin_token.previous is not None
                     and
                     li[0].begin_token.previous.is_hiphen)
                        and (isinstance(li[0].begin_token.previous.previous, ReferentToken))
                        and (isinstance(
                            li[0].begin_token.previous.previous.get_referent(), GeoReferent))):
                    pass
                else:
                    return None
            else:
                # Type words other than the settlement-like endings and
                # "...CITY" need one of the supporting shapes below.
                if (not LanguageHelper.ends_with_ex(
                        typ, "ПУНКТ", "ПОСЕЛЕНИЕ", "ПОСЕЛЕННЯ", "ПОСЕЛОК")):
                    if (not LanguageHelper.ends_with(typ, "CITY")):
                        if (typ == "СТАНЦИЯ"
                                and ((MiscLocationHelper.check_geo_object_before(
                                    li[0].begin_token)))):
                            pass
                        elif (len(li) > 1 and li[1].typ == CityItemToken.ItemType.NOUN
                                and li[0].typ == CityItemToken.ItemType.CITY):
                            pass
                        elif ((len(li) == 2 and li[1].typ == CityItemToken.ItemType.NOUN
                               and li[0].typ == CityItemToken.ItemType.PROPERNAME)
                                and ((li[0].geo_object_before or li[1].geo_object_after))):
                            pass
                        else:
                            return None
                # Adjective name: re-derive it with the case and gender of
                # the type noun. li[1] exists here because typ is only set
                # from li[1].
                if (li[0].begin_token.morph.class0_.is_adjective):
                    li[0].value = ProperNameHelper.get_name_ex(
                        li[0].begin_token, li[0].end_token, MorphClass.ADJECTIVE,
                        li[1].morph.case_, li[1].morph.gender, False, False)
    elif (isinstance(oi.value.referent, GeoReferent)):
        # The ontology item carries a ready referent: clone it.
        city = Utils.asObjectOrNull(oi.value.referent.clone(), GeoReferent)
        city.occurrence.clear()
        rt = ReferentToken._new734(city, li[0].begin_token, li[len(li) - 1].end_token, mc)
    elif (typ is None):
        typ = oi.value.typ
    if (rt is None):
        city = GeoReferent()
        city._add_name(
            (li[0].value if oi.value is None else oi.value.canonic_text))
        if (typ is not None):
            city._add_typ(typ)
        else:
            city._add_typ_city(li[0].kit.base_language)
        if (alttyp is not None):
            city._add_typ(alttyp)
        rt = ReferentToken._new734(city, li[0].begin_token, li[len(li) - 1].end_token, mc)
    # For a single-item city, widen the span over an adjacent "Г" / "Г."
    # marker (presumably the abbreviation of "город") before or after it.
    if ((isinstance(rt.referent, GeoReferent)) and len(li) == 1 and rt.referent.is_city):
        if (rt.begin_token.previous is not None
                and rt.begin_token.previous.is_value("Г", None)):
            rt.begin_token = rt.begin_token.previous
        elif ((rt.begin_token.previous is not None
               and rt.begin_token.previous.is_char('.')
               and rt.begin_token.previous.previous is not None)
                and rt.begin_token.previous.previous.is_value("Г", None)):
            rt.begin_token = rt.begin_token.previous.previous
        elif (rt.end_token.next0_ is not None and (rt.whitespaces_after_count < 2)
                and rt.end_token.next0_.is_value("Г", None)):
            rt.end_token = rt.end_token.next0_
            # Also take the dot of a trailing "Г.".
            if (rt.end_token.next0_ is not None and rt.end_token.next0_.is_char('.')):
                rt.end_token = rt.end_token.next0_
    return rt