def trunc_oborot(self, is_participle : bool) -> bool:
    """Cut ``self.items`` down to the leading part selected by the best variant.

    Without a best variant (``self.best_var`` is None or has no segments) the
    list is reduced to its first element.  Otherwise a cut position ``ind`` is
    derived from the links of the leading segments of ``self.best_var`` and
    everything from ``ind`` onwards is dropped, provided ``ind`` is positive
    and is not already the last index.

    Args:
        is_participle(bool): when True, the "first segment is None" shortcut
            is not taken, links that point to the segment's ``after_verb``
            are not followed, and only the first segment is inspected.

    Returns:
        bool: in the no-best-variant case, True if anything was removed;
        otherwise True if at least one non-None link was encountered.
    """

    def first_comma_end_index() -> int:
        # Index (starting the scan at 1) of the first item flagged
        # ``can_be_comma_end``; if none is flagged the scan stops at
        # len(self.items), or stays at 1 for shorter lists.
        pos = 1
        while pos < len(self.items) and not self.items[pos].can_be_comma_end:
            pos += 1
        return pos

    if self.best_var is None or len(self.best_var.segs) == 0:
        if len(self.items) > 1:
            del self.items[1:]
            return True
        return False

    ret = False
    ind = 0
    if self.best_var.segs[0] is None and not is_participle:
        ind = first_comma_end_index()
    else:
        for seg in self.best_var.segs:
            if seg is None:
                break
            for li in seg.links:
                if li is None:
                    continue
                ret = True
                ii = Utils.indexOfList(self.items, li.from0_.source, 0)
                if ii < 0:
                    continue
                if li.to_verb is not None:
                    if li.to_verb == seg.source.before_verb:
                        ind = ii + 1
                    elif (not is_participle and seg == self.best_var.segs[0]
                          and li.to_verb == seg.source.after_verb):
                        # Move the cut just past the item that carries the verb.
                        for pos in range(ind, len(self.items)):
                            if self.items[pos].source == li.to_verb:
                                ind = pos + 1
                                break
                    else:
                        break
                else:
                    jj = Utils.indexOfList(self.items, li.to.source, 0)
                    if jj < 0:
                        continue
                    if jj < ii:
                        ind = ii + 1
                    else:
                        break
            # Only the first segment of a non-participle phrase lets the scan
            # continue to the following segment.
            continue_scan = not is_participle and seg == self.best_var.segs[0]
            if not continue_scan:
                break

    if not ret and ind == 0:
        # No usable link was found: fall back to the comma-end heuristic.
        ind = first_comma_end_index()
    if ind > 0 and ind < (len(self.items) - 1):
        del self.items[ind:]
    return ret
def _checkAbbr(self, abbr : str) -> bool: if (len(abbr) != 2): return False nameq = False typeq = False nameq2 = False typeq2 = False for s in self.slots: if (s.type_name == GeoReferent.ATTR_NAME): val = Utils.asObjectOrNull(s.value, str) ch = val[0] if (ch == abbr[0]): nameq = True ii = val.find(' ') if (ii > 0): if (abbr[1] == val[ii + 1]): if (Utils.indexOfList(val, ' ', ii + 1) < 0): return True if (ch == abbr[1]): nameq2 = True elif (s.type_name == GeoReferent.ATTR_TYPE): ty = s.value if (ty == "государство" or ty == "держава" or ty == "country"): continue ch = str.upper(ty[0]) if (ch == abbr[1]): typeq = True if (ch == abbr[0]): typeq2 = True if (typeq and nameq): return True if (typeq2 and nameq2): return True return False
def addReferent(self, referent : 'Referent') -> bool:
    """ Add an entity to the ontology.

    If the referent already owns an ontology item registered in this
    collection, a fresh item is created; when the fresh item is None or has
    the same number of termins as the registered one, nothing changes and
    True is returned.  Otherwise the old item's termins and the old item
    itself are removed before the fresh one is registered.

    Args:
        referent(Referent): entity to register; None is rejected.

    Returns:
        bool: False when ``referent`` is None or no ontology item could be
        created for a not-yet-registered referent, True otherwise.
    """
    if referent is None:
        return False

    already_here = (referent._int_ontology_item is not None
                    and referent._int_ontology_item.owner == self)
    fresh = referent.createOntologyItem()
    if already_here:
        stale = referent._int_ontology_item
        if fresh is None or len(fresh.termins) == len(stale.termins):
            return True
        # Unregister everything that belonged to the outdated item.
        for termin in stale.termins:
            self.__m_termins.remove(termin)
        pos = Utils.indexOfList(self.__m_items, stale, 0)
        if pos >= 0:
            del self.__m_items[pos]

    if fresh is None:
        return False
    fresh.referent = referent
    referent._int_ontology_item = fresh
    self.addItem(fresh)
    return True
def remove(self, t : 'Termin') -> None:
    """ Remove a termin from this collection.

    The termin is taken out of the tree for each of its hash variants, out of
    every bucket of the first-level hash (at most one entry per bucket), and
    finally out of the flat ``termins`` list.

    Args:
        t(Termin): termin to remove
    """
    for variant in t._get_hash_variants():
        self.__remove_from_tree(variant, t)

    for bucket in self.__m_hash1.values():
        for candidate in bucket:
            if candidate == t:
                # Safe: the iteration stops right after the mutation.
                bucket.remove(candidate)
                break

    pos = Utils.indexOfList(self.termins, t, 0)
    if pos >= 0:
        del self.termins[pos]
# extract_main_sequence: choose from `lines` the numbered items that form one
# consistent numbering sequence and return them as a list (or None).
#
# NOTE(review): in this copy of the file the statement nesting has been
# flattened onto a few physical lines; the description below is limited to what
# can be read from the statements themselves. Restore the indentation from the
# upstream source before relying on exact branch structure.
#
# Parameters:
#   lines            - InstrToken1 candidates, scanned in order.
#   check_spec_texts - when true, the result is rejected (None) if the number
#                      of collected items with `has_many_spec_chars` exceeds
#                      floor(len(res) / 2).
#   can_sub_numbers  - consulted for an item whose `numbers` length differs
#                      from that of the first collected item; when false such
#                      an item is kept only if the following line continues
#                      the numbering (calc_delta checks equal to 1).
#
# Outline:
#   1. Collection pass over `lines`: items without numbers or whose `typ` is
#      not InstrToken1.Types.LINE are skipped; an all-upper standard title is
#      stored in the `tag` of the last collected item when that tag is empty.
#      Items are filtered by num_suffix / num_typ compatibility with res[0].
#      The two `pass` statements testing `last_number == 901` and
#      `num_typ == NumberTypes.LETTER` have no effect.
#   2. Gap filling: where two neighbours differ by a delta of 2, a line lying
#      between them in `lines` with delta 1 to both sides is inserted.
#   3. Pruning loop: repeated while `ch` is set, deleting items (or tails)
#      whose deltas to their neighbours do not fit the sequence.
#   4. Final acceptance checks: `has_suf` counts items with a suffix, a
#      container rank, multi-part numbers, all-upper text or ROMAN numbering;
#      sequences without any such item and shorter than 5 are rejected; the
#      function calls NumberingHelper.extract_main_sequence again on the
#      lines following the last collected item; results longer than 500
#      items are rejected.
#
# Returns: the selected list (`res` or the reduced `res0`), or None when no
# acceptable sequence is found.
def extract_main_sequence(lines : typing.List['InstrToken1'], check_spec_texts : bool, can_sub_numbers : bool) -> typing.List['InstrToken1']: res = None many_spec_char_lines = 0 i = 0 first_pass3279 = True while True: if first_pass3279: first_pass3279 = False else: i += 1 if (not (i < len(lines))): break li = lines[i] if (li.all_upper and li.title_typ != InstrToken1.StdTitleType.UNDEFINED): if (res is not None and len(res) > 0 and res[len(res) - 1].tag is None): res[len(res) - 1].tag = (li) if (len(li.numbers) == 0): continue if (li.last_number == 901): pass if (li.num_typ == NumberTypes.LETTER): pass if (li.typ != InstrToken1.Types.LINE): continue if (res is None): res = list() if (len(li.numbers) == 1 and li.numbers[0] == "1" and li.num_typ == NumberTypes.DIGIT): if ((((i + 1) < len(lines)) and len(lines[i + 1].numbers) == 1 and lines[i + 1].numbers[0] == "1") and lines[i + 1].num_typ == NumberTypes.DIGIT): ii = i + 2 while ii < len(lines): if (lines[ii].num_typ == NumberTypes.ROMAN and len(lines[ii].numbers) > 0): if (lines[ii].numbers[0] == "2"): li.num_typ = NumberTypes.ROMAN break ii += 1 else: if (res[0].num_suffix is not None): if (li.num_suffix is not None and li.num_suffix != res[0].num_suffix): continue if (len(res[0].numbers) != len(li.numbers)): if (li.begin_token.previous is not None and li.begin_token.previous.is_char(':')): continue if (res[0].num_suffix is None or NumberingHelper.calc_delta(res[len(res) - 1], li, True) != 1): continue if (not can_sub_numbers): if (((i + 1) < len(lines)) and NumberingHelper.calc_delta(res[len(res) - 1], lines[i + 1], False) == 1 and NumberingHelper.calc_delta(li, lines[i + 1], True) == 1): pass else: continue else: if (res[0].num_typ == NumberTypes.ROMAN and li.num_typ != NumberTypes.ROMAN): continue if (res[0].num_typ != NumberTypes.ROMAN and li.num_typ == NumberTypes.ROMAN): if (len(li.numbers) == 1 and li.numbers[0] == "1" and len(res) == 1): res.clear() res.append(li) continue continue if (res[0].num_typ != 
NumberTypes.LETTER and li.num_typ == NumberTypes.LETTER): continue res.append(li) if (li.has_many_spec_chars): many_spec_char_lines += 1 if (res is None): return None if (check_spec_texts): if (many_spec_char_lines > (math.floor(len(res) / 2))): return None i = 0 while i < (len(res) - 1): if (NumberingHelper.calc_delta(res[i], res[i + 1], False) == 2): ii0 = Utils.indexOfList(lines, res[i], 0) ii1 = Utils.indexOfList(lines, res[i + 1], ii0) j = ii0 + 1 while j < ii1: if (len(lines[j].numbers) > 0): if (NumberingHelper.calc_delta(res[i], lines[j], True) == 1 and NumberingHelper.calc_delta(lines[j], res[i + 1], True) == 1): res.insert(i + 1, lines[j]) break j += 1 i += 1 ch = True while ch: ch = False i = 1 first_pass3280 = True while True: if first_pass3280: first_pass3280 = False else: i += 1 if (not (i < len(res))): break d = NumberingHelper.calc_delta(res[i - 1], res[i], False) if (res[i - 1].num_suffix == res[i].num_suffix): if (d == 1): continue if (((d > 1 and (d < 20))) or ((d == 0 and res[i - 1].num_typ == res[i].num_typ and len(res[i - 1].numbers) == len(res[i].numbers)))): if (NumberingHelper.calc_delta(res[i], res[i - 1], False) > 0): if (res[i - 1].tag is not None and i > 2): del res[i:i+len(res) - i] ch = True i -= 1 continue if ((i + 1) < len(res)): dd = NumberingHelper.calc_delta(res[i], res[i + 1], False) if (dd == 1): if (res[i].last_number == 1 and len(res[i].numbers) == len(res[i - 1].numbers)): pass else: continue else: dd = NumberingHelper.calc_delta(res[i - 1], res[i + 1], False) if (dd == 1): del res[i] i -= 1 ch = True continue elif (d > 3): del res[i] i -= 1 ch = True continue else: continue j = 0 j = (i + 1) while j < len(res): dd = NumberingHelper.calc_delta(res[j - 1], res[j], False) if (dd != 1 and dd != 2): break if (res[j - 1].num_suffix != res[j].num_suffix): break j += 1 if ((d == 0 and NumberingHelper.calc_delta(res[i - 1], res[i], True) == 1 and res[i - 1].num_suffix is not None) and res[i].num_suffix == res[i - 1].num_suffix): d = 
1 if (d != 1 and j > (i + 1)): del res[i:i+j - i] i -= 1 ch = True continue if (d == 1): if ((i + 1) >= len(res)): continue dd = NumberingHelper.calc_delta(res[i], res[i + 1], False) if (dd == 1 and res[i - 1].num_suffix == res[i + 1].num_suffix): if (res[i].num_suffix != res[i - 1].num_suffix): res[i].num_suffix = res[i - 1].num_suffix res[i].is_num_doubt = False ch = True continue if ((i + 1) < len(res)): dd = NumberingHelper.calc_delta(res[i - 1], res[i + 1], False) if (dd == 1 and res[i - 1].num_suffix == res[i + 1].num_suffix): if (d == 1 and NumberingHelper.calc_delta(res[i], res[i + 1], True) == 1): pass else: del res[i] ch = True continue elif (d == 0 or d > 10 or res[i - 1].num_suffix != res[i].num_suffix): del res[i] ch = True continue has_suf = 0 for r in res: if ((r.num_suffix is not None or r.typ_container_rank > 0 or len(r.numbers) > 1) or r.all_upper or r.num_typ == NumberTypes.ROMAN): has_suf += 1 if (has_suf == 0): if (len(res) < 5): return None if (len(res) >= 2): if (res[0] != lines[0]): tot = res[0].begin_token.begin_char - lines[0].begin_token.begin_char tot += (lines[len(lines) - 1].end_token.end_char - res[len(res) - 1].end_token.end_char) blk = res[len(res) - 1].end_token.end_char - res[0].begin_token.begin_char i = Utils.indexOfList(lines, res[len(res) - 1], 0) if (i > 0): lines1 = list(lines) del lines1[0:0+i + 1] res1 = NumberingHelper.extract_main_sequence(lines1, check_spec_texts, can_sub_numbers) if (res1 is not None and len(res1) > 2): blk += (res1[len(res1) - 1].end_char - res1[0].begin_char) if ((blk * 3) < tot): if ((blk * 5) < tot): return None for r in res: if (not r.all_upper and not r.has_changes): return None if (res[0].last_number == 1 and len(res[0].numbers) == 1): res0 = list() res0.append(res[0]) i = 0 i = 1 while i < len(res): j = 0 j = (i + 1) while j < len(res): if (res[j].last_number == 1 and len(res[j].numbers) == 1): break j += 1 if ((j - i) < 3): break j -= 1 jj = 0 errs = 0 jj = (i + 1) while jj < j: d = 
NumberingHelper.calc_delta(res[jj - 1], res[jj], False) if (d == 1): pass elif (d > 1 and (d < 3)): errs += 1 else: break jj += 1 if ((jj < j) or errs > 1): break if (j < (len(res) - 1)): if (NumberingHelper.calc_delta(res0[len(res0) - 1], res[j], False) != 1): break res0.append(res[j]) i = j i += 1 if (i >= len(res) and len(res0) > 1): return res0 if (len(res) > 500): return None return res if (len(res) == 1 and lines[0] == res[0]): if (has_suf > 0): return res if (len(lines) > 1 and len(lines[1].numbers) == (len(lines[0].numbers) + 1)): i = 0 while i < len(lines[0].numbers): if (lines[1].numbers[i] != lines[0].numbers[i]): return None i += 1 return res return None
# try_attach_territory: try to build a territory GeoReferent from a list of
# TerrItemToken items and wrap it in a ReferentToken.
#
# NOTE(review): in this copy of the file the statement nesting has been
# flattened onto a few physical lines; the description below is limited to what
# can be read from the statements themselves. Restore the indentation from the
# upstream source before relying on exact branch structure.
#
# Parameters:
#   li            - TerrItemToken items to analyse; None or empty yields None.
#   ad            - analyzer data, passed on to the __try_attach_moscowao and
#                   __try_attach_pure_terr helpers.
#   attach_always - relaxes several of the plausibility checks (see the
#                   `attach_always` tests in the body).
#   cits          - optional CityItemToken list; its first element is used to
#                   set `can_be_city_before`.
#   exists        - optional GeoReferent list; a candidate name already
#                   present there (ATTR_NAME slot) is accepted.
#
# Outline:
#   1. Special cases handled first: the result of __try_attach_moscowao, a
#      leading item whose termin canonic text is "ТЕРРИТОРИЯ" (delegated to
#      __try_attach_pure_terr), and a two-item list combining `rzd` with
#      `rzd_dir`.
#   2. Scan of `li` (index `k`) that classifies items into `ex_obj` (item
#      with an ontology entry), `noun` (type termin), `adj_list`
#      (adjective / dictionary termins) and `new_name` (everything else).
#   3. Validation of the chosen combination and computation of `name`,
#      `alt_name` and `full_name`; many branches return None when the
#      combination is judged implausible (e.g. it parses as a PERSON).
#   4. When neither `ex_obj` nor `new_name` is set, a single type noun may be
#      attached to a neighbouring or previously seen GeoReferent whose
#      ATTR_TYPE matches; otherwise None is returned.
#   5. Construction of `ter` (reusing `ex_obj.tag` when it is a GeoReferent),
#      addition of names and types, and creation of the resulting
#      ReferentToken from li[0].begin_token to li[k - 1].end_token, with the
#      bounds adjusted for a specific prefix, `add_noun`, or a preceding "АР"
#      token.
#
# Returns: a ReferentToken, or None when no territory can be attached.
def try_attach_territory( li: typing.List['TerrItemToken'], ad: 'AnalyzerData', attach_always: bool = False, cits: typing.List['CityItemToken'] = None, exists: typing.List['GeoReferent'] = None) -> 'ReferentToken': if (li is None or len(li) == 0): return None ex_obj = None new_name = None adj_list = list() noun = None add_noun = None rt = TerrAttachHelper.__try_attach_moscowao(li, ad) if (rt is not None): return rt if (li[0].termin_item is not None and li[0].termin_item.canonic_text == "ТЕРРИТОРИЯ"): res2 = TerrAttachHelper.__try_attach_pure_terr(li, ad) return res2 if (len(li) == 2): if (li[0].rzd is not None and li[1].rzd_dir is not None): rzd = GeoReferent() rzd._add_name(li[1].rzd_dir) rzd._add_typ_ter(li[0].kit.base_language) rzd.add_slot(GeoReferent.ATTR_REF, li[0].rzd.referent, False, 0) rzd.add_ext_referent(li[0].rzd) return ReferentToken(rzd, li[0].begin_token, li[1].end_token) if (li[1].rzd is not None and li[0].rzd_dir is not None): rzd = GeoReferent() rzd._add_name(li[0].rzd_dir) rzd._add_typ_ter(li[0].kit.base_language) rzd.add_slot(GeoReferent.ATTR_REF, li[1].rzd.referent, False, 0) rzd.add_ext_referent(li[1].rzd) return ReferentToken(rzd, li[0].begin_token, li[1].end_token) can_be_city_before = False adj_terr_before = False if (cits is not None): if (cits[0].typ == CityItemToken.ItemType.CITY): can_be_city_before = True elif (cits[0].typ == CityItemToken.ItemType.NOUN and len(cits) > 1): can_be_city_before = True k = 0 k = 0 while k < len(li): if (li[k].onto_item is not None): if (ex_obj is not None or new_name is not None): break if (noun is not None): if (k == 1): if (noun.termin_item.canonic_text == "РАЙОН" or noun.termin_item.canonic_text == "ОБЛАСТЬ" or noun.termin_item.canonic_text == "СОЮЗ"): if (isinstance(li[k].onto_item.referent, GeoReferent)): if (li[k].onto_item.referent.is_state): break ok = False tt = li[k].end_token.next0_ if (tt is None): ok = True elif (tt.is_char_of(",.")): ok = True if (not ok): ok = 
MiscLocationHelper.check_geo_object_before( li[0].begin_token) if (not ok): adr = AddressItemToken.try_parse( tt, None, False, False, None) if (adr is not None): if (adr.typ == AddressItemToken.ItemType.STREET): ok = True if (not ok): break if (li[k].onto_item is not None): if (noun.begin_token.is_value("МО", None) or noun.begin_token.is_value("ЛО", None)): return None ex_obj = li[k] elif (li[k].termin_item is not None): if (noun is not None): break if (li[k].termin_item.is_always_prefix and k > 0): break if (k > 0 and li[k].is_doubt): if (li[k].begin_token == li[k].end_token and li[k].begin_token.is_value("ЗАО", None)): break if (li[k].termin_item.is_adjective or li[k].is_geo_in_dictionary): adj_list.append(li[k]) else: if (ex_obj is not None): geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent, GeoReferent) if (geo_ is None): break if (ex_obj.is_adjective and ((li[k].termin_item.canonic_text == "СОЮЗ" or li[k].termin_item.canonic_text == "ФЕДЕРАЦИЯ"))): str0_ = str(ex_obj.onto_item) if (not li[k].termin_item.canonic_text in str0_): return None if (li[k].termin_item.canonic_text == "РАЙОН" or li[k].termin_item.canonic_text == "ОКРУГ" or li[k].termin_item.canonic_text == "КРАЙ"): tmp = io.StringIO() for s in geo_.slots: if (s.type_name == GeoReferent.ATTR_TYPE): print("{0};".format(s.value), end="", file=tmp, flush=True) if (not li[k].termin_item.canonic_text in Utils.toStringStringIO(tmp).upper()): if (k != 1 or new_name is not None): break new_name = li[0] new_name.is_adjective = True new_name.onto_item = (None) ex_obj = (None) noun = li[k] if (k == 0): tt = TerrItemToken.try_parse( li[k].begin_token.previous, None, True, False, None) if (tt is not None and tt.morph.class0_.is_adjective): adj_terr_before = True else: if (ex_obj is not None): break if (new_name is not None): break new_name = li[k] k += 1 name = None alt_name = None full_name = None morph_ = None if (ex_obj is not None): if (ex_obj.is_adjective and not ex_obj.morph.language.is_en and noun is 
None): if (attach_always and ex_obj.end_token.next0_ is not None): npt = NounPhraseHelper.try_parse(ex_obj.begin_token, NounPhraseParseAttr.NO, 0, None) if (ex_obj.end_token.next0_.is_comma_and): pass elif (npt is None): pass else: str0_ = StreetItemToken.try_parse( ex_obj.end_token.next0_, None, False, None, False) if (str0_ is not None): if (str0_.typ == StreetItemType.NOUN and str0_.end_token == npt.end_token): return None else: cit = CityItemToken.try_parse(ex_obj.end_token.next0_, None, False, None) if (cit is not None and ((cit.typ == CityItemToken.ItemType.NOUN or cit.typ == CityItemToken.ItemType.CITY))): npt = NounPhraseHelper.try_parse( ex_obj.begin_token, NounPhraseParseAttr.NO, 0, None) if (npt is not None and npt.end_token == cit.end_token): pass else: return None elif (ex_obj.begin_token.is_value("ПОДНЕБЕСНЫЙ", None)): pass else: return None if (noun is None and ex_obj.can_be_city): cit0 = CityItemToken.try_parse_back( ex_obj.begin_token.previous) if (cit0 is not None and cit0.typ != CityItemToken.ItemType.PROPERNAME): return None if (ex_obj.is_doubt and noun is None): ok2 = False if (TerrAttachHelper.__can_be_geo_after( ex_obj.end_token.next0_)): ok2 = True elif (not ex_obj.can_be_surname and not ex_obj.can_be_city): if ((ex_obj.end_token.next0_ is not None and ex_obj.end_token.next0_.is_char(')') and ex_obj.begin_token.previous is not None) and ex_obj.begin_token.previous.is_char('(')): ok2 = True elif (ex_obj.chars.is_latin_letter and ex_obj.begin_token.previous is not None): if (ex_obj.begin_token.previous.is_value("IN", None)): ok2 = True elif (ex_obj.begin_token.previous.is_value( "THE", None) and ex_obj.begin_token.previous.previous is not None and ex_obj.begin_token.previous.previous.is_value( "IN", None)): ok2 = True if (not ok2): cit0 = CityItemToken.try_parse_back( ex_obj.begin_token.previous) if (cit0 is not None and cit0.typ != CityItemToken.ItemType.PROPERNAME): pass elif (MiscLocationHelper.check_geo_object_before( 
ex_obj.begin_token.previous)): pass else: return None name = ex_obj.onto_item.canonic_text morph_ = ex_obj.morph elif (new_name is not None): if (noun is None): return None j = 1 while j < k: if (li[j].is_newline_before and not li[0].is_newline_before): if (BracketHelper.can_be_start_of_sequence( li[j].begin_token, False, False)): pass else: return None j += 1 morph_ = noun.morph if (new_name.is_adjective): if (noun.termin_item.acronym == "АО"): if (noun.begin_token != noun.end_token): return None if (new_name.morph.gender != MorphGender.FEMINIE): return None geo_before = None tt0 = li[0].begin_token.previous if (tt0 is not None and tt0.is_comma_and): tt0 = tt0.previous if (not li[0].is_newline_before and tt0 is not None): geo_before = (Utils.asObjectOrNull(tt0.get_referent(), GeoReferent)) if (Utils.indexOfList(li, noun, 0) < Utils.indexOfList( li, new_name, 0)): if (noun.termin_item.is_state): return None if (new_name.can_be_surname and geo_before is None): if (((noun.morph.case_) & new_name.morph.case_).is_undefined): return None if (MiscHelper.is_exists_in_dictionary( new_name.begin_token, new_name.end_token, (MorphClass.ADJECTIVE) | MorphClass.PRONOUN | MorphClass.VERB)): if (noun.begin_token != new_name.begin_token): if (geo_before is None): if (len(li) == 2 and TerrAttachHelper.__can_be_geo_after( li[1].end_token.next0_)): pass elif (len(li) == 3 and li[2].termin_item is not None and TerrAttachHelper.__can_be_geo_after( li[2].end_token.next0_)): pass elif (new_name.is_geo_in_dictionary): pass elif (new_name.end_token.is_newline_after): pass else: return None npt = NounPhraseHelper.try_parse( new_name.end_token, NounPhraseParseAttr.PARSEPRONOUNS, 0, None) if (npt is not None and npt.end_token != new_name.end_token): if (len(li) >= 3 and li[2].termin_item is not None and npt.end_token == li[2].end_token): add_noun = li[2] else: return None rtp = new_name.kit.process_referent( "PERSON", new_name.begin_token) if (rtp is not None): return None name = 
ProperNameHelper.get_name_ex( new_name.begin_token, new_name.end_token, MorphClass.ADJECTIVE, MorphCase.UNDEFINED, noun.termin_item.gender, False, False) else: ok = False if (((k + 1) < len(li)) and li[k].termin_item is None and li[k + 1].termin_item is not None): ok = True elif ((k < len(li)) and li[k].onto_item is not None): ok = True elif (k == len(li) and not new_name.is_adj_in_dictionary): ok = True elif (MiscLocationHelper.check_geo_object_before( li[0].begin_token) or can_be_city_before): ok = True elif (MiscLocationHelper.check_geo_object_after( li[k - 1].end_token, False)): ok = True elif (len(li) == 3 and k == 2): cit = CityItemToken.try_parse(li[2].begin_token, None, False, None) if (cit is not None): if (cit.typ == CityItemToken.ItemType.CITY or cit.typ == CityItemToken.ItemType.NOUN): ok = True elif (len(li) == 2): ok = TerrAttachHelper.__can_be_geo_after( li[len(li) - 1].end_token.next0_) if (not ok and not li[0].is_newline_before and not li[0].chars.is_all_lower): rt00 = li[0].kit.process_referent( "PERSONPROPERTY", li[0].begin_token.previous) if (rt00 is not None): ok = True if (noun.termin_item is not None and noun.termin_item.is_strong and new_name.is_adjective): ok = True if (noun.is_doubt and len(adj_list) == 0 and geo_before is None): return None name = ProperNameHelper.get_name_ex( new_name.begin_token, new_name.end_token, MorphClass.ADJECTIVE, MorphCase.UNDEFINED, noun.termin_item.gender, False, False) if (not ok and not attach_always): if (MiscHelper.is_exists_in_dictionary( new_name.begin_token, new_name.end_token, (MorphClass.ADJECTIVE) | MorphClass.PRONOUN | MorphClass.VERB)): if (exists is not None): for e0_ in exists: if (e0_.find_slot(GeoReferent.ATTR_NAME, name, True) is not None): ok = True break if (not ok): return None full_name = "{0} {1}".format( ProperNameHelper.get_name_ex(li[0].begin_token, noun.begin_token.previous, MorphClass.ADJECTIVE, MorphCase.UNDEFINED, noun.termin_item.gender, False, False), 
noun.termin_item.canonic_text) else: if (not attach_always or ((noun.termin_item is not None and noun.termin_item.canonic_text == "ФЕДЕРАЦИЯ"))): is_latin = noun.chars.is_latin_letter and new_name.chars.is_latin_letter if (Utils.indexOfList(li, noun, 0) > Utils.indexOfList( li, new_name, 0)): if (not is_latin): return None if (not new_name.is_district_name and not BracketHelper.can_be_start_of_sequence( new_name.begin_token, False, False)): if (len(adj_list) == 0 and MiscHelper.is_exists_in_dictionary( new_name.begin_token, new_name.end_token, (MorphClass.NOUN) | MorphClass.PRONOUN)): if (len(li) == 2 and noun.is_city_region and (noun.whitespaces_after_count < 2)): pass else: return None if (not is_latin): if ((noun.termin_item.is_region and not attach_always and ((not adj_terr_before or new_name.is_doubt))) and not noun.is_city_region and not noun.termin_item.is_specific_prefix): if (not MiscLocationHelper. check_geo_object_before( noun.begin_token)): if (not noun.is_doubt and noun.begin_token != noun.end_token): pass elif ((noun.termin_item.is_always_prefix and len(li) == 2 and li[0] == noun) and li[1] == new_name): pass else: return None if (noun.is_doubt and len(adj_list) == 0): if (noun.termin_item.acronym == "МО" or noun.termin_item.acronym == "ЛО"): if (k == (len(li) - 1) and li[k].termin_item is not None): add_noun = li[k] k += 1 elif (len(li) == 2 and noun == li[0] and str(new_name).endswith("совет")): pass else: return None else: return None pers = new_name.kit.process_referent( "PERSON", new_name.begin_token) if (pers is not None): return None name = MiscHelper.get_text_value(new_name.begin_token, new_name.end_token, GetTextAttr.NO) if (new_name.begin_token != new_name.end_token): ttt = new_name.begin_token.next0_ while ttt is not None and ttt.end_char <= new_name.end_char: if (ttt.chars.is_letter): ty = TerrItemToken.try_parse( ttt, None, False, False, None) if ((ty is not None and ty.termin_item is not None and noun is not None) and 
((noun.termin_item.canonic_text in ty.termin_item.canonic_text or ty.termin_item.canonic_text in noun.termin_item.canonic_text))): name = MiscHelper.get_text_value( new_name.begin_token, ttt.previous, GetTextAttr.NO) break ttt = ttt.next0_ if (len(adj_list) > 0): npt = NounPhraseHelper.try_parse(adj_list[0].begin_token, NounPhraseParseAttr.NO, 0, None) if (npt is not None and npt.end_token == noun.end_token): alt_name = "{0} {1}".format( npt.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False), name) else: if ((len(li) == 1 and noun is not None and noun.end_token.next0_ is not None) and (isinstance( noun.end_token.next0_.get_referent(), GeoReferent))): g = Utils.asObjectOrNull(noun.end_token.next0_.get_referent(), GeoReferent) if (noun.termin_item is not None): tyy = noun.termin_item.canonic_text.lower() ooo = False if (g.find_slot(GeoReferent.ATTR_TYPE, tyy, True) is not None): ooo = True elif (tyy.endswith("район") and g.find_slot( GeoReferent.ATTR_TYPE, "район", True) is not None): ooo = True if (ooo): return ReferentToken._new734(g, noun.begin_token, noun.end_token.next0_, noun.begin_token.morph) if ((len(li) == 1 and noun == li[0] and li[0].termin_item is not None) and TerrItemToken.try_parse(li[0].end_token.next0_, None, True, False, None) is None and TerrItemToken.try_parse(li[0].begin_token.previous, None, True, False, None) is None): if (li[0].morph.number == MorphNumber.PLURAL): return None cou = 0 str0_ = li[0].termin_item.canonic_text.lower() tt = li[0].begin_token.previous first_pass3158 = True while True: if first_pass3158: first_pass3158 = False else: tt = tt.previous if (not (tt is not None)): break if (tt.is_newline_after): cou += 10 else: cou += 1 if (cou > 500): break g = Utils.asObjectOrNull(tt.get_referent(), GeoReferent) if (g is None): continue ok = True cou = 0 tt = li[0].end_token.next0_ first_pass3159 = True while True: if first_pass3159: first_pass3159 = False else: tt = tt.next0_ if (not (tt is not None)): 
break if (tt.is_newline_before): cou += 10 else: cou += 1 if (cou > 500): break tee = TerrItemToken.try_parse(tt, None, True, False, None) if (tee is None): continue ok = False break if (ok): ii = 0 while g is not None and (ii < 3): if (g.find_slot(GeoReferent.ATTR_TYPE, str0_, True) is not None): return ReferentToken._new734( g, li[0].begin_token, li[0].end_token, noun.begin_token.morph) g = g.higher ii += 1 break return None ter = None if (ex_obj is not None and (isinstance(ex_obj.tag, GeoReferent))): ter = (Utils.asObjectOrNull(ex_obj.tag, GeoReferent)) else: ter = GeoReferent() if (ex_obj is not None): geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent, GeoReferent) if (geo_ is not None and not geo_.is_city): ter._merge_slots2(geo_, li[0].kit.base_language) else: ter._add_name(name) if (noun is None and ex_obj.can_be_city): ter._add_typ_city(li[0].kit.base_language) else: pass elif (new_name is not None): ter._add_name(name) if (alt_name is not None): ter._add_name(alt_name) if (noun is not None): if (noun.termin_item.canonic_text == "АО"): ter._add_typ( ("АВТОНОМНИЙ ОКРУГ" if li[0].kit.base_language.is_ua else "АВТОНОМНЫЙ ОКРУГ")) elif (noun.termin_item.canonic_text == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ" or noun.termin_item.canonic_text == "МУНІЦИПАЛЬНЕ ЗБОРИ"): ter._add_typ(("МУНІЦИПАЛЬНЕ УТВОРЕННЯ" if li[0].kit.base_language.is_ua else "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ")) elif (noun.termin_item.acronym == "МО" and add_noun is not None): ter._add_typ(add_noun.termin_item.canonic_text) else: if (noun.termin_item.canonic_text == "СОЮЗ" and ex_obj is not None and ex_obj.end_char > noun.end_char): return ReferentToken._new734(ter, ex_obj.begin_token, ex_obj.end_token, ex_obj.morph) ter._add_typ(noun.termin_item.canonic_text) if (noun.termin_item.is_region and ter.is_state): ter._add_typ_reg(li[0].kit.base_language) if (ter.is_state and ter.is_region): for a in adj_list: if (a.termin_item.is_region): ter._add_typ_reg(li[0].kit.base_language) break if (ter.is_state): if 
(full_name is not None): ter._add_name(full_name) res = ReferentToken(ter, li[0].begin_token, li[k - 1].end_token) if (noun is not None and noun.morph.class0_.is_noun): res.morph = noun.morph else: res.morph = MorphCollection() ii = 0 while ii < k: for v in li[ii].morph.items: bi = MorphBaseInfo() bi.copy_from(v) if (noun is not None): if (bi.class0_.is_adjective): bi.class0_ = MorphClass.NOUN res.morph.add_item(bi) ii += 1 if (li[0].termin_item is not None and li[0].termin_item.is_specific_prefix): res.begin_token = li[0].end_token.next0_ if (add_noun is not None and add_noun.end_char > res.end_char): res.end_token = add_noun.end_token if ((isinstance(res.begin_token.previous, TextToken)) and (res.whitespaces_before_count < 2)): tt = Utils.asObjectOrNull(res.begin_token.previous, TextToken) if (tt.term == "АР"): for ty in ter.typs: if ("республика" in ty or "республіка" in ty): res.begin_token = tt break return res
# _tryParseStreet: try to assemble a street AddressItemToken (ItemType.STREET) from the
# already parsed StreetItemToken sequence `sli`.
#
# Parameters:
#   sli            - list of StreetItemToken; None or an empty list yields None.
#   ext_onto_regim - forwarded to the recursive call and to __tryDetectNonNoun; it also lets a
#                    plain NUMBER item located after the noun be counted in `after`.
#   for_metro      - when true the street receives ATTR_TYP "метро" instead of the noun's
#                    canonic text; the function also sets it itself around a "МЕТРО" noun.
# Returns:
#   an AddressItemToken wrapping a freshly created StreetReferent, or None when the items
#   do not form a street.
#
# Outline of what the visible code does:
#   1. Scan `sli` for a NOUN item (index `i`). A leading FIX item that is alone or not followed
#      by a NOUN goes to StreetDefineHelper.__tryParseFix; when the scan passes the end of the
#      list the items go to StreetDefineHelper.__tryDetectNonNoun. A "МЕТРО" noun with items
#      after it triggers a recursive call on the remaining items with for_metro=True.
#      Single-noun inputs "ВОЕННЫЙ ГОРОДОК" / "ПРОМЗОНА" / "МИКРОРАЙОН" return a street at once.
#   2. Count name-like items before and after the noun (`before`, `after`) and pick the index
#      range n0..n1 that is expected to hold the name/number.
#   3. Walk n0..n1 collecting `number`, `sec_number`, `age`, `adj`, `name` (and possibly a
#      replacement `noun` with `alt_noun`), remembering the used items in `rli`.
#   4. Create the StreetReferent, span `res` over all items in `rli`, and build the name
#      variants in the `name_base` / `name_alt` / `name_alt2` buffers, using morphology to
#      normalise adjective-like names.
#   5. Add ATTR_NAME / ATTR_TYP / number slots and adjust `res.is_doubt` from the context
#      (house after, geo object before, km markers, a preceding StreetReferent).
#
# NOTE(review): the indentation of this definition is flattened in this file, so block nesting
#   cannot be read from the text - restore it from the upstream source before changing logic.
# NOTE(review): no `self` parameter and it is called as StreetDefineHelper._tryParseStreet(...),
#   so this is presumably a @staticmethod of StreetDefineHelper - confirm.
# NOTE(review): in the `before` counting loop the NUMBER branch reads `sli[i].number_has_prefix`
#   (the noun index) while the matching `after` loop reads `sli[j]`; every branch of that chain
#   does `before += 1`, so the result is unaffected today - confirm the intent.
# NOTE(review): the `before` loop dereferences `sli[j].number.morph` without the
#   `number is not None` check that the `after` loop performs.
# NOTE(review): `alt_noun.termin.canonic_text` is used under `for_metro and noun is not None`;
#   `alt_noun` may still be None at that point - verify.
# NOTE(review): `tt0.isCharOf(",,")` lists the comma twice - possibly a different second
#   character was intended.
def _tryParseStreet(sli : typing.List['StreetItemToken'], ext_onto_regim : bool=False, for_metro : bool=False) -> 'AddressItemToken': if (sli is None or len(sli) == 0): return None i = 0 while i < len(sli): if (i == 0 and sli[i].typ == StreetItemType.FIX and ((len(sli) == 1 or sli[1].typ != StreetItemType.NOUN))): return StreetDefineHelper.__tryParseFix(sli) elif (sli[i].typ == StreetItemType.NOUN): if ((i == 0 and sli[i].termin.canonic_text == "УЛИЦА" and ((i + 2) < len(sli))) and sli[i + 1].typ == StreetItemType.NOUN and sli[i + 1].termin.canonic_text == "МИКРОРАЙОН"): sli[i + 1].begin_token = sli[i].begin_token del sli[i] if (sli[i].termin.canonic_text == "МЕТРО"): if ((i + 1) < len(sli)): sli1 = list() ii = i + 1 while ii < len(sli): sli1.append(sli[ii]) ii += 1 str1 = StreetDefineHelper._tryParseStreet(sli1, ext_onto_regim, True) if (str1 is not None): str1.begin_token = sli[i].begin_token str1.is_doubt = sli[i].is_abridge if (sli[i + 1].is_in_brackets): str1.is_doubt = False return str1 elif (i == 1 and sli[0].typ == StreetItemType.NAME): for_metro = True break if (i == 0 and len(sli) > 0): for_metro = True break return None if (i == 0 and (i + 1) >= len(sli) and ((sli[i].termin.canonic_text == "ВОЕННЫЙ ГОРОДОК" or sli[i].termin.canonic_text == "ПРОМЗОНА"))): stri0 = StreetReferent() stri0.addSlot(StreetReferent.ATTR_TYP, "микрорайон", False, 0) stri0.addSlot(StreetReferent.ATTR_NAME, sli[i].termin.canonic_text, False, 0) return AddressItemToken._new85(AddressItemToken.ItemType.STREET, sli[0].begin_token, sli[0].end_token, stri0, True) if (i == 0 and (i + 1) >= len(sli) and sli[i].termin.canonic_text == "МИКРОРАЙОН"): stri0 = StreetReferent() stri0.addSlot(StreetReferent.ATTR_TYP, sli[i].termin.canonic_text.lower(), False, 0) return AddressItemToken._new85(AddressItemToken.ItemType.STREET, sli[0].begin_token, sli[0].end_token, stri0, True) if (sli[i].termin.canonic_text == "ПЛОЩАДЬ" or sli[i].termin.canonic_text == "ПЛОЩА"): tt = sli[i].end_token.next0_ if 
(tt is not None and ((tt.is_hiphen or tt.isChar(':')))): tt = tt.next0_ nex = NumberHelper.tryParseNumberWithPostfix(tt) if (nex is not None): return None break i += 1 if (i >= len(sli)): return StreetDefineHelper.__tryDetectNonNoun(sli, ext_onto_regim, for_metro) name = None number = None age = None adj = None noun = sli[i] alt_noun = None is_micro_raion = (noun.termin.canonic_text == "МИКРОРАЙОН" or noun.termin.canonic_text == "МІКРОРАЙОН" or noun.termin.canonic_text == "КВАРТАЛ") or LanguageHelper.endsWith(noun.termin.canonic_text, "ГОРОДОК") before = 0 after = 0 j = 0 while j < i: if ((sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX) or sli[j].typ == StreetItemType.STDADJECTIVE or sli[j].typ == StreetItemType.STDPARTOFNAME): before += 1 elif (sli[j].typ == StreetItemType.NUMBER): if (sli[j].is_newline_after): return None if (sli[j].number.morph.class0_.is_adjective): before += 1 elif (is_micro_raion): before += 1 elif (sli[i].number_has_prefix): before += 1 else: before += 1 j += 1 j = (i + 1) while j < len(sli): if ((sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX) or sli[j].typ == StreetItemType.STDADJECTIVE or sli[j].typ == StreetItemType.STDPARTOFNAME): after += 1 elif (sli[j].typ == StreetItemType.NUMBER): if (sli[j].number is not None and sli[j].number.morph.class0_.is_adjective): after += 1 elif (is_micro_raion): after += 1 elif (sli[j].number_has_prefix): after += 1 elif (ext_onto_regim): after += 1 elif (sli[j].typ == StreetItemType.NOUN): break else: after += 1 j += 1 rli = list() if (before > after): if (noun.termin.canonic_text == "МЕТРО"): return None tt = sli[0].begin_token if (tt == sli[0].end_token and noun.begin_token == sli[0].end_token.next0_): if (not tt.morph.class0_.is_adjective and not ((isinstance(tt, NumberToken)))): if ((sli[0].is_newline_before or not MiscLocationHelper.checkGeoObjectBefore(sli[0].begin_token) or 
noun.morph.case_.is_genitive) or noun.morph.case_.is_instrumental): ok = False if (AddressItemToken.checkHouseAfter(noun.end_token.next0_, False, True)): ok = True elif (noun.end_token.next0_ is None): ok = True elif (noun.is_newline_after and MiscLocationHelper.checkGeoObjectBefore(sli[0].begin_token)): ok = True if (not ok): if ((noun.chars.is_latin_letter and noun.chars.is_capital_upper and sli[0].chars.is_latin_letter) and sli[0].chars.is_capital_upper): ok = True if (not ok): return None n0 = 0 n1 = (i - 1) elif (i == 1 and sli[0].typ == StreetItemType.NUMBER): if (not sli[0].is_whitespace_after): return None number = (sli[0].value if sli[0].number is None else str(sli[0].number.int_value)) if (sli[0].is_number_km): number += "км" n0 = (i + 1) n1 = (len(sli) - 1) rli.append(sli[0]) rli.append(sli[i]) elif (after > before): n0 = (i + 1) n1 = (len(sli) - 1) rli.append(sli[i]) elif (after == 0): return None elif ((len(sli) > 2 and ((sli[0].typ == StreetItemType.NAME or sli[0].typ == StreetItemType.STDADJECTIVE or sli[0].typ == StreetItemType.STDNAME)) and sli[1].typ == StreetItemType.NOUN) and sli[2].typ == StreetItemType.NUMBER): n0 = 0 n1 = 0 num = False tt2 = sli[2].end_token.next0_ if (sli[2].is_number_km): num = True elif (sli[0].begin_token.previous is not None and sli[0].begin_token.previous.isValue("КИЛОМЕТР", None)): sli[2].is_number_km = True num = True elif (sli[2].begin_token.previous.is_comma): pass elif (sli[2].begin_token != sli[2].end_token): num = True elif (AddressItemToken.checkHouseAfter(sli[2].end_token.next0_, False, True)): num = True elif (sli[2].morph.class0_.is_adjective and (sli[2].whitespaces_before_count < 2)): if (sli[2].end_token.next0_ is None or sli[2].end_token.is_comma or sli[2].is_newline_after): num = True if (num): number = (sli[2].value if sli[2].number is None else str(sli[2].number.int_value)) if (sli[2].is_number_km): number += "км" rli.append(sli[2]) else: del sli[2:2+len(sli) - 2] else: return None sec_number = None j = 
n0 first_pass2732 = True while True: if first_pass2732: first_pass2732 = False else: j += 1 if (not (j <= n1)): break if (sli[j].typ == StreetItemType.NUMBER): if (age is not None or ((sli[j].is_newline_before and j > 0))): break if (number is not None): if (name is not None and name.typ == StreetItemType.STDNAME): sec_number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value)) if (sli[j].is_number_km): sec_number += "км" rli.append(sli[j]) continue if (((j + 1) < len(sli)) and sli[j + 1].typ == StreetItemType.STDNAME): sec_number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value)) if (sli[j].is_number_km): sec_number += "км" rli.append(sli[j]) continue break if (sli[j].number is not None and sli[j].number.typ == NumberSpellingType.DIGIT and not sli[j].number.morph.class0_.is_adjective): if (sli[j].whitespaces_before_count > 2 and j > 0): break if (sli[j].number is not None and sli[j].number.int_value > 20): if (j > n0): if (((j + 1) < len(sli)) and sli[j + 1].typ == StreetItemType.NOUN): pass else: break if (j == n0 and n0 > 0): pass elif (j == n0 and n0 == 0 and sli[j].whitespaces_after_count == 1): pass elif (sli[j].number_has_prefix): pass elif (j == n1 and ((n1 + 1) < len(sli)) and sli[n1 + 1].typ == StreetItemType.NOUN): pass else: break number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value)) if (sli[j].is_number_km): number += "км" rli.append(sli[j]) elif (sli[j].typ == StreetItemType.AGE): if (number is not None or age is not None): break age = str(sli[j].number.int_value) rli.append(sli[j]) elif (sli[j].typ == StreetItemType.STDADJECTIVE): if (adj is not None): return None adj = sli[j] rli.append(sli[j]) elif (sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX): if (name is not None): if (j > 1 and sli[j - 2].typ == StreetItemType.NOUN): break elif (i < j): break else: return None name = sli[j] rli.append(sli[j]) elif 
(sli[j].typ == StreetItemType.STDPARTOFNAME and j == n1): if (name is not None): break name = sli[j] rli.append(sli[j]) elif (sli[j].typ == StreetItemType.NOUN): if ((sli[0] == noun and ((noun.termin.canonic_text == "УЛИЦА" or noun.termin.canonic_text == "ВУЛИЦЯ")) and j > 0) and name is None): alt_noun = noun noun = sli[j] rli.append(sli[j]) else: break if (((n1 < i) and number is None and ((i + 1) < len(sli))) and sli[i + 1].typ == StreetItemType.NUMBER and sli[i + 1].number_has_prefix): number = (sli[i + 1].value if sli[i + 1].number is None else str(sli[i + 1].number.int_value)) rli.append(sli[i + 1]) elif ((((i < n0) and ((name is not None or adj is not None)) and (j < len(sli))) and sli[j].typ == StreetItemType.NOUN and ((noun.termin.canonic_text == "УЛИЦА" or noun.termin.canonic_text == "ВУЛИЦЯ"))) and (((sli[j].termin.canonic_text == "ПЛОЩАДЬ" or sli[j].termin.canonic_text == "БУЛЬВАР" or sli[j].termin.canonic_text == "ПЛОЩА") or sli[j].termin.canonic_text == "МАЙДАН" or (j + 1) == len(sli)))): alt_noun = noun noun = sli[j] rli.append(sli[j]) if (name is None): if (number is None and adj is None): return None if (noun.is_abridge): if (is_micro_raion): pass elif (noun.termin is not None and ((noun.termin.canonic_text == "ПРОЕЗД" or noun.termin.canonic_text == "ПРОЇЗД"))): pass elif (adj is None or adj.is_abridge): return None if (adj is not None and adj.is_abridge): return None if (not sli[i] in rli): rli.append(sli[i]) street = StreetReferent() if (not for_metro): street.addSlot(StreetReferent.ATTR_TYP, noun.termin.canonic_text.lower(), False, 0) if (noun.alt_termin is not None): if (noun.alt_termin.canonic_text == "ПРОСПЕКТ" and number is not None): pass else: street.addSlot(StreetReferent.ATTR_TYP, noun.alt_termin.canonic_text.lower(), False, 0) else: street.addSlot(StreetReferent.ATTR_TYP, "метро", False, 0) res = AddressItemToken._new82(AddressItemToken.ItemType.STREET, rli[0].begin_token, rli[0].end_token, street) for r in rli: if (res.begin_char > 
r.begin_char): res.begin_token = r.begin_token if (res.end_char < r.end_char): res.end_token = r.end_token if (for_metro and noun in rli and noun.termin.canonic_text == "МЕТРО"): rli.remove(noun) if (noun.is_abridge and (noun.length_char < 4)): res.is_doubt = True elif (noun.noun_is_doubt_coef > 0): res.is_doubt = True if ((name is not None and name.end_char > noun.end_char and noun.chars.is_all_lower) and not name.chars.is_all_lower and not ((isinstance(name.begin_token, ReferentToken)))): npt2 = NounPhraseHelper.tryParse(name.begin_token, NounPhraseParseAttr.NO, 0) if (npt2 is not None and npt2.end_char > name.end_char): pass elif (AddressItemToken.checkHouseAfter(res.end_token.next0_, False, False)): res.is_doubt = False elif (name.chars.is_capital_upper and noun.noun_is_doubt_coef == 1): res.is_doubt = False name_base = io.StringIO() name_alt = io.StringIO() name_alt2 = None gen = noun.termin.gender adj_gen = MorphGender.UNDEFINED if (number is not None): street.number = number if (sec_number is not None): street.sec_number = sec_number if (age is not None): if (street.number is None): street.number = age else: street.sec_number = age if (name is not None and name.value is not None): if (street.kind == StreetKind.ROAD): for r in rli: if (r.typ == StreetItemType.NAME and r != name): print(r.value, end="", file=name_alt) break if (name.alt_value is not None and name_alt.tell() == 0): print("{0} {1}".format(Utils.toStringStringIO(name_base), name.alt_value), end="", file=name_alt, flush=True) print(" {0}".format(name.value), end="", file=name_base, flush=True) elif (name is not None): is_adj = False if (isinstance(name.end_token, TextToken)): for wf in name.end_token.morph.items: if ((isinstance(wf, MorphWordForm)) and (wf).is_in_dictionary): is_adj = (wf.class0_.is_adjective | wf.class0_.is_proper_geo) adj_gen = wf.gender break elif (wf.class0_.is_adjective | wf.class0_.is_proper_geo): is_adj = True if (is_adj): tmp = io.StringIO() vars0_ = list() t = 
name.begin_token while t is not None: tt = Utils.asObjectOrNull(t, TextToken) if (tt is None): break if (tmp.tell() > 0): print(' ', end="", file=tmp) if (t == name.end_token): is_padez = False if (not noun.is_abridge): if (not noun.morph.case_.is_undefined and not noun.morph.case_.is_nominative): is_padez = True elif (noun.termin.canonic_text == "ШОССЕ" or noun.termin.canonic_text == "ШОСЕ"): is_padez = True if (res.begin_token.previous is not None and res.begin_token.previous.morph.class0_.is_preposition): is_padez = True if (not is_padez): print(tt.term, end="", file=tmp) break for wf in tt.morph.items: if (((wf.class0_.is_adjective or wf.class0_.is_proper_geo)) and (((wf.gender) & (gen))) != (MorphGender.UNDEFINED)): if (noun.morph.case_.is_undefined or not ((wf.case_) & noun.morph.case_).is_undefined): wff = Utils.asObjectOrNull(wf, MorphWordForm) if (wff is None): continue if (gen == MorphGender.MASCULINE and "ОЙ" in wff.normal_case): continue if (not wff.normal_case in vars0_): vars0_.append(wff.normal_case) if (not tt.term in vars0_ and Utils.indexOfList(sli, name, 0) > Utils.indexOfList(sli, noun, 0)): vars0_.append(tt.term) if (len(vars0_) == 0): vars0_.append(tt.term) break if (not tt.is_hiphen): print(tt.term, end="", file=tmp) t = t.next0_ if (len(vars0_) == 0): print(" {0}".format(Utils.toStringStringIO(tmp)), end="", file=name_base, flush=True) else: head = Utils.toStringStringIO(name_base) print(" {0}{1}".format(Utils.toStringStringIO(tmp), vars0_[0]), end="", file=name_base, flush=True) if (len(vars0_) > 1): Utils.setLengthStringIO(name_alt, 0) print("{0} {1}{2}".format(head, Utils.toStringStringIO(tmp), vars0_[1]), end="", file=name_alt, flush=True) if (len(vars0_) > 2): name_alt2 = "{0} {1}{2}".format(head, Utils.toStringStringIO(tmp), vars0_[2]) else: str_nam = None nits = list() has_adj = False has_proper_name = False t = name.begin_token while t is not None: if (t.morph.class0_.is_adjective or t.morph.class0_.is_conjunction): has_adj = True if 
((isinstance(t, TextToken)) and not t.is_hiphen): if (name.termin is not None): nits.append(name.termin.canonic_text) break elif (not t.chars.is_letter and len(nits) > 0): nits[len(nits) - 1] += (t).term else: nits.append((t).term) if (t == name.begin_token and t.getMorphClassInDictionary().is_proper_name): has_proper_name = True elif ((isinstance(t, ReferentToken)) and name.termin is None): nits.append(t.getSourceText().upper()) if (t == name.end_token): break t = t.next0_ if (not has_adj and not has_proper_name): nits.sort() str_nam = Utils.joinStrings(" ", list(nits)) if (has_proper_name and len(nits) == 2): Utils.setLengthStringIO(name_alt, 0) print("{0} {1}".format(Utils.toStringStringIO(name_base), nits[1]), end="", file=name_alt, flush=True) print(" {0}".format(str_nam), end="", file=name_base, flush=True) adj_str = None adj_can_be_initial = False if (adj is not None): if (adj_gen == MorphGender.UNDEFINED and name is not None and (((name.morph.number) & (MorphNumber.PLURAL))) == (MorphNumber.UNDEFINED)): if (name.morph.gender == MorphGender.FEMINIE or name.morph.gender == MorphGender.MASCULINE or name.morph.gender == MorphGender.NEUTER): adj_gen = name.morph.gender if (name is not None and (((name.morph.number) & (MorphNumber.PLURAL))) != (MorphNumber.UNDEFINED)): s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new209(MorphClass.ADJECTIVE, MorphNumber.PLURAL)) elif (adj_gen != MorphGender.UNDEFINED): s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, adj_gen)) elif ((((adj.morph.gender) & (gen))) == (MorphGender.UNDEFINED)): s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, adj.morph.gender)) else: s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, gen)) adj_str = s if (name is not None and (Utils.indexOfList(sli, adj, 0) < Utils.indexOfList(sli, name, 0))): if (adj.end_token.isChar('.') and 
adj.length_char <= 3 and not adj.begin_token.chars.is_all_lower): adj_can_be_initial = True s1 = Utils.toStringStringIO(name_base).strip() s2 = Utils.toStringStringIO(name_alt).strip() if (len(s1) < 3): if (street.number is not None): if (adj_str is not None): if (adj.is_abridge): return None street.addSlot(StreetReferent.ATTR_NAME, adj_str, False, 0) elif (adj_str is None): if (len(s1) < 1): return None if (is_micro_raion): street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0) if (not Utils.isNullOrEmpty(s2)): street.addSlot(StreetReferent.ATTR_NAME, s2, False, 0) else: return None else: if (adj.is_abridge): return None street.addSlot(StreetReferent.ATTR_NAME, adj_str, False, 0) elif (adj_can_be_initial): street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0) street.addSlot(StreetReferent.ATTR_NAME, MiscHelper.getTextValue(adj.begin_token, name.end_token, GetTextAttr.NO), False, 0) street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0) elif (adj_str is None): street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0) else: street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0) if (name_alt.tell() > 0): s1 = Utils.toStringStringIO(name_alt).strip() if (adj_str is None): street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0) else: street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0) if (name_alt2 is not None): if (adj_str is None): if (for_metro and noun is not None): street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(alt_noun.termin.canonic_text, name_alt2.strip()), False, 0) else: street.addSlot(StreetReferent.ATTR_NAME, name_alt2.strip(), False, 0) else: street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, name_alt2.strip()), False, 0) if (name is not None and name.alt_value2 is not None): street.addSlot(StreetReferent.ATTR_NAME, name.alt_value2, False, 0) if ((name is not None and adj is None and name.exist_street is not None) and not for_metro): for n in 
name.exist_street.names: street.addSlot(StreetReferent.ATTR_NAME, n, False, 0) if (alt_noun is not None and not for_metro): street.addSlot(StreetReferent.ATTR_TYP, alt_noun.termin.canonic_text.lower(), False, 0) if (noun.termin.canonic_text == "ПЛОЩАДЬ" or noun.termin.canonic_text == "КВАРТАЛ" or noun.termin.canonic_text == "ПЛОЩА"): res.is_doubt = True if (name is not None and name.is_in_dictionary): res.is_doubt = False elif (alt_noun is not None or for_metro): res.is_doubt = False elif (res.begin_token.previous is None or MiscLocationHelper.checkGeoObjectBefore(res.begin_token.previous)): if (res.end_token.next0_ is None or AddressItemToken.checkHouseAfter(res.end_token.next0_, False, True)): res.is_doubt = False if (LanguageHelper.endsWith(noun.termin.canonic_text, "ГОРОДОК")): for s in street.slots: if (s.type_name == StreetReferent.ATTR_TYP): street.uploadSlot(s, "микрорайон") elif (s.type_name == StreetReferent.ATTR_NAME): street.uploadSlot(s, "{0} {1}".format(noun.termin.canonic_text, s.value)) if (street.findSlot(StreetReferent.ATTR_NAME, None, True) is None): street.addSlot(StreetReferent.ATTR_NAME, noun.termin.canonic_text, False, 0) t1 = res.end_token.next0_ if (t1 is not None and t1.is_comma): t1 = t1.next0_ non = StreetItemToken.tryParse(t1, None, False, None, False) if (non is not None and non.typ == StreetItemType.NOUN and len(street.typs) > 0): if (AddressItemToken.checkHouseAfter(non.end_token.next0_, False, True)): street._correct() nams = street.names for t in street.typs: for n in nams: street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(t.upper(), n), False, 0) street.addSlot(StreetReferent.ATTR_TYP, non.termin.canonic_text.lower(), False, 0) res.end_token = non.end_token if (res.is_doubt): if (noun.is_road): if (street.number is not None and Utils.endsWithString(street.number, "КМ", True)): res.is_doubt = False elif (AddressItemToken.checkKmAfter(res.end_token.next0_)): res.is_doubt = False elif 
(AddressItemToken.checkKmBefore(res.begin_token.previous)): res.is_doubt = False elif (noun.termin.canonic_text == "ПРОЕЗД" and street.findSlot(StreetReferent.ATTR_NAME, "ПРОЕКТИРУЕМЫЙ", True) is not None): res.is_doubt = False tt0 = res.begin_token.previous first_pass2733 = True while True: if first_pass2733: first_pass2733 = False else: tt0 = tt0.previous if (not (tt0 is not None)): break if (tt0.isCharOf(",,") or tt0.is_comma_and): continue str0 = Utils.asObjectOrNull(tt0.getReferent(), StreetReferent) if (str0 is not None): res.is_doubt = False break if (noun.termin.canonic_text == "КВАРТАЛ" and (res.whitespaces_after_count < 2) and number is None): ait = AddressItemToken.tryParse(res.end_token.next0_, None, False, True, None) if (ait is not None and ait.typ == AddressItemToken.ItemType.NUMBER and ait.value is not None): street.addSlot(StreetReferent.ATTR_NUMBER, ait.value, False, 0) res.end_token = ait.end_token return res
def get_vars(self, key: str) -> typing.List['MorphRuleVariant']:
    """Look up the morph variants registered for the tail ``key``.

    ``self.tails`` and ``self.morph_vars`` are used as index-aligned
    sequences: the position at which ``key`` is found in ``self.tails``
    selects the entry returned from ``self.morph_vars``.

    Args:
        key: the tail string to search for in ``self.tails``.

    Returns:
        The ``self.morph_vars`` entry at the matching position, or ``None``
        when ``Utils.indexOfList`` reports a negative position.
    """
    position = Utils.indexOfList(self.tails, key, 0)
    if position < 0:
        return None
    return self.morph_vars[position]
def contains_var(self, tail: str) -> bool:
    """Tell whether ``tail`` is present in ``self.tails``.

    Args:
        tail: the tail string to search for.

    Returns:
        ``True`` when ``Utils.indexOfList`` reports a non-negative position
        for ``tail`` in ``self.tails`` (searching from index 0), else ``False``.
    """
    position = Utils.indexOfList(self.tails, tail, 0)
    return position >= 0
# analyze: detect list structure among the children of the fragment `res` and tag it in place.
#
# Parameter:
#   res - a FragToken. Its `children` list is edited (items inserted, removed, spliced) and
#         the children's `kind2`, `number` and `end_token` may be changed.
# Returns: nothing; all results are side effects on `res` and its descendants.
#
# What the visible code does:
#   * A fragment without children whose kind is CHAPTER, CLAUSE, CONTENT, ITEM, SUBITEM,
#     CLAUSEPART or INDENTION is wrapped in a one-element list and handed to
#     ListHelper.__analize_list_items; the function then returns without descending.
#   * First pass over res.children: an INDENTION child ending in ':' or ';' (or followed by an
#     EDITIONS child ending so) opens a candidate run. Following INDENTION children are counted
#     in `cou`, COMMENT/EDITIONS children are stepped over, and any other kind ends the run.
#     `list_bullet` remembers the first character of a ';'-terminated item that does not start
#     with a letter; chr(0) means "no bullet seen yet".
#   * Inside a run, an INDENTION that starts with the remembered bullet and ends with ':' may be
#     split in two at a sentence start that follows a '.'; the new part is inserted after it and
#     the `number` of the INDENTION children from that position on is incremented.
#   * Runs with cou < 3 are ignored. Otherwise the INDENTION children of the run get
#     kind2 = LISTHEAD (item, or the EDITIONS child right after it, ends with ':') or LISTITEM,
#     and a preceding ':'-terminated sibling with undefined kind2 may become LISTHEAD too.
#   * Second pass: children that have their own children are analysed recursively; the others go
#     through ListHelper.__analize_list_items(res.children, i), and a positive result records the
#     child in `changed`.
#   * Finally, walking `changed` backwards, every changed child of kind CONTENT is replaced in
#     res.children by its own children.
#
# NOTE(review): the indentation of this definition is flattened in this file, so block nesting
#   cannot be read from the text - restore it from the upstream source before changing logic.
# NOTE(review): `if (res.number == 4): pass`, `if (res.kind == ... and res.number == 12): pass`
#   and `if (res.number == 7): pass` are no-ops, presumably leftover debugger anchors.
# NOTE(review): when __analize_list_items returns co > 1 the following co-1 siblings are deleted
#   and `i` is additionally advanced by co-1 - confirm this does not skip unprocessed children.
# NOTE(review): no `self` parameter and the double-underscore helper is reached through
#   `ListHelper.`, so this is presumably a @staticmethod inside class ListHelper - confirm.
def analyze(res : 'FragToken') -> None: if (res.number == 4): pass if (len(res.children) == 0): ki = res.kind if (((ki == InstrumentKind.CHAPTER or ki == InstrumentKind.CLAUSE or ki == InstrumentKind.CONTENT) or ki == InstrumentKind.ITEM or ki == InstrumentKind.SUBITEM) or ki == InstrumentKind.CLAUSEPART or ki == InstrumentKind.INDENTION): tmp = list() tmp.append(res) ListHelper.__analize_list_items(tmp, 0) return if (res.kind == InstrumentKind.CLAUSE and res.number == 12): pass i = 0 first_pass3273 = True while True: if first_pass3273: first_pass3273 = False else: i += 1 if (not (i < len(res.children))): break if (res.children[i].kind == InstrumentKind.INDENTION and ((res.children[i].end_token.is_char_of(":;") or ((((i + 1) < len(res.children)) and res.children[i + 1].kind == InstrumentKind.EDITIONS and res.children[i + 1].end_token.is_char_of(":;")))))): j = 0 cou = 1 list_bullet = chr(0) j = (i + 1) first_pass3274 = True while True: if first_pass3274: first_pass3274 = False else: j += 1 if (not (j < len(res.children))): break ch = res.children[j] if (ch.kind == InstrumentKind.COMMENT or ch.kind == InstrumentKind.EDITIONS): continue if (ch.kind != InstrumentKind.INDENTION): break if (ch.end_token.is_char_of(";") or ((((j + 1) < len(res.children)) and res.children[j + 1].kind == InstrumentKind.EDITIONS and res.children[j + 1].end_token.is_char(';')))): cou += 1 if ((isinstance(ch.begin_token, TextToken)) and not ch.chars.is_letter): list_bullet = ch.kit.get_text_character(ch.begin_char) continue if (ch.end_token.is_char_of(".")): cou += 1 j += 1 break if (ch.end_token.is_char_of(":")): if ((ord(list_bullet)) != 0 and ch.begin_token.is_char(list_bullet)): tt = ch.begin_token.next0_ while tt is not None and (tt.end_char < ch.end_char): if (tt.previous.is_char('.') and MiscHelper.can_be_start_of_sentence(tt)): ch2 = FragToken._new1357(tt, ch.end_token, InstrumentKind.INDENTION, ch.number) ch.end_token = tt.previous res.children.insert(j + 1, ch2) k = j + 1 while k < 
len(res.children): if (res.children[k].kind == InstrumentKind.INDENTION): res.children[k].number += 1 k += 1 cou += 1 j += 1 break tt = tt.next0_ break cou += 1 j += 1 break if (cou < 3): i = j continue if ((i > 0 and not res.children[i].end_token.is_char(':') and res.children[i - 1].kind2 == InstrumentKind.UNDEFINED) and res.children[i - 1].end_token.is_char(':')): res.children[i - 1].kind2 = InstrumentKind.LISTHEAD first_pass3275 = True while True: if first_pass3275: first_pass3275 = False else: i += 1 if (not (i < j)): break ch = res.children[i] if (ch.kind != InstrumentKind.INDENTION): continue if (ch.end_token.is_char(':')): ch.kind2 = InstrumentKind.LISTHEAD elif (((i + 1) < j) and res.children[i + 1].kind == InstrumentKind.EDITIONS and res.children[i + 1].end_token.is_char(':')): ch.kind2 = InstrumentKind.LISTHEAD else: ch.kind2 = InstrumentKind.LISTITEM changed = list() i = 0 while i < len(res.children): if (res.number == 7): pass if (len(res.children[i].children) > 0): ListHelper.analyze(res.children[i]) else: co = ListHelper.__analize_list_items(res.children, i) if (co > 0): changed.append(res.children[i]) if (co > 1): del res.children[i + 1:i + 1+co - 1] i += (co - 1) i += 1 for i in range(len(changed) - 1, -1, -1): if (changed[i].kind == InstrumentKind.CONTENT): j = Utils.indexOfList(res.children, changed[i], 0) if (j < 0): continue del res.children[j] res.children[j:j] = changed[i].children