def __try1(li: typing.List['CityItemToken'], oi: 'IntOntologyItem',
           ad: 'AnalyzerDataWithOntology') -> 'ReferentToken':
    """Try to build a city referent token from a list headed by a CITY item.

    The list must start with a CITY item, or be exactly a PROPERNAME item
    followed by a NOUN item; anything else is rejected.

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-collapsed source; confirm ambiguous ``elif``/``else``
    attachments against the upstream file.

    Args:
        li: candidate items. On success the list is truncated in place so
            that only the consumed items remain.
        oi: holder object; its ``value`` is reset to None on entry and then
            set to ``li[0].onto_item``.
        ad: analyzer data; only ``ad.local_ontology`` is read. May be None.

    Returns:
        A ReferentToken wrapping a GeoReferent, or None when the items are
        not accepted.
    """
    oi.value = (None)
    if (li is None or (len(li) < 1)):
        return None
    elif (li[0].typ != CityItemToken.ItemType.CITY):
        # The only accepted non-CITY shape is PROPERNAME + NOUN.
        if (len(li) != 2 or li[0].typ != CityItemToken.ItemType.PROPERNAME
                or li[1].typ != CityItemToken.ItemType.NOUN):
            return None
    i = 1  # index of the first item not yet consumed
    oi.value = li[0].onto_item
    ok = not li[0].doubtful
    if ((ok and li[0].onto_item is not None and li[0].onto_item.misc_attr is None)
            and ad is not None):
        # Item comes from an ontology that is neither the local one nor an
        # external one: keep it only when preceded by the word "В".
        if (li[0].onto_item.owner != ad.local_ontology
                and not li[0].onto_item.owner.is_ext_ontology):
            if (li[0].begin_token.previous is not None
                    and li[0].begin_token.previous.isValue("В", None)):
                pass
            else:
                ok = False
    if (len(li) == 1 and li[0].begin_token.morph.class0_.is_adjective):
        # A lone adjective that parses as a two-item street list ending in a
        # street NOUN is rejected.
        sits = StreetItemToken.tryParseList(li[0].begin_token, None, 3)
        if (sits is not None and len(sits) == 2 and sits[1].typ == StreetItemType.NOUN):
            return None
    typ = None
    alttyp = None
    mc = li[0].morph
    if (i < len(li)):
        if (li[i].typ == CityItemToken.ItemType.NOUN):
            at = None
            if (not li[i].chars.is_all_lower and (li[i].whitespaces_after_count < 2)):
                sit = StreetItemToken.tryParse(li[i].end_token.next0_, None, False, None, False)
                if (sit is not None and sit.typ == StreetItemType.NOUN):
                    at = AddressItemToken.tryParse(li[i].begin_token, None, False, False, None)
                    if (at is not None):
                        # If what follows is itself a STREET address item, the
                        # address reading of this noun is discarded.
                        at2 = AddressItemToken.tryParse(
                            li[i].end_token.next0_, None, False, False, None)
                        if (at2 is not None and at2.typ == AddressItemToken.ItemType.STREET):
                            at = (None)
            if (at is None):
                typ = li[i].value
                alttyp = li[i].alt_value
                if (li[i].begin_token.isValue("СТ", None)
                        and li[i].begin_token.chars.is_all_upper):
                    return None
                if ((i + 1) == len(li)):
                    # The noun is the last item: accept and take its morphology
                    # when its case is defined.
                    ok = True
                    if (not li[i].morph.case_.is_undefined):
                        mc = li[i].morph
                    i += 1
                elif (ok):
                    i += 1
                else:
                    # Accept after the words "МЭР"/"МЕР", "ГЛАВА" or
                    # "ГРАДОНАЧАЛЬНИК" standing right before the name.
                    tt0 = li[0].begin_token.previous
                    if ((isinstance(tt0, TextToken)) and (tt0.whitespaces_after_count < 3)):
                        if (tt0.isValue("МЭР", "МЕР") or tt0.isValue("ГЛАВА", None)
                                or tt0.isValue("ГРАДОНАЧАЛЬНИК", None)):
                            ok = True
                            i += 1
    if (not ok and oi.value is not None and (len(oi.value.canonic_text) < 4)):
        return None
    if (not ok and li[0].begin_token.morph.class0_.is_proper_name):
        return None
    if (not ok):
        if (not MiscHelper.isExistsInDictionary(
                li[0].begin_token, li[0].end_token,
                (MorphClass.ADJECTIVE) | MorphClass.NOUN | MorphClass.PRONOUN)):
            ok = (li[0].geo_object_before or li[i - 1].geo_object_after)
            if (ok and li[0].begin_token == li[0].end_token):
                mcc = li[0].begin_token.getMorphClassInDictionary()
                # NOTE(review): this elif is assumed to pair with the
                # proper-name check directly above — confirm.
                if (mcc.is_proper_name or mcc.is_proper_surname):
                    ok = False
                elif (li[0].geo_object_before and (li[0].whitespaces_after_count < 2)):
                    ad1 = AddressItemToken.tryParse(
                        li[0].begin_token, None, False, False, None)
                    if (ad1 is not None and ad1.typ == AddressItemToken.ItemType.STREET):
                        ad2 = AddressItemToken.tryParse(
                            li[0].end_token.next0_, None, False, False, None)
                        if (ad2 is None or ad2.typ != AddressItemToken.ItemType.STREET):
                            ok = False
                    elif (AddressItemToken.tryAttachOrg(li[0].begin_token) is not None):
                        ok = False
        # NOTE(review): assumed to sit inside the enclosing `if (not ok)` — confirm.
        if (ok):
            if (li[0].kit.processReferent("PERSON", li[0].begin_token) is not None):
                ok = False
    # Last-chance checks delegated to CityAttachHelper helpers on the token
    # that follows the name.
    if (not ok):
        ok = CityAttachHelper.checkYearAfter(li[0].end_token.next0_)
    if (not ok and ((not li[0].begin_token.morph.class0_.is_adjective
                     or li[0].begin_token != li[0].end_token))):
        ok = CityAttachHelper.checkCityAfter(li[0].end_token.next0_)
    if (not ok):
        return None
    if (i < len(li)):
        # Drops every item from index i to the end of the list.
        del li[i:i + len(li) - i]
    rt = None
    if (oi.value is None):
        if (li[0].value is not None and li[0].higher_geo is not None):
            cap = GeoReferent()
            cap._addName(li[0].value)
            cap._addTypCity(li[0].kit.base_language)
            cap.higher = li[0].higher_geo
            if (typ is not None):
                cap._addTyp(typ)
            if (alttyp is not None):
                cap._addTyp(alttyp)
            rt = ReferentToken(cap, li[0].begin_token, li[0].end_token)
        else:
            if (li[0].value is None):
                return None
            if (typ is None):
                # Without a type noun, accept only "<geo referent>-<name>".
                if ((len(li) == 1 and li[0].begin_token.previous is not None
                     and li[0].begin_token.previous.is_hiphen)
                        and (isinstance(li[0].begin_token.previous.previous, ReferentToken))
                        and (isinstance(
                            li[0].begin_token.previous.previous.getReferent(), GeoReferent))):
                    pass
                else:
                    return None
            else:
                if (not LanguageHelper.endsWithEx(typ, "ПУНКТ", "ПОСЕЛЕНИЕ", "ПОСЕЛЕННЯ", "ПОСЕЛОК")):
                    if (not LanguageHelper.endsWith(typ, "CITY")):
                        if (typ == "СТАНЦИЯ" and ((MiscLocationHelper.checkGeoObjectBefore(
                                li[0].begin_token)))):
                            pass
                        elif (len(li) > 1 and li[1].typ == CityItemToken.ItemType.NOUN
                              and li[0].typ == CityItemToken.ItemType.CITY):
                            pass
                        else:
                            return None
                if (li[0].begin_token.morph.class0_.is_adjective):
                    # Re-derive the name as an adjective using the case and
                    # gender of the noun item li[1].
                    li[0].value = ProperNameHelper.getNameEx(
                        li[0].begin_token, li[0].end_token, MorphClass.ADJECTIVE,
                        li[1].morph.case_, li[1].morph.gender, False, False)
    elif (isinstance(oi.value.referent, GeoReferent)):
        # The ontology item already carries a GeoReferent: wrap it directly.
        rt = ReferentToken._new719(
            Utils.asObjectOrNull(oi.value.referent, GeoReferent),
            li[0].begin_token, li[len(li) - 1].end_token, mc)
    elif (typ is None):
        typ = oi.value.typ
    if (rt is None):
        city = GeoReferent()
        city._addName(
            (li[0].value if oi.value is None else oi.value.canonic_text))
        if (typ is not None):
            city._addTyp(typ)
        else:
            city._addTypCity(li[0].kit.base_language)
        if (alttyp is not None):
            city._addTyp(alttyp)
        rt = ReferentToken._new719(city, li[0].begin_token, li[len(li) - 1].end_token, mc)
    if ((isinstance(rt.referent, GeoReferent)) and len(li) == 1 and (rt.referent).is_city):
        # Extend the span over an adjacent "Г" / "Г." marker before the name,
        # or "Г" (optionally followed by '.') after it.
        if (rt.begin_token.previous is not None and rt.begin_token.previous.isValue("Г", None)):
            rt.begin_token = rt.begin_token.previous
        elif ((rt.begin_token.previous is not None and rt.begin_token.previous.isChar('.')
               and rt.begin_token.previous.previous is not None)
              and rt.begin_token.previous.previous.isValue("Г", None)):
            rt.begin_token = rt.begin_token.previous.previous
        elif (rt.end_token.next0_ is not None and (rt.whitespaces_after_count < 2)
              and rt.end_token.next0_.isValue("Г", None)):
            rt.end_token = rt.end_token.next0_
            if (rt.end_token.next0_ is not None and rt.end_token.next0_.isChar('.')):
                rt.end_token = rt.end_token.next0_
    return rt
def __tryNounName(li: typing.List['CityItemToken'], oi: 'IntOntologyItem',
                  always: bool) -> 'ReferentToken':
    """Try to build a city referent from "<type noun> <name>" items.

    The list must hold at least two items and start with a NOUN or MISC
    item; the name is taken from the item at index 1 (or index 2 when a
    second type noun is consumed).

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-collapsed source; confirm ambiguous ``elif``/``else``
    attachments against the upstream file.

    Args:
        li: candidate items. Mutated in place: trailing unused items are
            deleted, and a leading MISC item is removed before the result
            token is built.
        oi: holder object; its ``value`` is reset to None on entry and may
            be set to the name item's ``onto_item``.
        always: when True, the final context check
            (``MiscLocationHelper.checkNearBefore``) is skipped.

    Returns:
        A ReferentToken wrapping a GeoReferent, or None.
    """
    oi.value = (None)
    if (li is None or (len(li) < 2)
            or ((li[0].typ != CityItemToken.ItemType.NOUN
                 and li[0].typ != CityItemToken.ItemType.MISC))):
        return None
    ok = not li[0].doubtful
    if (ok and li[0].typ == CityItemToken.ItemType.MISC):
        ok = False
    # A MISC head contributes no type strings.
    typ = (None if li[0].typ == CityItemToken.ItemType.MISC else li[0].value)
    typ2 = (None if li[0].typ == CityItemToken.ItemType.MISC else li[0].alt_value)
    prob_adj = None
    i1 = 1  # index of the item holding the name
    org0_ = None  # NOTE(review): never read in this function
    if ((typ is not None and li[i1].typ == CityItemToken.ItemType.NOUN
         and ((i1 + 1) < len(li)))
            and li[0].whitespaces_after_count <= 1
            and (((LanguageHelper.endsWith(typ, "ПОСЕЛОК")
                   or LanguageHelper.endsWith(typ, "СЕЛИЩЕ")
                   or typ == "ДЕРЕВНЯ") or typ == "СЕЛО"))):
        # Two type nouns in a row: the second becomes the secondary type and
        # the name moves to the next item.
        if (li[i1].begin_token == li[i1].end_token):
            ooo = AddressItemToken.tryAttachOrg(li[i1].begin_token)
            if (ooo is not None and ooo.ref_token is not None):
                return None
        typ2 = li[i1].value
        if (typ2 == "СТАНЦИЯ" and li[i1].begin_token.isValue("СТ", None)
                and ((i1 + 1) < len(li))):
            # "СТ" may instead abbreviate an adjective; prepare the form that
            # agrees with the name's number/gender as an extra name variant.
            m = li[i1 + 1].morph
            if (m.number == MorphNumber.PLURAL):
                prob_adj = "СТАРЫЕ"
            elif (m.gender == MorphGender.FEMINIE):
                prob_adj = "СТАРАЯ"
            elif (m.gender == MorphGender.MASCULINE):
                prob_adj = "СТАРЫЙ"
            else:
                prob_adj = "СТАРОЕ"
        i1 += 1
    name = Utils.ifNotNull(
        li[i1].value,
        ((None if li[i1].onto_item is None else li[i1].onto_item.canonic_text)))
    alt_name = li[i1].alt_value
    if (name is None):
        return None
    mc = li[0].morph
    if (i1 == 1 and li[i1].typ == CityItemToken.ItemType.CITY
            and ((li[0].value == "ГОРОД" or li[0].value == "МІСТО"
                  or li[0].typ == CityItemToken.ItemType.MISC))):
        if (typ is None and ((i1 + 1) < len(li))
                and li[i1 + 1].typ == CityItemToken.ItemType.NOUN):
            return None
        oi.value = li[i1].onto_item
        if (oi.value is not None):
            name = oi.value.canonic_text
        # NOTE(review): as reconstructed, `oi.value.misc_attr` is evaluated
        # when len(name) <= 2 even if oi.value is None — confirm whether this
        # test belongs inside the `if (oi.value is not None)` above.
        if (len(name) > 2 or oi.value.misc_attr is not None):
            if (not li[1].doubtful
                    or ((oi.value is not None and oi.value.misc_attr is not None))):
                ok = True
            elif (not ok and not li[1].is_newline_before):
                if (li[0].geo_object_before or li[1].geo_object_after):
                    ok = True
                elif (StreetDefineHelper.checkStreetAfter(
                        li[1].end_token.next0_)):
                    ok = True
                elif (li[1].end_token.next0_ is not None
                      and (isinstance(li[1].end_token.next0_.getReferent(), DateReferent))):
                    ok = True
                elif ((li[1].whitespaces_before_count < 2) and li[1].onto_item is not None):
                    if (li[1].is_newline_after):
                        ok = True
            if (li[1].doubtful and li[1].end_token.next0_ is not None
                    and li[1].end_token.chars == li[1].end_token.next0_.chars):
                ok = False
            # A preceding "В" overrides the rejection above.
            if (li[0].begin_token.previous is not None
                    and li[0].begin_token.previous.isValue("В", None)):
                ok = True
        if (not ok):
            ok = CityAttachHelper.checkYearAfter(li[1].end_token.next0_)
        if (not ok):
            ok = CityAttachHelper.checkCityAfter(li[1].end_token.next0_)
    elif ((li[i1].typ == CityItemToken.ItemType.PROPERNAME
           or li[i1].typ == CityItemToken.ItemType.CITY)):
        if (((li[0].value == "АДМИНИСТРАЦИЯ" or li[0].value == "АДМІНІСТРАЦІЯ"))
                and i1 == 1):
            return None
        if (li[i1].is_newline_before):
            if (len(li) != 2):
                return None
        if (not li[0].doubtful):
            ok = True
            if (len(name) < 2):
                ok = False
            elif ((len(name) < 3) and li[0].morph.number != MorphNumber.SINGULAR):
                ok = False
            if (li[i1].doubtful and not li[i1].geo_object_after
                    and not li[0].geo_object_before):
                if (li[i1].morph.case_.is_genitive):
                    # Genitive doubtful name: require geo context (or text
                    # boundary) on both sides.
                    if (((li[0].begin_token.previous is None
                          or MiscLocationHelper.checkGeoObjectBefore(li[0].begin_token)))
                            and ((li[i1].end_token.next0_ is None
                                  or MiscLocationHelper.checkGeoObjectAfter(
                                      li[i1].end_token.next0_)
                                  or AddressItemToken.checkHouseAfter(
                                      li[i1].end_token.next0_, False, True)))):
                        pass
                    else:
                        ok = False
                else:
                    # Reject when a PERSONPROPERTY precedes and the name
                    # itself parses as a PERSON.
                    rt0 = li[i1].kit.processReferent(
                        "PERSONPROPERTY", li[0].begin_token.previous)
                    if (rt0 is not None):
                        rt1 = li[i1].kit.processReferent(
                            "PERSON", li[i1].begin_token)
                        if (rt1 is not None):
                            ok = False
            npt = NounPhraseHelper.tryParse(li[i1].begin_token, NounPhraseParseAttr.NO, 0)
            if (npt is not None):
                if (npt.end_token.end_char > li[i1].end_char and len(npt.adjectives) > 0
                        and not npt.adjectives[0].end_token.next0_.is_comma):
                    ok = False
                elif (TerrItemToken._m_unknown_regions.tryParse(
                        npt.end_token, TerminParseAttr.FULLWORDSONLY) is not None):
                    # Needs a non-city GeoReferent right before or right after
                    # (an intervening comma is skipped).
                    ok1 = False
                    if (li[0].begin_token.previous is not None):
                        ttt = li[0].begin_token.previous
                        if (ttt.is_comma and ttt.previous is not None):
                            ttt = ttt.previous
                        geo_ = Utils.asObjectOrNull(
                            ttt.getReferent(), GeoReferent)
                        if (geo_ is not None and not geo_.is_city):
                            ok1 = True
                    if (npt.end_token.next0_ is not None):
                        ttt = npt.end_token.next0_
                        if (ttt.is_comma and ttt.next0_ is not None):
                            ttt = ttt.next0_
                        geo_ = Utils.asObjectOrNull(
                            ttt.getReferent(), GeoReferent)
                        if (geo_ is not None and not geo_.is_city):
                            ok1 = True
                    if (not ok1):
                        return None
            if (li[0].value == "ПОРТ"):
                if (li[i1].chars.is_all_upper or li[i1].chars.is_latin_letter):
                    return None
        # NOTE(review): this elif chain is assumed to pair with
        # `if (not li[0].doubtful)` — confirm.
        elif (li[0].geo_object_before):
            ok = True
        elif (li[i1].geo_object_after and not li[i1].is_newline_after):
            ok = True
        else:
            ok = CityAttachHelper.checkYearAfter(li[i1].end_token.next0_)
        if (not ok):
            ok = CityAttachHelper.checkStreetAfter(li[i1].end_token.next0_)
        if (not ok and li[0].begin_token.previous is not None
                and li[0].begin_token.previous.isValue("В", None)):
            ok = True
    else:
        return None
    if (not ok and not always):
        if (MiscLocationHelper.checkNearBefore(li[0].begin_token.previous) is None):
            return None
    if (len(li) > (i1 + 1)):
        # Drops every item after the name item.
        del li[i1 + 1:i1 + 1 + len(li) - i1 - 1]
    city = GeoReferent()
    if (oi.value is not None and oi.value.referent is not None):
        # Start from a clone of the ontology referent, with its occurrences
        # cleared.
        city = (Utils.asObjectOrNull(oi.value.referent.clone(), GeoReferent))
        city.occurrence.clear()
    if (not li[0].morph.case_.is_undefined and li[0].morph.gender != MorphGender.UNDEFINED):
        if (li[i1].end_token.morph.class0_.is_adjective
                and li[i1].begin_token == li[i1].end_token):
            # Single-token adjective name: re-derive it in the case and
            # gender of the type noun.
            nam = ProperNameHelper.getNameEx(
                li[i1].begin_token, li[i1].end_token, MorphClass.ADJECTIVE,
                li[0].morph.case_, li[0].morph.gender, False, False)
            if (nam is not None and nam != name):
                name = nam
    if (li[0].morph.case_.is_nominative):
        # In the nominative case the alternative name is added first and then
        # cleared so it is not added again below.
        if (alt_name is not None):
            city._addName(alt_name)
        alt_name = (None)
    city._addName(name)
    if (prob_adj is not None):
        city._addName(prob_adj + " " + name)
    if (alt_name is not None):
        city._addName(alt_name)
        if (prob_adj is not None):
            city._addName(prob_adj + " " + alt_name)
    if (typ is not None):
        city._addTyp(typ)
    elif (not city.is_city):
        city._addTypCity(li[0].kit.base_language)
    if (typ2 is not None):
        city._addTyp(typ2.lower())
    if (li[0].higher_geo is not None and GeoOwnerHelper.canBeHigher(li[0].higher_geo, city)):
        city.higher = li[0].higher_geo
    if (li[0].typ == CityItemToken.ItemType.MISC):
        # The MISC head is not part of the resulting span.
        del li[0]
    res = ReferentToken._new719(city, li[0].begin_token, li[len(li) - 1].end_token, mc)
    if (res.end_token.next0_ is not None and res.end_token.next0_.is_hiphen
            and (isinstance(res.end_token.next0_.next0_, NumberToken))):
        num = Utils.asObjectOrNull(res.end_token.next0_.next0_, NumberToken)
        if ((num.typ == NumberSpellingType.DIGIT and not num.morph.class0_.is_adjective
             and num.int_value is not None) and (num.int_value < 50)):
            # "<name>-<number below 50>": append the number to every name slot
            # and extend the span over it.
            for s in city.slots:
                if (s.type_name == GeoReferent.ATTR_NAME):
                    city.uploadSlot(s, "{0}-{1}".format(s.value, num.value))
            res.end_token = num
    if (li[0].begin_token == li[0].end_token and li[0].begin_token.isValue("ГОРОДОК", None)):
        if (AddressItemToken.checkHouseAfter(res.end_token.next0_, True, False)):
            return None
    return res
def __tryNameExist(li: typing.List['CityItemToken'], oi: 'IntOntologyItem',
                   always: bool) -> 'ReferentToken':
    """Check a few special cases where a bare city name is accepted.

    Only the first item is examined and it must be a CITY item whose first
    token is a TextToken.

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-collapsed source; confirm the placement of the loop-level
    ``break`` statements against the upstream file.

    Args:
        li: candidate items; None or an empty list yields None.
        oi: holder object; its ``value`` is reset to None on entry and then
            set to ``li[0].onto_item``.
        always: when True the name is accepted even if no supporting
            context was found, unless the PERSON check below clears it.

    Returns:
        A ReferentToken spanning ``li[0]`` with a GeoReferent, or None.
    """
    oi.value = (None)
    # `not li` also covers the empty list, which previously raised
    # IndexError on li[0]; the sibling helpers guard on length as well.
    if (not li or li[0].typ != CityItemToken.ItemType.CITY):
        return None
    oi.value = li[0].onto_item
    tt = Utils.asObjectOrNull(li[0].begin_token, TextToken)
    if (tt is None):
        return None
    ok = False
    nam = (li[0].value if oi.value is None else oi.value.canonic_text)
    if (nam is None):
        return None
    if (nam == "РИМ"):
        if (tt.term == "РИМ"):
            # Not accepted when the next word is a dictionary proper
            # second name.
            if ((isinstance(tt.next0_, TextToken))
                    and tt.next0_.getMorphClassInDictionary().is_proper_secname):
                pass
            else:
                ok = True
        elif (tt.previous is not None and tt.previous.isValue("В", None)
              and tt.term == "РИМЕ"):
            ok = True
    elif (oi.value is not None and oi.value.referent is not None
          and oi.value.owner.is_ext_ontology):
        ok = True
    elif (nam.endswith("ГРАД") or nam.endswith("СК")):
        ok = True
    elif (nam.endswith("TOWN") or nam.startswith("SAN")):
        ok = True
    elif (li[0].chars.is_latin_letter and li[0].begin_token.previous is not None
          and ((li[0].begin_token.previous.isValue("IN", None)
                or li[0].begin_token.previous.isValue("FROM", None)))):
        ok = True
    else:
        # Look forward on the same line: skip ',', '(' and function words;
        # the first other token decides.
        tt2 = li[0].end_token.next0_
        first_pass2890 = True
        while True:
            if first_pass2890:
                first_pass2890 = False
            else:
                tt2 = tt2.next0_
            if (not (tt2 is not None)):
                break
            if (tt2.is_newline_before):
                break
            if ((tt2.isCharOf(",(") or tt2.morph.class0_.is_preposition
                 or tt2.morph.class0_.is_conjunction) or tt2.morph.class0_.is_misc):
                continue
            if ((isinstance(tt2.getReferent(), GeoReferent))
                    and tt2.chars.is_cyrillic_letter == li[0].chars.is_cyrillic_letter):
                ok = True
            break
        if (not ok):
            # Same scan backwards, followed by checks that can revoke the
            # acceptance.
            tt2 = li[0].begin_token.previous
            first_pass2891 = True
            while True:
                if first_pass2891:
                    first_pass2891 = False
                else:
                    tt2 = tt2.previous
                if (not (tt2 is not None)):
                    break
                if (tt2.is_newline_after):
                    break
                if ((tt2.isCharOf(",)") or tt2.morph.class0_.is_preposition
                     or tt2.morph.class0_.is_conjunction) or tt2.morph.class0_.is_misc):
                    continue
                if ((isinstance(tt2.getReferent(), GeoReferent))
                        and tt2.chars.is_cyrillic_letter == li[0].chars.is_cyrillic_letter):
                    ok = True
                if (ok):
                    # Revoke when the text parses as a street only together
                    # with its first item.
                    sits = StreetItemToken.tryParseList(
                        li[0].begin_token, None, 10)
                    if (sits is not None and len(sits) > 1):
                        ss = StreetDefineHelper._tryParseStreet(
                            sits, False, False)
                        if (ss is not None):
                            del sits[0]
                            if (StreetDefineHelper._tryParseStreet(
                                    sits, False, False) is None):
                                ok = False
                if (ok):
                    if (len(li) > 1 and li[1].typ == CityItemToken.ItemType.PROPERNAME
                            and (li[1].whitespaces_before_count < 3)):
                        ok = False
                    else:
                        mc = li[0].begin_token.getMorphClassInDictionary()
                        if (mc.is_proper_name or mc.is_proper_surname or mc.is_adjective):
                            ok = False
                        else:
                            npt = NounPhraseHelper.tryParse(
                                li[0].begin_token, NounPhraseParseAttr.NO, 0)
                            if (npt is not None and npt.end_char > li[0].end_char):
                                ok = False
                if (AddressItemToken.tryAttachOrg(li[0].begin_token) is not None):
                    ok = False
                    break
                break
    if (always):
        # A doubtful, surname-like item that parses as a PERSON loses the
        # unconditional acceptance.
        if (li[0].whitespaces_before_count > 3 and li[0].doubtful
                and li[0].begin_token.getMorphClassInDictionary().is_proper_surname):
            pp = li[0].kit.processReferent("PERSON", li[0].begin_token)
            if (pp is not None):
                always = False
    if (li[0].begin_token.chars.is_latin_letter and li[0].begin_token == li[0].end_token):
        # Single Latin word followed (optionally after ',') by a short Latin
        # token that is not all lower case: revoke.
        tt1 = li[0].end_token.next0_
        if (tt1 is not None and tt1.isChar(',')):
            tt1 = tt1.next0_
        if (((isinstance(tt1, TextToken)) and tt1.chars.is_latin_letter
             and (tt1.length_char < 3)) and not tt1.chars.is_all_lower):
            ok = False
    if (not ok and not always):
        return None
    city = None
    if (oi.value is not None and (isinstance(oi.value.referent, GeoReferent))
            and not oi.value.owner.is_ext_ontology):
        # Reuse the ontology referent itself.
        city = (Utils.asObjectOrNull(oi.value.referent, GeoReferent))
    else:
        city = GeoReferent()
        city._addName(nam)
        if (oi.value is not None and (isinstance(oi.value.referent, GeoReferent))):
            city._mergeSlots2(
                Utils.asObjectOrNull(oi.value.referent, GeoReferent),
                li[0].kit.base_language)
        if (not city.is_city):
            city._addTypCity(li[0].kit.base_language)
    return ReferentToken._new719(city, li[0].begin_token, li[0].end_token, li[0].morph)
def tryAttachTerritory(
        li: typing.List['TerrItemToken'],
        ad: 'AnalyzerData',
        attach_always: bool = False,
        cits: typing.List['CityItemToken'] = None,
        exists: typing.List['GeoReferent'] = None) -> 'ReferentToken':
    """Try to build a territory referent token from territory items.

    The items are scanned left to right and classified into an existing
    ontology object (``ex_obj``), a new proper name (``new_name``), a type
    noun (``noun``) and adjective-like items (``adj_list``); the result is
    then assembled from whichever combination was found.

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-collapsed source; confirm ambiguous ``elif``/``else``
    attachments against the upstream file.

    Args:
        li: territory items; None or empty yields None. Items may be
            mutated (``is_adjective`` / ``onto_item`` of ``li[0]``).
        ad: analyzer data, forwarded to the Moscow-AO and pure-territory
            helpers.
        attach_always: relaxes several of the context checks.
        cits: optional city items; only ``cits[0].typ`` and ``len(cits)``
            are read.
        exists: optional already-known GeoReferents, searched by name slot.

    Returns:
        A ReferentToken wrapping a GeoReferent, or None.
    """
    if (li is None or len(li) == 0):
        return None
    ex_obj = None
    new_name = None
    adj_list = list()
    noun = None
    add_noun = None
    rt = TerrAttachHelper.__tryAttachMoscowAO(li, ad)
    if (rt is not None):
        return rt
    if (li[0].termin_item is not None and li[0].termin_item.canonic_text == "ТЕРРИТОРИЯ"):
        res2 = TerrAttachHelper.__tryAttachPureTerr(li, ad)
        return res2
    if (len(li) == 2):
        # Pair of an `rzd` item and an `rzd_dir` item, in either order: build
        # a territory named by the direction and referencing the rzd referent.
        if (li[0].rzd is not None and li[1].rzd_dir is not None):
            rzd = GeoReferent()
            rzd._addName(li[1].rzd_dir)
            rzd._addTypTer(li[0].kit.base_language)
            rzd.addSlot(GeoReferent.ATTR_REF, li[0].rzd.referent, False, 0)
            rzd.addExtReferent(li[0].rzd)
            return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
        if (li[1].rzd is not None and li[0].rzd_dir is not None):
            rzd = GeoReferent()
            rzd._addName(li[0].rzd_dir)
            rzd._addTypTer(li[0].kit.base_language)
            rzd.addSlot(GeoReferent.ATTR_REF, li[1].rzd.referent, False, 0)
            rzd.addExtReferent(li[1].rzd)
            return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
    can_be_city_before = False
    adj_terr_before = False
    if (cits is not None):
        if (cits[0].typ == CityItemToken.ItemType.CITY):
            can_be_city_before = True
        elif (cits[0].typ == CityItemToken.ItemType.NOUN and len(cits) > 1):
            can_be_city_before = True
    # Classification pass; k ends as the number of items consumed.
    k = 0
    while k < len(li):
        if (li[k].onto_item is not None):
            if (ex_obj is not None or new_name is not None):
                break
            if (noun is not None):
                if (k == 1):
                    if (noun.termin_item.canonic_text == "РАЙОН"
                            or noun.termin_item.canonic_text == "ОБЛАСТЬ"
                            or noun.termin_item.canonic_text == "СОЮЗ"):
                        if (isinstance(li[k].onto_item.referent, GeoReferent)):
                            if ((li[k].onto_item.referent).is_state):
                                break
                        # Needs end of text, ',' or '.', a geo object before,
                        # or a STREET address item after.
                        ok = False
                        tt = li[k].end_token.next0_
                        if (tt is None):
                            ok = True
                        elif (tt.isCharOf(",.")):
                            ok = True
                        if (not ok):
                            ok = MiscLocationHelper.checkGeoObjectBefore(
                                li[0].begin_token)
                        if (not ok):
                            adr = AddressItemToken.tryParse(
                                tt, None, False, False, None)
                            if (adr is not None):
                                if (adr.typ == AddressItemToken.ItemType.STREET):
                                    ok = True
                        if (not ok):
                            break
                    # NOTE(review): assumed to sit inside `if (k == 1)` — confirm.
                    if (li[k].onto_item is not None):
                        if (noun.begin_token.isValue("МО", None)
                                or noun.begin_token.isValue("ЛО", None)):
                            return None
            ex_obj = li[k]
        elif (li[k].termin_item is not None):
            if (noun is not None):
                break
            if (li[k].termin_item.is_always_prefix and k > 0):
                break
            if (k > 0 and li[k].is_doubt):
                if (li[k].begin_token == li[k].end_token
                        and li[k].begin_token.isValue("ЗАО", None)):
                    break
            if (li[k].termin_item.is_adjective or li[k].is_geo_in_dictionary):
                adj_list.append(li[k])
            else:
                if (ex_obj is not None):
                    geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent, GeoReferent)
                    if (geo_ is None):
                        break
                    if (ex_obj.is_adjective
                            and ((li[k].termin_item.canonic_text == "СОЮЗ"
                                  or li[k].termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                        str0_ = str(ex_obj.onto_item)
                        if (not li[k].termin_item.canonic_text in str0_):
                            return None
                    if (li[k].termin_item.canonic_text == "РАЙОН"
                            or li[k].termin_item.canonic_text == "ОКРУГ"
                            or li[k].termin_item.canonic_text == "КРАЙ"):
                        # Collect the known object's type slots into one
                        # ';'-separated string.
                        tmp = io.StringIO()
                        for s in geo_.slots:
                            if (s.type_name == GeoReferent.ATTR_TYPE):
                                print("{0};".format(s.value), end="", file=tmp, flush=True)
                        if (not li[k].termin_item.canonic_text
                                in Utils.toStringStringIO(tmp).upper()):
                            # The known object has no such type: treat li[0]
                            # as a new adjective name instead.
                            if (k != 1 or new_name is not None):
                                break
                            new_name = li[0]
                            new_name.is_adjective = True
                            new_name.onto_item = (None)
                            ex_obj = (None)
                noun = li[k]
                if (k == 0):
                    tt = TerrItemToken.tryParse(li[k].begin_token.previous, None, True, False)
                    if (tt is not None and tt.morph.class0_.is_adjective):
                        adj_terr_before = True
        else:
            if (ex_obj is not None):
                break
            if (new_name is not None):
                break
            new_name = li[k]
        k += 1
    name = None
    alt_name = None
    full_name = None
    morph_ = None
    if (ex_obj is not None):
        if (ex_obj.is_adjective and not ex_obj.morph.language.is_en and noun is None):
            # Adjective form of a known object without a type noun.
            if (attach_always and ex_obj.end_token.next0_ is not None):
                npt = NounPhraseHelper.tryParse(ex_obj.begin_token, NounPhraseParseAttr.NO, 0)
                if (ex_obj.end_token.next0_.is_comma_and):
                    pass
                elif (npt is None):
                    pass
                else:
                    str0_ = StreetItemToken.tryParse(
                        ex_obj.end_token.next0_, None, False, None, False)
                    if (str0_ is not None):
                        if (str0_.typ == StreetItemType.NOUN
                                and str0_.end_token == npt.end_token):
                            return None
            # NOTE(review): this else is assumed to pair with
            # `if (attach_always and ...)` — confirm.
            else:
                cit = CityItemToken.tryParse(ex_obj.end_token.next0_, None, False, None)
                if (cit is not None and ((cit.typ == CityItemToken.ItemType.NOUN
                                          or cit.typ == CityItemToken.ItemType.CITY))):
                    npt = NounPhraseHelper.tryParse(
                        ex_obj.begin_token, NounPhraseParseAttr.NO, 0)
                    if (npt is not None and npt.end_token == cit.end_token):
                        pass
                    else:
                        return None
                elif (ex_obj.begin_token.isValue("ПОДНЕБЕСНЫЙ", None)):
                    pass
                else:
                    return None
        if (noun is None and ex_obj.can_be_city):
            cit0 = CityItemToken.tryParseBack(ex_obj.begin_token.previous)
            if (cit0 is not None and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                return None
        if (ex_obj.is_doubt and noun is None):
            # Doubtful known object without a noun needs supporting context.
            ok2 = False
            if (TerrAttachHelper.__canBeGeoAfter(ex_obj.end_token.next0_)):
                ok2 = True
            elif (not ex_obj.can_be_surname and not ex_obj.can_be_city):
                if ((ex_obj.end_token.next0_ is not None
                     and ex_obj.end_token.next0_.isChar(')')
                     and ex_obj.begin_token.previous is not None)
                        and ex_obj.begin_token.previous.isChar('(')):
                    ok2 = True
                elif (ex_obj.chars.is_latin_letter and ex_obj.begin_token.previous is not None):
                    if (ex_obj.begin_token.previous.isValue("IN", None)):
                        ok2 = True
                    elif (ex_obj.begin_token.previous.isValue("THE", None)
                          and ex_obj.begin_token.previous.previous is not None
                          and ex_obj.begin_token.previous.previous.isValue(
                              "IN", None)):
                        ok2 = True
            if (not ok2):
                cit0 = CityItemToken.tryParseBack(
                    ex_obj.begin_token.previous)
                if (cit0 is not None and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                    pass
                elif (MiscLocationHelper.checkGeoObjectBefore(
                        ex_obj.begin_token.previous)):
                    pass
                else:
                    return None
        name = ex_obj.onto_item.canonic_text
        morph_ = ex_obj.morph
    elif (new_name is not None):
        if (noun is None):
            return None
        # No consumed item after the first may start a new line unless the
        # first item does too.
        j = 1
        while j < k:
            if (li[j].is_newline_before and not li[0].is_newline_before):
                return None
            j += 1
        morph_ = noun.morph
        if (new_name.is_adjective):
            if (noun.termin_item.acronym == "АО"):
                if (noun.begin_token != noun.end_token):
                    return None
                if (new_name.morph.gender != MorphGender.FEMINIE):
                    return None
            # GeoReferent immediately before the items (a comma/and token is
            # skipped), if any.
            geo_before = None
            tt0 = li[0].begin_token.previous
            if (tt0 is not None and tt0.is_comma_and):
                tt0 = tt0.previous
            if (not li[0].is_newline_before and tt0 is not None):
                geo_before = (Utils.asObjectOrNull(tt0.getReferent(), GeoReferent))
            if (Utils.indexOfList(li, noun, 0) < Utils.indexOfList(
                    li, new_name, 0)):
                # Noun precedes the adjective name.
                if (noun.termin_item.is_state):
                    return None
                if (new_name.can_be_surname and geo_before is None):
                    if (((noun.morph.case_) & new_name.morph.case_).is_undefined):
                        return None
                if (MiscHelper.isExistsInDictionary(
                        new_name.begin_token, new_name.end_token,
                        (MorphClass.ADJECTIVE) | MorphClass.PRONOUN | MorphClass.VERB)):
                    if (noun.begin_token != new_name.begin_token):
                        if (geo_before is None):
                            if (len(li) == 2 and TerrAttachHelper.__canBeGeoAfter(
                                    li[1].end_token.next0_)):
                                pass
                            elif (len(li) == 3 and li[2].termin_item is not None
                                  and TerrAttachHelper.__canBeGeoAfter(
                                      li[2].end_token.next0_)):
                                pass
                            elif (new_name.is_geo_in_dictionary):
                                pass
                            elif (new_name.end_token.is_newline_after):
                                pass
                            else:
                                return None
                # NOTE(review): the noun-phrase and PERSON checks are assumed
                # to be at this level rather than inside the dictionary
                # check — confirm.
                npt = NounPhraseHelper.tryParse(
                    new_name.end_token, NounPhraseParseAttr.PARSEPRONOUNS, 0)
                if (npt is not None and npt.end_token != new_name.end_token):
                    if (len(li) >= 3 and li[2].termin_item is not None
                            and npt.end_token == li[2].end_token):
                        add_noun = li[2]
                    else:
                        return None
                rtp = new_name.kit.processReferent("PERSON", new_name.begin_token)
                if (rtp is not None):
                    return None
                name = ProperNameHelper.getNameEx(new_name.begin_token,
                                                  new_name.end_token,
                                                  MorphClass.ADJECTIVE,
                                                  MorphCase.UNDEFINED,
                                                  noun.termin_item.gender,
                                                  False, False)
            else:
                # Adjective name precedes the noun: collect supporting
                # evidence in `ok`.
                ok = False
                if (((k + 1) < len(li)) and li[k].termin_item is None
                        and li[k + 1].termin_item is not None):
                    ok = True
                elif ((k < len(li)) and li[k].onto_item is not None):
                    ok = True
                elif (k == len(li) and not new_name.is_adj_in_dictionary):
                    ok = True
                elif (MiscLocationHelper.checkGeoObjectBefore(
                        li[0].begin_token) or can_be_city_before):
                    ok = True
                elif (MiscLocationHelper.checkGeoObjectAfter(
                        li[k - 1].end_token)):
                    ok = True
                elif (len(li) == 3 and k == 2):
                    cit = CityItemToken.tryParse(li[2].begin_token, None, False, None)
                    if (cit is not None):
                        if (cit.typ == CityItemToken.ItemType.CITY
                                or cit.typ == CityItemToken.ItemType.NOUN):
                            ok = True
                elif (len(li) == 2):
                    ok = TerrAttachHelper.__canBeGeoAfter(
                        li[len(li) - 1].end_token.next0_)
                if (not ok and not li[0].is_newline_before and not li[0].chars.is_all_lower):
                    rt00 = li[0].kit.processReferent(
                        "PERSONPROPERTY", li[0].begin_token.previous)
                    if (rt00 is not None):
                        ok = True
                if (noun.termin_item is not None and noun.termin_item.is_strong
                        and new_name.is_adjective):
                    ok = True
                if (noun.is_doubt and len(adj_list) == 0 and geo_before is None):
                    return None
                name = ProperNameHelper.getNameEx(new_name.begin_token,
                                                  new_name.end_token,
                                                  MorphClass.ADJECTIVE,
                                                  MorphCase.UNDEFINED,
                                                  noun.termin_item.gender,
                                                  False, False)
                if (not ok and not attach_always):
                    if (MiscHelper.isExistsInDictionary(
                            new_name.begin_token, new_name.end_token,
                            (MorphClass.ADJECTIVE) | MorphClass.PRONOUN | MorphClass.VERB)):
                        # A dictionary word is accepted only if an already
                        # known referent carries this name.
                        if (exists is not None):
                            for e0_ in exists:
                                if (e0_.findSlot(GeoReferent.ATTR_NAME, name, True) is not None):
                                    ok = True
                                    break
                        if (not ok):
                            return None
                full_name = "{0} {1}".format(
                    ProperNameHelper.getNameEx(li[0].begin_token,
                                               noun.begin_token.previous,
                                               MorphClass.ADJECTIVE,
                                               MorphCase.UNDEFINED,
                                               noun.termin_item.gender,
                                               False, False),
                    noun.termin_item.canonic_text)
        else:
            # Non-adjective new name.
            if (not attach_always or ((noun.termin_item is not None
                                       and noun.termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                is_latin = noun.chars.is_latin_letter and new_name.chars.is_latin_letter
                if (Utils.indexOfList(li, noun, 0) > Utils.indexOfList(
                        li, new_name, 0)):
                    # Name-before-noun order is accepted only for Latin text.
                    if (not is_latin):
                        return None
                if (not new_name.is_district_name
                        and not BracketHelper.canBeStartOfSequence(
                            new_name.begin_token, False, False)):
                    if (len(adj_list) == 0 and MiscHelper.isExistsInDictionary(
                            new_name.begin_token, new_name.end_token,
                            (MorphClass.NOUN) | MorphClass.PRONOUN)):
                        if (len(li) == 2 and noun.is_city_region
                                and (noun.whitespaces_after_count < 2)):
                            pass
                        else:
                            return None
                    if (not is_latin):
                        if ((noun.termin_item.is_region and not attach_always
                             and ((not adj_terr_before or new_name.is_doubt)))
                                and not noun.is_city_region
                                and not noun.termin_item.is_specific_prefix):
                            if (not MiscLocationHelper.checkGeoObjectBefore(
                                    noun.begin_token)):
                                if (not noun.is_doubt and noun.begin_token != noun.end_token):
                                    pass
                                else:
                                    return None
                        if (noun.is_doubt and len(adj_list) == 0):
                            if (((noun.termin_item.acronym == "МО"
                                  or noun.termin_item.acronym == "ЛО"))
                                    and k == (len(li) - 1)
                                    and li[k].termin_item is not None):
                                # Consume one more item as an additional noun.
                                add_noun = li[k]
                                k += 1
                            else:
                                return None
                        pers = new_name.kit.processReferent(
                            "PERSON", new_name.begin_token)
                        if (pers is not None):
                            return None
            name = MiscHelper.getTextValue(new_name.begin_token,
                                           new_name.end_token,
                                           GetTextAttr.NO)
            if (new_name.begin_token != new_name.end_token):
                # Cut the name before an inner token that is itself a type
                # term overlapping textually with the noun's term.
                ttt = new_name.begin_token.next0_
                while ttt is not None and ttt.end_char <= new_name.end_char:
                    if (ttt.chars.is_letter):
                        ty = TerrItemToken.tryParse(
                            ttt, None, False, False)
                        if ((ty is not None and ty.termin_item is not None
                             and noun is not None)
                                and ((noun.termin_item.canonic_text in ty.termin_item.canonic_text
                                      or ty.termin_item.canonic_text in noun.termin_item.canonic_text))):
                            name = MiscHelper.getTextValue(
                                new_name.begin_token, ttt.previous, GetTextAttr.NO)
                            break
                    ttt = ttt.next0_
            if (len(adj_list) > 0):
                npt = NounPhraseHelper.tryParse(adj_list[0].begin_token,
                                                NounPhraseParseAttr.NO, 0)
                if (npt is not None and npt.end_token == noun.end_token):
                    alt_name = "{0} {1}".format(
                        npt.getNormalCaseText(None, False, MorphGender.UNDEFINED, False),
                        name)
    else:
        # Neither a known object nor a new name: only a bare type noun can
        # still resolve, by pointing at a nearby GeoReferent of that type.
        if ((len(li) == 1 and noun is not None and noun.end_token.next0_ is not None)
                and (isinstance(
                    noun.end_token.next0_.getReferent(), GeoReferent))):
            g = Utils.asObjectOrNull(noun.end_token.next0_.getReferent(), GeoReferent)
            if (noun.termin_item is not None):
                tyy = noun.termin_item.canonic_text.lower()
                ooo = False
                if (g.findSlot(GeoReferent.ATTR_TYPE, tyy, True) is not None):
                    ooo = True
                elif (tyy.endswith("район") and g.findSlot(
                        GeoReferent.ATTR_TYPE, "район", True) is not None):
                    ooo = True
                if (ooo):
                    return ReferentToken._new719(g, noun.begin_token,
                                                 noun.end_token.next0_,
                                                 noun.begin_token.morph)
        if ((len(li) == 1 and noun == li[0] and li[0].termin_item is not None)
                and TerrItemToken.tryParse(
                    li[0].end_token.next0_, None, True, False) is None
                and TerrItemToken.tryParse(li[0].begin_token.previous, None,
                                           True, False) is None):
            if (li[0].morph.number == MorphNumber.PLURAL):
                return None
            # Walk backwards to the nearest GeoReferent; a line break costs 10
            # and any other token 1, with a budget of 500.
            cou = 0
            str0_ = li[0].termin_item.canonic_text.lower()
            tt = li[0].begin_token.previous
            first_pass2898 = True
            while True:
                if first_pass2898:
                    first_pass2898 = False
                else:
                    tt = tt.previous
                if (not (tt is not None)):
                    break
                if (tt.is_newline_after):
                    cou += 10
                else:
                    cou += 1
                if (cou > 500):
                    break
                g = Utils.asObjectOrNull(tt.getReferent(), GeoReferent)
                if (g is None):
                    continue
                # Found one; now make sure no territory item follows within
                # the same budget (tt and cou are reused — the outer loop
                # always exits after this pass).
                ok = True
                cou = 0
                tt = li[0].end_token.next0_
                first_pass2899 = True
                while True:
                    if first_pass2899:
                        first_pass2899 = False
                    else:
                        tt = tt.next0_
                    if (not (tt is not None)):
                        break
                    if (tt.is_newline_before):
                        cou += 10
                    else:
                        cou += 1
                    if (cou > 500):
                        break
                    tee = TerrItemToken.tryParse(tt, None, True, False)
                    if (tee is None):
                        continue
                    ok = False
                    break
                if (ok):
                    # Check the found referent and up to two of its `higher`
                    # ancestors for the noun's type.
                    ii = 0
                    while g is not None and (ii < 3):
                        if (g.findSlot(GeoReferent.ATTR_TYPE, str0_, True) is not None):
                            return ReferentToken._new719(
                                g, li[0].begin_token, li[0].end_token,
                                noun.begin_token.morph)
                        g = g.higher
                        ii += 1
                break
        return None
    ter = None
    if (ex_obj is not None and (isinstance(ex_obj.tag, GeoReferent))):
        ter = (Utils.asObjectOrNull(ex_obj.tag, GeoReferent))
    else:
        # NOTE(review): everything up to the creation of `res` is assumed to
        # belong to this else branch — confirm.
        ter = GeoReferent()
        if (ex_obj is not None):
            geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent, GeoReferent)
            if (geo_ is not None and not geo_.is_city):
                ter._mergeSlots2(geo_, li[0].kit.base_language)
            else:
                ter._addName(name)
            if (noun is None and ex_obj.can_be_city):
                ter._addTypCity(li[0].kit.base_language)
            else:
                pass
        elif (new_name is not None):
            ter._addName(name)
            if (alt_name is not None):
                ter._addName(alt_name)
        if (noun is not None):
            # Map the noun to a type string; a few terms are expanded or
            # replaced, with a Ukrainian variant when the base language is UA.
            if (noun.termin_item.canonic_text == "АО"):
                ter._addTyp(
                    ("АВТОНОМНИЙ ОКРУГ" if li[0].kit.base_language.is_ua else "АВТОНОМНЫЙ ОКРУГ"))
            elif (noun.termin_item.canonic_text == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ"
                  or noun.termin_item.canonic_text == "МУНІЦИПАЛЬНЕ ЗБОРИ"):
                ter._addTyp(("МУНІЦИПАЛЬНЕ УТВОРЕННЯ" if li[0].kit.base_language.is_ua else "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ"))
            elif (noun.termin_item.acronym == "МО" and add_noun is not None):
                ter._addTyp(add_noun.termin_item.canonic_text)
            else:
                if (noun.termin_item.canonic_text == "СОЮЗ" and ex_obj is not None
                        and ex_obj.end_char > noun.end_char):
                    return ReferentToken._new719(ter, ex_obj.begin_token,
                                                 ex_obj.end_token, ex_obj.morph)
                ter._addTyp(noun.termin_item.canonic_text)
                if (noun.termin_item.is_region and ter.is_state):
                    ter._addTypReg(li[0].kit.base_language)
        if (ter.is_state and ter.is_region):
            for a in adj_list:
                if (a.termin_item.is_region):
                    ter._addTypReg(li[0].kit.base_language)
                    break
        if (ter.is_state):
            if (full_name is not None):
                ter._addName(full_name)
    res = ReferentToken(ter, li[0].begin_token, li[k - 1].end_token)
    if (noun is not None and noun.morph.class0_.is_noun):
        res.morph = noun.morph
    else:
        # Merge the morphology of all consumed items; when a noun exists,
        # adjective variants are relabelled as nouns.
        res.morph = MorphCollection()
        ii = 0
        while ii < k:
            for v in li[ii].morph.items:
                bi = MorphBaseInfo(v)
                if (noun is not None):
                    if (bi.class0_.is_adjective):
                        bi.class0_ = MorphClass.NOUN
                res.morph.addItem(bi)
            ii += 1
    if (li[0].termin_item is not None and li[0].termin_item.is_specific_prefix):
        # The specific prefix is excluded from the resulting span.
        res.begin_token = li[0].end_token.next0_
    if (add_noun is not None and add_noun.end_char > res.end_char):
        res.end_token = add_noun.end_token
    if ((isinstance(res.begin_token.previous, TextToken)) and (res.whitespaces_before_count < 2)):
        tt = Utils.asObjectOrNull(res.begin_token.previous, TextToken)
        if (tt.term == "АР"):
            # Include a leading "АР" when the territory has a republic type.
            for ty in ter.typs:
                if ("республика" in ty or "республіка" in ty):
                    res.begin_token = tt
                    break
    return res