Ejemplo n.º 1
0
 def trunc_oborot(self, is_participle : bool) -> bool:
     """Truncate ``self.items`` to the leading part selected via ``self.best_var``.

     When there is no best variant (or it has no segments) everything after
     the first item is dropped.  Otherwise a cut position is derived either
     from the first item flagged ``can_be_comma_end`` or from the links of
     the variant's segments, and the tail starting at that position is
     removed (only when at least two items would be cut off).

     Args:
         is_participle: when true, only the first segment is inspected and
             the comma-based shortcut / ``after_verb`` handling is disabled.

     Returns:
         True if items were dropped in the no-variant case, or if at least
         one non-null link was seen while walking the segments; else False.
     """
     def scan_for_comma_end() -> int:
         # Index (starting at 1) of the first item that may end a comma span,
         # or len(self.items) when there is none.
         pos = 1
         while pos < len(self.items) and not self.items[pos].can_be_comma_end:
             pos += 1
         return pos

     variant = self.best_var
     if variant is None or len(variant.segs) == 0:
         if len(self.items) <= 1:
             return False
         del self.items[1:]
         return True

     linked = False
     cut = 0
     if variant.segs[0] is None and not is_participle:
         cut = scan_for_comma_end()
     else:
         for seg in variant.segs:
             if seg is None:
                 break
             for link in seg.links:
                 if link is None:
                     continue
                 linked = True
                 src_pos = Utils.indexOfList(self.items, link.from0_.source, 0)
                 if src_pos < 0:
                     continue
                 if link.to_verb is None:
                     # Item-to-item link: only a link pointing backwards moves the cut.
                     dst_pos = Utils.indexOfList(self.items, link.to.source, 0)
                     if dst_pos < 0:
                         continue
                     if dst_pos >= src_pos:
                         break
                     cut = src_pos + 1
                 elif link.to_verb == seg.source.before_verb:
                     cut = src_pos + 1
                 elif not is_participle and seg == variant.segs[0] and link.to_verb == seg.source.after_verb:
                     # Move the cut just past the item that carries the verb after the segment.
                     for pos in range(cut, len(self.items)):
                         if self.items[pos].source == link.to_verb:
                             cut = pos + 1
                             break
                 else:
                     break
             # Segments after the first are looked at only for a non-participle
             # whose current segment is the first one.
             if is_participle or not (seg == variant.segs[0]):
                 break

     if not linked and cut == 0:
         cut = scan_for_comma_end()
     if 0 < cut < len(self.items) - 1:
         del self.items[cut:]
     return linked
Ejemplo n.º 2
0
 def _checkAbbr(self, abbr : str) -> bool:
     if (len(abbr) != 2): 
         return False
     nameq = False
     typeq = False
     nameq2 = False
     typeq2 = False
     for s in self.slots: 
         if (s.type_name == GeoReferent.ATTR_NAME): 
             val = Utils.asObjectOrNull(s.value, str)
             ch = val[0]
             if (ch == abbr[0]): 
                 nameq = True
                 ii = val.find(' ')
                 if (ii > 0): 
                     if (abbr[1] == val[ii + 1]): 
                         if (Utils.indexOfList(val, ' ', ii + 1) < 0): 
                             return True
             if (ch == abbr[1]): 
                 nameq2 = True
         elif (s.type_name == GeoReferent.ATTR_TYPE): 
             ty = s.value
             if (ty == "государство" or ty == "держава" or ty == "country"): 
                 continue
             ch = str.upper(ty[0])
             if (ch == abbr[1]): 
                 typeq = True
             if (ch == abbr[0]): 
                 typeq2 = True
     if (typeq and nameq): 
         return True
     if (typeq2 and nameq2): 
         return True
     return False
Ejemplo n.º 3
0
 def addReferent(self, referent : 'Referent') -> bool:
     """ Add an entity to the ontology.

     If the referent already has an ontology item owned by this ontology, the
     item is rebuilt only when the number of its termins has changed; in that
     case the old termins and the old item are removed before the new item is
     registered.

     Args:
         referent(Referent): entity to add

     Returns:
         False when ``referent`` is None or no ontology item could be created
         for a referent not yet registered here; True otherwise.
     """
     if referent is None:
         return False
     previous = referent._int_ontology_item
     owned_here = previous is not None and previous.owner == self
     fresh = referent.createOntologyItem()
     if owned_here:
         # Nothing to refresh if no item can be built or the termin count is unchanged.
         if fresh is None or len(fresh.termins) == len(previous.termins):
             return True
         for termin in previous.termins:
             self.__m_termins.remove(termin)
         pos = Utils.indexOfList(self.__m_items, previous, 0)
         if pos >= 0:
             del self.__m_items[pos]
     elif fresh is None:
         return False
     fresh.referent = referent
     referent._int_ontology_item = fresh
     self.addItem(fresh)
     return True
Ejemplo n.º 4
0
 def remove(self, t : 'Termin') -> None:
     """Remove termin ``t`` from the structures kept by this collection.

     The termin is taken out of the tree for each of its hash variants, out
     of every bucket of the ``__m_hash1`` table (first equal entry per
     bucket), and out of the flat ``termins`` list.

     Args:
         t: termin to remove; a termin that is not present is ignored.
     """
     for variant in t._get_hash_variants():
         self.__remove_from_tree(variant, t)
     for bucket in self.__m_hash1.values():
         # Drop at most one matching entry from each bucket.
         for candidate in bucket:
             if candidate == t:
                 bucket.remove(candidate)
                 break
     pos = Utils.indexOfList(self.termins, t, 0)
     if pos >= 0:
         del self.termins[pos]
Ejemplo n.º 5
0
 def extract_main_sequence(lines : typing.List['InstrToken1'], check_spec_texts : bool, can_sub_numbers : bool) -> typing.List['InstrToken1']:
     """Pick from ``lines`` the numbered lines that form one main numbering sequence.

     The function works in stages:
       1. collect numbered lines of type ``InstrToken1.Types.LINE`` that are
          compatible (suffix, amount of numbers, numbering type) with the
          first collected line;
       2. re-insert a skipped line where two neighbours differ by 2;
       3. repeatedly delete or repair entries whose
          ``NumberingHelper.calc_delta`` to their neighbours breaks the
          sequence, until a pass makes no change;
       4. apply acceptance heuristics (markers, text coverage, restarts
          from 1, size limit).

     NOTE(review): the exact meaning of ``calc_delta`` and of its boolean
     third argument is defined elsewhere; comments below only describe how
     its result is used here.

     Args:
         lines: candidate lines in document order.  Items may be modified:
             ``tag``, ``num_typ``, ``num_suffix`` and ``is_num_doubt`` are
             assigned in some branches.
         check_spec_texts: when true, the result is rejected if the number
             of collected lines with ``has_many_spec_chars`` exceeds half
             (rounded down) of the collected lines.
         can_sub_numbers: when false, a line whose amount of numbers differs
             from the first collected line is accepted only if the next
             line also continues the sequence.

     Returns:
         The selected lines, or None when no acceptable sequence was found.
     """
     res = None
     many_spec_char_lines = 0
     i = 0
     # Hand-unrolled "for (i = 0; i < len(lines); i++)": the first_pass flag
     # makes every `continue` below still advance i.
     first_pass3279 = True
     while True:
         if first_pass3279: first_pass3279 = False
         else: i += 1
         if (not (i < len(lines))): break
         li = lines[i]
         # An upper-case line with a recognised standard title type is stored
         # as the tag of the last collected item (if that item has no tag yet).
         if (li.all_upper and li.title_typ != InstrToken1.StdTitleType.UNDEFINED): 
             if (res is not None and len(res) > 0 and res[len(res) - 1].tag is None): 
                 res[len(res) - 1].tag = (li)
         if (len(li.numbers) == 0): 
             continue
         # The next two checks have no effect (bodies are `pass`);
         # presumably left over as debugging hooks.
         if (li.last_number == 901): 
             pass
         if (li.num_typ == NumberTypes.LETTER): 
             pass
         if (li.typ != InstrToken1.Types.LINE): 
             continue
         if (res is None): 
             # First numbered line starts the sequence.
             res = list()
             # Special case: a digit "1" immediately followed by another digit "1".
             # If the first ROMAN-typed numbered line further on starts with "2",
             # this first "1" is retyped as ROMAN.
             if (len(li.numbers) == 1 and li.numbers[0] == "1" and li.num_typ == NumberTypes.DIGIT): 
                 if ((((i + 1) < len(lines)) and len(lines[i + 1].numbers) == 1 and lines[i + 1].numbers[0] == "1") and lines[i + 1].num_typ == NumberTypes.DIGIT): 
                     ii = i + 2
                     while ii < len(lines): 
                         if (lines[ii].num_typ == NumberTypes.ROMAN and len(lines[ii].numbers) > 0): 
                             if (lines[ii].numbers[0] == "2"): 
                                 li.num_typ = NumberTypes.ROMAN
                             break
                         ii += 1
         else: 
             # Later lines must be compatible with the first collected line.
             if (res[0].num_suffix is not None): 
                 # Both have a suffix but the suffixes differ: skip.
                 if (li.num_suffix is not None and li.num_suffix != res[0].num_suffix): 
                     continue
             if (len(res[0].numbers) != len(li.numbers)): 
                 # Different amount of numbers than the first item.
                 # Skipped when it directly follows a ':' token ...
                 if (li.begin_token.previous is not None and li.begin_token.previous.is_char(':')): 
                     continue
                 # ... or when the first item has no suffix, or the line does not
                 # follow the last collected item with delta 1.
                 if (res[0].num_suffix is None or NumberingHelper.calc_delta(res[len(res) - 1], li, True) != 1): 
                     continue
                 if (not can_sub_numbers): 
                     # Without sub-numbers the line is kept only if the next line
                     # continues both the last collected item and this line.
                     if (((i + 1) < len(lines)) and NumberingHelper.calc_delta(res[len(res) - 1], lines[i + 1], False) == 1 and NumberingHelper.calc_delta(li, lines[i + 1], True) == 1): 
                         pass
                     else: 
                         continue
             else: 
                 # Same amount of numbers: the numbering types must agree.
                 if (res[0].num_typ == NumberTypes.ROMAN and li.num_typ != NumberTypes.ROMAN): 
                     continue
                 if (res[0].num_typ != NumberTypes.ROMAN and li.num_typ == NumberTypes.ROMAN): 
                     # A roman "1" right after a single collected non-roman item
                     # restarts the sequence from this line; other roman lines are skipped.
                     if (len(li.numbers) == 1 and li.numbers[0] == "1" and len(res) == 1): 
                         res.clear()
                         res.append(li)
                         continue
                     continue
                 if (res[0].num_typ != NumberTypes.LETTER and li.num_typ == NumberTypes.LETTER): 
                     continue
         res.append(li)
         if (li.has_many_spec_chars): 
             many_spec_char_lines += 1
     if (res is None): 
         return None
     if (check_spec_texts): 
         if (many_spec_char_lines > (math.floor(len(res) / 2))): 
             return None
     # Gap filling: where neighbours differ by 2, look between them in `lines`
     # for a numbered line that has delta 1 to both and insert it.
     i = 0
     while i < (len(res) - 1): 
         if (NumberingHelper.calc_delta(res[i], res[i + 1], False) == 2): 
             ii0 = Utils.indexOfList(lines, res[i], 0)
             ii1 = Utils.indexOfList(lines, res[i + 1], ii0)
             j = ii0 + 1
             while j < ii1: 
                 if (len(lines[j].numbers) > 0): 
                     if (NumberingHelper.calc_delta(res[i], lines[j], True) == 1 and NumberingHelper.calc_delta(lines[j], res[i + 1], True) == 1): 
                         res.insert(i + 1, lines[j])
                         break
                 j += 1
         i += 1
     # Clean-up to a fixed point: `ch` records whether the pass changed `res`;
     # passes repeat until one makes no change.
     ch = True
     while ch:
         ch = False
         i = 1
         # Hand-unrolled for-loop over i = 1 .. len(res) - 1 (see above).
         # After a deletion i is usually decremented so that the increment on
         # `continue` re-examines the same position.
         first_pass3280 = True
         while True:
             if first_pass3280: first_pass3280 = False
             else: i += 1
             if (not (i < len(res))): break
             d = NumberingHelper.calc_delta(res[i - 1], res[i], False)
             if (res[i - 1].num_suffix == res[i].num_suffix): 
                 if (d == 1): 
                     # Regular step with the same suffix: nothing to fix.
                     continue
                 # A forward jump of 2..19, or a zero delta between items of the
                 # same type and amount of numbers.
                 if (((d > 1 and (d < 20))) or ((d == 0 and res[i - 1].num_typ == res[i].num_typ and len(res[i - 1].numbers) == len(res[i].numbers)))): 
                     # Positive delta in the reverse direction while the previous
                     # item carries a tag: drop this item and everything after it.
                     if (NumberingHelper.calc_delta(res[i], res[i - 1], False) > 0): 
                         if (res[i - 1].tag is not None and i > 2): 
                             del res[i:i+len(res) - i]
                             ch = True
                             i -= 1
                             continue
                     if ((i + 1) < len(res)): 
                         dd = NumberingHelper.calc_delta(res[i], res[i + 1], False)
                         if (dd == 1): 
                             # The next item continues this one: keep it, unless it
                             # looks like a restart from 1 at the same depth (then
                             # fall through to the checks below).
                             if (res[i].last_number == 1 and len(res[i].numbers) == len(res[i - 1].numbers)): 
                                 pass
                             else: 
                                 continue
                         else: 
                             # The next item continues the previous one instead:
                             # this item is the odd one out.
                             dd = NumberingHelper.calc_delta(res[i - 1], res[i + 1], False)
                             if (dd == 1): 
                                 del res[i]
                                 i -= 1
                                 ch = True
                                 continue
                     elif (d > 3): 
                         # Last item with a jump larger than 3: drop it.
                         del res[i]
                         i -= 1
                         ch = True
                         continue
                     else: 
                         continue
             # Find j: end of the run starting at i whose consecutive deltas are
             # 1 or 2 and whose suffixes are equal.
             j = 0
             j = (i + 1)
             while j < len(res): 
                 dd = NumberingHelper.calc_delta(res[j - 1], res[j], False)
                 if (dd != 1 and dd != 2): 
                     break
                 if (res[j - 1].num_suffix != res[j].num_suffix): 
                     break
                 j += 1
             # Zero delta that becomes 1 with the alternative calc_delta mode
             # (third argument True) is treated as a regular step when both
             # items share a non-null suffix.
             if ((d == 0 and NumberingHelper.calc_delta(res[i - 1], res[i], True) == 1 and res[i - 1].num_suffix is not None) and res[i].num_suffix == res[i - 1].num_suffix): 
                 d = 1
             # The item does not follow its predecessor but starts a run of its
             # own: remove the whole run [i, j).
             if (d != 1 and j > (i + 1)): 
                 del res[i:i+j - i]
                 i -= 1
                 ch = True
                 continue
             if (d == 1): 
                 if ((i + 1) >= len(res)): 
                     continue
                 dd = NumberingHelper.calc_delta(res[i], res[i + 1], False)
                 # Item sits between two neighbours that share a suffix and the
                 # numbering is continuous: align its suffix with theirs.
                 if (dd == 1 and res[i - 1].num_suffix == res[i + 1].num_suffix): 
                     if (res[i].num_suffix != res[i - 1].num_suffix): 
                         res[i].num_suffix = res[i - 1].num_suffix
                         res[i].is_num_doubt = False
                         ch = True
                     continue
             if ((i + 1) < len(res)): 
                 # The next item directly continues the previous one: this item
                 # is removed unless it also fits between them.
                 dd = NumberingHelper.calc_delta(res[i - 1], res[i + 1], False)
                 if (dd == 1 and res[i - 1].num_suffix == res[i + 1].num_suffix): 
                     if (d == 1 and NumberingHelper.calc_delta(res[i], res[i + 1], True) == 1): 
                         pass
                     else: 
                         del res[i]
                         ch = True
                         continue
             elif (d == 0 or d > 10 or res[i - 1].num_suffix != res[i].num_suffix): 
                 # Last item that repeats, jumps far ahead or changes the suffix.
                 del res[i]
                 ch = True
                 continue
     # Count items that carry some explicit numbering marker (suffix, container
     # rank, multi-level number, upper case, roman numbering).
     has_suf = 0
     for r in res: 
         if ((r.num_suffix is not None or r.typ_container_rank > 0 or len(r.numbers) > 1) or r.all_upper or r.num_typ == NumberTypes.ROMAN): 
             has_suf += 1
     # A sequence without any such marker needs at least 5 items.
     if (has_suf == 0): 
         if (len(res) < 5): 
             return None
     if (len(res) >= 2): 
         if (res[0] != lines[0]): 
             # Coverage check: `tot` is the amount of text before the first and
             # after the last selected line, `blk` is the span of the sequence.
             tot = res[0].begin_token.begin_char - lines[0].begin_token.begin_char
             tot += (lines[len(lines) - 1].end_token.end_char - res[len(res) - 1].end_token.end_char)
             blk = res[len(res) - 1].end_token.end_char - res[0].begin_token.begin_char
             i = Utils.indexOfList(lines, res[len(res) - 1], 0)
             if (i > 0): 
                 # A further sequence (more than 2 items) found in the lines after
                 # this one also counts towards the covered span.
                 lines1 = list(lines)
                 del lines1[0:0+i + 1]
                 res1 = NumberingHelper.extract_main_sequence(lines1, check_spec_texts, can_sub_numbers)
                 if (res1 is not None and len(res1) > 2): 
                     blk += (res1[len(res1) - 1].end_char - res1[0].begin_char)
             # Reject when the sequence spans less than 1/5 of the outside text;
             # between 1/5 and 1/3 every item must be upper-case or has_changes.
             if ((blk * 3) < tot): 
                 if ((blk * 5) < tot): 
                     return None
                 for r in res: 
                     if (not r.all_upper and not r.has_changes): 
                         return None
         if (res[0].last_number == 1 and len(res[0].numbers) == 1): 
             # The sequence starts with a single-level "1": look for later
             # single-level "1" items (restarts).  res0 collects the first item
             # plus the last item of each accepted group; it is returned only if
             # the scan below reaches the end of res without a break.
             res0 = list()
             res0.append(res[0])
             i = 0
             i = 1
             while i < len(res): 
                 j = 0
                 j = (i + 1)
                 # j: index of the next single-level "1" after i (or len(res)).
                 while j < len(res): 
                     if (res[j].last_number == 1 and len(res[j].numbers) == 1): 
                         break
                     j += 1
                 # Groups shorter than 3 items stop the scan.
                 if ((j - i) < 3): 
                     break
                 j -= 1
                 jj = 0
                 errs = 0
                 # Inside the group every step must be 1; a step of 2 counts as
                 # an error, anything else stops the scan.
                 jj = (i + 1)
                 while jj < j: 
                     d = NumberingHelper.calc_delta(res[jj - 1], res[jj], False)
                     if (d == 1): 
                         pass
                     elif (d > 1 and (d < 3)): 
                         errs += 1
                     else: 
                         break
                     jj += 1
                 if ((jj < j) or errs > 1): 
                     break
                 if (j < (len(res) - 1)): 
                     # The last item of the group must continue the items
                     # collected so far in res0.
                     if (NumberingHelper.calc_delta(res0[len(res0) - 1], res[j], False) != 1): 
                         break
                     res0.append(res[j])
                 i = j
                 i += 1
             if (i >= len(res) and len(res0) > 1): 
                 return res0
         # Sequences longer than 500 items are rejected.
         if (len(res) > 500): 
             return None
         return res
     # Single-item result: accepted only when it is the very first line and
     # either carries a marker or the second line is its direct sub-item
     # (same leading numbers plus one more level).
     if (len(res) == 1 and lines[0] == res[0]): 
         if (has_suf > 0): 
             return res
         if (len(lines) > 1 and len(lines[1].numbers) == (len(lines[0].numbers) + 1)): 
             i = 0
             while i < len(lines[0].numbers): 
                 if (lines[1].numbers[i] != lines[0].numbers[i]): 
                     return None
                 i += 1
             return res
     return None
Ejemplo n.º 6
0
 def try_attach_territory(
         li: typing.List['TerrItemToken'],
         ad: 'AnalyzerData',
         attach_always: bool = False,
         cits: typing.List['CityItemToken'] = None,
         exists: typing.List['GeoReferent'] = None) -> 'ReferentToken':
     if (li is None or len(li) == 0):
         return None
     ex_obj = None
     new_name = None
     adj_list = list()
     noun = None
     add_noun = None
     rt = TerrAttachHelper.__try_attach_moscowao(li, ad)
     if (rt is not None):
         return rt
     if (li[0].termin_item is not None
             and li[0].termin_item.canonic_text == "ТЕРРИТОРИЯ"):
         res2 = TerrAttachHelper.__try_attach_pure_terr(li, ad)
         return res2
     if (len(li) == 2):
         if (li[0].rzd is not None and li[1].rzd_dir is not None):
             rzd = GeoReferent()
             rzd._add_name(li[1].rzd_dir)
             rzd._add_typ_ter(li[0].kit.base_language)
             rzd.add_slot(GeoReferent.ATTR_REF, li[0].rzd.referent, False,
                          0)
             rzd.add_ext_referent(li[0].rzd)
             return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
         if (li[1].rzd is not None and li[0].rzd_dir is not None):
             rzd = GeoReferent()
             rzd._add_name(li[0].rzd_dir)
             rzd._add_typ_ter(li[0].kit.base_language)
             rzd.add_slot(GeoReferent.ATTR_REF, li[1].rzd.referent, False,
                          0)
             rzd.add_ext_referent(li[1].rzd)
             return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
     can_be_city_before = False
     adj_terr_before = False
     if (cits is not None):
         if (cits[0].typ == CityItemToken.ItemType.CITY):
             can_be_city_before = True
         elif (cits[0].typ == CityItemToken.ItemType.NOUN
               and len(cits) > 1):
             can_be_city_before = True
     k = 0
     k = 0
     while k < len(li):
         if (li[k].onto_item is not None):
             if (ex_obj is not None or new_name is not None):
                 break
             if (noun is not None):
                 if (k == 1):
                     if (noun.termin_item.canonic_text == "РАЙОН"
                             or noun.termin_item.canonic_text == "ОБЛАСТЬ"
                             or noun.termin_item.canonic_text == "СОЮЗ"):
                         if (isinstance(li[k].onto_item.referent,
                                        GeoReferent)):
                             if (li[k].onto_item.referent.is_state):
                                 break
                         ok = False
                         tt = li[k].end_token.next0_
                         if (tt is None):
                             ok = True
                         elif (tt.is_char_of(",.")):
                             ok = True
                         if (not ok):
                             ok = MiscLocationHelper.check_geo_object_before(
                                 li[0].begin_token)
                         if (not ok):
                             adr = AddressItemToken.try_parse(
                                 tt, None, False, False, None)
                             if (adr is not None):
                                 if (adr.typ ==
                                         AddressItemToken.ItemType.STREET):
                                     ok = True
                         if (not ok):
                             break
                     if (li[k].onto_item is not None):
                         if (noun.begin_token.is_value("МО", None)
                                 or noun.begin_token.is_value("ЛО", None)):
                             return None
             ex_obj = li[k]
         elif (li[k].termin_item is not None):
             if (noun is not None):
                 break
             if (li[k].termin_item.is_always_prefix and k > 0):
                 break
             if (k > 0 and li[k].is_doubt):
                 if (li[k].begin_token == li[k].end_token
                         and li[k].begin_token.is_value("ЗАО", None)):
                     break
             if (li[k].termin_item.is_adjective
                     or li[k].is_geo_in_dictionary):
                 adj_list.append(li[k])
             else:
                 if (ex_obj is not None):
                     geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent,
                                                 GeoReferent)
                     if (geo_ is None):
                         break
                     if (ex_obj.is_adjective and
                         ((li[k].termin_item.canonic_text == "СОЮЗ" or
                           li[k].termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                         str0_ = str(ex_obj.onto_item)
                         if (not li[k].termin_item.canonic_text in str0_):
                             return None
                     if (li[k].termin_item.canonic_text == "РАЙОН"
                             or li[k].termin_item.canonic_text == "ОКРУГ"
                             or li[k].termin_item.canonic_text == "КРАЙ"):
                         tmp = io.StringIO()
                         for s in geo_.slots:
                             if (s.type_name == GeoReferent.ATTR_TYPE):
                                 print("{0};".format(s.value),
                                       end="",
                                       file=tmp,
                                       flush=True)
                         if (not li[k].termin_item.canonic_text
                                 in Utils.toStringStringIO(tmp).upper()):
                             if (k != 1 or new_name is not None):
                                 break
                             new_name = li[0]
                             new_name.is_adjective = True
                             new_name.onto_item = (None)
                             ex_obj = (None)
                 noun = li[k]
                 if (k == 0):
                     tt = TerrItemToken.try_parse(
                         li[k].begin_token.previous, None, True, False,
                         None)
                     if (tt is not None and tt.morph.class0_.is_adjective):
                         adj_terr_before = True
         else:
             if (ex_obj is not None):
                 break
             if (new_name is not None):
                 break
             new_name = li[k]
         k += 1
     name = None
     alt_name = None
     full_name = None
     morph_ = None
     if (ex_obj is not None):
         if (ex_obj.is_adjective and not ex_obj.morph.language.is_en
                 and noun is None):
             if (attach_always and ex_obj.end_token.next0_ is not None):
                 npt = NounPhraseHelper.try_parse(ex_obj.begin_token,
                                                  NounPhraseParseAttr.NO, 0,
                                                  None)
                 if (ex_obj.end_token.next0_.is_comma_and):
                     pass
                 elif (npt is None):
                     pass
                 else:
                     str0_ = StreetItemToken.try_parse(
                         ex_obj.end_token.next0_, None, False, None, False)
                     if (str0_ is not None):
                         if (str0_.typ == StreetItemType.NOUN
                                 and str0_.end_token == npt.end_token):
                             return None
             else:
                 cit = CityItemToken.try_parse(ex_obj.end_token.next0_,
                                               None, False, None)
                 if (cit is not None
                         and ((cit.typ == CityItemToken.ItemType.NOUN
                               or cit.typ == CityItemToken.ItemType.CITY))):
                     npt = NounPhraseHelper.try_parse(
                         ex_obj.begin_token, NounPhraseParseAttr.NO, 0,
                         None)
                     if (npt is not None
                             and npt.end_token == cit.end_token):
                         pass
                     else:
                         return None
                 elif (ex_obj.begin_token.is_value("ПОДНЕБЕСНЫЙ", None)):
                     pass
                 else:
                     return None
         if (noun is None and ex_obj.can_be_city):
             cit0 = CityItemToken.try_parse_back(
                 ex_obj.begin_token.previous)
             if (cit0 is not None
                     and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                 return None
         if (ex_obj.is_doubt and noun is None):
             ok2 = False
             if (TerrAttachHelper.__can_be_geo_after(
                     ex_obj.end_token.next0_)):
                 ok2 = True
             elif (not ex_obj.can_be_surname and not ex_obj.can_be_city):
                 if ((ex_obj.end_token.next0_ is not None
                      and ex_obj.end_token.next0_.is_char(')')
                      and ex_obj.begin_token.previous is not None)
                         and ex_obj.begin_token.previous.is_char('(')):
                     ok2 = True
                 elif (ex_obj.chars.is_latin_letter
                       and ex_obj.begin_token.previous is not None):
                     if (ex_obj.begin_token.previous.is_value("IN", None)):
                         ok2 = True
                     elif (ex_obj.begin_token.previous.is_value(
                             "THE", None) and
                           ex_obj.begin_token.previous.previous is not None
                           and
                           ex_obj.begin_token.previous.previous.is_value(
                               "IN", None)):
                         ok2 = True
             if (not ok2):
                 cit0 = CityItemToken.try_parse_back(
                     ex_obj.begin_token.previous)
                 if (cit0 is not None
                         and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                     pass
                 elif (MiscLocationHelper.check_geo_object_before(
                         ex_obj.begin_token.previous)):
                     pass
                 else:
                     return None
         name = ex_obj.onto_item.canonic_text
         morph_ = ex_obj.morph
     elif (new_name is not None):
         if (noun is None):
             return None
         j = 1
         while j < k:
             if (li[j].is_newline_before and not li[0].is_newline_before):
                 if (BracketHelper.can_be_start_of_sequence(
                         li[j].begin_token, False, False)):
                     pass
                 else:
                     return None
             j += 1
         morph_ = noun.morph
         if (new_name.is_adjective):
             if (noun.termin_item.acronym == "АО"):
                 if (noun.begin_token != noun.end_token):
                     return None
                 if (new_name.morph.gender != MorphGender.FEMINIE):
                     return None
             geo_before = None
             tt0 = li[0].begin_token.previous
             if (tt0 is not None and tt0.is_comma_and):
                 tt0 = tt0.previous
             if (not li[0].is_newline_before and tt0 is not None):
                 geo_before = (Utils.asObjectOrNull(tt0.get_referent(),
                                                    GeoReferent))
             if (Utils.indexOfList(li, noun, 0) < Utils.indexOfList(
                     li, new_name, 0)):
                 if (noun.termin_item.is_state):
                     return None
                 if (new_name.can_be_surname and geo_before is None):
                     if (((noun.morph.case_)
                          & new_name.morph.case_).is_undefined):
                         return None
                 if (MiscHelper.is_exists_in_dictionary(
                         new_name.begin_token, new_name.end_token,
                     (MorphClass.ADJECTIVE) | MorphClass.PRONOUN
                         | MorphClass.VERB)):
                     if (noun.begin_token != new_name.begin_token):
                         if (geo_before is None):
                             if (len(li) == 2 and
                                     TerrAttachHelper.__can_be_geo_after(
                                         li[1].end_token.next0_)):
                                 pass
                             elif (len(li) == 3
                                   and li[2].termin_item is not None
                                   and TerrAttachHelper.__can_be_geo_after(
                                       li[2].end_token.next0_)):
                                 pass
                             elif (new_name.is_geo_in_dictionary):
                                 pass
                             elif (new_name.end_token.is_newline_after):
                                 pass
                             else:
                                 return None
                 npt = NounPhraseHelper.try_parse(
                     new_name.end_token, NounPhraseParseAttr.PARSEPRONOUNS,
                     0, None)
                 if (npt is not None
                         and npt.end_token != new_name.end_token):
                     if (len(li) >= 3 and li[2].termin_item is not None
                             and npt.end_token == li[2].end_token):
                         add_noun = li[2]
                     else:
                         return None
                 rtp = new_name.kit.process_referent(
                     "PERSON", new_name.begin_token)
                 if (rtp is not None):
                     return None
                 name = ProperNameHelper.get_name_ex(
                     new_name.begin_token, new_name.end_token,
                     MorphClass.ADJECTIVE, MorphCase.UNDEFINED,
                     noun.termin_item.gender, False, False)
             else:
                 ok = False
                 if (((k + 1) < len(li)) and li[k].termin_item is None
                         and li[k + 1].termin_item is not None):
                     ok = True
                 elif ((k < len(li)) and li[k].onto_item is not None):
                     ok = True
                 elif (k == len(li) and not new_name.is_adj_in_dictionary):
                     ok = True
                 elif (MiscLocationHelper.check_geo_object_before(
                         li[0].begin_token) or can_be_city_before):
                     ok = True
                 elif (MiscLocationHelper.check_geo_object_after(
                         li[k - 1].end_token, False)):
                     ok = True
                 elif (len(li) == 3 and k == 2):
                     cit = CityItemToken.try_parse(li[2].begin_token, None,
                                                   False, None)
                     if (cit is not None):
                         if (cit.typ == CityItemToken.ItemType.CITY
                                 or cit.typ == CityItemToken.ItemType.NOUN):
                             ok = True
                 elif (len(li) == 2):
                     ok = TerrAttachHelper.__can_be_geo_after(
                         li[len(li) - 1].end_token.next0_)
                 if (not ok and not li[0].is_newline_before
                         and not li[0].chars.is_all_lower):
                     rt00 = li[0].kit.process_referent(
                         "PERSONPROPERTY", li[0].begin_token.previous)
                     if (rt00 is not None):
                         ok = True
                 if (noun.termin_item is not None
                         and noun.termin_item.is_strong
                         and new_name.is_adjective):
                     ok = True
                 if (noun.is_doubt and len(adj_list) == 0
                         and geo_before is None):
                     return None
                 name = ProperNameHelper.get_name_ex(
                     new_name.begin_token, new_name.end_token,
                     MorphClass.ADJECTIVE, MorphCase.UNDEFINED,
                     noun.termin_item.gender, False, False)
                 if (not ok and not attach_always):
                     if (MiscHelper.is_exists_in_dictionary(
                             new_name.begin_token, new_name.end_token,
                         (MorphClass.ADJECTIVE) | MorphClass.PRONOUN
                             | MorphClass.VERB)):
                         if (exists is not None):
                             for e0_ in exists:
                                 if (e0_.find_slot(GeoReferent.ATTR_NAME,
                                                   name, True) is not None):
                                     ok = True
                                     break
                         if (not ok):
                             return None
                 full_name = "{0} {1}".format(
                     ProperNameHelper.get_name_ex(li[0].begin_token,
                                                  noun.begin_token.previous,
                                                  MorphClass.ADJECTIVE,
                                                  MorphCase.UNDEFINED,
                                                  noun.termin_item.gender,
                                                  False, False),
                     noun.termin_item.canonic_text)
         else:
             if (not attach_always or
                 ((noun.termin_item is not None
                   and noun.termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                 is_latin = noun.chars.is_latin_letter and new_name.chars.is_latin_letter
                 if (Utils.indexOfList(li, noun, 0) > Utils.indexOfList(
                         li, new_name, 0)):
                     if (not is_latin):
                         return None
                 if (not new_name.is_district_name
                         and not BracketHelper.can_be_start_of_sequence(
                             new_name.begin_token, False, False)):
                     if (len(adj_list) == 0
                             and MiscHelper.is_exists_in_dictionary(
                                 new_name.begin_token, new_name.end_token,
                                 (MorphClass.NOUN) | MorphClass.PRONOUN)):
                         if (len(li) == 2 and noun.is_city_region
                                 and (noun.whitespaces_after_count < 2)):
                             pass
                         else:
                             return None
                     if (not is_latin):
                         if ((noun.termin_item.is_region
                              and not attach_always and
                              ((not adj_terr_before or new_name.is_doubt)))
                                 and not noun.is_city_region and
                                 not noun.termin_item.is_specific_prefix):
                             if (not MiscLocationHelper.
                                     check_geo_object_before(
                                         noun.begin_token)):
                                 if (not noun.is_doubt and noun.begin_token
                                         != noun.end_token):
                                     pass
                                 elif ((noun.termin_item.is_always_prefix
                                        and len(li) == 2 and li[0] == noun)
                                       and li[1] == new_name):
                                     pass
                                 else:
                                     return None
                         if (noun.is_doubt and len(adj_list) == 0):
                             if (noun.termin_item.acronym == "МО"
                                     or noun.termin_item.acronym == "ЛО"):
                                 if (k == (len(li) - 1)
                                         and li[k].termin_item is not None):
                                     add_noun = li[k]
                                     k += 1
                                 elif (len(li) == 2 and noun == li[0]
                                       and str(new_name).endswith("совет")):
                                     pass
                                 else:
                                     return None
                             else:
                                 return None
                         pers = new_name.kit.process_referent(
                             "PERSON", new_name.begin_token)
                         if (pers is not None):
                             return None
             name = MiscHelper.get_text_value(new_name.begin_token,
                                              new_name.end_token,
                                              GetTextAttr.NO)
             if (new_name.begin_token != new_name.end_token):
                 ttt = new_name.begin_token.next0_
                 while ttt is not None and ttt.end_char <= new_name.end_char:
                     if (ttt.chars.is_letter):
                         ty = TerrItemToken.try_parse(
                             ttt, None, False, False, None)
                         if ((ty is not None and ty.termin_item is not None
                              and noun is not None)
                                 and ((noun.termin_item.canonic_text
                                       in ty.termin_item.canonic_text
                                       or ty.termin_item.canonic_text
                                       in noun.termin_item.canonic_text))):
                             name = MiscHelper.get_text_value(
                                 new_name.begin_token, ttt.previous,
                                 GetTextAttr.NO)
                             break
                     ttt = ttt.next0_
             if (len(adj_list) > 0):
                 npt = NounPhraseHelper.try_parse(adj_list[0].begin_token,
                                                  NounPhraseParseAttr.NO, 0,
                                                  None)
                 if (npt is not None and npt.end_token == noun.end_token):
                     alt_name = "{0} {1}".format(
                         npt.get_normal_case_text(None,
                                                  MorphNumber.UNDEFINED,
                                                  MorphGender.UNDEFINED,
                                                  False), name)
     else:
         if ((len(li) == 1 and noun is not None
              and noun.end_token.next0_ is not None) and (isinstance(
                  noun.end_token.next0_.get_referent(), GeoReferent))):
             g = Utils.asObjectOrNull(noun.end_token.next0_.get_referent(),
                                      GeoReferent)
             if (noun.termin_item is not None):
                 tyy = noun.termin_item.canonic_text.lower()
                 ooo = False
                 if (g.find_slot(GeoReferent.ATTR_TYPE, tyy, True)
                         is not None):
                     ooo = True
                 elif (tyy.endswith("район") and g.find_slot(
                         GeoReferent.ATTR_TYPE, "район", True) is not None):
                     ooo = True
                 if (ooo):
                     return ReferentToken._new734(g, noun.begin_token,
                                                  noun.end_token.next0_,
                                                  noun.begin_token.morph)
         if ((len(li) == 1 and noun == li[0]
              and li[0].termin_item is not None)
                 and TerrItemToken.try_parse(li[0].end_token.next0_, None,
                                             True, False, None) is None and
                 TerrItemToken.try_parse(li[0].begin_token.previous, None,
                                         True, False, None) is None):
             if (li[0].morph.number == MorphNumber.PLURAL):
                 return None
             cou = 0
             str0_ = li[0].termin_item.canonic_text.lower()
             tt = li[0].begin_token.previous
             first_pass3158 = True
             while True:
                 if first_pass3158: first_pass3158 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (tt.is_newline_after):
                     cou += 10
                 else:
                     cou += 1
                 if (cou > 500):
                     break
                 g = Utils.asObjectOrNull(tt.get_referent(), GeoReferent)
                 if (g is None):
                     continue
                 ok = True
                 cou = 0
                 tt = li[0].end_token.next0_
                 first_pass3159 = True
                 while True:
                     if first_pass3159: first_pass3159 = False
                     else: tt = tt.next0_
                     if (not (tt is not None)): break
                     if (tt.is_newline_before):
                         cou += 10
                     else:
                         cou += 1
                     if (cou > 500):
                         break
                     tee = TerrItemToken.try_parse(tt, None, True, False,
                                                   None)
                     if (tee is None):
                         continue
                     ok = False
                     break
                 if (ok):
                     ii = 0
                     while g is not None and (ii < 3):
                         if (g.find_slot(GeoReferent.ATTR_TYPE, str0_, True)
                                 is not None):
                             return ReferentToken._new734(
                                 g, li[0].begin_token, li[0].end_token,
                                 noun.begin_token.morph)
                         g = g.higher
                         ii += 1
                 break
         return None
     ter = None
     if (ex_obj is not None and (isinstance(ex_obj.tag, GeoReferent))):
         ter = (Utils.asObjectOrNull(ex_obj.tag, GeoReferent))
     else:
         ter = GeoReferent()
         if (ex_obj is not None):
             geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent,
                                         GeoReferent)
             if (geo_ is not None and not geo_.is_city):
                 ter._merge_slots2(geo_, li[0].kit.base_language)
             else:
                 ter._add_name(name)
             if (noun is None and ex_obj.can_be_city):
                 ter._add_typ_city(li[0].kit.base_language)
             else:
                 pass
         elif (new_name is not None):
             ter._add_name(name)
             if (alt_name is not None):
                 ter._add_name(alt_name)
         if (noun is not None):
             if (noun.termin_item.canonic_text == "АО"):
                 ter._add_typ(
                     ("АВТОНОМНИЙ ОКРУГ" if li[0].kit.base_language.is_ua
                      else "АВТОНОМНЫЙ ОКРУГ"))
             elif (noun.termin_item.canonic_text == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ"
                   or noun.termin_item.canonic_text
                   == "МУНІЦИПАЛЬНЕ ЗБОРИ"):
                 ter._add_typ(("МУНІЦИПАЛЬНЕ УТВОРЕННЯ"
                               if li[0].kit.base_language.is_ua else
                               "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ"))
             elif (noun.termin_item.acronym == "МО"
                   and add_noun is not None):
                 ter._add_typ(add_noun.termin_item.canonic_text)
             else:
                 if (noun.termin_item.canonic_text == "СОЮЗ"
                         and ex_obj is not None
                         and ex_obj.end_char > noun.end_char):
                     return ReferentToken._new734(ter, ex_obj.begin_token,
                                                  ex_obj.end_token,
                                                  ex_obj.morph)
                 ter._add_typ(noun.termin_item.canonic_text)
                 if (noun.termin_item.is_region and ter.is_state):
                     ter._add_typ_reg(li[0].kit.base_language)
         if (ter.is_state and ter.is_region):
             for a in adj_list:
                 if (a.termin_item.is_region):
                     ter._add_typ_reg(li[0].kit.base_language)
                     break
         if (ter.is_state):
             if (full_name is not None):
                 ter._add_name(full_name)
     res = ReferentToken(ter, li[0].begin_token, li[k - 1].end_token)
     if (noun is not None and noun.morph.class0_.is_noun):
         res.morph = noun.morph
     else:
         res.morph = MorphCollection()
         ii = 0
         while ii < k:
             for v in li[ii].morph.items:
                 bi = MorphBaseInfo()
                 bi.copy_from(v)
                 if (noun is not None):
                     if (bi.class0_.is_adjective):
                         bi.class0_ = MorphClass.NOUN
                 res.morph.add_item(bi)
             ii += 1
     if (li[0].termin_item is not None
             and li[0].termin_item.is_specific_prefix):
         res.begin_token = li[0].end_token.next0_
     if (add_noun is not None and add_noun.end_char > res.end_char):
         res.end_token = add_noun.end_token
     if ((isinstance(res.begin_token.previous, TextToken))
             and (res.whitespaces_before_count < 2)):
         tt = Utils.asObjectOrNull(res.begin_token.previous, TextToken)
         if (tt.term == "АР"):
             for ty in ter.typs:
                 if ("республика" in ty or "республіка" in ty):
                     res.begin_token = tt
                     break
     return res
Ejemplo n.º 7
0
 def _tryParseStreet(sli : typing.List['StreetItemToken'], ext_onto_regim : bool=False, for_metro : bool=False) -> 'AddressItemToken':
     if (sli is None or len(sli) == 0): 
         return None
     i = 0
     while i < len(sli): 
         if (i == 0 and sli[i].typ == StreetItemType.FIX and ((len(sli) == 1 or sli[1].typ != StreetItemType.NOUN))): 
             return StreetDefineHelper.__tryParseFix(sli)
         elif (sli[i].typ == StreetItemType.NOUN): 
             if ((i == 0 and sli[i].termin.canonic_text == "УЛИЦА" and ((i + 2) < len(sli))) and sli[i + 1].typ == StreetItemType.NOUN and sli[i + 1].termin.canonic_text == "МИКРОРАЙОН"): 
                 sli[i + 1].begin_token = sli[i].begin_token
                 del sli[i]
             if (sli[i].termin.canonic_text == "МЕТРО"): 
                 if ((i + 1) < len(sli)): 
                     sli1 = list()
                     ii = i + 1
                     while ii < len(sli): 
                         sli1.append(sli[ii])
                         ii += 1
                     str1 = StreetDefineHelper._tryParseStreet(sli1, ext_onto_regim, True)
                     if (str1 is not None): 
                         str1.begin_token = sli[i].begin_token
                         str1.is_doubt = sli[i].is_abridge
                         if (sli[i + 1].is_in_brackets): 
                             str1.is_doubt = False
                         return str1
                 elif (i == 1 and sli[0].typ == StreetItemType.NAME): 
                     for_metro = True
                     break
                 if (i == 0 and len(sli) > 0): 
                     for_metro = True
                     break
                 return None
             if (i == 0 and (i + 1) >= len(sli) and ((sli[i].termin.canonic_text == "ВОЕННЫЙ ГОРОДОК" or sli[i].termin.canonic_text == "ПРОМЗОНА"))): 
                 stri0 = StreetReferent()
                 stri0.addSlot(StreetReferent.ATTR_TYP, "микрорайон", False, 0)
                 stri0.addSlot(StreetReferent.ATTR_NAME, sli[i].termin.canonic_text, False, 0)
                 return AddressItemToken._new85(AddressItemToken.ItemType.STREET, sli[0].begin_token, sli[0].end_token, stri0, True)
             if (i == 0 and (i + 1) >= len(sli) and sli[i].termin.canonic_text == "МИКРОРАЙОН"): 
                 stri0 = StreetReferent()
                 stri0.addSlot(StreetReferent.ATTR_TYP, sli[i].termin.canonic_text.lower(), False, 0)
                 return AddressItemToken._new85(AddressItemToken.ItemType.STREET, sli[0].begin_token, sli[0].end_token, stri0, True)
             if (sli[i].termin.canonic_text == "ПЛОЩАДЬ" or sli[i].termin.canonic_text == "ПЛОЩА"): 
                 tt = sli[i].end_token.next0_
                 if (tt is not None and ((tt.is_hiphen or tt.isChar(':')))): 
                     tt = tt.next0_
                 nex = NumberHelper.tryParseNumberWithPostfix(tt)
                 if (nex is not None): 
                     return None
             break
         i += 1
     if (i >= len(sli)): 
         return StreetDefineHelper.__tryDetectNonNoun(sli, ext_onto_regim, for_metro)
     name = None
     number = None
     age = None
     adj = None
     noun = sli[i]
     alt_noun = None
     is_micro_raion = (noun.termin.canonic_text == "МИКРОРАЙОН" or noun.termin.canonic_text == "МІКРОРАЙОН" or noun.termin.canonic_text == "КВАРТАЛ") or LanguageHelper.endsWith(noun.termin.canonic_text, "ГОРОДОК")
     before = 0
     after = 0
     j = 0
     while j < i: 
         if ((sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX) or sli[j].typ == StreetItemType.STDADJECTIVE or sli[j].typ == StreetItemType.STDPARTOFNAME): 
             before += 1
         elif (sli[j].typ == StreetItemType.NUMBER): 
             if (sli[j].is_newline_after): 
                 return None
             if (sli[j].number.morph.class0_.is_adjective): 
                 before += 1
             elif (is_micro_raion): 
                 before += 1
             elif (sli[i].number_has_prefix): 
                 before += 1
         else: 
             before += 1
         j += 1
     j = (i + 1)
     while j < len(sli): 
         if ((sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX) or sli[j].typ == StreetItemType.STDADJECTIVE or sli[j].typ == StreetItemType.STDPARTOFNAME): 
             after += 1
         elif (sli[j].typ == StreetItemType.NUMBER): 
             if (sli[j].number is not None and sli[j].number.morph.class0_.is_adjective): 
                 after += 1
             elif (is_micro_raion): 
                 after += 1
             elif (sli[j].number_has_prefix): 
                 after += 1
             elif (ext_onto_regim): 
                 after += 1
         elif (sli[j].typ == StreetItemType.NOUN): 
             break
         else: 
             after += 1
         j += 1
     rli = list()
     if (before > after): 
         if (noun.termin.canonic_text == "МЕТРО"): 
             return None
         tt = sli[0].begin_token
         if (tt == sli[0].end_token and noun.begin_token == sli[0].end_token.next0_): 
             if (not tt.morph.class0_.is_adjective and not ((isinstance(tt, NumberToken)))): 
                 if ((sli[0].is_newline_before or not MiscLocationHelper.checkGeoObjectBefore(sli[0].begin_token) or noun.morph.case_.is_genitive) or noun.morph.case_.is_instrumental): 
                     ok = False
                     if (AddressItemToken.checkHouseAfter(noun.end_token.next0_, False, True)): 
                         ok = True
                     elif (noun.end_token.next0_ is None): 
                         ok = True
                     elif (noun.is_newline_after and MiscLocationHelper.checkGeoObjectBefore(sli[0].begin_token)): 
                         ok = True
                     if (not ok): 
                         if ((noun.chars.is_latin_letter and noun.chars.is_capital_upper and sli[0].chars.is_latin_letter) and sli[0].chars.is_capital_upper): 
                             ok = True
                     if (not ok): 
                         return None
         n0 = 0
         n1 = (i - 1)
     elif (i == 1 and sli[0].typ == StreetItemType.NUMBER): 
         if (not sli[0].is_whitespace_after): 
             return None
         number = (sli[0].value if sli[0].number is None else str(sli[0].number.int_value))
         if (sli[0].is_number_km): 
             number += "км"
         n0 = (i + 1)
         n1 = (len(sli) - 1)
         rli.append(sli[0])
         rli.append(sli[i])
     elif (after > before): 
         n0 = (i + 1)
         n1 = (len(sli) - 1)
         rli.append(sli[i])
     elif (after == 0): 
         return None
     elif ((len(sli) > 2 and ((sli[0].typ == StreetItemType.NAME or sli[0].typ == StreetItemType.STDADJECTIVE or sli[0].typ == StreetItemType.STDNAME)) and sli[1].typ == StreetItemType.NOUN) and sli[2].typ == StreetItemType.NUMBER): 
         n0 = 0
         n1 = 0
         num = False
         tt2 = sli[2].end_token.next0_
         if (sli[2].is_number_km): 
             num = True
         elif (sli[0].begin_token.previous is not None and sli[0].begin_token.previous.isValue("КИЛОМЕТР", None)): 
             sli[2].is_number_km = True
             num = True
         elif (sli[2].begin_token.previous.is_comma): 
             pass
         elif (sli[2].begin_token != sli[2].end_token): 
             num = True
         elif (AddressItemToken.checkHouseAfter(sli[2].end_token.next0_, False, True)): 
             num = True
         elif (sli[2].morph.class0_.is_adjective and (sli[2].whitespaces_before_count < 2)): 
             if (sli[2].end_token.next0_ is None or sli[2].end_token.is_comma or sli[2].is_newline_after): 
                 num = True
         if (num): 
             number = (sli[2].value if sli[2].number is None else str(sli[2].number.int_value))
             if (sli[2].is_number_km): 
                 number += "км"
             rli.append(sli[2])
         else: 
             del sli[2:2+len(sli) - 2]
     else: 
         return None
     sec_number = None
     j = n0
     first_pass2732 = True
     while True:
         if first_pass2732: first_pass2732 = False
         else: j += 1
         if (not (j <= n1)): break
         if (sli[j].typ == StreetItemType.NUMBER): 
             if (age is not None or ((sli[j].is_newline_before and j > 0))): 
                 break
             if (number is not None): 
                 if (name is not None and name.typ == StreetItemType.STDNAME): 
                     sec_number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value))
                     if (sli[j].is_number_km): 
                         sec_number += "км"
                     rli.append(sli[j])
                     continue
                 if (((j + 1) < len(sli)) and sli[j + 1].typ == StreetItemType.STDNAME): 
                     sec_number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value))
                     if (sli[j].is_number_km): 
                         sec_number += "км"
                     rli.append(sli[j])
                     continue
                 break
             if (sli[j].number is not None and sli[j].number.typ == NumberSpellingType.DIGIT and not sli[j].number.morph.class0_.is_adjective): 
                 if (sli[j].whitespaces_before_count > 2 and j > 0): 
                     break
                 if (sli[j].number is not None and sli[j].number.int_value > 20): 
                     if (j > n0): 
                         if (((j + 1) < len(sli)) and sli[j + 1].typ == StreetItemType.NOUN): 
                             pass
                         else: 
                             break
                 if (j == n0 and n0 > 0): 
                     pass
                 elif (j == n0 and n0 == 0 and sli[j].whitespaces_after_count == 1): 
                     pass
                 elif (sli[j].number_has_prefix): 
                     pass
                 elif (j == n1 and ((n1 + 1) < len(sli)) and sli[n1 + 1].typ == StreetItemType.NOUN): 
                     pass
                 else: 
                     break
             number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value))
             if (sli[j].is_number_km): 
                 number += "км"
             rli.append(sli[j])
         elif (sli[j].typ == StreetItemType.AGE): 
             if (number is not None or age is not None): 
                 break
             age = str(sli[j].number.int_value)
             rli.append(sli[j])
         elif (sli[j].typ == StreetItemType.STDADJECTIVE): 
             if (adj is not None): 
                 return None
             adj = sli[j]
             rli.append(sli[j])
         elif (sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX): 
             if (name is not None): 
                 if (j > 1 and sli[j - 2].typ == StreetItemType.NOUN): 
                     break
                 elif (i < j): 
                     break
                 else: 
                     return None
             name = sli[j]
             rli.append(sli[j])
         elif (sli[j].typ == StreetItemType.STDPARTOFNAME and j == n1): 
             if (name is not None): 
                 break
             name = sli[j]
             rli.append(sli[j])
         elif (sli[j].typ == StreetItemType.NOUN): 
             if ((sli[0] == noun and ((noun.termin.canonic_text == "УЛИЦА" or noun.termin.canonic_text == "ВУЛИЦЯ")) and j > 0) and name is None): 
                 alt_noun = noun
                 noun = sli[j]
                 rli.append(sli[j])
             else: 
                 break
     if (((n1 < i) and number is None and ((i + 1) < len(sli))) and sli[i + 1].typ == StreetItemType.NUMBER and sli[i + 1].number_has_prefix): 
         number = (sli[i + 1].value if sli[i + 1].number is None else str(sli[i + 1].number.int_value))
         rli.append(sli[i + 1])
     elif ((((i < n0) and ((name is not None or adj is not None)) and (j < len(sli))) and sli[j].typ == StreetItemType.NOUN and ((noun.termin.canonic_text == "УЛИЦА" or noun.termin.canonic_text == "ВУЛИЦЯ"))) and (((sli[j].termin.canonic_text == "ПЛОЩАДЬ" or sli[j].termin.canonic_text == "БУЛЬВАР" or sli[j].termin.canonic_text == "ПЛОЩА") or sli[j].termin.canonic_text == "МАЙДАН" or (j + 1) == len(sli)))): 
         alt_noun = noun
         noun = sli[j]
         rli.append(sli[j])
     if (name is None): 
         if (number is None and adj is None): 
             return None
         if (noun.is_abridge): 
             if (is_micro_raion): 
                 pass
             elif (noun.termin is not None and ((noun.termin.canonic_text == "ПРОЕЗД" or noun.termin.canonic_text == "ПРОЇЗД"))): 
                 pass
             elif (adj is None or adj.is_abridge): 
                 return None
         if (adj is not None and adj.is_abridge): 
             return None
     if (not sli[i] in rli): 
         rli.append(sli[i])
     street = StreetReferent()
     if (not for_metro): 
         street.addSlot(StreetReferent.ATTR_TYP, noun.termin.canonic_text.lower(), False, 0)
         if (noun.alt_termin is not None): 
             if (noun.alt_termin.canonic_text == "ПРОСПЕКТ" and number is not None): 
                 pass
             else: 
                 street.addSlot(StreetReferent.ATTR_TYP, noun.alt_termin.canonic_text.lower(), False, 0)
     else: 
         street.addSlot(StreetReferent.ATTR_TYP, "метро", False, 0)
     res = AddressItemToken._new82(AddressItemToken.ItemType.STREET, rli[0].begin_token, rli[0].end_token, street)
     for r in rli: 
         if (res.begin_char > r.begin_char): 
             res.begin_token = r.begin_token
         if (res.end_char < r.end_char): 
             res.end_token = r.end_token
     if (for_metro and noun in rli and noun.termin.canonic_text == "МЕТРО"): 
         rli.remove(noun)
     if (noun.is_abridge and (noun.length_char < 4)): 
         res.is_doubt = True
     elif (noun.noun_is_doubt_coef > 0): 
         res.is_doubt = True
         if ((name is not None and name.end_char > noun.end_char and noun.chars.is_all_lower) and not name.chars.is_all_lower and not ((isinstance(name.begin_token, ReferentToken)))): 
             npt2 = NounPhraseHelper.tryParse(name.begin_token, NounPhraseParseAttr.NO, 0)
             if (npt2 is not None and npt2.end_char > name.end_char): 
                 pass
             elif (AddressItemToken.checkHouseAfter(res.end_token.next0_, False, False)): 
                 res.is_doubt = False
             elif (name.chars.is_capital_upper and noun.noun_is_doubt_coef == 1): 
                 res.is_doubt = False
     name_base = io.StringIO()
     name_alt = io.StringIO()
     name_alt2 = None
     gen = noun.termin.gender
     adj_gen = MorphGender.UNDEFINED
     if (number is not None): 
         street.number = number
         if (sec_number is not None): 
             street.sec_number = sec_number
     if (age is not None): 
         if (street.number is None): 
             street.number = age
         else: 
             street.sec_number = age
     if (name is not None and name.value is not None): 
         if (street.kind == StreetKind.ROAD): 
             for r in rli: 
                 if (r.typ == StreetItemType.NAME and r != name): 
                     print(r.value, end="", file=name_alt)
                     break
         if (name.alt_value is not None and name_alt.tell() == 0): 
             print("{0} {1}".format(Utils.toStringStringIO(name_base), name.alt_value), end="", file=name_alt, flush=True)
         print(" {0}".format(name.value), end="", file=name_base, flush=True)
     elif (name is not None): 
         is_adj = False
         if (isinstance(name.end_token, TextToken)): 
             for wf in name.end_token.morph.items: 
                 if ((isinstance(wf, MorphWordForm)) and (wf).is_in_dictionary): 
                     is_adj = (wf.class0_.is_adjective | wf.class0_.is_proper_geo)
                     adj_gen = wf.gender
                     break
                 elif (wf.class0_.is_adjective | wf.class0_.is_proper_geo): 
                     is_adj = True
         if (is_adj): 
             tmp = io.StringIO()
             vars0_ = list()
             t = name.begin_token
             while t is not None: 
                 tt = Utils.asObjectOrNull(t, TextToken)
                 if (tt is None): 
                     break
                 if (tmp.tell() > 0): 
                     print(' ', end="", file=tmp)
                 if (t == name.end_token): 
                     is_padez = False
                     if (not noun.is_abridge): 
                         if (not noun.morph.case_.is_undefined and not noun.morph.case_.is_nominative): 
                             is_padez = True
                         elif (noun.termin.canonic_text == "ШОССЕ" or noun.termin.canonic_text == "ШОСЕ"): 
                             is_padez = True
                     if (res.begin_token.previous is not None and res.begin_token.previous.morph.class0_.is_preposition): 
                         is_padez = True
                     if (not is_padez): 
                         print(tt.term, end="", file=tmp)
                         break
                     for wf in tt.morph.items: 
                         if (((wf.class0_.is_adjective or wf.class0_.is_proper_geo)) and (((wf.gender) & (gen))) != (MorphGender.UNDEFINED)): 
                             if (noun.morph.case_.is_undefined or not ((wf.case_) & noun.morph.case_).is_undefined): 
                                 wff = Utils.asObjectOrNull(wf, MorphWordForm)
                                 if (wff is None): 
                                     continue
                                 if (gen == MorphGender.MASCULINE and "ОЙ" in wff.normal_case): 
                                     continue
                                 if (not wff.normal_case in vars0_): 
                                     vars0_.append(wff.normal_case)
                     if (not tt.term in vars0_ and Utils.indexOfList(sli, name, 0) > Utils.indexOfList(sli, noun, 0)): 
                         vars0_.append(tt.term)
                     if (len(vars0_) == 0): 
                         vars0_.append(tt.term)
                     break
                 if (not tt.is_hiphen): 
                     print(tt.term, end="", file=tmp)
                 t = t.next0_
             if (len(vars0_) == 0): 
                 print(" {0}".format(Utils.toStringStringIO(tmp)), end="", file=name_base, flush=True)
             else: 
                 head = Utils.toStringStringIO(name_base)
                 print(" {0}{1}".format(Utils.toStringStringIO(tmp), vars0_[0]), end="", file=name_base, flush=True)
                 if (len(vars0_) > 1): 
                     Utils.setLengthStringIO(name_alt, 0)
                     print("{0} {1}{2}".format(head, Utils.toStringStringIO(tmp), vars0_[1]), end="", file=name_alt, flush=True)
                 if (len(vars0_) > 2): 
                     name_alt2 = "{0} {1}{2}".format(head, Utils.toStringStringIO(tmp), vars0_[2])
         else: 
             str_nam = None
             nits = list()
             has_adj = False
             has_proper_name = False
             t = name.begin_token
             while t is not None: 
                 if (t.morph.class0_.is_adjective or t.morph.class0_.is_conjunction): 
                     has_adj = True
                 if ((isinstance(t, TextToken)) and not t.is_hiphen): 
                     if (name.termin is not None): 
                         nits.append(name.termin.canonic_text)
                         break
                     elif (not t.chars.is_letter and len(nits) > 0): 
                         nits[len(nits) - 1] += (t).term
                     else: 
                         nits.append((t).term)
                         if (t == name.begin_token and t.getMorphClassInDictionary().is_proper_name): 
                             has_proper_name = True
                 elif ((isinstance(t, ReferentToken)) and name.termin is None): 
                     nits.append(t.getSourceText().upper())
                 if (t == name.end_token): 
                     break
                 t = t.next0_
             if (not has_adj and not has_proper_name): 
                 nits.sort()
             str_nam = Utils.joinStrings(" ", list(nits))
             if (has_proper_name and len(nits) == 2): 
                 Utils.setLengthStringIO(name_alt, 0)
                 print("{0} {1}".format(Utils.toStringStringIO(name_base), nits[1]), end="", file=name_alt, flush=True)
             print(" {0}".format(str_nam), end="", file=name_base, flush=True)
     adj_str = None
     adj_can_be_initial = False
     if (adj is not None): 
         if (adj_gen == MorphGender.UNDEFINED and name is not None and (((name.morph.number) & (MorphNumber.PLURAL))) == (MorphNumber.UNDEFINED)): 
             if (name.morph.gender == MorphGender.FEMINIE or name.morph.gender == MorphGender.MASCULINE or name.morph.gender == MorphGender.NEUTER): 
                 adj_gen = name.morph.gender
         if (name is not None and (((name.morph.number) & (MorphNumber.PLURAL))) != (MorphNumber.UNDEFINED)): 
             s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new209(MorphClass.ADJECTIVE, MorphNumber.PLURAL))
         elif (adj_gen != MorphGender.UNDEFINED): 
             s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, adj_gen))
         elif ((((adj.morph.gender) & (gen))) == (MorphGender.UNDEFINED)): 
             s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, adj.morph.gender))
         else: 
             s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, gen))
         adj_str = s
         if (name is not None and (Utils.indexOfList(sli, adj, 0) < Utils.indexOfList(sli, name, 0))): 
             if (adj.end_token.isChar('.') and adj.length_char <= 3 and not adj.begin_token.chars.is_all_lower): 
                 adj_can_be_initial = True
     s1 = Utils.toStringStringIO(name_base).strip()
     s2 = Utils.toStringStringIO(name_alt).strip()
     if (len(s1) < 3): 
         if (street.number is not None): 
             if (adj_str is not None): 
                 if (adj.is_abridge): 
                     return None
                 street.addSlot(StreetReferent.ATTR_NAME, adj_str, False, 0)
         elif (adj_str is None): 
             if (len(s1) < 1): 
                 return None
             if (is_micro_raion): 
                 street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
                 if (not Utils.isNullOrEmpty(s2)): 
                     street.addSlot(StreetReferent.ATTR_NAME, s2, False, 0)
             else: 
                 return None
         else: 
             if (adj.is_abridge): 
                 return None
             street.addSlot(StreetReferent.ATTR_NAME, adj_str, False, 0)
     elif (adj_can_be_initial): 
         street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
         street.addSlot(StreetReferent.ATTR_NAME, MiscHelper.getTextValue(adj.begin_token, name.end_token, GetTextAttr.NO), False, 0)
         street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0)
     elif (adj_str is None): 
         street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
     else: 
         street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0)
     if (name_alt.tell() > 0): 
         s1 = Utils.toStringStringIO(name_alt).strip()
         if (adj_str is None): 
             street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
         else: 
             street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0)
     if (name_alt2 is not None): 
         if (adj_str is None): 
             if (for_metro and noun is not None): 
                 street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(alt_noun.termin.canonic_text, name_alt2.strip()), False, 0)
             else: 
                 street.addSlot(StreetReferent.ATTR_NAME, name_alt2.strip(), False, 0)
         else: 
             street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, name_alt2.strip()), False, 0)
     if (name is not None and name.alt_value2 is not None): 
         street.addSlot(StreetReferent.ATTR_NAME, name.alt_value2, False, 0)
     if ((name is not None and adj is None and name.exist_street is not None) and not for_metro): 
         for n in name.exist_street.names: 
             street.addSlot(StreetReferent.ATTR_NAME, n, False, 0)
     if (alt_noun is not None and not for_metro): 
         street.addSlot(StreetReferent.ATTR_TYP, alt_noun.termin.canonic_text.lower(), False, 0)
     if (noun.termin.canonic_text == "ПЛОЩАДЬ" or noun.termin.canonic_text == "КВАРТАЛ" or noun.termin.canonic_text == "ПЛОЩА"): 
         res.is_doubt = True
         if (name is not None and name.is_in_dictionary): 
             res.is_doubt = False
         elif (alt_noun is not None or for_metro): 
             res.is_doubt = False
         elif (res.begin_token.previous is None or MiscLocationHelper.checkGeoObjectBefore(res.begin_token.previous)): 
             if (res.end_token.next0_ is None or AddressItemToken.checkHouseAfter(res.end_token.next0_, False, True)): 
                 res.is_doubt = False
     if (LanguageHelper.endsWith(noun.termin.canonic_text, "ГОРОДОК")): 
         for s in street.slots: 
             if (s.type_name == StreetReferent.ATTR_TYP): 
                 street.uploadSlot(s, "микрорайон")
             elif (s.type_name == StreetReferent.ATTR_NAME): 
                 street.uploadSlot(s, "{0} {1}".format(noun.termin.canonic_text, s.value))
         if (street.findSlot(StreetReferent.ATTR_NAME, None, True) is None): 
             street.addSlot(StreetReferent.ATTR_NAME, noun.termin.canonic_text, False, 0)
     t1 = res.end_token.next0_
     if (t1 is not None and t1.is_comma): 
         t1 = t1.next0_
     non = StreetItemToken.tryParse(t1, None, False, None, False)
     if (non is not None and non.typ == StreetItemType.NOUN and len(street.typs) > 0): 
         if (AddressItemToken.checkHouseAfter(non.end_token.next0_, False, True)): 
             street._correct()
             nams = street.names
             for t in street.typs: 
                 for n in nams: 
                     street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(t.upper(), n), False, 0)
             street.addSlot(StreetReferent.ATTR_TYP, non.termin.canonic_text.lower(), False, 0)
             res.end_token = non.end_token
     if (res.is_doubt): 
         if (noun.is_road): 
             if (street.number is not None and Utils.endsWithString(street.number, "КМ", True)): 
                 res.is_doubt = False
             elif (AddressItemToken.checkKmAfter(res.end_token.next0_)): 
                 res.is_doubt = False
             elif (AddressItemToken.checkKmBefore(res.begin_token.previous)): 
                 res.is_doubt = False
         elif (noun.termin.canonic_text == "ПРОЕЗД" and street.findSlot(StreetReferent.ATTR_NAME, "ПРОЕКТИРУЕМЫЙ", True) is not None): 
             res.is_doubt = False
         tt0 = res.begin_token.previous
         first_pass2733 = True
         while True:
             if first_pass2733: first_pass2733 = False
             else: tt0 = tt0.previous
             if (not (tt0 is not None)): break
             if (tt0.isCharOf(",,") or tt0.is_comma_and): 
                 continue
             str0 = Utils.asObjectOrNull(tt0.getReferent(), StreetReferent)
             if (str0 is not None): 
                 res.is_doubt = False
             break
     if (noun.termin.canonic_text == "КВАРТАЛ" and (res.whitespaces_after_count < 2) and number is None): 
         ait = AddressItemToken.tryParse(res.end_token.next0_, None, False, True, None)
         if (ait is not None and ait.typ == AddressItemToken.ItemType.NUMBER and ait.value is not None): 
             street.addSlot(StreetReferent.ATTR_NUMBER, ait.value, False, 0)
             res.end_token = ait.end_token
     return res
Ejemplo n.º 8
0
 def get_vars(self, key: str) -> typing.List['MorphRuleVariant']:
     """Look up the entry of ``self.morph_vars`` that corresponds to *key*.

     The position of *key* inside ``self.tails`` (as reported by
     ``Utils.indexOfList``, searching from index 0) is used to index
     ``self.morph_vars``.

     Returns:
         ``self.morph_vars[pos]`` when the reported position is
         non-negative; ``None`` otherwise.
     """
     pos = Utils.indexOfList(self.tails, key, 0)
     return None if pos < 0 else self.morph_vars[pos]
Ejemplo n.º 9
0
 def contains_var(self, tail: str) -> bool:
     """Tell whether *tail* is present in ``self.tails``.

     Returns:
         ``True`` when ``Utils.indexOfList`` (searching from index 0)
         reports a non-negative position for *tail*, ``False`` otherwise.
     """
     found_at = Utils.indexOfList(self.tails, tail, 0)
     return not (found_at < 0)
Ejemplo n.º 10
0
 def analyze(res : 'FragToken') -> None:
     """Mark list heads/items among the children of *res* and process them recursively.

     Works in place on ``res`` and ``res.children``; nothing is returned.

     Steps, as implemented below:

     1. A fragment without children is handed to
        ``ListHelper.__analize_list_items`` on its own (only for the kinds
        CHAPTER, CLAUSE, CONTENT, ITEM, SUBITEM, CLAUSEPART, INDENTION) and
        the function returns.
     2. First pass over ``res.children``: starting from an INDENTION child
        that ends with ':' or ';', the following INDENTION children are
        counted; when at least three entries are counted, their ``kind2`` is
        set to LISTHEAD or LISTITEM.
     3. Second pass: children that have their own children are analyzed
        recursively; the others are passed to
        ``ListHelper.__analize_list_items``.
     4. Every child recorded as changed whose kind is CONTENT is replaced in
        ``res.children`` by its own children.

     Args:
         res: fragment whose children are examined and modified.
     """
     # No-op branch; presumably a leftover debugger breakpoint anchor.
     if (res.number == 4): 
         pass
     if (len(res.children) == 0): 
         ki = res.kind
         if (((ki == InstrumentKind.CHAPTER or ki == InstrumentKind.CLAUSE or ki == InstrumentKind.CONTENT) or ki == InstrumentKind.ITEM or ki == InstrumentKind.SUBITEM) or ki == InstrumentKind.CLAUSEPART or ki == InstrumentKind.INDENTION): 
             # Leaf fragment: wrap it in a one-element list so the list-item helper can be reused.
             tmp = list()
             tmp.append(res)
             ListHelper.__analize_list_items(tmp, 0)
         return
     # No-op branch; presumably another leftover debugging anchor.
     if (res.kind == InstrumentKind.CLAUSE and res.number == 12): 
         pass
     # First pass. The first_pass flag emulates a C-style "for (i = 0; i < n; i++)",
     # so every "continue" below still executes the "i += 1" step.
     i = 0
     first_pass3273 = True
     while True:
         if first_pass3273: first_pass3273 = False
         else: i += 1
         if (not (i < len(res.children))): break
         # Candidate start: an INDENTION ending with ':' or ';', or one directly
         # followed by an EDITIONS child that ends with ':' or ';'.
         if (res.children[i].kind == InstrumentKind.INDENTION and ((res.children[i].end_token.is_char_of(":;") or ((((i + 1) < len(res.children)) and res.children[i + 1].kind == InstrumentKind.EDITIONS and res.children[i + 1].end_token.is_char_of(":;")))))): 
             j = 0
             # cou counts the entries of the run, the starting child included.
             cou = 1
             # chr(0) serves as the "no bullet recorded yet" sentinel (checked via ord() below).
             list_bullet = chr(0)
             j = (i + 1)
             first_pass3274 = True
             while True:
                 if first_pass3274: first_pass3274 = False
                 else: j += 1
                 if (not (j < len(res.children))): break
                 ch = res.children[j]
                 # COMMENT and EDITIONS children are stepped over without ending the run.
                 if (ch.kind == InstrumentKind.COMMENT or ch.kind == InstrumentKind.EDITIONS): 
                     continue
                 # Any other non-INDENTION child ends the run.
                 if (ch.kind != InstrumentKind.INDENTION): 
                     break
                 # Entry ending with ';' (directly or via a following EDITIONS child): count it and keep scanning.
                 if (ch.end_token.is_char_of(";") or ((((j + 1) < len(res.children)) and res.children[j + 1].kind == InstrumentKind.EDITIONS and res.children[j + 1].end_token.is_char(';')))): 
                     cou += 1
                     # Record the character at the entry's start when the entry begins with a
                     # text token and ch.chars is not flagged as letters.
                     if ((isinstance(ch.begin_token, TextToken)) and not ch.chars.is_letter): 
                         list_bullet = ch.kit.get_text_character(ch.begin_char)
                     continue
                 # Entry ending with '.': count it, move j past it and stop scanning.
                 if (ch.end_token.is_char_of(".")): 
                     cou += 1
                     j += 1
                     break
                 # Entry ending with ':': scanning stops here in any case.
                 if (ch.end_token.is_char_of(":")): 
                     if ((ord(list_bullet)) != 0 and ch.begin_token.is_char(list_bullet)): 
                         # The entry starts with the recorded bullet: look inside it for a token
                         # that follows a '.' and can start a sentence, and split the entry there.
                         tt = ch.begin_token.next0_
                         while tt is not None and (tt.end_char < ch.end_char): 
                             if (tt.previous.is_char('.') and MiscHelper.can_be_start_of_sentence(tt)): 
                                 # ch keeps the part before tt; ch2 takes the rest and is inserted right after ch.
                                 ch2 = FragToken._new1357(tt, ch.end_token, InstrumentKind.INDENTION, ch.number)
                                 ch.end_token = tt.previous
                                 res.children.insert(j + 1, ch2)
                                 # Increment the number of every INDENTION from the inserted position onwards.
                                 k = j + 1
                                 while k < len(res.children): 
                                     if (res.children[k].kind == InstrumentKind.INDENTION): 
                                         res.children[k].number += 1
                                     k += 1
                                 cou += 1
                                 j += 1
                                 break
                             tt = tt.next0_
                     break
                 # Any other ending: count the entry, move j past it and stop scanning.
                 cou += 1
                 j += 1
                 break
             # Fewer than three counted entries: nothing is marked. i jumps to j and the
             # loop header then advances it once more.
             if (cou < 3): 
                 i = j
                 continue
             # The start does not end with ':' but the preceding child does (and has no kind2 yet):
             # mark that preceding child as the list head.
             if ((i > 0 and not res.children[i].end_token.is_char(':') and res.children[i - 1].kind2 == InstrumentKind.UNDEFINED) and res.children[i - 1].end_token.is_char(':')): 
                 res.children[i - 1].kind2 = InstrumentKind.LISTHEAD
             # Mark the INDENTION children in [i, j); this loop advances the outer index i itself.
             first_pass3275 = True
             while True:
                 if first_pass3275: first_pass3275 = False
                 else: i += 1
                 if (not (i < j)): break
                 ch = res.children[i]
                 if (ch.kind != InstrumentKind.INDENTION): 
                     continue
                 # Ending with ':' (directly or via a following EDITIONS child) -> LISTHEAD, otherwise LISTITEM.
                 if (ch.end_token.is_char(':')): 
                     ch.kind2 = InstrumentKind.LISTHEAD
                 elif (((i + 1) < j) and res.children[i + 1].kind == InstrumentKind.EDITIONS and res.children[i + 1].end_token.is_char(':')): 
                     ch.kind2 = InstrumentKind.LISTHEAD
                 else: 
                     ch.kind2 = InstrumentKind.LISTITEM
     # Second pass: recurse into children that have children; pass the others to the list-item helper.
     changed = list()
     i = 0
     while i < len(res.children): 
         # No-op branch; presumably a leftover debugging anchor.
         if (res.number == 7): 
             pass
         if (len(res.children[i].children) > 0): 
             ListHelper.analyze(res.children[i])
         else: 
             co = ListHelper.__analize_list_items(res.children, i)
             if (co > 0): 
                 changed.append(res.children[i])
                 if (co > 1): 
                     # Remove the co - 1 children that follow position i.
                     del res.children[i + 1:i + 1+co - 1]
                 # NOTE(review): i also advances by co - 1 although those entries were just
                 # deleted; confirm this is intended (the helper is not visible here).
                 i += (co - 1)
         i += 1
     # Walk the changed children from last to first; each CONTENT one that is still in
     # res.children is replaced in place by its own children.
     for i in range(len(changed) - 1, -1, -1):
         if (changed[i].kind == InstrumentKind.CONTENT): 
             j = Utils.indexOfList(res.children, changed[i], 0)
             if (j < 0): 
                 continue
             del res.children[j]
             res.children[j:j] = changed[i].children