Beispiel #1
0
 def get_normal_case_text(self,
                          mc: 'MorphClass' = None,
                          num: 'MorphNumber' = MorphNumber.UNDEFINED,
                          gender: 'MorphGender' = MorphGender.UNDEFINED,
                          keep_chars: bool = False) -> str:
     """Pick the normalized text of this item from its morphological variants.

     The variants in ``self.morph.items`` are scanned in order; the first
     sufficiently good one wins (see the ranking comments below).  When no
     variant yields a value, the text is rebuilt from the underlying tokens.

     Args:
         mc: Requested morph class; adjective-specific handling is applied
             when it is given and ``mc.is_adjective`` is true.
         num: Requested number; ``SINGULAR`` prefers a variant's
             ``single_number_value``, ``PLURAL`` boosts plural variants.
         gender: Requested gender, used when re-inflecting adjectives and
             numeric adjectives.
         keep_chars: Forwarded to ``__corr_chars`` and used to choose
             between ``GetTextAttr.KEEPREGISTER`` and ``GetTextAttr.NO``.

     Returns:
         The chosen text; the final fallback goes through
         ``Utils.ifNotNull(..., "?")`` (presumably "?" when nothing was found).
     """
     # A lone referent token knows how to normalize itself.
     if (isinstance(self.begin_token, ReferentToken)
             and self.begin_token == self.end_token):
         return self.begin_token.get_normal_case_text(
             mc, num, gender, keep_chars)

     want_singular = num == MorphNumber.SINGULAR
     singular_of = {"ДЕТИ": "РЕБЕНОК", "ЛЮДИ": "ЧЕЛОВЕК"}

     best = None
     top_undef = 0      # undef_coef of the last accepted variant
     best_rank = -1     # rank of the last accepted "defined" variant

     for item in self.morph.items:
         var = Utils.asObjectOrNull(item, NounPhraseItemTextVar)
         if var is None:
             continue
         undef = var.undef_coef
         # Skip an uncertain variant once something at least as good is held.
         if undef > 0 and (undef < top_undef or best_rank >= 0):
             continue

         if want_singular and var.single_number_value is not None:
             if (mc is not None
                     and gender in (MorphGender.NEUTER, MorphGender.FEMINIE)
                     and mc.is_adjective):
                 # Re-inflect the singular adjective into the asked gender.
                 info = MorphBaseInfo._new401(
                     MorphClass._new53(mc.value), gender,
                     MorphNumber.SINGULAR, MorphCase.NOMINATIVE,
                     self.morph.language)
                 form = MorphologyService.get_wordform(
                     var.single_number_value, info)
                 if form is not None:
                     best = form
             else:
                 best = var.single_number_value
             if undef == 0:
                 break
             top_undef = undef
             continue

         if Utils.isNullOrEmpty(var.normal_value):
             continue

         if (str.isdigit(var.normal_value[0]) and mc is not None
                 and mc.is_adjective):
             # A numeric value requested as an adjective.
             holder = RefOutArgWrapper(0)
             if Utils.tryParseInt(var.normal_value, holder):
                 number = holder.value
                 if want_singular or number == 1:
                     target = MorphNumber.SINGULAR
                 else:
                     target = MorphNumber.PLURAL
                 form = NumberHelper.get_number_adjective(
                     number, gender, target)
                 if form is not None:
                     best = form
                     if undef == 0:
                         break
                     top_undef = undef
                     continue

         candidate = item.normal_value
         if want_singular:
             # Two plurals whose singular is a different word.
             candidate = singular_of.get(candidate, candidate)

         top_undef = undef
         if undef > 0:
             best = candidate
             continue

         rank = 0
         adjective_exact = (mc is not None and mc.is_adjective
                            and undef == 0)
         if not adjective_exact:
             if (isinstance(self.begin_token, TextToken)
                     and candidate == self.begin_token.term
                     and item.case_.is_nominative
                     and item.number == MorphNumber.SINGULAR):
                 rank = 1
         if (num == MorphNumber.PLURAL
                 and (var.number & MorphNumber.PLURAL)
                 == MorphNumber.PLURAL):
             rank += 3

         if best is None or rank > best_rank:
             best = candidate
             best_rank = rank
             if rank > 0:
                 break

     if best is not None:
         return self.__corr_chars(best, keep_chars)

     # No variant helped: rebuild the text from the tokens themselves.
     single_token = self.begin_token == self.end_token
     text = self.begin_token.get_normal_case_text(
         mc, num, gender, keep_chars)
     if not single_token:
         attr = GetTextAttr.KEEPREGISTER if keep_chars else GetTextAttr.NO
         if text is None:
             text = MiscHelper.get_text_value_of_meta_token(self, attr)
         else:
             tail = MiscHelper.get_text_value(
                 self.begin_token.next0_, self.end_token, attr)
             text = "{0} {1}".format(text, tail)
     return Utils.ifNotNull(text, "?")
Beispiel #2
0
 def find(self, word: str, try_create: bool,
          lang_: 'MorphLang') -> typing.List['DerivateGroup']:
     """Look up the derivate groups registered for ``word``.

     The word is first resolved through the character tree rooted at
     ``self._m_root``.  If that gives nothing, a few suffix rewrites are
     retried recursively, and finally (only when ``try_create`` is true)
     groups are generated by splitting off a prefix.

     Args:
         word: Word to look up; compared against upper-case Cyrillic
             suffixes, so presumably already normalized by the caller.
         try_create: Allow generating new groups via ``create_by_prefix``;
             generated groups are also stored through ``self.add``.
         lang_: When given and not undefined, groups that do not contain
             the word for this language are dropped.

     Returns:
         A non-empty list of groups, or ``None`` when nothing matched.
     """
     if Utils.isNullOrEmpty(word):
         return None

     # Walk the tree one character at a time, unpacking lazy nodes on demand.
     node = self._m_root
     depth = 0
     for ch in word:
         if node.nodes is None:
             break
         holder = RefOutArgWrapper(None)
         if not Utils.tryGetValue(node.nodes, ord(ch), holder):
             break
         node = holder.value
         if node.lazy_pos > 0:
             DeserializeHelper.deserialize_tree_node(
                 self.__m_buf, self, node, True,
                 RefOutArgWrapper(node.lazy_pos))
             node.lazy_pos = 0
         depth += 1
     found = node.groups if depth >= len(word) else None

     matches = None
     if isinstance(found, list):
         matches = list(Utils.asObjectOrNull(found, list))
         flags = [bool(g.is_generated) for g in matches]
         # Generated groups are dropped only when real ones exist as well.
         if any(flags) and not all(flags):
             matches = [g for g, generated in zip(matches, flags)
                        if not generated]
     elif isinstance(found, DerivateGroup):
         matches = [Utils.asObjectOrNull(found, DerivateGroup)]

     if (matches is not None and lang_ is not None
             and not lang_.is_undefined):
         for idx in reversed(range(len(matches))):
             if not matches[idx].contains_word(word, lang_):
                 del matches[idx]
     if matches:
         return matches

     if len(word) < 4:
         return None

     def first_hit(*candidates):
         # Return the first non-None lookup among the candidate spellings.
         for cand in candidates:
             hit = self.find(cand, False, lang_)
             if hit is not None:
                 return hit
         return None

     last, prev, third = word[-1], word[-2], word[-3]

     if last == 'О' or (last == 'И' and prev == 'К'):
         stem = word[:-1]
         variants = [stem + "ИЙ", stem + "ЫЙ"]
         if last == 'О' and prev == 'Н':
             variants.append(stem + "СКИЙ")
         hit = first_hit(*variants)
         if hit is not None:
             return hit
     elif last in ('Я', 'Ь') and prev == 'С':
         stem = word[:-2]
         if stem == "ЯТЬ":
             return None
         hit = first_hit(stem)
         if hit is not None:
             return hit
     elif last == 'Е' and prev == 'Ь':
         hit = first_hit(word[:-2] + "ИЕ")
         if hit is not None:
             return hit
     elif last == 'Й' and third == 'Н' and try_create:
         fourth = word[-4]
         respelled = None
         if fourth == 'Н':
             # Collapse the doubled letter.
             respelled = word[:-4] + word[-3:]
         elif LanguageHelper.is_cyrillic_vowel(fourth):
             # Double the letter after a vowel.
             respelled = word[:-3] + "Н" + word[-3:]
         if respelled is not None:
             hit = first_hit(respelled)
             if hit is not None:
                 return hit

     if last == 'Й' and prev == 'О':
         stem = word[:-2]
         hit = first_hit(stem + "ИЙ", stem + "ЫЙ")
         if hit is not None:
             return hit

     if not try_create:
         return None

     # Try every prefix that leaves at least four characters; the first
     # remainder that resolves decides the result.
     for cut in range(1, len(word) - 3):
         rest = word[cut:]
         base_groups = self.find(rest, False, lang_)
         if base_groups is None:
             continue
         prefix = word[:cut]
         generated = []
         for group in base_groups:
             if group.is_dummy or group.is_generated:
                 continue
             if group.not_generate and len(rest) < 5:
                 continue
             derived = group.create_by_prefix(prefix, lang_)
             if derived is not None:
                 generated.append(derived)
                 self.add(derived)
         return generated or None
     return None
Beispiel #3
0
 def __tryParse(t: 'Token',
                is_in_lit: bool,
                max_char: int = 0) -> typing.List['ReferentToken']:
     if (t is None):
         return None
     is_bracket_regime = False
     if (t.previous is not None and t.previous.isChar('(')):
         is_bracket_regime = True
     blt = BookLinkToken.tryParse(t, 0)
     if (blt is None):
         blt = BookLinkToken.tryParseAuthor(t, FioTemplateType.UNDEFINED)
     if (blt is None and not is_bracket_regime):
         return None
     t0 = t
     coef = 0
     is_electr_res = False
     decree = None
     regtyp = BookLinkAnalyzer.RegionTyp.UNDEFINED
     num = None
     spec_see = None
     book_prev = None
     if (is_bracket_regime):
         regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS
     elif (blt.typ == BookLinkTyp.PERSON):
         if (not is_in_lit):
             return None
         regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS
     elif (blt.typ == BookLinkTyp.NUMBER):
         num = blt.value
         t = blt.end_token.next0_
         if (t is None or t.is_newline_before):
             return None
         if (not t.is_whitespace_before):
             if (isinstance(t, NumberToken)):
                 n = (t).value
                 if ((((n == "3" or n == "0")) and not t.is_whitespace_after
                      and (isinstance(t.next0_, TextToken)))
                         and t.next0_.chars.is_all_lower):
                     pass
                 else:
                     return None
             elif (not ((isinstance(t, TextToken)))
                   or t.chars.is_all_lower):
                 r = t.getReferent()
                 if (isinstance(r, PersonReferent)):
                     pass
                 elif (is_in_lit and r is not None
                       and r.type_name == "DECREE"):
                     pass
                 else:
                     return None
         first_pass2757 = True
         while True:
             if first_pass2757: first_pass2757 = False
             else: t = t.next0_
             if (not (t is not None)): break
             if (isinstance(t, NumberToken)):
                 break
             if (not ((isinstance(t, TextToken)))):
                 break
             if (BracketHelper.canBeStartOfSequence(t, True, False)):
                 break
             if (not t.chars.is_letter):
                 continue
             bbb = BookLinkToken.tryParse(t, 0)
             if (bbb is not None):
                 if (bbb.typ == BookLinkTyp.TAMZE):
                     spec_see = bbb
                     t = bbb.end_token.next0_
                     break
                 if (bbb.typ == BookLinkTyp.SEE):
                     t = bbb.end_token
                     continue
             break
         if (spec_see is not None and spec_see.typ == BookLinkTyp.TAMZE):
             coef += 1
             max0_ = 1000
             tt = t0
             while tt is not None and max0_ > 0:
                 if (isinstance(tt.getReferent(), BookLinkRefReferent)):
                     book_prev = (tt.getReferent()).book
                     break
                 tt = tt.previous
                 max0_ -= 1
         blt1 = BookLinkToken.tryParseAuthor(t, FioTemplateType.UNDEFINED)
         if (blt1 is not None and blt1.typ == BookLinkTyp.PERSON):
             regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS
         else:
             ok = False
             tt = t
             first_pass2758 = True
             while True:
                 if first_pass2758: first_pass2758 = False
                 else: tt = (None if tt is None else tt.next0_)
                 if (not (tt is not None)): break
                 if (tt.is_newline_before):
                     break
                 if (is_in_lit and tt.getReferent() is not None
                         and tt.getReferent().type_name == "DECREE"):
                     ok = True
                     decree = tt
                     break
                 bbb = BookLinkToken.tryParse(tt, 0)
                 if (bbb is None):
                     continue
                 if (bbb.typ == BookLinkTyp.ELECTRONRES):
                     is_electr_res = True
                     ok = True
                     break
                 if (bbb.typ == BookLinkTyp.DELIMETER):
                     tt = bbb.end_token.next0_
                     if (BookLinkToken.tryParseAuthor(
                             tt, FioTemplateType.UNDEFINED) is not None):
                         ok = True
                         break
                     bbb = BookLinkToken.tryParse(tt, 0)
                     if (bbb is not None):
                         if (bbb.typ == BookLinkTyp.EDITORS
                                 or bbb.typ == BookLinkTyp.TRANSLATE
                                 or bbb.typ == BookLinkTyp.SOSTAVITEL):
                             ok = True
                             break
             if (not ok and not is_in_lit):
                 if (BookLinkToken.checkLinkBefore(t0, num)):
                     pass
                 else:
                     return None
             regtyp = BookLinkAnalyzer.RegionTyp.NAME
     else:
         return None
     res = BookLinkReferent()
     corr_authors = list()
     t00 = t
     blt00 = None
     start_of_name = None
     prev_pers_templ = FioTemplateType.UNDEFINED
     if (regtyp == BookLinkAnalyzer.RegionTyp.AUTHORS):
         first_pass2759 = True
         while True:
             if first_pass2759: first_pass2759 = False
             else: t = t.next0_
             if (not (t is not None)): break
             if (max_char > 0 and t.begin_char >= max_char):
                 break
             if (t.isCharOf(".;") or t.is_comma_and):
                 continue
             if (t.isChar('/')):
                 break
             if ((t.isChar('(') and t.next0_ is not None
                  and t.next0_.isValue("EDS", None))
                     and t.next0_.next0_ is not None
                     and t.next0_.next0_.isChar(')')):
                 t = t.next0_.next0_.next0_
                 break
             blt = BookLinkToken.tryParseAuthor(t, prev_pers_templ)
             if (blt is None and t.previous is not None
                     and t.previous.is_and):
                 blt = BookLinkToken.tryParseAuthor(
                     t.previous, FioTemplateType.UNDEFINED)
             if (blt is None):
                 if ((isinstance(t.getReferent(), OrganizationReferent))
                         and blt00 is not None):
                     bbb2 = BookLinkToken.tryParse(t.next0_, 0)
                     if (bbb2 is not None):
                         if (bbb2.typ == BookLinkTyp.YEAR):
                             res.addSlot(BookLinkReferent.ATTR_AUTHOR,
                                         t.getReferent(), False, 0)
                             res.year = int(bbb2.value)
                             coef += .5
                             t = bbb2.end_token.next0_
                 break
             if (blt.typ == BookLinkTyp.PERSON):
                 tt2 = blt.end_token.next0_
                 bbb2 = BookLinkToken.tryParse(tt2, 0)
                 if (bbb2 is not None):
                     if (bbb2.typ == BookLinkTyp.YEAR):
                         res.year = int(bbb2.value)
                         coef += .5
                         blt.end_token = bbb2.end_token
                         blt00 = (None)
                 if (blt00 is not None
                         and ((blt00.end_token.next0_ == blt.begin_token
                               or blt.begin_token.previous.isChar('.')))):
                     tt11 = blt.end_token.next0_
                     nex = BookLinkToken.tryParse(tt11, 0)
                     if (nex is not None
                             and nex.typ == BookLinkTyp.ANDOTHERS):
                         pass
                     else:
                         if (tt11 is None):
                             break
                         if (tt11.isChar('/') and tt11.next0_ is not None
                                 and tt11.next0_.isChar('/')):
                             break
                         if (tt11.isChar(':')):
                             break
                         if ((str(blt).find('.') < 0)
                                 and str(blt00).find('.') > 0):
                             break
                         if ((isinstance(tt11, TextToken))
                                 and tt11.chars.is_all_lower):
                             break
                         if (tt11.isCharOf(",.;")
                                 and tt11.next0_ is not None):
                             tt11 = tt11.next0_
                         nex = BookLinkToken.tryParse(tt11, 0)
                         if (nex is not None
                                 and nex.typ != BookLinkTyp.PERSON
                                 and nex.typ != BookLinkTyp.ANDOTHERS):
                             break
                 elif (
                     (blt00 is not None
                      and blt00.person_template != FioTemplateType.UNDEFINED
                      and blt.person_template != blt00.person_template)
                         and blt.person_template
                         == FioTemplateType.NAMESURNAME):
                     if (blt.end_token.next0_ is None
                             or not blt.end_token.next0_.is_comma_and):
                         break
                     if (BookLinkToken.tryParseAuthor(
                             blt.end_token.next0_.next0_,
                             FioTemplateType.UNDEFINED) is not None):
                         pass
                     else:
                         break
                 if (blt00 is None and blt.person_template
                         == FioTemplateType.NAMESURNAME):
                     tt = blt.end_token.next0_
                     if (tt is not None and tt.is_hiphen):
                         tt = tt.next0_
                     if (isinstance(tt, NumberToken)):
                         break
                 BookLinkAnalyzer.__addAuthor(res, blt)
                 coef += 1
                 t = blt.end_token
                 if (isinstance(t.getReferent(), PersonReferent)):
                     corr_authors.append(
                         Utils.asObjectOrNull(t, ReferentToken))
                 blt00 = blt
                 prev_pers_templ = blt.person_template
                 start_of_name = blt.start_of_name
                 if ((start_of_name) is not None):
                     t = t.next0_
                     break
                 continue
             if (blt.typ == BookLinkTyp.ANDOTHERS):
                 coef += .5
                 t = blt.end_token.next0_
                 res.authors_and_other = True
                 break
             break
     if (t is None):
         return None
     if ((t.is_newline_before and t != t0 and num is None) and res.findSlot(
             BookLinkReferent.ATTR_AUTHOR, None, True) is None):
         return None
     if (start_of_name is None):
         if (t.chars.is_all_lower):
             coef -= (1)
         if (t.chars.is_latin_letter and not is_electr_res and num is None):
             if (res.getSlotValue(BookLinkReferent.ATTR_AUTHOR) is None):
                 return None
     tn0 = t
     tn1 = None
     uri = None
     next_num = None
     wrapnn393 = RefOutArgWrapper(0)
     inoutres394 = Utils.tryParseInt(Utils.ifNotNull(num, ""), wrapnn393)
     nn = wrapnn393.value
     if (inoutres394):
         next_num = str((nn + 1))
     br = (BracketHelper.tryParse(
         t,
         Utils.valToEnum(
             (BracketParseAttr.CANCONTAINSVERBS) |
             (BracketParseAttr.CANBEMANYLINES), BracketParseAttr), 100)
           if BracketHelper.canBeStartOfSequence(t, True, False) else None)
     if (br is not None):
         t = t.next0_
     pages = None
     first_pass2760 = True
     while True:
         if first_pass2760: first_pass2760 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (max_char > 0 and t.begin_char >= max_char):
             break
         if (br is not None and br.end_token == t):
             tn1 = t
             break
         tit = TitleItemToken.tryAttach(t)
         if (tit is not None):
             if ((tit.typ == TitleItemToken.Types.TYP and tn0 == t
                  and br is None) and BracketHelper.canBeStartOfSequence(
                      tit.end_token.next0_, True, False)):
                 br = BracketHelper.tryParse(tit.end_token.next0_,
                                             BracketParseAttr.NO, 100)
                 if (br is not None):
                     coef += (1)
                     if (num is not None):
                         coef += 1
                     tn0 = br.begin_token
                     tn1 = br.end_token
                     res.typ = tit.value.lower()
                     t = br.end_token.next0_
                     break
         if (t.is_newline_before and t != tn0):
             if (br is not None and (t.end_char < br.end_char)):
                 pass
             elif (not MiscHelper.canBeStartOfSentence(t)):
                 pass
             else:
                 if (t.newlines_before_count > 1):
                     break
                 if ((isinstance(t, NumberToken)) and num is not None
                         and (t).int_value is not None):
                     if (num == str(((t).int_value - 1))):
                         break
                 elif (num is not None):
                     pass
                 else:
                     nnn = NounPhraseHelper.tryParse(
                         t.previous,
                         Utils.valToEnum(
                             ((NounPhraseParseAttr.PARSEPREPOSITION) |
                              (NounPhraseParseAttr.PARSEADVERBS) |
                              (NounPhraseParseAttr.PARSENUMERICASADJECTIVE))
                             | (NounPhraseParseAttr.MULTILINES),
                             NounPhraseParseAttr), 0)
                     if (nnn is not None and nnn.end_char >= t.end_char):
                         pass
                     else:
                         break
         if (t.isCharOf(".;") and t.whitespaces_after_count > 0):
             tit = TitleItemToken.tryAttach(t.next0_)
             if ((tit) is not None):
                 if (tit.typ == TitleItemToken.Types.TYP):
                     break
             stop = True
             words = 0
             notwords = 0
             tt = t.next0_
             first_pass2761 = True
             while True:
                 if first_pass2761: first_pass2761 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 blt0 = BookLinkToken.tryParse(tt, 0)
                 if (blt0 is None):
                     if (tt.is_newline_before):
                         break
                     if ((isinstance(tt, TextToken)) and
                             not tt.getMorphClassInDictionary().is_undefined
                         ):
                         words += 1
                     else:
                         notwords += 1
                     if (words > 6 and words > (notwords * 4)):
                         stop = False
                         break
                     continue
                 if ((blt0.typ == BookLinkTyp.DELIMETER
                      or blt0.typ == BookLinkTyp.TRANSLATE
                      or blt0.typ == BookLinkTyp.TYPE)
                         or blt0.typ == BookLinkTyp.GEO
                         or blt0.typ == BookLinkTyp.PRESS):
                     stop = False
                 break
             if (br is not None
                     and br.end_token.previous.end_char > t.end_char):
                 stop = False
             if (stop):
                 break
         if (t == decree):
             t = t.next0_
             break
         blt = BookLinkToken.tryParse(t, 0)
         if (blt is None):
             tn1 = t
             continue
         if (blt.typ == BookLinkTyp.DELIMETER):
             break
         if (((blt.typ == BookLinkTyp.MISC or blt.typ
               == BookLinkTyp.TRANSLATE or blt.typ == BookLinkTyp.NAMETAIL)
              or blt.typ == BookLinkTyp.TYPE
              or blt.typ == BookLinkTyp.VOLUME)
                 or blt.typ == BookLinkTyp.PAGERANGE
                 or blt.typ == BookLinkTyp.PAGES):
             coef += 1
             break
         if (blt.typ == BookLinkTyp.GEO or blt.typ == BookLinkTyp.PRESS):
             if (t.previous.is_hiphen or t.previous.isCharOf(".;")
                     or blt.add_coef > 0):
                 break
         if (blt.typ == BookLinkTyp.YEAR):
             if (t.previous is not None and t.previous.is_comma):
                 break
         if (blt.typ == BookLinkTyp.ELECTRONRES):
             is_electr_res = True
             break
         if (blt.typ == BookLinkTyp.URL):
             if (t == tn0 or t.previous.isCharOf(":.")):
                 is_electr_res = True
                 break
         tn1 = t
     if (tn1 is None and start_of_name is None):
         if (is_electr_res):
             uri_re = BookLinkReferent()
             rt0 = ReferentToken(uri_re, t00, t)
             rts0 = list()
             bref0 = BookLinkRefReferent._new389(uri_re)
             if (num is not None):
                 bref0.number = num
             rt01 = ReferentToken(bref0, t0, rt0.end_token)
             ok = False
             while t is not None:
                 if (t.is_newline_before):
                     break
                 blt0 = BookLinkToken.tryParse(t, 0)
                 if (blt0 is not None):
                     if (isinstance(blt0.ref, UriReferent)):
                         uri_re.addSlot(
                             BookLinkReferent.ATTR_URL,
                             Utils.asObjectOrNull(blt0.ref, UriReferent),
                             False, 0)
                         ok = True
                     t = blt0.end_token
                 rt0.end_token = rt01.end_token = t
                 t = t.next0_
             if (ok):
                 rts0.append(rt01)
                 rts0.append(rt0)
                 return rts0
         if (decree is not None and num is not None):
             rts0 = list()
             bref0 = BookLinkRefReferent._new389(decree.getReferent())
             if (num is not None):
                 bref0.number = num
             rt01 = ReferentToken(bref0, t0, decree)
             t = decree.next0_
             while t is not None:
                 if (t.is_newline_before):
                     break
                 if (isinstance(t, TextToken)):
                     if ((t).is_pure_verb):
                         return None
                 rt01.end_token = t
                 t = t.next0_
             rts0.append(rt01)
             return rts0
         if (book_prev is not None):
             tt = t
             while tt is not None and ((tt.isCharOf(",.") or tt.is_hiphen)):
                 tt = tt.next0_
             blt0 = BookLinkToken.tryParse(tt, 0)
             if (blt0 is not None and blt0.typ == BookLinkTyp.PAGERANGE):
                 rts0 = list()
                 bref0 = BookLinkRefReferent._new389(book_prev)
                 if (num is not None):
                     bref0.number = num
                 bref0.pages = blt0.value
                 rt00 = ReferentToken(bref0, t0, blt0.end_token)
                 rts0.append(rt00)
                 return rts0
         return None
     if (br is not None
             and ((tn1 == br.end_token or tn1 == br.end_token.previous))):
         tn0 = tn0.next0_
         tn1 = tn1.previous
     if (start_of_name is None):
         while tn0 is not None:
             if (tn0.isCharOf(":,~")):
                 tn0 = tn0.next0_
             else:
                 break
     while tn1 is not None and tn1.begin_char > tn0.begin_char:
         if (tn1.isCharOf(".;,:(~") or tn1.is_hiphen
                 or tn1.isValue("РЕД", None)):
             pass
         else:
             break
         tn1 = tn1.previous
     nam = MiscHelper.getTextValue(
         tn0, tn1,
         Utils.valToEnum(
             (GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER),
             GetTextAttr))
     if (start_of_name is not None):
         if (nam is None or (len(nam) < 3)):
             nam = start_of_name
         else:
             nam = "{0}{1}{2}".format(
                 start_of_name, (" " if tn0.is_whitespace_before else ""),
                 nam)
     if (nam is None):
         return None
     res.name = nam
     if (num is None and not is_in_lit):
         if (len(nam) < 20):
             return None
         coef -= (2)
     if (len(nam) > 500):
         coef -= (math.floor(len(nam) / 500))
     if (is_bracket_regime):
         coef -= 1
     if (len(nam) > 200):
         if (num is None):
             return None
         if (res.findSlot(BookLinkReferent.ATTR_AUTHOR, None, True) is None
                 and not BookLinkToken.checkLinkBefore(t0, num)):
             return None
     en = 0
     ru = 0
     ua = 0
     cha = 0
     nocha = 0
     chalen = 0
     lt0 = tn0
     lt1 = tn1
     if (tn1 is None):
         if (t is None):
             return None
         lt0 = t0
         lt1 = t
         tn1 = t.previous
     tt = lt0
     while tt is not None and tt.end_char <= lt1.end_char:
         if ((isinstance(tt, TextToken)) and tt.chars.is_letter):
             if (tt.chars.is_latin_letter):
                 en += 1
             elif (tt.morph.language.is_ua):
                 ua += 1
             elif (tt.morph.language.is_ru):
                 ru += 1
             if (tt.length_char > 2):
                 cha += 1
                 chalen += tt.length_char
         elif (not ((isinstance(tt, ReferentToken)))):
             nocha += 1
         tt = tt.next0_
     if (ru > (ua + en)):
         res.lang = "RU"
     elif (ua > (ru + en)):
         res.lang = "UA"
     elif (en > (ru + ua)):
         res.lang = "EN"
     if (nocha > 3 and nocha > cha and start_of_name is None):
         if (nocha > (math.floor(chalen / 3))):
             coef -= (2)
     if (res.lang == "EN"):
         tt = tn0.next0_
         first_pass2762 = True
         while True:
             if first_pass2762: first_pass2762 = False
             else: tt = tt.next0_
             if (not (tt is not None and (tt.end_char < tn1.end_char))):
                 break
             if (tt.is_comma and tt.next0_ is not None
                     and ((not tt.next0_.chars.is_all_lower or
                           (isinstance(tt.next0_, ReferentToken))))):
                 if (tt.next0_.next0_ is not None
                         and tt.next0_.next0_.is_comma_and):
                     if (isinstance(tt.next0_, ReferentToken)):
                         pass
                     else:
                         continue
                 nam = MiscHelper.getTextValue(
                     tn0, tt.previous,
                     Utils.valToEnum((GetTextAttr.KEEPQUOTES) |
                                     (GetTextAttr.KEEPREGISTER),
                                     GetTextAttr))
                 if (nam is not None and len(nam) > 15):
                     res.name = nam
                     break
     rt = ReferentToken(res, t00, tn1)
     authors = True
     edits = False
     br = (None)
     first_pass2763 = True
     while True:
         if first_pass2763: first_pass2763 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (max_char > 0 and t.begin_char >= max_char):
             break
         if (BracketHelper.canBeStartOfSequence(t, False, False)):
             br = BracketHelper.tryParse(t, BracketParseAttr.CANBEMANYLINES,
                                         100)
             if (br is not None and br.length_char > 300):
                 br = (None)
         blt = BookLinkToken.tryParse(t, 0)
         if (t.is_newline_before and not t.isChar('/')
                 and not t.previous.isChar('/')):
             if (blt is not None and blt.typ == BookLinkTyp.NUMBER):
                 break
             if (t.previous.isCharOf(":")):
                 pass
             elif (blt is not None and ((
                 ((blt.typ == BookLinkTyp.DELIMETER or blt.typ
                   == BookLinkTyp.PAGERANGE or blt.typ == BookLinkTyp.PAGES)
                  or blt.typ == BookLinkTyp.GEO or blt.typ
                  == BookLinkTyp.PRESS) or blt.typ == BookLinkTyp.N))):
                 pass
             elif (num is not None and BookLinkToken.tryParseAuthor(
                     t, FioTemplateType.UNDEFINED) is not None):
                 pass
             elif (num is not None and blt is not None
                   and blt.typ != BookLinkTyp.NUMBER):
                 pass
             elif (br is not None and (t.end_char < br.end_char)
                   and t.begin_char > br.begin_char):
                 pass
             else:
                 ok = False
                 mmm = 50
                 tt = t.next0_
                 while tt is not None and mmm > 0:
                     if (tt.is_newline_before):
                         blt2 = BookLinkToken.tryParse(tt, 0)
                         if (blt2 is not None
                                 and blt2.typ == BookLinkTyp.NUMBER
                                 and blt2.value == next_num):
                             ok = True
                             break
                         if (blt2 is not None):
                             if (blt2.typ == BookLinkTyp.PAGES
                                     or blt2.typ == BookLinkTyp.GEO
                                     or blt2.typ == BookLinkTyp.PRESS):
                                 ok = True
                                 break
                     tt = tt.next0_
                     mmm -= 1
                 if (not ok):
                     npt = NounPhraseHelper.tryParse(
                         t.previous,
                         Utils.valToEnum(
                             ((NounPhraseParseAttr.MULTILINES) |
                              (NounPhraseParseAttr.PARSEADVERBS) |
                              (NounPhraseParseAttr.PARSEPREPOSITION)) |
                             (NounPhraseParseAttr.PARSEVERBS) |
                             (NounPhraseParseAttr.PARSEPRONOUNS),
                             NounPhraseParseAttr), 0)
                     if (npt is not None and npt.end_char >= t.end_char):
                         ok = True
                 if (not ok):
                     break
         rt.end_token = t
         if (blt is not None):
             rt.end_token = blt.end_token
         if (t.isCharOf(".,") or t.is_hiphen):
             continue
         if (t.isValue("С", None)):
             pass
         if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and blt is not None
                 and blt.typ == BookLinkTyp.EDITORS):
             edits = True
             t = blt.end_token
             coef += 1
             continue
         if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and blt is not None
                 and blt.typ == BookLinkTyp.SOSTAVITEL):
             edits = False
             t = blt.end_token
             coef += 1
             continue
         if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and authors):
             blt2 = BookLinkToken.tryParseAuthor(t, prev_pers_templ)
             if (blt2 is not None and blt2.typ == BookLinkTyp.PERSON):
                 prev_pers_templ = blt2.person_template
                 if (not edits):
                     BookLinkAnalyzer.__addAuthor(res, blt2)
                 coef += 1
                 t = blt2.end_token
                 continue
             if (blt2 is not None and blt2.typ == BookLinkTyp.ANDOTHERS):
                 if (not edits):
                     res.authors_and_other = True
                 coef += 1
                 t = blt2.end_token
                 continue
             authors = False
         if (blt is None):
             continue
         if (blt.typ == BookLinkTyp.ELECTRONRES
                 or blt.typ == BookLinkTyp.URL):
             is_electr_res = True
             if (blt.typ == BookLinkTyp.ELECTRONRES):
                 coef += 1.5
             else:
                 coef += .5
             if (isinstance(blt.ref, UriReferent)):
                 res.addSlot(BookLinkReferent.ATTR_URL,
                             Utils.asObjectOrNull(blt.ref, UriReferent),
                             False, 0)
         elif (blt.typ == BookLinkTyp.YEAR):
             if (res.year == 0):
                 res.year = int(blt.value)
                 coef += .5
         elif (blt.typ == BookLinkTyp.DELIMETER):
             coef += 1
             if (blt.length_char == 2):
                 regtyp = BookLinkAnalyzer.RegionTyp.SECOND
             else:
                 regtyp = BookLinkAnalyzer.RegionTyp.FIRST
         elif (
             (((blt.typ == BookLinkTyp.MISC or blt.typ == BookLinkTyp.TYPE
                or blt.typ == BookLinkTyp.PAGES) or blt.typ
               == BookLinkTyp.NAMETAIL or blt.typ == BookLinkTyp.TRANSLATE)
              or blt.typ == BookLinkTyp.PRESS
              or blt.typ == BookLinkTyp.VOLUME)
                 or blt.typ == BookLinkTyp.N):
             coef += 1
         elif (blt.typ == BookLinkTyp.PAGERANGE):
             pages = blt
             coef += 1
             if (is_bracket_regime and blt.end_token.next0_ is not None
                     and blt.end_token.next0_.isChar(')')):
                 coef += (2)
                 if (res.name is not None
                         and res.findSlot(BookLinkReferent.ATTR_AUTHOR,
                                          None, True) is not None):
                     coef = (10)
         elif (blt.typ == BookLinkTyp.GEO
               and ((regtyp == BookLinkAnalyzer.RegionTyp.SECOND
                     or regtyp == BookLinkAnalyzer.RegionTyp.FIRST))):
             coef += 1
         elif (blt.typ == BookLinkTyp.GEO and t.previous is not None
               and t.previous.isChar('.')):
             coef += 1
         elif (blt.typ == BookLinkTyp.ANDOTHERS):
             coef += 1
             if (authors):
                 res.authors_and_other = True
         coef += blt.add_coef
         t = blt.end_token
     if ((coef < 2.5) and num is not None):
         if (BookLinkToken.checkLinkBefore(t0, num)):
             coef += (2)
         elif (BookLinkToken.checkLinkAfter(rt.end_token, num)):
             coef += (1)
     if (rt.length_char > 500):
         return None
     if (is_in_lit):
         coef += 1
     if (coef < 2.5):
         if (is_electr_res and uri is not None):
             pass
         elif (coef >= 2 and is_in_lit):
             pass
         else:
             return None
     for rr in corr_authors:
         pits0 = PersonItemToken.tryAttachList(
             rr.begin_token, None,
             PersonItemToken.ParseAttr.CANINITIALBEDIGIT, 10)
         if (pits0 is None or (len(pits0) < 2)):
             continue
         if (pits0[0].typ == PersonItemToken.ItemType.VALUE):
             exi = False
             for i in range(len(rr.referent.slots) - 1, -1, -1):
                 s = rr.referent.slots[i]
                 if (s.type_name == PersonReferent.ATTR_LASTNAME):
                     ln = Utils.asObjectOrNull(s.value, str)
                     if (ln is None):
                         continue
                     if (ln == pits0[0].value):
                         exi = True
                         continue
                     if (ln.find('-') > 0):
                         ln = ln[0:0 + ln.find('-')]
                     if (pits0[0].begin_token.isValue(ln, None)):
                         del rr.referent.slots[i]
             if (not exi):
                 rr.referent.addSlot(PersonReferent.ATTR_LASTNAME,
                                     pits0[0].value, False, 0)
     rts = list()
     bref = BookLinkRefReferent._new389(res)
     if (num is not None):
         bref.number = num
     rt1 = ReferentToken(bref, t0, rt.end_token)
     if (pages is not None):
         if (pages.value is not None):
             bref.pages = pages.value
         rt.end_token = pages.begin_token.previous
     rts.append(rt1)
     rts.append(rt)
     return rts
 def try_attach(self, t: 'Token', for_ontology: bool = False) -> 'ReferentToken':
     """ Try to build a denomination referent that starts at token ``t``.

     The result of ``__try_attach_spec`` is returned as is when it is not
     None. Otherwise the tokens following ``t`` are scanned and their text
     is collected into a buffer: slash/backslash/underscore/hyphen tokens
     are written as '-', digit number tokens and text tokens of at most
     three characters are written as their source text.

     Args:
         t(Token): first token of the candidate
         for_ontology(bool): when True the scan does not stop at whitespace
             and the final validity checks are skipped

     Returns:
         ReferentToken: token spanning ``t`` .. last accepted token and
         carrying a new DenominationReferent, or None
     """
     if t is None:
         return None
     special = self.__try_attach_spec(t)
     if special is not None:
         return special
     if t.chars.is_all_lower:
         # a lower-case start is accepted only when a number token follows
         # without whitespace ...
         if t.is_whitespace_after or not isinstance(t.next0_, NumberToken):
             return None
         # ... and the token itself starts the text, follows whitespace,
         # or follows ',' / ':'
         if not (t.previous is None or t.is_whitespace_before
                 or t.previous.is_char_of(",:")):
             return None
     buf = io.StringIO()
     last_tok = t
     ends_with_sep = False
     valid = True
     number_parts = 0
     symbol_parts = 0
     cur = t.next0_
     while cur is not None:
         if cur.is_whitespace_before and not for_ontology:
             break
         if cur.is_char_of("/\\_") or cur.is_hiphen:
             # every separator is written as '-'; last_tok is not advanced
             ends_with_sep = True
             buf.write('-')
             cur = cur.next0_
             continue
         ends_with_sep = False
         number = Utils.asObjectOrNull(cur, NumberToken)
         if number is not None:
             if number.typ != NumberSpellingType.DIGIT:
                 break
             last_tok = number
             buf.write(str(number.get_source_text()))
             number_parts += 1
             cur = cur.next0_
             continue
         word = Utils.asObjectOrNull(cur, TextToken)
         if word is None:
             break
         if word.length_char > 3:
             valid = False
             break
         if not str.isalpha(word.term[0]):
             # ',' / ':' or a possible closing bracket ends the scan quietly
             if (word.is_char_of(",:")
                     or BracketHelper.can_be_end_of_sequence(
                         word, False, None, False)):
                 break
             # any other non-letter outside this set invalidates the candidate
             if not word.is_char_of("+*&^#@!"):
                 valid = False
                 break
             symbol_parts += 1
         last_tok = word
         buf.write(str(word.get_source_text()))
         cur = cur.next0_
     if not for_ontology:
         collected = buf.tell()
         if collected < 1 or not valid or ends_with_sep or collected > 12:
             return None
         if Utils.getCharAtStringIO(buf, collected - 1) == '!':
             return None
         # at least one digit group or special symbol is required
         if number_parts + symbol_parts == 0:
             return None
         if not self.__check_attach(t, last_tok):
             return None
     denomination = DenominationReferent()
     denomination._add_value(t, last_tok)
     return ReferentToken(denomination, t, last_tok)
 def org0_(self) -> 'OrganizationReferent':
     """ Organization: the ATTR_ORG slot value passed through
     Utils.asObjectOrNull with OrganizationReferent as the target type. """
     org_slot = self.get_slot_value(TitlePageReferent.ATTR_ORG)
     return Utils.asObjectOrNull(org_slot, OrganizationReferent)
Beispiel #6
0
 def url(self) -> 'UriReferent':
     """ URL: the ATTR_URL slot value passed through Utils.asObjectOrNull
     with UriReferent as the target type. """
     url_slot = self.get_slot_value(BookLinkReferent.ATTR_URL)
     return Utils.asObjectOrNull(url_slot, UriReferent)
Beispiel #7
0
 def url(self) -> 'UriReferent':
     """ URL: the ATTR_URL slot value passed through Utils.asObjectOrNull
     with UriReferent as the target type. """
     url_slot = self.getSlotValue(BookLinkReferent.ATTR_URL)
     return Utils.asObjectOrNull(url_slot, UriReferent)
Beispiel #8
0
 def __tryNounName(li: typing.List['CityItemToken'], oi: 'IntOntologyItem',
                   always: bool) -> 'ReferentToken':
     """ Try to build a city referent from a "type word + name" sequence.

     The first item must be a NOUN or MISC item; it is followed by the name
     item (optionally preceded by a second NOUN when the first type is one
     of the settlement words checked below).

     Side effects: ``oi.value`` is reset to None and may then be set to the
     ontology item of the name; ``li`` is truncated after the name item and
     a leading MISC item is removed from it.

     Args:
         li(typing.List[CityItemToken]): parsed city items, at least two
         oi(IntOntologyItem): out-wrapper receiving the ontology item
         always(bool): when True the candidate is kept even if none of the
             confirmation checks succeeded

     Returns:
         ReferentToken: token carrying the GeoReferent, or None
     """
     oi.value = (None)
     if (li is None or (len(li) < 2)
             or ((li[0].typ != CityItemToken.ItemType.NOUN
                  and li[0].typ != CityItemToken.ItemType.MISC))):
         return None
     # a MISC head never confirms the city by itself
     ok = not li[0].doubtful
     if (ok and li[0].typ == CityItemToken.ItemType.MISC):
         ok = False
     typ = (None
            if li[0].typ == CityItemToken.ItemType.MISC else li[0].value)
     typ2 = (None if li[0].typ == CityItemToken.ItemType.MISC else
             li[0].alt_value)
     prob_adj = None
     # i1 is the index of the name item
     i1 = 1
     if ((typ is not None and li[i1].typ == CityItemToken.ItemType.NOUN and
          ((i1 + 1) < len(li))) and li[0].whitespaces_after_count <= 1 and
         (((LanguageHelper.endsWith(typ, "ПОСЕЛОК")
            or LanguageHelper.endsWith(typ, "СЕЛИЩЕ") or typ == "ДЕРЕВНЯ")
           or typ == "СЕЛО"))):
         # two type nouns in a row: the second becomes typ2, name follows
         if (li[i1].begin_token == li[i1].end_token):
             ooo = AddressItemToken.tryAttachOrg(li[i1].begin_token)
             if (ooo is not None and ooo.ref_token is not None):
                 return None
         typ2 = li[i1].value
         if (typ2 == "СТАНЦИЯ" and li[i1].begin_token.isValue("СТ", None)
                 and ((i1 + 1) < len(li))):
             # "СТ" is also registered as an adjective variant whose form
             # follows the number/gender of the next item
             # NOTE(review): presumably "СТ" may abbreviate "old" - confirm
             m = li[i1 + 1].morph
             if (m.number == MorphNumber.PLURAL):
                 prob_adj = "СТАРЫЕ"
             elif (m.gender == MorphGender.FEMINIE):
                 prob_adj = "СТАРАЯ"
             elif (m.gender == MorphGender.MASCULINE):
                 prob_adj = "СТАРЫЙ"
             else:
                 prob_adj = "СТАРОЕ"
         i1 += 1
     name = Utils.ifNotNull(li[i1].value,
                            ((None if li[i1].onto_item is None else
                              li[i1].onto_item.canonic_text)))
     alt_name = li[i1].alt_value
     if (name is None):
         return None
     mc = li[0].morph
     if (i1 == 1 and li[i1].typ == CityItemToken.ItemType.CITY
             and ((li[0].value == "ГОРОД" or li[0].value == "МІСТО"
                   or li[0].typ == CityItemToken.ItemType.MISC))):
         if (typ is None and ((i1 + 1) < len(li))
                 and li[i1 + 1].typ == CityItemToken.ItemType.NOUN):
             return None
         oi.value = li[i1].onto_item
         if (oi.value is not None):
             name = oi.value.canonic_text
         # oi.value may be None here (see the guard just above), so it must
         # be checked before misc_attr is read
         if (len(name) > 2 or (oi.value is not None
                               and oi.value.misc_attr is not None)):
             if (not li[1].doubtful
                     or ((oi.value is not None
                          and oi.value.misc_attr is not None))):
                 ok = True
             elif (not ok and not li[1].is_newline_before):
                 if (li[0].geo_object_before or li[1].geo_object_after):
                     ok = True
                 elif (StreetDefineHelper.checkStreetAfter(
                         li[1].end_token.next0_)):
                     ok = True
                 elif (li[1].end_token.next0_ is not None
                       and (isinstance(li[1].end_token.next0_.getReferent(),
                                       DateReferent))):
                     ok = True
                 elif ((li[1].whitespaces_before_count < 2)
                       and li[1].onto_item is not None):
                     if (li[1].is_newline_after):
                         ok = True
             if (li[1].doubtful and li[1].end_token.next0_ is not None and
                     li[1].end_token.chars == li[1].end_token.next0_.chars):
                 ok = False
             if (li[0].begin_token.previous is not None
                     and li[0].begin_token.previous.isValue("В", None)):
                 ok = True
         if (not ok):
             ok = CityAttachHelper.checkYearAfter(li[1].end_token.next0_)
         if (not ok):
             ok = CityAttachHelper.checkCityAfter(li[1].end_token.next0_)
     elif ((li[i1].typ == CityItemToken.ItemType.PROPERNAME
            or li[i1].typ == CityItemToken.ItemType.CITY)):
         if (((li[0].value == "АДМИНИСТРАЦИЯ"
               or li[0].value == "АДМІНІСТРАЦІЯ")) and i1 == 1):
             return None
         if (li[i1].is_newline_before):
             if (len(li) != 2):
                 return None
         if (not li[0].doubtful):
             ok = True
             if (len(name) < 2):
                 ok = False
             elif ((len(name) < 3)
                   and li[0].morph.number != MorphNumber.SINGULAR):
                 ok = False
             if (li[i1].doubtful and not li[i1].geo_object_after
                     and not li[0].geo_object_before):
                 if (li[i1].morph.case_.is_genitive):
                     # a doubtful genitive name needs geo context (or the
                     # text boundary) on both sides
                     if (((li[0].begin_token.previous is None
                           or MiscLocationHelper.checkGeoObjectBefore(
                               li[0].begin_token))) and
                         ((li[i1].end_token.next0_ is None
                           or MiscLocationHelper.checkGeoObjectAfter(
                               li[i1].end_token.next0_)
                           or AddressItemToken.checkHouseAfter(
                               li[i1].end_token.next0_, False, True)))):
                         pass
                     else:
                         ok = False
                 else:
                     # rejected when a PERSONPROPERTY is found before the
                     # type word and the name itself parses as a PERSON
                     rt0 = li[i1].kit.processReferent(
                         "PERSONPROPERTY", li[0].begin_token.previous)
                     if (rt0 is not None):
                         rt1 = li[i1].kit.processReferent(
                             "PERSON", li[i1].begin_token)
                         if (rt1 is not None):
                             ok = False
             npt = NounPhraseHelper.tryParse(li[i1].begin_token,
                                             NounPhraseParseAttr.NO, 0)
             if (npt is not None):
                 if (npt.end_token.end_char > li[i1].end_char
                         and len(npt.adjectives) > 0 and
                         not npt.adjectives[0].end_token.next0_.is_comma):
                     ok = False
                 elif (TerrItemToken._m_unknown_regions.tryParse(
                         npt.end_token, TerminParseAttr.FULLWORDSONLY)
                       is not None):
                     # the phrase ends with an "unknown region" term: keep
                     # it only if a non-city geo referent is adjacent
                     ok1 = False
                     if (li[0].begin_token.previous is not None):
                         ttt = li[0].begin_token.previous
                         if (ttt.is_comma and ttt.previous is not None):
                             ttt = ttt.previous
                         geo_ = Utils.asObjectOrNull(
                             ttt.getReferent(), GeoReferent)
                         if (geo_ is not None and not geo_.is_city):
                             ok1 = True
                     if (npt.end_token.next0_ is not None):
                         ttt = npt.end_token.next0_
                         if (ttt.is_comma and ttt.next0_ is not None):
                             ttt = ttt.next0_
                         geo_ = Utils.asObjectOrNull(
                             ttt.getReferent(), GeoReferent)
                         if (geo_ is not None and not geo_.is_city):
                             ok1 = True
                     if (not ok1):
                         return None
             if (li[0].value == "ПОРТ"):
                 if (li[i1].chars.is_all_upper
                         or li[i1].chars.is_latin_letter):
                     return None
         elif (li[0].geo_object_before):
             ok = True
         elif (li[i1].geo_object_after and not li[i1].is_newline_after):
             ok = True
         else:
             ok = CityAttachHelper.checkYearAfter(li[i1].end_token.next0_)
         if (not ok):
             ok = CityAttachHelper.checkStreetAfter(li[i1].end_token.next0_)
         if (not ok and li[0].begin_token.previous is not None
                 and li[0].begin_token.previous.isValue("В", None)):
             ok = True
     else:
         return None
     if (not ok and not always):
         if (MiscLocationHelper.checkNearBefore(li[0].begin_token.previous)
                 is None):
             return None
     # drop every item after the name (no-op when the name is the last one)
     del li[i1 + 1:]
     city = GeoReferent()
     if (oi.value is not None and oi.value.referent is not None):
         # start from a copy of the ontology referent
         city = (Utils.asObjectOrNull(oi.value.referent.clone(),
                                      GeoReferent))
         city.occurrence.clear()
     if (not li[0].morph.case_.is_undefined
             and li[0].morph.gender != MorphGender.UNDEFINED):
         if (li[i1].end_token.morph.class0_.is_adjective
                 and li[i1].begin_token == li[i1].end_token):
             # single-token adjective name: re-derive it using the case
             # and gender of the type word
             nam = ProperNameHelper.getNameEx(
                 li[i1].begin_token, li[i1].end_token, MorphClass.ADJECTIVE,
                 li[0].morph.case_, li[0].morph.gender, False, False)
             if (nam is not None and nam != name):
                 name = nam
     if (li[0].morph.case_.is_nominative):
         # in the nominative case the alternative name is added first
         if (alt_name is not None):
             city._addName(alt_name)
         alt_name = (None)
     city._addName(name)
     if (prob_adj is not None):
         city._addName(prob_adj + " " + name)
     if (alt_name is not None):
         city._addName(alt_name)
         if (prob_adj is not None):
             city._addName(prob_adj + " " + alt_name)
     if (typ is not None):
         city._addTyp(typ)
     elif (not city.is_city):
         city._addTypCity(li[0].kit.base_language)
     if (typ2 is not None):
         city._addTyp(typ2.lower())
     if (li[0].higher_geo is not None
             and GeoOwnerHelper.canBeHigher(li[0].higher_geo, city)):
         city.higher = li[0].higher_geo
     if (li[0].typ == CityItemToken.ItemType.MISC):
         # the MISC head is not part of the resulting token span
         del li[0]
     res = ReferentToken._new719(city, li[0].begin_token,
                                 li[len(li) - 1].end_token, mc)
     if (res.end_token.next0_ is not None and res.end_token.next0_.is_hiphen
             and (isinstance(res.end_token.next0_.next0_, NumberToken))):
         # "<name>-<number>": a digit number below 50 is appended to every
         # name slot and included in the token span
         num = Utils.asObjectOrNull(res.end_token.next0_.next0_,
                                    NumberToken)
         if ((num.typ == NumberSpellingType.DIGIT
              and not num.morph.class0_.is_adjective
              and num.int_value is not None) and (num.int_value < 50)):
             for s in city.slots:
                 if (s.type_name == GeoReferent.ATTR_NAME):
                     city.uploadSlot(s,
                                     "{0}-{1}".format(s.value, num.value))
             res.end_token = num
     if (li[0].begin_token == li[0].end_token
             and li[0].begin_token.isValue("ГОРОДОК", None)):
         if (AddressItemToken.checkHouseAfter(res.end_token.next0_, True,
                                              False)):
             return None
     return res
Beispiel #9
0
 def __tryNameExist(li: typing.List['CityItemToken'], oi: 'IntOntologyItem',
                    always: bool) -> 'ReferentToken':
     """ Checks some special cases where a bare city name (without a type
     word) can be accepted as a city.

     ``oi.value`` is reset to None and then set to the ontology item of the
     first list element once that element is known to be a CITY item.

     Args:
         li(typing.List[CityItemToken]): parsed city items; only the first
             one (and the type of the second) is examined
         oi(IntOntologyItem): out-wrapper receiving the ontology item
         always(bool): when True the name is accepted even if no check
             confirmed it (unless it looks like a person, see below)

     Returns:
         ReferentToken: token over the first item carrying a GeoReferent,
         or None (also for a None or empty ``li``)
     """
     oi.value = (None)
     # an empty list is treated like None instead of failing on li[0]
     if (li is None or len(li) == 0
             or li[0].typ != CityItemToken.ItemType.CITY):
         return None
     oi.value = li[0].onto_item
     tt = Utils.asObjectOrNull(li[0].begin_token, TextToken)
     if (tt is None):
         return None
     ok = False
     nam = (li[0].value if oi.value is None else oi.value.canonic_text)
     if (nam is None):
         return None
     if (nam == "РИМ"):
         if (tt.term == "РИМ"):
             # rejected when the next word is a proper second name
             if ((isinstance(tt.next0_, TextToken)) and
                     tt.next0_.getMorphClassInDictionary().is_proper_secname
                 ):
                 pass
             else:
                 ok = True
         elif (tt.previous is not None and tt.previous.isValue("В", None)
               and tt.term == "РИМЕ"):
             ok = True
     elif (oi.value is not None and oi.value.referent is not None
           and oi.value.owner.is_ext_ontology):
         ok = True
     elif (nam.endswith("ГРАД") or nam.endswith("СК")):
         ok = True
     elif (nam.endswith("TOWN") or nam.startswith("SAN")):
         ok = True
     elif (li[0].chars.is_latin_letter
           and li[0].begin_token.previous is not None
           and ((li[0].begin_token.previous.isValue("IN", None)
                 or li[0].begin_token.previous.isValue("FROM", None)))):
         ok = True
     else:
         # look right on the same line: skip ',' '(' prepositions,
         # conjunctions and misc words; the first other token decides
         tt2 = li[0].end_token.next0_
         while tt2 is not None and not tt2.is_newline_before:
             if ((tt2.isCharOf(",(") or tt2.morph.class0_.is_preposition
                  or tt2.morph.class0_.is_conjunction)
                     or tt2.morph.class0_.is_misc):
                 tt2 = tt2.next0_
                 continue
             if ((isinstance(tt2.getReferent(), GeoReferent))
                     and tt2.chars.is_cyrillic_letter
                     == li[0].chars.is_cyrillic_letter):
                 ok = True
             break
         if (not ok):
             # same idea looking left; all further checks run once, on the
             # first token that is not skipped
             tt2 = li[0].begin_token.previous
             while tt2 is not None and not tt2.is_newline_after:
                 if ((tt2.isCharOf(",)") or tt2.morph.class0_.is_preposition
                      or tt2.morph.class0_.is_conjunction)
                         or tt2.morph.class0_.is_misc):
                     tt2 = tt2.previous
                     continue
                 if ((isinstance(tt2.getReferent(), GeoReferent))
                         and tt2.chars.is_cyrillic_letter
                         == li[0].chars.is_cyrillic_letter):
                     ok = True
                 if (ok):
                     # rejected when a street parses from the name but no
                     # longer parses once the first street item is removed
                     sits = StreetItemToken.tryParseList(
                         li[0].begin_token, None, 10)
                     if (sits is not None and len(sits) > 1):
                         ss = StreetDefineHelper._tryParseStreet(
                             sits, False, False)
                         if (ss is not None):
                             del sits[0]
                             if (StreetDefineHelper._tryParseStreet(
                                     sits, False, False) is None):
                                 ok = False
                 if (ok):
                     if (len(li) > 1 and li[1].typ
                             == CityItemToken.ItemType.PROPERNAME
                             and (li[1].whitespaces_before_count < 3)):
                         ok = False
                     else:
                         mc = li[0].begin_token.getMorphClassInDictionary()
                         if (mc.is_proper_name or mc.is_proper_surname
                                 or mc.is_adjective):
                             ok = False
                         else:
                             # the name must not be the start of a longer
                             # noun phrase
                             npt = NounPhraseHelper.tryParse(
                                 li[0].begin_token, NounPhraseParseAttr.NO,
                                 0)
                             if (npt is not None
                                     and npt.end_char > li[0].end_char):
                                 ok = False
                 if (AddressItemToken.tryAttachOrg(li[0].begin_token)
                         is not None):
                     ok = False
                 break
     if (always):
         # a doubtful surname far from the previous text that parses as a
         # PERSON cancels the forced acceptance
         if (li[0].whitespaces_before_count > 3 and li[0].doubtful
                 and li[0].begin_token.getMorphClassInDictionary(
                 ).is_proper_surname):
             pp = li[0].kit.processReferent("PERSON", li[0].begin_token)
             if (pp is not None):
                 always = False
     if (li[0].begin_token.chars.is_latin_letter
             and li[0].begin_token == li[0].end_token):
         # single Latin word followed (optionally after ',') by a short
         # non-lowercase Latin token
         tt1 = li[0].end_token.next0_
         if (tt1 is not None and tt1.isChar(',')):
             tt1 = tt1.next0_
         if (((isinstance(tt1, TextToken)) and tt1.chars.is_latin_letter and
              (tt1.length_char < 3)) and not tt1.chars.is_all_lower):
             ok = False
     if (not ok and not always):
         return None
     city = None
     if (oi.value is not None
             and (isinstance(oi.value.referent, GeoReferent))
             and not oi.value.owner.is_ext_ontology):
         # the ontology referent itself is used (not a copy)
         city = (Utils.asObjectOrNull(oi.value.referent, GeoReferent))
     else:
         city = GeoReferent()
         city._addName(nam)
         if (oi.value is not None
                 and (isinstance(oi.value.referent, GeoReferent))):
             city._mergeSlots2(
                 Utils.asObjectOrNull(oi.value.referent, GeoReferent),
                 li[0].kit.base_language)
         if (not city.is_city):
             city._addTypCity(li[0].kit.base_language)
     return ReferentToken._new719(city, li[0].begin_token, li[0].end_token,
                                  li[0].morph)
Beispiel #10
0
 def try_attach_org(t: 'Token',
                    can_be_cyr: bool = False) -> 'ReferentToken':
     from pullenti.ner.org.internal.OrgItemNameToken import OrgItemNameToken
     if (t is None):
         return None
     br = False
     if (t.is_char('(') and t.next0_ is not None):
         t = t.next0_
         br = True
     if (isinstance(t, NumberToken)):
         if (t.typ == NumberSpellingType.WORDS
                 and t.morph.class0_.is_adjective
                 and t.chars.is_capital_upper):
             pass
         else:
             return None
     else:
         if (t.chars.is_all_lower):
             return None
         if ((t.length_char < 3) and not t.chars.is_letter):
             return None
         if (not t.chars.is_latin_letter):
             if (not can_be_cyr or not t.chars.is_cyrillic_letter):
                 return None
     t0 = t
     t1 = t0
     nam_wo = 0
     tok = None
     geo_ = None
     add_typ = None
     first_pass3312 = True
     while True:
         if first_pass3312: first_pass3312 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t != t0 and t.whitespaces_before_count > 1):
             break
         if (t.is_char(')')):
             break
         if (t.is_char('(') and t.next0_ is not None):
             if ((isinstance(t.next0_.get_referent(), GeoReferent))
                     and t.next0_.next0_ is not None
                     and t.next0_.next0_.is_char(')')):
                 geo_ = (Utils.asObjectOrNull(t.next0_.get_referent(),
                                              GeoReferent))
                 t = t.next0_.next0_
                 continue
             typ = OrgItemTypeToken.try_attach(t.next0_, True, None)
             if ((typ is not None and typ.end_token.next0_ is not None
                  and typ.end_token.next0_.is_char(')'))
                     and typ.chars.is_latin_letter):
                 add_typ = typ
                 t = typ.end_token.next0_
                 continue
             if (((isinstance(t.next0_, TextToken)) and t.next0_.next0_
                  is not None and t.next0_.next0_.is_char(')'))
                     and t.next0_.chars.is_capital_upper):
                 t = t.next0_.next0_
                 t1 = t
                 continue
             break
         tok = OrgItemEngItem.try_attach(t, can_be_cyr)
         if (tok is None and t.is_char_of(".,") and t.next0_ is not None):
             tok = OrgItemEngItem.try_attach(t.next0_, can_be_cyr)
             if (tok is None and t.next0_.is_char_of(",.")):
                 tok = OrgItemEngItem.try_attach(t.next0_.next0_,
                                                 can_be_cyr)
         if (tok is not None):
             if (tok.length_char == 1 and t0.chars.is_cyrillic_letter):
                 return None
             break
         if (t.is_hiphen and not t.is_whitespace_after
                 and not t.is_whitespace_before):
             continue
         if (t.is_char_of("&+") or t.is_and):
             continue
         if (t.is_char('.')):
             if (t.previous is not None and t.previous.length_char == 1):
                 continue
             elif (MiscHelper.can_be_start_of_sentence(t.next0_)):
                 break
         if (not t.chars.is_latin_letter):
             if (not can_be_cyr or not t.chars.is_cyrillic_letter):
                 break
         if (t.chars.is_all_lower):
             if (t.morph.class0_.is_preposition
                     or t.morph.class0_.is_conjunction):
                 continue
             if (br):
                 continue
             break
         mc = t.get_morph_class_in_dictionary()
         if (mc.is_verb):
             if (t.next0_ is not None
                     and t.next0_.morph.class0_.is_preposition):
                 break
         if (t.next0_ is not None and t.next0_.is_value("OF", None)):
             break
         if (isinstance(t, TextToken)):
             nam_wo += 1
         t1 = t
     if (tok is None):
         return None
     if (t0 == tok.begin_token):
         br2 = BracketHelper.try_parse(tok.end_token.next0_,
                                       BracketParseAttr.NO, 100)
         if (br2 is not None):
             org1 = OrganizationReferent()
             if (tok.short_value is not None):
                 org1.add_type_str(tok.short_value)
             org1.add_type_str(tok.full_value)
             nam1 = MiscHelper.get_text_value(br2.begin_token,
                                              br2.end_token, GetTextAttr.NO)
             if (nam1 is not None):
                 org1.add_name(nam1, True, None)
                 return ReferentToken(org1, t0, br2.end_token)
         return None
     org0_ = OrganizationReferent()
     te = tok.end_token
     if (tok.is_bank):
         t1 = tok.end_token
     if (tok.full_value == "company" and (tok.whitespaces_after_count < 3)):
         tok1 = OrgItemEngItem.try_attach(tok.end_token.next0_, can_be_cyr)
         if (tok1 is not None):
             t1 = tok.end_token
             tok = tok1
             te = tok.end_token
     if (tok.full_value == "company"):
         if (nam_wo == 0):
             return None
     nam = MiscHelper.get_text_value(t0, t1, GetTextAttr.IGNOREARTICLES)
     if (nam == "STOCK" and tok.full_value == "company"):
         return None
     alt_nam = None
     if (Utils.isNullOrEmpty(nam)):
         return None
     if (nam.find('(') > 0):
         i1 = nam.find('(')
         i2 = nam.find(')')
         if (i1 < i2):
             alt_nam = nam
             tai = None
             if ((i2 + 1) < len(nam)):
                 tai = nam[i2:].strip()
             nam = nam[0:0 + i1].strip()
             if (tai is not None):
                 nam = "{0} {1}".format(nam, tai)
     if (tok.is_bank):
         org0_.add_type_str(
             ("bank" if tok.kit.base_language.is_en else "банк"))
         org0_.add_profile(OrgProfile.FINANCE)
         if ((t1.next0_ is not None and t1.next0_.is_value("OF", None)
              and t1.next0_.next0_ is not None)
                 and t1.next0_.next0_.chars.is_latin_letter):
             nam0 = OrgItemNameToken.try_attach(t1.next0_, None, False,
                                                False)
             if (nam0 is not None):
                 te = nam0.end_token
             else:
                 te = t1.next0_.next0_
             nam = MiscHelper.get_text_value(t0, te, GetTextAttr.NO)
             if (isinstance(te.get_referent(), GeoReferent)):
                 org0_._add_geo_object(
                     Utils.asObjectOrNull(te.get_referent(), GeoReferent))
         elif (t0 == t1):
             return None
     else:
         if (tok.short_value is not None):
             org0_.add_type_str(tok.short_value)
         org0_.add_type_str(tok.full_value)
     if (Utils.isNullOrEmpty(nam)):
         return None
     org0_.add_name(nam, True, None)
     if (alt_nam is not None):
         org0_.add_name(alt_nam, True, None)
     res = ReferentToken(org0_, t0, te)
     t = te
     while t.next0_ is not None:
         if (t.next0_.is_char_of(",.")):
             t = t.next0_
         else:
             break
     if (t.whitespaces_after_count < 2):
         tok = OrgItemEngItem.try_attach(t.next0_, can_be_cyr)
         if (tok is not None):
             if (tok.short_value is not None):
                 org0_.add_type_str(tok.short_value)
             org0_.add_type_str(tok.full_value)
             res.end_token = tok.end_token
     if (geo_ is not None):
         org0_._add_geo_object(geo_)
     if (add_typ is not None):
         org0_.add_type(add_typ, False)
     if (not br):
         return res
     t = res.end_token
     if (t.next0_ is None or t.next0_.is_char(')')):
         res.end_token = t.next0_
     else:
         return None
     return res
Beispiel #11
0
 def tryParse(t: 'Token',
              loc_onto: 'IntOntologyCollection') -> 'NamedItemToken':
     """Try to build a NamedItemToken that starts at token ``t``.

     The recognisers below are tried in order and the first one that
     produces a result returns it:

     1. ``t`` is a ReferentToken whose referent is PERSON, PERSONPROPERTY,
        ORGANIZATION or a GeoReferent (any other referent yields None).
     2. ``t`` matches the type termin collection (``__m_types``).
     3. ``t`` matches the name termin collection (``__m_names``).
     4. ``MiscLocationHelper.tryAttachNordWest`` recognises a prefix
        (presumably a compass-direction word - confirm against the helper).
     5. A capitalised noun phrase with adjectives whose noun parses as a
        typed item.
     6. A bracketed sequence starting with a non-lower-case letter token.
     7. Any TextToken of letters, not all lower-case, longer than 2 chars.

     Args:
         t: First token to examine; ``None`` yields ``None``.
         loc_onto: Only forwarded to the recursive calls of this function;
             it is not otherwise read here.

     Returns:
         The recognised NamedItemToken, or ``None`` when nothing matched.
     """
     if (t is None):
         return None
     # 1. Already-recognised entities are wrapped as-is (begin == end == t).
     if (isinstance(t, ReferentToken)):
         r = t.getReferent()
         if ((r.type_name == "PERSON" or r.type_name == "PERSONPROPERTY" or
              (isinstance(r, GeoReferent)))
                 or r.type_name == "ORGANIZATION"):
             return NamedItemToken._new1635(t, t, r, t.morph)
         return None
     # Both dictionaries are probed up front because a type match may also
     # be confirmed as a well-known name (see below).
     typ = NamedItemToken.__m_types.tryParse(t, TerminParseAttr.NO)
     nam = NamedItemToken.__m_names.tryParse(t, TerminParseAttr.NO)
     # 2. Type keyword match.
     if (typ is not None):
         # A type match is only accepted on a plain text token.
         if (not ((isinstance(t, TextToken)))):
             return None
         res = NamedItemToken._new1636(typ.begin_token, typ.end_token,
                                       typ.morph, typ.chars)
         res.kind = (Utils.valToEnum(typ.termin.tag, NamedEntityKind))
         res.type_value = typ.termin.canonic_text
         # If the name dictionary matched the same span with the same kind
         # and the token is not all lower-case, also record the canonical
         # name and mark the item as well known.
         if ((nam is not None and nam.end_token == typ.end_token
              and not t.chars.is_all_lower) and (Utils.valToEnum(
                  nam.termin.tag, NamedEntityKind)) == res.kind):
             res.name_value = nam.termin.canonic_text
             res.is_wellknown = True
         return res
     # 3. Name dictionary match (lower-case occurrences are rejected).
     if (nam is not None):
         if (nam.begin_token.chars.is_all_lower):
             return None
         res = NamedItemToken._new1636(nam.begin_token, nam.end_token,
                                       nam.morph, nam.chars)
         res.kind = (Utils.valToEnum(nam.termin.tag, NamedEntityKind))
         res.name_value = nam.termin.canonic_text
         # The item is marked well known only when ``t`` is not glued to its
         # neighbours: a preceding token must be separated by whitespace,
         # and a following token must either be separated by whitespace or
         # be one of ",.;!?" that is itself followed by whitespace.
         ok = True
         if (not t.is_whitespace_before and t.previous is not None):
             ok = False
         elif (not t.is_whitespace_after and t.next0_ is not None):
             if (t.next0_.isCharOf(",.;!?")
                     and t.next0_.is_whitespace_after):
                 pass
             else:
                 ok = False
         if (ok):
             res.is_wellknown = True
             # tag2 is used as the type only when it is a string.
             res.type_value = (Utils.asObjectOrNull(nam.termin.tag2, str))
         return res
     # 4. Prefix recognised by tryAttachNordWest.
     adj = MiscLocationHelper.tryAttachNordWest(t)
     if (adj is not None):
         if (adj.morph.class0_.is_noun):
             # Noun form: only a multi-token span ending in "ВОСТОК" is
             # accepted as a location; every other noun form is rejected.
             if (adj.end_token.isValue("ВОСТОК", None)):
                 if (adj.begin_token == adj.end_token):
                     return None
                 re = NamedItemToken._new1638(t, adj.end_token, adj.morph)
                 re.kind = NamedEntityKind.LOCATION
                 re.name_value = MiscHelper.getTextValue(
                     t, adj.end_token,
                     GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
                 re.is_wellknown = True
                 return re
             return None
         # The continuation must follow closely.
         if (adj.whitespaces_after_count > 2):
             return None
         # Prefix followed by an already-recognised geo entity: one
         # LOCATION item spanning both, keeping a reference to the entity.
         if ((isinstance(adj.end_token.next0_, ReferentToken)) and
             (isinstance(adj.end_token.next0_.getReferent(), GeoReferent))):
             re = NamedItemToken._new1638(t, adj.end_token.next0_,
                                          adj.end_token.next0_.morph)
             re.kind = NamedEntityKind.LOCATION
             re.name_value = MiscHelper.getTextValue(
                 t, adj.end_token.next0_,
                 GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
             re.is_wellknown = True
             re.ref = adj.end_token.next0_.getReferent()
             return re
         # Otherwise parse what follows recursively; if it is a LOCATION,
         # prepend the prefix (normalised as an adjective in the location's
         # gender) to its name.
         res = NamedItemToken.tryParse(adj.end_token.next0_, loc_onto)
         if (res is not None and res.kind == NamedEntityKind.LOCATION):
             s = adj.getNormalCaseText(MorphClass.ADJECTIVE, True,
                                       res.morph.gender, False)
             if (s is not None):
                 if (res.name_value is None):
                     res.name_value = s.upper()
                 else:
                     res.name_value = "{0} {1}".format(
                         s.upper(), res.name_value)
                     res.type_value = (None)
                 res.begin_token = t
                 res.chars = t.chars
                 res.is_wellknown = True
                 return res
         # No return here: when the recursive result is unusable, control
         # falls through to the generic recognisers below.
     # 5. Capitalised adjective(s) + noun, where the noun alone parses as a
     #    typed item that ends exactly where the noun phrase ends.
     if (t.chars.is_capital_upper
             and not MiscHelper.canBeStartOfSentence(t)):
         npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
         if (npt is not None and len(npt.adjectives) > 0):
             test = NamedItemToken.tryParse(npt.noun.begin_token, loc_onto)
             if (test is not None and test.end_token == npt.end_token
                     and test.type_value is not None):
                 test.begin_token = t
                 # The name is the space-separated list of the adjectives'
                 # normal forms.
                 # NOTE(review): a None result from getNormalCaseText would
                 # be written as the text "None" - confirm it cannot happen.
                 tmp = io.StringIO()
                 for a in npt.adjectives:
                     s = a.getNormalCaseText(MorphClass.ADJECTIVE, True,
                                             test.morph.gender, False)
                     if (tmp.tell() > 0):
                         print(' ', end="", file=tmp)
                     print(s, end="", file=tmp)
                 test.name_value = Utils.toStringStringIO(tmp)
                 test.chars = t.chars
                 if (test.kind == NamedEntityKind.LOCATION):
                     test.is_wellknown = True
                 return test
     # 6. Bracketed name: opening bracket followed by a letter token that is
     #    not all lower-case.
     if ((BracketHelper.isBracket(t, True) and t.next0_ is not None
          and t.next0_.chars.is_letter)
             and not t.next0_.chars.is_all_lower):
         br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
         if (br is not None):
             res = NamedItemToken(t, br.end_token)
             res.is_in_bracket = True
             res.name_value = MiscHelper.getTextValue(
                 t, br.end_token, GetTextAttr.NO)
             # If the bracket content is exactly a dictionary name, take
             # its kind and canonical text instead of the raw text.
             nam = NamedItemToken.__m_names.tryParse(
                 t.next0_, TerminParseAttr.NO)
             if (nam is not None
                     and nam.end_token == br.end_token.previous):
                 res.kind = (Utils.valToEnum(nam.termin.tag,
                                             NamedEntityKind))
                 res.is_wellknown = True
                 res.name_value = nam.termin.canonic_text
             return res
     # 7. Fallback: any letter token longer than 2 characters that is not
     #    all lower-case becomes an item with no kind assigned here.
     if (((isinstance(t, TextToken)) and t.chars.is_letter
          and not t.chars.is_all_lower) and t.length_char > 2):
         res = NamedItemToken._new1638(t, t, t.morph)
         str0_ = (t).term
         # Terms ending in "О", "И" or "Ы" are kept verbatim instead of
         # being normalised.
         if (str0_.endswith("О") or str0_.endswith("И")
                 or str0_.endswith("Ы")):
             res.name_value = str0_
         else:
             res.name_value = t.getNormalCaseText(None, False,
                                                  MorphGender.UNDEFINED,
                                                  False)
         res.chars = t.chars
         # Hyphenated compound: no whitespace after ``t``, a hyphen, then a
         # TextToken that has no whitespace after it and agrees with ``t``
         # on being Cyrillic. The item is extended over the second part.
         if (((not t.is_whitespace_after and t.next0_ is not None
               and t.next0_.is_hiphen) and
              (isinstance(t.next0_.next0_, TextToken))
              and not t.next0_.next0_.is_whitespace_after)
                 and t.chars.is_cyrillic_letter
                 == t.next0_.next0_.chars.is_cyrillic_letter):
             res.end_token = t.next0_.next0_
             t = res.end_token
             res.name_value = "{0}-{1}".format(
                 res.name_value,
                 t.getNormalCaseText(None, False, MorphGender.UNDEFINED,
                                     False))
         return res
     return None
 def __try_parse(t: 'Token', lev: int) -> 'BookLinkToken':
     """Try to recognise a single BookLinkToken starting at token ``t``.

     Depending on what is found, the result carries one of the BookLinkTyp
     values used below: NUMBER, YEAR, PAGES, PAGERANGE, GEO, PRESS, URL,
     ELECTRONRES, DELIMETER, TRANSLATE, TAMZE, SEE, N or VOLUME, a type read
     from the termin dictionary, or whatever ``try_parse_author`` returns
     for a person referent.

     Args:
         t: First token to examine; ``None`` yields ``None``.
         lev: Recursion depth; calls with ``lev > 3`` return ``None``.

     Returns:
         The recognised BookLinkToken, or ``None`` when nothing matched.
     """
     if (t is None or lev > 3):
         return None
     # "[" ... "]": parse the bracket content recursively and stretch the
     # result over the brackets.
     if (t.is_char('[')):
         re = BookLinkToken.__try_parse(t.next0_, lev + 1)
         if (re is not None and re.end_token.next0_ is not None
                 and re.end_token.next0_.is_char(']')):
             re.begin_token = t
             re.end_token = re.end_token.next0_
             return re
         if (re is not None and re.end_token.is_char(']')):
             re.begin_token = t
             return re
         # SOSTAVITEL / EDITORS results are returned unchanged even though
         # no closing bracket was matched.
         if (re is not None):
             if (re.typ == BookLinkTyp.SOSTAVITEL
                     or re.typ == BookLinkTyp.EDITORS):
                 return re
         # A short bracket sequence (< 30 chars) whose last inner token is a
         # number becomes a NUMBER carrying the inner text.
         br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
         if (br is not None):
             if ((isinstance(br.end_token.previous, NumberToken))
                     and (br.length_char < 30)):
                 return BookLinkToken._new329(
                     t, br.end_token, BookLinkTyp.NUMBER,
                     MiscHelper.get_text_value(br.begin_token.next0_,
                                               br.end_token.previous,
                                               GetTextAttr.NO))
     # NOTE(review): t0 is not read again anywhere in this function.
     t0 = t
     # Already-recognised entities map directly onto link element types.
     if (isinstance(t, ReferentToken)):
         if (isinstance(t.get_referent(), PersonReferent)):
             return BookLinkToken.try_parse_author(
                 t, FioTemplateType.UNDEFINED)
         if (isinstance(t.get_referent(), GeoReferent)):
             return BookLinkToken._new326(t, t, BookLinkTyp.GEO,
                                          t.get_referent())
         if (isinstance(t.get_referent(), DateReferent)):
             dr = Utils.asObjectOrNull(t.get_referent(), DateReferent)
             # A date counts as a YEAR when it has exactly one slot and a
             # positive year, or when it has a positive year and directly
             # follows a comma.
             if (len(dr.slots) == 1 and dr.year > 0):
                 return BookLinkToken._new329(t, t, BookLinkTyp.YEAR,
                                              str(dr.year))
             if (dr.year > 0 and t.previous is not None
                     and t.previous.is_comma):
                 return BookLinkToken._new329(t, t, BookLinkTyp.YEAR,
                                              str(dr.year))
         if (isinstance(t.get_referent(), OrganizationReferent)):
             org0_ = Utils.asObjectOrNull(t.get_referent(),
                                          OrganizationReferent)
             if (org0_.kind == OrganizationKind.PRESS):
                 return BookLinkToken._new326(t, t, BookLinkTyp.PRESS,
                                              org0_)
         if (isinstance(t.get_referent(), UriReferent)):
             uri = Utils.asObjectOrNull(t.get_referent(), UriReferent)
             # Only http/https/ftp URIs, or URIs without a scheme, qualify.
             if ((uri.scheme == "http" or uri.scheme == "https"
                  or uri.scheme == "ftp") or uri.scheme is None):
                 return BookLinkToken._new326(t, t, BookLinkTyp.URL, uri)
     # Dictionary keywords.
     tok_ = BookLinkToken.__m_termins.try_parse(t, TerminParseAttr.NO)
     if (tok_ is not None):
         typ_ = Utils.valToEnum(tok_.termin.tag, BookLinkTyp)
         ok = True
         # TYPE / NAMETAIL / ELECTRONRES keywords are accepted only right
         # after one of ".:[" or a hyphen.
         if (typ_ == BookLinkTyp.TYPE or typ_ == BookLinkTyp.NAMETAIL
                 or typ_ == BookLinkTyp.ELECTRONRES):
             if (t.previous is not None and
                 ((t.previous.is_char_of(".:[") or t.previous.is_hiphen))):
                 pass
             else:
                 ok = False
         if (ok):
             return BookLinkToken._new329(t, tok_.end_token, typ_,
                                          tok_.termin.canonic_text)
         # Reached only when the context check above failed: an ELECTRONRES
         # keyword is still accepted if, after skipping non-letter text
         # tokens, the next token is a URI referent.
         if (typ_ == BookLinkTyp.ELECTRONRES):
             tt = tok_.end_token.next0_
             first_pass3019 = True
             while True:
                 if first_pass3019: first_pass3019 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if ((isinstance(tt, TextToken))
                         and not tt.chars.is_letter):
                     continue
                 if (isinstance(tt.get_referent(), UriReferent)):
                     return BookLinkToken._new326(t, tt,
                                                  BookLinkTyp.ELECTRONRES,
                                                  tt.get_referent())
                 break
     # "/" or "//" delimiter.
     if (t.is_char('/')):
         res = BookLinkToken._new329(t, t, BookLinkTyp.DELIMETER, "/")
         if (t.next0_ is not None and t.next0_.is_char('/')):
             res.end_token = t.next0_
             res.value = "//"
         # A slash with no whitespace on either side is kept only if one of
         # the next three tokens parses as something other than a NUMBER.
         if (not t.is_whitespace_before and not t.is_whitespace_after):
             coo = 3
             no = True
             tt = t.next0_
             while tt is not None and coo > 0:
                 # NOTE(review): this calls BookLinkToken.try_parse, whereas
                 # every other recursion here uses __try_parse - confirm
                 # that is intended.
                 vvv = BookLinkToken.try_parse(tt, lev + 1)
                 if (vvv is not None and vvv.typ != BookLinkTyp.NUMBER):
                     no = False
                     break
                 tt = tt.next0_
                 coo -= 1
             if (no):
                 return None
         return res
     # Number written in digits.
     if ((isinstance(t, NumberToken)) and t.int_value is not None
             and t.typ == NumberSpellingType.DIGIT):
         res = BookLinkToken._new329(t, t, BookLinkTyp.NUMBER, str(t.value))
         val = t.int_value
         # Values in [1930, 2030) are typed as YEAR.
         if (val >= 1930 and (val < 2030)):
             res.typ = BookLinkTyp.YEAR
         # Absorb a trailing '.', or a single non-letter character that is
         # followed by whitespace.
         if (t.next0_ is not None and t.next0_.is_char('.')):
             res.end_token = t.next0_
         elif ((t.next0_ is not None and t.next0_.length_char == 1
                and not t.next0_.chars.is_letter)
               and t.next0_.is_whitespace_after):
             res.end_token = t.next0_
         elif (isinstance(t.next0_, TextToken)):
             # "<number> <page abbreviation>" is a page count.
             term = t.next0_.term
             if (((term == "СТР" or term == "C" or term == "С")
                  or term == "P" or term == "S") or term == "PAGES"):
                 res.end_token = t.next0_
                 res.typ = BookLinkTyp.PAGES
                 res.value = str(t.value)
         return res
     if (isinstance(t, TextToken)):
         term = t.term
         # One of the listed page / volume words followed by a number gives
         # a PAGERANGE.
         if (((((
             ((term == "СТР" or term == "C" or term == "С") or term == "ТОМ"
              or term == "T") or term == "Т" or term == "P") or term == "PP"
                or term == "V") or term == "VOL" or term == "S")
              or term == "СТОР" or t.is_value("PAGE", None))
                 or t.is_value("СТРАНИЦА", "СТОРІНКА")):
             # Skip the punctuation between the word and the number.
             tt = t.next0_
             while tt is not None:
                 if (tt.is_char_of(".:~")):
                     tt = tt.next0_
                 else:
                     break
             if (isinstance(tt, NumberToken)):
                 res = BookLinkToken._new328(t, tt, BookLinkTyp.PAGERANGE)
                 tt0 = tt
                 tt1 = tt
                 tt = tt.next0_
                 # Extend over further numbers joined by ',' or a hyphen.
                 first_pass3020 = True
                 while True:
                     if first_pass3020: first_pass3020 = False
                     else: tt = tt.next0_
                     if (not (tt is not None)): break
                     if (tt.is_char_of(",") or tt.is_hiphen):
                         if (isinstance(tt.next0_, NumberToken)):
                             tt = tt.next0_
                             res.end_token = tt
                             tt1 = tt
                             continue
                     break
                 res.value = MiscHelper.get_text_value(
                     tt0, tt1, GetTextAttr.NO)
                 return res
         # Short abbreviations typed as GEO (presumably publication-city
         # abbreviations - confirm).
         if ((term == "M" or term == "М" or term == "СПБ") or term == "K"
                 or term == "К"):
             if (t.next0_ is not None and t.next0_.is_char_of(":;")):
                 re = BookLinkToken._new328(t, t.next0_, BookLinkTyp.GEO)
                 return re
             if (t.next0_ is not None and t.next0_.is_char_of(".")):
                 res = BookLinkToken._new328(t, t.next0_, BookLinkTyp.GEO)
                 # After the '.', accept ':' / ';' (absorbed), a number, or
                 # a comma followed by a number; anything else rejects the
                 # abbreviation.
                 if (t.next0_.next0_ is not None
                         and t.next0_.next0_.is_char_of(":;")):
                     res.end_token = t.next0_.next0_
                 elif (t.next0_.next0_ is not None
                       and (isinstance(t.next0_.next0_, NumberToken))):
                     pass
                 elif (t.next0_.next0_ is not None
                       and t.next0_.next0_.is_comma and
                       (isinstance(t.next0_.next0_.next0_, NumberToken))):
                     pass
                 else:
                     return None
                 return res
         # "ПЕР" / "ПЕРЕВ" / "ПЕРЕВОД", an optional '.', then "C"/"С":
         # TRANSLATE ending on the token that follows the "C"/"С".
         if (term == "ПЕР" or term == "ПЕРЕВ" or term == "ПЕРЕВОД"):
             tt = t
             if (tt.next0_ is not None and tt.next0_.is_char('.')):
                 tt = tt.next0_
             if (tt.next0_ is not None
                     and ((tt.next0_.is_value("C", None)
                           or tt.next0_.is_value("С", None)))):
                 tt = tt.next0_
                 if (tt.next0_ is None or tt.whitespaces_after_count > 2):
                     return None
                 re = BookLinkToken._new328(t, tt.next0_,
                                            BookLinkTyp.TRANSLATE)
                 return re
         # "ТАМ" / "ТАМЖЕ", optionally followed by "ЖЕ".
         if (term == "ТАМ" or term == "ТАМЖЕ"):
             res = BookLinkToken._new328(t, t, BookLinkTyp.TAMZE)
             if (t.next0_ is not None and t.next0_.is_value("ЖЕ", None)):
                 res.end_token = t.next0_
             return res
         # SEE markers.
         if (((term == "СМ" or term == "CM" or term == "НАПР")
              or term == "НАПРИМЕР" or term == "SEE") or term == "ПОДРОБНЕЕ"
                 or term == "ПОДРОБНО"):
             res = BookLinkToken._new328(t, t, BookLinkTyp.SEE)
             # Absorb trailing '.', ':', "ALSO", "В" and "IN", and merge
             # with one directly following SEE element. Note that ``t`` is
             # advanced here.
             t = t.next0_
             first_pass3021 = True
             while True:
                 if first_pass3021: first_pass3021 = False
                 else: t = t.next0_
                 if (not (t is not None)): break
                 if (t.is_char_of(".:") or t.is_value("ALSO", None)):
                     res.end_token = t
                     continue
                 if (t.is_value("В", None) or t.is_value("IN", None)):
                     res.end_token = t
                     continue
                 vvv = BookLinkToken.__try_parse(t, lev + 1)
                 if (vvv is not None and vvv.typ == BookLinkTyp.SEE):
                     res.end_token = vvv.end_token
                     break
                 break
             return res
         # "БОЛЕЕ" directly before a SEE element extends that element
         # backwards to ``t``.
         if (term == "БОЛЕЕ"):
             vvv = BookLinkToken.__try_parse(t.next0_, lev + 1)
             if (vvv is not None and vvv.typ == BookLinkTyp.SEE):
                 vvv.begin_token = t
                 return vvv
         # A number prefix recognised by MiscHelper.check_number_prefix that
         # leads to a NumberToken gives an N element.
         no = MiscHelper.check_number_prefix(t)
         if (isinstance(no, NumberToken)):
             return BookLinkToken._new328(t, no, BookLinkTyp.N)
         # "B"/"В" + number + one of the listed volume / book words: VOLUME.
         if (((term == "B" or term == "В"))
                 and (isinstance(t.next0_, NumberToken))
                 and (isinstance(t.next0_.next0_, TextToken))):
             term2 = t.next0_.next0_.term
             if (((term2 == "Т" or term2 == "T" or term2.startswith("ТОМ"))
                  or term2 == "TT" or term2 == "ТТ") or term2 == "КН"
                     or term2.startswith("КНИГ")):
                 return BookLinkToken._new328(t, t.next0_.next0_,
                                              BookLinkTyp.VOLUME)
     # "(" year ")".
     if (t.is_char('(')):
         if (((isinstance(t.next0_, NumberToken)) and t.next0_.int_value
              is not None and t.next0_.next0_ is not None)
                 and t.next0_.next0_.is_char(')')):
             num = t.next0_.int_value
             # Accepted when 1900 < num <= 2040 and num is not later than
             # the current year (so the result depends on the clock).
             if (num > 1900 and num <= 2040):
                 if (num <= datetime.datetime.now().year):
                     return BookLinkToken._new329(t, t.next0_.next0_,
                                                  BookLinkTyp.YEAR,
                                                  str(num))
         # The same, with the year supplied by a date referent.
         if (((isinstance(t.next0_, ReferentToken)) and
              (isinstance(t.next0_.get_referent(), DateReferent))
              and t.next0_.next0_ is not None)
                 and t.next0_.next0_.is_char(')')):
             num = t.next0_.get_referent().year
             if (num > 0):
                 return BookLinkToken._new329(t, t.next0_.next0_,
                                              BookLinkTyp.YEAR, str(num))
     return None
Beispiel #13
0
 def tryAttachTerritory(
         li: typing.List['TerrItemToken'],
         ad: 'AnalyzerData',
         attach_always: bool = False,
         cits: typing.List['CityItemToken'] = None,
         exists: typing.List['GeoReferent'] = None) -> 'ReferentToken':
     if (li is None or len(li) == 0):
         return None
     ex_obj = None
     new_name = None
     adj_list = list()
     noun = None
     add_noun = None
     rt = TerrAttachHelper.__tryAttachMoscowAO(li, ad)
     if (rt is not None):
         return rt
     if (li[0].termin_item is not None
             and li[0].termin_item.canonic_text == "ТЕРРИТОРИЯ"):
         res2 = TerrAttachHelper.__tryAttachPureTerr(li, ad)
         return res2
     if (len(li) == 2):
         if (li[0].rzd is not None and li[1].rzd_dir is not None):
             rzd = GeoReferent()
             rzd._addName(li[1].rzd_dir)
             rzd._addTypTer(li[0].kit.base_language)
             rzd.addSlot(GeoReferent.ATTR_REF, li[0].rzd.referent, False, 0)
             rzd.addExtReferent(li[0].rzd)
             return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
         if (li[1].rzd is not None and li[0].rzd_dir is not None):
             rzd = GeoReferent()
             rzd._addName(li[0].rzd_dir)
             rzd._addTypTer(li[0].kit.base_language)
             rzd.addSlot(GeoReferent.ATTR_REF, li[1].rzd.referent, False, 0)
             rzd.addExtReferent(li[1].rzd)
             return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
     can_be_city_before = False
     adj_terr_before = False
     if (cits is not None):
         if (cits[0].typ == CityItemToken.ItemType.CITY):
             can_be_city_before = True
         elif (cits[0].typ == CityItemToken.ItemType.NOUN
               and len(cits) > 1):
             can_be_city_before = True
     k = 0
     while k < len(li):
         if (li[k].onto_item is not None):
             if (ex_obj is not None or new_name is not None):
                 break
             if (noun is not None):
                 if (k == 1):
                     if (noun.termin_item.canonic_text == "РАЙОН"
                             or noun.termin_item.canonic_text == "ОБЛАСТЬ"
                             or noun.termin_item.canonic_text == "СОЮЗ"):
                         if (isinstance(li[k].onto_item.referent,
                                        GeoReferent)):
                             if ((li[k].onto_item.referent).is_state):
                                 break
                         ok = False
                         tt = li[k].end_token.next0_
                         if (tt is None):
                             ok = True
                         elif (tt.isCharOf(",.")):
                             ok = True
                         if (not ok):
                             ok = MiscLocationHelper.checkGeoObjectBefore(
                                 li[0].begin_token)
                         if (not ok):
                             adr = AddressItemToken.tryParse(
                                 tt, None, False, False, None)
                             if (adr is not None):
                                 if (adr.typ ==
                                         AddressItemToken.ItemType.STREET):
                                     ok = True
                         if (not ok):
                             break
                     if (li[k].onto_item is not None):
                         if (noun.begin_token.isValue("МО", None)
                                 or noun.begin_token.isValue("ЛО", None)):
                             return None
             ex_obj = li[k]
         elif (li[k].termin_item is not None):
             if (noun is not None):
                 break
             if (li[k].termin_item.is_always_prefix and k > 0):
                 break
             if (k > 0 and li[k].is_doubt):
                 if (li[k].begin_token == li[k].end_token
                         and li[k].begin_token.isValue("ЗАО", None)):
                     break
             if (li[k].termin_item.is_adjective
                     or li[k].is_geo_in_dictionary):
                 adj_list.append(li[k])
             else:
                 if (ex_obj is not None):
                     geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent,
                                                 GeoReferent)
                     if (geo_ is None):
                         break
                     if (ex_obj.is_adjective and
                         ((li[k].termin_item.canonic_text == "СОЮЗ" or
                           li[k].termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                         str0_ = str(ex_obj.onto_item)
                         if (not li[k].termin_item.canonic_text in str0_):
                             return None
                     if (li[k].termin_item.canonic_text == "РАЙОН"
                             or li[k].termin_item.canonic_text == "ОКРУГ"
                             or li[k].termin_item.canonic_text == "КРАЙ"):
                         tmp = io.StringIO()
                         for s in geo_.slots:
                             if (s.type_name == GeoReferent.ATTR_TYPE):
                                 print("{0};".format(s.value),
                                       end="",
                                       file=tmp,
                                       flush=True)
                         if (not li[k].termin_item.canonic_text
                                 in Utils.toStringStringIO(tmp).upper()):
                             if (k != 1 or new_name is not None):
                                 break
                             new_name = li[0]
                             new_name.is_adjective = True
                             new_name.onto_item = (None)
                             ex_obj = (None)
                 noun = li[k]
                 if (k == 0):
                     tt = TerrItemToken.tryParse(li[k].begin_token.previous,
                                                 None, True, False)
                     if (tt is not None and tt.morph.class0_.is_adjective):
                         adj_terr_before = True
         else:
             if (ex_obj is not None):
                 break
             if (new_name is not None):
                 break
             new_name = li[k]
         k += 1
     name = None
     alt_name = None
     full_name = None
     morph_ = None
     if (ex_obj is not None):
         if (ex_obj.is_adjective and not ex_obj.morph.language.is_en
                 and noun is None):
             if (attach_always and ex_obj.end_token.next0_ is not None):
                 npt = NounPhraseHelper.tryParse(ex_obj.begin_token,
                                                 NounPhraseParseAttr.NO, 0)
                 if (ex_obj.end_token.next0_.is_comma_and):
                     pass
                 elif (npt is None):
                     pass
                 else:
                     str0_ = StreetItemToken.tryParse(
                         ex_obj.end_token.next0_, None, False, None, False)
                     if (str0_ is not None):
                         if (str0_.typ == StreetItemType.NOUN
                                 and str0_.end_token == npt.end_token):
                             return None
             else:
                 cit = CityItemToken.tryParse(ex_obj.end_token.next0_, None,
                                              False, None)
                 if (cit is not None
                         and ((cit.typ == CityItemToken.ItemType.NOUN
                               or cit.typ == CityItemToken.ItemType.CITY))):
                     npt = NounPhraseHelper.tryParse(
                         ex_obj.begin_token, NounPhraseParseAttr.NO, 0)
                     if (npt is not None
                             and npt.end_token == cit.end_token):
                         pass
                     else:
                         return None
                 elif (ex_obj.begin_token.isValue("ПОДНЕБЕСНЫЙ", None)):
                     pass
                 else:
                     return None
         if (noun is None and ex_obj.can_be_city):
             cit0 = CityItemToken.tryParseBack(ex_obj.begin_token.previous)
             if (cit0 is not None
                     and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                 return None
         if (ex_obj.is_doubt and noun is None):
             ok2 = False
             if (TerrAttachHelper.__canBeGeoAfter(ex_obj.end_token.next0_)):
                 ok2 = True
             elif (not ex_obj.can_be_surname and not ex_obj.can_be_city):
                 if ((ex_obj.end_token.next0_ is not None
                      and ex_obj.end_token.next0_.isChar(')')
                      and ex_obj.begin_token.previous is not None)
                         and ex_obj.begin_token.previous.isChar('(')):
                     ok2 = True
                 elif (ex_obj.chars.is_latin_letter
                       and ex_obj.begin_token.previous is not None):
                     if (ex_obj.begin_token.previous.isValue("IN", None)):
                         ok2 = True
                     elif (ex_obj.begin_token.previous.isValue("THE", None)
                           and ex_obj.begin_token.previous.previous
                           is not None
                           and ex_obj.begin_token.previous.previous.isValue(
                               "IN", None)):
                         ok2 = True
             if (not ok2):
                 cit0 = CityItemToken.tryParseBack(
                     ex_obj.begin_token.previous)
                 if (cit0 is not None
                         and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                     pass
                 elif (MiscLocationHelper.checkGeoObjectBefore(
                         ex_obj.begin_token.previous)):
                     pass
                 else:
                     return None
         name = ex_obj.onto_item.canonic_text
         morph_ = ex_obj.morph
     elif (new_name is not None):
         if (noun is None):
             return None
         j = 1
         while j < k:
             if (li[j].is_newline_before and not li[0].is_newline_before):
                 return None
             j += 1
         morph_ = noun.morph
         if (new_name.is_adjective):
             if (noun.termin_item.acronym == "АО"):
                 if (noun.begin_token != noun.end_token):
                     return None
                 if (new_name.morph.gender != MorphGender.FEMINIE):
                     return None
             geo_before = None
             tt0 = li[0].begin_token.previous
             if (tt0 is not None and tt0.is_comma_and):
                 tt0 = tt0.previous
             if (not li[0].is_newline_before and tt0 is not None):
                 geo_before = (Utils.asObjectOrNull(tt0.getReferent(),
                                                    GeoReferent))
             if (Utils.indexOfList(li, noun, 0) < Utils.indexOfList(
                     li, new_name, 0)):
                 if (noun.termin_item.is_state):
                     return None
                 if (new_name.can_be_surname and geo_before is None):
                     if (((noun.morph.case_)
                          & new_name.morph.case_).is_undefined):
                         return None
                 if (MiscHelper.isExistsInDictionary(
                         new_name.begin_token, new_name.end_token,
                     (MorphClass.ADJECTIVE) | MorphClass.PRONOUN
                         | MorphClass.VERB)):
                     if (noun.begin_token != new_name.begin_token):
                         if (geo_before is None):
                             if (len(li) == 2
                                     and TerrAttachHelper.__canBeGeoAfter(
                                         li[1].end_token.next0_)):
                                 pass
                             elif (len(li) == 3
                                   and li[2].termin_item is not None
                                   and TerrAttachHelper.__canBeGeoAfter(
                                       li[2].end_token.next0_)):
                                 pass
                             elif (new_name.is_geo_in_dictionary):
                                 pass
                             elif (new_name.end_token.is_newline_after):
                                 pass
                             else:
                                 return None
                 npt = NounPhraseHelper.tryParse(
                     new_name.end_token, NounPhraseParseAttr.PARSEPRONOUNS,
                     0)
                 if (npt is not None
                         and npt.end_token != new_name.end_token):
                     if (len(li) >= 3 and li[2].termin_item is not None
                             and npt.end_token == li[2].end_token):
                         add_noun = li[2]
                     else:
                         return None
                 rtp = new_name.kit.processReferent("PERSON",
                                                    new_name.begin_token)
                 if (rtp is not None):
                     return None
                 name = ProperNameHelper.getNameEx(new_name.begin_token,
                                                   new_name.end_token,
                                                   MorphClass.ADJECTIVE,
                                                   MorphCase.UNDEFINED,
                                                   noun.termin_item.gender,
                                                   False, False)
             else:
                 ok = False
                 if (((k + 1) < len(li)) and li[k].termin_item is None
                         and li[k + 1].termin_item is not None):
                     ok = True
                 elif ((k < len(li)) and li[k].onto_item is not None):
                     ok = True
                 elif (k == len(li) and not new_name.is_adj_in_dictionary):
                     ok = True
                 elif (MiscLocationHelper.checkGeoObjectBefore(
                         li[0].begin_token) or can_be_city_before):
                     ok = True
                 elif (MiscLocationHelper.checkGeoObjectAfter(
                         li[k - 1].end_token)):
                     ok = True
                 elif (len(li) == 3 and k == 2):
                     cit = CityItemToken.tryParse(li[2].begin_token, None,
                                                  False, None)
                     if (cit is not None):
                         if (cit.typ == CityItemToken.ItemType.CITY
                                 or cit.typ == CityItemToken.ItemType.NOUN):
                             ok = True
                 elif (len(li) == 2):
                     ok = TerrAttachHelper.__canBeGeoAfter(
                         li[len(li) - 1].end_token.next0_)
                 if (not ok and not li[0].is_newline_before
                         and not li[0].chars.is_all_lower):
                     rt00 = li[0].kit.processReferent(
                         "PERSONPROPERTY", li[0].begin_token.previous)
                     if (rt00 is not None):
                         ok = True
                 if (noun.termin_item is not None
                         and noun.termin_item.is_strong
                         and new_name.is_adjective):
                     ok = True
                 if (noun.is_doubt and len(adj_list) == 0
                         and geo_before is None):
                     return None
                 name = ProperNameHelper.getNameEx(new_name.begin_token,
                                                   new_name.end_token,
                                                   MorphClass.ADJECTIVE,
                                                   MorphCase.UNDEFINED,
                                                   noun.termin_item.gender,
                                                   False, False)
                 if (not ok and not attach_always):
                     if (MiscHelper.isExistsInDictionary(
                             new_name.begin_token, new_name.end_token,
                         (MorphClass.ADJECTIVE) | MorphClass.PRONOUN
                             | MorphClass.VERB)):
                         if (exists is not None):
                             for e0_ in exists:
                                 if (e0_.findSlot(GeoReferent.ATTR_NAME,
                                                  name, True) is not None):
                                     ok = True
                                     break
                         if (not ok):
                             return None
                 full_name = "{0} {1}".format(
                     ProperNameHelper.getNameEx(li[0].begin_token,
                                                noun.begin_token.previous,
                                                MorphClass.ADJECTIVE,
                                                MorphCase.UNDEFINED,
                                                noun.termin_item.gender,
                                                False, False),
                     noun.termin_item.canonic_text)
         else:
             if (not attach_always or
                 ((noun.termin_item is not None
                   and noun.termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                 is_latin = noun.chars.is_latin_letter and new_name.chars.is_latin_letter
                 if (Utils.indexOfList(li, noun, 0) > Utils.indexOfList(
                         li, new_name, 0)):
                     if (not is_latin):
                         return None
                 if (not new_name.is_district_name
                         and not BracketHelper.canBeStartOfSequence(
                             new_name.begin_token, False, False)):
                     if (len(adj_list) == 0
                             and MiscHelper.isExistsInDictionary(
                                 new_name.begin_token, new_name.end_token,
                                 (MorphClass.NOUN) | MorphClass.PRONOUN)):
                         if (len(li) == 2 and noun.is_city_region
                                 and (noun.whitespaces_after_count < 2)):
                             pass
                         else:
                             return None
                     if (not is_latin):
                         if ((noun.termin_item.is_region
                              and not attach_always and
                              ((not adj_terr_before or new_name.is_doubt)))
                                 and not noun.is_city_region and
                                 not noun.termin_item.is_specific_prefix):
                             if (not MiscLocationHelper.
                                     checkGeoObjectBefore(
                                         noun.begin_token)):
                                 if (not noun.is_doubt and noun.begin_token
                                         != noun.end_token):
                                     pass
                                 else:
                                     return None
                         if (noun.is_doubt and len(adj_list) == 0):
                             if (((noun.termin_item.acronym == "МО"
                                   or noun.termin_item.acronym == "ЛО"))
                                     and k == (len(li) - 1)
                                     and li[k].termin_item is not None):
                                 add_noun = li[k]
                                 k += 1
                             else:
                                 return None
                         pers = new_name.kit.processReferent(
                             "PERSON", new_name.begin_token)
                         if (pers is not None):
                             return None
             name = MiscHelper.getTextValue(new_name.begin_token,
                                            new_name.end_token,
                                            GetTextAttr.NO)
             if (new_name.begin_token != new_name.end_token):
                 ttt = new_name.begin_token.next0_
                 while ttt is not None and ttt.end_char <= new_name.end_char:
                     if (ttt.chars.is_letter):
                         ty = TerrItemToken.tryParse(
                             ttt, None, False, False)
                         if ((ty is not None and ty.termin_item is not None
                              and noun is not None)
                                 and ((noun.termin_item.canonic_text
                                       in ty.termin_item.canonic_text
                                       or ty.termin_item.canonic_text
                                       in noun.termin_item.canonic_text))):
                             name = MiscHelper.getTextValue(
                                 new_name.begin_token, ttt.previous,
                                 GetTextAttr.NO)
                             break
                     ttt = ttt.next0_
             if (len(adj_list) > 0):
                 npt = NounPhraseHelper.tryParse(adj_list[0].begin_token,
                                                 NounPhraseParseAttr.NO, 0)
                 if (npt is not None and npt.end_token == noun.end_token):
                     alt_name = "{0} {1}".format(
                         npt.getNormalCaseText(None, False,
                                               MorphGender.UNDEFINED,
                                               False), name)
     else:
         if ((len(li) == 1 and noun is not None
              and noun.end_token.next0_ is not None) and (isinstance(
                  noun.end_token.next0_.getReferent(), GeoReferent))):
             g = Utils.asObjectOrNull(noun.end_token.next0_.getReferent(),
                                      GeoReferent)
             if (noun.termin_item is not None):
                 tyy = noun.termin_item.canonic_text.lower()
                 ooo = False
                 if (g.findSlot(GeoReferent.ATTR_TYPE, tyy, True)
                         is not None):
                     ooo = True
                 elif (tyy.endswith("район") and g.findSlot(
                         GeoReferent.ATTR_TYPE, "район", True) is not None):
                     ooo = True
                 if (ooo):
                     return ReferentToken._new719(g, noun.begin_token,
                                                  noun.end_token.next0_,
                                                  noun.begin_token.morph)
         if ((len(li) == 1 and noun == li[0]
              and li[0].termin_item is not None) and TerrItemToken.tryParse(
                  li[0].end_token.next0_, None, True, False) is None
                 and TerrItemToken.tryParse(li[0].begin_token.previous,
                                            None, True, False) is None):
             if (li[0].morph.number == MorphNumber.PLURAL):
                 return None
             cou = 0
             str0_ = li[0].termin_item.canonic_text.lower()
             tt = li[0].begin_token.previous
             first_pass2898 = True
             while True:
                 if first_pass2898: first_pass2898 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (tt.is_newline_after):
                     cou += 10
                 else:
                     cou += 1
                 if (cou > 500):
                     break
                 g = Utils.asObjectOrNull(tt.getReferent(), GeoReferent)
                 if (g is None):
                     continue
                 ok = True
                 cou = 0
                 tt = li[0].end_token.next0_
                 first_pass2899 = True
                 while True:
                     if first_pass2899: first_pass2899 = False
                     else: tt = tt.next0_
                     if (not (tt is not None)): break
                     if (tt.is_newline_before):
                         cou += 10
                     else:
                         cou += 1
                     if (cou > 500):
                         break
                     tee = TerrItemToken.tryParse(tt, None, True, False)
                     if (tee is None):
                         continue
                     ok = False
                     break
                 if (ok):
                     ii = 0
                     while g is not None and (ii < 3):
                         if (g.findSlot(GeoReferent.ATTR_TYPE, str0_, True)
                                 is not None):
                             return ReferentToken._new719(
                                 g, li[0].begin_token, li[0].end_token,
                                 noun.begin_token.morph)
                         g = g.higher
                         ii += 1
                 break
         return None
     ter = None
     if (ex_obj is not None and (isinstance(ex_obj.tag, GeoReferent))):
         ter = (Utils.asObjectOrNull(ex_obj.tag, GeoReferent))
     else:
         ter = GeoReferent()
         if (ex_obj is not None):
             geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent,
                                         GeoReferent)
             if (geo_ is not None and not geo_.is_city):
                 ter._mergeSlots2(geo_, li[0].kit.base_language)
             else:
                 ter._addName(name)
             if (noun is None and ex_obj.can_be_city):
                 ter._addTypCity(li[0].kit.base_language)
             else:
                 pass
         elif (new_name is not None):
             ter._addName(name)
             if (alt_name is not None):
                 ter._addName(alt_name)
         if (noun is not None):
             if (noun.termin_item.canonic_text == "АО"):
                 ter._addTyp(
                     ("АВТОНОМНИЙ ОКРУГ" if li[0].kit.base_language.is_ua
                      else "АВТОНОМНЫЙ ОКРУГ"))
             elif (noun.termin_item.canonic_text == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ"
                   or noun.termin_item.canonic_text
                   == "МУНІЦИПАЛЬНЕ ЗБОРИ"):
                 ter._addTyp(("МУНІЦИПАЛЬНЕ УТВОРЕННЯ"
                              if li[0].kit.base_language.is_ua else
                              "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ"))
             elif (noun.termin_item.acronym == "МО"
                   and add_noun is not None):
                 ter._addTyp(add_noun.termin_item.canonic_text)
             else:
                 if (noun.termin_item.canonic_text == "СОЮЗ"
                         and ex_obj is not None
                         and ex_obj.end_char > noun.end_char):
                     return ReferentToken._new719(ter, ex_obj.begin_token,
                                                  ex_obj.end_token,
                                                  ex_obj.morph)
                 ter._addTyp(noun.termin_item.canonic_text)
                 if (noun.termin_item.is_region and ter.is_state):
                     ter._addTypReg(li[0].kit.base_language)
         if (ter.is_state and ter.is_region):
             for a in adj_list:
                 if (a.termin_item.is_region):
                     ter._addTypReg(li[0].kit.base_language)
                     break
         if (ter.is_state):
             if (full_name is not None):
                 ter._addName(full_name)
     res = ReferentToken(ter, li[0].begin_token, li[k - 1].end_token)
     if (noun is not None and noun.morph.class0_.is_noun):
         res.morph = noun.morph
     else:
         res.morph = MorphCollection()
         ii = 0
         while ii < k:
             for v in li[ii].morph.items:
                 bi = MorphBaseInfo(v)
                 if (noun is not None):
                     if (bi.class0_.is_adjective):
                         bi.class0_ = MorphClass.NOUN
                 res.morph.addItem(bi)
             ii += 1
     if (li[0].termin_item is not None
             and li[0].termin_item.is_specific_prefix):
         res.begin_token = li[0].end_token.next0_
     if (add_noun is not None and add_noun.end_char > res.end_char):
         res.end_token = add_noun.end_token
     if ((isinstance(res.begin_token.previous, TextToken))
             and (res.whitespaces_before_count < 2)):
         tt = Utils.asObjectOrNull(res.begin_token.previous, TextToken)
         if (tt.term == "АР"):
             for ty in ter.typs:
                 if ("республика" in ty or "республіка" in ty):
                     res.begin_token = tt
                     break
     return res
Beispiel #14
0
 def _canBeHigherToken(rhi: 'Token', rlo: 'Token') -> bool:
     """Tell whether the geo object at ``rhi`` may be the owner of the one at ``rlo``.

     Args:
         rhi: token carrying the candidate higher (owning) geo referent.
         rlo: token carrying the candidate lower (owned) geo referent.

     Returns:
         ``False`` when either token is ``None``, when ``rhi`` is in the
         instrumental (and not genitive) case, when either token does not
         yield a ``GeoReferent``, or when one of the positional / preposition
         checks below rejects the pair.  Otherwise ``True`` if
         ``GeoOwnerHelper.canBeHigher(hi, lo)`` accepts the pair; if it does
         not, the result is the "city inside region" exception flag computed
         here.
     """
     if rhi is None or rlo is None:
         return False
     hi_case = rhi.morph.case_
     if hi_case.is_instrumental and not hi_case.is_genitive:
         return False
     hi = Utils.asObjectOrNull(rhi.getReferent(), GeoReferent)
     lo = Utils.asObjectOrNull(rlo.getReferent(), GeoReferent)
     if hi is None or lo is None:
         return False

     def _typed_as_city(geo) -> bool:
         # True when the referent has one of the literal "city" type slots
         # (Russian, Ukrainian or English spelling), probed in that order.
         return any(
             geo.findSlot(GeoReferent.ATTR_TYPE, word, True) is not None
             for word in ("город", "місто", "city"))

     # Exception flag: lets a city/district pair through even when the
     # generic ownership check at the end refuses it.
     citi_in_reg = False

     # Case 1: a city followed directly by a district-like region in genitive.
     if hi.is_city and lo.is_region and _typed_as_city(hi):
         lo_types = GeoOwnerHelper.__getTypesString(lo)
         district_like = (
             any(marker in lo_types for marker in (
                 "район",
                 "административный округ",
                 "муниципальный округ",
                 "адміністративний округ",
                 "муніципальний округ",
             ))
             or lo.findSlot(GeoReferent.ATTR_TYPE, "округ", True) is not None)
         if (district_like and rhi.next0_ == rlo
                 and rlo.morph.case_.is_genitive):
             citi_in_reg = True

     # Case 2: a region on top of a city.
     if hi.is_region and lo.is_city:
         if not _typed_as_city(lo):
             citi_in_reg = True
         elif GeoOwnerHelper.__getTypesString(hi) == "район;":
             if hi.higher is not None and hi.higher.is_region:
                 citi_in_reg = True
             elif (rhi.end_char <= rlo.begin_char
                   and rhi.next0_ is not None and rhi.next0_.is_comma):
                 # The original had two branches here (with and without a
                 # "lower token is not genitive" clause); both set the flag,
                 # so the case of the lower token does not matter.
                 citi_in_reg = True

     # Positional checks apply only when the higher token precedes the lower.
     if rhi.end_char <= rlo.begin_char:
         if (not rhi.morph.class0_.is_adjective and hi.is_state
                 and not rhi.chars.is_latin_letter):
             return False
         if ((rhi.is_newline_after or rlo.is_newline_before)
                 and not citi_in_reg):
             return False

     # A preposition right before the lower token must agree with its case.
     prev = rlo.previous
     if prev is not None and prev.morph.class0_.is_preposition:
         if prev.morph.language.is_ua:
             in_word, from_word = "У", "З"
         else:
             in_word, from_word = "В", "ИЗ"
         lo_case = rlo.morph.case_
         if (prev.isValue(in_word, None) and not lo_case.is_dative
                 and not lo_case.is_prepositional
                 and not lo_case.is_undefined):
             return False
         if (prev.isValue(from_word, None) and not lo_case.is_genitive
                 and not lo_case.is_undefined):
             return False

     if not GeoOwnerHelper.canBeHigher(hi, lo):
         return citi_in_reg
     return True
Beispiel #15
0
 def toString(self,
              short_variant: bool,
              lang: 'MorphLang',
              lev: int = 0) -> str:
     """Render this instrument block as a single descriptive string.

     The text is assembled from, in order: the outer label of ``self.kind``
     (plus the label of ``self.kind2`` in parentheses when it is defined),
     the numbering, an expiry / decree marker, and finally a quoted caption.

     Args:
         short_variant: only forwarded to the referenced object's
             ``toString`` when that object is rendered as the caption.
         lang: language passed to the kind-label conversion and to the
             nested ``toString`` call.
         lev: current nesting depth; the referenced object is rendered only
             while ``lev`` is below 30.

     Returns:
         The assembled text with surrounding whitespace stripped.
     """
     out = io.StringIO()
     kind = self.kind

     def _kind_label(value):
         # Outer (display) value of a kind, or None when it is not a string.
         return Utils.asObjectOrNull(
             MetaInstrumentBlock.GLOBAL_META.kind_feature.
             convertInnerValueToOuterValue(Utils.enumToString(value), lang),
             str)

     label = _kind_label(kind)
     if label is not None:
         out.write(label)
         if self.kind2 != InstrumentKind.UNDEFINED:
             label = _kind_label(self.kind2)
             if label is not None:
                 out.write(" ({0})".format(label))

     if self.number > 0:
         if kind == InstrumentKind.TABLE:
             # For tables: child count as rows, ``number`` as columns.
             out.write(" {0} строк, {1} столбцов".format(
                 len(self.children), self.number))
         else:
             out.write(" №{0}".format(self.number))
             # Each deeper sub-number is shown only if the previous one is set.
             if self.sub_number > 0:
                 out.write(".{0}".format(self.sub_number))
                 if self.sub_number2 > 0:
                     out.write(".{0}".format(self.sub_number2))
                     if self.sub_number3 > 0:
                         out.write(".{0}".format(self.sub_number3))
             if self.min_number > 0:
                 # Put "<min>-" right after the last space or dot, i.e. in
                 # front of the final number component written so far.
                 for pos in reversed(range(out.tell())):
                     if Utils.getCharAtStringIO(out, pos) in (' ', '.'):
                         Utils.insertStringIO(
                             out, pos + 1, "{0}-".format(self.min_number))
                         break

     skip_ref = False
     if self.is_expired:
         out.write(" (утратить силу)")
         skip_ref = True
     elif (kind != InstrumentKind.EDITIONS
           and kind != InstrumentKind.APPROVED
           and isinstance(self.ref, DecreeReferent)):
         out.write(" (*)")
         skip_ref = True

     # Caption: the NAME attribute, falling back to the VALUE attribute.
     caption = self.getStringValue(InstrumentBlockReferent.ATTR_NAME)
     if caption is None:
         caption = self.getStringValue(InstrumentBlockReferent.ATTR_VALUE)

     if caption is not None:
         if len(caption) > 100:
             caption = caption[:100] + "..."
         out.write(" \"{0}\"".format(caption))
     elif not skip_ref and isinstance(self.ref, Referent) and lev < 30:
         # No caption of our own: show the referenced object instead.
         out.write(" \"{0}\"".format(
             self.ref.toString(short_variant, lang, lev + 1)))

     return Utils.toStringStringIO(out).strip()
Beispiel #16
0
 def __try1(li: typing.List['CityItemToken'], oi: 'IntOntologyItem',
            ad: 'AnalyzerDataWithOntology') -> 'ReferentToken':
     oi.value = (None)
     if (li is None or (len(li) < 1)):
         return None
     elif (li[0].typ != CityItemToken.ItemType.CITY):
         if (len(li) != 2 or li[0].typ != CityItemToken.ItemType.PROPERNAME
                 or li[1].typ != CityItemToken.ItemType.NOUN):
             return None
     i = 1
     oi.value = li[0].onto_item
     ok = not li[0].doubtful
     if ((ok and li[0].onto_item is not None
          and li[0].onto_item.misc_attr is None) and ad is not None):
         if (li[0].onto_item.owner != ad.local_ontology
                 and not li[0].onto_item.owner.is_ext_ontology):
             if (li[0].begin_token.previous is not None
                     and li[0].begin_token.previous.isValue("В", None)):
                 pass
             else:
                 ok = False
     if (len(li) == 1 and li[0].begin_token.morph.class0_.is_adjective):
         sits = StreetItemToken.tryParseList(li[0].begin_token, None, 3)
         if (sits is not None and len(sits) == 2
                 and sits[1].typ == StreetItemType.NOUN):
             return None
     typ = None
     alttyp = None
     mc = li[0].morph
     if (i < len(li)):
         if (li[i].typ == CityItemToken.ItemType.NOUN):
             at = None
             if (not li[i].chars.is_all_lower
                     and (li[i].whitespaces_after_count < 2)):
                 sit = StreetItemToken.tryParse(li[i].end_token.next0_,
                                                None, False, None, False)
                 if (sit is not None and sit.typ == StreetItemType.NOUN):
                     at = AddressItemToken.tryParse(li[i].begin_token, None,
                                                    False, False, None)
                     if (at is not None):
                         at2 = AddressItemToken.tryParse(
                             li[i].end_token.next0_, None, False, False,
                             None)
                         if (at2 is not None and at2.typ
                                 == AddressItemToken.ItemType.STREET):
                             at = (None)
             if (at is None):
                 typ = li[i].value
                 alttyp = li[i].alt_value
                 if (li[i].begin_token.isValue("СТ", None)
                         and li[i].begin_token.chars.is_all_upper):
                     return None
                 if ((i + 1) == len(li)):
                     ok = True
                     if (not li[i].morph.case_.is_undefined):
                         mc = li[i].morph
                     i += 1
                 elif (ok):
                     i += 1
                 else:
                     tt0 = li[0].begin_token.previous
                     if ((isinstance(tt0, TextToken))
                             and (tt0.whitespaces_after_count < 3)):
                         if (tt0.isValue("МЭР", "МЕР")
                                 or tt0.isValue("ГЛАВА", None)
                                 or tt0.isValue("ГРАДОНАЧАЛЬНИК", None)):
                             ok = True
                             i += 1
     if (not ok and oi.value is not None
             and (len(oi.value.canonic_text) < 4)):
         return None
     if (not ok and li[0].begin_token.morph.class0_.is_proper_name):
         return None
     if (not ok):
         if (not MiscHelper.isExistsInDictionary(
                 li[0].begin_token, li[0].end_token, (MorphClass.ADJECTIVE)
                 | MorphClass.NOUN | MorphClass.PRONOUN)):
             ok = (li[0].geo_object_before or li[i - 1].geo_object_after)
             if (ok and li[0].begin_token == li[0].end_token):
                 mcc = li[0].begin_token.getMorphClassInDictionary()
                 if (mcc.is_proper_name or mcc.is_proper_surname):
                     ok = False
                 elif (li[0].geo_object_before
                       and (li[0].whitespaces_after_count < 2)):
                     ad1 = AddressItemToken.tryParse(
                         li[0].begin_token, None, False, False, None)
                     if (ad1 is not None and ad1.typ
                             == AddressItemToken.ItemType.STREET):
                         ad2 = AddressItemToken.tryParse(
                             li[0].end_token.next0_, None, False, False,
                             None)
                         if (ad2 is None or ad2.typ !=
                                 AddressItemToken.ItemType.STREET):
                             ok = False
                     elif (AddressItemToken.tryAttachOrg(li[0].begin_token)
                           is not None):
                         ok = False
         if (ok):
             if (li[0].kit.processReferent("PERSON", li[0].begin_token)
                     is not None):
                 ok = False
     if (not ok):
         ok = CityAttachHelper.checkYearAfter(li[0].end_token.next0_)
     if (not ok and ((not li[0].begin_token.morph.class0_.is_adjective
                      or li[0].begin_token != li[0].end_token))):
         ok = CityAttachHelper.checkCityAfter(li[0].end_token.next0_)
     if (not ok):
         return None
     if (i < len(li)):
         del li[i:i + len(li) - i]
     rt = None
     if (oi.value is None):
         if (li[0].value is not None and li[0].higher_geo is not None):
             cap = GeoReferent()
             cap._addName(li[0].value)
             cap._addTypCity(li[0].kit.base_language)
             cap.higher = li[0].higher_geo
             if (typ is not None):
                 cap._addTyp(typ)
             if (alttyp is not None):
                 cap._addTyp(alttyp)
             rt = ReferentToken(cap, li[0].begin_token, li[0].end_token)
         else:
             if (li[0].value is None):
                 return None
             if (typ is None):
                 if ((len(li) == 1
                      and li[0].begin_token.previous is not None
                      and li[0].begin_token.previous.is_hiphen) and
                     (isinstance(li[0].begin_token.previous.previous,
                                 ReferentToken)) and
                     (isinstance(
                         li[0].begin_token.previous.previous.getReferent(),
                         GeoReferent))):
                     pass
                 else:
                     return None
             else:
                 if (not LanguageHelper.endsWithEx(typ, "ПУНКТ",
                                                   "ПОСЕЛЕНИЕ", "ПОСЕЛЕННЯ",
                                                   "ПОСЕЛОК")):
                     if (not LanguageHelper.endsWith(typ, "CITY")):
                         if (typ == "СТАНЦИЯ" and
                             ((MiscLocationHelper.checkGeoObjectBefore(
                                 li[0].begin_token)))):
                             pass
                         elif (len(li) > 1
                               and li[1].typ == CityItemToken.ItemType.NOUN
                               and li[0].typ
                               == CityItemToken.ItemType.CITY):
                             pass
                         else:
                             return None
                 if (li[0].begin_token.morph.class0_.is_adjective):
                     li[0].value = ProperNameHelper.getNameEx(
                         li[0].begin_token, li[0].end_token,
                         MorphClass.ADJECTIVE, li[1].morph.case_,
                         li[1].morph.gender, False, False)
     elif (isinstance(oi.value.referent, GeoReferent)):
         rt = ReferentToken._new719(
             Utils.asObjectOrNull(oi.value.referent, GeoReferent),
             li[0].begin_token, li[len(li) - 1].end_token, mc)
     elif (typ is None):
         typ = oi.value.typ
     if (rt is None):
         city = GeoReferent()
         city._addName(
             (li[0].value if oi.value is None else oi.value.canonic_text))
         if (typ is not None):
             city._addTyp(typ)
         else:
             city._addTypCity(li[0].kit.base_language)
         if (alttyp is not None):
             city._addTyp(alttyp)
         rt = ReferentToken._new719(city, li[0].begin_token,
                                    li[len(li) - 1].end_token, mc)
     if ((isinstance(rt.referent, GeoReferent)) and len(li) == 1
             and (rt.referent).is_city):
         if (rt.begin_token.previous is not None
                 and rt.begin_token.previous.isValue("Г", None)):
             rt.begin_token = rt.begin_token.previous
         elif ((rt.begin_token.previous is not None
                and rt.begin_token.previous.isChar('.')
                and rt.begin_token.previous.previous is not None)
               and rt.begin_token.previous.previous.isValue("Г", None)):
             rt.begin_token = rt.begin_token.previous.previous
         elif (rt.end_token.next0_ is not None
               and (rt.whitespaces_after_count < 2)
               and rt.end_token.next0_.isValue("Г", None)):
             rt.end_token = rt.end_token.next0_
             if (rt.end_token.next0_ is not None
                     and rt.end_token.next0_.isChar('.')):
                 rt.end_token = rt.end_token.next0_
     return rt
Beispiel #17
0
 def process(self, kit: 'AnalysisKit') -> None:
     """Detect transport entities in the token chain of ``kit`` and embed them.

     The work is done in two sweeps over the chain that starts at
     ``kit.first_token``:

     1. Every position is offered to ``TransItemToken.tryParseList`` and the
        resulting items to ``self.__tryAttach``. Each returned referent token
        is matched against transport referents found in up to 1000 preceding
        tokens, registered in the analyzer data, embedded into the chain and
        indexed by its model / "brand model" / name slots.
     2. If anything was indexed, the chain is swept again and bracketed names
        as well as bare model or name occurrences are wrapped into referent
        tokens pointing at the indexed referents.

     Args:
         kit: analysis container providing the token chain, the analyzer
             data storage and ``embedToken``.
     """
     analyzer_data = kit.getAnalyzerData(self)
     model_termins = TerminCollection()
     referents_by_model = dict()
     name_termins = TerminCollection()

     def _earlier_equivalent(rt, start):
         # Walk backwards from `start` over at most 1000 tokens and return the
         # first TransportReferent that contains every slot of rt.referent.
         probe = start
         for _ in range(1000):
             if probe is None:
                 break
             known = Utils.asObjectOrNull(probe.getReferent(),
                                          TransportReferent)
             if known is not None and all(
                     known.findSlot(slot.type_name, slot.value, True)
                     is not None for slot in rt.referent.slots):
                 return known
             probe = probe.previous
         return None

     def _index(rt):
         # Remember the referent under its model (and "brand model") keys and
         # under each of its names so that the second sweep can find it.
         for slot in rt.referent.slots:
             if slot.type_name == TransportReferent.ATTR_MODEL:
                 key = str(slot.value)
                 for attempt in range(2):
                     # Keys starting with a digit are not indexed.
                     if not key[0].isdigit():
                         holder = RefOutArgWrapper(None)
                         known = Utils.tryGetValue(referents_by_model, key,
                                                   holder)
                         bucket = holder.value
                         if not known:
                             bucket = list()
                             referents_by_model[key] = bucket
                         if rt.referent not in bucket:
                             bucket.append(rt.referent)
                         model_termins.addStr(key, bucket, None, False)
                     if attempt > 0:
                         break
                     brand = rt.referent.getStringValue(
                         TransportReferent.ATTR_BRAND)
                     if brand is None:
                         break
                     # Second round uses the brand-prefixed model as the key.
                     key = "{0} {1}".format(brand, key)
             elif slot.type_name == TransportReferent.ATTR_NAME:
                 name_termins.add(
                     Termin._new117(str(slot.value), rt.referent))

     # First sweep: recognise explicit transport descriptions.
     t = kit.first_token
     while t is not None:
         items = TransItemToken.tryParseList(t, 10)
         attached = None
         if items is not None:
             attached = self.__tryAttach(items, False)
         if attached is not None:
             for rt in attached:
                 earlier = _earlier_equivalent(rt, t.previous)
                 if earlier is not None:
                     rt.referent = earlier
                 rt.referent = analyzer_data.registerReferent(rt.referent)
                 kit.embedToken(rt)
                 t = rt
                 _index(rt)
         t = t.next0_

     if (len(referents_by_model) == 0
             and len(name_termins.termins) == 0):
         return

     def _link_mention(t):
         # Try to wrap the text at `t` into a referent token; return the token
         # from which the sweep has to continue (the new wrapper or `t`).
         br = BracketHelper.tryParse(t, BracketParseAttr.NO, 10)
         if br is not None:
             inner = name_termins.tryParse(t.next0_, TerminParseAttr.NO)
             if (inner is not None
                     and inner.end_token.next0_ == br.end_token):
                 # A known name filling the whole bracket sequence.
                 wrapped = ReferentToken(
                     Utils.asObjectOrNull(inner.termin.tag, Referent),
                     br.begin_token, br.end_token)
                 kit.embedToken(wrapped)
                 return wrapped
         if not isinstance(t, TextToken) or not t.chars.is_letter:
             return t
         hit = model_termins.tryParse(t, TerminParseAttr.NO)
         if hit is None and not t.chars.is_all_lower:
             hit = name_termins.tryParse(t, TerminParseAttr.NO)
         if hit is None:
             return t
         if not hit.is_whitespace_after:
             # Without whitespace only ',', '.', ')' or a bracket may follow.
             follower = hit.end_token.next0_
             if ((follower is None or not follower.isCharOf(",.)"))
                     and not BracketHelper.isBracket(follower, False)):
                 return t
         bucket = Utils.asObjectOrNull(hit.termin.tag, list)
         if bucket is not None and len(bucket) == 1:
             target = bucket[0]
         else:
             target = Utils.asObjectOrNull(hit.termin.tag, Referent)
         if target is None:
             return t
         brand_item = TransItemToken.tryParse(hit.begin_token.previous, None,
                                              False, True)
         if (brand_item is not None
                 and brand_item.typ == TransItemToken.Typs.BRAND):
             # A brand right before the mention is added to the referent and
             # included in the span.
             target.addSlot(TransportReferent.ATTR_BRAND, brand_item.value,
                            False, 0)
             hit.begin_token = brand_item.begin_token
         mention = ReferentToken(target, hit.begin_token, hit.end_token)
         kit.embedToken(mention)
         return mention

     # Second sweep: link further mentions of the indexed referents.
     t = kit.first_token
     while t is not None:
         t = _link_mention(t).next0_
Beispiel #18
0
 def process(self, kit : 'AnalysisKit') -> None:
     """Extract keyword referents from the token chain of ``kit``.

     Steps, in order:

     1. Make sure a DenominationAnalyzer has processed the kit.
     2. First sweep: wrap verbs into PREDICATE keywords and the words of
        noun phrases into OBJECT keywords; a noun phrase that produced more
        than one keyword additionally gets a combined OBJECT keyword.
     3. Second sweep: unite an OBJECT keyword with a following OBJECT
        keyword joined by "OF" or standing in the genitive case.
     4. Optionally sort the registered referents by rank and create an
        annotation.

     Args:
         kit: analysis container providing the token chain, the analyzer
             data storage and ``embed_token``.
     """
     # Main function for extracting phones
     # NOTE(review): the original (Russian) comment above mentions phones, but
     # this body only creates KeywordReferent objects - the comment looks
     # copy-pasted from another analyzer.
     ad = kit.get_analyzer_data(self)
     # Run a DenominationAnalyzer here unless the processor already has an
     # active (not ignored) one.
     has_denoms = False
     for a in kit.processor.analyzers: 
         if ((isinstance(a, DenominationAnalyzer)) and not a.ignore_this_analyzer): 
             has_denoms = True
     if (not has_denoms): 
         a = DenominationAnalyzer()
         a.process(kit)
     # Scratch buffers reused for every noun phrase: keywords of the phrase,
     # a string builder and the list of normalized parts.
     li = list()
     tmp = io.StringIO()
     tmp2 = list()
     # max0_ = number of tokens in the chain; passed together with the running
     # counter `cur` to the ranking helpers.
     max0_ = 0
     t = kit.first_token
     while t is not None: 
         max0_ += 1
         t = t.next0_
     cur = 0
     t = kit.first_token
     first_pass3292 = True
     # First sweep over the chain; `cur` is incremented on every advance.
     while True:
         if first_pass3292: first_pass3292 = False
         else: t = t.next0_; cur += 1
         if (not (t is not None)): break
         r = t.get_referent()
         if (r is not None): 
             # Token already carries a referent: delegate to __add_referents
             # and continue from the token it returns.
             t = self.__add_referents(ad, t, cur, max0_)
             continue
         if (not (isinstance(t, TextToken))): 
             continue
         # Only letter tokens of at least 3 characters are considered.
         if (not t.chars.is_letter or (t.length_char < 3)): 
             continue
         term = t.term
         if (term == "ЕСТЬ"): 
             # "ЕСТЬ" is kept only when the previous text token is a verb.
             if ((isinstance(t.previous, TextToken)) and t.previous.morph.class0_.is_verb): 
                 pass
             else: 
                 continue
         npt = None
         npt = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.ADJECTIVECANBELAST) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), 0, None)
         if (npt is None): 
             # Not a noun phrase: a dictionary verb (that is not also a
             # preposition) becomes a PREDICATE keyword.
             mc = t.get_morph_class_in_dictionary()
             if (mc.is_verb and not mc.is_preposition): 
                 if (t.is_verb_be): 
                     continue
                 if (t.is_value("МОЧЬ", None) or t.is_value("WOULD", None)): 
                     continue
                 kref = KeywordReferent._new1595(KeywordType.PREDICATE)
                 norm = t.get_normal_case_text(MorphClass.VERB, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                 if (norm is None): 
                     norm = t.lemma
                 # Drop the last two characters ("СЯ") of forms ending in "ЬСЯ".
                 if (norm.endswith("ЬСЯ")): 
                     norm = norm[0:0+len(norm) - 2]
                 kref.add_slot(KeywordReferent.ATTR_VALUE, norm, False, 0)
                 drv = DerivateService.find_derivates(norm, True, t.morph.language)
                 KeywordAnalyzer.__add_normals(kref, drv, norm)
                 kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
                 KeywordAnalyzer.__set_rank(kref, cur, max0_)
                 # NOTE(review): kref was already registered two lines above;
                 # this second register_referent call looks redundant - confirm.
                 rt1 = ReferentToken._new734(ad.register_referent(kref), t, t, t.morph)
                 kit.embed_token(rt1)
                 t = (rt1)
                 continue
             continue
         if (npt.internal_noun is not None): 
             continue
         # Phrases ending with "ЦЕЛОМ"/"ЧАSTНОСТИ"-type words after a preposition
         # are skipped as a whole (the check below tests "ЦЕЛОМ" and
         # "ЧАСТНОСТИ"; presumably set expressions - TODO confirm).
         if (npt.end_token.is_value("ЦЕЛОМ", None) or npt.end_token.is_value("ЧАСТНОСТИ", None)): 
             if (npt.preposition is not None): 
                 t = npt.end_token
                 continue
         # Same for a phrase ending with "СТОРОНЫ" after the preposition "С".
         if (npt.end_token.is_value("СТОРОНЫ", None) and npt.preposition is not None and npt.preposition.normal == "С"): 
             t = npt.end_token
             continue
         if (npt.begin_token == npt.end_token): 
             # Single-token phrase: skip prepositions and the adverb "ПОТОМ".
             mc = t.get_morph_class_in_dictionary()
             if (mc.is_preposition): 
                 continue
             elif (mc.is_adverb): 
                 if (t.is_value("ПОТОМ", None)): 
                     continue
         else: 
             pass
         li.clear()
         # t0 tracks the first token of the future combined span, t the last.
         t0 = t
         tt = t
         first_pass3293 = True
         # Walk the tokens that lie inside the noun phrase span and turn each
         # suitable word into its own OBJECT keyword.
         while True:
             if first_pass3293: first_pass3293 = False
             else: tt = tt.next0_
             if (not (tt is not None and tt.end_char <= npt.end_char)): break
             if (not (isinstance(tt, TextToken))): 
                 continue
             # NOTE(review): no-op branch, looks like a leftover debugging hook.
             if (tt.is_value("NATURAL", None)): 
                 pass
             if ((tt.length_char < 3) or not tt.chars.is_letter): 
                 continue
             mc = tt.get_morph_class_in_dictionary()
             # Function words are skipped, except the word "ОТНОШЕНИЕ".
             if ((mc.is_preposition or mc.is_pronoun or mc.is_personal_pronoun) or mc.is_conjunction): 
                 if (tt.is_value("ОТНОШЕНИЕ", None)): 
                     pass
                 else: 
                     continue
             if (mc.is_misc): 
                 if (MiscHelper.is_eng_article(tt)): 
                     continue
             kref = KeywordReferent._new1595(KeywordType.OBJECT)
             norm = tt.lemma
             kref.add_slot(KeywordReferent.ATTR_VALUE, norm, False, 0)
             # Derivatives are not looked up for "ЕСТЬ".
             if (norm != "ЕСТЬ"): 
                 drv = DerivateService.find_derivates(norm, True, tt.morph.language)
                 KeywordAnalyzer.__add_normals(kref, drv, norm)
             kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
             KeywordAnalyzer.__set_rank(kref, cur, max0_)
             rt1 = ReferentToken._new734(kref, tt, tt, tt.morph)
             kit.embed_token(rt1)
             # If the phrase's first token itself was wrapped first, the
             # combined span must start at the wrapper.
             if (tt == t and len(li) == 0): 
                 t0 = (rt1)
             t = (rt1)
             li.append(kref)
         if (len(li) > 1): 
             # Several words were wrapped: create one more OBJECT keyword for
             # the whole phrase that references each of them.
             kref = KeywordReferent._new1595(KeywordType.OBJECT)
             Utils.setLengthStringIO(tmp, 0)
             tmp2.clear()
             # NOTE(review): has_norm is assigned but never read in this method.
             has_norm = False
             for kw in li: 
                 s = kw.get_string_value(KeywordReferent.ATTR_VALUE)
                 # NOTE(review): the text written to tmp in this loop is not
                 # read before tmp is reset again after the loop - appears to
                 # be dead work; confirm setLengthStringIO truncates.
                 if (tmp.tell() > 0): 
                     print(' ', end="", file=tmp)
                 print(s, end="", file=tmp)
                 # Prefer the component's ATTR_NORMAL value, else its value.
                 n = kw.get_string_value(KeywordReferent.ATTR_NORMAL)
                 if (n is not None): 
                     has_norm = True
                     tmp2.append(n)
                 else: 
                     tmp2.append(s)
                 kref.add_slot(KeywordReferent.ATTR_REF, kw, False, 0)
             val = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
             kref.add_slot(KeywordReferent.ATTR_VALUE, val, False, 0)
             # The normal form is the sorted component strings separated by
             # single spaces; it is stored only when it differs from val.
             Utils.setLengthStringIO(tmp, 0)
             tmp2.sort()
             for s in tmp2: 
                 if (tmp.tell() > 0): 
                     print(' ', end="", file=tmp)
                 print(s, end="", file=tmp)
             norm = Utils.toStringStringIO(tmp)
             if (norm != val): 
                 kref.add_slot(KeywordReferent.ATTR_NORMAL, norm, False, 0)
             kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
             KeywordAnalyzer.__set_rank(kref, cur, max0_)
             rt1 = ReferentToken._new734(kref, t0, t, npt.morph)
             kit.embed_token(rt1)
             t = (rt1)
     cur = 0
     t = kit.first_token
     first_pass3294 = True
     # Second sweep: unite pairs of neighbouring OBJECT keywords.
     while True:
         if first_pass3294: first_pass3294 = False
         else: t = t.next0_; cur += 1
         if (not (t is not None)): break
         kw = Utils.asObjectOrNull(t.get_referent(), KeywordReferent)
         if (kw is None or kw.typ != KeywordType.OBJECT): 
             continue
         if (t.next0_ is None or kw.child_words > 2): 
             continue
         t1 = t.next0_
         # Either "<kw> OF [article] <kw2>" ...
         if (t1.is_value("OF", None) and (t1.whitespaces_after_count < 3) and t1.next0_ is not None): 
             t1 = t1.next0_
             if ((isinstance(t1, TextToken)) and MiscHelper.is_eng_article(t1) and t1.next0_ is not None): 
                 t1 = t1.next0_
         # ... or the next token must be genitive and separated from t by at
         # most one whitespace.
         elif (not t1.morph.case_.is_genitive or t.whitespaces_after_count > 1): 
             continue
         kw2 = Utils.asObjectOrNull(t1.get_referent(), KeywordReferent)
         if (kw2 is None): 
             continue
         if (kw == kw2): 
             continue
         # The union may cover at most 3 child words in total.
         if (kw2.typ != KeywordType.OBJECT or (kw.child_words + kw2.child_words) > 3): 
             continue
         kw_un = KeywordReferent()
         kw_un._union(kw, kw2, MiscHelper.get_text_value(t1, t1, GetTextAttr.NO))
         kw_un = (Utils.asObjectOrNull(ad.register_referent(kw_un), KeywordReferent))
         KeywordAnalyzer.__set_rank(kw_un, cur, max0_)
         rt1 = ReferentToken._new734(kw_un, t, t1, t.morph)
         kit.embed_token(rt1)
         t = (rt1)
     if (KeywordAnalyzer.SORT_KEYWORDS_BY_RANK): 
         # Replace the referent list with a copy sorted by rank, highest first.
         all0_ = list(ad.referents)
         all0_.sort(key=operator.attrgetter('rank'), reverse=True)
         ad.referents = all0_
     if (KeywordAnalyzer.ANNOTATION_MAX_SENTENCES > 0): 
         # Build an annotation and register it when one was produced.
         ano = AutoannoSentToken.create_annotation(kit, KeywordAnalyzer.ANNOTATION_MAX_SENTENCES)
         if (ano is not None): 
             ad.register_referent(ano)
Beispiel #19
0
 def tryParse(t: 'Token',
              typ: 'BracketParseAttr' = BracketParseAttr.NO,
              max_tokens: int = 100) -> 'BracketSequenceToken':
     """ Попробовать восстановить последовательность, обрамляемой кавычками
     
     Args:
         t(Token): 
         typ(BracketParseAttr): параметры выделения
         max_tokens(int): максимально токенов (вдруг забыли закрывающую ккавычку)
     
     """
     t0 = t
     cou = 0
     if (not BracketHelper.canBeStartOfSequence(t0, False, False)):
         return None
     br_list = list()
     br_list.append(BracketHelper.Bracket(t0))
     cou = 0
     crlf = 0
     last = None
     lev = 1
     is_assim = br_list[
         0].char0_ != '«' and BracketHelper.M_ASSYMOPEN_CHARS.find(
             br_list[0].char0_) >= 0
     t = t0.next0_
     first_pass2802 = True
     while True:
         if first_pass2802: first_pass2802 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char):
             break
         last = t
         if (t.isCharOf(BracketHelper.M_OPEN_CHARS)
                 or t.isCharOf(BracketHelper.M_CLOSE_CHARS)):
             if (t.is_newline_before
                     and (((typ) & (BracketParseAttr.CANBEMANYLINES)))
                     == (BracketParseAttr.NO)):
                 if (t.whitespaces_before_count > 10
                         or BracketHelper.canBeStartOfSequence(
                             t, False, False)):
                     if (t.isChar('(') and not t0.isChar('(')):
                         pass
                     else:
                         last = t.previous
                         break
             bb = BracketHelper.Bracket(t)
             br_list.append(bb)
             if (len(br_list) > 20):
                 break
             if ((len(br_list) == 3 and br_list[1].can_be_open
                  and bb.can_be_close) and BracketHelper.__mustBeCloseChar(
                      bb.char0_, br_list[1].char0_)
                     and BracketHelper.__mustBeCloseChar(
                         bb.char0_, br_list[0].char0_)):
                 ok = False
                 tt = t.next0_
                 while tt is not None:
                     if (tt.is_newline_before):
                         break
                     if (tt.isChar(',')):
                         break
                     if (tt.isChar('.')):
                         tt = tt.next0_
                         while tt is not None:
                             if (tt.is_newline_before):
                                 break
                             elif (tt.isCharOf(BracketHelper.M_OPEN_CHARS)
                                   or tt.isCharOf(
                                       BracketHelper.M_CLOSE_CHARS)):
                                 bb2 = BracketHelper.Bracket(tt)
                                 if (BracketHelper.canBeEndOfSequence(
                                         tt, False, None, False)
                                         and BracketHelper.__canBeCloseChar(
                                             bb2.char0_,
                                             br_list[0].char0_)):
                                     ok = True
                                 break
                             tt = tt.next0_
                         break
                     if (t.isCharOf(BracketHelper.M_OPEN_CHARS)
                             or t.isCharOf(BracketHelper.M_CLOSE_CHARS)):
                         ok = True
                         break
                     tt = tt.next0_
                 if (not ok):
                     break
             if (is_assim):
                 if (bb.can_be_open and not bb.can_be_close
                         and bb.char0_ == br_list[0].char0_):
                     lev += 1
                 elif (bb.can_be_close and not bb.can_be_open and
                       BracketHelper.M_OPEN_CHARS.find(br_list[0].char0_)
                       == BracketHelper.M_CLOSE_CHARS.find(bb.char0_)):
                     lev -= 1
                     if (lev == 0):
                         break
         else:
             cou += 1
             if ((cou) > max_tokens):
                 break
             if ((((typ) & (BracketParseAttr.CANCONTAINSVERBS))) == (
                     BracketParseAttr.NO)):
                 if (t.morph.language.is_cyrillic):
                     if (t.getMorphClassInDictionary() == MorphClass.VERB):
                         if (not t.morph.class0_.is_adjective
                                 and not t.morph.containsAttr(
                                     "страд.з.", None)):
                             if (t.chars.is_all_lower):
                                 norm = t.getNormalCaseText(
                                     None, False, MorphGender.UNDEFINED,
                                     False)
                                 if (not LanguageHelper.endsWith(
                                         norm, "СЯ")):
                                     if (len(br_list) > 1):
                                         break
                                     if (br_list[0].char0_ != '('):
                                         break
                 elif (t.morph.language.is_en):
                     if (t.morph.class0_ == MorphClass.VERB
                             and t.chars.is_all_lower):
                         break
                 r = t.getReferent()
                 if (r is not None and r.type_name == "ADDRESS"):
                     if (not t0.isChar('(')):
                         break
         if ((((typ) & (BracketParseAttr.CANBEMANYLINES))) !=
             (BracketParseAttr.NO)):
             if (t.is_newline_before):
                 if (t.newlines_before_count > 1):
                     break
                 crlf += 1
             continue
         if (t.is_newline_before):
             if (t.whitespaces_before_count > 15):
                 break
             crlf += 1
             if (not t.chars.is_all_lower):
                 if (t.previous is not None and t.previous.isChar('.')):
                     break
             if ((isinstance(t.previous, MetaToken))
                     and BracketHelper.canBeEndOfSequence(
                         (t.previous).end_token, False, None, False)):
                 break
         if (crlf > 1):
             if (len(br_list) > 1):
                 break
             if (crlf > 10):
                 break
         if (t.isChar(';') and t.is_newline_after):
             break
     if ((len(br_list) == 1 and br_list[0].can_be_open and
          (isinstance(last, MetaToken))) and last.is_newline_after):
         if (BracketHelper.canBeEndOfSequence((last).end_token, False, None,
                                              False)):
             return BracketSequenceToken(t0, last)
     if (len(br_list) < 1):
         return None
     i = 1
     while i < (len(br_list) - 1):
         if (br_list[i].char0_ == '<' and br_list[i + 1].char0_ == '>'):
             br_list[i].can_be_open = True
             br_list[i + 1].can_be_close = True
         i += 1
     internals = None
     while len(br_list) > 3:
         i = len(br_list) - 1
         if ((br_list[i].can_be_close and br_list[i - 1].can_be_open
              and not BracketHelper.__canBeCloseChar(
                  br_list[i].char0_, br_list[0].char0_))
                 and BracketHelper.__canBeCloseChar(br_list[i].char0_,
                                                    br_list[i - 1].char0_)):
             del br_list[len(br_list) - 2:len(br_list) - 2 + 2]
             continue
         break
     while len(br_list) >= 4:
         changed = False
         i = 1
         while i < (len(br_list) - 2):
             if ((br_list[i].can_be_open and not br_list[i].can_be_close
                  and br_list[i + 1].can_be_close)
                     and not br_list[i + 1].can_be_open):
                 ok = False
                 if (BracketHelper.__mustBeCloseChar(
                         br_list[i + 1].char0_, br_list[i].char0_)
                         or br_list[i].char0_ != br_list[0].char0_):
                     ok = True
                     if ((i == 1 and ((i + 2) < len(br_list))
                          and br_list[i + 2].char0_ == ')')
                             and br_list[i + 1].char0_ != ')'
                             and BracketHelper.__canBeCloseChar(
                                 br_list[i + 1].char0_,
                                 br_list[i - 1].char0_)):
                         br_list[i + 2] = br_list[i + 1]
                 elif (i > 1 and ((i + 2) < len(br_list))
                       and BracketHelper.__mustBeCloseChar(
                           br_list[i + 2].char0_, br_list[i - 1].char0_)):
                     ok = True
                 if (ok):
                     if (internals is None):
                         internals = list()
                     internals.append(
                         BracketSequenceToken(br_list[i].source,
                                              br_list[i + 1].source))
                     del br_list[i:i + 2]
                     changed = True
                     break
             i += 1
         if (not changed):
             break
     res = None
     if ((len(br_list) >= 4 and br_list[1].can_be_open
          and br_list[2].can_be_close) and br_list[3].can_be_close
             and not br_list[3].can_be_open):
         if (BracketHelper.__canBeCloseChar(br_list[3].char0_,
                                            br_list[0].char0_)):
             res = BracketSequenceToken(br_list[0].source,
                                        br_list[3].source)
             if (br_list[0].source.next0_ != br_list[1].source
                     or br_list[2].source.next0_ != br_list[3].source):
                 res.internal.append(
                     BracketSequenceToken(br_list[1].source,
                                          br_list[2].source))
             if (internals is not None):
                 res.internal.extend(internals)
     if ((res is None and len(br_list) >= 3 and br_list[2].can_be_close)
             and not br_list[2].can_be_open):
         if ((((typ) & (BracketParseAttr.NEARCLOSEBRACKET))) !=
             (BracketParseAttr.NO)):
             if (BracketHelper.__canBeCloseChar(br_list[1].char0_,
                                                br_list[0].char0_)):
                 return BracketSequenceToken(br_list[0].source,
                                             br_list[1].source)
         ok = True
         if (BracketHelper.__canBeCloseChar(br_list[2].char0_,
                                            br_list[0].char0_)
                 and BracketHelper.__canBeCloseChar(br_list[1].char0_,
                                                    br_list[0].char0_)
                 and br_list[1].can_be_close):
             t = br_list[1].source
             while t != br_list[2].source and t is not None:
                 if (t.is_newline_before):
                     ok = False
                     break
                 if (t.chars.is_letter and t.chars.is_all_lower):
                     ok = False
                     break
                 npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO,
                                                 0)
                 if (npt is not None):
                     t = npt.end_token
                 t = t.next0_
             if (ok):
                 t = br_list[0].source.next0_
                 while t != br_list[1].source and t is not None:
                     if (t.is_newline_before):
                         return BracketSequenceToken(
                             br_list[0].source, t.previous)
                     t = t.next0_
             lev1 = 0
             tt = br_list[0].source.previous
             first_pass2803 = True
             while True:
                 if first_pass2803: first_pass2803 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (tt.is_newline_after or tt.is_table_control_char):
                     break
                 if (not ((isinstance(tt, TextToken)))):
                     continue
                 if (tt.chars.is_letter or tt.length_char > 1):
                     continue
                 ch = (tt).term[0]
                 if (BracketHelper.__canBeCloseChar(ch, br_list[0].char0_)):
                     lev1 += 1
                 elif (BracketHelper.__canBeCloseChar(
                         br_list[1].char0_, ch)):
                     lev1 -= 1
                     if (lev1 < 0):
                         return BracketSequenceToken(
                             br_list[0].source, br_list[1].source)
         if (ok and BracketHelper.__canBeCloseChar(br_list[2].char0_,
                                                   br_list[0].char0_)):
             intern = BracketSequenceToken(br_list[1].source,
                                           br_list[2].source)
             res = BracketSequenceToken(br_list[0].source,
                                        br_list[2].source)
             res.internal.append(intern)
         elif (ok and BracketHelper.__canBeCloseChar(
                 br_list[2].char0_, br_list[1].char0_)
               and br_list[0].can_be_open):
             if (BracketHelper.__canBeCloseChar(br_list[2].char0_,
                                                br_list[0].char0_)):
                 intern = BracketSequenceToken(br_list[1].source,
                                               br_list[2].source)
                 res = BracketSequenceToken(br_list[0].source,
                                            br_list[2].source)
                 res.internal.append(intern)
             elif (len(br_list) == 3):
                 return None
     if (res is None and len(br_list) > 1 and br_list[1].can_be_close):
         res = BracketSequenceToken(br_list[0].source, br_list[1].source)
     if (res is None
             and len(br_list) > 1 and BracketHelper.__canBeCloseChar(
                 br_list[1].char0_, br_list[0].char0_)):
         res = BracketSequenceToken(br_list[0].source, br_list[1].source)
     if (res is None and len(br_list) == 2
             and br_list[0].char0_ == br_list[1].char0_):
         res = BracketSequenceToken(br_list[0].source, br_list[1].source)
     if (res is not None and internals is not None):
         for i in internals:
             if (i.begin_char < res.end_char):
                 res.internal.append(i)
     if (res is None):
         cou = 0
         tt = t0.next0_
         first_pass2804 = True
         while True:
             if first_pass2804: first_pass2804 = False
             else:
                 tt = tt.next0_
                 cou += 1
             if (not (tt is not None)): break
             if (tt.is_table_control_char):
                 break
             if (MiscHelper.canBeStartOfSentence(tt)):
                 break
             if (max_tokens > 0 and cou > max_tokens):
                 break
             mt = Utils.asObjectOrNull(tt, MetaToken)
             if (mt is None):
                 continue
             if (isinstance(mt.end_token, TextToken)):
                 if ((mt.end_token).isCharOf(BracketHelper.M_CLOSE_CHARS)):
                     bb = BracketHelper.Bracket(
                         Utils.asObjectOrNull(mt.end_token, TextToken))
                     if (bb.can_be_close and BracketHelper.__canBeCloseChar(
                             bb.char0_, br_list[0].char0_)):
                         return BracketSequenceToken(t0, tt)
     return res
 def when(self) -> 'Referent':
     """Return the value of the ATTR_WHEN slot ("when") viewed as a Referent.

     The original note says the value is a DateReferent or a DateRangeReferent.
     """
     slot_value = self.getSlotValue(BusinessFactReferent.ATTR_WHEN)
     return Utils.asObjectOrNull(slot_value, Referent)
Beispiel #21
0
 def __tryAttach(self, t : 'Token', key_word : bool) -> 'ReferentToken':
     """Try to assemble a BankDataReferent from the tokens starting at ``t``.

     The scan collects UriReferent items whose scheme is accepted by
     ``BankAnalyzer.__isBankReq``, an ORGANIZATION referent used as the bank
     and, optionally, a correspondent bank.

     Args:
         t: first token to inspect; ``None`` yields ``None``.
         key_word: when False, an organization or another token met before
             any requisite was collected aborts the attempt.

     Returns:
         A ReferentToken spanning from the start token to the last accepted
         token, or ``None`` when no suitable set of requisites was found.
     """
     if (t is None): 
         return None
     t0 = t
     t1 = t
     # uris holds the accepted UriReferent items, uris_keys their schemes (same order).
     uris_keys = None
     uris = None
     org0_ = None
     cor_org = None
     org_is_bank = False
     # Number of "unhelpful" tokens seen since the last accepted requisite.
     empty = 0
     last_uri = None
     first_pass2749 = True
     # Emulated for-loop: every `continue` below advances t to t.next0_.
     while True:
         if first_pass2749: first_pass2749 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char and t != t0): 
             break
         # Commas, prepositions and slashes are skipped silently.
         if (t.is_comma or t.morph.class0_.is_preposition or t.isCharOf("/\\")): 
             continue
         bank_keyword = False
         # Skip the two-word phrase "ПОЛНЫЙ НАИМЕНОВАНИЕ" / "ПОЛНЫЙ НАЗВАНИЕ".
         if (t.isValue("ПОЛНЫЙ", None) and t.next0_ is not None and ((t.next0_.isValue("НАИМЕНОВАНИЕ", None) or t.next0_.isValue("НАЗВАНИЕ", None)))): 
             t = t.next0_.next0_
             if (t is None): 
                 break
         if (t.isValue("БАНК", None)): 
             if ((isinstance(t, ReferentToken)) and t.getReferent().type_name == "ORGANIZATION"): 
                 bank_keyword = True
             # Look past an optional noun phrase and an optional ':' after the word.
             tt = t.next0_
             npt = NounPhraseHelper.tryParse(tt, NounPhraseParseAttr.NO, 0)
             if (npt is not None): 
                 tt = npt.end_token.next0_
             if (tt is not None and tt.isChar(':')): 
                 tt = tt.next0_
             if (tt is not None): 
                 if (not bank_keyword): 
                     t = tt
                     bank_keyword = True
                 elif (tt.getReferent() is not None and tt.getReferent().type_name == "ORGANIZATION"): 
                     t = tt
         r = t.getReferent()
         if (r is not None and r.type_name == "ORGANIZATION"): 
             is_bank = False
             kk = 0
             rr = r
             # Check the organization and up to three parents for KIND == "Bank"
             # (third compareStrings argument is True - presumably ignore-case; confirm).
             while rr is not None and (kk < 4): 
                 is_bank = Utils.compareStrings(Utils.ifNotNull(rr.getStringValue("KIND"), ""), "Bank", True) == 0
                 if (is_bank): 
                     break
                 rr = rr.parent_referent; kk += 1
             if (not is_bank and bank_keyword): 
                 is_bank = True
             # A non-bank organization after an "ИНН" requisite rejects the whole attempt.
             if (not is_bank and uris is not None and "ИНН" in uris_keys): 
                 return None
             # An organization right after "В" that follows a "К/С" requisite
             # is stored as the correspondent bank.
             if ((last_uri is not None and last_uri.scheme == "К/С" and t.previous is not None) and t.previous.isValue("В", None)): 
                 cor_org = r
                 t1 = t
             elif (org0_ is None or ((not org_is_bank and is_bank))): 
                 org0_ = r
                 t1 = t
                 org_is_bank = is_bank
                 if (is_bank): 
                     continue
             if (uris is None and not key_word): 
                 return None
             continue
         if (isinstance(r, UriReferent)): 
             u = Utils.asObjectOrNull(r, UriReferent)
             if (uris is None): 
                 # The first requisite must be a bank requisite, otherwise give up.
                 if (not BankAnalyzer.__isBankReq(u.scheme)): 
                     return None
                 if (u.scheme == "ИНН" and t.is_newline_after): 
                     return None
                 uris = list()
                 uris_keys = list()
             else: 
                 # Later requisites end the scan when foreign, repeated,
                 # or an "ИНН" separated from the previous requisite.
                 if (not BankAnalyzer.__isBankReq(u.scheme)): 
                     break
                 if (u.scheme in uris_keys): 
                     break
                 if (u.scheme == "ИНН"): 
                     if (empty > 0): 
                         break
             uris_keys.append(u.scheme)
             uris.append(u)
             last_uri = u
             t1 = t
             empty = 0
             continue
         elif (uris is None and not key_word and not org_is_bank): 
             return None
         if (r is not None and ((r.type_name == "GEO" or r.type_name == "ADDRESS"))): 
             empty += 1
             continue
         if (isinstance(t, TextToken)): 
             if (t.isValue("ПОЛНЫЙ", None) or t.isValue("НАИМЕНОВАНИЕ", None) or t.isValue("НАЗВАНИЕ", None)): 
                 pass
             elif (t.chars.is_letter): 
                 tok = BankAnalyzer.__m_ontology.tryParse(t, TerminParseAttr.NO)
                 if (tok is not None): 
                     t = tok.end_token
                     empty = 0
                 else: 
                     empty += 1
                     # A new line that starts with "<noun phrase>:" ends the scan.
                     if (t.is_newline_before): 
                         nnn = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
                         if (nnn is not None and nnn.end_token.next0_ is not None and nnn.end_token.next0_.isChar(':')): 
                             break
                 if (uris is None): 
                     break
         if (empty > 2): 
             break
         if (empty > 0 and t.isChar(':') and t.is_newline_after): 
             break
         if (((isinstance(t, NumberToken)) and t.is_newline_before and t.next0_ is not None) and not t.next0_.chars.is_letter): 
             break
     if (uris is None): 
         return None
     # At least one of the "Р/С" / "Л/С" requisites is mandatory.
     if (not "Р/С" in uris_keys and not "Л/С" in uris_keys): 
         return None
     # NOTE(review): `ok` is assigned here but never read in this method.
     ok = False
     if ((len(uris) < 2) and org0_ is None): 
         return None
     bdr = BankDataReferent()
     for u in uris: 
         bdr.addSlot(BankDataReferent.ATTR_ITEM, u, False, 0)
     if (org0_ is not None): 
         bdr.addSlot(BankDataReferent.ATTR_BANK, org0_, False, 0)
     if (cor_org is not None): 
         bdr.addSlot(BankDataReferent.ATTR_CORBANK, cor_org, False, 0)
     # If the token before the start is an ORGANIZATION, also take its
     # bank-requisite URIs whose scheme was not collected above.
     org0 = (None if t0.previous is None else t0.previous.getReferent())
     if (org0 is not None and org0.type_name == "ORGANIZATION"): 
         for s in org0.slots: 
             if (isinstance(s.value, UriReferent)): 
                 u = Utils.asObjectOrNull(s.value, UriReferent)
                 if (BankAnalyzer.__isBankReq(u.scheme)): 
                     if (not u.scheme in uris_keys): 
                         bdr.addSlot(BankDataReferent.ATTR_ITEM, u, False, 0)
     return ReferentToken(bdr, t0, t1)
 def who(self) -> 'Referent':
     """Return the value of the ATTR_WHO slot ("who", active voice) viewed as a Referent."""
     slot_value = self.getSlotValue(BusinessFactReferent.ATTR_WHO)
     return Utils.asObjectOrNull(slot_value, Referent)
 def date(self) -> 'DateReferent':
     """Return the value of the ATTR_DATE slot (the date) viewed as a DateReferent."""
     slot_value = self.get_slot_value(TitlePageReferent.ATTR_DATE)
     return Utils.asObjectOrNull(slot_value, DateReferent)
 def whom(self) -> 'Referent':
     """Return the value of the ATTR_WHOM slot ("whom", passive voice) viewed as a Referent."""
     slot_value = self.getSlotValue(BusinessFactReferent.ATTR_WHOM)
     return Utils.asObjectOrNull(slot_value, Referent)
 def city(self) -> 'GeoReferent':
     """Return the value of the ATTR_CITY slot (the city) viewed as a GeoReferent."""
     slot_value = self.get_slot_value(TitlePageReferent.ATTR_CITY)
     return Utils.asObjectOrNull(slot_value, GeoReferent)
Beispiel #26
0
 def canBeEquals(self, obj : 'Referent', typ : 'EqualType') -> bool:
     """Decide whether this geo referent may denote the same object as ``obj``.

     Args:
         obj: candidate referent; anything that is not a GeoReferent never
             matches.
         typ: comparison mode; it is only forwarded to the recursive
             comparison of the ``higher`` referents.

     Returns:
         True when the two referents are compatible, otherwise False.
     """
     geo_ = Utils.asObjectOrNull(obj, GeoReferent)
     if geo_ is None:
         return False
     # An identical, non-empty alpha2 value settles the question at once.
     if geo_.alpha2 is not None and geo_.alpha2 == self.alpha2:
         return True
     if self.is_city != geo_.is_city:
         return False
     if self.is_union != geo_.is_union:
         return False
     if self.is_union:
         # Unions are equal when every ATTR_REF value of one side is present
         # on the other side, checked in both directions.
         for s in self.slots:
             if (s.type_name == GeoReferent.ATTR_REF
                     and obj.findSlot(GeoReferent.ATTR_REF, s.value, True) is None):
                 return False
         for s in obj.slots:
             if (s.type_name == GeoReferent.ATTR_REF
                     and self.findSlot(GeoReferent.ATTR_REF, s.value, True) is None):
                 return False
         return True
     ref1 = Utils.asObjectOrNull(self.getSlotValue(GeoReferent.ATTR_REF), Referent)
     ref2 = Utils.asObjectOrNull(geo_.getSlotValue(GeoReferent.ATTR_REF), Referent)
     if ref1 is not None and ref2 is not None and ref1 != ref2:
         return False
     # Both sides must agree on being a region/state. The former nested
     # is_territory comparison returned False on either path, so it is dropped.
     self_is_area = self.is_region or self.is_state
     other_is_area = geo_.is_region or geo_.is_state
     if self_is_area != other_is_area:
         return False
     # At least one ATTR_NAME value has to be shared.
     has_common_name = any(
         s.type_name == GeoReferent.ATTR_NAME
         and geo_.findSlot(s.type_name, s.value, True) is not None
         for s in self.slots)
     if not has_common_name:
         return False
     if self.is_region and geo_.is_region:
         typs1 = self.typs
         typs2 = geo_.typs
         # Two regions need a common type, or a pair of types where one ends
         # with the other; the search stops at the first hit.
         types_agree = any(
             t in typs2
             or any(LanguageHelper.endsWith(tt, t) or LanguageHelper.endsWith(t, tt)
                    for tt in typs2)
             for t in typs1)
         if not types_agree:
             return False
     if self.higher is not None and geo_.higher is not None:
         if GeoReferent.__checkRoundDep(self) or GeoReferent.__checkRoundDep(geo_):
             return False
         # The parents must match directly, or with one level skipped on
         # either side.
         higher_agrees = (
             self.higher.canBeEquals(geo_.higher, typ)
             or (geo_.higher.higher is not None
                 and self.higher.canBeEquals(geo_.higher.higher, typ))
             or (self.higher.higher is not None
                 and self.higher.higher.canBeEquals(geo_.higher, typ)))
         if not higher_agrees:
             return False
     return True
Beispiel #27
0
 def process(self, kit: 'AnalysisKit') -> None:
     """Find book-link references in the kit's token chain and embed them.

     Pass 1 walks every token. A bracket sequence opened by '(' that is
     longer than 70 and shorter than 400 characters and is followed by a
     line break or by '.' / ';' is tried as an inline reference. Otherwise
     only tokens that start a line are parsed. Registered references that
     carry a number are remembered per number.

     Pass 2 looks for short inline references and links each one to the
     first remembered reference with the same number whose first occurrence
     begins after the current token.
     """
     data = kit.getAnalyzerData(self)
     lit_block_credit = 0
     refs_by_num = dict()

     def link_book(pair):
         # pair[1] carries the book: register and embed it, attach it to the
         # reference in pair[0], and nest it when the boundaries coincide.
         ref_tok = pair[0]
         book_tok = pair[1]
         book_tok.referent = data.registerReferent(book_tok.referent)
         kit.embedToken(book_tok)
         ref_tok.referent.book = Utils.asObjectOrNull(book_tok.referent,
                                                      BookLinkReferent)
         if ref_tok.begin_char == book_tok.begin_char:
             ref_tok.begin_token = book_tok
         if ref_tok.end_char == book_tok.end_char:
             ref_tok.end_token = book_tok

     t = kit.first_token
     started = False
     while True:
         # Advance on every iteration except the first one.
         if started:
             t = t.next0_
         started = True
         if t is None:
             break
         if t.isChar('('):
             br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
             if (br is not None
                     and br.length_char > 70 and br.length_char < 400
                     and (br.is_newline_after
                          or (br.end_token.next0_ is not None
                              and br.end_token.next0_.isCharOf(".;")))):
                 found = BookLinkAnalyzer.__tryParse(t.next0_, False,
                                                     br.end_char)
                 if found is not None and len(found) >= 1:
                     if len(found) > 1:
                         link_book(found)
                     inline = found[0]
                     # The inline reference covers the whole bracket pair.
                     inline.begin_token = t
                     inline.end_token = br.end_token
                     inline.referent.typ = BookLinkRefType.INLINE
                     inline.referent = data.registerReferent(inline.referent)
                     kit.embedToken(inline)
                     t = inline
                     continue
         if not t.is_newline_before:
             continue
         if lit_block_credit <= 0:
             block_end = BookLinkToken.parseStartOfLitBlock(t)
             if block_end is not None:
                 lit_block_credit = 5
                 t = block_end
                 continue
         found = BookLinkAnalyzer.__tryParse(t, lit_block_credit > 0, 0)
         if found is None or len(found) < 1:
             # A miss lowers the counter, never below zero.
             lit_block_credit = max(lit_block_credit - 1, 0)
             continue
         # A hit raises the counter, never above five.
         lit_block_credit = min(lit_block_credit + 1, 5)
         if len(found) > 1:
             link_book(found)
         head = found[0]
         link_ref = Utils.asObjectOrNull(head.referent, BookLinkRefReferent)
         link_ref = Utils.asObjectOrNull(data.registerReferent(link_ref),
                                         BookLinkRefReferent)
         head.referent = link_ref
         kit.embedToken(head)
         t = head
         if link_ref.number is None:
             continue
         holder = RefOutArgWrapper(None)
         known = Utils.tryGetValue(refs_by_num, link_ref.number, holder)
         same_number = holder.value
         if not known:
             same_number = list()
             refs_by_num[link_ref.number] = same_number
         same_number.append(link_ref)

     t = kit.first_token
     started = False
     while True:
         if started:
             t = t.next0_
         started = True
         if t is None:
             break
         if not isinstance(t, TextToken):
             continue
         short = BookLinkAnalyzer.__tryParseShortInline(t)
         if short is None:
             continue
         link_ref = Utils.asObjectOrNull(short.referent, BookLinkRefReferent)
         holder = RefOutArgWrapper(None)
         known = Utils.tryGetValue(refs_by_num,
                                   Utils.ifNotNull(link_ref.number, ""),
                                   holder)
         same_number = holder.value
         if not known:
             continue
         # Pick the first remembered reference that begins after this token;
         # when there is none, move on to the next token.
         for target in same_number:
             if t.begin_char < target.occurrence[0].begin_char:
                 break
         else:
             continue
         link_ref.book = target.book
         if link_ref.pages is None:
             link_ref.pages = target.pages
         link_ref.typ = BookLinkRefType.INLINE
         link_ref = Utils.asObjectOrNull(data.registerReferent(link_ref),
                                         BookLinkRefReferent)
         short.referent = link_ref
         kit.embedToken(short)
         t = short
Beispiel #28
0
 def ref(self) -> 'Referent':
     """Return the value of the ATTR_REF slot viewed as a Referent."""
     slot_value = self.getSlotValue(InstrumentBlockReferent.ATTR_REF)
     return Utils.asObjectOrNull(slot_value, Referent)
Beispiel #29
0
 def _process(begin : 'Token', max_char_pos : int, kit : 'AnalysisKit', end_token : 'Token') -> 'TitlePageReferent':
     """Build a TitlePageReferent from the title-page lines starting at ``begin``.

     Args:
         begin: first token to analyse.
         max_char_pos: when greater than 0, character position that limits
             the scan.
         kit: analysis kit used to embed the produced tokens; when None the
             name occurrence is recorded on the referent instead.
         end_token: out-argument wrapper; its ``value`` is first set to
             ``begin`` and then moved forward to the last token consumed.

     Returns:
         The filled referent, or None when no lines were parsed or the
         referent ended up without slots.
     """
     end_token.value = begin
     res = TitlePageReferent()
     term = None
     lines = Line.parse(begin, 30, 1500, max_char_pos)
     if (len(lines) < 1): 
         return None
     # cou = number of leading lines that belong to the title page.
     cou = len(lines)
     min_newlines_count = 10
     # Histogram: newlines_before_count -> number of lines having that value.
     lines_count_stat = dict()
     i = 0
     while i < len(lines): 
         # The title page ends where the text or the contents may start.
         if (TitleNameToken.can_be_start_of_text_or_content(lines[i].begin_token, lines[i].end_token)): 
             cou = i
             break
         j = lines[i].newlines_before_count
         if (i > 0 and j > 0): 
             if (not j in lines_count_stat): 
                 lines_count_stat[j] = 1
             else: 
                 lines_count_stat[j] += 1
         i += 1
     # The most frequent gap between lines becomes min_newlines_count.
     max0_ = 0
     for kp in lines_count_stat.items(): 
         if (kp[1] > max0_): 
             max0_ = kp[1]
             min_newlines_count = kp[0]
     end_char = (lines[cou - 1].end_char if cou > 0 else 0)
     if (max_char_pos > 0 and end_char > max_char_pos): 
         end_char = max_char_pos
     # Collect name candidates from windows of up to five consecutive lines.
     names = list()
     i = 0
     while i < cou: 
         # NOTE(review): no-op, looks like a leftover debugging hook.
         if (i == 6): 
             pass
         j = i
         while (j < cou) and (j < (i + 5)): 
             # NOTE(review): no-op, looks like a leftover debugging hook.
             if (i == 6 and j == 8): 
                 pass
             if (j > i): 
                 # Do not extend a window across an EN/RU switch or a gap
                 # at least twice as large as the usual one.
                 if (lines[j - 1].is_pure_en and lines[j].is_pure_ru): 
                     break
                 if (lines[j - 1].is_pure_ru and lines[j].is_pure_en): 
                     break
                 if (lines[j].newlines_before_count >= (min_newlines_count * 2)): 
                     break
             ttt = TitleNameToken.try_parse(lines[i].begin_token, lines[j].end_token, min_newlines_count)
             if (ttt is not None): 
                 if (lines[i].is_pure_en): 
                     ttt.morph.language = MorphLang.EN
                 elif (lines[i].is_pure_ru): 
                     ttt.morph.language = MorphLang.RU
                 names.append(ttt)
             j += 1
         i += 1
     TitleNameToken.sort(names)
     name_rt = None
     if (len(names) > 0): 
         # Take the first candidate after sorting, unless it is English and
         # a Russian candidate with a positive rank exists.
         i0 = 0
         if (names[i0].morph.language.is_en): 
             ii = 1
             while ii < len(names): 
                 if (names[ii].morph.language.is_ru and names[ii].rank > 0): 
                     i0 = ii
                     break
                 ii += 1
         term = res._add_name(names[i0].begin_name_token, names[i0].end_name_token)
         if (names[i0].type_value is not None): 
             res._add_type(names[i0].type_value)
         if (names[i0].speciality is not None): 
             res.speciality = names[i0].speciality
         rt = ReferentToken(res, names[i0].begin_token, names[i0].end_token)
         if (kit is not None): 
             kit.embed_token(rt)
         else: 
             res.add_occurence(TextAnnotation(rt.begin_token, rt.end_token))
         end_token.value = rt.end_token
         name_rt = rt
         # Keep `begin` valid when the name token replaced the first token.
         if (begin.begin_char == rt.begin_char): 
             begin = (rt)
     if (term is not None and kit is not None): 
         # Embed every further match of the name term, starting from the
         # kit's first token.
         t = kit.first_token
         first_pass3397 = True
         while True:
             if first_pass3397: first_pass3397 = False
             else: t = t.next0_
             if (not (t is not None)): break
             tok = term.try_parse(t, TerminParseAttr.NO)
             if (tok is None): 
                 continue
             t0 = t
             t1 = tok.end_token
             if (t1.next0_ is not None and t1.next0_.is_char('.')): 
                 t1 = t1.next0_
             # Include surrounding brackets when the match sits inside them.
             if (BracketHelper.can_be_start_of_sequence(t0.previous, False, False) and BracketHelper.can_be_end_of_sequence(t1.next0_, False, None, False)): 
                 t0 = t0.previous
                 t1 = t1.next0_
             rt = ReferentToken(res, t0, t1)
             kit.embed_token(rt)
             t = (rt)
     pr = PersonRelations()
     # pers_typ is the role applied to the persons that follow a role marker.
     pers_typ = TitleItemToken.Types.UNDEFINED
     pers_types = pr.rel_types
     t = begin
     first_pass3398 = True
     # Main scan: title items, persons, dates, cities, organizations.
     while True:
         if first_pass3398: first_pass3398 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (max_char_pos > 0 and t.begin_char > max_char_pos): 
             break
         if (t == name_rt): 
             continue
         tpt = TitleItemToken.try_attach(t)
         if (tpt is not None): 
             pers_typ = TitleItemToken.Types.UNDEFINED
             if (tpt.typ == TitleItemToken.Types.TYP): 
                 if (len(res.types) == 0): 
                     res._add_type(tpt.value)
                 elif (len(res.types) == 1): 
                     ty = res.types[0].upper()
                     if (ty == "РЕФЕРАТ"): 
                         res._add_type(tpt.value)
                     elif (ty == "АВТОРЕФЕРАТ"): 
                         # For the listed dissertation kinds a combined
                         # type string is stored.
                         if (tpt.value == "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"): 
                             res.add_slot(TitlePageReferent.ATTR_TYPE, "автореферат кандидатской диссертации", True, 0)
                         elif (tpt.value == "ДОКТОРСКАЯ ДИССЕРТАЦИЯ"): 
                             res.add_slot(TitlePageReferent.ATTR_TYPE, "автореферат докторской диссертации", True, 0)
                         elif (tpt.value == "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ"): 
                             res.add_slot(TitlePageReferent.ATTR_TYPE, "автореферат магистерской диссертации", True, 0)
                         elif (tpt.value == "КАНДИДАТСЬКА ДИСЕРТАЦІЯ"): 
                             res.add_slot(TitlePageReferent.ATTR_TYPE, "автореферат кандидатської дисертації", True, 0)
                         elif (tpt.value == "ДОКТОРСЬКА ДИСЕРТАЦІЯ"): 
                             res.add_slot(TitlePageReferent.ATTR_TYPE, "автореферат докторської дисертації", True, 0)
                         elif (tpt.value == "МАГІСТЕРСЬКА ДИСЕРТАЦІЯ"): 
                             res.add_slot(TitlePageReferent.ATTR_TYPE, "автореферат магістерської дисертації", True, 0)
                         else: 
                             res._add_type(tpt.value)
                     elif (tpt.value == "РЕФЕРАТ" or tpt.value == "АВТОРЕФЕРАТ"): 
                         if (not tpt.value in ty): 
                             res._add_type(tpt.value)
             elif (tpt.typ == TitleItemToken.Types.SPECIALITY): 
                 if (res.speciality is None): 
                     res.speciality = tpt.value
             elif (tpt.typ in pers_types): 
                 pers_typ = tpt.typ
             t = tpt.end_token
             if (t.end_char > end_token.value.end_char): 
                 end_token.value = t
             if (t.next0_ is not None and t.next0_.is_char_of(":-")): 
                 t = t.next0_
             continue
         if (t.end_char > end_char): 
             break
         rli = t.get_referents()
         if (rli is None): 
             continue
         # Skip referents introduced by "ИМЕНИ" / "ИМ" / "ИМ." right before them.
         if (not t.is_newline_before and (isinstance(t.previous, TextToken))): 
             s = t.previous.term
             if (s == "ИМЕНИ" or s == "ИМ"): 
                 continue
             if (s == "." and t.previous.previous is not None and t.previous.previous.is_value("ИМ", None)): 
                 continue
         for r in rli: 
             if (isinstance(r, PersonReferent)): 
                 # Only a person that is the first referent of the token counts.
                 if (r != rli[0]): 
                     continue
                 p = Utils.asObjectOrNull(r, PersonReferent)
                 # A '.' right before the person resets the pending role.
                 if (pers_typ != TitleItemToken.Types.UNDEFINED): 
                     if (t.previous is not None and t.previous.is_char('.')): 
                         pers_typ = TitleItemToken.Types.UNDEFINED
                 typ = pr.calc_typ_from_attrs(p)
                 if (typ != TitleItemToken.Types.UNDEFINED): 
                     pr.add(p, typ, 1)
                     pers_typ = typ
                 elif (pers_typ != TitleItemToken.Types.UNDEFINED): 
                     pr.add(p, pers_typ, 1)
                 elif (t.previous is not None and t.previous.is_char('©')): 
                     pers_typ = TitleItemToken.Types.WORKER
                     pr.add(p, pers_typ, 1)
                 else: 
                     # No role known: look forward for the title referent itself.
                     tt = t.next0_
                     first_pass3399 = True
                     while True:
                         if first_pass3399: first_pass3399 = False
                         else: tt = tt.next0_
                         if (not (tt is not None)): break
                         rr = tt.get_referent()
                         if (rr == res): 
                             pers_typ = TitleItemToken.Types.WORKER
                             break
                         if (isinstance(rr, PersonReferent)): 
                             # NOTE(review): this inspects `r` (the person being
                             # classified) rather than `rr` (the person found by
                             # the forward scan) - confirm which one is intended.
                             if (pr.calc_typ_from_attrs(Utils.asObjectOrNull(r, PersonReferent)) != TitleItemToken.Types.UNDEFINED): 
                                 break
                             else: 
                                 continue
                         if (rr is not None): 
                             break
                         tpt = TitleItemToken.try_attach(tt)
                         if (tpt is not None): 
                             if (tpt.typ != TitleItemToken.Types.TYP and tpt.typ != TitleItemToken.Types.TYPANDTHEME): 
                                 break
                             tt = tpt.end_token
                             if (tt.end_char > end_token.value.end_char): 
                                 end_token.value = tt
                             continue
                     if (pers_typ == TitleItemToken.Types.UNDEFINED): 
                         # Still unknown: look backward for the title referent
                         # or one of the listed role words.
                         tt = t.previous
                         while tt is not None: 
                             rr = tt.get_referent()
                             if (rr == res): 
                                 pers_typ = TitleItemToken.Types.WORKER
                                 break
                             if (rr is not None): 
                                 break
                             if ((tt.is_value("СТУДЕНТ", None) or tt.is_value("СТУДЕНТКА", None) or tt.is_value("СЛУШАТЕЛЬ", None)) or tt.is_value("ДИПЛОМНИК", None) or tt.is_value("ИСПОЛНИТЕЛЬ", None)): 
                                 pers_typ = TitleItemToken.Types.WORKER
                                 break
                             tpt = TitleItemToken.try_attach(tt)
                             if (tpt is not None and tpt.typ != TitleItemToken.Types.TYP): 
                                 break
                             tt = tt.previous
                     # A person whose role stays UNDEFINED is added with 0.5
                     # as the last argument instead of 1.
                     if (pers_typ != TitleItemToken.Types.UNDEFINED): 
                         pr.add(p, pers_typ, 1)
                     else: 
                         pr.add(p, pers_typ, 0.5)
                     if (t.end_char > end_token.value.end_char): 
                         end_token.value = t
                 continue
             # A non-person first referent resets the pending role.
             if (r == rli[0]): 
                 pers_typ = TitleItemToken.Types.UNDEFINED
             if (isinstance(r, DateReferent)): 
                 if (res.date is None): 
                     res.date = Utils.asObjectOrNull(r, DateReferent)
                     if (t.end_char > end_token.value.end_char): 
                         end_token.value = t
             elif (isinstance(r, GeoReferent)): 
                 if (res.city is None and r.is_city): 
                     res.city = Utils.asObjectOrNull(r, GeoReferent)
                     if (t.end_char > end_token.value.end_char): 
                         end_token.value = t
             if (isinstance(r, OrganizationReferent)): 
                 org0_ = Utils.asObjectOrNull(r, OrganizationReferent)
                 # An organization typed "курс" with a number from 1 to 7
                 # gives the student year.
                 if ("курс" in org0_.types and org0_.number is not None): 
                     i = 0
                     wrapi2673 = RefOutArgWrapper(0)
                     inoutres2674 = Utils.tryParseInt(org0_.number, wrapi2673)
                     i = wrapi2673.value
                     if (inoutres2674): 
                         if (i > 0 and (i < 8)): 
                             res.student_year = i
                 # Climb from departments to the first non-department owner.
                 while org0_.higher is not None: 
                     if (org0_.kind != OrganizationKind.DEPARTMENT): 
                         break
                     org0_ = org0_.higher
                 if (org0_.kind != OrganizationKind.DEPARTMENT): 
                     if (res.org0_ is None): 
                         res.org0_ = org0_
                     elif (OrganizationReferent.can_be_higher(res.org0_, org0_)): 
                         res.org0_ = org0_
                 if (t.end_char > end_token.value.end_char): 
                     end_token.value = t
             if ((isinstance(r, UriReferent)) or (isinstance(r, GeoReferent))): 
                 if (t.end_char > end_token.value.end_char): 
                     end_token.value = t
     # Store the collected persons under the attribute of their role.
     for ty in pers_types: 
         for p in pr.get_persons(ty): 
             if (pr.get_attr_name_for_type(ty) is not None): 
                 res.add_slot(pr.get_attr_name_for_type(ty), p, False, 0)
     # Without an author, the first person with an undefined role is used.
     if (res.get_slot_value(TitlePageReferent.ATTR_AUTHOR) is None): 
         for p in pr.get_persons(TitleItemToken.Types.UNDEFINED): 
             res.add_slot(TitlePageReferent.ATTR_AUTHOR, p, False, 0)
             break
     # Fall back to the city stored in the organization's ATTR_GEO slot.
     if (res.city is None and res.org0_ is not None): 
         s = res.org0_.find_slot(OrganizationReferent.ATTR_GEO, None, True)
         if (s is not None and (isinstance(s.value, GeoReferent))): 
             if (s.value.is_city): 
                 res.city = Utils.asObjectOrNull(s.value, GeoReferent)
     if (res.date is None): 
         # No date yet: run the date analyzer on the token that follows a
         # geo referent (after an optional ':' / ',' / hyphen).
         t = begin
         first_pass3400 = True
         while True:
             if first_pass3400: first_pass3400 = False
             else: t = t.next0_
             if (not (t is not None and t.end_char <= end_char)): break
             city = Utils.asObjectOrNull(t.get_referent(), GeoReferent)
             if (city is None): 
                 continue
             if (isinstance(t.next0_, TextToken)): 
                 if (t.next0_.is_char_of(":,") or t.next0_.is_hiphen): 
                     t = t.next0_
             rt = t.kit.process_referent(DateAnalyzer.ANALYZER_NAME, t.next0_)
             if (rt is not None): 
                 rt.save_to_local_ontology()
                 res.date = Utils.asObjectOrNull(rt.referent, DateReferent)
                 if (kit is not None): 
                     kit.embed_token(rt)
                 break
     if (len(res.slots) == 0): 
         return None
     else: 
         return res
Beispiel #30
0
 def try_parse(t: 'Token', items: typing.List['NounPhraseItem'],
               attrs: 'NounPhraseParseAttr') -> 'NounPhraseItem':
     """Try to build one noun-phrase item starting at token ``t``.

     The function first applies a series of rejection filters to text
     tokens (verb/adverb look-alikes, surnames, proper names, comparative
     adjectives, participles), then walks the morphological variants of
     the token and records every variant that can act as an adjective
     (``adj_morph``) or as a noun (``noun_morph``) and that agrees with
     the already collected ``items`` according to ``try_accord_variant``.

     Args:
         t: Token to start from. ``None`` yields ``None``.
         items: Items already collected for the phrase under
             construction. Most uses are guarded against ``None`` and
             empty lists (see the review note inside the body for the one
             unguarded use).
         attrs: ``NounPhraseParseAttr`` bit flags. The flags tested here
             are IGNOREPARTICIPLES, REFERENTCANBENOUN,
             PARSENUMERICASADJECTIVE, MULTINOUNS and PARSEPRONOUNS.

     Returns:
         A ``NounPhraseItem`` spanning one token (or a hyphenated pair /
         pronoun plus particle), or ``None`` when the token is rejected.
         Note that the returned item may have neither ``can_be_adj`` nor
         ``can_be_noun`` set (see the two ``return it`` statements at the
         end of the pass loop).
     """
     if (t is None):
         return None
     t0 = t
     _can_be_surname = False
     _is_doubt_adj = False
     rt = Utils.asObjectOrNull(t, ReferentToken)
     # A referent token that wraps exactly one text token: parse the inner
     # token and re-anchor the result on the wrapping token.
     if (rt is not None and rt.begin_token == rt.end_token
             and (isinstance(rt.begin_token, TextToken))):
         res = NounPhraseItem.try_parse(rt.begin_token, items, attrs)
         if (res is not None):
             res.begin_token = res.end_token = t
             res.can_be_noun = True
             return res
     # Any other referent token becomes a noun item: one noun variant per
     # morphological variant, with the referent's string form as the
     # normal value.
     if (rt is not None):
         res = NounPhraseItem(t, t)
         for m in t.morph.items:
             v = NounPhraseItemTextVar(m, None)
             v.normal_value = str(t.get_referent())
             res.noun_morph.append(v)
         res.can_be_noun = True
         return res
     # NOTE(review): no-op branch, number tokens get no special handling
     # here and fall through to the variant loop below.
     if (isinstance(t, NumberToken)):
         pass
     has_legal_verb = False
     if (isinstance(t, TextToken)):
         # Only tokens made of letters can be phrase items.
         if (not t.chars.is_letter):
             return None
         str0_ = t.term
         # Terms ending in 'А' or 'О' are checked against their dictionary
         # word forms for verb, adverb and adjective readings.
         if (str0_[len(str0_) - 1] == 'А' or str0_[len(str0_) - 1] == 'О'):
             for wf in t.morph.items:
                 if ((isinstance(wf, MorphWordForm))
                         and wf.is_in_dictionary):
                     if (wf.class0_.is_verb):
                         mc = t.get_morph_class_in_dictionary()
                         # Verb reading without a noun reading and without
                         # the IGNOREPARTICIPLES flag: reject unless the
                         # term ends in "ОГО"/"ЕГО".
                         if (not mc.is_noun and
                             (((attrs) &
                               (NounPhraseParseAttr.IGNOREPARTICIPLES)))
                                 == (NounPhraseParseAttr.NO)):
                             if (not LanguageHelper.ends_with_ex(
                                     str0_, "ОГО", "ЕГО", None, None)):
                                 return None
                         # Remembered so that an accepted adjective variant
                         # may also be flagged as a possible noun below.
                         has_legal_verb = True
                     if (wf.class0_.is_adverb):
                         # Adverb reading: reject unless a hyphen follows or
                         # the term is one of the listed exceptions.
                         if (t.next0_ is None or not t.next0_.is_hiphen):
                             if ((str0_ == "ВСЕГО" or str0_ == "ДОМА"
                                  or str0_ == "НЕСКОЛЬКО")
                                     or str0_ == "МНОГО"
                                     or str0_ == "ПОРЯДКА"):
                                 pass
                             else:
                                 return None
                     if (wf.class0_.is_adjective):
                         # "к.ф." is presumably the short-form tag - confirm
                         # against the morphology tag set. Such a form is
                         # marked doubtful unless the dictionary class is
                         # exactly ADJECTIVE.
                         if (wf.contains_attr("к.ф.", None)):
                             if (t.get_morph_class_in_dictionary() ==
                                     MorphClass.ADJECTIVE):
                                 pass
                             else:
                                 _is_doubt_adj = True
         mc0 = t.morph.class0_
         # Surname heuristics for tokens that are not written in all
         # lower case.
         if (mc0.is_proper_surname and not t.chars.is_all_lower):
             for wf in t.morph.items:
                 if (wf.class0_.is_proper_surname
                         and wf.number != MorphNumber.PLURAL):
                     wff = Utils.asObjectOrNull(wf, MorphWordForm)
                     if (wff is None):
                         continue
                     # Prefer normal_full, then normal_case, then "".
                     s = Utils.ifNotNull((Utils.ifNotNull(
                         wff.normal_full, wff.normal_case)), "")
                     # Endings "ИН"/"ЕН"/"ЫН": an out-of-dictionary form may
                     # be a surname, a dictionary form rejects the token.
                     if (LanguageHelper.ends_with_ex(
                             s, "ИН", "ЕН", "ЫН", None)):
                         if (not wff.is_in_dictionary):
                             _can_be_surname = True
                         else:
                             return None
                     if (wff.is_in_dictionary
                             and LanguageHelper.ends_with(s, "ОВ")):
                         _can_be_surname = True
         # Proper-name heuristics for tokens that are not written in all
         # lower case.
         if (mc0.is_proper_name and not t.chars.is_all_lower):
             for wff in t.morph.items:
                 wf = Utils.asObjectOrNull(wff, MorphWordForm)
                 if (wf is None):
                     continue
                 if (wf.normal_case == "ГОР"):
                     continue
                 if (wf.class0_.is_proper_name and wf.is_in_dictionary):
                     if (wf.normal_case is None
                             or not wf.normal_case.startswith("ЛЮБ")):
                         # A dictionary proper name is accepted only when it
                         # is an adjective carrying the "неизм." attribute,
                         # when REFERENTCANBENOUN is set, or when the first
                         # collected item is a standard adjective.
                         if (mc0.is_adjective
                                 and t.morph.contains_attr("неизм.", None)):
                             pass
                         elif (
                             (((attrs) &
                               (NounPhraseParseAttr.REFERENTCANBENOUN))
                              ) == (NounPhraseParseAttr.REFERENTCANBENOUN)):
                             pass
                         else:
                             if (items is None or (len(items) < 1)):
                                 return None
                             if (not items[0].is_std_adjective):
                                 return None
         # An adjective whose only variant carries "в.ср.ст." is rejected
         # (presumably a comparative-degree tag - confirm).
         if (mc0.is_adjective and t.morph.items_count == 1):
             if (t.morph.get_indexer_item(0).contains_attr(
                     "в.ср.ст.", None)):
                 return None
         mc1 = t.get_morph_class_in_dictionary()
         # Pure dictionary verb with no case information: not a phrase item.
         if (mc1 == MorphClass.VERB and t.morph.case_.is_undefined):
             return None
         # With IGNOREPARTICIPLES set, a verb (not noun, not proper) that
         # has a form with the "дейст.з." attribute is rejected unless the
         # term ends in "СЯ".
         if (((((attrs) & (NounPhraseParseAttr.IGNOREPARTICIPLES)))
              == (NounPhraseParseAttr.IGNOREPARTICIPLES)
              and t.morph.class0_.is_verb and not t.morph.class0_.is_noun)
                 and not t.morph.class0_.is_proper):
             for wf in t.morph.items:
                 if (wf.class0_.is_verb):
                     if (wf.contains_attr("дейст.з.", None)):
                         if (LanguageHelper.ends_with(t.term, "СЯ")):
                             pass
                         else:
                             return None
     # Up to two passes. Pass 0 may look past a hyphen (t becomes the token
     # after it); pass 1 runs on t1 when it was set at the end of pass 0,
     # otherwise on t0 itself.
     t1 = None
     for k in range(2):
         t = (Utils.ifNotNull(t1, t0))
         if (k == 0):
             # Hyphenated compound "t0-X": analyse the morphology of X when
             # t0 is a non-pronoun text token glued to the hyphen and X is
             # not a number.
             if (((isinstance(t0, TextToken)) and t0.next0_ is not None
                  and t0.next0_.is_hiphen)
                     and t0.next0_.next0_ is not None):
                 if (not t0.is_whitespace_after
                         and not t0.morph.class0_.is_pronoun and
                         not (isinstance(t0.next0_.next0_, NumberToken))):
                     if (not t0.next0_.is_whitespace_after):
                         t = t0.next0_.next0_
                     elif (t0.next0_.next0_.chars.is_all_lower
                           and LanguageHelper.ends_with(t0.term, "О")):
                         t = t0.next0_.next0_
         # The item spans t0..t; the third argument presumably initialises
         # the surname flag - confirm in NounPhraseItem._new404.
         it = NounPhraseItem._new404(t0, t, _can_be_surname)
         # NOTE(review): looks unreachable, because referent tokens already
         # returned near the top of the function - confirm.
         if (t0 == t and (isinstance(t0, ReferentToken))):
             it.can_be_noun = True
             it.morph = MorphCollection(t0.morph)
         can_be_prepos = False
         # Examine every morphological variant of the analysed token.
         for v in t.morph.items:
             wf = Utils.asObjectOrNull(v, MorphWordForm)
             # A verb form with a defined case is taken as an adjective
             # variant without any agreement check.
             if (v.class0_.is_verb and not v.case_.is_undefined):
                 it.can_be_adj = True
                 it.adj_morph.append(NounPhraseItemTextVar(v, t))
                 continue
             if (v.class0_.is_preposition):
                 can_be_prepos = True
             # Adjective candidates: adjectives, non-personal pronouns
             # without the "неизм." attribute, and noun variants of number
             # tokens.
             if (v.class0_.is_adjective
                     or ((v.class0_.is_pronoun
                          and not v.class0_.is_personal_pronoun
                          and not v.contains_attr("неизм.", None))) or
                 ((v.class0_.is_noun and (isinstance(t, NumberToken))))):
                 if (NounPhraseItem.try_accord_variant(
                         items, (0 if items is None else len(items)), v,
                         False)):
                     # NOTE(review): is_doub is assigned but never read.
                     is_doub = False
                     # The `continue` statements below skip the variant
                     # entirely, including the noun analysis further down.
                     if (v.contains_attr("к.ф.", None)):
                         continue
                     if (v.contains_attr("собир.", None)
                             and not (isinstance(t, NumberToken))):
                         # A dictionary form with the "собир." attribute
                         # rejects the whole token.
                         if (wf is not None and wf.is_in_dictionary):
                             return None
                         continue
                     if (v.contains_attr("сравн.", None)):
                         continue
                     ok = True
                     if (isinstance(t, TextToken)):
                         s = t.term
                         # Listed words and dictionary nouns ending in "ОВ"
                         # are not accepted as adjectives.
                         if (s == "ПРАВО" or s == "ПРАВА"):
                             ok = False
                         elif (LanguageHelper.ends_with(s, "ОВ") and
                               t.get_morph_class_in_dictionary().is_noun):
                             ok = False
                     elif (isinstance(t, NumberToken)):
                         if (v.class0_.is_noun
                                 and t.morph.class0_.is_adjective):
                             ok = False
                         elif (t.morph.class0_.is_noun and ((
                             (attrs) &
                             (NounPhraseParseAttr.PARSENUMERICASADJECTIVE)))
                               == (NounPhraseParseAttr.NO)):
                             ok = False
                     if (ok):
                         it.adj_morph.append(NounPhraseItemTextVar(v, t))
                         it.can_be_adj = True
                         if (_is_doubt_adj and t0 == t):
                             it.is_doubt_adjective = True
                         if (has_legal_verb and wf is not None
                                 and wf.is_in_dictionary):
                             it.can_be_noun = True
                         # A pronoun accepted as an adjective is recorded as
                         # a noun variant as well.
                         if (wf is not None and wf.class0_.is_pronoun):
                             it.can_be_noun = True
                             it.noun_morph.append(
                                 NounPhraseItemTextVar(v, t))
             # Decide whether this variant may act as the noun of the
             # phrase.
             can_be_noun_ = False
             if (isinstance(t, NumberToken)):
                 pass
             elif (v.class0_.is_noun
                   or ((wf is not None and wf.normal_case == "САМ"))):
                 can_be_noun_ = True
             elif (v.class0_.is_personal_pronoun):
                 # A personal pronoun is a noun candidate when nothing
                 # precedes it, or in the specific single-item contexts
                 # checked below; a preceding verb item otherwise rejects
                 # the token.
                 if (items is None or len(items) == 0):
                     can_be_noun_ = True
                 else:
                     for it1 in items:
                         if (it1.is_verb):
                             if (len(items) == 1
                                     and not v.case_.is_nominative):
                                 can_be_noun_ = True
                             else:
                                 return None
                     if (len(items) == 1):
                         if (items[0].can_be_adj_for_personal_pronoun):
                             can_be_noun_ = True
             elif (
                 # Selected pronouns (matched by normal form) standing alone
                 # or after a single suitable item.
                 (v.class0_.is_pronoun and
                  ((items is None or len(items) == 0 or
                    ((len(items) == 1
                      and items[0].can_be_adj_for_personal_pronoun))))
                  and wf is not None) and
                 (((((wf.normal_case == "ТОТ" or wf.normal_full == "ТО"
                      or wf.normal_case == "ТО") or wf.normal_case == "ЭТО"
                     or wf.normal_case == "ВСЕ") or wf.normal_case == "ЧТО"
                    or wf.normal_case == "КТО") or wf.normal_full
                   == "КОТОРЫЙ" or wf.normal_case == "КОТОРЫЙ"))):
                 # "ВСЕ" followed by "РАВНО" rejects the token.
                 if (wf.normal_case == "ВСЕ"):
                     if (t.next0_ is not None
                             and t.next0_.is_value("РАВНО", None)):
                         return None
                 can_be_noun_ = True
             elif (wf is not None and ((Utils.ifNotNull(
                     wf.normal_full, wf.normal_case))) == "КОТОРЫЙ"
                   and (((attrs) & (NounPhraseParseAttr.PARSEPRONOUNS)))
                   == (NounPhraseParseAttr.NO)):
                 # "КОТОРЫЙ" not accepted above is rejected when
                 # PARSEPRONOUNS is not set.
                 return None
             elif (v.class0_.is_proper and (isinstance(t, TextToken))):
                 # Proper variants count as nouns when the token is longer
                 # than 4 characters or the variant is a proper name.
                 if (t.length_char > 4 or v.class0_.is_proper_name):
                     can_be_noun_ = True
             if (can_be_noun_):
                 added = False
                 # MULTINOUNS: with several preceding items that all (from
                 # the second one on) have conj_before set, try agreement
                 # with the last argument True.
                 if (items is not None and len(items) > 1 and
                     (((attrs) & (NounPhraseParseAttr.MULTINOUNS))) !=
                     (NounPhraseParseAttr.NO)):
                     ok1 = True
                     ii = 1
                     while ii < len(items):
                         if (not items[ii].conj_before):
                             ok1 = False
                             break
                         ii += 1
                     if (ok1):
                         if (NounPhraseItem.try_accord_variant(
                                 items,
                             (0 if items is None else len(items)), v,
                                 True)):
                             it.noun_morph.append(
                                 NounPhraseItemTextVar(v, t))
                             it.can_be_noun = True
                             it.multi_nouns = True
                             added = True
                 if (not added):
                     if (NounPhraseItem.try_accord_variant(
                             items, (0 if items is None else len(items)), v,
                             False)):
                         it.noun_morph.append(NounPhraseItemTextVar(v, t))
                         it.can_be_noun = True
                         # A personal pronoun on a token with the "неизм."
                         # attribute that is not yet an adjective is also
                         # stored as an adjective variant with all cases and
                         # undefined number.
                         if (v.class0_.is_personal_pronoun
                                 and t.morph.contains_attr("неизм.", None)
                                 and not it.can_be_adj):
                             itt = NounPhraseItemTextVar(v, t)
                             itt.case_ = MorphCase.ALL_CASES
                             itt.number = MorphNumber.UNDEFINED
                             # NOTE(review): no-op check, has no effect.
                             if (itt.normal_value is None):
                                 pass
                             it.adj_morph.append(itt)
                             it.can_be_adj = True
                     # NOTE(review): len(items) is evaluated here without
                     # the None guard used elsewhere in this function; if
                     # try_accord_variant can return False for items=None
                     # this raises TypeError - confirm.
                     elif ((len(items) > 0 and len(items[0].adj_morph) > 0
                            and items[0].adj_morph[0].number
                            == MorphNumber.PLURAL)
                           and not ((items[0].adj_morph[0].case_)
                                    & v.case_).is_undefined
                           and not items[0].adj_morph[0].class0_.is_verb):
                         # Fallback for a plural first adjective whose case
                         # intersects the variant's: accept the noun when a
                         # comma/"and" follows and the next noun phrase (no
                         # preposition) shares a case with both.
                         if (t.next0_ is not None and t.next0_.is_comma_and
                                 and
                             (isinstance(t.next0_.next0_, TextToken))):
                             npt2 = NounPhraseHelper.try_parse(
                                 t.next0_.next0_, attrs, 0, None)
                             if (npt2 is not None
                                     and npt2.preposition is None
                                     and not ((npt2.morph.case_) & v.case_
                                              & items[0].adj_morph[0].case_
                                              ).is_undefined):
                                 it.noun_morph.append(
                                     NounPhraseItemTextVar(v, t))
                                 it.can_be_noun = True
         # Hyphenated compound: let every variant account for the leading
         # part t0 (second argument False for adjective variants, True for
         # noun variants).
         if (t0 != t):
             for v in it.adj_morph:
                 v.correct_prefix(Utils.asObjectOrNull(t0, TextToken),
                                  False)
             for v in it.noun_morph:
                 v.correct_prefix(Utils.asObjectOrNull(t0, TextToken), True)
         # Second pass producing a noun-only item: extend the item to the
         # second token and join the normal values with a hyphen.
         if (k == 1 and it.can_be_noun and not it.can_be_adj):
             if (t1 is not None):
                 it.end_token = t1
             else:
                 it.end_token = t0.next0_.next0_
             for v in it.noun_morph:
                 if (v.normal_value is not None
                         and (v.normal_value.find('-') < 0)):
                     v.normal_value = "{0}-{1}".format(
                         v.normal_value,
                         it.end_token.get_normal_case_text(
                             None, MorphNumber.UNDEFINED,
                             MorphGender.UNDEFINED, False))
         # Flag items whose first token matches the class-level list of
         # standard adjectives.
         if (it.can_be_adj):
             if (NounPhraseItem.__m_std_adjectives.try_parse(
                     it.begin_token, TerminParseAttr.NO) is not None):
                 it.is_std_adjective = True
         # The token may also be a preposition: reject the noun reading
         # when the prepositional reading yields a noun phrase.
         if (can_be_prepos and it.can_be_noun):
             if (items is not None and len(items) > 0):
                 # Inside a phrase: reject if parsing from t with
                 # prepositions enabled yields a phrase extending beyond t.
                 npt1 = NounPhraseHelper.try_parse(
                     t,
                     Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION)
                                     | (NounPhraseParseAttr.PARSEPRONOUNS) |
                                     (NounPhraseParseAttr.PARSEVERBS),
                                     NounPhraseParseAttr), 0, None)
                 if (npt1 is not None and npt1.end_char > t.end_char):
                     return None
             else:
                 # At phrase start: reject if the phrase after t has a case
                 # compatible with the case returned for t.lemma by
                 # get_case_after_preposition.
                 npt1 = NounPhraseHelper.try_parse(
                     t.next0_,
                     Utils.valToEnum((NounPhraseParseAttr.PARSEPRONOUNS) |
                                     (NounPhraseParseAttr.PARSEVERBS),
                                     NounPhraseParseAttr), 0, None)
                 if (npt1 is not None):
                     mc = LanguageHelper.get_case_after_preposition(t.lemma)
                     if (not ((mc) & npt1.morph.case_).is_undefined):
                         return None
         # On pass 1 the item is returned even if it is neither a noun nor
         # an adjective.
         if (it.can_be_noun or it.can_be_adj or k == 1):
             # Pronouns may absorb a following particle, optionally attached
             # with a hyphen that has no whitespace around it.
             if (it.begin_token.morph.class0_.is_pronoun):
                 tt2 = it.end_token.next0_
                 if ((tt2 is not None and tt2.is_hiphen
                      and not tt2.is_whitespace_after)
                         and not tt2.is_whitespace_before):
                     tt2 = tt2.next0_
                 if (isinstance(tt2, TextToken)):
                     ss = tt2.term
                     # These particles only extend the item's span.
                     if ((ss == "ЖЕ" or ss == "БЫ" or ss == "ЛИ")
                             or ss == "Ж"):
                         it.end_token = tt2
                     elif (ss == "НИБУДЬ" or ss == "ЛИБО"
                           or (((ss == "ТО" and tt2.previous.is_hiphen))
                               and it.can_be_adj)):
                         # These particles are also appended to the normal
                         # values of the adjective variants.
                         it.end_token = tt2
                         for m in it.adj_morph:
                             m.normal_value = "{0}-{1}".format(
                                 m.normal_value, ss)
                             if (m.single_number_value is not None):
                                 m.single_number_value = "{0}-{1}".format(
                                     m.single_number_value, ss)
             return it
         # Nothing matched on pass 0 for a single token.
         if (t0 == t):
             # "БИЗНЕС" followed by a token with the same character class:
             # retry on the next token in pass 1.
             if (t0.is_value("БИЗНЕС", None) and t0.next0_ is not None
                     and t0.next0_.chars == t0.chars):
                 t1 = t0.next0_
                 continue
             return it
     return None