Esempio n. 1
0
 def __str__(self) -> str:
     """Render the object as "<type name> <value>".

     The value part is ``string_value`` when ``int_value`` equals 0;
     otherwise it is the decimal text of ``int_value``.
     """
     type_name = Utils.enumToString(self.typ)
     if self.int_value == 0:
         shown = self.string_value
     else:
         shown = str(self.int_value)
     return "{0} {1}".format(type_name, shown)
Esempio n. 2
0
 def can_be_equals(self, obj: 'Referent',
                   typ: 'ReferentsEqualType') -> bool:
     """Tell whether ``obj`` is a UriReferent carrying the same value.

     Args:
         obj: candidate referent; anything that is not a UriReferent
             never matches.
         typ: comparison mode; not consulted by this implementation.

     Returns:
         True when ``Utils.compareStrings`` reports 0 for the two
         ``value`` attributes, False otherwise.
     """
     other = Utils.asObjectOrNull(obj, UriReferent)
     if other is not None:
         # NOTE(review): the trailing True is presumably an ignore-case
         # flag of Utils.compareStrings - confirm against the helper.
         return Utils.compareStrings(self.value, other.value, True) == 0
     return False
Esempio n. 3
0
 def parent_referent(self) -> 'Referent':
     """Return the value of the ATTR_BASEUNIT slot viewed as a Referent.

     The slot value is passed through ``Utils.asObjectOrNull`` with
     ``Referent`` as the requested type, and that result is returned.
     """
     base_unit = self.get_slot_value(UnitReferent.ATTR_BASEUNIT)
     return Utils.asObjectOrNull(base_unit, Referent)
Esempio n. 4
0
 def __calc_agent(self, noplural: bool) -> float:
     """Compute the coefficient for treating the source as the verb's agent.

     The result is written to ``self.coef`` and that same value is
     returned.  Many checks below assign -1 and return immediately
     (presumably meaning "link rejected" - confirm with the callers);
     0 is assigned in a few places where the source case is undefined.
     On the ordinary agreement paths ``self.plural`` is also set
     (1 for a plural verb form, 0 for a singular one).

     Args:
         noplural: when True and the first verb form is plural, the
             source must additionally pass the plural checks below
             (``from_is_plural``, number mask, morphological accord,
             per-item case compatibility).

     Returns:
         float: the value stored in ``self.coef``.
     """
     # A source introduced by a preposition is rejected outright.
     if (not Utils.isNullOrEmpty(self.from_prep)):
         self.coef = -1
         return self.coef
     # Both the first and the last verb of the group need morphology info.
     vf = self.to_verb.first_verb.verb_morph
     if (vf is None):
         self.coef = -1
         return self.coef
     vf2 = self.to_verb.last_verb.verb_morph
     if (vf2 is None):
         self.coef = -1
         return self.coef
     # Imperative mood of the first verb is rejected.
     if (vf.misc.mood == MorphMood.IMPERATIVE):
         self.coef = -1
         return self.coef
     morph_ = self.from_morph
     # Passive voice ("страд.з." is the passive-voice attribute string):
     # only an instrumental-case source is accepted.
     if (vf2.misc.voice == MorphVoice.PASSIVE
             or self.to_verb.last_verb.morph.contains_attr(
                 "страд.з.", None)):
         if (not morph_.case_.is_undefined):
             if (morph_.case_.is_instrumental):
                 self.coef = SemanticService.PARAMS.transitive_coef
                 # Halved when the verb form itself is marked instrumental.
                 if (vf2.case_.is_instrumental):
                     self.coef /= (2)
                 return self.coef
             self.coef = -1
             return self.coef
         # Source case unknown: coefficient 0.
         self.coef = 0
         return self.coef
     # "инф." is the infinitive attribute string; infinitives are rejected.
     if ("инф." in vf.misc.attrs):
         self.coef = -1
         return self.coef
     if (NGLink.__is_rev_verb(vf2)):
         # Look up which case the derivate groups prescribe for the agent;
         # the first group that defines cm_rev.agent wins.
         ag_case = MorphCase.UNDEFINED
         grs = DerivateService.find_derivates(
             Utils.ifNotNull(vf2.normal_full, vf2.normal_case), True, None)
         if (grs is not None):
             for gr in grs:
                 if (gr.cm_rev.agent is not None):
                     ag_case = gr.cm_rev.agent.case_
                     break
         if (not morph_.case_.is_undefined):
             if (ag_case.is_dative):
                 # Dative agent expected: source must be dative; halved when
                 # the source case mask also includes genitive.
                 if (morph_.case_.is_dative):
                     self.coef = SemanticService.PARAMS.transitive_coef
                     if (morph_.case_.is_genitive):
                         self.coef /= (2)
                     return self.coef
                 self.coef = -1
                 return self.coef
             if (ag_case.is_instrumental):
                 # Instrumental agent expected: source must be instrumental;
                 # 0 when the source case mask also includes nominative.
                 if (morph_.case_.is_instrumental):
                     if (morph_.case_.is_nominative):
                         self.coef = 0
                         return self.coef
                     self.coef = SemanticService.PARAMS.transitive_coef
                     return self.coef
                 self.coef = -1
                 return self.coef
             # No dative/instrumental agent case: a non-nominative source is
             # rejected; a nominative one continues to the checks below.
             if (not morph_.case_.is_nominative):
                 self.coef = -1
                 return self.coef
         else:
             self.coef = 0
             return self.coef
     if (vf.number == MorphNumber.PLURAL):
         # Plural verb form: check case compatibility first.
         if (not morph_.case_.is_undefined):
             if (vf.case_.is_undefined):
                 if (not morph_.case_.is_nominative):
                     self.coef = -1
                     return self.coef
             elif (((vf.case_) & morph_.case_).is_undefined):
                 # Verb form carries a case that does not intersect the
                 # source's case.
                 self.coef = -1
                 return self.coef
         if (noplural):
             if (self.from_is_plural):
                 pass
             elif (((morph_.number) &
                    (MorphNumber.PLURAL)) == (MorphNumber.UNDEFINED)):
                 # Source number mask has no PLURAL bit.
                 self.coef = -1
                 return self.coef
             elif (not NGLink.__check_morph_accord(morph_, False, vf)):
                 self.coef = -1
                 return self.coef
             elif (len(morph_.items) > 0 and not vf.case_.is_undefined):
                 # Need at least one plural variant whose case is either
                 # undefined or compatible with the verb form's case.
                 ok = False
                 for it in morph_.items:
                     if (((it.number) &
                          (MorphNumber.PLURAL)) == (MorphNumber.PLURAL)):
                         if (not it.case_.is_undefined
                                 and ((it.case_) & vf.case_).is_undefined):
                             continue
                         ok = True
                         break
                 if (not ok):
                     self.coef = -1
                     return self.coef
         self.plural = 1
         self.coef = SemanticService.PARAMS.verb_plural
         # "БЫТЬ" is the verb "to be": halve the coefficient when the source
         # has no case and starts after the end of the verb group.
         if (vf2.normal_case == "БЫТЬ"):
             if (morph_.case_.is_undefined
                     and self.from0_.source.begin_token.begin_char >
                     self.to_verb.end_char):
                 self.coef /= (2)
     else:
         if (vf.number == MorphNumber.SINGULAR):
             self.plural = 0
             # Singular verb with a plural source is rejected.
             if (self.from_is_plural):
                 self.coef = -1
                 return self.coef
         if (not NGLink.__check_morph_accord(morph_, False, vf)):
             self.coef = -1
             return self.coef
         # A source with a known non-nominative case is accepted only when
         # the first verb is a participle.
         if (not morph_.case_.is_undefined):
             if (not morph_.case_.is_nominative):
                 if (self.to_verb.first_verb.is_participle):
                     pass
                 else:
                     self.coef = -1
                     return self.coef
         # Person agreement: when the verb's person is set and excludes THIRD,
         # the source must carry the matching attribute string
         # ("1 л." for first person, "2 л." for second person).
         if (vf.misc.person != MorphPerson.UNDEFINED):
             if (((vf.misc.person) &
                  (MorphPerson.THIRD)) == (MorphPerson.UNDEFINED)):
                 if (((vf.misc.person) &
                      (MorphPerson.FIRST)) == (MorphPerson.FIRST)):
                     if (not morph_.contains_attr("1 л.", None)):
                         self.coef = -1
                         return self.coef
                 if (((vf.misc.person) &
                      (MorphPerson.SECOND)) == (MorphPerson.SECOND)):
                     if (not morph_.contains_attr("2 л.", None)):
                         self.coef = -1
                         return self.coef
         self.coef = SemanticService.PARAMS.morph_accord
         # Unknown source case: keep only a quarter of the coefficient.
         if (morph_.case_.is_undefined):
             self.coef /= (4)
     return self.coef
Esempio n. 5
0
 def from_prep(self) -> str:
     """Return the ``prep`` of the source item, with "" as the fallback.

     The fallback is applied by ``Utils.ifNotNull``.
     """
     prep = self.from0_.source.prep
     return Utils.ifNotNull(prep, "")
Esempio n. 6
0
 def __analizeAgreement(self, bfi: 'BusinessFactItem') -> 'ReferentToken':
     """Build a two-party business fact around the item ``bfi``.

     Tokens before ``bfi`` are scanned backwards for the parties (an
     existing BusinessFactReferent with both sides filled in, or
     OrganizationReferent tokens).  If no second party was found, the
     tokens after ``bfi`` are scanned forwards until a '.' or a line
     break.

     Args:
         bfi: the recognised fact item supplying ``base_kind`` and the
             initial token span.

     Returns:
         A ReferentToken wrapping a new BusinessFactReferent that spans
         from the earliest to the latest token used, or None when either
         party is missing.
     """
     party_a = None
     party_b = None
     span_begin = bfi.begin_token
     span_end = bfi.end_token
     lines_left = 1

     # Backward scan, starting just before the item.
     tok = bfi.begin_token.previous
     while tok is not None:
         if tok.isChar('.') or tok.is_newline_after:
             # Sentence/line boundary: spend one unit of the line budget.
             lines_left -= 1
             if lines_left == 0:
                 break
             tok = tok.previous
             continue
         if (tok.isValue("СТОРОНА", None) and tok.previous is not None
                 and (tok.previous.isValue("МЕЖДУ", None)
                      or tok.previous.isValue("МІЖ", None))):
             # "between the parties" (Russian "МЕЖДУ" / Ukrainian "МІЖ"):
             # widen the budget, move the span start onto the preposition
             # and resume from the token preceding it.
             lines_left = 2
             span_begin = tok.previous
             tok = span_begin.previous
             continue
         ref = tok.getReferent()
         if isinstance(ref, BusinessFactReferent):
             fact = Utils.asObjectOrNull(ref, BusinessFactReferent)
             if fact.who is not None and (fact.who2 is not None
                                          or fact.whom is not None):
                 # Reuse both parties of an already recognised fact.
                 party_a = fact.who
                 party_b = Utils.ifNotNull(fact.who2, fact.whom)
                 break
         if not isinstance(ref, OrganizationReferent):
             tok = tok.previous
             continue
         before = tok.previous
         paired = (before is not None
                   and (before.is_and or before.isValue("К", None))
                   and before.previous is not None
                   and isinstance(before.previous.getReferent(),
                                  OrganizationReferent))
         if paired:
             # "<org> and/К <org>": both parties stand before the item.
             span_begin = before.previous
             party_a = span_begin.getReferent()
             party_b = ref
         else:
             span_begin = tok
             party_a = ref
         break

     if party_b is None:
         # Forward scan, limited to the rest of the current line/sentence.
         tok = bfi.end_token.next0_
         while tok is not None:
             if tok.isChar('.') or tok.is_newline_before:
                 break
             ref = tok.getReferent()
             if not isinstance(ref, OrganizationReferent):
                 tok = tok.next0_
                 continue
             after = tok.next0_
             paired = (after is not None
                       and (after.is_and or after.isValue("К", None))
                       and after.next0_ is not None
                       and isinstance(after.next0_.getReferent(),
                                      OrganizationReferent))
             if paired:
                 # A pair after the item replaces any first party found
                 # during the backward scan.
                 span_end = after.next0_
                 party_a = ref
                 party_b = span_end.getReferent()
             else:
                 span_end = tok
                 party_b = ref
             break

     if party_a is None or party_b is None:
         return None

     fact_ref = BusinessFactReferent._new436(bfi.base_kind)
     fact_ref.who = party_a
     # Lawsuits record the second party in ``whom``; other kinds in ``who2``.
     if bfi.base_kind == BusinessFactKind.LAWSUIT:
         fact_ref.whom = party_b
     else:
         fact_ref.who2 = party_b
     self.__findDate(fact_ref, bfi.begin_token)
     self.__findSum(fact_ref, bfi.begin_token)
     return ReferentToken(fact_ref, span_begin, span_end)
Esempio n. 7
0
 def __tryParse(t : 'Token', lev : int) -> 'BookLinkToken':
     """Try to recognise one bibliographic-reference element starting at ``t``.

     The checks run in a fixed order: bracketed content, referent tokens,
     the termin dictionary, the '/' delimiter, digit numbers, textual
     markers (pages/volumes, city abbreviations, translation, "ibid",
     "see", number prefixes, "in N volumes") and finally a
     parenthesised year.

     Args:
         t: token to start from; ``None`` yields ``None``.
         lev: current recursion depth; values above 3 yield ``None``.

     Returns:
         A BookLinkToken describing the recognised element, or ``None``
         when nothing matched.
     """
     if (t is None or lev > 3): 
         return None
     if (t.isChar('[')): 
         # Bracketed element: parse the content recursively and, where a
         # closing ']' is found, extend the result over the brackets.
         re = BookLinkToken.__tryParse(t.next0_, lev + 1)
         if (re is not None and re.end_token.next0_ is not None and re.end_token.next0_.isChar(']')): 
             re.begin_token = t
             re.end_token = re.end_token.next0_
             return re
         if (re is not None and re.end_token.isChar(']')): 
             re.begin_token = t
             return re
         if (re is not None): 
             # Compiler / editors markers are returned without the bracket.
             if (re.typ == BookLinkTyp.SOSTAVITEL or re.typ == BookLinkTyp.EDITORS): 
                 return re
         # Otherwise a short bracket sequence ending in a number is a NUMBER.
         br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
         if (br is not None): 
             if ((isinstance(br.end_token.previous, NumberToken)) and (br.length_char < 30)): 
                 return BookLinkToken._new346(t, br.end_token, BookLinkTyp.NUMBER, MiscHelper.getTextValue(br.begin_token.next0_, br.end_token.previous, GetTextAttr.NO))
     t0 = t
     if (isinstance(t, ReferentToken)): 
         # Already-recognised entities map directly onto link element types.
         if (isinstance(t.getReferent(), PersonReferent)): 
             return BookLinkToken.tryParseAuthor(t, FioTemplateType.UNDEFINED)
         if (isinstance(t.getReferent(), GeoReferent)): 
             return BookLinkToken._new343(t, t, BookLinkTyp.GEO, t.getReferent())
         if (isinstance(t.getReferent(), DateReferent)): 
             dr = Utils.asObjectOrNull(t.getReferent(), DateReferent)
             # A date is a YEAR when it has a single slot, or when it
             # directly follows a comma.
             if (len(dr.slots) == 1 and dr.year > 0): 
                 return BookLinkToken._new346(t, t, BookLinkTyp.YEAR, str(dr.year))
             if (dr.year > 0 and t.previous is not None and t.previous.is_comma): 
                 return BookLinkToken._new346(t, t, BookLinkTyp.YEAR, str(dr.year))
         if (isinstance(t.getReferent(), OrganizationReferent)): 
             org0_ = Utils.asObjectOrNull(t.getReferent(), OrganizationReferent)
             if (org0_.kind == OrganizationKind.PRESS): 
                 return BookLinkToken._new343(t, t, BookLinkTyp.PRESS, org0_)
         if (isinstance(t.getReferent(), UriReferent)): 
             uri = Utils.asObjectOrNull(t.getReferent(), UriReferent)
             # Only http/https/ftp URIs, or ones without a scheme, count as URL.
             if ((uri.scheme == "http" or uri.scheme == "https" or uri.scheme == "ftp") or uri.scheme is None): 
                 return BookLinkToken._new343(t, t, BookLinkTyp.URL, uri)
     # Dictionary of known terms; the termin tag carries the BookLinkTyp.
     tok_ = BookLinkToken.__m_termins.tryParse(t, TerminParseAttr.NO)
     if (tok_ is not None): 
         typ_ = Utils.valToEnum(tok_.termin.tag, BookLinkTyp)
         ok = True
         # TYPE / NAMETAIL / ELECTRONRES terms are accepted only right after
         # one of ". : [" or a hyphen.
         if (typ_ == BookLinkTyp.TYPE or typ_ == BookLinkTyp.NAMETAIL or typ_ == BookLinkTyp.ELECTRONRES): 
             if (t.previous is not None and ((t.previous.isCharOf(".:[") or t.previous.is_hiphen))): 
                 pass
             else: 
                 ok = False
         if (ok): 
             return BookLinkToken._new346(t, tok_.end_token, typ_, tok_.termin.canonic_text)
         # Reached only when ok is False: an electronic-resource term is still
         # accepted if a URI referent follows after non-letter text tokens.
         if (typ_ == BookLinkTyp.ELECTRONRES): 
             tt = tok_.end_token.next0_
             first_pass2751 = True
             while True:
                 if first_pass2751: first_pass2751 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if ((isinstance(tt, TextToken)) and not tt.chars.is_letter): 
                     continue
                 if (isinstance(tt.getReferent(), UriReferent)): 
                     return BookLinkToken._new343(t, tt, BookLinkTyp.ELECTRONRES, tt.getReferent())
                 break
     if (t.isChar('/')): 
         # '/' or '//' delimiter.
         res = BookLinkToken._new346(t, t, BookLinkTyp.DELIMETER, "/")
         if (t.next0_ is not None and t.next0_.isChar('/')): 
             res.end_token = t.next0_
             res.value = "//"
         if (not t.is_whitespace_before and not t.is_whitespace_after): 
             # A slash glued to its neighbours is kept only if one of the next
             # three tokens parses as something other than a NUMBER.
             # NOTE(review): this calls BookLinkToken.tryParse, not __tryParse
             # as elsewhere in this function - presumably a public wrapper
             # defined outside this view; confirm.
             coo = 3
             no = True
             tt = t.next0_
             while tt is not None and coo > 0: 
                 vvv = BookLinkToken.tryParse(tt, lev + 1)
                 if (vvv is not None and vvv.typ != BookLinkTyp.NUMBER): 
                     no = False
                     break
                 tt = tt.next0_; coo -= 1
             if (no): 
                 return None
         return res
     if ((isinstance(t, NumberToken)) and (t).int_value is not None and (t).typ == NumberSpellingType.DIGIT): 
         # Digit number: NUMBER by default, YEAR for 1930 <= value < 2030.
         res = BookLinkToken._new346(t, t, BookLinkTyp.NUMBER, str((t).value))
         val = (t).int_value
         if (val >= 1930 and (val < 2030)): 
             res.typ = BookLinkTyp.YEAR
         # Absorb a trailing '.', or a single non-letter character followed by
         # whitespace; a following page marker turns the element into PAGES.
         if (t.next0_ is not None and t.next0_.isChar('.')): 
             res.end_token = t.next0_
         elif ((t.next0_ is not None and t.next0_.length_char == 1 and not t.next0_.chars.is_letter) and t.next0_.is_whitespace_after): 
             res.end_token = t.next0_
         elif (isinstance(t.next0_, TextToken)): 
             term = (t.next0_).term
             if (((term == "СТР" or term == "C" or term == "С") or term == "P" or term == "S") or term == "PAGES"): 
                 res.end_token = t.next0_
                 res.typ = BookLinkTyp.PAGES
                 res.value = str((t).value)
         return res
     if (isinstance(t, TextToken)): 
         term = (t).term
         # Page / volume marker followed by a number -> PAGERANGE.
         if (((((((term == "СТР" or term == "C" or term == "С") or term == "ТОМ" or term == "T") or term == "Т" or term == "P") or term == "PP" or term == "V") or term == "VOL" or term == "S") or term == "СТОР" or t.isValue("PAGE", None)) or t.isValue("СТРАНИЦА", "СТОРІНКА")): 
             tt = t.next0_
             # Skip any of ". : ~" between the marker and the number.
             while tt is not None:
                 if (tt.isCharOf(".:~")): 
                     tt = tt.next0_
                 else: 
                     break
             if (isinstance(tt, NumberToken)): 
                 res = BookLinkToken._new345(t, tt, BookLinkTyp.PAGERANGE)
                 tt0 = tt
                 tt1 = tt
                 tt = tt.next0_
                 first_pass2752 = True
                 # Extend over further numbers joined by ',' or a hyphen.
                 while True:
                     if first_pass2752: first_pass2752 = False
                     else: tt = tt.next0_
                     if (not (tt is not None)): break
                     if (tt.isCharOf(",") or tt.is_hiphen): 
                         if (isinstance(tt.next0_, NumberToken)): 
                             tt = tt.next0_
                             res.end_token = tt
                             tt1 = tt
                             continue
                     break
                 res.value = MiscHelper.getTextValue(tt0, tt1, GetTextAttr.NO)
                 return res
         # One-letter / short abbreviations treated as GEO.
         # NOTE(review): presumably city abbreviations (Moscow,
         # St. Petersburg, Kyiv) - confirm.
         if ((term == "M" or term == "М" or term == "СПБ") or term == "K" or term == "К"): 
             if (t.next0_ is not None and t.next0_.isCharOf(":;")): 
                 re = BookLinkToken._new345(t, t.next0_, BookLinkTyp.GEO)
                 return re
             if (t.next0_ is not None and t.next0_.isCharOf(".")): 
                 res = BookLinkToken._new345(t, t.next0_, BookLinkTyp.GEO)
                 # After "X." accept ':' / ';', a number, or ", <number>";
                 # anything else rejects the whole token.
                 if (t.next0_.next0_ is not None and t.next0_.next0_.isCharOf(":;")): 
                     res.end_token = t.next0_.next0_
                 elif (t.next0_.next0_ is not None and (isinstance(t.next0_.next0_, NumberToken))): 
                     pass
                 elif (t.next0_.next0_ is not None and t.next0_.next0_.is_comma and (isinstance(t.next0_.next0_.next0_, NumberToken))): 
                     pass
                 else: 
                     return None
                 return res
         # "ПЕР[.] С <language>": translation marker ("translated from").
         if (term == "ПЕР" or term == "ПЕРЕВ" or term == "ПЕРЕВОД"): 
             tt = t
             if (tt.next0_ is not None and tt.next0_.isChar('.')): 
                 tt = tt.next0_
             if (tt.next0_ is not None and ((tt.next0_.isValue("C", None) or tt.next0_.isValue("С", None)))): 
                 tt = tt.next0_
                 if (tt.next0_ is None or tt.whitespaces_after_count > 2): 
                     return None
                 re = BookLinkToken._new345(t, tt.next0_, BookLinkTyp.TRANSLATE)
                 return re
         # "ТАМ ЖЕ" / "ТАМЖЕ" ("ibid").
         if (term == "ТАМ" or term == "ТАМЖЕ"): 
             res = BookLinkToken._new345(t, t, BookLinkTyp.TAMZE)
             if (t.next0_ is not None and t.next0_.isValue("ЖЕ", None)): 
                 res.end_token = t.next0_
             return res
         # "see" / "e.g." / "in more detail" style pointers.
         if (((term == "СМ" or term == "CM" or term == "НАПР") or term == "НАПРИМЕР" or term == "SEE") or term == "ПОДРОБНЕЕ" or term == "ПОДРОБНО"): 
             res = BookLinkToken._new345(t, t, BookLinkTyp.SEE)
             # The parameter t is reused as the scan cursor from here on;
             # this branch always returns, so later code never sees it.
             t = t.next0_
             first_pass2753 = True
             while True:
                 if first_pass2753: first_pass2753 = False
                 else: t = t.next0_
                 if (not (t is not None)): break
                 if (t.isCharOf(".:") or t.isValue("ALSO", None)): 
                     res.end_token = t
                     continue
                 if (t.isValue("В", None) or t.isValue("IN", None)): 
                     res.end_token = t
                     continue
                 # Absorb one directly following nested SEE element, then stop.
                 vvv = BookLinkToken.__tryParse(t, lev + 1)
                 if (vvv is not None and vvv.typ == BookLinkTyp.SEE): 
                     res.end_token = vvv.end_token
                     break
                 break
             return res
         # "БОЛЕЕ" + SEE element: the SEE element is widened to start here.
         if (term == "БОЛЕЕ"): 
             vvv = BookLinkToken.__tryParse(t.next0_, lev + 1)
             if (vvv is not None and vvv.typ == BookLinkTyp.SEE): 
                 vvv.begin_token = t
                 return vvv
         # Number prefix followed by a number token -> N.
         no = MiscHelper.checkNumberPrefix(t)
         if (isinstance(no, NumberToken)): 
             return BookLinkToken._new345(t, no, BookLinkTyp.N)
         # "В <number> Т/ТОМ.../КН/КНИГ..." ("in N volumes/books") -> VOLUME.
         if (((term == "B" or term == "В")) and (isinstance(t.next0_, NumberToken)) and (isinstance(t.next0_.next0_, TextToken))): 
             term2 = (t.next0_.next0_).term
             if (((term2 == "Т" or term2 == "T" or term2.startswith("ТОМ")) or term2 == "TT" or term2 == "ТТ") or term2 == "КН" or term2.startswith("КНИГ")): 
                 return BookLinkToken._new345(t, t.next0_.next0_, BookLinkTyp.VOLUME)
     if (t.isChar('(')): 
         # "(<number>)": a YEAR when 1900 < number <= 2040 and the number is
         # not later than the current calendar year.
         if (((isinstance(t.next0_, NumberToken)) and (t.next0_).int_value is not None and t.next0_.next0_ is not None) and t.next0_.next0_.isChar(')')): 
             num = (t.next0_).int_value
             if (num > 1900 and num <= 2040): 
                 if (num <= datetime.datetime.now().year): 
                     return BookLinkToken._new346(t, t.next0_.next0_, BookLinkTyp.YEAR, str(num))
         # "(<date referent>)": a YEAR when the date has a positive year.
         if (((isinstance(t.next0_, ReferentToken)) and (isinstance(t.next0_.getReferent(), DateReferent)) and t.next0_.next0_ is not None) and t.next0_.next0_.isChar(')')): 
             num = (t.next0_.getReferent()).year
             if (num > 0): 
                 return BookLinkToken._new346(t, t.next0_.next0_, BookLinkTyp.YEAR, str(num))
     return None
Esempio n. 8
0
 def __str__(self) -> str:
     """Render as "[(1) ]<lev> <type name>: <source text>".

     The "(1) " prefix appears only when ``must_be_first_line`` is truthy.
     """
     first_line_mark = "(1) " if self.must_be_first_line else ""
     level = self.lev
     type_name = Utils.enumToString(self.typ)
     source_text = self.getSourceText()
     return "{0}{1} {2}: {3}".format(first_line_mark, level, type_name,
                                     source_text)
Esempio n. 9
0
 def __str__(self) -> str:
     """Render as "<type name>: <value>", with "" as the value fallback.

     The fallback is applied by ``Utils.ifNotNull``.
     """
     type_name = Utils.enumToString(self.typ)
     shown_value = Utils.ifNotNull(self.value, "")
     return "{0}: {1}".format(type_name, shown_value)
Esempio n. 10
0
 def run(self, text: str, only_tokenizing: bool, dlang: 'MorphLang',
         progress: EventHandler,
         good_text: bool) -> typing.List['MorphToken']:
     """ Произвести морфологический анализ текста
     
     Args:
         text(str): исходный текст
         lang: язык (если null, то попробует определить)
     
     Returns:
         typing.List[MorphToken]: последовательность результирующих морфем
     """
     if (Utils.isNullOrEmpty(text)):
         return None
     twr = TextWrapper(text, good_text)
     twrch = twr.chars
     res = list()
     uni_lex = dict()
     term0 = None
     pure_rus_words = 0
     pure_ukr_words = 0
     pure_by_words = 0
     pure_kz_words = 0
     tot_rus_words = 0
     tot_ukr_words = 0
     tot_by_words = 0
     tot_kz_words = 0
     i = 0
     first_pass2884 = True
     while True:
         if first_pass2884: first_pass2884 = False
         else: i += 1
         if (not (i < twr.length)): break
         ty = InnerMorphology._get_char_typ(twrch[i])
         if (ty == 0):
             continue
         if (ty > 2):
             j = (i + 1)
         else:
             j = (i + 1)
             while j < twr.length:
                 if (InnerMorphology._get_char_typ(twrch[j]) != ty):
                     break
                 j += 1
         wstr = text[i:i + j - i]
         term = None
         if (good_text):
             term = wstr
         else:
             trstr = LanguageHelper.transliteral_correction(
                 wstr, term0, False)
             term = LanguageHelper.correct_word(trstr)
         if (Utils.isNullOrEmpty(term)):
             i = (j - 1)
             continue
         lang = InnerMorphology.__detect_lang(twr, i, j - 1, term)
         if (lang == MorphLang.UA):
             pure_ukr_words += 1
         elif (lang == MorphLang.RU):
             pure_rus_words += 1
         elif (lang == MorphLang.BY):
             pure_by_words += 1
         elif (lang == MorphLang.KZ):
             pure_kz_words += 1
         if ((((lang) & MorphLang.RU)) != MorphLang.UNKNOWN):
             tot_rus_words += 1
         if ((((lang) & MorphLang.UA)) != MorphLang.UNKNOWN):
             tot_ukr_words += 1
         if ((((lang) & MorphLang.BY)) != MorphLang.UNKNOWN):
             tot_by_words += 1
         if ((((lang) & MorphLang.KZ)) != MorphLang.UNKNOWN):
             tot_kz_words += 1
         if (ty == 1):
             term0 = term
         lemmas = None
         if (ty == 1 and not only_tokenizing):
             wraplemmas12 = RefOutArgWrapper(None)
             inoutres13 = Utils.tryGetValue(uni_lex, term, wraplemmas12)
             lemmas = wraplemmas12.value
             if (not inoutres13):
                 lemmas = InnerMorphology.UniLexWrap._new11(lang)
                 uni_lex[term] = lemmas
         tok = MorphToken()
         tok.term = term
         tok.begin_char = i
         if (i == 733860):
             pass
         tok.end_char = (j - 1)
         tok.tag = (lemmas)
         res.append(tok)
         i = (j - 1)
     def_lang = MorphLang(dlang)
     if (pure_rus_words > pure_ukr_words and pure_rus_words > pure_by_words
             and pure_rus_words > pure_kz_words):
         def_lang = MorphLang.RU
     elif (tot_rus_words > tot_ukr_words and tot_rus_words > tot_by_words
           and tot_rus_words > tot_kz_words):
         def_lang = MorphLang.RU
     elif (pure_ukr_words > pure_rus_words
           and pure_ukr_words > pure_by_words
           and pure_ukr_words > pure_kz_words):
         def_lang = MorphLang.UA
     elif (tot_ukr_words > tot_rus_words and tot_ukr_words > tot_by_words
           and tot_ukr_words > tot_kz_words):
         def_lang = MorphLang.UA
     elif (pure_kz_words > pure_rus_words and pure_kz_words > pure_ukr_words
           and pure_kz_words > pure_by_words):
         def_lang = MorphLang.KZ
     elif (tot_kz_words > tot_rus_words and tot_kz_words > tot_ukr_words
           and tot_kz_words > tot_by_words):
         def_lang = MorphLang.KZ
     elif (pure_by_words > pure_rus_words and pure_by_words > pure_ukr_words
           and pure_by_words > pure_kz_words):
         def_lang = MorphLang.BY
     elif (tot_by_words > tot_rus_words and tot_by_words > tot_ukr_words
           and tot_by_words > tot_kz_words):
         if (tot_rus_words > 10 and tot_by_words > (tot_rus_words + 20)):
             def_lang = MorphLang.BY
         elif (tot_rus_words == 0 or tot_by_words >= (tot_rus_words * 2)):
             def_lang = MorphLang.BY
     if (((def_lang.is_undefined or def_lang.is_ua)) and tot_rus_words > 0):
         if (((tot_ukr_words > tot_rus_words
               and InnerMorphology.M_ENGINE_UA.language.is_ua))
                 or ((tot_by_words > tot_rus_words
                      and InnerMorphology.M_ENGINE_BY.language.is_by))
                 or ((tot_kz_words > tot_rus_words
                      and InnerMorphology.M_ENGINE_KZ.language.is_kz))):
             cou0 = 0
             tot_kz_words = 0
             tot_ukr_words = tot_kz_words
             tot_by_words = tot_ukr_words
             tot_rus_words = tot_by_words
             for kp in uni_lex.items():
                 lang = MorphLang()
                 wraplang14 = RefOutArgWrapper(lang)
                 kp[1].word_forms = self.__process_one_word(
                     kp[0], wraplang14)
                 lang = wraplang14.value
                 if (kp[1].word_forms is not None):
                     for wf in kp[1].word_forms:
                         lang |= wf.language
                 kp[1].lang = lang
                 if (lang.is_ru):
                     tot_rus_words += 1
                 if (lang.is_ua):
                     tot_ukr_words += 1
                 if (lang.is_by):
                     tot_by_words += 1
                 if (lang.is_kz):
                     tot_kz_words += 1
                 if (lang.is_cyrillic):
                     cou0 += 1
                 if (cou0 >= 100):
                     break
             if (tot_rus_words > ((math.floor(tot_by_words / 2)))
                     and tot_rus_words > ((math.floor(tot_ukr_words / 2)))):
                 def_lang = MorphLang.RU
             elif (tot_ukr_words > ((math.floor(tot_rus_words / 2)))
                   and tot_ukr_words > ((math.floor(tot_by_words / 2)))):
                 def_lang = MorphLang.UA
             elif (tot_by_words > ((math.floor(tot_rus_words / 2)))
                   and tot_by_words > ((math.floor(tot_ukr_words / 2)))):
                 def_lang = MorphLang.BY
         elif (def_lang.is_undefined):
             def_lang = MorphLang.RU
     cou = 0
     tot_kz_words = 0
     tot_ukr_words = tot_kz_words
     tot_by_words = tot_ukr_words
     tot_rus_words = tot_by_words
     for kp in uni_lex.items():
         lang = def_lang
         if (lang.is_undefined):
             if (tot_rus_words > tot_by_words
                     and tot_rus_words > tot_ukr_words
                     and tot_rus_words > tot_kz_words):
                 lang = MorphLang.RU
             elif (tot_ukr_words > tot_rus_words
                   and tot_ukr_words > tot_by_words
                   and tot_ukr_words > tot_kz_words):
                 lang = MorphLang.UA
             elif (tot_by_words > tot_rus_words
                   and tot_by_words > tot_ukr_words
                   and tot_by_words > tot_kz_words):
                 lang = MorphLang.BY
             elif (tot_kz_words > tot_rus_words
                   and tot_kz_words > tot_ukr_words
                   and tot_kz_words > tot_by_words):
                 lang = MorphLang.KZ
         wraplang15 = RefOutArgWrapper(lang)
         kp[1].word_forms = self.__process_one_word(kp[0], wraplang15)
         lang = wraplang15.value
         kp[1].lang = lang
         if ((((lang) & MorphLang.RU)) != MorphLang.UNKNOWN):
             tot_rus_words += 1
         if ((((lang) & MorphLang.UA)) != MorphLang.UNKNOWN):
             tot_ukr_words += 1
         if ((((lang) & MorphLang.BY)) != MorphLang.UNKNOWN):
             tot_by_words += 1
         if ((((lang) & MorphLang.KZ)) != MorphLang.UNKNOWN):
             tot_kz_words += 1
         if (progress is not None):
             self.__on_progress(cou, len(uni_lex), progress)
         cou += 1
     debug_token = None
     empty_list = None
     for r in res:
         uni = Utils.asObjectOrNull(r.tag, InnerMorphology.UniLexWrap)
         r.tag = None
         if (uni is None or uni.word_forms is None
                 or len(uni.word_forms) == 0):
             if (empty_list is None):
                 empty_list = list()
             r.word_forms = empty_list
             if (uni is not None):
                 r.language = uni.lang
         else:
             r.word_forms = uni.word_forms
         if (r.begin_char == 733860):
             debug_token = r
     if (not good_text):
         i = 0
         first_pass2885 = True
         while True:
             if first_pass2885: first_pass2885 = False
             else: i += 1
             if (not (i < (len(res) - 2))): break
             ui0 = twrch[res[i].begin_char]
             ui1 = twrch[res[i + 1].begin_char]
             ui2 = twrch[res[i + 2].begin_char]
             if (ui1.is_quot):
                 p = res[i + 1].begin_char
                 if ((p >= 2 and "БбТт".find(text[p - 1]) >= 0 and
                      ((p + 3) < len(text)))
                         and "ЕеЯяЁё".find(text[p + 1]) >= 0):
                     wstr = LanguageHelper.transliteral_correction(
                         LanguageHelper.correct_word("{0}Ъ{1}".format(
                             res[i].get_source_text(text),
                             res[i + 2].get_source_text(text))), None,
                         False)
                     li = self.__process_one_word0(wstr)
                     if (li is not None and len(li) > 0
                             and li[0].is_in_dictionary):
                         res[i].end_char = res[i + 2].end_char
                         res[i].term = wstr
                         res[i].word_forms = li
                         del res[i + 1:i + 1 + 2]
                 elif ((ui1.is_apos and p > 0 and str.isalpha(text[p - 1]))
                       and ((p + 1) < len(text))
                       and str.isalpha(text[p + 1])):
                     if (def_lang == MorphLang.UA
                             or (((res[i].language) & MorphLang.UA)) !=
                             MorphLang.UNKNOWN
                             or (((res[i + 2].language) & MorphLang.UA)) !=
                             MorphLang.UNKNOWN):
                         wstr = LanguageHelper.transliteral_correction(
                             LanguageHelper.correct_word("{0}{1}".format(
                                 res[i].get_source_text(text),
                                 res[i + 2].get_source_text(text))), None,
                             False)
                         li = self.__process_one_word0(wstr)
                         okk = True
                         if (okk):
                             res[i].end_char = res[i + 2].end_char
                             res[i].term = wstr
                             if (li is None):
                                 li = list()
                             res[i].word_forms = li
                             if (li is not None and len(li) > 0):
                                 res[i].language = li[0].language
                             del res[i + 1:i + 1 + 2]
             elif (((ui1.uni_char == '3' or ui1.uni_char == '4'))
                   and res[i + 1].length == 1):
                 src = ("З" if ui1.uni_char == '3' else "Ч")
                 i0 = i + 1
                 if ((res[i].end_char + 1) == res[i + 1].begin_char
                         and ui0.is_cyrillic):
                     i0 -= 1
                     src = (res[i0].get_source_text(text) + src)
                 i1 = i + 1
                 if ((res[i + 1].end_char + 1) == res[i + 2].begin_char
                         and ui2.is_cyrillic):
                     i1 += 1
                     src += res[i1].get_source_text(text)
                 if (len(src) > 2):
                     wstr = LanguageHelper.transliteral_correction(
                         LanguageHelper.correct_word(src), None, False)
                     li = self.__process_one_word0(wstr)
                     if (li is not None and len(li) > 0
                             and li[0].is_in_dictionary):
                         res[i0].end_char = res[i1].end_char
                         res[i0].term = wstr
                         res[i0].word_forms = li
                         del res[i0 + 1:i0 + 1 + i1 - i0]
             elif ((ui1.is_hiphen and ui0.is_letter and ui2.is_letter)
                   and res[i].end_char > res[i].begin_char
                   and res[i + 2].end_char > res[i + 2].begin_char):
                 newline = False
                 sps = 0
                 j = (res[i + 1].end_char + 1)
                 while j < res[i + 2].begin_char:
                     if (text[j] == '\r' or text[j] == '\n'):
                         newline = True
                         sps += 1
                     elif (not Utils.isWhitespace(text[j])):
                         break
                     else:
                         sps += 1
                     j += 1
                 full_word = LanguageHelper.correct_word(
                     res[i].get_source_text(text) +
                     res[i + 2].get_source_text(text))
                 if (not newline):
                     if (full_word in uni_lex or full_word == "ИЗЗА"):
                         newline = True
                     elif (text[res[i + 1].begin_char] == (chr(0x00AD))):
                         newline = True
                     elif (LanguageHelper.ends_with_ex(
                             res[i].get_source_text(text), "О", "о", None,
                             None) and len(res[i + 2].word_forms) > 0
                           and res[i + 2].word_forms[0].is_in_dictionary):
                         if (text[res[i + 1].begin_char] == '¬'):
                             li = self.__process_one_word0(full_word)
                             if (li is not None and len(li) > 0
                                     and li[0].is_in_dictionary):
                                 newline = True
                     elif ((res[i].end_char + 2) == res[i + 2].begin_char):
                         if (not str.isupper(text[res[i + 2].begin_char])
                                 and (sps < 2) and len(full_word) > 4):
                             newline = True
                             if ((i + 3) < len(res)):
                                 ui3 = twrch[res[i + 3].begin_char]
                                 if (ui3.is_hiphen):
                                     newline = False
                     elif (((res[i].end_char + 1) == res[i + 1].begin_char
                            and sps > 0 and (sps < 3))
                           and len(full_word) > 4):
                         newline = True
                 if (newline):
                     li = self.__process_one_word0(full_word)
                     if (li is not None and len(li) > 0
                             and ((li[0].is_in_dictionary
                                   or full_word in uni_lex))):
                         res[i].end_char = res[i + 2].end_char
                         res[i].term = full_word
                         res[i].word_forms = li
                         del res[i + 1:i + 1 + 2]
                 else:
                     pass
             elif ((ui1.is_letter and ui0.is_letter and res[i].length > 2)
                   and res[i + 1].length > 1):
                 if (ui0.is_upper != ui1.is_upper):
                     continue
                 if (not ui0.is_cyrillic or not ui1.is_cyrillic):
                     continue
                 newline = False
                 j = (res[i].end_char + 1)
                 while j < res[i + 1].begin_char:
                     if (twrch[j].code == 0xD or twrch[j].code == 0xA):
                         newline = True
                         break
                     j += 1
                 if (not newline):
                     continue
                 full_word = LanguageHelper.correct_word(
                     res[i].get_source_text(text) +
                     res[i + 1].get_source_text(text))
                 if (not full_word in uni_lex):
                     continue
                 li = self.__process_one_word0(full_word)
                 if (li is not None and len(li) > 0
                         and li[0].is_in_dictionary):
                     res[i].end_char = res[i + 1].end_char
                     res[i].term = full_word
                     res[i].word_forms = li
                     del res[i + 1]
     i = 0
     first_pass2886 = True
     while True:
         if first_pass2886: first_pass2886 = False
         else: i += 1
         if (not (i < len(res))): break
         mt = res[i]
         mt.char_info = CharsInfo()
         ui0 = twrch[mt.begin_char]
         ui00 = UnicodeInfo.ALL_CHARS[ord((res[i].term[0]))]
         j = (mt.begin_char + 1)
         while j <= mt.end_char:
             if (ui0.is_letter):
                 break
             ui0 = twrch[j]
             j += 1
         if (ui0.is_letter):
             res[i].char_info.is_letter = True
             if (ui00.is_latin):
                 res[i].char_info.is_latin_letter = True
             elif (ui00.is_cyrillic):
                 res[i].char_info.is_cyrillic_letter = True
             if (res[i].language == MorphLang.UNKNOWN):
                 if (LanguageHelper.is_cyrillic(mt.term)):
                     res[i].language = (MorphLang.RU if
                                        def_lang.is_undefined else def_lang)
             if (good_text):
                 continue
             all_up = True
             all_lo = True
             j = mt.begin_char
             while j <= mt.end_char:
                 if (twrch[j].is_upper or twrch[j].is_digit):
                     all_lo = False
                 else:
                     all_up = False
                 j += 1
             if (all_up):
                 mt.char_info.is_all_upper = True
             elif (all_lo):
                 mt.char_info.is_all_lower = True
             elif (((ui0.is_upper or twrch[mt.begin_char].is_digit))
                   and mt.end_char > mt.begin_char):
                 all_lo = True
                 j = (mt.begin_char + 1)
                 while j <= mt.end_char:
                     if (twrch[j].is_upper or twrch[j].is_digit):
                         all_lo = False
                         break
                     j += 1
                 if (all_lo):
                     mt.char_info.is_capital_upper = True
                 elif (twrch[mt.end_char].is_lower
                       and (mt.end_char - mt.begin_char) > 1):
                     all_up = True
                     j = mt.begin_char
                     while j < mt.end_char:
                         if (twrch[j].is_lower):
                             all_up = False
                             break
                         j += 1
                     if (all_up):
                         mt.char_info.is_last_lower = True
         if (mt.char_info.is_last_lower and mt.length > 2
                 and mt.char_info.is_cyrillic_letter):
             pref = text[mt.begin_char:mt.begin_char + mt.end_char -
                         mt.begin_char]
             ok = False
             for wf in mt.word_forms:
                 if (wf.normal_case == pref or wf.normal_full == pref):
                     ok = True
                     break
             if (not ok):
                 mt.word_forms = list(mt.word_forms)
                 mt.word_forms.insert(
                     0, MorphWordForm._new16(pref, MorphClass.NOUN, 1))
     if (good_text or only_tokenizing):
         return res
     i = 0
     first_pass2887 = True
     while True:
         if first_pass2887: first_pass2887 = False
         else: i += 1
         if (not (i < len(res))): break
         if (res[i].length == 1 and res[i].char_info.is_latin_letter):
             ch = res[i].term[0]
             if (ch == 'C' or ch == 'A' or ch == 'P'):
                 pass
             else:
                 continue
             is_rus = False
             for ii in range(i - 1, -1, -1):
                 if ((res[ii].end_char + 1) != res[ii + 1].begin_char):
                     break
                 elif (res[ii].char_info.is_letter):
                     is_rus = res[ii].char_info.is_cyrillic_letter
                     break
             if (not is_rus):
                 ii = i + 1
                 while ii < len(res):
                     if ((res[ii - 1].end_char + 1) != res[ii].begin_char):
                         break
                     elif (res[ii].char_info.is_letter):
                         is_rus = res[ii].char_info.is_cyrillic_letter
                         break
                     ii += 1
             if (is_rus):
                 res[i].term = LanguageHelper.transliteral_correction(
                     res[i].term, None, True)
                 res[i].char_info.is_cyrillic_letter = True
                 res[i].char_info.is_latin_letter = True
     for r in res:
         if (r.char_info.is_all_upper or r.char_info.is_capital_upper):
             if (r.language.is_cyrillic):
                 ok = False
                 for wf in r.word_forms:
                     if (wf.class0_.is_proper_surname):
                         ok = True
                         break
                 if (not ok):
                     r.word_forms = list(r.word_forms)
                     InnerMorphology.M_ENGINE_RU.process_surname_variants(
                         r.term, r.word_forms)
     for r in res:
         for mv in r.word_forms:
             if (mv.normal_case is None):
                 mv.normal_case = r.term
     i = 0
     while i < (len(res) - 2):
         if (res[i].char_info.is_latin_letter
                 and res[i].char_info.is_all_upper and res[i].length == 1):
             if (twrch[res[i + 1].begin_char].is_quot
                     and res[i + 2].char_info.is_latin_letter
                     and res[i + 2].length > 2):
                 if ((res[i].end_char + 1) == res[i + 1].begin_char and
                     (res[i + 1].end_char + 1) == res[i + 2].begin_char):
                     wstr = "{0}{1}".format(res[i].term, res[i + 2].term)
                     li = self.__process_one_word0(wstr)
                     if (li is not None):
                         res[i].word_forms = li
                     res[i].end_char = res[i + 2].end_char
                     res[i].term = wstr
                     if (res[i + 2].char_info.is_all_lower):
                         res[i].char_info.is_all_upper = False
                         res[i].char_info.is_capital_upper = True
                     elif (not res[i + 2].char_info.is_all_upper):
                         res[i].char_info.is_all_upper = False
                     del res[i + 1:i + 1 + 2]
         i += 1
     i = 0
     first_pass2888 = True
     while True:
         if first_pass2888: first_pass2888 = False
         else: i += 1
         if (not (i < (len(res) - 1))): break
         if (not res[i].char_info.is_letter
                 and not res[i + 1].char_info.is_letter
                 and (res[i].end_char + 1) == res[i + 1].begin_char):
             if (twrch[res[i].begin_char].is_hiphen
                     and twrch[res[i + 1].begin_char].is_hiphen):
                 if (i == 0 or not twrch[res[i - 1].begin_char].is_hiphen):
                     pass
                 else:
                     continue
                 if ((i + 2) == len(res)
                         or not twrch[res[i + 2].begin_char].is_hiphen):
                     pass
                 else:
                     continue
                 res[i].end_char = res[i + 1].end_char
                 del res[i + 1]
     return res
Esempio n. 11
0
 def is_real_from(self) -> bool:
     """Tell whether the item starts with the literal word "FROM" / "ОТ".

     Returns:
         True when ``Utils.asObjectOrNull(self.begin_token, TextToken)``
         yields a token whose ``term`` is "FROM" or "ОТ"; False otherwise,
         including when that call yields None.
     """
     head = Utils.asObjectOrNull(self.begin_token, TextToken)
     return head is not None and head.term in ("FROM", "ОТ")
 def address(self) -> 'Referent':
     """Registration address.

     Returns the ``ATTR_ADDRESS`` slot value passed through
     ``Utils.asObjectOrNull(..., Referent)``.
     """
     slot_value = self.get_slot_value(PersonIdentityReferent.ATTR_ADDRESS)
     return Utils.asObjectOrNull(slot_value, Referent)
 def state(self) -> 'Referent':
     """State (country).

     Returns the ``ATTR_STATE`` slot value passed through
     ``Utils.asObjectOrNull(..., Referent)``.
     """
     slot_value = self.get_slot_value(PersonIdentityReferent.ATTR_STATE)
     return Utils.asObjectOrNull(slot_value, Referent)
 def __str__(self) -> str:
     """Render the item as ``"<number> <noun> (<nouns_genetive>)"``.

     ``noun`` and ``nouns_genetive`` are each passed to ``Utils.ifNotNull``
     together with "?" as the second argument.
     """
     number = self.number
     noun_text = Utils.ifNotNull(self.noun, "?")
     genitive_text = Utils.ifNotNull(self.nouns_genetive, "?")
     return "{0} {1} ({2})".format(number, noun_text, genitive_text)
Esempio n. 15
0
 def __analizeGet(self, bfi: 'BusinessFactItem') -> 'ReferentToken':
     """Try to build a purchase / sale / ownership fact around ``bfi``.

     Works with the referent found before ``bfi`` (``__FindRefBefore``) and
     the first token after ``bfi`` that is neither an adverb nor one of the
     words "ПРАВО", "РАСПОРЯЖАТЬСЯ", "РОЗПОРЯДЖАТИСЯ". Four patterns are
     tried in order and the first one that matches produces the result:

     1. funds follow the key word and the preceding referent is not funds;
     2. an organization (for a noun key word of kind GET/SELL) or a person
        follows, and the preceding referent is funds or an organization;
     3. the preceding referent is an organization or a person, followed by
        a person/organization plus funds, or by an organization alone;
     4. the preceding referent is funds and an organization or a person
        follows.

     Args:
         bfi: the key item; its ``base_kind``, ``morph`` and token span are
             read here.

     Returns:
         A ``ReferentToken`` wrapping a new ``BusinessFactReferent``, or
         None when no pattern matches.
     """
     before = self.__FindRefBefore(bfi.begin_token.previous)
     if before is None:
         return None

     # Move past adverbs and the listed filler words after the key item.
     t1 = bfi.end_token.next0_
     while t1 is not None and (t1.morph.class0_.is_adverb
                               or t1.isValue("ПРАВО", None)
                               or t1.isValue("РАСПОРЯЖАТЬСЯ", None)
                               or t1.isValue("РОЗПОРЯДЖАТИСЯ", None)):
         t1 = t1.next0_
     if t1 is None:
         return None

     kind = bfi.base_kind
     is_get = kind == BusinessFactKind.GET
     is_sell = kind == BusinessFactKind.SELL
     # Fact type used by the patterns whose object is a securities package.
     if is_get:
         securities_typ = "покупка ценных бумаг"
     elif is_sell:
         securities_typ = "продажа ценных бумаг"
     else:
         securities_typ = "владение ценными бумагами"
     after_ref = t1.getReferent()

     # Pattern 1: <who> ... <key word> <funds>.
     if (isinstance(after_ref, FundsReferent)
             and not isinstance(before.referent, FundsReferent)):
         funds = Utils.asObjectOrNull(after_ref, FundsReferent)
         fact = BusinessFactReferent._new436(kind)
         fact.who = before.referent
         second = self.__FindSecRefBefore(before)
         if second is not None:
             fact.addSlot(BusinessFactReferent.ATTR_WHO, second.referent,
                          False, 0)
             before = second
         if funds.source == before.referent and second is None:
             # The subject equals the funds' own source: try the referent
             # standing before it instead.
             second = self.__FindRefBefore(before.begin_token.previous)
             if second is not None:
                 before = second
                 fact.who = before.referent
         if funds.source == before.referent:
             # Walk back (at most 100 tokens) looking for another
             # organization to use as the funds' source.
             steps = 0
             tt = before.begin_token.previous
             while tt is not None:
                 steps += 1
                 if steps > 100:
                     break
                 refs = tt.getReferents()
                 if refs is not None:
                     for r in refs:
                         if (isinstance(r, OrganizationReferent)
                                 and r != before.referent):
                             # Forces the outer loop to stop on its next
                             # counter check.
                             steps = 1000
                             funds.source = Utils.asObjectOrNull(
                                 r, OrganizationReferent)
                             break
                 tt = tt.previous
         fact._addWhat(funds)
         fact.typ = securities_typ
         self.__findDate(fact, before.begin_token)
         self.__findSum(fact, before.end_token)
         return ReferentToken(fact, before.begin_token, t1)

     # Pattern 2: <funds | organization> ... <key word> <organization | person>.
     if ((bfi.morph.class0_.is_noun and (is_get or is_sell)
          and isinstance(after_ref, OrganizationReferent))
             or isinstance(after_ref, PersonReferent)):
         before_is_funds = isinstance(before.referent, FundsReferent)
         if (before_is_funds
                 or isinstance(before.referent, OrganizationReferent)):
             fact = BusinessFactReferent._new436(kind)
             if is_get:
                 fact.typ = ("покупка ценных бумаг"
                             if before_is_funds else "покупка компании")
             elif is_sell:
                 fact.typ = ("продажа ценных бумаг"
                             if before_is_funds else "продажа компании")
             fact.who = after_ref
             fact._addWhat(before.referent)
             self.__findDate(fact, before.begin_token)
             self.__findSum(fact, before.end_token)
             t1 = BusinessAnalyzer.__addWhosList(t1, fact)
             return ReferentToken(fact, before.begin_token, t1)

     # Pattern 3: <organization | person> ... <key word> [preposition] <target>.
     if isinstance(before.referent, (OrganizationReferent, PersonReferent)):
         tt = t1
         if tt.morph.class0_.is_preposition:
             tt = tt.next0_
         target = tt.getReferent() if tt is not None else None
         if (isinstance(target, (PersonReferent, OrganizationReferent))
                 and tt.next0_ is not None
                 and isinstance(tt.next0_.getReferent(), FundsReferent)):
             fact = BusinessFactReferent._new436(kind)
             fact.typ = ("покупка ценных бумаг"
                         if is_get else "продажа ценных бумаг")
             fact.who = before.referent
             second = self.__FindSecRefBefore(before)
             if second is not None:
                 fact.addSlot(BusinessFactReferent.ATTR_WHO,
                              second.referent, False, 0)
                 before = second
             fact.whom = target
             fact._addWhat(tt.next0_.getReferent())
             self.__findDate(fact, before.begin_token)
             self.__findSum(fact, before.end_token)
             return ReferentToken(fact, before.begin_token, tt.next0_)
         elif isinstance(target, OrganizationReferent):
             fact = BusinessFactReferent._new436(kind)
             fact.typ = ("покупка компании"
                         if is_get else "продажа компании")
             fact.who = before.referent
             second = self.__FindSecRefBefore(before)
             if second is not None:
                 fact.addSlot(BusinessFactReferent.ATTR_WHO,
                              second.referent, False, 0)
                 before = second
             fact._addWhat(target)
             self.__findDate(fact, before.begin_token)
             self.__findSum(fact, before.end_token)
             # NOTE(review): the span ends at tt.next0_, which is not
             # None-checked in this branch — confirm this is intended.
             return ReferentToken(fact, before.begin_token, tt.next0_)

     # Pattern 4: <funds> ... <key word> <organization | person>.
     if (isinstance(before.referent, FundsReferent)
             and isinstance(after_ref,
                            (OrganizationReferent, PersonReferent))):
         fact = BusinessFactReferent._new436(kind)
         fact.typ = securities_typ
         fact.who = after_ref
         fact._addWhat(before.referent)
         self.__findDate(fact, before.begin_token)
         self.__findSum(fact, before.end_token)
         return ReferentToken(fact, before.begin_token, t1)
     return None
Esempio n. 16
0
 def try_attach(t: 'Token') -> 'TitleItemToken':
     """Try to recognise a title item starting at token ``t``.

     Checks, in order: fixed key words on a text token ("ТЕМА", "ПО/НА
     ТЕМА", "ПЕРЕВОД ... С", "СЕКЦИЯ", speciality introducers), the
     speciality parser on ``t`` itself, a noun phrase ending in a term from
     ``M_TERMINS`` tagged ``Types.TYP`` (with extra handling for
     dissertations and reports), and finally a bare ``M_TERMINS`` match,
     optionally wrapped in brackets.

     Args:
         t: the token to start from.

     Returns:
         The recognised ``TitleItemToken`` or None.
     """
     tt = Utils.asObjectOrNull(t, TextToken)
     if tt is not None:
         term = tt.term
         if term == "ТЕМА":
             tit = TitleItemToken.try_attach(tt.next0_)
             if tit is not None and tit.typ == TitleItemToken.Types.TYP:
                 end = tit.end_token
                 if end.next0_ is not None and end.next0_.is_char(':'):
                     end = end.next0_
                 return TitleItemToken._new2655(
                     t, end, TitleItemToken.Types.TYPANDTHEME, tit.value)
             # Plain theme marker, optionally followed by a colon.
             end = tt
             if tt.next0_ is not None and tt.next0_.is_char(':'):
                 end = tt.next0_
             return TitleItemToken(tt, end, TitleItemToken.Types.THEME)
         if term in ("ПО", "НА"):
             if (tt.next0_ is not None
                     and tt.next0_.is_value("ТЕМА", None)):
                 end = tt.next0_
                 if end.next0_ is not None and end.next0_.is_char(':'):
                     end = end.next0_
                 return TitleItemToken(tt, end, TitleItemToken.Types.THEME)
         if term in ("ПЕРЕВОД", "ПЕР"):
             nxt = tt.next0_
             if nxt is not None and nxt.is_char('.'):
                 nxt = nxt.next0_
             # Accept both the Latin "C" and the Cyrillic "С".
             if isinstance(nxt, TextToken) and nxt.term in ("C", "С"):
                 nxt = nxt.next0_
                 if isinstance(nxt, TextToken):
                     return TitleItemToken(
                         t, nxt, TitleItemToken.Types.TRANSLATE)
         if term in ("СЕКЦИЯ", "SECTION", "СЕКЦІЯ"):
             t1 = tt.next0_
             if t1 is not None and t1.is_char(':'):
                 t1 = t1.next0_
             br = BracketHelper.try_parse(t1, BracketParseAttr.NO, 100)
             if br is not None:
                 t1 = br.end_token
             elif t1 != tt.next0_:
                 # A colon was skipped: take everything up to the line end.
                 while t1 is not None and not t1.is_newline_after:
                     t1 = t1.next0_
                 if t1 is None:
                     return None
             if t1 != tt.next0_:
                 return TitleItemToken(tt, t1, TitleItemToken.Types.DUST)
         # Speciality introducers: the word itself, preposition + word, or
         # a slash at the start of a line.
         t1 = None
         if tt.is_value("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ"):
             t1 = tt.next0_
         elif (tt.morph.class0_.is_preposition and tt.next0_ is not None
               and tt.next0_.is_value("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ")):
             t1 = tt.next0_.next0_
         elif tt.is_char('/') and tt.is_newline_before:
             t1 = tt.next0_
         if t1 is not None:
             if t1.is_char_of(":") or t1.is_hiphen:
                 t1 = t1.next0_
             spec = TitleItemToken.__try_attach_speciality(t1, True)
             if spec is not None:
                 spec.begin_token = t
                 return spec

     direct_spec = TitleItemToken.__try_attach_speciality(t, False)
     if direct_spec is not None:
         return direct_spec
     if isinstance(t, ReferentToken):
         return None

     npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
     if npt is not None:
         title = npt.get_normal_case_text(None, MorphNumber.UNDEFINED,
                                          MorphGender.UNDEFINED, False)
         tok = TitleItemToken.M_TERMINS.try_parse(npt.end_token,
                                                  TerminParseAttr.NO)
         if tok is not None:
             kind = Utils.valToEnum(tok.termin.tag, TitleItemToken.Types)
             if kind == TitleItemToken.Types.TYP:
                 tit = TitleItemToken.try_attach(tok.end_token.next0_)
                 if (tit is not None
                         and tit.typ == TitleItemToken.Types.THEME):
                     return TitleItemToken._new2655(
                         npt.begin_token, tit.end_token,
                         TitleItemToken.Types.TYPANDTHEME, title)
                 if title in ("РАБОТА", "РОБОТА", "ПРОЕКТ"):
                     return None
                 t1 = tok.end_token
                 if title in ("ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ"):
                     # Look ahead for the academic degree to refine the
                     # title; give up after more than 3 unmatched tokens.
                     degree_titles = {
                         "доктор наук": "ДОКТОРСКАЯ ДИССЕРТАЦИЯ",
                         "кандидат наук": "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ",
                         "магистр": "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ",
                     }
                     misses = 0
                     cur = t1.next0_
                     while cur is not None:
                         if (cur.morph.class0_.is_preposition
                                 or cur.is_value("СОИСКАНИЕ", "")):
                             cur = cur.next0_
                             continue
                         npt1 = NounPhraseHelper.try_parse(
                             cur, NounPhraseParseAttr.NO, 0, None)
                         if (npt1 is not None and npt1.noun.is_value(
                                 "СТЕПЕНЬ", "СТУПІНЬ")):
                             t1 = npt1.end_token
                             cur = t1.next0_
                             continue
                         rt = t1.kit.process_referent("PERSON", cur)
                         if (rt is not None and isinstance(
                                 rt.referent, PersonPropertyReferent)):
                             ppr = Utils.asObjectOrNull(
                                 rt.referent, PersonPropertyReferent)
                             refined = degree_titles.get(ppr.name)
                             if refined is not None:
                                 t1 = rt.end_token
                                 title = refined
                                 break
                         if (cur.is_value("ДОКТОР", None)
                                 or cur.is_value("КАНДИДАТ", None)
                                 or cur.is_value("МАГИСТР", "МАГІСТР")):
                             t1 = cur
                             npt1 = NounPhraseHelper.try_parse(
                                 cur.next0_, NounPhraseParseAttr.NO, 0,
                                 None)
                             if (npt1 is not None
                                     and npt1.end_token.is_value(
                                         "НАУК", None)):
                                 t1 = npt1.end_token
                             if cur.is_value("МАГИСТР", "МАГІСТР"):
                                 title = "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ"
                             elif cur.is_value("ДОКТОР", None):
                                 title = "ДОКТОРСКАЯ ДИССЕРТАЦИЯ"
                             else:
                                 title = "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"
                             break
                         misses += 1
                         if misses > 3:
                             break
                         cur = cur.next0_
                 if t1.next0_ is not None and t1.next0_.is_char('.'):
                     t1 = t1.next0_
                 if (title.endswith("ОТЧЕТ") and t1.next0_ is not None
                         and t1.next0_.is_value("О", None)):
                     # "ОТЧЕТ О ...": absorb the prepositional noun phrase.
                     npt1 = NounPhraseHelper.try_parse(
                         t1.next0_, NounPhraseParseAttr.PARSEPREPOSITION, 0,
                         None)
                     if (npt1 is not None
                             and npt1.morph.case_.is_prepositional):
                         t1 = npt1.end_token
                 return TitleItemToken._new2655(npt.begin_token, t1, kind,
                                                title)

     tok1 = TitleItemToken.M_TERMINS.try_parse(t, TerminParseAttr.NO)
     if tok1 is not None:
         return TitleItemToken(
             tok1.begin_token, tok1.end_token,
             Utils.valToEnum(tok1.termin.tag, TitleItemToken.Types))
     if BracketHelper.can_be_start_of_sequence(t, False, False):
         # The same term lookup, but for a term enclosed in brackets.
         tok1 = TitleItemToken.M_TERMINS.try_parse(t.next0_,
                                                   TerminParseAttr.NO)
         if (tok1 is not None and BracketHelper.can_be_end_of_sequence(
                 tok1.end_token.next0_, False, None, False)):
             closing = tok1.end_token.next0_
             return TitleItemToken(
                 tok1.begin_token, closing,
                 Utils.valToEnum(tok1.termin.tag, TitleItemToken.Types))
     return None
Esempio n. 17
0
 def __analizeProfit(self, bfi: 'BusinessFactItem') -> 'ReferentToken':
     """Try to build a fact with a money amount around ``bfi``.

     After ``bfi`` the method looks for an organization (directly, or via
     ``kit.processReferent``), then scans forward up to the next '.' token
     collecting a money referent, a date / date range and, if still
     missing, an organization. When no organization is found it scans
     backwards from ``bfi`` up to the previous '.' token.

     Args:
         bfi: the key item; its span and ``base_kind`` are read here.

     Returns:
         A ``ReferentToken`` wrapping a new ``BusinessFactReferent``, or
         None when there is no following token, no money amount or no
         organization.
     """
     if bfi.end_token.next0_ is None:
         return None
     start = bfi.begin_token
     last = bfi.end_token
     typ = last.getNormalCaseText(None, True, MorphGender.UNDEFINED,
                                  False).lower()

     company = Utils.asObjectOrNull(last.next0_.getReferent(),
                                    OrganizationReferent)
     t = last
     if company is not None:
         t = t.next0_
     else:
         rt = t.kit.processReferent(OrganizationAnalyzer.ANALYZER_NAME,
                                    t.next0_)
         if rt is not None:
             company = Utils.asObjectOrNull(rt.referent,
                                            OrganizationReferent)
             t = rt.end_token

     when = None
     amount = None
     t = t.next0_
     while t is not None:
         if t.isChar('.'):
             break
         if t.isChar('('):
             # Jump over a bracketed fragment as a whole.
             br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
             if br is not None:
                 t = br.end_token.next0_
                 continue
         if ((t.morph.class0_.is_verb or t.isValue("ДО", None)
              or t.is_hiphen or t.isValue("РАЗМЕР", None)
              or t.isValue("РОЗМІР", None))
                 and t.next0_ is not None
                 and isinstance(t.next0_.getReferent(), MoneyReferent)):
             if amount is not None:
                 # A second amount ends the scan.
                 break
             money_tok = t.next0_
             amount = Utils.asObjectOrNull(money_tok.getReferent(),
                                           MoneyReferent)
             last = money_tok
             t = money_tok.next0_
             continue
         r = t.getReferent()
         if isinstance(r, (DateRangeReferent, DateReferent)):
             if when is None:
                 when = r
                 last = t
         elif isinstance(r, OrganizationReferent) and company is None:
             company = Utils.asObjectOrNull(r, OrganizationReferent)
             last = t
         t = t.next0_

     if amount is None:
         return None
     if company is None:
         # Look backwards for an earlier business fact or organization.
         tt = start.previous
         while tt is not None and not tt.isChar('.'):
             earlier = Utils.asObjectOrNull(tt.getReferent(),
                                            BusinessFactReferent)
             if earlier is not None:
                 company = Utils.asObjectOrNull(earlier.who,
                                                OrganizationReferent)
                 break
             company = Utils.asObjectOrNull(tt.getReferent(),
                                            OrganizationReferent)
             if company is not None:
                 break
             tt = tt.previous
     if company is None:
         return None

     fact = BusinessFactReferent._new436(bfi.base_kind)
     fact.who = company
     fact.typ = typ
     fact.addSlot(BusinessFactReferent.ATTR_MISC, amount, False, 0)
     if when is None:
         self.__findDate(fact, bfi.begin_token)
     else:
         fact.when = when
     return ReferentToken(fact, start, last)
 def __serialize_morph_misc_info(res: io.IOBase,
                                 mi: 'MorphMiscInfo') -> None:
     """Write a MorphMiscInfo record to the stream ``res``.

     The record is emitted as: ``mi._m_value`` (through the short
     serializer), then every entry of ``mi.attrs`` (through the string
     serializer), then one trailing 0xFF byte.

     NOTE(review): the 0xFF byte presumably marks the end of the attribute
     list for the reader -- confirm against the matching deserializer.
     """
     MorphSerializeHelper.__serialize_short(res, mi._m_value)
     for attr_text in mi.attrs:
         MorphSerializeHelper.__serialize_string(res, attr_text)
     Utils.writeByteIO(res, 0xFF)
Esempio n. 19
0
 def __analizeLikelihoods(
         self, rt: 'ReferentToken') -> typing.List['ReferentToken']:
     """Build further business facts from the tokens that follow ``rt``.

     ``rt`` must carry a BusinessFactReferent whose ``whats`` holds exactly
     one FundsReferent; otherwise None is returned. The tokens after
     ``rt.end_token`` are scanned until a line break, a '.', or a token that
     cannot be interpreted. Organization/person referents are collected as
     participants; funds referents and parsed funds items are collected as
     funds. One new fact is produced per participant, paired positionally
     with the collected funds.

     Returns:
         A list of ReferentToken objects, or None when no participant was
         found or the participant and funds counts differ.
     """
     base_fact = Utils.asObjectOrNull(rt.referent, BusinessFactReferent)
     if (base_fact is None or len(base_fact.whats) != 1
             or not isinstance(base_fact.whats[0], FundsReferent)):
         return None
     # Template that parsed funds items are cloned from; it is replaced by
     # the most recent explicit FundsReferent met during the scan.
     template = Utils.asObjectOrNull(base_fact.whats[0], FundsReferent)
     owners = []
     stakes = []
     tok = rt.end_token.next0_
     while tok is not None:
         if tok.is_newline_before or tok.isChar('.'):
             break
         skippable = (tok.morph.class0_.is_adverb
                      or tok.is_hiphen
                      or tok.is_comma_and
                      or tok.morph.class0_.is_conjunction
                      or tok.morph.class0_.is_preposition
                      or tok.morph.class0_.is_misc)
         if not skippable:
             ref = tok.getReferent()
             if isinstance(ref, (OrganizationReferent, PersonReferent)):
                 owners.append(Utils.asObjectOrNull(tok, ReferentToken))
             elif isinstance(ref, FundsReferent):
                 template = Utils.asObjectOrNull(ref, FundsReferent)
                 stakes.append(template)
             else:
                 item = FundsItemToken.tryParse(tok, None)
                 if item is None:
                     break
                 stake = Utils.asObjectOrNull(template.clone(),
                                              FundsReferent)
                 stake.occurrence.clear()
                 stake.addOccurenceOfRefTok(
                     ReferentToken(stake, item.begin_token, item.end_token))
                 if (item.typ == FundsItemTyp.PERCENT
                         and item.num_val is not None):
                     stake.percent = item.num_val.real_value
                 elif (item.typ == FundsItemTyp.COUNT
                       and item.num_val is not None
                       and item.num_val.int_value is not None):
                     stake.count = item.num_val.int_value
                 elif item.typ == FundsItemTyp.SUM:
                     stake.sum0_ = Utils.asObjectOrNull(item.ref,
                                                        MoneyReferent)
                 else:
                     break
                 stakes.append(stake)
                 # Resume scanning right after the parsed item.
                 tok = item.end_token
         tok = tok.next0_
     if len(owners) == 0 or len(owners) != len(stakes):
         return None
     produced = []
     # Lengths are equal here, so zip pairs every participant with a stake.
     for owner, stake in zip(owners, stakes):
         fact = BusinessFactReferent._new447(base_fact.kind, base_fact.typ)
         fact.who = owner.referent
         fact._addWhat(stake)
         for slot in base_fact.slots:
             if (slot.type_name == BusinessFactReferent.ATTR_MISC
                     or slot.type_name == BusinessFactReferent.ATTR_WHEN):
                 fact.addSlot(slot.type_name, slot.value, False, 0)
         produced.append(
             ReferentToken(fact, owner.begin_token, owner.end_token))
     return produced
 def __serialize_byte(res: io.IOBase, val: int) -> None:
     """Write the single value ``val`` to ``res`` via ``Utils.writeByteIO``."""
     Utils.writeByteIO(res, val)
Esempio n. 21
0
 def __calc_genetive(self) -> None:
     """Set ``self.coef`` for a link from ``self.from0_`` to ``self.to``.

     Nothing is returned. The rules are tried in order (formula items,
     numbers with units, the derivate groups of the target, then the
     morphological case of the source); when none of them applies,
     ``self.coef`` keeps its previous value. ``self.can_be_pacient`` is set
     when a transitive verb-phrase target meets an accusative source.
     """
     dep = self.from0_.source
     if not dep.can_be_noun:
         return
     head = self.to.source

     def follows_head() -> bool:
         # True when the source item comes immediately after the target.
         return self.from0_.order == (self.to.order + 1)

     if dep.typ == SentItemType.FORMULA:
         if head.typ == SentItemType.NOUN:
             self.coef = SemanticService.PARAMS.transitive_coef
         return
     frmorph = self.from_morph
     if head.typ == SentItemType.FORMULA:
         if dep.typ == SentItemType.NOUN:
             if frmorph.case_.is_genitive:
                 self.coef = SemanticService.PARAMS.transitive_coef
             elif frmorph.case_.is_undefined:
                 self.coef = 0
         return
     if isinstance(dep.source, NumbersWithUnitToken):
         if not follows_head():
             return
         measured = Utils.asObjectOrNull(dep.source, NumbersWithUnitToken)
         kind = UnitToken.calc_kind(measured.units)
         if (kind != MeasureKind.UNDEFINED
                 and UnitsHelper.check_keyword(kind, head.source)):
             self.coef = SemanticService.PARAMS.next_model * 3
             return
         if isinstance(head.source, NumbersWithUnitToken):
             return
     # A preposition-less, non-verb source that is not adjacent to the
     # target is rejected by the plain case rules at the end.
     detached = (Utils.isNullOrEmpty(self.from_prep)
                 and not isinstance(dep.source, VerbPhraseToken)
                 and not follows_head())
     if head.dr_groups is not None:
         for group in head.dr_groups:
             model = group.cm
             if model.transitive and Utils.isNullOrEmpty(self.from_prep):
                 if isinstance(head.source, VerbPhraseToken):
                     matched = frmorph.case_.is_accusative
                     if matched:
                         self.can_be_pacient = True
                 else:
                     matched = (frmorph.case_.is_genitive
                                and follows_head())
                 if matched:
                     self.coef = SemanticService.PARAMS.transitive_coef
                     return
             if ((model.questions & QuestionType.WHATTODO)
                     != QuestionType.UNDEFINED
                     and isinstance(dep.source, VerbPhraseToken)):
                 self.coef = SemanticService.PARAMS.transitive_coef
                 return
             if model.nexts is not None and self.from_prep in model.nexts:
                 overlap = model.nexts[self.from_prep] & frmorph.case_
                 if not overlap.is_undefined:
                     # A detached genitive without a preposition does not
                     # count as a match for this group.
                     ignored = (Utils.isNullOrEmpty(self.from_prep)
                                and not follows_head()
                                and overlap.is_genitive)
                     if not ignored:
                         self.coef = SemanticService.PARAMS.next_model
                         return
     if detached or not Utils.isNullOrEmpty(self.from_prep):
         return
     case = frmorph.case_
     if case.is_genitive or case.is_instrumental or case.is_dative:
         if isinstance(head.source, NumbersWithUnitToken) and case.is_genitive:
             self.coef = SemanticService.PARAMS.transitive_coef
         else:
             self.coef = SemanticService.PARAMS.ng_link
             if case.is_nominative or dep.typ == SentItemType.PARTBEFORE:
                 self.coef /= 2
             if not case.is_genitive:
                 self.coef /= 2
     elif isinstance(dep.source, VerbPhraseToken):
         self.coef = 0.1
     if (isinstance(head.source, NumbersWithUnitToken)
             and head.end_token.is_value("ЧЕМ", None)):
         self.coef = SemanticService.PARAMS.transitive_coef * 2
 def __serialize_int(res: io.IOBase, val: int) -> None:
     """Write ``val`` to ``res`` as four ``Utils.writeByteIO`` calls.

     The calls receive ``val``, ``val >> 8``, ``val >> 16`` and
     ``val >> 24`` in that order, i.e. least significant part first.

     NOTE(review): the shifted values are passed unmasked; this assumes
     ``Utils.writeByteIO`` keeps only the low 8 bits -- confirm.
     """
     Utils.writeByteIO(res, val)
     for shift in (8, 16, 24):
         Utils.writeByteIO(res, (val >> shift))
Esempio n. 23
0
 def __calc_pacient(self, noplural: bool) -> float:
     """Score the source item as a possible patient of the verb ``self.to_verb``.

     Args:
         noplural: when true and the first verb form is plural, the source
             must itself be plural or pass the extra agreement checks.

     Returns:
         The coefficient for the link, or -1 when the link is rejected.
         Most accepting paths also store the value in ``self.coef``; several
         rejecting paths return -1 without touching ``self.coef``. The
         passive branch additionally sets ``self.plural`` to 1 or 0.
     """
     if not Utils.isNullOrEmpty(self.from_prep):
         self.coef = -1
         return self.coef
     first_form = self.to_verb.first_verb.verb_morph
     if first_form is None:
         return -1
     last_form = self.to_verb.last_verb.verb_morph
     if last_form is None:
         return -1
     noun_morph = self.from_morph

     is_passive = (last_form.misc.voice == MorphVoice.PASSIVE
                   or self.to_verb.last_verb.morph.contains_attr(
                       "страд.з.", None))
     if is_passive:
         if first_form.number != MorphNumber.PLURAL:
             if first_form.number == MorphNumber.SINGULAR:
                 self.plural = 0
                 if self.from_is_plural:
                     return -1
             if not NGLink.__check_morph_accord(noun_morph, False,
                                                first_form):
                 return -1
             self.coef = SemanticService.PARAMS.morph_accord
             return self.coef
         if noplural and not self.from_is_plural:
             if not NGLink.__check_morph_accord(noun_morph, False,
                                                first_form):
                 return -1
             if (len(noun_morph.items) > 0
                     and not first_form.case_.is_undefined):
                 # At least one plural variant must be case-compatible
                 # with the verb form (or carry no case at all).
                 has_plural = any(
                     (var.number & MorphNumber.PLURAL) == MorphNumber.PLURAL
                     and (var.case_.is_undefined
                          or not (var.case_ & first_form.case_).is_undefined)
                     for var in noun_morph.items)
                 if not has_plural:
                     self.coef = -1
                     return self.coef
         self.coef = SemanticService.PARAMS.verb_plural
         self.plural = 1
         return self.coef

     is_trans = False
     is_ref_dative = False
     groups = DerivateService.find_derivates(
         Utils.ifNotNull(last_form.normal_full, last_form.normal_case),
         True, None)
     if groups is not None:
         for group in groups:
             if group.cm.transitive:
                 is_trans = True
             rev_agent = group.cm_rev.agent
             if rev_agent is not None and not rev_agent.case_.is_nominative:
                 is_ref_dative = True

     if NGLink.__is_rev_verb(last_form):
         # The preposition test repeats the entry guard; it is kept to
         # mirror the original flow.
         if not Utils.isNullOrEmpty(self.from_prep):
             return -1
         if noun_morph.case_.is_undefined:
             self.coef = 0
             return self.coef
         if is_ref_dative:
             accepted = noun_morph.case_.is_nominative
         else:
             accepted = noun_morph.case_.is_instrumental
         if accepted:
             self.coef = SemanticService.PARAMS.transitive_coef
             return self.coef
         return -1

     if last_form != first_form and not is_trans:
         groups = DerivateService.find_derivates(
             Utils.ifNotNull(first_form.normal_full, first_form.normal_case),
             True, None)
         if groups is not None:
             is_trans = any(group.cm.transitive for group in groups)

     if is_trans:
         if not Utils.isNullOrEmpty(self.from_prep):
             return -1
         if not noun_morph.case_.is_undefined:
             if not noun_morph.case_.is_accusative:
                 return -1
             self.coef = SemanticService.PARAMS.transitive_coef
             # Every additional case reading halves the coefficient.
             case = noun_morph.case_
             for also_possible in (case.is_dative, case.is_genitive,
                                   case.is_instrumental):
                 if also_possible:
                     self.coef /= 2
             return self.coef

     if last_form.normal_case == "БЫТЬ":
         if not Utils.isNullOrEmpty(self.from_prep):
             return -1
         if noun_morph.case_.is_instrumental:
             self.coef = SemanticService.PARAMS.transitive_coef
             return self.coef
         if noun_morph.case_.is_nominative:
             after_verb = (self.from0_.source.begin_token.begin_char >
                           self.to_verb.end_char)
             if after_verb:
                 self.coef = SemanticService.PARAMS.transitive_coef
             else:
                 self.coef = SemanticService.PARAMS.transitive_coef / 2
             return self.coef
         if noun_morph.case_.is_undefined:
             self.coef = SemanticService.PARAMS.transitive_coef / 2
             return self.coef
     return -1
Esempio n. 24
0
 def __str__(self) -> str:
     """Render as "№ <number>", with "?" handed to Utils.ifNotNull as fallback."""
     shown = Utils.ifNotNull(self.number, "?")
     return "№ {0}".format(shown)
Esempio n. 25
0
 def __str__(self) -> str:
     """Render as "<typ> <value> <source text>", space separated.

     ``typ`` goes through Utils.enumToString and ``value`` through
     Utils.ifNotNull with "" as the fallback.
     """
     pieces = (Utils.enumToString(self.typ),
               Utils.ifNotNull(self.value, ""),
               self.getSourceText())
     return "{0} {1} {2}".format(*pieces)
Esempio n. 26
0
 def tryParse(t: 'Token', items: typing.List['NounPhraseItem'],
              attrs: 'NounPhraseParseAttr') -> 'NounPhraseItem':
     """Try to build a NounPhraseItem that starts at token ``t``.

     Args:
         t: first token of the candidate item; None yields None.
         items: items already accepted for the phrase (may be None or
             empty); they are passed to ``tryAccordVariant`` to check
             agreement and influence several heuristics.
         attrs: NounPhraseParseAttr flags; IGNOREPARTICIPLES,
             REFERENTCANBENOUN and PARSENUMERICASADJECTIVE are tested here.

     Returns:
         A NounPhraseItem whose ``adj_morph`` / ``noun_morph`` lists hold
         the accepted morphological variants, or None when the token is
         rejected by one of the checks.
     """
     if (t is None):
         return None
     t0 = t
     _can_be_surname = False
     _is_doubt_adj = False
     rt = Utils.asObjectOrNull(t, ReferentToken)
     # A referent wrapping a single token: parse the inner token and stretch
     # the result over the referent token itself.
     if (rt is not None and rt.begin_token == rt.end_token):
         res = NounPhraseItem.tryParse(rt.begin_token, items, attrs)
         if (res is not None):
             res.begin_token = res.end_token = t
             return res
     # A referent that follows earlier items is accepted as a noun; each
     # variant's normal value is the string form of the referent.
     if (rt is not None and items is not None and len(items) > 0):
         res = NounPhraseItem(t, t)
         for m in t.morph.items:
             v = NounPhraseItemTextVar(m, None)
             v.normal_value = str(t.getReferent())
             res.noun_morph.append(v)
         res.can_be_noun = True
         return res
     # No-op branch: number tokens get no special treatment at this point.
     if (isinstance(t, NumberToken)):
         pass
     has_legal_verb = False
     if (isinstance(t, TextToken)):
         # Pre-filters for plain text tokens; any of them may reject the
         # token outright by returning None.
         if (not t.chars.is_letter):
             return None
         str0_ = (t).term
         # Words ending in 'А' or 'О' are checked against dictionary word
         # forms that are verbs, adverbs or adjectives.
         if (str0_[len(str0_) - 1] == 'А' or str0_[len(str0_) - 1] == 'О'):
             for wf in t.morph.items:
                 if ((isinstance(wf, MorphWordForm))
                         and (wf).is_in_dictionary):
                     if (wf.class0_.is_verb):
                         mc = t.getMorphClassInDictionary()
                         if (not mc.is_noun and
                             (((attrs) &
                               (NounPhraseParseAttr.IGNOREPARTICIPLES)))
                                 == (NounPhraseParseAttr.NO)):
                             if (not LanguageHelper.endsWithEx(
                                     str0_, "ОГО", "ЕГО", None, None)):
                                 return None
                         has_legal_verb = True
                     if (wf.class0_.is_adverb):
                         if (t.next0_ is None or not t.next0_.is_hiphen):
                             # Only the listed adverbs are tolerated.
                             if ((str0_ == "ВСЕГО" or str0_ == "ДОМА"
                                  or str0_ == "НЕСКОЛЬКО")
                                     or str0_ == "МНОГО"
                                     or str0_ == "ПОРЯДКА"):
                                 pass
                             else:
                                 return None
                     if (wf.class0_.is_adjective):
                         # NOTE(review): "к.ф." is presumably the short-form
                         # adjective marker -- confirm against the morphology
                         # dictionary.
                         if (wf.containsAttr("к.ф.", None)):
                             if (t.getMorphClassInDictionary() ==
                                     MorphClass.ADJECTIVE):
                                 pass
                             else:
                                 _is_doubt_adj = True
         mc0 = t.morph.class0_
         # Surname heuristics for tokens that are not all lower case.
         if (mc0.is_proper_surname and not t.chars.is_all_lower):
             for wf in t.morph.items:
                 if (wf.class0_.is_proper_surname
                         and wf.number != MorphNumber.PLURAL):
                     wff = Utils.asObjectOrNull(wf, MorphWordForm)
                     if (wff is None):
                         continue
                     s = Utils.ifNotNull((Utils.ifNotNull(
                         wff.normal_full, wff.normal_case)), "")
                     if (LanguageHelper.endsWithEx(s, "ИН", "ЕН", "ЫН",
                                                   None)):
                         if (not wff.is_in_dictionary):
                             _can_be_surname = True
                         else:
                             return None
                     if (wff.is_in_dictionary
                             and LanguageHelper.endsWith(s, "ОВ")):
                         _can_be_surname = True
         # Dictionary first names are rejected unless one of the exceptions
         # below applies.
         if (mc0.is_proper_name and not t.chars.is_all_lower):
             for wff in t.morph.items:
                 wf = Utils.asObjectOrNull(wff, MorphWordForm)
                 if (wf is None):
                     continue
                 if (wf.normal_case == "ГОР"):
                     continue
                 if (wf.class0_.is_proper_name and wf.is_in_dictionary):
                     if (wf.normal_case is None
                             or not wf.normal_case.startswith("ЛЮБ")):
                         if (mc0.is_adjective
                                 and t.morph.containsAttr("неизм.", None)):
                             pass
                         elif (
                             (((attrs) &
                               (NounPhraseParseAttr.REFERENTCANBENOUN))
                              ) == (NounPhraseParseAttr.REFERENTCANBENOUN)):
                             pass
                         else:
                             if (items is None or (len(items) < 1)):
                                 return None
                             if (not items[0].is_std_adjective):
                                 return None
         if (mc0.is_adjective and t.morph.items_count == 1):
             if (t.morph.getIndexerItem(0).containsAttr("в.ср.ст.", None)):
                 return None
         mc1 = t.getMorphClassInDictionary()
         # A token whose dictionary class is exactly VERB is never an item.
         if (mc1 == MorphClass.VERB):
             return None
         if (((((attrs) & (NounPhraseParseAttr.IGNOREPARTICIPLES)))
              == (NounPhraseParseAttr.IGNOREPARTICIPLES)
              and t.morph.class0_.is_verb and not t.morph.class0_.is_noun)
                 and not t.morph.class0_.is_proper):
             for wf in t.morph.items:
                 if (wf.class0_.is_verb):
                     if (wf.containsAttr("дейст.з.", None)):
                         if (LanguageHelper.endsWith((t).term, "СЯ")):
                             pass
                         else:
                             return None
     t1 = None
     # Up to two attempts. On the first one ``t`` may jump over a hyphen to
     # the word after it; the second one runs when the first attempt neither
     # returned nor produced a usable item.
     for k in range(2):
         t = (Utils.ifNotNull(t1, t0))
         if (k == 0):
             if ((((isinstance(t0, TextToken))) and t0.next0_ is not None
                  and t0.next0_.is_hiphen)
                     and t0.next0_.next0_ is not None):
                 if (not t0.is_whitespace_after
                         and not t0.morph.class0_.is_pronoun):
                     if (not t0.next0_.is_whitespace_after):
                         t = t0.next0_.next0_
                     elif (t0.next0_.next0_.chars.is_all_lower
                           and LanguageHelper.endsWith((t0).term, "О")):
                         t = t0.next0_.next0_
         it = NounPhraseItem._new470(t0, t, _can_be_surname)
         if (t0 == t and (isinstance(t0, ReferentToken))):
             it.can_be_noun = True
             it.morph = MorphCollection(t0.morph)
         can_be_prepos = False
         # Classify every morphological variant of ``t`` as a possible
         # adjective and/or a possible noun.
         for v in t.morph.items:
             wf = Utils.asObjectOrNull(v, MorphWordForm)
             if (v.class0_.is_preposition):
                 can_be_prepos = True
             if (v.class0_.is_adjective
                     or ((v.class0_.is_pronoun
                          and not v.class0_.is_personal_pronoun)) or
                 ((v.class0_.is_noun and (isinstance(t, NumberToken))))):
                 if (NounPhraseItem.tryAccordVariant(
                         items, (0 if items is None else len(items)), v)):
                     is_doub = False
                     if (v.containsAttr("к.ф.", None)):
                         continue
                     if (v.containsAttr("собир.", None)
                             and not ((isinstance(t, NumberToken)))):
                         if (wf is not None and wf.is_in_dictionary):
                             return None
                         continue
                     if (v.containsAttr("сравн.", None)):
                         continue
                     ok = True
                     if (isinstance(t, TextToken)):
                         s = (t).term
                         if (s == "ПРАВО" or s == "ПРАВА"):
                             ok = False
                         elif (LanguageHelper.endsWith(s, "ОВ")
                               and t.getMorphClassInDictionary().is_noun):
                             ok = False
                         elif (wf is not None
                               and ((wf.normal_case == "САМ"
                                     or wf.normal_case == "ТО"))):
                             ok = False
                     elif (isinstance(t, NumberToken)):
                         if (v.class0_.is_noun
                                 and t.morph.class0_.is_adjective):
                             ok = False
                         elif (t.morph.class0_.is_noun and ((
                             (attrs) &
                             (NounPhraseParseAttr.PARSENUMERICASADJECTIVE)))
                               == (NounPhraseParseAttr.NO)):
                             ok = False
                     if (ok):
                         it.adj_morph.append(NounPhraseItemTextVar(v, t))
                         it.can_be_adj = True
                         if (_is_doubt_adj and t0 == t):
                             it.is_doubt_adjective = True
                         if (has_legal_verb and wf is not None
                                 and wf.is_in_dictionary):
                             it.can_be_noun = True
             can_be_noun_ = False
             if (isinstance(t, NumberToken)):
                 pass
             elif (v.class0_.is_noun
                   or ((wf is not None and wf.normal_case == "САМ"))):
                 can_be_noun_ = True
             elif (v.class0_.is_personal_pronoun):
                 if (items is None or len(items) == 0):
                     can_be_noun_ = True
                 else:
                     for it1 in items:
                         if (it1.is_verb):
                             return None
                     if (len(items) == 1):
                         if (items[0].can_be_adj_for_personal_pronoun):
                             can_be_noun_ = True
             elif ((v.class0_.is_pronoun and
                    ((items is None or len(items) == 0 or
                      ((len(items) == 1
                        and items[0].can_be_adj_for_personal_pronoun))))
                    and wf is not None) and
                   ((((wf.normal_case == "ТОТ" or wf.normal_full == "ТО"
                       or wf.normal_case == "ТО") or wf.normal_case == "ЭТО"
                      or wf.normal_case == "ВСЕ") or wf.normal_case == "ЧТО"
                     or wf.normal_case == "КТО"))):
                 if (wf.normal_case == "ВСЕ"):
                     if (t.next0_ is not None
                             and t.next0_.isValue("РАВНО", None)):
                         return None
                 can_be_noun_ = True
             elif (wf is not None and ((Utils.ifNotNull(
                     wf.normal_full, wf.normal_case))) == "КОТОРЫЙ"):
                 return None
             elif (v.class0_.is_proper and (isinstance(t, TextToken))):
                 if (t.length_char > 4 or v.class0_.is_proper_name):
                     can_be_noun_ = True
             if (can_be_noun_):
                 if (NounPhraseItem.tryAccordVariant(
                         items, (0 if items is None else len(items)), v)):
                     it.noun_morph.append(NounPhraseItemTextVar(v, t))
                     it.can_be_noun = True
         # When ``t`` was moved past a hyphen, let the variants account for
         # the prefix token ``t0``.
         if (t0 != t):
             for v in it.adj_morph:
                 v.correctPrefix(Utils.asObjectOrNull(t0, TextToken), False)
             for v in it.noun_morph:
                 v.correctPrefix(Utils.asObjectOrNull(t0, TextToken), True)
         # Second attempt, noun only: extend the item and append the text of
         # the end token to normal values that contain no '-' yet.
         if (k == 1 and it.can_be_noun and not it.can_be_adj):
             if (t1 is not None):
                 it.end_token = t1
             else:
                 it.end_token = t0.next0_.next0_
             for v in it.noun_morph:
                 if (v.normal_value is not None
                         and (v.normal_value.find('-') < 0)):
                     v.normal_value = "{0}-{1}".format(
                         v.normal_value,
                         it.end_token.getNormalCaseText(
                             None, False, MorphGender.UNDEFINED, False))
         if (it.can_be_adj):
             if (NounPhraseItem.__m_std_adjectives.tryParse(
                     it.begin_token, TerminParseAttr.NO) is not None):
                 it.is_std_adjective = True
         # A token that may also be a preposition is rejected when a noun
         # phrase parsed from (or after) it fits the prepositional reading.
         if (can_be_prepos and it.can_be_noun):
             if (items is not None and len(items) > 0):
                 npt1 = NounPhraseHelper.tryParse(
                     t,
                     Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION)
                                     | (NounPhraseParseAttr.PARSEPRONOUNS) |
                                     (NounPhraseParseAttr.PARSEVERBS),
                                     NounPhraseParseAttr), 0)
                 if (npt1 is not None and npt1.end_char > t.end_char):
                     return None
             else:
                 npt1 = NounPhraseHelper.tryParse(
                     t.next0_,
                     Utils.valToEnum((NounPhraseParseAttr.PARSEPRONOUNS) |
                                     (NounPhraseParseAttr.PARSEVERBS),
                                     NounPhraseParseAttr), 0)
                 if (npt1 is not None):
                     mc = LanguageHelper.getCaseAfterPreposition((t).lemma)
                     if (not ((mc) & npt1.morph.case_).is_undefined):
                         return None
         if (it.can_be_noun or it.can_be_adj or k == 1):
             # For pronouns, absorb a following particle (optionally joined
             # by a tight hyphen) into the item.
             if (it.begin_token.morph.class0_.is_pronoun):
                 tt2 = it.end_token.next0_
                 if ((tt2 is not None and tt2.is_hiphen
                      and not tt2.is_whitespace_after)
                         and not tt2.is_whitespace_before):
                     tt2 = tt2.next0_
                 if (isinstance(tt2, TextToken)):
                     ss = (tt2).term
                     if ((ss == "ЖЕ" or ss == "БЫ" or ss == "ЛИ")
                             or ss == "Ж"):
                         it.end_token = tt2
                     elif (ss == "НИБУДЬ" or ss == "ЛИБО"
                           or (((ss == "ТО" and tt2.previous.is_hiphen))
                               and it.can_be_adj)):
                         it.end_token = tt2
                         for m in it.adj_morph:
                             m.normal_value = "{0}-{1}".format(
                                 m.normal_value, ss)
                             if (m.single_number_value is not None):
                                 m.single_number_value = "{0}-{1}".format(
                                     m.single_number_value, ss)
             return it
         if (t0 == t):
             # "БИЗНЕС" followed by a token with the same character class:
             # retry with that next token as the end of the item.
             if (t0.isValue("БИЗНЕС", None) and t0.next0_ is not None
                     and t0.next0_.chars == t0.chars):
                 t1 = t0.next0_
                 continue
             return it
     return None
Esempio n. 27
0
 def create(t: 'Token', names: 'TerminCollection') -> 'BlockLine':
     if (t is None):
         return None
     res = BlockLine(t, t)
     tt = t
     while tt is not None:
         if (tt != t and tt.is_newline_before):
             break
         else:
             res.end_token = tt
         tt = tt.next0_
     nums = 0
     while t is not None and t.next0_ is not None and t.end_char <= res.end_char:
         if (isinstance(t, NumberToken)):
             pass
         else:
             rom = NumberHelper.try_parse_roman(t)
             if (rom is not None and rom.end_token.next0_ is not None):
                 t = rom.end_token
             else:
                 break
         if (t.next0_.is_char('.')):
             pass
         elif ((isinstance(t.next0_, TextToken))
               and not t.next0_.chars.is_all_lower):
             pass
         else:
             break
         res.number_end = t
         t = t.next0_
         if (t.is_char('.') and t.next0_ is not None):
             res.number_end = t
             t = t.next0_
         if (t.is_newline_before):
             return res
         nums += 1
     tok = BlockLine.__m_ontology.try_parse(t, TerminParseAttr.NO)
     if (tok is None):
         npt1 = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0,
                                           None)
         if (npt1 is not None and npt1.end_token != npt1.begin_token):
             tok = BlockLine.__m_ontology.try_parse(npt1.noun.begin_token,
                                                    TerminParseAttr.NO)
     if (tok is not None):
         if (t.previous is not None and t.previous.is_char(':')):
             tok = (None)
     if (tok is not None):
         typ_ = Utils.valToEnum(tok.termin.tag, BlkTyps)
         if (typ_ == BlkTyps.CONSLUSION):
             if (t.is_newline_after):
                 pass
             elif (t.next0_ is not None
                   and t.next0_.morph.class0_.is_preposition
                   and t.next0_.next0_ is not None):
                 tok2 = BlockLine.__m_ontology.try_parse(
                     t.next0_.next0_, TerminParseAttr.NO)
                 if (tok2 is not None and (Utils.valToEnum(
                         tok2.termin.tag, BlkTyps)) == BlkTyps.CHAPTER):
                     pass
                 else:
                     tok = (None)
             else:
                 tok = (None)
         if (t.kit.base_language != t.morph.language):
             tok = (None)
         if (typ_ == BlkTyps.INDEX and not t.is_value("ОГЛАВЛЕНИЕ", None)):
             if (not t.is_newline_after and t.next0_ is not None):
                 npt = NounPhraseHelper.try_parse(t.next0_,
                                                  NounPhraseParseAttr.NO, 0,
                                                  None)
                 if (npt is not None and npt.is_newline_after
                         and npt.morph.case_.is_genitive):
                     tok = (None)
                 elif (npt is None):
                     tok = (None)
         if ((typ_ == BlkTyps.INTRO and tok is not None
              and not tok.is_newline_after)
                 and t.is_value("ВВЕДЕНИЕ", None)):
             npt = NounPhraseHelper.try_parse(t.next0_,
                                              NounPhraseParseAttr.NO, 0,
                                              None)
             if (npt is not None and npt.morph.case_.is_genitive):
                 tok = (None)
         if (tok is not None):
             if (res.number_end is None):
                 res.number_end = tok.end_token
                 if (res.number_end.end_char > res.end_char):
                     res.end_token = res.number_end
             res.typ = typ_
             t = tok.end_token
             if (t.next0_ is not None and t.next0_.is_char_of(":.")):
                 t = t.next0_
                 res.end_token = t
             if (t.is_newline_after or t.next0_ is None):
                 return res
             t = t.next0_
     if (t.is_char('§') and (isinstance(t.next0_, NumberToken))):
         res.typ = BlkTyps.CHAPTER
         res.number_end = t
         t = t.next0_
     if (names is not None):
         tok2 = names.try_parse(t, TerminParseAttr.NO)
         if (tok2 is not None and tok2.end_token.is_newline_after):
             res.end_token = tok2.end_token
             res.is_exist_name = True
             if (res.typ == BlkTyps.UNDEFINED):
                 li2 = BlockLine.create((None if res.number_end is None else
                                         res.number_end.next0_), None)
                 if (li2 is not None
                         and ((li2.typ == BlkTyps.LITERATURE
                               or li2.typ == BlkTyps.INTRO
                               or li2.typ == BlkTyps.CONSLUSION))):
                     res.typ = li2.typ
                 else:
                     res.typ = BlkTyps.CHAPTER
             return res
     t1 = res.end_token
     if ((((isinstance(t1, NumberToken)) or t1.is_char('.')))
             and t1.previous is not None):
         t1 = t1.previous
         if (t1.is_char('.')):
             res.has_content_item_tail = True
             while t1 is not None and t1.begin_char > res.begin_char:
                 if (not t1.is_char('.')):
                     break
                 t1 = t1.previous
     res.is_all_upper = True
     while t is not None and t.end_char <= t1.end_char:
         if (not (isinstance(t, TextToken)) or not t.chars.is_letter):
             res.not_words += 1
         else:
             mc = t.get_morph_class_in_dictionary()
             if (mc.is_undefined):
                 res.not_words += 1
             elif (t.length_char > 2):
                 res.words += 1
             if (not t.chars.is_all_upper):
                 res.is_all_upper = False
             if (t.is_pure_verb):
                 if (not t.term.endswith("ING")):
                     res.has_verb = True
         t = t.next0_
     if (res.typ == BlkTyps.UNDEFINED):
         npt = NounPhraseHelper.try_parse(
             (res.begin_token if res.number_end is None else
              res.number_end.next0_), NounPhraseParseAttr.NO, 0, None)
         if (npt is not None):
             if (npt.noun.is_value("ХАРАКТЕРИСТИКА", None)
                     or npt.noun.is_value("СОДЕРЖАНИЕ", "ЗМІСТ")):
                 ok = True
                 tt = npt.end_token.next0_
                 first_pass3032 = True
                 while True:
                     if first_pass3032: first_pass3032 = False
                     else: tt = tt.next0_
                     if (not (tt is not None
                              and tt.end_char <= res.end_char)):
                         break
                     if (tt.is_char('.')):
                         continue
                     npt2 = NounPhraseHelper.try_parse(
                         tt, NounPhraseParseAttr.NO, 0, None)
                     if (npt2 is None or not npt2.morph.case_.is_genitive):
                         ok = False
                         break
                     tt = npt2.end_token
                     if (tt.end_char > res.end_char):
                         res.end_token = tt
                         if (not tt.is_newline_after):
                             while res.end_token.next0_ is not None:
                                 if (res.end_token.is_newline_after):
                                     break
                                 res.end_token = res.end_token.next0_
                 if (ok):
                     res.typ = BlkTyps.INTRO
                     res.is_exist_name = True
             elif (npt.noun.is_value("ВЫВОД", "ВИСНОВОК")
                   or npt.noun.is_value("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")):
                 ok = True
                 tt = npt.end_token.next0_
                 first_pass3033 = True
                 while True:
                     if first_pass3033: first_pass3033 = False
                     else: tt = tt.next0_
                     if (not (tt is not None
                              and tt.end_char <= res.end_char)):
                         break
                     if (tt.is_char_of(",.") or tt.is_and):
                         continue
                     npt1 = NounPhraseHelper.try_parse(
                         tt, NounPhraseParseAttr.NO, 0, None)
                     if (npt1 is not None):
                         if (npt1.noun.is_value("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")
                                 or npt1.noun.is_value(
                                     "РЕКОМЕНДАЦИЯ", "РЕКОМЕНДАЦІЯ")
                                 or npt1.noun.is_value(
                                     "ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")):
                             tt = npt1.end_token
                             if (tt.end_char > res.end_char):
                                 res.end_token = tt
                                 if (not tt.is_newline_after):
                                     while res.end_token.next0_ is not None:
                                         if (res.end_token.is_newline_after
                                             ):
                                             break
                                         res.end_token = res.end_token.next0_
                             continue
                     ok = False
                     break
                 if (ok):
                     res.typ = BlkTyps.CONSLUSION
                     res.is_exist_name = True
             if (res.typ == BlkTyps.UNDEFINED and npt is not None
                     and npt.end_char <= res.end_char):
                 ok = False
                 publ = 0
                 if (BlockLine.__is_pub(npt)):
                     ok = True
                     publ = 1
                 elif ((npt.noun.is_value("СПИСОК", None)
                        or npt.noun.is_value("УКАЗАТЕЛЬ", "ПОКАЖЧИК")
                        or npt.noun.is_value("ПОЛОЖЕНИЕ", "ПОЛОЖЕННЯ"))
                       or npt.noun.is_value("ВЫВОД", "ВИСНОВОК")
                       or npt.noun.is_value("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")):
                     if (npt.end_char == res.end_char):
                         return None
                     ok = True
                 if (ok):
                     if (npt.begin_token == npt.end_token
                             and npt.noun.is_value("СПИСОК", None)
                             and npt.end_char == res.end_char):
                         ok = False
                     tt = npt.end_token.next0_
                     first_pass3034 = True
                     while True:
                         if first_pass3034: first_pass3034 = False
                         else: tt = tt.next0_
                         if (not (tt is not None
                                  and tt.end_char <= res.end_char)):
                             break
                         if (tt.is_char_of(",.:") or tt.is_and
                                 or tt.morph.class0_.is_preposition):
                             continue
                         if (tt.is_value("ОТРАЖЕНЫ", "ВІДОБРАЖЕНІ")):
                             continue
                         npt = NounPhraseHelper.try_parse(
                             tt, NounPhraseParseAttr.NO, 0, None)
                         if (npt is None):
                             ok = False
                             break
                         if (((BlockLine.__is_pub(npt) or npt.noun.is_value(
                                 "РАБОТА", "РОБОТА") or npt.noun.is_value(
                                     "ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ"))
                              or npt.noun.is_value("АВТОР", None)
                              or npt.noun.is_value("ТРУД", "ПРАЦЯ"))
                                 or npt.noun.is_value("ТЕМА", None)
                                 or npt.noun.is_value(
                                     "ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ")):
                             tt = npt.end_token
                             if (BlockLine.__is_pub(npt)):
                                 publ += 1
                             if (tt.end_char > res.end_char):
                                 res.end_token = tt
                                 if (not tt.is_newline_after):
                                     while res.end_token.next0_ is not None:
                                         if (res.end_token.is_newline_after
                                             ):
                                             break
                                         res.end_token = res.end_token.next0_
                             continue
                         ok = False
                         break
                     if (ok):
                         res.typ = BlkTyps.LITERATURE
                         res.is_exist_name = True
                         if (publ == 0 and (res.end_char < ((math.floor(
                             (len(res.kit.sofa.text) * 2) / 3))))):
                             if (res.number_end is not None):
                                 res.typ = BlkTyps.MISC
                             else:
                                 res.typ = BlkTyps.UNDEFINED
     return res
Esempio n. 28
0
 def getNormalCaseText(self,
                       mc: 'MorphClass' = None,
                       single_number: bool = False,
                       gender: 'MorphGender' = MorphGender.UNDEFINED,
                       keep_chars: bool = False) -> str:
     """Select the normalized text of this span from its morph variants.

     A span made of a single ReferentToken delegates to that token.
     Otherwise the entries of ``self.morph.items`` are walked in order while
     a candidate text is maintained; some variants with ``undef_coef == 0``
     stop the walk early, variants with a positive ``undef_coef`` only
     supply a provisional candidate.

     Args:
         mc: optional target morph class; when it reports ``is_adjective``
             the method asks Morphology / NumberHelper for a derived form.
         single_number: prefer a variant's ``single_number_value`` and map
             the two hard-coded plural-only nouns to their singular.
         gender: gender handed to the form generators.
         keep_chars: passed through to ``__corrChars`` and delegated calls.

     Returns:
         The chosen candidate passed through ``__corrChars``; if no
         candidate was found, the single token's own normal-case text when
         the span is one token, otherwise the ``Utils.ifNotNull`` fallback
         "?".
     """
     first = self.begin_token
     if isinstance(first, ReferentToken) and first == self.end_token:
         return first.getNormalCaseText(mc, single_number, gender,
                                        keep_chars)

     def adjective_requested():
         # Evaluated lazily so mc is only inspected where the walk needs it.
         return mc is not None and mc.is_adjective

     singular_overrides = {"ДЕТИ": "РЕБЕНОК", "ЛЮДИ": "ЧЕЛОВЕК"}
     chosen = None
     max_undef = 0
     chosen_rank = -1
     for it in self.morph.items:
         var = Utils.asObjectOrNull(it, NounPhraseItemTextVar)
         undef = var.undef_coef
         # A less certain variant never displaces a better one already seen.
         if undef > 0 and (undef < max_undef or chosen_rank >= 0):
             continue

         singular = var.single_number_value if single_number else None
         if singular is not None:
             needs_wordform = (adjective_requested()
                               and (gender == MorphGender.NEUTER
                                    or gender == MorphGender.FEMINIE))
             if needs_wordform:
                 target = MorphBaseInfo._new467(MorphClass(mc), gender,
                                                MorphNumber.SINGULAR,
                                                MorphCase.NOMINATIVE,
                                                self.morph.language)
                 form = Morphology.getWordform(singular, target)
                 if form is not None:
                     chosen = form
             else:
                 chosen = singular
             if undef == 0:
                 break
             max_undef = undef
             continue

         normal = var.normal_value
         if Utils.isNullOrEmpty(normal):
             continue

         if str.isdigit(normal[0]) and adjective_requested():
             parsed = RefOutArgWrapper(0)
             if Utils.tryParseInt(normal, parsed):
                 number = parsed.value
                 grammatical_number = MorphNumber.PLURAL
                 if single_number or number == 1:
                     grammatical_number = MorphNumber.SINGULAR
                 numeral = NumberHelper.getNumberAdjective(
                     number, gender, grammatical_number)
                 if numeral is not None:
                     chosen = numeral
                     if undef == 0:
                         break
                     max_undef = undef
                     continue

         candidate = it.normal_value
         if single_number:
             candidate = singular_overrides.get(candidate, candidate)
         max_undef = undef
         if undef > 0:
             # Provisional result only; keep looking for a certain variant.
             chosen = candidate
             continue

         rank = 0
         if not (adjective_requested() and undef == 0):
             if (isinstance(first, TextToken)
                     and candidate == first.term
                     and it.case_.is_nominative
                     and it.number == MorphNumber.SINGULAR):
                 # Variant reproduces the source word as nominative singular.
                 rank = 1
         if chosen is None or rank > chosen_rank:
             chosen = candidate
             chosen_rank = rank
             if rank > 0:
                 break

     if chosen is not None:
         return self.__corrChars(chosen, keep_chars)
     if first == self.end_token:
         chosen = first.getNormalCaseText(mc, single_number, gender,
                                          keep_chars)
     return Utils.ifNotNull(chosen, "?")
Esempio n. 29
0
 def canBeEquals(self, obj: 'Referent', typ: 'EqualType') -> bool:
     """Tell whether this person property may be the same entity as obj.

     The comparison proceeds in stages:
       1. obj must be a PersonPropertyReferent and both names must be set.
       2. Differing names are tolerated only outside DIFFERENTTEXTS mode and
          only if they form a pair across the two boss tables or one name
          is a space-separated prefix of the other; in that case neither
          referent may match anything in the other's ``higher`` chain.
       3. If both have a ``higher``, those must be compatible as well.
       4. If either carries an "@GENERAL" slot, the string forms decide.
       5. If either carries ATTR_REF slots, the referenced values are
          cross-matched (by containment, then by nested canBeEquals).

     Nested comparisons are limited by the class-wide counter
     ``__tmp_stack``: beyond a depth of 20 the nested check is skipped.

     Args:
         obj: the referent to compare with.
         typ: comparison mode (a Referent.EqualType value).

     Returns:
         True if the two referents may be considered equal.
     """
     pr = Utils.asObjectOrNull(obj, PersonPropertyReferent)
     if pr is None:
         return False
     n1 = self.name
     n2 = pr.name
     if n1 is None or n2 is None:
         return False

     def guarded_equals(left, right, when_too_deep):
         # Runs left.canBeEquals(right, typ) under the shared depth counter.
         # try/finally guarantees the counter is restored even if the nested
         # comparison raises; otherwise a single failure would leave the
         # class-wide counter raised and later checks would be skipped.
         PersonPropertyReferent.__tmp_stack += 1
         try:
             if PersonPropertyReferent.__tmp_stack > 20:
                 return when_too_deep
             return left.canBeEquals(right, typ)
         finally:
             PersonPropertyReferent.__tmp_stack -= 1

     eq_bosses = False
     if n1 != n2:
         if typ == Referent.EqualType.DIFFERENTTEXTS:
             return False
         bosses0 = PersonPropertyReferent.__m_bosses0
         bosses1 = PersonPropertyReferent.__m_bosses1
         cross_bosses = ((n1 in bosses0 and n2 in bosses1)
                         or (n1 in bosses1 and n2 in bosses0))
         if not cross_bosses:
             if (not n1.startswith(n2 + " ")
                     and not n2.startswith(n1 + " ")):
                 return False
         eq_bosses = True
         # Differently named properties must not be each other's superior.
         hi = self.higher
         while hi is not None:
             if guarded_equals(hi, pr, False):
                 return False
             hi = hi.higher
         hi = pr.higher
         while hi is not None:
             if guarded_equals(hi, self, False):
                 return False
             hi = hi.higher

     if self.higher is not None and pr.higher is not None:
         # Past the depth limit the superiors are assumed compatible.
         if not guarded_equals(self.higher, pr.higher, True):
             return False

     if (self.findSlot("@GENERAL", None, True) is not None
             or pr.findSlot("@GENERAL", None, True) is not None):
         return str(self) == str(pr)

     ref_attr = PersonPropertyReferent.ATTR_REF
     if (self.findSlot(ref_attr, None, True) is None
             and pr.findSlot(ref_attr, None, True) is None):
         if not eq_bosses and n1 != n2:
             return False
         return True

     refs1 = [s.value for s in self.slots if s.type_name == ref_attr]
     refs2 = [s.value for s in pr.slots if s.type_name == ref_attr]
     eq = False
     noeq = False
     # Match every reference of one side against the other side, both ways.
     # NOTE(review): a successful nested match resets noeq even when an
     # earlier reference stayed unmatched; kept as in the original logic,
     # confirm that this is intended.
     for own, others in ((refs1, refs2), (refs2, refs1)):
         for ref in own:
             if ref in others:
                 eq = True
                 continue
             noeq = True
             if isinstance(ref, Referent) and any(
                     isinstance(rr, Referent) and rr.canBeEquals(ref, typ)
                     for rr in others):
                 noeq = False
                 eq = True

     if eq and not noeq:
         return True
     if not (noeq and (eq or not refs1 or not refs2)):
         return False
     # Partial or one-sided reference match: accept only identical names
     # within the same text and only when neither side has a superior.
     if typ == Referent.EqualType.DIFFERENTTEXTS or n1 != n2:
         return False
     return self.higher is None and pr.higher is None
Esempio n. 30
0
 def __TryAttach(t: 'Token',
                 prev: typing.List['DateItemToken']) -> 'DateItemToken':
     from pullenti.ner.measure.internal.MeasureToken import MeasureToken
     if (t is None):
         return None
     nt = Utils.asObjectOrNull(t, NumberToken)
     begin = t
     end = t
     is_in_brack = False
     if ((BracketHelper.canBeStartOfSequence(t, False, False)
          and t.next0_ is not None and (isinstance(t.next0_, NumberToken)))
             and BracketHelper.canBeEndOfSequence(t.next0_.next0_, False,
                                                  None, False)):
         nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
         end = t.next0_.next0_
         is_in_brack = True
     if ((t.is_newline_before and BracketHelper.isBracket(t, False) and
          (isinstance(t.next0_, NumberToken)))
             and BracketHelper.isBracket(t.next0_.next0_, False)):
         nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
         end = t.next0_.next0_
         is_in_brack = True
     if (nt is not None):
         if (nt.int_value is None):
             return None
         if (nt.typ == NumberSpellingType.WORDS):
             if (nt.morph.class0_.is_noun
                     and not nt.morph.class0_.is_adjective):
                 if (t.next0_ is not None
                         and ((t.next0_.isValue("КВАРТАЛ", None)
                               or t.next0_.isValue("ПОЛУГОДИЕ", None)
                               or t.next0_.isValue("ПІВРІЧЧЯ", None)))):
                     pass
                 else:
                     return None
         if (NumberHelper.tryParseAge(nt) is not None):
             return None
         res = DateItemToken._new653(begin, end,
                                     DateItemToken.DateItemType.NUMBER,
                                     nt.int_value, nt.morph)
         if ((res.int_value == 20 and
              (isinstance(nt.next0_, NumberToken)) and
              (nt.next0_).int_value is not None)
                 and nt.next0_.length_char == 2 and prev is not None):
             num = 2000 + (nt.next0_).int_value
             if ((num < 2030) and len(prev) > 0 and prev[len(prev) - 1].typ
                     == DateItemToken.DateItemType.MONTH):
                 ok = False
                 if (nt.whitespaces_after_count == 1):
                     ok = True
                 elif (nt.is_newline_after and nt.is_newline_after):
                     ok = True
                 if (ok):
                     nt = (Utils.asObjectOrNull(nt.next0_, NumberToken))
                     res.end_token = nt
                     res.int_value = num
         if (res.int_value == 20 or res.int_value == 201):
             tt = t.next0_
             if (tt is not None and tt.isChar('_')):
                 while tt is not None:
                     if (not tt.isChar('_')):
                         break
                     tt = tt.next0_
                 tt = DateItemToken.__testYearRusWord(tt, False)
                 if (tt is not None):
                     res.int_value = 0
                     res.end_token = tt
                     res.typ = DateItemToken.DateItemType.YEAR
                     return res
         if (res.int_value <= 12 and t.next0_ is not None
                 and (t.whitespaces_after_count < 3)):
             tt = t.next0_
             if (tt.isValue("ЧАС", None)):
                 if (((isinstance(t.previous, TextToken))
                      and not t.previous.chars.is_letter
                      and not t.is_whitespace_before)
                         and (isinstance(t.previous.previous, NumberToken))
                         and not t.previous.is_whitespace_before):
                     pass
                 else:
                     res.typ = DateItemToken.DateItemType.HOUR
                     res.end_token = tt
                     tt = tt.next0_
                     if (tt is not None and tt.isChar('.')):
                         res.end_token = tt
                         tt = tt.next0_
             first_pass2816 = True
             while True:
                 if first_pass2816: first_pass2816 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.isValue("УТРО", "РАНОК")):
                     res.end_token = tt
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.isValue("ВЕЧЕР", "ВЕЧІР")):
                     res.end_token = tt
                     res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.isValue("ДЕНЬ", None)):
                     res.end_token = tt
                     if (res.int_value < 10):
                         res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.isValue("НОЧЬ", "НІЧ")):
                     res.end_token = tt
                     if (res.int_value == 12):
                         res.int_value = 0
                     elif (res.int_value > 9):
                         res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.is_comma or tt.morph.class0_.is_adverb):
                     continue
                 break
             if (res.typ == DateItemToken.DateItemType.HOUR):
                 return res
         can_be_year_ = True
         if (prev is not None and len(prev) > 0 and prev[len(prev) - 1].typ
                 == DateItemToken.DateItemType.MONTH):
             pass
         elif ((prev is not None and len(prev) >= 4 and
                prev[len(prev) - 1].typ == DateItemToken.DateItemType.DELIM)
               and prev[len(prev) - 2].can_by_month):
             pass
         elif (nt.next0_ is not None
               and ((nt.next0_.isValue("ГОД", None)
                     or nt.next0_.isValue("РІК", None)))):
             if (res.int_value < 1000):
                 can_be_year_ = False
         tt = DateItemToken.__testYearRusWord(nt.next0_, False)
         if (tt is not None and DateItemToken.__isNewAge(tt.next0_)):
             res.typ = DateItemToken.DateItemType.YEAR
             res.end_token = tt
         elif (can_be_year_):
             if (res.can_be_year):
                 tt = DateItemToken.__testYearRusWord(
                     nt.next0_, res.is_newline_before)
                 if ((tt) is not None):
                     if ((tt.isValue("Г", None)
                          and not tt.is_whitespace_before
                          and t.previous is not None)
                             and ((t.previous.isValue("КОРПУС", None)
                                   or t.previous.isValue("КОРП", None)))):
                         pass
                     elif (
                         (((nt.next0_.isValue("Г", None) and
                            (t.whitespaces_before_count < 3) and t.previous
                            is not None) and t.previous.isValue("Я", None)
                           and t.previous.previous is not None)
                          and t.previous.previous.isCharOf("\\/")
                          and t.previous.previous.previous is not None)
                             and t.previous.previous.previous.isValue(
                                 "А", None)):
                         return None
                     else:
                         res.end_token = tt
                         res.typ = DateItemToken.DateItemType.YEAR
                         res.lang = tt.morph.language
             elif (tt is not None and (nt.whitespaces_after_count < 2)
                   and (nt.end_char - nt.begin_char) == 1):
                 res.end_token = tt
                 res.typ = DateItemToken.DateItemType.YEAR
                 res.lang = tt.morph.language
         if (nt.previous is not None):
             if (nt.previous.isValue("В", "У")
                     or nt.previous.isValue("К", None)
                     or nt.previous.isValue("ДО", None)):
                 tt = DateItemToken.__testYearRusWord(nt.next0_, False)
                 if ((tt) is not None):
                     ok = False
                     if ((res.int_value < 100)
                             and (isinstance(tt, TextToken)) and
                         (((tt).term == "ГОДА" or (tt).term == "РОКИ"))):
                         pass
                     else:
                         ok = True
                         if (nt.previous.isValue("ДО", None)
                                 and nt.next0_.isValue("Г", None)):
                             cou = 0
                             ttt = nt.previous.previous
                             while ttt is not None and (cou < 10):
                                 mt = MeasureToken.tryParse(
                                     ttt, None, False, False)
                                 if (mt is not None
                                         and mt.end_char > nt.end_char):
                                     ok = False
                                     break
                                 ttt = ttt.previous
                                 cou += 1
                     if (ok):
                         res.end_token = tt
                         res.typ = DateItemToken.DateItemType.YEAR
                         res.lang = tt.morph.language
                         res.begin_token = nt.previous
             elif (((nt.previous.isValue("IN", None)
                     or nt.previous.isValue("SINCE", None)))
                   and res.can_be_year):
                 res.typ = DateItemToken.DateItemType.YEAR
                 res.begin_token = nt.previous
             elif (nt.previous.isValue("NEL", None)
                   or nt.previous.isValue("DEL", None)):
                 if (res.can_be_year):
                     res.typ = DateItemToken.DateItemType.YEAR
                     res.lang = MorphLang.IT
                     res.begin_token = nt.previous
             elif (nt.previous.isValue("IL", None) and res.can_be_day):
                 res.lang = MorphLang.IT
                 res.begin_token = nt.previous
         t1 = res.end_token.next0_
         if (t1 is not None):
             if ((t1.isValue("ЧАС", None) or t1.isValue("ГОДИНА", None))):
                 if ((((prev is not None and len(prev) == 2
                        and prev[0].can_be_hour)
                       and prev[1].typ == DateItemToken.DateItemType.DELIM
                       and not prev[1].is_whitespace_after)
                      and not prev[1].is_whitespace_after
                      and res.int_value >= 0) and (res.int_value < 59)):
                     prev[0].typ = DateItemToken.DateItemType.HOUR
                     res.typ = DateItemToken.DateItemType.MINUTE
                     res.end_token = t1
                 elif (res.int_value < 24):
                     if (t1.next0_ is not None and t1.next0_.isChar('.')):
                         t1 = t1.next0_
                     res.typ = DateItemToken.DateItemType.HOUR
                     res.end_token = t1
             elif ((res.int_value < 60) and
                   ((t1.isValue("МИНУТА", None) or t1.isValue("МИН", None)
                     or t.isValue("ХВИЛИНА", None)))):
                 if (t1.next0_ is not None and t1.next0_.isChar('.')):
                     t1 = t1.next0_
                 res.typ = DateItemToken.DateItemType.MINUTE
                 res.end_token = t1
             elif ((res.int_value < 60) and ((t1.isValue("СЕКУНДА", None)
                                              or t1.isValue("СЕК", None)))):
                 if (t1.next0_ is not None and t1.next0_.isChar('.')):
                     t1 = t1.next0_
                 res.typ = DateItemToken.DateItemType.SECOND
                 res.end_token = t1
             elif ((res.int_value < 30)
                   and ((t1.isValue("ВЕК", "ВІК")
                         or t1.isValue("СТОЛЕТИЕ", "СТОЛІТТЯ")))):
                 res.typ = DateItemToken.DateItemType.CENTURY
                 res.end_token = t1
             elif (res.int_value <= 4 and t1.isValue("КВАРТАЛ", None)):
                 res.typ = DateItemToken.DateItemType.QUARTAL
                 res.end_token = t1
             elif (res.int_value <= 2
                   and ((t1.isValue("ПОЛУГОДИЕ", None)
                         or t1.isValue("ПІВРІЧЧЯ", None)))):
                 res.typ = DateItemToken.DateItemType.HALFYEAR
                 res.end_token = t1
         return res
     t0 = Utils.asObjectOrNull(t, TextToken)
     if (t0 is None):
         return None
     txt = t0.getSourceText()
     if ((txt[0] == 'I' or txt[0] == 'X' or txt[0] == 'Х')
             or txt[0] == 'V'):
         lat = NumberHelper.tryParseRoman(t)
         if (lat is not None and lat.end_token.next0_ is not None
                 and lat.int_value is not None):
             val = lat.int_value
             tt = lat.end_token.next0_
             if (tt.isValue("КВАРТАЛ", None) and val > 0 and val <= 4):
                 return DateItemToken._new654(
                     t, tt, DateItemToken.DateItemType.QUARTAL, val)
             if (tt.isValue("ПОЛУГОДИЕ", "ПІВРІЧЧЯ") and val > 0
                     and val <= 2):
                 return DateItemToken._new654(
                     t, lat.end_token.next0_,
                     DateItemToken.DateItemType.HALFYEAR, val)
             if (tt.isValue("ВЕК", "ВІК")
                     or tt.isValue("СТОЛЕТИЕ", "СТОЛІТТЯ")):
                 return DateItemToken._new654(
                     t, lat.end_token.next0_,
                     DateItemToken.DateItemType.CENTURY, val)
             if (tt.isValue("В", None) and tt.next0_ is not None
                     and tt.next0_.isChar('.')):
                 if (prev is not None and len(prev) > 0
                         and prev[len(prev) - 1].typ
                         == DateItemToken.DateItemType.POINTER):
                     return DateItemToken._new654(
                         t, tt.next0_, DateItemToken.DateItemType.CENTURY,
                         val)
                 if (DateItemToken.__isNewAge(tt.next0_.next0_)):
                     return DateItemToken._new654(
                         t, tt.next0_, DateItemToken.DateItemType.CENTURY,
                         val)
             if (tt.is_hiphen):
                 lat2 = NumberHelper.tryParseRoman(tt.next0_)
                 if ((lat2 is not None and lat2.int_value is not None
                      and lat2.int_value > val)
                         and lat2.end_token.next0_ is not None):
                     if (lat2.end_token.next0_.isValue("ВЕК", "ВІК")
                             or lat2.end_token.next0_.isValue(
                                 "СТОЛЕТИЕ", "СТОЛІТТЯ")):
                         return DateItemToken._new654(
                             t, lat.end_token,
                             DateItemToken.DateItemType.CENTURY, val)
     if (t is not None and t.isValue("НАПРИКІНЦІ", None)):
         return DateItemToken._new660(t, t,
                                      DateItemToken.DateItemType.POINTER,
                                      "конец")
     if (t is not None and t.isValue("ДОНЕДАВНА", None)):
         return DateItemToken._new660(t, t,
                                      DateItemToken.DateItemType.POINTER,
                                      "сегодня")
     tok = DateItemToken.M_SEASONS.tryParse(t, TerminParseAttr.NO)
     if ((tok is not None and
          (Utils.valToEnum(tok.termin.tag, DatePointerType))
          == DatePointerType.SUMMER and t.morph.language.is_ru)
             and (isinstance(t, TextToken))):
         str0_ = (t).term
         if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
             tok = (None)
     if (tok is not None):
         return DateItemToken._new654(
             t, tok.end_token, DateItemToken.DateItemType.POINTER,
             Utils.valToEnum(tok.termin.tag, DatePointerType))
     npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
     if (npt is not None):
         tok = DateItemToken.M_SEASONS.tryParse(npt.end_token,
                                                TerminParseAttr.NO)
         if ((tok is not None and
              (Utils.valToEnum(tok.termin.tag, DatePointerType))
              == DatePointerType.SUMMER and t.morph.language.is_ru)
                 and (isinstance(t, TextToken))):
             str0_ = (t).term
             if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
                 tok = (None)
         if (tok is not None):
             return DateItemToken._new654(
                 t, tok.end_token, DateItemToken.DateItemType.POINTER,
                 Utils.valToEnum(tok.termin.tag, DatePointerType))
         typ_ = DateItemToken.DateItemType.NUMBER
         if (npt.noun.isValue("КВАРТАЛ", None)):
             typ_ = DateItemToken.DateItemType.QUARTAL
         elif (npt.end_token.isValue("ПОЛУГОДИЕ", None)
               or npt.end_token.isValue("ПІВРІЧЧЯ", None)):
             typ_ = DateItemToken.DateItemType.HALFYEAR
         elif (npt.end_token.isValue("НАЧАЛО", None)
               or npt.end_token.isValue("ПОЧАТОК", None)):
             return DateItemToken._new660(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "начало")
         elif (npt.end_token.isValue("СЕРЕДИНА", None)):
             return DateItemToken._new660(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "середина")
         elif (npt.end_token.isValue("КОНЕЦ", None)
               or npt.end_token.isValue("КІНЕЦЬ", None)
               or npt.end_token.isValue("НАПРИКІНЕЦЬ", None)):
             return DateItemToken._new660(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "конец")
         elif (npt.end_token.isValue("ВРЕМЯ", None)
               and len(npt.adjectives) > 0
               and npt.end_token.previous.isValue("НАСТОЯЩЕЕ", None)):
             return DateItemToken._new660(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "сегодня")
         elif (npt.end_token.isValue("ЧАС", None)
               and len(npt.adjectives) > 0
               and npt.end_token.previous.isValue("ДАНИЙ", None)):
             return DateItemToken._new660(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "сегодня")
         if (typ_ != DateItemToken.DateItemType.NUMBER):
             delta = 0
             if (len(npt.adjectives) > 0):
                 if (npt.adjectives[0].isValue("ПОСЛЕДНИЙ", None)
                         or npt.adjectives[0].isValue("ОСТАННІЙ", None)):
                     return DateItemToken._new654(
                         t0, npt.end_token, typ_,
                         (4 if typ_ == DateItemToken.DateItemType.QUARTAL
                          else 2))
                 if (npt.adjectives[0].isValue("ПРЕДЫДУЩИЙ", None)
                         or npt.adjectives[0].isValue("ПОПЕРЕДНІЙ", None)):
                     delta = -1
                 elif (npt.adjectives[0].isValue("СЛЕДУЮЩИЙ", None)
                       or npt.adjectives[0].isValue("ПОСЛЕДУЮЩИЙ", None)
                       or npt.adjectives[0].isValue("НАСТУПНИЙ", None)):
                     delta = 1
                 else:
                     return None
             cou = 0
             tt = t.previous
             first_pass2817 = True
             while True:
                 if first_pass2817: first_pass2817 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (cou > 200):
                     break
                 dr = Utils.asObjectOrNull(tt.getReferent(),
                                           DateRangeReferent)
                 if (dr is None):
                     continue
                 if (typ_ == DateItemToken.DateItemType.QUARTAL):
                     ii = dr.quarter_number
                     if (ii < 1):
                         continue
                     ii += delta
                     if ((ii < 1) or ii > 4):
                         continue
                     return DateItemToken._new654(t0, npt.end_token, typ_,
                                                  ii)
                 if (typ_ == DateItemToken.DateItemType.HALFYEAR):
                     ii = dr.halfyear_number
                     if (ii < 1):
                         continue
                     ii += delta
                     if ((ii < 1) or ii > 2):
                         continue
                     return DateItemToken._new654(t0, npt.end_token, typ_,
                                                  ii)
     term = t0.term
     if (not str.isalnum(term[0])):
         if (t0.isCharOf(".\\/:") or t0.is_hiphen):
             return DateItemToken._new660(t0, t0,
                                          DateItemToken.DateItemType.DELIM,
                                          term)
         elif (t0.isChar(',')):
             return DateItemToken._new660(t0, t0,
                                          DateItemToken.DateItemType.DELIM,
                                          term)
         else:
             return None
     if (term == "O" or term == "О"):
         if ((isinstance(t.next0_, NumberToken))
                 and not t.is_whitespace_after and len(
                     (t.next0_).value) == 1):
             return DateItemToken._new654(t, t.next0_,
                                          DateItemToken.DateItemType.NUMBER,
                                          (t.next0_).int_value)
     if (str.isalpha(term[0])):
         inf = DateItemToken.M_MONTHES.tryParse(t, TerminParseAttr.NO)
         if (inf is not None and inf.termin.tag is None):
             inf = DateItemToken.M_MONTHES.tryParse(inf.end_token.next0_,
                                                    TerminParseAttr.NO)
         if (inf is not None and (isinstance(inf.termin.tag, int))):
             return DateItemToken._new675(inf.begin_token, inf.end_token,
                                          DateItemToken.DateItemType.MONTH,
                                          inf.termin.tag, inf.termin.lang)
     return None