Exemple #1
0
 def __siteBefore(t : 'Token') -> 'Token':
     """ Look backwards from the given token for a word that introduces a web site.
     
     A ':' at the starting position is skipped first. The words
     ВЕБСАЙТ / WEBSITE / WEB / WWW are accepted as they are. САЙТ / SITE may
     additionally be preceded by a hyphen, by WEB / ВЕБ and by an adjective that
     opens a noun phrase. АДРЕС is accepted only when it follows ЭЛ (optionally
     with a dot) or ЭЛЕКТРОННЫЙ.
     
     Args:
         t(Token): token to start from (may be None)
     
     Returns:
         Token: first token of the introducing words, or None if there are none
     """
     cur = t
     if cur is not None and cur.isChar(':'):
         cur = cur.previous
     if cur is None:
         return None
     # keywords that need no further context
     if any(cur.isValue(word, None) for word in ("ВЕБСАЙТ", "WEBSITE", "WEB", "WWW")):
         return cur
     if not (cur.isValue("САЙТ", None) or cur.isValue("SITE", None)):
         # the only other accepted word is АДРЕС preceded by ЭЛ[.] / ЭЛЕКТРОННЫЙ
         if not cur.isValue("АДРЕС", None):
             return None
         before = cur.previous
         if before is not None and before.isChar('.'):
             before = before.previous
         if before is None:
             return None
         if before.isValue("ЭЛ", None) or before.isValue("ЭЛЕКТРОННЫЙ", None):
             return before
         return None
     # САЙТ / SITE: try to extend the result to the left
     start = cur
     cur = cur.previous
     if cur is not None and cur.is_hiphen:
         cur = cur.previous
     if cur is None:
         return start
     if cur.isValue("WEB", None) or cur.isValue("ВЕБ", None):
         start = cur
     left = start.previous
     if left is not None and left.morph.class0_.is_adjective and start.whitespaces_before_count < 3:
         # an adjective right before the keyword: take the whole noun phrase
         phrase = NounPhraseHelper.tryParse(left, NounPhraseParseAttr.NO, 0)
         if phrase is not None:
             start = phrase.begin_token
     return start
 def tryParse(t : 'Token') -> 'DefinitionWithNumericToken':
     """ Extract a definition containing a number, starting at the given token.
     
     The tokens from ``t`` up to the next possible sentence start are scanned
     for the first NumberToken that is not ``t`` itself, has
     ``whitespaces_after_count`` of at most 2, is not an adjective and is
     followed by a noun phrase. The result spans from ``t`` to the last token
     before the next possible sentence start.
     
     Args:
         t(Token): token that must be able to start a sentence
     
     Returns:
         DefinitionWithNumericToken: the definition, or None when ``t`` cannot
         start a sentence, no suitable number is found, or the number has no
         integer value
     """
     if not MiscHelper.canBeStartOfSentence(t):
         return None
     num = None
     noun_ = None
     cur = t
     while cur is not None:
         # running into the next sentence means there is nothing to extract
         if cur != t and MiscHelper.canBeStartOfSentence(cur):
             return None
         suitable = (isinstance(cur, NumberToken)
                     and not (cur.whitespaces_after_count > 2 or cur == t)
                     and not cur.morph.class0_.is_adjective)
         if suitable:
             phrase = NounPhraseHelper.tryParse(cur.next0_, NounPhraseParseAttr.NO, 0)
             if phrase is not None:
                 num = Utils.asObjectOrNull(cur, NumberToken)
                 noun_ = phrase
                 break
         cur = cur.next0_
     if num is None or num.int_value is None:
         return None
     res = DefinitionWithNumericToken(t, noun_.end_token)
     res.number = num.int_value
     res.number_begin_char = num.begin_char
     res.number_end_char = num.end_char
     res.noun = noun_.getNormalCaseText(None, True, MorphGender.UNDEFINED, False)
     res.nouns_genetive = Utils.ifNotNull(noun_.getMorphVariant(MorphCase.GENITIVE, True), res.noun)
     # the same text attributes are used for every fragment
     text_attrs = Utils.valToEnum((GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER), GetTextAttr)
     # text = <part before the number> + <number with its noun phrase> + <tail>
     res.text = MiscHelper.getTextValue(t, num.previous, text_attrs)
     if num.is_whitespace_before:
         res.text += " "
     res.number_substring = MiscHelper.getTextValue(num, noun_.end_token, text_attrs)
     res.text += res.number_substring
     # stretch the result up to the token preceding the next sentence start
     tail = noun_.end_token
     while tail is not None and not MiscHelper.canBeStartOfSentence(tail):
         res.end_token = tail
         tail = tail.next0_
     if res.end_token != noun_.end_token:
         if noun_.is_whitespace_after:
             res.text += " "
         res.text += MiscHelper.getTextValue(noun_.end_token.next0_, res.end_token, text_attrs)
     return res
Exemple #3
0
 def __tryParse(t: 'Token') -> 'BusinessFactItem':
     """Try to recognise a single base business-fact item at ``t``.

     The base ontology is tried at ``t`` first and, when ``t`` is a verb, at
     the following token. If the matched term has an UNDEFINED kind, the
     tokens after it are scanned (prepositions are skipped) for another
     ontology term with a defined kind. Failing that, a noun phrase whose
     noun is an owner / shareholder word gives a HAVE item and a founder
     word gives a CREATE item.

     Args:
         t(Token): token to start from

     Returns:
         BusinessFactItem: the recognised item, or None
     """
     onto = BusinessFactItem.__m_base_onto
     hit = onto.tryParse(t, TerminParseAttr.NO)
     if hit is None and t.morph.class0_.is_verb and t.next0_ is not None:
         hit = onto.tryParse(t.next0_, TerminParseAttr.NO)
     if hit is not None:
         kind = Utils.valToEnum(hit.termin.tag, BusinessFactKind)
         if kind != BusinessFactKind.UNDEFINED:
             return BusinessFactItem._new402(
                 t, hit.end_token, BusinessFactItemTyp.BASE, kind,
                 hit.morph, hit.termin.tag2 is not None)
         # the first term carries no kind: look further for one that does
         cur = hit.end_token.next0_
         while cur is not None:
             if not cur.morph.class0_.is_preposition:
                 nxt = onto.tryParse(cur, TerminParseAttr.NO)
                 if nxt is not None:
                     kind = Utils.valToEnum(nxt.termin.tag, BusinessFactKind)
                     if kind != BusinessFactKind.UNDEFINED:
                         return BusinessFactItem._new403(
                             t, nxt.end_token, BusinessFactItemTyp.BASE,
                             kind, nxt.morph)
                     # jump over the matched term before advancing
                     cur = nxt.end_token
             cur = cur.next0_
     npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
     if npt is None:
         return None
     owner_words = ("АКЦИОНЕР", "ВЛАДЕЛЕЦ", "ВЛАДЕЛИЦА", "СОВЛАДЕЛЕЦ",
                    "СОВЛАДЕЛИЦА", "АКЦІОНЕР", "ВЛАСНИК", "ВЛАСНИЦЯ",
                    "СПІВВЛАСНИК", "СПІВВЛАСНИЦЯ")
     if any(npt.noun.isValue(word, None) for word in owner_words):
         return BusinessFactItem._new403(
             t, npt.end_token, BusinessFactItemTyp.BASE,
             BusinessFactKind.HAVE, npt.morph)
     founder_words = ("ОСНОВАТЕЛЬ", "ОСНОВАТЕЛЬНИЦА", "ЗАСНОВНИК",
                      "ЗАСНОВНИЦЯ")
     if any(npt.noun.isValue(word, None) for word in founder_words):
         return BusinessFactItem._new403(
             t, npt.end_token, BusinessFactItemTyp.BASE,
             BusinessFactKind.CREATE, npt.morph)
     return None
Exemple #4
0
 def tryAttach(t0: 'Token') -> 'PhoneItemToken':
     """ Attach a single phone primitive at the given position.
     
     An item that is not a PREFIX is returned as is. A PREFIX item is
     extended to the right over further prefixes, noun phrases and
     prepositions, up to and including a ':'; the extension stops at a table
     control character or at a token that starts a new line.
     
     Args:
         t0(Token): token to start from
     
     Returns:
         PhoneItemToken: the item, or None when nothing is recognised or the
         extension reaches a noun phrase ending in "ПОСЕЛЕНИЕ"
     """
     prefix_type = PhoneItemToken.PhoneItemType.PREFIX
     head = PhoneItemToken.__TryAttach(t0)
     if head is None:
         return None
     if head.item_type != prefix_type:
         return head
     cur = head.end_token.next0_
     while cur is not None:
         if cur.is_table_control_char or cur.is_newline_before:
             break
         more = PhoneItemToken.__TryAttach(cur)
         if more is not None:
             # only another prefix may be glued to the current one
             if more.item_type != prefix_type:
                 break
             if head.kind == PhoneKind.UNDEFINED:
                 head.kind = more.kind
             head.end_token = more.end_token
             cur = head.end_token.next0_
             continue
         if cur.isChar(':'):
             head.end_token = cur
             break
         if not isinstance(cur, TextToken) or t0.length_char == 1:
             break
         phrase = NounPhraseHelper.tryParse(cur, NounPhraseParseAttr.NO, 0)
         if phrase is not None:
             cur = phrase.end_token
             if cur.isValue("ПОСЕЛЕНИЕ", None):
                 return None
             head.end_token = cur
         elif not cur.morph.class0_.is_preposition:
             break
         cur = cur.next0_
     return head
Exemple #5
0
 def checkUnknownRegion(t : 'Token') -> 'Token':
     """ Check whether an unspecific region starts here, e.g. "Европа", "Средняя Азия", "Дикий запад".
     
     Args:
         t(Token): text token at which a noun phrase is expected
     
     Returns:
         Token: last token of the noun phrase when that token matches the
         dictionary of unknown regions (whole words only), otherwise None
     """
     if isinstance(t, TextToken):
         phrase = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
         if phrase is not None:
             last = phrase.end_token
             found = TerrItemToken._m_unknown_regions.tryParse(last, TerminParseAttr.FULLWORDSONLY)
             if found is not None:
                 return last
     return None
Exemple #6
0
 def tryParse(t: 'Token') -> 'BusinessFactItem':
     """Parse a business-fact item at ``t`` and extend it to the right.

     After the base item is recognised, the following text tokens are
     examined (prepositions are skipped). A further base item of the same
     kind is merged in; a GET item following a FINANCE item is merged in and
     turns the result into GET. A noun phrase is also absorbed when the
     result is FINANCE or the noun is one of РЫНОК / СДЕЛКА / РИНОК / УГОДА.

     Args:
         t(Token): token to start from (may be None)

     Returns:
         BusinessFactItem: the item, or None when nothing is recognised
     """
     if t is None:
         return None
     fact = BusinessFactItem.__tryParse(t)
     if fact is None:
         return None
     deal_nouns = ("РЫНОК", "СДЕЛКА", "РИНОК", "УГОДА")
     cur = fact.end_token.next0_
     while cur is not None:
         if cur.morph.class0_.is_preposition:
             cur = cur.next0_
             continue
         if not isinstance(cur, TextToken):
             break
         phrase = NounPhraseHelper.tryParse(cur, NounPhraseParseAttr.NO, 0)
         if phrase is None:
             break
         nested = BusinessFactItem.__tryParse(cur)
         if nested is not None:
             if not (nested.base_kind == fact.base_kind):
                 upgrade = (nested.base_kind == BusinessFactKind.GET
                            and fact.base_kind == BusinessFactKind.FINANCE)
                 if not upgrade:
                     break
                 fact.base_kind = nested.base_kind
             fact.end_token = nested.end_token
             cur = fact.end_token
         elif (fact.base_kind == BusinessFactKind.FINANCE
               or any(phrase.noun.isValue(word, None) for word in deal_nouns)):
             fact.end_token = cur
         else:
             break
         cur = cur.next0_
     return fact
Exemple #7
0
 def tryParse(t: 'Token',
              prev: 'WeaponItemToken',
              after_conj: bool,
              attach_high: bool = False) -> 'WeaponItemToken':
     """Parse a weapon item at ``t``.

     A NAME item directly followed by a bracketed '(' sequence takes the
     bracket text as its alternative value when the two can be equal up to
     Cyrillic / Latin look-alike letters. If nothing is recognised at ``t``
     itself, the noun of a noun phrase starting at ``t`` is tried; a NOUN
     item found there is accepted when the normalised phrase ends with the
     item value, and then starts at ``t``.

     Args:
         t(Token): token to start from
         prev(WeaponItemToken): previous item, passed through to the parser
         after_conj(bool): passed through to the parser
         attach_high(bool): passed through to the parser

     Returns:
         WeaponItemToken: the item, or None
     """
     item = WeaponItemToken.__TryParse(t, prev, after_conj, attach_high)
     if item is not None:
         if item.typ == WeaponItemToken.Typs.NAME:
             bracket = BracketHelper.tryParse(item.end_token.next0_,
                                              BracketParseAttr.NO, 100)
             if bracket is not None and bracket.isChar('('):
                 alt = MiscHelper.getTextValueOfMetaToken(bracket,
                                                          GetTextAttr.NO)
                 if MiscHelper.canBeEqualCyrAndLatSS(item.value, alt):
                     item.alt_value = alt
                     item.end_token = bracket.end_token
         return item
     # nothing at t itself: retry at the noun when modifiers precede it
     phrase = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
     if phrase is None or phrase.noun.begin_char <= phrase.begin_char:
         return None
     item = WeaponItemToken.__TryParse(phrase.noun.begin_token, prev,
                                       after_conj, attach_high)
     if item is None or item.typ != WeaponItemToken.Typs.NOUN:
         return None
     full_name = phrase.getNormalCaseText(None, True, MorphGender.UNDEFINED,
                                          False)
     if full_name == "РУЧНОЙ ГРАНАТ":
         full_name = "РУЧНАЯ ГРАНАТА"
     if not Utils.ifNotNull(full_name, "").endswith(item.value):
         return None
     if item.alt_value is None:
         item.alt_value = full_name
     else:
         # keep only the modifiers and put them before the existing value
         modifiers = full_name[:len(full_name) - len(item.value)].strip()
         item.alt_value = "{0} {1}".format(modifiers, item.alt_value)
     item.begin_token = t
     return item
Exemple #8
0
 def tryParse(t : 'Token', add_units : 'TerminCollection', prev : 'UnitToken', parse_unknown_units : bool=False) -> 'UnitToken':
     """ Try to recognise a unit of measure starting at the given token.
     
     Args:
         t(Token): first token of the candidate unit (may be None)
         add_units(TerminCollection): optional extra dictionary, consulted after the built-in terms, degrees and percent
         prev(UnitToken): previously parsed unit, if any
         parse_unknown_units(bool): when True, a short unrecognised word may be returned as an unknown unit
     
     Returns:
         UnitToken: the recognised unit, or None
     """
     if (t is None): 
         return None
     t0 = t
     pow0__ = 1
     is_neg = False
     # a leading '/', '\', НА, OF or PER (or В after a previous unit) makes the power negative
     if ((t.isCharOf("\\/") or t.isValue("НА", None) or t.isValue("OF", None)) or t.isValue("PER", None)): 
         is_neg = True
         t = t.next0_
     elif (t.isValue("В", None) and prev is not None): 
         is_neg = True
         t = t.next0_
     elif (MeasureHelper.isMultChar(t)): 
         # a multiplication sign is simply skipped
         t = t.next0_
     tt = Utils.asObjectOrNull(t, TextToken)
     if (tt is None): 
         return None
     # "square" / "cubic" words set the power and move tt (but not t) past the word and an optional dot
     if (tt.term == "КВ" or tt.term == "КВАДР" or tt.isValue("КВАДРАТНЫЙ", None)): 
         pow0__ = 2
         tt = (Utils.asObjectOrNull(tt.next0_, TextToken))
         if (tt is not None and tt.isChar('.')): 
             tt = (Utils.asObjectOrNull(tt.next0_, TextToken))
         if (tt is None): 
             return None
     elif (tt.term == "КУБ" or tt.term == "КУБИЧ" or tt.isValue("КУБИЧЕСКИЙ", None)): 
         pow0__ = 3
         tt = (Utils.asObjectOrNull(tt.next0_, TextToken))
         if (tt is not None and tt.isChar('.')): 
             tt = (Utils.asObjectOrNull(tt.next0_, TextToken))
         if (tt is None): 
             return None
     elif (tt.term == "µ"): 
         # micro sign: parse what follows, then look for a MICRO-factor unit whose
         # Cyrillic name prefixed with "мк" equals the parsed unit's Cyrillic name
         res = UnitToken.tryParse(tt.next0_, add_units, prev, False)
         if (res is not None): 
             for u in UnitsHelper.UNITS: 
                 if (u.factor == UnitsFactors.MICRO and Utils.compareStrings("мк" + u.name_cyr, res.unit.name_cyr, True) == 0): 
                     res.unit = u
                     res.begin_token = tt
                     res.pow0_ = pow0__
                     if (is_neg): 
                         res.pow0_ = (- pow0__)
                     return res
     # built-in unit dictionary: every matching term is a candidate
     toks = UnitsHelper.TERMINS.tryParseAll(tt, TerminParseAttr.NO)
     if (toks is not None): 
         # a single candidate at the very first token, separated by whitespace from a previous unit, is rejected
         if ((prev is not None and tt == t0 and len(toks) == 1) and t.is_whitespace_before): 
             return None
         # a one-token match that is also a preposition is rejected when it opens a
         # prepositional noun phrase or is followed by a number not written in digits
         if (toks[0].begin_token == toks[0].end_token and tt.morph.class0_.is_preposition and (tt.whitespaces_after_count < 3)): 
             if (NounPhraseHelper.tryParse(tt, NounPhraseParseAttr.PARSEPREPOSITION, 0) is not None): 
                 return None
             if (isinstance(tt.next0_, NumberToken)): 
                 if ((tt.next0_).typ != NumberSpellingType.DIGIT): 
                     return None
         uts = list()
         for tok in toks: 
             res = UnitToken._new1517(t0, tok.end_token, Utils.asObjectOrNull(tok.termin.tag, Unit))
             res.pow0_ = pow0__
             if (is_neg): 
                 res.pow0_ = (- pow0__)
             # a unit with base_multiplier 1000000 written with a lowercase first letter is
             # replaced by the MILLI-factor unit with the same Cyrillic name
             # NOTE(review): presumably distinguishes "m" (milli) from "M" (mega) - confirm
             if (res.unit.base_multiplier == 1000000 and (isinstance(t0, TextToken)) and str.islower((t0).getSourceText()[0])): 
                 for u in UnitsHelper.UNITS: 
                     if (u.factor == UnitsFactors.MILLI and Utils.compareStrings(u.name_cyr, res.unit.name_cyr, True) == 0): 
                         res.unit = u
                         break
             res.__correct()
             res.__checkDoubt()
             uts.append(res)
         # preference order: the candidate whose keyword begins furthest to the right,
         # then the first candidate not marked as doubtful, then simply the first one
         max0_ = 0
         best = None
         for ut in uts: 
             if (ut.keyword is not None): 
                 if (ut.keyword.begin_char >= max0_): 
                     max0_ = ut.keyword.begin_char
                     best = ut
         if (best is not None): 
             return best
         for ut in uts: 
             if (not ut.is_doubt): 
                 return ut
         # NOTE(review): raises IndexError if tryParseAll can return an empty list - confirm it cannot
         return uts[0]
     # degree sign, either as a character or spelled as <O> / <О> / <0>
     t1 = None
     if (t.isCharOf("º°")): 
         t1 = t
     elif ((t.isChar('<') and t.next0_ is not None and t.next0_.next0_ is not None) and t.next0_.next0_.isChar('>') and ((t.next0_.isValue("О", None) or t.next0_.isValue("O", None) or (((isinstance(t.next0_, NumberToken)) and (t.next0_).value == "0"))))): 
         t1 = t.next0_.next0_
     if (t1 is not None): 
         res = UnitToken._new1517(t0, t1, UnitsHelper.UGRADUS)
         res.__checkDoubt()
         # an optional comma and "ПО" may stand between the sign and the scale name
         t = t1.next0_
         if (t is not None and t.is_comma): 
             t = t.next0_
         if (t is not None and t.isValue("ПО", None)): 
             t = t.next0_
         if (isinstance(t, TextToken)): 
             vv = (t).term
             # Latin "C", Cyrillic "С" or a word starting with "ЦЕЛЬС" selects UGRADUSC
             if (vv == "C" or vv == "С" or vv.startswith("ЦЕЛЬС")): 
                 res.unit = UnitsHelper.UGRADUSC
                 res.is_doubt = False
                 res.end_token = t
             # "F" or a word starting with "ФАР" selects UGRADUSF
             if (vv == "F" or vv.startswith("ФАР")): 
                 res.unit = UnitsHelper.UGRADUSF
                 res.is_doubt = False
                 res.end_token = t
         return res
     if (t.isChar('%')): 
         # '%' followed (optionally in parentheses) by a word starting with "ОБ" gives UALCO,
         # otherwise a plain percent
         tt1 = t.next0_
         if (tt1 is not None and tt1.isChar('(')): 
             tt1 = tt1.next0_
         if ((isinstance(tt1, TextToken)) and (tt1).term.startswith("ОБ")): 
             re = UnitToken._new1517(t, tt1, UnitsHelper.UALCO)
             if (re.end_token.next0_ is not None and re.end_token.next0_.isChar('.')): 
                 re.end_token = re.end_token.next0_
             # take the closing parenthesis only if there was an opening one
             if (re.end_token.next0_ is not None and re.end_token.next0_.isChar(')') and t.next0_.isChar('(')): 
                 re.end_token = re.end_token.next0_
             return re
         return UnitToken._new1517(t, t, UnitsHelper.UPERCENT)
     # caller-supplied units
     if (add_units is not None): 
         tok = add_units.tryParse(t, TerminParseAttr.NO)
         if (tok is not None): 
             res = UnitToken._new1621(t0, tok.end_token, Utils.asObjectOrNull(tok.termin.tag, UnitReferent))
             # NOTE(review): this extends tok.end_token after res was already built from it,
             # so res.end_token does not include the dot - confirm whether res.end_token was meant
             if (tok.end_token.next0_ is not None and tok.end_token.next0_.isChar('.')): 
                 tok.end_token = tok.end_token.next0_
             res.pow0_ = pow0__
             if (is_neg): 
                 res.pow0_ = (- pow0__)
             res.__correct()
             return res
     if (not parse_unknown_units): 
         return None
     # unknown unit: a word of letters, at most 5 characters long, close to the preceding text
     # and not able to start a sentence
     if ((t.whitespaces_before_count > 2 or not t.chars.is_letter or t.length_char > 5) or not ((isinstance(t, TextToken)))): 
         return None
     if (MiscHelper.canBeStartOfSentence(t)): 
         return None
     t1 = t
     # NOTE(review): this branch assigns the value t1 already has, so a trailing dot is never
     # included - confirm whether t.next0_ was intended
     if (t.next0_ is not None and t.next0_.isChar('.')): 
         t1 = t
     # the word must be followed by end of text, a wide gap, a comma, a slash,
     # a table control character or a multiplication sign
     ok = False
     if (t1.next0_ is None or t1.whitespaces_after_count > 2): 
         ok = True
     elif (t1.next0_.is_comma or t1.next0_.isCharOf("\\/") or t1.next0_.is_table_control_char): 
         ok = True
     elif (MeasureHelper.isMultChar(t1.next0_)): 
         ok = True
     if (not ok): 
         return None
     mc = t.getMorphClassInDictionary()
     if (mc.is_undefined): 
         pass
     # NOTE(review): cannot trigger - tokens longer than 5 characters were rejected above
     elif (t.length_char > 7): 
         return None
     res1 = UnitToken._new1622(t0, t1, pow0__, True)
     res1.unknown_name = (t).getSourceText()
     res1.__correct()
     return res1
Exemple #9
0
 def parse(t0: 'Token', lev_: int) -> 'MailLine':
     """Build a MailLine for the line of text starting at ``t0`` and classify it.

     The line runs from ``t0`` up to the token before the next one that
     starts a new line. Leading '>' / '|' characters raise ``lev``, and
     person, geo, address, phone, URI, person-property and organization
     referents are collected into ``refs``. The type is then decided in this
     order, each step only if the type is still UNDEFINED:

     * FROM - the line starts with a "from" word followed by ':';
     * HELLO - greeting words with few unrelated words around them;
     * BESTREGARDS - closing ("regards") words;
     * FROM - forwarded / original message markers, or a date line with a
       sender ("On <date> ... wrote").

     Args:
         t0(Token): first token of the line (may be None)
         lev_(int): recursion depth; the look-ahead for a date that stands
             alone on its line is only performed while it is below 5

     Returns:
         MailLine: the parsed line, or None when ``t0`` is None
     """
     if (t0 is None):
         return None
     res = MailLine(t0, t0)
     pr = True
     t = t0
     # pass 1: find the end of the line, count quoting markers, collect referents
     first_pass3027 = True
     while True:
         if first_pass3027: first_pass3027 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_newline_before and t0 != t):
             break
         res.end_token = t
         if (t.is_table_control_char or t.is_hiphen):
             continue
         if (pr):
             # still inside the quoting prefix
             if ((isinstance(t, TextToken)) and t.isCharOf(">|")):
                 res.lev += 1
             else:
                 pr = False
                 tok = MailLine.M_FROM_WORDS.tryParse(t, TerminParseAttr.NO)
                 if (tok is not None and tok.end_token.next0_ is not None
                         and tok.end_token.next0_.isChar(':')):
                     res.typ = MailLine.Types.FROM
                     t = tok.end_token.next0_
                     continue
         if (isinstance(t, ReferentToken)):
             r = t.getReferent()
             if (r is not None):
                 if ((((isinstance(r, PersonReferent)) or
                       (isinstance(r, GeoReferent)) or
                       (isinstance(r, AddressReferent)))
                      or r.type_name == "PHONE" or r.type_name == "URI")
                         or (isinstance(r, PersonPropertyReferent))
                         or r.type_name == "ORGANIZATION"):
                     res.refs.append(r)
     if (res.typ == MailLine.Types.UNDEFINED):
         # greeting detection: skip to the first letter token, then count
         # greeting words (ok), names (nams) and everything else (oth)
         t = t0
         while t is not None and (t.end_char < res.end_char):
             if (not t.is_hiphen and t.chars.is_letter):
                 break
             t = t.next0_
         ok = 0
         nams = 0
         oth = 0
         last_comma = None
         first_pass3028 = True
         while True:
             if first_pass3028: first_pass3028 = False
             else: t = t.next0_
             if (not (t is not None and (t.end_char < res.end_char))): break
             if (isinstance(t.getReferent(), PersonReferent)):
                 nams += 1
                 continue
             if (isinstance(t, TextToken)):
                 if (not t.chars.is_letter):
                     last_comma = t
                     continue
                 tok = MailLine.M_HELLO_WORDS.tryParse(
                     t, TerminParseAttr.NO)
                 if (tok is not None):
                     ok += 1
                     t = tok.end_token
                     continue
                 if (t.isValue("ВСЕ", None) or t.isValue("ALL", None)
                         or t.isValue("TEAM", None)):
                     nams += 1
                     continue
                 pit = PersonItemToken.tryAttach(
                     t, None, PersonItemToken.ParseAttr.NO, None)
                 if (pit is not None):
                     nams += 1
                     t = pit.end_token
                     continue
             oth += 1
             if ((oth) > 3):
                 # too many unrelated words: cut the greeting at the last
                 # non-letter token seen, if a greeting word was found
                 if (ok > 0 and last_comma is not None):
                     res.end_token = last_comma
                     oth = 0
                 break
         if ((oth < 3) and ok > 0):
             res.typ = MailLine.Types.HELLO
     if (res.typ == MailLine.Types.UNDEFINED):
         # closing ("best regards") detection
         ok_words = 0
         t = t0
         first_pass3029 = True
         while True:
             if first_pass3029: first_pass3029 = False
             else: t = t.next0_
             if (not (t is not None and t.end_char <= res.end_char)): break
             if (not ((isinstance(t, TextToken)))):
                 continue
             if (t.isChar('<')):
                 br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
                 if (br is not None):
                     t = br.end_token
                     continue
             if (not t.is_letters or t.is_table_control_char):
                 continue
             tok = MailLine.M_REGARD_WORDS.tryParse(t, TerminParseAttr.NO)
             if (tok is not None):
                 ok_words += 1
                 # tag every token of the matched term
                 while t is not None and t.end_char <= tok.end_char:
                     t.tag = (tok.termin)
                     t = t.next0_
                 t = tok.end_token
                 if ((isinstance(t.next0_, TextToken))
                         and t.next0_.morph.case_.is_genitive):
                     # absorb a chain of genitive noun phrases after the term
                     t = t.next0_
                     first_pass3030 = True
                     while True:
                         if first_pass3030: first_pass3030 = False
                         else: t = t.next0_
                         # t becomes None when the chain reaches the end of
                         # the text; without this guard t.end_char raised
                         if (t is None or not (t.end_char <= res.end_char)): break
                         if (t.morph.class0_.is_conjunction):
                             continue
                         npt1 = NounPhraseHelper.tryParse(
                             t, NounPhraseParseAttr.NO, 0)
                         if (npt1 is None):
                             break
                         if (not npt1.morph.case_.is_genitive):
                             break
                         while t.end_char < npt1.end_char:
                             t.tag = (t)
                             t = t.next0_
                         t.tag = (t)
                 if (t is None):
                     # the text ended inside the genitive chain: nothing is
                     # left to scan, and advancing from None would raise
                     break
                 continue
             if ((t.morph.class0_.is_preposition or
                  t.morph.class0_.is_conjunction or t.morph.class0_.is_misc)
                     or t.isValue("C", None)):
                 continue
             if ((ok_words > 0 and t.previous is not None
                  and t.previous.is_comma)
                     and t.previous.begin_char > t0.begin_char
                     and not t.chars.is_all_lower):
                 # "Regards, Name": the closing ends at the comma
                 res.end_token = t.previous
                 break
             npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
             if (npt is None):
                 if ((res.end_char - t.end_char) > 10):
                     ok_words = 0
                 break
             tok = MailLine.M_REGARD_WORDS.tryParse(npt.end_token,
                                                    TerminParseAttr.NO)
             if (tok is not None
                     and (isinstance(npt.end_token, TextToken))):
                 term = (npt.end_token).term
                 if (term == "ДЕЛ"):
                     tok = (None)
             if (tok is None):
                 if (npt.noun.isValue("НАДЕЖДА", None)):
                     t.tag = (t)
                 elif (ok_words > 0 and t.isValue("NICE", None)
                       and ((res.end_char - npt.end_char) < 13)):
                     t.tag = (t)
                 else:
                     ok_words = 0
                 break
             ok_words += 1
             while t is not None and t.end_char <= tok.end_char:
                 t.tag = (tok.termin)
                 t = t.next0_
             t = tok.end_token
         if (ok_words > 0):
             res.typ = MailLine.Types.BESTREGARDS
     if (res.typ == MailLine.Types.UNDEFINED):
         # forwarded / quoted message headers: start at the first letter token
         t = t0
         while t is not None and (t.end_char < res.end_char):
             if (not ((isinstance(t, TextToken)))):
                 break
             elif (not t.is_hiphen and t.chars.is_letter):
                 break
             t = t.next0_
         if (t is not None):
             if (((t.isValue("ПЕРЕСЫЛАЕМОЕ", None)
                   or t.isValue("ПЕРЕАДРЕСОВАННОЕ", None)))
                     and t.next0_ is not None
                     and t.next0_.isValue("СООБЩЕНИЕ", None)):
                 res.typ = MailLine.Types.FROM
                 res.must_be_first_line = True
             elif ((t.isValue("НАЧАЛО", None) and t.next0_ is not None and
                    ((t.next0_.isValue("ПЕРЕСЫЛАЕМОЕ", None)
                      or t.next0_.isValue("ПЕРЕАДРЕСОВАННОЕ", None))))
                   and t.next0_.next0_ is not None
                   and t.next0_.next0_.isValue("СООБЩЕНИЕ", None)):
                 res.typ = MailLine.Types.FROM
                 res.must_be_first_line = True
             elif (t.isValue("ORIGINAL", None) and t.next0_ is not None
                   and ((t.next0_.isValue("MESSAGE", None)
                         or t.next0_.isValue("APPOINTMENT", None)))):
                 res.typ = MailLine.Types.FROM
                 res.must_be_first_line = True
             elif (t.isValue("ПЕРЕСЛАНО", None) and t.next0_ is not None
                   and t.next0_.isValue("ПОЛЬЗОВАТЕЛЕМ", None)):
                 res.typ = MailLine.Types.FROM
                 res.must_be_first_line = True
             elif (((t.getReferent() is not None
                     and t.getReferent().type_name == "DATE"))
                   or ((t.isValue("IL", None) and t.next0_ is not None
                        and t.next0_.isValue("GIORNO", None)))
                   or ((t.isValue("ON", None) and
                        (isinstance(t.next0_, ReferentToken))
                        and t.next0_.getReferent().type_name == "DATE"))):
                 # "<date> ... <sender> wrote" style header
                 has_from = False
                 has_date = t.getReferent() is not None and t.getReferent(
                 ).type_name == "DATE"
                 if (t.is_newline_after and (lev_ < 5)):
                     # a date alone on its line, followed by a greeting
                     res1 = MailLine.parse(t.next0_, lev_ + 1)
                     if (res1 is not None
                             and res1.typ == MailLine.Types.HELLO):
                         res.typ = MailLine.Types.FROM
                 # NOTE(review): unlike the call above, this recursion is not
                 # limited by lev_ - confirm that is intended
                 next0__ = MailLine.parse(res.end_token.next0_, lev_ + 1)
                 if (next0__ is not None):
                     # only an unclassified next line may be merged in
                     if (next0__.typ != MailLine.Types.UNDEFINED):
                         next0__ = (None)
                 tmax = res.end_char
                 if (next0__ is not None):
                     tmax = next0__.end_char
                 br1 = None
                 while t is not None and t.end_char <= tmax:
                     if (t.isValue("ОТ", None) or t.isValue("FROM", None)):
                         has_from = True
                     elif (
                             t.getReferent() is not None and
                         ((t.getReferent().type_name == "URI" or
                           (isinstance(t.getReferent(), PersonReferent))))):
                         if (t.getReferent().type_name == "URI"
                                 and has_date):
                             if (br1 is not None):
                                 has_from = True
                                 next0__ = (None)
                             if (t.previous.isChar('<')
                                     and t.next0_ is not None
                                     and t.next0_.isChar('>')):
                                 t = t.next0_
                                 if (t.next0_ is not None
                                         and t.next0_.isChar(':')):
                                     t = t.next0_
                                 if (t.is_newline_after):
                                     has_from = True
                                     next0__ = (None)
                         # look for the "wrote" verb in the rest of the line
                         t = t.next0_
                         while t is not None and t.end_char <= res.end_char:
                             if (t.isValue("HA", None)
                                     and t.next0_ is not None
                                     and t.next0_.isValue("SCRITTO", None)):
                                 has_from = True
                                 break
                             elif (((t.isValue("НАПИСАТЬ", None)
                                     or t.isValue("WROTE", None)))
                                   and ((res.end_char - t.end_char) < 10)):
                                 has_from = True
                                 break
                             t = t.next0_
                         if (has_from):
                             res.typ = MailLine.Types.FROM
                             # t is None when the scan ran off the end of the
                             # text, i.e. it went past the start of the next
                             # line; previously t.end_char raised here
                             if (next0__ is not None
                                     and (t is None
                                          or t.end_char >= next0__.begin_char)):
                                 res.end_token = next0__.end_token
                         break
                     elif (br1 is None and not t.isChar('<')
                           and BracketHelper.canBeStartOfSequence(
                               t, True, False)):
                         br1 = BracketHelper.tryParse(
                             t, BracketParseAttr.NO, 100)
                         if (br1 is not None):
                             t = br1.end_token
                     t = t.next0_
             else:
                 # "<sender> пишет (" style header
                 has_uri = False
                 while t is not None and (t.end_char < res.end_char):
                     if (t.getReferent() is not None and
                         ((t.getReferent().type_name == "URI" or
                           (isinstance(t.getReferent(), PersonReferent))))):
                         has_uri = True
                     elif (t.isValue("ПИСАТЬ", None) and has_uri):
                         if (t.next0_ is not None and t.next0_.isChar('(')):
                             res.typ = MailLine.Types.FROM
                             break
                     t = t.next0_
     return res
Exemple #10
0
 def main(args: typing.List[str]) -> None:
     """Demo driver: initialize the SDK and print analysis results for a sample text.

     The sample text is processed three times: on an empty processor (noun
     groups only), on a processor from ``ProcessorService.createProcessor()``
     (entities with their slots, then noun groups outside entities), and on a
     processor restricted to the keyword analyzer (keywords). Everything is
     written to standard output.

     Args:
         args: command-line arguments; not used by this function.
     """
     timer = Stopwatch()
     # Initialization - has to be done once, before any text is processed
     print("Initializing ... ", end="", flush=True)
     # The engine and all available analyzers are initialized here
     Sdk.initialize((MorphLang.RU) | MorphLang.EN)
     timer.stop()
     init_report = "OK (by {0} ms), version {1}".format(
         timer.elapsedMilliseconds, ProcessorService.getVersion())
     print(init_report, flush=True)

     # The text to analyze
     txt = "Единственным конкурентом «Трансмаша» на этом сомнительном тендере было ООО «Плассер Алека Рейл Сервис», основным владельцем которого является австрийская компания «СТЦ-Холдинг ГМБХ». До конца 2011 г. эта же фирма была совладельцем «Трансмаша» вместе с «Тако» Краснова. Зато совладельцем «Плассера», также до конца 2011 г., был тот самый Карл Контрус, который имеет четверть акций «Трансмаша». "
     print("Text: {0}".format(txt), flush=True)

     # Pass 1: run on an empty processor (no NER analyzers)
     plain_result = ProcessorService.getEmptyProcessor().process(
         SourceOfAnalysis(txt), None, None)
     print("Noun groups: ", end="", flush=True)
     token = plain_result.first_token
     while token is not None:
         # Try to extract a noun group starting at the current token
         phrase = NounPhraseHelper.tryParse(token, NounPhraseParseAttr.NO, 0)
         if phrase is not None:
             # Success: print it in normalized form
             source_text = phrase.getSourceText()
             normal_text = phrase.getNormalCaseText(
                 None, True, MorphGender.UNDEFINED, False)
             print("[{0}=>{1}] ".format(source_text, normal_text),
                   end="", flush=True)
             # Jump to the last token of the group before advancing
             token = phrase.end_token
         token = token.next0_

     # Pass 2: processor obtained from createProcessor()
     with ProcessorService.createProcessor() as proc:
         # Analyze the text
         full_result = proc.process(SourceOfAnalysis(txt), None, None)
         # Resulting entities
         print(
             "\r\n==========================================\r\nEntities: ",
             flush=True)
         for entity in full_result.entities:
             print("{0}: {1}".format(entity.type_name, str(entity)),
                   flush=True)
             for slot in entity.slots:
                 print("   {0}: {1}".format(slot.type_name, slot.value),
                       flush=True)
         # Example of noun group extraction
         print(
             "\r\n==========================================\r\nNoun groups: ",
             flush=True)
         token = full_result.first_token
         while token is not None:
             # Tokens that carry an entity are ignored
             if token.getReferent() is None:
                 # Try to build a noun group
                 phrase = NounPhraseHelper.tryParse(
                     token, NounPhraseParseAttr.ADJECTIVECANBELAST, 0)
                 if phrase is not None:
                     print(phrase, flush=True)
                     # Move the pointer to the last token of the group
                     token = phrase.end_token
             token = token.next0_

     # Pass 3: processor restricted to the keyword analyzer
     with ProcessorService.createSpecificProcessor(
             KeywordAnalyzer.ANALYZER_NAME) as proc:
         keyword_result = proc.process(SourceOfAnalysis(txt), None, None)
         print(
             "\r\n==========================================\r\nKeywords1: ",
             flush=True)
         for entity in keyword_result.entities:
             if isinstance(entity, KeywordReferent):
                 print(entity, flush=True)
         print(
             "\r\n==========================================\r\nKeywords2: ",
             flush=True)
         token = keyword_result.first_token
         while token is not None:
             if isinstance(token, ReferentToken):
                 keyword = Utils.asObjectOrNull(token.getReferent(),
                                                KeywordReferent)
                 if keyword is not None:
                     text_attrs = Utils.valToEnum(
                         (GetTextAttr.FIRSTNOUNGROUPTONOMINATIVESINGLE) |
                         (GetTextAttr.KEEPREGISTER), GetTextAttr)
                     keyword_text = MiscHelper.getTextValueOfMetaToken(
                         Utils.asObjectOrNull(token, ReferentToken),
                         text_attrs)
                     print("{0} = {1}".format(keyword_text, keyword),
                           flush=True)
             token = token.next0_
     print("Over!", flush=True)
Exemple #11
0
 def tryAttach(t : 'Token') -> 'TitleItemToken':
     """Try to build a TitleItemToken that starts at token ``t``.

     The checks run in this order and the first match wins:

     1. If ``t`` is a TextToken: the keywords "ТЕМА", "ПО"/"НА" + "ТЕМА",
        "ПЕРЕВОД"/"ПЕР" + "С", "СЕКЦИЯ"/"SECTION"/"СЕКЦІЯ", and a speciality
        introduced by "СПЕЦИАЛЬНОСТЬ" (optionally after a preposition) or by a
        '/' at the start of a line.
     2. A speciality directly at ``t``.
     3. A noun phrase whose last token matches a termin tagged ``Types.TYP``
        (with extra handling for dissertations and reports).
     4. A termin directly at ``t``, or a termin enclosed in brackets.

     Args:
         t(Token): token to start from

     Returns:
         The recognised TitleItemToken, or None when nothing matches.
     """
     text_tok = Utils.asObjectOrNull(t, TextToken)
     if text_tok is not None:
         word = text_tok.term
         if word == "ТЕМА":
             # "ТЕМА" directly followed by a type item gives TYPANDTHEME
             typ_item = TitleItemToken.tryAttach(text_tok.next0_)
             if typ_item is not None and typ_item.typ == TitleItemToken.Types.TYP:
                 last = typ_item.end_token
                 if last.next0_ is not None and last.next0_.isChar(':'):
                     last = last.next0_
                 return TitleItemToken._new2501(t, last, TitleItemToken.Types.TYPANDTHEME, typ_item.value)
             # Otherwise a plain THEME marker, swallowing a trailing colon
             last = text_tok
             if text_tok.next0_ is not None and text_tok.next0_.isChar(':'):
                 last = text_tok.next0_
             return TitleItemToken(text_tok, last, TitleItemToken.Types.THEME)
         if word in ("ПО", "НА"):
             theme_tok = text_tok.next0_
             if theme_tok is not None and theme_tok.isValue("ТЕМА", None):
                 last = theme_tok
                 if last.next0_ is not None and last.next0_.isChar(':'):
                     last = last.next0_
                 return TitleItemToken(text_tok, last, TitleItemToken.Types.THEME)
         if word in ("ПЕРЕВОД", "ПЕР"):
             cur = text_tok.next0_
             # Skip the dot of an abbreviation
             if cur is not None and cur.isChar('.'):
                 cur = cur.next0_
             # Both the Latin and the Cyrillic letter are accepted here
             if isinstance(cur, TextToken) and cur.term in ("C", "С"):
                 cur = cur.next0_
                 if isinstance(cur, TextToken):
                     return TitleItemToken(t, cur, TitleItemToken.Types.TRANSLATE)
         if word in ("СЕКЦИЯ", "SECTION", "СЕКЦІЯ"):
             last = text_tok.next0_
             if last is not None and last.isChar(':'):
                 last = last.next0_
             bracket = BracketHelper.tryParse(last, BracketParseAttr.NO, 100)
             if bracket is not None:
                 last = bracket.end_token
             elif last != text_tok.next0_:
                 # A colon was skipped: extend to the end of the line
                 while last is not None and not last.is_newline_after:
                     last = last.next0_
                 if last is None:
                     return None
             if last != text_tok.next0_:
                 return TitleItemToken(text_tok, last, TitleItemToken.Types.DUST)
         # Find where a speciality could start, if one is introduced at all
         spec_start = None
         if text_tok.isValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ"):
             spec_start = text_tok.next0_
         elif (text_tok.morph.class0_.is_preposition and text_tok.next0_ is not None
               and text_tok.next0_.isValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ")):
             spec_start = text_tok.next0_.next0_
         elif text_tok.isChar('/') and text_tok.is_newline_before:
             spec_start = text_tok.next0_
         if spec_start is not None:
             if spec_start.isCharOf(":") or spec_start.is_hiphen:
                 spec_start = spec_start.next0_
             introduced = TitleItemToken.__tryAttachSpeciality(spec_start, True)
             if introduced is not None:
                 introduced.begin_token = t
                 return introduced
     bare_spec = TitleItemToken.__tryAttachSpeciality(t, False)
     if bare_spec is not None:
         return bare_spec
     if isinstance(t, ReferentToken):
         return None
     phrase = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
     if phrase is not None:
         label = phrase.getNormalCaseText(None, False, MorphGender.UNDEFINED, False)
         hit = TitleItemToken.M_TERMINS.tryParse(phrase.end_token, TerminParseAttr.NO)
         if hit is not None:
             kind = Utils.valToEnum(hit.termin.tag, TitleItemToken.Types)
             if kind == TitleItemToken.Types.TYP:
                 theme_item = TitleItemToken.tryAttach(hit.end_token.next0_)
                 if theme_item is not None and theme_item.typ == TitleItemToken.Types.THEME:
                     return TitleItemToken._new2501(phrase.begin_token, theme_item.end_token, TitleItemToken.Types.TYPANDTHEME, label)
                 if label in ("РАБОТА", "РОБОТА", "ПРОЕКТ"):
                     return None
                 last = hit.end_token
                 if label in ("ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ"):
                     # Look ahead for the degree to refine the label
                     label_by_degree = {
                         "доктор наук": "ДОКТОРСКАЯ ДИССЕРТАЦИЯ",
                         "кандидат наук": "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ",
                         "магистр": "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ",
                     }
                     misses = 0
                     cur = last.next0_
                     while cur is not None:
                         # Prepositions and "СОИСКАНИЕ" are skipped without penalty
                         if cur.morph.class0_.is_preposition or cur.isValue("СОИСКАНИЕ", ""):
                             cur = cur.next0_
                             continue
                         degree = NounPhraseHelper.tryParse(cur, NounPhraseParseAttr.NO, 0)
                         if degree is not None and degree.noun.isValue("СТЕПЕНЬ", "СТУПІНЬ"):
                             # The "degree" phrase is absorbed into the item
                             last = degree.end_token
                             cur = last.next0_
                             continue
                         person = last.kit.processReferent("PERSON", cur)
                         if person is not None and isinstance(person.referent, PersonPropertyReferent):
                             prop = Utils.asObjectOrNull(person.referent, PersonPropertyReferent)
                             refined = label_by_degree.get(prop.name)
                             if refined is not None:
                                 last = person.end_token
                                 label = refined
                                 break
                         if (cur.isValue("ДОКТОР", None) or cur.isValue("КАНДИДАТ", None)
                                 or cur.isValue("МАГИСТР", "МАГІСТР")):
                             last = cur
                             science = NounPhraseHelper.tryParse(cur.next0_, NounPhraseParseAttr.NO, 0)
                             if science is not None and science.end_token.isValue("НАУК", None):
                                 last = science.end_token
                             if cur.isValue("МАГИСТР", "МАГІСТР"):
                                 label = "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ"
                             elif cur.isValue("ДОКТОР", None):
                                 label = "ДОКТОРСКАЯ ДИССЕРТАЦИЯ"
                             else:
                                 label = "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"
                             break
                         # Give up once more than three tokens failed to match
                         misses += 1
                         if misses > 3:
                             break
                         cur = cur.next0_
                 if last.next0_ is not None and last.next0_.isChar('.'):
                     last = last.next0_
                 if label.endswith("ОТЧЕТ") and last.next0_ is not None and last.next0_.isValue("О", None):
                     # "ОТЧЕТ О ...": include the prepositional noun phrase
                     about = NounPhraseHelper.tryParse(last.next0_, NounPhraseParseAttr.PARSEPREPOSITION, 0)
                     if about is not None and about.morph.case_.is_prepositional:
                         last = about.end_token
                 return TitleItemToken._new2501(phrase.begin_token, last, kind, label)
     direct = TitleItemToken.M_TERMINS.tryParse(t, TerminParseAttr.NO)
     if direct is not None:
         return TitleItemToken(direct.begin_token, direct.end_token, Utils.valToEnum(direct.termin.tag, TitleItemToken.Types))
     if BracketHelper.canBeStartOfSequence(t, False, False):
         # Termin wrapped in brackets: the closing bracket ends the item
         inner = TitleItemToken.M_TERMINS.tryParse(t.next0_, TerminParseAttr.NO)
         if inner is not None and BracketHelper.canBeEndOfSequence(inner.end_token.next0_, False, None, False):
             closing = inner.end_token.next0_
             return TitleItemToken(inner.begin_token, closing, Utils.valToEnum(inner.termin.tag, TitleItemToken.Types))
     return None
Exemple #12
0
 def process(self, kit : 'AnalysisKit') -> None:
     """ Основная функция выделения объектов
     
     Args:
         container: 
         lastStage: 
     
     """
     ad = kit.getAnalyzerData(self)
     t = kit.first_token
     first_pass3149 = True
     while True:
         if first_pass3149: first_pass3149 = False
         else: t = t.next0_
         if (not (t is not None)): break
         tt = t
         tok = UriAnalyzer.__m_schemes.tryParse(t, TerminParseAttr.NO)
         if (tok is not None): 
             i = (tok.termin.tag)
             tt = tok.end_token
             if (tt.next0_ is not None and tt.next0_.isChar('(')): 
                 tok1 = UriAnalyzer.__m_schemes.tryParse(tt.next0_.next0_, TerminParseAttr.NO)
                 if ((tok1 is not None and tok1.termin.canonic_text == tok.termin.canonic_text and tok1.end_token.next0_ is not None) and tok1.end_token.next0_.isChar(')')): 
                     tt = tok1.end_token.next0_
             if (i == 0): 
                 if ((tt.next0_ is None or ((not tt.next0_.isCharOf(":|") and not tt.is_table_control_char)) or tt.next0_.is_whitespace_before) or tt.next0_.whitespaces_after_count > 2): 
                     continue
                 t1 = tt.next0_.next0_
                 while t1 is not None and t1.isCharOf("/\\"):
                     t1 = t1.next0_
                 if (t1 is None or t1.whitespaces_before_count > 2): 
                     continue
                 ut = UriItemToken.attachUriContent(t1, False)
                 if (ut is None): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557(tok.termin.canonic_text.lower(), ut.value)), UriReferent)
                 rt = ReferentToken(ad.registerReferent(ur), t, ut.end_token)
                 rt.begin_token = Utils.ifNotNull(UriAnalyzer.__siteBefore(t.previous), t)
                 if (rt.end_token.next0_ is not None and rt.end_token.next0_.isCharOf("/\\")): 
                     rt.end_token = rt.end_token.next0_
                 kit.embedToken(rt)
                 t = (rt)
                 continue
             if (i == 10): 
                 tt = tt.next0_
                 if (tt is None or not tt.isChar(':')): 
                     continue
                 tt = tt.next0_
                 while tt is not None: 
                     if (tt.isCharOf("/\\")): 
                         pass
                     else: 
                         break
                     tt = tt.next0_
                 if (tt is None): 
                     continue
                 if (tt.isValue("WWW", None) and tt.next0_ is not None and tt.next0_.isChar('.')): 
                     tt = tt.next0_.next0_
                 if (tt is None or tt.is_newline_before): 
                     continue
                 ut = UriItemToken.attachUriContent(tt, True)
                 if (ut is None): 
                     continue
                 if (len(ut.value) < 4): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557(tok.termin.canonic_text.lower(), ut.value)), UriReferent)
                 rt = ReferentToken(ad.registerReferent(ur), t, ut.end_token)
                 rt.begin_token = Utils.ifNotNull(UriAnalyzer.__siteBefore(t.previous), t)
                 if (rt.end_token.next0_ is not None and rt.end_token.next0_.isCharOf("/\\")): 
                     rt.end_token = rt.end_token.next0_
                 kit.embedToken(rt)
                 t = (rt)
                 continue
             if (i == 2): 
                 if (tt.next0_ is None or not tt.next0_.isChar('.') or tt.next0_.is_whitespace_before): 
                     continue
                 if (tt.next0_.is_whitespace_after and tok.termin.canonic_text != "WWW"): 
                     continue
                 ut = UriItemToken.attachUriContent(tt.next0_.next0_, True)
                 if (ut is None): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557("http", ut.value)), UriReferent)
                 rt = ReferentToken(ur, t, ut.end_token)
                 rt.begin_token = Utils.ifNotNull(UriAnalyzer.__siteBefore(t.previous), t)
                 if (rt.end_token.next0_ is not None and rt.end_token.next0_.isCharOf("/\\")): 
                     rt.end_token = rt.end_token.next0_
                 kit.embedToken(rt)
                 t = (rt)
                 continue
             if (i == 1): 
                 sch = tok.termin.canonic_text
                 ut = None
                 if (sch == "ISBN"): 
                     ut = UriItemToken.attachISBN(tt.next0_)
                     if ((ut is None and t.previous is not None and t.previous.isChar('(')) and t.next0_ is not None and t.next0_.isChar(')')): 
                         tt0 = t.previous.previous
                         while tt0 is not None: 
                             if (tt0.whitespaces_after_count > 2): 
                                 break
                             if (tt0.is_whitespace_before): 
                                 ut = UriItemToken.attachISBN(tt0)
                                 if (ut is not None and ut.end_token.next0_ != t.previous): 
                                     ut = (None)
                                 break
                             tt0 = tt0.previous
                 elif ((sch == "RFC" or sch == "ISO" or sch == "ОКФС") or sch == "ОКОПФ"): 
                     ut = UriItemToken.attachISOContent(tt.next0_, ":")
                 elif (sch == "ГОСТ"): 
                     ut = UriItemToken.attachISOContent(tt.next0_, "-.")
                 elif (sch == "ТУ"): 
                     if (tok.chars.is_all_upper): 
                         ut = UriItemToken.attachISOContent(tt.next0_, "-.")
                         if (ut is not None and (ut.length_char < 10)): 
                             ut = (None)
                 else: 
                     ut = UriItemToken.attachBBK(tt.next0_)
                 if (ut is None): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560(ut.value, sch)), UriReferent)
                 if (ut.begin_char < t.begin_char): 
                     rt = ReferentToken(ur, ut.begin_token, t)
                     if (t.next0_ is not None and t.next0_.isChar(')')): 
                         rt.end_token = t.next0_
                 else: 
                     rt = ReferentToken(ur, t, ut.end_token)
                 if (t.previous is not None and t.previous.isValue("КОД", None)): 
                     rt.begin_token = t.previous
                 if (ur.scheme.startswith("ОК")): 
                     UriAnalyzer.__checkDetail(rt)
                 kit.embedToken(rt)
                 t = (rt)
                 if (ur.scheme.startswith("ОК")): 
                     while t.next0_ is not None:
                         if (t.next0_.is_comma_and and (isinstance(t.next0_.next0_, NumberToken))): 
                             pass
                         else: 
                             break
                         ut = UriItemToken.attachBBK(t.next0_.next0_)
                         if (ut is None): 
                             break
                         ur = (Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560(ut.value, sch)), UriReferent))
                         rt = ReferentToken(ur, t.next0_.next0_, ut.end_token)
                         UriAnalyzer.__checkDetail(rt)
                         kit.embedToken(rt)
                         t = (rt)
                 continue
             if (i == 3): 
                 t0 = tt.next0_
                 while t0 is not None:
                     if (t0.isCharOf(":|") or t0.is_table_control_char or t0.is_hiphen): 
                         t0 = t0.next0_
                     else: 
                         break
                 if (t0 is None): 
                     continue
                 ut = UriItemToken.attachSkype(t0)
                 if (ut is None): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560(ut.value.lower(), ("skype" if tok.termin.canonic_text == "SKYPE" else tok.termin.canonic_text))), UriReferent)
                 rt = ReferentToken(ur, t, ut.end_token)
                 kit.embedToken(rt)
                 t = (rt)
                 continue
             if (i == 4): 
                 t0 = tt.next0_
                 if (t0 is not None and ((t0.isChar(':') or t0.is_hiphen))): 
                     t0 = t0.next0_
                 if (t0 is None): 
                     continue
                 ut = UriItemToken.attachIcqContent(t0)
                 if (ut is None): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560(ut.value, "ICQ")), UriReferent)
                 rt = ReferentToken(ur, t, t0)
                 kit.embedToken(rt)
                 t = (rt)
                 continue
             if (i == 5 or i == 6): 
                 t0 = tt.next0_
                 has_tab_cel = False
                 is_iban = False
                 first_pass3150 = True
                 while True:
                     if first_pass3150: first_pass3150 = False
                     else: t0 = t0.next0_
                     if (not (t0 is not None)): break
                     if ((((t0.isValue("БАНК", None) or t0.morph.class0_.is_preposition or t0.is_hiphen) or t0.isCharOf(".:") or t0.isValue("РУБЛЬ", None)) or t0.isValue("РУБ", None) or t0.isValue("ДОЛЛАР", None)) or t0.isValue("№", None) or t0.isValue("N", None)): 
                         pass
                     elif (t0.is_table_control_char): 
                         has_tab_cel = True
                     elif (t0.isCharOf("\\/") and t0.next0_ is not None and t0.next0_.isValue("IBAN", None)): 
                         is_iban = True
                         t0 = t0.next0_
                     elif (t0.isValue("IBAN", None)): 
                         is_iban = True
                     elif (isinstance(t0, TextToken)): 
                         npt = NounPhraseHelper.tryParse(t0, NounPhraseParseAttr.NO, 0)
                         if (npt is not None and npt.morph.case_.is_genitive): 
                             t0 = npt.end_token
                             continue
                         break
                     else: 
                         break
                 if (t0 is None): 
                     continue
                 ur2 = None
                 ur2begin = None
                 ur2end = None
                 t00 = t0
                 val = t0.getSourceText()
                 if (str.isdigit(val[0]) and ((((i == 6 or tok.termin.canonic_text == "ИНН" or tok.termin.canonic_text == "БИК") or tok.termin.canonic_text == "ОГРН" or tok.termin.canonic_text == "СНИЛС") or tok.termin.canonic_text == "ОКПО"))): 
                     if (t0.chars.is_letter): 
                         continue
                     if (Utils.isNullOrEmpty(val) or not str.isdigit(val[0])): 
                         continue
                     if (t0.length_char < 9): 
                         tmp = io.StringIO()
                         print(val, end="", file=tmp)
                         ttt = t0.next0_
                         first_pass3151 = True
                         while True:
                             if first_pass3151: first_pass3151 = False
                             else: ttt = ttt.next0_
                             if (not (ttt is not None)): break
                             if (ttt.whitespaces_before_count > 1): 
                                 break
                             if (isinstance(ttt, NumberToken)): 
                                 print(ttt.getSourceText(), end="", file=tmp)
                                 t0 = ttt
                                 continue
                             if (ttt.is_hiphen or ttt.isChar('.')): 
                                 if (ttt.next0_ is None or not ((isinstance(ttt.next0_, NumberToken)))): 
                                     break
                                 if (ttt.is_whitespace_after or ttt.is_whitespace_before): 
                                     break
                                 continue
                             break
                         val = (None)
                         if (tmp.tell() == 20): 
                             val = Utils.toStringStringIO(tmp)
                         elif (tmp.tell() == 9 and tok.termin.canonic_text == "БИК"): 
                             val = Utils.toStringStringIO(tmp)
                         elif (((tmp.tell() == 10 or tmp.tell() == 12)) and tok.termin.canonic_text == "ИНН"): 
                             val = Utils.toStringStringIO(tmp)
                         elif (tmp.tell() >= 15 and tok.termin.canonic_text == "Л/С"): 
                             val = Utils.toStringStringIO(tmp)
                         elif (tmp.tell() >= 11 and ((tok.termin.canonic_text == "ОГРН" or tok.termin.canonic_text == "СНИЛС"))): 
                             val = Utils.toStringStringIO(tmp)
                         elif (tok.termin.canonic_text == "ОКПО"): 
                             val = Utils.toStringStringIO(tmp)
                     if (val is None): 
                         continue
                 elif (not ((isinstance(t0, NumberToken)))): 
                     if ((isinstance(t0, TextToken)) and is_iban): 
                         tmp1 = io.StringIO()
                         t1 = None
                         ttt = t0
                         first_pass3152 = True
                         while True:
                             if first_pass3152: first_pass3152 = False
                             else: ttt = ttt.next0_
                             if (not (ttt is not None)): break
                             if (ttt.is_newline_before and ttt != t0): 
                                 break
                             if (ttt.is_hiphen): 
                                 continue
                             if (not ((isinstance(ttt, NumberToken)))): 
                                 if (not ((isinstance(ttt, TextToken))) or not ttt.chars.is_latin_letter): 
                                     break
                             print(ttt.getSourceText(), end="", file=tmp1)
                             t1 = ttt
                             if (tmp1.tell() >= 34): 
                                 break
                         if (tmp1.tell() < 10): 
                             continue
                         ur1 = UriReferent._new2560(Utils.toStringStringIO(tmp1), tok.termin.canonic_text)
                         ur1.addSlot(UriReferent.ATTR_DETAIL, "IBAN", False, 0)
                         rt1 = ReferentToken(ad.registerReferent(ur1), t, t1)
                         kit.embedToken(rt1)
                         t = (rt1)
                         continue
                     if (not t0.isCharOf("/\\") or t0.next0_ is None): 
                         continue
                     tok2 = UriAnalyzer.__m_schemes.tryParse(t0.next0_, TerminParseAttr.NO)
                     if (tok2 is None or not ((isinstance(tok2.termin.tag, int))) or (tok2.termin.tag) != i): 
                         continue
                     t0 = tok2.end_token.next0_
                     while t0 is not None:
                         if (t0.isCharOf(":N№")): 
                             t0 = t0.next0_
                         elif (t0.is_table_control_char): 
                             t0 = t0.next0_
                             t00 = t0
                             has_tab_cel = True
                         else: 
                             break
                     if (not ((isinstance(t0, NumberToken)))): 
                         continue
                     tmp = io.StringIO()
                     while t0 is not None: 
                         if (not ((isinstance(t0, NumberToken)))): 
                             break
                         print(t0.getSourceText(), end="", file=tmp)
                         t0 = t0.next0_
                     if (t0 is None or not t0.isCharOf("/\\,") or not ((isinstance(t0.next0_, NumberToken)))): 
                         continue
                     val = Utils.toStringStringIO(tmp)
                     Utils.setLengthStringIO(tmp, 0)
                     ur2begin = t0.next0_
                     t0 = t0.next0_
                     while t0 is not None: 
                         if (not ((isinstance(t0, NumberToken)))): 
                             break
                         if (t0.whitespaces_before_count > 4 and tmp.tell() > 0): 
                             break
                         print(t0.getSourceText(), end="", file=tmp)
                         ur2end = t0
                         t0 = t0.next0_
                     ur2 = (Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557(tok2.termin.canonic_text, Utils.toStringStringIO(tmp))), UriReferent))
                 if (len(val) < 5): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560(val, tok.termin.canonic_text)), UriReferent)
                 rt = ReferentToken(ur, t, (t0 if ur2begin is None else ur2begin.previous))
                 if (has_tab_cel): 
                     rt.begin_token = t00
                 if (ur.scheme.startswith("ОК")): 
                     UriAnalyzer.__checkDetail(rt)
                 ttt = t.previous
                 first_pass3153 = True
                 while True:
                     if first_pass3153: first_pass3153 = False
                     else: ttt = ttt.previous
                     if (not (ttt is not None)): break
                     if (ttt.is_table_control_char): 
                         break
                     if (ttt.morph.class0_.is_preposition): 
                         continue
                     if (ttt.isValue("ОРГАНИЗАЦИЯ", None)): 
                         continue
                     if (ttt.isValue("НОМЕР", None) or ttt.isValue("КОД", None)): 
                         rt.begin_token = ttt
                         t = rt.begin_token
                     break
                 kit.embedToken(rt)
                 t = (rt)
                 if (ur2 is not None): 
                     rt2 = ReferentToken(ur2, ur2begin, ur2end)
                     kit.embedToken(rt2)
                     t = (rt2)
                 continue
             continue
         if (t.isChar('@')): 
             u1s = UriItemToken.attachMailUsers(t.previous)
             if (u1s is None): 
                 continue
             u2 = UriItemToken.attachDomainName(t.next0_, False, True)
             if (u2 is None): 
                 continue
             for ii in range(len(u1s) - 1, -1, -1):
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2560("{0}@{1}".format(u1s[ii].value, u2.value).lower(), "mailto")), UriReferent)
                 b = u1s[ii].begin_token
                 t0 = b.previous
                 if (t0 is not None and t0.isChar(':')): 
                     t0 = t0.previous
                 if (t0 is not None and ii == 0): 
                     br = False
                     ttt = t0
                     first_pass3154 = True
                     while True:
                         if first_pass3154: first_pass3154 = False
                         else: ttt = ttt.previous
                         if (not (ttt is not None)): break
                         if (not ((isinstance(ttt, TextToken)))): 
                             break
                         if (ttt != t0 and ttt.whitespaces_after_count > 1): 
                             break
                         if (ttt.isChar(')')): 
                             br = True
                             continue
                         if (ttt.isChar('(')): 
                             if (not br): 
                                 break
                             br = False
                             continue
                         if (ttt.isValue("EMAIL", None) or ttt.isValue("MAILTO", None)): 
                             b = ttt
                             break
                         if (ttt.isValue("MAIL", None)): 
                             b = ttt
                             if ((ttt.previous is not None and ttt.previous.is_hiphen and ttt.previous.previous is not None) and ((ttt.previous.previous.isValue("E", None) or ttt.previous.previous.isValue("Е", None)))): 
                                 b = ttt.previous.previous
                             break
                         if (ttt.isValue("ПОЧТА", None) or ttt.isValue("АДРЕС", None)): 
                             b = t0
                             ttt = ttt.previous
                             if (ttt is not None and ttt.isChar('.')): 
                                 ttt = ttt.previous
                             if (ttt is not None and ((t0.isValue("ЭЛ", None) or ttt.isValue("ЭЛЕКТРОННЫЙ", None)))): 
                                 b = ttt
                             if (b.previous is not None and b.previous.isValue("АДРЕС", None)): 
                                 b = b.previous
                             break
                         if (ttt.morph.class0_.is_preposition): 
                             continue
                 rt = ReferentToken(ur, b, (u2.end_token if ii == (len(u1s) - 1) else u1s[ii].end_token))
                 kit.embedToken(rt)
                 t = (rt)
             continue
         if (not t.morph.language.is_cyrillic): 
             if (t.is_whitespace_before or ((t.previous is not None and t.previous.isCharOf(",(")))): 
                 u1 = UriItemToken.attachUrl(t)
                 if (u1 is not None): 
                     if (u1.is_whitespace_after or u1.end_token.next0_ is None or not u1.end_token.next0_.isChar('@')): 
                         ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557("http", u1.value)), UriReferent)
                         rt = ReferentToken(ur, u1.begin_token, u1.end_token)
                         rt.begin_token = Utils.ifNotNull(UriAnalyzer.__siteBefore(u1.begin_token.previous), u1.begin_token)
                         kit.embedToken(rt)
                         t = (rt)
                         continue
         if ((isinstance(t, TextToken)) and not t.is_whitespace_after and t.length_char > 2): 
             if (UriAnalyzer.__siteBefore(t.previous) is not None): 
                 ut = UriItemToken.attachUriContent(t, True)
                 if (ut is None or ut.value.find('.') <= 0 or ut.value.find('@') > 0): 
                     continue
                 ur = Utils.asObjectOrNull(ad.registerReferent(UriReferent._new2557("http", ut.value)), UriReferent)
                 rt = ReferentToken(ur, t, ut.end_token)
                 rt.begin_token = UriAnalyzer.__siteBefore(t.previous)
                 if (rt.end_token.next0_ is not None and rt.end_token.next0_.isCharOf("/\\")): 
                     rt.end_token = rt.end_token.next0_
                 kit.embedToken(rt)
                 t = (rt)
                 continue
         if ((t.chars.is_latin_letter and not t.chars.is_all_lower and t.next0_ is not None) and not t.is_whitespace_after): 
             if (t.next0_.isChar('/')): 
                 rt = UriAnalyzer.__TryAttachLotus(Utils.asObjectOrNull(t, TextToken))
                 if (rt is not None): 
                     rt.referent = ad.registerReferent(rt.referent)
                     kit.embedToken(rt)
                     t = (rt)
                     continue
Exemple #13
0
 def create(t: 'Token', names: 'TerminCollection') -> 'BlockLine':
     """ Build a BlockLine for the text line that starts at the given token and
     try to classify it (numbering prefix, heading type, word statistics).
     
     Args:
         t(Token): first token of the line
         names(TerminCollection): optional collection of names; when the text after
             the prefix matches an entry that ends at a line break, the line is
             marked with is_exist_name (may be None)
     
     Returns:
         BlockLine: the analysed line. None is returned when t is None, and also
             when the line is still unclassified and its leading noun phrase
             (after any numbering) runs exactly to the end of the line, is not
             accepted by BlockLine.__isPub, and is headed by one of СПИСОК /
             УКАЗАТЕЛЬ / ПОЛОЖЕНИЕ / ВЫВОД / РЕЗУЛЬТАТ.
     
     """
     if (t is None):
         return None
     res = BlockLine(t, t)
     # Extend the line forward until the next token that starts a new line.
     tt = t
     while tt is not None:
         if (tt != t and tt.is_newline_before):
             break
         else:
             res.end_token = tt
         tt = tt.next0_
     # nums counts the numbering items consumed below; it is not read anywhere
     # else in this function.
     nums = 0
     # Consume a leading numbering prefix: a NumberToken or a Roman numeral,
     # followed either by '.' or by a text token that is not all lowercase.
     while t is not None and t.next0_ is not None and t.end_char <= res.end_char:
         if (isinstance(t, NumberToken)):
             pass
         else:
             rom = NumberHelper.tryParseRoman(t)
             if (rom is not None and rom.end_token.next0_ is not None):
                 t = rom.end_token
             else:
                 break
         if (t.next0_.isChar('.')):
             pass
         elif ((isinstance(t.next0_, TextToken))
               and not t.next0_.chars.is_all_lower):
             pass
         else:
             break
         res.number_end = t
         t = t.next0_
         # A dot after the number belongs to the numbering prefix as well.
         if (t.isChar('.') and t.next0_ is not None):
             res.number_end = t
             t = t.next0_
         # The numbering was the whole line: nothing left to classify.
         if (t.is_newline_before):
             return res
         nums += 1
     # Look up a heading keyword in the ontology, first directly at t, then at
     # the noun of a multi-token noun phrase starting at t.
     tok = BlockLine.__m_ontology.tryParse(t, TerminParseAttr.NO)
     if (tok is None):
         npt1 = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
         if (npt1 is not None and npt1.end_token != npt1.begin_token):
             tok = BlockLine.__m_ontology.tryParse(npt1.noun.begin_token,
                                                   TerminParseAttr.NO)
     # A keyword directly preceded by ':' is rejected.
     if (tok is not None):
         if (t.previous is not None and t.previous.isChar(':')):
             tok = (None)
     if (tok is not None):
         typ_ = Utils.valToEnum(tok.termin.tag, BlkTyps)
         # CONSLUSION is kept only when t ends its line, or when it is followed
         # by a preposition and then an ontology term tagged CHAPTER.
         if (typ_ == BlkTyps.CONSLUSION):
             if (t.is_newline_after):
                 pass
             elif (t.next0_ is not None
                   and t.next0_.morph.class0_.is_preposition
                   and t.next0_.next0_ is not None):
                 tok2 = BlockLine.__m_ontology.tryParse(
                     t.next0_.next0_, TerminParseAttr.NO)
                 if (tok2 is not None and (Utils.valToEnum(
                         tok2.termin.tag, BlkTyps)) == BlkTyps.CHAPTER):
                     pass
                 else:
                     tok = (None)
             else:
                 tok = (None)
         # Reject the keyword when the token language differs from the base
         # language of the analysis kit.
         if (t.kit.base_language != t.morph.language):
             tok = (None)
         # INDEX (other than the word ОГЛАВЛЕНИЕ) that continues on the same
         # line: reject when no noun phrase follows, or when the following noun
         # phrase ends the line and is in the genitive case.
         if (typ_ == BlkTyps.INDEX and not t.isValue("ОГЛАВЛЕНИЕ", None)):
             if (not t.is_newline_after and t.next0_ is not None):
                 npt = NounPhraseHelper.tryParse(t.next0_,
                                                 NounPhraseParseAttr.NO, 0)
                 if (npt is not None and npt.is_newline_after
                         and npt.morph.case_.is_genitive):
                     tok = (None)
                 elif (npt is None):
                     tok = (None)
         # ВВЕДЕНИЕ followed on the same line by a genitive noun phrase is not
         # accepted as an INTRO heading.
         if ((typ_ == BlkTyps.INTRO and tok is not None
              and not tok.is_newline_after)
                 and t.isValue("ВВЕДЕНИЕ", None)):
             npt = NounPhraseHelper.tryParse(t.next0_,
                                             NounPhraseParseAttr.NO, 0)
             if (npt is not None and npt.morph.case_.is_genitive):
                 tok = (None)
         if (tok is not None):
             # Without a numeric prefix the keyword itself plays the role of
             # the "number" part of the line.
             if (res.number_end is None):
                 res.number_end = tok.end_token
                 if (res.number_end.end_char > res.end_char):
                     res.end_token = res.number_end
             res.typ = typ_
             t = tok.end_token
             # Absorb a ':' or '.' that follows the keyword.
             if (t.next0_ is not None and t.next0_.isCharOf(":.")):
                 t = t.next0_
                 res.end_token = t
             if (t.is_newline_after or t.next0_ is None):
                 return res
             t = t.next0_
     # '§' followed by a number marks a chapter; number_end is set to the '§'
     # token and t moves onto the number.
     if (t.isChar('§') and (isinstance(t.next0_, NumberToken))):
         res.typ = BlkTyps.CHAPTER
         res.number_end = t
         t = t.next0_
     if (names is not None):
         tok2 = names.tryParse(t, TerminParseAttr.NO)
         if (tok2 is not None and tok2.end_token.is_newline_after):
             res.end_token = tok2.end_token
             res.is_exist_name = True
             if (res.typ == BlkTyps.UNDEFINED):
                 # Re-analyse the text after the numbering (without names) and
                 # adopt its type if it is LITERATURE / INTRO / CONSLUSION;
                 # otherwise treat the line as a chapter heading.
                 li2 = BlockLine.create((None if res.number_end is None else
                                         res.number_end.next0_), None)
                 if (li2 is not None
                         and ((li2.typ == BlkTyps.LITERATURE
                               or li2.typ == BlkTyps.INTRO
                               or li2.typ == BlkTyps.CONSLUSION))):
                     res.typ = li2.typ
                 else:
                     res.typ = BlkTyps.CHAPTER
             return res
     # Detect a tail of dots before a trailing number or dot and move t1 back
     # past it (presumably a table-of-contents dot leader with a page number -
     # TODO confirm).
     t1 = res.end_token
     if ((((isinstance(t1, NumberToken)) or t1.isChar('.')))
             and t1.previous is not None):
         t1 = t1.previous
         if (t1.isChar('.')):
             res.has_content_item_tail = True
             while t1 is not None and t1.begin_char > res.begin_char:
                 if (not t1.isChar('.')):
                     break
                 t1 = t1.previous
     # Gather statistics over the tokens from t up to t1: word / non-word
     # counters, the all-upper-case flag and the presence of a verb.
     res.is_all_upper = True
     while t is not None and t.end_char <= t1.end_char:
         if (not ((isinstance(t, TextToken))) or not t.chars.is_letter):
             res.not_words += 1
         else:
             mc = t.getMorphClassInDictionary()
             if (mc.is_undefined):
                 res.not_words += 1
             elif (t.length_char > 2):
                 res.words += 1
             if (not t.chars.is_all_upper):
                 res.is_all_upper = False
             # Pure verbs count, except terms ending in "ING".
             if ((t).is_pure_verb):
                 if (not (t).term.endswith("ING")):
                     res.has_verb = True
         t = t.next0_
     # Still unclassified: try to recognise the heading from the noun phrase
     # that opens the line (after any numbering prefix).
     if (res.typ == BlkTyps.UNDEFINED):
         npt = NounPhraseHelper.tryParse(
             (res.begin_token if res.number_end is None else
              res.number_end.next0_), NounPhraseParseAttr.NO, 0)
         if (npt is not None):
             if (npt.noun.isValue("ХАРАКТЕРИСТИКА", None)
                     or npt.noun.isValue("СОДЕРЖАНИЕ", "ЗМІСТ")):
                 # INTRO candidate: the rest of the line must consist only of
                 # dots and genitive noun phrases.
                 ok = True
                 tt = npt.end_token.next0_
                 first_pass2779 = True
                 while True:
                     if first_pass2779: first_pass2779 = False
                     else: tt = tt.next0_
                     if (not (tt is not None
                              and tt.end_char <= res.end_char)):
                         break
                     if (tt.isChar('.')):
                         continue
                     npt2 = NounPhraseHelper.tryParse(
                         tt, NounPhraseParseAttr.NO, 0)
                     if (npt2 is None or not npt2.morph.case_.is_genitive):
                         ok = False
                         break
                     tt = npt2.end_token
                     # The phrase runs past the current line end: extend the
                     # line to the phrase end and then on to the next line
                     # break.
                     if (tt.end_char > res.end_char):
                         res.end_token = tt
                         if (not tt.is_newline_after):
                             while res.end_token.next0_ is not None:
                                 if (res.end_token.is_newline_after):
                                     break
                                 res.end_token = res.end_token.next0_
                 if (ok):
                     res.typ = BlkTyps.INTRO
                     res.is_exist_name = True
             elif (npt.noun.isValue("ВЫВОД", "ВИСНОВОК")
                   or npt.noun.isValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")):
                 # CONSLUSION candidate: the rest of the line may contain only
                 # ',' / '.' / "and" and noun phrases headed by РЕЗУЛЬТАТ,
                 # РЕКОМЕНДАЦИЯ or ИССЛЕДОВАНИЕ.
                 ok = True
                 tt = npt.end_token.next0_
                 first_pass2780 = True
                 while True:
                     if first_pass2780: first_pass2780 = False
                     else: tt = tt.next0_
                     if (not (tt is not None
                              and tt.end_char <= res.end_char)):
                         break
                     if (tt.isCharOf(",.") or tt.is_and):
                         continue
                     npt1 = NounPhraseHelper.tryParse(
                         tt, NounPhraseParseAttr.NO, 0)
                     if (npt1 is not None):
                         if (npt1.noun.isValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")
                                 or npt1.noun.isValue(
                                     "РЕКОМЕНДАЦИЯ", "РЕКОМЕНДАЦІЯ")
                                 or npt1.noun.isValue(
                                     "ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")):
                             tt = npt1.end_token
                             # Same line-extension rule as in the INTRO branch.
                             if (tt.end_char > res.end_char):
                                 res.end_token = tt
                                 if (not tt.is_newline_after):
                                     while res.end_token.next0_ is not None:
                                         if (res.end_token.is_newline_after
                                             ):
                                             break
                                         res.end_token = res.end_token.next0_
                             continue
                     ok = False
                     break
                 if (ok):
                     res.typ = BlkTyps.CONSLUSION
                     res.is_exist_name = True
             # LITERATURE candidate: the leading noun phrase lies within the
             # line and is either accepted by __isPub or headed by one of the
             # list-like nouns below.
             if (res.typ == BlkTyps.UNDEFINED and npt is not None
                     and npt.end_char <= res.end_char):
                 ok = False
                 # publ counts the noun phrases accepted by __isPub.
                 publ = 0
                 if (BlockLine.__isPub(npt)):
                     ok = True
                     publ = 1
                 elif ((npt.noun.isValue("СПИСОК", None)
                        or npt.noun.isValue("УКАЗАТЕЛЬ", "ПОКАЖЧИК")
                        or npt.noun.isValue("ПОЛОЖЕНИЕ", "ПОЛОЖЕННЯ"))
                       or npt.noun.isValue("ВЫВОД", "ВИСНОВОК")
                       or npt.noun.isValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")):
                     # The phrase alone fills the line: no BlockLine at all.
                     if (npt.end_char == res.end_char):
                         return None
                     ok = True
                 if (ok):
                     if (npt.begin_token == npt.end_token
                             and npt.noun.isValue("СПИСОК", None)
                             and npt.end_char == res.end_char):
                         ok = False
                     # The rest of the line may contain only punctuation,
                     # "and", prepositions, ОТРАЖЕНЫ, and noun phrases that are
                     # publications or are headed by РАБОТА / ИССЛЕДОВАНИЕ /
                     # АВТОР / ТРУД / ТЕМА / ДИССЕРТАЦИЯ.
                     tt = npt.end_token.next0_
                     first_pass2781 = True
                     while True:
                         if first_pass2781: first_pass2781 = False
                         else: tt = tt.next0_
                         if (not (tt is not None
                                  and tt.end_char <= res.end_char)):
                             break
                         if (tt.isCharOf(",.:") or tt.is_and
                                 or tt.morph.class0_.is_preposition):
                             continue
                         if (tt.isValue("ОТРАЖЕНЫ", "ВІДОБРАЖЕНІ")):
                             continue
                         # npt is reused here for the follow-up phrases.
                         npt = NounPhraseHelper.tryParse(
                             tt, NounPhraseParseAttr.NO, 0)
                         if (npt is None):
                             ok = False
                             break
                         if (((BlockLine.__isPub(npt) or npt.noun.isValue(
                                 "РАБОТА", "РОБОТА") or npt.noun.isValue(
                                     "ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ"))
                              or npt.noun.isValue("АВТОР", None)
                              or npt.noun.isValue("ТРУД", "ПРАЦЯ"))
                                 or npt.noun.isValue("ТЕМА", None)
                                 or npt.noun.isValue(
                                     "ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ")):
                             tt = npt.end_token
                             if (BlockLine.__isPub(npt)):
                                 publ += 1
                             # Same line-extension rule as in the branches
                             # above.
                             if (tt.end_char > res.end_char):
                                 res.end_token = tt
                                 if (not tt.is_newline_after):
                                     while res.end_token.next0_ is not None:
                                         if (res.end_token.is_newline_after
                                             ):
                                             break
                                         res.end_token = res.end_token.next0_
                             continue
                         ok = False
                         break
                     if (ok):
                         res.typ = BlkTyps.LITERATURE
                         res.is_exist_name = True
                         # No publication phrase was seen and the line ends
                         # within the first two thirds of the text: downgrade
                         # to MISC (numbered line) or back to UNDEFINED.
                         if (publ == 0 and (res.end_char < ((math.floor(
                             (len(res.kit.sofa.text) * 2) / 3))))):
                             if (res.number_end is not None):
                                 res.typ = BlkTyps.MISC
                             else:
                                 res.typ = BlkTyps.UNDEFINED
     return res
Exemple #14
0
 def tryParse(t: 'Token',
              typ: 'BracketParseAttr' = BracketParseAttr.NO,
              max_tokens: int = 100) -> 'BracketSequenceToken':
     """ Try to recover a sequence enclosed in quotes or brackets that starts
     at the given token.
     
     Args:
         t(Token): token that is expected to be the opening bracket / quote
         typ(BracketParseAttr): extraction options (flags)
         max_tokens(int): maximum number of non-bracket tokens to scan (in case
             the closing quote was forgotten)
     
     Returns:
         BracketSequenceToken: the recovered sequence (nested sequences that
             were found are appended to its ``internal`` list), or None when t
             cannot start a sequence or no acceptable closing bracket is found.
     
     """
     t0 = t
     cou = 0
     if (not BracketHelper.canBeStartOfSequence(t0, False, False)):
         return None
     # br_list collects a Bracket wrapper for every bracket character seen,
     # starting with the opening one.
     br_list = list()
     br_list.append(BracketHelper.Bracket(t0))
     # cou counts non-bracket tokens, crlf counts line breaks, last is the last
     # token visited, lev is the nesting level used for asymmetric brackets.
     cou = 0
     crlf = 0
     last = None
     lev = 1
     # Level tracking is used when the opening char is listed in
     # M_ASSYMOPEN_CHARS and is not '«'.
     is_assim = br_list[
         0].char0_ != '«' and BracketHelper.M_ASSYMOPEN_CHARS.find(
             br_list[0].char0_) >= 0
     # Phase 1: scan forward collecting brackets until a stop condition.
     t = t0.next0_
     first_pass2802 = True
     while True:
         if first_pass2802: first_pass2802 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char):
             break
         last = t
         if (t.isCharOf(BracketHelper.M_OPEN_CHARS)
                 or t.isCharOf(BracketHelper.M_CLOSE_CHARS)):
             # A bracket at the start of a new line (multi-line mode off) that
             # is strongly indented or could open its own sequence ends the
             # scan - unless it is '(' and the sequence was not opened by '('.
             if (t.is_newline_before
                     and (((typ) & (BracketParseAttr.CANBEMANYLINES)))
                     == (BracketParseAttr.NO)):
                 if (t.whitespaces_before_count > 10
                         or BracketHelper.canBeStartOfSequence(
                             t, False, False)):
                     if (t.isChar('(') and not t0.isChar('(')):
                         pass
                     else:
                         last = t.previous
                         break
             bb = BracketHelper.Bracket(t)
             br_list.append(bb)
             # Too many brackets: stop scanning.
             if (len(br_list) > 20):
                 break
             # Third bracket that must close both the first and the second
             # one: look ahead on the same line to decide whether to go on.
             if ((len(br_list) == 3 and br_list[1].can_be_open
                  and bb.can_be_close) and BracketHelper.__mustBeCloseChar(
                      bb.char0_, br_list[1].char0_)
                     and BracketHelper.__mustBeCloseChar(
                         bb.char0_, br_list[0].char0_)):
                 ok = False
                 tt = t.next0_
                 while tt is not None:
                     if (tt.is_newline_before):
                         break
                     if (tt.isChar(',')):
                         break
                     if (tt.isChar('.')):
                         # After a '.', search the rest of the line for the
                         # next bracket char; accept if it can end a sequence
                         # and can close the first bracket.
                         tt = tt.next0_
                         while tt is not None:
                             if (tt.is_newline_before):
                                 break
                             elif (tt.isCharOf(BracketHelper.M_OPEN_CHARS)
                                   or tt.isCharOf(
                                       BracketHelper.M_CLOSE_CHARS)):
                                 bb2 = BracketHelper.Bracket(tt)
                                 if (BracketHelper.canBeEndOfSequence(
                                         tt, False, None, False)
                                         and BracketHelper.__canBeCloseChar(
                                             bb2.char0_,
                                             br_list[0].char0_)):
                                     ok = True
                                 break
                             tt = tt.next0_
                         break
                     # NOTE(review): this tests `t` (the current bracket), not
                     # `tt`. This branch is only reached when `t` is a bracket
                     # char, so the condition is always true here and `ok`
                     # becomes True at the first `tt` that is on the same line
                     # and is neither ',' nor '.'. Possibly `tt` was intended -
                     # confirm before changing.
                     if (t.isCharOf(BracketHelper.M_OPEN_CHARS)
                             or t.isCharOf(BracketHelper.M_CLOSE_CHARS)):
                         ok = True
                         break
                     tt = tt.next0_
                 if (not ok):
                     break
             if (is_assim):
                 # Same opening char again: one level deeper. A pure closing
                 # char at the same index in M_CLOSE_CHARS as the opener has in
                 # M_OPEN_CHARS: one level up (presumably the two strings are
                 # index-aligned pairs - TODO confirm); level 0 ends the scan.
                 if (bb.can_be_open and not bb.can_be_close
                         and bb.char0_ == br_list[0].char0_):
                     lev += 1
                 elif (bb.can_be_close and not bb.can_be_open and
                       BracketHelper.M_OPEN_CHARS.find(br_list[0].char0_)
                       == BracketHelper.M_CLOSE_CHARS.find(bb.char0_)):
                     lev -= 1
                     if (lev == 0):
                         break
         else:
             # Ordinary (non-bracket) token.
             cou += 1
             if ((cou) > max_tokens):
                 break
             # Unless verbs are allowed inside, certain verbs and ADDRESS
             # referents end the scan (the referent check is also inside this
             # flag test).
             if ((((typ) & (BracketParseAttr.CANCONTAINSVERBS))) == (
                     BracketParseAttr.NO)):
                 if (t.morph.language.is_cyrillic):
                     # Lowercase dictionary verb that is not an adjective, has
                     # no "страд.з." attribute and whose normal form does not
                     # end in "СЯ": stop if another bracket was already seen
                     # or the sequence was not opened by '('.
                     if (t.getMorphClassInDictionary() == MorphClass.VERB):
                         if (not t.morph.class0_.is_adjective
                                 and not t.morph.containsAttr(
                                     "страд.з.", None)):
                             if (t.chars.is_all_lower):
                                 norm = t.getNormalCaseText(
                                     None, False, MorphGender.UNDEFINED,
                                     False)
                                 if (not LanguageHelper.endsWith(
                                         norm, "СЯ")):
                                     if (len(br_list) > 1):
                                         break
                                     if (br_list[0].char0_ != '('):
                                         break
                 elif (t.morph.language.is_en):
                     if (t.morph.class0_ == MorphClass.VERB
                             and t.chars.is_all_lower):
                         break
                 r = t.getReferent()
                 if (r is not None and r.type_name == "ADDRESS"):
                     if (not t0.isChar('(')):
                         break
         # Multi-line mode: only a gap of more than one newline ends the scan;
         # the single-line checks below are skipped.
         if ((((typ) & (BracketParseAttr.CANBEMANYLINES))) !=
             (BracketParseAttr.NO)):
             if (t.is_newline_before):
                 if (t.newlines_before_count > 1):
                     break
                 crlf += 1
             continue
         # Single-line mode: decide whether a line break ends the sequence.
         if (t.is_newline_before):
             if (t.whitespaces_before_count > 15):
                 break
             crlf += 1
             # New line starting with a non-lowercase token right after '.'.
             if (not t.chars.is_all_lower):
                 if (t.previous is not None and t.previous.isChar('.')):
                     break
             # Previous token is a MetaToken whose last token can end a
             # sequence.
             if ((isinstance(t.previous, MetaToken))
                     and BracketHelper.canBeEndOfSequence(
                         (t.previous).end_token, False, None, False)):
                 break
         # More than one line break: stop once a second bracket has been seen,
         # or after more than 10 line breaks.
         if (crlf > 1):
             if (len(br_list) > 1):
                 break
             if (crlf > 10):
                 break
         if (t.isChar(';') and t.is_newline_after):
             break
     # Only the opening bracket was found, but the last token is a MetaToken at
     # the end of a line whose final token can close a sequence: use it as end.
     if ((len(br_list) == 1 and br_list[0].can_be_open and
          (isinstance(last, MetaToken))) and last.is_newline_after):
         if (BracketHelper.canBeEndOfSequence((last).end_token, False, None,
                                              False)):
             return BracketSequenceToken(t0, last)
     # br_list always holds at least the opening bracket at this point, so this
     # check is purely defensive.
     if (len(br_list) < 1):
         return None
     # Phase 2: normalise the collected list. An adjacent '<' '>' pair is
     # forced to count as an open / close pair.
     i = 1
     while i < (len(br_list) - 1):
         if (br_list[i].char0_ == '<' and br_list[i + 1].char0_ == '>'):
             br_list[i].can_be_open = True
             br_list[i + 1].can_be_close = True
         i += 1
     internals = None
     # Drop trailing open/close pairs whose closing char matches the bracket
     # just before it but cannot close the first bracket.
     while len(br_list) > 3:
         i = len(br_list) - 1
         if ((br_list[i].can_be_close and br_list[i - 1].can_be_open
              and not BracketHelper.__canBeCloseChar(
                  br_list[i].char0_, br_list[0].char0_))
                 and BracketHelper.__canBeCloseChar(br_list[i].char0_,
                                                    br_list[i - 1].char0_)):
             del br_list[len(br_list) - 2:len(br_list) - 2 + 2]
             continue
         break
     # Repeatedly extract unambiguous inner pairs (pure opener followed by a
     # pure closer) into internals and remove them from br_list.
     while len(br_list) >= 4:
         changed = False
         i = 1
         while i < (len(br_list) - 2):
             if ((br_list[i].can_be_open and not br_list[i].can_be_close
                  and br_list[i + 1].can_be_close)
                     and not br_list[i + 1].can_be_open):
                 ok = False
                 if (BracketHelper.__mustBeCloseChar(
                         br_list[i + 1].char0_, br_list[i].char0_)
                         or br_list[i].char0_ != br_list[0].char0_):
                     ok = True
                     # Special case at i == 1: the bracket after the pair is
                     # ')' while the pair's closer is not ')' and can close
                     # the first bracket - the closer replaces that ')'.
                     if ((i == 1 and ((i + 2) < len(br_list))
                          and br_list[i + 2].char0_ == ')')
                             and br_list[i + 1].char0_ != ')'
                             and BracketHelper.__canBeCloseChar(
                                 br_list[i + 1].char0_,
                                 br_list[i - 1].char0_)):
                         br_list[i + 2] = br_list[i + 1]
                 elif (i > 1 and ((i + 2) < len(br_list))
                       and BracketHelper.__mustBeCloseChar(
                           br_list[i + 2].char0_, br_list[i - 1].char0_)):
                     ok = True
                 if (ok):
                     if (internals is None):
                         internals = list()
                     internals.append(
                         BracketSequenceToken(br_list[i].source,
                                              br_list[i + 1].source))
                     del br_list[i:i + 2]
                     changed = True
                     break
             i += 1
         if (not changed):
             break
     # Phase 3: build the result from the first few remaining brackets.
     res = None
     # Four brackets: outer pair [0]..[3] with inner pair [1]..[2]; the inner
     # pair is recorded unless both pairs are directly adjacent.
     if ((len(br_list) >= 4 and br_list[1].can_be_open
          and br_list[2].can_be_close) and br_list[3].can_be_close
             and not br_list[3].can_be_open):
         if (BracketHelper.__canBeCloseChar(br_list[3].char0_,
                                            br_list[0].char0_)):
             res = BracketSequenceToken(br_list[0].source,
                                        br_list[3].source)
             if (br_list[0].source.next0_ != br_list[1].source
                     or br_list[2].source.next0_ != br_list[3].source):
                 res.internal.append(
                     BracketSequenceToken(br_list[1].source,
                                          br_list[2].source))
             if (internals is not None):
                 res.internal.extend(internals)
     # Three brackets with a pure closer in third position.
     if ((res is None and len(br_list) >= 3 and br_list[2].can_be_close)
             and not br_list[2].can_be_open):
         # With NEARCLOSEBRACKET the nearest closing candidate wins.
         if ((((typ) & (BracketParseAttr.NEARCLOSEBRACKET))) !=
             (BracketParseAttr.NO)):
             if (BracketHelper.__canBeCloseChar(br_list[1].char0_,
                                                br_list[0].char0_)):
                 return BracketSequenceToken(br_list[0].source,
                                             br_list[1].source)
         ok = True
         # Both the second and the third bracket could close the first one:
         # decide which of them is the real end.
         if (BracketHelper.__canBeCloseChar(br_list[2].char0_,
                                            br_list[0].char0_)
                 and BracketHelper.__canBeCloseChar(br_list[1].char0_,
                                                    br_list[0].char0_)
                 and br_list[1].can_be_close):
             # A line break or a lowercase word between [1] and [2] rules out
             # [2] as the end; noun phrases are skipped as a whole.
             t = br_list[1].source
             while t != br_list[2].source and t is not None:
                 if (t.is_newline_before):
                     ok = False
                     break
                 if (t.chars.is_letter and t.chars.is_all_lower):
                     ok = False
                     break
                 npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO,
                                                 0)
                 if (npt is not None):
                     t = npt.end_token
                 t = t.next0_
             # A line break between [0] and [1] ends the sequence just before
             # that break.
             if (ok):
                 t = br_list[0].source.next0_
                 while t != br_list[1].source and t is not None:
                     if (t.is_newline_before):
                         return BracketSequenceToken(
                             br_list[0].source, t.previous)
                     t = t.next0_
             # Walk backwards on the same line counting single-character
             # bracket tokens; when the balance lev1 drops below zero the
             # sequence is closed at [1].
             lev1 = 0
             tt = br_list[0].source.previous
             first_pass2803 = True
             while True:
                 if first_pass2803: first_pass2803 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (tt.is_newline_after or tt.is_table_control_char):
                     break
                 if (not ((isinstance(tt, TextToken)))):
                     continue
                 if (tt.chars.is_letter or tt.length_char > 1):
                     continue
                 ch = (tt).term[0]
                 if (BracketHelper.__canBeCloseChar(ch, br_list[0].char0_)):
                     lev1 += 1
                 elif (BracketHelper.__canBeCloseChar(
                         br_list[1].char0_, ch)):
                     lev1 -= 1
                     if (lev1 < 0):
                         return BracketSequenceToken(
                             br_list[0].source, br_list[1].source)
         # [2] closes [0]: outer sequence [0]..[2] with inner [1]..[2].
         if (ok and BracketHelper.__canBeCloseChar(br_list[2].char0_,
                                                   br_list[0].char0_)):
             intern = BracketSequenceToken(br_list[1].source,
                                           br_list[2].source)
             res = BracketSequenceToken(br_list[0].source,
                                        br_list[2].source)
             res.internal.append(intern)
         elif (ok and BracketHelper.__canBeCloseChar(
                 br_list[2].char0_, br_list[1].char0_)
               and br_list[0].can_be_open):
             # Reached only when [2] cannot close [0] (see the branch above),
             # so the inner condition below cannot hold here; with exactly
             # three brackets the opening one stays unclosed and None is
             # returned.
             if (BracketHelper.__canBeCloseChar(br_list[2].char0_,
                                                br_list[0].char0_)):
                 intern = BracketSequenceToken(br_list[1].source,
                                               br_list[2].source)
                 res = BracketSequenceToken(br_list[0].source,
                                            br_list[2].source)
                 res.internal.append(intern)
             elif (len(br_list) == 3):
                 return None
     # Fallbacks: pair the opening bracket with the second bracket.
     if (res is None and len(br_list) > 1 and br_list[1].can_be_close):
         res = BracketSequenceToken(br_list[0].source, br_list[1].source)
     if (res is None
             and len(br_list) > 1 and BracketHelper.__canBeCloseChar(
                 br_list[1].char0_, br_list[0].char0_)):
         res = BracketSequenceToken(br_list[0].source, br_list[1].source)
     if (res is None and len(br_list) == 2
             and br_list[0].char0_ == br_list[1].char0_):
         res = BracketSequenceToken(br_list[0].source, br_list[1].source)
     # Attach the extracted inner sequences that begin before the result ends.
     if (res is not None and internals is not None):
         for i in internals:
             if (i.begin_char < res.end_char):
                 res.internal.append(i)
     # Last resort: the closing bracket may be the final token of a MetaToken.
     # Scan forward within the current sentence (and max_tokens, when it is
     # positive) for such a MetaToken.
     if (res is None):
         cou = 0
         tt = t0.next0_
         first_pass2804 = True
         while True:
             if first_pass2804: first_pass2804 = False
             else:
                 tt = tt.next0_
                 cou += 1
             if (not (tt is not None)): break
             if (tt.is_table_control_char):
                 break
             if (MiscHelper.canBeStartOfSentence(tt)):
                 break
             if (max_tokens > 0 and cou > max_tokens):
                 break
             mt = Utils.asObjectOrNull(tt, MetaToken)
             if (mt is None):
                 continue
             if (isinstance(mt.end_token, TextToken)):
                 if ((mt.end_token).isCharOf(BracketHelper.M_CLOSE_CHARS)):
                     bb = BracketHelper.Bracket(
                         Utils.asObjectOrNull(mt.end_token, TextToken))
                     if (bb.can_be_close and BracketHelper.__canBeCloseChar(
                             bb.char0_, br_list[0].char0_)):
                         return BracketSequenceToken(t0, tt)
     return res
Exemple #15
0
 def __tryNameExist(li: typing.List['CityItemToken'], oi: 'IntOntologyItem',
                    always: bool) -> 'ReferentToken':
     """Check a few special cases in which a bare city name is accepted.

     Only the first element of ``li`` is examined; it must be of type
     ``CITY`` and start with a text token. The name is accepted when one of
     the heuristics below fires, or unconditionally when ``always`` stays
     true.

     Args:
         li(typing.List[CityItemToken]): parsed city items; ``None`` or an
             empty list yields ``None``.
         oi(IntOntologyItem): in/out holder; its ``value`` is first reset to
             ``None`` and then set to ``li[0].onto_item``.
         always(bool): build the result even when no heuristic confirmed the
             name (switched off below if the token also parses as a PERSON).

     Returns:
         ReferentToken: a token spanning ``li[0]`` that carries a
         ``GeoReferent``, or ``None`` when the name is rejected.
     """
     oi.value = None
     # Reject an empty list the same way as None instead of failing on li[0].
     if (li is None or len(li) == 0
             or li[0].typ != CityItemToken.ItemType.CITY):
         return None
     item = li[0]
     oi.value = item.onto_item
     tt = Utils.asObjectOrNull(item.begin_token, TextToken)
     if (tt is None):
         return None
     ok = False
     # Prefer the canonical ontology text over the raw item value.
     nam = (item.value if oi.value is None else oi.value.canonic_text)
     if (nam is None):
         return None
     if (nam == "РИМ"):
         if (tt.term == "РИМ"):
             # Rejected when the next text token is a patronymic
             # (is_proper_secname) according to the dictionary.
             if (not ((isinstance(tt.next0_, TextToken)) and
                      tt.next0_.getMorphClassInDictionary().is_proper_secname)):
                 ok = True
         elif (tt.previous is not None and tt.previous.isValue("В", None)
               and tt.term == "РИМЕ"):
             ok = True
     elif (oi.value is not None and oi.value.referent is not None
           and oi.value.owner.is_ext_ontology):
         ok = True
     elif (nam.endswith("ГРАД") or nam.endswith("СК")):
         ok = True
     elif (nam.endswith("TOWN") or nam.startswith("SAN")):
         ok = True
     elif (item.chars.is_latin_letter
           and item.begin_token.previous is not None
           and ((item.begin_token.previous.isValue("IN", None)
                 or item.begin_token.previous.isValue("FROM", None)))):
         ok = True
     else:
         # Look forward on the same line, skipping punctuation and function
         # words, for a geo referent written in the same alphabet.
         tt2 = item.end_token.next0_
         while (tt2 is not None):
             if (tt2.is_newline_before):
                 break
             if ((tt2.isCharOf(",(") or tt2.morph.class0_.is_preposition
                  or tt2.morph.class0_.is_conjunction)
                     or tt2.morph.class0_.is_misc):
                 tt2 = tt2.next0_
                 continue
             if ((isinstance(tt2.getReferent(), GeoReferent))
                     and tt2.chars.is_cyrillic_letter
                     == item.chars.is_cyrillic_letter):
                 ok = True
             break
         if (not ok):
             # Same search backwards; the first token that is not skipped
             # decides, so every path below leaves the loop.
             tt2 = item.begin_token.previous
             while (tt2 is not None):
                 if (tt2.is_newline_after):
                     break
                 if ((tt2.isCharOf(",)") or tt2.morph.class0_.is_preposition
                      or tt2.morph.class0_.is_conjunction)
                         or tt2.morph.class0_.is_misc):
                     tt2 = tt2.previous
                     continue
                 if ((isinstance(tt2.getReferent(), GeoReferent))
                         and tt2.chars.is_cyrillic_letter
                         == item.chars.is_cyrillic_letter):
                     ok = True
                 if (ok):
                     # Withdraw the match when the tokens parse as a street
                     # only together with their first item.
                     sits = StreetItemToken.tryParseList(
                         item.begin_token, None, 10)
                     if (sits is not None and len(sits) > 1):
                         ss = StreetDefineHelper._tryParseStreet(
                             sits, False, False)
                         if (ss is not None):
                             del sits[0]
                             if (StreetDefineHelper._tryParseStreet(
                                     sits, False, False) is None):
                                 ok = False
                 if (ok):
                     if (len(li) > 1 and li[1].typ
                             == CityItemToken.ItemType.PROPERNAME
                             and (li[1].whitespaces_before_count < 3)):
                         ok = False
                     else:
                         mc = item.begin_token.getMorphClassInDictionary()
                         if (mc.is_proper_name or mc.is_proper_surname
                                 or mc.is_adjective):
                             ok = False
                         else:
                             # A noun phrase reaching past the item means
                             # the word belongs to a longer expression.
                             npt = NounPhraseHelper.tryParse(
                                 item.begin_token, NounPhraseParseAttr.NO,
                                 0)
                             if (npt is not None
                                     and npt.end_char > item.end_char):
                                 ok = False
                 if (AddressItemToken.tryAttachOrg(item.begin_token)
                         is not None):
                     ok = False
                 break
     if (always):
         # A doubtful, widely separated surname that also parses as a
         # PERSON cancels the unconditional mode.
         if (item.whitespaces_before_count > 3 and item.doubtful
                 and item.begin_token.getMorphClassInDictionary(
                 ).is_proper_surname):
             pp = item.kit.processReferent("PERSON", item.begin_token)
             if (pp is not None):
                 always = False
     if (item.begin_token.chars.is_latin_letter
             and item.begin_token == item.end_token):
         # Single Latin word followed (optionally after a comma) by a short
         # Latin token that is not all lower case: the match is withdrawn.
         tt1 = item.end_token.next0_
         if (tt1 is not None and tt1.isChar(',')):
             tt1 = tt1.next0_
         if (((isinstance(tt1, TextToken)) and tt1.chars.is_latin_letter and
              (tt1.length_char < 3)) and not tt1.chars.is_all_lower):
             ok = False
     if (not ok and not always):
         return None
     city = None
     if (oi.value is not None
             and (isinstance(oi.value.referent, GeoReferent))
             and not oi.value.owner.is_ext_ontology):
         # Reuse the referent stored in the (non-external) ontology.
         city = (Utils.asObjectOrNull(oi.value.referent, GeoReferent))
     else:
         city = GeoReferent()
         city._addName(nam)
         if (oi.value is not None
                 and (isinstance(oi.value.referent, GeoReferent))):
             city._mergeSlots2(
                 Utils.asObjectOrNull(oi.value.referent, GeoReferent),
                 item.kit.base_language)
         if (not city.is_city):
             city._addTypCity(item.kit.base_language)
     return ReferentToken._new719(city, item.begin_token, item.end_token,
                                  item.morph)
Exemple #16
0
 def __tryNounName(li: typing.List['CityItemToken'], oi: 'IntOntologyItem',
                   always: bool) -> 'ReferentToken':
     """Try to build a city referent from a "<type word> <name>" sequence.

     ``li[0]`` must be a ``NOUN`` or ``MISC`` item and the name item that
     follows must be a ``CITY`` or ``PROPERNAME``. Two leading type nouns
     are accepted for a few settlement types (see below).

     Side effects: ``li`` is truncated after the name item, and a leading
     ``MISC`` item is removed from it before the result is built.

     Args:
         li(typing.List[CityItemToken]): parsed city items (at least two).
         oi(IntOntologyItem): in/out holder; its ``value`` is reset to
             ``None`` and may be set to the ontology item of the name.
         always(bool): accept the sequence even when no heuristic confirmed
             it.

     Returns:
         ReferentToken: token carrying the ``GeoReferent``, or ``None``.
     """
     oi.value = None
     if (li is None or (len(li) < 2)
             or ((li[0].typ != CityItemToken.ItemType.NOUN
                  and li[0].typ != CityItemToken.ItemType.MISC))):
         return None
     is_misc = li[0].typ == CityItemToken.ItemType.MISC
     # A MISC head never confirms the match by itself.
     ok = not li[0].doubtful and not is_misc
     typ = (None if is_misc else li[0].value)
     typ2 = (None if is_misc else li[0].alt_value)
     prob_adj = None
     i1 = 1
     # Two type nouns in a row are allowed after these settlement types;
     # the second noun becomes the secondary type.
     if ((typ is not None and li[i1].typ == CityItemToken.ItemType.NOUN and
          ((i1 + 1) < len(li))) and li[0].whitespaces_after_count <= 1 and
         (((LanguageHelper.endsWith(typ, "ПОСЕЛОК")
            or LanguageHelper.endsWith(typ, "СЕЛИЩЕ") or typ == "ДЕРЕВНЯ")
           or typ == "СЕЛО"))):
         if (li[i1].begin_token == li[i1].end_token):
             ooo = AddressItemToken.tryAttachOrg(li[i1].begin_token)
             if (ooo is not None and ooo.ref_token is not None):
                 return None
         typ2 = li[i1].value
         if (typ2 == "СТАНЦИЯ" and li[i1].begin_token.isValue("СТ", None)
                 and ((i1 + 1) < len(li))):
             # "СТ" is ambiguous, so a name variant prefixed with this
             # adjective (agreed with the following item) is added later.
             m = li[i1 + 1].morph
             if (m.number == MorphNumber.PLURAL):
                 prob_adj = "СТАРЫЕ"
             elif (m.gender == MorphGender.FEMINIE):
                 prob_adj = "СТАРАЯ"
             elif (m.gender == MorphGender.MASCULINE):
                 prob_adj = "СТАРЫЙ"
             else:
                 prob_adj = "СТАРОЕ"
         i1 += 1
     name = Utils.ifNotNull(li[i1].value,
                            ((None if li[i1].onto_item is None else
                              li[i1].onto_item.canonic_text)))
     alt_name = li[i1].alt_value
     if (name is None):
         return None
     # Captured now: li[0] may be deleted before the result is created.
     mc = li[0].morph
     if (i1 == 1 and li[i1].typ == CityItemToken.ItemType.CITY
             and ((li[0].value == "ГОРОД" or li[0].value == "МІСТО"
                   or li[0].typ == CityItemToken.ItemType.MISC))):
         if (typ is None and ((i1 + 1) < len(li))
                 and li[i1 + 1].typ == CityItemToken.ItemType.NOUN):
             return None
         oi.value = li[i1].onto_item
         if (oi.value is not None):
             name = oi.value.canonic_text
         # oi.value may be None here (no ontology entry), so the attribute
         # is only read behind a None check; the unguarded access used to
         # raise AttributeError for short names.
         has_misc_attr = (oi.value is not None
                          and oi.value.misc_attr is not None)
         if (len(name) > 2 or has_misc_attr):
             if (not li[1].doubtful or has_misc_attr):
                 ok = True
             elif (not ok and not li[1].is_newline_before):
                 if (li[0].geo_object_before or li[1].geo_object_after):
                     ok = True
                 elif (StreetDefineHelper.checkStreetAfter(
                         li[1].end_token.next0_)):
                     ok = True
                 elif (li[1].end_token.next0_ is not None
                       and (isinstance(li[1].end_token.next0_.getReferent(),
                                       DateReferent))):
                     ok = True
                 elif ((li[1].whitespaces_before_count < 2)
                       and li[1].onto_item is not None):
                     if (li[1].is_newline_after):
                         ok = True
             # A doubtful name followed by a token with the same character
             # class is withdrawn.
             if (li[1].doubtful and li[1].end_token.next0_ is not None and
                     li[1].end_token.chars == li[1].end_token.next0_.chars):
                 ok = False
             if (li[0].begin_token.previous is not None
                     and li[0].begin_token.previous.isValue("В", None)):
                 ok = True
         if (not ok):
             ok = CityAttachHelper.checkYearAfter(li[1].end_token.next0_)
         if (not ok):
             ok = CityAttachHelper.checkCityAfter(li[1].end_token.next0_)
     elif ((li[i1].typ == CityItemToken.ItemType.PROPERNAME
            or li[i1].typ == CityItemToken.ItemType.CITY)):
         if (((li[0].value == "АДМИНИСТРАЦИЯ"
               or li[0].value == "АДМІНІСТРАЦІЯ")) and i1 == 1):
             return None
         if (li[i1].is_newline_before):
             if (len(li) != 2):
                 return None
         if (not li[0].doubtful):
             ok = True
             if (len(name) < 2):
                 ok = False
             elif ((len(name) < 3)
                   and li[0].morph.number != MorphNumber.SINGULAR):
                 ok = False
             if (li[i1].doubtful and not li[i1].geo_object_after
                     and not li[0].geo_object_before):
                 if (li[i1].morph.case_.is_genitive):
                     # A doubtful genitive name needs geo context (or the
                     # text boundary) on both sides.
                     if (not ((li[0].begin_token.previous is None
                               or MiscLocationHelper.checkGeoObjectBefore(
                                   li[0].begin_token)) and
                              (li[i1].end_token.next0_ is None
                               or MiscLocationHelper.checkGeoObjectAfter(
                                   li[i1].end_token.next0_)
                               or AddressItemToken.checkHouseAfter(
                                   li[i1].end_token.next0_, False, True)))):
                         ok = False
                 else:
                     # "<person property> ... <person>" reading wins.
                     rt0 = li[i1].kit.processReferent(
                         "PERSONPROPERTY", li[0].begin_token.previous)
                     if (rt0 is not None):
                         rt1 = li[i1].kit.processReferent(
                             "PERSON", li[i1].begin_token)
                         if (rt1 is not None):
                             ok = False
             npt = NounPhraseHelper.tryParse(li[i1].begin_token,
                                             NounPhraseParseAttr.NO, 0)
             if (npt is not None):
                 if (npt.end_token.end_char > li[i1].end_char
                         and len(npt.adjectives) > 0 and
                         not npt.adjectives[0].end_token.next0_.is_comma):
                     ok = False
                 elif (TerrItemToken._m_unknown_regions.tryParse(
                         npt.end_token, TerminParseAttr.FULLWORDSONLY)
                       is not None):
                     # The phrase ends with an "unknown region" term: keep
                     # it only next to a non-city geo referent.
                     ok1 = False
                     if (li[0].begin_token.previous is not None):
                         ttt = li[0].begin_token.previous
                         if (ttt.is_comma and ttt.previous is not None):
                             ttt = ttt.previous
                         geo_ = Utils.asObjectOrNull(
                             ttt.getReferent(), GeoReferent)
                         if (geo_ is not None and not geo_.is_city):
                             ok1 = True
                     if (npt.end_token.next0_ is not None):
                         ttt = npt.end_token.next0_
                         if (ttt.is_comma and ttt.next0_ is not None):
                             ttt = ttt.next0_
                         geo_ = Utils.asObjectOrNull(
                             ttt.getReferent(), GeoReferent)
                         if (geo_ is not None and not geo_.is_city):
                             ok1 = True
                     if (not ok1):
                         return None
             if (li[0].value == "ПОРТ"):
                 if (li[i1].chars.is_all_upper
                         or li[i1].chars.is_latin_letter):
                     return None
         elif (li[0].geo_object_before):
             ok = True
         elif (li[i1].geo_object_after and not li[i1].is_newline_after):
             ok = True
         else:
             ok = CityAttachHelper.checkYearAfter(li[i1].end_token.next0_)
         if (not ok):
             ok = CityAttachHelper.checkStreetAfter(li[i1].end_token.next0_)
         if (not ok and li[0].begin_token.previous is not None
                 and li[0].begin_token.previous.isValue("В", None)):
             ok = True
     else:
         return None
     if (not ok and not always):
         if (MiscLocationHelper.checkNearBefore(li[0].begin_token.previous)
                 is None):
             return None
     # Drop every item after the name.
     if (len(li) > (i1 + 1)):
         del li[i1 + 1:]
     city = GeoReferent()
     if (oi.value is not None and oi.value.referent is not None):
         # Work on a copy of the ontology referent without its occurrences.
         city = (Utils.asObjectOrNull(oi.value.referent.clone(),
                                      GeoReferent))
         city.occurrence.clear()
     if (not li[0].morph.case_.is_undefined
             and li[0].morph.gender != MorphGender.UNDEFINED):
         if (li[i1].end_token.morph.class0_.is_adjective
                 and li[i1].begin_token == li[i1].end_token):
             # Single adjective name: agree it with the type noun.
             nam = ProperNameHelper.getNameEx(
                 li[i1].begin_token, li[i1].end_token, MorphClass.ADJECTIVE,
                 li[0].morph.case_, li[0].morph.gender, False, False)
             if (nam is not None and nam != name):
                 name = nam
     if (li[0].morph.case_.is_nominative):
         # In the nominative the alternative name is registered first.
         if (alt_name is not None):
             city._addName(alt_name)
         alt_name = None
     city._addName(name)
     if (prob_adj is not None):
         city._addName(prob_adj + " " + name)
     if (alt_name is not None):
         city._addName(alt_name)
         if (prob_adj is not None):
             city._addName(prob_adj + " " + alt_name)
     if (typ is not None):
         city._addTyp(typ)
     elif (not city.is_city):
         city._addTypCity(li[0].kit.base_language)
     if (typ2 is not None):
         city._addTyp(typ2.lower())
     if (li[0].higher_geo is not None
             and GeoOwnerHelper.canBeHigher(li[0].higher_geo, city)):
         city.higher = li[0].higher_geo
     if (li[0].typ == CityItemToken.ItemType.MISC):
         # The MISC head is not part of the resulting token span.
         del li[0]
     res = ReferentToken._new719(city, li[0].begin_token,
                                 li[len(li) - 1].end_token, mc)
     if (res.end_token.next0_ is not None and res.end_token.next0_.is_hiphen
             and (isinstance(res.end_token.next0_.next0_, NumberToken))):
         # "<name>-<digits below 50>": the number is appended to every
         # name slot and included in the token.
         num = Utils.asObjectOrNull(res.end_token.next0_.next0_,
                                    NumberToken)
         if ((num.typ == NumberSpellingType.DIGIT
              and not num.morph.class0_.is_adjective
              and num.int_value is not None) and (num.int_value < 50)):
             for s in city.slots:
                 if (s.type_name == GeoReferent.ATTR_NAME):
                     city.uploadSlot(s,
                                     "{0}-{1}".format(s.value, num.value))
             res.end_token = num
     if (li[0].begin_token == li[0].end_token
             and li[0].begin_token.isValue("ГОРОДОК", None)):
         if (AddressItemToken.checkHouseAfter(res.end_token.next0_, True,
                                              False)):
             return None
     return res
Exemple #17
0
 def tryParse(t: 'Token',
              loc_onto: 'IntOntologyCollection') -> 'NamedItemToken':
     """Try to parse a named-entity item that starts at token ``t``.

     The alternatives are tried in this order: an already recognised
     referent, a type term, a well-known name term, a phrase introduced by
     ``MiscLocationHelper.tryAttachNordWest``, a capitalised noun phrase
     whose noun is a typed item, a bracketed name, and finally any
     non-lowercase word longer than two characters.

     Args:
         t(Token): first token to analyse; ``None`` yields ``None``.
         loc_onto(IntOntologyCollection): only forwarded to the recursive
             calls.

     Returns:
         NamedItemToken: the parsed item, or ``None``.
     """
     if (t is None):
         return None
     if (isinstance(t, ReferentToken)):
         # Only these referent kinds are wrapped; any other one is refused.
         r = t.getReferent()
         if ((r.type_name == "PERSON" or r.type_name == "PERSONPROPERTY" or
              (isinstance(r, GeoReferent)))
                 or r.type_name == "ORGANIZATION"):
             return NamedItemToken._new1635(t, t, r, t.morph)
         return None
     typ = NamedItemToken.__m_types.tryParse(t, TerminParseAttr.NO)
     nam = NamedItemToken.__m_names.tryParse(t, TerminParseAttr.NO)
     if (typ is not None):
         if (not ((isinstance(t, TextToken)))):
             return None
         res = NamedItemToken._new1636(typ.begin_token, typ.end_token,
                                       typ.morph, typ.chars)
         res.kind = (Utils.valToEnum(typ.termin.tag, NamedEntityKind))
         res.type_value = typ.termin.canonic_text
         # The same span may also be a well-known name of the same kind.
         if ((nam is not None and nam.end_token == typ.end_token
              and not t.chars.is_all_lower) and (Utils.valToEnum(
                  nam.termin.tag, NamedEntityKind)) == res.kind):
             res.name_value = nam.termin.canonic_text
             res.is_wellknown = True
         return res
     if (nam is not None):
         if (nam.begin_token.chars.is_all_lower):
             return None
         res = NamedItemToken._new1636(nam.begin_token, nam.end_token,
                                       nam.morph, nam.chars)
         res.kind = (Utils.valToEnum(nam.termin.tag, NamedEntityKind))
         res.name_value = nam.termin.canonic_text
         # Marked well-known only when t is delimited by whitespace (or the
         # text edge); a directly following ",.;!?" that is itself followed
         # by whitespace is tolerated.
         ok = True
         if (not t.is_whitespace_before and t.previous is not None):
             ok = False
         elif (not t.is_whitespace_after and t.next0_ is not None):
             if (not (t.next0_.isCharOf(",.;!?")
                      and t.next0_.is_whitespace_after)):
                 ok = False
         if (ok):
             res.is_wellknown = True
             res.type_value = (Utils.asObjectOrNull(nam.termin.tag2, str))
         return res
     adj = MiscLocationHelper.tryAttachNordWest(t)
     if (adj is not None):
         if (adj.morph.class0_.is_noun):
             # Noun form: only a multi-token phrase ending in "ВОСТОК" is
             # accepted as a location.
             if (adj.end_token.isValue("ВОСТОК", None)):
                 if (adj.begin_token == adj.end_token):
                     return None
                 re = NamedItemToken._new1638(t, adj.end_token, adj.morph)
                 re.kind = NamedEntityKind.LOCATION
                 re.name_value = MiscHelper.getTextValue(
                     t, adj.end_token,
                     GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
                 re.is_wellknown = True
                 return re
             return None
         if (adj.whitespaces_after_count > 2):
             return None
         if ((isinstance(adj.end_token.next0_, ReferentToken)) and
             (isinstance(adj.end_token.next0_.getReferent(), GeoReferent))):
             # "<adj> <geo referent>": one location that keeps a reference
             # to the geo object.
             re = NamedItemToken._new1638(t, adj.end_token.next0_,
                                          adj.end_token.next0_.morph)
             re.kind = NamedEntityKind.LOCATION
             re.name_value = MiscHelper.getTextValue(
                 t, adj.end_token.next0_,
                 GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
             re.is_wellknown = True
             re.ref = adj.end_token.next0_.getReferent()
             return re
         res = NamedItemToken.tryParse(adj.end_token.next0_, loc_onto)
         if (res is not None and res.kind == NamedEntityKind.LOCATION):
             s = adj.getNormalCaseText(MorphClass.ADJECTIVE, True,
                                       res.morph.gender, False)
             if (s is not None):
                 # Prepend the adjective to the following location item.
                 if (res.name_value is None):
                     res.name_value = s.upper()
                 else:
                     res.name_value = "{0} {1}".format(
                         s.upper(), res.name_value)
                     res.type_value = None
                 res.begin_token = t
                 res.chars = t.chars
                 res.is_wellknown = True
                 return res
     if (t.chars.is_capital_upper
             and not MiscHelper.canBeStartOfSentence(t)):
         npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
         if (npt is not None and len(npt.adjectives) > 0):
             test = NamedItemToken.tryParse(npt.noun.begin_token, loc_onto)
             if (test is not None and test.end_token == npt.end_token
                     and test.type_value is not None):
                 test.begin_token = t
                 # The name is the space-joined normal forms of the
                 # adjectives. Forms that could not be produced (None) are
                 # skipped; printing them used to put the literal text
                 # "None" into the name.
                 parts = []
                 for a in npt.adjectives:
                     s = a.getNormalCaseText(MorphClass.ADJECTIVE, True,
                                             test.morph.gender, False)
                     if (s):
                         parts.append(s)
                 test.name_value = " ".join(parts)
                 test.chars = t.chars
                 if (test.kind == NamedEntityKind.LOCATION):
                     test.is_wellknown = True
                 return test
     if ((BracketHelper.isBracket(t, True) and t.next0_ is not None
          and t.next0_.chars.is_letter)
             and not t.next0_.chars.is_all_lower):
         br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
         if (br is not None):
             res = NamedItemToken(t, br.end_token)
             res.is_in_bracket = True
             res.name_value = MiscHelper.getTextValue(
                 t, br.end_token, GetTextAttr.NO)
             # If the whole bracket content is a known name, use its
             # canonical text and kind.
             nam = NamedItemToken.__m_names.tryParse(
                 t.next0_, TerminParseAttr.NO)
             if (nam is not None
                     and nam.end_token == br.end_token.previous):
                 res.kind = (Utils.valToEnum(nam.termin.tag,
                                             NamedEntityKind))
                 res.is_wellknown = True
                 res.name_value = nam.termin.canonic_text
             return res
     if (((isinstance(t, TextToken)) and t.chars.is_letter
          and not t.chars.is_all_lower) and t.length_char > 2):
         res = NamedItemToken._new1638(t, t, t.morph)
         term = t.term
         # Words with these endings are kept as written instead of being
         # normalised.
         if (term.endswith(("О", "И", "Ы"))):
             res.name_value = term
         else:
             res.name_value = t.getNormalCaseText(None, False,
                                                  MorphGender.UNDEFINED,
                                                  False)
         res.chars = t.chars
         # "Word-Word" written without spaces in one alphabet: extend the
         # item over the hyphen.
         if (((not t.is_whitespace_after and t.next0_ is not None
               and t.next0_.is_hiphen) and
              (isinstance(t.next0_.next0_, TextToken))
              and not t.next0_.next0_.is_whitespace_after)
                 and t.chars.is_cyrillic_letter
                 == t.next0_.next0_.chars.is_cyrillic_letter):
             tail = t.next0_.next0_
             res.end_token = tail
             res.name_value = "{0}-{1}".format(
                 res.name_value,
                 tail.getNormalCaseText(None, False, MorphGender.UNDEFINED,
                                        False))
         return res
     return None
Exemple #18
0
 def tryAttachTerritory(
         li: typing.List['TerrItemToken'],
         ad: 'AnalyzerData',
         attach_always: bool = False,
         cits: typing.List['CityItemToken'] = None,
         exists: typing.List['GeoReferent'] = None) -> 'ReferentToken':
     if (li is None or len(li) == 0):
         return None
     ex_obj = None
     new_name = None
     adj_list = list()
     noun = None
     add_noun = None
     rt = TerrAttachHelper.__tryAttachMoscowAO(li, ad)
     if (rt is not None):
         return rt
     if (li[0].termin_item is not None
             and li[0].termin_item.canonic_text == "ТЕРРИТОРИЯ"):
         res2 = TerrAttachHelper.__tryAttachPureTerr(li, ad)
         return res2
     if (len(li) == 2):
         if (li[0].rzd is not None and li[1].rzd_dir is not None):
             rzd = GeoReferent()
             rzd._addName(li[1].rzd_dir)
             rzd._addTypTer(li[0].kit.base_language)
             rzd.addSlot(GeoReferent.ATTR_REF, li[0].rzd.referent, False, 0)
             rzd.addExtReferent(li[0].rzd)
             return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
         if (li[1].rzd is not None and li[0].rzd_dir is not None):
             rzd = GeoReferent()
             rzd._addName(li[0].rzd_dir)
             rzd._addTypTer(li[0].kit.base_language)
             rzd.addSlot(GeoReferent.ATTR_REF, li[1].rzd.referent, False, 0)
             rzd.addExtReferent(li[1].rzd)
             return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
     can_be_city_before = False
     adj_terr_before = False
     if (cits is not None):
         if (cits[0].typ == CityItemToken.ItemType.CITY):
             can_be_city_before = True
         elif (cits[0].typ == CityItemToken.ItemType.NOUN
               and len(cits) > 1):
             can_be_city_before = True
     k = 0
     while k < len(li):
         if (li[k].onto_item is not None):
             if (ex_obj is not None or new_name is not None):
                 break
             if (noun is not None):
                 if (k == 1):
                     if (noun.termin_item.canonic_text == "РАЙОН"
                             or noun.termin_item.canonic_text == "ОБЛАСТЬ"
                             or noun.termin_item.canonic_text == "СОЮЗ"):
                         if (isinstance(li[k].onto_item.referent,
                                        GeoReferent)):
                             if ((li[k].onto_item.referent).is_state):
                                 break
                         ok = False
                         tt = li[k].end_token.next0_
                         if (tt is None):
                             ok = True
                         elif (tt.isCharOf(",.")):
                             ok = True
                         if (not ok):
                             ok = MiscLocationHelper.checkGeoObjectBefore(
                                 li[0].begin_token)
                         if (not ok):
                             adr = AddressItemToken.tryParse(
                                 tt, None, False, False, None)
                             if (adr is not None):
                                 if (adr.typ ==
                                         AddressItemToken.ItemType.STREET):
                                     ok = True
                         if (not ok):
                             break
                     if (li[k].onto_item is not None):
                         if (noun.begin_token.isValue("МО", None)
                                 or noun.begin_token.isValue("ЛО", None)):
                             return None
             ex_obj = li[k]
         elif (li[k].termin_item is not None):
             if (noun is not None):
                 break
             if (li[k].termin_item.is_always_prefix and k > 0):
                 break
             if (k > 0 and li[k].is_doubt):
                 if (li[k].begin_token == li[k].end_token
                         and li[k].begin_token.isValue("ЗАО", None)):
                     break
             if (li[k].termin_item.is_adjective
                     or li[k].is_geo_in_dictionary):
                 adj_list.append(li[k])
             else:
                 if (ex_obj is not None):
                     geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent,
                                                 GeoReferent)
                     if (geo_ is None):
                         break
                     if (ex_obj.is_adjective and
                         ((li[k].termin_item.canonic_text == "СОЮЗ" or
                           li[k].termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                         str0_ = str(ex_obj.onto_item)
                         if (not li[k].termin_item.canonic_text in str0_):
                             return None
                     if (li[k].termin_item.canonic_text == "РАЙОН"
                             or li[k].termin_item.canonic_text == "ОКРУГ"
                             or li[k].termin_item.canonic_text == "КРАЙ"):
                         tmp = io.StringIO()
                         for s in geo_.slots:
                             if (s.type_name == GeoReferent.ATTR_TYPE):
                                 print("{0};".format(s.value),
                                       end="",
                                       file=tmp,
                                       flush=True)
                         if (not li[k].termin_item.canonic_text
                                 in Utils.toStringStringIO(tmp).upper()):
                             if (k != 1 or new_name is not None):
                                 break
                             new_name = li[0]
                             new_name.is_adjective = True
                             new_name.onto_item = (None)
                             ex_obj = (None)
                 noun = li[k]
                 if (k == 0):
                     tt = TerrItemToken.tryParse(li[k].begin_token.previous,
                                                 None, True, False)
                     if (tt is not None and tt.morph.class0_.is_adjective):
                         adj_terr_before = True
         else:
             if (ex_obj is not None):
                 break
             if (new_name is not None):
                 break
             new_name = li[k]
         k += 1
     name = None
     alt_name = None
     full_name = None
     morph_ = None
     if (ex_obj is not None):
         if (ex_obj.is_adjective and not ex_obj.morph.language.is_en
                 and noun is None):
             if (attach_always and ex_obj.end_token.next0_ is not None):
                 npt = NounPhraseHelper.tryParse(ex_obj.begin_token,
                                                 NounPhraseParseAttr.NO, 0)
                 if (ex_obj.end_token.next0_.is_comma_and):
                     pass
                 elif (npt is None):
                     pass
                 else:
                     str0_ = StreetItemToken.tryParse(
                         ex_obj.end_token.next0_, None, False, None, False)
                     if (str0_ is not None):
                         if (str0_.typ == StreetItemType.NOUN
                                 and str0_.end_token == npt.end_token):
                             return None
             else:
                 cit = CityItemToken.tryParse(ex_obj.end_token.next0_, None,
                                              False, None)
                 if (cit is not None
                         and ((cit.typ == CityItemToken.ItemType.NOUN
                               or cit.typ == CityItemToken.ItemType.CITY))):
                     npt = NounPhraseHelper.tryParse(
                         ex_obj.begin_token, NounPhraseParseAttr.NO, 0)
                     if (npt is not None
                             and npt.end_token == cit.end_token):
                         pass
                     else:
                         return None
                 elif (ex_obj.begin_token.isValue("ПОДНЕБЕСНЫЙ", None)):
                     pass
                 else:
                     return None
         if (noun is None and ex_obj.can_be_city):
             cit0 = CityItemToken.tryParseBack(ex_obj.begin_token.previous)
             if (cit0 is not None
                     and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                 return None
         if (ex_obj.is_doubt and noun is None):
             ok2 = False
             if (TerrAttachHelper.__canBeGeoAfter(ex_obj.end_token.next0_)):
                 ok2 = True
             elif (not ex_obj.can_be_surname and not ex_obj.can_be_city):
                 if ((ex_obj.end_token.next0_ is not None
                      and ex_obj.end_token.next0_.isChar(')')
                      and ex_obj.begin_token.previous is not None)
                         and ex_obj.begin_token.previous.isChar('(')):
                     ok2 = True
                 elif (ex_obj.chars.is_latin_letter
                       and ex_obj.begin_token.previous is not None):
                     if (ex_obj.begin_token.previous.isValue("IN", None)):
                         ok2 = True
                     elif (ex_obj.begin_token.previous.isValue("THE", None)
                           and ex_obj.begin_token.previous.previous
                           is not None
                           and ex_obj.begin_token.previous.previous.isValue(
                               "IN", None)):
                         ok2 = True
             if (not ok2):
                 cit0 = CityItemToken.tryParseBack(
                     ex_obj.begin_token.previous)
                 if (cit0 is not None
                         and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                     pass
                 elif (MiscLocationHelper.checkGeoObjectBefore(
                         ex_obj.begin_token.previous)):
                     pass
                 else:
                     return None
         name = ex_obj.onto_item.canonic_text
         morph_ = ex_obj.morph
     elif (new_name is not None):
         if (noun is None):
             return None
         j = 1
         while j < k:
             if (li[j].is_newline_before and not li[0].is_newline_before):
                 return None
             j += 1
         morph_ = noun.morph
         if (new_name.is_adjective):
             if (noun.termin_item.acronym == "АО"):
                 if (noun.begin_token != noun.end_token):
                     return None
                 if (new_name.morph.gender != MorphGender.FEMINIE):
                     return None
             geo_before = None
             tt0 = li[0].begin_token.previous
             if (tt0 is not None and tt0.is_comma_and):
                 tt0 = tt0.previous
             if (not li[0].is_newline_before and tt0 is not None):
                 geo_before = (Utils.asObjectOrNull(tt0.getReferent(),
                                                    GeoReferent))
             if (Utils.indexOfList(li, noun, 0) < Utils.indexOfList(
                     li, new_name, 0)):
                 if (noun.termin_item.is_state):
                     return None
                 if (new_name.can_be_surname and geo_before is None):
                     if (((noun.morph.case_)
                          & new_name.morph.case_).is_undefined):
                         return None
                 if (MiscHelper.isExistsInDictionary(
                         new_name.begin_token, new_name.end_token,
                     (MorphClass.ADJECTIVE) | MorphClass.PRONOUN
                         | MorphClass.VERB)):
                     if (noun.begin_token != new_name.begin_token):
                         if (geo_before is None):
                             if (len(li) == 2
                                     and TerrAttachHelper.__canBeGeoAfter(
                                         li[1].end_token.next0_)):
                                 pass
                             elif (len(li) == 3
                                   and li[2].termin_item is not None
                                   and TerrAttachHelper.__canBeGeoAfter(
                                       li[2].end_token.next0_)):
                                 pass
                             elif (new_name.is_geo_in_dictionary):
                                 pass
                             elif (new_name.end_token.is_newline_after):
                                 pass
                             else:
                                 return None
                 npt = NounPhraseHelper.tryParse(
                     new_name.end_token, NounPhraseParseAttr.PARSEPRONOUNS,
                     0)
                 if (npt is not None
                         and npt.end_token != new_name.end_token):
                     if (len(li) >= 3 and li[2].termin_item is not None
                             and npt.end_token == li[2].end_token):
                         add_noun = li[2]
                     else:
                         return None
                 rtp = new_name.kit.processReferent("PERSON",
                                                    new_name.begin_token)
                 if (rtp is not None):
                     return None
                 name = ProperNameHelper.getNameEx(new_name.begin_token,
                                                   new_name.end_token,
                                                   MorphClass.ADJECTIVE,
                                                   MorphCase.UNDEFINED,
                                                   noun.termin_item.gender,
                                                   False, False)
             else:
                 ok = False
                 if (((k + 1) < len(li)) and li[k].termin_item is None
                         and li[k + 1].termin_item is not None):
                     ok = True
                 elif ((k < len(li)) and li[k].onto_item is not None):
                     ok = True
                 elif (k == len(li) and not new_name.is_adj_in_dictionary):
                     ok = True
                 elif (MiscLocationHelper.checkGeoObjectBefore(
                         li[0].begin_token) or can_be_city_before):
                     ok = True
                 elif (MiscLocationHelper.checkGeoObjectAfter(
                         li[k - 1].end_token)):
                     ok = True
                 elif (len(li) == 3 and k == 2):
                     cit = CityItemToken.tryParse(li[2].begin_token, None,
                                                  False, None)
                     if (cit is not None):
                         if (cit.typ == CityItemToken.ItemType.CITY
                                 or cit.typ == CityItemToken.ItemType.NOUN):
                             ok = True
                 elif (len(li) == 2):
                     ok = TerrAttachHelper.__canBeGeoAfter(
                         li[len(li) - 1].end_token.next0_)
                 if (not ok and not li[0].is_newline_before
                         and not li[0].chars.is_all_lower):
                     rt00 = li[0].kit.processReferent(
                         "PERSONPROPERTY", li[0].begin_token.previous)
                     if (rt00 is not None):
                         ok = True
                 if (noun.termin_item is not None
                         and noun.termin_item.is_strong
                         and new_name.is_adjective):
                     ok = True
                 if (noun.is_doubt and len(adj_list) == 0
                         and geo_before is None):
                     return None
                 name = ProperNameHelper.getNameEx(new_name.begin_token,
                                                   new_name.end_token,
                                                   MorphClass.ADJECTIVE,
                                                   MorphCase.UNDEFINED,
                                                   noun.termin_item.gender,
                                                   False, False)
                 if (not ok and not attach_always):
                     if (MiscHelper.isExistsInDictionary(
                             new_name.begin_token, new_name.end_token,
                         (MorphClass.ADJECTIVE) | MorphClass.PRONOUN
                             | MorphClass.VERB)):
                         if (exists is not None):
                             for e0_ in exists:
                                 if (e0_.findSlot(GeoReferent.ATTR_NAME,
                                                  name, True) is not None):
                                     ok = True
                                     break
                         if (not ok):
                             return None
                 full_name = "{0} {1}".format(
                     ProperNameHelper.getNameEx(li[0].begin_token,
                                                noun.begin_token.previous,
                                                MorphClass.ADJECTIVE,
                                                MorphCase.UNDEFINED,
                                                noun.termin_item.gender,
                                                False, False),
                     noun.termin_item.canonic_text)
         else:
             if (not attach_always or
                 ((noun.termin_item is not None
                   and noun.termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                 is_latin = noun.chars.is_latin_letter and new_name.chars.is_latin_letter
                 if (Utils.indexOfList(li, noun, 0) > Utils.indexOfList(
                         li, new_name, 0)):
                     if (not is_latin):
                         return None
                 if (not new_name.is_district_name
                         and not BracketHelper.canBeStartOfSequence(
                             new_name.begin_token, False, False)):
                     if (len(adj_list) == 0
                             and MiscHelper.isExistsInDictionary(
                                 new_name.begin_token, new_name.end_token,
                                 (MorphClass.NOUN) | MorphClass.PRONOUN)):
                         if (len(li) == 2 and noun.is_city_region
                                 and (noun.whitespaces_after_count < 2)):
                             pass
                         else:
                             return None
                     if (not is_latin):
                         if ((noun.termin_item.is_region
                              and not attach_always and
                              ((not adj_terr_before or new_name.is_doubt)))
                                 and not noun.is_city_region and
                                 not noun.termin_item.is_specific_prefix):
                             if (not MiscLocationHelper.
                                     checkGeoObjectBefore(
                                         noun.begin_token)):
                                 if (not noun.is_doubt and noun.begin_token
                                         != noun.end_token):
                                     pass
                                 else:
                                     return None
                         if (noun.is_doubt and len(adj_list) == 0):
                             if (((noun.termin_item.acronym == "МО"
                                   or noun.termin_item.acronym == "ЛО"))
                                     and k == (len(li) - 1)
                                     and li[k].termin_item is not None):
                                 add_noun = li[k]
                                 k += 1
                             else:
                                 return None
                         pers = new_name.kit.processReferent(
                             "PERSON", new_name.begin_token)
                         if (pers is not None):
                             return None
             name = MiscHelper.getTextValue(new_name.begin_token,
                                            new_name.end_token,
                                            GetTextAttr.NO)
             if (new_name.begin_token != new_name.end_token):
                 ttt = new_name.begin_token.next0_
                 while ttt is not None and ttt.end_char <= new_name.end_char:
                     if (ttt.chars.is_letter):
                         ty = TerrItemToken.tryParse(
                             ttt, None, False, False)
                         if ((ty is not None and ty.termin_item is not None
                              and noun is not None)
                                 and ((noun.termin_item.canonic_text
                                       in ty.termin_item.canonic_text
                                       or ty.termin_item.canonic_text
                                       in noun.termin_item.canonic_text))):
                             name = MiscHelper.getTextValue(
                                 new_name.begin_token, ttt.previous,
                                 GetTextAttr.NO)
                             break
                     ttt = ttt.next0_
             if (len(adj_list) > 0):
                 npt = NounPhraseHelper.tryParse(adj_list[0].begin_token,
                                                 NounPhraseParseAttr.NO, 0)
                 if (npt is not None and npt.end_token == noun.end_token):
                     alt_name = "{0} {1}".format(
                         npt.getNormalCaseText(None, False,
                                               MorphGender.UNDEFINED,
                                               False), name)
     else:
         if ((len(li) == 1 and noun is not None
              and noun.end_token.next0_ is not None) and (isinstance(
                  noun.end_token.next0_.getReferent(), GeoReferent))):
             g = Utils.asObjectOrNull(noun.end_token.next0_.getReferent(),
                                      GeoReferent)
             if (noun.termin_item is not None):
                 tyy = noun.termin_item.canonic_text.lower()
                 ooo = False
                 if (g.findSlot(GeoReferent.ATTR_TYPE, tyy, True)
                         is not None):
                     ooo = True
                 elif (tyy.endswith("район") and g.findSlot(
                         GeoReferent.ATTR_TYPE, "район", True) is not None):
                     ooo = True
                 if (ooo):
                     return ReferentToken._new719(g, noun.begin_token,
                                                  noun.end_token.next0_,
                                                  noun.begin_token.morph)
         if ((len(li) == 1 and noun == li[0]
              and li[0].termin_item is not None) and TerrItemToken.tryParse(
                  li[0].end_token.next0_, None, True, False) is None
                 and TerrItemToken.tryParse(li[0].begin_token.previous,
                                            None, True, False) is None):
             if (li[0].morph.number == MorphNumber.PLURAL):
                 return None
             cou = 0
             str0_ = li[0].termin_item.canonic_text.lower()
             tt = li[0].begin_token.previous
             first_pass2898 = True
             while True:
                 if first_pass2898: first_pass2898 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (tt.is_newline_after):
                     cou += 10
                 else:
                     cou += 1
                 if (cou > 500):
                     break
                 g = Utils.asObjectOrNull(tt.getReferent(), GeoReferent)
                 if (g is None):
                     continue
                 ok = True
                 cou = 0
                 tt = li[0].end_token.next0_
                 first_pass2899 = True
                 while True:
                     if first_pass2899: first_pass2899 = False
                     else: tt = tt.next0_
                     if (not (tt is not None)): break
                     if (tt.is_newline_before):
                         cou += 10
                     else:
                         cou += 1
                     if (cou > 500):
                         break
                     tee = TerrItemToken.tryParse(tt, None, True, False)
                     if (tee is None):
                         continue
                     ok = False
                     break
                 if (ok):
                     ii = 0
                     while g is not None and (ii < 3):
                         if (g.findSlot(GeoReferent.ATTR_TYPE, str0_, True)
                                 is not None):
                             return ReferentToken._new719(
                                 g, li[0].begin_token, li[0].end_token,
                                 noun.begin_token.morph)
                         g = g.higher
                         ii += 1
                 break
         return None
     ter = None
     if (ex_obj is not None and (isinstance(ex_obj.tag, GeoReferent))):
         ter = (Utils.asObjectOrNull(ex_obj.tag, GeoReferent))
     else:
         ter = GeoReferent()
         if (ex_obj is not None):
             geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent,
                                         GeoReferent)
             if (geo_ is not None and not geo_.is_city):
                 ter._mergeSlots2(geo_, li[0].kit.base_language)
             else:
                 ter._addName(name)
             if (noun is None and ex_obj.can_be_city):
                 ter._addTypCity(li[0].kit.base_language)
             else:
                 pass
         elif (new_name is not None):
             ter._addName(name)
             if (alt_name is not None):
                 ter._addName(alt_name)
         if (noun is not None):
             if (noun.termin_item.canonic_text == "АО"):
                 ter._addTyp(
                     ("АВТОНОМНИЙ ОКРУГ" if li[0].kit.base_language.is_ua
                      else "АВТОНОМНЫЙ ОКРУГ"))
             elif (noun.termin_item.canonic_text == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ"
                   or noun.termin_item.canonic_text
                   == "МУНІЦИПАЛЬНЕ ЗБОРИ"):
                 ter._addTyp(("МУНІЦИПАЛЬНЕ УТВОРЕННЯ"
                              if li[0].kit.base_language.is_ua else
                              "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ"))
             elif (noun.termin_item.acronym == "МО"
                   and add_noun is not None):
                 ter._addTyp(add_noun.termin_item.canonic_text)
             else:
                 if (noun.termin_item.canonic_text == "СОЮЗ"
                         and ex_obj is not None
                         and ex_obj.end_char > noun.end_char):
                     return ReferentToken._new719(ter, ex_obj.begin_token,
                                                  ex_obj.end_token,
                                                  ex_obj.morph)
                 ter._addTyp(noun.termin_item.canonic_text)
                 if (noun.termin_item.is_region and ter.is_state):
                     ter._addTypReg(li[0].kit.base_language)
         if (ter.is_state and ter.is_region):
             for a in adj_list:
                 if (a.termin_item.is_region):
                     ter._addTypReg(li[0].kit.base_language)
                     break
         if (ter.is_state):
             if (full_name is not None):
                 ter._addName(full_name)
     res = ReferentToken(ter, li[0].begin_token, li[k - 1].end_token)
     if (noun is not None and noun.morph.class0_.is_noun):
         res.morph = noun.morph
     else:
         res.morph = MorphCollection()
         ii = 0
         while ii < k:
             for v in li[ii].morph.items:
                 bi = MorphBaseInfo(v)
                 if (noun is not None):
                     if (bi.class0_.is_adjective):
                         bi.class0_ = MorphClass.NOUN
                 res.morph.addItem(bi)
             ii += 1
     if (li[0].termin_item is not None
             and li[0].termin_item.is_specific_prefix):
         res.begin_token = li[0].end_token.next0_
     if (add_noun is not None and add_noun.end_char > res.end_char):
         res.end_token = add_noun.end_token
     if ((isinstance(res.begin_token.previous, TextToken))
             and (res.whitespaces_before_count < 2)):
         tt = Utils.asObjectOrNull(res.begin_token.previous, TextToken)
         if (tt.term == "АР"):
             for ty in ter.typs:
                 if ("республика" in ty or "республіка" in ty):
                     res.begin_token = tt
                     break
     return res
Exemple #19
0
 def tryParse(t: 'Token', items: typing.List['NounPhraseItem'],
              attrs: 'NounPhraseParseAttr') -> 'NounPhraseItem':
     if (t is None):
         return None
     t0 = t
     _can_be_surname = False
     _is_doubt_adj = False
     rt = Utils.asObjectOrNull(t, ReferentToken)
     if (rt is not None and rt.begin_token == rt.end_token):
         res = NounPhraseItem.tryParse(rt.begin_token, items, attrs)
         if (res is not None):
             res.begin_token = res.end_token = t
             return res
     if (rt is not None and items is not None and len(items) > 0):
         res = NounPhraseItem(t, t)
         for m in t.morph.items:
             v = NounPhraseItemTextVar(m, None)
             v.normal_value = str(t.getReferent())
             res.noun_morph.append(v)
         res.can_be_noun = True
         return res
     if (isinstance(t, NumberToken)):
         pass
     has_legal_verb = False
     if (isinstance(t, TextToken)):
         if (not t.chars.is_letter):
             return None
         str0_ = (t).term
         if (str0_[len(str0_) - 1] == 'А' or str0_[len(str0_) - 1] == 'О'):
             for wf in t.morph.items:
                 if ((isinstance(wf, MorphWordForm))
                         and (wf).is_in_dictionary):
                     if (wf.class0_.is_verb):
                         mc = t.getMorphClassInDictionary()
                         if (not mc.is_noun and
                             (((attrs) &
                               (NounPhraseParseAttr.IGNOREPARTICIPLES)))
                                 == (NounPhraseParseAttr.NO)):
                             if (not LanguageHelper.endsWithEx(
                                     str0_, "ОГО", "ЕГО", None, None)):
                                 return None
                         has_legal_verb = True
                     if (wf.class0_.is_adverb):
                         if (t.next0_ is None or not t.next0_.is_hiphen):
                             if ((str0_ == "ВСЕГО" or str0_ == "ДОМА"
                                  or str0_ == "НЕСКОЛЬКО")
                                     or str0_ == "МНОГО"
                                     or str0_ == "ПОРЯДКА"):
                                 pass
                             else:
                                 return None
                     if (wf.class0_.is_adjective):
                         if (wf.containsAttr("к.ф.", None)):
                             if (t.getMorphClassInDictionary() ==
                                     MorphClass.ADJECTIVE):
                                 pass
                             else:
                                 _is_doubt_adj = True
         mc0 = t.morph.class0_
         if (mc0.is_proper_surname and not t.chars.is_all_lower):
             for wf in t.morph.items:
                 if (wf.class0_.is_proper_surname
                         and wf.number != MorphNumber.PLURAL):
                     wff = Utils.asObjectOrNull(wf, MorphWordForm)
                     if (wff is None):
                         continue
                     s = Utils.ifNotNull((Utils.ifNotNull(
                         wff.normal_full, wff.normal_case)), "")
                     if (LanguageHelper.endsWithEx(s, "ИН", "ЕН", "ЫН",
                                                   None)):
                         if (not wff.is_in_dictionary):
                             _can_be_surname = True
                         else:
                             return None
                     if (wff.is_in_dictionary
                             and LanguageHelper.endsWith(s, "ОВ")):
                         _can_be_surname = True
         if (mc0.is_proper_name and not t.chars.is_all_lower):
             for wff in t.morph.items:
                 wf = Utils.asObjectOrNull(wff, MorphWordForm)
                 if (wf is None):
                     continue
                 if (wf.normal_case == "ГОР"):
                     continue
                 if (wf.class0_.is_proper_name and wf.is_in_dictionary):
                     if (wf.normal_case is None
                             or not wf.normal_case.startswith("ЛЮБ")):
                         if (mc0.is_adjective
                                 and t.morph.containsAttr("неизм.", None)):
                             pass
                         elif (
                             (((attrs) &
                               (NounPhraseParseAttr.REFERENTCANBENOUN))
                              ) == (NounPhraseParseAttr.REFERENTCANBENOUN)):
                             pass
                         else:
                             if (items is None or (len(items) < 1)):
                                 return None
                             if (not items[0].is_std_adjective):
                                 return None
         if (mc0.is_adjective and t.morph.items_count == 1):
             if (t.morph.getIndexerItem(0).containsAttr("в.ср.ст.", None)):
                 return None
         mc1 = t.getMorphClassInDictionary()
         if (mc1 == MorphClass.VERB):
             return None
         if (((((attrs) & (NounPhraseParseAttr.IGNOREPARTICIPLES)))
              == (NounPhraseParseAttr.IGNOREPARTICIPLES)
              and t.morph.class0_.is_verb and not t.morph.class0_.is_noun)
                 and not t.morph.class0_.is_proper):
             for wf in t.morph.items:
                 if (wf.class0_.is_verb):
                     if (wf.containsAttr("дейст.з.", None)):
                         if (LanguageHelper.endsWith((t).term, "СЯ")):
                             pass
                         else:
                             return None
     t1 = None
     for k in range(2):
         t = (Utils.ifNotNull(t1, t0))
         if (k == 0):
             if ((((isinstance(t0, TextToken))) and t0.next0_ is not None
                  and t0.next0_.is_hiphen)
                     and t0.next0_.next0_ is not None):
                 if (not t0.is_whitespace_after
                         and not t0.morph.class0_.is_pronoun):
                     if (not t0.next0_.is_whitespace_after):
                         t = t0.next0_.next0_
                     elif (t0.next0_.next0_.chars.is_all_lower
                           and LanguageHelper.endsWith((t0).term, "О")):
                         t = t0.next0_.next0_
         it = NounPhraseItem._new470(t0, t, _can_be_surname)
         if (t0 == t and (isinstance(t0, ReferentToken))):
             it.can_be_noun = True
             it.morph = MorphCollection(t0.morph)
         can_be_prepos = False
         for v in t.morph.items:
             wf = Utils.asObjectOrNull(v, MorphWordForm)
             if (v.class0_.is_preposition):
                 can_be_prepos = True
             if (v.class0_.is_adjective
                     or ((v.class0_.is_pronoun
                          and not v.class0_.is_personal_pronoun)) or
                 ((v.class0_.is_noun and (isinstance(t, NumberToken))))):
                 if (NounPhraseItem.tryAccordVariant(
                         items, (0 if items is None else len(items)), v)):
                     is_doub = False
                     if (v.containsAttr("к.ф.", None)):
                         continue
                     if (v.containsAttr("собир.", None)
                             and not ((isinstance(t, NumberToken)))):
                         if (wf is not None and wf.is_in_dictionary):
                             return None
                         continue
                     if (v.containsAttr("сравн.", None)):
                         continue
                     ok = True
                     if (isinstance(t, TextToken)):
                         s = (t).term
                         if (s == "ПРАВО" or s == "ПРАВА"):
                             ok = False
                         elif (LanguageHelper.endsWith(s, "ОВ")
                               and t.getMorphClassInDictionary().is_noun):
                             ok = False
                         elif (wf is not None
                               and ((wf.normal_case == "САМ"
                                     or wf.normal_case == "ТО"))):
                             ok = False
                     elif (isinstance(t, NumberToken)):
                         if (v.class0_.is_noun
                                 and t.morph.class0_.is_adjective):
                             ok = False
                         elif (t.morph.class0_.is_noun and ((
                             (attrs) &
                             (NounPhraseParseAttr.PARSENUMERICASADJECTIVE)))
                               == (NounPhraseParseAttr.NO)):
                             ok = False
                     if (ok):
                         it.adj_morph.append(NounPhraseItemTextVar(v, t))
                         it.can_be_adj = True
                         if (_is_doubt_adj and t0 == t):
                             it.is_doubt_adjective = True
                         if (has_legal_verb and wf is not None
                                 and wf.is_in_dictionary):
                             it.can_be_noun = True
             can_be_noun_ = False
             if (isinstance(t, NumberToken)):
                 pass
             elif (v.class0_.is_noun
                   or ((wf is not None and wf.normal_case == "САМ"))):
                 can_be_noun_ = True
             elif (v.class0_.is_personal_pronoun):
                 if (items is None or len(items) == 0):
                     can_be_noun_ = True
                 else:
                     for it1 in items:
                         if (it1.is_verb):
                             return None
                     if (len(items) == 1):
                         if (items[0].can_be_adj_for_personal_pronoun):
                             can_be_noun_ = True
             elif ((v.class0_.is_pronoun and
                    ((items is None or len(items) == 0 or
                      ((len(items) == 1
                        and items[0].can_be_adj_for_personal_pronoun))))
                    and wf is not None) and
                   ((((wf.normal_case == "ТОТ" or wf.normal_full == "ТО"
                       or wf.normal_case == "ТО") or wf.normal_case == "ЭТО"
                      or wf.normal_case == "ВСЕ") or wf.normal_case == "ЧТО"
                     or wf.normal_case == "КТО"))):
                 if (wf.normal_case == "ВСЕ"):
                     if (t.next0_ is not None
                             and t.next0_.isValue("РАВНО", None)):
                         return None
                 can_be_noun_ = True
             elif (wf is not None and ((Utils.ifNotNull(
                     wf.normal_full, wf.normal_case))) == "КОТОРЫЙ"):
                 return None
             elif (v.class0_.is_proper and (isinstance(t, TextToken))):
                 if (t.length_char > 4 or v.class0_.is_proper_name):
                     can_be_noun_ = True
             if (can_be_noun_):
                 if (NounPhraseItem.tryAccordVariant(
                         items, (0 if items is None else len(items)), v)):
                     it.noun_morph.append(NounPhraseItemTextVar(v, t))
                     it.can_be_noun = True
         if (t0 != t):
             for v in it.adj_morph:
                 v.correctPrefix(Utils.asObjectOrNull(t0, TextToken), False)
             for v in it.noun_morph:
                 v.correctPrefix(Utils.asObjectOrNull(t0, TextToken), True)
         if (k == 1 and it.can_be_noun and not it.can_be_adj):
             if (t1 is not None):
                 it.end_token = t1
             else:
                 it.end_token = t0.next0_.next0_
             for v in it.noun_morph:
                 if (v.normal_value is not None
                         and (v.normal_value.find('-') < 0)):
                     v.normal_value = "{0}-{1}".format(
                         v.normal_value,
                         it.end_token.getNormalCaseText(
                             None, False, MorphGender.UNDEFINED, False))
         if (it.can_be_adj):
             if (NounPhraseItem.__m_std_adjectives.tryParse(
                     it.begin_token, TerminParseAttr.NO) is not None):
                 it.is_std_adjective = True
         if (can_be_prepos and it.can_be_noun):
             if (items is not None and len(items) > 0):
                 npt1 = NounPhraseHelper.tryParse(
                     t,
                     Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION)
                                     | (NounPhraseParseAttr.PARSEPRONOUNS) |
                                     (NounPhraseParseAttr.PARSEVERBS),
                                     NounPhraseParseAttr), 0)
                 if (npt1 is not None and npt1.end_char > t.end_char):
                     return None
             else:
                 npt1 = NounPhraseHelper.tryParse(
                     t.next0_,
                     Utils.valToEnum((NounPhraseParseAttr.PARSEPRONOUNS) |
                                     (NounPhraseParseAttr.PARSEVERBS),
                                     NounPhraseParseAttr), 0)
                 if (npt1 is not None):
                     mc = LanguageHelper.getCaseAfterPreposition((t).lemma)
                     if (not ((mc) & npt1.morph.case_).is_undefined):
                         return None
         if (it.can_be_noun or it.can_be_adj or k == 1):
             if (it.begin_token.morph.class0_.is_pronoun):
                 tt2 = it.end_token.next0_
                 if ((tt2 is not None and tt2.is_hiphen
                      and not tt2.is_whitespace_after)
                         and not tt2.is_whitespace_before):
                     tt2 = tt2.next0_
                 if (isinstance(tt2, TextToken)):
                     ss = (tt2).term
                     if ((ss == "ЖЕ" or ss == "БЫ" or ss == "ЛИ")
                             or ss == "Ж"):
                         it.end_token = tt2
                     elif (ss == "НИБУДЬ" or ss == "ЛИБО"
                           or (((ss == "ТО" and tt2.previous.is_hiphen))
                               and it.can_be_adj)):
                         it.end_token = tt2
                         for m in it.adj_morph:
                             m.normal_value = "{0}-{1}".format(
                                 m.normal_value, ss)
                             if (m.single_number_value is not None):
                                 m.single_number_value = "{0}-{1}".format(
                                     m.single_number_value, ss)
             return it
         if (t0 == t):
             if (t0.isValue("БИЗНЕС", None) and t0.next0_ is not None
                     and t0.next0_.chars == t0.chars):
                 t1 = t0.next0_
                 continue
             return it
     return None
Exemple #20
0
 def __getNameWithoutBrackets(begin: 'Token',
                              end: 'Token',
                              normalize_first_noun_group: bool = False,
                              normal_first_group_single: bool = False,
                              ignore_geo_referent: bool = False) -> str:
     """ Return the text between two tokens, leaving out an enclosing pair of quotes or brackets

     Args:
         begin(Token): first token of the span
         end(Token): last token of the span
         normalize_first_noun_group(bool): render the leading noun phrase through its normal-case text
         normal_first_group_single(bool): passed on to getNormalCaseText of the leading noun phrase
         ignore_geo_referent(bool): passed on to ProperNameHelper.getNameEx

     Returns:
         str: the text with trailing '*' and whitespace characters removed;
             None when only such characters remain
     """
     # A span wrapped in a bracket/quote pair is narrowed by one token on each side.
     if (BracketHelper.canBeStartOfSequence(begin, False, False)
             and BracketHelper.canBeEndOfSequence(end, False, begin, False)):
         begin, end = begin.next0_, end.previous

     text = None
     if normalize_first_noun_group and not begin.morph.class0_.is_preposition:
         phrase = NounPhraseHelper.tryParse(
             begin, NounPhraseParseAttr.REFERENTCANBENOUN, 0)
         # A lone noun that is unknown to the dictionary is not trusted.
         if (phrase is not None
                 and phrase.noun.getMorphClassInDictionary().is_undefined
                 and len(phrase.adjectives) == 0):
             phrase = None
         # Neither is a phrase that reaches beyond the requested span.
         if phrase is not None and phrase.end_token.end_char > end.end_char:
             phrase = None

         if phrase is not None:
             text = phrase.getNormalCaseText(None, normal_first_group_single,
                                             MorphGender.UNDEFINED, False)
             tail = phrase.end_token.next0_
             follower = None if tail is None else tail.next0_
             # ", <word that is both verb and adjective>" right after the phrase:
             # try to append that word in a form agreeing with the phrase.
             if (tail is not None and follower is not None and tail.is_comma
                     and isinstance(follower, TextToken)
                     and follower.end_char <= end.end_char
                     and follower.morph.class0_.is_verb
                     and follower.morph.class0_.is_adjective):
                 for form in follower.morph.items:
                     if not (form.gender == phrase.morph.gender
                             or ((form.gender) & (phrase.morph.gender)) !=
                             (MorphGender.UNDEFINED)):
                         continue
                     if ((form.case_) & phrase.morph.case_).is_undefined:
                         continue
                     if not (form.number == phrase.morph.number
                             or ((form.number) & (phrase.morph.number)) !=
                             (MorphNumber.UNDEFINED)):
                         continue
                     # First agreeing variant decides; later ones are not tried.
                     if isinstance(form, MorphWordForm):
                         word = form.normal_case
                     else:
                         word = follower.term
                     target = MorphBaseInfo._new549(
                         MorphClass.ADJECTIVE, phrase.morph.gender,
                         phrase.morph.number, phrase.morph.language)
                     word = Morphology.getWordform(word, target)
                     if word is not None:
                         text = "{0}, {1}".format(text, word)
                         tail = follower.next0_
                     break
             # Whatever is left of the span is appended as plain text.
             if tail is not None and tail.end_char <= end.end_char:
                 rest = ProperNameHelper.getNameEx(tail, end,
                                                   MorphClass.UNDEFINED,
                                                   MorphCase.UNDEFINED,
                                                   MorphGender.UNDEFINED,
                                                   True, ignore_geo_referent)
                 if not Utils.isNullOrEmpty(rest):
                     # No separating space before punctuation-like first characters.
                     pattern = "{0} {1}" if str.isalnum(rest[0]) else "{0}{1}"
                     text = pattern.format(text, rest)
         elif isinstance(begin, TextToken) and begin.chars.is_cyrillic_letter:
             dict_class = begin.getMorphClassInDictionary()
             if not dict_class.is_undefined:
                 text = begin.getNormalCaseText(dict_class, False,
                                                MorphGender.UNDEFINED, False)
                 if begin.end_char < end.end_char:
                     # NOTE(review): this call passes False rather than
                     # ignore_geo_referent - confirm that is intended.
                     remainder = ProperNameHelper.getNameEx(
                         begin.next0_, end, MorphClass.UNDEFINED,
                         MorphCase.UNDEFINED, MorphGender.UNDEFINED, True,
                         False)
                     text = "{0} {1}".format(text, remainder)

     # Fallback: plain text of the whole span.
     if text is None:
         text = ProperNameHelper.getNameEx(begin, end, MorphClass.UNDEFINED,
                                           MorphCase.UNDEFINED,
                                           MorphGender.UNDEFINED, True,
                                           ignore_geo_referent)
     if Utils.isNullOrEmpty(text):
         return text

     # Cut trailing asterisks and whitespace.
     keep = len(text)
     while keep > 0 and (text[keep - 1] == '*'
                         or Utils.isWhitespace(text[keep - 1])):
         keep -= 1
     if keep < len(text):
         if keep == 0:
             return None
         text = text[0:keep]
     return text
Exemple #21
0
 def tryParse(t: 'Token',
              prev: 'FundsItemToken' = None) -> 'FundsItemToken':
     """ Try to recognise one funds item starting at the given token

     Tokens are scanned forward from ``t``; prepositions, adverbs and a fixed
     set of filler words are skipped until a decisive token is met.

     Args:
         t(Token): token to start from
         prev(FundsItemToken): previously parsed item; only its ``typ`` is
             consulted (a COUNT item enables the share-type lookup)

     Returns:
         FundsItemToken: the recognised item (ORG, SUM, PRICE, NOUN, PERCENT
             or COUNT), or None when nothing matches
     """
     if t is None:
         return None

     def _tokens_from(start):
         # Lazily yields ``start`` and then each following token via ``next0_``.
         node = start
         while node is not None:
             yield node
             node = node.next0_

     leading_fillers = ("СУММА", "ОКОЛО", "БОЛЕЕ", "МЕНЕЕ", "СВЫШЕ")
     price_words = ("НОМИНАЛ", "ЦЕНА", "СТОИМОСТЬ", "СТОИТЬ")
     trailing_fillers = ("НОМИНАЛЬНАЯ", "ОБЩАЯ", "СОСТАВЛЯТЬ")

     item_typ = FundsItemTyp.UNDEFINED
     for tt in _tokens_from(t):
         if tt.morph.class0_.is_preposition or tt.morph.class0_.is_adverb:
             continue
         if any(tt.isValue(word, None) for word in leading_fillers):
             continue
         if any(tt.isValue(word, None) for word in price_words):
             # A price word turns a later money referent into a PRICE item.
             item_typ = FundsItemTyp.PRICE
             continue
         if any(tt.isValue(word, None) for word in trailing_fillers):
             continue

         referent = tt.getReferent()
         if isinstance(referent, OrganizationReferent):
             return FundsItemToken._new428(t, tt, FundsItemTyp.ORG, referent)
         if isinstance(referent, MoneyReferent):
             if item_typ == FundsItemTyp.UNDEFINED:
                 item_typ = FundsItemTyp.SUM
             after = tt.next0_
             # "<money> ЗА АКЦИЯ" is treated as a price as well.
             if (after is not None and after.isValue("ЗА", None)
                     and after.next0_ is not None
                     and (after.next0_.isValue("АКЦИЯ", None)
                          or after.next0_.isValue("АКЦІЯ", None))):
                 item_typ = FundsItemTyp.PRICE
             return FundsItemToken._new428(t, tt, item_typ, referent)
         if referent is not None:
             # Any other referent ends the attempt.
             return None

         phrase = NounPhraseHelper.tryParse(tt, NounPhraseParseAttr.NO, 0)
         if phrase is not None and phrase.noun.isValue("ПАКЕТ", None):
             # Look at the noun phrase that follows the "ПАКЕТ" phrase instead.
             phrase = NounPhraseHelper.tryParse(phrase.end_token.next0_,
                                                NounPhraseParseAttr.NO, 0)
         if phrase is not None:
             item = None
             noun = phrase.noun
             if noun.isValue("АКЦІЯ", None) or noun.isValue("АКЦИЯ", None):
                 item = FundsItemToken._new430(t, phrase.end_token,
                                               FundsItemTyp.NOUN,
                                               FundsKind.STOCK)
                 if len(phrase.adjectives) > 0:
                     first_adj = phrase.adjectives[0]
                     for share_type in FundsItemToken.__m_act_types:
                         if not first_adj.isValue(share_type, None):
                             continue
                         item.string_val = phrase.getNormalCaseText(
                             None, True, MorphGender.UNDEFINED,
                             False).lower()
                         if item.string_val == "голосовавшая акция":
                             item.string_val = "голосующая акция"
                         break
             elif ((noun.isValue("БУМАГА", None)
                    or noun.isValue("ПАПІР", None))
                   and phrase.end_token.previous is not None
                   and (phrase.end_token.previous.isValue("ЦЕННЫЙ", None)
                        or phrase.end_token.previous.isValue("ЦІННИЙ", None))):
                 item = FundsItemToken._new431(t, phrase.end_token,
                                               FundsItemTyp.NOUN,
                                               FundsKind.STOCK,
                                               "ценные бумаги")
             elif ((noun.isValue("КАПИТАЛ", None)
                    or noun.isValue("КАПІТАЛ", None))
                   and len(phrase.adjectives) > 0
                   and (phrase.adjectives[0].isValue("УСТАВНОЙ", None)
                        or phrase.adjectives[0].isValue("УСТАВНЫЙ", None)
                        or phrase.adjectives[0].isValue("СТАТУТНИЙ", None))):
                 item = FundsItemToken._new430(t, phrase.end_token,
                                               FundsItemTyp.NOUN,
                                               FundsKind.CAPITAL)
             if item is not None:
                 # An organization right after the noun is attached to the item.
                 org_tok = item.kit.processReferent(
                     OrganizationAnalyzer.ANALYZER_NAME,
                     item.end_token.next0_)
                 if org_tok is not None:
                     item.ref = org_tok.referent
                     item.end_token = org_tok.end_token
                 return item

         if prev is not None and prev.typ == FundsItemTyp.COUNT:
             share_type = next(
                 (cand for cand in FundsItemToken.__m_act_types
                  if tt.isValue(cand, None)), None)
             if share_type is not None:
                 # Needs a funds context: a FundsReferent within 100 tokens
                 # before, or a STOCK item within 100 tokens after.
                 confirmed = False
                 steps = 0
                 back = tt.previous
                 while back is not None:
                     steps += 1
                     if steps > 100:
                         break
                     refs = back.getReferents()
                     if refs is not None and any(
                             isinstance(r, FundsReferent) for r in refs):
                         confirmed = True
                         break
                     back = back.previous
                 if not confirmed:
                     steps = 0
                     ahead = tt.next0_
                     while ahead is not None:
                         steps += 1
                         if steps > 100:
                             break
                         probe = FundsItemToken.tryParse(ahead, None)
                         if (probe is not None
                                 and probe.kind == FundsKind.STOCK):
                             confirmed = True
                             break
                         ahead = ahead.next0_
                 if confirmed:
                     item = FundsItemToken._new433(t, tt, FundsKind.STOCK,
                                                   FundsItemTyp.NOUN)
                     item.string_val = "{0}ая акция".format(
                         share_type[0:len(share_type) - 2].lower())
                     return item

         if not isinstance(tt, NumberToken):
             return None

         num = NumberHelper.tryParseNumberWithPostfix(tt)
         if num is None:
             # Plain number: a count, optionally followed by "ШТУКА".
             last = tt
             if (last.next0_ is not None
                     and last.next0_.isValue("ШТУКА", None)):
                 last = last.next0_
             return FundsItemToken._new434(
                 t, last, FundsItemTyp.COUNT,
                 Utils.asObjectOrNull(tt, NumberToken))
         if tt.previous is not None and tt.previous.isValue("НА", None):
             return None
         if num.ex_typ != NumberExType.PERCENT:
             return None

         item = FundsItemToken._new434(t, num.end_token,
                                       FundsItemTyp.PERCENT, num)
         cursor = num.end_token.next0_
         # "<percent> + <number>" / "<percent> ПЛЮС <number>"
         if (cursor is not None
                 and (cursor.isChar('+') or cursor.isValue("ПЛЮС", None))
                 and isinstance(cursor.next0_, NumberToken)):
             item.end_token = cursor.next0_
             cursor = item.end_token.next0_
         # Skip a glued "-<lowercase word>" suffix.
         if (cursor is not None and cursor.is_hiphen
                 and cursor.next0_ is not None
                 and cursor.next0_.chars.is_all_lower
                 and not cursor.is_whitespace_after):
             cursor = cursor.next0_.next0_
         if (cursor is not None
                 and (cursor.isValue("ДОЛЯ", None)
                      or cursor.isValue("ЧАСТКА", None))):
             item.end_token = cursor
         return item
     return None
Exemple #22
0
 def __tryAttach(self, t : 'Token', key_word : bool) -> 'ReferentToken':
     """ Try to assemble a BankDataReferent from the tokens starting at ``t``

     Walks forward from ``t`` collecting UriReferent tokens whose scheme is
     accepted by ``BankAnalyzer.__isBankReq`` together with ORGANIZATION
     referents (the bank and, optionally, a correspondent bank).

     Args:
         t(Token): token to start from
         key_word(bool): when False the scan is abandoned earlier while no
             requisite URI has been collected yet (see the checks in the loop)

     Returns:
         ReferentToken: a token spanning from the start token to the last
             accepted token and carrying the new BankDataReferent; None when
             no URI was collected, when neither a "Р/С" nor a "Л/С" scheme is
             present, or when there is a single URI and no organization
     """
     if (t is None): 
         return None
     t0 = t  # first token of the result
     t1 = t  # last token accepted into the result so far
     uris_keys = None  # schemes of the collected URIs (parallel to ``uris``)
     uris = None  # collected UriReferent objects; None until the first one is accepted
     org0_ = None  # organization taken as the bank
     cor_org = None  # organization taken as the correspondent bank
     org_is_bank = False  # whether ``org0_`` was judged to be a bank
     empty = 0  # count of consecutive tokens that contributed nothing
     last_uri = None  # most recently accepted URI
     first_pass2749 = True
     while True:
         # Emulated for-loop: advance ``t`` on every pass except the first.
         if first_pass2749: first_pass2749 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char and t != t0): 
             break
         # Commas, prepositions and slashes are skipped silently.
         if (t.is_comma or t.morph.class0_.is_preposition or t.isCharOf("/\\")): 
             continue
         bank_keyword = False
         # Step over "ПОЛНЫЙ НАИМЕНОВАНИЕ" / "ПОЛНЫЙ НАЗВАНИЕ".
         if (t.isValue("ПОЛНЫЙ", None) and t.next0_ is not None and ((t.next0_.isValue("НАИМЕНОВАНИЕ", None) or t.next0_.isValue("НАЗВАНИЕ", None)))): 
             t = t.next0_.next0_
             if (t is None): 
                 break
         if (t.isValue("БАНК", None)): 
             # The word may itself already be an ORGANIZATION referent token.
             if ((isinstance(t, ReferentToken)) and t.getReferent().type_name == "ORGANIZATION"): 
                 bank_keyword = True
             # Look past a following noun phrase and an optional colon.
             tt = t.next0_
             npt = NounPhraseHelper.tryParse(tt, NounPhraseParseAttr.NO, 0)
             if (npt is not None): 
                 tt = npt.end_token.next0_
             if (tt is not None and tt.isChar(':')): 
                 tt = tt.next0_
             if (tt is not None): 
                 if (not bank_keyword): 
                     # Plain keyword: continue from the token after it.
                     t = tt
                     bank_keyword = True
                 elif (tt.getReferent() is not None and tt.getReferent().type_name == "ORGANIZATION"): 
                     t = tt
         r = t.getReferent()
         if (r is not None and r.type_name == "ORGANIZATION"): 
             # Check the organization and up to three of its parents for KIND == "Bank"
             # (the third argument of compareStrings is presumably "ignore case" - confirm).
             is_bank = False
             kk = 0
             rr = r
             while rr is not None and (kk < 4): 
                 is_bank = Utils.compareStrings(Utils.ifNotNull(rr.getStringValue("KIND"), ""), "Bank", True) == 0
                 if (is_bank): 
                     break
                 rr = rr.parent_referent; kk += 1
             # A preceding bank keyword is enough to treat the organization as a bank.
             if (not is_bank and bank_keyword): 
                 is_bank = True
             # A non-bank organization after an "ИНН" URI was collected rejects the whole attempt.
             if (not is_bank and uris is not None and "ИНН" in uris_keys): 
                 return None
             # Organization preceded by "В" when the last URI has scheme "К/С":
             # stored as the correspondent bank.
             if ((last_uri is not None and last_uri.scheme == "К/С" and t.previous is not None) and t.previous.isValue("В", None)): 
                 cor_org = r
                 t1 = t
             elif (org0_ is None or ((not org_is_bank and is_bank))): 
                 # First organization seen, or a bank replacing a non-bank one.
                 org0_ = r
                 t1 = t
                 org_is_bank = is_bank
                 if (is_bank): 
                     continue
             if (uris is None and not key_word): 
                 return None
             continue
         if (isinstance(r, UriReferent)): 
             u = Utils.asObjectOrNull(r, UriReferent)
             if (uris is None): 
                 # The very first URI must be a bank requisite, and must not be
                 # an "ИНН" that is followed by a line break.
                 if (not BankAnalyzer.__isBankReq(u.scheme)): 
                     return None
                 if (u.scheme == "ИНН" and t.is_newline_after): 
                     return None
                 uris = list()
                 uris_keys = list()
             else: 
                 # Later URIs end the scan when they are not bank requisites,
                 # repeat a scheme, or are an "ИНН" after unrecognised tokens.
                 if (not BankAnalyzer.__isBankReq(u.scheme)): 
                     break
                 if (u.scheme in uris_keys): 
                     break
                 if (u.scheme == "ИНН"): 
                     if (empty > 0): 
                         break
             uris_keys.append(u.scheme)
             uris.append(u)
             last_uri = u
             t1 = t
             empty = 0
             continue
         elif (uris is None and not key_word and not org_is_bank): 
             return None
         # GEO and ADDRESS referents are tolerated but count as "empty".
         if (r is not None and ((r.type_name == "GEO" or r.type_name == "ADDRESS"))): 
             empty += 1
             continue
         if (isinstance(t, TextToken)): 
             if (t.isValue("ПОЛНЫЙ", None) or t.isValue("НАИМЕНОВАНИЕ", None) or t.isValue("НАЗВАНИЕ", None)): 
                 pass
             elif (t.chars.is_letter): 
                 tok = BankAnalyzer.__m_ontology.tryParse(t, TerminParseAttr.NO)
                 if (tok is not None): 
                     # Known ontology term: jump to its end and reset the counter.
                     t = tok.end_token
                     empty = 0
                 else: 
                     empty += 1
                     # A line-starting noun phrase followed by ':' ends the scan
                     # (presumably the label of another field - confirm).
                     if (t.is_newline_before): 
                         nnn = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
                         if (nnn is not None and nnn.end_token.next0_ is not None and nnn.end_token.next0_.isChar(':')): 
                             break
                 if (uris is None): 
                     break
         if (empty > 2): 
             break
         if (empty > 0 and t.isChar(':') and t.is_newline_after): 
             break
         # A number at the start of a line that is not followed by a letter ends the scan.
         if (((isinstance(t, NumberToken)) and t.is_newline_before and t.next0_ is not None) and not t.next0_.chars.is_letter): 
             break
     if (uris is None): 
         return None
     # At least one of the "Р/С" / "Л/С" schemes is required
     # (presumably settlement / personal account numbers - confirm).
     if (not "Р/С" in uris_keys and not "Л/С" in uris_keys): 
         return None
     # NOTE(review): ``ok`` is assigned here but never read afterwards.
     ok = False
     if ((len(uris) < 2) and org0_ is None): 
         return None
     bdr = BankDataReferent()
     for u in uris: 
         bdr.addSlot(BankDataReferent.ATTR_ITEM, u, False, 0)
     if (org0_ is not None): 
         bdr.addSlot(BankDataReferent.ATTR_BANK, org0_, False, 0)
     if (cor_org is not None): 
         bdr.addSlot(BankDataReferent.ATTR_CORBANK, cor_org, False, 0)
     # If the token just before the start is an organization, also copy its own
     # bank-requisite URIs whose scheme was not collected above.
     org0 = (None if t0.previous is None else t0.previous.getReferent())
     if (org0 is not None and org0.type_name == "ORGANIZATION"): 
         for s in org0.slots: 
             if (isinstance(s.value, UriReferent)): 
                 u = Utils.asObjectOrNull(s.value, UriReferent)
                 if (BankAnalyzer.__isBankReq(u.scheme)): 
                     if (not u.scheme in uris_keys): 
                         bdr.addSlot(BankDataReferent.ATTR_ITEM, u, False, 0)
     return ReferentToken(bdr, t0, t1)
Exemple #23
0
 def tryAttachList(t: 'Token',
                   max_count: int = 20) -> typing.List['DateItemToken']:
     """ Collect consecutive date items starting at the given token

     Args:
         t(Token): token to start from
         max_count(int): stop collecting once this many items are gathered
             (a value of 0 or less disables the limit)

     Returns:
         typing.List[DateItemToken]: the collected items, or None when the
             first token yields no item, the first item is a delimiter, or
             nothing survives the clean-up
     """
     first = DateItemToken.tryAttach(t, None)
     if first is None or first.typ == DateItemToken.DateItemType.DELIM:
         return None

     res = [first]
     last = first
     tt = last.end_token.next0_
     while tt is not None:
         # Words from the "empty" list are stepped over.
         if (isinstance(tt, TextToken)
                 and (tt).checkValue(DateItemToken.M_EMPTY_WORDS) is not None):
             tt = tt.next0_
             continue

         item = DateItemToken.tryAttach(tt, res)
         if item is None:
             # Only an adjective/pronoun on the same line, not in Latin
             # letters, may be skipped; anything else ends the list.
             skippable = (not tt.is_newline_before
                          and not tt.chars.is_latin_letter
                          and tt.morph is not None
                          and tt.morph.check((MorphClass.ADJECTIVE)
                                             | MorphClass.PRONOUN))
             if not skippable:
                 break
             tt = tt.next0_
             continue

         # Across a line break only "month + year" and "day + month" continue.
         if tt.is_newline_before:
             if not ((last.typ == DateItemToken.DateItemType.MONTH
                      and item.can_be_year)
                     or (last.typ == DateItemToken.DateItemType.NUMBER
                         and last.can_be_day
                         and item.typ == DateItemToken.DateItemType.MONTH)):
                 break

         # A number after a half-year/quarter, or a number above 1990 after
         # a pointer, is promoted to a year.
         if (item.can_be_year
                 and item.typ == DateItemToken.DateItemType.NUMBER):
             if ((last.typ == DateItemToken.DateItemType.HALFYEAR
                  or last.typ == DateItemToken.DateItemType.QUARTAL)
                     or (last.typ == DateItemToken.DateItemType.POINTER
                         and item.int_value > 1990)):
                 item.typ = DateItemToken.DateItemType.YEAR

         last = item
         res.append(last)
         if max_count > 0 and len(res) >= max_count:
             break
         tt = last.end_token.next0_

     # Trailing delimiters carry no information.
     while len(res) > 0 and res[-1].typ == DateItemToken.DateItemType.DELIM:
         res.pop()

     # A final number with a non-hour postfix is dropped, unless it follows
     # a ':' delimiter in a list of more than three items.
     if len(res) > 0 and res[-1].typ == DateItemToken.DateItemType.NUMBER:
         postfixed = NumberHelper.tryParseNumberWithPostfix(
             res[-1].begin_token)
         if postfixed is not None and postfixed.ex_typ != NumberExType.HOUR:
             after_colon = (len(res) > 3
                            and res[-2].typ == DateItemToken.DateItemType.DELIM
                            and res[-2].string_value == ":")
             if not after_colon:
                 res.pop()
     if len(res) == 0:
         return None

     # "<month>, <year>": the comma is removed. This is the only removal the
     # original index loop could perform, and only for exactly three items.
     if (len(res) == 3
             and res[1].typ == DateItemToken.DateItemType.DELIM
             and res[1].begin_token.is_comma
             and res[0].typ == DateItemToken.DateItemType.MONTH
             and res[2].can_be_year):
         del res[1]

     # A final number that starts a longer noun phrase is not a date part.
     if res[-1].typ == DateItemToken.DateItemType.NUMBER:
         tail = res[-1]
         phrase = NounPhraseHelper.tryParse(tail.begin_token,
                                            NounPhraseParseAttr.NO, 0)
         if phrase is not None and phrase.end_char > tail.end_char:
             res.pop()
             if (len(res) > 0
                     and res[-1].typ == DateItemToken.DateItemType.DELIM):
                 res.pop()
     if len(res) == 0:
         return None

     # Two items with no whitespace before, between or after are rejected.
     if (len(res) == 2 and not res[0].is_whitespace_after
             and not res[0].is_whitespace_before
             and not res[1].is_whitespace_after):
         return None
     return res
Exemple #24
0
 def __tryParse(t: 'Token',
                is_in_lit: bool,
                max_char: int = 0) -> typing.List['ReferentToken']:
     if (t is None):
         return None
     is_bracket_regime = False
     if (t.previous is not None and t.previous.isChar('(')):
         is_bracket_regime = True
     blt = BookLinkToken.tryParse(t, 0)
     if (blt is None):
         blt = BookLinkToken.tryParseAuthor(t, FioTemplateType.UNDEFINED)
     if (blt is None and not is_bracket_regime):
         return None
     t0 = t
     coef = 0
     is_electr_res = False
     decree = None
     regtyp = BookLinkAnalyzer.RegionTyp.UNDEFINED
     num = None
     spec_see = None
     book_prev = None
     if (is_bracket_regime):
         regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS
     elif (blt.typ == BookLinkTyp.PERSON):
         if (not is_in_lit):
             return None
         regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS
     elif (blt.typ == BookLinkTyp.NUMBER):
         num = blt.value
         t = blt.end_token.next0_
         if (t is None or t.is_newline_before):
             return None
         if (not t.is_whitespace_before):
             if (isinstance(t, NumberToken)):
                 n = (t).value
                 if ((((n == "3" or n == "0")) and not t.is_whitespace_after
                      and (isinstance(t.next0_, TextToken)))
                         and t.next0_.chars.is_all_lower):
                     pass
                 else:
                     return None
             elif (not ((isinstance(t, TextToken)))
                   or t.chars.is_all_lower):
                 r = t.getReferent()
                 if (isinstance(r, PersonReferent)):
                     pass
                 elif (is_in_lit and r is not None
                       and r.type_name == "DECREE"):
                     pass
                 else:
                     return None
         first_pass2757 = True
         while True:
             if first_pass2757: first_pass2757 = False
             else: t = t.next0_
             if (not (t is not None)): break
             if (isinstance(t, NumberToken)):
                 break
             if (not ((isinstance(t, TextToken)))):
                 break
             if (BracketHelper.canBeStartOfSequence(t, True, False)):
                 break
             if (not t.chars.is_letter):
                 continue
             bbb = BookLinkToken.tryParse(t, 0)
             if (bbb is not None):
                 if (bbb.typ == BookLinkTyp.TAMZE):
                     spec_see = bbb
                     t = bbb.end_token.next0_
                     break
                 if (bbb.typ == BookLinkTyp.SEE):
                     t = bbb.end_token
                     continue
             break
         if (spec_see is not None and spec_see.typ == BookLinkTyp.TAMZE):
             coef += 1
             max0_ = 1000
             tt = t0
             while tt is not None and max0_ > 0:
                 if (isinstance(tt.getReferent(), BookLinkRefReferent)):
                     book_prev = (tt.getReferent()).book
                     break
                 tt = tt.previous
                 max0_ -= 1
         blt1 = BookLinkToken.tryParseAuthor(t, FioTemplateType.UNDEFINED)
         if (blt1 is not None and blt1.typ == BookLinkTyp.PERSON):
             regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS
         else:
             ok = False
             tt = t
             first_pass2758 = True
             while True:
                 if first_pass2758: first_pass2758 = False
                 else: tt = (None if tt is None else tt.next0_)
                 if (not (tt is not None)): break
                 if (tt.is_newline_before):
                     break
                 if (is_in_lit and tt.getReferent() is not None
                         and tt.getReferent().type_name == "DECREE"):
                     ok = True
                     decree = tt
                     break
                 bbb = BookLinkToken.tryParse(tt, 0)
                 if (bbb is None):
                     continue
                 if (bbb.typ == BookLinkTyp.ELECTRONRES):
                     is_electr_res = True
                     ok = True
                     break
                 if (bbb.typ == BookLinkTyp.DELIMETER):
                     tt = bbb.end_token.next0_
                     if (BookLinkToken.tryParseAuthor(
                             tt, FioTemplateType.UNDEFINED) is not None):
                         ok = True
                         break
                     bbb = BookLinkToken.tryParse(tt, 0)
                     if (bbb is not None):
                         if (bbb.typ == BookLinkTyp.EDITORS
                                 or bbb.typ == BookLinkTyp.TRANSLATE
                                 or bbb.typ == BookLinkTyp.SOSTAVITEL):
                             ok = True
                             break
             if (not ok and not is_in_lit):
                 if (BookLinkToken.checkLinkBefore(t0, num)):
                     pass
                 else:
                     return None
             regtyp = BookLinkAnalyzer.RegionTyp.NAME
     else:
         return None
     res = BookLinkReferent()
     corr_authors = list()
     t00 = t
     blt00 = None
     start_of_name = None
     prev_pers_templ = FioTemplateType.UNDEFINED
     if (regtyp == BookLinkAnalyzer.RegionTyp.AUTHORS):
         first_pass2759 = True
         while True:
             if first_pass2759: first_pass2759 = False
             else: t = t.next0_
             if (not (t is not None)): break
             if (max_char > 0 and t.begin_char >= max_char):
                 break
             if (t.isCharOf(".;") or t.is_comma_and):
                 continue
             if (t.isChar('/')):
                 break
             if ((t.isChar('(') and t.next0_ is not None
                  and t.next0_.isValue("EDS", None))
                     and t.next0_.next0_ is not None
                     and t.next0_.next0_.isChar(')')):
                 t = t.next0_.next0_.next0_
                 break
             blt = BookLinkToken.tryParseAuthor(t, prev_pers_templ)
             if (blt is None and t.previous is not None
                     and t.previous.is_and):
                 blt = BookLinkToken.tryParseAuthor(
                     t.previous, FioTemplateType.UNDEFINED)
             if (blt is None):
                 if ((isinstance(t.getReferent(), OrganizationReferent))
                         and blt00 is not None):
                     bbb2 = BookLinkToken.tryParse(t.next0_, 0)
                     if (bbb2 is not None):
                         if (bbb2.typ == BookLinkTyp.YEAR):
                             res.addSlot(BookLinkReferent.ATTR_AUTHOR,
                                         t.getReferent(), False, 0)
                             res.year = int(bbb2.value)
                             coef += .5
                             t = bbb2.end_token.next0_
                 break
             if (blt.typ == BookLinkTyp.PERSON):
                 tt2 = blt.end_token.next0_
                 bbb2 = BookLinkToken.tryParse(tt2, 0)
                 if (bbb2 is not None):
                     if (bbb2.typ == BookLinkTyp.YEAR):
                         res.year = int(bbb2.value)
                         coef += .5
                         blt.end_token = bbb2.end_token
                         blt00 = (None)
                 if (blt00 is not None
                         and ((blt00.end_token.next0_ == blt.begin_token
                               or blt.begin_token.previous.isChar('.')))):
                     tt11 = blt.end_token.next0_
                     nex = BookLinkToken.tryParse(tt11, 0)
                     if (nex is not None
                             and nex.typ == BookLinkTyp.ANDOTHERS):
                         pass
                     else:
                         if (tt11 is None):
                             break
                         if (tt11.isChar('/') and tt11.next0_ is not None
                                 and tt11.next0_.isChar('/')):
                             break
                         if (tt11.isChar(':')):
                             break
                         if ((str(blt).find('.') < 0)
                                 and str(blt00).find('.') > 0):
                             break
                         if ((isinstance(tt11, TextToken))
                                 and tt11.chars.is_all_lower):
                             break
                         if (tt11.isCharOf(",.;")
                                 and tt11.next0_ is not None):
                             tt11 = tt11.next0_
                         nex = BookLinkToken.tryParse(tt11, 0)
                         if (nex is not None
                                 and nex.typ != BookLinkTyp.PERSON
                                 and nex.typ != BookLinkTyp.ANDOTHERS):
                             break
                 elif (
                     (blt00 is not None
                      and blt00.person_template != FioTemplateType.UNDEFINED
                      and blt.person_template != blt00.person_template)
                         and blt.person_template
                         == FioTemplateType.NAMESURNAME):
                     if (blt.end_token.next0_ is None
                             or not blt.end_token.next0_.is_comma_and):
                         break
                     if (BookLinkToken.tryParseAuthor(
                             blt.end_token.next0_.next0_,
                             FioTemplateType.UNDEFINED) is not None):
                         pass
                     else:
                         break
                 if (blt00 is None and blt.person_template
                         == FioTemplateType.NAMESURNAME):
                     tt = blt.end_token.next0_
                     if (tt is not None and tt.is_hiphen):
                         tt = tt.next0_
                     if (isinstance(tt, NumberToken)):
                         break
                 BookLinkAnalyzer.__addAuthor(res, blt)
                 coef += 1
                 t = blt.end_token
                 if (isinstance(t.getReferent(), PersonReferent)):
                     corr_authors.append(
                         Utils.asObjectOrNull(t, ReferentToken))
                 blt00 = blt
                 prev_pers_templ = blt.person_template
                 start_of_name = blt.start_of_name
                 if ((start_of_name) is not None):
                     t = t.next0_
                     break
                 continue
             if (blt.typ == BookLinkTyp.ANDOTHERS):
                 coef += .5
                 t = blt.end_token.next0_
                 res.authors_and_other = True
                 break
             break
     if (t is None):
         return None
     if ((t.is_newline_before and t != t0 and num is None) and res.findSlot(
             BookLinkReferent.ATTR_AUTHOR, None, True) is None):
         return None
     if (start_of_name is None):
         if (t.chars.is_all_lower):
             coef -= (1)
         if (t.chars.is_latin_letter and not is_electr_res and num is None):
             if (res.getSlotValue(BookLinkReferent.ATTR_AUTHOR) is None):
                 return None
     tn0 = t
     tn1 = None
     uri = None
     next_num = None
     wrapnn393 = RefOutArgWrapper(0)
     inoutres394 = Utils.tryParseInt(Utils.ifNotNull(num, ""), wrapnn393)
     nn = wrapnn393.value
     if (inoutres394):
         next_num = str((nn + 1))
     br = (BracketHelper.tryParse(
         t,
         Utils.valToEnum(
             (BracketParseAttr.CANCONTAINSVERBS) |
             (BracketParseAttr.CANBEMANYLINES), BracketParseAttr), 100)
           if BracketHelper.canBeStartOfSequence(t, True, False) else None)
     if (br is not None):
         t = t.next0_
     pages = None
     first_pass2760 = True
     while True:
         if first_pass2760: first_pass2760 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (max_char > 0 and t.begin_char >= max_char):
             break
         if (br is not None and br.end_token == t):
             tn1 = t
             break
         tit = TitleItemToken.tryAttach(t)
         if (tit is not None):
             if ((tit.typ == TitleItemToken.Types.TYP and tn0 == t
                  and br is None) and BracketHelper.canBeStartOfSequence(
                      tit.end_token.next0_, True, False)):
                 br = BracketHelper.tryParse(tit.end_token.next0_,
                                             BracketParseAttr.NO, 100)
                 if (br is not None):
                     coef += (1)
                     if (num is not None):
                         coef += 1
                     tn0 = br.begin_token
                     tn1 = br.end_token
                     res.typ = tit.value.lower()
                     t = br.end_token.next0_
                     break
         if (t.is_newline_before and t != tn0):
             if (br is not None and (t.end_char < br.end_char)):
                 pass
             elif (not MiscHelper.canBeStartOfSentence(t)):
                 pass
             else:
                 if (t.newlines_before_count > 1):
                     break
                 if ((isinstance(t, NumberToken)) and num is not None
                         and (t).int_value is not None):
                     if (num == str(((t).int_value - 1))):
                         break
                 elif (num is not None):
                     pass
                 else:
                     nnn = NounPhraseHelper.tryParse(
                         t.previous,
                         Utils.valToEnum(
                             ((NounPhraseParseAttr.PARSEPREPOSITION) |
                              (NounPhraseParseAttr.PARSEADVERBS) |
                              (NounPhraseParseAttr.PARSENUMERICASADJECTIVE))
                             | (NounPhraseParseAttr.MULTILINES),
                             NounPhraseParseAttr), 0)
                     if (nnn is not None and nnn.end_char >= t.end_char):
                         pass
                     else:
                         break
         if (t.isCharOf(".;") and t.whitespaces_after_count > 0):
             tit = TitleItemToken.tryAttach(t.next0_)
             if ((tit) is not None):
                 if (tit.typ == TitleItemToken.Types.TYP):
                     break
             stop = True
             words = 0
             notwords = 0
             tt = t.next0_
             first_pass2761 = True
             while True:
                 if first_pass2761: first_pass2761 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 blt0 = BookLinkToken.tryParse(tt, 0)
                 if (blt0 is None):
                     if (tt.is_newline_before):
                         break
                     if ((isinstance(tt, TextToken)) and
                             not tt.getMorphClassInDictionary().is_undefined
                         ):
                         words += 1
                     else:
                         notwords += 1
                     if (words > 6 and words > (notwords * 4)):
                         stop = False
                         break
                     continue
                 if ((blt0.typ == BookLinkTyp.DELIMETER
                      or blt0.typ == BookLinkTyp.TRANSLATE
                      or blt0.typ == BookLinkTyp.TYPE)
                         or blt0.typ == BookLinkTyp.GEO
                         or blt0.typ == BookLinkTyp.PRESS):
                     stop = False
                 break
             if (br is not None
                     and br.end_token.previous.end_char > t.end_char):
                 stop = False
             if (stop):
                 break
         if (t == decree):
             t = t.next0_
             break
         blt = BookLinkToken.tryParse(t, 0)
         if (blt is None):
             tn1 = t
             continue
         if (blt.typ == BookLinkTyp.DELIMETER):
             break
         if (((blt.typ == BookLinkTyp.MISC or blt.typ
               == BookLinkTyp.TRANSLATE or blt.typ == BookLinkTyp.NAMETAIL)
              or blt.typ == BookLinkTyp.TYPE
              or blt.typ == BookLinkTyp.VOLUME)
                 or blt.typ == BookLinkTyp.PAGERANGE
                 or blt.typ == BookLinkTyp.PAGES):
             coef += 1
             break
         if (blt.typ == BookLinkTyp.GEO or blt.typ == BookLinkTyp.PRESS):
             if (t.previous.is_hiphen or t.previous.isCharOf(".;")
                     or blt.add_coef > 0):
                 break
         if (blt.typ == BookLinkTyp.YEAR):
             if (t.previous is not None and t.previous.is_comma):
                 break
         if (blt.typ == BookLinkTyp.ELECTRONRES):
             is_electr_res = True
             break
         if (blt.typ == BookLinkTyp.URL):
             if (t == tn0 or t.previous.isCharOf(":.")):
                 is_electr_res = True
                 break
         tn1 = t
     if (tn1 is None and start_of_name is None):
         if (is_electr_res):
             uri_re = BookLinkReferent()
             rt0 = ReferentToken(uri_re, t00, t)
             rts0 = list()
             bref0 = BookLinkRefReferent._new389(uri_re)
             if (num is not None):
                 bref0.number = num
             rt01 = ReferentToken(bref0, t0, rt0.end_token)
             ok = False
             while t is not None:
                 if (t.is_newline_before):
                     break
                 blt0 = BookLinkToken.tryParse(t, 0)
                 if (blt0 is not None):
                     if (isinstance(blt0.ref, UriReferent)):
                         uri_re.addSlot(
                             BookLinkReferent.ATTR_URL,
                             Utils.asObjectOrNull(blt0.ref, UriReferent),
                             False, 0)
                         ok = True
                     t = blt0.end_token
                 rt0.end_token = rt01.end_token = t
                 t = t.next0_
             if (ok):
                 rts0.append(rt01)
                 rts0.append(rt0)
                 return rts0
         if (decree is not None and num is not None):
             rts0 = list()
             bref0 = BookLinkRefReferent._new389(decree.getReferent())
             if (num is not None):
                 bref0.number = num
             rt01 = ReferentToken(bref0, t0, decree)
             t = decree.next0_
             while t is not None:
                 if (t.is_newline_before):
                     break
                 if (isinstance(t, TextToken)):
                     if ((t).is_pure_verb):
                         return None
                 rt01.end_token = t
                 t = t.next0_
             rts0.append(rt01)
             return rts0
         if (book_prev is not None):
             tt = t
             while tt is not None and ((tt.isCharOf(",.") or tt.is_hiphen)):
                 tt = tt.next0_
             blt0 = BookLinkToken.tryParse(tt, 0)
             if (blt0 is not None and blt0.typ == BookLinkTyp.PAGERANGE):
                 rts0 = list()
                 bref0 = BookLinkRefReferent._new389(book_prev)
                 if (num is not None):
                     bref0.number = num
                 bref0.pages = blt0.value
                 rt00 = ReferentToken(bref0, t0, blt0.end_token)
                 rts0.append(rt00)
                 return rts0
         return None
     if (br is not None
             and ((tn1 == br.end_token or tn1 == br.end_token.previous))):
         tn0 = tn0.next0_
         tn1 = tn1.previous
     if (start_of_name is None):
         while tn0 is not None:
             if (tn0.isCharOf(":,~")):
                 tn0 = tn0.next0_
             else:
                 break
     while tn1 is not None and tn1.begin_char > tn0.begin_char:
         if (tn1.isCharOf(".;,:(~") or tn1.is_hiphen
                 or tn1.isValue("РЕД", None)):
             pass
         else:
             break
         tn1 = tn1.previous
     nam = MiscHelper.getTextValue(
         tn0, tn1,
         Utils.valToEnum(
             (GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER),
             GetTextAttr))
     if (start_of_name is not None):
         if (nam is None or (len(nam) < 3)):
             nam = start_of_name
         else:
             nam = "{0}{1}{2}".format(
                 start_of_name, (" " if tn0.is_whitespace_before else ""),
                 nam)
     if (nam is None):
         return None
     res.name = nam
     if (num is None and not is_in_lit):
         if (len(nam) < 20):
             return None
         coef -= (2)
     if (len(nam) > 500):
         coef -= (math.floor(len(nam) / 500))
     if (is_bracket_regime):
         coef -= 1
     if (len(nam) > 200):
         if (num is None):
             return None
         if (res.findSlot(BookLinkReferent.ATTR_AUTHOR, None, True) is None
                 and not BookLinkToken.checkLinkBefore(t0, num)):
             return None
     en = 0
     ru = 0
     ua = 0
     cha = 0
     nocha = 0
     chalen = 0
     lt0 = tn0
     lt1 = tn1
     if (tn1 is None):
         if (t is None):
             return None
         lt0 = t0
         lt1 = t
         tn1 = t.previous
     tt = lt0
     while tt is not None and tt.end_char <= lt1.end_char:
         if ((isinstance(tt, TextToken)) and tt.chars.is_letter):
             if (tt.chars.is_latin_letter):
                 en += 1
             elif (tt.morph.language.is_ua):
                 ua += 1
             elif (tt.morph.language.is_ru):
                 ru += 1
             if (tt.length_char > 2):
                 cha += 1
                 chalen += tt.length_char
         elif (not ((isinstance(tt, ReferentToken)))):
             nocha += 1
         tt = tt.next0_
     if (ru > (ua + en)):
         res.lang = "RU"
     elif (ua > (ru + en)):
         res.lang = "UA"
     elif (en > (ru + ua)):
         res.lang = "EN"
     if (nocha > 3 and nocha > cha and start_of_name is None):
         if (nocha > (math.floor(chalen / 3))):
             coef -= (2)
     if (res.lang == "EN"):
         tt = tn0.next0_
         first_pass2762 = True
         while True:
             if first_pass2762: first_pass2762 = False
             else: tt = tt.next0_
             if (not (tt is not None and (tt.end_char < tn1.end_char))):
                 break
             if (tt.is_comma and tt.next0_ is not None
                     and ((not tt.next0_.chars.is_all_lower or
                           (isinstance(tt.next0_, ReferentToken))))):
                 if (tt.next0_.next0_ is not None
                         and tt.next0_.next0_.is_comma_and):
                     if (isinstance(tt.next0_, ReferentToken)):
                         pass
                     else:
                         continue
                 nam = MiscHelper.getTextValue(
                     tn0, tt.previous,
                     Utils.valToEnum((GetTextAttr.KEEPQUOTES) |
                                     (GetTextAttr.KEEPREGISTER),
                                     GetTextAttr))
                 if (nam is not None and len(nam) > 15):
                     res.name = nam
                     break
     rt = ReferentToken(res, t00, tn1)
     authors = True
     edits = False
     br = (None)
     first_pass2763 = True
     while True:
         if first_pass2763: first_pass2763 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (max_char > 0 and t.begin_char >= max_char):
             break
         if (BracketHelper.canBeStartOfSequence(t, False, False)):
             br = BracketHelper.tryParse(t, BracketParseAttr.CANBEMANYLINES,
                                         100)
             if (br is not None and br.length_char > 300):
                 br = (None)
         blt = BookLinkToken.tryParse(t, 0)
         if (t.is_newline_before and not t.isChar('/')
                 and not t.previous.isChar('/')):
             if (blt is not None and blt.typ == BookLinkTyp.NUMBER):
                 break
             if (t.previous.isCharOf(":")):
                 pass
             elif (blt is not None and ((
                 ((blt.typ == BookLinkTyp.DELIMETER or blt.typ
                   == BookLinkTyp.PAGERANGE or blt.typ == BookLinkTyp.PAGES)
                  or blt.typ == BookLinkTyp.GEO or blt.typ
                  == BookLinkTyp.PRESS) or blt.typ == BookLinkTyp.N))):
                 pass
             elif (num is not None and BookLinkToken.tryParseAuthor(
                     t, FioTemplateType.UNDEFINED) is not None):
                 pass
             elif (num is not None and blt is not None
                   and blt.typ != BookLinkTyp.NUMBER):
                 pass
             elif (br is not None and (t.end_char < br.end_char)
                   and t.begin_char > br.begin_char):
                 pass
             else:
                 ok = False
                 mmm = 50
                 tt = t.next0_
                 while tt is not None and mmm > 0:
                     if (tt.is_newline_before):
                         blt2 = BookLinkToken.tryParse(tt, 0)
                         if (blt2 is not None
                                 and blt2.typ == BookLinkTyp.NUMBER
                                 and blt2.value == next_num):
                             ok = True
                             break
                         if (blt2 is not None):
                             if (blt2.typ == BookLinkTyp.PAGES
                                     or blt2.typ == BookLinkTyp.GEO
                                     or blt2.typ == BookLinkTyp.PRESS):
                                 ok = True
                                 break
                     tt = tt.next0_
                     mmm -= 1
                 if (not ok):
                     npt = NounPhraseHelper.tryParse(
                         t.previous,
                         Utils.valToEnum(
                             ((NounPhraseParseAttr.MULTILINES) |
                              (NounPhraseParseAttr.PARSEADVERBS) |
                              (NounPhraseParseAttr.PARSEPREPOSITION)) |
                             (NounPhraseParseAttr.PARSEVERBS) |
                             (NounPhraseParseAttr.PARSEPRONOUNS),
                             NounPhraseParseAttr), 0)
                     if (npt is not None and npt.end_char >= t.end_char):
                         ok = True
                 if (not ok):
                     break
         rt.end_token = t
         if (blt is not None):
             rt.end_token = blt.end_token
         if (t.isCharOf(".,") or t.is_hiphen):
             continue
         if (t.isValue("С", None)):
             pass
         if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and blt is not None
                 and blt.typ == BookLinkTyp.EDITORS):
             edits = True
             t = blt.end_token
             coef += 1
             continue
         if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and blt is not None
                 and blt.typ == BookLinkTyp.SOSTAVITEL):
             edits = False
             t = blt.end_token
             coef += 1
             continue
         if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and authors):
             blt2 = BookLinkToken.tryParseAuthor(t, prev_pers_templ)
             if (blt2 is not None and blt2.typ == BookLinkTyp.PERSON):
                 prev_pers_templ = blt2.person_template
                 if (not edits):
                     BookLinkAnalyzer.__addAuthor(res, blt2)
                 coef += 1
                 t = blt2.end_token
                 continue
             if (blt2 is not None and blt2.typ == BookLinkTyp.ANDOTHERS):
                 if (not edits):
                     res.authors_and_other = True
                 coef += 1
                 t = blt2.end_token
                 continue
             authors = False
         if (blt is None):
             continue
         if (blt.typ == BookLinkTyp.ELECTRONRES
                 or blt.typ == BookLinkTyp.URL):
             is_electr_res = True
             if (blt.typ == BookLinkTyp.ELECTRONRES):
                 coef += 1.5
             else:
                 coef += .5
             if (isinstance(blt.ref, UriReferent)):
                 res.addSlot(BookLinkReferent.ATTR_URL,
                             Utils.asObjectOrNull(blt.ref, UriReferent),
                             False, 0)
         elif (blt.typ == BookLinkTyp.YEAR):
             if (res.year == 0):
                 res.year = int(blt.value)
                 coef += .5
         elif (blt.typ == BookLinkTyp.DELIMETER):
             coef += 1
             if (blt.length_char == 2):
                 regtyp = BookLinkAnalyzer.RegionTyp.SECOND
             else:
                 regtyp = BookLinkAnalyzer.RegionTyp.FIRST
         elif (
             (((blt.typ == BookLinkTyp.MISC or blt.typ == BookLinkTyp.TYPE
                or blt.typ == BookLinkTyp.PAGES) or blt.typ
               == BookLinkTyp.NAMETAIL or blt.typ == BookLinkTyp.TRANSLATE)
              or blt.typ == BookLinkTyp.PRESS
              or blt.typ == BookLinkTyp.VOLUME)
                 or blt.typ == BookLinkTyp.N):
             coef += 1
         elif (blt.typ == BookLinkTyp.PAGERANGE):
             pages = blt
             coef += 1
             if (is_bracket_regime and blt.end_token.next0_ is not None
                     and blt.end_token.next0_.isChar(')')):
                 coef += (2)
                 if (res.name is not None
                         and res.findSlot(BookLinkReferent.ATTR_AUTHOR,
                                          None, True) is not None):
                     coef = (10)
         elif (blt.typ == BookLinkTyp.GEO
               and ((regtyp == BookLinkAnalyzer.RegionTyp.SECOND
                     or regtyp == BookLinkAnalyzer.RegionTyp.FIRST))):
             coef += 1
         elif (blt.typ == BookLinkTyp.GEO and t.previous is not None
               and t.previous.isChar('.')):
             coef += 1
         elif (blt.typ == BookLinkTyp.ANDOTHERS):
             coef += 1
             if (authors):
                 res.authors_and_other = True
         coef += blt.add_coef
         t = blt.end_token
     if ((coef < 2.5) and num is not None):
         if (BookLinkToken.checkLinkBefore(t0, num)):
             coef += (2)
         elif (BookLinkToken.checkLinkAfter(rt.end_token, num)):
             coef += (1)
     if (rt.length_char > 500):
         return None
     if (is_in_lit):
         coef += 1
     if (coef < 2.5):
         if (is_electr_res and uri is not None):
             pass
         elif (coef >= 2 and is_in_lit):
             pass
         else:
             return None
     for rr in corr_authors:
         pits0 = PersonItemToken.tryAttachList(
             rr.begin_token, None,
             PersonItemToken.ParseAttr.CANINITIALBEDIGIT, 10)
         if (pits0 is None or (len(pits0) < 2)):
             continue
         if (pits0[0].typ == PersonItemToken.ItemType.VALUE):
             exi = False
             for i in range(len(rr.referent.slots) - 1, -1, -1):
                 s = rr.referent.slots[i]
                 if (s.type_name == PersonReferent.ATTR_LASTNAME):
                     ln = Utils.asObjectOrNull(s.value, str)
                     if (ln is None):
                         continue
                     if (ln == pits0[0].value):
                         exi = True
                         continue
                     if (ln.find('-') > 0):
                         ln = ln[0:0 + ln.find('-')]
                     if (pits0[0].begin_token.isValue(ln, None)):
                         del rr.referent.slots[i]
             if (not exi):
                 rr.referent.addSlot(PersonReferent.ATTR_LASTNAME,
                                     pits0[0].value, False, 0)
     rts = list()
     bref = BookLinkRefReferent._new389(res)
     if (num is not None):
         bref.number = num
     rt1 = ReferentToken(bref, t0, rt.end_token)
     if (pages is not None):
         if (pages.value is not None):
             bref.pages = pages.value
         rt.end_token = pages.begin_token.previous
     rts.append(rt1)
     rts.append(rt)
     return rts
Exemple #25
0
 def tryParse(t: 'Token',
              add_units: 'TerminCollection',
              can_be_set: bool = True,
              can_units_absent: bool = False) -> 'MeasureToken':
     """ Выделение вместе с наименованием
     
     Args:
         t(Token): 
     
     """
     if (not ((isinstance(t, TextToken)))):
         return None
     if (t.is_table_control_char):
         return None
     t0 = t
     whd = None
     minmax = 0
     wrapminmax1516 = RefOutArgWrapper(minmax)
     tt = NumbersWithUnitToken._isMinOrMax(t0, wrapminmax1516)
     minmax = wrapminmax1516.value
     if (tt is not None):
         t = tt.next0_
     npt = NounPhraseHelper.tryParse(
         t,
         Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) |
                         (NounPhraseParseAttr.IGNOREBRACKETS),
                         NounPhraseParseAttr), 0)
     if (npt is None):
         whd = NumbersWithUnitToken._tryParseWHL(t)
         if (whd is not None):
             npt = NounPhraseToken(t0, whd.end_token)
         elif (t0.isValue("КПД", None)):
             npt = NounPhraseToken(t0, t0)
         elif ((isinstance(t0, TextToken)) and t0.length_char > 3
               and t0.getMorphClassInDictionary().is_undefined):
             npt = NounPhraseToken(t0, t0)
         else:
             return None
     elif (NumberHelper.tryParseRealNumber(t, True) is not None):
         return None
     else:
         dtok = DateItemToken.tryAttach(t, None)
         if (dtok is not None):
             return None
     t1 = npt.end_token
     t = npt.end_token
     name_ = MetaToken._new561(npt.begin_token, npt.end_token, npt.morph)
     units = None
     units2 = None
     internals_ = list()
     not0_ = False
     tt = t1.next0_
     first_pass3037 = True
     while True:
         if first_pass3037: first_pass3037 = False
         else: tt = tt.next0_
         if (not (tt is not None)): break
         if (tt.is_newline_before):
             break
         if (tt.is_table_control_char):
             break
         wrapminmax1510 = RefOutArgWrapper(minmax)
         tt2 = NumbersWithUnitToken._isMinOrMax(tt, wrapminmax1510)
         minmax = wrapminmax1510.value
         if (tt2 is not None):
             tt = tt2
             t = tt
             t1 = t
             continue
         if ((tt.isValue("БЫТЬ", None) or tt.isValue("ДОЛЖЕН", None)
              or tt.isValue("ДОЛЖНЫЙ", None)) or tt.isValue("МОЖЕТ", None)
                 or
             ((tt.isValue("СОСТАВЛЯТЬ", None)
               and not tt.getMorphClassInDictionary().is_adjective))):
             t = tt
             t1 = t
             if (tt.previous.isValue("НЕ", None)):
                 not0_ = True
             continue
         www = NumbersWithUnitToken._tryParseWHL(tt)
         if (www is not None):
             whd = www
             tt = www.end_token
             t = tt
             t1 = t
             continue
         if (len(internals_) > 0 and tt.is_comma_and):
             continue
         if (tt.isValue("ПРИ", None) or len(internals_) > 0):
             mt1 = MeasureToken.tryParse(tt.next0_, add_units, False, False)
             if (mt1 is not None and mt1.reliable):
                 internals_.append(mt1)
                 tt = mt1.end_token
                 t = tt
                 t1 = t
                 continue
         if ((isinstance(tt, NumberToken))
                 and (tt).typ == NumberSpellingType.WORDS):
             npt3 = NounPhraseHelper.tryParse(
                 tt, NounPhraseParseAttr.PARSENUMERICASADJECTIVE, 0)
             if (npt3 is not None):
                 tt = npt3.end_token
                 t1 = tt
                 if (len(internals_) == 0):
                     name_.end_token = t1
                 continue
         mt0 = NumbersWithUnitToken.tryParse(tt, add_units, False, False)
         if (mt0 is not None):
             break
         if (((tt.is_comma or tt.isChar('('))) and tt.next0_ is not None):
             www = NumbersWithUnitToken._tryParseWHL(tt.next0_)
             if (www is not None):
                 whd = www
                 tt = www.end_token
                 t = tt
                 t1 = t
                 if (tt.next0_ is not None and tt.next0_.is_comma):
                     tt = tt.next0_
                     t1 = tt
                 if (tt.next0_ is not None and tt.next0_.isChar(')')):
                     tt = tt.next0_
                     t1 = tt
                     continue
             uu = UnitToken.tryParseList(tt.next0_, add_units, False)
             if (uu is not None):
                 t = uu[len(uu) - 1].end_token
                 t1 = t
                 units = uu
                 if (tt.isChar('(') and t1.next0_ is not None
                         and t1.next0_.isChar(')')):
                     tt = t1.next0_
                     t = tt
                     t1 = t
                     continue
                 elif (t1.next0_ is not None and t1.next0_.isChar('(')):
                     uu = UnitToken.tryParseList(t1.next0_.next0_,
                                                 add_units, False)
                     if (uu is not None and uu[len(uu) - 1].end_token.next0_
                             is not None and
                             uu[len(uu) - 1].end_token.next0_.isChar(')')):
                         units2 = uu
                         tt = uu[len(uu) - 1].end_token.next0_
                         t = tt
                         t1 = t
                         continue
                 if (uu is not None and len(uu) > 0 and not uu[0].is_doubt):
                     break
         if (BracketHelper.canBeStartOfSequence(tt, False, False)):
             br = BracketHelper.tryParse(tt, BracketParseAttr.NO, 100)
             if (br is not None):
                 tt = br.end_token
                 t = tt
                 t1 = t
                 continue
         if (tt.isValue("НЕ", None) and tt.next0_ is not None):
             mc = tt.next0_.getMorphClassInDictionary()
             if (mc.is_adverb or mc.is_misc):
                 break
             continue
         if (tt.isValue("ЯМЗ", None)):
             pass
         npt2 = NounPhraseHelper.tryParse(
             tt,
             Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) |
                             (NounPhraseParseAttr.IGNOREBRACKETS),
                             NounPhraseParseAttr), 0)
         if (npt2 is None):
             if (tt.morph.class0_.is_preposition
                     or tt.morph.class0_.is_conjunction):
                 to = NumbersWithUnitToken.M_TERMINS.tryParse(
                     tt, TerminParseAttr.NO)
                 if (to is not None):
                     if ((isinstance(to.end_token.next0_, TextToken))
                             and to.end_token.next0_.is_letters):
                         pass
                     else:
                         break
                 t1 = tt
                 continue
             mc = tt.getMorphClassInDictionary()
             if (((isinstance(tt, TextToken)) and tt.chars.is_letter
                  and tt.length_char > 1)
                     and (((tt.chars.is_all_upper or mc.is_adverb
                            or mc.is_undefined) or mc.is_adjective))):
                 uu = UnitToken.tryParseList(tt, add_units, False)
                 if (uu is not None):
                     if (uu[0].length_char > 2 or len(uu) > 1):
                         units = uu
                         t = uu[len(uu) - 1].end_token
                         t1 = t
                         break
                 t = tt
                 t1 = t
                 if (len(internals_) == 0):
                     name_.end_token = tt
                 continue
             if (tt.is_comma):
                 continue
             if (tt.isChar('.')):
                 if (not MiscHelper.canBeStartOfSentence(tt.next0_)):
                     continue
                 uu = UnitToken.tryParseList(tt.next0_, add_units, False)
                 if (uu is not None):
                     if (uu[0].length_char > 2 or len(uu) > 1):
                         units = uu
                         t = uu[len(uu) - 1].end_token
                         t1 = t
                         break
             break
         tt = npt2.end_token
         t = tt
         t1 = t
         if (len(internals_) > 0):
             pass
         elif (t.isValue("ПРЕДЕЛ", None) or t.isValue("ГРАНИЦА", None)
               or t.isValue("ДИАПАЗОН", None)):
             pass
         elif (t.chars.is_letter):
             name_.end_token = t1
     t1 = t1.next0_
     first_pass3038 = True
     while True:
         if first_pass3038: first_pass3038 = False
         else: t1 = t1.next0_
         if (not (t1 is not None)): break
         if (t1.is_table_control_char):
             pass
         elif (t1.isCharOf(":,_")):
             www = NumbersWithUnitToken._tryParseWHL(t1.next0_)
             if (www is not None):
                 whd = www
                 t = www.end_token
                 t1 = t
                 continue
         elif (t1.is_hiphen and t1.is_whitespace_after
               and t1.is_whitespace_before):
             pass
         else:
             break
     if (t1 is None):
         return None
     mts = NumbersWithUnitToken.tryParseMulti(t1, add_units, False, not0_)
     if (mts is None):
         return None
     mt = mts[0]
     if (name_.begin_token.morph.class0_.is_preposition):
         name_.begin_token = name_.begin_token.next0_
     if (len(mts) > 1 and len(internals_) == 0):
         if (len(mt.units) == 0):
             if (units is not None):
                 for m in mts:
                     m.units = units
         res1 = MeasureToken._new1511(t0, mts[len(mts) - 1].end_token,
                                      name_.morph, True)
         res1.name = MiscHelper.getTextValueOfMetaToken(
             name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
         k = 0
         while k < len(mts):
             ttt = MeasureToken._new1506(mts[k].begin_token,
                                         mts[k].end_token, mts[k])
             if (whd is not None):
                 nams = Utils.asObjectOrNull(whd.tag, list)
                 if (k < len(nams)):
                     ttt.name = nams[k]
             res1.internals.append(ttt)
             k += 1
         tt1 = res1.end_token.next0_
         if (tt1 is not None and tt1.isChar('±')):
             nn = NumbersWithUnitToken._tryParse(tt1, add_units, True,
                                                 False)
             if (nn is not None and nn.plus_minus_percent):
                 res1.end_token = nn.end_token
                 res1.nums = nn
         return res1
     if (not mt.is_whitespace_before):
         if (mt.begin_token.previous is None):
             return None
         if (mt.begin_token.previous.isCharOf(":),")
                 or mt.begin_token.previous.is_table_control_char):
             pass
         else:
             return None
     if (len(mt.units) == 0 and units is not None):
         mt.units = units
         if (mt.div_num is not None and len(units) > 1
                 and len(mt.div_num.units) == 0):
             i = 1
             while i < len(units):
                 if (units[i].pow0_ == -1):
                     j = i
                     while j < len(units):
                         mt.div_num.units.append(units[j])
                         units[j].pow0_ = (-units[j].pow0_)
                         j += 1
                     del mt.units[i:i + len(units) - i]
                     break
                 i += 1
     if ((minmax < 0) and mt.single_val is not None):
         mt.from_val = mt.single_val
         mt.from_include = True
         mt.single_val = (None)
     if (minmax > 0 and mt.single_val is not None):
         mt.to_val = mt.single_val
         mt.to_include = True
         mt.single_val = (None)
     if (len(mt.units) == 0):
         units = UnitToken.tryParseList(mt.end_token.next0_, add_units,
                                        True)
         if (units is None):
             if (can_units_absent):
                 pass
             else:
                 return None
         else:
             mt.units = units
     res = MeasureToken._new1513(t0, mt.end_token, name_.morph, internals_)
     if (((not t0.is_whitespace_before and t0.previous is not None
           and t0 == name_.begin_token) and t0.previous.is_hiphen
          and not t0.previous.is_whitespace_before)
             and (isinstance(t0.previous.previous, TextToken))):
         name_.begin_token = res.begin_token = name_.begin_token.previous.previous
     res.name = MiscHelper.getTextValueOfMetaToken(
         name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
     res.nums = mt
     for u in res.nums.units:
         if (u.keyword is not None):
             if (u.keyword.begin_char >= res.begin_char):
                 res.reliable = True
     res.__parseInternals(add_units)
     if (len(res.internals) > 0 or not can_be_set):
         return res
     t1 = res.end_token.next0_
     if (t1 is not None and t1.is_comma_and):
         t1 = t1.next0_
     mts1 = NumbersWithUnitToken.tryParseMulti(t1, add_units, False, False)
     if ((mts1 is not None and len(mts1) == 1 and
          (t1.whitespaces_before_count < 3)) and len(mts1[0].units) > 0
             and not UnitToken.canBeEquals(mts[0].units, mts1[0].units)):
         res.is_set = True
         res.nums = (None)
         res.internals.append(
             MeasureToken._new1506(mt.begin_token, mt.end_token, mt))
         res.internals.append(
             MeasureToken._new1506(mts1[0].begin_token, mts1[0].end_token,
                                   mts1[0]))
         res.end_token = mts1[0].end_token
     return res
Exemple #26
0
 def tryAttach(t: 'Token',
               p1: 'InstrumentParticipant' = None,
               p2: 'InstrumentParticipant' = None,
               is_contract: bool = False) -> 'ParticipantToken':
     """Try to recognise a contract-participant mention starting at ``t``.

     Two strategies are applied in order:

     1. Only when neither ``p1`` nor ``p2`` is given and ``is_contract`` is
        true, the token is treated as a possible introduction of a party:

        * several referents of the same type joined by comma/"and" and
          followed by a non-singular noun phrase with the noun "СТОРОНА"
          give a ``Kinds.NAMEDASPARTS`` token whose ``parts`` are those
          referents;
        * a single organization/person referent followed (or internally
          accompanied) by a naming phrase ("ИМЕНОВАТЬ", "ДАЛЕЕ", a
          bracketed alias, ...) gives a ``Kinds.NAMEDAS`` token;
        * an ontology term directly followed by a person/organization
          referent gives a ``Kinds.NAMEDAS`` token.

     2. Otherwise the tokens starting at ``t`` are scanned for a participant
        type name: either an entry of ``ParticipantToken.M_ONTOLOGY`` or the
        ``typ`` of ``p1`` / ``p2``.

     Args:
         t: first token to analyse; ``None`` yields ``None``.
         p1: optional already known participant whose ``typ`` may be matched.
         p2: optional second known participant, used like ``p1``.
         is_contract: enables the contract-specific heuristics.

     Returns:
         A ``ParticipantToken`` on success, otherwise ``None``.
     """
     if (t is None):
         return None
     tt = t
     br = False
     if (p1 is None and p2 is None and is_contract):
         r1 = t.getReferent()
         # Case A: "<ref>, <ref> and <ref> ... СТОРОНЫ" - several referents of
         # one type that are collectively named as the parties.
         if ((r1 is not None and t.next0_ is not None
              and t.next0_.is_comma_and)
                 and (isinstance(t.next0_.next0_, ReferentToken))):
             r2 = t.next0_.next0_.getReferent()
             if (r1.type_name == r2.type_name):
                 ttt = t.next0_.next0_.next0_
                 refs = list()
                 refs.append(r1)
                 refs.append(r2)
                 # Collect further referents of the same type, skipping
                 # duplicates.
                 first_pass3014 = True
                 while True:
                     if first_pass3014: first_pass3014 = False
                     else: ttt = ttt.next0_
                     if (not (ttt is not None)): break
                     if ((ttt.is_comma_and and ttt.next0_ is not None
                          and ttt.next0_.getReferent() is not None)
                             and ttt.next0_.getReferent().type_name
                             == r1.type_name):
                         ttt = ttt.next0_
                         if (ttt.getReferent() not in refs):
                             refs.append(ttt.getReferent())
                         continue
                     break
                 # Skip filler words, then require a non-singular noun phrase
                 # whose noun is "СТОРОНА".
                 first_pass3015 = True
                 while True:
                     if first_pass3015: first_pass3015 = False
                     else: ttt = ttt.next0_
                     if (not (ttt is not None)): break
                     if (ttt.is_comma or ttt.morph.class0_.is_preposition):
                         continue
                     if ((ttt.isValue("ИМЕНОВАТЬ", None)
                          or ttt.isValue("ДАЛЬНЕЙШИЙ", None)
                          or ttt.isValue("ДАЛЕЕ", None))
                             or ttt.isValue("ТЕКСТ", None)):
                         continue
                     if (ttt.isValue("ДОГОВАРИВАТЬСЯ", None)):
                         continue
                     npt = NounPhraseHelper.tryParse(
                         ttt, NounPhraseParseAttr.NO, 0)
                     if (npt is not None
                             and npt.noun.isValue("СТОРОНА", None)
                             and npt.morph.number != MorphNumber.SINGULAR):
                         re = ParticipantToken._new1467(
                             t, npt.end_token,
                             ParticipantToken.Kinds.NAMEDASPARTS)
                         re.parts = refs
                         return re
                     break
         # Case B: a single organization / person referent.
         if ((isinstance(r1, OrganizationReferent))
                 or (isinstance(r1, PersonReferent))):
             has_br = False
             has_named = False
             if (isinstance(r1, PersonReferent)):
                 if (t.previous is not None
                         and t.previous.isValue("ЛИЦО", None)):
                     return None
             elif (t.previous is not None
                   and ((t.previous.isValue("ВЫДАВАТЬ", None)
                         or t.previous.isValue("ВЫДАТЬ", None)))):
                 return None
             # B1: look for the naming phrase among the tokens the referent
             # token itself spans.
             ttt = (t).begin_token
             while ttt is not None and (ttt.end_char < t.end_char):
                 if (ttt.isChar('(')):
                     has_br = True
                 elif ((ttt.isValue("ИМЕНОВАТЬ", None) or ttt.isValue(
                         "ДАЛЬНЕЙШИЙ", None) or ttt.isValue("ДАЛЕЕ", None))
                       or ttt.isValue("ТЕКСТ", None)):
                     has_named = True
                 elif ((ttt.is_comma or ttt.morph.class0_.is_preposition
                        or ttt.is_hiphen) or ttt.isChar(':')):
                     pass
                 elif (isinstance(ttt, ReferentToken)):
                     pass
                 elif (has_br or has_named):
                     npt = NounPhraseHelper.tryParse(
                         ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0)
                     if (npt is None):
                         break
                     if (has_br):
                         if (npt.end_token.next0_ is None
                                 or not npt.end_token.next0_.isChar(')')):
                             break
                     if (not has_named):
                         if (ParticipantToken.M_ONTOLOGY.tryParse(
                                 ttt, TerminParseAttr.NO) is None):
                             break
                     re = ParticipantToken._new1467(
                         t, t, ParticipantToken.Kinds.NAMEDAS)
                     re.typ = npt.getNormalCaseText(None, True,
                                                    MorphGender.UNDEFINED,
                                                    False)
                     re.parts = list()
                     re.parts.append(r1)
                     return re
                 ttt = ttt.next0_
             # B2: look for the naming phrase in the tokens that follow the
             # referent.
             has_br = False
             has_named = False
             end_side = None
             brr = None
             add_refs = None
             ttt = t.next0_
             first_pass3016 = True
             while True:
                 if first_pass3016: first_pass3016 = False
                 else: ttt = ttt.next0_
                 if (not (ttt is not None)): break
                 # "<number> СТОРОНЫ" is remembered as the end of the mention.
                 if ((isinstance(ttt, NumberToken))
                         and (isinstance(ttt.next0_, TextToken))
                         and (ttt.next0_).term == "СТОРОНЫ"):
                     ttt = ttt.next0_
                     end_side = ttt
                     if (ttt.next0_ is not None and ttt.next0_.is_comma):
                         ttt = ttt.next0_
                     if (ttt.next0_ is not None and ttt.next0_.is_and):
                         break
                 # Forget the bracket sequence once we have moved past it.
                 if (brr is not None and ttt.begin_char > brr.end_char):
                     brr = (None)
                 if (BracketHelper.canBeStartOfSequence(ttt, False, False)):
                     brr = BracketHelper.tryParse(ttt, BracketParseAttr.NO,
                                                  100)
                     # Short parenthesised fragments (< 7 chars) are skipped
                     # as a whole.
                     if (brr is not None and (brr.length_char < 7)
                             and ttt.isChar('(')):
                         ttt = brr.end_token
                         brr = (None)
                         continue
                 elif ((ttt.isValue("ИМЕНОВАТЬ", None) or ttt.isValue(
                         "ДАЛЬНЕЙШИЙ", None) or ttt.isValue("ДАЛЕЕ", None))
                       or ttt.isValue("ТЕКСТ", None)):
                     has_named = True
                 elif ((ttt.is_comma or ttt.morph.class0_.is_preposition
                        or ttt.is_hiphen) or ttt.isChar(':')):
                     pass
                 elif (brr is not None or has_named):
                     if (BracketHelper.canBeStartOfSequence(
                             ttt, True, False)):
                         ttt = ttt.next0_
                         # Opening bracket was the last token: nothing left
                         # to parse, and every path below would dereference
                         # None.
                         if (ttt is None):
                             break
                     npt = NounPhraseHelper.tryParse(
                         ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0)
                     typ22 = None
                     if (npt is not None):
                         ttt = npt.end_token
                         if (npt.end_token.isValue("ДОГОВОР", None)):
                             continue
                     else:
                         # No noun phrase: fall back to an ontology term
                         # inside a meta token, or to a bare adjective.
                         ttok = None
                         if (isinstance(ttt, MetaToken)):
                             ttok = ParticipantToken.M_ONTOLOGY.tryParse(
                                 (ttt).begin_token, TerminParseAttr.NO)
                         if (ttok is not None):
                             typ22 = ttok.termin.canonic_text
                         elif (has_named
                               and ttt.morph.class0_.is_adjective):
                             typ22 = ttt.getNormalCaseText(
                                 MorphClass.ADJECTIVE, False,
                                 MorphGender.UNDEFINED, False)
                         elif (brr is not None):
                             continue
                         else:
                             break
                     if (BracketHelper.canBeEndOfSequence(
                             ttt.next0_, True, None, False)):
                         ttt = ttt.next0_
                     if (brr is not None):
                         if (ttt.next0_ is None):
                             ttt = brr.end_token
                             continue
                         ttt = ttt.next0_
                     if (not has_named and typ22 is None):
                         if (ParticipantToken.M_ONTOLOGY.tryParse(
                                 npt.begin_token, TerminParseAttr.NO) is
                                 None):
                             break
                     re = ParticipantToken._new1467(
                         t, ttt, ParticipantToken.Kinds.NAMEDAS)
                     # typ22 is only set when npt is None, so the noun phrase
                     # must be consulted lazily; passing it as a call
                     # argument would evaluate npt.getNormalCaseText eagerly
                     # and fail on None.
                     if (typ22 is not None):
                         re.typ = typ22
                     else:
                         re.typ = npt.getNormalCaseText(
                             None, True, MorphGender.UNDEFINED, False)
                     re.parts = list()
                     re.parts.append(r1)
                     return re
                 elif ((ttt.isValue("ЗАРЕГИСТРИРОВАННЫЙ", None)
                        or ttt.isValue("КАЧЕСТВО", None)
                        or ttt.isValue("ПРОЖИВАЮЩИЙ", None))
                       or ttt.isValue("ЗАРЕГ", None)):
                     pass
                 elif (ttt.getReferent() == r1):
                     pass
                 elif (
                     (isinstance(ttt.getReferent(), PersonIdentityReferent))
                         or
                     (isinstance(ttt.getReferent(), AddressReferent))):
                     # Identity documents and addresses are kept as extra
                     # parts of the participant.
                     if (add_refs is None):
                         add_refs = list()
                     add_refs.append(ttt.getReferent())
                 else:
                     prr = ttt.kit.processReferent("PERSONPROPERTY", ttt)
                     if (prr is not None):
                         ttt = prr.end_token
                         continue
                     if (isinstance(ttt.getReferent(), GeoReferent)):
                         continue
                     npt = NounPhraseHelper.tryParse(
                         ttt, NounPhraseParseAttr.NO, 0)
                     if (npt is not None):
                         if ((npt.noun.isValue("МЕСТО", None)
                              or npt.noun.isValue("ЖИТЕЛЬСТВО", None)
                              or npt.noun.isValue("ПРЕДПРИНИМАТЕЛЬ", None))
                                 or npt.noun.isValue("ПОЛ", None)
                                 or npt.noun.isValue("РОЖДЕНИЕ", None)):
                             ttt = npt.end_token
                             continue
                     if (ttt.is_newline_before):
                         break
                     if (ttt.length_char < 3):
                         continue
                     mc = ttt.getMorphClassInDictionary()
                     if (mc.is_adverb or mc.is_adjective):
                         continue
                     if (ttt.chars.is_all_upper):
                         continue
                     break
             # No explicit name found, but "<number> СТОРОНЫ" was seen, or
             # extra referents were collected after a preceding "and".
             if (end_side is not None
                     or ((add_refs is not None and t.previous is not None
                          and t.previous.is_and))):
                 re = ParticipantToken._new1467(
                     t, Utils.ifNotNull(end_side, t),
                     ParticipantToken.Kinds.NAMEDAS)
                 re.typ = (None)
                 re.parts = list()
                 re.parts.append(r1)
                 if (add_refs is not None):
                     re.parts.extend(add_refs)
                 return re
         # Case C: "<ontology term>[-|:] <person/organization referent>".
         too = ParticipantToken.M_ONTOLOGY.tryParse(t, TerminParseAttr.NO)
         if (too is not None):
             if ((isinstance(t.previous, TextToken))
                     and t.previous.isValue("ЛИЦО", None)):
                 too = (None)
         if (too is not None and too.termin.tag is not None
                 and too.termin.canonic_text != "СТОРОНА"):
             tt1 = too.end_token.next0_
             if (tt1 is not None):
                 if (tt1.is_hiphen or tt1.isChar(':')):
                     tt1 = tt1.next0_
             if (isinstance(tt1, ReferentToken)):
                 r1 = tt1.getReferent()
                 if ((isinstance(r1, PersonReferent))
                         or (isinstance(r1, OrganizationReferent))):
                     re = ParticipantToken._new1467(
                         t, tt1, ParticipantToken.Kinds.NAMEDAS)
                     re.typ = too.termin.canonic_text
                     re.parts = list()
                     re.parts.append(r1)
                     return re
     # General strategy: scan for a participant type name.
     add_typ1 = (None if p1 is None else p1.typ)
     add_typ2 = (None if p2 is None else p2.typ)
     if (BracketHelper.canBeStartOfSequence(tt, False, False)
             and tt.next0_ is not None):
         br = True
         tt = tt.next0_
     # Multi-word known types (other than "СТОРОНА ...") are matched through
     # temporary termins.
     term1 = None
     term2 = None
     if (add_typ1 is not None and add_typ1.find(' ') > 0
             and not add_typ1.startswith("СТОРОНА")):
         term1 = Termin(add_typ1)
     if (add_typ2 is not None and add_typ2.find(' ') > 0
             and not add_typ2.startswith("СТОРОНА")):
         term2 = Termin(add_typ2)
     named = False
     typ_ = None
     t1 = None
     t0 = tt
     first_pass3017 = True
     while True:
         if first_pass3017: first_pass3017 = False
         else: tt = tt.next0_
         if (not (tt is not None)): break
         if (tt.morph.class0_.is_preposition and typ_ is not None):
             continue
         if (tt.isCharOf("(:)") or tt.is_hiphen):
             continue
         if (tt.is_table_control_char):
             break
         if (tt.is_newline_before and tt != t0):
             if (isinstance(tt, NumberToken)):
                 break
             if ((isinstance(tt, TextToken))
                     and (isinstance(tt.previous, TextToken))):
                 if (tt.previous.isValue((tt).term, None)):
                     break
         if (BracketHelper.isBracket(tt, False)):
             continue
         tok = (ParticipantToken.M_ONTOLOGY.tryParse(
             tt, TerminParseAttr.NO)
                if ParticipantToken.M_ONTOLOGY is not None else None)
         if (tok is not None and (isinstance(tt.previous, TextToken))):
             if (tt.previous.isValue("ЛИЦО", None)):
                 return None
         if (tok is None):
             # Not an ontology term: try the known participant types.
             if (add_typ1 is not None
                     and ((MiscHelper.isNotMoreThanOneError(add_typ1, tt) or
                           ((((isinstance(tt, MetaToken))) and
                             (tt).begin_token.isValue(add_typ1, None)))))):
                 if (typ_ is not None):
                     if (not ParticipantToken.__isTypesEqual(
                             add_typ1, typ_)):
                         break
                 typ_ = add_typ1
                 t1 = tt
                 continue
             if (add_typ2 is not None
                     and ((MiscHelper.isNotMoreThanOneError(add_typ2, tt) or
                           ((((isinstance(tt, MetaToken))) and
                             (tt).begin_token.isValue(add_typ2, None)))))):
                 if (typ_ is not None):
                     if (not ParticipantToken.__isTypesEqual(
                             add_typ2, typ_)):
                         break
                 typ_ = add_typ2
                 t1 = tt
                 continue
             if (tt.chars.is_letter):
                 if (term1 is not None):
                     tok1 = term1.tryParse(tt, TerminParseAttr.NO)
                     if (tok1 is not None):
                         if (typ_ is not None):
                             if (not ParticipantToken.__isTypesEqual(
                                     add_typ1, typ_)):
                                 break
                         typ_ = add_typ1
                         tt = tok1.end_token
                         t1 = tt
                         continue
                 if (term2 is not None):
                     tok2 = term2.tryParse(tt, TerminParseAttr.NO)
                     if (tok2 is not None):
                         if (typ_ is not None):
                             if (not ParticipantToken.__isTypesEqual(
                                     add_typ2, typ_)):
                                 break
                         typ_ = add_typ2
                         tt = tok2.end_token
                         t1 = tt
                         continue
                 # After a naming word, a noun that is not all-lowercase (or
                 # follows a bracket) and is not a decree keyword is taken
                 # as the type.
                 if (named and tt.getMorphClassInDictionary().is_noun):
                     if (not tt.chars.is_all_lower
                             or BracketHelper.isBracket(tt.previous, True)):
                         if (DecreeToken.isKeyword(tt, False) is None):
                             val = tt.getNormalCaseText(
                                 MorphClass.NOUN, True,
                                 MorphGender.UNDEFINED, False)
                             if (typ_ is not None):
                                 if (not ParticipantToken.__isTypesEqual(
                                         typ_, val)):
                                     break
                             typ_ = val
                             t1 = tt
                             continue
             if (named and typ_ is None and is_contract):
                 if ((isinstance(tt, TextToken))
                         and tt.chars.is_cyrillic_letter
                         and tt.chars.is_capital_upper):
                     dc = tt.getMorphClassInDictionary()
                     if (dc.is_undefined or dc.is_noun):
                         dt = DecreeToken.tryAttach(tt, None, False)
                         ok = True
                         if (dt is not None):
                             ok = False
                         elif (tt.isValue("СТОРОНА", None)):
                             ok = False
                         if (ok):
                             typ_ = (tt).getLemma()
                             t1 = tt
                             continue
                     if (dc.is_adjective):
                         npt = NounPhraseHelper.tryParse(
                             tt, NounPhraseParseAttr.NO, 0)
                         if (npt is not None and len(npt.adjectives) > 0
                                 and npt.noun.getMorphClassInDictionary(
                                 ).is_noun):
                             typ_ = npt.getNormalCaseText(
                                 None, True, MorphGender.UNDEFINED, False)
                             t1 = npt.end_token
                             continue
             if (tt == t):
                 break
             if ((isinstance(tt, NumberToken)) or tt.isChar('.')):
                 break
             # Short tokens are tolerated only once a type has been found.
             if (tt.length_char < 4):
                 if (typ_ is not None):
                     continue
             break
         # Ontology term: entries without a tag only mark the mention as
         # "named"; tagged entries supply the type itself.
         if (tok.termin.tag is None):
             named = True
         else:
             if (typ_ is not None):
                 break
             if (tok.termin.canonic_text == "СТОРОНА"):
                 # "СТОРОНА" must be followed by a number on the same line.
                 tt1 = tt.next0_
                 if (tt1 is not None and tt1.is_hiphen):
                     tt1 = tt1.next0_
                 if (not ((isinstance(tt1, NumberToken)))):
                     break
                 if (tt1.is_newline_before):
                     break
                 typ_ = "{0} {1}".format(tok.termin.canonic_text,
                                         (tt1).value)
                 t1 = tt1
             else:
                 typ_ = tok.termin.canonic_text
                 t1 = tok.end_token
             break
         tt = tok.end_token
     if (typ_ is None):
         return None
     # An un-named, multi-token, non-"СТОРОНА" type must agree with one of
     # the known participants.
     if (not named and t1 != t and not typ_.startswith("СТОРОНА")):
         if (not ParticipantToken.__isTypesEqual(typ_, add_typ1)
                 and not ParticipantToken.__isTypesEqual(typ_, add_typ2)):
             return None
     # Absorb surrounding brackets into the resulting span.
     if (BracketHelper.canBeEndOfSequence(t1.next0_, False, None, False)):
         t1 = t1.next0_
         if (not t.is_whitespace_before
                 and BracketHelper.canBeStartOfSequence(
                     t.previous, False, False)):
             t = t.previous
     elif (BracketHelper.canBeStartOfSequence(t, False, False)
           and BracketHelper.canBeEndOfSequence(t1.next0_, True, t, True)):
         t1 = t1.next0_
     if (br and t1.next0_ is not None and BracketHelper.canBeEndOfSequence(
             t1.next0_, False, None, False)):
         t1 = t1.next0_
     res = ParticipantToken._new1472(
         t, t1, (ParticipantToken.Kinds.NAMEDAS
                 if named else ParticipantToken.Kinds.PURE), typ_)
     # A leading colon is not part of the mention.
     if (t.isChar(':')):
         res.begin_token = t.next0_
     return res
Exemple #27
0
 def tryParse(
         self,
         t0: 'Token',
         pars: 'TerminParseAttr' = TerminParseAttr.NO) -> 'TerminToken':
     """ Try to attach this termin to the token chain starting at t0.

     The strategies below are tried in order and the first one that
     succeeds returns: the "smart" acronym (as one token or spelled
     letter-by-letter with dots), the plain acronym, the terms in their
     declared order, the terms in any order (only when ignore_terms_order
     is set), and finally the abridges.

     Args:
         t0(Token): token to start matching from
         pars(TerminParseAttr): bit flags; this method tests FULLWORDSONLY,
             IGNOREBRACKETS, CANBEGEOOBJECT and IGNORESTOPWORDS

     Returns:
         TerminToken: the matched span, or None when t0 is None or
         nothing matched
     """
     from pullenti.ner.core.MiscHelper import MiscHelper
     from pullenti.ner.core.BracketHelper import BracketHelper
     if (t0 is None):
         return None
     # term stays None for anything that is not a TextToken
     term = None
     if (isinstance(t0, TextToken)):
         term = (t0).term
     # Smart-acronym matching; skipped when FULLWORDSONLY is requested or
     # when t0 carries no term.
     if (self.acronym_smart is not None
             and (((pars) &
                   (TerminParseAttr.FULLWORDSONLY))) == (TerminParseAttr.NO)
             and term is not None):
         if (self.acronym_smart == term):
             # whole-token match; a '.' glued to the token (no whitespace
             # in between) is included in the span
             if (t0.next0_ is not None and t0.next0_.isChar('.')
                     and not t0.is_whitespace_after):
                 return TerminToken._new606(t0, t0.next0_, self)
             else:
                 return TerminToken._new606(t0, t0, self)
         # Dotted spelling: one-character tokens, each followed by '.',
         # compared character by character with self.acronym.
         # NOTE(review): self.acronym is used here without a None check -
         # presumably it is always set together with acronym_smart; confirm.
         t1 = Utils.asObjectOrNull(t0, TextToken)
         tt = Utils.asObjectOrNull(t0, TextToken)
         i = 0
         while i < len(self.acronym):
             if (tt is None):
                 break
             term1 = tt.term
             if (len(term1) != 1 or tt.is_whitespace_after):
                 break
             if (i > 0 and tt.is_whitespace_before):
                 break
             if (term1[0] != self.acronym[i]):
                 break
             if (tt.next0_ is None or not tt.next0_.isChar('.')):
                 break
             # t1 is the dot just consumed, tt the token after it
             t1 = (Utils.asObjectOrNull(tt.next0_, TextToken))
             tt = (Utils.asObjectOrNull(tt.next0_.next0_, TextToken))
             i += 1
         # every character of the acronym was matched
         if (i >= len(self.acronym)):
             return TerminToken._new606(t0, t1, self)
     # Plain acronym equal to the token's term: accepted for all-upper
     # tokens, when lower case is explicitly allowed, or for tokens that
     # are not all-lower and have at least 3 characters.
     if (self.acronym is not None and term is not None
             and self.acronym == term):
         if (t0.chars.is_all_upper or self.acronym_can_be_lower
                 or ((not t0.chars.is_all_lower and len(term) >= 3))):
             return TerminToken._new606(t0, t0, self)
     # Tokens flagged is_last_lower and longer than 3 characters are
     # compared through isValue instead of exact term equality.
     if (self.acronym is not None and t0.chars.is_last_lower
             and t0.length_char > 3):
         if (t0.isValue(self.acronym, None)):
             return TerminToken._new606(t0, t0, self)
     # cou = (number of non-hyphen terms) - (number of hyphen terms)
     cou = 0
     i = 0
     while i < len(self.terms):
         if (self.terms[i].is_hiphen):
             cou -= 1
         else:
             cou += 1
         i += 1
     # Ordered matching. With ignore_terms_order set it is used only
     # when cou == 1.
     if (len(self.terms) > 0
             and ((not self.ignore_terms_order or cou == 1))):
         # t1 - last token of the matched span; tt - token being examined.
         # eup / e0_ - when tt is a ReferentToken the walk descends into its
         # inner tokens: eup remembers the ReferentToken itself and e0_ its
         # last inner token.
         # mc - morphology passed to the result; dont_change_mc freezes it.
         t1 = t0
         tt = t0
         e0_ = None
         eup = None
         ok = True
         mc = None
         dont_change_mc = False
         i = 0
         first_pass2812 = True
         # for-style loop over self.terms; i is also adjusted inside the body
         while True:
             if first_pass2812: first_pass2812 = False
             else: i += 1
             if (not (i < len(self.terms))): break
             # hyphen terms do not consume a token by themselves
             if (self.terms[i].is_hiphen):
                 continue
             # a hyphen token between matched words is stepped over
             if (tt is not None and tt.is_hiphen and i > 0):
                 tt = tt.next0_
             # IGNOREBRACKETS: step over a non-letter bracket token
             if (i > 0 and tt is not None):
                 if ((((pars) & (TerminParseAttr.IGNOREBRACKETS))) !=
                     (TerminParseAttr.NO) and not tt.chars.is_letter
                         and BracketHelper.isBracket(tt, False)):
                     tt = tt.next0_
             # CANBEGEOOBJECT: step over a GEO referent between terms
             if (((((pars) & (TerminParseAttr.CANBEGEOOBJECT))) !=
                  (TerminParseAttr.NO) and i > 0 and
                  (isinstance(tt, ReferentToken)))
                     and tt.getReferent().type_name == "GEO"):
                 tt = tt.next0_
             # descend into a referent token (only one level is tracked)
             if ((isinstance(tt, ReferentToken)) and e0_ is None):
                 eup = tt
                 e0_ = (tt).end_token
                 tt = (tt).begin_token
             if (tt is None):
                 ok = False
                 break
             if (not self.terms[i].checkByToken(tt)):
                 # The term does not match tt directly; try the recoveries
                 # below before giving up.
                 # 1) tt is a '.' and the term matches the token after it
                 if (tt.next0_ is not None and tt.isChar('.')
                         and self.terms[i].checkByToken(tt.next0_)):
                     tt = tt.next0_
                 # 2) after the first term, a preposition or English
                 #    article may be skipped if the term matches the next
                 #    token and the previous term is not a pattern_any term
                 elif (((i > 0 and tt.next0_ is not None and
                         (isinstance(tt, TextToken))) and
                        ((tt.morph.class0_.is_preposition
                          or MiscHelper.isEngArticle(tt)))
                        and self.terms[i].checkByToken(tt.next0_))
                       and not self.terms[i - 1].is_pattern_any):
                     tt = tt.next0_
                 else:
                     ok = False
                     # 3) terms[i], hyphen, terms[i + 2] checked against the
                     #    single token tt through checkByPrefToken; on
                     #    success both extra terms are consumed
                     if (((i + 2) < len(self.terms))
                             and self.terms[i + 1].is_hiphen
                             and self.terms[i + 2].checkByPrefToken(
                                 self.terms[i],
                                 Utils.asObjectOrNull(tt, TextToken))):
                         i += 2
                         ok = True
                     # 4) one-character token glued to a quote/apostrophe
                     #    character glued to a text token
                     elif (((not tt.is_whitespace_after
                             and tt.next0_ is not None and
                             (isinstance(tt, TextToken))) and
                            (tt).length_char == 1
                            and tt.next0_.isCharOf("\"'`’“”"))
                           and not tt.next0_.is_whitespace_after
                           and (isinstance(tt.next0_.next0_, TextToken))):
                         if (self.terms[i].checkByStrPrefToken(
                             (tt).term,
                                 Utils.asObjectOrNull(
                                     tt.next0_.next0_, TextToken))):
                             ok = True
                             tt = tt.next0_.next0_
                     if (not ok):
                         # 5) IGNORESTOPWORDS: skip a non-letter text token,
                         #    a conjunction/preposition or a number and
                         #    retry the same term (i -= 1 cancels the
                         #    i += 1 done at the top of the loop)
                         if (i > 0 and
                             (((pars) &
                               (TerminParseAttr.IGNORESTOPWORDS))) !=
                             (TerminParseAttr.NO)):
                             if (isinstance(tt, TextToken)):
                                 if (not tt.chars.is_letter):
                                     tt = tt.next0_
                                     i -= 1
                                     continue
                                 mc1 = tt.getMorphClassInDictionary()
                                 if (mc1.is_conjunction
                                         or mc1.is_preposition):
                                     tt = tt.next0_
                                     i -= 1
                                     continue
                             if (isinstance(tt, NumberToken)):
                                 tt = tt.next0_
                                 i -= 1
                                 continue
                         break
             # mc follows the morphology of the latest matched token until
             # it is frozen: after a noun/verb that is not also an
             # adjective (unless a hyphen term follows) ...
             if (tt.morph.items_count > 0 and not dont_change_mc):
                 mc = MorphCollection(tt.morph)
                 if (((mc.class0_.is_noun or mc.class0_.is_verb))
                         and not mc.class0_.is_adjective):
                     if (((i + 1) < len(self.terms))
                             and self.terms[i + 1].is_hiphen):
                         pass
                     else:
                         dont_change_mc = True
             # ... or after any preposition / conjunction
             if (tt.morph.class0_.is_preposition
                     or tt.morph.class0_.is_conjunction):
                 dont_change_mc = True
             # last inner token of the referent reached: climb back up
             if (tt == e0_):
                 tt = eup
                 eup = (None)
                 e0_ = (None)
             # the span end is not moved while inside a referent token
             if (e0_ is None):
                 t1 = tt
             tt = tt.next0_
         # ok and i past the end means every term was matched
         if (ok and i >= len(self.terms)):
             # a trailing '.' joins the span if any abridge attaches at t0
             if (t1.next0_ is not None and t1.next0_.isChar('.')
                     and self.abridges is not None):
                 for a in self.abridges:
                     if (a.tryAttach(t0) is not None):
                         t1 = t1.next0_
                         break
             # multi-token span starting with an adjective: use the morph of
             # the noun phrase at t0 when it fits inside the span
             if (t0 != t1 and t0.morph.class0_.is_adjective):
                 npt = NounPhraseHelper.tryParse(t0, NounPhraseParseAttr.NO,
                                                 0)
                 if (npt is not None and npt.end_char <= t1.end_char):
                     mc = npt.morph
             return TerminToken._new611(t0, t1, mc)
     # Any-order matching: every token must match one of the terms that are
     # still unmatched; a matched term is removed from the working copy.
     if (len(self.terms) > 1 and self.ignore_terms_order):
         terms_ = list(self.terms)
         t1 = t0
         tt = t0
         while len(terms_) > 0:
             if (tt != t0 and tt is not None and tt.is_hiphen):
                 tt = tt.next0_
             if (tt is None):
                 break
             # j = index of the first remaining term matching tt
             j = 0
             while j < len(terms_):
                 if (terms_[j].checkByToken(tt)):
                     break
                 j += 1
             if (j >= len(terms_)):
                 # no term matches: with IGNORESTOPWORDS the same kinds of
                 # tokens as in the ordered mode may be skipped
                 if (tt != t0 and (((pars) &
                                    (TerminParseAttr.IGNORESTOPWORDS))) !=
                     (TerminParseAttr.NO)):
                     if (isinstance(tt, TextToken)):
                         if (not tt.chars.is_letter):
                             tt = tt.next0_
                             continue
                         mc1 = tt.getMorphClassInDictionary()
                         if (mc1.is_conjunction or mc1.is_preposition):
                             tt = tt.next0_
                             continue
                     if (isinstance(tt, NumberToken)):
                         tt = tt.next0_
                         continue
                 break
             del terms_[j]
             t1 = tt
             tt = tt.next0_
         # hyphen terms that were left over do not have to be matched
         for i in range(len(terms_) - 1, -1, -1):
             if (terms_[i].is_hiphen):
                 del terms_[i]
         # NOTE(review): this result is built from the span only, unlike
         # the _new606 / _new611 return paths above - confirm it is intended
         if (len(terms_) == 0):
             return TerminToken(t0, t1)
     # Abridges (skipped with FULLWORDSONLY): the longest attachment wins.
     if (self.abridges is not None and
         (((pars) &
           (TerminParseAttr.FULLWORDSONLY))) == (TerminParseAttr.NO)):
         res = None
         for a in self.abridges:
             r = a.tryAttach(t0)
             if (r is None):
                 continue
             # an abridge written without a point is accepted for a termin
             # that has terms only when its first part equals t0's term
             if (r.abridge_without_point and len(self.terms) > 0):
                 if (not ((isinstance(t0, TextToken)))):
                     continue
                 if (a.parts[0].value != (t0).term):
                     continue
             if (res is None or (res.length_char < r.length_char)):
                 res = r
         if (res is not None):
             return res
     return None
Exemple #28
0
 def __tryAttachContractGround(t: 'Token',
                               ip: 'InstrumentParticipant',
                               can_be_passport: bool = False) -> 'Token':
     """ Find what a contract participant acts on the basis of, starting at t.

     Walks forward over commas, prepositions, bracketed spans and the words
     "ОСНОВАНИЕ" / "ДЕЙСТВОВАТЬ" / "ДЕЙСТВУЮЩИЙ" until it meets a decree
     referent, a person identity referent, the word "УСТАВ" or the word
     "ДОВЕРЕННОСТЬ". The first such find is stored in ip.ground.

     Args:
         t(Token): token to start scanning from
         ip(InstrumentParticipant): participant whose ground attribute is set
         can_be_passport(bool): allows a PersonIdentityReferent to be taken
             as the ground (only when its typ does not contain "паспорт")

     Returns:
         Token: the token that ends the recognised ground (a newly embedded
         ReferentToken for a power of attorney with date/number), or None
         when any other token is met or the chain ends
     """
     while t is not None:
         # separators carry no information
         if t.isChar(',') or t.morph.class0_.is_preposition:
             t = t.next0_
             continue
         # a bracketed span is jumped over as a whole
         if t.isChar('('):
             br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
             if br is not None:
                 t = br.end_token.next0_
                 continue
         if (t.isValue("ОСНОВАНИЕ", None)
                 or t.isValue("ДЕЙСТВОВАТЬ", None)
                 or t.isValue("ДЕЙСТВУЮЩИЙ", None)):
             # a short bracketed remark right after the keyword is skipped too
             following = t.next0_
             if following is not None and following.isChar('('):
                 br = BracketHelper.tryParse(following, BracketParseAttr.NO,
                                             100)
                 if br is not None and br.length_char < 10:
                     t = br.end_token
             t = t.next0_
             continue
         decree = Utils.asObjectOrNull(t.getReferent(), DecreeReferent)
         if decree is not None:
             ip.ground = decree
             return t
         identity = Utils.asObjectOrNull(t.getReferent(),
                                         PersonIdentityReferent)
         if (identity is not None and can_be_passport
                 and identity.typ is not None
                 and "паспорт" not in identity.typ):
             ip.ground = identity
             return t
         if t.isValue("УСТАВ", None):
             ip.ground = t.getNormalCaseText(MorphClass.NOUN, True,
                                             MorphGender.UNDEFINED, False)
             return t
         if not t.isValue("ДОВЕРЕННОСТЬ", None):
             # anything else ends the search without a result
             break
         # Power of attorney: look for its date / number items.
         dts = DecreeToken.tryAttachList(t.next0_, None, 10, False)
         if dts is None:
             # Not directly after the word: search within 200 characters,
             # stepping over commas, certification verbs, a notary noun
             # phrase and (once one of those was seen) referents.
             has_spec = False
             ttt = t.next0_
             while ttt is not None and (ttt.end_char - t.end_char) < 200:
                 if ttt.is_comma:
                     ttt = ttt.next0_
                     continue
                 if (ttt.isValue("УДОСТОВЕРИТЬ", None)
                         or ttt.isValue("УДОСТОВЕРЯТЬ", None)):
                     has_spec = True
                     ttt = ttt.next0_
                     continue
                 dt = DecreeToken.tryAttach(ttt, None, False)
                 if dt is not None and (
                         dt.typ == DecreeToken.ItemType.DATE
                         or dt.typ == DecreeToken.ItemType.NUMBER):
                     dts = DecreeToken.tryAttachList(ttt, None, 10, False)
                     break
                 npt = NounPhraseHelper.tryParse(ttt, NounPhraseParseAttr.NO,
                                                 0)
                 if (npt is not None
                         and npt.end_token.isValue("НОТАРИУС", None)):
                     ttt = npt.end_token.next0_
                     has_spec = True
                     continue
                 if ttt.getReferent() is not None and has_spec:
                     ttt = ttt.next0_
                     continue
                 break
         if dts is None or len(dts) == 0:
             # no attributes found: the ground is just the word itself
             ip.ground = "ДОВЕРЕННОСТЬ"
             return t
         # Build a decree referent from the leading date / number items and
         # embed it into the token chain.
         start = t
         dr = DecreeReferent()
         dr.typ = "ДОВЕРЕННОСТЬ"
         for item in dts:
             if item.typ == DecreeToken.ItemType.DATE:
                 dr._addDate(item)
             elif item.typ == DecreeToken.ItemType.NUMBER:
                 dr._addNumber(item)
             else:
                 break
             t = item.end_token
         ad = t.kit.getAnalyzerDataByAnalyzerName(
             InstrumentAnalyzer.ANALYZER_NAME)
         ip.ground = ad.registerReferent(dr)
         rt = ReferentToken(Utils.asObjectOrNull(ip.ground, Referent), start,
                            t)
         t.kit.embedToken(rt)
         return rt
     return None
Exemple #29
0
 def tryParse(
         t: 'Token',
         prev: typing.List['DateExItemToken']) -> 'DateExItemToken':
     """ Try to read one relative-date item (a unit or a weekday) at t.

     Single words such as "ЗАВТРА" or "ПОЛЧАСА" are handled first; otherwise
     a noun phrase is parsed and its noun selects the item type, while its
     adjectives set the value, the relative flag and the sign.

     Args:
         t(Token): first token of the candidate item
         prev(List[DateExItemToken]): only forwarded to the recursive call
             made for a "<number> <item>" sequence

     Returns:
         DateExItemToken: the recognised item, or None
     """
     from pullenti.ner.NumberToken import NumberToken
     from pullenti.ner.core.NounPhraseParseAttr import NounPhraseParseAttr
     from pullenti.ner.core.NounPhraseHelper import NounPhraseHelper
     if t is None:
         return None
     kinds = DateExToken.DateExItemTokenType
     # word -> (type, value); each of these is always a relative value
     single_words = (
         ("ЗАВТРА", kinds.DAY, 1),
         ("ПОСЛЕЗАВТРА", kinds.DAY, 2),
         ("ВЧЕРА", kinds.DAY, -1),
         ("ПОЗАВЧЕРА", kinds.DAY, -2),
         ("ПОЛЧАСА", kinds.MINUTE, 30),
     )
     for word, kind, amount in single_words:
         if t.isValue(word, None):
             return DateExToken.DateExItemToken._new642(
                 t, t, kind, amount, True)
     parse_attrs = Utils.valToEnum(
         (NounPhraseParseAttr.PARSENUMERICASADJECTIVE)
         | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr)
     npt = NounPhraseHelper.tryParse(t, parse_attrs, 0)
     if npt is None:
         # "<number> <item>": parse the rest and put the number into it,
         # but only if the rest parsed to a value of exactly 1
         if not isinstance(t, NumberToken) or t.int_value is None:
             return None
         res0 = DateExToken.DateExItemToken.tryParse(t.next0_, prev)
         if res0 is None or res0.value != 1:
             return None
         res0.begin_token = t
         res0.value = t.int_value
         if t.previous is not None and t.previous.isValue("ЧЕРЕЗ", None):
             res0.is_value_relate = True
         return res0
     # (noun variants) -> (type, initial value), tried in this order
     noun_table = (
         (("ГОД", "ГОДИК"), kinds.YEAR, 0),
         (("КВАРТАЛ",), kinds.QUARTAL, 0),
         (("МЕСЯЦ",), kinds.MONTH, 0),
         (("ДЕНЬ", "ДЕНЕК"), kinds.DAY, 0),
         (("НЕДЕЛЯ", "НЕДЕЛЬКА"), kinds.DAY, 7),
         (("ЧАС", "ЧАСИК", "ЧАСОК"), kinds.HOUR, 0),
         (("МИНУТА", "МИНУТКА"), kinds.MINUTE, 0),
         (("ПОНЕДЕЛЬНИК",), kinds.DAYOFWEEK, 1),
         (("ВТОРНИК",), kinds.DAYOFWEEK, 2),
         (("СРЕДА",), kinds.DAYOFWEEK, 3),
         (("ЧЕТВЕРГ",), kinds.DAYOFWEEK, 4),
         (("ПЯТНИЦА",), kinds.DAYOFWEEK, 5),
         (("СУББОТА",), kinds.DAYOFWEEK, 6),
         (("ВОСКРЕСЕНЬЕ", "ВОСКРЕСЕНИЕ"), kinds.DAYOFWEEK, 7),
     )
     for variants, ty, val in noun_table:
         if any(npt.noun.isValue(name, None) for name in variants):
             break
     else:
         # the noun is not a known unit or weekday
         return None
     res = DateExToken.DateExItemToken._new641(t, npt.end_token, ty, val)
     negate = False
     for adj in npt.adjectives:
         if adj.isValue("СЛЕДУЮЩИЙ", None) or adj.isValue("БУДУЩИЙ", None):
             res.is_value_relate = True
             continue
         if adj.isValue("ПРЕДЫДУЩИЙ", None) or adj.isValue("ПРОШЛЫЙ", None):
             res.is_value_relate = True
             negate = True
             continue
         if (adj.begin_token == adj.end_token
                 and isinstance(adj.begin_token, NumberToken)
                 and adj.begin_token.int_value is not None):
             # a single-number adjective gives the value, except for weekdays
             if res.typ != kinds.DAYOFWEEK:
                 res.value = adj.begin_token.int_value
             continue
         if adj.isValue("ЭТОТ", None) or adj.isValue("ТЕКУЩИЙ", None):
             continue
         if adj.isValue("БЛИЖАЙШИЙ", None) and res.typ == kinds.DAYOFWEEK:
             continue
         # any other adjective makes the phrase unusable
         return None
     if res.value == 0:
         res.value = 1
     if negate:
         res.value = (-res.value)
     # a preceding "ЧЕРЕЗ" joins the item and marks it as relative
     if t.previous is not None and t.previous.isValue("ЧЕРЕЗ", None):
         res.is_value_relate = True
         res.begin_token = t.previous
     return res
Exemple #30
0
 def tryAttach(t: 'Token') -> 'ParenthesisToken':
     """ Try to recognise a parenthetical (introductory) expression at t.

     The checks are made in this order and the first success returns:
     1. a termin from the class dictionary;
     2. an adverb or comparative adjective followed by a comma or by a verb
        with the required morphological attributes;
     3. "В СООТВЕТСТВИЕ <preposition> ...", "СОГЛАСНО ...", "В СИЛА ..." followed
        by a referent or a noun phrase, and "В <adjective/pronoun>
        ВИД/СЛУЧАЙ/СФЕРА";
     4. an optional "НЕ", a preposition and a noun phrase that is followed
        by a comma or whose noun is "ОЧЕРЕДЬ";
     5. the word "ВЕДЬ".

     Args:
         t(Token): first token of the candidate expression

     Returns:
         ParenthesisToken: the recognised span, or None when t is None, is
         not a TextToken (and no termin matched) or nothing matched
     """
     if t is None:
         return None
     tok = ParenthesisToken.__m_termins.tryParse(t, TerminParseAttr.NO)
     if tok is not None:
         return ParenthesisToken(t, tok.end_token)
     if not isinstance(t, TextToken):
         return None
     mc = t.getMorphClassInDictionary()
     # adverb, or adjective carrying both "сравн." and "кач.прил."
     # (presumably "comparative" and "qualitative adjective")
     ok = mc.is_adverb or (mc.is_adjective
                           and t.morph.containsAttr("сравн.", None)
                           and t.morph.containsAttr("кач.прил.", None))
     if ok and t.next0_ is not None:
         if t.next0_.isChar(','):
             return ParenthesisToken(t, t)
         t1 = t.next0_
         if (t1.getMorphClassInDictionary() == MorphClass.VERB
                 and t1.morph.containsAttr("н.вр.", None)
                 and t1.morph.containsAttr("нес.в.", None)
                 and t1.morph.containsAttr("дейст.з.", None)):
             return ParenthesisToken(t, t1)
     # t1 = first token after a "according to ..." style lead-in, if any
     t1 = None
     if ((t.isValue("В", None) and t.next0_ is not None
          and t.next0_.isValue("СООТВЕТСТВИЕ", None))
             and t.next0_.next0_ is not None
             and t.next0_.next0_.morph.class0_.is_preposition):
         t1 = t.next0_.next0_.next0_
     elif t.isValue("СОГЛАСНО", None):
         t1 = t.next0_
     elif t.isValue("В", None) and t.next0_ is not None:
         if t.next0_.isValue("СИЛА", None):
             t1 = t.next0_.next0_
         elif (t.next0_.morph.class0_.is_adjective
               or t.next0_.morph.class0_.is_pronoun):
             npt = NounPhraseHelper.tryParse(t.next0_,
                                             NounPhraseParseAttr.NO, 0)
             if (npt is not None
                     and (npt.noun.isValue("ВИД", None)
                          or npt.noun.isValue("СЛУЧАЙ", None)
                          or npt.noun.isValue("СФЕРА", None))):
                 return ParenthesisToken(t, npt.end_token)
     if t1 is not None and t1.next0_ is not None:
         # step over a leading "НОРМА" / "ПОЛОЖЕНИЕ" / "УКАЗАНИЕ" noun phrase
         npt1 = NounPhraseHelper.tryParse(t1, NounPhraseParseAttr.NO, 0)
         if (npt1 is not None
                 and (npt1.noun.isValue("НОРМА", None)
                      or npt1.noun.isValue("ПОЛОЖЕНИЕ", None)
                      or npt1.noun.isValue("УКАЗАНИЕ", None))):
             t1 = npt1.end_token.next0_
     # t1 is re-checked here: the step above yields None when that noun
     # phrase is the last thing in the text (this used to raise
     # AttributeError on t1.getReferent()).
     if t1 is not None:
         r = t1.getReferent()
         if r is not None:
             res = ParenthesisToken._new1086(t, t1, r)
             if t1.next0_ is not None and t1.next0_.is_comma:
                 # ", ... СИЛА/ДЕЙСТВИЕ ... ," - extend the span up to the
                 # token before the closing comma
                 sila = False
                 ttt = t1.next0_.next0_
                 while ttt is not None:
                     if (ttt.isValue("СИЛА", None)
                             or ttt.isValue("ДЕЙСТВИЕ", None)):
                         sila = True
                     elif ttt.is_comma:
                         if sila:
                             res.end_token = ttt.previous
                         break
                     elif BracketHelper.canBeStartOfSequence(
                             ttt, False, False):
                         break
                     ttt = ttt.next0_
             return res
         npt = NounPhraseHelper.tryParse(t1, NounPhraseParseAttr.NO, 0)
         if npt is not None:
             return ParenthesisToken(t, npt.end_token)
     tt = t
     if tt.isValue("НЕ", None):
         tt = tt.next0_
     # The None test must come before tt.morph is read: tt is None when
     # "НЕ" is the last token of the text.
     if tt is not None and tt.morph.class0_.is_preposition:
         tt = tt.next0_
         npt1 = NounPhraseHelper.tryParse(tt, NounPhraseParseAttr.NO, 0)
         if npt1 is not None:
             tt = npt1.end_token
             if tt.next0_ is not None and tt.next0_.is_comma:
                 return ParenthesisToken(t, tt.next0_)
             if npt1.noun.isValue("ОЧЕРЕДЬ", None):
                 return ParenthesisToken(t, tt)
     if t.isValue("ВЕДЬ", None):
         return ParenthesisToken(t, t)
     return None