Example #1
 def try_parse(t: 'Token') -> 'DelimToken':
     if (not (isinstance(t, TextToken))):
         return None
     if (t.is_comma_and):
         res0 = DelimToken.try_parse(t.next0_)
         if (res0 is not None):
             res0.begin_token = t
             return res0
         return None
     tok = DelimToken.__m_onto.try_parse(t, TerminParseAttr.NO)
     if (tok is not None):
         res = DelimToken(t, tok.end_token)
         res.typ = (Utils.valToEnum(tok.termin.tag, DelimType))
         res.doublt = tok.termin.tag2 is not None
         res2 = DelimToken.try_parse(res.end_token.next0_)
         if (res2 is not None):
             if (res2.typ == res.typ):
                 res.end_token = res2.end_token
                 res.doublt = False
         if (t.morph.class0_.is_pronoun):
             npt = NounPhraseHelper.try_parse(
                 t, NounPhraseParseAttr.PARSEADVERBS, 0, None)
             if (npt is not None and npt.end_char > res.end_char):
                 return None
         return res
     return None
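
A hedged usage sketch for the parser above: these try_parse methods either return a metatoken spanning begin_token..end_token or None, so a caller typically walks the token chain and skips past each match. The driver function below is an illustration only; its name and the way first_token is obtained are assumptions, not part of the example.

 def collect_delims(first_token: 'Token') -> list:
     # Hypothetical driver: gather every DelimToken found on a token chain.
     # 'first_token' is assumed to come from a pullenti analysis result.
     found = []
     t = first_token
     while t is not None:
         dt = DelimToken.try_parse(t)
         if dt is not None:
             found.append(dt)
             t = dt.end_token.next0_  # continue after the matched span
         else:
             t = t.next0_
     return found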
Example #2
 def try_parse(t : 'Token', prev : 'WeaponItemToken', after_conj : bool, attach_high : bool=False) -> 'WeaponItemToken':
     res = WeaponItemToken.__try_parse(t, prev, after_conj, attach_high)
     if (res is None): 
         npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
         if (npt is not None and npt.noun.begin_char > npt.begin_char): 
             res = WeaponItemToken.__try_parse(npt.noun.begin_token, prev, after_conj, attach_high)
             if (res is not None): 
                 if (res.typ == WeaponItemToken.Typs.NOUN): 
                     str0_ = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                     if (str0_ == "РУЧНОЙ ГРАНАТ"): 
                         str0_ = "РУЧНАЯ ГРАНАТА"
                     if ((Utils.ifNotNull(str0_, "")).endswith(res.value)): 
                         if (res.alt_value is None): 
                             res.alt_value = str0_
                         else: 
                             str0_ = str0_[0:0+len(str0_) - len(res.value)].strip()
                             res.alt_value = "{0} {1}".format(str0_, res.alt_value)
                         res.begin_token = t
                         return res
         return None
     if (res.typ == WeaponItemToken.Typs.NAME): 
         br = BracketHelper.try_parse(res.end_token.next0_, BracketParseAttr.NO, 100)
         if (br is not None and br.is_char('(')): 
             alt = MiscHelper.get_text_value_of_meta_token(br, GetTextAttr.NO)
             if (MiscHelper.can_be_equal_cyr_and_latss(res.value, alt)): 
                 res.alt_value = alt
                 res.end_token = br.end_token
     return res
Example #3
 def try_parse(t: 'Token') -> 'ConjunctionToken':
     """ Попытаться выделить союз с указанного токена.
     
     Args:
         t(Token): начальный токен
     
     Returns:
         ConjunctionToken: результат или null
     """
     if (not (isinstance(t, TextToken))):
         return None
     if (t.is_comma):
         ne = ConjunctionHelper.try_parse(t.next0_)
         if (ne is not None):
             ne.begin_token = t
             ne.is_simple = False
             return ne
         return ConjunctionToken._new478(t, t, ConjunctionType.COMMA, True,
                                         ",")
     tok = ConjunctionHelper.__m_ontology.try_parse(t, TerminParseAttr.NO)
     if (tok is not None):
         if (t.is_value("ТО", None)):
             npt = NounPhraseHelper.try_parse(
                 t, NounPhraseParseAttr.PARSEADVERBS, 0, None)
             if (npt is not None and npt.end_char > tok.end_token.end_char):
                 return None
         if (tok.termin.tag2 is not None):
             if (not (isinstance(tok.end_token, TextToken))):
                 return None
             if (tok.end_token.get_morph_class_in_dictionary().is_verb):
                 if (not tok.end_token.term.endswith("АЯ")):
                     return None
         return ConjunctionToken._new479(
             t, tok.end_token, tok.termin.canonic_text,
             Utils.valToEnum(tok.termin.tag, ConjunctionType))
     if (not t.get_morph_class_in_dictionary().is_conjunction):
         return None
     if (t.is_and or t.is_or):
         res = ConjunctionToken._new480(
             t, t, t.term, True,
             (ConjunctionType.OR if t.is_or else ConjunctionType.AND))
         if (((t.next0_ is not None and t.next0_.is_char('(') and
               (isinstance(t.next0_.next0_, TextToken))) and
              t.next0_.next0_.is_or and t.next0_.next0_.next0_ is not None)
                 and t.next0_.next0_.next0_.is_char(')')):
             res.end_token = t.next0_.next0_.next0_
         elif ((t.next0_ is not None and t.next0_.is_char_of("\\/") and
                (isinstance(t.next0_.next0_, TextToken)))
               and t.next0_.next0_.is_or):
             res.end_token = t.next0_.next0_
         return res
     term = t.term
     if (term == "НИ"):
         return ConjunctionToken._new479(t, t, term, ConjunctionType.NOT)
     if ((term == "А" or term == "НО" or term == "ЗАТО")
             or term == "ОДНАКО"):
         return ConjunctionToken._new479(t, t, term, ConjunctionType.BUT)
     return None
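
A minimal sketch of consuming the result above. It relies only on fields visible in the example (is_simple, begin_token, end_token); the typ field name is assumed by analogy with the DelimToken example, and how the token chain is produced is not shown.

 def describe_conjunction(t: 'Token'):
     # Hypothetical helper: summarize what ConjunctionHelper.try_parse found at 't'.
     ct = ConjunctionHelper.try_parse(t)
     if ct is None:
         return None
     # ct.typ is assumed to hold a ConjunctionType value (AND, OR, COMMA, NOT, BUT)
     return (ct.typ, ct.is_simple, ct.begin_token.begin_char, ct.end_token.end_char)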
Example #4
 def check_unknown_region(t: 'Token') -> 'Token':
     from pullenti.ner.geo.internal.TerrItemToken import TerrItemToken
     if (not (isinstance(t, TextToken))):
         return None
     npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
     if (npt is None):
         return None
     if (TerrItemToken._m_unknown_regions.try_parse(
             npt.end_token, TerminParseAttr.FULLWORDSONLY) is not None):
         return npt.end_token
     return None
Example #5
 def try_attach(t0: 'Token') -> 'PhoneItemToken':
     res = PhoneItemToken.__try_attach(t0)
     if (res is None):
         return None
     if (res.item_type != PhoneItemToken.PhoneItemType.PREFIX):
         return res
     t = res.end_token.next0_
     first_pass3388 = True
     while True:
         if first_pass3388: first_pass3388 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char):
             break
         if (t.is_newline_before):
             break
         res2 = PhoneItemToken.__try_attach(t)
         if (res2 is not None):
             if (res2.item_type == PhoneItemToken.PhoneItemType.PREFIX):
                 if (res.kind == PhoneKind.UNDEFINED):
                     res.kind = res2.kind
                 res.end_token = res2.end_token
                 t = res.end_token
                 continue
             break
         if (t.is_char(':')):
             res.end_token = t
             break
         if (not (isinstance(t, TextToken))):
             break
         if (t0.length_char == 1):
             break
         npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0,
                                          None)
         if (npt is not None):
             t = npt.end_token
             if (t.is_value("ПОСЕЛЕНИЕ", None)):
                 return None
             res.end_token = t
             continue
         if (t.get_morph_class_in_dictionary().is_proper):
             res.end_token = t
             continue
         if (t.morph.class0_.is_preposition):
             continue
         break
     return res
Example #6
 def __site_before(t: 'Token') -> 'Token':
     if (t is not None and t.is_char(':')):
         t = t.previous
     if (t is None):
         return None
     if ((t.is_value("ВЕБСАЙТ", None) or t.is_value("WEBSITE", None)
          or t.is_value("WEB", None)) or t.is_value("WWW", None)):
         return t
     t0 = None
     if (t.is_value("САЙТ", None) or t.is_value("SITE", None)):
         t0 = t
         t = t.previous
     elif (t.is_value("АДРЕС", None)):
         t0 = t.previous
         if (t0 is not None and t0.is_char('.')):
             t0 = t0.previous
         if (t0 is not None):
             if (t0.is_value("ЭЛ", None)
                     or t0.is_value("ЭЛЕКТРОННЫЙ", None)):
                 return t0
         return None
     else:
         return None
     if (t is not None and t.is_hiphen):
         t = t.previous
     if (t is None):
         return t0
     if (t.is_value("WEB", None) or t.is_value("ВЕБ", None)):
         t0 = t
     if (t0.previous is not None and t0.previous.morph.class0_.is_adjective
             and (t0.whitespaces_before_count < 3)):
         npt = NounPhraseHelper.try_parse(t0.previous,
                                          NounPhraseParseAttr.NO, 0, None)
         if (npt is not None):
             t0 = npt.begin_token
     return t0
Example #7
 def try_attach(t : 'Token', p1 : 'InstrumentParticipantReferent'=None, p2 : 'InstrumentParticipantReferent'=None, is_contract : bool=False) -> 'ParticipantToken':
     if (t is None): 
         return None
     tt = t
     br = False
     if (p1 is None and p2 is None and is_contract): 
         r1 = t.get_referent()
         if ((r1 is not None and t.next0_ is not None and t.next0_.is_comma_and) and (isinstance(t.next0_.next0_, ReferentToken))): 
             r2 = t.next0_.next0_.get_referent()
             if (r1.type_name == r2.type_name): 
                 ttt = t.next0_.next0_.next0_
                 refs = list()
                 refs.append(r1)
                 refs.append(r2)
                 first_pass3282 = True
                 while True:
                     if first_pass3282: first_pass3282 = False
                     else: ttt = ttt.next0_
                     if (not (ttt is not None)): break
                     if ((ttt.is_comma_and and ttt.next0_ is not None and ttt.next0_.get_referent() is not None) and ttt.next0_.get_referent().type_name == r1.type_name): 
                         ttt = ttt.next0_
                         if (not ttt.get_referent() in refs): 
                             refs.append(ttt.get_referent())
                         continue
                     break
                 first_pass3283 = True
                 while True:
                     if first_pass3283: first_pass3283 = False
                     else: ttt = ttt.next0_
                     if (not (ttt is not None)): break
                     if (ttt.is_comma or ttt.morph.class0_.is_preposition): 
                         continue
                     if ((ttt.is_value("ИМЕНОВАТЬ", None) or ttt.is_value("ДАЛЬНЕЙШИЙ", None) or ttt.is_value("ДАЛЕЕ", None)) or ttt.is_value("ТЕКСТ", None)): 
                         continue
                     if (ttt.is_value("ДОГОВАРИВАТЬСЯ", None)): 
                         continue
                     npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                     if (npt is not None and npt.noun.is_value("СТОРОНА", None) and npt.morph.number != MorphNumber.SINGULAR): 
                         re = ParticipantToken._new1573(t, npt.end_token, ParticipantToken.Kinds.NAMEDASPARTS)
                         re.parts = refs
                         return re
                     break
         if ((isinstance(r1, OrganizationReferent)) or (isinstance(r1, PersonReferent))): 
             has_br = False
             has_named = False
             if (isinstance(r1, PersonReferent)): 
                 if (t.previous is not None and t.previous.is_value("ЛИЦО", None)): 
                     return None
             elif (t.previous is not None and ((t.previous.is_value("ВЫДАВАТЬ", None) or t.previous.is_value("ВЫДАТЬ", None)))): 
                 return None
             ttt = t.begin_token
             while ttt is not None and (ttt.end_char < t.end_char): 
                 if (ttt.is_char('(')): 
                     has_br = True
                 elif ((ttt.is_value("ИМЕНОВАТЬ", None) or ttt.is_value("ДАЛЬНЕЙШИЙ", None) or ttt.is_value("ДАЛЕЕ", None)) or ttt.is_value("ТЕКСТ", None)): 
                     has_named = True
                 elif ((ttt.is_comma or ttt.morph.class0_.is_preposition or ttt.is_hiphen) or ttt.is_char(':')): 
                     pass
                 elif (isinstance(ttt, ReferentToken)): 
                     pass
                 elif (has_br or has_named): 
                     npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0, None)
                     if (npt is None): 
                         break
                     if (has_br): 
                         if (npt.end_token.next0_ is None or not npt.end_token.next0_.is_char(')')): 
                             break
                     if (not has_named): 
                         if (ParticipantToken.M_ONTOLOGY.try_parse(ttt, TerminParseAttr.NO) is None): 
                             break
                     re = ParticipantToken._new1573(t, t, ParticipantToken.Kinds.NAMEDAS)
                     re.typ = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                     re.parts = list()
                     re.parts.append(r1)
                     return re
                 ttt = ttt.next0_
             has_br = False
             has_named = False
             end_side = None
             brr = None
             add_refs = None
             ttt = t.next0_
             first_pass3284 = True
             while True:
                 if first_pass3284: first_pass3284 = False
                 else: ttt = ttt.next0_
                 if (not (ttt is not None)): break
                 if ((isinstance(ttt, NumberToken)) and (isinstance(ttt.next0_, TextToken)) and ttt.next0_.term == "СТОРОНЫ"): 
                     ttt = ttt.next0_
                     end_side = ttt
                     if (ttt.next0_ is not None and ttt.next0_.is_comma): 
                         ttt = ttt.next0_
                     if (ttt.next0_ is not None and ttt.next0_.is_and): 
                         break
                 if (brr is not None and ttt.begin_char > brr.end_char): 
                     brr = (None)
                 if (BracketHelper.can_be_start_of_sequence(ttt, False, False)): 
                     brr = BracketHelper.try_parse(ttt, BracketParseAttr.NO, 100)
                     if (brr is not None and (brr.length_char < 7) and ttt.is_char('(')): 
                         ttt = brr.end_token
                         brr = (None)
                         continue
                 elif ((ttt.is_value("ИМЕНОВАТЬ", None) or ttt.is_value("ДАЛЬНЕЙШИЙ", None) or ttt.is_value("ДАЛЕЕ", None)) or ttt.is_value("ТЕКСТ", None)): 
                     has_named = True
                 elif ((ttt.is_comma or ttt.morph.class0_.is_preposition or ttt.is_hiphen) or ttt.is_char(':')): 
                     pass
                 elif (brr is not None or has_named): 
                     if (BracketHelper.can_be_start_of_sequence(ttt, True, False)): 
                         ttt = ttt.next0_
                     npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0, None)
                     typ22 = None
                     if (npt is not None): 
                         ttt = npt.end_token
                         if (npt.end_token.is_value("ДОГОВОР", None)): 
                             continue
                     else: 
                         ttok = None
                         if (isinstance(ttt, MetaToken)): 
                             ttok = ParticipantToken.M_ONTOLOGY.try_parse(ttt.begin_token, TerminParseAttr.NO)
                         if (ttok is not None): 
                             typ22 = ttok.termin.canonic_text
                         elif (has_named and ttt.morph.class0_.is_adjective): 
                             typ22 = ttt.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
                         elif (brr is not None): 
                             continue
                         else: 
                             break
                     if (BracketHelper.can_be_end_of_sequence(ttt.next0_, True, None, False)): 
                         ttt = ttt.next0_
                     if (brr is not None): 
                         if (ttt.next0_ is None): 
                             ttt = brr.end_token
                             continue
                         ttt = ttt.next0_
                     if (not has_named and typ22 is None): 
                         if (ParticipantToken.M_ONTOLOGY.try_parse(npt.begin_token, TerminParseAttr.NO) is None): 
                             break
                     re = ParticipantToken._new1573(t, ttt, ParticipantToken.Kinds.NAMEDAS)
                     re.typ = (Utils.ifNotNull(typ22, npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)))
                     re.parts = list()
                     re.parts.append(r1)
                     return re
                 elif ((ttt.is_value("ЗАРЕГИСТРИРОВАННЫЙ", None) or ttt.is_value("КАЧЕСТВО", None) or ttt.is_value("ПРОЖИВАЮЩИЙ", None)) or ttt.is_value("ЗАРЕГ", None)): 
                     pass
                 elif (ttt.get_referent() == r1): 
                     pass
                 elif ((isinstance(ttt.get_referent(), PersonIdentityReferent)) or (isinstance(ttt.get_referent(), AddressReferent))): 
                     if (add_refs is None): 
                         add_refs = list()
                     add_refs.append(ttt.get_referent())
                 else: 
                     prr = ttt.kit.process_referent("PERSONPROPERTY", ttt)
                     if (prr is not None): 
                         ttt = prr.end_token
                         continue
                     if (isinstance(ttt.get_referent(), GeoReferent)): 
                         continue
                     npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                     if (npt is not None): 
                         if ((npt.noun.is_value("МЕСТО", None) or npt.noun.is_value("ЖИТЕЛЬСТВО", None) or npt.noun.is_value("ПРЕДПРИНИМАТЕЛЬ", None)) or npt.noun.is_value("ПОЛ", None) or npt.noun.is_value("РОЖДЕНИЕ", None)): 
                             ttt = npt.end_token
                             continue
                     if (ttt.is_newline_before): 
                         break
                     if (ttt.length_char < 3): 
                         continue
                     mc = ttt.get_morph_class_in_dictionary()
                     if (mc.is_adverb or mc.is_adjective): 
                         continue
                     if (ttt.chars.is_all_upper): 
                         continue
                     break
             if (end_side is not None or ((add_refs is not None and t.previous is not None and t.previous.is_and))): 
                 re = ParticipantToken._new1573(t, Utils.ifNotNull(end_side, t), ParticipantToken.Kinds.NAMEDAS)
                 re.typ = (None)
                 re.parts = list()
                 re.parts.append(r1)
                 if (add_refs is not None): 
                     re.parts.extend(add_refs)
                 return re
         too = ParticipantToken.M_ONTOLOGY.try_parse(t, TerminParseAttr.NO)
         if (too is not None): 
             if ((isinstance(t.previous, TextToken)) and t.previous.is_value("ЛИЦО", None)): 
                 too = (None)
         if (too is not None and too.termin.tag is not None and too.termin.canonic_text != "СТОРОНА"): 
             tt1 = too.end_token.next0_
             if (tt1 is not None): 
                 if (tt1.is_hiphen or tt1.is_char(':')): 
                     tt1 = tt1.next0_
             if (isinstance(tt1, ReferentToken)): 
                 r1 = tt1.get_referent()
                 if ((isinstance(r1, PersonReferent)) or (isinstance(r1, OrganizationReferent))): 
                     re = ParticipantToken._new1573(t, tt1, ParticipantToken.Kinds.NAMEDAS)
                     re.typ = too.termin.canonic_text
                     re.parts = list()
                     re.parts.append(r1)
                     return re
     add_typ1 = (None if p1 is None else p1.typ)
     add_typ2 = (None if p2 is None else p2.typ)
     if (BracketHelper.can_be_start_of_sequence(tt, False, False) and tt.next0_ is not None): 
         br = True
         tt = tt.next0_
     term1 = None
     term2 = None
     if (add_typ1 is not None and add_typ1.find(' ') > 0 and not add_typ1.startswith("СТОРОНА")): 
         term1 = Termin(add_typ1)
     if (add_typ2 is not None and add_typ2.find(' ') > 0 and not add_typ2.startswith("СТОРОНА")): 
         term2 = Termin(add_typ2)
     named = False
     typ_ = None
     t1 = None
     t0 = tt
     first_pass3285 = True
     while True:
         if first_pass3285: first_pass3285 = False
         else: tt = tt.next0_
         if (not (tt is not None)): break
         if (tt.morph.class0_.is_preposition and typ_ is not None): 
             continue
         if (tt.is_char_of("(:)") or tt.is_hiphen): 
             continue
         if (tt.is_table_control_char): 
             break
         if (tt.is_newline_before and tt != t0): 
             if (isinstance(tt, NumberToken)): 
                 break
             if ((isinstance(tt, TextToken)) and (isinstance(tt.previous, TextToken))): 
                 if (tt.previous.is_value(tt.term, None)): 
                     break
         if (BracketHelper.is_bracket(tt, False)): 
             continue
         tok = (ParticipantToken.M_ONTOLOGY.try_parse(tt, TerminParseAttr.NO) if ParticipantToken.M_ONTOLOGY is not None else None)
         if (tok is not None and (isinstance(tt.previous, TextToken))): 
             if (tt.previous.is_value("ЛИЦО", None)): 
                 return None
         if (tok is None): 
             if (add_typ1 is not None and ((MiscHelper.is_not_more_than_one_error(add_typ1, tt) or (((isinstance(tt, MetaToken)) and tt.begin_token.is_value(add_typ1, None)))))): 
                 if (typ_ is not None): 
                     if (not ParticipantToken.__is_types_equal(add_typ1, typ_)): 
                         break
                 typ_ = add_typ1
                 t1 = tt
                 continue
             if (add_typ2 is not None and ((MiscHelper.is_not_more_than_one_error(add_typ2, tt) or (((isinstance(tt, MetaToken)) and tt.begin_token.is_value(add_typ2, None)))))): 
                 if (typ_ is not None): 
                     if (not ParticipantToken.__is_types_equal(add_typ2, typ_)): 
                         break
                 typ_ = add_typ2
                 t1 = tt
                 continue
             if (tt.chars.is_letter): 
                 if (term1 is not None): 
                     tok1 = term1.try_parse(tt, TerminParseAttr.NO)
                     if (tok1 is not None): 
                         if (typ_ is not None): 
                             if (not ParticipantToken.__is_types_equal(add_typ1, typ_)): 
                                 break
                         typ_ = add_typ1
                         tt = tok1.end_token
                         t1 = tt
                         continue
                 if (term2 is not None): 
                     tok2 = term2.try_parse(tt, TerminParseAttr.NO)
                     if (tok2 is not None): 
                         if (typ_ is not None): 
                             if (not ParticipantToken.__is_types_equal(add_typ2, typ_)): 
                                 break
                         typ_ = add_typ2
                         tt = tok2.end_token
                         t1 = tt
                         continue
                 if (named and tt.get_morph_class_in_dictionary().is_noun): 
                     if (not tt.chars.is_all_lower or BracketHelper.is_bracket(tt.previous, True)): 
                         if (DecreeToken.is_keyword(tt, False) is None): 
                             val = tt.get_normal_case_text(MorphClass.NOUN, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                             if (typ_ is not None): 
                                 if (not ParticipantToken.__is_types_equal(typ_, val)): 
                                     break
                             typ_ = val
                             t1 = tt
                             continue
             if (named and typ_ is None and is_contract): 
                 if ((isinstance(tt, TextToken)) and tt.chars.is_cyrillic_letter and tt.chars.is_capital_upper): 
                     dc = tt.get_morph_class_in_dictionary()
                     if (dc.is_undefined or dc.is_noun): 
                         dt = DecreeToken.try_attach(tt, None, False)
                         ok = True
                         if (dt is not None): 
                             ok = False
                         elif (tt.is_value("СТОРОНА", None)): 
                             ok = False
                         if (ok): 
                             typ_ = tt.lemma
                             t1 = tt
                             continue
                     if (dc.is_adjective): 
                         npt = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
                         if (npt is not None and len(npt.adjectives) > 0 and npt.noun.get_morph_class_in_dictionary().is_noun): 
                             typ_ = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                             t1 = npt.end_token
                             continue
             if (tt == t): 
                 break
             if ((isinstance(tt, NumberToken)) or tt.is_char('.')): 
                 break
             if (tt.length_char < 4): 
                 if (typ_ is not None): 
                     continue
             break
         if (tok.termin.tag is None): 
             named = True
         else: 
             if (typ_ is not None): 
                 break
             if (tok.termin.canonic_text == "СТОРОНА"): 
                 tt1 = tt.next0_
                 if (tt1 is not None and tt1.is_hiphen): 
                     tt1 = tt1.next0_
                 if (not (isinstance(tt1, NumberToken))): 
                     break
                 if (tt1.is_newline_before): 
                     break
                 typ_ = "{0} {1}".format(tok.termin.canonic_text, tt1.value)
                 t1 = tt1
             else: 
                 typ_ = tok.termin.canonic_text
                 t1 = tok.end_token
             break
         tt = tok.end_token
     if (typ_ is None): 
         return None
     if (not named and t1 != t and not typ_.startswith("СТОРОНА")): 
         if (not ParticipantToken.__is_types_equal(typ_, add_typ1) and not ParticipantToken.__is_types_equal(typ_, add_typ2)): 
             return None
     if (BracketHelper.can_be_end_of_sequence(t1.next0_, False, None, False)): 
         t1 = t1.next0_
         if (not t.is_whitespace_before and BracketHelper.can_be_start_of_sequence(t.previous, False, False)): 
             t = t.previous
     elif (BracketHelper.can_be_start_of_sequence(t, False, False) and BracketHelper.can_be_end_of_sequence(t1.next0_, True, t, True)): 
         t1 = t1.next0_
     if (br and t1.next0_ is not None and BracketHelper.can_be_end_of_sequence(t1.next0_, False, None, False)): 
         t1 = t1.next0_
     res = ParticipantToken._new1578(t, t1, (ParticipantToken.Kinds.NAMEDAS if named else ParticipantToken.Kinds.PURE), typ_)
     if (t.is_char(':')): 
         res.begin_token = t.next0_
     return res
Example #8
 def __try_attach_contract_ground(t : 'Token', ip : 'InstrumentParticipantReferent', can_be_passport : bool=False) -> 'Token':
     ok = False
     first_pass3289 = True
     while True:
         if first_pass3289: first_pass3289 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_char(',') or t.morph.class0_.is_preposition): 
             continue
         if (t.is_char('(')): 
             br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
             if (br is not None): 
                 t = br.end_token
                 continue
         if (t.is_value("ОСНОВАНИЕ", None) or t.is_value("ДЕЙСТВОВАТЬ", None) or t.is_value("ДЕЙСТВУЮЩИЙ", None)): 
             ok = True
             if (t.next0_ is not None and t.next0_.is_char('(')): 
                 br = BracketHelper.try_parse(t.next0_, BracketParseAttr.NO, 100)
                 if (br is not None and (br.length_char < 10)): 
                     t = br.end_token
             continue
         dr = Utils.asObjectOrNull(t.get_referent(), DecreeReferent)
         if (dr is not None): 
             ip.ground = dr
             return t
         pir = Utils.asObjectOrNull(t.get_referent(), PersonIdentityReferent)
         if (pir is not None and can_be_passport): 
             if (pir.typ is not None and not "паспорт" in pir.typ): 
                 ip.ground = pir
                 return t
         if (t.is_value("УСТАВ", None)): 
             ip.ground = t.get_normal_case_text(MorphClass.NOUN, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
             return t
         if (t.is_value("ДОВЕРЕННОСТЬ", None)): 
             dts = DecreeToken.try_attach_list(t.next0_, None, 10, False)
             if (dts is None): 
                 has_spec = False
                 ttt = t.next0_
                 first_pass3290 = True
                 while True:
                     if first_pass3290: first_pass3290 = False
                     else: ttt = ttt.next0_
                     if (not (ttt is not None and ((ttt.end_char - t.end_char) < 200))): break
                     if (ttt.is_comma): 
                         continue
                     if (ttt.is_value("УДОСТОВЕРИТЬ", None) or ttt.is_value("УДОСТОВЕРЯТЬ", None)): 
                         has_spec = True
                         continue
                     dt = DecreeToken.try_attach(ttt, None, False)
                     if (dt is not None): 
                         if (dt.typ == DecreeToken.ItemType.DATE or dt.typ == DecreeToken.ItemType.NUMBER): 
                             dts = DecreeToken.try_attach_list(ttt, None, 10, False)
                             break
                     npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                     if (npt is not None): 
                         if (npt.end_token.is_value("НОТАРИУС", None)): 
                             ttt = npt.end_token
                             has_spec = True
                             continue
                     if (ttt.get_referent() is not None): 
                         if (has_spec): 
                             continue
                     break
             if (dts is not None and len(dts) > 0): 
                 t0 = t
                 dr = DecreeReferent()
                 dr.typ = "ДОВЕРЕННОСТЬ"
                 for d in dts: 
                     if (d.typ == DecreeToken.ItemType.DATE): 
                         dr._add_date(d)
                         t = d.end_token
                     elif (d.typ == DecreeToken.ItemType.NUMBER): 
                         dr._add_number(d)
                         t = d.end_token
                     else: 
                         break
                 ad = t.kit.get_analyzer_data_by_analyzer_name(InstrumentAnalyzer.ANALYZER_NAME)
                 ip.ground = ad.register_referent(dr)
                 rt = ReferentToken(Utils.asObjectOrNull(ip.ground, Referent), t0, t)
                 t.kit.embed_token(rt)
                 return rt
             ip.ground = "ДОВЕРЕННОСТЬ"
             return t
         break
     return None
Example #9
 def try_parse(t : 'Token', attrs : 'BracketParseAttr'=BracketParseAttr.NO, max_tokens : int=100) -> 'BracketSequenceToken':
     """ Попробовать восстановить последовательность, обрамляемую кавычками или скобками. Поддерживается
     вложенность, возможность отсутствия закрывающего элемента и др.
     
     Args:
         t(Token): начальный токен
         attrs(BracketParseAttr): параметры выделения
         max_tokens(int): максимально токенов (вдруг забыли закрывающую кавычку)
     
     Returns:
         BracketSequenceToken: метатокен BracketSequenceToken
     
     """
     t0 = t
     cou = 0
     if (not BracketHelper.can_be_start_of_sequence(t0, False, False)): 
         return None
     br_list = list()
     br_list.append(BracketHelper.Bracket(t0))
     cou = 0
     crlf = 0
     last = None
     lev = 1
     is_assim = br_list[0].char0_ != '«' and BracketHelper.M_ASSYMOPEN_CHARS.find(br_list[0].char0_) >= 0
     gen_case = False
     t = t0.next0_
     first_pass3057 = True
     while True:
         if first_pass3057: first_pass3057 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char): 
             break
         last = t
         if (t.is_char_of(BracketHelper.M_OPEN_CHARS) or t.is_char_of(BracketHelper.M_CLOSE_CHARS)): 
             if (t.is_newline_before and (((attrs) & (BracketParseAttr.CANBEMANYLINES))) == (BracketParseAttr.NO)): 
                 if (t.whitespaces_before_count > 10 or BracketHelper.can_be_start_of_sequence(t, False, False)): 
                     if (t.is_char('(') and not t0.is_char('(')): 
                         pass
                     else: 
                         last = t.previous
                         break
             bb = BracketHelper.Bracket(t)
             br_list.append(bb)
             if (len(br_list) > 20): 
                 break
             if ((len(br_list) == 3 and br_list[1].can_be_open and bb.can_be_close) and BracketHelper.__must_be_close_char(bb.char0_, br_list[1].char0_) and BracketHelper.__must_be_close_char(bb.char0_, br_list[0].char0_)): 
                 ok = False
                 tt = t.next0_
                 while tt is not None: 
                     if (tt.is_newline_before): 
                         break
                     if (tt.is_char(',')): 
                         break
                     if (tt.is_char('.')): 
                         tt = tt.next0_
                         while tt is not None: 
                             if (tt.is_newline_before): 
                                 break
                             elif (tt.is_char_of(BracketHelper.M_OPEN_CHARS) or tt.is_char_of(BracketHelper.M_CLOSE_CHARS)): 
                                 bb2 = BracketHelper.Bracket(tt)
                                 if (BracketHelper.can_be_end_of_sequence(tt, False, None, False) and BracketHelper.__can_be_close_char(bb2.char0_, br_list[0].char0_)): 
                                     ok = True
                                 break
                             tt = tt.next0_
                         break
                     if (t.is_char_of(BracketHelper.M_OPEN_CHARS) or t.is_char_of(BracketHelper.M_CLOSE_CHARS)): 
                         ok = True
                         break
                     tt = tt.next0_
                 if (not ok): 
                     break
             if (is_assim): 
                 if (bb.can_be_open and not bb.can_be_close and bb.char0_ == br_list[0].char0_): 
                     lev += 1
                 elif (bb.can_be_close and not bb.can_be_open and BracketHelper.M_OPEN_CHARS.find(br_list[0].char0_) == BracketHelper.M_CLOSE_CHARS.find(bb.char0_)): 
                     lev -= 1
                     if (lev == 0): 
                         break
         else: 
             cou += 1
             if (cou > max_tokens): 
                 break
             if ((((attrs) & (BracketParseAttr.CANCONTAINSVERBS))) == (BracketParseAttr.NO)): 
                 if (t.morph.language.is_cyrillic): 
                     if (t.get_morph_class_in_dictionary() == MorphClass.VERB): 
                         if (not t.morph.class0_.is_adjective and not t.morph.contains_attr("страд.з.", None)): 
                             if (t.chars.is_all_lower): 
                                 norm = t.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
                                 if (not LanguageHelper.ends_with(norm, "СЯ")): 
                                     if (len(br_list) > 1): 
                                         break
                                     if (br_list[0].char0_ != '('): 
                                         break
                 elif (t.morph.language.is_en): 
                     if (t.morph.class0_ == MorphClass.VERB and t.chars.is_all_lower): 
                         break
                 r = t.get_referent()
                 if (r is not None and r.type_name == "ADDRESS"): 
                     if (not t0.is_char('(')): 
                         break
         if ((((attrs) & (BracketParseAttr.CANBEMANYLINES))) != (BracketParseAttr.NO)): 
             if (t.is_newline_before): 
                 if (t.newlines_before_count > 1): 
                     break
                 crlf += 1
             continue
         if (t.is_newline_before): 
             if (t.whitespaces_before_count > 15): 
                 last = t.previous
                 break
             crlf += 1
             if (not t.chars.is_all_lower): 
                 if (MiscHelper.can_be_start_of_sentence(t)): 
                     has = False
                     tt = t.next0_
                     while tt is not None: 
                         if (tt.is_newline_before): 
                             break
                         elif (tt.length_char == 1 and tt.is_char_of(BracketHelper.M_OPEN_CHARS) and tt.is_whitespace_before): 
                             break
                         elif (tt.length_char == 1 and tt.is_char_of(BracketHelper.M_CLOSE_CHARS) and not tt.is_whitespace_before): 
                             has = True
                             break
                         tt = tt.next0_
                     if (not has): 
                         last = t.previous
                         break
             if ((isinstance(t.previous, MetaToken)) and BracketHelper.can_be_end_of_sequence(t.previous.end_token, False, None, False)): 
                 last = t.previous
                 break
         if (crlf > 1): 
             if (len(br_list) > 1): 
                 break
             if (crlf > 10): 
                 break
         if (t.is_char(';') and t.is_newline_after): 
             break
         npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
         if (npt is not None): 
             if (t.is_newline_before): 
                 gen_case = npt.morph.case_.is_genitive
             t = npt.end_token
             last = t
     if ((len(br_list) == 1 and br_list[0].can_be_open and (isinstance(last, MetaToken))) and last.is_newline_after): 
         if (BracketHelper.can_be_end_of_sequence(last.end_token, False, None, False)): 
             return BracketSequenceToken(t0, last)
     if ((len(br_list) == 1 and br_list[0].can_be_open and gen_case) and last.is_newline_after and crlf <= 2): 
         return BracketSequenceToken(t0, last)
     if (len(br_list) < 1): 
         return None
     i = 1
     while i < (len(br_list) - 1): 
         if (br_list[i].char0_ == '<' and br_list[i + 1].char0_ == '>'): 
             br_list[i].can_be_open = True
             br_list[i + 1].can_be_close = True
         i += 1
     internals = None
     while len(br_list) > 3:
         i = len(br_list) - 1
         if ((br_list[i].can_be_close and br_list[i - 1].can_be_open and not BracketHelper.__can_be_close_char(br_list[i].char0_, br_list[0].char0_)) and BracketHelper.__can_be_close_char(br_list[i].char0_, br_list[i - 1].char0_)): 
             del br_list[len(br_list) - 2:len(br_list) - 2+2]
             continue
         break
     while len(br_list) >= 4:
         changed = False
         i = 1
         while i < (len(br_list) - 2): 
             if ((br_list[i].can_be_open and not br_list[i].can_be_close and br_list[i + 1].can_be_close) and not br_list[i + 1].can_be_open): 
                 ok = False
                 if (BracketHelper.__must_be_close_char(br_list[i + 1].char0_, br_list[i].char0_) or br_list[i].char0_ != br_list[0].char0_): 
                     ok = True
                     if ((i == 1 and ((i + 2) < len(br_list)) and br_list[i + 2].char0_ == ')') and br_list[i + 1].char0_ != ')' and BracketHelper.__can_be_close_char(br_list[i + 1].char0_, br_list[i - 1].char0_)): 
                         br_list[i + 2] = br_list[i + 1]
                 elif (i > 1 and ((i + 2) < len(br_list)) and BracketHelper.__must_be_close_char(br_list[i + 2].char0_, br_list[i - 1].char0_)): 
                     ok = True
                 if (ok): 
                     if (internals is None): 
                         internals = list()
                     internals.append(BracketSequenceToken(br_list[i].source, br_list[i + 1].source))
                     del br_list[i:i+2]
                     changed = True
                     break
             i += 1
         if (not changed): 
             break
     res = None
     if ((len(br_list) >= 4 and br_list[1].can_be_open and br_list[2].can_be_close) and br_list[3].can_be_close and not br_list[3].can_be_open): 
         if (BracketHelper.__can_be_close_char(br_list[3].char0_, br_list[0].char0_)): 
             res = BracketSequenceToken(br_list[0].source, br_list[3].source)
             if (br_list[0].source.next0_ != br_list[1].source or br_list[2].source.next0_ != br_list[3].source): 
                 res.internal.append(BracketSequenceToken(br_list[1].source, br_list[2].source))
             if (internals is not None): 
                 res.internal.extend(internals)
     if ((res is None and len(br_list) >= 3 and br_list[2].can_be_close) and not br_list[2].can_be_open): 
         if ((((attrs) & (BracketParseAttr.NEARCLOSEBRACKET))) != (BracketParseAttr.NO)): 
             if (BracketHelper.__can_be_close_char(br_list[1].char0_, br_list[0].char0_)): 
                 return BracketSequenceToken(br_list[0].source, br_list[1].source)
         ok = True
         if (BracketHelper.__can_be_close_char(br_list[2].char0_, br_list[0].char0_) and BracketHelper.__can_be_close_char(br_list[1].char0_, br_list[0].char0_) and br_list[1].can_be_close): 
             t = br_list[1].source
             while t != br_list[2].source and t is not None: 
                 if (t.is_newline_before): 
                     ok = False
                     break
                 if (t.chars.is_letter and t.chars.is_all_lower): 
                     ok = False
                     break
                 npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
                 if (npt is not None): 
                     t = npt.end_token
                 t = t.next0_
             if (ok): 
                 t = br_list[0].source.next0_
                 while t != br_list[1].source and t is not None: 
                     if (t.is_newline_before): 
                         return BracketSequenceToken(br_list[0].source, t.previous)
                     t = t.next0_
             lev1 = 0
             tt = br_list[0].source.previous
             first_pass3058 = True
             while True:
                 if first_pass3058: first_pass3058 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (tt.is_newline_after or tt.is_table_control_char): 
                     break
                 if (not (isinstance(tt, TextToken))): 
                     continue
                 if (tt.chars.is_letter or tt.length_char > 1): 
                     continue
                 ch = tt.term[0]
                 if (BracketHelper.__can_be_close_char(ch, br_list[0].char0_)): 
                     lev1 += 1
                 elif (BracketHelper.__can_be_close_char(br_list[1].char0_, ch)): 
                     lev1 -= 1
                     if (lev1 < 0): 
                         return BracketSequenceToken(br_list[0].source, br_list[1].source)
         if (ok and BracketHelper.__can_be_close_char(br_list[2].char0_, br_list[0].char0_)): 
             intern = BracketSequenceToken(br_list[1].source, br_list[2].source)
             res = BracketSequenceToken(br_list[0].source, br_list[2].source)
             res.internal.append(intern)
         elif (ok and BracketHelper.__can_be_close_char(br_list[2].char0_, br_list[1].char0_) and br_list[0].can_be_open): 
             if (BracketHelper.__can_be_close_char(br_list[2].char0_, br_list[0].char0_)): 
                 intern = BracketSequenceToken(br_list[1].source, br_list[2].source)
                 res = BracketSequenceToken(br_list[0].source, br_list[2].source)
                 res.internal.append(intern)
             elif (len(br_list) == 3): 
                 return None
     if (res is None and len(br_list) > 1 and br_list[1].can_be_close): 
         res = BracketSequenceToken(br_list[0].source, br_list[1].source)
     if (res is None and len(br_list) > 1 and BracketHelper.__can_be_close_char(br_list[1].char0_, br_list[0].char0_)): 
         res = BracketSequenceToken(br_list[0].source, br_list[1].source)
     if (res is None and len(br_list) == 2 and br_list[0].char0_ == br_list[1].char0_): 
         res = BracketSequenceToken(br_list[0].source, br_list[1].source)
     if (res is not None and internals is not None): 
         for i in internals: 
             if (i.begin_char < res.end_char): 
                 res.internal.append(i)
     if (res is None): 
         cou = 0
         tt = t0.next0_
         first_pass3059 = True
         while True:
             if first_pass3059: first_pass3059 = False
             else: tt = tt.next0_; cou += 1
             if (not (tt is not None)): break
             if (tt.is_table_control_char): 
                 break
             if (MiscHelper.can_be_start_of_sentence(tt)): 
                 break
             if (max_tokens > 0 and cou > max_tokens): 
                 break
             mt = Utils.asObjectOrNull(tt, MetaToken)
             if (mt is None): 
                 continue
             if (isinstance(mt.end_token, TextToken)): 
                 if (mt.end_token.is_char_of(BracketHelper.M_CLOSE_CHARS)): 
                     bb = BracketHelper.Bracket(Utils.asObjectOrNull(mt.end_token, TextToken))
                     if (bb.can_be_close and BracketHelper.__can_be_close_char(bb.char0_, br_list[0].char0_)): 
                         return BracketSequenceToken(t0, tt)
     return res
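
A minimal usage sketch for the documented API above, using only the parameters described in the docstring; how the token chain is obtained is assumed, not part of the example.

 def first_bracket_sequence(first_token: 'Token') -> 'BracketSequenceToken':
     # Hypothetical: return the first quoted/bracketed sequence on a token chain.
     t = first_token
     while t is not None:
         br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
         if br is not None:
             # br.internal holds nested BracketSequenceToken spans, if any
             return br
         t = t.next0_
     return None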
Example #10
 def try_attach(t: 'Token') -> 'TitleItemToken':
     tt = Utils.asObjectOrNull(t, TextToken)
     if (tt is not None):
         t1 = tt
         if (tt.term == "ТЕМА"):
             tit = TitleItemToken.try_attach(tt.next0_)
             if (tit is not None and tit.typ == TitleItemToken.Types.TYP):
                 t1 = tit.end_token
                 if (t1.next0_ is not None and t1.next0_.is_char(':')):
                     t1 = t1.next0_
                 return TitleItemToken._new2655(
                     t, t1, TitleItemToken.Types.TYPANDTHEME, tit.value)
             if (tt.next0_ is not None and tt.next0_.is_char(':')):
                 t1 = tt.next0_
             return TitleItemToken(tt, t1, TitleItemToken.Types.THEME)
         if (tt.term == "ПО" or tt.term == "НА"):
             if (tt.next0_ is not None
                     and tt.next0_.is_value("ТЕМА", None)):
                 t1 = tt.next0_
                 if (t1.next0_ is not None and t1.next0_.is_char(':')):
                     t1 = t1.next0_
                 return TitleItemToken(tt, t1, TitleItemToken.Types.THEME)
         if (tt.term == "ПЕРЕВОД" or tt.term == "ПЕР"):
             tt2 = tt.next0_
             if (tt2 is not None and tt2.is_char('.')):
                 tt2 = tt2.next0_
             if (isinstance(tt2, TextToken)):
                 if (tt2.term == "C" or tt2.term == "С"):
                     tt2 = tt2.next0_
                     if (isinstance(tt2, TextToken)):
                         return TitleItemToken(
                             t, tt2, TitleItemToken.Types.TRANSLATE)
         if (tt.term == "СЕКЦИЯ" or tt.term == "SECTION"
                 or tt.term == "СЕКЦІЯ"):
             t1 = tt.next0_
             if (t1 is not None and t1.is_char(':')):
                 t1 = t1.next0_
             br = BracketHelper.try_parse(t1, BracketParseAttr.NO, 100)
             if (br is not None):
                 t1 = br.end_token
             elif (t1 != tt.next0_):
                 while t1 is not None:
                     if (t1.is_newline_after):
                         break
                     t1 = t1.next0_
                 if (t1 is None):
                     return None
             if (t1 != tt.next0_):
                 return TitleItemToken(tt, t1, TitleItemToken.Types.DUST)
         t1 = (None)
         if (tt.is_value("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ")):
             t1 = tt.next0_
         elif (tt.morph.class0_.is_preposition and tt.next0_ is not None
               and tt.next0_.is_value("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ")):
             t1 = tt.next0_.next0_
         elif (tt.is_char('/') and tt.is_newline_before):
             t1 = tt.next0_
         if (t1 is not None):
             if (t1.is_char_of(":") or t1.is_hiphen):
                 t1 = t1.next0_
             spec = TitleItemToken.__try_attach_speciality(t1, True)
             if (spec is not None):
                 spec.begin_token = t
                 return spec
     sss = TitleItemToken.__try_attach_speciality(t, False)
     if (sss is not None):
         return sss
     if (isinstance(t, ReferentToken)):
         return None
     npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
     if (npt is not None):
         s = npt.get_normal_case_text(None, MorphNumber.UNDEFINED,
                                      MorphGender.UNDEFINED, False)
         tok = TitleItemToken.M_TERMINS.try_parse(npt.end_token,
                                                  TerminParseAttr.NO)
         if (tok is not None):
             ty = Utils.valToEnum(tok.termin.tag, TitleItemToken.Types)
             if (ty == TitleItemToken.Types.TYP):
                 tit = TitleItemToken.try_attach(tok.end_token.next0_)
                 if (tit is not None
                         and tit.typ == TitleItemToken.Types.THEME):
                     return TitleItemToken._new2655(
                         npt.begin_token, tit.end_token,
                         TitleItemToken.Types.TYPANDTHEME, s)
                 if (s == "РАБОТА" or s == "РОБОТА" or s == "ПРОЕКТ"):
                     return None
                 t1 = tok.end_token
                 if (s == "ДИССЕРТАЦИЯ" or s == "ДИСЕРТАЦІЯ"):
                     err = 0
                     ttt = t1.next0_
                     first_pass3394 = True
                     while True:
                         if first_pass3394: first_pass3394 = False
                         else: ttt = ttt.next0_
                         if (not (ttt is not None)): break
                         if (ttt.morph.class0_.is_preposition):
                             continue
                         if (ttt.is_value("СОИСКАНИЕ", "")):
                             continue
                         npt1 = NounPhraseHelper.try_parse(
                             ttt, NounPhraseParseAttr.NO, 0, None)
                         if (npt1 is not None and npt1.noun.is_value(
                                 "СТЕПЕНЬ", "СТУПІНЬ")):
                             ttt = npt1.end_token
                             t1 = ttt
                             continue
                         rt = t1.kit.process_referent("PERSON", ttt)
                         if (rt is not None and (isinstance(
                                 rt.referent, PersonPropertyReferent))):
                             ppr = Utils.asObjectOrNull(
                                 rt.referent, PersonPropertyReferent)
                             if (ppr.name == "доктор наук"):
                                 t1 = rt.end_token
                                 s = "ДОКТОРСКАЯ ДИССЕРТАЦИЯ"
                                 break
                             elif (ppr.name == "кандидат наук"):
                                 t1 = rt.end_token
                                 s = "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"
                                 break
                             elif (ppr.name == "магистр"):
                                 t1 = rt.end_token
                                 s = "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ"
                                 break
                         if (ttt.is_value("ДОКТОР", None)
                                 or ttt.is_value("КАНДИДАТ", None)
                                 or ttt.is_value("МАГИСТР", "МАГІСТР")):
                             t1 = ttt
                             npt1 = NounPhraseHelper.try_parse(
                                 ttt.next0_, NounPhraseParseAttr.NO, 0,
                                 None)
                             if (npt1 is not None
                                     and npt1.end_token.is_value(
                                         "НАУК", None)):
                                 t1 = npt1.end_token
                             s = ("МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ"
                                  if ttt.is_value("МАГИСТР", "МАГІСТР") else
                                  ("ДОКТОРСКАЯ ДИССЕРТАЦИЯ" if ttt.is_value(
                                      "ДОКТОР", None) else
                                   "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"))
                             break
                         err += 1
                         if (err > 3):
                             break
                 if (t1.next0_ is not None and t1.next0_.is_char('.')):
                     t1 = t1.next0_
                 if (s.endswith("ОТЧЕТ") and t1.next0_ is not None
                         and t1.next0_.is_value("О", None)):
                     npt1 = NounPhraseHelper.try_parse(
                         t1.next0_, NounPhraseParseAttr.PARSEPREPOSITION, 0,
                         None)
                     if (npt1 is not None
                             and npt1.morph.case_.is_prepositional):
                         t1 = npt1.end_token
                 return TitleItemToken._new2655(npt.begin_token, t1, ty, s)
     tok1 = TitleItemToken.M_TERMINS.try_parse(t, TerminParseAttr.NO)
     if (tok1 is not None):
         t1 = tok1.end_token
         re = TitleItemToken(
             tok1.begin_token, t1,
             Utils.valToEnum(tok1.termin.tag, TitleItemToken.Types))
         return re
     if (BracketHelper.can_be_start_of_sequence(t, False, False)):
         tok1 = TitleItemToken.M_TERMINS.try_parse(t.next0_,
                                                   TerminParseAttr.NO)
         if (tok1 is not None and BracketHelper.can_be_end_of_sequence(
                 tok1.end_token.next0_, False, None, False)):
             t1 = tok1.end_token.next0_
             return TitleItemToken(
                 tok1.begin_token, t1,
                 Utils.valToEnum(tok1.termin.tag, TitleItemToken.Types))
     return None
Example #11
 def try_attach_territory(
         li: typing.List['TerrItemToken'],
         ad: 'AnalyzerData',
         attach_always: bool = False,
         cits: typing.List['CityItemToken'] = None,
         exists: typing.List['GeoReferent'] = None) -> 'ReferentToken':
     if (li is None or len(li) == 0):
         return None
     ex_obj = None
     new_name = None
     adj_list = list()
     noun = None
     add_noun = None
     rt = TerrAttachHelper.__try_attach_moscowao(li, ad)
     if (rt is not None):
         return rt
     if (li[0].termin_item is not None
             and li[0].termin_item.canonic_text == "ТЕРРИТОРИЯ"):
         res2 = TerrAttachHelper.__try_attach_pure_terr(li, ad)
         return res2
     if (len(li) == 2):
         if (li[0].rzd is not None and li[1].rzd_dir is not None):
             rzd = GeoReferent()
             rzd._add_name(li[1].rzd_dir)
             rzd._add_typ_ter(li[0].kit.base_language)
             rzd.add_slot(GeoReferent.ATTR_REF, li[0].rzd.referent, False,
                          0)
             rzd.add_ext_referent(li[0].rzd)
             return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
         if (li[1].rzd is not None and li[0].rzd_dir is not None):
             rzd = GeoReferent()
             rzd._add_name(li[0].rzd_dir)
             rzd._add_typ_ter(li[0].kit.base_language)
             rzd.add_slot(GeoReferent.ATTR_REF, li[1].rzd.referent, False,
                          0)
             rzd.add_ext_referent(li[1].rzd)
             return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
     can_be_city_before = False
     adj_terr_before = False
     if (cits is not None):
         if (cits[0].typ == CityItemToken.ItemType.CITY):
             can_be_city_before = True
         elif (cits[0].typ == CityItemToken.ItemType.NOUN
               and len(cits) > 1):
             can_be_city_before = True
     k = 0
     while k < len(li):
         if (li[k].onto_item is not None):
             if (ex_obj is not None or new_name is not None):
                 break
             if (noun is not None):
                 if (k == 1):
                     if (noun.termin_item.canonic_text == "РАЙОН"
                             or noun.termin_item.canonic_text == "ОБЛАСТЬ"
                             or noun.termin_item.canonic_text == "СОЮЗ"):
                         if (isinstance(li[k].onto_item.referent,
                                        GeoReferent)):
                             if (li[k].onto_item.referent.is_state):
                                 break
                         ok = False
                         tt = li[k].end_token.next0_
                         if (tt is None):
                             ok = True
                         elif (tt.is_char_of(",.")):
                             ok = True
                         if (not ok):
                             ok = MiscLocationHelper.check_geo_object_before(
                                 li[0].begin_token)
                         if (not ok):
                             adr = AddressItemToken.try_parse(
                                 tt, None, False, False, None)
                             if (adr is not None):
                                 if (adr.typ ==
                                         AddressItemToken.ItemType.STREET):
                                     ok = True
                         if (not ok):
                             break
                     if (li[k].onto_item is not None):
                         if (noun.begin_token.is_value("МО", None)
                                 or noun.begin_token.is_value("ЛО", None)):
                             return None
             ex_obj = li[k]
         elif (li[k].termin_item is not None):
             if (noun is not None):
                 break
             if (li[k].termin_item.is_always_prefix and k > 0):
                 break
             if (k > 0 and li[k].is_doubt):
                 if (li[k].begin_token == li[k].end_token
                         and li[k].begin_token.is_value("ЗАО", None)):
                     break
             if (li[k].termin_item.is_adjective
                     or li[k].is_geo_in_dictionary):
                 adj_list.append(li[k])
             else:
                 if (ex_obj is not None):
                     geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent,
                                                 GeoReferent)
                     if (geo_ is None):
                         break
                     if (ex_obj.is_adjective and
                         ((li[k].termin_item.canonic_text == "СОЮЗ" or
                           li[k].termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                         str0_ = str(ex_obj.onto_item)
                         if (not li[k].termin_item.canonic_text in str0_):
                             return None
                     if (li[k].termin_item.canonic_text == "РАЙОН"
                             or li[k].termin_item.canonic_text == "ОКРУГ"
                             or li[k].termin_item.canonic_text == "КРАЙ"):
                         tmp = io.StringIO()
                         for s in geo_.slots:
                             if (s.type_name == GeoReferent.ATTR_TYPE):
                                 print("{0};".format(s.value),
                                       end="",
                                       file=tmp,
                                       flush=True)
                         if (not li[k].termin_item.canonic_text
                                 in Utils.toStringStringIO(tmp).upper()):
                             if (k != 1 or new_name is not None):
                                 break
                             new_name = li[0]
                             new_name.is_adjective = True
                             new_name.onto_item = (None)
                             ex_obj = (None)
                 noun = li[k]
                 if (k == 0):
                     tt = TerrItemToken.try_parse(
                         li[k].begin_token.previous, None, True, False,
                         None)
                     if (tt is not None and tt.morph.class0_.is_adjective):
                         adj_terr_before = True
         else:
             if (ex_obj is not None):
                 break
             if (new_name is not None):
                 break
             new_name = li[k]
         k += 1
     name = None
     alt_name = None
     full_name = None
     morph_ = None
     if (ex_obj is not None):
         if (ex_obj.is_adjective and not ex_obj.morph.language.is_en
                 and noun is None):
             if (attach_always and ex_obj.end_token.next0_ is not None):
                 npt = NounPhraseHelper.try_parse(ex_obj.begin_token,
                                                  NounPhraseParseAttr.NO, 0,
                                                  None)
                 if (ex_obj.end_token.next0_.is_comma_and):
                     pass
                 elif (npt is None):
                     pass
                 else:
                     str0_ = StreetItemToken.try_parse(
                         ex_obj.end_token.next0_, None, False, None, False)
                     if (str0_ is not None):
                         if (str0_.typ == StreetItemType.NOUN
                                 and str0_.end_token == npt.end_token):
                             return None
             else:
                 cit = CityItemToken.try_parse(ex_obj.end_token.next0_,
                                               None, False, None)
                 if (cit is not None
                         and ((cit.typ == CityItemToken.ItemType.NOUN
                               or cit.typ == CityItemToken.ItemType.CITY))):
                     npt = NounPhraseHelper.try_parse(
                         ex_obj.begin_token, NounPhraseParseAttr.NO, 0,
                         None)
                     if (npt is not None
                             and npt.end_token == cit.end_token):
                         pass
                     else:
                         return None
                 elif (ex_obj.begin_token.is_value("ПОДНЕБЕСНЫЙ", None)):
                     pass
                 else:
                     return None
         if (noun is None and ex_obj.can_be_city):
             cit0 = CityItemToken.try_parse_back(
                 ex_obj.begin_token.previous)
             if (cit0 is not None
                     and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                 return None
         if (ex_obj.is_doubt and noun is None):
             ok2 = False
             if (TerrAttachHelper.__can_be_geo_after(
                     ex_obj.end_token.next0_)):
                 ok2 = True
             elif (not ex_obj.can_be_surname and not ex_obj.can_be_city):
                 if ((ex_obj.end_token.next0_ is not None
                      and ex_obj.end_token.next0_.is_char(')')
                      and ex_obj.begin_token.previous is not None)
                         and ex_obj.begin_token.previous.is_char('(')):
                     ok2 = True
                 elif (ex_obj.chars.is_latin_letter
                       and ex_obj.begin_token.previous is not None):
                     if (ex_obj.begin_token.previous.is_value("IN", None)):
                         ok2 = True
                     elif (ex_obj.begin_token.previous.is_value(
                             "THE", None) and
                           ex_obj.begin_token.previous.previous is not None
                           and
                           ex_obj.begin_token.previous.previous.is_value(
                               "IN", None)):
                         ok2 = True
             if (not ok2):
                 cit0 = CityItemToken.try_parse_back(
                     ex_obj.begin_token.previous)
                 if (cit0 is not None
                         and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                     pass
                 elif (MiscLocationHelper.check_geo_object_before(
                         ex_obj.begin_token.previous)):
                     pass
                 else:
                     return None
         name = ex_obj.onto_item.canonic_text
         morph_ = ex_obj.morph
     elif (new_name is not None):
         if (noun is None):
             return None
         j = 1
         while j < k:
             if (li[j].is_newline_before and not li[0].is_newline_before):
                 if (BracketHelper.can_be_start_of_sequence(
                         li[j].begin_token, False, False)):
                     pass
                 else:
                     return None
             j += 1
         morph_ = noun.morph
         if (new_name.is_adjective):
             if (noun.termin_item.acronym == "АО"):
                 if (noun.begin_token != noun.end_token):
                     return None
                 if (new_name.morph.gender != MorphGender.FEMINIE):
                     return None
             geo_before = None
             tt0 = li[0].begin_token.previous
             if (tt0 is not None and tt0.is_comma_and):
                 tt0 = tt0.previous
             if (not li[0].is_newline_before and tt0 is not None):
                 geo_before = (Utils.asObjectOrNull(tt0.get_referent(),
                                                    GeoReferent))
             if (Utils.indexOfList(li, noun, 0) < Utils.indexOfList(
                     li, new_name, 0)):
                 if (noun.termin_item.is_state):
                     return None
                 if (new_name.can_be_surname and geo_before is None):
                     if (((noun.morph.case_)
                          & new_name.morph.case_).is_undefined):
                         return None
                 if (MiscHelper.is_exists_in_dictionary(
                         new_name.begin_token, new_name.end_token,
                     (MorphClass.ADJECTIVE) | MorphClass.PRONOUN
                         | MorphClass.VERB)):
                     if (noun.begin_token != new_name.begin_token):
                         if (geo_before is None):
                             if (len(li) == 2 and
                                     TerrAttachHelper.__can_be_geo_after(
                                         li[1].end_token.next0_)):
                                 pass
                             elif (len(li) == 3
                                   and li[2].termin_item is not None
                                   and TerrAttachHelper.__can_be_geo_after(
                                       li[2].end_token.next0_)):
                                 pass
                             elif (new_name.is_geo_in_dictionary):
                                 pass
                             elif (new_name.end_token.is_newline_after):
                                 pass
                             else:
                                 return None
                 npt = NounPhraseHelper.try_parse(
                     new_name.end_token, NounPhraseParseAttr.PARSEPRONOUNS,
                     0, None)
                 if (npt is not None
                         and npt.end_token != new_name.end_token):
                     if (len(li) >= 3 and li[2].termin_item is not None
                             and npt.end_token == li[2].end_token):
                         add_noun = li[2]
                     else:
                         return None
                 rtp = new_name.kit.process_referent(
                     "PERSON", new_name.begin_token)
                 if (rtp is not None):
                     return None
                 name = ProperNameHelper.get_name_ex(
                     new_name.begin_token, new_name.end_token,
                     MorphClass.ADJECTIVE, MorphCase.UNDEFINED,
                     noun.termin_item.gender, False, False)
             else:
                 ok = False
                 if (((k + 1) < len(li)) and li[k].termin_item is None
                         and li[k + 1].termin_item is not None):
                     ok = True
                 elif ((k < len(li)) and li[k].onto_item is not None):
                     ok = True
                 elif (k == len(li) and not new_name.is_adj_in_dictionary):
                     ok = True
                 elif (MiscLocationHelper.check_geo_object_before(
                         li[0].begin_token) or can_be_city_before):
                     ok = True
                 elif (MiscLocationHelper.check_geo_object_after(
                         li[k - 1].end_token, False)):
                     ok = True
                 elif (len(li) == 3 and k == 2):
                     cit = CityItemToken.try_parse(li[2].begin_token, None,
                                                   False, None)
                     if (cit is not None):
                         if (cit.typ == CityItemToken.ItemType.CITY
                                 or cit.typ == CityItemToken.ItemType.NOUN):
                             ok = True
                 elif (len(li) == 2):
                     ok = TerrAttachHelper.__can_be_geo_after(
                         li[len(li) - 1].end_token.next0_)
                 if (not ok and not li[0].is_newline_before
                         and not li[0].chars.is_all_lower):
                     rt00 = li[0].kit.process_referent(
                         "PERSONPROPERTY", li[0].begin_token.previous)
                     if (rt00 is not None):
                         ok = True
                 if (noun.termin_item is not None
                         and noun.termin_item.is_strong
                         and new_name.is_adjective):
                     ok = True
                 if (noun.is_doubt and len(adj_list) == 0
                         and geo_before is None):
                     return None
                 name = ProperNameHelper.get_name_ex(
                     new_name.begin_token, new_name.end_token,
                     MorphClass.ADJECTIVE, MorphCase.UNDEFINED,
                     noun.termin_item.gender, False, False)
                 if (not ok and not attach_always):
                     if (MiscHelper.is_exists_in_dictionary(
                             new_name.begin_token, new_name.end_token,
                         (MorphClass.ADJECTIVE) | MorphClass.PRONOUN
                             | MorphClass.VERB)):
                         if (exists is not None):
                             for e0_ in exists:
                                 if (e0_.find_slot(GeoReferent.ATTR_NAME,
                                                   name, True) is not None):
                                     ok = True
                                     break
                         if (not ok):
                             return None
                 full_name = "{0} {1}".format(
                     ProperNameHelper.get_name_ex(li[0].begin_token,
                                                  noun.begin_token.previous,
                                                  MorphClass.ADJECTIVE,
                                                  MorphCase.UNDEFINED,
                                                  noun.termin_item.gender,
                                                  False, False),
                     noun.termin_item.canonic_text)
         else:
             if (not attach_always or
                 ((noun.termin_item is not None
                   and noun.termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                 is_latin = noun.chars.is_latin_letter and new_name.chars.is_latin_letter
                 if (Utils.indexOfList(li, noun, 0) > Utils.indexOfList(
                         li, new_name, 0)):
                     if (not is_latin):
                         return None
                 if (not new_name.is_district_name
                         and not BracketHelper.can_be_start_of_sequence(
                             new_name.begin_token, False, False)):
                     if (len(adj_list) == 0
                             and MiscHelper.is_exists_in_dictionary(
                                 new_name.begin_token, new_name.end_token,
                                 (MorphClass.NOUN) | MorphClass.PRONOUN)):
                         if (len(li) == 2 and noun.is_city_region
                                 and (noun.whitespaces_after_count < 2)):
                             pass
                         else:
                             return None
                     if (not is_latin):
                         if ((noun.termin_item.is_region
                              and not attach_always and
                              ((not adj_terr_before or new_name.is_doubt)))
                                 and not noun.is_city_region and
                                 not noun.termin_item.is_specific_prefix):
                             if (not MiscLocationHelper.
                                     check_geo_object_before(
                                         noun.begin_token)):
                                 if (not noun.is_doubt and noun.begin_token
                                         != noun.end_token):
                                     pass
                                 elif ((noun.termin_item.is_always_prefix
                                        and len(li) == 2 and li[0] == noun)
                                       and li[1] == new_name):
                                     pass
                                 else:
                                     return None
                         if (noun.is_doubt and len(adj_list) == 0):
                             if (noun.termin_item.acronym == "МО"
                                     or noun.termin_item.acronym == "ЛО"):
                                 if (k == (len(li) - 1)
                                         and li[k].termin_item is not None):
                                     add_noun = li[k]
                                     k += 1
                                 elif (len(li) == 2 and noun == li[0]
                                       and str(new_name).endswith("совет")):
                                     pass
                                 else:
                                     return None
                             else:
                                 return None
                         pers = new_name.kit.process_referent(
                             "PERSON", new_name.begin_token)
                         if (pers is not None):
                             return None
             name = MiscHelper.get_text_value(new_name.begin_token,
                                              new_name.end_token,
                                              GetTextAttr.NO)
             if (new_name.begin_token != new_name.end_token):
                 ttt = new_name.begin_token.next0_
                 while ttt is not None and ttt.end_char <= new_name.end_char:
                     if (ttt.chars.is_letter):
                         ty = TerrItemToken.try_parse(
                             ttt, None, False, False, None)
                         if ((ty is not None and ty.termin_item is not None
                              and noun is not None)
                                 and ((noun.termin_item.canonic_text
                                       in ty.termin_item.canonic_text
                                       or ty.termin_item.canonic_text
                                       in noun.termin_item.canonic_text))):
                             name = MiscHelper.get_text_value(
                                 new_name.begin_token, ttt.previous,
                                 GetTextAttr.NO)
                             break
                     ttt = ttt.next0_
             if (len(adj_list) > 0):
                 npt = NounPhraseHelper.try_parse(adj_list[0].begin_token,
                                                  NounPhraseParseAttr.NO, 0,
                                                  None)
                 if (npt is not None and npt.end_token == noun.end_token):
                     alt_name = "{0} {1}".format(
                         npt.get_normal_case_text(None,
                                                  MorphNumber.UNDEFINED,
                                                  MorphGender.UNDEFINED,
                                                  False), name)
     else:
         if ((len(li) == 1 and noun is not None
              and noun.end_token.next0_ is not None) and (isinstance(
                  noun.end_token.next0_.get_referent(), GeoReferent))):
             g = Utils.asObjectOrNull(noun.end_token.next0_.get_referent(),
                                      GeoReferent)
             if (noun.termin_item is not None):
                 tyy = noun.termin_item.canonic_text.lower()
                 ooo = False
                 if (g.find_slot(GeoReferent.ATTR_TYPE, tyy, True)
                         is not None):
                     ooo = True
                 elif (tyy.endswith("район") and g.find_slot(
                         GeoReferent.ATTR_TYPE, "район", True) is not None):
                     ooo = True
                 if (ooo):
                     return ReferentToken._new734(g, noun.begin_token,
                                                  noun.end_token.next0_,
                                                  noun.begin_token.morph)
         if ((len(li) == 1 and noun == li[0]
              and li[0].termin_item is not None)
                 and TerrItemToken.try_parse(li[0].end_token.next0_, None,
                                             True, False, None) is None and
                 TerrItemToken.try_parse(li[0].begin_token.previous, None,
                                         True, False, None) is None):
             if (li[0].morph.number == MorphNumber.PLURAL):
                 return None
             cou = 0
             str0_ = li[0].termin_item.canonic_text.lower()
             tt = li[0].begin_token.previous
             first_pass3158 = True
             while True:
                 if first_pass3158: first_pass3158 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (tt.is_newline_after):
                     cou += 10
                 else:
                     cou += 1
                 if (cou > 500):
                     break
                 g = Utils.asObjectOrNull(tt.get_referent(), GeoReferent)
                 if (g is None):
                     continue
                 ok = True
                 cou = 0
                 tt = li[0].end_token.next0_
                 first_pass3159 = True
                 while True:
                     if first_pass3159: first_pass3159 = False
                     else: tt = tt.next0_
                     if (not (tt is not None)): break
                     if (tt.is_newline_before):
                         cou += 10
                     else:
                         cou += 1
                     if (cou > 500):
                         break
                     tee = TerrItemToken.try_parse(tt, None, True, False,
                                                   None)
                     if (tee is None):
                         continue
                     ok = False
                     break
                 if (ok):
                     ii = 0
                     while g is not None and (ii < 3):
                         if (g.find_slot(GeoReferent.ATTR_TYPE, str0_, True)
                                 is not None):
                             return ReferentToken._new734(
                                 g, li[0].begin_token, li[0].end_token,
                                 noun.begin_token.morph)
                         g = g.higher
                         ii += 1
                 break
         return None
     ter = None
     if (ex_obj is not None and (isinstance(ex_obj.tag, GeoReferent))):
         ter = (Utils.asObjectOrNull(ex_obj.tag, GeoReferent))
     else:
         ter = GeoReferent()
         if (ex_obj is not None):
             geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent,
                                         GeoReferent)
             if (geo_ is not None and not geo_.is_city):
                 ter._merge_slots2(geo_, li[0].kit.base_language)
             else:
                 ter._add_name(name)
             if (noun is None and ex_obj.can_be_city):
                 ter._add_typ_city(li[0].kit.base_language)
             else:
                 pass
         elif (new_name is not None):
             ter._add_name(name)
             if (alt_name is not None):
                 ter._add_name(alt_name)
         if (noun is not None):
             if (noun.termin_item.canonic_text == "АО"):
                 ter._add_typ(
                     ("АВТОНОМНИЙ ОКРУГ" if li[0].kit.base_language.is_ua
                      else "АВТОНОМНЫЙ ОКРУГ"))
             elif (noun.termin_item.canonic_text == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ"
                   or noun.termin_item.canonic_text
                   == "МУНІЦИПАЛЬНЕ ЗБОРИ"):
                 ter._add_typ(("МУНІЦИПАЛЬНЕ УТВОРЕННЯ"
                               if li[0].kit.base_language.is_ua else
                               "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ"))
             elif (noun.termin_item.acronym == "МО"
                   and add_noun is not None):
                 ter._add_typ(add_noun.termin_item.canonic_text)
             else:
                 if (noun.termin_item.canonic_text == "СОЮЗ"
                         and ex_obj is not None
                         and ex_obj.end_char > noun.end_char):
                     return ReferentToken._new734(ter, ex_obj.begin_token,
                                                  ex_obj.end_token,
                                                  ex_obj.morph)
                 ter._add_typ(noun.termin_item.canonic_text)
                 if (noun.termin_item.is_region and ter.is_state):
                     ter._add_typ_reg(li[0].kit.base_language)
         if (ter.is_state and ter.is_region):
             for a in adj_list:
                 if (a.termin_item.is_region):
                     ter._add_typ_reg(li[0].kit.base_language)
                     break
         if (ter.is_state):
             if (full_name is not None):
                 ter._add_name(full_name)
     res = ReferentToken(ter, li[0].begin_token, li[k - 1].end_token)
     if (noun is not None and noun.morph.class0_.is_noun):
         res.morph = noun.morph
     else:
         res.morph = MorphCollection()
         ii = 0
         while ii < k:
             for v in li[ii].morph.items:
                 bi = MorphBaseInfo()
                 bi.copy_from(v)
                 if (noun is not None):
                     if (bi.class0_.is_adjective):
                         bi.class0_ = MorphClass.NOUN
                 res.morph.add_item(bi)
             ii += 1
     if (li[0].termin_item is not None
             and li[0].termin_item.is_specific_prefix):
         res.begin_token = li[0].end_token.next0_
     if (add_noun is not None and add_noun.end_char > res.end_char):
         res.end_token = add_noun.end_token
     if ((isinstance(res.begin_token.previous, TextToken))
             and (res.whitespaces_before_count < 2)):
         tt = Utils.asObjectOrNull(res.begin_token.previous, TextToken)
         if (tt.term == "АР"):
             for ty in ter.typs:
                 if ("республика" in ty or "республіка" in ty):
                     res.begin_token = tt
                     break
     return res
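A hedged usage sketch follows; it is not part of the library. The driver loop, the flag values passed to TerrItemToken.try_parse and the import paths are assumptions inferred from the calls visible in this example (example #12 below imports its internal classes the same way).

# Illustrative sketch only -- assumed driver, not library code.
# Import paths are assumptions based on pullenti's internal-module layout.
from pullenti.ner.geo.internal.TerrItemToken import TerrItemToken
from pullenti.ner.geo.internal.TerrAttachHelper import TerrAttachHelper


def attach_territories(first_token, analyzer_data):
    """Collect runs of TerrItemToken items and bind each run to a territory."""
    results = []
    t = first_token
    while t is not None:
        # Gather consecutive territory items starting at t.
        items = []
        tt = t
        while tt is not None:
            it = TerrItemToken.try_parse(tt, None, False, False, None)
            if it is None:
                break
            items.append(it)
            tt = it.end_token.next0_
        if items:
            rt = TerrAttachHelper.try_attach_territory(items, analyzer_data)
            if rt is not None:
                results.append(rt)
                t = rt.end_token.next0_  # continue after the matched span
                continue
        t = t.next0_
    return results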
Example #12
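 # Parses a single date element starting at token t: a number (day, hour,
 # year, quarter, ...), a season or pointer word, a month name or a
 # delimiter; returns a DateItemToken describing it, or None.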
 def __try_attach(t: 'Token', prev: typing.List['DateItemToken'],
                  detail_regime: bool) -> 'DateItemToken':
     from pullenti.ner.measure.internal.MeasureToken import MeasureToken
     if (t is None):
         return None
     nt = Utils.asObjectOrNull(t, NumberToken)
     begin = t
     end = t
     is_in_brack = False
     if ((BracketHelper.can_be_start_of_sequence(t, False, False)
          and t.next0_ is not None and (isinstance(t.next0_, NumberToken)))
             and BracketHelper.can_be_end_of_sequence(
                 t.next0_.next0_, False, None, False)):
         nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
         end = t.next0_.next0_
         is_in_brack = True
     if ((t.is_newline_before and BracketHelper.is_bracket(t, False) and
          (isinstance(t.next0_, NumberToken)))
             and BracketHelper.is_bracket(t.next0_.next0_, False)):
         nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
         end = t.next0_.next0_
         is_in_brack = True
     if (nt is not None):
         if (nt.int_value is None):
             return None
         if (nt.typ == NumberSpellingType.WORDS):
             if (nt.morph.class0_.is_noun
                     and not nt.morph.class0_.is_adjective):
                 if (t.next0_ is not None
                         and ((t.next0_.is_value("КВАРТАЛ", None)
                               or t.next0_.is_value("ПОЛУГОДИЕ", None)
                               or t.next0_.is_value("ПІВРІЧЧЯ", None)))):
                     pass
                 else:
                     return None
         if (NumberHelper.try_parse_age(nt) is not None):
             return None
         tt = None
         res = DateItemToken._new628(begin, end,
                                     DateItemToken.DateItemType.NUMBER,
                                     nt.int_value, nt.morph)
         if ((res.int_value == 20 and (isinstance(nt.next0_, NumberToken))
              and nt.next0_.int_value is not None)
                 and nt.next0_.length_char == 2 and prev is not None):
             num = 2000 + nt.next0_.int_value
             if ((num < 2030) and len(prev) > 0 and prev[len(prev) - 1].typ
                     == DateItemToken.DateItemType.MONTH):
                 ok = False
                 if (nt.whitespaces_after_count == 1):
                     ok = True
                 elif (nt.is_newline_after):
                     ok = True
                 if (ok):
                     nt = (Utils.asObjectOrNull(nt.next0_, NumberToken))
                     res.end_token = nt
                     res.int_value = num
         if (res.int_value == 20 or res.int_value == 201):
             tt = t.next0_
             if (tt is not None and tt.is_char('_')):
                 while tt is not None:
                     if (not tt.is_char('_')):
                         break
                     tt = tt.next0_
                 tt = DateItemToken.__test_year_rus_word(tt, False)
                 if (tt is not None):
                     res.int_value = 0
                     res.end_token = tt
                     res.typ = DateItemToken.DateItemType.YEAR
                     return res
         if (res.int_value <= 12 and t.next0_ is not None
                 and (t.whitespaces_after_count < 3)):
             tt = t.next0_
             if (tt.is_value("ЧАС", None)):
                 if (((isinstance(t.previous, TextToken))
                      and not t.previous.chars.is_letter
                      and not t.is_whitespace_before)
                         and (isinstance(t.previous.previous, NumberToken))
                         and not t.previous.is_whitespace_before):
                     pass
                 else:
                     res.typ = DateItemToken.DateItemType.HOUR
                     res.end_token = tt
                     tt = tt.next0_
                     if (tt is not None and tt.is_char('.')):
                         res.end_token = tt
                         tt = tt.next0_
             first_pass3072 = True
             while True:
                 if first_pass3072: first_pass3072 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.is_value("УТРО", "РАНОК")):
                     res.end_token = tt
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.is_value("ВЕЧЕР", "ВЕЧІР")):
                     res.end_token = tt
                     res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.is_value("ДЕНЬ", None)):
                     res.end_token = tt
                     if (res.int_value < 10):
                         res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.is_value("НОЧЬ", "НІЧ")):
                     res.end_token = tt
                     if (res.int_value == 12):
                         res.int_value = 0
                     elif (res.int_value > 9):
                         res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.is_comma or tt.morph.class0_.is_adverb):
                     continue
                 break
             if (res.typ == DateItemToken.DateItemType.HOUR):
                 return res
         can_be_year_ = True
         if (prev is not None and len(prev) > 0 and prev[len(prev) - 1].typ
                 == DateItemToken.DateItemType.MONTH):
             pass
         elif ((prev is not None and len(prev) >= 4 and
                prev[len(prev) - 1].typ == DateItemToken.DateItemType.DELIM)
               and prev[len(prev) - 2].can_by_month):
             pass
         elif (nt.next0_ is not None
               and ((nt.next0_.is_value("ГОД", None)
                     or nt.next0_.is_value("РІК", None)))):
             if (res.int_value < 1000):
                 can_be_year_ = False
         tt = DateItemToken.__test_year_rus_word(nt.next0_, False)
         if (tt is not None and DateItemToken.__is_new_age(tt.next0_)):
             res.typ = DateItemToken.DateItemType.YEAR
             res.end_token = tt
         elif (can_be_year_):
             if (res.can_be_year
                     or res.typ == DateItemToken.DateItemType.NUMBER):
                 tt = DateItemToken.__test_year_rus_word(
                     nt.next0_, res.is_newline_before)
                 if ((tt) is not None):
                     if ((tt.is_value("Г", None)
                          and not tt.is_whitespace_before
                          and t.previous is not None)
                             and ((t.previous.is_value("КОРПУС", None)
                                   or t.previous.is_value("КОРП", None)))):
                         pass
                     elif (
                         (((nt.next0_.is_value("Г", None) and
                            (t.whitespaces_before_count < 3) and t.previous
                            is not None) and t.previous.is_value("Я", None)
                           and t.previous.previous is not None)
                          and t.previous.previous.is_char_of("\\/")
                          and t.previous.previous.previous is not None)
                             and t.previous.previous.previous.is_value(
                                 "А", None)):
                         return None
                     elif (nt.next0_.length_char == 1
                           and not res.can_be_year
                           and ((prev is None or
                                 ((len(prev) > 0 and prev[len(prev) - 1].typ
                                   != DateItemToken.DateItemType.DELIM))))):
                         pass
                     else:
                         res.end_token = tt
                         res.typ = DateItemToken.DateItemType.YEAR
                         res.lang = tt.morph.language
             elif (tt is not None and (nt.whitespaces_after_count < 2)
                   and (nt.end_char - nt.begin_char) == 1):
                 res.end_token = tt
                 res.typ = DateItemToken.DateItemType.YEAR
                 res.lang = tt.morph.language
         if (nt.previous is not None):
             if (nt.previous.is_value("В", "У")
                     or nt.previous.is_value("К", None)
                     or nt.previous.is_value("ДО", None)):
                 tt = DateItemToken.__test_year_rus_word(nt.next0_, False)
                 if ((tt) is not None):
                     ok = False
                     if ((res.int_value < 100)
                             and (isinstance(tt, TextToken)) and
                         ((tt.term == "ГОДА" or tt.term == "РОКИ"))):
                         pass
                     else:
                         ok = True
                         if (nt.previous.is_value("ДО", None)
                                 and nt.next0_.is_value("Г", None)):
                             cou = 0
                             ttt = nt.previous.previous
                             while ttt is not None and (cou < 10):
                                 mt = MeasureToken.try_parse(
                                     ttt, None, False, False, False, False)
                                 if (mt is not None
                                         and mt.end_char > nt.end_char):
                                     ok = False
                                     break
                                 ttt = ttt.previous
                                 cou += 1
                     if (ok):
                         res.end_token = tt
                         res.typ = DateItemToken.DateItemType.YEAR
                         res.lang = tt.morph.language
                         res.begin_token = nt.previous
             elif (((nt.previous.is_value("IN", None)
                     or nt.previous.is_value("SINCE", None)))
                   and res.can_be_year):
                 uu = (NumbersWithUnitToken.try_parse(
                     nt, None, False, False, False, False)
                       if nt.previous.is_value("IN", None) else None)
                 if (uu is not None and len(uu.units) > 0):
                     pass
                 else:
                     res.typ = DateItemToken.DateItemType.YEAR
                     res.begin_token = nt.previous
             elif (nt.previous.is_value("NEL", None)
                   or nt.previous.is_value("DEL", None)):
                 if (res.can_be_year):
                     res.typ = DateItemToken.DateItemType.YEAR
                     res.lang = MorphLang.IT
                     res.begin_token = nt.previous
             elif (nt.previous.is_value("IL", None) and res.can_be_day):
                 res.lang = MorphLang.IT
                 res.begin_token = nt.previous
         t1 = res.end_token.next0_
         if (t1 is not None):
             if (t1.is_value("ЧАС", "ГОДИНА") or t1.is_value("HOUR", None)):
                 if ((((prev is not None and len(prev) == 2
                        and prev[0].can_be_hour)
                       and prev[1].typ == DateItemToken.DateItemType.DELIM
                       and not prev[1].is_whitespace_after)
                      and res.int_value >= 0) and (res.int_value < 59)):
                     prev[0].typ = DateItemToken.DateItemType.HOUR
                     res.typ = DateItemToken.DateItemType.MINUTE
                     res.end_token = t1
                 elif (res.int_value < 24):
                     if (t1.next0_ is not None and t1.next0_.is_char('.')):
                         t1 = t1.next0_
                     res.typ = DateItemToken.DateItemType.HOUR
                     res.end_token = t1
             elif ((res.int_value < 60)
                    and ((t1.is_value("МИНУТА", "ХВИЛИНА") or t1.is_value(
                        "МИН", None) or t1.is_value("MINUTE", None)))):
                 if (t1.next0_ is not None and t1.next0_.is_char('.')):
                     t1 = t1.next0_
                 res.typ = DateItemToken.DateItemType.MINUTE
                 res.end_token = t1
             elif (
                 (res.int_value < 60) and
                 ((t1.is_value("СЕКУНДА", None) or t1.is_value("СЕК", None)
                   or t1.is_value("SECOND", None)))):
                 if (t1.next0_ is not None and t1.next0_.is_char('.')):
                     t1 = t1.next0_
                 res.typ = DateItemToken.DateItemType.SECOND
                 res.end_token = t1
             elif ((res.int_value < 30)
                   and ((t1.is_value("ВЕК", "ВІК")
                         or t1.is_value("СТОЛЕТИЕ", "СТОЛІТТЯ")))):
                 res.typ = DateItemToken.DateItemType.CENTURY
                 res.end_token = t1
             elif (res.int_value <= 4 and t1.is_value("КВАРТАЛ", None)):
                 res.typ = DateItemToken.DateItemType.QUARTAL
                 res.end_token = t1
             elif (res.int_value <= 2
                   and ((t1.is_value("ПОЛУГОДИЕ", None)
                         or t1.is_value("ПІВРІЧЧЯ", None)))):
                 res.typ = DateItemToken.DateItemType.HALFYEAR
                 res.end_token = t1
         return res
     t0 = Utils.asObjectOrNull(t, TextToken)
     if (t0 is None):
         return None
     txt = t0.get_source_text()
     if ((txt[0] == 'I' or txt[0] == 'X' or txt[0] == 'Х')
             or txt[0] == 'V'):
         lat = NumberHelper.try_parse_roman(t)
         if (lat is not None and lat.end_token.next0_ is not None
                 and lat.int_value is not None):
             val = lat.int_value
             tt = lat.end_token.next0_
             if (tt.is_value("КВАРТАЛ", None) and val > 0 and val <= 4):
                 return DateItemToken._new629(
                     t, tt, DateItemToken.DateItemType.QUARTAL, val)
             if (tt.is_value("ПОЛУГОДИЕ", "ПІВРІЧЧЯ") and val > 0
                     and val <= 2):
                 return DateItemToken._new629(
                     t, lat.end_token.next0_,
                     DateItemToken.DateItemType.HALFYEAR, val)
             if (tt.is_value("ВЕК", "ВІК")
                     or tt.is_value("СТОЛЕТИЕ", "СТОЛІТТЯ")):
                 return DateItemToken._new629(
                     t, lat.end_token.next0_,
                     DateItemToken.DateItemType.CENTURY, val)
             if (tt.is_value("В", None) and tt.next0_ is not None
                     and tt.next0_.is_char('.')):
                 if (prev is not None and len(prev) > 0
                         and prev[len(prev) - 1].typ
                         == DateItemToken.DateItemType.POINTER):
                     return DateItemToken._new629(
                         t, tt.next0_, DateItemToken.DateItemType.CENTURY,
                         val)
                 if (DateItemToken.__is_new_age(tt.next0_.next0_)):
                     return DateItemToken._new629(
                         t, tt.next0_, DateItemToken.DateItemType.CENTURY,
                         val)
             if (tt.is_hiphen):
                 lat2 = NumberHelper.try_parse_roman(tt.next0_)
                 if (lat2 is not None and lat2.int_value is not None
                         and lat2.end_token.next0_ is not None):
                     if (lat2.end_token.next0_.is_value("ВЕК", "ВІК")
                             or lat2.end_token.next0_.is_value(
                                 "СТОЛЕТИЕ", "СТОЛІТТЯ")):
                         ddd = DateItemToken.try_attach(
                             tt.next0_, None, False)
                         return DateItemToken._new634(
                             t, lat.end_token,
                             DateItemToken.DateItemType.CENTURY, val,
                             ((ddd.new_age if ddd is not None else 0)))
     if (t is not None and t.is_value("НАПРИКІНЦІ", None)):
         return DateItemToken._new635(t, t,
                                      DateItemToken.DateItemType.POINTER,
                                      "конец")
     if (t is not None and t.is_value("ДОНЕДАВНА", None)):
         return DateItemToken._new635(t, t,
                                      DateItemToken.DateItemType.POINTER,
                                      "сегодня")
     if (prev is None):
         if (t is not None):
             if (t.is_value("ОКОЛО", "БІЛЯ")
                     or t.is_value("ПРИМЕРНО", "ПРИБЛИЗНО")
                     or t.is_value("ABOUT", None)):
                 return DateItemToken._new635(
                     t, t, DateItemToken.DateItemType.POINTER, "около")
         if (t.is_value("ОК", None) or t.is_value("OK", None)):
             if (t.next0_ is not None and t.next0_.is_char('.')):
                 return DateItemToken._new635(
                     t, t.next0_, DateItemToken.DateItemType.POINTER,
                     "около")
             return DateItemToken._new635(
                 t, t, DateItemToken.DateItemType.POINTER, "около")
     tok = DateItemToken.M_SEASONS.try_parse(t, TerminParseAttr.NO)
     if ((tok is not None and
          (Utils.valToEnum(tok.termin.tag, DatePointerType))
          == DatePointerType.SUMMER and t.morph.language.is_ru)
             and (isinstance(t, TextToken))):
         str0_ = t.term
         if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
             tok = (None)
     if (tok is not None):
         return DateItemToken._new629(
             t, tok.end_token, DateItemToken.DateItemType.POINTER,
             Utils.valToEnum(tok.termin.tag, DatePointerType))
     npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
     if (npt is not None):
         tok = DateItemToken.M_SEASONS.try_parse(npt.end_token,
                                                 TerminParseAttr.NO)
         if ((tok is not None and
              (Utils.valToEnum(tok.termin.tag, DatePointerType))
              == DatePointerType.SUMMER and t.morph.language.is_ru)
                 and (isinstance(t, TextToken))):
             str0_ = t.term
             if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
                 tok = (None)
         if (tok is not None):
             return DateItemToken._new629(
                 t, tok.end_token, DateItemToken.DateItemType.POINTER,
                 Utils.valToEnum(tok.termin.tag, DatePointerType))
         typ_ = DateItemToken.DateItemType.NUMBER
         if (npt.noun.is_value("КВАРТАЛ", None)):
             typ_ = DateItemToken.DateItemType.QUARTAL
         elif (npt.end_token.is_value("ПОЛУГОДИЕ", None)
               or npt.end_token.is_value("ПІВРІЧЧЯ", None)):
             typ_ = DateItemToken.DateItemType.HALFYEAR
         elif (npt.end_token.is_value("НАЧАЛО", None)
               or npt.end_token.is_value("ПОЧАТОК", None)):
             return DateItemToken._new635(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "начало")
         elif (npt.end_token.is_value("СЕРЕДИНА", None)):
             return DateItemToken._new635(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "середина")
         elif (npt.end_token.is_value("КОНЕЦ", None)
               or npt.end_token.is_value("КІНЕЦЬ", None)
               or npt.end_token.is_value("НАПРИКІНЕЦЬ", None)):
             return DateItemToken._new635(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "конец")
         elif (npt.end_token.is_value("ВРЕМЯ", None)
               and len(npt.adjectives) > 0
               and npt.end_token.previous.is_value("НАСТОЯЩЕЕ", None)):
             return DateItemToken._new635(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "сегодня")
         elif (npt.end_token.is_value("ЧАС", None)
               and len(npt.adjectives) > 0
               and npt.end_token.previous.is_value("ДАНИЙ", None)):
             return DateItemToken._new635(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "сегодня")
         if (typ_ != DateItemToken.DateItemType.NUMBER or detail_regime):
             delta = 0
             if (len(npt.adjectives) > 0):
                 if (npt.adjectives[0].is_value("ПОСЛЕДНИЙ", "ОСТАННІЙ")):
                     return DateItemToken._new629(
                         t0, npt.end_token, typ_,
                         (4 if typ_ == DateItemToken.DateItemType.QUARTAL
                          else 2))
                 if (npt.adjectives[0].is_value("ПРЕДЫДУЩИЙ", "ПОПЕРЕДНІЙ")
                         or npt.adjectives[0].is_value("ПРОШЛЫЙ", None)):
                     delta = -1
                 elif (npt.adjectives[0].is_value("СЛЕДУЮЩИЙ", None)
                       or npt.adjectives[0].is_value("ПОСЛЕДУЮЩИЙ", None)
                       or npt.adjectives[0].is_value("НАСТУПНИЙ", None)):
                     delta = 1
                 else:
                     return None
             cou = 0
             tt = t.previous
             first_pass3073 = True
             while True:
                 if first_pass3073: first_pass3073 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (cou > 200):
                     break
                 dr = Utils.asObjectOrNull(tt.get_referent(),
                                           DateRangeReferent)
                 if (dr is None):
                     continue
                 if (typ_ == DateItemToken.DateItemType.QUARTAL):
                     ii = dr.quarter_number
                     if (ii < 1):
                         continue
                     ii += delta
                     if ((ii < 1) or ii > 4):
                         continue
                     return DateItemToken._new629(t0, npt.end_token, typ_,
                                                  ii)
                 if (typ_ == DateItemToken.DateItemType.HALFYEAR):
                     ii = dr.halfyear_number
                     if (ii < 1):
                         continue
                     ii += delta
                     if ((ii < 1) or ii > 2):
                         continue
                     return DateItemToken._new629(t0, npt.end_token, typ_,
                                                  ii)
     term = t0.term
     if (not str.isalnum(term[0])):
         if (t0.is_char_of(".\\/:") or t0.is_hiphen):
             return DateItemToken._new635(t0, t0,
                                          DateItemToken.DateItemType.DELIM,
                                          term)
         elif (t0.is_char(',')):
             return DateItemToken._new635(t0, t0,
                                          DateItemToken.DateItemType.DELIM,
                                          term)
         else:
             return None
     if (term == "O" or term == "О"):
         if ((isinstance(t.next0_, NumberToken))
                 and not t.is_whitespace_after
                 and len(t.next0_.value) == 1):
             return DateItemToken._new629(t, t.next0_,
                                          DateItemToken.DateItemType.NUMBER,
                                          t.next0_.int_value)
     if (str.isalpha(term[0])):
         inf = DateItemToken.M_MONTHES.try_parse(t, TerminParseAttr.NO)
         if (inf is not None and inf.termin.tag is None):
             inf = DateItemToken.M_MONTHES.try_parse(
                 inf.end_token.next0_, TerminParseAttr.NO)
         if (inf is not None and (isinstance(inf.termin.tag, int))):
             return DateItemToken._new653(inf.begin_token, inf.end_token,
                                          DateItemToken.DateItemType.MONTH,
                                          inf.termin.tag, inf.termin.lang)
     return None
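A hedged sketch of a caller follows; it is not library code. It relies on the public DateItemToken.try_attach wrapper, which the ROMAN-numeral branch above already calls with the same (token, prev, detail_regime) arguments; the import path is an assumption modelled on this example's own import of MeasureToken.

# Illustrative sketch only -- assumed caller, not library code.
from pullenti.ner.date.internal.DateItemToken import DateItemToken


def collect_date_items(first_token):
    """Gather a contiguous run of date elements starting at first_token."""
    items = []
    t = first_token
    while t is not None:
        # The growing list is passed as "prev" so context-dependent rules
        # (e.g. a two-digit year right after a month item) can fire.
        dit = DateItemToken.try_attach(t, items, False)
        if dit is None:
            break
        items.append(dit)
        t = dit.end_token.next0_
    return items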
Example #13
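 # Handles the "noun + name" city pattern (город/поселок/село/станция followed
 # by a proper name) over a list of CityItemToken items; returns a
 # ReferentToken with the resulting referent, or None.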
 def __try_noun_name(li: typing.List['CityItemToken'],
                     oi: 'IntOntologyItem',
                     always: bool) -> 'ReferentToken':
     oi.value = (None)
     if (li is None or (len(li) < 2)
             or ((li[0].typ != CityItemToken.ItemType.NOUN
                  and li[0].typ != CityItemToken.ItemType.MISC))):
         return None
     ok = not li[0].doubtful
     if (ok and li[0].typ == CityItemToken.ItemType.MISC):
         ok = False
     typ = (None
            if li[0].typ == CityItemToken.ItemType.MISC else li[0].value)
     typ2 = (None if li[0].typ == CityItemToken.ItemType.MISC else
             li[0].alt_value)
     prob_adj = None
     i1 = 1
     org0_ = None
     if ((typ is not None and li[i1].typ == CityItemToken.ItemType.NOUN and
          ((i1 + 1) < len(li))) and li[0].whitespaces_after_count <= 1 and
         (((LanguageHelper.ends_with(typ, "ПОСЕЛОК")
            or LanguageHelper.ends_with(typ, "СЕЛИЩЕ") or typ == "ДЕРЕВНЯ")
           or typ == "СЕЛО"))):
         if (li[i1].begin_token == li[i1].end_token):
             ooo = AddressItemToken.try_attach_org(li[i1].begin_token)
             if (ooo is not None and ooo.ref_token is not None):
                 return None
         typ2 = li[i1].value
         if (typ2 == "СТАНЦИЯ" and li[i1].begin_token.is_value("СТ", None)
                 and ((i1 + 1) < len(li))):
             m = li[i1 + 1].morph
             if (m.number == MorphNumber.PLURAL):
                 prob_adj = "СТАРЫЕ"
             elif (m.gender == MorphGender.FEMINIE):
                 prob_adj = "СТАРАЯ"
             elif (m.gender == MorphGender.MASCULINE):
                 prob_adj = "СТАРЫЙ"
             else:
                 prob_adj = "СТАРОЕ"
         i1 += 1
     name = Utils.ifNotNull(li[i1].value,
                            ((None if li[i1].onto_item is None else
                              li[i1].onto_item.canonic_text)))
     alt_name = li[i1].alt_value
     if (name is None):
         return None
     mc = li[0].morph
     if (i1 == 1 and li[i1].typ == CityItemToken.ItemType.CITY
             and ((li[0].value == "ГОРОД" or li[0].value == "МІСТО"
                   or li[0].typ == CityItemToken.ItemType.MISC))):
         if (typ is None and ((i1 + 1) < len(li))
                 and li[i1 + 1].typ == CityItemToken.ItemType.NOUN):
             return None
         oi.value = li[i1].onto_item
         if (oi.value is not None):
             name = oi.value.canonic_text
         if (len(name) > 2 or oi.value.misc_attr is not None):
             if (not li[1].doubtful
                     or ((oi.value is not None
                          and oi.value.misc_attr is not None))):
                 ok = True
             elif (not ok and not li[1].is_newline_before):
                 if (li[0].geo_object_before or li[1].geo_object_after):
                     ok = True
                 elif (StreetDefineHelper.check_street_after(
                         li[1].end_token.next0_)):
                     ok = True
                 elif (li[1].end_token.next0_ is not None and
                       (isinstance(li[1].end_token.next0_.get_referent(),
                                   DateReferent))):
                     ok = True
                 elif ((li[1].whitespaces_before_count < 2)
                       and li[1].onto_item is not None):
                     ok = True
             if (li[1].doubtful and li[1].end_token.next0_ is not None and
                     li[1].end_token.chars == li[1].end_token.next0_.chars):
                 ok = False
             if (li[0].begin_token.previous is not None
                     and li[0].begin_token.previous.is_value("В", None)):
                 ok = True
         if (not ok):
             ok = CityAttachHelper.check_year_after(li[1].end_token.next0_)
         if (not ok):
             ok = CityAttachHelper.check_city_after(li[1].end_token.next0_)
     elif ((li[i1].typ == CityItemToken.ItemType.PROPERNAME
            or li[i1].typ == CityItemToken.ItemType.CITY)):
         if (((li[0].value == "АДМИНИСТРАЦИЯ"
               or li[0].value == "АДМІНІСТРАЦІЯ")) and i1 == 1):
             return None
         if (li[i1].is_newline_before):
             if (len(li) != 2):
                 return None
         if (not li[0].doubtful):
             ok = True
             if (len(name) < 2):
                 ok = False
             elif ((len(name) < 3)
                   and li[0].morph.number != MorphNumber.SINGULAR):
                 ok = False
             if (li[i1].doubtful and not li[i1].geo_object_after
                     and not li[0].geo_object_before):
                 if (li[i1].morph.case_.is_genitive):
                     if (li[i1].end_token.next0_ is None
                             or MiscLocationHelper.check_geo_object_after(
                                 li[i1].end_token.next0_, False)
                             or AddressItemToken.check_house_after(
                                 li[i1].end_token.next0_, False, True)):
                         pass
                     elif (li[0].begin_token.previous is None
                           or MiscLocationHelper.check_geo_object_before(
                               li[0].begin_token)):
                         pass
                     else:
                         ok = False
                 if (ok):
                     rt0 = li[i1].kit.process_referent(
                         "PERSONPROPERTY", li[0].begin_token.previous)
                     if (rt0 is not None):
                         rt1 = li[i1].kit.process_referent(
                             "PERSON", li[i1].begin_token)
                         if (rt1 is not None):
                             ok = False
             npt = NounPhraseHelper.try_parse(li[i1].begin_token,
                                              NounPhraseParseAttr.NO, 0,
                                              None)
             if (npt is not None):
                 if (npt.end_token.end_char > li[i1].end_char
                         and len(npt.adjectives) > 0 and
                         not npt.adjectives[0].end_token.next0_.is_comma):
                     ok = False
                 elif (TerrItemToken._m_unknown_regions.try_parse(
                         npt.end_token, TerminParseAttr.FULLWORDSONLY)
                       is not None):
                     ok1 = False
                     if (li[0].begin_token.previous is not None):
                         ttt = li[0].begin_token.previous
                         if (ttt.is_comma and ttt.previous is not None):
                             ttt = ttt.previous
                         geo_ = Utils.asObjectOrNull(
                             ttt.get_referent(), GeoReferent)
                         if (geo_ is not None and not geo_.is_city):
                             ok1 = True
                     if (npt.end_token.next0_ is not None):
                         ttt = npt.end_token.next0_
                         if (ttt.is_comma and ttt.next0_ is not None):
                             ttt = ttt.next0_
                         geo_ = Utils.asObjectOrNull(
                             ttt.get_referent(), GeoReferent)
                         if (geo_ is not None and not geo_.is_city):
                             ok1 = True
                     if (not ok1):
                         return None
             if (li[0].value == "ПОРТ"):
                 if (li[i1].chars.is_all_upper
                         or li[i1].chars.is_latin_letter):
                     return None
         elif (li[0].geo_object_before):
             ok = True
         elif (li[i1].geo_object_after and not li[i1].is_newline_after):
             ok = True
         else:
             ok = CityAttachHelper.check_year_after(li[i1].end_token.next0_)
         if (not ok):
             ok = CityAttachHelper.check_street_after(
                 li[i1].end_token.next0_)
         if (not ok and li[0].begin_token.previous is not None
                 and li[0].begin_token.previous.is_value("В", None)):
             ok = True
     else:
         return None
     if (not ok and not always):
         if (MiscLocationHelper.check_near_before(
                 li[0].begin_token.previous) is None):
             return None
     if (len(li) > (i1 + 1)):
         del li[i1 + 1:]
     city = GeoReferent()
     if (oi.value is not None and oi.value.referent is not None):
         city = (Utils.asObjectOrNull(oi.value.referent.clone(),
                                      GeoReferent))
         city.occurrence.clear()
     if (not li[0].morph.case_.is_undefined
             and li[0].morph.gender != MorphGender.UNDEFINED):
         if (li[i1].end_token.morph.class0_.is_adjective
                 and li[i1].begin_token == li[i1].end_token):
             nam = ProperNameHelper.get_name_ex(
                 li[i1].begin_token, li[i1].end_token, MorphClass.ADJECTIVE,
                 li[0].morph.case_, li[0].morph.gender, False, False)
             if (nam is not None and nam != name):
                 name = nam
     if (li[0].morph.case_.is_nominative):
         if (alt_name is not None):
             city._add_name(alt_name)
         alt_name = (None)
     city._add_name(name)
     if (prob_adj is not None):
         city._add_name(prob_adj + " " + name)
     if (alt_name is not None):
         city._add_name(alt_name)
         if (prob_adj is not None):
             city._add_name(prob_adj + " " + alt_name)
     if (typ is not None):
         city._add_typ(typ)
     elif (not city.is_city):
         city._add_typ_city(li[0].kit.base_language)
     if (typ2 is not None):
         city._add_typ(typ2.lower())
     if (li[0].higher_geo is not None
             and GeoOwnerHelper.can_be_higher(li[0].higher_geo, city)):
         city.higher = li[0].higher_geo
     if (li[0].typ == CityItemToken.ItemType.MISC):
         del li[0]
     res = ReferentToken._new734(city, li[0].begin_token,
                                 li[len(li) - 1].end_token, mc)
     if (res.end_token.next0_ is not None and res.end_token.next0_.is_hiphen
             and (isinstance(res.end_token.next0_.next0_, NumberToken))):
         num = Utils.asObjectOrNull(res.end_token.next0_.next0_,
                                    NumberToken)
         if ((num.typ == NumberSpellingType.DIGIT
              and not num.morph.class0_.is_adjective
              and num.int_value is not None) and (num.int_value < 50)):
             for s in city.slots:
                 if (s.type_name == GeoReferent.ATTR_NAME):
                     city.upload_slot(s,
                                      "{0}-{1}".format(s.value, num.value))
             res.end_token = num
     if (li[0].begin_token == li[0].end_token
             and li[0].begin_token.is_value("ГОРОДОК", None)):
         if (AddressItemToken.check_house_after(res.end_token.next0_, True,
                                                False)):
             return None
     return res
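
The example above finishes by attaching every plausible name variant to the city referent: the base name, the alternative name, and, when the type noun was the abbreviation "СТ", the same names prefixed with a gender-matched adjective ("СТАРАЯ", "СТАРЫЙ", "СТАРОЕ" or "СТАРЫЕ"). Below is a minimal standalone sketch of that variant-building step; it does not use pullenti, and build_city_names is a hypothetical helper written only for illustration.

def build_city_names(name, alt_name=None, prob_adj=None, nominative=False):
    names = []
    # in the nominative case the alternative name is registered first and then dropped
    if nominative and alt_name is not None:
        names.append(alt_name)
        alt_name = None
    names.append(name)
    if prob_adj is not None:
        names.append(prob_adj + " " + name)
    if alt_name is not None:
        names.append(alt_name)
        if prob_adj is not None:
            names.append(prob_adj + " " + alt_name)
    return names

print(build_city_names("ЛУГА", alt_name="ЛУГИ", prob_adj="СТАРАЯ"))
# ['ЛУГА', 'СТАРАЯ ЛУГА', 'ЛУГИ', 'СТАРАЯ ЛУГИ']
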
예제 #14
0
 def __try_attach(self, t : 'Token', key_word : bool) -> 'ReferentToken':
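     # Scans forward from t, accumulating banking requisites (UriReferent values
     # such as ИНН, Р/С, К/С) together with the owning bank organization; the scan
     # stops on table breaks, repeated requisite schemes or too much unrelated
     # text, and the collected data is returned wrapped in a BankDataReferent.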
     if (t is None): 
         return None
     t0 = t
     t1 = t
     uris_keys = None
     uris = None
     org0_ = None
     cor_org = None
     org_is_bank = False
     empty = 0
     last_uri = None
     first_pass3017 = True
     while True:
         if first_pass3017: first_pass3017 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char and t != t0): 
             break
         if (t.is_comma or t.morph.class0_.is_preposition or t.is_char_of("/\\")): 
             continue
         bank_keyword = False
         if (t.is_value("ПОЛНЫЙ", None) and t.next0_ is not None and ((t.next0_.is_value("НАИМЕНОВАНИЕ", None) or t.next0_.is_value("НАЗВАНИЕ", None)))): 
             t = t.next0_.next0_
             if (t is None): 
                 break
         if (t.is_value("БАНК", None)): 
             if ((isinstance(t, ReferentToken)) and t.get_referent().type_name == "ORGANIZATION"): 
                 bank_keyword = True
             tt = t.next0_
             npt = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
             if (npt is not None): 
                 tt = npt.end_token.next0_
             if (tt is not None and tt.is_char(':')): 
                 tt = tt.next0_
             if (tt is not None): 
                 if (not bank_keyword): 
                     t = tt
                     bank_keyword = True
                 elif (tt.get_referent() is not None and tt.get_referent().type_name == "ORGANIZATION"): 
                     t = tt
         r = t.get_referent()
         if (r is not None and r.type_name == "ORGANIZATION"): 
             is_bank = False
             kk = 0
             rr = r
             while rr is not None and (kk < 4): 
                 is_bank = Utils.compareStrings(Utils.ifNotNull(rr.get_string_value("KIND"), ""), "Bank", True) == 0
                 if (is_bank): 
                     break
                 rr = rr.parent_referent; kk += 1
             if (not is_bank and bank_keyword): 
                 is_bank = True
             if (not is_bank and uris is not None and "ИНН" in uris_keys): 
                 return None
             if ((last_uri is not None and last_uri.scheme == "К/С" and t.previous is not None) and t.previous.is_value("В", None)): 
                 cor_org = r
                 t1 = t
             elif (org0_ is None or ((not org_is_bank and is_bank))): 
                 org0_ = r
                 t1 = t
                 org_is_bank = is_bank
                 if (is_bank): 
                     continue
             if (uris is None and not key_word): 
                 return None
             continue
         if (isinstance(r, UriReferent)): 
             u = Utils.asObjectOrNull(r, UriReferent)
             if (uris is None): 
                 if (not BankAnalyzer.__is_bank_req(u.scheme)): 
                     return None
                 if (u.scheme == "ИНН" and t.is_newline_after): 
                     return None
                 uris = list()
                 uris_keys = list()
             else: 
                 if (not BankAnalyzer.__is_bank_req(u.scheme)): 
                     break
                 if (u.scheme in uris_keys): 
                     break
                 if (u.scheme == "ИНН"): 
                     if (empty > 0): 
                         break
             uris_keys.append(u.scheme)
             uris.append(u)
             last_uri = u
             t1 = t
             empty = 0
             continue
         elif (uris is None and not key_word and not org_is_bank): 
             return None
         if (r is not None and ((r.type_name == "GEO" or r.type_name == "ADDRESS"))): 
             empty += 1
             continue
         if (isinstance(t, TextToken)): 
             if (t.is_value("ПОЛНЫЙ", None) or t.is_value("НАИМЕНОВАНИЕ", None) or t.is_value("НАЗВАНИЕ", None)): 
                 pass
             elif (t.chars.is_letter): 
                 tok = BankAnalyzer.__m_ontology.try_parse(t, TerminParseAttr.NO)
                 if (tok is not None): 
                     t = tok.end_token
                     empty = 0
                 else: 
                     empty += 1
                     if (t.is_newline_before): 
                         nnn = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
                         if (nnn is not None and nnn.end_token.next0_ is not None and nnn.end_token.next0_.is_char(':')): 
                             break
                 if (uris is None): 
                     break
         if (empty > 2): 
             break
         if (empty > 0 and t.is_char(':') and t.is_newline_after): 
             break
         if (((isinstance(t, NumberToken)) and t.is_newline_before and t.next0_ is not None) and not t.next0_.chars.is_letter): 
             break
     if (uris is None): 
         return None
     if (not "Р/С" in uris_keys and not "Л/С" in uris_keys): 
         return None
     ok = False
     if ((len(uris) < 2) and org0_ is None): 
         return None
     bdr = BankDataReferent()
     for u in uris: 
         bdr.add_slot(BankDataReferent.ATTR_ITEM, u, False, 0)
     if (org0_ is not None): 
         bdr.add_slot(BankDataReferent.ATTR_BANK, org0_, False, 0)
     if (cor_org is not None): 
         bdr.add_slot(BankDataReferent.ATTR_CORBANK, cor_org, False, 0)
     org0 = (None if t0.previous is None else t0.previous.get_referent())
     if (org0 is not None and org0.type_name == "ORGANIZATION"): 
         for s in org0.slots: 
             if (isinstance(s.value, UriReferent)): 
                 u = Utils.asObjectOrNull(s.value, UriReferent)
                 if (BankAnalyzer.__is_bank_req(u.scheme)): 
                     if (not u.scheme in uris_keys): 
                         bdr.add_slot(BankDataReferent.ATTR_ITEM, u, False, 0)
     return ReferentToken(bdr, t0, t1)
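
The scan in __try_attach accepts a block of banking details only if it contains a settlement ("Р/С") or personal ("Л/С") account, treats a repeated requisite scheme as the end of the block, and finally copies matching requisites from the organization referent found just before the block. The following self-contained sketch reproduces only that accumulation rule in plain Python; BANK_SCHEMES and collect_requisites are illustrative stand-ins (the real scheme check is BankAnalyzer.__is_bank_req) and the values are dummies.

BANK_SCHEMES = {"ИНН", "КПП", "БИК", "Р/С", "Л/С", "К/С"}

def collect_requisites(pairs):
    """pairs: (scheme, value) tuples in document order."""
    block = {}
    for scheme, value in pairs:
        if scheme not in BANK_SCHEMES or scheme in block:
            break  # a foreign or repeated scheme ends the current block
        block[scheme] = value
    if "Р/С" not in block and "Л/С" not in block:
        return None  # no account number, so this is not a bank-details block
    return block

print(collect_requisites([("ИНН", "0000000000"), ("Р/С", "00000000000000000001")]))
print(collect_requisites([("ИНН", "0000000000")]))  # None: the account is missing
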
예제 #15
0
 def process(self, kit : 'AnalysisKit') -> None:
     # Main extraction routine: builds keyword referents over the whole analysis kit
     ad = kit.get_analyzer_data(self)
     has_denoms = False
     for a in kit.processor.analyzers: 
         if ((isinstance(a, DenominationAnalyzer)) and not a.ignore_this_analyzer): 
             has_denoms = True
     if (not has_denoms): 
         a = DenominationAnalyzer()
         a.process(kit)
     li = list()
     tmp = io.StringIO()
     tmp2 = list()
     max0_ = 0
     t = kit.first_token
     while t is not None: 
         max0_ += 1
         t = t.next0_
     cur = 0
     t = kit.first_token
     first_pass3292 = True
     while True:
         if first_pass3292: first_pass3292 = False
         else: t = t.next0_; cur += 1
         if (not (t is not None)): break
         r = t.get_referent()
         if (r is not None): 
             t = self.__add_referents(ad, t, cur, max0_)
             continue
         if (not (isinstance(t, TextToken))): 
             continue
         if (not t.chars.is_letter or (t.length_char < 3)): 
             continue
         term = t.term
         if (term == "ЕСТЬ"): 
             if ((isinstance(t.previous, TextToken)) and t.previous.morph.class0_.is_verb): 
                 pass
             else: 
                 continue
         npt = None
         npt = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.ADJECTIVECANBELAST) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), 0, None)
         if (npt is None): 
             mc = t.get_morph_class_in_dictionary()
             if (mc.is_verb and not mc.is_preposition): 
                 if (t.is_verb_be): 
                     continue
                 if (t.is_value("МОЧЬ", None) or t.is_value("WOULD", None)): 
                     continue
                 kref = KeywordReferent._new1595(KeywordType.PREDICATE)
                 norm = t.get_normal_case_text(MorphClass.VERB, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                 if (norm is None): 
                     norm = t.lemma
                 if (norm.endswith("ЬСЯ")): 
                     norm = norm[0:0+len(norm) - 2]
                 kref.add_slot(KeywordReferent.ATTR_VALUE, norm, False, 0)
                 drv = DerivateService.find_derivates(norm, True, t.morph.language)
                 KeywordAnalyzer.__add_normals(kref, drv, norm)
                 kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
                 KeywordAnalyzer.__set_rank(kref, cur, max0_)
                 rt1 = ReferentToken._new734(ad.register_referent(kref), t, t, t.morph)
                 kit.embed_token(rt1)
                 t = (rt1)
                 continue
             continue
         if (npt.internal_noun is not None): 
             continue
         if (npt.end_token.is_value("ЦЕЛОМ", None) or npt.end_token.is_value("ЧАСТНОСТИ", None)): 
             if (npt.preposition is not None): 
                 t = npt.end_token
                 continue
         if (npt.end_token.is_value("СТОРОНЫ", None) and npt.preposition is not None and npt.preposition.normal == "С"): 
             t = npt.end_token
             continue
         if (npt.begin_token == npt.end_token): 
             mc = t.get_morph_class_in_dictionary()
             if (mc.is_preposition): 
                 continue
             elif (mc.is_adverb): 
                 if (t.is_value("ПОТОМ", None)): 
                     continue
         else: 
             pass
         li.clear()
         t0 = t
         tt = t
         first_pass3293 = True
         while True:
             if first_pass3293: first_pass3293 = False
             else: tt = tt.next0_
             if (not (tt is not None and tt.end_char <= npt.end_char)): break
             if (not (isinstance(tt, TextToken))): 
                 continue
             if (tt.is_value("NATURAL", None)): 
                 pass
             if ((tt.length_char < 3) or not tt.chars.is_letter): 
                 continue
             mc = tt.get_morph_class_in_dictionary()
             if ((mc.is_preposition or mc.is_pronoun or mc.is_personal_pronoun) or mc.is_conjunction): 
                 if (tt.is_value("ОТНОШЕНИЕ", None)): 
                     pass
                 else: 
                     continue
             if (mc.is_misc): 
                 if (MiscHelper.is_eng_article(tt)): 
                     continue
             kref = KeywordReferent._new1595(KeywordType.OBJECT)
             norm = tt.lemma
             kref.add_slot(KeywordReferent.ATTR_VALUE, norm, False, 0)
             if (norm != "ЕСТЬ"): 
                 drv = DerivateService.find_derivates(norm, True, tt.morph.language)
                 KeywordAnalyzer.__add_normals(kref, drv, norm)
             kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
             KeywordAnalyzer.__set_rank(kref, cur, max0_)
             rt1 = ReferentToken._new734(kref, tt, tt, tt.morph)
             kit.embed_token(rt1)
             if (tt == t and len(li) == 0): 
                 t0 = (rt1)
             t = (rt1)
             li.append(kref)
         if (len(li) > 1): 
             kref = KeywordReferent._new1595(KeywordType.OBJECT)
             Utils.setLengthStringIO(tmp, 0)
             tmp2.clear()
             has_norm = False
             for kw in li: 
                 s = kw.get_string_value(KeywordReferent.ATTR_VALUE)
                 if (tmp.tell() > 0): 
                     print(' ', end="", file=tmp)
                 print(s, end="", file=tmp)
                 n = kw.get_string_value(KeywordReferent.ATTR_NORMAL)
                 if (n is not None): 
                     has_norm = True
                     tmp2.append(n)
                 else: 
                     tmp2.append(s)
                 kref.add_slot(KeywordReferent.ATTR_REF, kw, False, 0)
             val = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
             kref.add_slot(KeywordReferent.ATTR_VALUE, val, False, 0)
             Utils.setLengthStringIO(tmp, 0)
             tmp2.sort()
             for s in tmp2: 
                 if (tmp.tell() > 0): 
                     print(' ', end="", file=tmp)
                 print(s, end="", file=tmp)
             norm = Utils.toStringStringIO(tmp)
             if (norm != val): 
                 kref.add_slot(KeywordReferent.ATTR_NORMAL, norm, False, 0)
             kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
             KeywordAnalyzer.__set_rank(kref, cur, max0_)
             rt1 = ReferentToken._new734(kref, t0, t, npt.morph)
             kit.embed_token(rt1)
             t = (rt1)
     cur = 0
     t = kit.first_token
     first_pass3294 = True
     while True:
         if first_pass3294: first_pass3294 = False
         else: t = t.next0_; cur += 1
         if (not (t is not None)): break
         kw = Utils.asObjectOrNull(t.get_referent(), KeywordReferent)
         if (kw is None or kw.typ != KeywordType.OBJECT): 
             continue
         if (t.next0_ is None or kw.child_words > 2): 
             continue
         t1 = t.next0_
         if (t1.is_value("OF", None) and (t1.whitespaces_after_count < 3) and t1.next0_ is not None): 
             t1 = t1.next0_
             if ((isinstance(t1, TextToken)) and MiscHelper.is_eng_article(t1) and t1.next0_ is not None): 
                 t1 = t1.next0_
         elif (not t1.morph.case_.is_genitive or t.whitespaces_after_count > 1): 
             continue
         kw2 = Utils.asObjectOrNull(t1.get_referent(), KeywordReferent)
         if (kw2 is None): 
             continue
         if (kw == kw2): 
             continue
         if (kw2.typ != KeywordType.OBJECT or (kw.child_words + kw2.child_words) > 3): 
             continue
         kw_un = KeywordReferent()
         kw_un._union(kw, kw2, MiscHelper.get_text_value(t1, t1, GetTextAttr.NO))
         kw_un = (Utils.asObjectOrNull(ad.register_referent(kw_un), KeywordReferent))
         KeywordAnalyzer.__set_rank(kw_un, cur, max0_)
         rt1 = ReferentToken._new734(kw_un, t, t1, t.morph)
         kit.embed_token(rt1)
         t = (rt1)
     if (KeywordAnalyzer.SORT_KEYWORDS_BY_RANK): 
         all0_ = list(ad.referents)
         all0_.sort(key=operator.attrgetter('rank'), reverse=True)
         ad.referents = all0_
     if (KeywordAnalyzer.ANNOTATION_MAX_SENTENCES > 0): 
         ano = AutoannoSentToken.create_annotation(kit, KeywordAnalyzer.ANNOTATION_MAX_SENTENCES)
         if (ano is not None): 
             ad.register_referent(ano)
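
For a multi-word noun phrase the analyzer above registers one keyword per word and then a combined keyword whose ATTR_NORMAL value is built from the per-word normal forms sorted alphabetically, so phrases that differ only in word order collapse into the same normal form. Below is a small plain-Python sketch of that idea; phrase_keyword and the str.upper normalizer are stand-ins for the morphology used in the example, not library calls.

def phrase_keyword(words, normalize=str.upper):
    word_norms = [normalize(w) for w in words]
    normal = " ".join(sorted(word_norms))  # order-independent normal form
    return {"value": " ".join(word_norms), "normal": normal, "refs": word_norms}

print(phrase_keyword(["сетевой", "график"]))
print(phrase_keyword(["график", "сетевой"]))  # same "normal" form as above
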
예제 #16
0
 def try_parse(t: 'Token', items: typing.List['NounPhraseItem'],
               attrs: 'NounPhraseParseAttr') -> 'NounPhraseItem':
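     # Builds a noun-phrase item candidate for token t: each morphological
     # variant of the token is examined and recorded either as a possible
     # adjective (adj_morph / can_be_adj) or as a possible noun
     # (noun_morph / can_be_noun), checking agreement with the items
     # collected so far.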
     if (t is None):
         return None
     t0 = t
     _can_be_surname = False
     _is_doubt_adj = False
     rt = Utils.asObjectOrNull(t, ReferentToken)
     if (rt is not None and rt.begin_token == rt.end_token
             and (isinstance(rt.begin_token, TextToken))):
         res = NounPhraseItem.try_parse(rt.begin_token, items, attrs)
         if (res is not None):
             res.begin_token = res.end_token = t
             res.can_be_noun = True
             return res
     if (rt is not None):
         res = NounPhraseItem(t, t)
         for m in t.morph.items:
             v = NounPhraseItemTextVar(m, None)
             v.normal_value = str(t.get_referent())
             res.noun_morph.append(v)
         res.can_be_noun = True
         return res
     if (isinstance(t, NumberToken)):
         pass
     has_legal_verb = False
     if (isinstance(t, TextToken)):
         if (not t.chars.is_letter):
             return None
         str0_ = t.term
         if (str0_[len(str0_) - 1] == 'А' or str0_[len(str0_) - 1] == 'О'):
             for wf in t.morph.items:
                 if ((isinstance(wf, MorphWordForm))
                         and wf.is_in_dictionary):
                     if (wf.class0_.is_verb):
                         mc = t.get_morph_class_in_dictionary()
                         if (not mc.is_noun and
                             (((attrs) &
                               (NounPhraseParseAttr.IGNOREPARTICIPLES)))
                                 == (NounPhraseParseAttr.NO)):
                             if (not LanguageHelper.ends_with_ex(
                                     str0_, "ОГО", "ЕГО", None, None)):
                                 return None
                         has_legal_verb = True
                     if (wf.class0_.is_adverb):
                         if (t.next0_ is None or not t.next0_.is_hiphen):
                             if ((str0_ == "ВСЕГО" or str0_ == "ДОМА"
                                  or str0_ == "НЕСКОЛЬКО")
                                     or str0_ == "МНОГО"
                                     or str0_ == "ПОРЯДКА"):
                                 pass
                             else:
                                 return None
                     if (wf.class0_.is_adjective):
                         if (wf.contains_attr("к.ф.", None)):
                             if (t.get_morph_class_in_dictionary() ==
                                     MorphClass.ADJECTIVE):
                                 pass
                             else:
                                 _is_doubt_adj = True
         mc0 = t.morph.class0_
         if (mc0.is_proper_surname and not t.chars.is_all_lower):
             for wf in t.morph.items:
                 if (wf.class0_.is_proper_surname
                         and wf.number != MorphNumber.PLURAL):
                     wff = Utils.asObjectOrNull(wf, MorphWordForm)
                     if (wff is None):
                         continue
                     s = Utils.ifNotNull((Utils.ifNotNull(
                         wff.normal_full, wff.normal_case)), "")
                     if (LanguageHelper.ends_with_ex(
                             s, "ИН", "ЕН", "ЫН", None)):
                         if (not wff.is_in_dictionary):
                             _can_be_surname = True
                         else:
                             return None
                     if (wff.is_in_dictionary
                             and LanguageHelper.ends_with(s, "ОВ")):
                         _can_be_surname = True
         if (mc0.is_proper_name and not t.chars.is_all_lower):
             for wff in t.morph.items:
                 wf = Utils.asObjectOrNull(wff, MorphWordForm)
                 if (wf is None):
                     continue
                 if (wf.normal_case == "ГОР"):
                     continue
                 if (wf.class0_.is_proper_name and wf.is_in_dictionary):
                     if (wf.normal_case is None
                             or not wf.normal_case.startswith("ЛЮБ")):
                         if (mc0.is_adjective
                                 and t.morph.contains_attr("неизм.", None)):
                             pass
                         elif (
                             (((attrs) &
                               (NounPhraseParseAttr.REFERENTCANBENOUN))
                              ) == (NounPhraseParseAttr.REFERENTCANBENOUN)):
                             pass
                         else:
                             if (items is None or (len(items) < 1)):
                                 return None
                             if (not items[0].is_std_adjective):
                                 return None
         if (mc0.is_adjective and t.morph.items_count == 1):
             if (t.morph.get_indexer_item(0).contains_attr(
                     "в.ср.ст.", None)):
                 return None
         mc1 = t.get_morph_class_in_dictionary()
         if (mc1 == MorphClass.VERB and t.morph.case_.is_undefined):
             return None
         if (((((attrs) & (NounPhraseParseAttr.IGNOREPARTICIPLES)))
              == (NounPhraseParseAttr.IGNOREPARTICIPLES)
              and t.morph.class0_.is_verb and not t.morph.class0_.is_noun)
                 and not t.morph.class0_.is_proper):
             for wf in t.morph.items:
                 if (wf.class0_.is_verb):
                     if (wf.contains_attr("дейст.з.", None)):
                         if (LanguageHelper.ends_with(t.term, "СЯ")):
                             pass
                         else:
                             return None
     t1 = None
     for k in range(2):
         t = (Utils.ifNotNull(t1, t0))
         if (k == 0):
             if (((isinstance(t0, TextToken)) and t0.next0_ is not None
                  and t0.next0_.is_hiphen)
                     and t0.next0_.next0_ is not None):
                 if (not t0.is_whitespace_after
                         and not t0.morph.class0_.is_pronoun and
                         not (isinstance(t0.next0_.next0_, NumberToken))):
                     if (not t0.next0_.is_whitespace_after):
                         t = t0.next0_.next0_
                     elif (t0.next0_.next0_.chars.is_all_lower
                           and LanguageHelper.ends_with(t0.term, "О")):
                         t = t0.next0_.next0_
         it = NounPhraseItem._new404(t0, t, _can_be_surname)
         if (t0 == t and (isinstance(t0, ReferentToken))):
             it.can_be_noun = True
             it.morph = MorphCollection(t0.morph)
         can_be_prepos = False
         for v in t.morph.items:
             wf = Utils.asObjectOrNull(v, MorphWordForm)
             if (v.class0_.is_verb and not v.case_.is_undefined):
                 it.can_be_adj = True
                 it.adj_morph.append(NounPhraseItemTextVar(v, t))
                 continue
             if (v.class0_.is_preposition):
                 can_be_prepos = True
             if (v.class0_.is_adjective
                     or ((v.class0_.is_pronoun
                          and not v.class0_.is_personal_pronoun
                          and not v.contains_attr("неизм.", None))) or
                 ((v.class0_.is_noun and (isinstance(t, NumberToken))))):
                 if (NounPhraseItem.try_accord_variant(
                         items, (0 if items is None else len(items)), v,
                         False)):
                     is_doub = False
                     if (v.contains_attr("к.ф.", None)):
                         continue
                     if (v.contains_attr("собир.", None)
                             and not (isinstance(t, NumberToken))):
                         if (wf is not None and wf.is_in_dictionary):
                             return None
                         continue
                     if (v.contains_attr("сравн.", None)):
                         continue
                     ok = True
                     if (isinstance(t, TextToken)):
                         s = t.term
                         if (s == "ПРАВО" or s == "ПРАВА"):
                             ok = False
                         elif (LanguageHelper.ends_with(s, "ОВ") and
                               t.get_morph_class_in_dictionary().is_noun):
                             ok = False
                     elif (isinstance(t, NumberToken)):
                         if (v.class0_.is_noun
                                 and t.morph.class0_.is_adjective):
                             ok = False
                         elif (t.morph.class0_.is_noun and ((
                             (attrs) &
                             (NounPhraseParseAttr.PARSENUMERICASADJECTIVE)))
                               == (NounPhraseParseAttr.NO)):
                             ok = False
                     if (ok):
                         it.adj_morph.append(NounPhraseItemTextVar(v, t))
                         it.can_be_adj = True
                         if (_is_doubt_adj and t0 == t):
                             it.is_doubt_adjective = True
                         if (has_legal_verb and wf is not None
                                 and wf.is_in_dictionary):
                             it.can_be_noun = True
                         if (wf is not None and wf.class0_.is_pronoun):
                             it.can_be_noun = True
                             it.noun_morph.append(
                                 NounPhraseItemTextVar(v, t))
             can_be_noun_ = False
             if (isinstance(t, NumberToken)):
                 pass
             elif (v.class0_.is_noun
                   or ((wf is not None and wf.normal_case == "САМ"))):
                 can_be_noun_ = True
             elif (v.class0_.is_personal_pronoun):
                 if (items is None or len(items) == 0):
                     can_be_noun_ = True
                 else:
                     for it1 in items:
                         if (it1.is_verb):
                             if (len(items) == 1
                                     and not v.case_.is_nominative):
                                 can_be_noun_ = True
                             else:
                                 return None
                     if (len(items) == 1):
                         if (items[0].can_be_adj_for_personal_pronoun):
                             can_be_noun_ = True
             elif (
                 (v.class0_.is_pronoun and
                  ((items is None or len(items) == 0 or
                    ((len(items) == 1
                      and items[0].can_be_adj_for_personal_pronoun))))
                  and wf is not None) and
                 (((((wf.normal_case == "ТОТ" or wf.normal_full == "ТО"
                      or wf.normal_case == "ТО") or wf.normal_case == "ЭТО"
                     or wf.normal_case == "ВСЕ") or wf.normal_case == "ЧТО"
                    or wf.normal_case == "КТО") or wf.normal_full
                   == "КОТОРЫЙ" or wf.normal_case == "КОТОРЫЙ"))):
                 if (wf.normal_case == "ВСЕ"):
                     if (t.next0_ is not None
                             and t.next0_.is_value("РАВНО", None)):
                         return None
                 can_be_noun_ = True
             elif (wf is not None and ((Utils.ifNotNull(
                     wf.normal_full, wf.normal_case))) == "КОТОРЫЙ"
                   and (((attrs) & (NounPhraseParseAttr.PARSEPRONOUNS)))
                   == (NounPhraseParseAttr.NO)):
                 return None
             elif (v.class0_.is_proper and (isinstance(t, TextToken))):
                 if (t.length_char > 4 or v.class0_.is_proper_name):
                     can_be_noun_ = True
             if (can_be_noun_):
                 added = False
                 if (items is not None and len(items) > 1 and
                     (((attrs) & (NounPhraseParseAttr.MULTINOUNS))) !=
                     (NounPhraseParseAttr.NO)):
                     ok1 = True
                     ii = 1
                     while ii < len(items):
                         if (not items[ii].conj_before):
                             ok1 = False
                             break
                         ii += 1
                     if (ok1):
                         if (NounPhraseItem.try_accord_variant(
                                 items,
                             (0 if items is None else len(items)), v,
                                 True)):
                             it.noun_morph.append(
                                 NounPhraseItemTextVar(v, t))
                             it.can_be_noun = True
                             it.multi_nouns = True
                             added = True
                 if (not added):
                     if (NounPhraseItem.try_accord_variant(
                             items, (0 if items is None else len(items)), v,
                             False)):
                         it.noun_morph.append(NounPhraseItemTextVar(v, t))
                         it.can_be_noun = True
                         if (v.class0_.is_personal_pronoun
                                 and t.morph.contains_attr("неизм.", None)
                                 and not it.can_be_adj):
                             itt = NounPhraseItemTextVar(v, t)
                             itt.case_ = MorphCase.ALL_CASES
                             itt.number = MorphNumber.UNDEFINED
                             if (itt.normal_value is None):
                                 pass
                             it.adj_morph.append(itt)
                             it.can_be_adj = True
                     elif ((len(items) > 0 and len(items[0].adj_morph) > 0
                            and items[0].adj_morph[0].number
                            == MorphNumber.PLURAL)
                           and not ((items[0].adj_morph[0].case_)
                                    & v.case_).is_undefined
                           and not items[0].adj_morph[0].class0_.is_verb):
                         if (t.next0_ is not None and t.next0_.is_comma_and
                                 and
                             (isinstance(t.next0_.next0_, TextToken))):
                             npt2 = NounPhraseHelper.try_parse(
                                 t.next0_.next0_, attrs, 0, None)
                             if (npt2 is not None
                                     and npt2.preposition is None
                                     and not ((npt2.morph.case_) & v.case_
                                              & items[0].adj_morph[0].case_
                                              ).is_undefined):
                                 it.noun_morph.append(
                                     NounPhraseItemTextVar(v, t))
                                 it.can_be_noun = True
         if (t0 != t):
             for v in it.adj_morph:
                 v.correct_prefix(Utils.asObjectOrNull(t0, TextToken),
                                  False)
             for v in it.noun_morph:
                 v.correct_prefix(Utils.asObjectOrNull(t0, TextToken), True)
         if (k == 1 and it.can_be_noun and not it.can_be_adj):
             if (t1 is not None):
                 it.end_token = t1
             else:
                 it.end_token = t0.next0_.next0_
             for v in it.noun_morph:
                 if (v.normal_value is not None
                         and (v.normal_value.find('-') < 0)):
                     v.normal_value = "{0}-{1}".format(
                         v.normal_value,
                         it.end_token.get_normal_case_text(
                             None, MorphNumber.UNDEFINED,
                             MorphGender.UNDEFINED, False))
         if (it.can_be_adj):
             if (NounPhraseItem.__m_std_adjectives.try_parse(
                     it.begin_token, TerminParseAttr.NO) is not None):
                 it.is_std_adjective = True
         if (can_be_prepos and it.can_be_noun):
             if (items is not None and len(items) > 0):
                 npt1 = NounPhraseHelper.try_parse(
                     t,
                     Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION)
                                     | (NounPhraseParseAttr.PARSEPRONOUNS) |
                                     (NounPhraseParseAttr.PARSEVERBS),
                                     NounPhraseParseAttr), 0, None)
                 if (npt1 is not None and npt1.end_char > t.end_char):
                     return None
             else:
                 npt1 = NounPhraseHelper.try_parse(
                     t.next0_,
                     Utils.valToEnum((NounPhraseParseAttr.PARSEPRONOUNS) |
                                     (NounPhraseParseAttr.PARSEVERBS),
                                     NounPhraseParseAttr), 0, None)
                 if (npt1 is not None):
                     mc = LanguageHelper.get_case_after_preposition(t.lemma)
                     if (not ((mc) & npt1.morph.case_).is_undefined):
                         return None
         if (it.can_be_noun or it.can_be_adj or k == 1):
             if (it.begin_token.morph.class0_.is_pronoun):
                 tt2 = it.end_token.next0_
                 if ((tt2 is not None and tt2.is_hiphen
                      and not tt2.is_whitespace_after)
                         and not tt2.is_whitespace_before):
                     tt2 = tt2.next0_
                 if (isinstance(tt2, TextToken)):
                     ss = tt2.term
                     if ((ss == "ЖЕ" or ss == "БЫ" or ss == "ЛИ")
                             or ss == "Ж"):
                         it.end_token = tt2
                     elif (ss == "НИБУДЬ" or ss == "ЛИБО"
                           or (((ss == "ТО" and tt2.previous.is_hiphen))
                               and it.can_be_adj)):
                         it.end_token = tt2
                         for m in it.adj_morph:
                             m.normal_value = "{0}-{1}".format(
                                 m.normal_value, ss)
                             if (m.single_number_value is not None):
                                 m.single_number_value = "{0}-{1}".format(
                                     m.single_number_value, ss)
             return it
         if (t0 == t):
             if (t0.is_value("БИЗНЕС", None) and t0.next0_ is not None
                     and t0.next0_.chars == t0.chars):
                 t1 = t0.next0_
                 continue
             return it
     return None
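
Whether a morphological variant of the token lands in adj_morph or noun_morph above is decided by NounPhraseItem.try_accord_variant, which checks agreement with the items collected so far. The sketch below models only the case part of that agreement with ordinary Python sets; accords is a hypothetical helper, not the library's API.

def accords(adjective_cases, noun_cases):
    """Intersect the noun's case set with every adjective's case set."""
    common = set(noun_cases)
    for adj_cases in adjective_cases:
        common &= set(adj_cases)
    return common

adjs = [{"nominative", "accusative"}, {"nominative"}]
print(accords(adjs, {"nominative", "genitive"}))  # {'nominative'}: agreement
print(accords(adjs, {"genitive"}))                # set(): no agreement
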
예제 #17
0
 def try_parse(t : 'Token', add_units : 'TerminCollection', can_be_set : bool=True, can_units_absent : bool=False, is_resctriction : bool=False, is_subval : bool=False) -> 'MeasureToken':
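     # Parses one measured quantity: first its name (a noun phrase or an
     # abbreviation), then optional units and nested "при ..." conditions,
     # and finally the numeric values via NumbersWithUnitToken; min/max
     # markers turn a single value into a from/to bound.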
     if (not (isinstance(t, TextToken))): 
         return None
     if (t.is_table_control_char): 
         return None
     t0 = t
     whd = None
     minmax = 0
     wrapminmax1625 = RefOutArgWrapper(minmax)
     tt = NumbersWithUnitToken._is_min_or_max(t0, wrapminmax1625)
     minmax = wrapminmax1625.value
     if (tt is not None): 
         t = tt.next0_
     npt = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS), NounPhraseParseAttr), 0, None)
     if (npt is None): 
         whd = NumbersWithUnitToken._try_parsewhl(t)
         if (whd is not None): 
             npt = NounPhraseToken(t0, whd.end_token)
         elif (t0.is_value("КПД", None)): 
             npt = NounPhraseToken(t0, t0)
         elif ((isinstance(t0, TextToken)) and t0.length_char > 3 and t0.get_morph_class_in_dictionary().is_undefined): 
             npt = NounPhraseToken(t0, t0)
         elif (t0.is_value("T", None) and t0.chars.is_all_lower): 
             npt = NounPhraseToken(t0, t0)
             t = t0
             if (t.next0_ is not None and t.next0_.is_char('=')): 
                 npt.end_token = t.next0_
         elif ((isinstance(t0, TextToken)) and t0.chars.is_letter and is_subval): 
             if (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None): 
                 return None
             npt = NounPhraseToken(t0, t0)
             t = t0.next0_
             while t is not None: 
                 if (t.whitespaces_before_count > 2): 
                     break
                 elif (not (isinstance(t, TextToken))): 
                     break
                 elif (not t.chars.is_letter): 
                     br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
                     if (br is not None): 
                         t = br.end_token
                         npt.end_token = t
                     else: 
                         break
                 elif (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None): 
                     break
                 else: 
                     npt.end_token = t
                 t = t.next0_
         else: 
             return None
     elif (NumberHelper.try_parse_real_number(t, True, False) is not None): 
         return None
     else: 
         dtok = DateItemToken.try_attach(t, None, False)
         if (dtok is not None): 
             return None
     t1 = npt.end_token
     t = npt.end_token
     name_ = MetaToken._new509(npt.begin_token, npt.end_token, npt.morph)
     units = None
     units2 = None
     internals_ = list()
     not0_ = False
     tt = t1.next0_
     first_pass3305 = True
     while True:
         if first_pass3305: first_pass3305 = False
         else: tt = tt.next0_
         if (not (tt is not None)): break
         if (tt.is_newline_before): 
             break
         if (tt.is_table_control_char): 
             break
         wrapminmax1617 = RefOutArgWrapper(minmax)
         tt2 = NumbersWithUnitToken._is_min_or_max(tt, wrapminmax1617)
         minmax = wrapminmax1617.value
         if (tt2 is not None): 
             tt = tt2
             t = tt
             t1 = t
             continue
         if ((tt.is_value("БЫТЬ", None) or tt.is_value("ДОЛЖЕН", None) or tt.is_value("ДОЛЖНЫЙ", None)) or tt.is_value("МОЖЕТ", None) or ((tt.is_value("СОСТАВЛЯТЬ", None) and not tt.get_morph_class_in_dictionary().is_adjective))): 
             t = tt
             t1 = t
             if (tt.previous.is_value("НЕ", None)): 
                 not0_ = True
             continue
         www = NumbersWithUnitToken._try_parsewhl(tt)
         if (www is not None): 
             whd = www
             tt = www.end_token
             t = tt
             t1 = t
             continue
         if (tt.is_value("ПРИ", None)): 
             mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False)
             if (mt1 is not None): 
                 internals_.append(mt1)
                 tt = mt1.end_token
                 t = tt
                 t1 = t
                 continue
             n1 = NumbersWithUnitToken.try_parse(tt.next0_, add_units, False, False, False, False)
             if (n1 is not None and len(n1.units) > 0): 
                 mt1 = MeasureToken._new1612(n1.begin_token, n1.end_token, n1)
                 internals_.append(mt1)
                 tt = mt1.end_token
                 t = tt
                 t1 = t
                 continue
         if (tt.is_value("ПО", None) and tt.next0_ is not None and tt.next0_.is_value("U", None)): 
             tt = tt.next0_
             t = tt
             t1 = t
             continue
         if (len(internals_) > 0): 
             if (tt.is_char(':')): 
                 break
             mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False)
             if (mt1 is not None and mt1.reliable): 
                 internals_.append(mt1)
                 tt = mt1.end_token
                 t = tt
                 t1 = t
                 continue
         if ((isinstance(tt, NumberToken)) and tt.typ == NumberSpellingType.WORDS): 
             npt3 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.PARSENUMERICASADJECTIVE, 0, None)
             if (npt3 is not None): 
                 tt = npt3.end_token
                 t1 = tt
                 if (len(internals_) == 0): 
                     name_.end_token = t1
                 continue
         if (((tt.is_hiphen and not tt.is_whitespace_before and not tt.is_whitespace_after) and (isinstance(tt.next0_, NumberToken)) and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper): 
             t = tt.next0_
             tt = t
             t1 = tt
             if (len(internals_) == 0): 
                 name_.end_token = t1
             continue
         if (((isinstance(tt, NumberToken)) and not tt.is_whitespace_before and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper): 
             t = tt
             t1 = t
             if (len(internals_) == 0): 
                 name_.end_token = t1
             continue
         if ((((isinstance(tt, NumberToken)) and not tt.is_whitespace_after and tt.next0_.is_hiphen) and not tt.next0_.is_whitespace_after and (isinstance(tt.next0_.next0_, TextToken))) and tt.next0_.next0_.length_char > 2): 
             tt = tt.next0_.next0_
             t = tt
             t1 = t
             npt1 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
             if (npt1 is not None and npt1.end_char > tt.end_char): 
                 tt = npt1.end_token
                 t = tt
                 t1 = t
             if (len(internals_) == 0): 
                 name_.end_token = t1
             continue
         if ((isinstance(tt, NumberToken)) and tt.previous is not None): 
             if (tt.previous.is_value("USB", None)): 
                 t = tt
                 t1 = t
                 if (len(internals_) == 0): 
                     name_.end_token = t1
                 ttt = tt.next0_
                 while ttt is not None: 
                     if (ttt.is_whitespace_before): 
                         break
                     if (ttt.is_char_of(",:")): 
                         break
                     tt = ttt
                     t = tt
                     t1 = t
                     if (len(internals_) == 0): 
                         name_.end_token = t1
                     ttt = ttt.next0_
                 continue
         mt0 = NumbersWithUnitToken.try_parse(tt, add_units, False, False, False, False)
         if (mt0 is not None): 
             npt1 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSENUMERICASADJECTIVE) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), 0, None)
             if (npt1 is not None and npt1.end_char > mt0.end_char): 
                 tt = npt1.end_token
                 t = tt
                 t1 = t
                 if (len(internals_) == 0): 
                     name_.end_token = t1
                 continue
             break
         if (((tt.is_comma or tt.is_char('('))) and tt.next0_ is not None): 
             www = NumbersWithUnitToken._try_parsewhl(tt.next0_)
             if (www is not None): 
                 whd = www
                 tt = www.end_token
                 t = tt
                 t1 = t
                 if (tt.next0_ is not None and tt.next0_.is_comma): 
                     tt = tt.next0_
                     t1 = tt
                 if (tt.next0_ is not None and tt.next0_.is_char(')')): 
                     tt = tt.next0_
                     t1 = tt
                     continue
             uu = UnitToken.try_parse_list(tt.next0_, add_units, False)
             if (uu is not None): 
                 t = uu[len(uu) - 1].end_token
                 t1 = t
                 units = uu
                 if (tt.is_char('(') and t1.next0_ is not None and t1.next0_.is_char(')')): 
                     tt = t1.next0_
                     t = tt
                     t1 = t
                     continue
                 elif (t1.next0_ is not None and t1.next0_.is_char('(')): 
                     uu = UnitToken.try_parse_list(t1.next0_.next0_, add_units, False)
                     if (uu is not None and uu[len(uu) - 1].end_token.next0_ is not None and uu[len(uu) - 1].end_token.next0_.is_char(')')): 
                         units2 = uu
                         tt = uu[len(uu) - 1].end_token.next0_
                         t = tt
                         t1 = t
                         continue
                     www = NumbersWithUnitToken._try_parsewhl(t1.next0_)
                     if (www is not None): 
                         whd = www
                         tt = www.end_token
                         t = tt
                         t1 = t
                         continue
                 if (uu is not None and len(uu) > 0 and not uu[0].is_doubt): 
                     break
                 if (t1.next0_ is not None): 
                     if (t1.next0_.is_table_control_char or t1.is_newline_after): 
                         break
                 units = (None)
         if (BracketHelper.can_be_start_of_sequence(tt, False, False) and not (isinstance(tt.next0_, NumberToken))): 
             br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
             if (br is not None): 
                 tt = br.end_token
                 t = tt
                 t1 = t
                 continue
         if (tt.is_value("НЕ", None) and tt.next0_ is not None): 
             mc = tt.next0_.get_morph_class_in_dictionary()
             if (mc.is_adverb or mc.is_misc): 
                 break
             continue
         if (tt.is_value("ЯМЗ", None)): 
             pass
         npt2 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS) | (NounPhraseParseAttr.PARSEPRONOUNS), NounPhraseParseAttr), 0, None)
         if (npt2 is None): 
             if (tt.morph.class0_.is_preposition or tt.morph.class0_.is_conjunction): 
                 to = NumbersWithUnitToken.M_TERMINS.try_parse(tt, TerminParseAttr.NO)
                 if (to is not None): 
                     if ((isinstance(to.end_token.next0_, TextToken)) and to.end_token.next0_.is_letters): 
                         pass
                     else: 
                         break
                 t1 = tt
                 continue
             mc = tt.get_morph_class_in_dictionary()
             if (((isinstance(tt, TextToken)) and tt.chars.is_letter and tt.length_char > 1) and (((tt.chars.is_all_upper or mc.is_adverb or mc.is_undefined) or mc.is_adjective))): 
                 uu = UnitToken.try_parse_list(tt, add_units, False)
                 if (uu is not None): 
                     if (uu[0].length_char > 1 or len(uu) > 1): 
                         units = uu
                         t = uu[len(uu) - 1].end_token
                         t1 = t
                         break
                 t = tt
                 t1 = t
                 if (len(internals_) == 0): 
                     name_.end_token = tt
                 continue
             if (tt.is_comma): 
                 continue
             if (tt.is_char('.')): 
                 if (not MiscHelper.can_be_start_of_sentence(tt.next0_)): 
                     continue
                 uu = UnitToken.try_parse_list(tt.next0_, add_units, False)
                 if (uu is not None): 
                     if (uu[0].length_char > 2 or len(uu) > 1): 
                         units = uu
                         t = uu[len(uu) - 1].end_token
                         t1 = t
                         break
             break
         tt = npt2.end_token
         t = tt
         t1 = t
         if (len(internals_) > 0): 
             pass
         elif (t.is_value("ПРЕДЕЛ", None) or t.is_value("ГРАНИЦА", None) or t.is_value("ДИАПАЗОН", None)): 
             pass
         elif (t.chars.is_letter): 
             name_.end_token = t1
     t11 = t1
     t1 = t1.next0_
     first_pass3306 = True
     while True:
         if first_pass3306: first_pass3306 = False
         else: t1 = t1.next0_
         if (not (t1 is not None)): break
         if (t1.is_table_control_char): 
             pass
         elif (t1.is_char_of(":,_")): 
             if (is_resctriction): 
                 return None
             www = NumbersWithUnitToken._try_parsewhl(t1.next0_)
             if (www is not None): 
                 whd = www
                 t = www.end_token
                 t1 = t
                 continue
             uu = UnitToken.try_parse_list(t1.next0_, add_units, False)
             if (uu is not None): 
                 if (uu[0].length_char > 1 or len(uu) > 1): 
                     units = uu
                     t = uu[len(uu) - 1].end_token
                     t1 = t
                     continue
             if (t1.is_char(':')): 
                 li = list()
                 ttt = t1.next0_
                 first_pass3307 = True
                 while True:
                     if first_pass3307: first_pass3307 = False
                     else: ttt = ttt.next0_
                     if (not (ttt is not None)): break
                     if (ttt.is_hiphen or ttt.is_table_control_char): 
                         continue
                     if ((isinstance(ttt, TextToken)) and not ttt.chars.is_letter): 
                         continue
                     mt1 = MeasureToken.try_parse(ttt, add_units, True, True, False, True)
                     if (mt1 is None): 
                         break
                     li.append(mt1)
                     ttt = mt1.end_token
                     if (ttt.next0_ is not None and ttt.next0_.is_char(';')): 
                         ttt = ttt.next0_
                     if (ttt.is_char(';')): 
                         pass
                     elif (ttt.is_newline_after and mt1.is_newline_before): 
                         pass
                     else: 
                         break
                 if (len(li) > 1): 
                     res0 = MeasureToken._new1618(t0, li[len(li) - 1].end_token, li, True)
                     if (internals_ is not None and len(internals_) > 0): 
                         res0.internal_ex = internals_[0]
                     nam = MiscHelper.get_text_value_of_meta_token(name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
                     li[0].begin_token = t0
                     for v in li: 
                         v.name = "{0} ({1})".format(nam, Utils.ifNotNull(v.name, "")).strip()
                         if (v.nums is not None and len(v.nums.units) == 0 and units is not None): 
                             v.nums.units = units
                     return res0
         elif (t1.is_hiphen and t1.is_whitespace_after and t1.is_whitespace_before): 
             pass
         elif (t1.is_hiphen and t1.next0_ is not None and t1.next0_.is_char('(')): 
             pass
         else: 
             break
     if (t1 is None): 
         return None
     mts = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, not0_, True, is_resctriction)
     if (mts is None): 
         if (units is not None and len(units) > 0): 
             if (t1 is None or t1.previous.is_char(':')): 
                 mts = list()
                 if (t1 is None): 
                     t1 = t11
                     while t1 is not None and t1.next0_ is not None: 
                         pass
                         t1 = t1.next0_
                 else: 
                     t1 = t1.previous
                 mts.append(NumbersWithUnitToken._new1619(t0, t1, math.nan))
         if (mts is None): 
             return None
     mt = mts[0]
     if (mt.begin_token == mt.end_token and not (isinstance(mt.begin_token, NumberToken))): 
         return None
     if (not is_subval and name_.begin_token.morph.class0_.is_preposition): 
         name_.begin_token = name_.begin_token.next0_
     if (mt.whl is not None): 
         whd = mt.whl
     for kk in range(10):
         if (whd is not None and whd.end_token == name_.end_token): 
             name_.end_token = whd.begin_token.previous
             continue
         if (units is not None): 
             if (units[len(units) - 1].end_token == name_.end_token): 
                 name_.end_token = units[0].begin_token.previous
                 continue
         break
     if (len(mts) > 1 and len(internals_) == 0): 
         if (len(mt.units) == 0): 
             if (units is not None): 
                 for m in mts: 
                     m.units = units
         res1 = MeasureToken._new1620(t0, mts[len(mts) - 1].end_token, name_.morph, True)
         res1.name = MiscHelper.get_text_value_of_meta_token(name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
         k = 0
         while k < len(mts): 
             ttt = MeasureToken._new1612(mts[k].begin_token, mts[k].end_token, mts[k])
             if (whd is not None): 
                 nams = Utils.asObjectOrNull(whd.tag, list)
                 if (k < len(nams)): 
                     ttt.name = nams[k]
             res1.internals.append(ttt)
             k += 1
         tt1 = res1.end_token.next0_
         if (tt1 is not None and tt1.is_char('±')): 
             nn = NumbersWithUnitToken._try_parse(tt1, add_units, True, False, False)
             if (nn is not None and nn.plus_minus_percent): 
                 res1.end_token = nn.end_token
                 res1.nums = nn
                 if (len(nn.units) > 0 and units is None and len(mt.units) == 0): 
                     for m in mts: 
                         m.units = nn.units
         return res1
     if (not mt.is_whitespace_before): 
         if (mt.begin_token.previous is None): 
             return None
         if (mt.begin_token.previous.is_char_of(":),") or mt.begin_token.previous.is_table_control_char or mt.begin_token.previous.is_value("IP", None)): 
             pass
         elif (mt.begin_token.is_hiphen and len(mt.units) > 0 and not mt.units[0].is_doubt): 
             pass
         else: 
             return None
     if (len(mt.units) == 0 and units is not None): 
         mt.units = units
         if (mt.div_num is not None and len(units) > 1 and len(mt.div_num.units) == 0): 
             i = 1
             while i < len(units): 
                 if (units[i].pow0_ == -1): 
                     j = i
                     while j < len(units): 
                         mt.div_num.units.append(units[j])
                         units[j].pow0_ = (- units[j].pow0_)
                         j += 1
                     del mt.units[i:i+len(units) - i]
                     break
                 i += 1
     if ((minmax < 0) and mt.single_val is not None): 
         mt.from_val = mt.single_val
         mt.from_include = True
         mt.single_val = (None)
     if (minmax > 0 and mt.single_val is not None): 
         mt.to_val = mt.single_val
         mt.to_include = True
         mt.single_val = (None)
     if (len(mt.units) == 0): 
         units = UnitToken.try_parse_list(mt.end_token.next0_, add_units, True)
         if (units is None): 
             if (can_units_absent): 
                 pass
             else: 
                 return None
         else: 
             mt.units = units
     res = MeasureToken._new1622(t0, mt.end_token, name_.morph, internals_)
     if (((not t0.is_whitespace_before and t0.previous is not None and t0 == name_.begin_token) and t0.previous.is_hiphen and not t0.previous.is_whitespace_before) and (isinstance(t0.previous.previous, TextToken))): 
         name_.begin_token = res.begin_token = name_.begin_token.previous.previous
     res.name = MiscHelper.get_text_value_of_meta_token(name_, (GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE if not is_subval else GetTextAttr.NO))
     res.nums = mt
     for u in res.nums.units: 
         if (u.keyword is not None): 
             if (u.keyword.begin_char >= res.begin_char): 
                 res.reliable = True
     res.__parse_internals(add_units)
     if (len(res.internals) > 0 or not can_be_set): 
         return res
     t1 = res.end_token.next0_
     if (t1 is not None and t1.is_comma_and): 
         t1 = t1.next0_
     mts1 = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, False, False, False)
     if ((mts1 is not None and len(mts1) == 1 and (t1.whitespaces_before_count < 3)) and len(mts1[0].units) > 0 and not UnitToken.can_be_equals(mts[0].units, mts1[0].units)): 
         res.is_set = True
         res.nums = (None)
         res.internals.append(MeasureToken._new1612(mt.begin_token, mt.end_token, mt))
         res.internals.append(MeasureToken._new1612(mts1[0].begin_token, mts1[0].end_token, mts1[0]))
         res.end_token = mts1[0].end_token
     return res
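
The fragment above is internal to the measure parsing code; client code normally obtains measures from a full processor rather than by calling these helpers directly. A minimal sketch of that flow, assuming Sdk.initialize_all() registers the measure analyzer and that the import paths below match the package layout (they are not shown in the original source):

from pullenti.Sdk import Sdk
from pullenti.ner.ProcessorService import ProcessorService
from pullenti.ner.SourceOfAnalysis import SourceOfAnalysis

# one-time initialization, same as in the main() examples further below
Sdk.initialize_all()
txt = "Длина кабеля составляет 10,5 м, вес не более 2 кг."
with ProcessorService.create_processor() as proc:
    ar = proc.process(SourceOfAnalysis(txt), None, None)
    # print every extracted entity with its slots; measure values and units
    # appear here once the measure analyzer has produced its referents
    for e in ar.entities:
        print("{0}: {1}".format(e.type_name, str(e)))
        for s in e.slots:
            print("   {0} = {1}".format(s.type_name, s.value))
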
Example #18
 def __get_name_without_brackets(begin: 'Token',
                                 end: 'Token',
                                 normalize_first_noun_group: bool = False,
                                 normal_first_group_single: bool = False,
                                 ignore_geo_referent: bool = False) -> str:
     res = None
     if (BracketHelper.can_be_start_of_sequence(begin, False, False)
             and BracketHelper.can_be_end_of_sequence(
                 end, False, begin, False)):
         begin = begin.next0_
         end = end.previous
     if (normalize_first_noun_group
             and not begin.morph.class0_.is_preposition):
         npt = NounPhraseHelper.try_parse(
             begin, NounPhraseParseAttr.REFERENTCANBENOUN, 0, None)
         if (npt is not None):
             if (npt.noun.get_morph_class_in_dictionary().is_undefined
                     and len(npt.adjectives) == 0):
                 npt = (None)
         if (npt is not None and npt.end_token.end_char > end.end_char):
             npt = (None)
         if (npt is not None):
             res = npt.get_normal_case_text(
                 None, (MorphNumber.SINGULAR if normal_first_group_single
                        else MorphNumber.UNDEFINED), MorphGender.UNDEFINED,
                 False)
             te = npt.end_token.next0_
             if (((te is not None and te.next0_ is not None and te.is_comma)
                  and (isinstance(te.next0_, TextToken))
                  and te.next0_.end_char <= end.end_char)
                     and te.next0_.morph.class0_.is_verb
                     and te.next0_.morph.class0_.is_adjective):
                 for it in te.next0_.morph.items:
                     if (it.gender == npt.morph.gender
                             or ((it.gender) & (npt.morph.gender)) !=
                         (MorphGender.UNDEFINED)):
                         if (not (
                             (it.case_) & npt.morph.case_).is_undefined):
                             if (it.number == npt.morph.number or
                                 ((it.number) & (npt.morph.number)) !=
                                 (MorphNumber.UNDEFINED)):
                                 var = te.next0_.term
                                 if (isinstance(it, MorphWordForm)):
                                     var = it.normal_case
                                 bi = MorphBaseInfo._new492(
                                     MorphClass.ADJECTIVE, npt.morph.gender,
                                     npt.morph.number, npt.morph.language)
                                 var = MorphologyService.get_wordform(
                                     var, bi)
                                 if (var is not None):
                                     res = "{0}, {1}".format(res, var)
                                     te = te.next0_.next0_
                                 break
             if (te is not None and te.end_char <= end.end_char):
                 s = ProperNameHelper.get_name_ex(te, end,
                                                  MorphClass.UNDEFINED,
                                                  MorphCase.UNDEFINED,
                                                  MorphGender.UNDEFINED,
                                                  True, ignore_geo_referent)
                 if (not Utils.isNullOrEmpty(s)):
                     if (not str.isalnum(s[0])):
                         res = "{0}{1}".format(res, s)
                     else:
                         res = "{0} {1}".format(res, s)
         elif ((isinstance(begin, TextToken))
               and begin.chars.is_cyrillic_letter):
             mm = begin.get_morph_class_in_dictionary()
             if (not mm.is_undefined):
                 res = begin.get_normal_case_text(mm, MorphNumber.UNDEFINED,
                                                  MorphGender.UNDEFINED,
                                                  False)
                 if (begin.end_char < end.end_char):
                     res = "{0} {1}".format(
                         res,
                         ProperNameHelper.get_name_ex(
                             begin.next0_, end, MorphClass.UNDEFINED,
                             MorphCase.UNDEFINED, MorphGender.UNDEFINED,
                             True, False))
     if (res is None):
         res = ProperNameHelper.get_name_ex(begin, end,
                                            MorphClass.UNDEFINED,
                                            MorphCase.UNDEFINED,
                                            MorphGender.UNDEFINED, True,
                                            ignore_geo_referent)
     if (not Utils.isNullOrEmpty(res)):
         k = 0
         i = len(res) - 1
         while i >= 0:
             if (res[i] == '*' or Utils.isWhitespace(res[i])):
                 pass
             else:
                 break
             i -= 1
             k += 1
         if (k > 0):
             if (k == len(res)):
                 return None
             res = res[0:0 + len(res) - k]
     return res
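
The private helper above builds a normalized name by putting the leading noun group into the nominative case. A short sketch of the same normalization done through the public NounPhraseHelper API; the import paths are assumptions, since they are not shown in the original source:

from pullenti.Sdk import Sdk
from pullenti.ner.ProcessorService import ProcessorService
from pullenti.ner.SourceOfAnalysis import SourceOfAnalysis
from pullenti.ner.core.NounPhraseHelper import NounPhraseHelper
from pullenti.ner.core.NounPhraseParseAttr import NounPhraseParseAttr
from pullenti.morph.MorphNumber import MorphNumber
from pullenti.morph.MorphGender import MorphGender

Sdk.initialize_all()
ar = ProcessorService.get_empty_processor().process(
    SourceOfAnalysis("новых финансовых документов"), None, None)
npt = NounPhraseHelper.try_parse(ar.first_token,
                                 NounPhraseParseAttr.REFERENTCANBENOUN, 0, None)
if npt is not None:
    # nominative singular, mirroring normal_first_group_single=True above
    print(npt.get_normal_case_text(None, MorphNumber.SINGULAR,
                                   MorphGender.UNDEFINED, False))
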
Example #19
 def create(t: 'Token', names: 'TerminCollection') -> 'BlockLine':
     if (t is None):
         return None
     res = BlockLine(t, t)
     tt = t
     while tt is not None:
         if (tt != t and tt.is_newline_before):
             break
         else:
             res.end_token = tt
         tt = tt.next0_
     nums = 0
     while t is not None and t.next0_ is not None and t.end_char <= res.end_char:
         if (isinstance(t, NumberToken)):
             pass
         else:
             rom = NumberHelper.try_parse_roman(t)
             if (rom is not None and rom.end_token.next0_ is not None):
                 t = rom.end_token
             else:
                 break
         if (t.next0_.is_char('.')):
             pass
         elif ((isinstance(t.next0_, TextToken))
               and not t.next0_.chars.is_all_lower):
             pass
         else:
             break
         res.number_end = t
         t = t.next0_
         if (t.is_char('.') and t.next0_ is not None):
             res.number_end = t
             t = t.next0_
         if (t.is_newline_before):
             return res
         nums += 1
     tok = BlockLine.__m_ontology.try_parse(t, TerminParseAttr.NO)
     if (tok is None):
         npt1 = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0,
                                           None)
         if (npt1 is not None and npt1.end_token != npt1.begin_token):
             tok = BlockLine.__m_ontology.try_parse(npt1.noun.begin_token,
                                                    TerminParseAttr.NO)
     if (tok is not None):
         if (t.previous is not None and t.previous.is_char(':')):
             tok = (None)
     if (tok is not None):
         typ_ = Utils.valToEnum(tok.termin.tag, BlkTyps)
         if (typ_ == BlkTyps.CONSLUSION):
             if (t.is_newline_after):
                 pass
             elif (t.next0_ is not None
                   and t.next0_.morph.class0_.is_preposition
                   and t.next0_.next0_ is not None):
                 tok2 = BlockLine.__m_ontology.try_parse(
                     t.next0_.next0_, TerminParseAttr.NO)
                 if (tok2 is not None and (Utils.valToEnum(
                         tok2.termin.tag, BlkTyps)) == BlkTyps.CHAPTER):
                     pass
                 else:
                     tok = (None)
             else:
                 tok = (None)
         if (t.kit.base_language != t.morph.language):
             tok = (None)
         if (typ_ == BlkTyps.INDEX and not t.is_value("ОГЛАВЛЕНИЕ", None)):
             if (not t.is_newline_after and t.next0_ is not None):
                 npt = NounPhraseHelper.try_parse(t.next0_,
                                                  NounPhraseParseAttr.NO, 0,
                                                  None)
                 if (npt is not None and npt.is_newline_after
                         and npt.morph.case_.is_genitive):
                     tok = (None)
                 elif (npt is None):
                     tok = (None)
         if ((typ_ == BlkTyps.INTRO and tok is not None
              and not tok.is_newline_after)
                 and t.is_value("ВВЕДЕНИЕ", None)):
             npt = NounPhraseHelper.try_parse(t.next0_,
                                              NounPhraseParseAttr.NO, 0,
                                              None)
             if (npt is not None and npt.morph.case_.is_genitive):
                 tok = (None)
         if (tok is not None):
             if (res.number_end is None):
                 res.number_end = tok.end_token
                 if (res.number_end.end_char > res.end_char):
                     res.end_token = res.number_end
             res.typ = typ_
             t = tok.end_token
             if (t.next0_ is not None and t.next0_.is_char_of(":.")):
                 t = t.next0_
                 res.end_token = t
             if (t.is_newline_after or t.next0_ is None):
                 return res
             t = t.next0_
     if (t.is_char('§') and (isinstance(t.next0_, NumberToken))):
         res.typ = BlkTyps.CHAPTER
         res.number_end = t
         t = t.next0_
     if (names is not None):
         tok2 = names.try_parse(t, TerminParseAttr.NO)
         if (tok2 is not None and tok2.end_token.is_newline_after):
             res.end_token = tok2.end_token
             res.is_exist_name = True
             if (res.typ == BlkTyps.UNDEFINED):
                 li2 = BlockLine.create((None if res.number_end is None else
                                         res.number_end.next0_), None)
                 if (li2 is not None
                         and ((li2.typ == BlkTyps.LITERATURE
                               or li2.typ == BlkTyps.INTRO
                               or li2.typ == BlkTyps.CONSLUSION))):
                     res.typ = li2.typ
                 else:
                     res.typ = BlkTyps.CHAPTER
             return res
     t1 = res.end_token
     if ((((isinstance(t1, NumberToken)) or t1.is_char('.')))
             and t1.previous is not None):
         t1 = t1.previous
         if (t1.is_char('.')):
             res.has_content_item_tail = True
             while t1 is not None and t1.begin_char > res.begin_char:
                 if (not t1.is_char('.')):
                     break
                 t1 = t1.previous
     res.is_all_upper = True
     while t is not None and t.end_char <= t1.end_char:
         if (not (isinstance(t, TextToken)) or not t.chars.is_letter):
             res.not_words += 1
         else:
             mc = t.get_morph_class_in_dictionary()
             if (mc.is_undefined):
                 res.not_words += 1
             elif (t.length_char > 2):
                 res.words += 1
             if (not t.chars.is_all_upper):
                 res.is_all_upper = False
             if (t.is_pure_verb):
                 if (not t.term.endswith("ING")):
                     res.has_verb = True
         t = t.next0_
     if (res.typ == BlkTyps.UNDEFINED):
         npt = NounPhraseHelper.try_parse(
             (res.begin_token if res.number_end is None else
              res.number_end.next0_), NounPhraseParseAttr.NO, 0, None)
         if (npt is not None):
             if (npt.noun.is_value("ХАРАКТЕРИСТИКА", None)
                     or npt.noun.is_value("СОДЕРЖАНИЕ", "ЗМІСТ")):
                 ok = True
                 tt = npt.end_token.next0_
                 first_pass3032 = True
                 while True:
                     if first_pass3032: first_pass3032 = False
                     else: tt = tt.next0_
                     if (not (tt is not None
                              and tt.end_char <= res.end_char)):
                         break
                     if (tt.is_char('.')):
                         continue
                     npt2 = NounPhraseHelper.try_parse(
                         tt, NounPhraseParseAttr.NO, 0, None)
                     if (npt2 is None or not npt2.morph.case_.is_genitive):
                         ok = False
                         break
                     tt = npt2.end_token
                     if (tt.end_char > res.end_char):
                         res.end_token = tt
                         if (not tt.is_newline_after):
                             while res.end_token.next0_ is not None:
                                 if (res.end_token.is_newline_after):
                                     break
                                 res.end_token = res.end_token.next0_
                 if (ok):
                     res.typ = BlkTyps.INTRO
                     res.is_exist_name = True
             elif (npt.noun.is_value("ВЫВОД", "ВИСНОВОК")
                   or npt.noun.is_value("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")):
                 ok = True
                 tt = npt.end_token.next0_
                 first_pass3033 = True
                 while True:
                     if first_pass3033: first_pass3033 = False
                     else: tt = tt.next0_
                     if (not (tt is not None
                              and tt.end_char <= res.end_char)):
                         break
                     if (tt.is_char_of(",.") or tt.is_and):
                         continue
                     npt1 = NounPhraseHelper.try_parse(
                         tt, NounPhraseParseAttr.NO, 0, None)
                     if (npt1 is not None):
                         if (npt1.noun.is_value("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")
                                 or npt1.noun.is_value(
                                     "РЕКОМЕНДАЦИЯ", "РЕКОМЕНДАЦІЯ")
                                 or npt1.noun.is_value(
                                     "ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")):
                             tt = npt1.end_token
                             if (tt.end_char > res.end_char):
                                 res.end_token = tt
                                 if (not tt.is_newline_after):
                                     while res.end_token.next0_ is not None:
                                         if (res.end_token.is_newline_after
                                             ):
                                             break
                                         res.end_token = res.end_token.next0_
                             continue
                     ok = False
                     break
                 if (ok):
                     res.typ = BlkTyps.CONSLUSION
                     res.is_exist_name = True
             if (res.typ == BlkTyps.UNDEFINED and npt is not None
                     and npt.end_char <= res.end_char):
                 ok = False
                 publ = 0
                 if (BlockLine.__is_pub(npt)):
                     ok = True
                     publ = 1
                 elif ((npt.noun.is_value("СПИСОК", None)
                        or npt.noun.is_value("УКАЗАТЕЛЬ", "ПОКАЖЧИК")
                        or npt.noun.is_value("ПОЛОЖЕНИЕ", "ПОЛОЖЕННЯ"))
                       or npt.noun.is_value("ВЫВОД", "ВИСНОВОК")
                       or npt.noun.is_value("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")):
                     if (npt.end_char == res.end_char):
                         return None
                     ok = True
                 if (ok):
                     if (npt.begin_token == npt.end_token
                             and npt.noun.is_value("СПИСОК", None)
                             and npt.end_char == res.end_char):
                         ok = False
                     tt = npt.end_token.next0_
                     first_pass3034 = True
                     while True:
                         if first_pass3034: first_pass3034 = False
                         else: tt = tt.next0_
                         if (not (tt is not None
                                  and tt.end_char <= res.end_char)):
                             break
                         if (tt.is_char_of(",.:") or tt.is_and
                                 or tt.morph.class0_.is_preposition):
                             continue
                         if (tt.is_value("ОТРАЖЕНЫ", "ВІДОБРАЖЕНІ")):
                             continue
                         npt = NounPhraseHelper.try_parse(
                             tt, NounPhraseParseAttr.NO, 0, None)
                         if (npt is None):
                             ok = False
                             break
                         if (((BlockLine.__is_pub(npt) or npt.noun.is_value(
                                 "РАБОТА", "РОБОТА") or npt.noun.is_value(
                                     "ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ"))
                              or npt.noun.is_value("АВТОР", None)
                              or npt.noun.is_value("ТРУД", "ПРАЦЯ"))
                                 or npt.noun.is_value("ТЕМА", None)
                                 or npt.noun.is_value(
                                     "ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ")):
                             tt = npt.end_token
                             if (BlockLine.__is_pub(npt)):
                                 publ += 1
                             if (tt.end_char > res.end_char):
                                 res.end_token = tt
                                 if (not tt.is_newline_after):
                                     while res.end_token.next0_ is not None:
                                         if (res.end_token.is_newline_after
                                             ):
                                             break
                                         res.end_token = res.end_token.next0_
                             continue
                         ok = False
                         break
                     if (ok):
                         res.typ = BlkTyps.LITERATURE
                         res.is_exist_name = True
                         if (publ == 0 and (res.end_char < ((math.floor(
                             (len(res.kit.sofa.text) * 2) / 3))))):
                             if (res.number_end is not None):
                                 res.typ = BlkTyps.MISC
                             else:
                                 res.typ = BlkTyps.UNDEFINED
     return res
Example #20
 def try_parse(t: 'Token',
               add_units: 'TerminCollection',
               prev: 'UnitToken',
               parse_unknown_units: bool = False) -> 'UnitToken':
     if (t is None):
         return None
     t0 = t
     pow0__ = 1
     is_neg = False
     if ((t.is_char_of("\\/") or t.is_value("НА", None)
          or t.is_value("OF", None)) or t.is_value("PER", None)):
         is_neg = True
         t = t.next0_
     elif (t.is_value("В", None) and prev is not None):
         is_neg = True
         t = t.next0_
     elif (MeasureHelper.is_mult_char(t)):
         t = t.next0_
     tt = Utils.asObjectOrNull(t, TextToken)
     if (tt is None):
         return None
     if (tt.term == "КВ" or tt.term == "КВАДР"
             or tt.is_value("КВАДРАТНЫЙ", None)):
         pow0__ = 2
         tt = (Utils.asObjectOrNull(tt.next0_, TextToken))
         if (tt is not None and tt.is_char('.')):
             tt = (Utils.asObjectOrNull(tt.next0_, TextToken))
         if (tt is None):
             return None
     elif (tt.term == "КУБ" or tt.term == "КУБИЧ"
           or tt.is_value("КУБИЧЕСКИЙ", None)):
         pow0__ = 3
         tt = (Utils.asObjectOrNull(tt.next0_, TextToken))
         if (tt is not None and tt.is_char('.')):
             tt = (Utils.asObjectOrNull(tt.next0_, TextToken))
         if (tt is None):
             return None
     elif (tt.term == "µ"):
         res = UnitToken.try_parse(tt.next0_, add_units, prev, False)
         if (res is not None):
             for u in UnitsHelper.UNITS:
                 if (u.factor == UnitsFactors.MICRO
                         and Utils.compareStrings("мк" + u.name_cyr,
                                                  res.unit.name_cyr, True)
                         == 0):
                     res.unit = u
                     res.begin_token = tt
                     res.pow0_ = pow0__
                     if (is_neg):
                         res.pow0_ = (-pow0__)
                     return res
     toks = UnitsHelper.TERMINS.try_parse_all(tt, TerminParseAttr.NO)
     if (toks is not None):
         if ((prev is not None and tt == t0 and len(toks) == 1)
                 and t.is_whitespace_before):
             return None
         if (toks[0].begin_token == toks[0].end_token
                 and tt.morph.class0_.is_preposition
                 and (tt.whitespaces_after_count < 3)):
             if (NounPhraseHelper.try_parse(
                     tt, NounPhraseParseAttr.PARSEPREPOSITION, 0, None)
                     is not None):
                 return None
             if (isinstance(tt.next0_, NumberToken)):
                 if (tt.next0_.typ != NumberSpellingType.DIGIT):
                     return None
             nex = UnitToken.try_parse(tt.next0_, add_units, None, False)
             if (nex is not None):
                 return None
         if (toks[0].begin_token == toks[0].end_token
                 and ((toks[0].begin_token.is_value("М", None)
                       or toks[0].begin_token.is_value("M", None)))
                 and toks[0].begin_token.chars.is_all_lower):
             if (prev is not None and prev.unit is not None
                     and prev.unit.kind == MeasureKind.LENGTH):
                 res = UnitToken._new1626(t0, toks[0].end_token,
                                          UnitsHelper.UMINUTE)
                 res.pow0_ = pow0__
                 if (is_neg):
                     res.pow0_ = (-pow0__)
                 return res
         uts = list()
         for tok in toks:
             res = UnitToken._new1626(
                 t0, tok.end_token,
                 Utils.asObjectOrNull(tok.termin.tag, Unit))
             res.pow0_ = pow0__
             if (is_neg):
                 res.pow0_ = (-pow0__)
             if (res.unit.base_multiplier == 1000000
                     and (isinstance(t0, TextToken))
                     and str.islower(t0.get_source_text()[0])):
                 for u in UnitsHelper.UNITS:
                     if (u.factor == UnitsFactors.MILLI
                             and Utils.compareStrings(
                                 u.name_cyr, res.unit.name_cyr, True) == 0):
                         res.unit = u
                         break
             res.__correct()
             res.__check_doubt()
             uts.append(res)
         max0_ = 0
         best = None
         for ut in uts:
             if (ut.keyword is not None):
                 if (ut.keyword.begin_char >= max0_):
                     max0_ = ut.keyword.begin_char
                     best = ut
         if (best is not None):
             return best
         for ut in uts:
             if (not ut.is_doubt):
                 return ut
         return uts[0]
     t1 = None
     if (t.is_char_of("º°")):
         t1 = t
     elif ((t.is_char('<') and t.next0_ is not None
            and t.next0_.next0_ is not None)
           and t.next0_.next0_.is_char('>') and
           ((t.next0_.is_value("О", None) or t.next0_.is_value("O", None) or
             (((isinstance(t.next0_, NumberToken))
               and t.next0_.value == "0"))))):
         t1 = t.next0_.next0_
     if (t1 is not None):
         res = UnitToken._new1626(t0, t1, UnitsHelper.UGRADUS)
         res.__check_doubt()
         t = t1.next0_
         if (t is not None and t.is_comma):
             t = t.next0_
         if (t is not None and t.is_value("ПО", None)):
             t = t.next0_
         if (isinstance(t, TextToken)):
             vv = t.term
             if (vv == "C" or vv == "С" or vv.startswith("ЦЕЛЬС")):
                 res.unit = UnitsHelper.UGRADUSC
                 res.is_doubt = False
                 res.end_token = t
             if (vv == "F" or vv.startswith("ФАР")):
                 res.unit = UnitsHelper.UGRADUSF
                 res.is_doubt = False
                 res.end_token = t
         return res
     if ((isinstance(t, TextToken))
             and ((t.is_value("ОС", None) or t.is_value("OC", None)))):
         str0_ = t.get_source_text()
         if (str0_ == "оС" or str0_ == "oC"):
             res = UnitToken._new1738(t, t, UnitsHelper.UGRADUSC, False)
             return res
     if (t.is_char('%')):
         tt1 = t.next0_
         if (tt1 is not None and tt1.is_char('(')):
             tt1 = tt1.next0_
         if ((isinstance(tt1, TextToken)) and tt1.term.startswith("ОБ")):
             re = UnitToken._new1626(t, tt1, UnitsHelper.UALCO)
             if (re.end_token.next0_ is not None
                     and re.end_token.next0_.is_char('.')):
                 re.end_token = re.end_token.next0_
             if (re.end_token.next0_ is not None
                     and re.end_token.next0_.is_char(')')
                     and t.next0_.is_char('(')):
                 re.end_token = re.end_token.next0_
             return re
         return UnitToken._new1626(t, t, UnitsHelper.UPERCENT)
     if (add_units is not None):
         tok = add_units.try_parse(t, TerminParseAttr.NO)
         if (tok is not None):
             res = UnitToken._new1741(
                 t0, tok.end_token,
                 Utils.asObjectOrNull(tok.termin.tag, UnitReferent))
             if (tok.end_token.next0_ is not None
                     and tok.end_token.next0_.is_char('.')):
                 tok.end_token = tok.end_token.next0_
             res.pow0_ = pow0__
             if (is_neg):
                 res.pow0_ = (-pow0__)
             res.__correct()
             return res
     if (not parse_unknown_units):
         return None
     if ((t.whitespaces_before_count > 2 or not t.chars.is_letter
          or t.length_char > 5) or not (isinstance(t, TextToken))):
         return None
     if (MiscHelper.can_be_start_of_sentence(t)):
         return None
     t1 = t
     if (t.next0_ is not None and t.next0_.is_char('.')):
         t1 = t
     ok = False
     if (t1.next0_ is None or t1.whitespaces_after_count > 2):
         ok = True
     elif (t1.next0_.is_comma or t1.next0_.is_char_of("\\/")
           or t1.next0_.is_table_control_char):
         ok = True
     elif (MeasureHelper.is_mult_char(t1.next0_)):
         ok = True
     if (not ok):
         return None
     mc = t.get_morph_class_in_dictionary()
     if (mc.is_undefined):
         pass
     elif (t.length_char > 7):
         return None
     res1 = UnitToken._new1742(t0, t1, pow0__, True)
     res1.unknown_name = t.get_source_text()
     res1.__correct()
     return res1
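
A minimal sketch of calling UnitToken.try_parse directly on a token stream. The internal module path in the import is an assumption, and the units ontology is expected to have been loaded by Sdk.initialize_all():

from pullenti.Sdk import Sdk
from pullenti.ner.ProcessorService import ProcessorService
from pullenti.ner.SourceOfAnalysis import SourceOfAnalysis
# internal helper of the measure analyzer; this module path is an assumption
from pullenti.ner.measure.internal.UnitToken import UnitToken

Sdk.initialize_all()
ar = ProcessorService.get_empty_processor().process(
    SourceOfAnalysis("скорость до 60 км/ч при температуре 20 °С"), None, None)
t = ar.first_token
while t is not None:
    # add_units=None, prev=None, parse_unknown_units=False
    ut = UnitToken.try_parse(t, None, None, False)
    if ut is not None:
        print(ut.get_source_text())
        t = ut.end_token
    t = t.next0_
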
Example #21
 def main(args: typing.List[str]) -> None:
     sw = Stopwatch()
     # initialization - must be done once before processing any texts
     print("Initializing ... ", end="", flush=True)
     # the engine and all available analyzers are initialized
     Sdk.initialize((MorphLang.RU) | MorphLang.EN)
     sw.stop()
     print("OK (by {0} ms), version {1}".format(
         sw.elapsedMilliseconds, ProcessorService.get_version()),
           flush=True)
     # the text to analyze
     txt = "Единственным конкурентом «Трансмаша» на этом сомнительном тендере было ООО «Плассер Алека Рейл Сервис», основным владельцем которого является австрийская компания «СТЦ-Холдинг ГМБХ». До конца 2011 г. эта же фирма была совладельцем «Трансмаша» вместе с «Тако» Краснова. Зато совладельцем «Плассера», также до конца 2011 г., был тот самый Карл Контрус, который имеет четверть акций «Трансмаша». "
     print("Text: {0}".format(txt), flush=True)
     # run processing on an empty processor (without NER analyzers)
     are = ProcessorService.get_empty_processor().process(
         SourceOfAnalysis(txt), None, None)
     print("Noun groups: ", end="", flush=True)
     t = are.first_token
     # iterate over the tokens
     first_pass2879 = True
     while True:
         if first_pass2879: first_pass2879 = False
         else: t = t.next0_
         if (not (t is not None)): break
         # try to extract a noun phrase starting at the current token
         npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0)
         # nothing found
         if (npt is None):
             continue
         # found one, print it in normalized form
         print("[{0}=>{1}] ".format(
             npt.get_source_text(),
             npt.get_normal_case_text(None, True, MorphGender.UNDEFINED,
                                      False)),
               end="",
               flush=True)
         # move the pointer to the last token of the noun phrase
         t = npt.end_token
     with ProcessorService.create_processor() as proc:
         # analyze the text
         ar = proc.process(SourceOfAnalysis(txt), None, None)
         # resulting entities
         print(
             "\r\n==========================================\r\nEntities: ",
             flush=True)
         for e0_ in ar.entities:
             print("{0}: {1}".format(e0_.type_name, str(e0_)), flush=True)
             for s in e0_.slots:
                 print("   {0}: {1}".format(s.type_name, s.value),
                       flush=True)
         # example of extracting noun phrases
         print(
             "\r\n==========================================\r\nNoun groups: ",
             flush=True)
         t = ar.first_token
         first_pass2880 = True
         while True:
             if first_pass2880: first_pass2880 = False
             else: t = t.next0_
             if (not (t is not None)): break
             # skip tokens that are covered by entities
             if (t.get_referent() is not None):
                 continue
             # try to build a noun phrase
             npt = NounPhraseHelper.try_parse(
                 t, NounPhraseParseAttr.ADJECTIVECANBELAST, 0)
             # nothing found
             if (npt is None):
                 continue
             print(npt, flush=True)
             # move the pointer to the last token of the phrase
             t = npt.end_token
     with ProcessorService.create_specific_processor(
             KeywordAnalyzer.ANALYZER_NAME) as proc:
         ar = proc.process(SourceOfAnalysis(txt), None, None)
         print(
             "\r\n==========================================\r\nKeywords1: ",
             flush=True)
         for e0_ in ar.entities:
             if (isinstance(e0_, KeywordReferent)):
                 print(e0_, flush=True)
         print(
             "\r\n==========================================\r\nKeywords2: ",
             flush=True)
         t = ar.first_token
         first_pass2881 = True
         while True:
             if first_pass2881: first_pass2881 = False
             else: t = t.next0_
             if (not (t is not None)): break
             if (isinstance(t, ReferentToken)):
                 kw = Utils.asObjectOrNull(t.get_referent(),
                                           KeywordReferent)
                 if (kw is None):
                     continue
                 kwstr = MiscHelper.get_text_value_of_meta_token(
                     Utils.asObjectOrNull(t, ReferentToken),
                     Utils.valToEnum(
                         (GetTextAttr.FIRSTNOUNGROUPTONOMINATIVESINGLE) |
                         (GetTextAttr.KEEPREGISTER), GetTextAttr))
                 print("{0} = {1}".format(kwstr, kw), flush=True)
     print("Over!", flush=True)
Example #22
 def main(args: typing.List[str]) -> None:
     sw = Stopwatch()
     # initialization - must be done once before processing any texts
     print("Initializing SDK Pullenti ver {0} ({1}) ... ".format(
         Sdk.get_version(), Sdk.get_version_date()),
           end="",
           flush=True)
     # the engine and all available analyzers are initialized
     Sdk.initialize_all()
     sw.stop()
     print("OK (by {0} ms), version {1}".format(
         sw.elapsedMilliseconds, ProcessorService.get_version()),
           flush=True)
     # see which analyzers are available
     for a in ProcessorService.get_analyzers():
         print("   {0} {1} \"{2}\"".format(
             ("Specific analyzer" if a.is_specific else "Common analyzer"),
             a.name, a.caption),
               flush=True)
     # the text to analyze
     txt = "Система разрабатывается с 2011 года российским программистом Михаилом Жуковым, проживающим в Москве на Красной площади в доме номер один на втором этаже. Конкурентов у него много: Abbyy, Yandex, ООО \"Russian Context Optimizer\" (RCO) и другие компании. Он планирует продать SDK за 1.120.000.001,99 (миллиард сто двадцать миллионов один рубль 99 копеек) рублей, без НДС."
     print("Text: {0}".format(txt), flush=True)
     # run processing on an empty processor (without NER analyzers)
     are = ProcessorService.get_empty_processor().process(
         SourceOfAnalysis(txt), None, None)
     print("Noun groups: ", end="", flush=True)
     t = are.first_token
     # iterate over the tokens
     first_pass2974 = True
     while True:
         if first_pass2974: first_pass2974 = False
         else: t = t.next0_
         if (not (t is not None)): break
         # try to extract a noun phrase starting at the current token
         npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0,
                                          None)
         # nothing found
         if (npt is None):
             continue
         # found one, print it in normalized form
         print("[{0}=>{1}] ".format(
             npt.get_source_text(),
             npt.get_normal_case_text(None, MorphNumber.SINGULAR,
                                      MorphGender.UNDEFINED, False)),
               end="",
               flush=True)
         # move the pointer to the last token of the noun phrase
         t = npt.end_token
     with ProcessorService.create_processor() as proc:
         # analyze the text
         ar = proc.process(SourceOfAnalysis(txt), None, None)
         # resulting entities
         print(
             "\r\n==========================================\r\nEntities: ",
             flush=True)
         for e0_ in ar.entities:
             print("{0}: {1}".format(e0_.type_name, str(e0_)), flush=True)
             for s in e0_.slots:
                 print("   {0}: {1}".format(s.type_name, s.value),
                       flush=True)
         # example of extracting noun phrases
         print(
             "\r\n==========================================\r\nNoun groups: ",
             flush=True)
         t = ar.first_token
         first_pass2975 = True
         while True:
             if first_pass2975: first_pass2975 = False
             else: t = t.next0_
             if (not (t is not None)): break
             # skip tokens that are covered by entities
             if (t.get_referent() is not None):
                 continue
             # try to build a noun phrase
             npt = NounPhraseHelper.try_parse(
                 t, NounPhraseParseAttr.ADJECTIVECANBELAST, 0, None)
             # nothing found
             if (npt is None):
                 continue
             print(npt, flush=True)
             # move the pointer to the last token of the phrase
             t = npt.end_token
     with ProcessorService.create_specific_processor(
             KeywordAnalyzer.ANALYZER_NAME) as proc:
         ar = proc.process(SourceOfAnalysis(txt), None, None)
         print(
             "\r\n==========================================\r\nKeywords1: ",
             flush=True)
         for e0_ in ar.entities:
             if (isinstance(e0_, KeywordReferent)):
                 print(e0_, flush=True)
         print(
             "\r\n==========================================\r\nKeywords2: ",
             flush=True)
         t = ar.first_token
         first_pass2976 = True
         while True:
             if first_pass2976: first_pass2976 = False
             else: t = t.next0_
             if (not (t is not None)): break
             if (isinstance(t, ReferentToken)):
                 kw = Utils.asObjectOrNull(t.get_referent(),
                                           KeywordReferent)
                 if (kw is None):
                     continue
                 kwstr = MiscHelper.get_text_value_of_meta_token(
                     Utils.asObjectOrNull(t, ReferentToken),
                     Utils.valToEnum(
                         (GetTextAttr.FIRSTNOUNGROUPTONOMINATIVESINGLE) |
                         (GetTextAttr.KEEPREGISTER), GetTextAttr))
                 print("{0} = {1}".format(kwstr, kw), flush=True)
     print("Over!", flush=True)
Example #23
 def try_attach_list(t: 'Token',
                     max_count: int = 20) -> typing.List['DateItemToken']:
     p = DateItemToken.try_attach(t, None, False)
     if (p is None):
         return None
     if (p.typ == DateItemToken.DateItemType.DELIM):
         return None
     res = list()
     res.append(p)
     tt = p.end_token.next0_
     while tt is not None:
         if (isinstance(tt, TextToken)):
             if (tt.check_value(DateItemToken.M_EMPTY_WORDS) is not None):
                 tt = tt.next0_
                 continue
         p0 = DateItemToken.try_attach(tt, res, False)
         if (p0 is None):
             if (tt.is_newline_before):
                 break
             if (tt.chars.is_latin_letter):
                 break
             if (tt.morph is not None
                     and tt.morph.check((MorphClass.ADJECTIVE)
                                        | MorphClass.PRONOUN)):
                 tt = tt.next0_
                 continue
             break
         if (tt.is_newline_before):
             if (p.typ == DateItemToken.DateItemType.MONTH
                     and p0.can_be_year):
                 pass
             elif (p.typ == DateItemToken.DateItemType.NUMBER
                   and p.can_be_day
                   and p0.typ == DateItemToken.DateItemType.MONTH):
                 pass
             else:
                 break
         if (p0.can_be_year
                 and p0.typ == DateItemToken.DateItemType.NUMBER):
             if (p.typ == DateItemToken.DateItemType.HALFYEAR
                     or p.typ == DateItemToken.DateItemType.QUARTAL):
                 p0.typ = DateItemToken.DateItemType.YEAR
             elif (p.typ == DateItemToken.DateItemType.POINTER
                   and p0.int_value > 1990):
                 p0.typ = DateItemToken.DateItemType.YEAR
         p = p0
         res.append(p)
         if (max_count > 0 and len(res) >= max_count):
             break
         tt = p.end_token.next0_
     for i in range(len(res) - 1, -1, -1):
         if (res[i].typ == DateItemToken.DateItemType.DELIM):
             del res[i]
         else:
             break
     if (len(res) > 0 and res[len(res) - 1].typ
             == DateItemToken.DateItemType.NUMBER):
         nex = NumberHelper.try_parse_number_with_postfix(
             res[len(res) - 1].begin_token)
         if (nex is not None and nex.ex_typ != NumberExType.HOUR):
             if (len(res) > 3 and res[len(res) - 2].typ
                     == DateItemToken.DateItemType.DELIM
                     and res[len(res) - 2].string_value == ":"):
                 pass
             else:
                 del res[len(res) - 1]
     if (len(res) == 0):
         return None
     i = 1
     while i < (len(res) - 1):
         if (res[i].typ == DateItemToken.DateItemType.DELIM
                 and res[i].begin_token.is_comma):
             if ((i == 1
                  and res[i - 1].typ == DateItemToken.DateItemType.MONTH
                  and res[i + 1].can_be_year)
                     and (i + 1) == (len(res) - 1)):
                 del res[i]
         i += 1
     if (res[len(res) - 1].typ == DateItemToken.DateItemType.NUMBER):
         rr = res[len(res) - 1]
         npt = NounPhraseHelper.try_parse(rr.begin_token,
                                          NounPhraseParseAttr.NO, 0, None)
         if (npt is not None and npt.end_char > rr.end_char):
             del res[len(res) - 1]
             if (len(res) > 0 and res[len(res) - 1].typ
                     == DateItemToken.DateItemType.DELIM):
                 del res[len(res) - 1]
     if (len(res) == 0):
         return None
     if (len(res) == 2 and not res[0].is_whitespace_after):
         if (not res[0].is_whitespace_before
                 and not res[1].is_whitespace_after):
             return None
     return res
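
A sketch of feeding a tokenized date string to try_attach_list; it assumes the function is a static method of DateItemToken (as the calls to DateItemToken.try_attach above suggest) and that the internal module path below is correct, neither of which is shown in the original source:

from pullenti.Sdk import Sdk
from pullenti.ner.ProcessorService import ProcessorService
from pullenti.ner.SourceOfAnalysis import SourceOfAnalysis
# internal helper of the date analyzer; this module path is an assumption
from pullenti.ner.date.internal.DateItemToken import DateItemToken

Sdk.initialize_all()
ar = ProcessorService.get_empty_processor().process(
    SourceOfAnalysis("15 марта 2020 года"), None, None)
items = DateItemToken.try_attach_list(ar.first_token, 20)
if items is not None:
    for it in items:
        # each item carries its kind (day/month/year/...) and numeric value
        print(it.typ, it.int_value)
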
Example #24
 def try_parse(t: 'Token') -> 'DefinitionWithNumericToken':
     if (not MiscHelper.can_be_start_of_sentence(t)):
         return None
     tt = t
     noun_ = None
     num = None
     first_pass3146 = True
     while True:
         if first_pass3146: first_pass3146 = False
         else: tt = tt.next0_
         if (not (tt is not None)): break
         if (tt != t and MiscHelper.can_be_start_of_sentence(tt)):
             return None
         if (not (isinstance(tt, NumberToken))):
             continue
         if (tt.whitespaces_after_count > 2 or tt == t):
             continue
         if (tt.morph.class0_.is_adjective):
             continue
         nn = NounPhraseHelper.try_parse(tt.next0_, NounPhraseParseAttr.NO,
                                         0, None)
         if (nn is None):
             continue
         num = (Utils.asObjectOrNull(tt, NumberToken))
         noun_ = nn
         break
     if (num is None or num.int_value is None):
         return None
     res = DefinitionWithNumericToken(t, noun_.end_token)
     res.number = num.int_value
     res.number_begin_char = num.begin_char
     res.number_end_char = num.end_char
     res.noun = noun_.get_normal_case_text(None, MorphNumber.SINGULAR,
                                           MorphGender.UNDEFINED, False)
     res.nouns_genetive = (Utils.ifNotNull(
         noun_.get_morph_variant(MorphCase.GENITIVE, True),
         (res.noun if res is not None else None)))
     res.text = MiscHelper.get_text_value(
         t, num.previous,
         Utils.valToEnum(
             (GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER),
             GetTextAttr))
     if (num.is_whitespace_before):
         res.text += " "
     res.number_substring = MiscHelper.get_text_value(
         num, noun_.end_token,
         Utils.valToEnum(
             (GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER),
             GetTextAttr))
     res.text += res.number_substring
     tt = noun_.end_token
     while tt is not None:
         if (MiscHelper.can_be_start_of_sentence(tt)):
             break
         res.end_token = tt
         tt = tt.next0_
     if (res.end_token != noun_.end_token):
         if (noun_.is_whitespace_after):
             res.text += " "
         res.text += MiscHelper.get_text_value(
             noun_.end_token.next0_, res.end_token,
             Utils.valToEnum(
                 (GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER),
                 GetTextAttr))
     return res
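
A sketch of applying try_parse to the first token of a short sentence; it assumes the function is a static method of DefinitionWithNumericToken and that the module path below is correct, neither of which is shown in the original source:

from pullenti.Sdk import Sdk
from pullenti.ner.ProcessorService import ProcessorService
from pullenti.ner.SourceOfAnalysis import SourceOfAnalysis
# internal helper; this module path is an assumption
from pullenti.ner.definition.internal.DefinitionWithNumericToken import DefinitionWithNumericToken

Sdk.initialize_all()
ar = ProcessorService.get_empty_processor().process(
    SourceOfAnalysis("Было выпущено 5 новых документов."), None, None)
dt = DefinitionWithNumericToken.try_parse(ar.first_token)
if dt is not None:
    # number, normalized noun and reconstructed text are filled by try_parse
    print(dt.number, dt.noun, dt.text)
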
Example #25
 def __try_parse_ru(first: 'Token',
                    typ: 'NounPhraseParseAttr',
                    max_char_pos: int,
                    def_noun: 'NounPhraseItem' = None) -> 'NounPhraseToken':
     if (first is None):
         return None
     items = None
     adverbs = None
     prep = None
     kak = False
     t0 = first
     if ((((typ) & (NounPhraseParseAttr.PARSEPREPOSITION))) !=
         (NounPhraseParseAttr.NO) and t0.is_value("КАК", None)):
         t0 = t0.next0_
         prep = PrepositionHelper.try_parse(t0)
         if (prep is not None):
             t0 = prep.end_token.next0_
         kak = True
     internal_noun_prase = None
     conj_before = False
     t = t0
     first_pass3041 = True
     while True:
         if first_pass3041: first_pass3041 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (max_char_pos > 0 and t.begin_char > max_char_pos):
             break
         if ((t.morph.class0_.is_conjunction
              and not t.morph.class0_.is_adjective
              and not t.morph.class0_.is_pronoun)
                 and not t.morph.class0_.is_noun):
             if (conj_before):
                 break
             if ((((typ) & (NounPhraseParseAttr.CANNOTHASCOMMAAND))) !=
                 (NounPhraseParseAttr.NO)):
                 break
             if (items is not None and ((t.is_and or t.is_or))):
                 conj_before = True
                 if ((t.next0_ is not None and t.next0_.is_char_of("\\/")
                      and t.next0_.next0_ is not None)
                         and t.next0_.next0_.is_or):
                     t = t.next0_.next0_
                 if (((t.next0_ is not None and t.next0_.is_char('(')
                       and t.next0_.next0_ is not None)
                      and t.next0_.next0_.is_or
                      and t.next0_.next0_.next0_ is not None)
                         and t.next0_.next0_.next0_.is_char(')')):
                     t = t.next0_.next0_.next0_
                 continue
             break
         elif (t.is_comma):
             if (conj_before or items is None):
                 break
             if ((((typ) & (NounPhraseParseAttr.CANNOTHASCOMMAAND))) !=
                 (NounPhraseParseAttr.NO)):
                 break
             mc = t.previous.get_morph_class_in_dictionary()
             if (mc.is_proper_surname or mc.is_proper_secname):
                 break
             conj_before = True
             if (kak and t.next0_ is not None
                     and t.next0_.is_value("ТАК", None)):
                 t = t.next0_
                 if (t.next0_ is not None and t.next0_.is_and):
                     t = t.next0_
                 pr = PrepositionHelper.try_parse(t.next0_)
                 if (pr is not None):
                     t = pr.end_token
             if (items[len(items) - 1].can_be_noun
                     and items[len(items) -
                               1].end_token.morph.class0_.is_pronoun):
                 break
             continue
         elif (t.is_char('(')):
             if (items is None):
                 return None
             brr = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
             if (brr is None):
                 break
             if (brr.length_char > 100):
                 break
             t = brr.end_token
             continue
         if (isinstance(t, ReferentToken)):
             if ((((typ) & (NounPhraseParseAttr.REFERENTCANBENOUN))) == (
                     NounPhraseParseAttr.NO)):
                 break
         elif (t.chars.is_latin_letter):
             break
         it = NounPhraseItem.try_parse(t, items, typ)
         if (it is None or ((not it.can_be_adj and not it.can_be_noun))):
             if (((it is not None and items is not None
                   and t.chars.is_capital_upper) and
                  (t.whitespaces_before_count < 3) and t.length_char > 3)
                     and not t.get_morph_class_in_dictionary().is_noun and
                     not t.get_morph_class_in_dictionary().is_adjective):
                 it.can_be_noun = True
                 items.append(it)
                 break
             if ((((typ) & (NounPhraseParseAttr.PARSEADVERBS))) !=
                 (NounPhraseParseAttr.NO) and (isinstance(t, TextToken))
                     and t.morph.class0_.is_adverb):
                 if (adverbs is None):
                     adverbs = list()
                 adverbs.append(Utils.asObjectOrNull(t, TextToken))
                 continue
             break
         it.conj_before = conj_before
         conj_before = False
         if (not it.can_be_adj and not it.can_be_noun):
             break
         if (t.is_newline_before and t != first):
             if ((((typ) & (NounPhraseParseAttr.MULTILINES))) !=
                 (NounPhraseParseAttr.NO)):
                 pass
             elif (items is not None
                   and t.chars != items[len(items) - 1].chars):
                 if (t.chars.is_all_lower
                         and items[len(items) - 1].chars.is_capital_upper):
                     pass
                 else:
                     break
         if (items is None):
             items = list()
         else:
             it0 = items[len(items) - 1]
             if (it0.can_be_noun and it0.is_personal_pronoun):
                 if (it.is_pronoun):
                     break
                 if ((it0.begin_token.previous is not None
                      and it0.begin_token.previous.
                      get_morph_class_in_dictionary().is_verb
                      and not it0.begin_token.previous.
                      get_morph_class_in_dictionary().is_adjective)
                         and not it0.begin_token.previous.
                         get_morph_class_in_dictionary().is_preposition):
                     if (t.morph.case_.is_nominative
                             or t.morph.case_.is_accusative):
                         pass
                     else:
                         break
                 if (it.can_be_noun and it.is_verb):
                     if (it0.previous is None):
                         pass
                     elif ((isinstance(it0.previous, TextToken))
                           and not it0.previous.chars.is_letter):
                         pass
                     else:
                         break
         items.append(it)
         t = it.end_token
         if (t.is_newline_after and not t.chars.is_all_lower):
             mc = t.get_morph_class_in_dictionary()
             if (mc.is_proper_surname):
                 break
             if (t.morph.class0_.is_proper_surname and mc.is_undefined):
                 break
     if (items is None):
         return None
     tt1 = None
     if (len(items) == 1 and items[0].can_be_adj):
         and0_ = False
         tt1 = items[0].end_token.next0_
         first_pass3042 = True
         while True:
             if first_pass3042: first_pass3042 = False
             else: tt1 = tt1.next0_
             if (not (tt1 is not None)): break
             if (tt1.is_and or tt1.is_or):
                 and0_ = True
                 break
             if (tt1.is_comma or tt1.is_value("НО", None)
                     or tt1.is_value("ТАК", None)):
                 continue
             break
         if (and0_):
             if (items[0].can_be_noun and items[0].is_personal_pronoun):
                 and0_ = False
         if (and0_):
             tt2 = tt1.next0_
             if (tt2 is not None and tt2.morph.class0_.is_preposition):
                 tt2 = tt2.next0_
             npt1 = _NounPraseHelperInt.__try_parse_ru(
                 tt2, typ, max_char_pos, None)
             if (npt1 is not None and len(npt1.adjectives) > 0):
                 ok1 = False
                 for av in items[0].adj_morph:
                     for v in npt1.noun.noun_morph:
                         if (v.check_accord(av, False, False)):
                             items[0].morph.add_item(av)
                             ok1 = True
                 if (ok1):
                     npt1.begin_token = items[0].begin_token
                     npt1.end_token = tt1.previous
                     npt1.adjectives.clear()
                     npt1.adjectives.append(items[0])
                     return npt1
     if (def_noun is not None):
         items.append(def_noun)
     last1 = items[len(items) - 1]
     check = True
     for it in items:
         if (not it.can_be_adj):
             check = False
             break
         elif (it.can_be_noun and it.is_personal_pronoun):
             check = False
             break
     tt1 = last1.end_token.next0_
     if ((tt1 is not None and check and
          ((tt1.morph.class0_.is_preposition
            or tt1.morph.case_.is_instrumental)))
             and (tt1.whitespaces_before_count < 2)):
         inp = NounPhraseHelper.try_parse(
             tt1,
             Utils.valToEnum((typ) | (NounPhraseParseAttr.PARSEPREPOSITION),
                             NounPhraseParseAttr), max_char_pos, None)
         if (inp is not None):
             tt1 = inp.end_token.next0_
             npt1 = _NounPraseHelperInt.__try_parse_ru(
                 tt1, typ, max_char_pos, None)
             if (npt1 is not None):
                 ok = True
                 ii = 0
                 first_pass3043 = True
                 while True:
                     if first_pass3043: first_pass3043 = False
                     else: ii += 1
                     if (not (ii < len(items))): break
                     it = items[ii]
                     if (NounPhraseItem.try_accord_adj_and_noun(
                             it,
                             Utils.asObjectOrNull(npt1.noun,
                                                  NounPhraseItem))):
                         continue
                     if (ii > 0):
                         inp2 = NounPhraseHelper.try_parse(
                             it.begin_token, typ, max_char_pos, None)
                         if (inp2 is not None
                                 and inp2.end_token == inp.end_token):
                             del items[ii:ii + len(items) - ii]
                             inp = inp2
                             break
                     ok = False
                     break
                 if (ok):
                     if (npt1.morph.case_.is_genitive
                             and not inp.morph.case_.is_instrumental):
                         ok = False
                 if (ok):
                     i = 0
                     while i < len(items):
                         npt1.adjectives.insert(i, items[i])
                         i += 1
                     npt1.internal_noun = inp
                     mmm = MorphCollection(npt1.morph)
                     for it in items:
                         mmm.remove_items(it.adj_morph[0], False)
                     if (mmm.gender != MorphGender.UNDEFINED
                             or mmm.number != MorphNumber.UNDEFINED
                             or not mmm.case_.is_undefined):
                         npt1.morph = mmm
                     if (adverbs is not None):
                         if (npt1.adverbs is None):
                             npt1.adverbs = adverbs
                         else:
                             npt1.adverbs[0:0] = adverbs
                     npt1.begin_token = first
                     return npt1
             if (tt1 is not None and tt1.morph.class0_.is_noun
                     and not tt1.morph.case_.is_genitive):
                 it = NounPhraseItem.try_parse(tt1, items, typ)
                 if (it is not None and it.can_be_noun):
                     internal_noun_prase = inp
                     inp.begin_token = items[0].end_token.next0_
                     items.append(it)
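     # PARSEVERBS: an item that is really a participle may take a dependent
     # noun phrase of its own and then serve as a single adjective of the
     # noun phrase that follows; semantic links help decide whether the
     # intervening phrase really depends on the participle.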
     i = 0
     first_pass3044 = True
     while True:
         if first_pass3044: first_pass3044 = False
         else: i += 1
         if (not (i < len(items))): break
         if (items[i].can_be_adj
                 and items[i].begin_token.morph.class0_.is_verb):
             it = items[i].begin_token
             if (not it.get_morph_class_in_dictionary().is_verb):
                 continue
             if (it.is_value("УПОЛНОМОЧЕННЫЙ", None)):
                 continue
             if ((((typ) & (NounPhraseParseAttr.PARSEVERBS))) == (
                     NounPhraseParseAttr.NO)):
                 continue
             inp = _NounPraseHelperInt.__try_parse_ru(
                 items[i].end_token.next0_, NounPhraseParseAttr.NO,
                 max_char_pos, None)
             if (inp is None):
                 continue
             if (inp.anafor is not None and i == (len(items) - 1)
                     and NounPhraseItem.try_accord_adj_and_noun(
                         items[i],
                         Utils.asObjectOrNull(inp.noun, NounPhraseItem))):
                 inp.begin_token = first
                 ii = 0
                 while ii < len(items):
                     inp.adjectives.insert(ii, items[ii])
                     ii += 1
                 return inp
             if (inp.end_token.whitespaces_after_count > 3):
                 continue
             npt1 = _NounPraseHelperInt.__try_parse_ru(
                 inp.end_token.next0_, NounPhraseParseAttr.NO, max_char_pos,
                 None)
             if (npt1 is None):
                 continue
             ok = True
             j = 0
             while j <= i:
                 if (not NounPhraseItem.try_accord_adj_and_noun(
                         items[j],
                         Utils.asObjectOrNull(npt1.noun, NounPhraseItem))):
                     ok = False
                     break
                 j += 1
             if (not ok):
                 continue
             verb = VerbPhraseHelper.try_parse(it, True, False, False)
             if (verb is None):
                 continue
             vlinks = SemanticHelper.try_create_links(verb, inp, None)
             nlinks = SemanticHelper.try_create_links(inp, npt1, None)
             if (len(vlinks) == 0 and len(nlinks) > 0):
                 continue
             j = 0
             while j <= i:
                 npt1.adjectives.insert(j, items[j])
                 j += 1
             items[i].end_token = inp.end_token
             mmm = MorphCollection(npt1.morph)
             bil = list()
             j = 0
             while j <= i:
                 bil.clear()
                 for m in items[j].adj_morph:
                     bil.append(m)
                 mmm.remove_items_list_cla(bil, None)
                 j += 1
             if (mmm.gender != MorphGender.UNDEFINED
                     or mmm.number != MorphNumber.UNDEFINED
                     or not mmm.case_.is_undefined):
                 npt1.morph = mmm
             if (adverbs is not None):
                 if (npt1.adverbs is None):
                     npt1.adverbs = adverbs
                 else:
                     npt1.adverbs[0:0] = adverbs
             npt1.begin_token = first
             return npt1
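     # ADJECTIVECANBELAST apparently allows the inverted order
     # "<noun> <adjective>": an agreeing adjective found after the single
     # item is prepended to the item list.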
     ok2 = False
     if ((len(items) == 1 and
          (((typ) & (NounPhraseParseAttr.ADJECTIVECANBELAST))) !=
          (NounPhraseParseAttr.NO) and
          (items[0].whitespaces_after_count < 3))
             and not items[0].is_adverb):
         if (not items[0].can_be_adj):
             ok2 = True
         elif (items[0].is_personal_pronoun and items[0].can_be_noun):
             ok2 = True
     if (ok2):
         it = NounPhraseItem.try_parse(items[0].end_token.next0_, None, typ)
         if (it is not None and it.can_be_adj
                 and it.begin_token.chars.is_all_lower):
             ok2 = True
             if (it.is_adverb or it.is_verb):
                 ok2 = False
             if (it.is_pronoun and items[0].is_pronoun):
                 ok2 = False
                 if (it.can_be_adj_for_personal_pronoun
                         and items[0].is_personal_pronoun):
                     ok2 = True
             if (ok2 and NounPhraseItem.try_accord_adj_and_noun(
                     it, items[0])):
                 npt1 = _NounPraseHelperInt.__try_parse_ru(
                     it.begin_token, typ, max_char_pos, None)
                 if (npt1 is not None and ((npt1.end_char > it.end_char
                                            or len(npt1.adjectives) > 0))):
                     pass
                 else:
                     items.insert(0, it)
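     # Choose the head noun: scan the items from right to left for the first
     # one that can be a noun and is compatible with its left neighbour; the
     # items before it remain adjective candidates.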
     noun = None
     adj_after = None
     for i in range(len(items) - 1, -1, -1):
         if (items[i].can_be_noun):
             if (items[i].conj_before):
                 continue
             if (i > 0 and not items[i - 1].can_be_adj):
                 continue
             if (i > 0 and items[i - 1].can_be_noun):
                 if (items[i - 1].is_doubt_adjective):
                     continue
                 if (items[i - 1].is_pronoun and items[i].is_pronoun):
                     if (items[i].is_pronoun and
                             items[i - 1].can_be_adj_for_personal_pronoun):
                         pass
                     else:
                         continue
             noun = items[i]
             del items[i:i + len(items) - i]
             if (adj_after is not None):
                 items.append(adj_after)
             elif (len(items) > 0 and items[0].can_be_noun
                   and not items[0].can_be_adj):
                 noun = items[0]
                 items.clear()
             break
     if (noun is None):
         return None
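     # Build the result token around the chosen head noun, keep adverbs that
     # precede it, merge the noun's morphology and drop a nominative reading
     # that directly follows a preposition.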
     res = NounPhraseToken._new466(first, noun.end_token, prep)
     if (adverbs is not None):
         for a in adverbs:
             if (a.begin_char < noun.begin_char):
                 if (len(items) == 0 and prep is None):
                     return None
                 if (res.adverbs is None):
                     res.adverbs = list()
                 res.adverbs.append(a)
     res.noun = (noun)
     res.multi_nouns = noun.multi_nouns
     if (kak):
         res.multi_nouns = True
     res.internal_noun = internal_noun_prase
     for v in noun.noun_morph:
         noun.morph.add_item(v)
     res.morph = noun.morph
     if (res.morph.case_.is_nominative and first.previous is not None
             and first.previous.morph.class0_.is_preposition):
         res.morph.case_ = (res.morph.case_) ^ MorphCase.NOMINATIVE
     if ((((typ) &
           (NounPhraseParseAttr.PARSEPRONOUNS))) == (NounPhraseParseAttr.NO)
             and ((res.morph.class0_.is_pronoun
                   or res.morph.class0_.is_personal_pronoun))):
         return None
     stat = None
     if (len(items) > 1):
         stat = dict()
     need_update_morph = False
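     # Keep only the noun variants that agree with every adjective item;
     # numeral-like adjectives are treated specially and force the plural.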
     if (len(items) > 0):
         ok_list = list()
         is_num_not = False
         for vv in noun.noun_morph:
             i = 0
             v = vv
             while i < len(items):
                 ok = False
                 for av in items[i].adj_morph:
                     if (v.check_accord(av, False, False)):
                         ok = True
                         if (not ((av.case_) & v.case_).is_undefined
                                 and av.case_ != v.case_):
                             v.case_ = av.case_ = (av.case_) & v.case_
                         break
                 if (not ok):
                     if (items[i].can_be_numeric_adj
                             and items[i].try_accord_var(v, False)):
                         ok = True
                         v1 = NounPhraseItemTextVar()
                         v1.copy_from_item(v)
                         v1.number = MorphNumber.PLURAL
                         is_num_not = True
                         v1.case_ = MorphCase()
                         for a in items[i].adj_morph:
                             v1.case_ = (v1.case_) | a.case_
                         v = v1
                     else:
                         break
                 i += 1
             if (i >= len(items)):
                 ok_list.append(v)
         if (len(ok_list) > 0 and
             (((len(ok_list) < res.morph.items_count) or is_num_not))):
             res.morph = MorphCollection()
             for v in ok_list:
                 res.morph.add_item(v)
             if (not is_num_not):
                 noun.morph = res.morph
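     # Attach the adjectives, accumulating their agreed morphological
     # variants and (in stat) statistics over the last letters of their
     # normal forms, used later to reorder ambiguous variants.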
     i = 0
     first_pass3045 = True
     while True:
         if first_pass3045: first_pass3045 = False
         else: i += 1
         if (not (i < len(items))): break
         for av in items[i].adj_morph:
             for v in noun.noun_morph:
                 if (v.check_accord(av, False, False)):
                     if (not ((av.case_) & v.case_).is_undefined
                             and av.case_ != v.case_):
                         v.case_ = av.case_ = (av.case_) & v.case_
                         need_update_morph = True
                     items[i].morph.add_item(av)
                     if (stat is not None and av.normal_value is not None
                             and len(av.normal_value) > 1):
                         last = av.normal_value[len(av.normal_value) - 1]
                         if (not last in stat):
                             stat[last] = 1
                         else:
                             stat[last] += 1
         if (items[i].is_pronoun or items[i].is_personal_pronoun):
             res.anafor = items[i].begin_token
             if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (
                     NounPhraseParseAttr.NO)):
                 continue
         tt = Utils.asObjectOrNull(items[i].begin_token, TextToken)
         if (tt is not None and not tt.term.startswith("ВЫСШ")):
             err = False
             for wf in tt.morph.items:
                 if (wf.class0_.is_adjective):
                     if (wf.contains_attr("прев.", None)):
                         if ((((typ) &
                               (NounPhraseParseAttr.IGNOREADJBEST))) !=
                             (NounPhraseParseAttr.NO)):
                             err = True
                     if (wf.contains_attr("к.ф.", None)
                             and tt.morph.class0_.is_personal_pronoun):
                         return None
             if (err):
                 continue
         if (res.morph.case_.is_nominative):
             v = MiscHelper.get_text_value_of_meta_token(
                 items[i], GetTextAttr.KEEPQUOTES)
             if (not Utils.isNullOrEmpty(v)):
                 if (items[i].get_normal_case_text(
                         None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED,
                         False) != v):
                     wf = NounPhraseItemTextVar(items[i].morph, None)
                     wf.normal_value = v
                     wf.class0_ = MorphClass.ADJECTIVE
                     wf.case_ = res.morph.case_
                     if (res.morph.case_.is_prepositional
                             or res.morph.gender == MorphGender.NEUTER
                             or res.morph.gender == MorphGender.FEMINIE):
                         items[i].morph.add_item(wf)
                     else:
                         items[i].morph.insert_item(0, wf)
         res.adjectives.append(items[i])
         if (items[i].end_char > res.end_char):
             res.end_token = items[i].end_token
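     # Reject the phrase when adjacent adjectives are separated by too much
     # whitespace or differ suspiciously in capitalization.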
     i = 0
     first_pass3046 = True
     while True:
         if first_pass3046: first_pass3046 = False
         else: i += 1
         if (not (i < (len(res.adjectives) - 1))): break
         if (res.adjectives[i].whitespaces_after_count > 5):
             if (res.adjectives[i].chars != res.adjectives[i + 1].chars):
                 if (not res.adjectives[i + 1].chars.is_all_lower):
                     return None
                 if (res.adjectives[i].chars.is_all_upper
                         and res.adjectives[i + 1].chars.is_capital_upper):
                     return None
                 if (res.adjectives[i].chars.is_capital_upper
                         and res.adjectives[i + 1].chars.is_all_upper):
                     return None
             if (res.adjectives[i].whitespaces_after_count > 10):
                 if (res.adjectives[i].newlines_after_count == 1):
                     if (res.adjectives[i].chars.is_capital_upper and i == 0
                             and res.adjectives[i + 1].chars.is_all_lower):
                         continue
                     if (res.adjectives[i].chars == res.adjectives[
                             i + 1].chars):
                         continue
                 return None
     if (need_update_morph):
         noun.morph = MorphCollection()
         for v in noun.noun_morph:
             noun.morph.add_item(v)
         res.morph = noun.morph
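     # Consistency checks on the commas and conjunctions between homogeneous
     # adjectives.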
     if (len(res.adjectives) > 0):
         if (noun.begin_token.previous is not None):
             if (noun.begin_token.previous.is_comma_and):
                 if (res.adjectives[0].begin_char > noun.begin_char):
                     pass
                 else:
                     return None
         zap = 0
         and0_ = 0
         cou = 0
         last_and = False
         i = 0
         while i < (len(res.adjectives) - 1):
             te = res.adjectives[i].end_token.next0_
             if (te is None):
                 return None
             if (te.is_char('(')):
                 pass
             elif (te.is_comma):
                 zap += 1
                 last_and = False
             elif (te.is_and or te.is_or):
                 and0_ += 1
                 last_and = True
             if (not res.adjectives[i].begin_token.morph.class0_.is_pronoun):
                 cou += 1
             i += 1
         if ((zap + and0_) > 0):
             if (and0_ > 1):
                 return None
             elif (and0_ == 1 and not last_and):
                 return None
             if ((zap + and0_) != cou):
                 if (and0_ == 1):
                     pass
                 else:
                     return None
             last = Utils.asObjectOrNull(
                 res.adjectives[len(res.adjectives) - 1], NounPhraseItem)
             if (last.is_pronoun and not last_and):
                 return None
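     # Use the letter statistics to put the more plausible normal form first
     # for adjectives that still carry two variants.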
     if (stat is not None):
         for adj in items:
             if (adj.morph.items_count > 1):
                 w1 = Utils.asObjectOrNull(adj.morph.get_indexer_item(0),
                                           NounPhraseItemTextVar)
                 w2 = Utils.asObjectOrNull(adj.morph.get_indexer_item(1),
                                           NounPhraseItemTextVar)
                 if ((len(w1.normal_value) < 2)
                         or (len(w2.normal_value) < 2)):
                     break
                 l1 = w1.normal_value[len(w1.normal_value) - 1]
                 l2 = w2.normal_value[len(w2.normal_value) - 1]
                 i1 = 0
                 i2 = 0
                 wrapi1468 = RefOutArgWrapper(0)
                 Utils.tryGetValue(stat, l1, wrapi1468)
                 i1 = wrapi1468.value
                 wrapi2467 = RefOutArgWrapper(0)
                 Utils.tryGetValue(stat, l2, wrapi2467)
                 i2 = wrapi2467.value
                 if (i1 < i2):
                     adj.morph.remove_item(1)
                     adj.morph.insert_item(0, w2)
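     # A phrase whose first token is a dictionary verb form is rejected when
     # the left context (noun plus comma) apparently supports a finite-verb
     # reading and the phrase itself is in the instrumental case.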
     if (res.begin_token.get_morph_class_in_dictionary().is_verb
             and len(items) > 0):
         if (not res.begin_token.chars.is_all_lower
                 or res.begin_token.previous is None):
             pass
         elif (res.begin_token.previous.morph.class0_.is_preposition):
             pass
         else:
             comma = False
             tt = res.begin_token.previous
             first_pass3047 = True
             while True:
                 if first_pass3047: first_pass3047 = False
                 else: tt = tt.previous
                 if (not (tt is not None and tt.end_char <= res.end_char)):
                     break
                 if (tt.morph.class0_.is_adverb):
                     continue
                 if (tt.is_char_of(".;")):
                     break
                 if (tt.is_comma):
                     comma = True
                     continue
                 if (tt.is_value("НЕ", None)):
                     continue
                 if (((tt.morph.class0_.is_noun
                       or tt.morph.class0_.is_proper)) and comma):
                     for it in res.begin_token.morph.items:
                         if (it.class0_.is_verb
                                 and (isinstance(it, MorphWordForm))):
                             if (tt.morph.check_accord(it, False, False)):
                                 if (res.morph.case_.is_instrumental):
                                     return None
                 break
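     # A single-token result that is primarily an adverb survives only with
     # a preceding preposition, a noun homonym, or the word "ВЕСЬ".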
     if (res.begin_token == res.end_token):
         mc = res.begin_token.get_morph_class_in_dictionary()
         if (mc.is_adverb):
             if (res.begin_token.previous is not None and
                     res.begin_token.previous.morph.class0_.is_preposition):
                 pass
             elif (mc.is_noun and not mc.is_preposition
                   and not mc.is_conjunction):
                 pass
             elif (res.begin_token.is_value("ВЕСЬ", None)):
                 pass
             else:
                 return None
     if (def_noun is not None and def_noun.end_token == res.end_token
             and len(res.adjectives) > 0):
         res.end_token = res.adjectives[len(res.adjectives) - 1].end_token
     return res
Example #26
0
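 # Apparently a title-detection heuristic: it walks the candidate span,
 # adding or subtracting rank points for title-like and non-title-like
 # tokens, and returns False when the span cannot be a document title
 # (uses the math module for the upper-case word ratio check).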
 def __calc_rank_and_value(self, min_newlines_count: int) -> bool:
     self.rank = 0
     if (self.begin_token.chars.is_all_lower):
         self.rank -= 30
     words = 0
     up_words = 0
     notwords = 0
     line_number = 0
     tstart = self.begin_token
     tend = self.end_token
     t = self.begin_token
     first_pass3396 = True
     while True:
         if first_pass3396: first_pass3396 = False
         else: t = t.next0_
         if (not (t != self.end_token.next0_ and t is not None
                  and t.end_char <= self.end_token.end_char)):
             break
         if (t.is_newline_before):
             pass
         tit = TitleItemToken.try_attach(t)
         if (tit is not None):
             if (tit.typ == TitleItemToken.Types.THEME
                     or tit.typ == TitleItemToken.Types.TYPANDTHEME):
                 if (t != self.begin_token):
                     if (line_number > 0):
                         return False
                     notwords = 0
                     up_words = notwords
                     words = up_words
                     tstart = tit.end_token.next0_
                 t = tit.end_token
                 if (t.next0_ is None):
                     return False
                 if (t.next0_.chars.is_letter
                         and t.next0_.chars.is_all_lower):
                     self.rank += 20
                 else:
                     self.rank += 100
                 tstart = t.next0_
                 if (tit.typ == TitleItemToken.Types.TYPANDTHEME):
                     self.type_value = tit.value
                 continue
             if (tit.typ == TitleItemToken.Types.TYP):
                 if (t == self.begin_token):
                     if (tit.end_token.is_newline_after):
                         self.type_value = tit.value
                         self.rank += 5
                         tstart = tit.end_token.next0_
                 t = tit.end_token
                 words += 1
                 if (tit.begin_token != tit.end_token):
                     words += 1
                 if (tit.chars.is_all_upper):
                     up_words += 1
                 continue
             if (tit.typ == TitleItemToken.Types.DUST
                     or tit.typ == TitleItemToken.Types.SPECIALITY):
                 if (t == self.begin_token):
                     return False
                 self.rank -= 20
                 if (tit.typ == TitleItemToken.Types.SPECIALITY):
                     self.speciality = tit.value
                 t = tit.end_token
                 continue
             if (tit.typ == TitleItemToken.Types.CONSULTANT
                     or tit.typ == TitleItemToken.Types.BOSS
                     or tit.typ == TitleItemToken.Types.EDITOR):
                 t = tit.end_token
                 if (t.next0_ is not None and
                     ((t.next0_.is_char_of(":") or t.next0_.is_hiphen
                       or t.whitespaces_after_count > 4))):
                     self.rank -= 10
                 else:
                     self.rank -= 2
                 continue
             return False
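         # Bibliographic-reference markers lower the rank; note that
         # BookLinkTyp.N is already matched by the first branch below, so
         # the elif effectively fires only for PAGERANGE.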
         blt = BookLinkToken.try_parse(t, 0)
         if (blt is not None):
             if (blt.typ == BookLinkTyp.MISC or blt.typ == BookLinkTyp.N
                     or blt.typ == BookLinkTyp.PAGES):
                 self.rank -= 10
             elif (blt.typ == BookLinkTyp.N
                   or blt.typ == BookLinkTyp.PAGERANGE):
                 self.rank -= 20
         if (t == self.begin_token and BookLinkToken.try_parse_author(
                 t, FioTemplateType.UNDEFINED) is not None):
             self.rank -= 20
         if (t.is_newline_before and t != self.begin_token):
             line_number += 1
             if (line_number > 4):
                 return False
             if (t.chars.is_all_lower):
                 self.rank += 10
             elif (t.previous.is_char('.')):
                 self.rank -= 10
             elif (t.previous.is_char_of(",-")):
                 self.rank += 10
             else:
                 npt = NounPhraseHelper.try_parse(t.previous,
                                                  NounPhraseParseAttr.NO, 0,
                                                  None)
                 if (npt is not None and npt.end_char >= t.end_char):
                     self.rank += 10
         if (t != self.begin_token
                 and t.newlines_before_count > min_newlines_count):
             self.rank -= (t.newlines_before_count - min_newlines_count)
         bst = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
         if (bst is not None and bst.is_quote_type
                 and bst.end_token.end_char <= self.end_token.end_char):
             if (words == 0):
                 tstart = bst.begin_token
                 self.rank += 10
                 if (bst.end_token == self.end_token):
                     tend = self.end_token
                     self.rank += 10
         rli = t.get_referents()
         if (rli is not None):
             for r in rli:
                 if (isinstance(r, OrganizationReferent)):
                     if (t.is_newline_before):
                         self.rank -= 10
                     else:
                         self.rank -= 4
                     continue
                 if ((isinstance(r, GeoReferent))
                         or (isinstance(r, PersonReferent))):
                     if (t.is_newline_before):
                         self.rank -= 5
                         if (t.is_newline_after or t.next0_ is None):
                             self.rank -= 20
                         elif (t.next0_.is_hiphen
                               or (isinstance(t.next0_, NumberToken))
                               or (isinstance(t.next0_.get_referent(),
                                              DateReferent))):
                             self.rank -= 20
                         elif (t != self.begin_token):
                             self.rank -= 20
                     continue
                 if ((isinstance(r, GeoReferent))
                         or (isinstance(r, DenominationReferent))):
                     continue
                 if ((isinstance(r, UriReferent))
                         or (isinstance(r, PhoneReferent))):
                     return False
                 if (t.is_newline_before):
                     self.rank -= 4
                 else:
                     self.rank -= 2
                 if (t == self.begin_token and (isinstance(
                         self.end_token.get_referent(), PersonReferent))):
                     self.rank -= 10
             words += 1
             if (t.chars.is_all_upper):
                 up_words += 1
             if (t == self.begin_token):
                 if (t.is_newline_after):
                     self.rank -= 10
                 elif (t.next0_ is not None and t.next0_.is_char('.')
                       and t.next0_.is_newline_after):
                     self.rank -= 10
             continue
         if (isinstance(t, NumberToken)):
             if (t.typ == NumberSpellingType.WORDS):
                 words += 1
                 if (t.chars.is_all_upper):
                     up_words += 1
             else:
                 notwords += 1
             continue
         pat = PersonAttrToken.try_attach(
             t, None, PersonAttrToken.PersonAttrAttachAttrs.NO)
         if (pat is not None):
             if (t.is_newline_before):
                 if (not pat.morph.case_.is_undefined
                         and not pat.morph.case_.is_nominative):
                     pass
                 elif (pat.chars.is_all_upper):
                     pass
                 else:
                     self.rank -= 20
             elif (t.chars.is_all_lower):
                 self.rank -= 1
             while t is not None:
                 words += 1
                 if (t.chars.is_all_upper):
                     up_words += 1
                 if (t == pat.end_token):
                     break
                 t = t.next0_
             continue
         oitt = OrgItemTypeToken.try_attach(t, True, None)
         if (oitt is not None):
             if (oitt.morph.number != MorphNumber.PLURAL
                     and not oitt.is_doubt_root_word):
                 if (not oitt.morph.case_.is_undefined
                         and not oitt.morph.case_.is_nominative):
                     words += 1
                     if (t.chars.is_all_upper):
                         up_words += 1
                 else:
                     self.rank -= 4
                     if (t == self.begin_token):
                         self.rank -= 5
             else:
                 words += 1
                 if (t.chars.is_all_upper):
                     up_words += 1
             t = oitt.end_token
             continue
         tt = Utils.asObjectOrNull(t, TextToken)
         if (tt is not None):
             if (tt.is_char('©')):
                 self.rank -= 10
             if (tt.is_char('_')):
                 self.rank -= 1
             if (tt.chars.is_letter):
                 if (tt.length_char > 2):
                     words += 1
                     if (t.chars.is_all_upper):
                         up_words += 1
             elif (not tt.is_char(',')):
                 notwords += 1
             if (tt.is_pure_verb):
                 self.rank -= 30
                 words -= 1
                 break
             if (tt == self.end_token):
                 if (tt.morph.class0_.is_preposition
                         or tt.morph.class0_.is_conjunction):
                     self.rank -= 10
                 elif (tt.is_char('.')):
                     self.rank += 5
             elif (tt.is_char_of("._")):
                 self.rank -= 5
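     # Final score: reward word-like tokens, penalize everything else, and
     # give up if no real words were seen and the rank is still low.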
     self.rank += words
     self.rank -= notwords
     if ((words < 1) and (self.rank < 50)):
         return False
     if (tstart is None or tend is None):
         return False
     if (tstart.end_char > tend.end_char):
         return False
     tit1 = TitleItemToken.try_attach(self.end_token.next0_)
     if (tit1 is not None
             and ((tit1.typ == TitleItemToken.Types.TYP
                   or tit1.typ == TitleItemToken.Types.SPECIALITY))):
         if (tit1.end_token.is_newline_after):
             self.rank += 15
         else:
             self.rank += 10
         if (tit1.typ == TitleItemToken.Types.SPECIALITY):
             self.speciality = tit1.value
     if (up_words > 4 and up_words > (math.floor((0.8 * (words))))):
         if (tstart.previous is not None and
             (isinstance(tstart.previous.get_referent(), PersonReferent))):
             self.rank += (5 + up_words)
     self.begin_name_token = tstart
     self.end_name_token = tend
     return True
Example #27
0
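 # Apparently the main pass of UriAnalyzer: it walks the token chain,
 # matches known URI and identifier schemes against the _m_schemes
 # ontology (the integer tag of the matched termin selects the branch
 # below) and embeds the resulting UriReferent tokens into the kit.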
 def process(self, kit: 'AnalysisKit') -> None:
     ad = kit.get_analyzer_data(self)
     t = kit.first_token
     first_pass3419 = True
     while True:
         if first_pass3419: first_pass3419 = False
         else: t = t.next0_
         if (not (t is not None)): break
         tt = t
         i = 0
         tok = UriAnalyzer._m_schemes.try_parse(t, TerminParseAttr.NO)
         if (tok is not None):
             i = (tok.termin.tag)
             tt = tok.end_token
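             # The scheme tag apparently selects the handler: 0 and 10 are
             # ordinary "scheme:..." / "scheme://..." URLs, 2 is a bare
             # "WWW."-style domain, 1 covers codes such as ISBN, ISO and
             # ГОСТ, 3 is Skype, 4 is ICQ, and 5/6 are account and
             # registration numbers (ИНН, БИК, ОГРН, IBAN and similar).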
             if (tt.next0_ is not None and tt.next0_.is_char('(')):
                 tok1 = UriAnalyzer._m_schemes.try_parse(
                     tt.next0_.next0_, TerminParseAttr.NO)
                 if ((tok1 is not None and tok1.termin.canonic_text
                      == tok.termin.canonic_text
                      and tok1.end_token.next0_ is not None)
                         and tok1.end_token.next0_.is_char(')')):
                     tt = tok1.end_token.next0_
             if (i == 0):
                 if ((tt.next0_ is None or
                      ((not tt.next0_.is_char_of(":|")
                        and not tt.is_table_control_char))
                      or tt.next0_.is_whitespace_before)
                         or tt.next0_.whitespaces_after_count > 2):
                     continue
                 t1 = tt.next0_.next0_
                 while t1 is not None and t1.is_char_of("/\\"):
                     t1 = t1.next0_
                 if (t1 is None or t1.whitespaces_before_count > 2):
                     continue
                 ut = UriItemToken.attach_uri_content(t1, False)
                 if (ut is None):
                     continue
                 ur = Utils.asObjectOrNull(
                     ad.register_referent(
                         UriReferent._new2714(
                             tok.termin.canonic_text.lower(), ut.value)),
                     UriReferent)
                 rt = ReferentToken(ad.register_referent(ur), t,
                                    ut.end_token)
                 rt.begin_token = Utils.ifNotNull(
                     UriAnalyzer.__site_before(t.previous), t)
                 if (rt.end_token.next0_ is not None
                         and rt.end_token.next0_.is_char_of("/\\")):
                     rt.end_token = rt.end_token.next0_
                 kit.embed_token(rt)
                 t = (rt)
                 continue
             if (i == 10):
                 tt = tt.next0_
                 if (tt is None or not tt.is_char(':')):
                     continue
                 tt = tt.next0_
                 while tt is not None:
                     if (tt.is_char_of("/\\")):
                         pass
                     else:
                         break
                     tt = tt.next0_
                 if (tt is None):
                     continue
                 if (tt.is_value("WWW", None) and tt.next0_ is not None
                         and tt.next0_.is_char('.')):
                     tt = tt.next0_.next0_
                 if (tt is None or tt.is_newline_before):
                     continue
                 ut = UriItemToken.attach_uri_content(tt, True)
                 if (ut is None):
                     continue
                 if (len(ut.value) < 4):
                     continue
                 ur = Utils.asObjectOrNull(
                     ad.register_referent(
                         UriReferent._new2714(
                             tok.termin.canonic_text.lower(), ut.value)),
                     UriReferent)
                 rt = ReferentToken(ad.register_referent(ur), t,
                                    ut.end_token)
                 rt.begin_token = Utils.ifNotNull(
                     UriAnalyzer.__site_before(t.previous), t)
                 if (rt.end_token.next0_ is not None
                         and rt.end_token.next0_.is_char_of("/\\")):
                     rt.end_token = rt.end_token.next0_
                 kit.embed_token(rt)
                 t = (rt)
                 continue
             if (i == 2):
                 if (tt.next0_ is None or not tt.next0_.is_char('.')
                         or tt.next0_.is_whitespace_before):
                     continue
                 if (tt.next0_.is_whitespace_after
                         and tok.termin.canonic_text != "WWW"):
                     continue
                 ut = UriItemToken.attach_uri_content(
                     tt.next0_.next0_, True)
                 if (ut is None):
                     continue
                 ur = Utils.asObjectOrNull(
                     ad.register_referent(
                         UriReferent._new2714("http", ut.value)),
                     UriReferent)
                 rt = ReferentToken(ur, t, ut.end_token)
                 rt.begin_token = Utils.ifNotNull(
                     UriAnalyzer.__site_before(t.previous), t)
                 if (rt.end_token.next0_ is not None
                         and rt.end_token.next0_.is_char_of("/\\")):
                     rt.end_token = rt.end_token.next0_
                 kit.embed_token(rt)
                 t = (rt)
                 continue
             if (i == 1):
                 sch = tok.termin.canonic_text
                 ut = None
                 if (sch == "ISBN"):
                     ut = UriItemToken.attachisbn(tt.next0_)
                     if ((ut is None and t.previous is not None
                          and t.previous.is_char('('))
                             and t.next0_ is not None
                             and t.next0_.is_char(')')):
                         tt0 = t.previous.previous
                         while tt0 is not None:
                             if (tt0.whitespaces_after_count > 2):
                                 break
                             if (tt0.is_whitespace_before):
                                 ut = UriItemToken.attachisbn(tt0)
                                 if (ut is not None and
                                         ut.end_token.next0_ != t.previous):
                                     ut = (None)
                                 break
                             tt0 = tt0.previous
                 elif ((sch == "RFC" or sch == "ISO" or sch == "ОКФС")
                       or sch == "ОКОПФ"):
                     ut = UriItemToken.attachisocontent(tt.next0_, ":")
                 elif (sch == "ГОСТ"):
                     ut = UriItemToken.attachisocontent(tt.next0_, "-.")
                 elif (sch == "ТУ"):
                     if (tok.chars.is_all_upper):
                         ut = UriItemToken.attachisocontent(tt.next0_, "-.")
                         if (ut is not None and (ut.length_char < 10)):
                             ut = (None)
                 else:
                     ut = UriItemToken.attachbbk(tt.next0_)
                 if (ut is None):
                     continue
                 ur = Utils.asObjectOrNull(
                     ad.register_referent(
                         UriReferent._new2717(ut.value, sch)), UriReferent)
                 rt = None
                 if (ut.begin_char < t.begin_char):
                     rt = ReferentToken(ur, ut.begin_token, t)
                     if (t.next0_ is not None and t.next0_.is_char(')')):
                         rt.end_token = t.next0_
                 else:
                     rt = ReferentToken(ur, t, ut.end_token)
                 if (t.previous is not None
                         and t.previous.is_value("КОД", None)):
                     rt.begin_token = t.previous
                 if (ur.scheme.startswith("ОК")):
                     UriAnalyzer.__check_detail(rt)
                 kit.embed_token(rt)
                 t = (rt)
                 if (ur.scheme.startswith("ОК")):
                     while t.next0_ is not None:
                         if (t.next0_.is_comma_and and
                             (isinstance(t.next0_.next0_, NumberToken))):
                             pass
                         else:
                             break
                         ut = UriItemToken.attachbbk(t.next0_.next0_)
                         if (ut is None):
                             break
                         ur = (Utils.asObjectOrNull(
                             ad.register_referent(
                                 UriReferent._new2717(ut.value, sch)),
                             UriReferent))
                         rt = ReferentToken(ur, t.next0_.next0_,
                                            ut.end_token)
                         UriAnalyzer.__check_detail(rt)
                         kit.embed_token(rt)
                         t = (rt)
                 continue
             if (i == 3):
                 t0 = tt.next0_
                 while t0 is not None:
                     if (t0.is_char_of(":|") or t0.is_table_control_char
                             or t0.is_hiphen):
                         t0 = t0.next0_
                     else:
                         break
                 if (t0 is None):
                     continue
                 ut = UriItemToken.attach_skype(t0)
                 if (ut is None):
                     continue
                 ur = Utils.asObjectOrNull(
                     ad.register_referent(
                         UriReferent._new2717(
                             ut.value.lower(),
                             ("skype" if tok.termin.canonic_text == "SKYPE"
                              else tok.termin.canonic_text))), UriReferent)
                 rt = ReferentToken(ur, t, ut.end_token)
                 kit.embed_token(rt)
                 t = (rt)
                 continue
             if (i == 4):
                 t0 = tt.next0_
                 if (t0 is not None
                         and ((t0.is_char(':') or t0.is_hiphen))):
                     t0 = t0.next0_
                 if (t0 is None):
                     continue
                 ut = UriItemToken.attach_icq_content(t0)
                 if (ut is None):
                     continue
                 ur = Utils.asObjectOrNull(
                     ad.register_referent(
                         UriReferent._new2717(ut.value, "ICQ")),
                     UriReferent)
                 rt = ReferentToken(ur, t, t0)
                 kit.embed_token(rt)
                 t = (rt)
                 continue
             if (i == 5 or i == 6):
                 t0 = tt.next0_
                 has_tab_cel = False
                 is_iban = False
                 first_pass3420 = True
                 while True:
                     if first_pass3420: first_pass3420 = False
                     else: t0 = t0.next0_
                     if (not (t0 is not None)): break
                     if ((((t0.is_value("БАНК", None) or
                            t0.morph.class0_.is_preposition or t0.is_hiphen)
                           or t0.is_char_of(".:") or t0.is_value(
                               "РУБЛЬ", None)) or t0.is_value("РУБ", None)
                          or t0.is_value("ДОЛЛАР", None))
                             or t0.is_value("№", None)
                             or t0.is_value("N", None)):
                         pass
                     elif (t0.is_table_control_char):
                         has_tab_cel = True
                     elif (t0.is_char_of("\\/") and t0.next0_ is not None
                           and t0.next0_.is_value("IBAN", None)):
                         is_iban = True
                         t0 = t0.next0_
                     elif (t0.is_value("IBAN", None)):
                         is_iban = True
                     elif (isinstance(t0, TextToken)):
                         npt = NounPhraseHelper.try_parse(
                             t0, NounPhraseParseAttr.NO, 0, None)
                         if (npt is not None
                                 and npt.morph.case_.is_genitive):
                             t0 = npt.end_token
                             continue
                         break
                     else:
                         break
                 if (t0 is None):
                     continue
                 ur2 = None
                 ur2begin = None
                 ur2end = None
                 t00 = t0
                 val = t0.get_source_text()
                 if (str.isdigit(val[0])
                         and ((((i == 6 or tok.termin.canonic_text == "ИНН"
                                 or tok.termin.canonic_text == "БИК")
                                or tok.termin.canonic_text == "ОГРН"
                                or tok.termin.canonic_text == "СНИЛС")
                               or tok.termin.canonic_text == "ОКПО"))):
                     if (t0.chars.is_letter):
                         continue
                     if (Utils.isNullOrEmpty(val)
                             or not str.isdigit(val[0])):
                         continue
                     if (t0.length_char < 9):
                         tmp = io.StringIO()
                         print(val, end="", file=tmp)
                         ttt = t0.next0_
                         first_pass3421 = True
                         while True:
                             if first_pass3421: first_pass3421 = False
                             else: ttt = ttt.next0_
                             if (not (ttt is not None)): break
                             if (ttt.whitespaces_before_count > 1):
                                 break
                             if (isinstance(ttt, NumberToken)):
                                 print(ttt.get_source_text(),
                                       end="",
                                       file=tmp)
                                 t0 = ttt
                                 continue
                             if (ttt.is_hiphen or ttt.is_char('.')):
                                 if (ttt.next0_ is None or not (isinstance(
                                         ttt.next0_, NumberToken))):
                                     break
                                 if (ttt.is_whitespace_after
                                         or ttt.is_whitespace_before):
                                     break
                                 continue
                             break
                         val = (None)
                         if (tmp.tell() == 20):
                             val = Utils.toStringStringIO(tmp)
                         elif (tmp.tell() == 9
                               and tok.termin.canonic_text == "БИК"):
                             val = Utils.toStringStringIO(tmp)
                         elif (((tmp.tell() == 10 or tmp.tell() == 12))
                               and tok.termin.canonic_text == "ИНН"):
                             val = Utils.toStringStringIO(tmp)
                         elif (tmp.tell() >= 15
                               and tok.termin.canonic_text == "Л/С"):
                             val = Utils.toStringStringIO(tmp)
                         elif (tmp.tell() >= 11 and
                               ((tok.termin.canonic_text == "ОГРН"
                                 or tok.termin.canonic_text == "СНИЛС"))):
                             val = Utils.toStringStringIO(tmp)
                         elif (tok.termin.canonic_text == "ОКПО"):
                             val = Utils.toStringStringIO(tmp)
                     if (val is None):
                         continue
                 elif (not (isinstance(t0, NumberToken))):
                     if ((isinstance(t0, TextToken)) and is_iban):
                         tmp1 = io.StringIO()
                         t1 = None
                         ttt = t0
                         first_pass3422 = True
                         while True:
                             if first_pass3422: first_pass3422 = False
                             else: ttt = ttt.next0_
                             if (not (ttt is not None)): break
                             if (ttt.is_newline_before and ttt != t0):
                                 break
                             if (ttt.is_hiphen):
                                 continue
                             if (not (isinstance(ttt, NumberToken))):
                                 if (not (isinstance(ttt, TextToken))
                                         or not ttt.chars.is_latin_letter):
                                     break
                             print(ttt.get_source_text(), end="", file=tmp1)
                             t1 = ttt
                             if (tmp1.tell() >= 34):
                                 break
                         if (tmp1.tell() < 10):
                             continue
                         ur1 = UriReferent._new2717(
                             Utils.toStringStringIO(tmp1),
                             tok.termin.canonic_text)
                         ur1.add_slot(UriReferent.ATTR_DETAIL, "IBAN",
                                      False, 0)
                         rt1 = ReferentToken(ad.register_referent(ur1), t,
                                             t1)
                         kit.embed_token(rt1)
                         t = (rt1)
                         continue
                     if (not t0.is_char_of("/\\") or t0.next0_ is None):
                         continue
                     tok2 = UriAnalyzer._m_schemes.try_parse(
                         t0.next0_, TerminParseAttr.NO)
                     if (tok2 is None
                             or not (isinstance(tok2.termin.tag, int))
                             or (tok2.termin.tag) != i):
                         continue
                     t0 = tok2.end_token.next0_
                     while t0 is not None:
                         if (t0.is_char_of(":N№")):
                             t0 = t0.next0_
                         elif (t0.is_table_control_char):
                             t0 = t0.next0_
                             t00 = t0
                             has_tab_cel = True
                         else:
                             break
                     if (not (isinstance(t0, NumberToken))):
                         continue
                     tmp = io.StringIO()
                     while t0 is not None:
                         if (not (isinstance(t0, NumberToken))):
                             break
                         print(t0.get_source_text(), end="", file=tmp)
                         t0 = t0.next0_
                     if (t0 is None or not t0.is_char_of("/\\,")
                             or not (isinstance(t0.next0_, NumberToken))):
                         continue
                     val = Utils.toStringStringIO(tmp)
                     Utils.setLengthStringIO(tmp, 0)
                     ur2begin = t0.next0_
                     t0 = t0.next0_
                     while t0 is not None:
                         if (not (isinstance(t0, NumberToken))):
                             break
                         if (t0.whitespaces_before_count > 4
                                 and tmp.tell() > 0):
                             break
                         print(t0.get_source_text(), end="", file=tmp)
                         ur2end = t0
                         t0 = t0.next0_
                     ur2 = (Utils.asObjectOrNull(
                         ad.register_referent(
                             UriReferent._new2714(
                                 tok2.termin.canonic_text,
                                 Utils.toStringStringIO(tmp))),
                         UriReferent))
                 if (len(val) < 5):
                     continue
                 ur = Utils.asObjectOrNull(
                     ad.register_referent(
                         UriReferent._new2717(val,
                                              tok.termin.canonic_text)),
                     UriReferent)
                 rt = ReferentToken(
                     ur, t, (t0 if ur2begin is None else ur2begin.previous))
                 if (has_tab_cel):
                     rt.begin_token = t00
                 if (ur.scheme.startswith("ОК")):
                     UriAnalyzer.__check_detail(rt)
                 ttt = t.previous
                 first_pass3423 = True
                 while True:
                     if first_pass3423: first_pass3423 = False
                     else: ttt = ttt.previous
                     if (not (ttt is not None)): break
                     if (ttt.is_table_control_char):
                         break
                     if (ttt.morph.class0_.is_preposition):
                         continue
                     if (ttt.is_value("ОРГАНИЗАЦИЯ", None)):
                         continue
                     if (ttt.is_value("НОМЕР", None)
                             or ttt.is_value("КОД", None)):
                         rt.begin_token = ttt
                         t = rt.begin_token
                     break
                 kit.embed_token(rt)
                 t = (rt)
                 if (ur2 is not None):
                     rt2 = ReferentToken(ur2, ur2begin, ur2end)
                     kit.embed_token(rt2)
                     t = (rt2)
                 while (t.next0_ is not None and t.next0_.is_comma_and
                        and (isinstance(t.next0_.next0_, NumberToken))
                        and t.next0_.next0_.length_char == len(val)
                        and t.next0_.next0_.typ == NumberSpellingType.DIGIT):
                     val2 = t.next0_.next0_.get_source_text()
                     ur2 = UriReferent()
                     ur2.scheme = ur.scheme
                     ur2.value = val2
                     ur2 = (Utils.asObjectOrNull(ad.register_referent(ur2),
                                                 UriReferent))
                     rt2 = ReferentToken(ur2, t.next0_, t.next0_.next0_)
                     kit.embed_token(rt2)
                     t = (rt2)
                 continue
             continue
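         # No scheme matched: a standalone '@' is treated as an e-mail
         # address, joining the user part(s) on the left with the domain on
         # the right and extending leftwards over words like "e-mail".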
         if (t.is_char('@')):
             u1s = UriItemToken.attach_mail_users(t.previous)
             if (u1s is None):
                 continue
             u2 = UriItemToken.attach_domain_name(t.next0_, False, True)
             if (u2 is None):
                 continue
             for ii in range(len(u1s) - 1, -1, -1):
                 ur = Utils.asObjectOrNull(
                     ad.register_referent(
                         UriReferent._new2717(
                             "{0}@{1}".format(u1s[ii].value,
                                              u2.value).lower(), "mailto")),
                     UriReferent)
                 b = u1s[ii].begin_token
                 t0 = b.previous
                 if (t0 is not None and t0.is_char(':')):
                     t0 = t0.previous
                 if (t0 is not None and ii == 0):
                     br = False
                     ttt = t0
                     first_pass3424 = True
                     while True:
                         if first_pass3424: first_pass3424 = False
                         else: ttt = ttt.previous
                         if (not (ttt is not None)): break
                         if (not (isinstance(ttt, TextToken))):
                             break
                         if (ttt != t0 and ttt.whitespaces_after_count > 1):
                             break
                         if (ttt.is_char(')')):
                             br = True
                             continue
                         if (ttt.is_char('(')):
                             if (not br):
                                 break
                             br = False
                             continue
                         if (ttt.is_value("EMAIL", None)
                                 or ttt.is_value("MAILTO", None)):
                             b = ttt
                             break
                         if (ttt.is_value("MAIL", None)):
                             b = ttt
                             if ((ttt.previous is not None
                                  and ttt.previous.is_hiphen
                                  and ttt.previous.previous is not None) and
                                 ((ttt.previous.previous.is_value(
                                     "E", None)
                                   or ttt.previous.previous.is_value(
                                       "Е", None)))):
                                 b = ttt.previous.previous
                             break
                         if (ttt.is_value("ПОЧТА", None)
                                 or ttt.is_value("АДРЕС", None)):
                             b = t0
                             ttt = ttt.previous
                             if (ttt is not None and ttt.is_char('.')):
                                 ttt = ttt.previous
                             if (ttt is not None and
                                 ((t0.is_value("ЭЛ", None)
                                   or ttt.is_value("ЭЛЕКТРОННЫЙ", None)))):
                                 b = ttt
                             if (b.previous is not None and
                                     b.previous.is_value("АДРЕС", None)):
                                 b = b.previous
                             break
                         if (ttt.morph.class0_.is_preposition):
                             continue
                 rt = ReferentToken(
                     ur, b,
                     (u2.end_token if ii == (len(u1s) -
                                             1) else u1s[ii].end_token))
                 kit.embed_token(rt)
                 t = (rt)
             continue
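         # A non-Cyrillic token in a plausible start position: try to read it as a
         # bare URL and register it with the default "http" scheme.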
         if (not t.chars.is_cyrillic_letter):
             if (t.is_whitespace_before
                     or ((t.previous is not None
                          and t.previous.is_char_of(",(")))):
                 u1 = UriItemToken.attach_url(t)
                 if (u1 is not None):
                     if (u1.is_whitespace_after
                             or u1.end_token.next0_ is None
                             or not u1.end_token.next0_.is_char('@')):
                         if (u1.end_token.next0_ is not None
                                 and u1.end_token.next0_.is_char_of("\\/")):
                             u2 = UriItemToken.attach_uri_content(t, False)
                             if (u2 is not None):
                                 u1 = u2
                         ur = Utils.asObjectOrNull(
                             ad.register_referent(
                                 UriReferent._new2714("http", u1.value)),
                             UriReferent)
                         rt = ReferentToken(ur, u1.begin_token,
                                            u1.end_token)
                         rt.begin_token = Utils.ifNotNull(
                             UriAnalyzer.__site_before(
                                 u1.begin_token.previous), u1.begin_token)
                         kit.embed_token(rt)
                         t = (rt)
                         continue
         if ((isinstance(t, TextToken)) and not t.is_whitespace_after
                 and t.length_char > 2):
             if (UriAnalyzer.__site_before(t.previous) is not None):
                 ut = UriItemToken.attach_uri_content(t, True)
                 if (ut is None or ut.value.find('.') <= 0
                         or ut.value.find('@') > 0):
                     continue
                 ur = Utils.asObjectOrNull(
                     ad.register_referent(
                         UriReferent._new2714("http", ut.value)),
                     UriReferent)
                 rt = ReferentToken(ur, t, ut.end_token)
                 rt.begin_token = UriAnalyzer.__site_before(t.previous)
                 if (rt.end_token.next0_ is not None
                         and rt.end_token.next0_.is_char_of("/\\")):
                     rt.end_token = rt.end_token.next0_
                 kit.embed_token(rt)
                 t = (rt)
                 continue
         if ((t.chars.is_latin_letter and not t.chars.is_all_lower
              and t.next0_ is not None) and not t.is_whitespace_after):
             if (t.next0_.is_char('/')):
                 rt = UriAnalyzer.__try_attach_lotus(
                     Utils.asObjectOrNull(t, TextToken))
                 if (rt is not None):
                     rt.referent = ad.register_referent(rt.referent)
                     kit.embed_token(rt)
                     t = (rt)
                     continue
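The fragment above belongs to UriAnalyzer's main pass: it assembles e-mail addresses around a bare '@', attaches schemeless URLs with a default "http" scheme, and recognizes Lotus-style references. The sketch below shows how such URI referents are typically obtained end to end; the entry points (Sdk.initialize_all, ProcessorService.create_processor, SourceOfAnalysis) and the "URI" type name are assumed from the public Pullenti SDK and are not part of this example.
 # Minimal sketch; the public API names used here are assumptions.
 from pullenti.Sdk import Sdk
 from pullenti.ner.ProcessorService import ProcessorService
 from pullenti.ner.SourceOfAnalysis import SourceOfAnalysis

 Sdk.initialize_all()
 processor = ProcessorService.create_processor()
 result = processor.process(
     SourceOfAnalysis("Пишите на info@example.com или заходите на www.example.com"))
 for entity in result.entities:
     if entity.type_name == "URI":  # the "URI" type name is an assumption
         print(str(entity))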
Example #28
 def __try_name_exist(li: typing.List['CityItemToken'],
                      oi: 'IntOntologyItem',
                      always: bool) -> 'ReferentToken':
     oi.value = (None)
     if (li is None or li[0].typ != CityItemToken.ItemType.CITY):
         return None
     oi.value = li[0].onto_item
     tt = Utils.asObjectOrNull(li[0].begin_token, TextToken)
     if (tt is None):
         return None
     ok = False
     nam = (li[0].value if oi.value is None else oi.value.canonic_text)
     if (nam is None):
         return None
     if (nam == "РИМ"):
         if (tt.term == "РИМ"):
             if ((isinstance(tt.next0_, TextToken))
                     and tt.next0_.get_morph_class_in_dictionary().is_proper_secname):
                 pass
             else:
                 ok = True
         elif (tt.previous is not None and tt.previous.is_value("В", None)
               and tt.term == "РИМЕ"):
             ok = True
     elif (oi.value is not None and oi.value.referent is not None
           and oi.value.owner.is_ext_ontology):
         ok = True
     elif (nam.endswith("ГРАД") or nam.endswith("СК")):
         ok = True
     elif (nam.endswith("TOWN") or nam.startswith("SAN")):
         ok = True
     elif (li[0].chars.is_latin_letter
           and li[0].begin_token.previous is not None
           and ((li[0].begin_token.previous.is_value("IN", None)
                 or li[0].begin_token.previous.is_value("FROM", None)))):
         ok = True
     else:
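         # No decisive lexical cue in the name itself: look for another geo referent
         # among the neighbouring tokens, skipping commas, prepositions and conjunctions.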
         tt2 = li[0].end_token.next0_
         first_pass3150 = True
         while True:
             if first_pass3150: first_pass3150 = False
             else: tt2 = tt2.next0_
             if (not (tt2 is not None)): break
             if (tt2.is_newline_before):
                 break
             if ((tt2.is_char_of(",(") or tt2.morph.class0_.is_preposition
                  or tt2.morph.class0_.is_conjunction)
                     or tt2.morph.class0_.is_misc):
                 continue
             if ((isinstance(tt2.get_referent(), GeoReferent))
                     and tt2.chars.is_cyrillic_letter
                     == li[0].chars.is_cyrillic_letter):
                 ok = True
             break
         if (not ok):
             tt2 = li[0].begin_token.previous
             first_pass3151 = True
             while True:
                 if first_pass3151: first_pass3151 = False
                 else: tt2 = tt2.previous
                 if (not (tt2 is not None)): break
                 if (tt2.is_newline_after):
                     break
                 if ((tt2.is_char_of(",)")
                      or tt2.morph.class0_.is_preposition
                      or tt2.morph.class0_.is_conjunction)
                         or tt2.morph.class0_.is_misc):
                     continue
                 if ((isinstance(tt2.get_referent(), GeoReferent))
                         and tt2.chars.is_cyrillic_letter
                         == li[0].chars.is_cyrillic_letter):
                     ok = True
                 if (ok):
                     sits = StreetItemToken.try_parse_list(
                         li[0].begin_token, None, 10)
                     if (sits is not None and len(sits) > 1):
                         ss = StreetDefineHelper._try_parse_street(
                             sits, False, False)
                         if (ss is not None):
                             del sits[0]
                             if (StreetDefineHelper._try_parse_street(
                                     sits, False, False) is None):
                                 ok = False
                 if (ok):
                     if (len(li) > 1 and li[1].typ
                             == CityItemToken.ItemType.PROPERNAME
                             and (li[1].whitespaces_before_count < 3)):
                         ok = False
                     else:
                         mc = li[0].begin_token.get_morph_class_in_dictionary()
                         if (mc.is_proper_name or mc.is_proper_surname
                                 or mc.is_adjective):
                             ok = False
                         else:
                             npt = NounPhraseHelper.try_parse(
                                 li[0].begin_token, NounPhraseParseAttr.NO,
                                 0, None)
                             if (npt is not None
                                     and npt.end_char > li[0].end_char):
                                 ok = False
                 if (AddressItemToken.try_attach_org(li[0].begin_token)
                         is not None):
                     ok = False
                     break
                 break
     if (always):
         if (li[0].whitespaces_before_count > 3 and li[0].doubtful
                 and li[0].begin_token
                 .get_morph_class_in_dictionary().is_proper_surname):
             pp = li[0].kit.process_referent("PERSON", li[0].begin_token)
             if (pp is not None):
                 always = False
     if (li[0].begin_token.chars.is_latin_letter
             and li[0].begin_token == li[0].end_token):
         tt1 = li[0].end_token.next0_
         if (tt1 is not None and tt1.is_char(',')):
             tt1 = tt1.next0_
         if (((isinstance(tt1, TextToken)) and tt1.chars.is_latin_letter and
              (tt1.length_char < 3)) and not tt1.chars.is_all_lower):
             ok = False
     if (not ok and not always):
         return None
     city = None
     if (oi.value is not None
             and (isinstance(oi.value.referent, GeoReferent))
             and not oi.value.owner.is_ext_ontology):
         city = (Utils.asObjectOrNull(oi.value.referent.clone(),
                                      GeoReferent))
         city.occurrence.clear()
     else:
         city = GeoReferent()
         city._add_name(nam)
         if (oi.value is not None
                 and (isinstance(oi.value.referent, GeoReferent))):
             city._merge_slots2(
                 Utils.asObjectOrNull(oi.value.referent, GeoReferent),
                 li[0].kit.base_language)
         if (not city.is_city):
             city._add_typ_city(li[0].kit.base_language)
     return ReferentToken._new734(city, li[0].begin_token, li[0].end_token,
                                  li[0].morph)
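__try_name_exist decides whether the city candidate in li[0] is plausible enough to become a GeoReferent: well-known endings (…ГРАД, …СК, …TOWN, SAN…), external-ontology hits and English "IN/FROM" contexts are accepted directly; otherwise the neighbouring tokens are scanned for another geo referent, and readings that look like streets, surnames or larger noun phrases are rejected. The forward scan uses the generated first_pass3150 idiom, a literal translation of a C# for-loop in which continue must still advance the cursor. A rough idiomatic equivalent is sketched below; the helper name _has_geo_after is hypothetical and does not exist in the library.
 # Illustrative rewrite of the forward context scan above; intended to match the
 # behaviour of the generated loop, but this helper is not part of the library.
 def _has_geo_after(tok, is_cyrillic):
     tt2 = tok
     while tt2 is not None:
         if tt2.is_newline_before:
             return False
         if (tt2.is_char_of(",(") or tt2.morph.class0_.is_preposition
                 or tt2.morph.class0_.is_conjunction or tt2.morph.class0_.is_misc):
             tt2 = tt2.next0_
             continue
         return (isinstance(tt2.get_referent(), GeoReferent)
                 and tt2.chars.is_cyrillic_letter == is_cyrillic)
     return False

 # ok = _has_geo_after(li[0].end_token.next0_, li[0].chars.is_cyrillic_letter)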
Example #29
 def try_attach(t: 'Token') -> 'ParenthesisToken':
     if (t is None):
         return None
     tok = ParenthesisToken.__m_termins.try_parse(t, TerminParseAttr.NO)
     if (tok is not None):
         res = ParenthesisToken(t, tok.end_token)
         return res
     if (not (isinstance(t, TextToken))):
         return None
     mc = t.get_morph_class_in_dictionary()
     ok = False
     t1 = None
     if (mc.is_adverb):
         ok = True
     elif (mc.is_adjective):
         if (t.morph.contains_attr("сравн.", None)
                 and t.morph.contains_attr("кач.прил.", None)):
             ok = True
     if (ok and t.next0_ is not None):
         if (t.next0_.is_char(',')):
             return ParenthesisToken(t, t)
         t1 = t.next0_
         if (t1.get_morph_class_in_dictionary() == MorphClass.VERB):
             if (t1.morph.contains_attr("н.вр.", None)
                     and t1.morph.contains_attr("нес.в.", None)
                     and t1.morph.contains_attr("дейст.з.", None)):
                 return ParenthesisToken(t, t1)
     t1 = (None)
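     # Constructions like "в соответствии с ...", "согласно ...", "в силу ...":
     # the parenthesis is extended up to the referenced entity or noun phrase.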
     if ((t.is_value("В", None) and t.next0_ is not None
          and t.next0_.is_value("СООТВЕТСТВИЕ", None))
             and t.next0_.next0_ is not None
             and t.next0_.next0_.morph.class0_.is_preposition):
         t1 = t.next0_.next0_.next0_
     elif (t.is_value("СОГЛАСНО", None)):
         t1 = t.next0_
     elif (t.is_value("В", None) and t.next0_ is not None):
         if (t.next0_.is_value("СИЛА", None)):
             t1 = t.next0_.next0_
         elif (t.next0_.morph.class0_.is_adjective
               or t.next0_.morph.class0_.is_pronoun):
             npt = NounPhraseHelper.try_parse(t.next0_,
                                              NounPhraseParseAttr.NO, 0,
                                              None)
             if (npt is not None):
                 if (npt.noun.is_value("ВИД", None)
                         or npt.noun.is_value("СЛУЧАЙ", None)
                         or npt.noun.is_value("СФЕРА", None)):
                     return ParenthesisToken(t, npt.end_token)
     if (t1 is not None):
         if (t1.next0_ is not None):
             npt1 = NounPhraseHelper.try_parse(t1, NounPhraseParseAttr.NO,
                                               0, None)
             if (npt1 is not None):
                 if (npt1.noun.is_value("НОРМА", None)
                         or npt1.noun.is_value("ПОЛОЖЕНИЕ", None)
                         or npt1.noun.is_value("УКАЗАНИЕ", None)):
                     t1 = npt1.end_token.next0_
         r = t1.get_referent()
         if (r is not None):
             res = ParenthesisToken._new1115(t, t1, r)
             if (t1.next0_ is not None and t1.next0_.is_comma):
                 sila = False
                 ttt = t1.next0_.next0_
                 first_pass3133 = True
                 while True:
                     if first_pass3133: first_pass3133 = False
                     else: ttt = ttt.next0_
                     if (not (ttt is not None)): break
                     if (ttt.is_value("СИЛА", None)
                             or ttt.is_value("ДЕЙСТВИЕ", None)):
                         sila = True
                         continue
                     if (ttt.is_comma):
                         if (sila):
                             res.end_token = ttt.previous
                         break
                     if (BracketHelper.can_be_start_of_sequence(
                             ttt, False, False)):
                         break
             return res
         npt = NounPhraseHelper.try_parse(t1, NounPhraseParseAttr.NO, 0,
                                          None)
         if (npt is not None):
             return ParenthesisToken(t, npt.end_token)
     tt = t
     if (tt.is_value("НЕ", None) and t is not None):
         tt = tt.next0_
     if (tt.morph.class0_.is_preposition and tt is not None):
         tt = tt.next0_
         npt1 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0,
                                           None)
         if (npt1 is not None):
             tt = npt1.end_token
             if (tt.next0_ is not None and tt.next0_.is_comma):
                 return ParenthesisToken(t, tt.next0_)
             if (npt1.noun.is_value("ОЧЕРЕДЬ", None)):
                 return ParenthesisToken(t, tt)
     if (t.is_value("ВЕДЬ", None)):
         return ParenthesisToken(t, t)
     return None
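ParenthesisToken.try_attach recognizes introductory parenthetical constructions ("в соответствии с …", "согласно …", qualifying adverbs and the like) starting at a given token. A hypothetical usage sketch follows; kit.first_token and calling the class from outside its analyzer are assumptions, while next0_, begin_char and end_char follow the conventions visible in these examples.
 # Hypothetical usage: collect parenthetical spans from an already tokenized text.
 t = kit.first_token  # `kit` is assumed to be an analysis kit produced elsewhere
 while t is not None:
     pt = ParenthesisToken.try_attach(t)
     if pt is not None:
         print("parenthesis:", pt.begin_token.begin_char, "-", pt.end_token.end_char)
         t = pt.end_token  # skip past the attached construction
     t = t.next0_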
Example #30
 def try_attach(t: 'Token',
                must_has_prefix: bool = False) -> 'OrgItemEponymToken':
     from pullenti.ner.org.internal.OrgItemNameToken import OrgItemNameToken
     tt = Utils.asObjectOrNull(t, TextToken)
     if (tt is None):
         if (t is None):
             return None
         r1 = t.get_referent()
         if (r1 is not None and r1.type_name == "DATE"):
             str0_ = str(r1).upper()
             if ((str0_ == "1 МАЯ" or str0_ == "7 ОКТЯБРЯ"
                  or str0_ == "9 МАЯ") or str0_ == "8 МАРТА"):
                 dt = OrgItemEponymToken._new1797(t, t, list())
                 dt.eponyms.append(str0_)
                 return dt
         age = NumberHelper.try_parse_age(t)
         if ((age is not None and
              (((isinstance(age.end_token.next0_, TextToken)) or
                (isinstance(age.end_token.next0_, ReferentToken)))) and
              (age.whitespaces_after_count < 3))
                 and not age.end_token.next0_.chars.is_all_lower
                 and age.end_token.next0_.chars.is_cyrillic_letter):
             dt = OrgItemEponymToken._new1797(t, age.end_token.next0_,
                                              list())
             dt.eponyms.append("{0} {1}".format(
                 age.value,
                 dt.end_token.get_source_text().upper()))
             return dt
         return None
     t1 = None
     full = False
     has_name = False
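     # Explicit "named after" prefix: "имени" / "им." (Ukrainian "імені" / "ім").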
     if (tt.term == "ИМЕНИ" or tt.term == "ІМЕНІ"):
         t1 = t.next0_
         full = True
         has_name = True
     elif (((tt.term == "ИМ" or tt.term == "ІМ"))
           and tt.next0_ is not None):
         if (tt.next0_.is_char('.')):
             t1 = tt.next0_.next0_
             full = True
         elif ((isinstance(tt.next0_, TextToken)) and tt.chars.is_all_lower
               and not tt.next0_.chars.is_all_lower):
             t1 = tt.next0_
         has_name = True
     elif (tt.previous is not None
           and ((tt.previous.is_value("ФОНД", None)
                 or tt.previous.is_value("ХРАМ", None)
                 or tt.previous.is_value("ЦЕРКОВЬ", "ЦЕРКВА")))):
         if ((not tt.chars.is_cyrillic_letter
              or tt.morph.class0_.is_preposition
              or tt.morph.class0_.is_conjunction)
                 or not tt.chars.is_letter):
             return None
         if (tt.whitespaces_before_count != 1):
             return None
         if (tt.chars.is_all_lower):
             return None
         if (tt.morph.class0_.is_adjective):
             npt = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0,
                                              None)
             if (npt is not None and npt.begin_token != npt.end_token):
                 return None
         na = OrgItemNameToken.try_attach(tt, None, False, True)
         if (na is not None):
             if (na.is_empty_word or na.is_std_name or na.is_std_tail):
                 return None
         t1 = (tt)
     if (t1 is None or ((t1.is_newline_before and not full))):
         return None
     if (tt.previous is not None
             and tt.previous.morph.class0_.is_preposition):
         return None
     if (must_has_prefix and not has_name):
         return None
     r = t1.get_referent()
     if ((r is not None and r.type_name == "DATE" and full)
             and r.find_slot("DAY", None, True) is not None
             and r.find_slot("YEAR", None, True) is None):
         dt = OrgItemEponymToken._new1797(t, t1, list())
         dt.eponyms.append(str(r).upper())
         return dt
     holy = False
     if ((t1.is_value("СВЯТОЙ", None) or t1.is_value("СВЯТИЙ", None)
          or t1.is_value("СВ", None)) or t1.is_value("СВЯТ", None)):
         t1 = t1.next0_
         holy = True
         if (t1 is not None and t1.is_char('.')):
             t1 = t1.next0_
     if (t1 is None):
         return None
     cl = t1.get_morph_class_in_dictionary()
     if (cl.is_noun or cl.is_adjective):
         rt = t1.kit.process_referent("PERSON", t1)
         if (rt is not None and rt.referent.type_name == "PERSON"
                 and rt.begin_token != rt.end_token):
             e0_ = rt.referent.get_string_value("LASTNAME")
             if (e0_ is not None):
                 if (rt.end_token.is_value(e0_, None)):
                     re = OrgItemEponymToken(t, rt.end_token)
                     re.eponyms.append(rt.end_token.get_source_text())
                     return re
     nt = NumberHelper.try_parse_anniversary(t1)
     if (nt is not None and nt.typ == NumberSpellingType.AGE):
         npt = NounPhraseHelper.try_parse(nt.end_token.next0_,
                                          NounPhraseParseAttr.NO, 0, None)
         if (npt is not None):
             s = "{0}-{1} {2}".format(
                 nt.value,
                 ("РОКІВ" if t.kit.base_language.is_ua else "ЛЕТ"),
                 MiscHelper.get_text_value(npt.begin_token, npt.end_token,
                                           GetTextAttr.NO))
             res = OrgItemEponymToken(t, npt.end_token)
             res.eponyms.append(s)
             return res
     its = OrgItemEponymToken.PersonItemToken.try_attach(t1)
     if (its is None):
         if ((isinstance(t1, ReferentToken))
                 and (isinstance(t1.get_referent(), GeoReferent))):
             s = MiscHelper.get_text_value(t1, t1, GetTextAttr.NO)
             re = OrgItemEponymToken(t, t1)
             re.eponyms.append(s)
             return re
         return None
     eponims = list()
     i = 0
     j = 0
     if (its[i].typ == OrgItemEponymToken.PersonItemType.LOCASEWORD):
         i += 1
     if (i >= len(its)):
         return None
     if (not full):
         if (its[i].begin_token.morph.class0_.is_adjective and
                 not its[i].begin_token.morph.class0_.is_proper_surname):
             return None
     if (its[i].typ == OrgItemEponymToken.PersonItemType.INITIAL):
         i += 1
         while True:
             if ((i < len(its)) and its[i].typ
                     == OrgItemEponymToken.PersonItemType.INITIAL):
                 i += 1
             if (i >= len(its) or
                 ((its[i].typ != OrgItemEponymToken.PersonItemType.SURNAME
                   and
                   its[i].typ != OrgItemEponymToken.PersonItemType.NAME))):
                 break
             eponims.append(its[i].value)
             t1 = its[i].end_token
             if ((i + 2) >= len(its) or
                     its[i + 1].typ != OrgItemEponymToken.PersonItemType.AND
                     or its[i + 2].typ !=
                     OrgItemEponymToken.PersonItemType.INITIAL):
                 break
             i += 3
     elif (((i + 1) < len(its))
           and its[i].typ == OrgItemEponymToken.PersonItemType.NAME
           and its[i + 1].typ == OrgItemEponymToken.PersonItemType.SURNAME):
         eponims.append(its[i + 1].value)
         t1 = its[i + 1].end_token
         i += 2
         if ((((i + 2) < len(its))
              and its[i].typ == OrgItemEponymToken.PersonItemType.AND
              and its[i + 1].typ == OrgItemEponymToken.PersonItemType.NAME)
                 and its[i + 2].typ
                 == OrgItemEponymToken.PersonItemType.SURNAME):
             eponims.append(its[i + 2].value)
             t1 = its[i + 2].end_token
     elif (its[i].typ == OrgItemEponymToken.PersonItemType.SURNAME):
         if (len(its) == (i + 2) and its[i].chars == its[i + 1].chars):
             its[i].value += (" " + its[i + 1].value)
             its[i].end_token = its[i + 1].end_token
             del its[i + 1]
         eponims.append(its[i].value)
         if (((i + 1) < len(its)) and its[i + 1].typ
                 == OrgItemEponymToken.PersonItemType.NAME):
             if ((i + 2) == len(its)):
                 i += 1
             elif (its[i + 2].typ !=
                   OrgItemEponymToken.PersonItemType.SURNAME):
                 i += 1
         elif (((i + 1) < len(its)) and its[i + 1].typ
               == OrgItemEponymToken.PersonItemType.INITIAL):
             if ((i + 2) == len(its)):
                 i += 1
             elif (its[i + 2].typ
                   == OrgItemEponymToken.PersonItemType.INITIAL
                   and (i + 3) == len(its)):
                 i += 2
         elif (((i + 2) < len(its))
               and its[i + 1].typ == OrgItemEponymToken.PersonItemType.AND
               and its[i + 2].typ
               == OrgItemEponymToken.PersonItemType.SURNAME):
             ok = True
             npt = NounPhraseHelper.try_parse(its[i + 2].begin_token,
                                              NounPhraseParseAttr.NO, 0,
                                              None)
             if (npt is not None and not npt.morph.case_.is_genitive
                     and not npt.morph.case_.is_undefined):
                 ok = False
             if (ok):
                 eponims.append(its[i + 2].value)
                 i += 2
         t1 = its[i].end_token
     elif (its[i].typ == OrgItemEponymToken.PersonItemType.NAME and holy):
         t1 = its[i].end_token
         sec = False
         if (((i + 1) < len(its)) and its[i].chars == its[i + 1].chars
                 and its[i + 1].typ !=
                 OrgItemEponymToken.PersonItemType.INITIAL):
             sec = True
             t1 = its[i + 1].end_token
         if (sec):
             eponims.append("СВЯТ.{0} {1}".format(its[i].value,
                                                  its[i + 1].value))
         else:
             eponims.append("СВЯТ.{0}".format(its[i].value))
     elif (full and (i + 1) == len(its) and
           ((its[i].typ == OrgItemEponymToken.PersonItemType.NAME
             or its[i].typ == OrgItemEponymToken.PersonItemType.SURNAME))):
         t1 = its[i].end_token
         eponims.append(its[i].value)
     elif ((its[i].typ == OrgItemEponymToken.PersonItemType.NAME
            and len(its) == 3
            and its[i + 1].typ == OrgItemEponymToken.PersonItemType.NAME)
           and its[i + 2].typ == OrgItemEponymToken.PersonItemType.SURNAME):
         t1 = its[i + 2].end_token
         eponims.append("{0} {1} {2}".format(its[i].value, its[i + 1].value,
                                             its[i + 2].value))
         i += 2
     if (len(eponims) == 0):
         return None
     return OrgItemEponymToken._new1797(t, t1, eponims)
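OrgItemEponymToken.try_attach extracts "named after" fragments that usually follow an organization noun: "имени / им. …" constructions, commemorative dates ("1 Мая", "8 Марта"), anniversaries and saints' names. A hypothetical usage sketch, under the same assumptions about kit.first_token as above:
 # Hypothetical usage: scan a token chain for eponym fragments.
 t = kit.first_token
 while t is not None:
     ep = OrgItemEponymToken.try_attach(t, False)
     if ep is not None and len(ep.eponyms) > 0:
         print("eponym:", ", ".join(ep.eponyms))
         t = ep.end_token
     t = t.next0_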