Ejemplo n.º 1
0
 def __can_be_geo_after(tt: 'Token') -> bool:
     """Check whether something geographic is recognised starting at ``tt``.

     Commas and brackets in front of the candidate token are skipped first.
     The answer is ``True`` when, at the resulting token, any of these holds:

     * the token's referent is a ``GeoReferent``;
     * ``TerrItemToken.try_parse_list`` yields at least two items and exactly
       one of the first two carries a ``termin_item``;
     * ``CityAttachHelper.check_city_after`` accepts the token;
     * ``TerrAttachHelper.try_attach_stateusaterritory`` returns a result.

     Returns ``False`` when the token chain ends before a candidate is found
     or when none of the checks succeeds.
     """
     # Step over punctuation that may separate the candidate from its context.
     while True:
         if tt is None:
             return False
         if not (tt.is_comma or BracketHelper.is_bracket(tt, True)):
             break
         tt = tt.next0_

     if isinstance(tt.get_referent(), GeoReferent):
         return True

     items = TerrItemToken.try_parse_list(tt, None, 2)
     if items is not None and len(items) > 1:
         first_has_termin = items[0].termin_item is not None
         second_has_termin = items[1].termin_item is not None
         # Exactly one of the two leading items must carry a termin.
         if first_has_termin != second_has_termin:
             return True

     if CityAttachHelper.check_city_after(tt):
         return True
     return TerrAttachHelper.try_attach_stateusaterritory(tt) is not None
Ejemplo n.º 2
0
 def __try_parse(t: 'Token', prev: 'WeaponItemToken', after_conj: bool, attach_high: bool = False) -> 'WeaponItemToken':
     """Try to recognise a single weapon-description item starting at ``t``.

     The checks run in a fixed order and the first one that succeeds wins:
     an item next to a bracket, an ontology term (noun / brand / name /
     model), a prefixed number, a short upper-case model code, a free-form
     name (only in a suitable context), and finally the keyword-driven brand,
     caliber and developer forms.

     Args:
         t: token to start from; ``None`` yields ``None``.
         prev: item recognised just before ``t`` (may be ``None``); its type
             decides whether an unknown word may be taken as a name or brand.
         after_conj: only forwarded unchanged to recursive calls.
         attach_high: only forwarded to the recursive call made for an item
             that follows a bracket.

     Returns:
         The recognised ``WeaponItemToken`` or ``None`` when nothing matches.
     """
     if t is None:
         return None
     kinds = WeaponItemToken.Typs

     # Item that follows a bracket: parse what comes after it and absorb the
     # bracket(s) into the resulting token.
     if BracketHelper.is_bracket(t, True):
         inner = WeaponItemToken.__try_parse(t.next0_, prev, after_conj, attach_high)
         if inner is not None:
             follower = inner.end_token.next0_
             if follower is None:
                 inner.begin_token = t
                 return inner
             if BracketHelper.is_bracket(follower, True):
                 inner.begin_token = t
                 inner.end_token = follower
                 return inner

     # Term known to the ontology.
     hit = WeaponItemToken.M_ONTOLOGY.try_parse(t, TerminParseAttr.NO)
     if hit is not None:
         item = WeaponItemToken(t, hit.end_token)
         item.typ = Utils.valToEnum(hit.termin.tag, kinds)

         if item.typ == kinds.NOUN:
             item.value = hit.termin.canonic_text
             if hit.termin.tag2 is not None:
                 item.is_doubt = True
             # Extend the noun with directly following brands and adjectives.
             cur = item.end_token.next0_
             while cur is not None:
                 if cur.whitespaces_before_count > 2:
                     break
                 sub = WeaponItemToken.__try_parse(cur, None, False, False)
                 if sub is not None:
                     if not (sub.typ == kinds.BRAND):
                         break
                     item.__inner_tokens.append(sub)
                     cur = sub.end_token
                     item.end_token = cur
                     cur = cur.next0_
                     continue
                 if not isinstance(cur, TextToken):
                     break
                 if not (cur.get_morph_class_in_dictionary() == MorphClass.ADJECTIVE):
                     break
                 # Rebuild alt_value as "<previous adjectives><adjective> <noun>".
                 if item.alt_value is None:
                     item.alt_value = item.value
                 if item.alt_value.endswith(item.value):
                     keep = len(item.alt_value) - len(item.value)
                     item.alt_value = item.alt_value[0:keep]
                 item.alt_value = "{0}{1} {2}".format(item.alt_value, cur.term, item.value)
                 item.end_token = cur
                 cur = cur.next0_
             return item

         if item.typ == kinds.BRAND or item.typ == kinds.NAME:
             item.value = hit.termin.canonic_text
             return item

         if item.typ == kinds.MODEL:
             item.value = hit.termin.canonic_text
             if isinstance(hit.termin.tag2, list):
                 # Each linked termin becomes an inner token of the model.
                 for variant in Utils.asObjectOrNull(hit.termin.tag2, list):
                     sub = WeaponItemToken._new2758(
                         t, hit.end_token,
                         Utils.valToEnum(variant.tag, kinds),
                         variant.canonic_text,
                         hit.begin_token == hit.end_token)
                     item.__inner_tokens.append(sub)
                     extra = variant.additional_vars
                     if extra is not None and len(extra) > 0:
                         sub.alt_value = extra[0].canonic_text
             item.__correct_model()
             return item

     # Number introduced by a number prefix.
     prefixed = MiscHelper.check_number_prefix(t)
     if prefixed is not None:
         number = TransItemToken._attach_number(prefixed, True)
         if number is not None:
             item = WeaponItemToken._new2759(t, number.end_token, kinds.NUMBER)
             item.value = number.value
             item.alt_value = number.alt_value
             return item

     # Short upper-case code (fewer than 4 characters).
     if isinstance(t, TextToken) and t.chars.is_letter and t.chars.is_all_upper and t.length_char < 4:
         nxt = t.next0_
         if (nxt is not None and (nxt.is_hiphen or nxt.is_char('.'))
                 and nxt.whitespaces_after_count < 2
                 and isinstance(nxt.next0_, NumberToken)):
             item = WeaponItemToken._new2760(t, nxt, kinds.MODEL, True)
             item.value = t.term
             item.__correct_model()
             return item
         if isinstance(nxt, NumberToken) and not t.is_whitespace_after:
             item = WeaponItemToken._new2760(t, t, kinds.MODEL, True)
             item.value = t.term
             item.__correct_model()
             return item
         if t.term == "СП" and t.whitespaces_after_count < 3 and isinstance(nxt, TextToken):
             follower = WeaponItemToken.__try_parse(nxt, None, False, False)
             if follower is not None and (follower.typ == kinds.MODEL or follower.typ == kinds.BRAND):
                 item = WeaponItemToken._new2759(t, t, kinds.NOUN)
                 item.value = "ПИСТОЛЕТ"
                 item.alt_value = "СЛУЖЕБНЫЙ ПИСТОЛЕТ"
                 return item

     # Unknown word longer than 2 characters and not all lower case: taken as a
     # name only after a noun/model/brand, or (without prev) after a comma/and.
     if isinstance(t, TextToken) and t.chars.is_letter and not t.chars.is_all_lower and t.length_char > 2:
         if prev is not None:
             accepted = prev.typ == kinds.NOUN or prev.typ == kinds.MODEL or prev.typ == kinds.BRAND
         else:
             accepted = t.previous is not None and t.previous.is_comma_and
         if accepted:
             item = WeaponItemToken._new2760(t, t, kinds.NAME, True)
             item.value = t.term
             nxt = t.next0_
             # "Word-Word" with identical character info is one compound name.
             if (nxt is not None and nxt.is_hiphen
                     and isinstance(nxt.next0_, TextToken)
                     and nxt.next0_.chars == t.chars):
                 item.value = "{0}-{1}".format(item.value, nxt.next0_.term)
                 item.end_token = nxt.next0_
             if prev is not None and prev.typ == kinds.NOUN:
                 item.typ = kinds.BRAND
             tail = item.end_token
             # A number attached via hyphen or without whitespace makes it a model.
             hyphen_number = (tail.next0_ is not None and tail.next0_.is_hiphen
                              and isinstance(tail.next0_.next0_, NumberToken))
             if hyphen_number or (not tail.is_whitespace_after and isinstance(tail.next0_, NumberToken)):
                 item.typ = kinds.MODEL
                 item.__correct_model()
             return item

     # Keyword "МАРКА" followed by a brand, a bracketed text or a word.
     if t.is_value("МАРКА", None):
         nxt = t.next0_
         branded = WeaponItemToken.__try_parse(nxt, prev, after_conj, False)
         if branded is not None and branded.typ == kinds.BRAND:
             branded.begin_token = t
             return branded
         if BracketHelper.can_be_start_of_sequence(nxt, True, False):
             bracketed = BracketHelper.try_parse(nxt, BracketParseAttr.NO, 100)
             if bracketed is not None:
                 text = MiscHelper.get_text_value(bracketed.begin_token, bracketed.end_token, GetTextAttr.NO)
                 return WeaponItemToken._new2764(t, bracketed.end_token, kinds.BRAND, text)
         if (isinstance(t, TextToken) and isinstance(nxt, TextToken)
                 and nxt.length_char > 1 and not nxt.chars.is_all_lower):
             return WeaponItemToken._new2764(t, nxt, kinds.BRAND, t.term)

     # Keyword "КАЛИБР" followed (optionally via '-' or ':') by a number.
     if t.is_value("КАЛИБР", "КАЛІБР"):
         after = t.next0_
         if after is not None and (after.is_hiphen or after.is_char(':')):
             after = after.next0_
         measured = NumbersWithUnitToken.try_parse(after, None, False, False, False, False)
         if measured is not None and measured.single_val is not None:
             return WeaponItemToken._new2764(
                 t, measured.end_token, kinds.CALIBER,
                 NumberHelper.double_to_string(measured.single_val))

     # Bare number: a caliber when measured in "мм" or followed by "КАЛИБР".
     if isinstance(t, NumberToken):
         measured = NumbersWithUnitToken.try_parse(t, None, False, False, False, False)
         if measured is not None and measured.single_val is not None:
             units = measured.units
             if len(units) == 1 and units[0].unit is not None and units[0].unit.name_cyr == "мм":
                 return WeaponItemToken._new2764(
                     t, measured.end_token, kinds.CALIBER,
                     NumberHelper.double_to_string(measured.single_val))
             trailing = measured.end_token.next0_
             if trailing is not None and trailing.is_value("КАЛИБР", "КАЛІБР"):
                 return WeaponItemToken._new2764(
                     t, trailing, kinds.CALIBER,
                     NumberHelper.double_to_string(measured.single_val))

     # Keyword "ПРОИЗВОДСТВО" followed by an organization or geo referent.
     if t.is_value("ПРОИЗВОДСТВО", "ВИРОБНИЦТВО"):
         after = t.next0_
         if after is not None and (after.is_hiphen or after.is_char(':')):
             after = after.next0_
         if isinstance(after, ReferentToken):
             owner = after.get_referent()
             if isinstance(owner, OrganizationReferent) or isinstance(owner, GeoReferent):
                 return WeaponItemToken._new2769(t, after, kinds.DEVELOPER, owner)

     return None
Ejemplo n.º 3
0
 def try_attach(t : 'Token', p1 : 'InstrumentParticipantReferent'=None, p2 : 'InstrumentParticipantReferent'=None, is_contract : bool=False) -> 'ParticipantToken':
     """Try to recognise a participant ("party") designation starting at ``t``.

     Two modes are implemented:

     * When ``p1`` and ``p2`` are both ``None`` and ``is_contract`` is true, the
       function first looks for referent-based forms: a list of same-typed
       referents followed by a plural "СТОРОНА" noun phrase (kind
       ``NAMEDASPARTS``), a person/organization referent with a naming phrase
       inside or after it (kind ``NAMEDAS``), or an ontology term followed by
       a person/organization referent (kind ``NAMEDAS``).
     * Otherwise (or when nothing above matched) the tokens are scanned for a
       participant type, taken from the ontology, from the types of ``p1`` /
       ``p2``, or from a capitalised noun after a naming word.

     Args:
         t: token to start from; ``None`` yields ``None``.
         p1: already known first participant; only its ``typ`` is read.
         p2: already known second participant; only its ``typ`` is read.
         is_contract: enables the referent-based forms and the capitalised
             word fallback described above.

     Returns:
         A ``ParticipantToken`` or ``None`` when no designation is found.
     """
     if (t is None):
         return None
     tt = t
     br = False
     if (p1 is None and p2 is None and is_contract):
         r1 = t.get_referent()
         # Form 1: "R1, R2 [, R3 ...] ... <plural noun phrase with СТОРОНА>".
         if ((r1 is not None and t.next0_ is not None and t.next0_.is_comma_and) and (isinstance(t.next0_.next0_, ReferentToken))):
             r2 = t.next0_.next0_.get_referent()
             if (r1.type_name == r2.type_name):
                 ttt = t.next0_.next0_.next0_
                 refs = list()
                 refs.append(r1)
                 refs.append(r2)
                 # Collect further referents of the same type joined by comma/and.
                 first_pass3282 = True
                 while True:
                     if first_pass3282: first_pass3282 = False
                     else: ttt = ttt.next0_
                     if (not (ttt is not None)): break
                     if ((ttt.is_comma_and and ttt.next0_ is not None and ttt.next0_.get_referent() is not None) and ttt.next0_.get_referent().type_name == r1.type_name):
                         ttt = ttt.next0_
                         if (ttt.get_referent() not in refs):
                             refs.append(ttt.get_referent())
                         continue
                     break
                 # Skip filler words, then require the plural "СТОРОНА" phrase.
                 first_pass3283 = True
                 while True:
                     if first_pass3283: first_pass3283 = False
                     else: ttt = ttt.next0_
                     if (not (ttt is not None)): break
                     if (ttt.is_comma or ttt.morph.class0_.is_preposition):
                         continue
                     if ((ttt.is_value("ИМЕНОВАТЬ", None) or ttt.is_value("ДАЛЬНЕЙШИЙ", None) or ttt.is_value("ДАЛЕЕ", None)) or ttt.is_value("ТЕКСТ", None)):
                         continue
                     if (ttt.is_value("ДОГОВАРИВАТЬСЯ", None)):
                         continue
                     npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                     if (npt is not None and npt.noun.is_value("СТОРОНА", None) and npt.morph.number != MorphNumber.SINGULAR):
                         re = ParticipantToken._new1573(t, npt.end_token, ParticipantToken.Kinds.NAMEDASPARTS)
                         re.parts = refs
                         return re
                     break
         # Form 2: a person / organization referent with a naming phrase.
         if ((isinstance(r1, OrganizationReferent)) or (isinstance(r1, PersonReferent))):
             has_br = False
             has_named = False
             if (isinstance(r1, PersonReferent)):
                 if (t.previous is not None and t.previous.is_value("ЛИЦО", None)):
                     return None
             elif (t.previous is not None and ((t.previous.is_value("ВЫДАВАТЬ", None) or t.previous.is_value("ВЫДАТЬ", None)))):
                 return None
             # 2a: the naming phrase lies inside the referent token itself.
             ttt = t.begin_token
             while ttt is not None and (ttt.end_char < t.end_char):
                 if (ttt.is_char('(')):
                     has_br = True
                 elif ((ttt.is_value("ИМЕНОВАТЬ", None) or ttt.is_value("ДАЛЬНЕЙШИЙ", None) or ttt.is_value("ДАЛЕЕ", None)) or ttt.is_value("ТЕКСТ", None)):
                     has_named = True
                 elif ((ttt.is_comma or ttt.morph.class0_.is_preposition or ttt.is_hiphen) or ttt.is_char(':')):
                     pass
                 elif (isinstance(ttt, ReferentToken)):
                     pass
                 elif (has_br or has_named):
                     npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0, None)
                     if (npt is None):
                         break
                     if (has_br):
                         if (npt.end_token.next0_ is None or not npt.end_token.next0_.is_char(')')):
                             break
                     if (not has_named):
                         if (ParticipantToken.M_ONTOLOGY.try_parse(ttt, TerminParseAttr.NO) is None):
                             break
                     re = ParticipantToken._new1573(t, t, ParticipantToken.Kinds.NAMEDAS)
                     re.typ = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                     re.parts = list()
                     re.parts.append(r1)
                     return re
                 ttt = ttt.next0_
             # 2b: the naming phrase follows the referent token.
             has_br = False
             has_named = False
             end_side = None
             brr = None
             add_refs = None
             ttt = t.next0_
             first_pass3284 = True
             while True:
                 if first_pass3284: first_pass3284 = False
                 else: ttt = ttt.next0_
                 if (not (ttt is not None)): break
                 # "<number> СТОРОНЫ" closes the description of this participant.
                 if ((isinstance(ttt, NumberToken)) and (isinstance(ttt.next0_, TextToken)) and ttt.next0_.term == "СТОРОНЫ"):
                     ttt = ttt.next0_
                     end_side = ttt
                     if (ttt.next0_ is not None and ttt.next0_.is_comma):
                         ttt = ttt.next0_
                     if (ttt.next0_ is not None and ttt.next0_.is_and):
                         break
                 # Forget the bracket sequence once the scan has moved past it.
                 if (brr is not None and ttt.begin_char > brr.end_char):
                     brr = (None)
                 if (BracketHelper.can_be_start_of_sequence(ttt, False, False)):
                     brr = BracketHelper.try_parse(ttt, BracketParseAttr.NO, 100)
                     # Very short "(...)" groups are skipped entirely.
                     if (brr is not None and (brr.length_char < 7) and ttt.is_char('(')):
                         ttt = brr.end_token
                         brr = (None)
                         continue
                 elif ((ttt.is_value("ИМЕНОВАТЬ", None) or ttt.is_value("ДАЛЬНЕЙШИЙ", None) or ttt.is_value("ДАЛЕЕ", None)) or ttt.is_value("ТЕКСТ", None)):
                     has_named = True
                 elif ((ttt.is_comma or ttt.morph.class0_.is_preposition or ttt.is_hiphen) or ttt.is_char(':')):
                     pass
                 elif (brr is not None or has_named):
                     if (BracketHelper.can_be_start_of_sequence(ttt, True, False)):
                         ttt = ttt.next0_
                         if (ttt is None):
                             # The text ends right after the bracket: nothing left
                             # to parse, and the code below dereferences ttt.
                             break
                     npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0, None)
                     typ22 = None
                     if (npt is not None):
                         ttt = npt.end_token
                         if (npt.end_token.is_value("ДОГОВОР", None)):
                             continue
                     else:
                         # No noun phrase: fall back to an ontology term inside a
                         # meta token, or to an adjective after a naming word.
                         ttok = None
                         if (isinstance(ttt, MetaToken)):
                             ttok = ParticipantToken.M_ONTOLOGY.try_parse(ttt.begin_token, TerminParseAttr.NO)
                         if (ttok is not None):
                             typ22 = ttok.termin.canonic_text
                         elif (has_named and ttt.morph.class0_.is_adjective):
                             typ22 = ttt.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
                         elif (brr is not None):
                             continue
                         else:
                             break
                     if (npt is None and typ22 is None):
                         # Neither source produced a type; everything below needs
                         # one of them, so stop instead of dereferencing None.
                         break
                     if (BracketHelper.can_be_end_of_sequence(ttt.next0_, True, None, False)):
                         ttt = ttt.next0_
                     if (brr is not None):
                         if (ttt.next0_ is None):
                             ttt = brr.end_token
                             continue
                         ttt = ttt.next0_
                     if (not has_named and typ22 is None):
                         if (ParticipantToken.M_ONTOLOGY.try_parse(npt.begin_token, TerminParseAttr.NO) is None):
                             break
                     re = ParticipantToken._new1573(t, ttt, ParticipantToken.Kinds.NAMEDAS)
                     # Pick the type lazily: npt is None whenever typ22 came from
                     # the fallback branch, and a helper call such as
                     # Utils.ifNotNull(typ22, npt.get_normal_case_text(...)) would
                     # evaluate the npt expression first and raise AttributeError.
                     if (typ22 is not None):
                         re.typ = typ22
                     else:
                         re.typ = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                     re.parts = list()
                     re.parts.append(r1)
                     return re
                 elif ((ttt.is_value("ЗАРЕГИСТРИРОВАННЫЙ", None) or ttt.is_value("КАЧЕСТВО", None) or ttt.is_value("ПРОЖИВАЮЩИЙ", None)) or ttt.is_value("ЗАРЕГ", None)):
                     pass
                 elif (ttt.get_referent() == r1):
                     pass
                 elif ((isinstance(ttt.get_referent(), PersonIdentityReferent)) or (isinstance(ttt.get_referent(), AddressReferent))):
                     # Identity documents and addresses are kept as extra parts.
                     if (add_refs is None):
                         add_refs = list()
                     add_refs.append(ttt.get_referent())
                 else:
                     prr = ttt.kit.process_referent("PERSONPROPERTY", ttt)
                     if (prr is not None):
                         ttt = prr.end_token
                         continue
                     if (isinstance(ttt.get_referent(), GeoReferent)):
                         continue
                     npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                     if (npt is not None):
                         if ((npt.noun.is_value("МЕСТО", None) or npt.noun.is_value("ЖИТЕЛЬСТВО", None) or npt.noun.is_value("ПРЕДПРИНИМАТЕЛЬ", None)) or npt.noun.is_value("ПОЛ", None) or npt.noun.is_value("РОЖДЕНИЕ", None)):
                             ttt = npt.end_token
                             continue
                     if (ttt.is_newline_before):
                         break
                     if (ttt.length_char < 3):
                         continue
                     mc = ttt.get_morph_class_in_dictionary()
                     if (mc.is_adverb or mc.is_adjective):
                         continue
                     if (ttt.chars.is_all_upper):
                         continue
                     break
             # 2c: no explicit name, but a "<number> СТОРОНЫ" marker or extra
             # referents after a preceding "and" still identify a participant.
             if (end_side is not None or ((add_refs is not None and t.previous is not None and t.previous.is_and))):
                 re = ParticipantToken._new1573(t, Utils.ifNotNull(end_side, t), ParticipantToken.Kinds.NAMEDAS)
                 re.typ = (None)
                 re.parts = list()
                 re.parts.append(r1)
                 if (add_refs is not None):
                     re.parts.extend(add_refs)
                 return re
         # Form 3: "<ontology term> [-|:] <person or organization referent>".
         too = ParticipantToken.M_ONTOLOGY.try_parse(t, TerminParseAttr.NO)
         if (too is not None):
             if ((isinstance(t.previous, TextToken)) and t.previous.is_value("ЛИЦО", None)):
                 too = (None)
         if (too is not None and too.termin.tag is not None and too.termin.canonic_text != "СТОРОНА"):
             tt1 = too.end_token.next0_
             if (tt1 is not None):
                 if (tt1.is_hiphen or tt1.is_char(':')):
                     tt1 = tt1.next0_
             if (isinstance(tt1, ReferentToken)):
                 r1 = tt1.get_referent()
                 if ((isinstance(r1, PersonReferent)) or (isinstance(r1, OrganizationReferent))):
                     re = ParticipantToken._new1573(t, tt1, ParticipantToken.Kinds.NAMEDAS)
                     re.typ = too.termin.canonic_text
                     re.parts = list()
                     re.parts.append(r1)
                     return re
     # General scan: determine the participant type (typ_) and its last token (t1).
     add_typ1 = (None if p1 is None else p1.typ)
     add_typ2 = (None if p2 is None else p2.typ)
     if (BracketHelper.can_be_start_of_sequence(tt, False, False) and tt.next0_ is not None):
         br = True
         tt = tt.next0_
     term1 = None
     term2 = None
     # Multi-word known types (other than "СТОРОНА ...") are matched as termins.
     if (add_typ1 is not None and add_typ1.find(' ') > 0 and not add_typ1.startswith("СТОРОНА")):
         term1 = Termin(add_typ1)
     if (add_typ2 is not None and add_typ2.find(' ') > 0 and not add_typ2.startswith("СТОРОНА")):
         term2 = Termin(add_typ2)
     named = False
     typ_ = None
     t1 = None
     t0 = tt
     first_pass3285 = True
     while True:
         if first_pass3285: first_pass3285 = False
         else: tt = tt.next0_
         if (not (tt is not None)): break
         if (tt.morph.class0_.is_preposition and typ_ is not None):
             continue
         if (tt.is_char_of("(:)") or tt.is_hiphen):
             continue
         if (tt.is_table_control_char):
             break
         if (tt.is_newline_before and tt != t0):
             if (isinstance(tt, NumberToken)):
                 break
             if ((isinstance(tt, TextToken)) and (isinstance(tt.previous, TextToken))):
                 if (tt.previous.is_value(tt.term, None)):
                     break
         if (BracketHelper.is_bracket(tt, False)):
             continue
         tok = (ParticipantToken.M_ONTOLOGY.try_parse(tt, TerminParseAttr.NO) if ParticipantToken.M_ONTOLOGY is not None else None)
         if (tok is not None and (isinstance(tt.previous, TextToken))):
             if (tt.previous.is_value("ЛИЦО", None)):
                 return None
         if (tok is None):
             # Not an ontology term: compare with the known participant types.
             if (add_typ1 is not None and ((MiscHelper.is_not_more_than_one_error(add_typ1, tt) or (((isinstance(tt, MetaToken)) and tt.begin_token.is_value(add_typ1, None)))))):
                 if (typ_ is not None):
                     if (not ParticipantToken.__is_types_equal(add_typ1, typ_)):
                         break
                 typ_ = add_typ1
                 t1 = tt
                 continue
             if (add_typ2 is not None and ((MiscHelper.is_not_more_than_one_error(add_typ2, tt) or (((isinstance(tt, MetaToken)) and tt.begin_token.is_value(add_typ2, None)))))):
                 if (typ_ is not None):
                     if (not ParticipantToken.__is_types_equal(add_typ2, typ_)):
                         break
                 typ_ = add_typ2
                 t1 = tt
                 continue
             if (tt.chars.is_letter):
                 if (term1 is not None):
                     tok1 = term1.try_parse(tt, TerminParseAttr.NO)
                     if (tok1 is not None):
                         if (typ_ is not None):
                             if (not ParticipantToken.__is_types_equal(add_typ1, typ_)):
                                 break
                         typ_ = add_typ1
                         tt = tok1.end_token
                         t1 = tt
                         continue
                 if (term2 is not None):
                     tok2 = term2.try_parse(tt, TerminParseAttr.NO)
                     if (tok2 is not None):
                         if (typ_ is not None):
                             if (not ParticipantToken.__is_types_equal(add_typ2, typ_)):
                                 break
                         typ_ = add_typ2
                         tt = tok2.end_token
                         t1 = tt
                         continue
                 # After a naming word a noun that is not all lower case (or that
                 # follows a bracket) is taken as the type, unless it is a decree
                 # keyword.
                 if (named and tt.get_morph_class_in_dictionary().is_noun):
                     if (not tt.chars.is_all_lower or BracketHelper.is_bracket(tt.previous, True)):
                         if (DecreeToken.is_keyword(tt, False) is None):
                             val = tt.get_normal_case_text(MorphClass.NOUN, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                             if (typ_ is not None):
                                 if (not ParticipantToken.__is_types_equal(typ_, val)):
                                     break
                             typ_ = val
                             t1 = tt
                             continue
             if (named and typ_ is None and is_contract):
                 if ((isinstance(tt, TextToken)) and tt.chars.is_cyrillic_letter and tt.chars.is_capital_upper):
                     dc = tt.get_morph_class_in_dictionary()
                     if (dc.is_undefined or dc.is_noun):
                         dt = DecreeToken.try_attach(tt, None, False)
                         ok = True
                         if (dt is not None):
                             ok = False
                         elif (tt.is_value("СТОРОНА", None)):
                             ok = False
                         if (ok):
                             typ_ = tt.lemma
                             t1 = tt
                             continue
                     if (dc.is_adjective):
                         npt = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
                         if (npt is not None and len(npt.adjectives) > 0 and npt.noun.get_morph_class_in_dictionary().is_noun):
                             typ_ = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                             t1 = npt.end_token
                             continue
             if (tt == t):
                 break
             if ((isinstance(tt, NumberToken)) or tt.is_char('.')):
                 break
             # Short tokens are tolerated once a type has been found.
             if (tt.length_char < 4):
                 if (typ_ is not None):
                     continue
             break
         # From here on tok is an ontology match.
         if (tok.termin.tag is None):
             # A termin without a tag only marks that a name follows.
             named = True
         else:
             if (typ_ is not None):
                 break
             if (tok.termin.canonic_text == "СТОРОНА"):
                 # "СТОРОНА [-] <number>" on the same line gives "СТОРОНА <number>".
                 tt1 = tt.next0_
                 if (tt1 is not None and tt1.is_hiphen):
                     tt1 = tt1.next0_
                 if (not (isinstance(tt1, NumberToken))):
                     break
                 if (tt1.is_newline_before):
                     break
                 typ_ = "{0} {1}".format(tok.termin.canonic_text, tt1.value)
                 t1 = tt1
             else:
                 typ_ = tok.termin.canonic_text
                 t1 = tok.end_token
             break
         tt = tok.end_token
     if (typ_ is None):
         return None
     # An unnamed type found away from the start token must agree with a known type.
     if (not named and t1 != t and not typ_.startswith("СТОРОНА")):
         if (not ParticipantToken.__is_types_equal(typ_, add_typ1) and not ParticipantToken.__is_types_equal(typ_, add_typ2)):
             return None
     # Absorb surrounding brackets into the resulting span.
     if (BracketHelper.can_be_end_of_sequence(t1.next0_, False, None, False)):
         t1 = t1.next0_
         if (not t.is_whitespace_before and BracketHelper.can_be_start_of_sequence(t.previous, False, False)):
             t = t.previous
     elif (BracketHelper.can_be_start_of_sequence(t, False, False) and BracketHelper.can_be_end_of_sequence(t1.next0_, True, t, True)):
         t1 = t1.next0_
     if (br and t1.next0_ is not None and BracketHelper.can_be_end_of_sequence(t1.next0_, False, None, False)):
         t1 = t1.next0_
     res = ParticipantToken._new1578(t, t1, (ParticipantToken.Kinds.NAMEDAS if named else ParticipantToken.Kinds.PURE), typ_)
     if (t.is_char(':')):
         res.begin_token = t.next0_
     return res
Ejemplo n.º 4
0
 def __try_attach(t: 'Token', prev: typing.List['DateItemToken'],
                  detail_regime: bool) -> 'DateItemToken':
     from pullenti.ner.measure.internal.MeasureToken import MeasureToken
     if (t is None):
         return None
     nt = Utils.asObjectOrNull(t, NumberToken)
     begin = t
     end = t
     is_in_brack = False
     if ((BracketHelper.can_be_start_of_sequence(t, False, False)
          and t.next0_ is not None and (isinstance(t.next0_, NumberToken)))
             and BracketHelper.can_be_end_of_sequence(
                 t.next0_.next0_, False, None, False)):
         nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
         end = t.next0_.next0_
         is_in_brack = True
     if ((t.is_newline_before and BracketHelper.is_bracket(t, False) and
          (isinstance(t.next0_, NumberToken)))
             and BracketHelper.is_bracket(t.next0_.next0_, False)):
         nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
         end = t.next0_.next0_
         is_in_brack = True
     if (nt is not None):
         if (nt.int_value is None):
             return None
         if (nt.typ == NumberSpellingType.WORDS):
             if (nt.morph.class0_.is_noun
                     and not nt.morph.class0_.is_adjective):
                 if (t.next0_ is not None
                         and ((t.next0_.is_value("КВАРТАЛ", None)
                               or t.next0_.is_value("ПОЛУГОДИЕ", None)
                               or t.next0_.is_value("ПІВРІЧЧЯ", None)))):
                     pass
                 else:
                     return None
         if (NumberHelper.try_parse_age(nt) is not None):
             return None
         tt = None
         res = DateItemToken._new628(begin, end,
                                     DateItemToken.DateItemType.NUMBER,
                                     nt.int_value, nt.morph)
         if ((res.int_value == 20 and (isinstance(nt.next0_, NumberToken))
              and nt.next0_.int_value is not None)
                 and nt.next0_.length_char == 2 and prev is not None):
             num = 2000 + nt.next0_.int_value
             if ((num < 2030) and len(prev) > 0 and prev[len(prev) - 1].typ
                     == DateItemToken.DateItemType.MONTH):
                 ok = False
                 if (nt.whitespaces_after_count == 1):
                     ok = True
                 elif (nt.is_newline_after and nt.is_newline_after):
                     ok = True
                 if (ok):
                     nt = (Utils.asObjectOrNull(nt.next0_, NumberToken))
                     res.end_token = nt
                     res.int_value = num
         if (res.int_value == 20 or res.int_value == 201):
             tt = t.next0_
             if (tt is not None and tt.is_char('_')):
                 while tt is not None:
                     if (not tt.is_char('_')):
                         break
                     tt = tt.next0_
                 tt = DateItemToken.__test_year_rus_word(tt, False)
                 if (tt is not None):
                     res.int_value = 0
                     res.end_token = tt
                     res.typ = DateItemToken.DateItemType.YEAR
                     return res
         if (res.int_value <= 12 and t.next0_ is not None
                 and (t.whitespaces_after_count < 3)):
             tt = t.next0_
             if (tt.is_value("ЧАС", None)):
                 if (((isinstance(t.previous, TextToken))
                      and not t.previous.chars.is_letter
                      and not t.is_whitespace_before)
                         and (isinstance(t.previous.previous, NumberToken))
                         and not t.previous.is_whitespace_before):
                     pass
                 else:
                     res.typ = DateItemToken.DateItemType.HOUR
                     res.end_token = tt
                     tt = tt.next0_
                     if (tt is not None and tt.is_char('.')):
                         res.end_token = tt
                         tt = tt.next0_
             first_pass3072 = True
             while True:
                 if first_pass3072: first_pass3072 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.is_value("УТРО", "РАНОК")):
                     res.end_token = tt
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.is_value("ВЕЧЕР", "ВЕЧІР")):
                     res.end_token = tt
                     res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.is_value("ДЕНЬ", None)):
                     res.end_token = tt
                     if (res.int_value < 10):
                         res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.is_value("НОЧЬ", "НІЧ")):
                     res.end_token = tt
                     if (res.int_value == 12):
                         res.int_value = 0
                     elif (res.int_value > 9):
                         res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.is_comma or tt.morph.class0_.is_adverb):
                     continue
                 break
             if (res.typ == DateItemToken.DateItemType.HOUR):
                 return res
         can_be_year_ = True
         if (prev is not None and len(prev) > 0 and prev[len(prev) - 1].typ
                 == DateItemToken.DateItemType.MONTH):
             pass
         elif ((prev is not None and len(prev) >= 4 and
                prev[len(prev) - 1].typ == DateItemToken.DateItemType.DELIM)
               and prev[len(prev) - 2].can_by_month):
             pass
         elif (nt.next0_ is not None
               and ((nt.next0_.is_value("ГОД", None)
                     or nt.next0_.is_value("РІК", None)))):
             if (res.int_value < 1000):
                 can_be_year_ = False
         tt = DateItemToken.__test_year_rus_word(nt.next0_, False)
         if (tt is not None and DateItemToken.__is_new_age(tt.next0_)):
             res.typ = DateItemToken.DateItemType.YEAR
             res.end_token = tt
         elif (can_be_year_):
             if (res.can_be_year
                     or res.typ == DateItemToken.DateItemType.NUMBER):
                 tt = DateItemToken.__test_year_rus_word(
                     nt.next0_, res.is_newline_before)
                 if ((tt) is not None):
                     if ((tt.is_value("Г", None)
                          and not tt.is_whitespace_before
                          and t.previous is not None)
                             and ((t.previous.is_value("КОРПУС", None)
                                   or t.previous.is_value("КОРП", None)))):
                         pass
                     elif (
                         (((nt.next0_.is_value("Г", None) and
                            (t.whitespaces_before_count < 3) and t.previous
                            is not None) and t.previous.is_value("Я", None)
                           and t.previous.previous is not None)
                          and t.previous.previous.is_char_of("\\/")
                          and t.previous.previous.previous is not None)
                             and t.previous.previous.previous.is_value(
                                 "А", None)):
                         return None
                     elif (nt.next0_.length_char == 1
                           and not res.can_be_year
                           and ((prev is None or
                                 ((len(prev) > 0 and prev[len(prev) - 1].typ
                                   != DateItemToken.DateItemType.DELIM))))):
                         pass
                     else:
                         res.end_token = tt
                         res.typ = DateItemToken.DateItemType.YEAR
                         res.lang = tt.morph.language
             elif (tt is not None and (nt.whitespaces_after_count < 2)
                   and (nt.end_char - nt.begin_char) == 1):
                 res.end_token = tt
                 res.typ = DateItemToken.DateItemType.YEAR
                 res.lang = tt.morph.language
         if (nt.previous is not None):
             if (nt.previous.is_value("В", "У")
                     or nt.previous.is_value("К", None)
                     or nt.previous.is_value("ДО", None)):
                 tt = DateItemToken.__test_year_rus_word(nt.next0_, False)
                 if ((tt) is not None):
                     ok = False
                     if ((res.int_value < 100)
                             and (isinstance(tt, TextToken)) and
                         ((tt.term == "ГОДА" or tt.term == "РОКИ"))):
                         pass
                     else:
                         ok = True
                         if (nt.previous.is_value("ДО", None)
                                 and nt.next0_.is_value("Г", None)):
                             cou = 0
                             ttt = nt.previous.previous
                             while ttt is not None and (cou < 10):
                                 mt = MeasureToken.try_parse(
                                     ttt, None, False, False, False, False)
                                 if (mt is not None
                                         and mt.end_char > nt.end_char):
                                     ok = False
                                     break
                                 ttt = ttt.previous
                                 cou += 1
                     if (ok):
                         res.end_token = tt
                         res.typ = DateItemToken.DateItemType.YEAR
                         res.lang = tt.morph.language
                         res.begin_token = nt.previous
             elif (((nt.previous.is_value("IN", None)
                     or nt.previous.is_value("SINCE", None)))
                   and res.can_be_year):
                 uu = (NumbersWithUnitToken.try_parse(
                     nt, None, False, False, False, False)
                       if nt.previous.is_value("IN", None) else None)
                 if (uu is not None and len(uu.units) > 0):
                     pass
                 else:
                     res.typ = DateItemToken.DateItemType.YEAR
                     res.begin_token = nt.previous
             elif (nt.previous.is_value("NEL", None)
                   or nt.previous.is_value("DEL", None)):
                 if (res.can_be_year):
                     res.typ = DateItemToken.DateItemType.YEAR
                     res.lang = MorphLang.IT
                     res.begin_token = nt.previous
             elif (nt.previous.is_value("IL", None) and res.can_be_day):
                 res.lang = MorphLang.IT
                 res.begin_token = nt.previous
         t1 = res.end_token.next0_
         if (t1 is not None):
             if (t1.is_value("ЧАС", "ГОДИНА") or t1.is_value("HOUR", None)):
                 if ((((prev is not None and len(prev) == 2
                        and prev[0].can_be_hour)
                       and prev[1].typ == DateItemToken.DateItemType.DELIM
                       and not prev[1].is_whitespace_after)
                      and not prev[1].is_whitespace_after
                      and res.int_value >= 0) and (res.int_value < 59)):
                     prev[0].typ = DateItemToken.DateItemType.HOUR
                     res.typ = DateItemToken.DateItemType.MINUTE
                     res.end_token = t1
                 elif (res.int_value < 24):
                     if (t1.next0_ is not None and t1.next0_.is_char('.')):
                         t1 = t1.next0_
                     res.typ = DateItemToken.DateItemType.HOUR
                     res.end_token = t1
             elif ((res.int_value < 60)
                   and ((t1.is_value("МИНУТА", "ХВИЛИНА") or t1.is_value(
                       "МИН", None) or t.is_value("MINUTE", None)))):
                 if (t1.next0_ is not None and t1.next0_.is_char('.')):
                     t1 = t1.next0_
                 res.typ = DateItemToken.DateItemType.MINUTE
                 res.end_token = t1
             elif (
                 (res.int_value < 60) and
                 ((t1.is_value("СЕКУНДА", None) or t1.is_value("СЕК", None)
                   or t1.is_value("SECOND", None)))):
                 if (t1.next0_ is not None and t1.next0_.is_char('.')):
                     t1 = t1.next0_
                 res.typ = DateItemToken.DateItemType.SECOND
                 res.end_token = t1
             elif ((res.int_value < 30)
                   and ((t1.is_value("ВЕК", "ВІК")
                         or t1.is_value("СТОЛЕТИЕ", "СТОЛІТТЯ")))):
                 res.typ = DateItemToken.DateItemType.CENTURY
                 res.end_token = t1
             elif (res.int_value <= 4 and t1.is_value("КВАРТАЛ", None)):
                 res.typ = DateItemToken.DateItemType.QUARTAL
                 res.end_token = t1
             elif (res.int_value <= 2
                   and ((t1.is_value("ПОЛУГОДИЕ", None)
                         or t1.is_value("ПІВРІЧЧЯ", None)))):
                 res.typ = DateItemToken.DateItemType.HALFYEAR
                 res.end_token = t1
         return res
     t0 = Utils.asObjectOrNull(t, TextToken)
     if (t0 is None):
         return None
     txt = t0.get_source_text()
     if ((txt[0] == 'I' or txt[0] == 'X' or txt[0] == 'Х')
             or txt[0] == 'V'):
         lat = NumberHelper.try_parse_roman(t)
         if (lat is not None and lat.end_token.next0_ is not None
                 and lat.int_value is not None):
             val = lat.int_value
             tt = lat.end_token.next0_
             if (tt.is_value("КВАРТАЛ", None) and val > 0 and val <= 4):
                 return DateItemToken._new629(
                     t, tt, DateItemToken.DateItemType.QUARTAL, val)
             if (tt.is_value("ПОЛУГОДИЕ", "ПІВРІЧЧЯ") and val > 0
                     and val <= 2):
                 return DateItemToken._new629(
                     t, lat.end_token.next0_,
                     DateItemToken.DateItemType.HALFYEAR, val)
             if (tt.is_value("ВЕК", "ВІК")
                     or tt.is_value("СТОЛЕТИЕ", "СТОЛІТТЯ")):
                 return DateItemToken._new629(
                     t, lat.end_token.next0_,
                     DateItemToken.DateItemType.CENTURY, val)
             if (tt.is_value("В", None) and tt.next0_ is not None
                     and tt.next0_.is_char('.')):
                 if (prev is not None and len(prev) > 0
                         and prev[len(prev) - 1].typ
                         == DateItemToken.DateItemType.POINTER):
                     return DateItemToken._new629(
                         t, tt.next0_, DateItemToken.DateItemType.CENTURY,
                         val)
                 if (DateItemToken.__is_new_age(tt.next0_.next0_)):
                     return DateItemToken._new629(
                         t, tt.next0_, DateItemToken.DateItemType.CENTURY,
                         val)
             if (tt.is_hiphen):
                 lat2 = NumberHelper.try_parse_roman(tt.next0_)
                 if (lat2 is not None and lat2.int_value is not None
                         and lat2.end_token.next0_ is not None):
                     if (lat2.end_token.next0_.is_value("ВЕК", "ВІК")
                             or lat2.end_token.next0_.is_value(
                                 "СТОЛЕТИЕ", "СТОЛІТТЯ")):
                         ddd = DateItemToken.try_attach(
                             tt.next0_, None, False)
                         return DateItemToken._new634(
                             t, lat.end_token,
                             DateItemToken.DateItemType.CENTURY, val,
                             ((ddd.new_age if ddd is not None else 0)))
     if (t is not None and t.is_value("НАПРИКІНЦІ", None)):
         return DateItemToken._new635(t, t,
                                      DateItemToken.DateItemType.POINTER,
                                      "конец")
     if (t is not None and t.is_value("ДОНЕДАВНА", None)):
         return DateItemToken._new635(t, t,
                                      DateItemToken.DateItemType.POINTER,
                                      "сегодня")
     if (prev is None):
         if (t is not None):
             if (t.is_value("ОКОЛО", "БІЛЯ")
                     or t.is_value("ПРИМЕРНО", "ПРИБЛИЗНО")
                     or t.is_value("ABOUT", None)):
                 return DateItemToken._new635(
                     t, t, DateItemToken.DateItemType.POINTER, "около")
         if (t.is_value("ОК", None) or t.is_value("OK", None)):
             if (t.next0_ is not None and t.next0_.is_char('.')):
                 return DateItemToken._new635(
                     t, t.next0_, DateItemToken.DateItemType.POINTER,
                     "около")
             return DateItemToken._new635(
                 t, t, DateItemToken.DateItemType.POINTER, "около")
     tok = DateItemToken.M_SEASONS.try_parse(t, TerminParseAttr.NO)
     if ((tok is not None and
          (Utils.valToEnum(tok.termin.tag, DatePointerType))
          == DatePointerType.SUMMER and t.morph.language.is_ru)
             and (isinstance(t, TextToken))):
         str0_ = t.term
         if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
             tok = (None)
     if (tok is not None):
         return DateItemToken._new629(
             t, tok.end_token, DateItemToken.DateItemType.POINTER,
             Utils.valToEnum(tok.termin.tag, DatePointerType))
     npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
     if (npt is not None):
         tok = DateItemToken.M_SEASONS.try_parse(npt.end_token,
                                                 TerminParseAttr.NO)
         if ((tok is not None and
              (Utils.valToEnum(tok.termin.tag, DatePointerType))
              == DatePointerType.SUMMER and t.morph.language.is_ru)
                 and (isinstance(t, TextToken))):
             str0_ = t.term
             if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
                 tok = (None)
         if (tok is not None):
             return DateItemToken._new629(
                 t, tok.end_token, DateItemToken.DateItemType.POINTER,
                 Utils.valToEnum(tok.termin.tag, DatePointerType))
         typ_ = DateItemToken.DateItemType.NUMBER
         if (npt.noun.is_value("КВАРТАЛ", None)):
             typ_ = DateItemToken.DateItemType.QUARTAL
         elif (npt.end_token.is_value("ПОЛУГОДИЕ", None)
               or npt.end_token.is_value("ПІВРІЧЧЯ", None)):
             typ_ = DateItemToken.DateItemType.HALFYEAR
         elif (npt.end_token.is_value("НАЧАЛО", None)
               or npt.end_token.is_value("ПОЧАТОК", None)):
             return DateItemToken._new635(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "начало")
         elif (npt.end_token.is_value("СЕРЕДИНА", None)):
             return DateItemToken._new635(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "середина")
         elif (npt.end_token.is_value("КОНЕЦ", None)
               or npt.end_token.is_value("КІНЕЦЬ", None)
               or npt.end_token.is_value("НАПРИКІНЕЦЬ", None)):
             return DateItemToken._new635(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "конец")
         elif (npt.end_token.is_value("ВРЕМЯ", None)
               and len(npt.adjectives) > 0
               and npt.end_token.previous.is_value("НАСТОЯЩЕЕ", None)):
             return DateItemToken._new635(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "сегодня")
         elif (npt.end_token.is_value("ЧАС", None)
               and len(npt.adjectives) > 0
               and npt.end_token.previous.is_value("ДАНИЙ", None)):
             return DateItemToken._new635(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "сегодня")
         if (typ_ != DateItemToken.DateItemType.NUMBER or detail_regime):
             delta = 0
             if (len(npt.adjectives) > 0):
                 if (npt.adjectives[0].is_value("ПОСЛЕДНИЙ", "ОСТАННІЙ")):
                     return DateItemToken._new629(
                         t0, npt.end_token, typ_,
                         (4 if typ_ == DateItemToken.DateItemType.QUARTAL
                          else 2))
                 if (npt.adjectives[0].is_value("ПРЕДЫДУЩИЙ", "ПОПЕРЕДНІЙ")
                         or npt.adjectives[0].is_value("ПРОШЛЫЙ", None)):
                     delta = -1
                 elif (npt.adjectives[0].is_value("СЛЕДУЮЩИЙ", None)
                       or npt.adjectives[0].is_value("ПОСЛЕДУЮЩИЙ", None)
                       or npt.adjectives[0].is_value("НАСТУПНИЙ", None)):
                     delta = 1
                 else:
                     return None
             cou = 0
             tt = t.previous
             first_pass3073 = True
             while True:
                 if first_pass3073: first_pass3073 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (cou > 200):
                     break
                 dr = Utils.asObjectOrNull(tt.get_referent(),
                                           DateRangeReferent)
                 if (dr is None):
                     continue
                 if (typ_ == DateItemToken.DateItemType.QUARTAL):
                     ii = dr.quarter_number
                     if (ii < 1):
                         continue
                     ii += delta
                     if ((ii < 1) or ii > 4):
                         continue
                     return DateItemToken._new629(t0, npt.end_token, typ_,
                                                  ii)
                 if (typ_ == DateItemToken.DateItemType.HALFYEAR):
                     ii = dr.halfyear_number
                     if (ii < 1):
                         continue
                     ii += delta
                     if ((ii < 1) or ii > 2):
                         continue
                     return DateItemToken._new629(t0, npt.end_token, typ_,
                                                  ii)
     term = t0.term
     if (not str.isalnum(term[0])):
         if (t0.is_char_of(".\\/:") or t0.is_hiphen):
             return DateItemToken._new635(t0, t0,
                                          DateItemToken.DateItemType.DELIM,
                                          term)
         elif (t0.is_char(',')):
             return DateItemToken._new635(t0, t0,
                                          DateItemToken.DateItemType.DELIM,
                                          term)
         else:
             return None
     if (term == "O" or term == "О"):
         if ((isinstance(t.next0_, NumberToken))
                 and not t.is_whitespace_after
                 and len(t.next0_.value) == 1):
             return DateItemToken._new629(t, t.next0_,
                                          DateItemToken.DateItemType.NUMBER,
                                          t.next0_.int_value)
     if (str.isalpha(term[0])):
         inf = DateItemToken.M_MONTHES.try_parse(t, TerminParseAttr.NO)
         if (inf is not None and inf.termin.tag is None):
             inf = DateItemToken.M_MONTHES.try_parse(
                 inf.end_token.next0_, TerminParseAttr.NO)
         if (inf is not None and (isinstance(inf.termin.tag, int))):
             return DateItemToken._new653(inf.begin_token, inf.end_token,
                                          DateItemToken.DateItemType.MONTH,
                                          inf.termin.tag, inf.termin.lang)
     return None
Ejemplo n.º 5
0
 def create_nickname(pr : 'PersonReferent', t : 'Token') -> 'Token':
     """Attach the nickname(s) announced by a nickname keyword to a person.

     Starting at ``t``, punctuation, prepositions and an opening parenthesis
     are skipped while looking for one of the nickname keywords. The tokens
     after the keyword are then read either as a sequence of bracketed
     texts or as a sequence of person-name items / single-token referents,
     and every text found is added to ``pr`` as an ``ATTR_NICKNAME`` slot.

     Args:
         pr: person referent that receives the nickname slots.
         t: token at which the scan starts (may be None).

     Returns:
         The last token consumed (including a closing ')' when an opening
         '(' was skipped before the keyword), or None when no keyword was
         seen or no nickname could be attached.
     """
     keyword_terms = (
         ("ПРОЗВИЩЕ", "ПРІЗВИСЬКО"),
         ("КЛИЧКА", None),
         ("ПСЕВДОНИМ", "ПСЕВДОНІМ"),
         ("ПСЕВДО", None),
         ("ПОЗЫВНОЙ", "ПОЗИВНИЙ"),
     )
     keyword_seen = False
     in_parens = False
     # Walk over separators and keywords; stop at the first token that is
     # neither, which is where the nickname itself is expected to start.
     while t is not None:
         if t.is_hiphen or t.is_comma or t.is_char_of(".:;"):
             pass
         elif t.morph.class0_.is_preposition:
             pass
         elif t.is_char('('):
             in_parens = True
         elif any(t.is_value(term, alt) for term, alt in keyword_terms):
             keyword_seen = True
         else:
             break
         t = t.next0_
     if t is None or not keyword_seen:
         return None

     def after_closing_paren(tok):
         # Swallow the ')' that matches a '(' skipped before the keyword.
         if not in_parens:
             return tok
         following = tok.next0_
         if following is not None and following.is_char(')'):
             return following
         return tok

     def bracket_text(bracket):
         # Text strictly between the opening and closing bracket tokens.
         return MiscHelper.get_text_value(bracket.begin_token.next0_, bracket.end_token.previous, GetTextAttr.NO)

     if not BracketHelper.is_bracket(t, True):
         # Unbracketed nicknames: person-name items or one-token referents,
         # optionally separated by commas / conjunctions.
         last = None
         while t is not None:
             if not t.is_comma_and:
                 if last is not None and t.chars.is_all_lower:
                     break
                 if t.whitespaces_before_count > 2:
                     break
                 consumed = None
                 items = PersonItemToken.try_attach_list(t, None, PersonItemToken.ParseAttr.NO, 10)
                 if items is not None and len(items) in (1, 2):
                     nick = MiscHelper.get_text_value(items[0].begin_token, items[-1].end_token, GetTextAttr.NO)
                     if nick is not None:
                         pr.add_slot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
                         consumed = items[-1].end_token
                 if consumed is None:
                     if (isinstance(t, ReferentToken)) and not t.chars.is_all_lower and t.begin_token == t.end_token:
                         nick = MiscHelper.get_text_value_of_meta_token(Utils.asObjectOrNull(t, ReferentToken), GetTextAttr.NO)
                         pr.add_slot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
                         consumed = t
                 if consumed is None:
                     break
                 t = after_closing_paren(consumed)
                 last = t
             t = t.next0_
         return last

     # Bracketed nicknames: the first one is mandatory, further bracketed
     # texts separated by commas / conjunctions are collected as well.
     bracket = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
     if bracket is None:
         return None
     nick = bracket_text(bracket)
     if nick is None:
         return None
     pr.add_slot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
     t = bracket.end_token
     cur = t.next0_
     while cur is not None:
         if not cur.is_comma_and:
             if not BracketHelper.is_bracket(cur, True):
                 break
             bracket = BracketHelper.try_parse(cur, BracketParseAttr.NO, 100)
             if bracket is None:
                 break
             nick = bracket_text(bracket)
             if nick is not None:
                 pr.add_slot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
             cur = bracket.end_token
             t = cur
         cur = cur.next0_
     return after_closing_paren(t)
Ejemplo n.º 6
0
 def attach_first(self, p : 'InstrumentParticipantReferent', min_char : int, max_char : int) -> 'ReferentToken':
     """Build the referent token for participant ``p`` and bind nearby entities.

     The method works in three passes around this token:

     1. Scan backwards from ``self.begin_token`` (not before ``min_char``),
        collecting organization / person / address / etc. referents and
        extending the start of the span to the earliest one found.
     2. Scan forwards from ``self.end_token`` (not past ``max_char``),
        adding further referents as REF or DELEGATE slots and extending
        the end of the span, until a stop condition is met.
     3. Re-scan the resulting span for contract grounds.

     Args:
         p: participant referent that receives the slots.
         min_char: lowest ``begin_char`` the backward scan may visit.
         max_char: highest ``begin_char`` the forward scan may visit;
             0 disables the upper bound.

     Returns:
         A ``ReferentToken`` for ``p`` covering the collected span.
     """
     # Referent classes that get bound to the participant as slots.
     linkable = (OrganizationReferent, PhoneReferent, PersonReferent, PersonPropertyReferent, AddressReferent, UriReferent, PersonIdentityReferent, BankDataReferent)
     person_like = (PersonReferent, PersonPropertyReferent)

     # Pass 1: look backwards for referents that precede the participant.
     tt0 = self.begin_token
     refs = list()
     t = tt0.previous
     while t is not None and t.begin_char >= min_char:
         if t.is_newline_after:
             if t.newlines_after_count > 1:
                 break
             if isinstance(t.next0_, NumberToken):
                 break
         if ParticipantToken.__try_attach_contract_ground(t, p, False) is None:
             r = t.get_referent()
             if isinstance(r, linkable):
                 if r not in refs:
                     # Inserting at the front keeps refs in text order.
                     refs.insert(0, r)
                 tt0 = t
         t = t.previous
     for r in refs:
         # Persons that follow a leading organization act on its behalf.
         if r != refs[0] and isinstance(refs[0], OrganizationReferent) and isinstance(r, person_like):
             p.add_slot(InstrumentParticipantReferent.ATTR_DELEGATE, r, False, 0)
         else:
             p.add_slot(InstrumentParticipantReferent.ATTR_REF, r, False, 0)

     # Pass 2: look forwards, past an optional bracket and comma.
     rt = ReferentToken(p, tt0, self.end_token)
     t = self.end_token.next0_
     if BracketHelper.is_bracket(t, False):
         t = t.next0_
     if t is not None and t.is_char(','):
         t = t.next0_
     while t is not None and (max_char == 0 or t.begin_char <= max_char):
         if t.is_value("СТОРОНА", None):
             break
         r = t.get_referent()
         if isinstance(r, linkable):
             # "<property>, <person>" on one line: fold the property into
             # the person and continue with the person referent.
             if (isinstance(r, PersonPropertyReferent) and t.next0_ is not None and t.next0_.is_comma
                     and isinstance(t.next0_.next0_, ReferentToken)
                     and isinstance(t.next0_.next0_.get_referent(), PersonReferent)
                     and not t.next0_.is_newline_after):
                 pe = Utils.asObjectOrNull(t.next0_.next0_.get_referent(), PersonReferent)
                 pe.add_slot(PersonReferent.ATTR_ATTR, r, False, 0)
                 r = pe
                 t = t.next0_.next0_
             is_delegate = False
             if t.previous.is_value("ЛИЦО", None) or t.previous.is_value("ИМЯ", None):
                 is_delegate = True
             if (t.previous.is_value("КОТОРЫЙ", None) and t.previous.previous is not None
                     and (t.previous.previous.is_value("ИМЯ", None) or t.previous.previous.is_value("ЛИЦО", None))):
                 is_delegate = True
             if isinstance(r, person_like) and is_delegate:
                 slot_name = InstrumentParticipantReferent.ATTR_DELEGATE
             else:
                 slot_name = InstrumentParticipantReferent.ATTR_REF
             p.add_slot(slot_name, r, False, 0)
             rt.end_token = t
         else:
             tt = ParticipantToken.__try_attach_contract_ground(t, p, False)
             if tt is not None:
                 rt.end_token = tt
                 t = rt.end_token
                 if rt.begin_char == tt.begin_char:
                     rt.begin_token = tt
             elif t.is_value("В", None) and t.next0_ is not None and t.next0_.is_value("ЛИЦО", None):
                 t = t.next0_
             elif t.is_value("ОТ", None) and t.next0_ is not None and t.next0_.is_value("ИМЯ", None):
                 t = t.next0_
             elif t.is_value("ПО", None) and t.next0_ is not None and t.next0_.is_value("ПОРУЧЕНИЕ", None):
                 t = t.next0_
             else:
                 if t.is_newline_before:
                     break
                 if t.get_morph_class_in_dictionary() == MorphClass.VERB:
                     # Only these verbs may appear inside a participant description.
                     if not any(t.is_value(verb, None) for verb in ("УДОСТОВЕРЯТЬ", "ПРОЖИВАТЬ", "ЗАРЕГИСТРИРОВАТЬ", "ДЕЙСТВОВАТЬ")):
                         break
                 if t.is_and and t.previous is not None and t.previous.is_comma:
                     break
                 # next0_ is None when the conjunction is the last token of the
                 # text; without this guard the lookup below raised AttributeError.
                 if t.is_and and t.next0_ is not None and t.next0_.get_referent() is not None:
                     if isinstance(t.next0_.get_referent(), OrganizationReferent):
                         break
                     pe = Utils.asObjectOrNull(t.next0_.get_referent(), PersonReferent)
                     if pe is not None:
                         # A person carrying this attribute value ends the scan.
                         has_ip = any(
                             s.type_name == PersonReferent.ATTR_ATTR
                             and str(s.value).startswith("индивидуальный предприниматель")
                             for s in pe.slots)
                         if has_ip:
                             break
         t = t.next0_

     # Pass 3: pick up contract grounds inside (or overlapping) the span.
     t = rt.begin_token
     while t is not None and t.end_char <= rt.end_char:
         tt = ParticipantToken.__try_attach_contract_ground(t, p, True)
         if tt is not None:
             if tt.end_char > rt.end_char:
                 rt.end_token = tt
             t = tt
         t = t.next0_
     return rt
Ejemplo n.º 7
0
 def get_name_ex(begin: 'Token',
                 end: 'Token',
                 cla: 'MorphClass',
                 mc: 'MorphCase',
                 gender: 'MorphGender' = MorphGender.UNDEFINED,
                 ignore_brackets_and_hiphens: bool = False,
                 ignore_geo_referent: bool = False) -> str:
     """Assemble a name string from the tokens ``begin`` .. ``end`` (inclusive).

     Each token contributes one chunk to the result:

     * ``TextToken`` - the ``normal_case`` of a morphological word form that
       passes the ``cla`` / ``mc`` / ``gender`` filters (when at least one of
       them is set); otherwise the token's ``term``, or, for tokens with
       ``chars.is_last_lower`` longer than two characters, the source text cut
       after its last upper-case character.
     * ``NumberToken`` - its ``value``, or the original word for a
       single-token adjective number spelled in words.
     * any other ``MetaToken`` - the result of a recursive call over its inner
       tokens.

     Args:
         begin: first token of the span.
         end: last token of the span.
         cla: word-class filter for word forms (``value == 0`` disables it).
         mc: case filter for word forms (undefined disables it).
         gender: gender filter for word forms.  If no form passes with the
             gender filter applied, the selection is retried without it.
         ignore_brackets_and_hiphens: when True, bracket tokens are dropped
             (a sequence opened by one of ``(<[`` is rendered through a
             recursive call) and hyphens surrounded by whitespace are skipped.
             When False, text tokens joined by a hyphen are glued with ``-``.
         ignore_geo_referent: when True, meta tokens other than ``begin`` whose
             referent has ``type_name == "GEO"`` are skipped.

     Returns:
         The collected string, or None when ``begin`` / ``end`` is None, when
         ``begin`` ends after ``end`` starts (and they are different tokens),
         or when nothing was collected.
     """
     if (end is None or begin is None):
         return None
     if (begin.end_char > end.begin_char and begin != end):
         return None
     res = io.StringIO()

     def pick_normal_form(word: 'TextToken', use_gender: bool) -> str:
         # Return the normal form of a word form passing the active filters.
         # A form whose normal_case equals the token's term overrides an
         # earlier candidate; otherwise the first passing form wins.
         chosen = None
         for item in word.morph.items:
             wf = Utils.asObjectOrNull(item, MorphWordForm)
             if (wf is None):
                 continue
             if (cla.value != (0)
                     and (((wf.class0_.value) & (cla.value))) == 0):
                 continue
             if (not mc.is_undefined and ((wf.case_) & mc).is_undefined):
                 continue
             if (use_gender and gender != MorphGender.UNDEFINED
                     and ((wf.gender) & (gender)) == (MorphGender.UNDEFINED)):
                 continue
             if (chosen is None or wf.normal_case == word.term):
                 chosen = wf.normal_case
         return chosen

     def write_gap(tok: 'Token') -> None:
         # Put a space before the next chunk unless the buffer is empty or the
         # chunk is glued (no whitespace) to a trailing '-'.
         if (res.tell() == 0):
             return
         if (tok.is_whitespace_before
                 or Utils.getCharAtStringIO(res, res.tell() - 1) != '-'):
             res.write(' ')

     prefix = None
     t = begin
     started = False
     while True:
         # Emulates `for (t = begin; ...; t = t.next0_)`: every `continue`
         # below advances from the current value of t.
         if (started):
             t = t.next0_
         started = True
         if (t is None or t.end_char > end.end_char):
             break
         if (res.tell() > 1000):
             # Hard cap on the size of the produced name.
             break
         if (t.is_table_control_char):
             continue
         if (ignore_brackets_and_hiphens):
             if (BracketHelper.is_bracket(t, False)):
                 if (t == end):
                     break
                 if (t.is_char_of("(<[")):
                     br = BracketHelper.try_parse(t, BracketParseAttr.NO,
                                                  100)
                     if (br is not None and br.end_char <= end.end_char):
                         inner = br.begin_token.next0_
                         tmp = ProperNameHelper.get_name_ex(
                             inner, br.end_token.previous,
                             MorphClass.UNDEFINED, MorphCase.UNDEFINED,
                             MorphGender.UNDEFINED,
                             ignore_brackets_and_hiphens, False)
                         if (tmp is not None):
                             # A closing bracket group that holds a single
                             # non-letter, non-referent token is omitted.
                             omit = (br.end_char == end.end_char
                                     and inner == br.end_token.previous
                                     and not inner.chars.is_letter
                                     and not isinstance(inner, ReferentToken))
                             if (not omit):
                                 res.write(" {0}{1}{2}".format(
                                     t.get_source_text(), tmp,
                                     br.end_token.get_source_text()))
                         # Resume after the closing bracket.
                         t = br.end_token
                 continue
             if (t.is_hiphen):
                 if (t == end):
                     break
                 elif (t.is_whitespace_before or t.is_whitespace_after):
                     continue
                 # A hyphen with no whitespace around it falls through and is
                 # appended like any other text token.
         tt = Utils.asObjectOrNull(t, TextToken)
         if (tt is not None):
             if (not ignore_brackets_and_hiphens):
                 if ((tt.next0_ is not None and tt.next0_.is_hiphen and
                      (isinstance(tt.next0_.next0_, TextToken)))
                         and tt != end and tt.next0_ != end):
                     # "word-word": remember the left part and skip the
                     # hyphen; the prefix is attached to the next text token.
                     if (prefix is None):
                         prefix = tt.term
                     else:
                         prefix = "{0}-{1}".format(prefix, tt.term)
                     # The hyphen cannot be `end` here (checked above), so the
                     # loop always continues with the token after it.
                     t = tt.next0_
                     continue
             s = None
             if (cla.value != (0) or not mc.is_undefined
                     or gender != MorphGender.UNDEFINED):
                 s = pick_normal_form(tt, True)
                 if (s is None and gender != MorphGender.UNDEFINED):
                     # Nothing matched the gender: retry without that filter.
                     s = pick_normal_form(tt, False)
             if (s is None):
                 s = tt.term
                 if (tt.chars.is_last_lower and tt.length_char > 2):
                     # Keep the source text up to its last upper-case letter.
                     s = tt.get_source_text()
                     for i in range(len(s) - 1, -1, -1):
                         if (s[i].isupper()):
                             s = s[:i + 1]
                             break
             if (prefix is not None):
                 delim = " " if ignore_brackets_and_hiphens else "-"
                 s = "{0}{1}{2}".format(prefix, delim, s)
             prefix = None
             if (res.tell() > 0 and len(s) > 0):
                 if (str.isalnum(s[0])):
                     ch0 = Utils.getCharAtStringIO(res, res.tell() - 1)
                     if (ch0 != '-'):
                         res.write(' ')
                 elif (not ignore_brackets_and_hiphens
                       and BracketHelper.can_be_start_of_sequence(
                           tt, False, False)):
                     res.write(' ')
             res.write(str(s))
         elif (isinstance(t, NumberToken)):
             write_gap(t)
             nt = Utils.asObjectOrNull(t, NumberToken)
             if ((t.morph.class0_.is_adjective
                  and nt.typ == NumberSpellingType.WORDS
                  and nt.begin_token == nt.end_token)
                     and (isinstance(nt.begin_token, TextToken))):
                 res.write(str(nt.begin_token.term))
             else:
                 res.write(str(nt.value))
         elif (isinstance(t, MetaToken)):
             if ((ignore_geo_referent and t != begin
                  and t.get_referent() is not None)
                     and t.get_referent().type_name == "GEO"):
                 continue
             s = ProperNameHelper.get_name_ex(t.begin_token, t.end_token,
                                              cla, mc, gender,
                                              ignore_brackets_and_hiphens,
                                              ignore_geo_referent)
             if (not Utils.isNullOrEmpty(s)):
                 write_gap(t)
                 res.write(str(s))
         if (t == end):
             break
     if (res.tell() == 0):
         return None
     return Utils.toStringStringIO(res)
Ejemplo n.º 8
0
 def process(self, kit: 'AnalysisKit') -> None:
     """Find weapon referents in the text and embed them into the token chain.

     The work is done in two passes over the tokens of ``kit``:

     1. Weapon item lists are parsed and attached; every resulting referent is
        registered in the analyzer data and embedded.  While doing so, two
        lookup collections are filled: model strings (plain and prefixed with
        the brand) mapped to the list of referents carrying them, and name
        strings mapped to their referent.
     2. The text is scanned again for bare occurrences of those models and
        names; each unambiguous hit is embedded as a new ``ReferentToken``
        pointing to the already known referent.

     Args:
         kit: analysis kit that supplies the token chain and analyzer data.
     """
     ad = kit.get_analyzer_data(self)
     models = TerminCollection()
     objs_by_model = dict()
     obj_by_names = TerminCollection()

     # Pass 1: explicit weapon descriptions.
     t = kit.first_token
     started = False
     while True:
         if (started):
             t = t.next0_
         started = True
         if (t is None):
             break
         its = WeaponItemToken.try_parse_list(t, 10)
         if (its is None):
             continue
         rts = self.__try_attach(its, False)
         if (rts is None):
             continue
         for rt in rts:
             rt.referent = ad.register_referent(rt.referent)
             kit.embed_token(rt)
             # Continue scanning after the embedded token.
             t = rt
             for s in rt.referent.slots:
                 if (s.type_name == WeaponReferent.ATTR_MODEL):
                     mod = str(s.value)
                     if (not mod):
                         # Nothing to index (and mod[0] would raise).
                         continue
                     # k == 0: the model itself; k == 1: "<brand> <model>".
                     for k in range(2):
                         if (not mod[0].isdigit()):
                             li = objs_by_model.setdefault(mod, [])
                             if (rt.referent not in li):
                                 li.append(rt.referent)
                             models.add_string(mod, li, None, False)
                         if (k > 0):
                             break
                         brand = rt.referent.get_string_value(
                             WeaponReferent.ATTR_BRAND)
                         if (brand is None):
                             break
                         mod = "{0} {1}".format(brand, mod)
                 elif (s.type_name == WeaponReferent.ATTR_NAME):
                     obj_by_names.add(
                         Termin._new100(str(s.value), rt.referent))
     if (len(objs_by_model) == 0 and len(obj_by_names.termins) == 0):
         return

     # Pass 2: bare mentions of the models and names collected above.
     t = kit.first_token
     started = False
     while True:
         if (started):
             t = t.next0_
         started = True
         if (t is None):
             break
         br = BracketHelper.try_parse(t, BracketParseAttr.NO, 10)
         if (br is not None):
             # A known name that fills the whole bracket sequence.
             toks = obj_by_names.try_parse(t.next0_, TerminParseAttr.NO)
             if (toks is not None
                     and toks.end_token.next0_ == br.end_token):
                 rt0 = ReferentToken(
                     Utils.asObjectOrNull(toks.termin.tag, Referent),
                     br.begin_token, br.end_token)
                 kit.embed_token(rt0)
                 t = rt0
                 continue
         if (not (isinstance(t, TextToken))):
             continue
         if (not t.chars.is_letter):
             continue
         tok = models.try_parse(t, TerminParseAttr.NO)
         if (tok is None):
             # Names are only tried on tokens that are not all lower-case.
             if (not t.chars.is_all_lower):
                 tok = obj_by_names.try_parse(t, TerminParseAttr.NO)
             if (tok is None):
                 continue
         if (not tok.is_whitespace_after):
             # Glued to the next token: accept only before ",.)" or a bracket.
             if (tok.end_token.next0_ is None
                     or not tok.end_token.next0_.is_char_of(",.)")):
                 if (not BracketHelper.is_bracket(tok.end_token.next0_,
                                                  False)):
                     continue
         # Model termins are tagged with a list of referents (used only when
         # it holds exactly one); name termins are tagged with the referent.
         li = Utils.asObjectOrNull(tok.termin.tag, list)
         if (li is not None and len(li) == 1):
             tr = li[0]
         else:
             tr = Utils.asObjectOrNull(tok.termin.tag, Referent)
         if (tr is not None):
             # A brand right before the mention is absorbed into the token.
             tit = WeaponItemToken.try_parse(tok.begin_token.previous, None,
                                             False, True)
             if (tit is not None and tit.typ == WeaponItemToken.Typs.BRAND):
                 tr.add_slot(WeaponReferent.ATTR_BRAND, tit.value, False, 0)
                 tok.begin_token = tit.begin_token
             rt0 = ReferentToken(tr, tok.begin_token, tok.end_token)
             kit.embed_token(rt0)
             t = rt0
Ejemplo n.º 9
0
 def parse(t : 'Token', max_char : int=0, prev : 'InstrToken'=None) -> 'InstrToken':
     """Parse one line-level item of an instrument (document) starting at ``t``.

     Leading table control characters (except 0x1F) are skipped, then the
     tokens up to the next line break are examined.  Recognized items are
     returned as soon as they are identified, with ``typ`` set to one of the
     ``ILTypes`` used below (PERSON, TYP, APPENDIX, REGNUMBER, ORGANIZATION,
     GEO, DATE, APPROVED, DIRECTIVE).  Otherwise the consumed span is
     returned with ``ILTypes.UNDEFINED`` (or ``QUESTION``) and the
     ``no_words`` / ``has_verb`` flags filled in.

     Args:
         t: token to start from.
         max_char: when positive, scanning stops at the first token whose
             ``begin_char`` exceeds this value.
         prev: the item parsed before this one, if any; consulted for person
             continuation and to suppress an appendix right after an appendix.

     Returns:
         The parsed ``InstrToken``, or None when ``t`` is None (also after
         skipping control characters) or when no token was consumed.
     """
     from pullenti.ner.instrument.internal.InstrToken1 import InstrToken1
     is_start_of_line = False
     t00 = t
     if (t is not None): 
         is_start_of_line = t00.is_newline_before
         # Skip table control characters (0x1F is kept as a real token).
         while t is not None:
             if (t.is_table_control_char and not t.is_char(chr(0x1F))): 
                 if (t.is_newline_after and not is_start_of_line): 
                     is_start_of_line = True
                 t = t.next0_
             else: 
                 break
     if (t is None): 
         return None
     if (t.is_newline_before): 
         is_start_of_line = True
     if (is_start_of_line): 
         # Table-of-contents headings: accepted when InstrToken1 reports an INDEX.
         if ((t.is_value("СОДЕРЖИМОЕ", "ВМІСТ") or t.is_value("СОДЕРЖАНИЕ", "ЗМІСТ") or t.is_value("ОГЛАВЛЕНИЕ", "ЗМІСТ")) or ((t.is_value("СПИСОК", None) and t.next0_ is not None and t.next0_.is_value("РАЗДЕЛ", None)))): 
             cont = InstrToken1.parse(t, True, None, 0, None, False, 0, False, False)
             if (cont is not None and cont.typ == InstrToken1.Types.INDEX): 
                 return InstrToken(t, cont.end_token)
     t0 = t
     t1 = None
     has_word = False
     first_pass3255 = True
     # Main scan: emulates `for (; t is not None; t = t.next0_)`; t1 tracks the
     # last token accepted into the resulting span.
     while True:
         if first_pass3255: first_pass3255 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_newline_before and t != t0): 
             break
         if (max_char > 0 and t.begin_char > max_char): 
             break
         if (is_start_of_line and t == t0): 
             if (t.is_value("ГЛАВА", None)): 
                 next0__ = InstrToken.parse(t.next0_, 0, None)
                 if (next0__ is not None and next0__.typ == ILTypes.PERSON): 
                     next0__.begin_token = t
                     return next0__
             tt = None
             if ((isinstance(t.get_referent(), PersonReferent)) or (isinstance(t.get_referent(), PersonPropertyReferent)) or (isinstance(t.get_referent(), InstrumentParticipantReferent))): 
                 return InstrToken.__correct_person(InstrToken._new1511(t00, t, ILTypes.PERSON, t))
             is_ref = False
             # NOTE(review): this first branch looks unreachable - a
             # PersonPropertyReferent already returned just above, so is_ref
             # can never become True. Left as is; confirm the intent.
             if (isinstance(t.get_referent(), PersonPropertyReferent)): 
                 tt = t.next0_
                 is_ref = True
             elif (prev is not None and prev.typ == ILTypes.PERSON): 
                 rt = t.kit.process_referent(PersonAnalyzer.ANALYZER_NAME, t)
                 if (rt is not None): 
                     if (isinstance(rt.referent, PersonReferent)): 
                         return InstrToken._new1512(t00, rt.end_token, ILTypes.PERSON)
                     tt = rt.end_token.next0_
             cou = 0
             t11 = (None if tt is None else tt.previous)
             first_pass3256 = True
             # Look ahead (at most over 4 line breaks) for a person referent.
             while True:
                 if first_pass3256: first_pass3256 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.is_table_control_char): 
                     continue
                 re = tt.get_referent()
                 if (isinstance(re, PersonReferent)): 
                     return InstrToken._new1511(t00, tt, ILTypes.PERSON, tt)
                 if (isinstance(re, GeoReferent)): 
                     t11 = tt
                     continue
                 if (re is not None): 
                     break
                 if (DecreeToken.is_keyword(tt, False) is not None): 
                     break
                 if (tt.is_newline_before): 
                     cou += 1
                     if (cou > 4): 
                         break
             if (tt is None and is_ref): 
                 return InstrToken._new1511(t00, Utils.ifNotNull(t11, t), ILTypes.PERSON, t)
         # Decree attributes (type, number, organization, territory, owner)
         # are only accepted at the first token; later they end the span.
         dt = DecreeToken.try_attach(t, None, False)
         if (dt is not None): 
             if (dt.typ == DecreeToken.ItemType.TYP and not t.chars.is_all_lower): 
                 if (t != t0): 
                     break
                 # A document type followed by a verb on the same line is
                 # treated as ordinary text, not as a type heading.
                 has_verb_ = False
                 tt = dt.end_token
                 while tt is not None: 
                     if (tt.is_newline_before): 
                         break
                     elif ((isinstance(tt, TextToken)) and tt.is_pure_verb): 
                         has_verb_ = True
                         break
                     tt = tt.next0_
                 if (not has_verb_): 
                     res2 = InstrToken._new1515(t0, dt.end_token, ILTypes.TYP, Utils.ifNotNull(dt.full_value, dt.value))
                     if (res2.value == "ДОПОЛНИТЕЛЬНОЕ СОГЛАШЕНИЕ" or res2.value == "ДОДАТКОВА УГОДА"): 
                         if (res2.begin_char > 500 and res2.newlines_before_count > 1): 
                             res2.typ = ILTypes.APPENDIX
                     return res2
             if (dt.typ == DecreeToken.ItemType.NUMBER): 
                 if (t != t0): 
                     break
                 return InstrToken._new1515(t0, dt.end_token, ILTypes.REGNUMBER, dt.value)
             if (dt.typ == DecreeToken.ItemType.ORG): 
                 if (t != t0): 
                     break
                 return InstrToken._new1517(t0, dt.end_token, ILTypes.ORGANIZATION, dt.ref, dt.value)
             if (dt.typ == DecreeToken.ItemType.TERR): 
                 if (t != t0): 
                     break
                 re = InstrToken._new1517(t0, dt.end_token, ILTypes.GEO, dt.ref, dt.value)
                 # Extend the place over a trailing "КРЕМЛЬ" or "ДОМ СОВЕТ..." (+ geo).
                 t1 = re.end_token.next0_
                 if (t1 is not None and t1.is_char(',')): 
                     t1 = t1.next0_
                 if (t1 is not None and t1.is_value("КРЕМЛЬ", None)): 
                     re.end_token = t1
                 elif ((t1 is not None and t1.is_value("ДОМ", "БУДИНОК") and t1.next0_ is not None) and t1.next0_.is_value("СОВЕТ", "РАД")): 
                     re.end_token = t1.next0_
                     if (t1.next0_.next0_ is not None and (isinstance(t1.next0_.next0_.get_referent(), GeoReferent))): 
                         re.end_token = t1.next0_.next0_
                 return re
             if (dt.typ == DecreeToken.ItemType.OWNER): 
                 if (t != t0): 
                     break
                 # Owners whose referent text starts with "агент" are ignored.
                 if (dt.ref is not None and str(dt.ref.referent).startswith("агент")): 
                     dt = (None)
                 if (dt is not None): 
                     res1 = InstrToken._new1517(t0, dt.end_token, ILTypes.PERSON, dt.ref, dt.value)
                     return InstrToken.__correct_person(res1)
         # Bracketed sequences are swallowed whole.
         if (BracketHelper.can_be_start_of_sequence(t, False, False)): 
             br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
             if (br is not None): 
                 t1 = br.end_token
                 t = t1
                 continue
             if (t.next0_ is not None and BracketHelper.can_be_end_of_sequence(t.next0_, False, None, False)): 
                 t1 = t.next0_
                 t = t1
                 continue
         if (isinstance(t, TextToken)): 
             if (t.is_char('_')): 
                 t1 = t
                 continue
         r = t.get_referent()
         if (isinstance(r, DateReferent)): 
             tt = t
             if (tt.next0_ is not None and tt.next0_.is_char_of(",;")): 
                 tt = tt.next0_
             # A date inside a line is just part of the span.
             if (not t.is_newline_before and not tt.is_newline_after): 
                 t1 = tt
                 continue
             if (not has_word): 
                 return InstrToken._new1511(t, tt, ILTypes.DATE, t)
             if (t != t0): 
                 break
         has_word = True
         if (isinstance(r, InstrumentParticipantReferent)): 
             # A participant that wraps an organization, bank data or URI is
             # not treated as a person below.
             tt = t.begin_token
             first_pass3257 = True
             while True:
                 if first_pass3257: first_pass3257 = False
                 else: tt = tt.next0_
                 if (not (tt is not None and (tt.end_char < t.end_char))): break
                 rr = tt.get_referent()
                 if (rr is None): 
                     continue
                 if ((isinstance(rr, OrganizationReferent)) or (isinstance(rr, BankDataReferent)) or (isinstance(rr, UriReferent))): 
                     r = (None)
                     break
         if ((isinstance(r, PersonReferent)) or (isinstance(r, PersonPropertyReferent)) or (isinstance(r, InstrumentParticipantReferent))): 
             if (t != t0): 
                 break
             res1 = InstrToken._new1511(t, t, ILTypes.PERSON, t)
             return InstrToken.__correct_person(res1)
         if (isinstance(r, OrganizationReferent)): 
             if (t != t0): 
                 break
             return InstrToken._new1511(t, t, ILTypes.ORGANIZATION, t)
         if (isinstance(r, DecreePartReferent)): 
             dpr = Utils.asObjectOrNull(r, DecreePartReferent)
             if (dpr.appendix is not None): 
                 if (t.is_newline_before or is_start_of_line): 
                     if (t.is_newline_after or t.whitespaces_before_count > 30): 
                         return InstrToken._new1515(t, t, ILTypes.APPENDIX, "ПРИЛОЖЕНИЕ")
                     # Otherwise the rest of the line must consist of noun phrases only.
                     ok = True
                     tt = t.next0_
                     first_pass3258 = True
                     while True:
                         if first_pass3258: first_pass3258 = False
                         else: tt = tt.next0_
                         if (not (tt is not None)): break
                         if (tt.is_newline_before): 
                             break
                         npt = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
                         if (npt is not None): 
                             tt = npt.end_token
                             continue
                         ok = False
                         break
                     if (ok): 
                         return InstrToken._new1515(t, t, ILTypes.APPENDIX, "ПРИЛОЖЕНИЕ")
         if ((isinstance(r, DecreeReferent)) and r.kind == DecreeKind.PUBLISHER and t == t0): 
             res1 = InstrToken._new1512(t, t, ILTypes.APPROVED)
             tt = t.next0_
             first_pass3259 = True
             while True:
                 if first_pass3259: first_pass3259 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.is_char_of(",;")): 
                     continue
                 if ((isinstance(tt.get_referent(), DecreeReferent)) and tt.get_referent().kind == DecreeKind.PUBLISHER): 
                     # Absorb each following publisher referent. The scanned
                     # token is tt; assigning t here (as before) left the span
                     # unchanged because res1 already ends at t.
                     res1.end_token = tt
                 else: 
                     break
             return res1
         if (t.is_value("ЗА", None) and t.next0_ is not None and t.is_newline_before): 
             rr = t.next0_.get_referent()
             if ((isinstance(rr, PersonReferent)) or (isinstance(rr, PersonPropertyReferent)) or (isinstance(rr, InstrumentParticipantReferent))): 
                 if (t != t0): 
                     break
                 res1 = InstrToken._new1511(t, t.next0_, ILTypes.PERSON, t.next0_)
                 t = t.next0_.next0_
                 if ((isinstance(rr, InstrumentParticipantReferent)) and t is not None): 
                     r = t.get_referent()
                     if ((r) is not None): 
                         if ((isinstance(r, PersonReferent)) or (isinstance(r, PersonPropertyReferent))): 
                             res1.end_token = t
                             res1.ref = (t)
                 return res1
         # Directive words; _m_directives_norm[ii] is used as the value for _m_directives[ii].
         ii = 0
         while ii < len(InstrToken._m_directives): 
             if (t.is_value(InstrToken._m_directives[ii], None)): 
                 if (t.next0_ is not None and t.next0_.is_value("СЛЕДУЮЩЕЕ", "НАСТУПНЕ")): 
                     if (t != t0): 
                         break
                     t11 = t.next0_
                     ok = False
                     if (t11.next0_ is not None and t11.next0_.is_char_of(":.") and t11.next0_.is_newline_after): 
                         ok = True
                         t11 = t11.next0_
                     if (ok): 
                         return InstrToken._new1515(t, t11, ILTypes.DIRECTIVE, InstrToken._m_directives_norm[ii])
                 if (t.is_newline_after or ((t.next0_ is not None and t.next0_.is_char(':') and t.next0_.is_newline_after))): 
                     if (t != t0): 
                         break
                     if (not t.is_newline_before): 
                         if ((InstrToken._m_directives_norm[ii] != "ПРИКАЗ" and InstrToken._m_directives_norm[ii] != "ПОСТАНОВЛЕНИЕ" and InstrToken._m_directives_norm[ii] != "НАКАЗ") and InstrToken._m_directives_norm[ii] != "ПОСТАНОВУ"): 
                             break
                     return InstrToken._new1515(t, (t if t.is_newline_after else t.next0_), ILTypes.DIRECTIVE, InstrToken._m_directives_norm[ii])
                 break
             ii += 1
         # A directive word typed letter by letter at the start of a line.
         if (t.is_newline_before and t.chars.is_letter and t.length_char == 1): 
             for d in InstrToken._m_directives: 
                 t11 = MiscHelper.try_attach_word_by_letters(d, t, True)
                 if (t11 is not None): 
                     if (t11.next0_ is not None and t11.next0_.is_char(':')): 
                         t11 = t11.next0_
                     return InstrToken._new1512(t, t11, ILTypes.DIRECTIVE)
         tte = (t.begin_token if isinstance(t, MetaToken) else t)
         term = (tte.term if isinstance(tte, TextToken) else None)
         if (is_start_of_line and not tte.chars.is_all_lower and t == t0): 
             npt = NounPhraseHelper.try_parse(tte, NounPhraseParseAttr.NO, 0, None)
             if (npt is not None and ((term == "ПРИЛОЖЕНИЯ" or term == "ДОДАТКИ"))): 
                 # The plural heading is never treated as an appendix start.
                 npt = (None)
             if (npt is not None and npt.morph.case_.is_nominative and (isinstance(npt.end_token, TextToken))): 
                 term1 = npt.end_token.term
                 # NOTE(review): the last comparison uses `term` (first token)
                 # while the others use `term1` - possibly a typo; confirm.
                 if (((term1 == "ПРИЛОЖЕНИЕ" or term1 == "ДОДАТОК" or term1 == "МНЕНИЕ") or term1 == "ДУМКА" or term1 == "АКТ") or term1 == "ФОРМА" or term == "ЗАЯВКА"): 
                     # Skip an optional number / single letter after the heading word.
                     tt1 = npt.end_token.next0_
                     dt1 = DecreeToken.try_attach(tt1, None, False)
                     if (dt1 is not None and dt1.typ == DecreeToken.ItemType.NUMBER): 
                         tt1 = dt1.end_token.next0_
                     elif (isinstance(tt1, NumberToken)): 
                         tt1 = tt1.next0_
                     elif ((isinstance(tt1, TextToken)) and tt1.length_char == 1 and tt1.chars.is_letter): 
                         tt1 = tt1.next0_
                     # `ok` accumulates a series of vetoes; the appendix is
                     # reported only if none of them fires.
                     ok = True
                     if (tt1 is None): 
                         ok = False
                     elif (tt1.is_value("В", "У")): 
                         ok = False
                     elif (tt1.is_value("К", None) and tt1.is_newline_before): 
                         return InstrToken._new1515(t, t, ILTypes.APPENDIX, term1)
                     elif (not tt1.is_newline_before and InstrToken._check_entered(tt1) is not None): 
                         ok = False
                     elif (tt1 == t.next0_ and ((tt1.is_char(':') or ((tt1.is_value("НА", None) and term1 != "ЗАЯВКА"))))): 
                         ok = False
                     if (ok): 
                         br = BracketHelper.try_parse(tt1, BracketParseAttr.NO, 100)
                         if (br is not None): 
                             tt1 = br.end_token.next0_
                             if (br.end_token.next0_ is None or not br.end_token.is_newline_after or br.end_token.next0_.is_char_of(";,")): 
                                 ok = False
                             if (tt1 is not None and tt1.is_value("ПРИЛОЖЕНИЕ", "ДОДАТОК")): 
                                 ok = False
                     if (prev is not None and prev.typ == ILTypes.APPENDIX): 
                         ok = False
                     if (ok): 
                         # Veto when a table control character (other than 0x1F,
                         # or 0x1E directly before) is found within 300 tokens back.
                         cou = 0
                         ttt = tte.previous
                         while ttt is not None and (cou < 300): 
                             if (ttt.is_table_control_char): 
                                 if (not ttt.is_char(chr(0x1F))): 
                                     if (ttt == tte.previous and ttt.is_char(chr(0x1E))): 
                                         pass
                                     else: 
                                         ok = False
                                 break
                             ttt = ttt.previous
                             cou += 1
                     if (ok): 
                         it1 = InstrToken1.parse(t, True, None, 0, None, False, 0, False, False)
                         if (it1 is not None): 
                             if (it1.has_verb): 
                                 ok = False
                     if (ok and t.previous is not None): 
                         # Veto when the preceding significant token is ';' or ':'.
                         ttp = t.previous
                         first_pass3260 = True
                         while True:
                             if first_pass3260: first_pass3260 = False
                             else: ttp = ttp.previous
                             if (not (ttp is not None)): break
                             if (ttp.is_table_control_char and not ttp.is_char(chr(0x1F))): 
                                 continue
                             if (BracketHelper.is_bracket(ttp, False) and not BracketHelper.can_be_end_of_sequence(ttp, False, None, False)): 
                                 continue
                             if (ttp.is_char_of(";:")): 
                                 ok = False
                             break
                     if ((ok and t.previous is not None and (t.newlines_before_count < 3)) and not t.is_newline_after): 
                         # Veto when one of the two previous lines starts
                         # (after numbers) with an appendix word.
                         lines = 0
                         ttp = t.previous
                         first_pass3261 = True
                         while True:
                             if first_pass3261: first_pass3261 = False
                             else: ttp = ttp.previous
                             if (not (ttp is not None)): break
                             if (not ttp.is_newline_before): 
                                 continue
                             while ttp is not None and (ttp.end_char < t.begin_char): 
                                 if (isinstance(ttp, NumberToken)): 
                                     pass
                                 elif ((isinstance(ttp, TextToken)) and ttp.length_char > 1): 
                                     if (ttp.is_value("ПРИЛОЖЕНИЕ", "ДОДАТОК")): 
                                         ok = False
                                     break
                                 else: 
                                     break
                                 ttp = ttp.next0_
                             lines += 1
                             if (lines > 1): 
                                 break
                     if (ok and ((term1 != "ПРИЛОЖЕНИЕ" and term1 != "ДОДАТОК" and term1 != "МНЕНИЕ"))): 
                         if (t.newlines_before_count < 3): 
                             ok = False
                     if (ok): 
                         return InstrToken._new1515(t, t, ILTypes.APPENDIX, term1)
         # Two-word appendix headings at the start of a line, unless a verb
         # follows on the same line.
         app = False
         if ((((term == "ОСОБОЕ" or term == "ОСОБЛИВЕ")) and t.next0_ is not None and t.next0_.is_value("МНЕНИЕ", "ДУМКА")) and t == t0 and is_start_of_line): 
             app = True
         if ((((term == "ДОПОЛНИТЕЛЬНОЕ" or term == "ДОДАТКОВА")) and t.next0_ is not None and t.next0_.is_value("СОГЛАШЕНИЕ", "УГОДА")) and t == t0 and is_start_of_line): 
             app = True
         if (app): 
             tt = t.next0_
             while tt is not None: 
                 if (tt.is_newline_before): 
                     break
                 elif (tt.get_morph_class_in_dictionary() == MorphClass.VERB): 
                     app = False
                     break
                 tt = tt.next0_
             if (app): 
                 return InstrToken._new1512(t, t.next0_, ILTypes.APPENDIX)
         if (not t.chars.is_all_lower and t == t0): 
             tt = InstrToken._check_approved(t)
             if (tt is not None): 
                 if (tt.next0_ is not None and (isinstance(tt.next0_.get_referent(), DecreeReferent))): 
                     return InstrToken._new1511(t, tt, ILTypes.APPROVED, tt.next0_.get_referent())
                 dt1 = DecreeToken.try_attach(tt.next0_, None, False)
                 if (dt1 is not None and dt1.typ == DecreeToken.ItemType.TYP): 
                     return InstrToken._new1512(t, tt, ILTypes.APPROVED)
         # Ordinary token: include it in the span and keep scanning.
         t1 = t
         is_start_of_line = False
     if (t1 is None): 
         return None
     res = InstrToken._new1512(t00, t1, ILTypes.UNDEFINED)
     res.no_words = True
     # Classify the plain span: any referent or letter token counts as a word.
     t = t0
     first_pass3262 = True
     while True:
         if first_pass3262: first_pass3262 = False
         else: t = t.next0_
         if (not (t is not None and t.end_char <= t1.end_char)): break
         if (not (isinstance(t, TextToken))): 
             if (isinstance(t, ReferentToken)): 
                 res.no_words = False
             continue
         if (not t.chars.is_letter): 
             continue
         res.no_words = False
         if (t.is_pure_verb): 
             res.has_verb = True
     if (t0.is_value("ВОПРОС", "ПИТАННЯ") and t0.next0_ is not None and t0.next0_.is_char_of(":.")): 
         res.typ = ILTypes.QUESTION
     return res
Ejemplo n.º 10
0
 def try_parse(t : 'Token', loc_onto : 'IntOntologyCollection') -> 'NamedItemToken':
     """Try to build a NamedItemToken that starts at token ``t``.

     The alternatives are tried in this order, and the first one that
     succeeds is returned:

     1. ``t`` is a ReferentToken whose referent is a PERSON, PERSONPROPERTY,
        ORGANIZATION or a GeoReferent (any other referent yields None).
     2. ``t`` matches the type-word termin collection.
     3. ``t`` matches the well-known-name termin collection.
     4. ``t`` starts a compass-direction expression recognised by
        ``MiscLocationHelper.try_attach_nord_west``.
     5. ``t`` starts a capitalised noun phrase with adjectives whose noun is
        itself recognised (recursively) as an item with a type value.
     6. ``t`` opens a bracket sequence whose first word is not lower-case.
     7. ``t`` is a single non-lower-case word longer than two characters,
        optionally glued to a second word with a hyphen.

     Args:
         t: first token to analyse; ``None`` is allowed.
         loc_onto: only forwarded to the recursive calls of this function.

     Returns:
         The recognised item, or ``None`` when nothing matches.
     """
     if t is None:
         return None
     # 1. An already extracted entity is accepted only for a few entity kinds.
     if isinstance(t, ReferentToken):
         r = t.get_referent()
         if r.type_name in ("PERSON", "PERSONPROPERTY", "ORGANIZATION") or isinstance(r, GeoReferent):
             return NamedItemToken._new1758(t, t, r, t.morph)
         return None
     # Both dictionaries are probed up front: the type branch also needs the
     # name match to detect a type word that is at the same time a known name.
     typ = NamedItemToken.__m_types.try_parse(t, TerminParseAttr.NO)
     nam = NamedItemToken.__m_names.try_parse(t, TerminParseAttr.NO)
     # 2. Type word.
     if typ is not None:
         if not isinstance(t, TextToken):
             return None
         res = NamedItemToken._new1759(typ.begin_token, typ.end_token, typ.morph, typ.chars)
         res.kind = Utils.valToEnum(typ.termin.tag, NamedEntityKind)
         res.type_value = typ.termin.canonic_text
         if (nam is not None and nam.end_token == typ.end_token
                 and not t.chars.is_all_lower
                 and Utils.valToEnum(nam.termin.tag, NamedEntityKind) == res.kind):
             res.name_value = nam.termin.canonic_text
             res.is_wellknown = True
         return res
     # 3. Well-known name.
     if nam is not None:
         if nam.begin_token.chars.is_all_lower:
             return None
         res = NamedItemToken._new1759(nam.begin_token, nam.end_token, nam.morph, nam.chars)
         res.kind = Utils.valToEnum(nam.termin.tag, NamedEntityKind)
         res.name_value = nam.termin.canonic_text
         # The name counts as well known only when it stands alone: it must
         # not be glued to the previous token, and may be glued to the next
         # one only if that is a punctuation mark followed by whitespace.
         ok = True
         if not t.is_whitespace_before and t.previous is not None:
             ok = False
         elif not t.is_whitespace_after and t.next0_ is not None:
             if not (t.next0_.is_char_of(",.;!?") and t.next0_.is_whitespace_after):
                 ok = False
         if ok:
             res.is_wellknown = True
             res.type_value = Utils.asObjectOrNull(nam.termin.tag2, str)
         return res
     # 4. Compass-direction expression.
     adj = MiscLocationHelper.try_attach_nord_west(t)
     if adj is not None:
         if adj.morph.class0_.is_noun:
             # In noun form only a multi-token expression ending with this
             # particular word is accepted as a location.
             if adj.end_token.is_value("ВОСТОК", None):
                 if adj.begin_token == adj.end_token:
                     return None
                 loc = NamedItemToken._new1761(t, adj.end_token, adj.morph)
                 loc.kind = NamedEntityKind.LOCATION
                 loc.name_value = MiscHelper.get_text_value(t, adj.end_token, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
                 loc.is_wellknown = True
                 return loc
             return None
         if adj.whitespaces_after_count > 2:
             return None
         following = adj.end_token.next0_
         # Direction adjective followed by an extracted geographic entity.
         if isinstance(following, ReferentToken) and isinstance(following.get_referent(), GeoReferent):
             loc = NamedItemToken._new1761(t, following, following.morph)
             loc.kind = NamedEntityKind.LOCATION
             loc.name_value = MiscHelper.get_text_value(t, following, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
             loc.is_wellknown = True
             loc.ref = following.get_referent()
             return loc
         # Direction adjective followed by something that parses as a location.
         res = NamedItemToken.try_parse(following, loc_onto)
         if res is not None and res.kind == NamedEntityKind.LOCATION:
             s = adj.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.SINGULAR, res.morph.gender, False)
             if s is not None:
                 if res.name_value is None:
                     res.name_value = s.upper()
                 else:
                     res.name_value = "{0} {1}".format(s.upper(), res.name_value)
                     res.type_value = None
                 res.begin_token = t
                 res.chars = t.chars
                 res.is_wellknown = True
                 return res
         # Otherwise fall through to the remaining alternatives.
     # 5. Capitalised adjectives + recognised type noun.
     if t.chars.is_capital_upper and not MiscHelper.can_be_start_of_sentence(t):
         npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
         if npt is not None and len(npt.adjectives) > 0:
             test = NamedItemToken.try_parse(npt.noun.begin_token, loc_onto)
             if test is not None and test.end_token == npt.end_token and test.type_value is not None:
                 test.begin_token = t
                 tmp = io.StringIO()
                 for a in npt.adjectives:
                     s = a.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.SINGULAR, test.morph.gender, False)
                     # A missing normal form must not leak into the name as
                     # the literal text "None" (print() would str() it).
                     if s is None:
                         continue
                     if tmp.tell() > 0:
                         tmp.write(' ')
                     tmp.write(s)
                 test.name_value = Utils.toStringStringIO(tmp)
                 test.chars = t.chars
                 if test.kind == NamedEntityKind.LOCATION:
                     test.is_wellknown = True
                 return test
     # 6. Bracketed (quoted) name.
     if (BracketHelper.is_bracket(t, True) and t.next0_ is not None
             and t.next0_.chars.is_letter and not t.next0_.chars.is_all_lower):
         br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
         if br is not None:
             res = NamedItemToken(t, br.end_token)
             res.is_in_bracket = True
             res.name_value = MiscHelper.get_text_value(t, br.end_token, GetTextAttr.NO)
             # If the whole bracket content is a known name, use its canonic form.
             nam = NamedItemToken.__m_names.try_parse(t.next0_, TerminParseAttr.NO)
             if nam is not None and nam.end_token == br.end_token.previous:
                 res.kind = Utils.valToEnum(nam.termin.tag, NamedEntityKind)
                 res.is_wellknown = True
                 res.name_value = nam.termin.canonic_text
             return res
     # 7. Any single non-lower-case word longer than two characters.
     if isinstance(t, TextToken) and t.chars.is_letter and not t.chars.is_all_lower and t.length_char > 2:
         res = NamedItemToken._new1761(t, t, t.morph)
         term = t.term
         # Words with these endings are taken as written instead of normalised.
         if term.endswith(("О", "И", "Ы")):
             res.name_value = term
         else:
             res.name_value = t.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
         res.chars = t.chars
         # Hyphenated continuation written without spaces in the same alphabet.
         if (not t.is_whitespace_after and t.next0_ is not None and t.next0_.is_hiphen
                 and isinstance(t.next0_.next0_, TextToken)
                 and not t.next0_.next0_.is_whitespace_after
                 and t.chars.is_cyrillic_letter == t.next0_.next0_.chars.is_cyrillic_letter):
             tail = t.next0_.next0_
             res.end_token = tail
             res.name_value = "{0}-{1}".format(res.name_value, tail.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False))
         return res
     return None
Ejemplo n.º 11
0
 def __analize_list_items(chi : typing.List['FragToken'], ind : int) -> int:
     """Split the fragment ``chi[ind]`` into list-item / content child fragments.

     Two modes are visible in the code:

     * The fragment carries a multi-line change value (``has_changes`` and
       ``multiline_changes_value``): a CITATION child is created for the quoted
       text, its inner structure is analysed, and 1 is returned.
     * Otherwise the fragment text is cut into lines with
       ``ListHelper.LineToken.parse_list``, the ``is_list_item`` / ``number``
       marks of the lines are normalised by several passes, and one child
       fragment (LISTITEM, CONTENT or LISTHEAD) is appended to
       ``chi[ind].children`` for every remaining line.

     Args:
         chi: sibling fragments; only ``chi[ind]`` receives new children, the
             following ones are only read.
         ind: index of the fragment to analyse.

     Returns:
         -1 when nothing was done (index out of range, unsupported fragment
         kind, fewer than two lines or fewer than two list items); otherwise a
         positive count: 1, or, for a CONTENT fragment whose following CONTENT
         siblings were merged into the list, the number of siblings covered
         starting from ``ind``.
     """
     if (ind >= len(chi)): 
         return -1
     res = chi[ind]
     ki = res.kind
     # Only these structural kinds are analysed; anything else is rejected.
     if (((ki == InstrumentKind.CHAPTER or ki == InstrumentKind.CLAUSE or ki == InstrumentKind.CONTENT) or ki == InstrumentKind.ITEM or ki == InstrumentKind.SUBITEM) or ki == InstrumentKind.CLAUSEPART or ki == InstrumentKind.INDENTION): 
         pass
     else: 
         return -1
     # --- Mode 1: the fragment contains a multi-line change (quoted new text).
     if (res.has_changes and res.multiline_changes_value is not None): 
         ci = res.multiline_changes_value
         cit = FragToken._new1340(ci.begin_token, ci.end_token, InstrumentKind.CITATION)
         res.children.append(cit)
         # Extend the citation over the surrounding brackets and over a
         # ';' or '.' that directly follows the closing bracket.
         if (BracketHelper.is_bracket(cit.begin_token.previous, True)): 
             cit.begin_token = cit.begin_token.previous
         if (BracketHelper.is_bracket(cit.end_token.next0_, True)): 
             cit.end_token = cit.end_token.next0_
             if (cit.end_token.next0_ is not None and cit.end_token.next0_.is_char_of(";.")): 
                 cit.end_token = cit.end_token.next0_
         res.fill_by_content_children()
         # No-op branch: nothing happens here besides reading the attribute.
         if (res.children[0].has_changes): 
             pass
         # Work out which structural kind the quoted text represents.
         cit_kind = InstrumentKind.UNDEFINED
         if (isinstance(ci.tag, DecreeChangeReferent)): 
             dcr = Utils.asObjectOrNull(ci.tag, DecreeChangeReferent)
             if (dcr.value is not None and len(dcr.value.new_items) > 0): 
                 # Use the first new item; only the part before the first
                 # space is passed on as the attribute name.
                 mnem = dcr.value.new_items[0]
                 i = 0
                 i = mnem.find(' ')
                 if (((i)) > 0): 
                     mnem = mnem[0:0+i]
                 cit_kind = PartToken._get_instr_kind_by_typ(PartToken._get_type_by_attr_name(mnem))
             elif (len(dcr.owners) > 0 and (isinstance(dcr.owners[0], DecreePartReferent)) and dcr.kind == DecreeChangeKind.NEW): 
                 pat = Utils.asObjectOrNull(dcr.owners[0], DecreePartReferent)
                 # Despite its name, min0_ ends up holding the greatest
                 # non-zero rank among the slots; cit_kind follows that slot.
                 min0_ = 0
                 for s in pat.slots: 
                     ty = PartToken._get_type_by_attr_name(s.type_name)
                     if (ty == PartToken.ItemType.UNDEFINED): 
                         continue
                     l_ = PartToken._get_rank(ty)
                     if (l_ == 0): 
                         continue
                     if (l_ > min0_ or min0_ == 0): 
                         min0_ = l_
                         cit_kind = PartToken._get_instr_kind_by_typ(ty)
         sub = None
         if (cit_kind != InstrumentKind.UNDEFINED and cit_kind != InstrumentKind.APPENDIX): 
             # A known part kind: analyse the quoted text as content of that kind.
             sub = FragToken(ci.begin_token, ci.end_token)
             wr = ContentAnalyzeWhapper()
             wr.analyze(sub, None, True, cit_kind)
             sub.kind = InstrumentKind.CONTENT
         else: 
             # Unknown kind or an appendix: analyse the quoted text as a document.
             sub = FragToken.create_document(ci.begin_token, ci.end_char, cit_kind)
         if (sub is None or len(sub.children) == 0): 
             pass
         elif ((sub.kind == InstrumentKind.CONTENT and len(sub.children) > 0 and sub.children[0].begin_token == sub.begin_token) and sub.children[len(sub.children) - 1].end_token == sub.end_token): 
             # The children cover the whole CONTENT wrapper: attach them
             # directly and drop the wrapper.
             cit.children.extend(sub.children)
         else: 
             cit.children.append(sub)
         return 1
     # --- Mode 2: ordinary text, look for list items.
     end_char = res.end_char
     # The parsed token is stored on the fragment; it is not used again below.
     if (res._itok is None): 
         res._itok = InstrToken1.parse(res.begin_token, True, None, 0, None, False, res.end_char, False, False)
     lines = ListHelper.LineToken.parse_list(res.begin_token, end_char, None)
     if (lines is None or (len(lines) < 1)): 
         return -1
     ret = 1
     if (res.kind == InstrumentKind.CONTENT): 
         # Pull in the lines of the following CONTENT siblings while they
         # continue the list. EDITIONS and COMMENT siblings are skipped over,
         # any other kind stops the scan.
         j = ind + 1
         while j < len(chi): 
             if (chi[j].kind == InstrumentKind.CONTENT): 
                 lines2 = ListHelper.LineToken.parse_list(chi[j].begin_token, chi[j].end_char, lines[len(lines) - 1])
                 if (lines2 is None or (len(lines2) < 1)): 
                     break
                 if (not lines2[0].is_list_item): 
                     # A non-capitalised first line ending with ':' that is
                     # followed by a list item is accepted as a list item too.
                     if ((len(lines2) > 1 and lines2[1].is_list_item and lines2[0].end_token.is_char_of(":")) and not lines2[0].begin_token.chars.is_capital_upper): 
                         lines2[0].is_list_item = True
                     else: 
                         break
                 lines.extend(lines2)
                 # Number of siblings covered so far, counting chi[ind] itself.
                 ret = ((j - ind) + 1)
             elif (chi[j].kind != InstrumentKind.EDITIONS and chi[j].kind != InstrumentKind.COMMENT): 
                 break
             j += 1
     if (len(lines) < 2): 
         return -1
     # Exactly two leading list items that do not start with number 1 (no
     # third list item after them): unmark both.
     if ((len(lines) > 1 and lines[0].is_list_item and lines[1].is_list_item) and lines[0].number != 1): 
         if (len(lines) == 2 or not lines[2].is_list_item): 
             lines[1].is_list_item = False
             lines[0].is_list_item = lines[1].is_list_item
     # Pass over the first line of every run of consecutive list items
     # (emulated for-loop: `continue` still performs i += 1).
     i = 0
     first_pass3276 = True
     while True:
         if first_pass3276: first_pass3276 = False
         else: i += 1
         if (not (i < len(lines))): break
         if (lines[i].is_list_item): 
             # Not the first item of its run: already handled.
             if (i > 0 and lines[i - 1].is_list_item): 
                 continue
             if (((i + 1) < len(lines)) and lines[i + 1].is_list_item): 
                 pass
             else: 
                 # A run of a single item is not a list.
                 lines[i].is_list_item = False
                 continue
             j = 0
             new_line = False
             j = (i + 1)
             # Does any later item of the run start on a new text line?
             while j < len(lines): 
                 if (not lines[j].is_list_item): 
                     break
                 elif (lines[j].is_newline_before): 
                     new_line = True
                 j += 1
             if (new_line): 
                 continue
             # The run is also kept when the line before it ends with ':'.
             if (i > 0 and lines[i - 1].end_token.is_char(':')): 
                 continue
             # Otherwise unmark the whole run.
             j = i
             while j < len(lines): 
                 if (not lines[j].is_list_item): 
                     break
                 else: 
                     lines[j].is_list_item = False
                 j += 1
     # A final line ending with '.' after a list item ending with ';' is
     # treated as the last list item if it is shorter than twice the length of
     # that item or starts with a lower-case token.
     if (len(lines) > 2): 
         last = lines[len(lines) - 1]
         last2 = lines[len(lines) - 2]
         if ((not last.is_list_item and last.end_token.is_char('.') and last2.is_list_item) and last2.end_token.is_char(';')): 
             if ((last.length_char < (last2.length_char * 2)) or last.begin_token.chars.is_all_lower): 
                 last.is_list_item = True
     # Merge adjacent non-list lines into one, except when the second line
     # ends with ':' and is followed by a list item.
     i = 0
     while i < (len(lines) - 1): 
         if (not lines[i].is_list_item and not lines[i + 1].is_list_item): 
             if (((i + 2) < len(lines)) and lines[i + 2].is_list_item and lines[i + 1].end_token.is_char(':')): 
                 pass
             else: 
                 lines[i].end_token = lines[i + 1].end_token
                 del lines[i + 1]
                 # Stay on the same index so the merged line is re-examined.
                 i -= 1
         i += 1
     # Two-level lists: starting from an item numbered 1, unnumbered list items
     # become sub-items of the preceding numbered item.
     i = 0
     while i < (len(lines) - 1): 
         if (lines[i].is_list_item): 
             if (lines[i].number == 1): 
                 # Every line up to the end must be a list item, the numbered
                 # ones must continue 2, 3, ... and at least one must be
                 # unnumbered.
                 ok = True
                 num = 1
                 nonum = 0
                 j = i + 1
                 while j < len(lines): 
                     if (not lines[j].is_list_item): 
                         ok = False
                         break
                     elif (lines[j].number > 0): 
                         num += 1
                         if (lines[j].number != num): 
                             ok = False
                             break
                     else: 
                         nonum += 1
                     j += 1
                 # Note: this break leaves the outer scan altogether.
                 if (not ok or nonum == 0 or (num < 2)): 
                     break
                 lt = lines[i]
                 j = i + 1
                 while j < len(lines): 
                     if (lines[j].number > 0): 
                         lt = lines[j]
                     else: 
                         # Move the unnumbered line into the list kept in the
                         # tag of the current numbered line and stretch that
                         # line to cover it.
                         chli = Utils.asObjectOrNull(lt.tag, list)
                         if (chli is None): 
                             chli = list()
                             lt.tag = (chli)
                         lt.end_token = lines[j].end_token
                         chli.append(lines[j])
                         del lines[j]
                         j -= 1
                     j += 1
         i += 1
     # At least two list items must remain.
     cou = 0
     for li in lines: 
         if (li.is_list_item): 
             cou += 1
     if (cou < 2): 
         return -1
     # Per run of list items: validate the numbering and, if needed, split a
     # leading text off the first item (emulated for-loop again).
     i = 0
     first_pass3277 = True
     while True:
         if first_pass3277: first_pass3277 = False
         else: i += 1
         if (not (i < len(lines))): break
         if (lines[i].is_list_item): 
             i0 = i
             ok = True
             cou = 1
             # Numbers must be exactly 1, 2, 3, ...; on exit i is just past the
             # run and cou is the run length plus one.
             while i < len(lines): 
                 if (not lines[i].is_list_item): 
                     break
                 elif (lines[i].number != cou): 
                     ok = False
                 i += 1; cou += 1
             if (not ok): 
                 # Inconsistent numbering: reset all numbers of the run to 0.
                 i = i0
                 while i < len(lines): 
                     if (not lines[i].is_list_item): 
                         break
                     else: 
                         lines[i].number = 0
                     i += 1
             # Run of three or more items where the first item starts with a
             # different token text than the second and third ones.
             if (cou > 3 and lines[i0].begin_token.get_source_text() != lines[i0 + 1].begin_token.get_source_text() and lines[i0 + 1].begin_token.get_source_text() == lines[i0 + 2].begin_token.get_source_text()): 
                 pref = lines[i0 + 1].begin_token.get_source_text()
                 # All remaining items of the run must start with the same text.
                 ok = True
                 j = i0 + 2
                 while j < i: 
                     if (pref != lines[j].begin_token.get_source_text()): 
                         ok = False
                         break
                     j += 1
                 if (not ok): 
                     continue
                 # Search the first item backwards (its first token excluded)
                 # for a token with that text.
                 tt = None
                 ok = False
                 tt = lines[i0].end_token.previous
                 while tt is not None and tt != lines[i0].begin_token: 
                     if (tt.get_source_text() == pref): 
                         ok = True
                         break
                     tt = tt.previous
                 if (ok): 
                     # Split: the text before the found token becomes a
                     # separate line inserted ahead of the first item.
                     li0 = ListHelper.LineToken(lines[i0].begin_token, tt.previous)
                     lines[i0].begin_token = tt
                     lines.insert(i0, li0)
                     # Compensate for the inserted element.
                     i += 1
     # Materialise the lines as child fragments of the analysed fragment.
     for li in lines: 
         li.correct_begin_token()
         ch = FragToken._new1357(li.begin_token, li.end_token, (InstrumentKind.LISTITEM if li.is_list_item else InstrumentKind.CONTENT), li.number)
         # A non-list line ending with ':' is the head of a list.
         if (ch.kind == InstrumentKind.CONTENT and ch.end_token.is_char(':')): 
             ch.kind = InstrumentKind.LISTHEAD
         res.children.append(ch)
         # Sub-items collected in the tag become LISTITEM children; any text
         # before the first sub-item becomes a leading CONTENT child.
         chli = Utils.asObjectOrNull(li.tag, list)
         if (chli is not None): 
             for lt in chli: 
                 ch.children.append(FragToken._new1340(lt.begin_token, lt.end_token, InstrumentKind.LISTITEM))
             if (ch.begin_char < ch.children[0].begin_char): 
                 ch.children.insert(0, FragToken._new1340(ch.begin_token, ch.children[0].begin_token.previous, InstrumentKind.CONTENT))
     return ret