Ejemplo n.º 1
0
 def tryAttachCity(li: typing.List['CityItemToken'],
                   ad: 'AnalyzerDataWithOntology',
                   always: bool = False) -> 'ReferentToken':
     """Try to build a city referent token from a list of city item tokens.

     Several attachment strategies are tried in a fixed order and the first
     one that produces a result wins.  When ``always`` is true, two further
     attempts (the same helpers called with their last flag set to True) are
     made after the regular ones have failed.

     Args:
         li: candidate city item tokens.  The list is modified in place when
             it has more than two items and starts with a MISC item followed
             by a NOUN item: the NOUN item is marked as not doubtful and the
             MISC item is removed.
         ad: analyzer data handed to the first strategy.
         always: when true, also run the fallback attempts.

     Returns:
         The attached token, possibly widened to include a preceding
         "ТЕРРИТОРИЯ" ("territory") word and the brackets around the name,
         or None when ``li`` is None/empty or nothing could be attached.
     """
     # Nothing can be attached from an empty list, and without this guard the
     # ``li[0]`` lookup in the ``always`` fallback below can raise IndexError.
     if not li:
         return None
     if (len(li) > 2 and li[0].typ == CityItemToken.ItemType.MISC
             and li[1].typ == CityItemToken.ItemType.NOUN):
         li[1].doubtful = False
         del li[0]
     # Each helper takes an out-argument wrapper.  The value it leaves there
     # was never read by this function, so throwaway wrappers are passed.
     res = CityAttachHelper.__try1(li, RefOutArgWrapper(None), ad)
     if res is None:
         res = CityAttachHelper.__tryNounName(li, RefOutArgWrapper(None),
                                              False)
     if res is None:
         res = CityAttachHelper.__tryNameExist(li, RefOutArgWrapper(None),
                                               False)
     if res is None:
         res = CityAttachHelper.__try4(li)
     if res is None and always:
         res = CityAttachHelper.__tryNounName(li, RefOutArgWrapper(None),
                                              True)
     # The last fallback is skipped when the first token starts something
     # that AddressItemToken.tryAttachOrg recognises.
     if (res is None and always
             and AddressItemToken.tryAttachOrg(li[0].begin_token) is None):
         res = CityAttachHelper.__tryNameExist(li, RefOutArgWrapper(None),
                                               True)
     if res is None:
         return None
     if res.begin_token.previous is not None:
         # "ТЕРРИТОРИЯ <name>": pull the keyword into the result.
         if res.begin_token.previous.isValue("ТЕРРИТОРИЯ", None):
             res.begin_token = res.begin_token.previous
         # "ТЕРРИТОРИЯ «<name>»": pull in the keyword and both brackets.
         if ((BracketHelper.canBeStartOfSequence(res.begin_token.previous,
                                                 False, False)
              and BracketHelper.canBeEndOfSequence(res.end_token.next0_,
                                                   False, None, False)
              and res.begin_token.previous.previous is not None)
                 and res.begin_token.previous.previous.isValue(
                     "ТЕРРИТОРИЯ", None)):
             res.begin_token = res.begin_token.previous.previous
             res.end_token = res.end_token.next0_
     return res
Ejemplo n.º 2
0
 def __checkAttach(self, begin: 'Token', end: 'Token') -> bool:
     """Check that the span ``begin``..``end`` is written compactly.

     Inside the span a token may have a ``whitespaces_before_count`` of at
     most 1, and when it is non-zero neither that token nor its predecessor
     may be all lower-case.  When no whitespace follows ``end``, the next
     token must be ``,`` or ``;`` or be accepted by
     ``BracketHelper.canBeEndOfSequence``.

     Returns:
         True when all checks pass, otherwise False.
     """
     cur = begin
     while cur is not None and cur != end.next0_:
         # The first token of the span is not compared with what precedes it.
         gap = cur.whitespaces_before_count if cur != begin else 0
         if gap > 1:
             return False
         if gap > 0 and (cur.chars.is_all_lower
                         or cur.previous.chars.is_all_lower):
             return False
         cur = cur.next0_
     # Whitespace after the span, or the end of the text, needs no more checks.
     if end.is_whitespace_after or end.next0_ is None:
         return True
     if end.next0_.isCharOf(",;"):
         return True
     if BracketHelper.canBeEndOfSequence(end.next0_, False, None, False):
         return True
     return False
Ejemplo n.º 3
0
 def __tryAttachPureTerr(li: typing.List['TerrItemToken'],
                         ad: 'AnalyzerData') -> 'ReferentToken':
     """Attach a territory that follows the first item of ``li``.

     The items after the first one are first offered to
     ``TerrAttachHelper.tryAttachTerritory``.  If that yields nothing, the
     token after the first item (skipping one opening bracket) is offered
     to ``AddressItemToken.tryAttachOrg``.  In both cases the result is
     extended back to the beginning of the first item.

     Returns:
         The resulting token, or None when nothing follows the first item,
         the territory found is a state, or no attachment succeeds.
     """
     head = li[0]
     following = head.end_token.next0_
     if following is None:
         return None
     # Start of the organization probe: step over an opening bracket if any.
     org_start = following
     if BracketHelper.canBeStartOfSequence(org_start, True, False):
         org_start = org_start.next0_
     if len(li) > 1:
         rest = li[1:]
         terr = TerrAttachHelper.tryAttachTerritory(rest, ad, False, None,
                                                    None)
         if terr is None and len(rest) == 2:
             # Retry with the third argument set to True, but only when
             # exactly one of the two remaining items has a termin.
             if (rest[0].termin_item is None) != (rest[1].termin_item is None):
                 terr = TerrAttachHelper.tryAttachTerritory(
                     rest, ad, True, None, None)
         if terr is not None:
             if terr.referent.is_state:
                 return None
             terr.begin_token = head.begin_token
             return terr
     org = AddressItemToken.tryAttachOrg(org_start)
     if org is None:
         return None
     geo = org.createGeoOrgTerr()
     if geo is None:
         return None
     geo.begin_token = head.begin_token
     tail = geo.end_token
     # If an opening bracket was skipped above, take in the closing one too.
     if (org_start != following and BracketHelper.canBeEndOfSequence(
             tail.next0_, False, None, False)):
         geo.end_token = tail.next0_
     return geo
Ejemplo n.º 4
0
 def tryAttach(t : 'Token') -> 'TitleItemToken':
     """Try to recognise a title-page element starting at token ``t``.

     The checks run in this order, and the first one that matches returns:

     1. keyword patterns on a text token ("ТЕМА", "ПО/НА ТЕМА",
        "ПЕРЕВОД С ...", "СЕКЦИЯ ...", "СПЕЦИАЛЬНОСТЬ ...");
     2. a speciality starting directly at ``t``;
     3. a noun phrase whose last token matches a TYP termin from
        ``M_TERMINS`` (with special handling of dissertations and reports);
     4. a termin from ``M_TERMINS`` starting at ``t``, bare or in brackets.

     Args:
         t: the token to start from.

     Returns:
         A TitleItemToken describing the element found, or None.
     """
     # tt is t as a TextToken (presumably None when t is not one).
     tt = Utils.asObjectOrNull(t, TextToken)
     if (tt is not None): 
         t1 = tt
         # "ТЕМА" (theme): either "ТЕМА <type>[:]" or a bare "ТЕМА[:]".
         if (tt.term == "ТЕМА"): 
             tit = TitleItemToken.tryAttach(tt.next0_)
             if (tit is not None and tit.typ == TitleItemToken.Types.TYP): 
                 t1 = tit.end_token
                 if (t1.next0_ is not None and t1.next0_.isChar(':')): 
                     t1 = t1.next0_
                 return TitleItemToken._new2501(t, t1, TitleItemToken.Types.TYPANDTHEME, tit.value)
             if (tt.next0_ is not None and tt.next0_.isChar(':')): 
                 t1 = tt.next0_
             return TitleItemToken(tt, t1, TitleItemToken.Types.THEME)
         # "ПО ТЕМА" / "НА ТЕМА" (on the theme), with an optional colon.
         if (tt.term == "ПО" or tt.term == "НА"): 
             if (tt.next0_ is not None and tt.next0_.isValue("ТЕМА", None)): 
                 t1 = tt.next0_
                 if (t1.next0_ is not None and t1.next0_.isChar(':')): 
                     t1 = t1.next0_
                 return TitleItemToken(tt, t1, TitleItemToken.Types.THEME)
         # "ПЕРЕВОД С <word>" / "ПЕР. С <word>" (translation from ...).
         if (tt.term == "ПЕРЕВОД" or tt.term == "ПЕР"): 
             tt2 = tt.next0_
             if (tt2 is not None and tt2.isChar('.')): 
                 tt2 = tt2.next0_
             if (isinstance(tt2, TextToken)): 
                 # Both the Latin "C" and the Cyrillic "С" are accepted.
                 if ((tt2).term == "C" or (tt2).term == "С"): 
                     tt2 = tt2.next0_
                     if (isinstance(tt2, TextToken)): 
                         return TitleItemToken(t, tt2, TitleItemToken.Types.TRANSLATE)
         # "СЕКЦИЯ" (section): returned as a DUST item together with its
         # name, which is either a bracketed sequence or, after a colon,
         # everything up to the end of the line.
         if (tt.term == "СЕКЦИЯ" or tt.term == "SECTION" or tt.term == "СЕКЦІЯ"): 
             t1 = tt.next0_
             if (t1 is not None and t1.isChar(':')): 
                 t1 = t1.next0_
             br = BracketHelper.tryParse(t1, BracketParseAttr.NO, 100)
             if (br is not None): 
                 t1 = br.end_token
             elif (t1 != tt.next0_): 
                 # A colon was skipped: advance to the last token of the line.
                 while t1 is not None: 
                     if (t1.is_newline_after): 
                         break
                     t1 = t1.next0_
                 if (t1 is None): 
                     return None
             if (t1 != tt.next0_): 
                 return TitleItemToken(tt, t1, TitleItemToken.Types.DUST)
         # From here on t1 marks where a speciality may start (None = nowhere).
         t1 = (None)
         if (tt.isValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ")): 
             t1 = tt.next0_
         elif (tt.morph.class0_.is_preposition and tt.next0_ is not None and tt.next0_.isValue("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ")): 
             t1 = tt.next0_.next0_
         elif (tt.isChar('/') and tt.is_newline_before): 
             t1 = tt.next0_
         if (t1 is not None): 
             # Skip a ':' or hyphen between the keyword and the speciality.
             if (t1.isCharOf(":") or t1.is_hiphen): 
                 t1 = t1.next0_
             spec = TitleItemToken.__tryAttachSpeciality(t1, True)
             if (spec is not None): 
                 # Extend the result back to cover the introducing keyword.
                 spec.begin_token = t
                 return spec
     # No keyword matched: try a speciality starting at t itself.
     sss = TitleItemToken.__tryAttachSpeciality(t, False)
     if (sss is not None): 
         return sss
     if (isinstance(t, ReferentToken)): 
         return None
     # A noun phrase whose last token matches a termin from M_TERMINS.
     npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
     if (npt is not None): 
         s = npt.getNormalCaseText(None, False, MorphGender.UNDEFINED, False)
         tok = TitleItemToken.M_TERMINS.tryParse(npt.end_token, TerminParseAttr.NO)
         if (tok is not None): 
             ty = Utils.valToEnum(tok.termin.tag, TitleItemToken.Types)
             # Only TYP termins are handled here; anything else falls
             # through to the plain termin lookup further below.
             if (ty == TitleItemToken.Types.TYP): 
                 tit = TitleItemToken.tryAttach(tok.end_token.next0_)
                 if (tit is not None and tit.typ == TitleItemToken.Types.THEME): 
                     return TitleItemToken._new2501(npt.begin_token, tit.end_token, TitleItemToken.Types.TYPANDTHEME, s)
                 # These words on their own (work, project) are rejected.
                 if (s == "РАБОТА" or s == "РОБОТА" or s == "ПРОЕКТ"): 
                     return None
                 t1 = tok.end_token
                 # Dissertation: scan ahead for the degree it is written for
                 # and replace s with the matching dissertation kind.
                 if (s == "ДИССЕРТАЦИЯ" or s == "ДИСЕРТАЦІЯ"): 
                     # err counts tokens that matched none of the checks
                     # below; the scan stops once it exceeds 3.
                     err = 0
                     ttt = t1.next0_
                     first_pass3125 = True
                     while True:
                         # Emulates a for-loop: advance on every pass except
                         # the first, including after `continue`.
                         if first_pass3125: first_pass3125 = False
                         else: ttt = ttt.next0_
                         if (not (ttt is not None)): break
                         if (ttt.morph.class0_.is_preposition): 
                             continue
                         if (ttt.isValue("СОИСКАНИЕ", "")): 
                             continue
                         npt1 = NounPhraseHelper.tryParse(ttt, NounPhraseParseAttr.NO, 0)
                         # A noun phrase headed by "СТЕПЕНЬ" (degree) is
                         # taken into the result and the scan continues.
                         if (npt1 is not None and npt1.noun.isValue("СТЕПЕНЬ", "СТУПІНЬ")): 
                             ttt = npt1.end_token
                             t1 = ttt
                             continue
                         # Ask the PERSON analyzer whether a person property
                         # naming the degree starts here.
                         rt = t1.kit.processReferent("PERSON", ttt)
                         if (rt is not None and (isinstance(rt.referent, PersonPropertyReferent))): 
                             ppr = Utils.asObjectOrNull(rt.referent, PersonPropertyReferent)
                             if (ppr.name == "доктор наук"): 
                                 t1 = rt.end_token
                                 s = "ДОКТОРСКАЯ ДИССЕРТАЦИЯ"
                                 break
                             elif (ppr.name == "кандидат наук"): 
                                 t1 = rt.end_token
                                 s = "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"
                                 break
                             elif (ppr.name == "магистр"): 
                                 t1 = rt.end_token
                                 s = "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ"
                                 break
                         # Otherwise match the degree word itself,
                         # optionally followed by a phrase ending in "НАУК".
                         if (ttt.isValue("ДОКТОР", None) or ttt.isValue("КАНДИДАТ", None) or ttt.isValue("МАГИСТР", "МАГІСТР")): 
                             t1 = ttt
                             npt1 = NounPhraseHelper.tryParse(ttt.next0_, NounPhraseParseAttr.NO, 0)
                             if (npt1 is not None and npt1.end_token.isValue("НАУК", None)): 
                                 t1 = npt1.end_token
                             s = ("МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ" if ttt.isValue("МАГИСТР", "МАГІСТР") else ("ДОКТОРСКАЯ ДИССЕРТАЦИЯ" if ttt.isValue("ДОКТОР", None) else "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"))
                             break
                         err += 1
                         if ((err) > 3): 
                             break
                 # Include a trailing period in the result.
                 if (t1.next0_ is not None and t1.next0_.isChar('.')): 
                     t1 = t1.next0_
                 # "ОТЧЕТ О <noun phrase in prepositional case>" (report
                 # on ...): include the phrase in the result.
                 if (s.endswith("ОТЧЕТ") and t1.next0_ is not None and t1.next0_.isValue("О", None)): 
                     npt1 = NounPhraseHelper.tryParse(t1.next0_, NounPhraseParseAttr.PARSEPREPOSITION, 0)
                     if (npt1 is not None and npt1.morph.case_.is_prepositional): 
                         t1 = npt1.end_token
                 return TitleItemToken._new2501(npt.begin_token, t1, ty, s)
     # A termin from M_TERMINS starting directly at t.
     tok1 = TitleItemToken.M_TERMINS.tryParse(t, TerminParseAttr.NO)
     if (tok1 is not None): 
         t1 = tok1.end_token
         re = TitleItemToken(tok1.begin_token, t1, Utils.valToEnum(tok1.termin.tag, TitleItemToken.Types))
         return re
     # The same termin lookup for a termin enclosed in brackets; the result
     # ends on the closing bracket but begins at the termin, not at the
     # opening bracket.
     if (BracketHelper.canBeStartOfSequence(t, False, False)): 
         tok1 = TitleItemToken.M_TERMINS.tryParse(t.next0_, TerminParseAttr.NO)
         if (tok1 is not None and BracketHelper.canBeEndOfSequence(tok1.end_token.next0_, False, None, False)): 
             t1 = tok1.end_token.next0_
             return TitleItemToken(tok1.begin_token, t1, Utils.valToEnum(tok1.termin.tag, TitleItemToken.Types))
     return None
Ejemplo n.º 5
0
 def __getNameWithoutBrackets(begin: 'Token',
                              end: 'Token',
                              normalize_first_noun_group: bool = False,
                              normal_first_group_single: bool = False,
                              ignore_geo_referent: bool = False) -> str:
     """ Get the string value between two tokens, leaving out quotes and brackets

     Args:
         begin(Token): first token
         end(Token): last token
         normalize_first_noun_group(bool): whether to normalize the first noun group (nominative case)
         normal_first_group_single(bool): whether to bring the first noun group to the singular
         ignore_geo_referent(bool): ignore geographic entities inside

     Returns:
         The text with trailing '*' and whitespace characters cut off;
         None when the text consists only of such characters.  An empty or
         None value from the name helper is returned unchanged.
     """
     text = None
     # Drop an enclosing pair of brackets/quotes.
     if (BracketHelper.canBeStartOfSequence(begin, False, False)
             and BracketHelper.canBeEndOfSequence(end, False, begin, False)):
         begin, end = begin.next0_, end.previous
     if normalize_first_noun_group and not begin.morph.class0_.is_preposition:
         group = NounPhraseHelper.tryParse(
             begin, NounPhraseParseAttr.REFERENTCANBENOUN, 0)
         # Reject a lone noun the dictionary does not know, and any group
         # that runs beyond the requested span.
         if (group is not None
                 and group.noun.getMorphClassInDictionary().is_undefined
                 and len(group.adjectives) == 0):
             group = None
         if group is not None and group.end_token.end_char > end.end_char:
             group = None
         if group is not None:
             text = group.getNormalCaseText(None, normal_first_group_single,
                                            MorphGender.UNDEFINED, False)
             rest = group.end_token.next0_
             follower = rest.next0_ if rest is not None else None
             # ", <word>" where the word is both verb and adjective by
             # morphology: try to make it agree with the noun group.
             if (follower is not None and rest.is_comma
                     and isinstance(follower, TextToken)
                     and follower.end_char <= end.end_char
                     and follower.morph.class0_.is_verb
                     and follower.morph.class0_.is_adjective):
                 for form in follower.morph.items:
                     same_gender = (
                         form.gender == group.morph.gender
                         or (form.gender & group.morph.gender) !=
                         MorphGender.UNDEFINED)
                     if not same_gender:
                         continue
                     if (form.case_ & group.morph.case_).is_undefined:
                         continue
                     same_number = (
                         form.number == group.morph.number
                         or (form.number & group.morph.number) !=
                         MorphNumber.UNDEFINED)
                     if not same_number:
                         continue
                     word = follower.term
                     if isinstance(form, MorphWordForm):
                         word = form.normal_case
                     target = MorphBaseInfo._new549(
                         MorphClass.ADJECTIVE, group.morph.gender,
                         group.morph.number, group.morph.language)
                     word = Morphology.getWordform(word, target)
                     if word is not None:
                         text = "{0}, {1}".format(text, word)
                         rest = follower.next0_
                     # Only the first agreeing form is considered.
                     break
             if rest is not None and rest.end_char <= end.end_char:
                 tail = ProperNameHelper.getNameEx(rest, end,
                                                   MorphClass.UNDEFINED,
                                                   MorphCase.UNDEFINED,
                                                   MorphGender.UNDEFINED,
                                                   True, ignore_geo_referent)
                 if not Utils.isNullOrEmpty(tail):
                     # A space is inserted only before a letter or digit.
                     if str.isalnum(tail[0]):
                         text = "{0} {1}".format(text, tail)
                     else:
                         text = "{0}{1}".format(text, tail)
         elif isinstance(begin, TextToken) and begin.chars.is_cyrillic_letter:
             # No usable noun group: normalize just the first word.
             dict_class = begin.getMorphClassInDictionary()
             if not dict_class.is_undefined:
                 text = begin.getNormalCaseText(dict_class, False,
                                                MorphGender.UNDEFINED, False)
                 if begin.end_char < end.end_char:
                     remainder = ProperNameHelper.getNameEx(
                         begin.next0_, end, MorphClass.UNDEFINED,
                         MorphCase.UNDEFINED, MorphGender.UNDEFINED, True,
                         False)
                     text = "{0} {1}".format(text, remainder)
     if text is None:
         text = ProperNameHelper.getNameEx(begin, end, MorphClass.UNDEFINED,
                                           MorphCase.UNDEFINED,
                                           MorphGender.UNDEFINED, True,
                                           ignore_geo_referent)
     if Utils.isNullOrEmpty(text):
         return text
     # Cut off trailing '*' and whitespace characters.
     keep = len(text)
     while keep > 0 and (text[keep - 1] == '*'
                         or Utils.isWhitespace(text[keep - 1])):
         keep -= 1
     if keep == 0:
         return None
     return text[:keep]
Ejemplo n.º 6
0
 def tryParseList(t: 'Token',
                  max_count: int = 10) -> typing.List['WeaponItemToken']:
     """Parse a run of weapon item tokens starting at ``t``.

     The first item must parse directly and must not be of type CLASS or
     DATE.  Further items are collected while parsing succeeds, with a few
     tolerated separators (colon, hyphen, closing bracket, a bracketed
     insertion, comma) depending on the type of the previous item.
     Adjacent MODEL items are merged into one at the end.

     Args:
         t: token to start from.
         max_count: stop collecting once this many items are gathered
             (values <= 0 disable the limit).

     Returns:
         The list of items (inner tokens of an item precede the item
         itself), or None when the first item cannot be used.
     """
     kinds = WeaponItemToken.Typs
     first = WeaponItemToken.tryParse(t, None, False, False)
     if first is None:
         return None
     if first.typ in (kinds.CLASS, kinds.DATE):
         return None
     prev = first
     items = []
     if len(first.__inner_tokens) > 0:
         items.extend(first.__inner_tokens)
         # Make the first inner token start where the outer item starts.
         if items[0].begin_char > first.begin_char:
             items[0].begin_token = first.begin_token
     items.append(first)
     cur = first.end_token.next0_
     if first.typ == kinds.NOUN:
         # Skip colons and hyphens right after a leading noun.
         while cur is not None and (cur.isChar(':') or cur.is_hiphen):
             cur = cur.next0_
     while cur is not None:
         if max_count > 0 and len(items) >= max_count:
             break
         if cur.isChar(':'):
             cur = cur.next0_
             continue
         if (prev.typ == kinds.NOUN and cur.is_hiphen
                 and cur.next0_ is not None):
             cur = cur.next0_
         item = WeaponItemToken.tryParse(cur, prev, False, False)
         # Fallback: closing bracket (and optional comma) after MODEL/BRAND.
         if (item is None
                 and BracketHelper.canBeEndOfSequence(cur, True, None, False)
                 and cur.next0_ is not None
                 and prev.typ in (kinds.MODEL, kinds.BRAND)):
             after_bracket = cur.next0_
             if after_bracket is not None and after_bracket.is_comma:
                 after_bracket = after_bracket.next0_
             item = WeaponItemToken.tryParse(after_bracket, prev, False,
                                             False)
         # Fallback: a referent token wrapping a single text token.
         if item is None and isinstance(cur, ReferentToken):
             wrapped = Utils.asObjectOrNull(cur, ReferentToken)
             if (wrapped.begin_token == wrapped.end_token
                     and isinstance(wrapped.begin_token, TextToken)):
                 item = WeaponItemToken.tryParse(wrapped.begin_token, prev,
                                                 False, False)
                 if item is not None and item.begin_token == item.end_token:
                     # Re-anchor the item on the outer referent token.
                     item.begin_token = item.end_token = cur
         # Fallback: "( ... )" insertion that is followed by a NUMBER item.
         if item is None and cur.isChar('('):
             bracket = BracketHelper.tryParse(cur, BracketParseAttr.NO, 100)
             if bracket is not None:
                 after = bracket.end_token.next0_
                 if after is not None and after.is_comma:
                     after = after.next0_
                 item = WeaponItemToken.tryParse(after, prev, False, False)
                 if not (item is not None and item.typ == kinds.NUMBER):
                     item = None
         # Fallback: hyphen after BRAND/MODEL.
         if (item is None and cur.is_hiphen
                 and prev.typ in (kinds.BRAND, kinds.MODEL)):
             item = WeaponItemToken.tryParse(cur.next0_, prev, False, False)
         # Fallback: comma followed by a NUMBER item.
         if (item is None and cur.is_comma
                 and prev.typ in (kinds.NAME, kinds.BRAND, kinds.MODEL,
                                  kinds.CLASS, kinds.DATE)):
             item = WeaponItemToken.tryParse(cur.next0_, prev, True, False)
             if item is not None and not (item.typ == kinds.NUMBER):
                 item = None
         if item is None:
             break
         # Only a NUMBER item may continue the list on a new line.
         if cur.is_newline_before and item.typ != kinds.NUMBER:
             break
         if len(item.__inner_tokens) > 0:
             items.extend(item.__inner_tokens)
         items.append(item)
         prev = item
         cur = item.end_token.next0_
     # Merge neighbouring MODEL items into one.
     idx = 0
     while idx < len(items) - 1:
         left, right = items[idx], items[idx + 1]
         if left.typ == kinds.MODEL and right.typ == kinds.MODEL:
             left.end_token = right.end_token
             # NOTE(review): the separator is chosen from the token after the
             # already extended end token, not from the token between the
             # two models - confirm that this is intended.
             following = left.end_token.next0_
             if following is not None and following.is_hiphen:
                 separator = '-'
             else:
                 separator = ' '
             left.value = "{0}{1}{2}".format(left.value, separator,
                                             right.value)
             del items[idx + 1]
             # Stay on the same index: the merged item may merge again.
         else:
             idx += 1
     return items
Ejemplo n.º 7
0
 def tryAttach(t: 'Token',
               p1: 'InstrumentParticipant' = None,
               p2: 'InstrumentParticipant' = None,
               is_contract: bool = False) -> 'ParticipantToken':
     if (t is None):
         return None
     tt = t
     br = False
     if (p1 is None and p2 is None and is_contract):
         r1 = t.getReferent()
         if ((r1 is not None and t.next0_ is not None
              and t.next0_.is_comma_and)
                 and (isinstance(t.next0_.next0_, ReferentToken))):
             r2 = t.next0_.next0_.getReferent()
             if (r1.type_name == r2.type_name):
                 ttt = t.next0_.next0_.next0_
                 refs = list()
                 refs.append(r1)
                 refs.append(r2)
                 first_pass3014 = True
                 while True:
                     if first_pass3014: first_pass3014 = False
                     else: ttt = ttt.next0_
                     if (not (ttt is not None)): break
                     if ((ttt.is_comma_and and ttt.next0_ is not None
                          and ttt.next0_.getReferent() is not None)
                             and ttt.next0_.getReferent().type_name
                             == r1.type_name):
                         ttt = ttt.next0_
                         if (not ttt.getReferent() in refs):
                             refs.append(ttt.getReferent())
                         continue
                     break
                 first_pass3015 = True
                 while True:
                     if first_pass3015: first_pass3015 = False
                     else: ttt = ttt.next0_
                     if (not (ttt is not None)): break
                     if (ttt.is_comma or ttt.morph.class0_.is_preposition):
                         continue
                     if ((ttt.isValue("ИМЕНОВАТЬ", None)
                          or ttt.isValue("ДАЛЬНЕЙШИЙ", None)
                          or ttt.isValue("ДАЛЕЕ", None))
                             or ttt.isValue("ТЕКСТ", None)):
                         continue
                     if (ttt.isValue("ДОГОВАРИВАТЬСЯ", None)):
                         continue
                     npt = NounPhraseHelper.tryParse(
                         ttt, NounPhraseParseAttr.NO, 0)
                     if (npt is not None
                             and npt.noun.isValue("СТОРОНА", None)
                             and npt.morph.number != MorphNumber.SINGULAR):
                         re = ParticipantToken._new1467(
                             t, npt.end_token,
                             ParticipantToken.Kinds.NAMEDASPARTS)
                         re.parts = refs
                         return re
                     break
         if ((isinstance(r1, OrganizationReferent))
                 or (isinstance(r1, PersonReferent))):
             has_br = False
             has_named = False
             if (isinstance(r1, PersonReferent)):
                 if (t.previous is not None
                         and t.previous.isValue("ЛИЦО", None)):
                     return None
             elif (t.previous is not None
                   and ((t.previous.isValue("ВЫДАВАТЬ", None)
                         or t.previous.isValue("ВЫДАТЬ", None)))):
                 return None
             ttt = (t).begin_token
             while ttt is not None and (ttt.end_char < t.end_char):
                 if (ttt.isChar('(')):
                     has_br = True
                 elif ((ttt.isValue("ИМЕНОВАТЬ", None) or ttt.isValue(
                         "ДАЛЬНЕЙШИЙ", None) or ttt.isValue("ДАЛЕЕ", None))
                       or ttt.isValue("ТЕКСТ", None)):
                     has_named = True
                 elif ((ttt.is_comma or ttt.morph.class0_.is_preposition
                        or ttt.is_hiphen) or ttt.isChar(':')):
                     pass
                 elif (isinstance(ttt, ReferentToken)):
                     pass
                 elif (has_br or has_named):
                     npt = NounPhraseHelper.tryParse(
                         ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0)
                     if (npt is None):
                         break
                     if (has_br):
                         if (npt.end_token.next0_ is None
                                 or not npt.end_token.next0_.isChar(')')):
                             break
                     if (not has_named):
                         if (ParticipantToken.M_ONTOLOGY.tryParse(
                                 ttt, TerminParseAttr.NO) is None):
                             break
                     re = ParticipantToken._new1467(
                         t, t, ParticipantToken.Kinds.NAMEDAS)
                     re.typ = npt.getNormalCaseText(None, True,
                                                    MorphGender.UNDEFINED,
                                                    False)
                     re.parts = list()
                     re.parts.append(r1)
                     return re
                 ttt = ttt.next0_
             has_br = False
             has_named = False
             end_side = None
             brr = None
             add_refs = None
             ttt = t.next0_
             first_pass3016 = True
             while True:
                 if first_pass3016: first_pass3016 = False
                 else: ttt = ttt.next0_
                 if (not (ttt is not None)): break
                 if ((isinstance(ttt, NumberToken))
                         and (isinstance(ttt.next0_, TextToken))
                         and (ttt.next0_).term == "СТОРОНЫ"):
                     ttt = ttt.next0_
                     end_side = ttt
                     if (ttt.next0_ is not None and ttt.next0_.is_comma):
                         ttt = ttt.next0_
                     if (ttt.next0_ is not None and ttt.next0_.is_and):
                         break
                 if (brr is not None and ttt.begin_char > brr.end_char):
                     brr = (None)
                 if (BracketHelper.canBeStartOfSequence(ttt, False, False)):
                     brr = BracketHelper.tryParse(ttt, BracketParseAttr.NO,
                                                  100)
                     if (brr is not None and (brr.length_char < 7)
                             and ttt.isChar('(')):
                         ttt = brr.end_token
                         brr = (None)
                         continue
                 elif ((ttt.isValue("ИМЕНОВАТЬ", None) or ttt.isValue(
                         "ДАЛЬНЕЙШИЙ", None) or ttt.isValue("ДАЛЕЕ", None))
                       or ttt.isValue("ТЕКСТ", None)):
                     has_named = True
                 elif ((ttt.is_comma or ttt.morph.class0_.is_preposition
                        or ttt.is_hiphen) or ttt.isChar(':')):
                     pass
                 elif (brr is not None or has_named):
                     if (BracketHelper.canBeStartOfSequence(
                             ttt, True, False)):
                         ttt = ttt.next0_
                     npt = NounPhraseHelper.tryParse(
                         ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0)
                     typ22 = None
                     if (npt is not None):
                         ttt = npt.end_token
                         if (npt.end_token.isValue("ДОГОВОР", None)):
                             continue
                     else:
                         ttok = None
                         if (isinstance(ttt, MetaToken)):
                             ttok = ParticipantToken.M_ONTOLOGY.tryParse(
                                 (ttt).begin_token, TerminParseAttr.NO)
                         if (ttok is not None):
                             typ22 = ttok.termin.canonic_text
                         elif (has_named
                               and ttt.morph.class0_.is_adjective):
                             typ22 = ttt.getNormalCaseText(
                                 MorphClass.ADJECTIVE, False,
                                 MorphGender.UNDEFINED, False)
                         elif (brr is not None):
                             continue
                         else:
                             break
                     if (BracketHelper.canBeEndOfSequence(
                             ttt.next0_, True, None, False)):
                         ttt = ttt.next0_
                     if (brr is not None):
                         if (ttt.next0_ is None):
                             ttt = brr.end_token
                             continue
                         ttt = ttt.next0_
                     if (not has_named and typ22 is None):
                         if (ParticipantToken.M_ONTOLOGY.tryParse(
                                 npt.begin_token, TerminParseAttr.NO) is
                                 None):
                             break
                     re = ParticipantToken._new1467(
                         t, ttt, ParticipantToken.Kinds.NAMEDAS)
                     re.typ = (Utils.ifNotNull(
                         typ22,
                         npt.getNormalCaseText(None, True,
                                               MorphGender.UNDEFINED,
                                               False)))
                     re.parts = list()
                     re.parts.append(r1)
                     return re
                 elif ((ttt.isValue("ЗАРЕГИСТРИРОВАННЫЙ", None)
                        or ttt.isValue("КАЧЕСТВО", None)
                        or ttt.isValue("ПРОЖИВАЮЩИЙ", None))
                       or ttt.isValue("ЗАРЕГ", None)):
                     pass
                 elif (ttt.getReferent() == r1):
                     pass
                 elif (
                     (isinstance(ttt.getReferent(), PersonIdentityReferent))
                         or
                     (isinstance(ttt.getReferent(), AddressReferent))):
                     if (add_refs is None):
                         add_refs = list()
                     add_refs.append(ttt.getReferent())
                 else:
                     prr = ttt.kit.processReferent("PERSONPROPERTY", ttt)
                     if (prr is not None):
                         ttt = prr.end_token
                         continue
                     if (isinstance(ttt.getReferent(), GeoReferent)):
                         continue
                     npt = NounPhraseHelper.tryParse(
                         ttt, NounPhraseParseAttr.NO, 0)
                     if (npt is not None):
                         if ((npt.noun.isValue("МЕСТО", None)
                              or npt.noun.isValue("ЖИТЕЛЬСТВО", None)
                              or npt.noun.isValue("ПРЕДПРИНИМАТЕЛЬ", None))
                                 or npt.noun.isValue("ПОЛ", None)
                                 or npt.noun.isValue("РОЖДЕНИЕ", None)):
                             ttt = npt.end_token
                             continue
                     if (ttt.is_newline_before):
                         break
                     if (ttt.length_char < 3):
                         continue
                     mc = ttt.getMorphClassInDictionary()
                     if (mc.is_adverb or mc.is_adjective):
                         continue
                     if (ttt.chars.is_all_upper):
                         continue
                     break
             if (end_side is not None
                     or ((add_refs is not None and t.previous is not None
                          and t.previous.is_and))):
                 re = ParticipantToken._new1467(
                     t, Utils.ifNotNull(end_side, t),
                     ParticipantToken.Kinds.NAMEDAS)
                 re.typ = (None)
                 re.parts = list()
                 re.parts.append(r1)
                 if (add_refs is not None):
                     re.parts.extend(add_refs)
                 return re
         too = ParticipantToken.M_ONTOLOGY.tryParse(t, TerminParseAttr.NO)
         if (too is not None):
             if ((isinstance(t.previous, TextToken))
                     and t.previous.isValue("ЛИЦО", None)):
                 too = (None)
         if (too is not None and too.termin.tag is not None
                 and too.termin.canonic_text != "СТОРОНА"):
             tt1 = too.end_token.next0_
             if (tt1 is not None):
                 if (tt1.is_hiphen or tt1.isChar(':')):
                     tt1 = tt1.next0_
             if (isinstance(tt1, ReferentToken)):
                 r1 = tt1.getReferent()
                 if ((isinstance(r1, PersonReferent))
                         or (isinstance(r1, OrganizationReferent))):
                     re = ParticipantToken._new1467(
                         t, tt1, ParticipantToken.Kinds.NAMEDAS)
                     re.typ = too.termin.canonic_text
                     re.parts = list()
                     re.parts.append(r1)
                     return re
     add_typ1 = (None if p1 is None else p1.typ)
     add_typ2 = (None if p2 is None else p2.typ)
     if (BracketHelper.canBeStartOfSequence(tt, False, False)
             and tt.next0_ is not None):
         br = True
         tt = tt.next0_
     term1 = None
     term2 = None
     if (add_typ1 is not None and add_typ1.find(' ') > 0
             and not add_typ1.startswith("СТОРОНА")):
         term1 = Termin(add_typ1)
     if (add_typ2 is not None and add_typ2.find(' ') > 0
             and not add_typ2.startswith("СТОРОНА")):
         term2 = Termin(add_typ2)
     named = False
     typ_ = None
     t1 = None
     t0 = tt
     first_pass3017 = True
     while True:
         if first_pass3017: first_pass3017 = False
         else: tt = tt.next0_
         if (not (tt is not None)): break
         if (tt.morph.class0_.is_preposition and typ_ is not None):
             continue
         if (tt.isCharOf("(:)") or tt.is_hiphen):
             continue
         if (tt.is_table_control_char):
             break
         if (tt.is_newline_before and tt != t0):
             if (isinstance(tt, NumberToken)):
                 break
             if ((isinstance(tt, TextToken))
                     and (isinstance(tt.previous, TextToken))):
                 if (tt.previous.isValue((tt).term, None)):
                     break
         if (BracketHelper.isBracket(tt, False)):
             continue
         tok = (ParticipantToken.M_ONTOLOGY.tryParse(
             tt, TerminParseAttr.NO)
                if ParticipantToken.M_ONTOLOGY is not None else None)
         if (tok is not None and (isinstance(tt.previous, TextToken))):
             if (tt.previous.isValue("ЛИЦО", None)):
                 return None
         if (tok is None):
             if (add_typ1 is not None
                     and ((MiscHelper.isNotMoreThanOneError(add_typ1, tt) or
                           ((((isinstance(tt, MetaToken))) and
                             (tt).begin_token.isValue(add_typ1, None)))))):
                 if (typ_ is not None):
                     if (not ParticipantToken.__isTypesEqual(
                             add_typ1, typ_)):
                         break
                 typ_ = add_typ1
                 t1 = tt
                 continue
             if (add_typ2 is not None
                     and ((MiscHelper.isNotMoreThanOneError(add_typ2, tt) or
                           ((((isinstance(tt, MetaToken))) and
                             (tt).begin_token.isValue(add_typ2, None)))))):
                 if (typ_ is not None):
                     if (not ParticipantToken.__isTypesEqual(
                             add_typ2, typ_)):
                         break
                 typ_ = add_typ2
                 t1 = tt
                 continue
             if (tt.chars.is_letter):
                 if (term1 is not None):
                     tok1 = term1.tryParse(tt, TerminParseAttr.NO)
                     if (tok1 is not None):
                         if (typ_ is not None):
                             if (not ParticipantToken.__isTypesEqual(
                                     add_typ1, typ_)):
                                 break
                         typ_ = add_typ1
                         tt = tok1.end_token
                         t1 = tt
                         continue
                 if (term2 is not None):
                     tok2 = term2.tryParse(tt, TerminParseAttr.NO)
                     if (tok2 is not None):
                         if (typ_ is not None):
                             if (not ParticipantToken.__isTypesEqual(
                                     add_typ2, typ_)):
                                 break
                         typ_ = add_typ2
                         tt = tok2.end_token
                         t1 = tt
                         continue
                 if (named and tt.getMorphClassInDictionary().is_noun):
                     if (not tt.chars.is_all_lower
                             or BracketHelper.isBracket(tt.previous, True)):
                         if (DecreeToken.isKeyword(tt, False) is None):
                             val = tt.getNormalCaseText(
                                 MorphClass.NOUN, True,
                                 MorphGender.UNDEFINED, False)
                             if (typ_ is not None):
                                 if (not ParticipantToken.__isTypesEqual(
                                         typ_, val)):
                                     break
                             typ_ = val
                             t1 = tt
                             continue
             if (named and typ_ is None and is_contract):
                 if ((isinstance(tt, TextToken))
                         and tt.chars.is_cyrillic_letter
                         and tt.chars.is_capital_upper):
                     dc = tt.getMorphClassInDictionary()
                     if (dc.is_undefined or dc.is_noun):
                         dt = DecreeToken.tryAttach(tt, None, False)
                         ok = True
                         if (dt is not None):
                             ok = False
                         elif (tt.isValue("СТОРОНА", None)):
                             ok = False
                         if (ok):
                             typ_ = (tt).getLemma()
                             t1 = tt
                             continue
                     if (dc.is_adjective):
                         npt = NounPhraseHelper.tryParse(
                             tt, NounPhraseParseAttr.NO, 0)
                         if (npt is not None and len(npt.adjectives) > 0
                                 and npt.noun.getMorphClassInDictionary(
                                 ).is_noun):
                             typ_ = npt.getNormalCaseText(
                                 None, True, MorphGender.UNDEFINED, False)
                             t1 = npt.end_token
                             continue
             if (tt == t):
                 break
             if ((isinstance(tt, NumberToken)) or tt.isChar('.')):
                 break
             if (tt.length_char < 4):
                 if (typ_ is not None):
                     continue
             break
         if (tok.termin.tag is None):
             named = True
         else:
             if (typ_ is not None):
                 break
             if (tok.termin.canonic_text == "СТОРОНА"):
                 tt1 = tt.next0_
                 if (tt1 is not None and tt1.is_hiphen):
                     tt1 = tt1.next0_
                 if (not ((isinstance(tt1, NumberToken)))):
                     break
                 if (tt1.is_newline_before):
                     break
                 typ_ = "{0} {1}".format(tok.termin.canonic_text,
                                         (tt1).value)
                 t1 = tt1
             else:
                 typ_ = tok.termin.canonic_text
                 t1 = tok.end_token
             break
         tt = tok.end_token
     if (typ_ is None):
         return None
     if (not named and t1 != t and not typ_.startswith("СТОРОНА")):
         if (not ParticipantToken.__isTypesEqual(typ_, add_typ1)
                 and not ParticipantToken.__isTypesEqual(typ_, add_typ2)):
             return None
     if (BracketHelper.canBeEndOfSequence(t1.next0_, False, None, False)):
         t1 = t1.next0_
         if (not t.is_whitespace_before
                 and BracketHelper.canBeStartOfSequence(
                     t.previous, False, False)):
             t = t.previous
     elif (BracketHelper.canBeStartOfSequence(t, False, False)
           and BracketHelper.canBeEndOfSequence(t1.next0_, True, t, True)):
         t1 = t1.next0_
     if (br and t1.next0_ is not None and BracketHelper.canBeEndOfSequence(
             t1.next0_, False, None, False)):
         t1 = t1.next0_
     res = ParticipantToken._new1472(
         t, t1, (ParticipantToken.Kinds.NAMEDAS
                 if named else ParticipantToken.Kinds.PURE), typ_)
     if (t.isChar(':')):
         res.begin_token = t.next0_
     return res
Ejemplo n.º 8
0
 def __TryAttach(t: 'Token',
                 prev: typing.List['DateItemToken']) -> 'DateItemToken':
     """Try to parse one date-item primitive starting at token ``t``.

     The checks are made in this order:

     1. A number (optionally wrapped in brackets).  Depending on the
        neighbouring words it stays a NUMBER or is refined to YEAR, HOUR,
        MINUTE, SECOND, CENTURY, QUARTAL or HALFYEAR.
     2. A Roman numeral followed by a quarter / half-year / century word.
     3. Pointer words: seasons, "beginning" / "middle" / "end",
        "present time".
     4. A quarter / half-year noun phrase with a relative adjective
        ("last", "previous", "next"), resolved against an earlier
        ``DateRangeReferent`` in the text.
     5. A delimiter character, or a month name.

     Args:
         t: token to start from; ``None`` yields ``None``.
         prev: items already parsed before ``t`` (may be ``None``); used as
             context.  Side effect: ``prev[0].typ`` may be switched to HOUR
             when the current number turns out to be the minute part.

     Returns:
         The parsed ``DateItemToken`` or ``None`` when nothing matches.
     """
     from pullenti.ner.measure.internal.MeasureToken import MeasureToken
     if (t is None):
         return None
     nt = Utils.asObjectOrNull(t, NumberToken)
     begin = t
     end = t
     # A number enclosed in brackets: take the inner number, but let the
     # resulting item span up to the closing bracket.
     if ((BracketHelper.canBeStartOfSequence(t, False, False)
          and t.next0_ is not None and (isinstance(t.next0_, NumberToken)))
             and BracketHelper.canBeEndOfSequence(t.next0_.next0_, False,
                                                  None, False)):
         nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
         end = t.next0_.next0_
     if ((t.is_newline_before and BracketHelper.isBracket(t, False) and
          (isinstance(t.next0_, NumberToken)))
             and BracketHelper.isBracket(t.next0_.next0_, False)):
         nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
         end = t.next0_.next0_
     if (nt is not None):
         # ---- numeric item -------------------------------------------
         if (nt.int_value is None):
             return None
         if (nt.typ == NumberSpellingType.WORDS):
             if (nt.morph.class0_.is_noun
                     and not nt.morph.class0_.is_adjective):
                 # A number spelled as a pure noun is accepted only when a
                 # quarter / half-year word follows.
                 if (t.next0_ is not None
                         and ((t.next0_.isValue("КВАРТАЛ", None)
                               or t.next0_.isValue("ПОЛУГОДИЕ", None)
                               or t.next0_.isValue("ПІВРІЧЧЯ", None)))):
                     pass
                 else:
                     return None
         if (NumberHelper.tryParseAge(nt) is not None):
             return None
         res = DateItemToken._new653(begin, end,
                                     DateItemToken.DateItemType.NUMBER,
                                     nt.int_value, nt.morph)
         # "20" followed by a two-character number right after a month:
         # glue them into a single 20xx value.
         if ((res.int_value == 20 and
              (isinstance(nt.next0_, NumberToken)) and
              (nt.next0_).int_value is not None)
                 and nt.next0_.length_char == 2 and prev is not None):
             num = 2000 + (nt.next0_).int_value
             if ((num < 2030) and len(prev) > 0 and prev[len(prev) - 1].typ
                     == DateItemToken.DateItemType.MONTH):
                 if (nt.whitespaces_after_count == 1
                         or nt.is_newline_after):
                     nt = (Utils.asObjectOrNull(nt.next0_, NumberToken))
                     res.end_token = nt
                     res.int_value = num
         # "20___ <year word>" / "201___ <year word>": a blank to be filled
         # in; reported as a YEAR with value 0.
         if (res.int_value == 20 or res.int_value == 201):
             tt = t.next0_
             if (tt is not None and tt.isChar('_')):
                 while tt is not None and tt.isChar('_'):
                     tt = tt.next0_
                 tt = DateItemToken.__testYearRusWord(tt, False)
                 if (tt is not None):
                     res.int_value = 0
                     res.end_token = tt
                     res.typ = DateItemToken.DateItemType.YEAR
                     return res
         # Small numbers may be hours: "<n> ЧАС", optionally followed by a
         # part-of-day word that shifts the value into 24-hour form.
         if (res.int_value <= 12 and t.next0_ is not None
                 and (t.whitespaces_after_count < 3)):
             tt = t.next0_
             if (tt.isValue("ЧАС", None)):
                 if (((isinstance(t.previous, TextToken))
                      and not t.previous.chars.is_letter
                      and not t.is_whitespace_before)
                         and (isinstance(t.previous.previous, NumberToken))
                         and not t.previous.is_whitespace_before):
                     # "<number><non-letter><n> ЧАС" written without
                     # spaces: the hour decision is left to the generic
                     # handling further below.
                     pass
                 else:
                     res.typ = DateItemToken.DateItemType.HOUR
                     res.end_token = tt
                     tt = tt.next0_
                     if (tt is not None and tt.isChar('.')):
                         res.end_token = tt
                         tt = tt.next0_
             while tt is not None:
                 if (tt.isValue("УТРО", "РАНОК")):
                     res.end_token = tt
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.isValue("ВЕЧЕР", "ВЕЧІР")):
                     res.end_token = tt
                     res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.isValue("ДЕНЬ", None)):
                     res.end_token = tt
                     if (res.int_value < 10):
                         res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.isValue("НОЧЬ", "НІЧ")):
                     res.end_token = tt
                     if (res.int_value == 12):
                         res.int_value = 0
                     elif (res.int_value > 9):
                         res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.is_comma or tt.morph.class0_.is_adverb):
                     # Commas and adverbs may stand between the number
                     # and the part-of-day word; skip them.
                     tt = tt.next0_
                     continue
                 break
             if (res.typ == DateItemToken.DateItemType.HOUR):
                 return res
         # Decide whether the number may be a year.
         can_be_year_ = True
         if (prev is not None and len(prev) > 0 and prev[len(prev) - 1].typ
                 == DateItemToken.DateItemType.MONTH):
             pass
         elif ((prev is not None and len(prev) >= 4 and
                prev[len(prev) - 1].typ == DateItemToken.DateItemType.DELIM)
               and prev[len(prev) - 2].can_by_month):
             pass
         elif (nt.next0_ is not None
               and ((nt.next0_.isValue("ГОД", None)
                     or nt.next0_.isValue("РІК", None)))):
             # "<n> ГОД" with n < 1000 and no month context is not
             # accepted as a year here.
             if (res.int_value < 1000):
                 can_be_year_ = False
         tt = DateItemToken.__testYearRusWord(nt.next0_, False)
         if (tt is not None and DateItemToken.__isNewAge(tt.next0_)):
             res.typ = DateItemToken.DateItemType.YEAR
             res.end_token = tt
         elif (can_be_year_):
             if (res.can_be_year):
                 tt = DateItemToken.__testYearRusWord(
                     nt.next0_, res.is_newline_before)
                 if ((tt) is not None):
                     if ((tt.isValue("Г", None)
                          and not tt.is_whitespace_before
                          and t.previous is not None)
                             and ((t.previous.isValue("КОРПУС", None)
                                   or t.previous.isValue("КОРП", None)))):
                         # "КОРПУС <n>Г": presumably a building number
                         # with a letter suffix, not a year.
                         pass
                     elif (
                         (((nt.next0_.isValue("Г", None) and
                            (t.whitespaces_before_count < 3) and t.previous
                            is not None) and t.previous.isValue("Я", None)
                           and t.previous.previous is not None)
                          and t.previous.previous.isCharOf("\\/")
                          and t.previous.previous.previous is not None)
                             and t.previous.previous.previous.isValue(
                                 "А", None)):
                         # "А/Я <n> Г": presumably a P.O. box number
                         # followed by a city abbreviation; not a date.
                         return None
                     else:
                         res.end_token = tt
                         res.typ = DateItemToken.DateItemType.YEAR
                         res.lang = tt.morph.language
             elif (tt is not None and (nt.whitespaces_after_count < 2)
                   and (nt.end_char - nt.begin_char) == 1):
                 # Two-character number directly followed by a year word.
                 res.end_token = tt
                 res.typ = DateItemToken.DateItemType.YEAR
                 res.lang = tt.morph.language
         # A preceding preposition may confirm a year (or an Italian day)
         # and is then included in the item span.
         if (nt.previous is not None):
             if (nt.previous.isValue("В", "У")
                     or nt.previous.isValue("К", None)
                     or nt.previous.isValue("ДО", None)):
                 tt = DateItemToken.__testYearRusWord(nt.next0_, False)
                 if ((tt) is not None):
                     ok = False
                     if ((res.int_value < 100)
                             and (isinstance(tt, TextToken)) and
                         (((tt).term == "ГОДА" or (tt).term == "РОКИ"))):
                         # "<small n> ГОДА": looks like a duration rather
                         # than a calendar year.
                         pass
                     else:
                         ok = True
                         if (nt.previous.isValue("ДО", None)
                                 and nt.next0_.isValue("Г", None)):
                             # "ДО <n> Г" may be a measure ("up to n
                             # grams"): look back up to 10 tokens for a
                             # measure expression covering this number.
                             cou = 0
                             ttt = nt.previous.previous
                             while ttt is not None and (cou < 10):
                                 mt = MeasureToken.tryParse(
                                     ttt, None, False, False)
                                 if (mt is not None
                                         and mt.end_char > nt.end_char):
                                     ok = False
                                     break
                                 ttt = ttt.previous
                                 cou += 1
                     if (ok):
                         res.end_token = tt
                         res.typ = DateItemToken.DateItemType.YEAR
                         res.lang = tt.morph.language
                         res.begin_token = nt.previous
             elif (((nt.previous.isValue("IN", None)
                     or nt.previous.isValue("SINCE", None)))
                   and res.can_be_year):
                 res.typ = DateItemToken.DateItemType.YEAR
                 res.begin_token = nt.previous
             elif (nt.previous.isValue("NEL", None)
                   or nt.previous.isValue("DEL", None)):
                 if (res.can_be_year):
                     res.typ = DateItemToken.DateItemType.YEAR
                     res.lang = MorphLang.IT
                     res.begin_token = nt.previous
             elif (nt.previous.isValue("IL", None) and res.can_be_day):
                 res.lang = MorphLang.IT
                 res.begin_token = nt.previous
         # A unit word right after the item fixes its type.
         t1 = res.end_token.next0_
         if (t1 is not None):
             if ((t1.isValue("ЧАС", None) or t1.isValue("ГОДИНА", None))):
                 # "<hour><delim><n> ЧАС": the current number is the
                 # minute part and prev[0] becomes the hour.
                 # NOTE(review): the bound "< 59" excludes 59 -- confirm
                 # whether "< 60" was intended.
                 if (prev is not None and len(prev) == 2
                         and prev[0].can_be_hour
                         and prev[1].typ == DateItemToken.DateItemType.DELIM
                         and not prev[1].is_whitespace_after
                         and res.int_value >= 0 and (res.int_value < 59)):
                     prev[0].typ = DateItemToken.DateItemType.HOUR
                     res.typ = DateItemToken.DateItemType.MINUTE
                     res.end_token = t1
                 elif (res.int_value < 24):
                     if (t1.next0_ is not None and t1.next0_.isChar('.')):
                         t1 = t1.next0_
                     res.typ = DateItemToken.DateItemType.HOUR
                     res.end_token = t1
             elif ((res.int_value < 60) and
                   ((t1.isValue("МИНУТА", None) or t1.isValue("МИН", None)
                     or t1.isValue("ХВИЛИНА", None)))):
                 # The unit word is looked up on t1 (the token after the
                 # number) for all three spellings.
                 if (t1.next0_ is not None and t1.next0_.isChar('.')):
                     t1 = t1.next0_
                 res.typ = DateItemToken.DateItemType.MINUTE
                 res.end_token = t1
             elif ((res.int_value < 60) and ((t1.isValue("СЕКУНДА", None)
                                              or t1.isValue("СЕК", None)))):
                 if (t1.next0_ is not None and t1.next0_.isChar('.')):
                     t1 = t1.next0_
                 res.typ = DateItemToken.DateItemType.SECOND
                 res.end_token = t1
             elif ((res.int_value < 30)
                   and ((t1.isValue("ВЕК", "ВІК")
                         or t1.isValue("СТОЛЕТИЕ", "СТОЛІТТЯ")))):
                 res.typ = DateItemToken.DateItemType.CENTURY
                 res.end_token = t1
             elif (res.int_value <= 4 and t1.isValue("КВАРТАЛ", None)):
                 res.typ = DateItemToken.DateItemType.QUARTAL
                 res.end_token = t1
             elif (res.int_value <= 2
                   and ((t1.isValue("ПОЛУГОДИЕ", None)
                         or t1.isValue("ПІВРІЧЧЯ", None)))):
                 res.typ = DateItemToken.DateItemType.HALFYEAR
                 res.end_token = t1
         return res
     # ---- non-numeric item ------------------------------------------
     t0 = Utils.asObjectOrNull(t, TextToken)
     if (t0 is None):
         return None
     txt = t0.getSourceText()
     # Roman numerals; the first letter may be Latin I / X / V or the
     # Cyrillic look-alike "Х".
     if ((txt[0] == 'I' or txt[0] == 'X' or txt[0] == 'Х')
             or txt[0] == 'V'):
         lat = NumberHelper.tryParseRoman(t)
         if (lat is not None and lat.end_token.next0_ is not None
                 and lat.int_value is not None):
             val = lat.int_value
             tt = lat.end_token.next0_
             if (tt.isValue("КВАРТАЛ", None) and val > 0 and val <= 4):
                 return DateItemToken._new654(
                     t, tt, DateItemToken.DateItemType.QUARTAL, val)
             if (tt.isValue("ПОЛУГОДИЕ", "ПІВРІЧЧЯ") and val > 0
                     and val <= 2):
                 return DateItemToken._new654(
                     t, lat.end_token.next0_,
                     DateItemToken.DateItemType.HALFYEAR, val)
             if (tt.isValue("ВЕК", "ВІК")
                     or tt.isValue("СТОЛЕТИЕ", "СТОЛІТТЯ")):
                 return DateItemToken._new654(
                     t, lat.end_token.next0_,
                     DateItemToken.DateItemType.CENTURY, val)
             # Abbreviated century "<roman> В." is accepted only after a
             # pointer item or when __isNewAge accepts what follows.
             if (tt.isValue("В", None) and tt.next0_ is not None
                     and tt.next0_.isChar('.')):
                 if (prev is not None and len(prev) > 0
                         and prev[len(prev) - 1].typ
                         == DateItemToken.DateItemType.POINTER):
                     return DateItemToken._new654(
                         t, tt.next0_, DateItemToken.DateItemType.CENTURY,
                         val)
                 if (DateItemToken.__isNewAge(tt.next0_.next0_)):
                     return DateItemToken._new654(
                         t, tt.next0_, DateItemToken.DateItemType.CENTURY,
                         val)
             # Range "<roman>-<bigger roman> ВЕК": emit only the first
             # century here, spanning just the first numeral.
             if (tt.is_hiphen):
                 lat2 = NumberHelper.tryParseRoman(tt.next0_)
                 if ((lat2 is not None and lat2.int_value is not None
                      and lat2.int_value > val)
                         and lat2.end_token.next0_ is not None):
                     if (lat2.end_token.next0_.isValue("ВЕК", "ВІК")
                             or lat2.end_token.next0_.isValue(
                                 "СТОЛЕТИЕ", "СТОЛІТТЯ")):
                         return DateItemToken._new654(
                             t, lat.end_token,
                             DateItemToken.DateItemType.CENTURY, val)
     # Single-word Ukrainian pointers.
     if (t is not None and t.isValue("НАПРИКІНЦІ", None)):
         return DateItemToken._new660(t, t,
                                      DateItemToken.DateItemType.POINTER,
                                      "конец")
     if (t is not None and t.isValue("ДОНЕДАВНА", None)):
         return DateItemToken._new660(t, t,
                                      DateItemToken.DateItemType.POINTER,
                                      "сегодня")
     # Seasons.  For Russian "summer" only the forms ЛЕТОМ / ЛЕТА / ЛЕТО
     # are accepted; any other matched word form is rejected.
     tok = DateItemToken.M_SEASONS.tryParse(t, TerminParseAttr.NO)
     if ((tok is not None and
          (Utils.valToEnum(tok.termin.tag, DatePointerType))
          == DatePointerType.SUMMER and t.morph.language.is_ru)
             and (isinstance(t, TextToken))):
         str0_ = (t).term
         if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
             tok = (None)
     if (tok is not None):
         return DateItemToken._new654(
             t, tok.end_token, DateItemToken.DateItemType.POINTER,
             Utils.valToEnum(tok.termin.tag, DatePointerType))
     npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
     if (npt is not None):
         # Same season test, applied to the last token of the noun phrase
         # (the summer word-form filter still inspects ``t``).
         tok = DateItemToken.M_SEASONS.tryParse(npt.end_token,
                                                TerminParseAttr.NO)
         if ((tok is not None and
              (Utils.valToEnum(tok.termin.tag, DatePointerType))
              == DatePointerType.SUMMER and t.morph.language.is_ru)
                 and (isinstance(t, TextToken))):
             str0_ = (t).term
             if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
                 tok = (None)
         if (tok is not None):
             return DateItemToken._new654(
                 t, tok.end_token, DateItemToken.DateItemType.POINTER,
                 Utils.valToEnum(tok.termin.tag, DatePointerType))
         typ_ = DateItemToken.DateItemType.NUMBER
         if (npt.noun.isValue("КВАРТАЛ", None)):
             typ_ = DateItemToken.DateItemType.QUARTAL
         elif (npt.end_token.isValue("ПОЛУГОДИЕ", None)
               or npt.end_token.isValue("ПІВРІЧЧЯ", None)):
             typ_ = DateItemToken.DateItemType.HALFYEAR
         elif (npt.end_token.isValue("НАЧАЛО", None)
               or npt.end_token.isValue("ПОЧАТОК", None)):
             return DateItemToken._new660(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "начало")
         elif (npt.end_token.isValue("СЕРЕДИНА", None)):
             return DateItemToken._new660(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "середина")
         elif (npt.end_token.isValue("КОНЕЦ", None)
               or npt.end_token.isValue("КІНЕЦЬ", None)
               or npt.end_token.isValue("НАПРИКІНЕЦЬ", None)):
             return DateItemToken._new660(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "конец")
         elif (npt.end_token.isValue("ВРЕМЯ", None)
               and len(npt.adjectives) > 0
               and npt.end_token.previous.isValue("НАСТОЯЩЕЕ", None)):
             return DateItemToken._new660(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "сегодня")
         elif (npt.end_token.isValue("ЧАС", None)
               and len(npt.adjectives) > 0
               and npt.end_token.previous.isValue("ДАНИЙ", None)):
             return DateItemToken._new660(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "сегодня")
         if (typ_ != DateItemToken.DateItemType.NUMBER):
             # Quarter / half-year with a relative adjective.
             delta = 0
             if (len(npt.adjectives) > 0):
                 if (npt.adjectives[0].isValue("ПОСЛЕДНИЙ", None)
                         or npt.adjectives[0].isValue("ОСТАННІЙ", None)):
                     # "last" maps to the final period: 4 for a quarter,
                     # 2 for a half-year.
                     return DateItemToken._new654(
                         t0, npt.end_token, typ_,
                         (4 if typ_ == DateItemToken.DateItemType.QUARTAL
                          else 2))
                 if (npt.adjectives[0].isValue("ПРЕДЫДУЩИЙ", None)
                         or npt.adjectives[0].isValue("ПОПЕРЕДНІЙ", None)):
                     delta = -1
                 elif (npt.adjectives[0].isValue("СЛЕДУЮЩИЙ", None)
                       or npt.adjectives[0].isValue("ПОСЛЕДУЮЩИЙ", None)
                       or npt.adjectives[0].isValue("НАСТУПНИЙ", None)):
                     delta = 1
                 else:
                     return None
             # Walk backwards for a date range carrying a quarter /
             # half-year number and shift it by ``delta``.
             # NOTE(review): ``cou`` is never incremented, so the
             # ``cou > 200`` limit below never triggers -- confirm whether
             # a look-back bound was intended.
             cou = 0
             tt = t.previous
             first_pass2817 = True
             while True:
                 if first_pass2817: first_pass2817 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (cou > 200):
                     break
                 dr = Utils.asObjectOrNull(tt.getReferent(),
                                           DateRangeReferent)
                 if (dr is None):
                     continue
                 if (typ_ == DateItemToken.DateItemType.QUARTAL):
                     ii = dr.quarter_number
                     if (ii < 1):
                         continue
                     ii += delta
                     if ((ii < 1) or ii > 4):
                         continue
                     return DateItemToken._new654(t0, npt.end_token, typ_,
                                                  ii)
                 if (typ_ == DateItemToken.DateItemType.HALFYEAR):
                     ii = dr.halfyear_number
                     if (ii < 1):
                         continue
                     ii += delta
                     if ((ii < 1) or ii > 2):
                         continue
                     return DateItemToken._new654(t0, npt.end_token, typ_,
                                                  ii)
     term = t0.term
     # Non-alphanumeric token: only date delimiters are accepted.
     if (not str.isalnum(term[0])):
         if (t0.isCharOf(".\\/:") or t0.is_hiphen):
             return DateItemToken._new660(t0, t0,
                                          DateItemToken.DateItemType.DELIM,
                                          term)
         elif (t0.isChar(',')):
             return DateItemToken._new660(t0, t0,
                                          DateItemToken.DateItemType.DELIM,
                                          term)
         else:
             return None
     # Letter "O" (Latin or Cyrillic) glued to a one-digit number is
     # treated as a leading zero, e.g. "О5" -> 5.
     if (term == "O" or term == "О"):
         if ((isinstance(t.next0_, NumberToken))
                 and not t.is_whitespace_after and len(
                     (t.next0_).value) == 1):
             return DateItemToken._new654(t, t.next0_,
                                          DateItemToken.DateItemType.NUMBER,
                                          (t.next0_).int_value)
     # Month names; a match without a tag is skipped and the token after
     # it is tried instead.
     if (str.isalpha(term[0])):
         inf = DateItemToken.M_MONTHES.tryParse(t, TerminParseAttr.NO)
         if (inf is not None and inf.termin.tag is None):
             inf = DateItemToken.M_MONTHES.tryParse(inf.end_token.next0_,
                                                    TerminParseAttr.NO)
         if (inf is not None and (isinstance(inf.termin.tag, int))):
             return DateItemToken._new675(inf.begin_token, inf.end_token,
                                          DateItemToken.DateItemType.MONTH,
                                          inf.termin.tag, inf.termin.lang)
     return None
Ejemplo n.º 9
0
 def tryAttach(t: 'Token',
               prev: typing.List['DateItemToken']) -> 'DateItemToken':
     """Try to parse a single date item starting at token ``t``.

     Before the item itself is parsed, a run of leading underscores is
     skipped, as is a bracket pair that encloses nothing but underscores
     (plus any underscores right after it). A leading "THE" is absorbed into
     the item when the token after it parses. After parsing, underscores
     glued to the end of the item are absorbed, and for YEAR / CENTURY /
     NUMBER items a following era marker (looked up in ``M_NEW_AGE``,
     optionally introduced by "ДО" or "ОТ"/"ВІД") extends the item and sets
     ``new_age``.

     Args:
         t: token to start from; ``None`` yields ``None``.
         prev: previously parsed items, passed through unchanged to the
             underlying ``__TryAttach`` parser.

     Returns:
         The parsed item, or ``None`` when nothing could be parsed or a line
         break was met while skipping leading underscores.
     """
     if t is None:
         return None
     first = t
     if first.isChar('_'):
         # Skip the run of underscores; a line break inside it aborts.
         cur = first.next0_
         while cur is not None:
             if cur.is_newline_before:
                 return None
             if not cur.isChar('_'):
                 break
             cur = cur.next0_
         t = cur
     elif BracketHelper.canBeStartOfSequence(first, True, False):
         # Look for the matching closing bracket with only underscores
         # in between; otherwise parsing starts at the bracket itself.
         closing = None
         cur = first.next0_
         while cur is not None:
             if BracketHelper.canBeEndOfSequence(cur, True, first, False):
                 closing = cur
                 break
             if not cur.isChar('_'):
                 break
             cur = cur.next0_
         if closing is None:
             t = first
         else:
             t = closing.next0_
             while t is not None and t.isChar('_'):
                 t = t.next0_
     elif isinstance(first, TextToken) and first.isValue("THE", None):
         with_article = DateItemToken.__TryAttach(t.next0_, prev)
         if with_article is not None:
             with_article.begin_token = t
             return with_article
     res = DateItemToken.__TryAttach(t, prev)
     if res is None:
         return None
     res.begin_token = first
     if not res.is_whitespace_after:
         # Absorb underscores written directly after the item.
         tail = res.end_token.next0_
         while tail is not None and tail.isChar('_'):
             res.end_token = tail
             tail = tail.next0_
     era_capable = (DateItemToken.DateItemType.YEAR,
                    DateItemToken.DateItemType.CENTURY,
                    DateItemToken.DateItemType.NUMBER)
     if res.typ in era_capable:
         follower = res.end_token.next0_
         sign = 1
         era_start = follower
         if follower is not None:
             if follower.isValue("ДО", None):
                 sign = -1
                 era_start = follower.next0_
             elif follower.isValue("ОТ", "ВІД"):
                 era_start = follower.next0_
         era = DateItemToken.M_NEW_AGE.tryParse(era_start, TerminParseAttr.NO)
         if era is not None:
             res.new_age = sign
             res.end_token = era.end_token
             # A bare number followed by an era marker is a year.
             if res.typ == DateItemToken.DateItemType.NUMBER:
                 res.typ = DateItemToken.DateItemType.YEAR
     return res
Ejemplo n.º 10
0
 def tryParseList(t: 'Token',
                  max_count: int = 10) -> typing.List['TransItemToken']:
     """Parse a run of consecutive transport items starting at ``t``.

     The first item is parsed unconditionally; each further token is tried
     directly and then through a series of fallbacks (closing bracket,
     single-token referent wrapper, parenthesised number, hyphen, comma,
     and name continuation after a comma or "and"). Adjacent MODEL items
     are merged into one at the end.

     Args:
         t: token to start from.
         max_count: when positive, stop once this many items are collected.

     Returns:
         The list of parsed items, or ``None`` when the first token does not
         parse, when the first item is of type ORG / NUMBER / CLASS / DATE,
         or when the result starts with a BRAND followed by a
         one-character, non-numeric MODEL.
     """
     kinds = TransItemToken.Typs
     parse_item = TransItemToken.tryParse
     item = parse_item(t, None, False, False)
     if item is None:
         return None
     if item.typ in (kinds.ORG, kinds.NUMBER, kinds.CLASS, kinds.DATE):
         return None
     anchor = item  # the item that later items are parsed relative to
     res = [item]
     t = item.end_token.next0_
     if item.typ == kinds.NOUN:
         # Skip colons / hyphens that separate the noun from what follows.
         while t is not None and (t.isChar(':') or t.is_hiphen):
             t = t.next0_
     stop_after_and = False
     while t is not None:
         if max_count > 0 and len(res) >= max_count:
             break
         if (anchor.typ in (kinds.NOUN, kinds.ORG) and t.is_hiphen
                 and t.next0_ is not None):
             t = t.next0_
         item = parse_item(t, anchor, False, False)
         # Fallback: closing bracket after a model / brand.
         if (item is None
                 and BracketHelper.canBeEndOfSequence(t, True, None, False)
                 and t.next0_ is not None
                 and anchor.typ in (kinds.MODEL, kinds.BRAND)):
             after = t.next0_
             if after.is_comma:
                 after = after.next0_
             item = parse_item(after, anchor, False, False)
         # Fallback: a referent token wrapping exactly one text token.
         if item is None and isinstance(t, ReferentToken):
             wrapped = Utils.asObjectOrNull(t, ReferentToken)
             inner = wrapped.begin_token
             if inner == wrapped.end_token and isinstance(inner, TextToken):
                 item = parse_item(inner, anchor, False, False)
                 if item is not None and item.begin_token == item.end_token:
                     item.begin_token = t
                     item.end_token = t
         # Fallback: "(number)" or a number right after a bracketed span.
         if item is None and t.isChar('('):
             br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
             if br is not None:
                 item = parse_item(t.next0_, anchor, False, False)
                 if item is not None:
                     if item.typ != kinds.NUMBER:
                         item = None
                     elif item.end_token.next0_ is not None:
                         item.begin_token = t
                         if item.end_token.next0_.isChar(')'):
                             item.end_token = item.end_token.next0_
                 if item is None:
                     after = br.end_token.next0_
                     if after is not None and after.is_comma:
                         after = after.next0_
                     item = parse_item(after, anchor, False, False)
                     if item is not None and item.typ != kinds.NUMBER:
                         item = None
         # Fallback: hyphen after a brand / model.
         if (item is None and t.is_hiphen
                 and anchor.typ in (kinds.BRAND, kinds.MODEL)):
             item = parse_item(t.next0_, anchor, False, False)
         # Fallback: comma followed by a number.
         if (item is None and t.is_comma
                 and anchor.typ in (kinds.NAME, kinds.BRAND, kinds.MODEL,
                                    kinds.CLASS, kinds.DATE)):
             item = parse_item(t.next0_, anchor, True, False)
             if item is not None and item.typ != kinds.NUMBER:
                 item = None
         # Fallback: another name after a comma or an "and" conjunction.
         if item is None:
             if anchor.typ == kinds.NAME:
                 if t.isChar(','):
                     item = parse_item(t.next0_, anchor, True, False)
                 elif t.morph.class0_.is_conjunction and t.is_and:
                     item = parse_item(t.next0_, anchor, True, False)
                     stop_after_and = True
             if item is not None:
                 if item.typ != kinds.NAME:
                     break
                 item.is_after_conjunction = True
         if item is None:
             break
         # Only numbers may continue the list across a line break.
         if t.is_newline_before and item.typ != kinds.NUMBER:
             break
         res.append(item)
         if not (item.typ == kinds.ORG and anchor.typ == kinds.NOUN):
             anchor = item
         if stop_after_and:
             break
         t = item.end_token.next0_
     # Merge neighbouring MODEL items into a single item.
     idx = 0
     while idx < len(res) - 1:
         current = res[idx]
         following = res[idx + 1]
         if (current.typ == kinds.MODEL and following.typ == kinds.MODEL):
             current.end_token = following.end_token
             # NOTE(review): the separator is chosen from the token AFTER the
             # already-extended end_token, not from the token between the
             # two merged items - confirm this is intended.
             trailer = current.end_token.next0_
             joiner = ('-' if trailer is not None and trailer.is_hiphen
                       else ' ')
             current.value = "{0}{1}{2}".format(current.value, joiner,
                                                following.value)
             del res[idx + 1]
         else:
             idx += 1
     if (len(res) > 1 and res[0].typ == kinds.BRAND
             and res[1].typ == kinds.MODEL and res[1].length_char == 1
             and not isinstance(res[1].begin_token, NumberToken)):
         return None
     return res
Ejemplo n.º 11
0
 def _process(begin : 'Token', max_char_pos : int, kit : 'AnalysisKit', end_token : 'Token') -> 'TitlePageReferent':
     """Build a TitlePageReferent from the tokens starting at ``begin``.

     The title name is chosen from candidate line windows, then the token
     chain is walked to collect document types, speciality, persons, date,
     city, organization and student year.

     Args:
         begin: first token of the fragment to analyse.
         max_char_pos: when positive, caps the analysed region (tokens
             beginning past this position stop the main scan).
         kit: analysis kit; when not None, created referent tokens are
             embedded into it via ``embedToken``.
         end_token: used as an out-parameter wrapper - only its ``.value``
             attribute is read and written here (it is set to ``begin`` first
             and then moved forward to the furthest token consumed).
             NOTE(review): the 'Token' annotation does not match this usage.

     Returns:
         The filled referent, or None when no lines were parsed or the
         referent ended up without any slots.
     """
     end_token.value = begin
     res = TitlePageReferent()
     term = None
     # NOTE(review): 30 and 1500 are presumably line/character limits -
     # confirm against Line.parse.
     lines = Line.parse(begin, 30, 1500, max_char_pos)
     if (len(lines) < 1): 
         return None
     # cou = number of leading lines treated as the title area; it is cut at
     # the first line that can be the start of the text/content.
     cou = len(lines)
     min_newlines_count = 10
     # Histogram: newlines-before count -> number of lines having it.
     lines_count_stat = dict()
     i = 0
     while i < len(lines): 
         if (TitleNameToken.canBeStartOfTextOrContent(lines[i].begin_token, lines[i].end_token)): 
             cou = i
             break
         j = lines[i].newlines_before_count
         if (i > 0 and j > 0): 
             if (not j in lines_count_stat): 
                 lines_count_stat[j] = 1
             else: 
                 lines_count_stat[j] += 1
         i += 1
     # The most frequent newlines-before count replaces the default of 10.
     max0_ = 0
     for kp in lines_count_stat.items(): 
         if (kp[1] > max0_): 
             max0_ = kp[1]
             min_newlines_count = kp[0]
     # End of the title area in characters, capped by max_char_pos.
     end_char = (lines[cou - 1].end_char if cou > 0 else 0)
     if (max_char_pos > 0 and end_char > max_char_pos): 
         end_char = max_char_pos
     # Collect name candidates from every window of up to 5 consecutive lines.
     names = list()
     i = 0
     while i < cou: 
         # No-op branch (looks like a leftover debugging hook).
         if (i == 6): 
             pass
         j = i
         while (j < cou) and (j < (i + 5)): 
             # No-op branch (looks like a leftover debugging hook).
             if (i == 6 and j == 8): 
                 pass
             if (j > i): 
                 # Do not extend the window across an EN<->RU language switch
                 # or across a gap of at least twice the usual line spacing.
                 if (lines[j - 1].is_pure_en and lines[j].is_pure_ru): 
                     break
                 if (lines[j - 1].is_pure_ru and lines[j].is_pure_en): 
                     break
                 if (lines[j].newlines_before_count >= (min_newlines_count * 2)): 
                     break
             ttt = TitleNameToken.tryParse(lines[i].begin_token, lines[j].end_token, min_newlines_count)
             if (ttt is not None): 
                 if (lines[i].is_pure_en): 
                     ttt.morph.language = MorphLang.EN
                 elif (lines[i].is_pure_ru): 
                     ttt.morph.language = MorphLang.RU
                 names.append(ttt)
             j += 1
         i += 1
     TitleNameToken.sort(names)
     name_rt = None
     if (len(names) > 0): 
         # Take the first candidate after sorting; if it is English, prefer
         # the first later Russian candidate with a positive rank.
         i0 = 0
         if (names[i0].morph.language.is_en): 
             ii = 1
             while ii < len(names): 
                 if (names[ii].morph.language.is_ru and names[ii].rank > 0): 
                     i0 = ii
                     break
                 ii += 1
         term = res._addName(names[i0].begin_name_token, names[i0].end_name_token)
         if (names[i0].type_value is not None): 
             res._addType(names[i0].type_value)
         if (names[i0].speciality is not None): 
             res.speciality = names[i0].speciality
         rt = ReferentToken(res, names[i0].begin_token, names[i0].end_token)
         if (kit is not None): 
             kit.embedToken(rt)
         else: 
             res.addOccurence(TextAnnotation(rt.begin_token, rt.end_token))
         end_token.value = rt.end_token
         name_rt = rt
         # If the name starts the fragment, continue from the new referent token.
         if (begin.begin_char == rt.begin_char): 
             begin = (rt)
     if (term is not None and kit is not None): 
         # Scan the whole kit token chain for further matches of the name
         # term and embed each match as a referent token for `res`.
         t = kit.first_token
         first_pass3128 = True
         while True:
             if first_pass3128: first_pass3128 = False
             else: t = t.next0_
             if (not (t is not None)): break
             tok = term.tryParse(t, TerminParseAttr.NO)
             if (tok is None): 
                 continue
             t0 = t
             t1 = tok.end_token
             # Include a trailing dot and surrounding brackets in the span.
             if (t1.next0_ is not None and t1.next0_.isChar('.')): 
                 t1 = t1.next0_
             if (BracketHelper.canBeStartOfSequence(t0.previous, False, False) and BracketHelper.canBeEndOfSequence(t1.next0_, False, None, False)): 
                 t0 = t0.previous
                 t1 = t1.next0_
             rt = ReferentToken(res, t0, t1)
             kit.embedToken(rt)
             t = (rt)
     pr = PersonRelations()
     # Role that applies to the next person(s) met; reset by title items and
     # by non-person referents.
     pers_typ = TitleItemToken.Types.UNDEFINED
     pers_types = pr.rel_types
     # Main scan over the tokens of the fragment.
     t = begin
     first_pass3129 = True
     while True:
         if first_pass3129: first_pass3129 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (max_char_pos > 0 and t.begin_char > max_char_pos): 
             break
         # Skip the token that carries the title name itself.
         if (t == name_rt): 
             continue
         tpt = TitleItemToken.tryAttach(t)
         if (tpt is not None): 
             pers_typ = TitleItemToken.Types.UNDEFINED
             if (tpt.typ == TitleItemToken.Types.TYP): 
                 if (len(res.types) == 0): 
                     res._addType(tpt.value)
                 elif (len(res.types) == 1): 
                     # Combine an existing single type with the new one.
                     ty = res.types[0].upper()
                     if (ty == "РЕФЕРАТ"): 
                         res._addType(tpt.value)
                     elif (ty == "АВТОРЕФЕРАТ"): 
                         # "Author's abstract" + dissertation kind is stored
                         # as one combined type slot (Russian and Ukrainian
                         # variants).
                         if (tpt.value == "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"): 
                             res.addSlot(TitlePageReferent.ATTR_TYPE, "автореферат кандидатской диссертации", True, 0)
                         elif (tpt.value == "ДОКТОРСКАЯ ДИССЕРТАЦИЯ"): 
                             res.addSlot(TitlePageReferent.ATTR_TYPE, "автореферат докторской диссертации", True, 0)
                         elif (tpt.value == "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ"): 
                             res.addSlot(TitlePageReferent.ATTR_TYPE, "автореферат магистерской диссертации", True, 0)
                         elif (tpt.value == "КАНДИДАТСЬКА ДИСЕРТАЦІЯ"): 
                             res.addSlot(TitlePageReferent.ATTR_TYPE, "автореферат кандидатської дисертації", True, 0)
                         elif (tpt.value == "ДОКТОРСЬКА ДИСЕРТАЦІЯ"): 
                             res.addSlot(TitlePageReferent.ATTR_TYPE, "автореферат докторської дисертації", True, 0)
                         elif (tpt.value == "МАГІСТЕРСЬКА ДИСЕРТАЦІЯ"): 
                             res.addSlot(TitlePageReferent.ATTR_TYPE, "автореферат магістерської дисертації", True, 0)
                         else: 
                             res._addType(tpt.value)
                     elif (tpt.value == "РЕФЕРАТ" or tpt.value == "АВТОРЕФЕРАТ"): 
                         if (not tpt.value in ty): 
                             res._addType(tpt.value)
             elif (tpt.typ == TitleItemToken.Types.SPECIALITY): 
                 if (res.speciality is None): 
                     res.speciality = tpt.value
             elif (tpt.typ in pers_types): 
                 # A role keyword: remember it for the persons that follow.
                 pers_typ = tpt.typ
             # Jump to the end of the title item and extend the out-value.
             t = tpt.end_token
             if (t.end_char > end_token.value.end_char): 
                 end_token.value = t
             if (t.next0_ is not None and t.next0_.isCharOf(":-")): 
                 t = t.next0_
             continue
         # Referents are only collected inside the title area.
         if (t.end_char > end_char): 
             break
         rli = t.getReferents()
         if (rli is None): 
             continue
         # Skip referents preceded by "ИМЕНИ" / "ИМ" / "ИМ." on the same line.
         if (not t.is_newline_before and (isinstance(t.previous, TextToken))): 
             s = (t.previous).term
             if (s == "ИМЕНИ" or s == "ИМ"): 
                 continue
             if (s == "." and t.previous.previous is not None and t.previous.previous.isValue("ИМ", None)): 
                 continue
         for r in rli: 
             if (isinstance(r, PersonReferent)): 
                 # Only the first referent of the token is used as a person.
                 if (r != rli[0]): 
                     continue
                 p = Utils.asObjectOrNull(r, PersonReferent)
                 # A dot right before the person cancels the pending role.
                 if (pers_typ != TitleItemToken.Types.UNDEFINED): 
                     if (t.previous is not None and t.previous.isChar('.')): 
                         pers_typ = TitleItemToken.Types.UNDEFINED
                 typ = pr.calcTypFromAttrs(p)
                 if (typ != TitleItemToken.Types.UNDEFINED): 
                     pr.add(p, typ, 1)
                     pers_typ = typ
                 elif (pers_typ != TitleItemToken.Types.UNDEFINED): 
                     pr.add(p, pers_typ, 1)
                 elif (t.previous is not None and t.previous.isChar('©')): 
                     pers_typ = TitleItemToken.Types.WORKER
                     pr.add(p, pers_typ, 1)
                 else: 
                     # No role known yet: look forward for the title referent
                     # itself, which marks the person as WORKER.
                     tt = t.next0_
                     first_pass3130 = True
                     while True:
                         if first_pass3130: first_pass3130 = False
                         else: tt = tt.next0_
                         if (not (tt is not None)): break
                         rr = tt.getReferent()
                         if (rr == res): 
                             pers_typ = TitleItemToken.Types.WORKER
                             break
                         if (isinstance(rr, PersonReferent)): 
                             # NOTE(review): this tests `r` (the current
                             # person), not `rr` (the person just found) -
                             # confirm which one is intended.
                             if (pr.calcTypFromAttrs(Utils.asObjectOrNull(r, PersonReferent)) != TitleItemToken.Types.UNDEFINED): 
                                 break
                             else: 
                                 continue
                         if (rr is not None): 
                             break
                         tpt = TitleItemToken.tryAttach(tt)
                         if (tpt is not None): 
                             if (tpt.typ != TitleItemToken.Types.TYP and tpt.typ != TitleItemToken.Types.TYPANDTHEME): 
                                 break
                             tt = tpt.end_token
                             if (tt.end_char > end_token.value.end_char): 
                                 end_token.value = tt
                             continue
                     if (pers_typ == TitleItemToken.Types.UNDEFINED): 
                         # Still unknown: look backward for the title referent
                         # or for a student/performer keyword.
                         tt = t.previous
                         while tt is not None: 
                             rr = tt.getReferent()
                             if (rr == res): 
                                 pers_typ = TitleItemToken.Types.WORKER
                                 break
                             if (rr is not None): 
                                 break
                             if ((tt.isValue("СТУДЕНТ", None) or tt.isValue("СТУДЕНТКА", None) or tt.isValue("СЛУШАТЕЛЬ", None)) or tt.isValue("ДИПЛОМНИК", None) or tt.isValue("ИСПОЛНИТЕЛЬ", None)): 
                                 pers_typ = TitleItemToken.Types.WORKER
                                 break
                             tpt = TitleItemToken.tryAttach(tt)
                             if (tpt is not None and tpt.typ != TitleItemToken.Types.TYP): 
                                 break
                             tt = tt.previous
                     # Persons whose role stays undefined are added with the
                     # third argument .5 instead of 1.
                     if (pers_typ != TitleItemToken.Types.UNDEFINED): 
                         pr.add(p, pers_typ, 1)
                     else: 
                         pr.add(p, pers_typ, .5)
                     if (t.end_char > end_token.value.end_char): 
                         end_token.value = t
                 continue
             # A non-person first referent resets the pending role.
             if (r == rli[0]): 
                 pers_typ = TitleItemToken.Types.UNDEFINED
             if (isinstance(r, DateReferent)): 
                 # Only the first date found is kept.
                 if (res.date is None): 
                     res.date = Utils.asObjectOrNull(r, DateReferent)
                     if (t.end_char > end_token.value.end_char): 
                         end_token.value = t
             elif (isinstance(r, GeoReferent)): 
                 # Only the first city found is kept.
                 if (res.city is None and (r).is_city): 
                     res.city = Utils.asObjectOrNull(r, GeoReferent)
                     if (t.end_char > end_token.value.end_char): 
                         end_token.value = t
             if (isinstance(r, OrganizationReferent)): 
                 org0_ = Utils.asObjectOrNull(r, OrganizationReferent)
                 # An organization typed "курс" with a number 1..7 gives the
                 # student year.
                 if ("курс" in org0_.types and org0_.number is not None): 
                     wrapi2519 = RefOutArgWrapper(0)
                     inoutres2520 = Utils.tryParseInt(org0_.number, wrapi2519)
                     i = wrapi2519.value
                     if (inoutres2520): 
                         if (i > 0 and (i < 8)): 
                             res.student_year = i
                 # Climb from departments up to the first non-department owner.
                 while org0_.higher is not None: 
                     if (org0_.kind != OrganizationKind.DEPARTMENT): 
                         break
                     org0_ = org0_.higher
                 if (org0_.kind != OrganizationKind.DEPARTMENT): 
                     if (res.org0_ is None): 
                         res.org0_ = org0_
                     elif (OrganizationReferent.canBeHigher(res.org0_, org0_)): 
                         res.org0_ = org0_
                 if (t.end_char > end_token.value.end_char): 
                     end_token.value = t
             if ((isinstance(r, UriReferent)) or (isinstance(r, GeoReferent))): 
                 if (t.end_char > end_token.value.end_char): 
                     end_token.value = t
     # Store the collected persons under the attribute of their role.
     for ty in pers_types: 
         for p in pr.getPersons(ty): 
             if (pr.getAttrNameForType(ty) is not None): 
                 res.addSlot(pr.getAttrNameForType(ty), p, False, 0)
     # Without an author, take the first person with an undefined role.
     if (res.getSlotValue(TitlePageReferent.ATTR_AUTHOR) is None): 
         for p in pr.getPersons(TitleItemToken.Types.UNDEFINED): 
             res.addSlot(TitlePageReferent.ATTR_AUTHOR, p, False, 0)
             break
     # Without a city, take it from the organization's geo slot if it is a city.
     if (res.city is None and res.org0_ is not None): 
         s = res.org0_.findSlot(OrganizationReferent.ATTR_GEO, None, True)
         if (s is not None and (isinstance(s.value, GeoReferent))): 
             if ((s.value).is_city): 
                 res.city = Utils.asObjectOrNull(s.value, GeoReferent)
     if (res.date is None): 
         # Without a date, try to parse one right after a geo referent
         # (optionally separated by ':', ',' or a hyphen) in the title area.
         t = begin
         first_pass3131 = True
         while True:
             if first_pass3131: first_pass3131 = False
             else: t = t.next0_
             if (not (t is not None and t.end_char <= end_char)): break
             city = Utils.asObjectOrNull(t.getReferent(), GeoReferent)
             if (city is None): 
                 continue
             if (isinstance(t.next0_, TextToken)): 
                 if (t.next0_.isCharOf(":,") or t.next0_.is_hiphen): 
                     t = t.next0_
             # Uses the token's own kit here, not the `kit` argument.
             rt = t.kit.processReferent(DateAnalyzer.ANALYZER_NAME, t.next0_)
             if (rt is not None): 
                 rt.saveToLocalOntology()
                 res.date = Utils.asObjectOrNull(rt.referent, DateReferent)
                 if (kit is not None): 
                     kit.embedToken(rt)
                 break
     if (len(res.slots) == 0): 
         return None
     else: 
         return res
Ejemplo n.º 12
0
 def parse(t: 'Token',
           max_char: int = 0,
           prev: 'InstrToken' = None) -> 'InstrToken':
     is_start_of_line = False
     t00 = t
     if (t is not None):
         is_start_of_line = t00.is_newline_before
         while t is not None:
             if (t.is_table_control_char and not t.isChar(chr(0x1F))):
                 if (t.is_newline_after and not is_start_of_line):
                     is_start_of_line = True
                 t = t.next0_
             else:
                 break
     if (t is None):
         return None
     if (t.is_newline_before):
         is_start_of_line = True
     t0 = t
     t1 = None
     has_word = False
     first_pass2988 = True
     while True:
         if first_pass2988: first_pass2988 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_newline_before and t != t0):
             break
         if (max_char > 0 and t.begin_char > max_char):
             break
         if (is_start_of_line and t == t0):
             if (t.isValue("ГЛАВА", None)):
                 next0__ = InstrToken.parse(t.next0_, 0, None)
                 if (next0__ is not None and next0__.typ == ILTypes.PERSON):
                     next0__.begin_token = t
                     return next0__
             tt = None
             if ((isinstance(t.getReferent(), PersonReferent)) or
                 (isinstance(t.getReferent(), PersonPropertyReferent)) or
                 (isinstance(t.getReferent(), InstrumentParticipant))):
                 return InstrToken.__correctPerson(
                     InstrToken._new1405(t00, t, ILTypes.PERSON, t))
             is_ref = False
             if (isinstance(t.getReferent(), PersonPropertyReferent)):
                 tt = t.next0_
                 is_ref = True
             elif (prev is not None and prev.typ == ILTypes.PERSON):
                 rt = t.kit.processReferent(PersonAnalyzer.ANALYZER_NAME, t)
                 if (rt is not None):
                     if (isinstance(rt.referent, PersonReferent)):
                         return InstrToken._new1406(t00, rt.end_token,
                                                    ILTypes.PERSON)
                     tt = rt.end_token.next0_
             cou = 0
             t11 = (None if tt is None else tt.previous)
             first_pass2989 = True
             while True:
                 if first_pass2989: first_pass2989 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.is_table_control_char):
                     continue
                 re = tt.getReferent()
                 if (isinstance(re, PersonReferent)):
                     return InstrToken._new1405(t00, tt, ILTypes.PERSON, tt)
                 if (isinstance(re, GeoReferent)):
                     t11 = tt
                     continue
                 if (re is not None):
                     break
                 if (DecreeToken.isKeyword(tt, False) is not None):
                     break
                 if (tt.is_newline_before):
                     cou += 1
                     if ((cou) > 4):
                         break
             if (tt is None and is_ref):
                 return InstrToken._new1405(t00, Utils.ifNotNull(t11, t),
                                            ILTypes.PERSON, t)
         dt = DecreeToken.tryAttach(t, None, False)
         if (dt is not None):
             if (dt.typ == DecreeToken.ItemType.TYP
                     and not t.chars.is_all_lower):
                 if (t != t0):
                     break
                 has_verb_ = False
                 tt = dt.end_token
                 while tt is not None:
                     if (tt.is_newline_before):
                         break
                     elif ((isinstance(tt, TextToken))
                           and (tt).is_pure_verb):
                         has_verb_ = True
                         break
                     tt = tt.next0_
                 if (not has_verb_):
                     res2 = InstrToken._new1409(
                         t0, dt.end_token, ILTypes.TYP,
                         Utils.ifNotNull(dt.full_value, dt.value))
                     if (res2.value == "ДОПОЛНИТЕЛЬНОЕ СОГЛАШЕНИЕ"
                             or res2.value == "ДОДАТКОВА УГОДА"):
                         if (res2.begin_char > 500
                                 and res2.newlines_before_count > 1):
                             res2.typ = ILTypes.APPENDIX
                     return res2
             if (dt.typ == DecreeToken.ItemType.NUMBER):
                 if (t != t0):
                     break
                 return InstrToken._new1409(t0, dt.end_token,
                                            ILTypes.REGNUMBER, dt.value)
             if (dt.typ == DecreeToken.ItemType.ORG):
                 if (t != t0):
                     break
                 return InstrToken._new1411(t0, dt.end_token,
                                            ILTypes.ORGANIZATION, dt.ref,
                                            dt.value)
             if (dt.typ == DecreeToken.ItemType.TERR):
                 if (t != t0):
                     break
                 re = InstrToken._new1411(t0, dt.end_token, ILTypes.GEO,
                                          dt.ref, dt.value)
                 t1 = re.end_token.next0_
                 if (t1 is not None and t1.isChar(',')):
                     t1 = t1.next0_
                 if (t1 is not None and t1.isValue("КРЕМЛЬ", None)):
                     re.end_token = t1
                 elif ((t1 is not None and t1.isValue("ДОМ", "БУДИНОК")
                        and t1.next0_ is not None)
                       and t1.next0_.isValue("СОВЕТ", "РАД")):
                     re.end_token = t1.next0_
                     if (t1.next0_.next0_ is not None and (isinstance(
                             t1.next0_.next0_.getReferent(), GeoReferent))):
                         re.end_token = t1.next0_.next0_
                 return re
             if (dt.typ == DecreeToken.ItemType.OWNER):
                 if (t != t0):
                     break
                 if (dt.ref is not None
                         and str(dt.ref.referent).startswith("агент")):
                     dt = (None)
                 if (dt is not None):
                     res1 = InstrToken._new1411(t0, dt.end_token,
                                                ILTypes.PERSON, dt.ref,
                                                dt.value)
                     return InstrToken.__correctPerson(res1)
         if (BracketHelper.canBeStartOfSequence(t, False, False)):
             br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
             if (br is not None):
                 t1 = br.end_token
                 t = t1
                 continue
             if (t.next0_ is not None and BracketHelper.canBeEndOfSequence(
                     t.next0_, False, None, False)):
                 t1 = t.next0_
                 t = t1
                 continue
         if (isinstance(t, TextToken)):
             if (t.isChar('_')):
                 t1 = t
                 continue
         r = t.getReferent()
         if (isinstance(r, DateReferent)):
             tt = t
             if (tt.next0_ is not None and tt.next0_.isCharOf(",;")):
                 tt = tt.next0_
             if (not t.is_newline_before and not tt.is_newline_after):
                 t1 = tt
                 continue
             if (not has_word):
                 return InstrToken._new1405(t, tt, ILTypes.DATE, t)
             if (t != t0):
                 break
         has_word = True
         if (isinstance(r, InstrumentParticipant)):
             tt = (t).begin_token
             first_pass2990 = True
             while True:
                 if first_pass2990: first_pass2990 = False
                 else: tt = tt.next0_
                 if (not (tt is not None and (tt.end_char < t.end_char))):
                     break
                 rr = tt.getReferent()
                 if (rr is None):
                     continue
                 if ((isinstance(rr, OrganizationReferent))
                         or (isinstance(rr, BankDataReferent))
                         or (isinstance(rr, UriReferent))):
                     r = (None)
                     break
         if ((isinstance(r, PersonReferent))
                 or (isinstance(r, PersonPropertyReferent))
                 or (isinstance(r, InstrumentParticipant))):
             if (t != t0):
                 break
             if (isinstance(r, InstrumentParticipant)):
                 pass
             res1 = InstrToken._new1405(t, t, ILTypes.PERSON, t)
             return InstrToken.__correctPerson(res1)
         if (isinstance(r, OrganizationReferent)):
             if (t != t0):
                 break
             return InstrToken._new1405(t, t, ILTypes.ORGANIZATION, t)
         if (isinstance(r, DecreePartReferent)):
             dpr = Utils.asObjectOrNull(r, DecreePartReferent)
             if (dpr.appendix is not None):
                 if (t.is_newline_before or is_start_of_line):
                     if (t.is_newline_after
                             or t.whitespaces_before_count > 30):
                         return InstrToken._new1409(t, t, ILTypes.APPENDIX,
                                                    "ПРИЛОЖЕНИЕ")
                     ok = True
                     tt = t.next0_
                     first_pass2991 = True
                     while True:
                         if first_pass2991: first_pass2991 = False
                         else: tt = tt.next0_
                         if (not (tt is not None)): break
                         if (tt.is_newline_before):
                             break
                         npt = NounPhraseHelper.tryParse(
                             tt, NounPhraseParseAttr.NO, 0)
                         if (npt is not None):
                             tt = npt.end_token
                             continue
                         ok = False
                         break
                     if (ok):
                         return InstrToken._new1409(t, t, ILTypes.APPENDIX,
                                                    "ПРИЛОЖЕНИЕ")
         if ((isinstance(r, DecreeReferent))
                 and (r).kind == DecreeKind.PUBLISHER and t == t0):
             res1 = InstrToken._new1406(t, t, ILTypes.APPROVED)
             tt = t.next0_
             first_pass2992 = True
             while True:
                 if first_pass2992: first_pass2992 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.isCharOf(",;")):
                     continue
                 if ((isinstance(tt.getReferent(), DecreeReferent)) and
                     (tt.getReferent()).kind == DecreeKind.PUBLISHER):
                     res1.end_token = t
                 else:
                     break
             return res1
         if (t.isValue("ЗА", None) and t.next0_ is not None
                 and t.is_newline_before):
             rr = t.next0_.getReferent()
             if ((isinstance(rr, PersonReferent))
                     or (isinstance(rr, PersonPropertyReferent))
                     or (isinstance(rr, InstrumentParticipant))):
                 if (t != t0):
                     break
                 res1 = InstrToken._new1405(t, t.next0_, ILTypes.PERSON,
                                            t.next0_)
                 t = t.next0_.next0_
                 if ((isinstance(rr, InstrumentParticipant))
                         and t is not None):
                     r = t.getReferent()
                     if ((r) is not None):
                         if ((isinstance(r, PersonReferent)) or
                             (isinstance(r, PersonPropertyReferent))):
                             res1.end_token = t
                             res1.ref = (t)
                 return res1
         ii = 0
         while ii < len(InstrToken._m_directives):
             if (t.isValue(InstrToken._m_directives[ii], None)):
                 if (t.next0_ is not None
                         and t.next0_.isValue("СЛЕДУЮЩЕЕ", "НАСТУПНЕ")):
                     if (t != t0):
                         break
                     t11 = t.next0_
                     ok = False
                     if (t11.next0_ is not None
                             and t11.next0_.isCharOf(":.")
                             and t11.next0_.is_newline_after):
                         ok = True
                         t11 = t11.next0_
                     if (ok):
                         return InstrToken._new1409(
                             t, t11, ILTypes.DIRECTIVE,
                             InstrToken._m_directives_norm[ii])
                 if (t.is_newline_after
                         or ((t.next0_ is not None and t.next0_.isChar(':')
                              and t.next0_.is_newline_after))):
                     if (t != t0):
                         break
                     if (not t.is_newline_before):
                         if ((InstrToken._m_directives_norm[ii] != "ПРИКАЗ"
                              and InstrToken._m_directives_norm[ii] !=
                              "ПОСТАНОВЛЕНИЕ" and
                              InstrToken._m_directives_norm[ii] != "НАКАЗ")
                                 and InstrToken._m_directives_norm[ii] !=
                                 "ПОСТАНОВУ"):
                             break
                     return InstrToken._new1409(
                         t, (t if t.is_newline_after else t.next0_),
                         ILTypes.DIRECTIVE,
                         InstrToken._m_directives_norm[ii])
                 break
             ii += 1
         if (t.is_newline_before and t.chars.is_letter
                 and t.length_char == 1):
             for d in InstrToken._m_directives:
                 t11 = MiscHelper.tryAttachWordByLetters(d, t, True)
                 if (t11 is not None):
                     if (t11.next0_ is not None and t11.next0_.isChar(':')):
                         t11 = t11.next0_
                     return InstrToken._new1406(t, t11, ILTypes.DIRECTIVE)
         tte = ((t).begin_token if (isinstance(t, MetaToken)) else t)
         term = ((tte).term if isinstance(tte, TextToken) else None)
         if (is_start_of_line and not tte.chars.is_all_lower and t == t0):
             npt = NounPhraseHelper.tryParse(tte, NounPhraseParseAttr.NO, 0)
             if (npt is not None
                     and ((term == "ПРИЛОЖЕНИЯ" or term == "ДОДАТКИ"))):
                 # if (tte.Next != null && tte.Next.IsChar(':'))
                 npt = (None)
             if (npt is not None and npt.morph.case_.is_nominative
                     and (isinstance(npt.end_token, TextToken))):
                 term1 = (npt.end_token).term
                 if (((term1 == "ПРИЛОЖЕНИЕ" or term1 == "ДОДАТОК" or term1
                       == "МНЕНИЕ") or term1 == "ДУМКА" or term1 == "АКТ")
                         or term1 == "ФОРМА" or term == "ЗАЯВКА"):
                     tt1 = npt.end_token.next0_
                     dt1 = DecreeToken.tryAttach(tt1, None, False)
                     if (dt1 is not None
                             and dt1.typ == DecreeToken.ItemType.NUMBER):
                         tt1 = dt1.end_token.next0_
                     elif (isinstance(tt1, NumberToken)):
                         tt1 = tt1.next0_
                     elif ((isinstance(tt1, TextToken))
                           and tt1.length_char == 1
                           and tt1.chars.is_letter):
                         tt1 = tt1.next0_
                     ok = True
                     if (tt1 is None):
                         ok = False
                     elif (tt1.isValue("В", "У")):
                         ok = False
                     elif (tt1.isValue("К", None)
                           and tt1.is_newline_before):
                         return InstrToken._new1409(t, t, ILTypes.APPENDIX,
                                                    term1)
                     elif (not tt1.is_newline_before
                           and InstrToken._checkEntered(tt1) is not None):
                         ok = False
                     elif (tt1 == t.next0_
                           and ((tt1.isChar(':') or
                                 ((tt1.isValue("НА", None)
                                   and term1 != "ЗАЯВКА"))))):
                         ok = False
                     if (ok):
                         br = BracketHelper.tryParse(
                             tt1, BracketParseAttr.NO, 100)
                         if (br is not None):
                             tt1 = br.end_token.next0_
                             if (br.end_token.next0_ is None
                                     or not br.end_token.is_newline_after
                                     or br.end_token.next0_.isCharOf(";,")):
                                 ok = False
                             if (tt1 is not None and tt1.isValue(
                                     "ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                                 ok = False
                     if (prev is not None and prev.typ == ILTypes.APPENDIX):
                         ok = False
                     if (ok):
                         cou = 0
                         ttt = tte.previous
                         while ttt is not None and (cou < 300):
                             if (ttt.is_table_control_char):
                                 if (not ttt.isChar(chr(0x1F))):
                                     if (ttt == tte.previous
                                             and ttt.isChar(chr(0x1E))):
                                         pass
                                     else:
                                         ok = False
                                 break
                             ttt = ttt.previous
                             cou += 1
                     if (ok):
                         it1 = InstrToken1.parse(t, True, None, 0, None,
                                                 False, 0, False)
                         if (it1 is not None):
                             if (it1.has_verb):
                                 ok = False
                     if (ok and t.previous is not None):
                         ttp = t.previous
                         first_pass2993 = True
                         while True:
                             if first_pass2993: first_pass2993 = False
                             else: ttp = ttp.previous
                             if (not (ttp is not None)): break
                             if (ttp.is_table_control_char
                                     and not ttp.isChar(chr(0x1F))):
                                 continue
                             if (BracketHelper.isBracket(ttp, False) and
                                     not BracketHelper.canBeEndOfSequence(
                                         ttp, False, None, False)):
                                 continue
                             if (ttp.isCharOf(";:")):
                                 ok = False
                             break
                     if ((ok and t.previous is not None and
                          (t.newlines_before_count < 3))
                             and not t.is_newline_after):
                         lines = 0
                         ttp = t.previous
                         first_pass2994 = True
                         while True:
                             if first_pass2994: first_pass2994 = False
                             else: ttp = ttp.previous
                             if (not (ttp is not None)): break
                             if (not ttp.is_newline_before):
                                 continue
                             while ttp is not None and (ttp.end_char <
                                                        t.begin_char):
                                 if (isinstance(ttp, NumberToken)):
                                     pass
                                 elif ((isinstance(ttp, TextToken))
                                       and ttp.length_char > 1):
                                     if (ttp.isValue(
                                             "ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                                         ok = False
                                     break
                                 else:
                                     break
                                 ttp = ttp.next0_
                             lines += 1
                             if ((lines) > 1):
                                 break
                     if (ok and
                         ((term1 != "ПРИЛОЖЕНИЕ" and term1 != "ДОДАТОК"
                           and term1 != "МНЕНИЕ"))):
                         if (t.newlines_before_count < 3):
                             ok = False
                     if (ok):
                         return InstrToken._new1409(t, t, ILTypes.APPENDIX,
                                                    term1)
         app = False
         if ((((term == "ОСОБОЕ" or term == "ОСОБЛИВЕ")) and t.next0_
              is not None and t.next0_.isValue("МНЕНИЕ", "ДУМКА"))
                 and t == t0 and is_start_of_line):
             app = True
         if ((((term == "ДОПОЛНИТЕЛЬНОЕ" or term == "ДОДАТКОВА"))
              and t.next0_ is not None
              and t.next0_.isValue("СОГЛАШЕНИЕ", "УГОДА")) and t == t0
                 and is_start_of_line):
             app = True
         if (app):
             tt = t.next0_
             while tt is not None:
                 if (tt.is_newline_before):
                     break
                 elif (tt.getMorphClassInDictionary() == MorphClass.VERB):
                     app = False
                     break
                 tt = tt.next0_
             if (app):
                 return InstrToken._new1406(t, t.next0_, ILTypes.APPENDIX)
         if (not t.chars.is_all_lower and t == t0):
             tt = InstrToken._checkApproved(t)
             if (tt is not None):
                 if (tt.next0_ is not None and
                     (isinstance(tt.next0_.getReferent(), DecreeReferent))):
                     return InstrToken._new1405(t, tt, ILTypes.APPROVED,
                                                tt.next0_.getReferent())
                 dt1 = DecreeToken.tryAttach(tt.next0_, None, False)
                 if (dt1 is not None
                         and dt1.typ == DecreeToken.ItemType.TYP):
                     return InstrToken._new1406(t, tt, ILTypes.APPROVED)
         t1 = t
         is_start_of_line = False
     if (t1 is None):
         return None
     res = InstrToken._new1406(t00, t1, ILTypes.UNDEFINED)
     res.no_words = True
     t = t0
     first_pass2995 = True
     while True:
         if first_pass2995: first_pass2995 = False
         else: t = t.next0_
         if (not (t is not None and t.end_char <= t1.end_char)): break
         if (not ((isinstance(t, TextToken)))):
             if (isinstance(t, ReferentToken)):
                 res.no_words = False
             continue
         if (not t.chars.is_letter):
             continue
         res.no_words = False
         if ((t).is_pure_verb):
             res.has_verb = True
     if (t0.isValue("ВОПРОС", "ПИТАННЯ") and t0.next0_ is not None
             and t0.next0_.isCharOf(":.")):
         res.typ = ILTypes.QUESTION
     return res
Ejemplo n.º 13
0
 def tryAttach(self,
               t: 'Token',
               for_ontology: bool = False) -> 'ReferentToken':
     """Try to build a denomination referent token that starts at ``t``.

     The special-case parser ``__tryAttachSpec`` is consulted first and its
     result is returned as-is when it is not None.  Otherwise the tokens
     following ``t`` are scanned and their source text is accumulated:
     slash/backslash/underscore/hyphen tokens are recorded as ``-``, digit
     number tokens and short text tokens (at most 3 characters) are
     appended verbatim.

     Args:
         t: first token of the candidate; None yields None.
         for_ontology: when True the scan does not stop at whitespace and
             the plausibility checks after the scan are skipped.

     Returns:
         A ``ReferentToken`` wrapping a new ``DenominationReferent`` that
         spans from ``t`` to the last accepted token, or None when the
         candidate is rejected.
     """
     if t is None:
         return None
     special = self.__tryAttachSpec(t)
     if special is not None:
         return special
     if t.chars.is_all_lower:
         # A lower-case start is tolerated only when a number token follows
         # without whitespace ...
         if t.is_whitespace_after or not isinstance(t.next0_, NumberToken):
             return None
         # ... and the token itself is detached from what precedes it.
         if not (t.previous is None or t.is_whitespace_before
                 or t.previous.isCharOf(",:")):
             return None
     buf = io.StringIO()
     last_tok = t
     ends_with_sep = False
     valid = True
     digit_tokens = 0
     symbol_tokens = 0
     cur = t.next0_
     while cur is not None:
         if cur.is_whitespace_before and not for_ontology:
             break
         if cur.isCharOf("/\\_") or cur.is_hiphen:
             # Separators are normalised to '-' and never extend the span.
             ends_with_sep = True
             buf.write('-')
         else:
             ends_with_sep = False
             number = Utils.asObjectOrNull(cur, NumberToken)
             if number is not None:
                 if number.typ != NumberSpellingType.DIGIT:
                     break
                 last_tok = number
                 buf.write(str(number.getSourceText()))
                 digit_tokens += 1
             else:
                 word = Utils.asObjectOrNull(cur, TextToken)
                 if word is None:
                     break
                 if word.length_char > 3:
                     valid = False
                     break
                 if not str.isalpha(word.term[0]):
                     # Punctuation that closes the candidate quietly.
                     if (word.isCharOf(",:")
                             or BracketHelper.canBeEndOfSequence(
                                 word, False, None, False)):
                         break
                     # Any other non-letter must be one of these symbols.
                     if not word.isCharOf("+*&^#@!"):
                         valid = False
                         break
                     symbol_tokens += 1
                 last_tok = word
                 buf.write(str(word.getSourceText()))
         cur = cur.next0_
     if not for_ontology:
         written = buf.tell()
         if written < 1 or not valid or ends_with_sep:
             return None
         if written > 12:
             return None
         if Utils.getCharAtStringIO(buf, written - 1) == '!':
             return None
         # At least one digit token or special symbol must have been seen.
         if digit_tokens + symbol_tokens == 0:
             return None
         if not self.__checkAttach(t, last_tok):
             return None
     denomination = DenominationReferent()
     denomination._addValue(t, last_tok)
     return ReferentToken(denomination, t, last_tok)