Esempio n. 1
0
 def __canBeGeoAfter(tt: 'Token') -> bool:
     """Tell whether something geographic appears to start at ``tt``.

     Leading commas and tokens accepted by ``BracketHelper.isBracket(tt, True)``
     are skipped first.  The answer is True when the remaining token carries a
     GeoReferent, when the first two territory items consist of exactly one
     item with ``termin_item`` and one without, when
     ``CityAttachHelper.checkCityAfter`` accepts the token, or when
     ``TerrAttachHelper.tryAttachStateUSATerritory`` returns a result.

     Args:
         tt(Token): token to start from (may be None)

     Returns:
         bool: True when one of the checks above succeeds, otherwise False
     """
     # Step over leading commas / bracket-like tokens.
     while tt is not None:
         if not (tt.is_comma or BracketHelper.isBracket(tt, True)):
             break
         tt = tt.next0_
     if tt is None:
         return False

     if isinstance(tt.getReferent(), GeoReferent):
         return True

     terr_items = TerrItemToken.tryParseList(tt, None, 2)
     if terr_items is not None and len(terr_items) > 1:
         first_has_termin = terr_items[0].termin_item is not None
         second_has_termin = terr_items[1].termin_item is not None
         # Exactly one of the two leading items must carry a termin.
         if first_has_termin != second_has_termin:
             return True

     if CityAttachHelper.checkCityAfter(tt):
         return True
     return TerrAttachHelper.tryAttachStateUSATerritory(tt) is not None
Esempio n. 2
0
 def createNickname(pr : 'PersonReferent', t : 'Token') -> 'Token':
     """Extract a nickname that follows a nickname keyword.

     Args:
         pr(PersonReferent): referent that receives the ATTR_NICKNAME slot(s)
         t(Token): starting token

     Returns:
         Token: the last token of the nickname (the nickname text itself is
         written into ``pr``), or None when no nickname was found
     """
     keyword_seen = False
     paren_opened = False
     # Walk over punctuation, prepositions, an opening '(' and the keyword
     # itself; stop on the first token that is none of these.
     while t is not None:
         if t.is_hiphen or t.is_comma or t.isCharOf(".:;"):
             pass
         elif t.morph.class0_.is_preposition:
             pass
         elif t.isChar('('):
             paren_opened = True
         elif (t.isValue("ПРОЗВИЩЕ", "ПРІЗВИСЬКО")
               or t.isValue("КЛИЧКА", None)
               or t.isValue("ПСЕВДОНИМ", "ПСЕВДОНІМ")
               or t.isValue("ПСЕВДО", None)
               or t.isValue("ПОЗЫВНОЙ", "ПОЗИВНИЙ")):
             keyword_seen = True
         else:
             break
         t = t.next0_
     if t is None or not keyword_seen:
         return None

     if BracketHelper.isBracket(t, True):
         # Nickname given as a bracketed sequence.
         seq = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
         if seq is None:
             return None
         nick = MiscHelper.getTextValue(seq.begin_token.next0_,
                                        seq.end_token.previous,
                                        GetTextAttr.NO)
         if nick is None:
             return None
         pr.addSlot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
         t = seq.end_token
         # Collect further bracketed nicknames joined by comma / "and".
         cur = t.next0_
         while cur is not None:
             if not cur.is_comma_and:
                 if not BracketHelper.isBracket(cur, True):
                     break
                 seq = BracketHelper.tryParse(cur, BracketParseAttr.NO, 100)
                 if seq is None:
                     break
                 nick = MiscHelper.getTextValue(seq.begin_token.next0_,
                                                seq.end_token.previous,
                                                GetTextAttr.NO)
                 if nick is not None:
                     pr.addSlot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
                 cur = seq.end_token
                 t = cur
             cur = cur.next0_
     else:
         # Nickname given as one or two person items.
         items = PersonItemToken.tryAttachList(
             t, None, PersonItemToken.ParseAttr.NO, 10)
         if items is None or len(items) not in (1, 2):
             return None
         last_item = items[len(items) - 1]
         nick = MiscHelper.getTextValue(items[0].begin_token,
                                        last_item.end_token, GetTextAttr.NO)
         if nick is None:
             return None
         pr.addSlot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
         t = last_item.end_token

     # Swallow the ')' that closes a '(' seen before the keyword.
     if paren_opened and t.next0_ is not None and t.next0_.isChar(')'):
         t = t.next0_
     return t
Esempio n. 3
0
 def process(self, kit: 'AnalysisKit') -> None:
     """Find weapon referents in the tokens of ``kit`` and embed them.

     The token chain is walked twice.  The first walk parses weapon item
     lists, registers the resulting referents, embeds them and remembers
     their model strings (plain and brand-prefixed) and their names.  The
     second walk embeds a referent token wherever a remembered model or
     name occurs again.

     Args:
         kit(AnalysisKit): supplies the analyzer data and the token chain
     """
     ad = kit.getAnalyzerData(self)
     models = TerminCollection()
     objs_by_model = dict()
     obj_by_names = TerminCollection()

     def remember_model(key, referent):
         # Index ``referent`` under the model string ``key``; keys that
         # start with a digit are not indexed.
         if str.isdigit(key[0]):
             return
         holder = RefOutArgWrapper(None)
         found = Utils.tryGetValue(objs_by_model, key, holder)
         bucket = holder.value
         if not found:
             bucket = list()
             objs_by_model[key] = bucket
         if referent not in bucket:
             bucket.append(referent)
         models.addStr(key, bucket, None, False)

     def embed_known(cur):
         # Embed a referent token at ``cur`` when a remembered name/model
         # starts there; return the new token, or None if nothing was done.
         br = BracketHelper.tryParse(cur, BracketParseAttr.NO, 10)
         if br is not None:
             toks = obj_by_names.tryParse(cur.next0_, TerminParseAttr.NO)
             if toks is not None and toks.end_token.next0_ == br.end_token:
                 wrapped = ReferentToken(
                     Utils.asObjectOrNull(toks.termin.tag, Referent),
                     br.begin_token, br.end_token)
                 kit.embedToken(wrapped)
                 return wrapped
         if not isinstance(cur, TextToken):
             return None
         if not cur.chars.is_letter:
             return None
         tok = models.tryParse(cur, TerminParseAttr.NO)
         if tok is None and not cur.chars.is_all_lower:
             tok = obj_by_names.tryParse(cur, TerminParseAttr.NO)
         if tok is None:
             return None
         if not tok.is_whitespace_after:
             follower = tok.end_token.next0_
             if follower is None or not follower.isCharOf(",.)"):
                 if not BracketHelper.isBracket(follower, False):
                     return None
         # A model maps to a list of referents; use it only if unambiguous.
         candidates = Utils.asObjectOrNull(tok.termin.tag, list)
         if candidates is not None and len(candidates) == 1:
             target = candidates[0]
         else:
             target = Utils.asObjectOrNull(tok.termin.tag, Referent)
         if target is None:
             return None
         tit = WeaponItemToken.tryParse(tok.begin_token.previous, None,
                                        False, True)
         if tit is not None and tit.typ == WeaponItemToken.Typs.BRAND:
             target.addSlot(WeaponReferent.ATTR_BRAND, tit.value, False, 0)
             tok.begin_token = tit.begin_token
         wrapped = ReferentToken(target, tok.begin_token, tok.end_token)
         kit.embedToken(wrapped)
         return wrapped

     # First walk: full parsing, registration and indexing.
     t = kit.first_token
     while t is not None:
         its = WeaponItemToken.tryParseList(t, 10)
         if its is not None:
             rts = self.__tryAttach(its, False)
             if rts is not None:
                 for rt in rts:
                     rt.referent = ad.registerReferent(rt.referent)
                     kit.embedToken(rt)
                     t = rt
                     for slot in rt.referent.slots:
                         if slot.type_name == WeaponReferent.ATTR_MODEL:
                             model = str(slot.value)
                             remember_model(model, rt.referent)
                             brand = rt.referent.getStringValue(
                                 WeaponReferent.ATTR_BRAND)
                             if brand is not None:
                                 remember_model(
                                     "{0} {1}".format(brand, model),
                                     rt.referent)
                         elif slot.type_name == WeaponReferent.ATTR_NAME:
                             obj_by_names.add(
                                 Termin._new117(str(slot.value), rt.referent))
         t = t.next0_

     if len(objs_by_model) == 0 and len(obj_by_names.termins) == 0:
         return

     # Second walk: link repeated mentions to the known referents.
     t = kit.first_token
     while t is not None:
         embedded = embed_known(t)
         if embedded is not None:
             t = embedded
         t = t.next0_
Esempio n. 4
0
 def tryParse(
         self,
         t0: 'Token',
         pars: 'TerminParseAttr' = TerminParseAttr.NO) -> 'TerminToken':
     """Try to attach this termin to the text that starts at token ``t0``.

     The attempts are made in this order: smart acronym, plain acronym,
     ordered sequence of ``self.terms``, unordered set of ``self.terms``
     (when ``ignore_terms_order`` is set) and finally ``self.abridges``.

     Args:
         t0(Token): first token of the candidate fragment (may be None)
         pars(TerminParseAttr): bit flags; this method tests FULLWORDSONLY,
             IGNOREBRACKETS, CANBEGEOOBJECT and IGNORESTOPWORDS

     Returns:
         TerminToken: the matched span, or None when nothing matched
     """
     from pullenti.ner.core.MiscHelper import MiscHelper
     from pullenti.ner.core.BracketHelper import BracketHelper
     if (t0 is None):
         return None
     # Term string of t0; stays None when t0 is not a TextToken.
     term = None
     if (isinstance(t0, TextToken)):
         term = (t0).term
     # Smart acronym: tried only when FULLWORDSONLY is not requested.
     if (self.acronym_smart is not None
             and (((pars) &
                   (TerminParseAttr.FULLWORDSONLY))) == (TerminParseAttr.NO)
             and term is not None):
         if (self.acronym_smart == term):
             # Exact hit; a '.' glued to the token is included in the span.
             if (t0.next0_ is not None and t0.next0_.isChar('.')
                     and not t0.is_whitespace_after):
                 return TerminToken._new606(t0, t0.next0_, self)
             else:
                 return TerminToken._new606(t0, t0, self)
         # Dotted spelling: one-letter tokens, each immediately followed by
         # '.', with no whitespace between the parts, compared letter by
         # letter against self.acronym.
         # NOTE(review): self.acronym is indexed here without a None check -
         # presumably it is always set together with acronym_smart; confirm.
         t1 = Utils.asObjectOrNull(t0, TextToken)
         tt = Utils.asObjectOrNull(t0, TextToken)
         i = 0
         while i < len(self.acronym):
             if (tt is None):
                 break
             term1 = tt.term
             if (len(term1) != 1 or tt.is_whitespace_after):
                 break
             if (i > 0 and tt.is_whitespace_before):
                 break
             if (term1[0] != self.acronym[i]):
                 break
             if (tt.next0_ is None or not tt.next0_.isChar('.')):
                 break
             t1 = (Utils.asObjectOrNull(tt.next0_, TextToken))
             tt = (Utils.asObjectOrNull(tt.next0_.next0_, TextToken))
             i += 1
         if (i >= len(self.acronym)):
             return TerminToken._new606(t0, t1, self)
     # Plain acronym equal to the token term: accepted for all-upper tokens,
     # for any case when acronym_can_be_lower is set, or for tokens that are
     # not all-lower and have at least 3 characters.
     if (self.acronym is not None and term is not None
             and self.acronym == term):
         if (t0.chars.is_all_upper or self.acronym_can_be_lower
                 or ((not t0.chars.is_all_lower and len(term) >= 3))):
             return TerminToken._new606(t0, t0, self)
     # Token longer than 3 characters that ends in lower case: compare with
     # the acronym through isValue (presumably tolerant of word endings -
     # confirm against Token.isValue).
     if (self.acronym is not None and t0.chars.is_last_lower
             and t0.length_char > 3):
         if (t0.isValue(self.acronym, None)):
             return TerminToken._new606(t0, t0, self)
     # cou = number of non-hyphen terms minus number of hyphen terms.
     cou = 0
     i = 0
     while i < len(self.terms):
         if (self.terms[i].is_hiphen):
             cou -= 1
         else:
             cou += 1
         i += 1
     # Ordered matching of self.terms against consecutive tokens; used when
     # the order matters, or when cou == 1 even if ignore_terms_order is set.
     if (len(self.terms) > 0
             and ((not self.ignore_terms_order or cou == 1))):
         t1 = t0
         tt = t0
         # e0_/eup: last inner token and outer token of a ReferentToken that
         # is currently being traversed from the inside.
         e0_ = None
         eup = None
         ok = True
         mc = None
         dont_change_mc = False
         i = 0
         # Emulated "for (i = 0; i < len(terms); i++)" so that `continue`
         # still performs the increment.
         first_pass2812 = True
         while True:
             if first_pass2812: first_pass2812 = False
             else: i += 1
             if (not (i < len(self.terms))): break
             # Hyphen terms do not consume a token of their own.
             if (self.terms[i].is_hiphen):
                 continue
             # A hyphen in the text between two terms is skipped.
             if (tt is not None and tt.is_hiphen and i > 0):
                 tt = tt.next0_
             # With IGNOREBRACKETS a non-letter bracket token between terms
             # is skipped.
             if (i > 0 and tt is not None):
                 if ((((pars) & (TerminParseAttr.IGNOREBRACKETS))) !=
                     (TerminParseAttr.NO) and not tt.chars.is_letter
                         and BracketHelper.isBracket(tt, False)):
                     tt = tt.next0_
             # With CANBEGEOOBJECT a "GEO" referent token between terms is
             # skipped.
             if (((((pars) & (TerminParseAttr.CANBEGEOOBJECT))) !=
                  (TerminParseAttr.NO) and i > 0 and
                  (isinstance(tt, ReferentToken)))
                     and tt.getReferent().type_name == "GEO"):
                 tt = tt.next0_
             # Descend into a referent token and match against its inner
             # tokens (only one level: e0_ must be free).
             if ((isinstance(tt, ReferentToken)) and e0_ is None):
                 eup = tt
                 e0_ = (tt).end_token
                 tt = (tt).begin_token
             if (tt is None):
                 ok = False
                 break
             if (not self.terms[i].checkByToken(tt)):
                 if (tt.next0_ is not None and tt.isChar('.')
                         and self.terms[i].checkByToken(tt.next0_)):
                     # A stray '.' before the expected word is tolerated.
                     tt = tt.next0_
                 elif (((i > 0 and tt.next0_ is not None and
                         (isinstance(tt, TextToken))) and
                        ((tt.morph.class0_.is_preposition
                          or MiscHelper.isEngArticle(tt)))
                        and self.terms[i].checkByToken(tt.next0_))
                       and not self.terms[i - 1].is_pattern_any):
                     # A preposition / English article before the expected
                     # word is tolerated.
                     tt = tt.next0_
                 else:
                     ok = False
                     if (((i + 2) < len(self.terms))
                             and self.terms[i + 1].is_hiphen
                             and self.terms[i + 2].checkByPrefToken(
                                 self.terms[i],
                                 Utils.asObjectOrNull(tt, TextToken))):
                         # Terms i, hyphen, i+2 are covered by the single
                         # token tt, so two terms are consumed at once.
                         i += 2
                         ok = True
                     elif (((not tt.is_whitespace_after
                             and tt.next0_ is not None and
                             (isinstance(tt, TextToken))) and
                            (tt).length_char == 1
                            and tt.next0_.isCharOf("\"'`’“”"))
                           and not tt.next0_.is_whitespace_after
                           and (isinstance(tt.next0_.next0_, TextToken))):
                         # One letter + quote/apostrophe + word, all glued
                         # together: check the letter as a prefix of the word.
                         if (self.terms[i].checkByStrPrefToken(
                             (tt).term,
                                 Utils.asObjectOrNull(
                                     tt.next0_.next0_, TextToken))):
                             ok = True
                             tt = tt.next0_.next0_
                     if (not ok):
                         # With IGNORESTOPWORDS, non-letter tokens,
                         # conjunctions, prepositions and numbers are skipped
                         # and the same term is retried (i -= 1 compensates
                         # for the loop increment).
                         # NOTE(review): ok was set to False above and is not
                         # reset before these `continue` statements, so the
                         # final `ok and i >= len(self.terms)` test fails
                         # unless a later iteration sets ok again - confirm
                         # this is intended.
                         if (i > 0 and
                             (((pars) &
                               (TerminParseAttr.IGNORESTOPWORDS))) !=
                             (TerminParseAttr.NO)):
                             if (isinstance(tt, TextToken)):
                                 if (not tt.chars.is_letter):
                                     tt = tt.next0_
                                     i -= 1
                                     continue
                                 mc1 = tt.getMorphClassInDictionary()
                                 if (mc1.is_conjunction
                                         or mc1.is_preposition):
                                     tt = tt.next0_
                                     i -= 1
                                     continue
                             if (isinstance(tt, NumberToken)):
                                 tt = tt.next0_
                                 i -= 1
                                 continue
                         break
             # Track the morphology of the matched tokens; it is frozen after
             # a noun/verb that is not an adjective (unless a hyphen term
             # follows) and after a preposition or conjunction.
             if (tt.morph.items_count > 0 and not dont_change_mc):
                 mc = MorphCollection(tt.morph)
                 if (((mc.class0_.is_noun or mc.class0_.is_verb))
                         and not mc.class0_.is_adjective):
                     if (((i + 1) < len(self.terms))
                             and self.terms[i + 1].is_hiphen):
                         pass
                     else:
                         dont_change_mc = True
             if (tt.morph.class0_.is_preposition
                     or tt.morph.class0_.is_conjunction):
                 dont_change_mc = True
             # Last inner token of the referent reached: climb back to the
             # outer token.
             if (tt == e0_):
                 tt = eup
                 eup = (None)
                 e0_ = (None)
             # t1 (end of the match) only advances at the outer token level.
             if (e0_ is None):
                 t1 = tt
             tt = tt.next0_
         if (ok and i >= len(self.terms)):
             # Include a trailing '.' when one of the abridges attaches at t0.
             if (t1.next0_ is not None and t1.next0_.isChar('.')
                     and self.abridges is not None):
                 for a in self.abridges:
                     if (a.tryAttach(t0) is not None):
                         t1 = t1.next0_
                         break
             # For a multi-token match starting with an adjective, prefer the
             # morphology of the noun phrase if it fits inside the match.
             if (t0 != t1 and t0.morph.class0_.is_adjective):
                 npt = NounPhraseHelper.tryParse(t0, NounPhraseParseAttr.NO,
                                                 0)
                 if (npt is not None and npt.end_char <= t1.end_char):
                     mc = npt.morph
             # NOTE(review): unlike the _new606 calls above, self is not
             # passed here - presumably the caller fills in the termin.
             return TerminToken._new611(t0, t1, mc)
     # Order-independent matching: every token must match some term that
     # has not been matched yet.
     if (len(self.terms) > 1 and self.ignore_terms_order):
         terms_ = list(self.terms)
         t1 = t0
         tt = t0
         while len(terms_) > 0:
             if (tt != t0 and tt is not None and tt.is_hiphen):
                 tt = tt.next0_
             if (tt is None):
                 break
             # Find the first remaining term that accepts this token.
             j = 0
             while j < len(terms_):
                 if (terms_[j].checkByToken(tt)):
                     break
                 j += 1
             if (j >= len(terms_)):
                 # No term fits: with IGNORESTOPWORDS skip non-letter tokens,
                 # conjunctions, prepositions and numbers, otherwise stop.
                 if (tt != t0 and (((pars) &
                                    (TerminParseAttr.IGNORESTOPWORDS))) !=
                     (TerminParseAttr.NO)):
                     if (isinstance(tt, TextToken)):
                         if (not tt.chars.is_letter):
                             tt = tt.next0_
                             continue
                         mc1 = tt.getMorphClassInDictionary()
                         if (mc1.is_conjunction or mc1.is_preposition):
                             tt = tt.next0_
                             continue
                     if (isinstance(tt, NumberToken)):
                         tt = tt.next0_
                         continue
                 break
             del terms_[j]
             t1 = tt
             tt = tt.next0_
         # Unmatched hyphen terms do not count as missing.
         for i in range(len(terms_) - 1, -1, -1):
             if (terms_[i].is_hiphen):
                 del terms_[i]
         if (len(terms_) == 0):
             return TerminToken(t0, t1)
     # Abridges: tried only without FULLWORDSONLY; the longest result wins.
     if (self.abridges is not None and
         (((pars) &
           (TerminParseAttr.FULLWORDSONLY))) == (TerminParseAttr.NO)):
         res = None
         for a in self.abridges:
             r = a.tryAttach(t0)
             if (r is None):
                 continue
             # A result flagged abridge_without_point on a termin that has
             # terms is accepted only when t0 is a text token whose term
             # equals the first part of the abridge.
             if (r.abridge_without_point and len(self.terms) > 0):
                 if (not ((isinstance(t0, TextToken)))):
                     continue
                 if (a.parts[0].value != (t0).term):
                     continue
             if (res is None or (res.length_char < r.length_char)):
                 res = r
         if (res is not None):
             return res
     return None
Esempio n. 5
0
 def getNameEx(begin: 'Token',
               end: 'Token',
               cla: 'MorphClass',
               mc: 'MorphCase',
               gender: 'MorphGender' = MorphGender.UNDEFINED,
               ignore_brackets_and_hiphens: bool = False,
               ignore_geo_referent: bool = False) -> str:
     """Assemble a name string from the tokens ``begin`` .. ``end``.

     Text tokens contribute a normal form chosen from their word forms by
     ``cla`` / ``mc`` / ``gender`` (or their term when nothing is chosen),
     number tokens contribute their value, and other meta tokens are
     processed recursively.

     Args:
         begin(Token): first token of the range
         end(Token): last token of the range (inclusive)
         cla(MorphClass): when its value is non-zero, only word forms sharing
             class bits with it are considered
         mc(MorphCase): when defined, only word forms whose case intersects
             with it are considered
         gender(MorphGender): when defined, only word forms of that gender
             are considered; if none is found the search is repeated
             without the gender filter
         ignore_brackets_and_hiphens(bool): skip bracket tokens and
             whitespace-separated hyphens instead of handling hyphenated
             prefixes
         ignore_geo_referent(bool): skip meta tokens (other than ``begin``)
             whose referent has type_name "GEO"

     Returns:
         str: the assembled text, or None when an argument is None, the
         range is inverted, or nothing was produced
     """
     if (end is None or begin is None):
         return None
     # Reject a range whose start lies behind its end.
     if (begin.end_char > end.begin_char and begin != end):
         return None
     res = io.StringIO()
     # Accumulates "word-" prefixes of a hyphenated word until the last part.
     prefix = None
     t = begin
     # Emulated for-loop over the tokens so that `continue` still advances.
     first_pass2809 = True
     while True:
         if first_pass2809: first_pass2809 = False
         else: t = t.next0_
         if (not (t is not None and t.end_char <= end.end_char)): break
         # Stop once more than 1000 characters have been written.
         if (res.tell() > 1000):
             break
         if (t.is_table_control_char):
             continue
         if (ignore_brackets_and_hiphens):
             if (BracketHelper.isBracket(t, False)):
                 if (t == end):
                     break
                 if (t.isCharOf("(<[")):
                     br = BracketHelper.tryParse(t, BracketParseAttr.NO,
                                                 100)
                     if (br is not None and br.end_char <= end.end_char):
                         # Render the bracket content recursively, without
                         # any morphological filters.
                         tmp = ProperNameHelper.getNameEx(
                             br.begin_token.next0_, br.end_token.previous,
                             MorphClass.UNDEFINED, MorphCase.UNDEFINED,
                             MorphGender.UNDEFINED,
                             ignore_brackets_and_hiphens, False)
                         if (tmp is not None):
                             # A trailing bracket that holds a single
                             # non-letter, non-referent token is dropped;
                             # otherwise " <open><content><close>" is kept.
                             if ((br.end_char == end.end_char
                                  and br.begin_token.next0_
                                  == br.end_token.previous and
                                  not br.begin_token.next0_.chars.is_letter)
                                     and not ((isinstance(
                                         br.begin_token.next0_,
                                         ReferentToken)))):
                                 pass
                             else:
                                 print(" {0}{1}{2}".format(
                                     t.getSourceText(), tmp,
                                     br.end_token.getSourceText()),
                                       end="",
                                       file=res,
                                       flush=True)
                         # Continue after the closing bracket.
                         t = br.end_token
                 continue
             if (t.is_hiphen):
                 if (t == end):
                     break
                 elif (t.is_whitespace_before or t.is_whitespace_after):
                     # A hyphen with whitespace on either side is skipped.
                     continue
         tt = Utils.asObjectOrNull(t, TextToken)
         if (tt is not None):
             if (not ignore_brackets_and_hiphens):
                 # "word-word": remember this part as a prefix and jump to
                 # the hyphen; the loop header then moves to the next part.
                 if ((tt.next0_ is not None and tt.next0_.is_hiphen and
                      (isinstance(tt.next0_.next0_, TextToken)))
                         and tt != end and tt.next0_ != end):
                     if (prefix is None):
                         prefix = tt.term
                     else:
                         prefix = "{0}-{1}".format(prefix, tt.term)
                     t = tt.next0_
                     if (t == end):
                         break
                     else:
                         continue
             # s: the text chosen for this token.
             s = None
             if (cla.value != (0) or not mc.is_undefined
                     or gender != MorphGender.UNDEFINED):
                 # Take the normal form of the first word form that passes
                 # all filters; a form equal to the term itself overrides
                 # an earlier choice.
                 for wff in tt.morph.items:
                     wf = Utils.asObjectOrNull(wff, MorphWordForm)
                     if (wf is None):
                         continue
                     if (cla.value != (0)):
                         if ((((wf.class0_.value) & (cla.value))) == 0):
                             continue
                     if (not mc.is_undefined):
                         if (((wf.case_) & mc).is_undefined):
                             continue
                     if (gender != MorphGender.UNDEFINED):
                         if ((((wf.gender) &
                               (gender))) == (MorphGender.UNDEFINED)):
                             continue
                     if (s is None or wf.normal_case == tt.term):
                         s = wf.normal_case
                 # Nothing fits with the gender filter: retry without it.
                 if (s is None and gender != MorphGender.UNDEFINED):
                     for wff in tt.morph.items:
                         wf = Utils.asObjectOrNull(wff, MorphWordForm)
                         if (wf is None):
                             continue
                         if (cla.value != (0)):
                             if ((((wf.class0_.value) & (cla.value))) == 0):
                                 continue
                         if (not mc.is_undefined):
                             if (((wf.case_) & mc).is_undefined):
                                 continue
                         if (s is None or wf.normal_case == tt.term):
                             s = wf.normal_case
             if (s is None):
                 s = tt.term
                 # For a token ending in lower case, keep the source text up
                 # to and including its last upper-case character.
                 if (tt.chars.is_last_lower and tt.length_char > 2):
                     s = tt.getSourceText()
                     for i in range(len(s) - 1, -1, -1):
                         if (str.isupper(s[i])):
                             s = s[0:0 + i + 1]
                             break
             # Glue the collected hyphen prefix in front of the last part.
             if (prefix is not None):
                 delim = "-"
                 if (ignore_brackets_and_hiphens):
                     delim = " "
                 s = "{0}{1}{2}".format(prefix, delim, s)
             prefix = (None)
             # Separator: a space before alphanumeric text unless the buffer
             # ends with '-', and a space before a token that can start a
             # bracket sequence when brackets are not ignored.
             if (res.tell() > 0 and len(s) > 0):
                 if (str.isalnum(s[0])):
                     ch0 = Utils.getCharAtStringIO(res, res.tell() - 1)
                     if (ch0 == '-'):
                         pass
                     else:
                         print(' ', end="", file=res)
                 elif (not ignore_brackets_and_hiphens
                       and BracketHelper.canBeStartOfSequence(
                           tt, False, False)):
                     print(' ', end="", file=res)
             print(s, end="", file=res)
         elif (isinstance(t, NumberToken)):
             # No space when the number directly follows a '-' in the buffer.
             if (res.tell() > 0):
                 if (not t.is_whitespace_before and Utils.getCharAtStringIO(
                         res,
                         res.tell() - 1) == '-'):
                     pass
                 else:
                     print(' ', end="", file=res)
             nt = Utils.asObjectOrNull(t, NumberToken)
             # An adjective number written as a single word keeps its term;
             # every other number is written as its value.
             if ((t.morph.class0_.is_adjective
                  and nt.typ == NumberSpellingType.WORDS
                  and nt.begin_token == nt.end_token)
                     and (isinstance(nt.begin_token, TextToken))):
                 print((nt.begin_token).term, end="", file=res)
             else:
                 print(nt.value, end="", file=res)
         elif (isinstance(t, MetaToken)):
             if ((ignore_geo_referent and t != begin
                  and t.getReferent() is not None)
                     and t.getReferent().type_name == "GEO"):
                 continue
             # Render the inner tokens of the meta token with the same
             # settings.
             s = ProperNameHelper.getNameEx(
                 (t).begin_token, (t).end_token, cla, mc, gender,
                 ignore_brackets_and_hiphens, ignore_geo_referent)
             if (not Utils.isNullOrEmpty(s)):
                 if (res.tell() > 0):
                     if (not t.is_whitespace_before
                             and Utils.getCharAtStringIO(
                                 res,
                                 res.tell() - 1) == '-'):
                         pass
                     else:
                         print(' ', end="", file=res)
                 print(s, end="", file=res)
         if (t == end):
             break
     if (res.tell() == 0):
         return None
     return Utils.toStringStringIO(res)
Esempio n. 6
0
 def __TryParse(t: 'Token',
                prev: 'WeaponItemToken',
                after_conj: bool,
                attach_high: bool = False) -> 'WeaponItemToken':
     """Try to recognise one weapon item that starts at token ``t``.

     The checks run in this order: a bracketed item, an ontology termin
     (NOUN / BRAND / NAME / MODEL), a number introduced by a number prefix,
     a short upper-case abbreviation, and finally a capitalised word that
     follows another item.

     Args:
         t(Token): token to start from (may be None)
         prev(WeaponItemToken): previously parsed item; consulted only by
             the capitalised-word heuristic at the end
         after_conj(bool): only forwarded to the recursive call made for a
             bracketed item
         attach_high(bool): only forwarded to the recursive call made for a
             bracketed item

     Returns:
         WeaponItemToken: the recognised item, or None
     """
     if (t is None):
         return None
     # Bracketed item: parse what follows the bracket and widen the result
     # to cover the bracket(s) when the item ends the text or is directly
     # followed by another bracket token.
     if (BracketHelper.isBracket(t, True)):
         wit = WeaponItemToken.__TryParse(t.next0_, prev, after_conj,
                                          attach_high)
         if (wit is not None):
             if (wit.end_token.next0_ is None):
                 wit.begin_token = t
                 return wit
             if (BracketHelper.isBracket(wit.end_token.next0_, True)):
                 wit.begin_token = t
                 wit.end_token = wit.end_token.next0_
                 return wit
     # Ontology lookup; the termin tag holds the item type.
     tok = WeaponItemToken.M_ONTOLOGY.tryParse(t, TerminParseAttr.NO)
     if (tok is not None):
         res = WeaponItemToken(t, tok.end_token)
         res.typ = (Utils.valToEnum(tok.termin.tag, WeaponItemToken.Typs))
         if (res.typ == WeaponItemToken.Typs.NOUN):
             res.value = tok.termin.canonic_text
             # A termin with tag2 set marks the noun as doubtful.
             if (tok.termin.tag2 is not None):
                 res.is_doubt = True
             # Extend the noun over following brands and adjectives.
             # Emulated for-loop so that `continue` still advances tt.
             tt = res.end_token.next0_
             first_pass3156 = True
             while True:
                 if first_pass3156: first_pass3156 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.whitespaces_before_count > 2):
                     break
                 wit = WeaponItemToken.__TryParse(tt, None, False, False)
                 if (wit is not None):
                     # Only a BRAND is absorbed; any other item ends the noun.
                     if (wit.typ == WeaponItemToken.Typs.BRAND):
                         res.__inner_tokens.append(wit)
                         tt = wit.end_token
                         res.end_token = tt
                         continue
                     break
                 if (not ((isinstance(tt, TextToken)))):
                     break
                 mc = tt.getMorphClassInDictionary()
                 if (mc == MorphClass.ADJECTIVE):
                     # alt_value becomes "<adjectives so far><adjective> <noun>":
                     # the trailing noun is cut off first, then re-appended
                     # after the new adjective.
                     if (res.alt_value is None):
                         res.alt_value = res.value
                     if (res.alt_value.endswith(res.value)):
                         res.alt_value = res.alt_value[0:0 +
                                                       len(res.alt_value) -
                                                       len(res.value)]
                     res.alt_value = "{0}{1} {2}".format(
                         res.alt_value, (tt).term, res.value)
                     res.end_token = tt
                     continue
                 break
             return res
         if (res.typ == WeaponItemToken.Typs.BRAND
                 or res.typ == WeaponItemToken.Typs.NAME):
             res.value = tok.termin.canonic_text
             return res
         if (res.typ == WeaponItemToken.Typs.MODEL):
             res.value = tok.termin.canonic_text
             # tag2 of a model termin may list related termins; each one
             # becomes an inner token spanning the same text.
             if (isinstance(tok.termin.tag2, list)):
                 li = Utils.asObjectOrNull(tok.termin.tag2, list)
                 for to in li:
                     wit = WeaponItemToken._new2600(
                         t, tok.end_token,
                         Utils.valToEnum(to.tag, WeaponItemToken.Typs),
                         to.canonic_text, tok.begin_token == tok.end_token)
                     res.__inner_tokens.append(wit)
                     if (to.additional_vars is not None
                             and len(to.additional_vars) > 0):
                         wit.alt_value = to.additional_vars[0].canonic_text
             res.__correctModel()
             return res
     # Number introduced by a number prefix; the number parser of
     # TransItemToken is reused.
     nnn = MiscHelper.checkNumberPrefix(t)
     if (nnn is not None):
         tit = TransItemToken._attachNumber(nnn, True)
         if (tit is not None):
             res = WeaponItemToken._new2601(t, tit.end_token,
                                            WeaponItemToken.Typs.NUMBER)
             res.value = tit.value
             res.alt_value = tit.alt_value
             return res
     # Upper-case abbreviation shorter than 4 characters.
     if (((isinstance(t, TextToken)) and t.chars.is_letter
          and t.chars.is_all_upper) and (t.length_char < 4)):
         # Letters, then '-' or '.', then a number: a model.
         if ((t.next0_ is not None and
              ((t.next0_.is_hiphen or t.next0_.isChar('.'))) and
              (t.next0_.whitespaces_after_count < 2))
                 and (isinstance(t.next0_.next0_, NumberToken))):
             res = WeaponItemToken._new2602(t, t.next0_,
                                            WeaponItemToken.Typs.MODEL,
                                            True)
             res.value = (t).term
             res.__correctModel()
             return res
         # Letters directly followed by a number: a model.
         if ((isinstance(t.next0_, NumberToken))
                 and not t.is_whitespace_after):
             res = WeaponItemToken._new2602(t, t,
                                            WeaponItemToken.Typs.MODEL,
                                            True)
             res.value = (t).term
             res.__correctModel()
             return res
         # "СП" followed by a model or brand is returned as a NOUN with
         # value "ПИСТОЛЕТ" (pistol) and alt value "СЛУЖЕБНЫЙ ПИСТОЛЕТ"
         # (service pistol).
         if ((t).term == "СП" and (t.whitespaces_after_count < 3)
                 and (isinstance(t.next0_, TextToken))):
             pp = WeaponItemToken.__TryParse(t.next0_, None, False, False)
             if (pp is not None
                     and ((pp.typ == WeaponItemToken.Typs.MODEL
                           or pp.typ == WeaponItemToken.Typs.BRAND))):
                 res = WeaponItemToken._new2601(t, t,
                                                WeaponItemToken.Typs.NOUN)
                 res.value = "ПИСТОЛЕТ"
                 res.alt_value = "СЛУЖЕБНЫЙ ПИСТОЛЕТ"
                 return res
     # A word of more than 2 letters that is not all lower case: accepted
     # only after a NOUN/MODEL/BRAND item, or after a comma / "and" when
     # there is no previous item.
     if (((isinstance(t, TextToken)) and t.chars.is_letter
          and not t.chars.is_all_lower) and t.length_char > 2):
         ok = False
         if (prev is not None
                 and ((prev.typ == WeaponItemToken.Typs.NOUN
                       or prev.typ == WeaponItemToken.Typs.MODEL
                       or prev.typ == WeaponItemToken.Typs.BRAND))):
             ok = True
         elif (prev is None and t.previous is not None
               and t.previous.is_comma_and):
             ok = True
         if (ok):
             res = WeaponItemToken._new2602(t, t, WeaponItemToken.Typs.NAME,
                                            True)
             res.value = (t).term
             # Join "Word-Word" when both parts have the same character
             # class.
             if ((t.next0_ is not None and t.next0_.is_hiphen and
                  (isinstance(t.next0_.next0_, TextToken)))
                     and t.next0_.next0_.chars == t.chars):
                 res.value = "{0}-{1}".format(res.value,
                                              (t.next0_.next0_).term)
                 res.end_token = t.next0_.next0_
             # Directly after a noun the word is treated as a brand.
             if (prev is not None
                     and prev.typ == WeaponItemToken.Typs.NOUN):
                 res.typ = WeaponItemToken.Typs.BRAND
             # Followed by "-<number>" or by a glued number: a model.
             if (res.end_token.next0_ is not None
                     and res.end_token.next0_.is_hiphen and
                 (isinstance(res.end_token.next0_.next0_, NumberToken))):
                 res.typ = WeaponItemToken.Typs.MODEL
                 res.__correctModel()
             elif (not res.end_token.is_whitespace_after
                   and (isinstance(res.end_token.next0_, NumberToken))):
                 res.typ = WeaponItemToken.Typs.MODEL
                 res.__correctModel()
             return res
     return None
Esempio n. 7
0
 def tryAttach(t: 'Token',
               p1: 'InstrumentParticipant' = None,
               p2: 'InstrumentParticipant' = None,
               is_contract: bool = False) -> 'ParticipantToken':
     """Try to recognise a participant designation starting at token ``t``.

     Two strategies are applied in order:

     1. Only when ``p1`` and ``p2`` are both ``None`` and ``is_contract`` is
        true, referent-based patterns are tried: several same-typed
        referents followed by a plural "СТОРОНА" noun phrase
        (``Kinds.NAMEDASPARTS``); a single organization/person referent
        accompanied by a naming clause or bracketed alias
        (``Kinds.NAMEDAS``); or an ontology term directly followed by an
        organization/person referent (``Kinds.NAMEDAS``).
     2. If none of those returned, the tokens from ``t`` onwards are scanned
        and a participant type is collected from
        ``ParticipantToken.M_ONTOLOGY`` or from the types of ``p1``/``p2``.

     Args:
         t(Token): first token to analyse; ``None`` yields ``None``.
         p1(InstrumentParticipant): known participant whose ``typ`` may be
             matched in the text, or ``None``.
         p2(InstrumentParticipant): second known participant, used like
             ``p1``.
         is_contract(bool): enables the contract-specific heuristics.

     Returns:
         ParticipantToken: the recognised token, or ``None`` when nothing
         matched.
     """

     def is_naming_word(tok):
         # Words of the "hereinafter referred to as" clause
         # (roughly: "to name", "further", "hereinafter", "text").
         return (tok.isValue("ИМЕНОВАТЬ", None)
                 or tok.isValue("ДАЛЬНЕЙШИЙ", None)
                 or tok.isValue("ДАЛЕЕ", None)
                 or tok.isValue("ТЕКСТ", None))

     if (t is None):
         return None
     tt = t
     br = False
     if (p1 is None and p2 is None and is_contract):
         r1 = t.getReferent()
         # Pattern: "<ref>, <ref> [, <ref>...] ... <plural СТОРОНА phrase>".
         if ((r1 is not None and t.next0_ is not None
              and t.next0_.is_comma_and)
                 and (isinstance(t.next0_.next0_, ReferentToken))):
             r2 = t.next0_.next0_.getReferent()
             if (r1.type_name == r2.type_name):
                 ttt = t.next0_.next0_.next0_
                 refs = list()
                 refs.append(r1)
                 refs.append(r2)
                 # The first_pass flags emulate a for-loop whose step
                 # (ttt = ttt.next0_) must also run after ``continue``.
                 first_pass3014 = True
                 while True:
                     if first_pass3014: first_pass3014 = False
                     else: ttt = ttt.next0_
                     if (not (ttt is not None)): break
                     if ((ttt.is_comma_and and ttt.next0_ is not None
                          and ttt.next0_.getReferent() is not None)
                             and ttt.next0_.getReferent().type_name
                             == r1.type_name):
                         ttt = ttt.next0_
                         if (ttt.getReferent() not in refs):
                             refs.append(ttt.getReferent())
                         continue
                     break
                 first_pass3015 = True
                 while True:
                     if first_pass3015: first_pass3015 = False
                     else: ttt = ttt.next0_
                     if (not (ttt is not None)): break
                     if (ttt.is_comma or ttt.morph.class0_.is_preposition):
                         continue
                     if (is_naming_word(ttt)):
                         continue
                     if (ttt.isValue("ДОГОВАРИВАТЬСЯ", None)):
                         continue
                     npt = NounPhraseHelper.tryParse(
                         ttt, NounPhraseParseAttr.NO, 0)
                     if (npt is not None
                             and npt.noun.isValue("СТОРОНА", None)
                             and npt.morph.number != MorphNumber.SINGULAR):
                         re = ParticipantToken._new1467(
                             t, npt.end_token,
                             ParticipantToken.Kinds.NAMEDASPARTS)
                         re.parts = refs
                         return re
                     break
         # Pattern: a single organization/person referent with an alias.
         if (isinstance(r1, (OrganizationReferent, PersonReferent))):
             has_br = False
             has_named = False
             if (isinstance(r1, PersonReferent)):
                 if (t.previous is not None
                         and t.previous.isValue("ЛИЦО", None)):
                     return None
             elif (t.previous is not None
                   and ((t.previous.isValue("ВЫДАВАТЬ", None)
                         or t.previous.isValue("ВЫДАТЬ", None)))):
                 return None
             # First look for the alias among the tokens inside ``t`` itself.
             ttt = t.begin_token
             while ttt is not None and (ttt.end_char < t.end_char):
                 if (ttt.isChar('(')):
                     has_br = True
                 elif (is_naming_word(ttt)):
                     has_named = True
                 elif ((ttt.is_comma or ttt.morph.class0_.is_preposition
                        or ttt.is_hiphen) or ttt.isChar(':')):
                     pass
                 elif (isinstance(ttt, ReferentToken)):
                     pass
                 elif (has_br or has_named):
                     npt = NounPhraseHelper.tryParse(
                         ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0)
                     if (npt is None):
                         break
                     if (has_br):
                         if (npt.end_token.next0_ is None
                                 or not npt.end_token.next0_.isChar(')')):
                             break
                     if (not has_named):
                         if (ParticipantToken.M_ONTOLOGY.tryParse(
                                 ttt, TerminParseAttr.NO) is None):
                             break
                     re = ParticipantToken._new1467(
                         t, t, ParticipantToken.Kinds.NAMEDAS)
                     re.typ = npt.getNormalCaseText(None, True,
                                                    MorphGender.UNDEFINED,
                                                    False)
                     re.parts = list()
                     re.parts.append(r1)
                     return re
                 ttt = ttt.next0_
             # Then look for the alias in the tokens that follow ``t``.
             has_br = False
             has_named = False
             end_side = None
             brr = None
             add_refs = None
             ttt = t.next0_
             first_pass3016 = True
             while True:
                 if first_pass3016: first_pass3016 = False
                 else: ttt = ttt.next0_
                 if (not (ttt is not None)): break
                 # "<number> СТОРОНЫ" marks the end of this side's text.
                 if ((isinstance(ttt, NumberToken))
                         and (isinstance(ttt.next0_, TextToken))
                         and (ttt.next0_).term == "СТОРОНЫ"):
                     ttt = ttt.next0_
                     end_side = ttt
                     if (ttt.next0_ is not None and ttt.next0_.is_comma):
                         ttt = ttt.next0_
                     if (ttt.next0_ is not None and ttt.next0_.is_and):
                         break
                 # Forget the bracket span once we have moved past it.
                 if (brr is not None and ttt.begin_char > brr.end_char):
                     brr = (None)
                 if (BracketHelper.canBeStartOfSequence(ttt, False, False)):
                     brr = BracketHelper.tryParse(ttt, BracketParseAttr.NO,
                                                  100)
                     # Very short "(...)" groups are skipped entirely.
                     if (brr is not None and (brr.length_char < 7)
                             and ttt.isChar('(')):
                         ttt = brr.end_token
                         brr = (None)
                         continue
                 elif (is_naming_word(ttt)):
                     has_named = True
                 elif ((ttt.is_comma or ttt.morph.class0_.is_preposition
                        or ttt.is_hiphen) or ttt.isChar(':')):
                     pass
                 elif (brr is not None or has_named):
                     if (BracketHelper.canBeStartOfSequence(
                             ttt, True, False)):
                         ttt = ttt.next0_
                         # Fix: the bracket may be the last token; without
                         # this guard ``ttt`` is dereferenced as None below.
                         if (ttt is None):
                             break
                     npt = NounPhraseHelper.tryParse(
                         ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0)
                     typ22 = None
                     if (npt is not None):
                         ttt = npt.end_token
                         if (npt.end_token.isValue("ДОГОВОР", None)):
                             continue
                     else:
                         ttok = None
                         if (isinstance(ttt, MetaToken)):
                             ttok = ParticipantToken.M_ONTOLOGY.tryParse(
                                 (ttt).begin_token, TerminParseAttr.NO)
                         if (ttok is not None):
                             typ22 = ttok.termin.canonic_text
                         elif (has_named
                               and ttt.morph.class0_.is_adjective):
                             typ22 = ttt.getNormalCaseText(
                                 MorphClass.ADJECTIVE, False,
                                 MorphGender.UNDEFINED, False)
                         elif (brr is not None):
                             continue
                         else:
                             break
                     if (BracketHelper.canBeEndOfSequence(
                             ttt.next0_, True, None, False)):
                         ttt = ttt.next0_
                     if (brr is not None):
                         if (ttt.next0_ is None):
                             ttt = brr.end_token
                             continue
                         ttt = ttt.next0_
                     # Here typ22 is None exactly when npt is not None.
                     if (not has_named and typ22 is None):
                         if (ParticipantToken.M_ONTOLOGY.tryParse(
                                 npt.begin_token, TerminParseAttr.NO) is
                                 None):
                             break
                     re = ParticipantToken._new1467(
                         t, ttt, ParticipantToken.Kinds.NAMEDAS)
                     # Fix: the fallback used to be passed as an eagerly
                     # evaluated argument, so ``npt.getNormalCaseText`` was
                     # called even when npt was None (i.e. whenever typ22
                     # was set) and raised AttributeError.
                     typ_value = typ22
                     if (typ_value is None):
                         typ_value = npt.getNormalCaseText(
                             None, True, MorphGender.UNDEFINED, False)
                     re.typ = typ_value
                     re.parts = list()
                     re.parts.append(r1)
                     return re
                 elif ((ttt.isValue("ЗАРЕГИСТРИРОВАННЫЙ", None)
                        or ttt.isValue("КАЧЕСТВО", None)
                        or ttt.isValue("ПРОЖИВАЮЩИЙ", None))
                       or ttt.isValue("ЗАРЕГ", None)):
                     pass
                 elif (ttt.getReferent() == r1):
                     pass
                 elif (
                     (isinstance(ttt.getReferent(), PersonIdentityReferent))
                         or
                     (isinstance(ttt.getReferent(), AddressReferent))):
                     # Identity documents and addresses are remembered as
                     # additional parts of the participant.
                     if (add_refs is None):
                         add_refs = list()
                     add_refs.append(ttt.getReferent())
                 else:
                     prr = ttt.kit.processReferent("PERSONPROPERTY", ttt)
                     if (prr is not None):
                         ttt = prr.end_token
                         continue
                     if (isinstance(ttt.getReferent(), GeoReferent)):
                         continue
                     npt = NounPhraseHelper.tryParse(
                         ttt, NounPhraseParseAttr.NO, 0)
                     if (npt is not None):
                         if ((npt.noun.isValue("МЕСТО", None)
                              or npt.noun.isValue("ЖИТЕЛЬСТВО", None)
                              or npt.noun.isValue("ПРЕДПРИНИМАТЕЛЬ", None))
                                 or npt.noun.isValue("ПОЛ", None)
                                 or npt.noun.isValue("РОЖДЕНИЕ", None)):
                             ttt = npt.end_token
                             continue
                     if (ttt.is_newline_before):
                         break
                     if (ttt.length_char < 3):
                         continue
                     mc = ttt.getMorphClassInDictionary()
                     if (mc.is_adverb or mc.is_adjective):
                         continue
                     if (ttt.chars.is_all_upper):
                         continue
                     break
             if (end_side is not None
                     or ((add_refs is not None and t.previous is not None
                          and t.previous.is_and))):
                 re = ParticipantToken._new1467(
                     t, Utils.ifNotNull(end_side, t),
                     ParticipantToken.Kinds.NAMEDAS)
                 re.typ = (None)
                 re.parts = list()
                 re.parts.append(r1)
                 if (add_refs is not None):
                     re.parts.extend(add_refs)
                 return re
         # Pattern: "<ontology term> [-|:] <person/organization referent>".
         too = ParticipantToken.M_ONTOLOGY.tryParse(t, TerminParseAttr.NO)
         if (too is not None):
             if ((isinstance(t.previous, TextToken))
                     and t.previous.isValue("ЛИЦО", None)):
                 too = (None)
         if (too is not None and too.termin.tag is not None
                 and too.termin.canonic_text != "СТОРОНА"):
             tt1 = too.end_token.next0_
             if (tt1 is not None):
                 if (tt1.is_hiphen or tt1.isChar(':')):
                     tt1 = tt1.next0_
             if (isinstance(tt1, ReferentToken)):
                 r1 = tt1.getReferent()
                 if (isinstance(r1, (PersonReferent, OrganizationReferent))):
                     re = ParticipantToken._new1467(
                         t, tt1, ParticipantToken.Kinds.NAMEDAS)
                     re.typ = too.termin.canonic_text
                     re.parts = list()
                     re.parts.append(r1)
                     return re
     # General scan: collect the participant type from the text itself.
     add_typ1 = (None if p1 is None else p1.typ)
     add_typ2 = (None if p2 is None else p2.typ)
     if (BracketHelper.canBeStartOfSequence(tt, False, False)
             and tt.next0_ is not None):
         br = True
         tt = tt.next0_
     # Multi-word known types (other than "СТОРОНА ...") get their own
     # Termin so they can be matched across several tokens.
     term1 = None
     term2 = None
     if (add_typ1 is not None and add_typ1.find(' ') > 0
             and not add_typ1.startswith("СТОРОНА")):
         term1 = Termin(add_typ1)
     if (add_typ2 is not None and add_typ2.find(' ') > 0
             and not add_typ2.startswith("СТОРОНА")):
         term2 = Termin(add_typ2)
     named = False
     typ_ = None
     t1 = None  # last token of the recognised type; set whenever typ_ is
     t0 = tt
     first_pass3017 = True
     while True:
         if first_pass3017: first_pass3017 = False
         else: tt = tt.next0_
         if (not (tt is not None)): break
         if (tt.morph.class0_.is_preposition and typ_ is not None):
             continue
         if (tt.isCharOf("(:)") or tt.is_hiphen):
             continue
         if (tt.is_table_control_char):
             break
         if (tt.is_newline_before and tt != t0):
             if (isinstance(tt, NumberToken)):
                 break
             if ((isinstance(tt, TextToken))
                     and (isinstance(tt.previous, TextToken))):
                 if (tt.previous.isValue((tt).term, None)):
                     break
         if (BracketHelper.isBracket(tt, False)):
             continue
         tok = (ParticipantToken.M_ONTOLOGY.tryParse(
             tt, TerminParseAttr.NO)
                if ParticipantToken.M_ONTOLOGY is not None else None)
         if (tok is not None and (isinstance(tt.previous, TextToken))):
             if (tt.previous.isValue("ЛИЦО", None)):
                 return None
         if (tok is None):
             # No ontology term here: try the known participant types.
             if (add_typ1 is not None
                     and (MiscHelper.isNotMoreThanOneError(add_typ1, tt)
                          or (isinstance(tt, MetaToken)
                              and tt.begin_token.isValue(add_typ1, None)))):
                 if (typ_ is not None):
                     if (not ParticipantToken.__isTypesEqual(
                             add_typ1, typ_)):
                         break
                 typ_ = add_typ1
                 t1 = tt
                 continue
             if (add_typ2 is not None
                     and (MiscHelper.isNotMoreThanOneError(add_typ2, tt)
                          or (isinstance(tt, MetaToken)
                              and tt.begin_token.isValue(add_typ2, None)))):
                 if (typ_ is not None):
                     if (not ParticipantToken.__isTypesEqual(
                             add_typ2, typ_)):
                         break
                 typ_ = add_typ2
                 t1 = tt
                 continue
             if (tt.chars.is_letter):
                 if (term1 is not None):
                     tok1 = term1.tryParse(tt, TerminParseAttr.NO)
                     if (tok1 is not None):
                         if (typ_ is not None):
                             if (not ParticipantToken.__isTypesEqual(
                                     add_typ1, typ_)):
                                 break
                         typ_ = add_typ1
                         tt = tok1.end_token
                         t1 = tt
                         continue
                 if (term2 is not None):
                     tok2 = term2.tryParse(tt, TerminParseAttr.NO)
                     if (tok2 is not None):
                         if (typ_ is not None):
                             if (not ParticipantToken.__isTypesEqual(
                                     add_typ2, typ_)):
                                 break
                         typ_ = add_typ2
                         tt = tok2.end_token
                         t1 = tt
                         continue
                 # After a naming marker any non-keyword noun that is
                 # capitalised or follows a bracket is taken as the type.
                 if (named and tt.getMorphClassInDictionary().is_noun):
                     if (not tt.chars.is_all_lower
                             or BracketHelper.isBracket(tt.previous, True)):
                         if (DecreeToken.isKeyword(tt, False) is None):
                             val = tt.getNormalCaseText(
                                 MorphClass.NOUN, True,
                                 MorphGender.UNDEFINED, False)
                             if (typ_ is not None):
                                 if (not ParticipantToken.__isTypesEqual(
                                         typ_, val)):
                                     break
                             typ_ = val
                             t1 = tt
                             continue
             if (named and typ_ is None and is_contract):
                 if ((isinstance(tt, TextToken))
                         and tt.chars.is_cyrillic_letter
                         and tt.chars.is_capital_upper):
                     dc = tt.getMorphClassInDictionary()
                     if (dc.is_undefined or dc.is_noun):
                         dt = DecreeToken.tryAttach(tt, None, False)
                         ok = True
                         if (dt is not None):
                             ok = False
                         elif (tt.isValue("СТОРОНА", None)):
                             ok = False
                         if (ok):
                             typ_ = (tt).getLemma()
                             t1 = tt
                             continue
                     if (dc.is_adjective):
                         npt = NounPhraseHelper.tryParse(
                             tt, NounPhraseParseAttr.NO, 0)
                         if (npt is not None and len(npt.adjectives) > 0
                                 and npt.noun.getMorphClassInDictionary(
                                 ).is_noun):
                             typ_ = npt.getNormalCaseText(
                                 None, True, MorphGender.UNDEFINED, False)
                             t1 = npt.end_token
                             continue
             if (tt == t):
                 break
             if ((isinstance(tt, NumberToken)) or tt.isChar('.')):
                 break
             # Short tokens are tolerated only once a type has been found.
             if (tt.length_char < 4):
                 if (typ_ is not None):
                     continue
             break
         if (tok.termin.tag is None):
             # Tag-less ontology terms act as the naming marker
             # (presumably the "hereinafter referred to as" words).
             named = True
         else:
             if (typ_ is not None):
                 break
             if (tok.termin.canonic_text == "СТОРОНА"):
                 # "СТОРОНА" is accepted only with a number on the same line.
                 tt1 = tt.next0_
                 if (tt1 is not None and tt1.is_hiphen):
                     tt1 = tt1.next0_
                 if (not ((isinstance(tt1, NumberToken)))):
                     break
                 if (tt1.is_newline_before):
                     break
                 typ_ = "{0} {1}".format(tok.termin.canonic_text,
                                         (tt1).value)
                 t1 = tt1
             else:
                 typ_ = tok.termin.canonic_text
                 t1 = tok.end_token
             break
         tt = tok.end_token
     if (typ_ is None):
         return None
     # Without a naming marker the type must agree with a known participant.
     if (not named and t1 != t and not typ_.startswith("СТОРОНА")):
         if (not ParticipantToken.__isTypesEqual(typ_, add_typ1)
                 and not ParticipantToken.__isTypesEqual(typ_, add_typ2)):
             return None
     # Widen the span over enclosing brackets/quotes.
     if (BracketHelper.canBeEndOfSequence(t1.next0_, False, None, False)):
         t1 = t1.next0_
         if (not t.is_whitespace_before
                 and BracketHelper.canBeStartOfSequence(
                     t.previous, False, False)):
             t = t.previous
     elif (BracketHelper.canBeStartOfSequence(t, False, False)
           and BracketHelper.canBeEndOfSequence(t1.next0_, True, t, True)):
         t1 = t1.next0_
     if (br and t1.next0_ is not None and BracketHelper.canBeEndOfSequence(
             t1.next0_, False, None, False)):
         t1 = t1.next0_
     res = ParticipantToken._new1472(
         t, t1, (ParticipantToken.Kinds.NAMEDAS
                 if named else ParticipantToken.Kinds.PURE), typ_)
     if (t.isChar(':')):
         res.begin_token = t.next0_
     return res
Esempio n. 8
0
 def attachFirst(self, p: 'InstrumentParticipant', min_char: int,
                 max_char: int) -> 'ReferentToken':
     """Build the referent token of participant ``p`` around this token.

     The text before ``self.begin_token`` (not further back than
     ``min_char``) and after ``self.end_token`` (not further than
     ``max_char``) is scanned for referents, which are added to ``p`` as
     ``ATTR_REF`` or ``ATTR_DELEGATE`` slots; the resulting token is
     widened to cover them.

     Args:
         p(InstrumentParticipant): participant that receives the slots.
         min_char(int): lowest ``begin_char`` the backward scan may reach.
         max_char(int): highest ``begin_char`` the forward scan may reach;
             ``0`` disables the limit.

     Returns:
         ReferentToken: token for ``p`` spanning the attached referents.
     """
     # Referent classes that are attached to the participant as slots.
     slot_types = (OrganizationReferent, PhoneReferent, PersonReferent,
                   PersonPropertyReferent, AddressReferent, UriReferent,
                   PersonIdentityReferent, BankDataReferent)
     person_types = (PersonReferent, PersonPropertyReferent)
     tt0 = self.begin_token
     refs = list()
     # Backward scan: collect referents that precede the designation.
     t = tt0.previous
     while t is not None and t.begin_char >= min_char:
         if (t.is_newline_after):
             if (t.newlines_after_count > 1):
                 break
             if (isinstance(t.next0_, NumberToken)):
                 break
         if (ParticipantToken.__tryAttachContractGround(t, p, False) is None):
             r = t.getReferent()
             if (isinstance(r, slot_types)):
                 if (r not in refs):
                     refs.insert(0, r)
                 tt0 = t
         t = t.previous
     # Persons listed after a leading organization are its delegates.
     for r in refs:
         if (r != refs[0]
                 and isinstance(refs[0], OrganizationReferent)
                 and isinstance(r, person_types)):
             p.addSlot(InstrumentParticipant.ATTR_DELEGATE, r, False, 0)
         else:
             p.addSlot(InstrumentParticipant.ATTR_REF, r, False, 0)
     rt = ReferentToken(p, tt0, self.end_token)
     # Forward scan: attach what follows the designation.
     t = self.end_token.next0_
     if (BracketHelper.isBracket(t, False)):
         t = t.next0_
     if (t is not None and t.isChar(',')):
         t = t.next0_
     # The first_pass flag emulates a for-loop whose step (t = t.next0_)
     # must also run after ``continue``.
     first_pass3020 = True
     while True:
         if first_pass3020: first_pass3020 = False
         else: t = t.next0_
         if (not (t is not None and
                  ((max_char == 0 or t.begin_char <= max_char)))):
             break
         if (t.isValue("СТОРОНА", None)):
             break
         r = t.getReferent()
         if (isinstance(r, slot_types)):
             # "<person property>, <person>" on one line: fold the property
             # into the person and attach the person instead.
             if ((((isinstance(r, PersonPropertyReferent))
                   and t.next0_ is not None and t.next0_.is_comma) and
                  (isinstance(t.next0_.next0_, ReferentToken)) and
                  (isinstance(t.next0_.next0_.getReferent(),
                              PersonReferent)))
                     and not t.next0_.is_newline_after):
                 pe = Utils.asObjectOrNull(t.next0_.next0_.getReferent(),
                                           PersonReferent)
                 pe.addSlot(PersonReferent.ATTR_ATTR, r, False, 0)
                 r = (pe)
                 t = t.next0_.next0_
             # A person introduced by "ЛИЦО"/"ИМЯ" (optionally via
             # "КОТОРЫЙ") is treated as a delegate.
             is_delegate = False
             if (t.previous.isValue("ЛИЦО", None)
                     or t.previous.isValue("ИМЯ", None)):
                 is_delegate = True
             if (t.previous.isValue("КОТОРЫЙ", None)
                     and t.previous.previous is not None
                     and ((t.previous.previous.isValue("ИМЯ", None)
                           or t.previous.previous.isValue("ЛИЦО", None)))):
                 is_delegate = True
             attr = (InstrumentParticipant.ATTR_DELEGATE
                     if isinstance(r, person_types) and is_delegate
                     else InstrumentParticipant.ATTR_REF)
             p.addSlot(attr, r, False, 0)
             rt.end_token = t
             continue
         tt = ParticipantToken.__tryAttachContractGround(t, p, False)
         if (tt is not None):
             rt.end_token = tt
             t = rt.end_token
             if (rt.begin_char == tt.begin_char):
                 rt.begin_token = tt
             continue
         # Two-word connectors that are skipped as a unit.
         if (t.isValue("В", None) and t.next0_ is not None
                 and t.next0_.isValue("ЛИЦО", None)):
             t = t.next0_
             continue
         if (t.isValue("ОТ", None) and t.next0_ is not None
                 and t.next0_.isValue("ИМЯ", None)):
             t = t.next0_
             continue
         if (t.isValue("ПО", None) and t.next0_ is not None
                 and t.next0_.isValue("ПОРУЧЕНИЕ", None)):
             t = t.next0_
             continue
         if (t.is_newline_before):
             break
         # Any verb outside this short whitelist ends the description.
         if (t.getMorphClassInDictionary() == MorphClass.VERB):
             if ((not t.isValue("УДОСТОВЕРЯТЬ", None)
                  and not t.isValue("ПРОЖИВАТЬ", None)
                  and not t.isValue("ЗАРЕГИСТРИРОВАТЬ", None))
                     and not t.isValue("ДЕЙСТВОВАТЬ", None)):
                 break
         if (t.is_and and t.previous is not None and t.previous.is_comma):
             break
         # Fix: guard against a conjunction that is the last token, where
         # t.next0_ is None and the original dereferenced it.
         if (t.is_and and t.next0_ is not None
                 and t.next0_.getReferent() is not None):
             if (isinstance(t.next0_.getReferent(), OrganizationReferent)):
                 break
             pe = Utils.asObjectOrNull(t.next0_.getReferent(),
                                       PersonReferent)
             if (pe is not None):
                 # Stop before a person whose attribute text starts with
                 # "индивидуальный предприниматель".
                 has_ip = False
                 for s in pe.slots:
                     if (s.type_name == PersonReferent.ATTR_ATTR):
                         if (str(s.value).startswith(
                                 "индивидуальный предприниматель")):
                             has_ip = True
                             break
                 if (has_ip):
                     break
     # Final pass over the covered span with the third argument set to True.
     t = rt.begin_token
     while t is not None and t.end_char <= rt.end_char:
         tt = ParticipantToken.__tryAttachContractGround(t, p, True)
         if (tt is not None):
             if (tt.end_char > rt.end_char):
                 rt.end_token = tt
             t = tt
         t = t.next0_
     return rt
Esempio n. 9
0
 def tryParse(t: 'Token',
              loc_onto: 'IntOntologyCollection') -> 'NamedItemToken':
     if (t is None):
         return None
     if (isinstance(t, ReferentToken)):
         r = t.getReferent()
         if ((r.type_name == "PERSON" or r.type_name == "PERSONPROPERTY" or
              (isinstance(r, GeoReferent)))
                 or r.type_name == "ORGANIZATION"):
             return NamedItemToken._new1635(t, t, r, t.morph)
         return None
     typ = NamedItemToken.__m_types.tryParse(t, TerminParseAttr.NO)
     nam = NamedItemToken.__m_names.tryParse(t, TerminParseAttr.NO)
     if (typ is not None):
         if (not ((isinstance(t, TextToken)))):
             return None
         res = NamedItemToken._new1636(typ.begin_token, typ.end_token,
                                       typ.morph, typ.chars)
         res.kind = (Utils.valToEnum(typ.termin.tag, NamedEntityKind))
         res.type_value = typ.termin.canonic_text
         if ((nam is not None and nam.end_token == typ.end_token
              and not t.chars.is_all_lower) and (Utils.valToEnum(
                  nam.termin.tag, NamedEntityKind)) == res.kind):
             res.name_value = nam.termin.canonic_text
             res.is_wellknown = True
         return res
     if (nam is not None):
         if (nam.begin_token.chars.is_all_lower):
             return None
         res = NamedItemToken._new1636(nam.begin_token, nam.end_token,
                                       nam.morph, nam.chars)
         res.kind = (Utils.valToEnum(nam.termin.tag, NamedEntityKind))
         res.name_value = nam.termin.canonic_text
         ok = True
         if (not t.is_whitespace_before and t.previous is not None):
             ok = False
         elif (not t.is_whitespace_after and t.next0_ is not None):
             if (t.next0_.isCharOf(",.;!?")
                     and t.next0_.is_whitespace_after):
                 pass
             else:
                 ok = False
         if (ok):
             res.is_wellknown = True
             res.type_value = (Utils.asObjectOrNull(nam.termin.tag2, str))
         return res
     adj = MiscLocationHelper.tryAttachNordWest(t)
     if (adj is not None):
         if (adj.morph.class0_.is_noun):
             if (adj.end_token.isValue("ВОСТОК", None)):
                 if (adj.begin_token == adj.end_token):
                     return None
                 re = NamedItemToken._new1638(t, adj.end_token, adj.morph)
                 re.kind = NamedEntityKind.LOCATION
                 re.name_value = MiscHelper.getTextValue(
                     t, adj.end_token,
                     GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
                 re.is_wellknown = True
                 return re
             return None
         if (adj.whitespaces_after_count > 2):
             return None
         if ((isinstance(adj.end_token.next0_, ReferentToken)) and
             (isinstance(adj.end_token.next0_.getReferent(), GeoReferent))):
             re = NamedItemToken._new1638(t, adj.end_token.next0_,
                                          adj.end_token.next0_.morph)
             re.kind = NamedEntityKind.LOCATION
             re.name_value = MiscHelper.getTextValue(
                 t, adj.end_token.next0_,
                 GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
             re.is_wellknown = True
             re.ref = adj.end_token.next0_.getReferent()
             return re
         res = NamedItemToken.tryParse(adj.end_token.next0_, loc_onto)
         if (res is not None and res.kind == NamedEntityKind.LOCATION):
             s = adj.getNormalCaseText(MorphClass.ADJECTIVE, True,
                                       res.morph.gender, False)
             if (s is not None):
                 if (res.name_value is None):
                     res.name_value = s.upper()
                 else:
                     res.name_value = "{0} {1}".format(
                         s.upper(), res.name_value)
                     res.type_value = (None)
                 res.begin_token = t
                 res.chars = t.chars
                 res.is_wellknown = True
                 return res
     if (t.chars.is_capital_upper
             and not MiscHelper.canBeStartOfSentence(t)):
         npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
         if (npt is not None and len(npt.adjectives) > 0):
             test = NamedItemToken.tryParse(npt.noun.begin_token, loc_onto)
             if (test is not None and test.end_token == npt.end_token
                     and test.type_value is not None):
                 test.begin_token = t
                 tmp = io.StringIO()
                 for a in npt.adjectives:
                     s = a.getNormalCaseText(MorphClass.ADJECTIVE, True,
                                             test.morph.gender, False)
                     if (tmp.tell() > 0):
                         print(' ', end="", file=tmp)
                     print(s, end="", file=tmp)
                 test.name_value = Utils.toStringStringIO(tmp)
                 test.chars = t.chars
                 if (test.kind == NamedEntityKind.LOCATION):
                     test.is_wellknown = True
                 return test
     if ((BracketHelper.isBracket(t, True) and t.next0_ is not None
          and t.next0_.chars.is_letter)
             and not t.next0_.chars.is_all_lower):
         br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
         if (br is not None):
             res = NamedItemToken(t, br.end_token)
             res.is_in_bracket = True
             res.name_value = MiscHelper.getTextValue(
                 t, br.end_token, GetTextAttr.NO)
             nam = NamedItemToken.__m_names.tryParse(
                 t.next0_, TerminParseAttr.NO)
             if (nam is not None
                     and nam.end_token == br.end_token.previous):
                 res.kind = (Utils.valToEnum(nam.termin.tag,
                                             NamedEntityKind))
                 res.is_wellknown = True
                 res.name_value = nam.termin.canonic_text
             return res
     if (((isinstance(t, TextToken)) and t.chars.is_letter
          and not t.chars.is_all_lower) and t.length_char > 2):
         res = NamedItemToken._new1638(t, t, t.morph)
         str0_ = (t).term
         if (str0_.endswith("О") or str0_.endswith("И")
                 or str0_.endswith("Ы")):
             res.name_value = str0_
         else:
             res.name_value = t.getNormalCaseText(None, False,
                                                  MorphGender.UNDEFINED,
                                                  False)
         res.chars = t.chars
         if (((not t.is_whitespace_after and t.next0_ is not None
               and t.next0_.is_hiphen) and
              (isinstance(t.next0_.next0_, TextToken))
              and not t.next0_.next0_.is_whitespace_after)
                 and t.chars.is_cyrillic_letter
                 == t.next0_.next0_.chars.is_cyrillic_letter):
             res.end_token = t.next0_.next0_
             t = res.end_token
             res.name_value = "{0}-{1}".format(
                 res.name_value,
                 t.getNormalCaseText(None, False, MorphGender.UNDEFINED,
                                     False))
         return res
     return None
Esempio n. 10
0
 def __TryAttach(t: 'Token',
                 prev: typing.List['DateItemToken']) -> 'DateItemToken':
     """ Try to parse one date item (number, year, hour, month, delimiter,
     pointer, quarter, half-year, century ...) starting at token t.

     Args:
         t(Token): first token of the candidate item
         prev(typing.List[DateItemToken]): items already parsed before t
             (may be None); only used as context. Note that prev[0].typ can
             be changed to HOUR when an "hour<delim>minute ЧАС" pattern is
             recognised.

     Returns:
         DateItemToken: the parsed item, or None when nothing date-like
         starts at t
     """
     # Imported lazily inside the function, exactly as in the original code.
     from pullenti.ner.measure.internal.MeasureToken import MeasureToken
     if (t is None):
         return None
     nt = Utils.asObjectOrNull(t, NumberToken)
     begin = t
     end = t
     is_in_brack = False
     # A number wrapped in brackets: take the inner number and span
     # the whole bracketed sequence.
     if ((BracketHelper.canBeStartOfSequence(t, False, False)
          and t.next0_ is not None and (isinstance(t.next0_, NumberToken)))
             and BracketHelper.canBeEndOfSequence(t.next0_.next0_, False,
                                                  None, False)):
         nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
         end = t.next0_.next0_
         is_in_brack = True
     if ((t.is_newline_before and BracketHelper.isBracket(t, False) and
          (isinstance(t.next0_, NumberToken)))
             and BracketHelper.isBracket(t.next0_.next0_, False)):
         nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
         end = t.next0_.next0_
         is_in_brack = True
     if (nt is not None):
         # ---- numeric item ----
         if (nt.int_value is None):
             return None
         if (nt.typ == NumberSpellingType.WORDS):
             # A number written in words that is a pure noun is accepted
             # only directly before a quarter / half-year word.
             if (nt.morph.class0_.is_noun
                     and not nt.morph.class0_.is_adjective):
                 if (t.next0_ is not None
                         and ((t.next0_.isValue("КВАРТАЛ", None)
                               or t.next0_.isValue("ПОЛУГОДИЕ", None)
                               or t.next0_.isValue("ПІВРІЧЧЯ", None)))):
                     pass
                 else:
                     return None
         # A number that NumberHelper parses as an age is rejected.
         if (NumberHelper.tryParseAge(nt) is not None):
             return None
         res = DateItemToken._new653(begin, end,
                                     DateItemToken.DateItemType.NUMBER,
                                     nt.int_value, nt.morph)
         # "20" followed by a separate two-character number right after
         # a month item: glue them into 20NN (only below 2030).
         if ((res.int_value == 20 and
              (isinstance(nt.next0_, NumberToken)) and
              (nt.next0_).int_value is not None)
                 and nt.next0_.length_char == 2 and prev is not None):
             num = 2000 + (nt.next0_).int_value
             if ((num < 2030) and len(prev) > 0 and prev[len(prev) - 1].typ
                     == DateItemToken.DateItemType.MONTH):
                 ok = False
                 if (nt.whitespaces_after_count == 1):
                     ok = True
                 # NOTE(review): the original tested is_newline_after twice
                 # in this condition; collapsed to one test (same result).
                 elif (nt.is_newline_after):
                     ok = True
                 if (ok):
                     nt = (Utils.asObjectOrNull(nt.next0_, NumberToken))
                     res.end_token = nt
                     res.int_value = num
         # "20___ <year word>" / "201__ <year word>": a year whose last
         # digits are underscores; reported as YEAR with value 0.
         if (res.int_value == 20 or res.int_value == 201):
             tt = t.next0_
             if (tt is not None and tt.isChar('_')):
                 while tt is not None:
                     if (not tt.isChar('_')):
                         break
                     tt = tt.next0_
                 tt = DateItemToken.__testYearRusWord(tt, False)
                 if (tt is not None):
                     res.int_value = 0
                     res.end_token = tt
                     res.typ = DateItemToken.DateItemType.YEAR
                     return res
         # Values up to 12 closely followed by an hour word and/or a
         # part-of-day word (morning / evening / day / night).
         if (res.int_value <= 12 and t.next0_ is not None
                 and (t.whitespaces_after_count < 3)):
             tt = t.next0_
             if (tt.isValue("ЧАС", None)):
                 if (((isinstance(t.previous, TextToken))
                      and not t.previous.chars.is_letter
                      and not t.is_whitespace_before)
                         and (isinstance(t.previous.previous, NumberToken))
                         and not t.previous.is_whitespace_before):
                     # number<non-letter>number glued together before the
                     # hour word: not decided here.
                     pass
                 else:
                     res.typ = DateItemToken.DateItemType.HOUR
                     res.end_token = tt
                     tt = tt.next0_
                     if (tt is not None and tt.isChar('.')):
                         res.end_token = tt
                         tt = tt.next0_
             first_pass2816 = True
             while True:
                 if first_pass2816: first_pass2816 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.isValue("УТРО", "РАНОК")):
                     res.end_token = tt
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.isValue("ВЕЧЕР", "ВЕЧІР")):
                     res.end_token = tt
                     res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.isValue("ДЕНЬ", None)):
                     res.end_token = tt
                     if (res.int_value < 10):
                         res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.isValue("НОЧЬ", "НІЧ")):
                     res.end_token = tt
                     if (res.int_value == 12):
                         res.int_value = 0
                     elif (res.int_value > 9):
                         res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 # commas and adverbs may stand between the number and
                 # the part-of-day word
                 if (tt.is_comma or tt.morph.class0_.is_adverb):
                     continue
                 break
             if (res.typ == DateItemToken.DateItemType.HOUR):
                 return res
         # Decide whether the number may be followed by a year word.
         can_be_year_ = True
         if (prev is not None and len(prev) > 0 and prev[len(prev) - 1].typ
                 == DateItemToken.DateItemType.MONTH):
             pass
         elif ((prev is not None and len(prev) >= 4 and
                prev[len(prev) - 1].typ == DateItemToken.DateItemType.DELIM)
               and prev[len(prev) - 2].can_by_month):
             pass
         elif (nt.next0_ is not None
               and ((nt.next0_.isValue("ГОД", None)
                     or nt.next0_.isValue("РІК", None)))):
             # without month context, "<number below 1000> год" is not
             # taken as a year
             if (res.int_value < 1000):
                 can_be_year_ = False
         tt = DateItemToken.__testYearRusWord(nt.next0_, False)
         if (tt is not None and DateItemToken.__isNewAge(tt.next0_)):
             res.typ = DateItemToken.DateItemType.YEAR
             res.end_token = tt
         elif (can_be_year_):
             if (res.can_be_year):
                 tt = DateItemToken.__testYearRusWord(
                     nt.next0_, res.is_newline_before)
                 if ((tt) is not None):
                     if ((tt.isValue("Г", None)
                          and not tt.is_whitespace_before
                          and t.previous is not None)
                             and ((t.previous.isValue("КОРПУС", None)
                                   or t.previous.isValue("КОРП", None)))):
                         # "КОРПУС/КОРП <number>Г": the "Г" is glued to
                         # the number, so it is not taken as a year word
                         pass
                     elif (
                         (((nt.next0_.isValue("Г", None) and
                            (t.whitespaces_before_count < 3) and t.previous
                            is not None) and t.previous.isValue("Я", None)
                           and t.previous.previous is not None)
                          and t.previous.previous.isCharOf("\\/")
                          and t.previous.previous.previous is not None)
                             and t.previous.previous.previous.isValue(
                                 "А", None)):
                         # "А/Я <number> Г": rejected completely
                         return None
                     else:
                         res.end_token = tt
                         res.typ = DateItemToken.DateItemType.YEAR
                         res.lang = tt.morph.language
             elif (tt is not None and (nt.whitespaces_after_count < 2)
                   and (nt.end_char - nt.begin_char) == 1):
                 # two-character number directly followed by a year word
                 res.end_token = tt
                 res.typ = DateItemToken.DateItemType.YEAR
                 res.lang = tt.morph.language
         # A preposition before the number may confirm a year (or an
         # Italian day) and is then included into the item span.
         if (nt.previous is not None):
             if (nt.previous.isValue("В", "У")
                     or nt.previous.isValue("К", None)
                     or nt.previous.isValue("ДО", None)):
                 tt = DateItemToken.__testYearRusWord(nt.next0_, False)
                 if ((tt) is not None):
                     ok = False
                     if ((res.int_value < 100)
                             and (isinstance(tt, TextToken)) and
                         (((tt).term == "ГОДА" or (tt).term == "РОКИ"))):
                         # value below 100 with "ГОДА"/"РОКИ": not
                         # accepted as a year here
                         pass
                     else:
                         ok = True
                         if (nt.previous.isValue("ДО", None)
                                 and nt.next0_.isValue("Г", None)):
                             # "ДО <number> Г": look up to 10 tokens back
                             # for a measure that extends past the number;
                             # if one is found this is not a year.
                             cou = 0
                             ttt = nt.previous.previous
                             while ttt is not None and (cou < 10):
                                 mt = MeasureToken.tryParse(
                                     ttt, None, False, False)
                                 if (mt is not None
                                         and mt.end_char > nt.end_char):
                                     ok = False
                                     break
                                 ttt = ttt.previous
                                 cou += 1
                     if (ok):
                         res.end_token = tt
                         res.typ = DateItemToken.DateItemType.YEAR
                         res.lang = tt.morph.language
                         res.begin_token = nt.previous
             elif (((nt.previous.isValue("IN", None)
                     or nt.previous.isValue("SINCE", None)))
                   and res.can_be_year):
                 res.typ = DateItemToken.DateItemType.YEAR
                 res.begin_token = nt.previous
             elif (nt.previous.isValue("NEL", None)
                   or nt.previous.isValue("DEL", None)):
                 if (res.can_be_year):
                     res.typ = DateItemToken.DateItemType.YEAR
                     res.lang = MorphLang.IT
                     res.begin_token = nt.previous
             elif (nt.previous.isValue("IL", None) and res.can_be_day):
                 res.lang = MorphLang.IT
                 res.begin_token = nt.previous
         # A unit word right after the item fixes its type.
         t1 = res.end_token.next0_
         if (t1 is not None):
             if ((t1.isValue("ЧАС", None) or t1.isValue("ГОДИНА", None))):
                 # NOTE(review): the original repeated
                 # "not prev[1].is_whitespace_after" twice; one test kept.
                 if (((prev is not None and len(prev) == 2
                       and prev[0].can_be_hour)
                      and prev[1].typ == DateItemToken.DateItemType.DELIM
                      and not prev[1].is_whitespace_after
                      and res.int_value >= 0) and (res.int_value < 59)):
                     # "<hour><delim><this> ЧАС": this number is the
                     # minutes and the first previous item is the hour
                     prev[0].typ = DateItemToken.DateItemType.HOUR
                     res.typ = DateItemToken.DateItemType.MINUTE
                     res.end_token = t1
                 elif (res.int_value < 24):
                     if (t1.next0_ is not None and t1.next0_.isChar('.')):
                         t1 = t1.next0_
                     res.typ = DateItemToken.DateItemType.HOUR
                     res.end_token = t1
             # FIX: the "ХВИЛИНА" test used t (the number token itself)
             # instead of t1 (the token after the item), so it could not
             # match the word following the number like its siblings do.
             elif ((res.int_value < 60) and
                   ((t1.isValue("МИНУТА", None) or t1.isValue("МИН", None)
                     or t1.isValue("ХВИЛИНА", None)))):
                 if (t1.next0_ is not None and t1.next0_.isChar('.')):
                     t1 = t1.next0_
                 res.typ = DateItemToken.DateItemType.MINUTE
                 res.end_token = t1
             elif ((res.int_value < 60) and ((t1.isValue("СЕКУНДА", None)
                                              or t1.isValue("СЕК", None)))):
                 if (t1.next0_ is not None and t1.next0_.isChar('.')):
                     t1 = t1.next0_
                 res.typ = DateItemToken.DateItemType.SECOND
                 res.end_token = t1
             elif ((res.int_value < 30)
                   and ((t1.isValue("ВЕК", "ВІК")
                         or t1.isValue("СТОЛЕТИЕ", "СТОЛІТТЯ")))):
                 res.typ = DateItemToken.DateItemType.CENTURY
                 res.end_token = t1
             elif (res.int_value <= 4 and t1.isValue("КВАРТАЛ", None)):
                 res.typ = DateItemToken.DateItemType.QUARTAL
                 res.end_token = t1
             elif (res.int_value <= 2
                   and ((t1.isValue("ПОЛУГОДИЕ", None)
                         or t1.isValue("ПІВРІЧЧЯ", None)))):
                 res.typ = DateItemToken.DateItemType.HALFYEAR
                 res.end_token = t1
         return res
     # ---- non-numeric item: only text tokens are considered ----
     t0 = Utils.asObjectOrNull(t, TextToken)
     if (t0 is None):
         return None
     txt = t0.getSourceText()
     # Roman numerals (the first letter may also be the Cyrillic 'Х'):
     # quarter, half-year or century.
     if ((txt[0] == 'I' or txt[0] == 'X' or txt[0] == 'Х')
             or txt[0] == 'V'):
         lat = NumberHelper.tryParseRoman(t)
         if (lat is not None and lat.end_token.next0_ is not None
                 and lat.int_value is not None):
             val = lat.int_value
             tt = lat.end_token.next0_
             if (tt.isValue("КВАРТАЛ", None) and val > 0 and val <= 4):
                 return DateItemToken._new654(
                     t, tt, DateItemToken.DateItemType.QUARTAL, val)
             if (tt.isValue("ПОЛУГОДИЕ", "ПІВРІЧЧЯ") and val > 0
                     and val <= 2):
                 return DateItemToken._new654(
                     t, lat.end_token.next0_,
                     DateItemToken.DateItemType.HALFYEAR, val)
             if (tt.isValue("ВЕК", "ВІК")
                     or tt.isValue("СТОЛЕТИЕ", "СТОЛІТТЯ")):
                 return DateItemToken._new654(
                     t, lat.end_token.next0_,
                     DateItemToken.DateItemType.CENTURY, val)
             # abbreviated "В." is a century only after a pointer item
             # or when __isNewAge accepts what follows
             if (tt.isValue("В", None) and tt.next0_ is not None
                     and tt.next0_.isChar('.')):
                 if (prev is not None and len(prev) > 0
                         and prev[len(prev) - 1].typ
                         == DateItemToken.DateItemType.POINTER):
                     return DateItemToken._new654(
                         t, tt.next0_, DateItemToken.DateItemType.CENTURY,
                         val)
                 if (DateItemToken.__isNewAge(tt.next0_.next0_)):
                     return DateItemToken._new654(
                         t, tt.next0_, DateItemToken.DateItemType.CENTURY,
                         val)
             # "<roman>-<greater roman> ВЕК": return the first century
             # only; the item ends on the first numeral
             if (tt.is_hiphen):
                 lat2 = NumberHelper.tryParseRoman(tt.next0_)
                 if ((lat2 is not None and lat2.int_value is not None
                      and lat2.int_value > val)
                         and lat2.end_token.next0_ is not None):
                     if (lat2.end_token.next0_.isValue("ВЕК", "ВІК")
                             or lat2.end_token.next0_.isValue(
                                 "СТОЛЕТИЕ", "СТОЛІТТЯ")):
                         return DateItemToken._new654(
                             t, lat.end_token,
                             DateItemToken.DateItemType.CENTURY, val)
     # Single-word pointers.
     if (t is not None and t.isValue("НАПРИКІНЦІ", None)):
         return DateItemToken._new660(t, t,
                                      DateItemToken.DateItemType.POINTER,
                                      "конец")
     if (t is not None and t.isValue("ДОНЕДАВНА", None)):
         return DateItemToken._new660(t, t,
                                      DateItemToken.DateItemType.POINTER,
                                      "сегодня")
     # Seasons; a Russian SUMMER match is kept only for the word forms
     # "ЛЕТОМ", "ЛЕТА" and "ЛЕТО".
     tok = DateItemToken.M_SEASONS.tryParse(t, TerminParseAttr.NO)
     if ((tok is not None and
          (Utils.valToEnum(tok.termin.tag, DatePointerType))
          == DatePointerType.SUMMER and t.morph.language.is_ru)
             and (isinstance(t, TextToken))):
         str0_ = (t).term
         if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
             tok = (None)
     if (tok is not None):
         return DateItemToken._new654(
             t, tok.end_token, DateItemToken.DateItemType.POINTER,
             Utils.valToEnum(tok.termin.tag, DatePointerType))
     # Noun phrases: "<adjective> season", relative quarter / half-year,
     # and beginning / middle / end / "present time" pointers.
     npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
     if (npt is not None):
         tok = DateItemToken.M_SEASONS.tryParse(npt.end_token,
                                                TerminParseAttr.NO)
         if ((tok is not None and
              (Utils.valToEnum(tok.termin.tag, DatePointerType))
              == DatePointerType.SUMMER and t.morph.language.is_ru)
                 and (isinstance(t, TextToken))):
             str0_ = (t).term
             if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
                 tok = (None)
         if (tok is not None):
             return DateItemToken._new654(
                 t, tok.end_token, DateItemToken.DateItemType.POINTER,
                 Utils.valToEnum(tok.termin.tag, DatePointerType))
         typ_ = DateItemToken.DateItemType.NUMBER
         if (npt.noun.isValue("КВАРТАЛ", None)):
             typ_ = DateItemToken.DateItemType.QUARTAL
         elif (npt.end_token.isValue("ПОЛУГОДИЕ", None)
               or npt.end_token.isValue("ПІВРІЧЧЯ", None)):
             typ_ = DateItemToken.DateItemType.HALFYEAR
         elif (npt.end_token.isValue("НАЧАЛО", None)
               or npt.end_token.isValue("ПОЧАТОК", None)):
             return DateItemToken._new660(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "начало")
         elif (npt.end_token.isValue("СЕРЕДИНА", None)):
             return DateItemToken._new660(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "середина")
         elif (npt.end_token.isValue("КОНЕЦ", None)
               or npt.end_token.isValue("КІНЕЦЬ", None)
               or npt.end_token.isValue("НАПРИКІНЕЦЬ", None)):
             return DateItemToken._new660(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "конец")
         elif (npt.end_token.isValue("ВРЕМЯ", None)
               and len(npt.adjectives) > 0
               and npt.end_token.previous.isValue("НАСТОЯЩЕЕ", None)):
             return DateItemToken._new660(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "сегодня")
         elif (npt.end_token.isValue("ЧАС", None)
               and len(npt.adjectives) > 0
               and npt.end_token.previous.isValue("ДАНИЙ", None)):
             return DateItemToken._new660(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "сегодня")
         if (typ_ != DateItemToken.DateItemType.NUMBER):
             # Quarter / half-year given relatively ("last", "previous",
             # "next" ...).
             delta = 0
             if (len(npt.adjectives) > 0):
                 if (npt.adjectives[0].isValue("ПОСЛЕДНИЙ", None)
                         or npt.adjectives[0].isValue("ОСТАННІЙ", None)):
                     # "last" quarter is 4, "last" half-year is 2
                     return DateItemToken._new654(
                         t0, npt.end_token, typ_,
                         (4 if typ_ == DateItemToken.DateItemType.QUARTAL
                          else 2))
                 if (npt.adjectives[0].isValue("ПРЕДЫДУЩИЙ", None)
                         or npt.adjectives[0].isValue("ПОПЕРЕДНІЙ", None)):
                     delta = -1
                 elif (npt.adjectives[0].isValue("СЛЕДУЮЩИЙ", None)
                       or npt.adjectives[0].isValue("ПОСЛЕДУЮЩИЙ", None)
                       or npt.adjectives[0].isValue("НАСТУПНИЙ", None)):
                     delta = 1
                 else:
                     return None
             # Walk back over preceding tokens looking for a date range
             # that carries a quarter / half-year number and shift it
             # by delta.
             cou = 0
             tt = t.previous
             first_pass2817 = True
             while True:
                 if first_pass2817: first_pass2817 = False
                 else:
                     tt = tt.previous
                     # FIX: count each step back; the counter was never
                     # incremented, so the 200-token limit below had no
                     # effect and the scan could reach the text start.
                     cou += 1
                 if (not (tt is not None)): break
                 if (cou > 200):
                     break
                 dr = Utils.asObjectOrNull(tt.getReferent(),
                                           DateRangeReferent)
                 if (dr is None):
                     continue
                 if (typ_ == DateItemToken.DateItemType.QUARTAL):
                     ii = dr.quarter_number
                     if (ii < 1):
                         continue
                     ii += delta
                     if ((ii < 1) or ii > 4):
                         continue
                     return DateItemToken._new654(t0, npt.end_token, typ_,
                                                  ii)
                 if (typ_ == DateItemToken.DateItemType.HALFYEAR):
                     ii = dr.halfyear_number
                     if (ii < 1):
                         continue
                     ii += delta
                     if ((ii < 1) or ii > 2):
                         continue
                     return DateItemToken._new654(t0, npt.end_token, typ_,
                                                  ii)
     term = t0.term
     # Punctuation: only . \ / : , and hyphen become delimiter items.
     if (not str.isalnum(term[0])):
         if (t0.isCharOf(".\\/:") or t0.is_hiphen):
             return DateItemToken._new660(t0, t0,
                                          DateItemToken.DateItemType.DELIM,
                                          term)
         elif (t0.isChar(',')):
             return DateItemToken._new660(t0, t0,
                                          DateItemToken.DateItemType.DELIM,
                                          term)
         else:
             return None
     # Letter O (Latin or Cyrillic) glued to a one-character number,
     # i.e. typed in place of a leading zero: take the number.
     if (term == "O" or term == "О"):
         if ((isinstance(t.next0_, NumberToken))
                 and not t.is_whitespace_after and len(
                     (t.next0_).value) == 1):
             return DateItemToken._new654(t, t.next0_,
                                          DateItemToken.DateItemType.NUMBER,
                                          (t.next0_).int_value)
     # Month names; a match whose termin has no tag is skipped and the
     # parse is retried from the next token.
     if (str.isalpha(term[0])):
         inf = DateItemToken.M_MONTHES.tryParse(t, TerminParseAttr.NO)
         if (inf is not None and inf.termin.tag is None):
             inf = DateItemToken.M_MONTHES.tryParse(inf.end_token.next0_,
                                                    TerminParseAttr.NO)
         if (inf is not None and (isinstance(inf.termin.tag, int))):
             return DateItemToken._new675(inf.begin_token, inf.end_token,
                                          DateItemToken.DateItemType.MONTH,
                                          inf.termin.tag, inf.termin.lang)
     return None
Esempio n. 11
0
 def __TryParse(t: 'Token',
                prev: 'TransItemToken',
                after_conj: bool,
                attach_high: bool = False) -> 'TransItemToken':
     if (t is None):
         return None
     t1 = t
     if (t1.isChar(',')):
         t1 = t1.next0_
     if (t1 is not None and t1.isValue("ПРИНАДЛЕЖАТЬ", "НАЛЕЖАТИ")):
         t1 = t1.next0_
     if (isinstance(t1, ReferentToken)):
         if (t1.getReferent().type_name == "ORGANIZATION"):
             return TransItemToken._new2521(t, t1, TransItemToken.Typs.ORG,
                                            t1.getReferent(), t1.morph)
     route = False
     if (t1 is not None and ((t1.isValue("СЛЕДОВАТЬ", "СЛІДУВАТИ")
                              or t1.isValue("ВЫПОЛНЯТЬ", "ВИКОНУВАТИ")))):
         t1 = t1.next0_
         route = True
     if (t1 is not None and t1.morph.class0_.is_preposition):
         t1 = t1.next0_
     if (t1 is not None and
         ((t1.isValue("РЕЙС", None) or t1.isValue("МАРШРУТ", None)))):
         t1 = t1.next0_
         route = True
     if (isinstance(t1, ReferentToken)):
         if (isinstance(t1.getReferent(), GeoReferent)):
             geo_ = Utils.asObjectOrNull(t1.getReferent(), GeoReferent)
             if (geo_.is_state or geo_.is_city):
                 tit = TransItemToken._new2522(t, t1,
                                               TransItemToken.Typs.ROUTE,
                                               list())
                 tit.route_items.append(geo_)
                 t1 = t1.next0_
                 first_pass3132 = True
                 while True:
                     if first_pass3132: first_pass3132 = False
                     else: t1 = t1.next0_
                     if (not (t1 is not None)): break
                     if (t1.is_hiphen):
                         continue
                     if (t1.morph.class0_.is_preposition
                             or t1.morph.class0_.is_conjunction):
                         continue
                     geo_ = (Utils.asObjectOrNull(t1.getReferent(),
                                                  GeoReferent))
                     if (geo_ is None):
                         break
                     if (not geo_.is_city and not geo_.is_state):
                         break
                     tit.route_items.append(geo_)
                     tit.end_token = t1
                 if (len(tit.route_items) > 1 or route):
                     return tit
         elif ((isinstance(t1.getReferent(), DateReferent))
               and (t1.whitespaces_before_count < 3)):
             tit = TransItemToken._new2523(t, t1, TransItemToken.Typs.DATE,
                                           t1.getReferent())
             if (t1.next0_ is not None):
                 if (t1.next0_.isValue("В", None)
                         and t1.next0_.next0_ is not None
                         and t1.next0_.next0_.isChar('.')):
                     tit.end_token = t1.next0_.next0_
                 elif (t1.next0_.isValue("ВЫП", None)
                       or t1.next0_.isValue("ВЫПУСК", None)):
                     tit.end_token = t1.next0_
                     if (t1.next0_.next0_ is not None
                             and t1.next0_.next0_.isChar('.')):
                         tit.end_token = t1.next0_.next0_
             return tit
     if (isinstance(t, TextToken)):
         num = MiscHelper.checkNumberPrefix(t)
         if (num is not None):
             tit = TransItemToken.__attachRusAutoNumber(num)
             if (tit is None):
                 tit = TransItemToken._attachNumber(num, False)
             if (tit is not None):
                 tit.begin_token = t
                 return tit
         tok = TransItemToken.M_ONTOLOGY.tryParse(t, TerminParseAttr.NO)
         if (tok is None and ((t.isValue("С", None) or t.isValue("C", None)
                               or t.isValue("ЗА", None)))):
             tok = TransItemToken.M_ONTOLOGY.tryParse(
                 t.next0_, TerminParseAttr.NO)
         if (tok is None and BracketHelper.isBracket(t, True)):
             tok1 = TransItemToken.M_ONTOLOGY.tryParse(
                 t.next0_, TerminParseAttr.NO)
             if (tok1 is not None and BracketHelper.isBracket(
                     tok1.end_token.next0_, True)):
                 tok = tok1
                 tok.begin_token = t
                 tok.end_token = tok.end_token.next0_
                 tok.begin_token = t
             elif (tok1 is not None):
                 tt = Utils.asObjectOrNull(tok1.termin,
                                           TransItemToken.TransTermin)
                 if (tt.typ == TransItemToken.Typs.BRAND):
                     tok = tok1
                     tok.begin_token = t
         if (tok is None and t.isValue("МАРКА", None)):
             res1 = TransItemToken.__TryParse(t.next0_, prev, after_conj,
                                              False)
             if (res1 is not None):
                 if (res1.typ == TransItemToken.Typs.NAME
                         or res1.typ == TransItemToken.Typs.BRAND):
                     res1.begin_token = t
                     res1.typ = TransItemToken.Typs.BRAND
                     return res1
         if (tok is not None):
             tt = Utils.asObjectOrNull(tok.termin,
                                       TransItemToken.TransTermin)
             if (tt.typ == TransItemToken.Typs.NUMBER):
                 tit = TransItemToken.__attachRusAutoNumber(
                     tok.end_token.next0_)
                 if (tit is None):
                     tit = TransItemToken._attachNumber(
                         tok.end_token.next0_, False)
                 if (tit is not None):
                     tit.begin_token = t
                     return tit
                 else:
                     return None
             if (tt.is_doubt and not attach_high):
                 if (prev is None or prev.typ != TransItemToken.Typs.NOUN):
                     if ((prev is not None
                          and prev.typ == TransItemToken.Typs.BRAND
                          and tt.typ == TransItemToken.Typs.BRAND)
                             and Utils.compareStrings(
                                 tt.canonic_text, prev.value, True) == 0):
                         pass
                     else:
                         return None
             if (tt.canonic_text == "СУДНО"):
                 if ((((tok.morph.number) & (MorphNumber.PLURAL))) !=
                     (MorphNumber.UNDEFINED)):
                     if (not BracketHelper.canBeStartOfSequence(
                             tok.end_token.next0_, False, False)):
                         return None
             tit = TransItemToken._new2524(tok.begin_token, tok.end_token,
                                           tt.kind, tt.typ, tt.is_doubt,
                                           tok.chars, tok.morph)
             tit.value = tt.canonic_text
             if (tit.typ == TransItemToken.Typs.NOUN):
                 tit.value = tit.value.lower()
             else:
                 tit.value = tit.value.upper()
             return tit
         if (tok is None and t.morph.class0_.is_adjective):
             npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
             if (npt is not None and len(npt.adjectives) > 0):
                 state_ = None
                 tt = t
                 first_pass3133 = True
                 while True:
                     if first_pass3133: first_pass3133 = False
                     else: tt = tt.next0_
                     if (not (tt is not None
                              and tt.previous != npt.end_token)):
                         break
                     tok = TransItemToken.M_ONTOLOGY.tryParse(
                         tt, TerminParseAttr.NO)
                     if (tok is None and state_ is None):
                         state_ = tt.kit.processReferent("GEO", tt)
                     if (tok is not None
                             and tok.end_token == npt.end_token):
                         if ((tok.termin).typ == TransItemToken.Typs.NOUN):
                             tit = TransItemToken._new2524(
                                 t, tok.end_token, (tok.termin).kind,
                                 TransItemToken.Typs.NOUN,
                                 (tok.termin).is_doubt, tok.chars,
                                 npt.morph)
                             tit.value = (tok.termin).canonic_text.lower()
                             tit.alt_value = npt.getNormalCaseText(
                                 None, False, MorphGender.UNDEFINED,
                                 False).lower()
                             if (LanguageHelper.endsWithEx(
                                     tit.alt_value, "суд", "суда", None,
                                     None)):
                                 if (not BracketHelper.canBeStartOfSequence(
                                         tok.end_token.next0_, False,
                                         False)):
                                     continue
                             if (state_ is not None):
                                 if ((state_.referent).is_state):
                                     tit.state = state_
                             return tit
     if (t is not None and t.isValue("КЛАСС", None)
             and t.next0_ is not None):
         br = BracketHelper.tryParse(t.next0_, BracketParseAttr.NO, 100)
         if (br is not None):
             return TransItemToken._new2526(
                 t, br.end_token, TransItemToken.Typs.CLASS,
                 MiscHelper.getTextValueOfMetaToken(br, GetTextAttr.NO))
     nt = Utils.asObjectOrNull(t, NumberToken)
     if (nt is not None):
         if (prev is None or nt.typ != NumberSpellingType.DIGIT):
             return None
         if (prev.typ == TransItemToken.Typs.BRAND):
             return TransItemToken.__attachModel(t, False, prev)
         else:
             return None
     res = TransItemToken.__attachRusAutoNumber(t)
     if ((res) is not None):
         if (not res.is_doubt):
             return res
         if (prev is not None and prev.typ == TransItemToken.Typs.NOUN
                 and prev.kind == TransportKind.AUTO):
             return res
         if (prev is not None
                 and ((prev.typ == TransItemToken.Typs.BRAND
                       or prev.typ == TransItemToken.Typs.MODEL))):
             return res
     t1 = t
     if (t.is_hiphen):
         t1 = t.next0_
     if (prev is not None and prev.typ == TransItemToken.Typs.BRAND
             and t1 is not None):
         tit = TransItemToken.__attachModel(t1, True, prev)
         if (tit is not None):
             tit.begin_token = t
             return tit
     if (prev is not None
             and ((prev.typ == TransItemToken.Typs.NOUN or after_conj))):
         br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
         if (br is not None and br.is_quote_type):
             tit = TransItemToken.tryParse(br.begin_token.next0_, prev,
                                           after_conj, False)
             if (tit is not None and tit.end_token.next0_ == br.end_token):
                 if (not tit.is_doubt
                         or tit.typ == TransItemToken.Typs.BRAND):
                     tit.begin_token = br.begin_token
                     tit.end_token = br.end_token
                     return tit
             s = MiscHelper.getTextValueOfMetaToken(br, GetTextAttr.NO)
             if (not Utils.isNullOrEmpty(s) and (len(s) < 30)):
                 chars_ = 0
                 digs = 0
                 un = 0
                 for c in s:
                     if (not Utils.isWhitespace(c)):
                         if (str.isalpha(c)):
                             chars_ += 1
                         elif (str.isdigit(c)):
                             digs += 1
                         else:
                             un += 1
                 if (((digs == 0 and un == 0
                       and t.next0_.chars.is_capital_upper))
                         or prev.kind == TransportKind.SHIP
                         or prev.kind == TransportKind.SPACE):
                     return TransItemToken._new2526(
                         br.begin_token, br.end_token,
                         TransItemToken.Typs.NAME, s)
                 if (digs > 0 and (chars_ < 5)):
                     return TransItemToken._new2526(
                         br.begin_token, br.end_token,
                         TransItemToken.Typs.MODEL, s.replace(" ", ""))
     if (prev is not None and (((prev.typ == TransItemToken.Typs.NOUN
                                 or prev.typ == TransItemToken.Typs.BRAND
                                 or prev.typ == TransItemToken.Typs.NAME)
                                or prev.typ == TransItemToken.Typs.MODEL))):
         tit = TransItemToken.__attachModel(
             t, prev.typ != TransItemToken.Typs.NAME, prev)
         if (tit is not None):
             return tit
     if (((prev is not None and prev.typ == TransItemToken.Typs.NOUN
           and prev.kind == TransportKind.AUTO) and
          (isinstance(t, TextToken)) and t.chars.is_letter)
             and not t.chars.is_all_lower
             and (t.whitespaces_before_count < 2)):
         pt = t.kit.processReferent("PERSON", t)
         if (pt is None):
             tit = TransItemToken._new2529(t, t, TransItemToken.Typs.BRAND)
             tit.value = (t).term
             return tit
     if (((prev is not None and prev.typ == TransItemToken.Typs.NOUN and
           ((prev.kind == TransportKind.SHIP
             or prev.kind == TransportKind.SPACE)))) or after_conj):
         if (t.chars.is_capital_upper):
             ok = True
             npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
             if (npt is not None and len(npt.adjectives) > 0):
                 ok = False
             else:
                 rt = t.kit.processReferent("PERSON", t)
                 if (rt is not None):
                     ok = False
             if (t.getMorphClassInDictionary().is_proper_surname):
                 if (not t.morph.case_.is_nominative):
                     ok = False
             if (ok):
                 t1 = t
                 tt = t.next0_
                 while tt is not None:
                     if (tt.whitespaces_before_count > 1):
                         break
                     if (tt.chars != t.chars):
                         break
                     tit = TransItemToken.tryParse(tt, None, False, False)
                     if ((tit) is not None):
                         break
                     t1 = tt
                     tt = tt.next0_
                 s = MiscHelper.getTextValue(t, t1, GetTextAttr.NO)
                 if (s is not None):
                     res1 = TransItemToken._new2530(
                         t, t1, TransItemToken.Typs.NAME, True, s)
                     if (not t1.is_newline_after):
                         br = BracketHelper.tryParse(
                             t1.next0_, BracketParseAttr.NO, 100)
                         if (br is not None):
                             res1.end_token = br.end_token
                             res1.alt_value = res1.value
                             res1.value = MiscHelper.getTextValueOfMetaToken(
                                 br, GetTextAttr.NO)
                     return res1
     return None
Esempio n. 12
0
 def parse(t: 'Token',
           max_char: int = 0,
           prev: 'InstrToken' = None) -> 'InstrToken':
     is_start_of_line = False
     t00 = t
     if (t is not None):
         is_start_of_line = t00.is_newline_before
         while t is not None:
             if (t.is_table_control_char and not t.isChar(chr(0x1F))):
                 if (t.is_newline_after and not is_start_of_line):
                     is_start_of_line = True
                 t = t.next0_
             else:
                 break
     if (t is None):
         return None
     if (t.is_newline_before):
         is_start_of_line = True
     t0 = t
     t1 = None
     has_word = False
     first_pass2988 = True
     while True:
         if first_pass2988: first_pass2988 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_newline_before and t != t0):
             break
         if (max_char > 0 and t.begin_char > max_char):
             break
         if (is_start_of_line and t == t0):
             if (t.isValue("ГЛАВА", None)):
                 next0__ = InstrToken.parse(t.next0_, 0, None)
                 if (next0__ is not None and next0__.typ == ILTypes.PERSON):
                     next0__.begin_token = t
                     return next0__
             tt = None
             if ((isinstance(t.getReferent(), PersonReferent)) or
                 (isinstance(t.getReferent(), PersonPropertyReferent)) or
                 (isinstance(t.getReferent(), InstrumentParticipant))):
                 return InstrToken.__correctPerson(
                     InstrToken._new1405(t00, t, ILTypes.PERSON, t))
             is_ref = False
             if (isinstance(t.getReferent(), PersonPropertyReferent)):
                 tt = t.next0_
                 is_ref = True
             elif (prev is not None and prev.typ == ILTypes.PERSON):
                 rt = t.kit.processReferent(PersonAnalyzer.ANALYZER_NAME, t)
                 if (rt is not None):
                     if (isinstance(rt.referent, PersonReferent)):
                         return InstrToken._new1406(t00, rt.end_token,
                                                    ILTypes.PERSON)
                     tt = rt.end_token.next0_
             cou = 0
             t11 = (None if tt is None else tt.previous)
             first_pass2989 = True
             while True:
                 if first_pass2989: first_pass2989 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.is_table_control_char):
                     continue
                 re = tt.getReferent()
                 if (isinstance(re, PersonReferent)):
                     return InstrToken._new1405(t00, tt, ILTypes.PERSON, tt)
                 if (isinstance(re, GeoReferent)):
                     t11 = tt
                     continue
                 if (re is not None):
                     break
                 if (DecreeToken.isKeyword(tt, False) is not None):
                     break
                 if (tt.is_newline_before):
                     cou += 1
                     if ((cou) > 4):
                         break
             if (tt is None and is_ref):
                 return InstrToken._new1405(t00, Utils.ifNotNull(t11, t),
                                            ILTypes.PERSON, t)
         dt = DecreeToken.tryAttach(t, None, False)
         if (dt is not None):
             if (dt.typ == DecreeToken.ItemType.TYP
                     and not t.chars.is_all_lower):
                 if (t != t0):
                     break
                 has_verb_ = False
                 tt = dt.end_token
                 while tt is not None:
                     if (tt.is_newline_before):
                         break
                     elif ((isinstance(tt, TextToken))
                           and (tt).is_pure_verb):
                         has_verb_ = True
                         break
                     tt = tt.next0_
                 if (not has_verb_):
                     res2 = InstrToken._new1409(
                         t0, dt.end_token, ILTypes.TYP,
                         Utils.ifNotNull(dt.full_value, dt.value))
                     if (res2.value == "ДОПОЛНИТЕЛЬНОЕ СОГЛАШЕНИЕ"
                             or res2.value == "ДОДАТКОВА УГОДА"):
                         if (res2.begin_char > 500
                                 and res2.newlines_before_count > 1):
                             res2.typ = ILTypes.APPENDIX
                     return res2
             if (dt.typ == DecreeToken.ItemType.NUMBER):
                 if (t != t0):
                     break
                 return InstrToken._new1409(t0, dt.end_token,
                                            ILTypes.REGNUMBER, dt.value)
             if (dt.typ == DecreeToken.ItemType.ORG):
                 if (t != t0):
                     break
                 return InstrToken._new1411(t0, dt.end_token,
                                            ILTypes.ORGANIZATION, dt.ref,
                                            dt.value)
             if (dt.typ == DecreeToken.ItemType.TERR):
                 if (t != t0):
                     break
                 re = InstrToken._new1411(t0, dt.end_token, ILTypes.GEO,
                                          dt.ref, dt.value)
                 t1 = re.end_token.next0_
                 if (t1 is not None and t1.isChar(',')):
                     t1 = t1.next0_
                 if (t1 is not None and t1.isValue("КРЕМЛЬ", None)):
                     re.end_token = t1
                 elif ((t1 is not None and t1.isValue("ДОМ", "БУДИНОК")
                        and t1.next0_ is not None)
                       and t1.next0_.isValue("СОВЕТ", "РАД")):
                     re.end_token = t1.next0_
                     if (t1.next0_.next0_ is not None and (isinstance(
                             t1.next0_.next0_.getReferent(), GeoReferent))):
                         re.end_token = t1.next0_.next0_
                 return re
             if (dt.typ == DecreeToken.ItemType.OWNER):
                 if (t != t0):
                     break
                 if (dt.ref is not None
                         and str(dt.ref.referent).startswith("агент")):
                     dt = (None)
                 if (dt is not None):
                     res1 = InstrToken._new1411(t0, dt.end_token,
                                                ILTypes.PERSON, dt.ref,
                                                dt.value)
                     return InstrToken.__correctPerson(res1)
         if (BracketHelper.canBeStartOfSequence(t, False, False)):
             br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
             if (br is not None):
                 t1 = br.end_token
                 t = t1
                 continue
             if (t.next0_ is not None and BracketHelper.canBeEndOfSequence(
                     t.next0_, False, None, False)):
                 t1 = t.next0_
                 t = t1
                 continue
         if (isinstance(t, TextToken)):
             if (t.isChar('_')):
                 t1 = t
                 continue
         r = t.getReferent()
         if (isinstance(r, DateReferent)):
             tt = t
             if (tt.next0_ is not None and tt.next0_.isCharOf(",;")):
                 tt = tt.next0_
             if (not t.is_newline_before and not tt.is_newline_after):
                 t1 = tt
                 continue
             if (not has_word):
                 return InstrToken._new1405(t, tt, ILTypes.DATE, t)
             if (t != t0):
                 break
         has_word = True
         if (isinstance(r, InstrumentParticipant)):
             tt = (t).begin_token
             first_pass2990 = True
             while True:
                 if first_pass2990: first_pass2990 = False
                 else: tt = tt.next0_
                 if (not (tt is not None and (tt.end_char < t.end_char))):
                     break
                 rr = tt.getReferent()
                 if (rr is None):
                     continue
                 if ((isinstance(rr, OrganizationReferent))
                         or (isinstance(rr, BankDataReferent))
                         or (isinstance(rr, UriReferent))):
                     r = (None)
                     break
         if ((isinstance(r, PersonReferent))
                 or (isinstance(r, PersonPropertyReferent))
                 or (isinstance(r, InstrumentParticipant))):
             if (t != t0):
                 break
             if (isinstance(r, InstrumentParticipant)):
                 pass
             res1 = InstrToken._new1405(t, t, ILTypes.PERSON, t)
             return InstrToken.__correctPerson(res1)
         if (isinstance(r, OrganizationReferent)):
             if (t != t0):
                 break
             return InstrToken._new1405(t, t, ILTypes.ORGANIZATION, t)
         if (isinstance(r, DecreePartReferent)):
             dpr = Utils.asObjectOrNull(r, DecreePartReferent)
             if (dpr.appendix is not None):
                 if (t.is_newline_before or is_start_of_line):
                     if (t.is_newline_after
                             or t.whitespaces_before_count > 30):
                         return InstrToken._new1409(t, t, ILTypes.APPENDIX,
                                                    "ПРИЛОЖЕНИЕ")
                     ok = True
                     tt = t.next0_
                     first_pass2991 = True
                     while True:
                         if first_pass2991: first_pass2991 = False
                         else: tt = tt.next0_
                         if (not (tt is not None)): break
                         if (tt.is_newline_before):
                             break
                         npt = NounPhraseHelper.tryParse(
                             tt, NounPhraseParseAttr.NO, 0)
                         if (npt is not None):
                             tt = npt.end_token
                             continue
                         ok = False
                         break
                     if (ok):
                         return InstrToken._new1409(t, t, ILTypes.APPENDIX,
                                                    "ПРИЛОЖЕНИЕ")
         if ((isinstance(r, DecreeReferent))
                 and (r).kind == DecreeKind.PUBLISHER and t == t0):
             res1 = InstrToken._new1406(t, t, ILTypes.APPROVED)
             tt = t.next0_
             first_pass2992 = True
             while True:
                 if first_pass2992: first_pass2992 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.isCharOf(",;")):
                     continue
                 if ((isinstance(tt.getReferent(), DecreeReferent)) and
                     (tt.getReferent()).kind == DecreeKind.PUBLISHER):
                     res1.end_token = t
                 else:
                     break
             return res1
         if (t.isValue("ЗА", None) and t.next0_ is not None
                 and t.is_newline_before):
             rr = t.next0_.getReferent()
             if ((isinstance(rr, PersonReferent))
                     or (isinstance(rr, PersonPropertyReferent))
                     or (isinstance(rr, InstrumentParticipant))):
                 if (t != t0):
                     break
                 res1 = InstrToken._new1405(t, t.next0_, ILTypes.PERSON,
                                            t.next0_)
                 t = t.next0_.next0_
                 if ((isinstance(rr, InstrumentParticipant))
                         and t is not None):
                     r = t.getReferent()
                     if ((r) is not None):
                         if ((isinstance(r, PersonReferent)) or
                             (isinstance(r, PersonPropertyReferent))):
                             res1.end_token = t
                             res1.ref = (t)
                 return res1
         ii = 0
         while ii < len(InstrToken._m_directives):
             if (t.isValue(InstrToken._m_directives[ii], None)):
                 if (t.next0_ is not None
                         and t.next0_.isValue("СЛЕДУЮЩЕЕ", "НАСТУПНЕ")):
                     if (t != t0):
                         break
                     t11 = t.next0_
                     ok = False
                     if (t11.next0_ is not None
                             and t11.next0_.isCharOf(":.")
                             and t11.next0_.is_newline_after):
                         ok = True
                         t11 = t11.next0_
                     if (ok):
                         return InstrToken._new1409(
                             t, t11, ILTypes.DIRECTIVE,
                             InstrToken._m_directives_norm[ii])
                 if (t.is_newline_after
                         or ((t.next0_ is not None and t.next0_.isChar(':')
                              and t.next0_.is_newline_after))):
                     if (t != t0):
                         break
                     if (not t.is_newline_before):
                         if ((InstrToken._m_directives_norm[ii] != "ПРИКАЗ"
                              and InstrToken._m_directives_norm[ii] !=
                              "ПОСТАНОВЛЕНИЕ" and
                              InstrToken._m_directives_norm[ii] != "НАКАЗ")
                                 and InstrToken._m_directives_norm[ii] !=
                                 "ПОСТАНОВУ"):
                             break
                     return InstrToken._new1409(
                         t, (t if t.is_newline_after else t.next0_),
                         ILTypes.DIRECTIVE,
                         InstrToken._m_directives_norm[ii])
                 break
             ii += 1
         if (t.is_newline_before and t.chars.is_letter
                 and t.length_char == 1):
             for d in InstrToken._m_directives:
                 t11 = MiscHelper.tryAttachWordByLetters(d, t, True)
                 if (t11 is not None):
                     if (t11.next0_ is not None and t11.next0_.isChar(':')):
                         t11 = t11.next0_
                     return InstrToken._new1406(t, t11, ILTypes.DIRECTIVE)
         tte = ((t).begin_token if (isinstance(t, MetaToken)) else t)
         term = ((tte).term if isinstance(tte, TextToken) else None)
         if (is_start_of_line and not tte.chars.is_all_lower and t == t0):
             npt = NounPhraseHelper.tryParse(tte, NounPhraseParseAttr.NO, 0)
             if (npt is not None
                     and ((term == "ПРИЛОЖЕНИЯ" or term == "ДОДАТКИ"))):
                 # if (tte.Next != null && tte.Next.IsChar(':'))
                 npt = (None)
             if (npt is not None and npt.morph.case_.is_nominative
                     and (isinstance(npt.end_token, TextToken))):
                 term1 = (npt.end_token).term
                 if (((term1 == "ПРИЛОЖЕНИЕ" or term1 == "ДОДАТОК" or term1
                       == "МНЕНИЕ") or term1 == "ДУМКА" or term1 == "АКТ")
                         or term1 == "ФОРМА" or term == "ЗАЯВКА"):
                     tt1 = npt.end_token.next0_
                     dt1 = DecreeToken.tryAttach(tt1, None, False)
                     if (dt1 is not None
                             and dt1.typ == DecreeToken.ItemType.NUMBER):
                         tt1 = dt1.end_token.next0_
                     elif (isinstance(tt1, NumberToken)):
                         tt1 = tt1.next0_
                     elif ((isinstance(tt1, TextToken))
                           and tt1.length_char == 1
                           and tt1.chars.is_letter):
                         tt1 = tt1.next0_
                     ok = True
                     if (tt1 is None):
                         ok = False
                     elif (tt1.isValue("В", "У")):
                         ok = False
                     elif (tt1.isValue("К", None)
                           and tt1.is_newline_before):
                         return InstrToken._new1409(t, t, ILTypes.APPENDIX,
                                                    term1)
                     elif (not tt1.is_newline_before
                           and InstrToken._checkEntered(tt1) is not None):
                         ok = False
                     elif (tt1 == t.next0_
                           and ((tt1.isChar(':') or
                                 ((tt1.isValue("НА", None)
                                   and term1 != "ЗАЯВКА"))))):
                         ok = False
                     if (ok):
                         br = BracketHelper.tryParse(
                             tt1, BracketParseAttr.NO, 100)
                         if (br is not None):
                             tt1 = br.end_token.next0_
                             if (br.end_token.next0_ is None
                                     or not br.end_token.is_newline_after
                                     or br.end_token.next0_.isCharOf(";,")):
                                 ok = False
                             if (tt1 is not None and tt1.isValue(
                                     "ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                                 ok = False
                     if (prev is not None and prev.typ == ILTypes.APPENDIX):
                         ok = False
                     if (ok):
                         cou = 0
                         ttt = tte.previous
                         while ttt is not None and (cou < 300):
                             if (ttt.is_table_control_char):
                                 if (not ttt.isChar(chr(0x1F))):
                                     if (ttt == tte.previous
                                             and ttt.isChar(chr(0x1E))):
                                         pass
                                     else:
                                         ok = False
                                 break
                             ttt = ttt.previous
                             cou += 1
                     if (ok):
                         it1 = InstrToken1.parse(t, True, None, 0, None,
                                                 False, 0, False)
                         if (it1 is not None):
                             if (it1.has_verb):
                                 ok = False
                     if (ok and t.previous is not None):
                         ttp = t.previous
                         first_pass2993 = True
                         while True:
                             if first_pass2993: first_pass2993 = False
                             else: ttp = ttp.previous
                             if (not (ttp is not None)): break
                             if (ttp.is_table_control_char
                                     and not ttp.isChar(chr(0x1F))):
                                 continue
                             if (BracketHelper.isBracket(ttp, False) and
                                     not BracketHelper.canBeEndOfSequence(
                                         ttp, False, None, False)):
                                 continue
                             if (ttp.isCharOf(";:")):
                                 ok = False
                             break
                     if ((ok and t.previous is not None and
                          (t.newlines_before_count < 3))
                             and not t.is_newline_after):
                         lines = 0
                         ttp = t.previous
                         first_pass2994 = True
                         while True:
                             if first_pass2994: first_pass2994 = False
                             else: ttp = ttp.previous
                             if (not (ttp is not None)): break
                             if (not ttp.is_newline_before):
                                 continue
                             while ttp is not None and (ttp.end_char <
                                                        t.begin_char):
                                 if (isinstance(ttp, NumberToken)):
                                     pass
                                 elif ((isinstance(ttp, TextToken))
                                       and ttp.length_char > 1):
                                     if (ttp.isValue(
                                             "ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                                         ok = False
                                     break
                                 else:
                                     break
                                 ttp = ttp.next0_
                             lines += 1
                             if ((lines) > 1):
                                 break
                     if (ok and
                         ((term1 != "ПРИЛОЖЕНИЕ" and term1 != "ДОДАТОК"
                           and term1 != "МНЕНИЕ"))):
                         if (t.newlines_before_count < 3):
                             ok = False
                     if (ok):
                         return InstrToken._new1409(t, t, ILTypes.APPENDIX,
                                                    term1)
         app = False
         if ((((term == "ОСОБОЕ" or term == "ОСОБЛИВЕ")) and t.next0_
              is not None and t.next0_.isValue("МНЕНИЕ", "ДУМКА"))
                 and t == t0 and is_start_of_line):
             app = True
         if ((((term == "ДОПОЛНИТЕЛЬНОЕ" or term == "ДОДАТКОВА"))
              and t.next0_ is not None
              and t.next0_.isValue("СОГЛАШЕНИЕ", "УГОДА")) and t == t0
                 and is_start_of_line):
             app = True
         if (app):
             tt = t.next0_
             while tt is not None:
                 if (tt.is_newline_before):
                     break
                 elif (tt.getMorphClassInDictionary() == MorphClass.VERB):
                     app = False
                     break
                 tt = tt.next0_
             if (app):
                 return InstrToken._new1406(t, t.next0_, ILTypes.APPENDIX)
         if (not t.chars.is_all_lower and t == t0):
             tt = InstrToken._checkApproved(t)
             if (tt is not None):
                 if (tt.next0_ is not None and
                     (isinstance(tt.next0_.getReferent(), DecreeReferent))):
                     return InstrToken._new1405(t, tt, ILTypes.APPROVED,
                                                tt.next0_.getReferent())
                 dt1 = DecreeToken.tryAttach(tt.next0_, None, False)
                 if (dt1 is not None
                         and dt1.typ == DecreeToken.ItemType.TYP):
                     return InstrToken._new1406(t, tt, ILTypes.APPROVED)
         t1 = t
         is_start_of_line = False
     if (t1 is None):
         return None
     res = InstrToken._new1406(t00, t1, ILTypes.UNDEFINED)
     res.no_words = True
     t = t0
     first_pass2995 = True
     while True:
         if first_pass2995: first_pass2995 = False
         else: t = t.next0_
         if (not (t is not None and t.end_char <= t1.end_char)): break
         if (not ((isinstance(t, TextToken)))):
             if (isinstance(t, ReferentToken)):
                 res.no_words = False
             continue
         if (not t.chars.is_letter):
             continue
         res.no_words = False
         if ((t).is_pure_verb):
             res.has_verb = True
     if (t0.isValue("ВОПРОС", "ПИТАННЯ") and t0.next0_ is not None
             and t0.next0_.isCharOf(":.")):
         res.typ = ILTypes.QUESTION
     return res
Esempio n. 13
0
 def __analizeListItems(chi: typing.List['FragToken'], ind: int) -> int:
     """Build list-related child fragments for the fragment ``chi[ind]``.

     Two mutually exclusive paths are taken:

     * Citation path - when the fragment has changes and a multiline changes
       value, a CITATION child covering that value is appended to the
       fragment, the cited text is analyzed, and 1 is returned.
     * List path - the fragment text is split into lines with
       ``ListHelper.LineToken.parseList``; the lines are re-classified,
       merged and nested, and finally appended to the fragment's children
       as LISTITEM / CONTENT / LISTHEAD fragments.

     Args:
         chi: sequence of sibling fragments.
         ind: index in ``chi`` of the fragment to process.

     Returns:
         int: -1 when ``ind`` is out of range, when the fragment kind is not
         one of the accepted kinds, or when fewer than two lines / fewer
         than two list items are found. Otherwise 1, or - when lines of
         following CONTENT siblings were merged in - the offset of the last
         merged sibling from ``ind`` plus one (presumably the number of
         entries of ``chi`` the caller should treat as consumed - TODO
         confirm against the caller).
     """
     if (ind >= len(chi)):
         return -1
     res = chi[ind]
     ki = res.kind
     # Only these structural kinds are analyzed; every other kind is rejected.
     if (((ki == InstrumentKind.CHAPTER or ki == InstrumentKind.CLAUSE
           or ki == InstrumentKind.CONTENT) or ki == InstrumentKind.ITEM
          or ki == InstrumentKind.SUBITEM)
             or ki == InstrumentKind.CLAUSEPART
             or ki == InstrumentKind.INDENTION):
         pass
     else:
         return -1
     # ---- Citation path ---------------------------------------------------
     if (res.has_changes and res.multiline_changes_value is not None):
         ci = res.multiline_changes_value
         cit = FragToken._new1242(ci.begin_token, ci.end_token,
                                  InstrumentKind.CITATION)
         res.children.append(cit)
         # Widen the citation to include an adjacent bracket on either side
         # and, after a trailing bracket, a following ';' or '.'.
         # NOTE(review): begin_token.previous / end_token.next0_ may be None
         # here; assumes BracketHelper.isBracket tolerates None - confirm.
         if (BracketHelper.isBracket(cit.begin_token.previous, True)):
             cit.begin_token = cit.begin_token.previous
         if (BracketHelper.isBracket(cit.end_token.next0_, True)):
             cit.end_token = cit.end_token.next0_
             if (cit.end_token.next0_ is not None
                     and cit.end_token.next0_.isCharOf(";.")):
                 cit.end_token = cit.end_token.next0_
         # Presumably fills the parts of res not covered by children with
         # content fragments - TODO confirm in FragToken.
         res.fillByContentChildren()
         # NOTE(review): this condition has no effect (the body is `pass`).
         if (res.children[0].has_changes):
             pass
         # Determine which kind of instrument part the cited text represents.
         cit_kind = InstrumentKind.UNDEFINED
         if (isinstance(ci.tag, DecreeChangeReferent)):
             dcr = Utils.asObjectOrNull(ci.tag, DecreeChangeReferent)
             if (dcr.value is not None and len(dcr.value.new_items) > 0):
                 # Use the text of the first new item up to its first space
                 # as the attribute name to resolve the kind from.
                 mnem = dcr.value.new_items[0]
                 i = mnem.find(' ')
                 if (((i)) > 0):
                     mnem = mnem[0:0 + i]
                 cit_kind = PartToken._getInstrKindByTyp(
                     PartToken._getTypeByAttrName(mnem))
             elif (len(dcr.owners) > 0
                   and (isinstance(dcr.owners[0], DecreePartReferent))
                   and dcr.kind == DecreeChangeKind.NEW):
                 pat = Utils.asObjectOrNull(dcr.owners[0],
                                            DecreePartReferent)
                 # Despite its name, min0_ tracks the largest non-zero rank
                 # seen so far; cit_kind follows the slot with that rank.
                 min0_ = 0
                 for s in pat.slots:
                     ty = PartToken._getTypeByAttrName(s.type_name)
                     if (ty == PartToken.ItemType.UNDEFINED):
                         continue
                     l_ = PartToken._getRank(ty)
                     if (l_ == 0):
                         continue
                     if (l_ > min0_ or min0_ == 0):
                         min0_ = l_
                         cit_kind = PartToken._getInstrKindByTyp(ty)
         sub = None
         if (cit_kind != InstrumentKind.UNDEFINED
                 and cit_kind != InstrumentKind.APPENDIX):
             # Known non-appendix kind: analyze the cited span as content of
             # that kind, then label the wrapper fragment as CONTENT.
             sub = FragToken(ci.begin_token, ci.end_token)
             wr = ContentAnalyzeWhapper()
             wr.analyze(sub, None, True, cit_kind)
             sub.kind = InstrumentKind.CONTENT
         else:
             # Unknown kind or appendix: parse the cited span as a document.
             sub = FragToken.createDocument(ci.begin_token, ci.end_char,
                                            cit_kind)
         if (sub is None or len(sub.children) == 0):
             # Nothing structured was found - the citation stays childless.
             pass
         elif ((sub.kind == InstrumentKind.CONTENT and len(sub.children) > 0
                and sub.children[0].begin_token == sub.begin_token)
               and sub.children[len(sub.children) - 1].end_token
               == sub.end_token):
             # The CONTENT wrapper's children start at its first token and
             # end at its last one, so attach them to the citation directly.
             cit.children.extend(sub.children)
         else:
             cit.children.append(sub)
         return 1
     # ---- List path -------------------------------------------------------
     end_char = res.end_char
     # Cache the parsed InstrToken1 on the fragment; the value is not used
     # further in this function.
     if (res._itok is None):
         res._itok = InstrToken1.parse(res.begin_token, True, None, 0, None,
                                       False, res.end_char, False)
     lines = ListHelper.LineToken.parseList(res.begin_token, end_char, None)
     if (lines is None or (len(lines) < 1)):
         return -1
     ret = 1
     if (res.kind == InstrumentKind.CONTENT):
         # Try to continue the list through the following siblings.
         j = ind + 1
         while j < len(chi):
             if (chi[j].kind == InstrumentKind.CONTENT):
                 # The last accumulated line is passed as the third argument
                 # (presumably as the preceding-line context - TODO confirm).
                 lines2 = ListHelper.LineToken.parseList(
                     chi[j].begin_token, chi[j].end_char,
                     lines[len(lines) - 1])
                 if (lines2 is None or (len(lines2) < 1)):
                     break
                 if (not lines2[0].is_list_item):
                     # A non-item first line is accepted (and marked as an
                     # item) only if it ends with ':', is followed by an item
                     # and its first token is not capital-upper; otherwise
                     # the look-ahead stops at this sibling.
                     if ((len(lines2) > 1 and lines2[1].is_list_item
                          and lines2[0].end_token.isCharOf(":"))
                             and not lines2[0].begin_token.chars.
                             is_capital_upper):
                         lines2[0].is_list_item = True
                     else:
                         break
                 lines.extend(lines2)
                 ret = ((j - ind) + 1)
             elif (chi[j].kind != InstrumentKind.EDITIONS
                   and chi[j].kind != InstrumentKind.COMMENT):
                 # EDITIONS and COMMENT siblings are skipped over; any other
                 # kind ends the look-ahead.
                 break
             j += 1
     if (len(lines) < 2):
         return -1
     # Exactly two leading items that do not start from number 1 are not
     # treated as a list: both marks are cleared.
     if ((len(lines) > 1 and lines[0].is_list_item
          and lines[1].is_list_item) and lines[0].number != 1):
         if (len(lines) == 2 or not lines[2].is_list_item):
             lines[1].is_list_item = False
             lines[0].is_list_item = lines[1].is_list_item
     # Validate every run of consecutive list items (acting on its first
     # line only).
     i = 0
     first_pass3008 = True
     while True:
         if first_pass3008: first_pass3008 = False
         else: i += 1
         if (not (i < len(lines))): break
         if (lines[i].is_list_item):
             # Not the start of a run - already handled with the run start.
             if (i > 0 and lines[i - 1].is_list_item):
                 continue
             if (((i + 1) < len(lines)) and lines[i + 1].is_list_item):
                 pass
             else:
                 # A single isolated item is not a list.
                 lines[i].is_list_item = False
                 continue
             # Keep the run if any item after the first has its
             # is_newline_before flag set ...
             new_line = False
             j = (i + 1)
             while j < len(lines):
                 if (not lines[j].is_list_item):
                     break
                 elif (lines[j].is_newline_before):
                     new_line = True
                 j += 1
             if (new_line):
                 continue
             # ... or if the line before the run ends with ':'.
             if (i > 0 and lines[i - 1].end_token.isChar(':')):
                 continue
             # Otherwise clear the mark on the whole run.
             j = i
             while j < len(lines):
                 if (not lines[j].is_list_item):
                     break
                 else:
                     lines[j].is_list_item = False
                 j += 1
     # A final non-item line ending with '.' that follows an item ending
     # with ';' is taken as the last item when it is shorter than twice the
     # previous item or begins with an all-lower-case token.
     if (len(lines) > 2):
         last = lines[len(lines) - 1]
         last2 = lines[len(lines) - 2]
         if ((not last.is_list_item and last.end_token.isChar('.')
              and last2.is_list_item) and last2.end_token.isChar(';')):
             if ((last.length_char < (last2.length_char * 2))
                     or last.begin_token.chars.is_all_lower):
                 last.is_list_item = True
     # Merge adjacent non-item lines into one, except when the second one
     # ends with ':' and is directly followed by an item.
     i = 0
     while i < (len(lines) - 1):
         if (not lines[i].is_list_item and not lines[i + 1].is_list_item):
             if (((i + 2) < len(lines)) and lines[i + 2].is_list_item
                     and lines[i + 1].end_token.isChar(':')):
                 pass
             else:
                 lines[i].end_token = lines[i + 1].end_token
                 del lines[i + 1]
                 # Compensates the increment below so the same index is
                 # examined again after the deletion.
                 i -= 1
         i += 1
     # Mixed numbered / unnumbered items: nest the unnumbered items under
     # the numbered item that precedes them.
     i = 0
     while i < (len(lines) - 1):
         if (lines[i].is_list_item):
             if (lines[i].number == 1):
                 # Every line from here to the end must be an item, and the
                 # numbered ones must continue the sequence 2, 3, ...
                 ok = True
                 num = 1
                 nonum = 0
                 j = i + 1
                 while j < len(lines):
                     if (not lines[j].is_list_item):
                         ok = False
                         break
                     elif (lines[j].number > 0):
                         num += 1
                         if (lines[j].number != num):
                             ok = False
                             break
                     else:
                         nonum += 1
                     j += 1
                 # This break leaves the outer scan, not just this item.
                 if (not ok or nonum == 0 or (num < 2)):
                     break
                 lt = lines[i]
                 j = i + 1
                 while j < len(lines):
                     if (lines[j].number > 0):
                         lt = lines[j]
                     else:
                         # Move the unnumbered line into the list stored in
                         # the tag of the latest numbered line and extend
                         # that line to cover it.
                         chli = Utils.asObjectOrNull(lt.tag, list)
                         if (chli is None):
                             chli = list()
                             lt.tag = chli
                         lt.end_token = lines[j].end_token
                         chli.append(lines[j])
                         del lines[j]
                         j -= 1
                     j += 1
         i += 1
     # At least two items must remain for the result to count as a list.
     cou = 0
     for li in lines:
         if (li.is_list_item):
             cou += 1
     if (cou < 2):
         return -1
     # Per run of items: normalize numbering and split off a heading that
     # was glued to the first item.
     i = 0
     first_pass3009 = True
     while True:
         if first_pass3009: first_pass3009 = False
         else: i += 1
         if (not (i < len(lines))): break
         if (lines[i].is_list_item):
             i0 = i
             ok = True
             cou = 1
             # Advance i to the end of the run, checking that the numbers
             # are exactly 1, 2, 3, ...; cou ends as run length + 1.
             while i < len(lines):
                 if (not lines[i].is_list_item):
                     break
                 elif (lines[i].number != cou):
                     ok = False
                 i += 1
                 cou += 1
             if (not ok):
                 # Numbering is not sequential - drop the numbers of the run.
                 i = i0
                 while i < len(lines):
                     if (not lines[i].is_list_item):
                         break
                     else:
                         lines[i].number = 0
                     i += 1
             # Run of at least three items where the first item starts with
             # a different token text than the (identical) second and third.
             if (cou > 3 and lines[i0].begin_token.getSourceText() !=
                     lines[i0 + 1].begin_token.getSourceText()
                     and lines[i0 + 1].begin_token.getSourceText()
                     == lines[i0 + 2].begin_token.getSourceText()):
                 pref = lines[i0 + 1].begin_token.getSourceText()
                 # All remaining items of the run must start with that text.
                 ok = True
                 j = i0 + 2
                 while j < i:
                     if (pref != lines[j].begin_token.getSourceText()):
                         ok = False
                         break
                     j += 1
                 if (not ok):
                     continue
                 # Search backwards inside the first item (excluding its
                 # first token) for a token with the same text.
                 tt = None
                 ok = False
                 tt = lines[i0].end_token.previous
                 while tt is not None and tt != lines[i0].begin_token:
                     if (tt.getSourceText() == pref):
                         ok = True
                         break
                     tt = tt.previous
                 if (ok):
                     # Split: the text before the found token becomes a
                     # separate line inserted in front (presumably not a
                     # list item by default - TODO confirm in LineToken),
                     # and the item now starts at the found token.
                     li0 = ListHelper.LineToken(lines[i0].begin_token,
                                                tt.previous)
                     lines[i0].begin_token = tt
                     lines.insert(i0, li0)
                     # Account for the inserted element.
                     i += 1
     # Emit one child fragment per line.
     for li in lines:
         li.correctBeginToken()
         ch = FragToken._new1259(li.begin_token, li.end_token,
                                 (InstrumentKind.LISTITEM if li.is_list_item
                                  else InstrumentKind.CONTENT), li.number)
         # A non-item line ending with ':' becomes the list head.
         if (ch.kind == InstrumentKind.CONTENT
                 and ch.end_token.isChar(':')):
             ch.kind = InstrumentKind.LISTHEAD
         res.children.append(ch)
         # Lines nested earlier via the tag become LISTITEM children.
         chli = Utils.asObjectOrNull(li.tag, list)
         if (chli is not None):
             for lt in chli:
                 ch.children.append(
                     FragToken._new1242(lt.begin_token, lt.end_token,
                                        InstrumentKind.LISTITEM))
             # The parent's own text before the first nested item is kept
             # as a leading CONTENT child.
             if (ch.begin_char < ch.children[0].begin_char):
                 ch.children.insert(
                     0,
                     FragToken._new1242(ch.begin_token,
                                        ch.children[0].begin_token.previous,
                                        InstrumentKind.CONTENT))
     return ret