Ejemplo n.º 1
0
 def initialize(lang: 'MorphLang' = None) -> None:
     """Initialize the processor service.

     Each analyzer has to be initialized separately. According to the
     original note, calling Sdk.Initialize() performs the initialization of
     this service and of all analyzers.

     The call is guarded by a class-level flag: once the flag is set, later
     calls return immediately.

     Args:
         lang(MorphLang): required languages (by default, Russian and English)

     """
     from pullenti.ner.core.internal.NumberExHelper import NumberExHelper
     from pullenti.ner.core.internal.BlockLine import BlockLine
     from pullenti.ner.core.internal.NounPhraseItem import NounPhraseItem
     from pullenti.ner.core.PrepositionHelper import PrepositionHelper
     from pullenti.ner.core.ConjunctionHelper import ConjunctionHelper
     if ProcessorService.__m_inited:
         return
     # The flag is raised before the work starts, exactly as before.
     ProcessorService.__m_inited = True
     MorphologyService.initialize(lang)
     DerivateService.initialize(lang)
     # The global Termin switch is only meant to be on while the helper
     # dictionaries below are built; try/finally guarantees it is switched
     # off again even when one of the initializers raises.
     Termin.ASSIGN_ALL_TEXTS_AS_NORMAL = True
     try:
         PrepositionHelper._initialize()
         ConjunctionHelper._initialize()
         NounPhraseItem._initialize()
         NumberHelper._initialize()
         NumberExHelper._initialize()
         BlockLine.initialize()
     finally:
         Termin.ASSIGN_ALL_TEXTS_AS_NORMAL = False
Ejemplo n.º 2
0
 def create_noun_group(gr : 'SemGraph', npt : 'NounPhraseToken') -> 'SemObject':
     """Build a SemObject for the head noun of a noun phrase and add it to the graph.

     The object's morphology is filled in depending on what the head noun is:
     a multi-token noun, a plain TextToken, a ReferentToken or a NumberToken.
     Adjectives of the phrase and its internal noun phrase (if any) are
     attached with DETAIL links.

     Args:
         gr(SemGraph): graph that receives the new object and its links
         npt(NounPhraseToken): the parsed noun phrase

     Returns:
         SemObject: the created object, or None when the head is a
         ReferentToken without a referent
     """
     head = npt.noun.begin_token
     obj = SemObject(gr)
     obj.tokens.append(npt.noun)

     # Object kind: personal pronoun / pronoun / ordinary noun.
     if npt.noun.morph.class0_.is_personal_pronoun:
         obj.typ = SemObjectType.PERSONALPRONOUN
     elif npt.noun.morph.class0_.is_pronoun:
         obj.typ = SemObjectType.PRONOUN
     else:
         obj.typ = SemObjectType.NOUN

     if npt.noun.begin_token != npt.noun.end_token:
         # Head noun spans several tokens: take the texts from the noun
         # itself and the grammatical features from the whole phrase.
         obj.morph.normal_case = npt.noun.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
         obj.morph.normal_full = npt.noun.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
         obj.morph.class0_ = MorphClass.NOUN
         obj.morph.number = npt.morph.number
         obj.morph.gender = npt.morph.gender
         obj.morph.case_ = npt.morph.case_
     elif isinstance(head, TextToken):
         # First word form that agrees with the phrase morphology wins.
         agreed = next(
             (form for form in head.morph.items
              if form.check_accord(npt.morph, False, False) and isinstance(form, MorphWordForm)),
             None)
         if agreed is not None:
             CreateHelper._set_morph(obj, Utils.asObjectOrNull(agreed, MorphWordForm))
         if obj.morph.normal_case is None:
             obj.morph.normal_case = head.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
             obj.morph.normal_full = head.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
         derivates = DerivateService.find_derivates(obj.morph.normal_full, True, None)
         if derivates is not None and len(derivates) > 0:
             obj.concept = derivates[0]
     elif isinstance(head, ReferentToken):
         referent = head.referent
         if referent is None:
             return None
         obj.morph.normal_case = str(referent)
         obj.morph.normal_full = obj.morph.normal_case
         obj.concept = referent
     elif isinstance(head, NumberToken):
         number = Utils.asObjectOrNull(head, NumberToken)
         obj.morph.gender = head.morph.gender
         obj.morph.number = head.morph.number
         if number.int_value is None:
             # No integer value: fall back to the upper-cased source text.
             obj.morph.normal_case = head.get_source_text().upper()
             obj.morph.normal_full = obj.morph.normal_case
         else:
             obj.morph.normal_case = NumberHelper.get_number_adjective(number.int_value, head.morph.gender, head.morph.number)
             obj.morph.normal_full = NumberHelper.get_number_adjective(number.int_value, MorphGender.MASCULINE, MorphNumber.SINGULAR)

     head.tag = obj

     for adjective in npt.adjectives:
         # With multi_nouns set, stop at the first adjective that differs
         # from the leading one.
         if npt.multi_nouns and adjective != npt.adjectives[0]:
             break
         adj_obj = CreateHelper.create_npt_adj(gr, npt, adjective)
         if adj_obj is not None:
             gr.add_link(SemLinkType.DETAIL, obj, adj_obj, "какой", False, None)

     if npt.internal_noun is not None:
         inner_obj = CreateHelper.create_noun_group(gr, npt.internal_noun)
         if inner_obj is not None:
             gr.add_link(SemLinkType.DETAIL, obj, inner_obj, None, False, None)

     gr.objects.append(obj)
     return obj
Ejemplo n.º 3
0
 def percent(self, value) -> float:
     """Store the percent value in the ATTR_PERCENT slot.

     A positive value is stored in its string form; any other value stores
     None in the slot. The value passed in is returned unchanged.
     """
     text = NumberHelper.doubleToString(value) if value > 0 else None
     self.addSlot(FundsReferent.ATTR_PERCENT, text, True, 0)
     return value
Ejemplo n.º 4
0
 def create_referent_with_register(self,
                                   ad: 'AnalyzerData') -> 'UnitReferent':
     """Create the unit referent for this token and optionally register it.

     The referent comes from the known unit, from the unknown unit name, or
     from ``self.ext_onto`` when neither is set. For a known unit the chain
     of its base units is created as well; the chain is processed from the
     last base unit back to the head so that each referent is registered
     (when ``ad`` is given) before it is stored in its parent's slot.

     Args:
         ad(AnalyzerData): storage used to register referents; may be None

     Returns:
         UnitReferent: the head referent of the chain
     """
     if self.unit is not None:
         head = UnitToken.__create_referent(self.unit)
     elif self.unknown_name is not None:
         head = UnitReferent()
         head.add_slot(UnitReferent.ATTR_NAME, self.unknown_name, False, 0)
         head.is_unknown = True
     else:
         head = self.ext_onto

     if self.pow0_ != 1:
         head.add_slot(UnitReferent.ATTR_POW, str(self.pow0_), False, 0)

     # Head first, followed by the referents of all base units.
     chain = [head]
     base = self.unit.base_unit if self.unit is not None else None
     while base is not None:
         chain.append(UnitToken.__create_referent(base))
         base = base.base_unit

     for idx in reversed(range(len(chain))):
         if ad is not None:
             chain[idx] = Utils.asObjectOrNull(
                 ad.register_referent(chain[idx]), UnitReferent)
         if idx == 0:
             continue
         parent = chain[idx - 1]
         parent.add_slot(UnitReferent.ATTR_BASEUNIT, chain[idx], False, 0)
         factor = parent.tag.base_multiplier
         if factor != 0:
             parent.add_slot(UnitReferent.ATTR_BASEFACTOR,
                             NumberHelper.double_to_string(factor), False, 0)
     return chain[0]
Ejemplo n.º 5
0
 def tryAttach(t: 'Token',
               can_be_pure_number: bool = False,
               typ: 'OrgItemTypeToken' = None) -> 'OrgItemNumberToken':
     """Try to recognise an organization number starting at token ``t``.

     Recognised shapes, checked in this order:
       * a number prefix accepted by MiscHelper.checkNumberPrefix;
       * a hyphen glued to a number that is not parsed as an age;
       * a number glued to a preceding hyphen;
       * for military-unit / colony types: a number optionally followed by
         ``-``/``\\``/``/`` plus a number or a single letter, or directly by a
         single letter;
       * for the same types: a single letter followed (optionally through a
         hyphen) by a number.

     Args:
         t(Token): start token
         can_be_pure_number(bool): not used by this implementation
         typ(OrgItemTypeToken): organization type found earlier, may be None

     Returns:
         OrgItemNumberToken: the number token, or None if nothing matched
     """
     if t is None:
         return None

     def numbered_kind() -> bool:
         # Called lazily, at the same points where the type name was tested
         # inline before.
         if typ is None or typ.typ is None:
             return False
         return (typ.typ == "войсковая часть" or typ.typ == "військова частина"
                 or "колония" in typ.typ or "колонія" in typ.typ)

     as_text = Utils.asObjectOrNull(t, TextToken)
     if as_text is not None:
         prefixed = MiscHelper.checkNumberPrefix(as_text)
         if isinstance(prefixed, NumberToken) and not prefixed.is_newline_before:
             return OrgItemNumberToken._new1704(as_text, prefixed,
                                                str(prefixed.value))

     if (t.is_hiphen and isinstance(t.next0_, NumberToken)
             and not (t.is_whitespace_before or t.is_whitespace_after)):
         if NumberHelper.tryParseAge(t.next0_) is None:
             return OrgItemNumberToken._new1704(t, t.next0_,
                                                str(t.next0_.value))

     if isinstance(t, NumberToken):
         if (not t.is_whitespace_before and t.previous is not None
                 and t.previous.is_hiphen):
             return OrgItemNumberToken._new1704(t, t, str(t.value))
         # NOTE(review): ">= 4 or <= 6" holds for every length, so it does
         # not restrict anything; kept as-is to preserve behaviour - confirm
         # the intended bounds before tightening.
         if numbered_kind() and (t.length_char >= 4 or t.length_char <= 6):
             res = OrgItemNumberToken._new1704(t, t, str(t.value))
             sep = t.next0_
             if (sep is not None
                     and (sep.is_hiphen or sep.isCharOf("\\/"))
                     and not sep.is_whitespace_after):
                 tail = sep.next0_
                 if (isinstance(tail, NumberToken)
                         and (t.length_char + tail.length_char) < 9):
                     res.end_token = tail
                     res.number = "{0}-{1}".format(res.number,
                                                   res.end_token.value)
                 elif (isinstance(tail, TextToken)
                       and tail.length_char == 1 and tail.chars.is_letter):
                     res.end_token = tail
                     res.number = "{0}{1}".format(res.number,
                                                  res.end_token.term)
             elif (isinstance(sep, TextToken) and sep.length_char == 1
                   and sep.chars.is_letter):
                 res.end_token = sep
                 res.number = "{0}{1}".format(res.number, res.end_token.term)
             return res

     if (isinstance(t, TextToken) and t.length_char == 1
             and t.chars.is_letter and not t.is_whitespace_after
             and numbered_kind()):
         num_tok = t.next0_
         if num_tok is not None and num_tok.is_hiphen:
             num_tok = num_tok.next0_
         if isinstance(num_tok, NumberToken) and not num_tok.is_whitespace_before:
             res = OrgItemNumberToken(t, num_tok)
             res.number = "{0}{1}".format(t.term, num_tok.value)
             return res
     return None
Ejemplo n.º 6
0
 def _addFioIdentity(self, last_name : 'PersonMorphCollection', first_name : 'PersonMorphCollection', middle_name : object) -> None:
     """Add surname / first name / middle name variants as slots of this person.

     A last name carrying a positive ``number`` is stored as a nickname
     (Roman numeral if available, otherwise the decimal string) instead of a
     surname. The middle name is only processed when a first name is given
     and may be either a plain string or a PersonMorphCollection.
     ``_correctData()`` is always called at the end.
     """
     from pullenti.ner.person.internal.PersonMorphCollection import PersonMorphCollection
     if last_name is not None:
         if last_name.number > 0:
             roman = NumberHelper.getNumberRoman(last_name.number)
             nickname = roman if roman is not None else str(last_name.number)
             self.addSlot(PersonReferent.ATTR_NICKNAME, nickname, False, 0)
         else:
             last_name.correct()
             self.__m_surname_occurs.append(last_name)
             for surname in last_name.values:
                 self.addSlot(PersonReferent.ATTR_LASTNAME, surname, False, 0)

     if first_name is not None:
         first_name.correct()
         # Only collections whose head is longer than two characters are
         # remembered as occurrences.
         if first_name.head is not None and len(first_name.head) > 2:
             self.__m_name_occurs.append(first_name)
         for name in first_name.values:
             self.addSlot(PersonReferent.ATTR_FIRSTNAME, name, False, 0)

         if isinstance(middle_name, str):
             self.addSlot(PersonReferent.ATTR_MIDDLENAME, middle_name, False, 0)
         elif isinstance(middle_name, PersonMorphCollection):
             middle = Utils.asObjectOrNull(middle_name, PersonMorphCollection)
             if middle.head is not None and len(middle.head) > 2:
                 self.__m_sec_occurs.append(middle)
             for name in middle.values:
                 self.addSlot(PersonReferent.ATTR_MIDDLENAME, name, False, 0)

     self._correctData()
Ejemplo n.º 7
0
 def initialize(lang: 'MorphLang' = None) -> None:
     """Initialize the processor service.

     Attention: the analyzers have to be initialized after this call (see
     the SDK documentation).

     The call is guarded by a class-level flag: once the flag is set, later
     calls return immediately.

     Args:
         lang(MorphLang): required languages (by default, Russian and English)
     """
     from pullenti.ner.core.internal.NumberExHelper import NumberExHelper
     from pullenti.ner.core.internal.NounPhraseItem import NounPhraseItem
     if ProcessorService.__m_inited:
         return
     # The flag is raised before the work starts, exactly as before.
     ProcessorService.__m_inited = True
     Morphology.initialize(lang)
     Explanatory.initialize(lang)
     # The global Termin switch is only meant to be on while the helper
     # dictionaries below are built; try/finally guarantees it is switched
     # off again even when one of the initializers raises.
     Termin.ASSIGN_ALL_TEXTS_AS_NORMAL = True
     try:
         NounPhraseItem._initialize()
         NumberHelper._initialize()
         NumberExHelper._initialize()
         BlockLine.initialize()
     finally:
         Termin.ASSIGN_ALL_TEXTS_AS_NORMAL = False
Ejemplo n.º 8
0
 def percent(self) -> float:
     """Percent of the total amount.

     Reads the ATTR_PERCENT slot and converts it to a number; returns 0 when
     the slot is empty or its text cannot be converted.
     """
     raw = self.getStringValue(FundsReferent.ATTR_PERCENT)
     parsed = None if raw is None else NumberHelper.stringToDouble(raw)
     return 0 if parsed is None else parsed
Ejemplo n.º 9
0
 def real_value(self, value_) -> float:
     """Store a real amount as two slots: ATTR_VALUE and ATTR_REST.

     The part of the string form before the decimal point goes to
     ATTR_VALUE. ATTR_REST receives the difference between ``value_`` and
     ``self.value`` multiplied by 100 and floored (a small epsilon is added
     before flooring). ``value_`` is returned unchanged.
     """
     text = NumberHelper.doubleToString(value_)
     dot = text.find('.')
     # Cut at the point only when it is not the very first character.
     whole = text[:dot] if dot > 0 else text
     self.addSlot(MoneyReferent.ATTR_VALUE, whole, True, 0)
     # self.value is read only after ATTR_VALUE has been written above.
     rest = (value_ - self.value) * 100
     self.addSlot(MoneyReferent.ATTR_REST, str(math.floor(rest + .0001)),
                  True, 0)
     return value_
Ejemplo n.º 10
0
 def __attach_spec_postfix(t: 'Token') -> 'NumberExToken':
     """Recognise a one-token special postfix: '%' or a money character.

     Returns:
         NumberExToken: a PERCENT token for '%', a MONEY token when
         NumberHelper._is_money_char recognises the token, otherwise None
         (also for a None token)
     """
     if t is not None:
         if t.is_char_of("%"):
             return NumberExToken(t, t, "", NumberSpellingType.DIGIT,
                                  NumberExType.PERCENT)
         currency = NumberHelper._is_money_char(t)
         if currency is not None:
             return NumberExToken._new415(t, t, "",
                                          NumberSpellingType.DIGIT,
                                          NumberExType.MONEY, currency)
     return None
Ejemplo n.º 11
0
 def __attachSpecPostfix(t: 'Token') -> 'NumberExToken':
     """Recognise a one-token special postfix: '%' or a money character.

     Returns:
         NumberExToken: a PERCENT token for '%', a MONEY token when
         NumberHelper._isMoneyChar recognises the token, otherwise None
         (also for a None token)
     """
     if t is not None:
         if t.isCharOf("%"):
             return NumberExToken(t, t, "", NumberSpellingType.DIGIT,
                                  NumberExType.PERCENT)
         currency = NumberHelper._isMoneyChar(t)
         if currency is not None:
             return NumberExToken._new478(t, t, "",
                                          NumberSpellingType.DIGIT,
                                          NumberExType.MONEY, currency)
     return None
Ejemplo n.º 12
0
 def value(self, value_) -> str:
     """Set the textual value and refresh the cached integer and real values.

     The text is normalised first: None becomes an empty string, a trailing
     ".0" is dropped and leading zeros are removed (a zero directly before
     the decimal point is kept). The integer cache is None when the text is
     not an integer; the real cache is NaN when it cannot be converted.
     ``value_`` is returned unchanged.
     """
     from pullenti.ner.core.NumberHelper import NumberHelper
     text = Utils.ifNotNull(value_, "")
     if len(text) > 2 and text.endswith(".0"):
         text = text[:-2]
     while len(text) > 1 and text[0] == '0' and text[1] != '.':
         text = text[1:]
     self.__m_value = text

     int_out = RefOutArgWrapper(0)
     is_int = Utils.tryParseInt(self.__m_value, int_out)
     self.__m_int_val = int_out.value if is_int else None

     real = NumberHelper.stringToDouble(self.__m_value)
     self.__m_real_val = math.nan if real is None else real
     return value_
Ejemplo n.º 13
0
 def create(t: 'Token', names: 'TerminCollection') -> 'BlockLine':
     """Build a BlockLine describing the text line that starts at token ``t``.

     The line spans from ``t`` up to the last token before the next token
     that starts a new line. The method then, in order:
       1. skips a leading numbering (arabic or roman numbers, optional dots)
          and remembers its end in ``number_end``;
       2. looks the following token up in the class ontology to get a block
          type, rejecting the match in a number of contexts;
       3. treats the section sign followed by a number as a chapter marker;
       4. looks the rest up in the ``names`` collection, if given;
       5. gathers statistics over the line (words, non-words, all-upper,
          verb presence, dotted table-of-contents tail);
       6. if the type is still undefined, applies noun-phrase heuristics
          based on the head noun of the line.

     Args:
         t(Token): first token of the line
         names(TerminCollection): known names to look for; may be None

     Returns:
         BlockLine: the filled line description; None when ``t`` is None or
         when one of the list-like head nouns in step 6 ends exactly at the
         end of the line
     """
     if (t is None):
         return None
     res = BlockLine(t, t)
     # Extend the line up to (not including) the next token that starts a new line.
     tt = t
     while tt is not None:
         if (tt != t and tt.is_newline_before):
             break
         else:
             res.end_token = tt
         tt = tt.next0_
     # Leading numbering. NOTE(review): nums is incremented but never read
     # in this method.
     nums = 0
     while t is not None and t.next0_ is not None and t.end_char <= res.end_char:
         if (isinstance(t, NumberToken)):
             pass
         else:
             # Not a number token: accept a roman numeral that is not the last token.
             rom = NumberHelper.tryParseRoman(t)
             if (rom is not None and rom.end_token.next0_ is not None):
                 t = rom.end_token
             else:
                 break
         # The number must be followed by '.' or by a text token that is
         # not all lower-case.
         if (t.next0_.isChar('.')):
             pass
         elif ((isinstance(t.next0_, TextToken))
               and not t.next0_.chars.is_all_lower):
             pass
         else:
             break
         res.number_end = t
         t = t.next0_
         if (t.isChar('.') and t.next0_ is not None):
             res.number_end = t
             t = t.next0_
         # The numbering is followed by a line break: nothing else on this line.
         if (t.is_newline_before):
             return res
         nums += 1
     # Block-type keyword lookup at the current token.
     tok = BlockLine.__m_ontology.tryParse(t, TerminParseAttr.NO)
     if (tok is None):
         # Retry at the head noun of a multi-token noun phrase.
         npt1 = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
         if (npt1 is not None and npt1.end_token != npt1.begin_token):
             tok = BlockLine.__m_ontology.tryParse(npt1.noun.begin_token,
                                                   TerminParseAttr.NO)
     if (tok is not None):
         # A keyword right after ':' is rejected.
         if (t.previous is not None and t.previous.isChar(':')):
             tok = (None)
     if (tok is not None):
         typ_ = Utils.valToEnum(tok.termin.tag, BlkTyps)
         if (typ_ == BlkTyps.CONSLUSION):
             # Accepted when the token ends the line, or when it is followed
             # by a preposition and a CHAPTER keyword; rejected otherwise.
             if (t.is_newline_after):
                 pass
             elif (t.next0_ is not None
                   and t.next0_.morph.class0_.is_preposition
                   and t.next0_.next0_ is not None):
                 tok2 = BlockLine.__m_ontology.tryParse(
                     t.next0_.next0_, TerminParseAttr.NO)
                 if (tok2 is not None and (Utils.valToEnum(
                         tok2.termin.tag, BlkTyps)) == BlkTyps.CHAPTER):
                     pass
                 else:
                     tok = (None)
             else:
                 tok = (None)
         # The token's language must match the base language of the text.
         if (t.kit.base_language != t.morph.language):
             tok = (None)
         if (typ_ == BlkTyps.INDEX and not t.isValue("ОГЛАВЛЕНИЕ", None)):
             # INDEX keyword followed by more text: rejected when what
             # follows is not a noun phrase, or is a genitive noun phrase
             # ending the line.
             if (not t.is_newline_after and t.next0_ is not None):
                 npt = NounPhraseHelper.tryParse(t.next0_,
                                                 NounPhraseParseAttr.NO, 0)
                 if (npt is not None and npt.is_newline_after
                         and npt.morph.case_.is_genitive):
                     tok = (None)
                 elif (npt is None):
                     tok = (None)
         if ((typ_ == BlkTyps.INTRO and tok is not None
              and not tok.is_newline_after)
                 and t.isValue("ВВЕДЕНИЕ", None)):
             # This INTRO keyword followed by a genitive noun phrase is rejected.
             npt = NounPhraseHelper.tryParse(t.next0_,
                                             NounPhraseParseAttr.NO, 0)
             if (npt is not None and npt.morph.case_.is_genitive):
                 tok = (None)
         if (tok is not None):
             # Keyword accepted: without a numbering the keyword itself
             # becomes the number end.
             if (res.number_end is None):
                 res.number_end = tok.end_token
                 if (res.number_end.end_char > res.end_char):
                     res.end_token = res.number_end
             res.typ = typ_
             t = tok.end_token
             if (t.next0_ is not None and t.next0_.isCharOf(":.")):
                 t = t.next0_
                 res.end_token = t
             if (t.is_newline_after or t.next0_ is None):
                 return res
             t = t.next0_
     # Section sign followed by a number marks a chapter.
     if (t.isChar('§') and (isinstance(t.next0_, NumberToken))):
         res.typ = BlkTyps.CHAPTER
         res.number_end = t
         t = t.next0_
     if (names is not None):
         # A known name that ends the line.
         tok2 = names.tryParse(t, TerminParseAttr.NO)
         if (tok2 is not None and tok2.end_token.is_newline_after):
             res.end_token = tok2.end_token
             res.is_exist_name = True
             if (res.typ == BlkTyps.UNDEFINED):
                 # Re-analyse the text after the numbering (without names)
                 # to pick the type; anything else defaults to CHAPTER.
                 li2 = BlockLine.create((None if res.number_end is None else
                                         res.number_end.next0_), None)
                 if (li2 is not None
                         and ((li2.typ == BlkTyps.LITERATURE
                               or li2.typ == BlkTyps.INTRO
                               or li2.typ == BlkTyps.CONSLUSION))):
                     res.typ = li2.typ
                 else:
                     res.typ = BlkTyps.CHAPTER
             return res
     # Line ending in a number or a dot, preceded by a dot: flag the tail
     # and step t1 back over the run of dots.
     t1 = res.end_token
     if ((((isinstance(t1, NumberToken)) or t1.isChar('.')))
             and t1.previous is not None):
         t1 = t1.previous
         if (t1.isChar('.')):
             res.has_content_item_tail = True
             while t1 is not None and t1.begin_char > res.begin_char:
                 if (not t1.isChar('.')):
                     break
                 t1 = t1.previous
     # Statistics over the tokens from t up to t1.
     res.is_all_upper = True
     while t is not None and t.end_char <= t1.end_char:
         if (not ((isinstance(t, TextToken))) or not t.chars.is_letter):
             res.not_words += 1
         else:
             mc = t.getMorphClassInDictionary()
             if (mc.is_undefined):
                 res.not_words += 1
             elif (t.length_char > 2):
                 res.words += 1
             if (not t.chars.is_all_upper):
                 res.is_all_upper = False
             if ((t).is_pure_verb):
                 # Pure verbs whose term ends with "ING" are not counted.
                 if (not (t).term.endswith("ING")):
                     res.has_verb = True
         t = t.next0_
     if (res.typ == BlkTyps.UNDEFINED):
         # Heuristics on the noun phrase that starts the line (after the
         # numbering, if there was one).
         npt = NounPhraseHelper.tryParse(
             (res.begin_token if res.number_end is None else
              res.number_end.next0_), NounPhraseParseAttr.NO, 0)
         if (npt is not None):
             if (npt.noun.isValue("ХАРАКТЕРИСТИКА", None)
                     or npt.noun.isValue("СОДЕРЖАНИЕ", "ЗМІСТ")):
                 # INTRO candidate: the rest of the line must consist of
                 # genitive noun phrases (dots are skipped).
                 ok = True
                 tt = npt.end_token.next0_
                 first_pass2779 = True
                 while True:
                     if first_pass2779: first_pass2779 = False
                     else: tt = tt.next0_
                     if (not (tt is not None
                              and tt.end_char <= res.end_char)):
                         break
                     if (tt.isChar('.')):
                         continue
                     npt2 = NounPhraseHelper.tryParse(
                         tt, NounPhraseParseAttr.NO, 0)
                     if (npt2 is None or not npt2.morph.case_.is_genitive):
                         ok = False
                         break
                     tt = npt2.end_token
                     if (tt.end_char > res.end_char):
                         # The phrase runs past the line end: extend the
                         # line to the end of the next physical line.
                         res.end_token = tt
                         if (not tt.is_newline_after):
                             while res.end_token.next0_ is not None:
                                 if (res.end_token.is_newline_after):
                                     break
                                 res.end_token = res.end_token.next0_
                 if (ok):
                     res.typ = BlkTyps.INTRO
                     res.is_exist_name = True
             elif (npt.noun.isValue("ВЫВОД", "ВИСНОВОК")
                   or npt.noun.isValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")):
                 # CONSLUSION candidate: the rest of the line may contain
                 # only ',', '.', "and" and the listed nouns.
                 ok = True
                 tt = npt.end_token.next0_
                 first_pass2780 = True
                 while True:
                     if first_pass2780: first_pass2780 = False
                     else: tt = tt.next0_
                     if (not (tt is not None
                              and tt.end_char <= res.end_char)):
                         break
                     if (tt.isCharOf(",.") or tt.is_and):
                         continue
                     npt1 = NounPhraseHelper.tryParse(
                         tt, NounPhraseParseAttr.NO, 0)
                     if (npt1 is not None):
                         if (npt1.noun.isValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")
                                 or npt1.noun.isValue(
                                     "РЕКОМЕНДАЦИЯ", "РЕКОМЕНДАЦІЯ")
                                 or npt1.noun.isValue(
                                     "ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ")):
                             tt = npt1.end_token
                             if (tt.end_char > res.end_char):
                                 res.end_token = tt
                                 if (not tt.is_newline_after):
                                     while res.end_token.next0_ is not None:
                                         if (res.end_token.is_newline_after
                                             ):
                                             break
                                         res.end_token = res.end_token.next0_
                             continue
                     ok = False
                     break
                 if (ok):
                     res.typ = BlkTyps.CONSLUSION
                     res.is_exist_name = True
             if (res.typ == BlkTyps.UNDEFINED and npt is not None
                     and npt.end_char <= res.end_char):
                 # LITERATURE candidate: a publication noun phrase
                 # (__isPub) or one of the listed head nouns.
                 ok = False
                 publ = 0
                 if (BlockLine.__isPub(npt)):
                     ok = True
                     publ = 1
                 elif ((npt.noun.isValue("СПИСОК", None)
                        or npt.noun.isValue("УКАЗАТЕЛЬ", "ПОКАЖЧИК")
                        or npt.noun.isValue("ПОЛОЖЕНИЕ", "ПОЛОЖЕННЯ"))
                       or npt.noun.isValue("ВЫВОД", "ВИСНОВОК")
                       or npt.noun.isValue("РЕЗУЛЬТАТ", "ДОСЛІДЖЕННЯ")):
                     # Such a phrase ending exactly at the line end makes
                     # the whole method return None.
                     if (npt.end_char == res.end_char):
                         return None
                     ok = True
                 if (ok):
                     if (npt.begin_token == npt.end_token
                             and npt.noun.isValue("СПИСОК", None)
                             and npt.end_char == res.end_char):
                         ok = False
                     # The rest of the line may contain only punctuation,
                     # "and", prepositions and the listed nouns.
                     # Note: npt is reassigned inside this loop.
                     tt = npt.end_token.next0_
                     first_pass2781 = True
                     while True:
                         if first_pass2781: first_pass2781 = False
                         else: tt = tt.next0_
                         if (not (tt is not None
                                  and tt.end_char <= res.end_char)):
                             break
                         if (tt.isCharOf(",.:") or tt.is_and
                                 or tt.morph.class0_.is_preposition):
                             continue
                         if (tt.isValue("ОТРАЖЕНЫ", "ВІДОБРАЖЕНІ")):
                             continue
                         npt = NounPhraseHelper.tryParse(
                             tt, NounPhraseParseAttr.NO, 0)
                         if (npt is None):
                             ok = False
                             break
                         if (((BlockLine.__isPub(npt) or npt.noun.isValue(
                                 "РАБОТА", "РОБОТА") or npt.noun.isValue(
                                     "ИССЛЕДОВАНИЕ", "ДОСЛІДЖЕННЯ"))
                              or npt.noun.isValue("АВТОР", None)
                              or npt.noun.isValue("ТРУД", "ПРАЦЯ"))
                                 or npt.noun.isValue("ТЕМА", None)
                                 or npt.noun.isValue(
                                     "ДИССЕРТАЦИЯ", "ДИСЕРТАЦІЯ")):
                             tt = npt.end_token
                             if (BlockLine.__isPub(npt)):
                                 publ += 1
                             if (tt.end_char > res.end_char):
                                 res.end_token = tt
                                 if (not tt.is_newline_after):
                                     while res.end_token.next0_ is not None:
                                         if (res.end_token.is_newline_after
                                             ):
                                             break
                                         res.end_token = res.end_token.next0_
                             continue
                         ok = False
                         break
                     if (ok):
                         res.typ = BlkTyps.LITERATURE
                         res.is_exist_name = True
                         # No publication noun and the line ends within the
                         # first two thirds of the text: downgrade to MISC
                         # (numbered) or back to UNDEFINED.
                         if (publ == 0 and (res.end_char < ((math.floor(
                             (len(res.kit.sofa.text) * 2) / 3))))):
                             if (res.number_end is not None):
                                 res.typ = BlkTyps.MISC
                             else:
                                 res.typ = BlkTyps.UNDEFINED
     return res
Ejemplo n.º 14
0
 def __parse_number(t : 'Token', res : 'InstrToken1', prev : 'InstrToken1') -> None:
     if (((isinstance(t, NumberToken)) and t.int_value is not None and t.typ == NumberSpellingType.DIGIT) and (t.int_value < 3000)): 
         if (len(res.numbers) >= 4): 
             pass
         if (t.morph.class0_.is_adjective and res.typ_container_rank == 0): 
             return
         nwp = NumberHelper.try_parse_number_with_postfix(t)
         if (nwp is not None): 
             if (nwp.end_token.is_whitespace_before): 
                 pass
             else: 
                 return
         if ((t.next0_ is not None and (t.whitespaces_after_count < 3) and t.next0_.chars.is_letter) and t.next0_.chars.is_all_lower): 
             if (not t.is_whitespace_after and t.next0_.length_char == 1): 
                 pass
             elif (len(res.numbers) == 0): 
                 res.num_typ = NumberTypes.DIGIT
                 res.numbers.append(str(t.value))
                 res.end_token = t
                 res.num_end_token = res.end_token
                 res.num_begin_token = res.num_end_token
                 return
             else: 
                 return
         if (res.num_typ == NumberTypes.UNDEFINED): 
             res.num_typ = NumberTypes.DIGIT
         else: 
             res.num_typ = NumberTypes.COMBO
         if (len(res.numbers) > 0 and t.is_whitespace_before): 
             return
         if (len(res.numbers) == 0): 
             res.num_begin_token = t
         if ((t.next0_ is not None and t.next0_.is_hiphen and (isinstance(t.next0_.next0_, NumberToken))) and t.next0_.next0_.int_value is not None and t.next0_.next0_.int_value > t.int_value): 
             res.min_number = str(t.value)
             t = t.next0_.next0_
         elif (((t.next0_ is not None and t.next0_.is_char_of(")") and t.next0_.next0_ is not None) and t.next0_.next0_.is_hiphen and (isinstance(t.next0_.next0_.next0_, NumberToken))) and t.next0_.next0_.next0_.int_value is not None and t.next0_.next0_.next0_.int_value > t.int_value): 
             res.min_number = str(t.value)
             t = t.next0_.next0_.next0_
         res.numbers.append(str(t.value))
         res.num_end_token = t
         res.end_token = res.num_end_token
         res.num_suffix = (None)
         ttt = t.next0_
         first_pass3281 = True
         while True:
             if first_pass3281: first_pass3281 = False
             else: ttt = ttt.next0_
             if (not (ttt is not None and (len(res.numbers) < 4))): break
             ok1 = False
             ok2 = False
             if ((ttt.is_char_of("._") and not ttt.is_whitespace_after and (isinstance(ttt.next0_, NumberToken))) and ((ttt.next0_.typ == NumberSpellingType.DIGIT or (((ttt.next0_.typ == NumberSpellingType.WORDS)) and ttt.next0_.chars.is_latin_letter and not ttt.is_whitespace_after)))): 
                 ok1 = True
             elif ((ttt.is_char_of("(<") and (isinstance(ttt.next0_, NumberToken)) and ttt.next0_.next0_ is not None) and ttt.next0_.next0_.is_char_of(")>")): 
                 ok2 = True
             if (ok1 or ok2): 
                 ttt = ttt.next0_
                 res.numbers.append(str(ttt.value))
                 res.num_typ = (NumberTypes.TWODIGITS if len(res.numbers) == 2 else ((NumberTypes.THREEDIGITS if len(res.numbers) == 3 else NumberTypes.FOURDIGITS)))
                 if ((ttt.next0_ is not None and ttt.next0_.is_char_of(")>") and ttt.next0_.next0_ is not None) and ttt.next0_.next0_.is_char('.')): 
                     ttt = ttt.next0_
                 elif (ok2): 
                     ttt = ttt.next0_
                 res.num_end_token = ttt
                 res.end_token = res.num_end_token
                 t = res.end_token
                 continue
             if (((isinstance(ttt, TextToken)) and ttt.length_char == 1 and ttt.chars.is_letter) and not ttt.is_whitespace_before and len(res.numbers) == 1): 
                 res.numbers.append(ttt.term)
                 res.num_typ = NumberTypes.COMBO
                 res.num_end_token = ttt
                 res.end_token = res.num_end_token
                 t = res.end_token
                 continue
             break
         if (t.next0_ is not None and t.next0_.is_char_of(").")): 
             res.num_suffix = t.next0_.get_source_text()
             res.num_end_token = t.next0_
             res.end_token = res.num_end_token
             t = res.end_token
         return
     if (((isinstance(t, NumberToken)) and t.typ == NumberSpellingType.WORDS and res.typ_container_rank > 0) and len(res.numbers) == 0): 
         res.numbers.append(str(t.value))
         res.num_typ = NumberTypes.DIGIT
         res.num_begin_token = t
         if (t.next0_ is not None and t.next0_.is_char('.')): 
             t = t.next0_
             res.num_suffix = "."
         res.num_end_token = t
         res.end_token = res.num_end_token
         return
     nt = NumberHelper.try_parse_roman(t)
     if ((nt is not None and nt.value == "10" and t.next0_ is not None) and t.next0_.is_char(')')): 
         nt = (None)
     if (nt is not None and nt.value == "100"): 
         nt = (None)
     if (nt is not None and nt.typ == NumberSpellingType.ROMAN): 
         if (res.num_typ == NumberTypes.UNDEFINED): 
             res.num_typ = NumberTypes.ROMAN
         else: 
             res.num_typ = NumberTypes.COMBO
         if (len(res.numbers) > 0 and t.is_whitespace_before): 
             return
         if (len(res.numbers) == 0): 
             res.num_begin_token = t
         res.numbers.append(str(nt.value))
         res.num_end_token = nt.end_token
         res.end_token = res.num_end_token
         t = res.end_token
         if (res.num_typ == NumberTypes.ROMAN and ((res.typ == InstrToken1.Types.CHAPTER or res.typ == InstrToken1.Types.SECTION or res.typ == InstrToken1.Types.LINE))): 
             if ((t.next0_ is not None and t.next0_.is_char_of("._<") and (isinstance(t.next0_.next0_, NumberToken))) and t.next0_.next0_.typ == NumberSpellingType.DIGIT): 
                 t = t.next0_.next0_
                 res.numbers.append(str(t.value))
                 res.num_typ = NumberTypes.TWODIGITS
                 if (t.next0_ is not None and t.next0_.is_char('>')): 
                     t = t.next0_
                 res.num_end_token = t
                 res.end_token = res.num_end_token
                 if ((t.next0_ is not None and t.next0_.is_char_of("._<") and (isinstance(t.next0_.next0_, NumberToken))) and t.next0_.next0_.typ == NumberSpellingType.DIGIT): 
                     t = t.next0_.next0_
                     res.numbers.append(str(t.value))
                     res.num_typ = NumberTypes.THREEDIGITS
                     if (t.next0_ is not None and t.next0_.is_char('>')): 
                         t = t.next0_
                     res.num_end_token = t
                     res.end_token = res.num_end_token
         if (t.next0_ is not None and t.next0_.is_char_of(").")): 
             res.num_suffix = t.next0_.get_source_text()
             res.num_end_token = t.next0_
             res.end_token = res.num_end_token
             t = res.end_token
         return
     if (((isinstance(t, TextToken)) and t.length_char == 1 and t.chars.is_letter) and t == res.begin_token): 
         if ((not t.is_whitespace_after and (isinstance(t.next0_, NumberToken)) and t.next0_.next0_ is not None) and t.next0_.next0_.is_char('.')): 
             res.num_begin_token = t
             res.num_typ = NumberTypes.DIGIT
             res.numbers.append(str(t.next0_.value))
             res.num_suffix = (t.term + ".")
             res.num_end_token = t.next0_.next0_
             res.end_token = res.num_end_token
             t = res.end_token
             return
         if (t.next0_ is not None and t.next0_.is_char_of(".)")): 
             if (((t.next0_.is_char('.') and (isinstance(t.next0_.next0_, NumberToken)) and t.next0_.next0_.next0_ is not None) and t.next0_.next0_.next0_.is_char(')') and not t.next0_.is_whitespace_after) and not t.next0_.next0_.is_whitespace_after): 
                 res.num_typ = NumberTypes.TWODIGITS
                 res.numbers.append(t.term)
                 res.numbers.append(str(t.next0_.next0_.value))
                 res.num_suffix = ")"
                 res.num_begin_token = t
                 res.num_end_token = t.next0_.next0_.next0_
                 res.end_token = res.num_end_token
                 t = res.end_token
                 return
             if (t.next0_.is_char('.') and ((t.chars.is_all_upper or (isinstance(t.next0_.next0_, NumberToken))))): 
                 pass
             else: 
                 tmp1 = InstrToken1(t, t.next0_)
                 tmp1.numbers.append(t.term)
                 if (tmp1.last_number > 1 and t.next0_.is_char_of(".") and ((prev is None or (prev.last_number + 1) != tmp1.last_number))): 
                     pass
                 else: 
                     if (len(res.numbers) == 0): 
                         res.num_begin_token = t
                     res.num_typ = NumberTypes.LETTER
                     res.numbers.append(t.term)
                     res.num_begin_token = t
                     res.num_end_token = t.next0_
                     res.end_token = res.num_end_token
                     t = res.end_token
                     res.num_suffix = t.get_source_text()
                     return
Ejemplo n.º 15
0
 def tryParseNumberWithPostfix(t: 'Token') -> 'NumberExToken':
     """ Extract a number with a standard measure postfix, e.g.: 10 кв.м.

     Starting at ``t`` the method looks for a number token (optionally
     preceded by a one-character money sign) and a postfix found through
     ``NumberExHelper._m_postfixes``.

     Args:
         t(Token): token to start parsing from (None is accepted)

     Returns:
         NumberExToken: the recognised number with its extended type,
         or None when no such construction starts at ``t``
     """
     if (t is None):
         return None
     # Keep the very first token: it is the begin token of the result when a
     # money sign precedes the number.
     t0 = t
     is_dollar = None
     # A one-character token that is followed by something may be a money sign;
     # when _isMoneyChar recognises it, the number is expected right after it.
     if (t.length_char == 1 and t.next0_ is not None):
         is_dollar = NumberHelper._isMoneyChar(t)
         if ((is_dollar) is not None):
             t = t.next0_
     nt = Utils.asObjectOrNull(t, NumberToken)
     if (nt is None):
         # Not a number token. First fallback: "(" NUMBER ")" followed by a
         # postfix tagged MONEY, provided "(" is not preceded by a number.
         if ((not ((isinstance(t.previous, NumberToken))) and t.isChar('(')
              and (isinstance(t.next0_, NumberToken)))
                 and t.next0_.next0_ is not None
                 and t.next0_.next0_.isChar(')')):
             toks1 = NumberExHelper._m_postfixes.tryParse(
                 t.next0_.next0_.next0_, TerminParseAttr.NO)
             if (toks1 is not None
                     and (Utils.valToEnum(toks1.termin.tag, NumberExType))
                     == NumberExType.MONEY):
                 nt0 = Utils.asObjectOrNull(t.next0_, NumberToken)
                 res = NumberExToken._new471(t, toks1.end_token, nt0.value,
                                             nt0.typ, NumberExType.MONEY,
                                             nt0.real_value,
                                             toks1.begin_token.morph)
                 return NumberExHelper.__correctMoney(
                     res, toks1.begin_token)
         # Second fallback: a single adjective token whose term is a numeral
         # glued to a postfix. Split the term at position i and look the two
         # halves up separately.
         tt = Utils.asObjectOrNull(t, TextToken)
         if (tt is None or not tt.morph.class0_.is_adjective):
             return None
         val = tt.term
         # i runs from 4 while i < len(val) - 5; it is incremented at the start
         # of every pass except the first one.
         i = 4
         first_pass2785 = True
         while True:
             if first_pass2785: first_pass2785 = False
             else: i += 1
             if (not (i < (len(val) - 5))): break
             v = val[0:0 + i]
             li = NumberHelper._m_nums.tryAttachStr(v, tt.morph.language)
             if (li is None):
                 continue
             vv = val[i:]
             lii = NumberExHelper._m_postfixes.tryAttachStr(
                 vv, tt.morph.language)
             if (lii is not None and len(lii) > 0):
                 re = NumberExToken._new472(
                     t, t, str((li[0].tag)), NumberSpellingType.WORDS,
                     Utils.valToEnum(lii[0].tag, NumberExType), t.morph)
                 NumberExHelper.__correctExtTypes(re)
                 return re
             # The prefix is a known numeral but the remainder is not a
             # postfix: the search stops at the first numeral prefix found.
             break
         return None
     # A number with nothing after it and no money sign cannot carry a postfix.
     if (t.next0_ is None and is_dollar is None):
         return None
     # f is the numeric value, t1 the token expected to start the postfix.
     f = nt.real_value
     t1 = nt.next0_
     # "," / "." or another number less than 3 whitespaces away: try to read the
     # whole thing as one real number and continue after it.
     if (((t1 is not None and t1.isCharOf(",.")))
             or (((isinstance(t1, NumberToken)) and
                  (t1.whitespaces_before_count < 3)))):
         tt11 = NumberHelper.tryParseRealNumber(nt, False)
         if (tt11 is not None):
             t1 = tt11.end_token.next0_
             f = tt11.real_value
     if (t1 is None):
         # End of the token chain is acceptable only for "<money sign> NUMBER".
         if (is_dollar is None):
             return None
     elif ((t1.next0_ is not None and t1.next0_.isValue("С", "З")
            and t1.next0_.next0_ is not None)
           and t1.next0_.next0_.isValue("ПОЛОВИНА", None)):
         # The two tokens after t1 are "С" + "ПОЛОВИНА": add one half and
         # continue from the "ПОЛОВИНА" token.
         f += .5
         t1 = t1.next0_.next0_
     # Skip a hyphen between the number and what follows it.
     if (t1 is not None and t1.is_hiphen and t1.next0_ is not None):
         t1 = t1.next0_
     # det becomes True once a bracketed repetition of the value is matched;
     # altf is the alternative value and equals f unless the brackets differ.
     det = False
     altf = f
     # Skip a two-character number with int_value 0 that directly follows a
     # hyphen.
     if (((isinstance(t1, NumberToken)) and t1.previous is not None
          and t1.previous.is_hiphen) and (t1).int_value == 0
             and t1.length_char == 2):
         t1 = t1.next0_
     # "(" followed by a number or by the word "НОЛЬ" and at least one more
     # token: the value may be repeated inside brackets.
     if ((t1 is not None and t1.next0_ is not None and t1.isChar('('))
             and (((isinstance(t1.next0_, NumberToken))
                   or t1.next0_.isValue("НОЛЬ", None)))
             and t1.next0_.next0_ is not None):
         nt1 = Utils.asObjectOrNull(t1.next0_, NumberToken)
         # val stays 0 when the bracket starts with the word "НОЛЬ".
         val = 0
         if (nt1 is not None):
             val = nt1.real_value
         if (math.floor(f) == math.floor(val)):
             # Integer parts agree; ttt is the token after the bracketed number.
             ttt = t1.next0_.next0_
             if (ttt.isChar(')')):
                 # "( N )": accepted; a following number with int_value 0 is
                 # skipped as well.
                 t1 = ttt.next0_
                 det = True
                 if ((isinstance(t1, NumberToken))
                         and (t1).int_value is not None
                         and (t1).int_value == 0):
                     t1 = t1.next0_
             elif (
                 ((((isinstance(ttt, NumberToken)) and
                    ((ttt).real_value < 100) and ttt.next0_ is not None) and
                   ttt.next0_.isChar('/') and ttt.next0_.next0_ is not None)
                  and ttt.next0_.next0_.getSourceText() == "100"
                  and ttt.next0_.next0_.next0_ is not None)
                     and ttt.next0_.next0_.next0_.isChar(')')):
                 # "( N M / 100 )": accepted when M equals the value returned
                 # by __getDecimalRest100(f).
                 rest = NumberExHelper.__getDecimalRest100(f)
                 if ((ttt).int_value is not None
                         and rest == (ttt).int_value):
                     t1 = ttt.next0_.next0_.next0_.next0_
                     det = True
             elif ((ttt.isValue("ЦЕЛЫХ", None) and
                    (isinstance(ttt.next0_, NumberToken))
                    and ttt.next0_.next0_ is not None)
                   and ttt.next0_.next0_.next0_ is not None
                   and ttt.next0_.next0_.next0_.isChar(')')):
                 # "( N ЦЕЛЫХ M <fraction word> )": M is divided by the power of
                 # ten named by the fraction word.
                 num2 = Utils.asObjectOrNull(ttt.next0_, NumberToken)
                 altf = num2.real_value
                 if (ttt.next0_.next0_.isValue("ДЕСЯТЫЙ", None)):
                     altf /= (10)
                 elif (ttt.next0_.next0_.isValue("СОТЫЙ", None)):
                     altf /= (100)
                 elif (ttt.next0_.next0_.isValue("ТЫСЯЧНЫЙ", None)):
                     altf /= (1000)
                 elif (ttt.next0_.next0_.isValue("ДЕСЯТИТЫСЯЧНЫЙ", None)):
                     altf /= (10000)
                 elif (ttt.next0_.next0_.isValue("СТОТЫСЯЧНЫЙ", None)):
                     altf /= (100000)
                 elif (ttt.next0_.next0_.isValue("МИЛЛИОННЫЙ", None)):
                     altf /= (1000000)
                 # Only a proper fraction is accepted; the bracketed integer
                 # part is then added to it.
                 if (altf < 1):
                     altf += val
                     t1 = ttt.next0_.next0_.next0_.next0_
                     det = True
             else:
                 # "( N <money postfix> )": the result ends at the closing
                 # bracket.
                 toks1 = NumberExHelper._m_postfixes.tryParse(
                     ttt, TerminParseAttr.NO)
                 if (toks1 is not None):
                     if ((Utils.valToEnum(
                             toks1.termin.tag,
                             NumberExType)) == NumberExType.MONEY):
                         if (toks1.end_token.next0_ is not None
                                 and toks1.end_token.next0_.isChar(')')):
                             res = NumberExToken._new473(
                                 t, toks1.end_token.next0_, nt.value,
                                 nt.typ, NumberExType.MONEY, f, altf,
                                 toks1.begin_token.morph)
                             return NumberExHelper.__correctMoney(
                                 res, toks1.begin_token)
                 # Otherwise parse the bracket content recursively as its own
                 # number with postfix (t1 is still the "(" token here).
                 res2 = NumberExHelper.tryParseNumberWithPostfix(t1.next0_)
                 if (res2 is not None and res2.end_token.next0_ is not None
                         and res2.end_token.next0_.isChar(')')):
                     if (res2.int_value is not None):
                         # Reuse the inner result: widen it to the whole
                         # construction, keep its value as the alternative one
                         # and make f the main value.
                         res2.begin_token = t
                         res2.end_token = res2.end_token.next0_
                         res2.alt_real_value = res2.real_value
                         res2.real_value = f
                         NumberExHelper.__correctExtTypes(res2)
                         # A MONEY postfix after the bracket (fewer than 2
                         # whitespaces away) extends the result.
                         if (res2.whitespaces_after_count < 2):
                             toks2 = NumberExHelper._m_postfixes.tryParse(
                                 res2.end_token.next0_, TerminParseAttr.NO)
                             if (toks2 is not None):
                                 if ((Utils.valToEnum(
                                         toks2.termin.tag, NumberExType)
                                      ) == NumberExType.MONEY):
                                     res2.end_token = toks2.end_token
                         return res2
         elif (nt1 is not None and nt1.typ == NumberSpellingType.WORDS
               and nt.typ == NumberSpellingType.DIGIT):
             # Integer parts differ, but the main number is written in digits
             # and the bracketed one in words: keep the bracketed value as the
             # alternative, provided ")" follows immediately.
             altf = nt1.real_value
             ttt = t1.next0_.next0_
             if (ttt.isChar(')')):
                 t1 = ttt.next0_
                 det = True
             if (not det):
                 altf = f
     # Skip a bracketed group that starts with the word "СУММА".
     if ((t1 is not None and t1.isChar('(') and t1.next0_ is not None)
             and t1.next0_.isValue("СУММА", None)):
         br = BracketHelper.tryParse(t1, BracketParseAttr.NO, 100)
         if (br is not None):
             t1 = br.end_token.next0_
     if (is_dollar is not None):
         # Money sign in front: no postfix is needed. te is the last token of
         # the result: the one before t1, or the last token of the chain when
         # t1 is None.
         te = None
         if (t1 is not None):
             te = t1.previous
         else:
             t1 = t0
             while t1 is not None:
                 if (t1.next0_ is None):
                     te = t1
                 t1 = t1.next0_
         if (te is None):
             return None
         # A hyphen followed by МИЛЛИОННЫЙ / МИЛЛИАРДНЫЙ scales both values.
         if (te.is_hiphen and te.next0_ is not None):
             if (te.next0_.isValue("МИЛЛИОННЫЙ", None)):
                 f *= (1000000)
                 altf *= (1000000)
                 te = te.next0_
             elif (te.next0_.isValue("МИЛЛИАРДНЫЙ", None)):
                 f *= (1000000000)
                 altf *= (1000000000)
                 te = te.next0_
         # A text token "M" or "BN" glued to the number scales both values too.
         if (not te.is_whitespace_after
                 and (isinstance(te.next0_, TextToken))):
             if (te.next0_.isValue("M", None)):
                 f *= (1000000)
                 altf *= (1000000)
                 te = te.next0_
             elif (te.next0_.isValue("BN", None)):
                 f *= (1000000000)
                 altf *= (1000000000)
                 te = te.next0_
         return NumberExToken._new474(t0, te, "", nt.typ,
                                      NumberExType.MONEY, f, altf,
                                      is_dollar)
     # No candidate postfix token, or it starts a new line without a bracketed
     # repetition having been matched.
     if (t1 is None or ((t1.is_newline_before and not det))):
         return None
     toks = NumberExHelper._m_postfixes.tryParse(t1, TerminParseAttr.NO)
     # After a bracketed repetition a number token with value "0" may stand
     # before the postfix: look for the postfix after it.
     if ((toks is None and det and (isinstance(t1, NumberToken)))
             and (t1).value == "0"):
         toks = NumberExHelper._m_postfixes.tryParse(
             t1.next0_, TerminParseAttr.NO)
     if (toks is not None):
         t1 = toks.end_token
         # Absorb a following "." into the result, unless the last postfix
         # token equals the first canonical text of the termin or is not a
         # letter token.
         if (not t1.isChar('.') and t1.next0_ is not None
                 and t1.next0_.isChar('.')):
             if ((isinstance(t1, TextToken)) and t1.isValue(
                     toks.termin.terms[0].canonical_text, None)):
                 pass
             elif (not t1.chars.is_letter):
                 pass
             else:
                 t1 = t1.next0_
         # The termin with canonic text "LTL" is never accepted here.
         if (toks.termin.canonic_text == "LTL"):
             return None
         # A one-token postfix that is a preposition or conjunction surrounded
         # by whitespace is rejected.
         if (toks.begin_token == t1):
             if (t1.morph.class0_.is_preposition
                     or t1.morph.class0_.is_conjunction):
                 if (t1.is_whitespace_before and t1.is_whitespace_after):
                     return None
         ty = Utils.valToEnum(toks.termin.tag, NumberExType)
         res = NumberExToken._new473(t, t1, nt.value, nt.typ, ty, f, altf,
                                     toks.begin_token.morph)
         # Money results get their own post-processing.
         if (ty != NumberExType.MONEY):
             NumberExHelper.__correctExtTypes(res)
             return res
         return NumberExHelper.__correctMoney(res, toks.begin_token)
     # No dictionary postfix: try the special postfix parser and fill its
     # result with the number data collected so far.
     pfx = NumberExHelper.__attachSpecPostfix(t1)
     if (pfx is not None):
         pfx.begin_token = t
         pfx.value = nt.value
         pfx.typ = nt.typ
         pfx.real_value = f
         pfx.alt_real_value = altf
         return pfx
     # NUMBER <preposition | conjunction other than "НА"> <number with postfix>:
     # borrow the types of the second number; the result covers only the first
     # number token.
     if (t1.next0_ is not None and ((t1.morph.class0_.is_preposition
                                     or t1.morph.class0_.is_conjunction))):
         if (t1.isValue("НА", None)):
             pass
         else:
             nn = NumberExHelper.tryParseNumberWithPostfix(t1.next0_)
             if (nn is not None):
                 return NumberExToken._new476(t, t, nt.value, nt.typ,
                                              nn.ex_typ, f, altf,
                                              nn.ex_typ2, nn.ex_typ_param)
     # A text token glued to a following number: unit followed by "X"
     # (two spellings of each term are compared).
     if (not t1.is_whitespace_after and (isinstance(t1.next0_, NumberToken))
             and (isinstance(t1, TextToken))):
         term = (t1).term
         ty = NumberExType.UNDEFINED
         if (term == "СМХ" or term == "CMX"):
             ty = NumberExType.SANTIMETER
         elif (term == "MX" or term == "МХ"):
             ty = NumberExType.METER
         elif (term == "MMX" or term == "ММХ"):
             ty = NumberExType.MILLIMETER
         if (ty != NumberExType.UNDEFINED):
             return NumberExToken._new477(t, t1, nt.value, nt.typ, ty, f,
                                          altf, True)
     return None
Ejemplo n.º 16
0
 def try_parse(t : 'Token') -> 'ReferentToken':
     """ Try to recognise a money amount that starts at the given token.

     Args:
         t(Token): first token of the candidate; must be a number token or a
             one-character token, otherwise nothing is parsed

     Returns:
         ReferentToken: token wrapping a MoneyReferent, or None when no money
         amount starts at ``t``
     """
     if (t is None):
         return None
     if (not (isinstance(t, NumberToken)) and t.length_char != 1):
         return None
     nex = NumberHelper.try_parse_number_with_postfix(t)
     if (nex is None or nex.ex_typ != NumberExType.MONEY):
         # Fallback: NUMBER (hyphen | preposition) NUMBER-with-money-postfix;
         # the currency is borrowed from the second number.
         if ((isinstance(t, NumberToken)) and (isinstance(t.next0_, TextToken)) and (isinstance(t.next0_.next0_, NumberToken))):
             if (t.next0_.is_hiphen or t.next0_.morph.class0_.is_preposition):
                 res1 = NumberHelper.try_parse_number_with_postfix(t.next0_.next0_)
                 if (res1 is not None and res1.ex_typ == NumberExType.MONEY):
                     res0 = MoneyReferent()
                     # "N-0 <currency> (M <currency>)": the bracketed amount
                     # repeats the whole sum; t may be only its last group of
                     # digits.
                     if ((t.next0_.is_hiphen and res1.real_value == 0 and res1.end_token.next0_ is not None) and res1.end_token.next0_.is_char('(')):
                         nex2 = NumberHelper.try_parse_number_with_postfix(res1.end_token.next0_.next0_)
                         if ((nex2 is not None and nex2.ex_typ_param == res1.ex_typ_param and nex2.end_token.next0_ is not None) and nex2.end_token.next0_.is_char(')')):
                             if (nex2.value == t.value):
                                 res0.currency = nex2.ex_typ_param
                                 res0.add_slot(MoneyReferent.ATTR_VALUE, nex2.value, True, 0)
                                 return ReferentToken(res0, t, nex2.end_token.next0_)
                             if (isinstance(t.previous, NumberToken)):
                                 # Two digit groups ("12 345"). The comparison is
                                 # numeric: `value` is a string, so it cannot be
                                 # added to a float (this used to raise TypeError).
                                 if (nex2.real_value == (((t.previous.real_value * (1000)) + t.real_value))):
                                     res0.currency = nex2.ex_typ_param
                                     res0.add_slot(MoneyReferent.ATTR_VALUE, nex2.value, True, 0)
                                     return ReferentToken(res0, t.previous, nex2.end_token.next0_)
                                 elif (isinstance(t.previous.previous, NumberToken)):
                                     # Three digit groups ("1 234 567").
                                     if (nex2.real_value == (((t.previous.previous.real_value * (1000000)) + (t.previous.real_value * (1000)) + t.real_value))):
                                         res0.currency = nex2.ex_typ_param
                                         res0.add_slot(MoneyReferent.ATTR_VALUE, nex2.value, True, 0)
                                         return ReferentToken(res0, t.previous.previous, nex2.end_token.next0_)
                     res0.currency = res1.ex_typ_param
                     res0.add_slot(MoneyReferent.ATTR_VALUE, t.value, False, 0)
                     return ReferentToken(res0, t, t)
         return None
     res = MoneyReferent()
     res.currency = nex.ex_typ_param
     # The VALUE slot keeps only the part of the string before the ".".
     val = nex.value
     if (val.find('.') > 0):
         val = val[0:0+val.find('.')]
     res.add_slot(MoneyReferent.ATTR_VALUE, val, True, 0)
     # Fractional part in hundredths; rounding to 6 digits before flooring
     # absorbs floating point noise.
     re = math.floor(round(((nex.real_value - res.value)) * (100), 6))
     if (re != 0):
         res.add_slot(MoneyReferent.ATTR_REST, str(re), True, 0)
     # The alternative value is stored only where it differs from the main one.
     if (nex.real_value != nex.alt_real_value):
         if (math.floor(res.value) != math.floor(nex.alt_real_value)):
             val = NumberHelper.double_to_string(nex.alt_real_value)
             if (val.find('.') > 0):
                 val = val[0:0+val.find('.')]
             res.add_slot(MoneyReferent.ATTR_ALTVALUE, val, True, 0)
         re = (math.floor(round(((nex.alt_real_value - (math.floor(nex.alt_real_value)))) * (100), 6)))
         if (re != res.rest and re != 0):
             res.add_slot(MoneyReferent.ATTR_ALTREST, str(re), True, 0)
     if (nex.alt_rest_money > 0):
         res.add_slot(MoneyReferent.ATTR_ALTREST, str(nex.alt_rest_money), True, 0)
     t1 = nex.end_token
     # Absorb a following bracketed repetition of the same amount: first try
     # the content of the brackets, then the bracket token itself.
     if (t1.next0_ is not None and t1.next0_.is_char('(')):
         rt = MoneyAnalyzer.try_parse(t1.next0_.next0_)
         if ((rt is not None and rt.referent.can_be_equals(res, ReferentsEqualType.WITHINONETEXT) and rt.end_token.next0_ is not None) and rt.end_token.next0_.is_char(')')):
             t1 = rt.end_token.next0_
         else:
             rt = MoneyAnalyzer.try_parse(t1.next0_)
             if (rt is not None and rt.referent.can_be_equals(res, ReferentsEqualType.WITHINONETEXT)):
                 t1 = rt.end_token
     # The alternative value is larger by a multiple of 1000 / 1000000 and a
     # number stands one whitespace before t: that number is taken as the
     # leading digit group, so the alternative value becomes the value.
     if (res.alt_value is not None and res.alt_value > res.value):
         if (t.whitespaces_before_count == 1 and (isinstance(t.previous, NumberToken))):
             delt = math.floor((res.alt_value - res.value))
             if ((((res.value < 1000) and ((delt % 1000)) == 0)) or (((res.value < 1000000) and ((delt % 1000000)) == 0))):
                 t = t.previous
                 res.add_slot(MoneyReferent.ATTR_VALUE, res.get_string_value(MoneyReferent.ATTR_ALTVALUE), True, 0)
                 res.add_slot(MoneyReferent.ATTR_ALTVALUE, None, True, 0)
     return ReferentToken(res, t, t1)
Ejemplo n.º 17
0
 def __try_parse(t: 'Token', prev: 'PersonIdToken') -> 'PersonIdToken':
     """ Parse one element of a personal identity document starting at ``t``.

     Args:
         t(Token): token to start from; None (end of text) yields None
         prev(PersonIdToken): element parsed before this one, or None for the
             first element; with ``prev`` None only a KEYWORD element can be
             returned

     Returns:
         PersonIdToken: the recognised element, or None when nothing matches
     """
     # The method calls itself with `next0_` tokens, which are None at the end
     # of the text: treat that as "nothing to parse" instead of failing on the
     # first attribute access.
     if (t is None):
         return None
     if (t.is_value("СВИДЕТЕЛЬСТВО", None)):
         # Certificate: scan the following words for both a registration word
         # and an individual-entrepreneur word.
         tt1 = t
         ip = False
         reg = False
         tt = t.next0_
         first_pass3372 = True
         while True:
             if first_pass3372: first_pass3372 = False
             else: tt = tt.next0_
             if (not (tt is not None)): break
             if (tt.is_comma_and or tt.morph.class0_.is_preposition):
                 continue
             if (tt.is_value("РЕГИСТРАЦИЯ", None)
                     or tt.is_value("РЕЕСТР", None)
                     or tt.is_value("ЗАРЕГИСТРИРОВАТЬ", None)):
                 reg = True
                 tt1 = tt
             elif (tt.is_value("ИНДИВИДУАЛЬНЫЙ", None)
                   or tt.is_value("ИП", None)):
                 ip = True
                 tt1 = tt
             elif ((tt.is_value("ВНЕСЕНИЕ", None)
                    or tt.is_value("ГОСУДАРСТВЕННЫЙ", None)
                    or tt.is_value("ЕДИНЫЙ", None))
                   or tt.is_value("ЗАПИСЬ", None)
                   or tt.is_value("ПРЕДПРИНИМАТЕЛЬ", None)):
                 tt1 = tt
             elif (tt.get_referent() is not None
                   and tt.get_referent().type_name == "DATERANGE"):
                 tt1 = tt
             else:
                 break
         if (reg and ip):
             return PersonIdToken._new2505(
                 t, tt1, PersonIdToken.Typs.KEYWORD,
                 "СВИДЕТЕЛЬСТВО О ГОСУДАРСТВЕННОЙ РЕГИСТРАЦИИ ФИЗИЧЕСКОГО ЛИЦА В КАЧЕСТВЕ ИНДИВИДУАЛЬНОГО ПРЕДПРИНИМАТЕЛЯ"
             )
     tok = PersonIdToken.M_ONTOLOGY.try_parse(t, TerminParseAttr.NO)
     if (tok is not None):
         ty = Utils.valToEnum(tok.termin.tag, PersonIdToken.Typs)
         res = PersonIdToken._new2505(tok.begin_token, tok.end_token, ty,
                                      tok.termin.canonic_text)
         if (prev is None):
             # First element: it has to be the document keyword.
             if (ty != PersonIdToken.Typs.KEYWORD):
                 return None
             # Extend the keyword over what follows it: geo referents,
             # "ГРАЖДАНИН" + geo referent, a person attribute, "ДАННЫЙ".
             t = tok.end_token.next0_
             first_pass3373 = True
             while True:
                 if first_pass3373: first_pass3373 = False
                 else: t = t.next0_
                 if (not (t is not None)): break
                 r = t.get_referent()
                 if (r is not None and (isinstance(r, GeoReferent))):
                     res.referent = r
                     res.end_token = t
                     continue
                 if (t.is_value("ГРАЖДАНИН", None) and t.next0_ is not None
                         and
                     (isinstance(t.next0_.get_referent(), GeoReferent))):
                     res.referent = t.next0_.get_referent()
                     res.end_token = t.next0_
                     t = res.end_token
                     continue
                 # Any other referent ends the keyword.
                 if (r is not None):
                     break
                 ait = PersonAttrToken.try_attach(
                     t, None, PersonAttrToken.PersonAttrAttachAttrs.NO)
                 if (ait is not None):
                     # Take a geo referent from the attribute's REF slots.
                     if (ait.referent is not None):
                         for s in ait.referent.slots:
                             if (s.type_name
                                     == PersonPropertyReferent.ATTR_REF and
                                 (isinstance(s.value, GeoReferent))):
                                 res.referent = (Utils.asObjectOrNull(
                                     s.value, Referent))
                     res.end_token = ait.end_token
                     break
                 if (t.is_value("ДАННЫЙ", None)):
                     res.end_token = t
                     continue
                 break
             # Only a geo referent that is a state is kept.
             if ((isinstance(res.referent, GeoReferent))
                     and not res.referent.is_state):
                 res.referent = (None)
             return res
         if (ty == PersonIdToken.Typs.NUMBER):
             # Number: concatenate the source text of consecutive number
             # tokens on the same line (an optional ":" is skipped first).
             tmp = io.StringIO()
             tt = tok.end_token.next0_
             if (tt is not None and tt.is_char(':')):
                 tt = tt.next0_
             while tt is not None:
                 if (tt.is_newline_before):
                     break
                 if (not (isinstance(tt, NumberToken))):
                     break
                 print(tt.get_source_text(), end="", file=tmp)
                 res.end_token = tt
                 tt = tt.next0_
             if (tmp.tell() < 1):
                 return None
             res.value = Utils.toStringStringIO(tmp)
             res.has_prefix = True
             return res
         if (ty == PersonIdToken.Typs.SERIA):
             # Series: numbers, roman numerals and two-letter upper-case
             # words, optionally separated by hyphens; collection stops at a
             # number prefix or once 4 characters are gathered.
             tmp = io.StringIO()
             tt = tok.end_token.next0_
             if (tt is not None and tt.is_char(':')):
                 tt = tt.next0_
             next_num = False
             first_pass3374 = True
             while True:
                 if first_pass3374: first_pass3374 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.is_newline_before):
                     break
                 if (MiscHelper.check_number_prefix(tt) is not None):
                     next_num = True
                     break
                 if (not (isinstance(tt, NumberToken))):
                     if (not (isinstance(tt, TextToken))):
                         break
                     if (not tt.chars.is_all_upper):
                         break
                     nu = NumberHelper.try_parse_roman(tt)
                     if (nu is not None):
                         print(nu.get_source_text(), end="", file=tmp)
                         tt = nu.end_token
                     elif (tt.length_char != 2):
                         break
                     else:
                         print(tt.term, end="", file=tmp)
                         res.end_token = tt
                     if (tt.next0_ is not None and tt.next0_.is_hiphen):
                         tt = tt.next0_
                     continue
                 if (tmp.tell() >= 4):
                     break
                 print(tt.get_source_text(), end="", file=tmp)
                 res.end_token = tt
             if (tmp.tell() < 4):
                 # A short series (2-3 characters) is accepted only when a
                 # number element follows it.
                 if (tmp.tell() < 2):
                     return None
                 tt1 = res.end_token.next0_
                 if (tt1 is not None and tt1.is_comma):
                     tt1 = tt1.next0_
                 next0__ = PersonIdToken.__try_parse(tt1, res)
                 if (next0__ is not None
                         and next0__.typ == PersonIdToken.Typs.NUMBER):
                     pass
                 else:
                     return None
             res.value = Utils.toStringStringIO(tmp)
             res.has_prefix = True
             return res
         if (ty == PersonIdToken.Typs.CODE):
             # Code: extend the element over following number tokens; ":" and
             # hyphens between them are stepped over.
             tt = res.end_token.next0_
             first_pass3375 = True
             while True:
                 if first_pass3375: first_pass3375 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.is_char_of(":") or tt.is_hiphen):
                     continue
                 if (isinstance(tt, NumberToken)):
                     res.end_token = tt
                     continue
                 break
         if (ty == PersonIdToken.Typs.ADDRESS):
             # Address: either the token itself carries an address referent,
             # or the first token after ":" / hyphens / prepositions does.
             if (isinstance(t.get_referent(), AddressReferent)):
                 res.referent = t.get_referent()
                 res.end_token = t
                 return res
             tt = res.end_token.next0_
             first_pass3376 = True
             while True:
                 if first_pass3376: first_pass3376 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.is_char_of(":") or tt.is_hiphen
                         or tt.morph.class0_.is_preposition):
                     continue
                 if (isinstance(tt.get_referent(), AddressReferent)):
                     res.referent = tt.get_referent()
                     res.end_token = tt
                 break
             if (res.referent is None):
                 return None
         return res
     elif (prev is None):
         return None
     # No ontology term here: try a bare number, optionally after a number
     # prefix recognised by MiscHelper.check_number_prefix.
     t0 = t
     t1 = MiscHelper.check_number_prefix(t0)
     if (t1 is not None):
         t = t1
     if (isinstance(t, NumberToken)):
         tmp = io.StringIO()
         res = PersonIdToken._new2507(t0, t, PersonIdToken.Typs.NUMBER)
         tt = t
         while tt is not None:
             if (tt.is_newline_before or not (isinstance(tt, NumberToken))):
                 break
             print(tt.get_source_text(), end="", file=tmp)
             res.end_token = tt
             tt = tt.next0_
         if (tmp.tell() < 4):
             # A short number (2-3 characters) right after the keyword is
             # taken as a series, but only if a number element follows it.
             if (tmp.tell() < 2):
                 return None
             if (prev is None or prev.typ != PersonIdToken.Typs.KEYWORD):
                 return None
             ne = PersonIdToken.__try_parse(res.end_token.next0_, prev)
             if (ne is not None and ne.typ == PersonIdToken.Typs.NUMBER):
                 res.typ = PersonIdToken.Typs.SERIA
             else:
                 return None
         res.value = Utils.toStringStringIO(tmp)
         if (t0 != t):
             res.has_prefix = True
         return res
     if (isinstance(t, ReferentToken)):
         # Dates, organizations and addresses found by other analyzers become
         # elements of the corresponding type.
         r = t.get_referent()
         if (r is not None):
             if (r.type_name == "DATE"):
                 return PersonIdToken._new2508(t, t,
                                               PersonIdToken.Typs.DATE, r)
             if (r.type_name == "ORGANIZATION"):
                 return PersonIdToken._new2508(t, t, PersonIdToken.Typs.ORG,
                                               r)
             if (r.type_name == "ADDRESS"):
                 return PersonIdToken._new2508(t, t,
                                               PersonIdToken.Typs.ADDRESS,
                                               r)
     # A letter word that is not all lower-case, placed between the keyword
     # and a number element, is taken as the series.
     if ((prev is not None and prev.typ == PersonIdToken.Typs.KEYWORD and
          (isinstance(t, TextToken))) and not t.chars.is_all_lower
             and t.chars.is_letter):
         rr = PersonIdToken.__try_parse(t.next0_, prev)
         if (rr is not None and rr.typ == PersonIdToken.Typs.NUMBER):
             return PersonIdToken._new2505(t, t, PersonIdToken.Typs.SERIA,
                                           t.term)
     # "ОТ" / "ВІД" followed by a date referent: date element.
     if ((t is not None and t.is_value("ОТ", "ВІД") and
          (isinstance(t.next0_, ReferentToken)))
             and t.next0_.get_referent().type_name == "DATE"):
         return PersonIdToken._new2508(t, t.next0_, PersonIdToken.Typs.DATE,
                                       t.next0_.get_referent())
     return None
Ejemplo n.º 18
0
 def real_value(self, value_) -> float:
     """Store ``value_`` on this object in its string form.

     The number is converted with ``NumberHelper.double_to_string`` and the
     resulting text is written to ``self.value``.

     Args:
         value_: the numeric value to record.

     Returns:
         ``value_`` itself, unchanged.
     """
     # Imported locally, as in the rest of this code base, to avoid import cycles.
     from pullenti.ner.core.NumberHelper import NumberHelper
     as_text = NumberHelper.double_to_string(value_)
     self.value = as_text
     return value_
Ejemplo n.º 19
0
 def __try_parse(t : 'Token', prev : 'WeaponItemToken', after_conj : bool, attach_high : bool=False) -> 'WeaponItemToken':
     """Try to parse a single weapon-description item starting at token ``t``.

     The checks run in a fixed order and the first one that succeeds wins:
     a bracketed item, an ontology term (noun / brand / name / model),
     a number introduced by a number prefix, a short upper-case model code,
     a capitalised word following a previous item, and finally the
     keyword-led patterns "МАРКА", "КАЛИБР", a bare number with units and
     "ПРОИЗВОДСТВО".

     Args:
         t: first token to examine; ``None`` yields ``None``.
         prev: the previously parsed item, if any; used to decide whether a
             capitalised word may be taken as a name or brand.
         after_conj: not inspected here, only forwarded to recursive calls.
         attach_high: not inspected here, only forwarded to the bracket
             recursion.

     Returns:
         The recognised ``WeaponItemToken`` or ``None`` when nothing matches.
     """
     if (t is None): 
         return None
     # Bracketed item: parse what follows the bracket and, if the inner item
     # reaches the end of the text or is followed by another bracket token,
     # widen it to include the bracket(s). Otherwise fall through and try the
     # remaining rules on ``t`` itself.
     if (BracketHelper.is_bracket(t, True)): 
         wit = WeaponItemToken.__try_parse(t.next0_, prev, after_conj, attach_high)
         if (wit is not None): 
             if (wit.end_token.next0_ is None): 
                 wit.begin_token = t
                 return wit
             if (BracketHelper.is_bracket(wit.end_token.next0_, True)): 
                 wit.begin_token = t
                 wit.end_token = wit.end_token.next0_
                 return wit
     # Ontology lookup: the matched termin's tag is converted to the item type.
     tok = WeaponItemToken.M_ONTOLOGY.try_parse(t, TerminParseAttr.NO)
     if (tok is not None): 
         res = WeaponItemToken(t, tok.end_token)
         res.typ = (Utils.valToEnum(tok.termin.tag, WeaponItemToken.Typs))
         if (res.typ == WeaponItemToken.Typs.NOUN): 
             res.value = tok.termin.canonic_text
             # A noun termin carrying a tag2 is flagged as doubtful.
             if (tok.termin.tag2 is not None): 
                 res.is_doubt = True
             # Scan the tokens after the noun, absorbing brands and adjectives.
             tt = res.end_token.next0_
             first_pass3426 = True
             while True:
                 # Emulated for-loop: advance on every iteration but the first.
                 if first_pass3426: first_pass3426 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.whitespaces_before_count > 2): 
                     break
                 wit = WeaponItemToken.__try_parse(tt, None, False, False)
                 if (wit is not None): 
                     # Only a BRAND item is attached to the noun; any other
                     # recognised item ends the scan.
                     if (wit.typ == WeaponItemToken.Typs.BRAND): 
                         res.__inner_tokens.append(wit)
                         tt = wit.end_token
                         res.end_token = tt
                         continue
                     break
                 if (not (isinstance(tt, TextToken))): 
                     break
                 mc = tt.get_morph_class_in_dictionary()
                 if (mc == MorphClass.ADJECTIVE): 
                     # Build alt_value as "<adjectives> <noun>": strip the
                     # trailing noun, append this adjective, re-append the noun.
                     if (res.alt_value is None): 
                         res.alt_value = res.value
                     if (res.alt_value.endswith(res.value)): 
                         res.alt_value = res.alt_value[0:0+len(res.alt_value) - len(res.value)]
                     res.alt_value = "{0}{1} {2}".format(res.alt_value, tt.term, res.value)
                     res.end_token = tt
                     continue
                 break
             return res
         if (res.typ == WeaponItemToken.Typs.BRAND or res.typ == WeaponItemToken.Typs.NAME): 
             res.value = tok.termin.canonic_text
             return res
         if (res.typ == WeaponItemToken.Typs.MODEL): 
             res.value = tok.termin.canonic_text
             # When tag2 is a list, each entry becomes an inner token whose type
             # comes from the entry's tag and whose text is its canonic_text.
             if (isinstance(tok.termin.tag2, list)): 
                 li = Utils.asObjectOrNull(tok.termin.tag2, list)
                 for to in li: 
                     wit = WeaponItemToken._new2758(t, tok.end_token, Utils.valToEnum(to.tag, WeaponItemToken.Typs), to.canonic_text, tok.begin_token == tok.end_token)
                     res.__inner_tokens.append(wit)
                     # The first additional variant, if any, is kept as alt_value.
                     if (to.additional_vars is not None and len(to.additional_vars) > 0): 
                         wit.alt_value = to.additional_vars[0].canonic_text
             res.__correct_model()
             return res
         # Other ontology types are not returned here; the rules below still run.
     # Number introduced by a number prefix (as detected by check_number_prefix).
     nnn = MiscHelper.check_number_prefix(t)
     if (nnn is not None): 
         tit = TransItemToken._attach_number(nnn, True)
         if (tit is not None): 
             res = WeaponItemToken._new2759(t, tit.end_token, WeaponItemToken.Typs.NUMBER)
             res.value = tit.value
             res.alt_value = tit.alt_value
             return res
     # Short (fewer than 4 characters) all-upper-case letter token: model code.
     if (((isinstance(t, TextToken)) and t.chars.is_letter and t.chars.is_all_upper) and (t.length_char < 4)): 
         # Letters, then a hyphen or '.', then a number.
         if ((t.next0_ is not None and ((t.next0_.is_hiphen or t.next0_.is_char('.'))) and (t.next0_.whitespaces_after_count < 2)) and (isinstance(t.next0_.next0_, NumberToken))): 
             res = WeaponItemToken._new2760(t, t.next0_, WeaponItemToken.Typs.MODEL, True)
             res.value = t.term
             res.__correct_model()
             return res
         # Letters immediately followed by a number, with no whitespace between.
         if ((isinstance(t.next0_, NumberToken)) and not t.is_whitespace_after): 
             res = WeaponItemToken._new2760(t, t, WeaponItemToken.Typs.MODEL, True)
             res.value = t.term
             res.__correct_model()
             return res
         # "СП" before a model or brand is emitted as the noun "ПИСТОЛЕТ".
         if (t.term == "СП" and (t.whitespaces_after_count < 3) and (isinstance(t.next0_, TextToken))): 
             pp = WeaponItemToken.__try_parse(t.next0_, None, False, False)
             if (pp is not None and ((pp.typ == WeaponItemToken.Typs.MODEL or pp.typ == WeaponItemToken.Typs.BRAND))): 
                 res = WeaponItemToken._new2759(t, t, WeaponItemToken.Typs.NOUN)
                 res.value = "ПИСТОЛЕТ"
                 res.alt_value = "СЛУЖЕБНЫЙ ПИСТОЛЕТ"
                 return res
     # A word longer than 2 characters that is not all lower-case is accepted
     # as a NAME only in context: after a NOUN/MODEL/BRAND item, or with no
     # previous item when the preceding token is a comma / "and".
     if (((isinstance(t, TextToken)) and t.chars.is_letter and not t.chars.is_all_lower) and t.length_char > 2): 
         ok = False
         if (prev is not None and ((prev.typ == WeaponItemToken.Typs.NOUN or prev.typ == WeaponItemToken.Typs.MODEL or prev.typ == WeaponItemToken.Typs.BRAND))): 
             ok = True
         elif (prev is None and t.previous is not None and t.previous.is_comma_and): 
             ok = True
         if (ok): 
             res = WeaponItemToken._new2760(t, t, WeaponItemToken.Typs.NAME, True)
             res.value = t.term
             # Join a hyphenated continuation written with the same character class.
             if ((t.next0_ is not None and t.next0_.is_hiphen and (isinstance(t.next0_.next0_, TextToken))) and t.next0_.next0_.chars == t.chars): 
                 res.value = "{0}-{1}".format(res.value, t.next0_.next0_.term)
                 res.end_token = t.next0_.next0_
             # Directly after a noun the word is treated as a brand.
             if (prev is not None and prev.typ == WeaponItemToken.Typs.NOUN): 
                 res.typ = WeaponItemToken.Typs.BRAND
             # A following "-<number>" or an adjacent number turns it into a model.
             if (res.end_token.next0_ is not None and res.end_token.next0_.is_hiphen and (isinstance(res.end_token.next0_.next0_, NumberToken))): 
                 res.typ = WeaponItemToken.Typs.MODEL
                 res.__correct_model()
             elif (not res.end_token.is_whitespace_after and (isinstance(res.end_token.next0_, NumberToken))): 
                 res.typ = WeaponItemToken.Typs.MODEL
                 res.__correct_model()
             return res
     # Keyword "МАРКА" followed by a brand.
     if (t.is_value("МАРКА", None)): 
         res = WeaponItemToken.__try_parse(t.next0_, prev, after_conj, False)
         if (res is not None and res.typ == WeaponItemToken.Typs.BRAND): 
             res.begin_token = t
             return res
         # Brand given as a bracketed/quoted sequence: use its text.
         if (BracketHelper.can_be_start_of_sequence(t.next0_, True, False)): 
             br = BracketHelper.try_parse(t.next0_, BracketParseAttr.NO, 100)
             if (br is not None): 
                 return WeaponItemToken._new2764(t, br.end_token, WeaponItemToken.Typs.BRAND, MiscHelper.get_text_value(br.begin_token, br.end_token, GetTextAttr.NO))
         # NOTE(review): the last argument is t.term (the keyword itself), not
         # t.next0_.term - confirm this is the intended brand value.
         if (((isinstance(t, TextToken)) and (isinstance(t.next0_, TextToken)) and t.next0_.length_char > 1) and not t.next0_.chars.is_all_lower): 
             return WeaponItemToken._new2764(t, t.next0_, WeaponItemToken.Typs.BRAND, t.term)
     # Keyword "КАЛИБР" / "КАЛІБР", optional hyphen or colon, then a number.
     if (t.is_value("КАЛИБР", "КАЛІБР")): 
         tt1 = t.next0_
         if (tt1 is not None and ((tt1.is_hiphen or tt1.is_char(':')))): 
             tt1 = tt1.next0_
         num = NumbersWithUnitToken.try_parse(tt1, None, False, False, False, False)
         if (num is not None and num.single_val is not None): 
             return WeaponItemToken._new2764(t, num.end_token, WeaponItemToken.Typs.CALIBER, NumberHelper.double_to_string(num.single_val))
     # Bare number: a caliber if its single unit is "мм" or if the caliber
     # keyword follows the number.
     if (isinstance(t, NumberToken)): 
         num = NumbersWithUnitToken.try_parse(t, None, False, False, False, False)
         if (num is not None and num.single_val is not None): 
             if (len(num.units) == 1 and num.units[0].unit is not None and num.units[0].unit.name_cyr == "мм"): 
                 return WeaponItemToken._new2764(t, num.end_token, WeaponItemToken.Typs.CALIBER, NumberHelper.double_to_string(num.single_val))
             if (num.end_token.next0_ is not None and num.end_token.next0_.is_value("КАЛИБР", "КАЛІБР")): 
                 return WeaponItemToken._new2764(t, num.end_token.next0_, WeaponItemToken.Typs.CALIBER, NumberHelper.double_to_string(num.single_val))
     # Keyword "ПРОИЗВОДСТВО" / "ВИРОБНИЦТВО", optional hyphen or colon, then
     # an organization or geo referent: the developer.
     if (t.is_value("ПРОИЗВОДСТВО", "ВИРОБНИЦТВО")): 
         tt1 = t.next0_
         if (tt1 is not None and ((tt1.is_hiphen or tt1.is_char(':')))): 
             tt1 = tt1.next0_
         if (isinstance(tt1, ReferentToken)): 
             if ((isinstance(tt1.get_referent(), OrganizationReferent)) or (isinstance(tt1.get_referent(), GeoReferent))): 
                 return WeaponItemToken._new2769(t, tt1, WeaponItemToken.Typs.DEVELOPER, tt1.get_referent())
     return None
Ejemplo n.º 20
0
 def _to_string(self, short_variant: bool, lang: 'MorphLang', lev: int,
                from_range: int) -> str:
     """Render this date as human-readable text.

     Relative dates (``self.is_relative``) are rendered from the slots
     ("3 дня назад", "текущий месяц", ...); absolute dates are assembled
     from pointer, weekday, day, month, year and time.

     Args:
         short_variant: when true, use the abbreviated year suffix and do not
             append the details produced by ``DateRelHelper``.
         lang: output language; Ukrainian and English wording is selected via
             ``is_ua`` / ``is_en``, anything else is rendered in Russian.
             ``None`` means Russian.
         lev: not used by this method.
         from_range: 0 for a standalone date; 1 when the date opens a range
             (prefixed with "с" / "з" / "from"); 2 when it closes a range
             (prefixed with "по" / "to").

     Returns:
         The textual form, or "?" when nothing could be rendered.
     """
     from pullenti.ner.date.internal.DateRelHelper import DateRelHelper

     def slot_int(slot):
         # Parse the slot's string value as an integer; the wrapper starts at 0.
         holder = RefOutArgWrapper(0)
         Utils.tryParseInt(Utils.asObjectOrNull(slot.value, str), holder)
         return holder.value

     res = io.StringIO()
     p = self.pointer
     if (lang is None):
         lang = MorphLang.RU
     if (self.is_relative):
         if (self.pointer == DatePointerType.TODAY):
             print("сейчас", end="", file=res)
             if (not short_variant):
                 DateRelHelper.append_to_string(self, res)
             return Utils.toStringStringIO(res)
         word = None  # unit noun of the last recognised slot
         val = 0  # numeric value of the last recognised slot
         back = False  # set when a relative weekday offset was negative
         is_local_rel = self.get_string_value(
             DateReferent.ATTR_ISRELATIVE) == "true"
         for s in self.slots:
             if (s.type_name == DateReferent.ATTR_CENTURY):
                 word = "век"
                 val = slot_int(s)
             elif (s.type_name == DateReferent.ATTR_YEAR):
                 word = "год"
                 val = slot_int(s)
             elif (s.type_name == DateReferent.ATTR_MONTH):
                 word = "месяц"
                 val = slot_int(s)
                 if (not is_local_rel and val >= 1 and val <= 12):
                     print(DateReferent.__m_month0[val - 1],
                           end="",
                           file=res)
             elif (s.type_name == DateReferent.ATTR_DAY):
                 word = "день"
                 val = slot_int(s)
                 if ((not is_local_rel and self.month > 0
                      and self.month <= 12) and self.higher is not None
                         and self.higher.get_string_value(
                             DateReferent.ATTR_ISRELATIVE) != "true"):
                     print("{0} {1}".format(
                         val, DateReferent.__m_month[self.month - 1]),
                           end="",
                           file=res)
                 elif (not is_local_rel):
                     print("{0} число".format(val), end="", file=res)
             elif (s.type_name == DateReferent.ATTR_QUARTAL):
                 word = "квартал"
                 val = slot_int(s)
             elif (s.type_name == DateReferent.ATTR_WEEK):
                 word = "неделя"
                 val = slot_int(s)
             elif (s.type_name == DateReferent.ATTR_HOUR):
                 word = "час"
                 val = slot_int(s)
                 if (not is_local_rel):
                     print("{:02d}:{:02d}".format(val, self.minute),
                           end="",
                           file=res)
             elif (s.type_name == DateReferent.ATTR_MINUTE):
                 word = "минута"
                 val = slot_int(s)
             elif (s.type_name == DateReferent.ATTR_DAYOFWEEK):
                 # The weekday slot does not set ``word``.
                 val = slot_int(s)
                 if (not is_local_rel):
                     print((DateReferent.__m_week_day_ex[val - 1]
                            if val >= 1 and val <= 7 else "?"),
                           end="",
                           file=res)
                 else:
                     if (val < 0):
                         val = (-val)
                         back = True
                     # NOTE(review): val == 0 passes this guard and indexes
                     # __m_week_day_ex[-1]; confirm whether 0 can occur here.
                     if (val >= 0 and val <= 7):
                         # The adjective form depends on which weekday it is.
                         if (val == 7):
                             adj = ("прошлое" if back else "будущее")
                         elif (val == 3 or val == 6):
                             adj = ("прошлая" if back else "будущая")
                         else:
                             adj = ("прошлый" if back else "будущий")
                         print("{0} {1}".format(
                             adj, DateReferent.__m_week_day_ex[val - 1]),
                               end="",
                               file=res)
                         break
         if (word is not None and is_local_rel):
             if (val == 0):
                 print("{0} {1}".format(
                     ("текущая" if word == "неделя" or word == "минута" else
                      "текущий"), word),
                       end="",
                       file=res)
             elif (val > 0 and not back):
                 print("{0} {1} вперёд".format(
                     val,
                     MiscHelper.get_text_morph_var_by_case_and_number_ex(
                         word, None, MorphNumber.UNDEFINED, str(val))),
                       end="",
                       file=res)
             else:
                 val = (-val)
                 print("{0} {1} назад".format(
                     val,
                     MiscHelper.get_text_morph_var_by_case_and_number_ex(
                         word, None, MorphNumber.UNDEFINED, str(val))),
                       end="",
                       file=res)
         elif (not is_local_rel and res.tell() == 0):
             # Nothing was printed by the slots: fall back to "<value> <unit>".
             print("{0} {1}".format(
                 val,
                 MiscHelper.get_text_morph_var_by_case_and_number_ex(
                     word, None, MorphNumber.UNDEFINED, str(val))),
                   end="",
                   file=res)
         if (not short_variant):
             DateRelHelper.append_to_string(self, res)
         # For relative dates the range preposition is inserted at the front.
         if (from_range == 1):
             Utils.insertStringIO(
                 res, 0, "{0} ".format(("з" if lang.is_ua else
                                        ("from" if lang.is_en else "с"))))
         elif (from_range == 2):
             Utils.insertStringIO(res, 0, ("to " if lang.is_en else "по "))
         return Utils.toStringStringIO(res)
     # ---- absolute date ----
     if (from_range == 1):
         print("{0} ".format(("з" if lang.is_ua else
                              ("from" if lang.is_en else "с"))),
               end="",
               file=res)
     elif (from_range == 2):
         print(("to " if lang.is_en else "по "), end="", file=res)
     if (p != DatePointerType.NO):
         val = MetaDate.POINTER.convert_inner_value_to_outer_value(
             Utils.enumToString(p), lang)
         # Inside a non-English range the pointer word is replaced by the
         # form that matches the range preposition.
         if (from_range != 0 and not lang.is_en):
             if (from_range == 1):
                 if (p == DatePointerType.BEGIN):
                     val = ("початку" if lang.is_ua else "начала")
                 elif (p == DatePointerType.CENTER):
                     val = ("середини" if lang.is_ua else "середины")
                 elif (p == DatePointerType.END):
                     val = ("кінця" if lang.is_ua else "конца")
                 elif (p == DatePointerType.TODAY):
                     val = ("цього часу"
                            if lang.is_ua else "настоящего времени")
             elif (from_range == 2):
                 if (p == DatePointerType.BEGIN):
                     val = ("початок" if lang.is_ua else "начало")
                 elif (p == DatePointerType.CENTER):
                     # Same word for Ukrainian and Russian.
                     val = "середину"
                 elif (p == DatePointerType.END):
                     val = ("кінець" if lang.is_ua else "конец")
                 elif (p == DatePointerType.TODAY):
                     val = ("теперішній час"
                            if lang.is_ua else "настоящее время")
         print("{0} ".format(val), end="", file=res)
     if (self.day_of_week > 0):
         if (lang.is_en):
             print("{0}, ".format(
                 DateReferent.__m_week_day_en[self.day_of_week - 1]),
                   end="",
                   file=res)
         else:
             print("{0}, ".format(
                 DateReferent.__m_week_day[self.day_of_week - 1]),
                   end="",
                   file=res)
     y = self.year
     m = self.month
     d = self.day
     cent = self.century
     if (y == 0 and cent != 0):
         # Century-only date: Roman numeral plus the word for "century".
         is_bc = cent < 0
         if (cent < 0):
             cent = (-cent)
         print(NumberHelper.get_number_roman(cent), end="", file=res)
         # Fix: the English word belongs to English output. The original
         # tested is_ua here, which made the Ukrainian forms below
         # unreachable and gave English output the Russian word.
         if (lang.is_en):
             print(" century", end="", file=res)
         elif (m > 0 or p != DatePointerType.NO or from_range == 1):
             print((" віка" if lang.is_ua else " века"), end="", file=res)
         else:
             print((" вік" if lang.is_ua else " век"), end="", file=res)
         if (is_bc):
             print((" до н.е." if lang.is_ua else " до н.э."),
                   end="",
                   file=res)
         return Utils.toStringStringIO(res)
     if (d > 0):
         print(d, end="", file=res)
     if (m > 0 and m <= 12):
         if (res.tell() > 0
                 and Utils.getCharAtStringIO(res,
                                             res.tell() - 1) != ' '):
             print(' ', end="", file=res)
         # One month table is used when a day, pointer or range is present,
         # the other (suffix 0) for a bare month.
         if (lang.is_ua):
             print((DateReferent.__m_monthua[m - 1]
                    if d > 0 or p != DatePointerType.NO or from_range != 0
                    else DateReferent.__m_month0ua[m - 1]),
                   end="",
                   file=res)
         elif (lang.is_en):
             print(DateReferent.__m_monthen[m - 1], end="", file=res)
         else:
             print((DateReferent.__m_month[m - 1]
                    if d > 0 or p != DatePointerType.NO or from_range != 0
                    else DateReferent.__m_month0[m - 1]),
                   end="",
                   file=res)
     if (y != 0):
         is_bc = y < 0
         if (y < 0):
             y = (-y)
         if (res.tell() > 0
                 and Utils.getCharAtStringIO(res,
                                             res.tell() - 1) != ' '):
             print(' ', end="", file=res)
         if (lang.is_en):
             print("{0}".format(y), end="", file=res)
         elif (short_variant):
             print("{0}{1}".format(y, ("р" if lang.is_ua else "г")),
                   end="",
                   file=res)
         elif (m > 0 or p != DatePointerType.NO or from_range == 1):
             print("{0} {1}".format(y, ("року" if lang.is_ua else "года")),
                   end="",
                   file=res)
         else:
             print("{0} {1}".format(y, ("рік" if lang.is_ua else "год")),
                   end="",
                   file=res)
         if (is_bc):
             print((" до н.е." if lang.is_ua else
                    ("BC" if lang.is_en else " до н.э.")),
                   end="",
                   file=res)
     h = self.hour
     mi = self.minute
     se = self.second
     if (h >= 0 and mi >= 0):
         if (res.tell() > 0):
             print(' ', end="", file=res)
         print("{:02d}:{:02d}".format(h, mi), end="", file=res)
         if (se >= 0):
             print(":{:02d}".format(se), end="", file=res)
     # Nothing rendered so far: fall back to the quarter, if there is one.
     if (res.tell() == 0 and self.quartal != 0):
         print("{0}-й квартал".format(self.quartal), end="", file=res)
     if (res.tell() == 0):
         return "?"
     # Trim trailing spaces and commas; stop once the buffer is empty.
     while (res.tell() > 0 and Utils.getCharAtStringIO(
             res, res.tell() - 1) in (' ', ',')):
         Utils.setLengthStringIO(res, res.tell() - 1)
     # NOTE(review): relative dates already returned above, so this branch
     # looks unreachable - confirm before removing.
     if (not short_variant and self.is_relative):
         DateRelHelper.append_to_string(self, res)
     return Utils.toStringStringIO(res).strip()
Ejemplo n.º 21
0
 def tryParse(t: 'Token',
              add_units: 'TerminCollection',
              can_be_set: bool = True,
              can_units_absent: bool = False) -> 'MeasureToken':
     """ Выделение вместе с наименованием
     
     Args:
         t(Token): 
     
     """
     if (not ((isinstance(t, TextToken)))):
         return None
     if (t.is_table_control_char):
         return None
     t0 = t
     whd = None
     minmax = 0
     wrapminmax1516 = RefOutArgWrapper(minmax)
     tt = NumbersWithUnitToken._isMinOrMax(t0, wrapminmax1516)
     minmax = wrapminmax1516.value
     if (tt is not None):
         t = tt.next0_
     npt = NounPhraseHelper.tryParse(
         t,
         Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) |
                         (NounPhraseParseAttr.IGNOREBRACKETS),
                         NounPhraseParseAttr), 0)
     if (npt is None):
         whd = NumbersWithUnitToken._tryParseWHL(t)
         if (whd is not None):
             npt = NounPhraseToken(t0, whd.end_token)
         elif (t0.isValue("КПД", None)):
             npt = NounPhraseToken(t0, t0)
         elif ((isinstance(t0, TextToken)) and t0.length_char > 3
               and t0.getMorphClassInDictionary().is_undefined):
             npt = NounPhraseToken(t0, t0)
         else:
             return None
     elif (NumberHelper.tryParseRealNumber(t, True) is not None):
         return None
     else:
         dtok = DateItemToken.tryAttach(t, None)
         if (dtok is not None):
             return None
     t1 = npt.end_token
     t = npt.end_token
     name_ = MetaToken._new561(npt.begin_token, npt.end_token, npt.morph)
     units = None
     units2 = None
     internals_ = list()
     not0_ = False
     tt = t1.next0_
     first_pass3037 = True
     while True:
         if first_pass3037: first_pass3037 = False
         else: tt = tt.next0_
         if (not (tt is not None)): break
         if (tt.is_newline_before):
             break
         if (tt.is_table_control_char):
             break
         wrapminmax1510 = RefOutArgWrapper(minmax)
         tt2 = NumbersWithUnitToken._isMinOrMax(tt, wrapminmax1510)
         minmax = wrapminmax1510.value
         if (tt2 is not None):
             tt = tt2
             t = tt
             t1 = t
             continue
         if ((tt.isValue("БЫТЬ", None) or tt.isValue("ДОЛЖЕН", None)
              or tt.isValue("ДОЛЖНЫЙ", None)) or tt.isValue("МОЖЕТ", None)
                 or
             ((tt.isValue("СОСТАВЛЯТЬ", None)
               and not tt.getMorphClassInDictionary().is_adjective))):
             t = tt
             t1 = t
             if (tt.previous.isValue("НЕ", None)):
                 not0_ = True
             continue
         www = NumbersWithUnitToken._tryParseWHL(tt)
         if (www is not None):
             whd = www
             tt = www.end_token
             t = tt
             t1 = t
             continue
         if (len(internals_) > 0 and tt.is_comma_and):
             continue
         if (tt.isValue("ПРИ", None) or len(internals_) > 0):
             mt1 = MeasureToken.tryParse(tt.next0_, add_units, False, False)
             if (mt1 is not None and mt1.reliable):
                 internals_.append(mt1)
                 tt = mt1.end_token
                 t = tt
                 t1 = t
                 continue
         if ((isinstance(tt, NumberToken))
                 and (tt).typ == NumberSpellingType.WORDS):
             npt3 = NounPhraseHelper.tryParse(
                 tt, NounPhraseParseAttr.PARSENUMERICASADJECTIVE, 0)
             if (npt3 is not None):
                 tt = npt3.end_token
                 t1 = tt
                 if (len(internals_) == 0):
                     name_.end_token = t1
                 continue
         mt0 = NumbersWithUnitToken.tryParse(tt, add_units, False, False)
         if (mt0 is not None):
             break
         if (((tt.is_comma or tt.isChar('('))) and tt.next0_ is not None):
             www = NumbersWithUnitToken._tryParseWHL(tt.next0_)
             if (www is not None):
                 whd = www
                 tt = www.end_token
                 t = tt
                 t1 = t
                 if (tt.next0_ is not None and tt.next0_.is_comma):
                     tt = tt.next0_
                     t1 = tt
                 if (tt.next0_ is not None and tt.next0_.isChar(')')):
                     tt = tt.next0_
                     t1 = tt
                     continue
             uu = UnitToken.tryParseList(tt.next0_, add_units, False)
             if (uu is not None):
                 t = uu[len(uu) - 1].end_token
                 t1 = t
                 units = uu
                 if (tt.isChar('(') and t1.next0_ is not None
                         and t1.next0_.isChar(')')):
                     tt = t1.next0_
                     t = tt
                     t1 = t
                     continue
                 elif (t1.next0_ is not None and t1.next0_.isChar('(')):
                     uu = UnitToken.tryParseList(t1.next0_.next0_,
                                                 add_units, False)
                     if (uu is not None and uu[len(uu) - 1].end_token.next0_
                             is not None and
                             uu[len(uu) - 1].end_token.next0_.isChar(')')):
                         units2 = uu
                         tt = uu[len(uu) - 1].end_token.next0_
                         t = tt
                         t1 = t
                         continue
                 if (uu is not None and len(uu) > 0 and not uu[0].is_doubt):
                     break
         if (BracketHelper.canBeStartOfSequence(tt, False, False)):
             br = BracketHelper.tryParse(tt, BracketParseAttr.NO, 100)
             if (br is not None):
                 tt = br.end_token
                 t = tt
                 t1 = t
                 continue
         if (tt.isValue("НЕ", None) and tt.next0_ is not None):
             mc = tt.next0_.getMorphClassInDictionary()
             if (mc.is_adverb or mc.is_misc):
                 break
             continue
         if (tt.isValue("ЯМЗ", None)):
             pass
         npt2 = NounPhraseHelper.tryParse(
             tt,
             Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) |
                             (NounPhraseParseAttr.IGNOREBRACKETS),
                             NounPhraseParseAttr), 0)
         if (npt2 is None):
             if (tt.morph.class0_.is_preposition
                     or tt.morph.class0_.is_conjunction):
                 to = NumbersWithUnitToken.M_TERMINS.tryParse(
                     tt, TerminParseAttr.NO)
                 if (to is not None):
                     if ((isinstance(to.end_token.next0_, TextToken))
                             and to.end_token.next0_.is_letters):
                         pass
                     else:
                         break
                 t1 = tt
                 continue
             mc = tt.getMorphClassInDictionary()
             if (((isinstance(tt, TextToken)) and tt.chars.is_letter
                  and tt.length_char > 1)
                     and (((tt.chars.is_all_upper or mc.is_adverb
                            or mc.is_undefined) or mc.is_adjective))):
                 uu = UnitToken.tryParseList(tt, add_units, False)
                 if (uu is not None):
                     if (uu[0].length_char > 2 or len(uu) > 1):
                         units = uu
                         t = uu[len(uu) - 1].end_token
                         t1 = t
                         break
                 t = tt
                 t1 = t
                 if (len(internals_) == 0):
                     name_.end_token = tt
                 continue
             if (tt.is_comma):
                 continue
             if (tt.isChar('.')):
                 if (not MiscHelper.canBeStartOfSentence(tt.next0_)):
                     continue
                 uu = UnitToken.tryParseList(tt.next0_, add_units, False)
                 if (uu is not None):
                     if (uu[0].length_char > 2 or len(uu) > 1):
                         units = uu
                         t = uu[len(uu) - 1].end_token
                         t1 = t
                         break
             break
         tt = npt2.end_token
         t = tt
         t1 = t
         if (len(internals_) > 0):
             pass
         elif (t.isValue("ПРЕДЕЛ", None) or t.isValue("ГРАНИЦА", None)
               or t.isValue("ДИАПАЗОН", None)):
             pass
         elif (t.chars.is_letter):
             name_.end_token = t1
     t1 = t1.next0_
     first_pass3038 = True
     while True:
         if first_pass3038: first_pass3038 = False
         else: t1 = t1.next0_
         if (not (t1 is not None)): break
         if (t1.is_table_control_char):
             pass
         elif (t1.isCharOf(":,_")):
             www = NumbersWithUnitToken._tryParseWHL(t1.next0_)
             if (www is not None):
                 whd = www
                 t = www.end_token
                 t1 = t
                 continue
         elif (t1.is_hiphen and t1.is_whitespace_after
               and t1.is_whitespace_before):
             pass
         else:
             break
     if (t1 is None):
         return None
     mts = NumbersWithUnitToken.tryParseMulti(t1, add_units, False, not0_)
     if (mts is None):
         return None
     mt = mts[0]
     if (name_.begin_token.morph.class0_.is_preposition):
         name_.begin_token = name_.begin_token.next0_
     if (len(mts) > 1 and len(internals_) == 0):
         if (len(mt.units) == 0):
             if (units is not None):
                 for m in mts:
                     m.units = units
         res1 = MeasureToken._new1511(t0, mts[len(mts) - 1].end_token,
                                      name_.morph, True)
         res1.name = MiscHelper.getTextValueOfMetaToken(
             name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
         k = 0
         while k < len(mts):
             ttt = MeasureToken._new1506(mts[k].begin_token,
                                         mts[k].end_token, mts[k])
             if (whd is not None):
                 nams = Utils.asObjectOrNull(whd.tag, list)
                 if (k < len(nams)):
                     ttt.name = nams[k]
             res1.internals.append(ttt)
             k += 1
         tt1 = res1.end_token.next0_
         if (tt1 is not None and tt1.isChar('±')):
             nn = NumbersWithUnitToken._tryParse(tt1, add_units, True,
                                                 False)
             if (nn is not None and nn.plus_minus_percent):
                 res1.end_token = nn.end_token
                 res1.nums = nn
         return res1
     if (not mt.is_whitespace_before):
         if (mt.begin_token.previous is None):
             return None
         if (mt.begin_token.previous.isCharOf(":),")
                 or mt.begin_token.previous.is_table_control_char):
             pass
         else:
             return None
     if (len(mt.units) == 0 and units is not None):
         mt.units = units
         if (mt.div_num is not None and len(units) > 1
                 and len(mt.div_num.units) == 0):
             i = 1
             while i < len(units):
                 if (units[i].pow0_ == -1):
                     j = i
                     while j < len(units):
                         mt.div_num.units.append(units[j])
                         units[j].pow0_ = (-units[j].pow0_)
                         j += 1
                     del mt.units[i:i + len(units) - i]
                     break
                 i += 1
     if ((minmax < 0) and mt.single_val is not None):
         mt.from_val = mt.single_val
         mt.from_include = True
         mt.single_val = (None)
     if (minmax > 0 and mt.single_val is not None):
         mt.to_val = mt.single_val
         mt.to_include = True
         mt.single_val = (None)
     if (len(mt.units) == 0):
         units = UnitToken.tryParseList(mt.end_token.next0_, add_units,
                                        True)
         if (units is None):
             if (can_units_absent):
                 pass
             else:
                 return None
         else:
             mt.units = units
     res = MeasureToken._new1513(t0, mt.end_token, name_.morph, internals_)
     if (((not t0.is_whitespace_before and t0.previous is not None
           and t0 == name_.begin_token) and t0.previous.is_hiphen
          and not t0.previous.is_whitespace_before)
             and (isinstance(t0.previous.previous, TextToken))):
         name_.begin_token = res.begin_token = name_.begin_token.previous.previous
     res.name = MiscHelper.getTextValueOfMetaToken(
         name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
     res.nums = mt
     for u in res.nums.units:
         if (u.keyword is not None):
             if (u.keyword.begin_char >= res.begin_char):
                 res.reliable = True
     res.__parseInternals(add_units)
     if (len(res.internals) > 0 or not can_be_set):
         return res
     t1 = res.end_token.next0_
     if (t1 is not None and t1.is_comma_and):
         t1 = t1.next0_
     mts1 = NumbersWithUnitToken.tryParseMulti(t1, add_units, False, False)
     if ((mts1 is not None and len(mts1) == 1 and
          (t1.whitespaces_before_count < 3)) and len(mts1[0].units) > 0
             and not UnitToken.canBeEquals(mts[0].units, mts1[0].units)):
         res.is_set = True
         res.nums = (None)
         res.internals.append(
             MeasureToken._new1506(mt.begin_token, mt.end_token, mt))
         res.internals.append(
             MeasureToken._new1506(mts1[0].begin_token, mts1[0].end_token,
                                   mts1[0]))
         res.end_token = mts1[0].end_token
     return res
Ejemplo n.º 22
0
 def __try_attach(t0: 'Token') -> 'PhoneItemToken':
     """Try to read a single phone-number item that starts at ``t0``.

     Depending on the token, the result is a NUMBER, DELIM, COUNTRYCODE,
     CITYCODE or PREFIX item.

     Args:
         t0: first token of the candidate item (may be None).

     Returns:
         The recognised PhoneItemToken, or None when nothing matches.
     """
     if t0 is None:
         return None
     kinds = PhoneItemToken.PhoneItemType

     # --- plain number ---------------------------------------------------
     if isinstance(t0, NumberToken):
         postfix = NumberHelper.try_parse_number_with_postfix(t0)
         if postfix is not None and not t0.is_whitespace_after:
             # The number is glued to a postfix: continue only if a PHONE
             # referent can be produced from the following token.
             if t0.kit.process_referent("PHONE", t0.next0_) is None:
                 return None
         if (t0.typ != NumberSpellingType.DIGIT
                 or t0.morph.class0_.is_adjective):
             return None
         return PhoneItemToken._new2625(t0, t0, kinds.NUMBER,
                                        t0.get_source_text())

     # --- simple delimiters ----------------------------------------------
     if t0.is_char('.'):
         return PhoneItemToken._new2625(t0, t0, kinds.DELIM, ".")
     if t0.is_hiphen:
         return PhoneItemToken._new2625(t0, t0, kinds.DELIM, "-")

     # --- "+<digits>" country code ---------------------------------------
     if t0.is_char('+'):
         code_tok = t0.next0_
         if not isinstance(code_tok, NumberToken):
             return None
         if code_tok.typ != NumberSpellingType.DIGIT:
             return None
         # Leading zeros are dropped; an empty or all-zero code is rejected.
         digits = code_tok.get_source_text().lstrip('0')
         if not digits:
             return None
         return PhoneItemToken._new2625(t0, code_tok, kinds.COUNTRYCODE,
                                        digits)

     # --- U+2011 (non-breaking hyphen) before a two-character number ------
     if (t0.is_char("\u2011") and isinstance(t0.next0_, NumberToken)
             and t0.next0_.length_char == 2):
         return PhoneItemToken._new2625(t0, t0, kinds.DELIM, "-")

     # --- "( ... )" : city code or bracketed prefix word -----------------
     if t0.is_char_of("("):
         if not isinstance(t0.next0_, NumberToken):
             term = PhoneItemToken.M_PHONE_TERMINS.try_parse(
                 t0.next0_, TerminParseAttr.NO)
             if term is not None and term.termin.tag is None:
                 closing = term.end_token.next0_
                 if closing is not None and closing.is_char(')'):
                     return PhoneItemToken._new2631(t0, closing,
                                                    kinds.PREFIX, True, "")
             return None
         buf = io.StringIO()
         cur = t0.next0_
         while cur is not None and not cur.is_char(')'):
             if (isinstance(cur, NumberToken)
                     and cur.typ == NumberSpellingType.DIGIT):
                 buf.write(str(cur.get_source_text()))
             elif cur.is_hiphen or cur.is_char('.'):
                 # separators inside the brackets are skipped
                 pass
             else:
                 return None
             cur = cur.next0_
         # No closing bracket was found, or no digits were collected.
         if cur is None or buf.tell() == 0:
             return None
         return PhoneItemToken._new2630(t0, cur, kinds.CITYCODE,
                                        Utils.toStringStringIO(buf), True)

     # --- "/ddd/" city code ----------------------------------------------
     if t0.is_char('/') and isinstance(t0.next0_, NumberToken):
         after = t0.next0_.next0_
         if (after is not None and after.is_char('/')
                 and t0.next0_.length_char == 3):
             return PhoneItemToken._new2630(t0, after, kinds.CITYCODE,
                                            str(t0.next0_.value), True)

     # --- textual prefix ("Т/Р", "Т/М" or a dictionary term) -------------
     kind = PhoneKind.UNDEFINED
     is_letter_pair = (
         t0.is_value("Т", None) and t0.next0_ is not None
         and t0.next0_.is_char_of("\\/") and t0.next0_.next0_ is not None
         and (t0.next0_.next0_.is_value("Р", None)
              or t0.next0_.next0_.is_value("М", None)))
     if is_letter_pair:
         last = t0.next0_.next0_
         if last.is_value("Р", None):
             kind = PhoneKind.WORK
         else:
             kind = PhoneKind.MOBILE
     else:
         term = PhoneItemToken.M_PHONE_TERMINS.try_parse(
             t0, TerminParseAttr.NO)
         if term is None or term.termin.tag is not None:
             if t0.is_value("НОМЕР", None):
                 # "НОМЕР <prefix>": reuse the prefix found right after it.
                 nested = PhoneItemToken.__try_attach(t0.next0_)
                 if nested is not None and nested.item_type == kinds.PREFIX:
                     nested.begin_token = t0
                     return nested
             return None
         if isinstance(term.termin.tag2, PhoneKind):
             kind = Utils.valToEnum(term.termin.tag2, PhoneKind)
         last = term.end_token
     res = PhoneItemToken._new2633(t0, last, kinds.PREFIX, "", kind)

     # Swallow trailing '.' / ':' (they extend the item) and table control
     # characters (they are stepped over without extending the item).
     follower = last.next0_
     while follower is not None:
         if follower.is_char_of(".:"):
             res.end_token = follower
         elif not follower.is_table_control_char:
             break
         last = follower
         follower = last.next0_

     # A one-character or all-upper-case prefix with nothing consumed after
     # it must be followed by whitespace.
     if (t0 == last
             and (t0.begin_char == t0.end_char or t0.chars.is_all_upper)
             and not t0.is_whitespace_after):
         return None
     return res
Ejemplo n.º 23
0
 def __correctTailAttributes(p : 'PersonReferent', t0 : 'Token') -> 'Token':
     """Consume birth/death/age details that follow a person mention.

     Recognised forms are a "born"/"died"/"date of birth" keyword followed
     by a DATE referent, an age after a death date, and a bracketed
     "(date)" or "(date range)". Matching values are added to ``p`` as
     ATTR_BORN / ATTR_DIE / ATTR_AGE slots when ``p`` is not None.

     Args:
         p: person referent to enrich; may be None (then nothing is stored).
         t0: token where the tail may start.

     Returns:
         The last token consumed, or ``t0`` itself when nothing matched.
     """
     res = t0
     cur = t0
     if cur is not None and cur.isChar(','):
         cur = cur.next0_

     # Leading keyword decides whether the following date is birth or death.
     born = False
     die = False
     if cur is not None:
         if cur.isValue("РОДИТЬСЯ", "НАРОДИТИСЯ") or cur.isValue("BORN", None):
             born = True
             cur = cur.next0_
         elif (cur.isValue("УМЕРЕТЬ", "ПОМЕРТИ")
               or cur.isValue("СКОНЧАТЬСЯ", None)
               or cur.isValue("DIED", None)):
             die = True
             cur = cur.next0_
         elif (cur.isValue("ДАТА", None) and cur.next0_ is not None
               and cur.next0_.isValue("РОЖДЕНИЕ", "НАРОДЖЕННЯ")):
             born = True
             cur = cur.next0_.next0_

     # Skip prepositions, hyphens and colons before the date itself.
     while cur is not None and (cur.morph.class0_.is_preposition
                                or cur.is_hiphen or cur.isChar(':')):
         cur = cur.next0_

     date_ref = None if cur is None else cur.getReferent()
     if date_ref is not None and date_ref.type_name == "DATE":
         tail = cur
         follower = cur.next0_
         # "<date> Р." / "<date> РОЖДЕНИЕ" marks the date as a birth date.
         if follower is not None and (
                 follower.isValue("Р", None)
                 or follower.isValue("РОЖДЕНИЕ", "НАРОДЖЕННЯ")):
             born = True
             tail = follower
             if tail.next0_ is not None and tail.next0_.isChar('.'):
                 tail = tail.next0_
         if born or die:
             if p is not None:
                 slot = (PersonReferent.ATTR_BORN
                         if born else PersonReferent.ATTR_DIE)
                 p.addSlot(slot, date_ref, False, 0)
             res = tail
             cur = tail

     # After a death keyword an age may follow.
     if die and cur is not None:
         age = NumberHelper.tryParseAge(cur.next0_)
         if age is not None:
             if p is not None:
                 p.addSlot(PersonReferent.ATTR_AGE, str(age.value), False, 0)
             res = age.end_token
             cur = res.next0_

     # Bracketed "(РОД. <date>)" or "(<date range>)".
     if cur is None or not cur.isChar('('):
         return res
     br = BracketHelper.tryParse(cur, BracketParseAttr.NO, 100)
     if br is None:
         return res
     inner = cur.next0_
     if inner.isValue("РОД", None):
         inner = inner.next0_
         if inner is not None and inner.isChar('.'):
             inner = inner.next0_
     if not isinstance(inner, ReferentToken):
         return res
     r = inner.getReferent()
     kind = r.type_name
     if kind == "DATERANGE" and inner.next0_ == br.end_token:
         date_from = Utils.asObjectOrNull(r.getSlotValue("FROM"), Referent)
         date_to = Utils.asObjectOrNull(r.getSlotValue("TO"), Referent)
         if date_from is not None and date_to is not None:
             if p is not None:
                 p.addSlot(PersonReferent.ATTR_BORN, date_from, False, 0)
                 p.addSlot(PersonReferent.ATTR_DIE, date_to, False, 0)
             res = br.end_token
     elif kind == "DATE" and inner.next0_ == br.end_token:
         if p is not None:
             p.addSlot(PersonReferent.ATTR_BORN, r, False, 0)
         res = br.end_token
     return res
Ejemplo n.º 24
0
 def tryParse(t: 'Token',
              prev: 'FundsItemToken' = None) -> 'FundsItemToken':
     """Try to recognise one funds-related item starting at token ``t``.

     The scan walks forward from ``t``, skipping filler words, and returns
     on the first recognisable element: an organization referent (ORG), a
     money referent (SUM or PRICE), a noun phrase about shares / securities /
     charter capital (NOUN), a share-type word following a COUNT item
     (NOUN), a percentage (PERCENT) or a bare number (COUNT).

     Args:
         t: token to start from (may be None).
         prev: previously parsed item; only its ``typ`` is inspected here,
             to enable the "share type word after a count" branch.

     Returns:
         A FundsItemToken beginning at ``t``, or None when nothing matches.
     """
     if (t is None):
         return None
     typ0 = FundsItemTyp.UNDEFINED
     tt = t
     # Emulated for-loop: tt advances via next0_ on every pass but the first.
     first_pass2766 = True
     while True:
         if first_pass2766: first_pass2766 = False
         else: tt = tt.next0_
         if (not (tt is not None)): break
         # Filler that carries no information: prepositions, adverbs and
         # quantity words.
         if (tt.morph.class0_.is_preposition or tt.morph.class0_.is_adverb):
             continue
         if ((tt.isValue("СУММА", None) or tt.isValue("ОКОЛО", None)
              or tt.isValue("БОЛЕЕ", None)) or tt.isValue("МЕНЕЕ", None)
                 or tt.isValue("СВЫШЕ", None)):
             continue
         # Price keywords switch the type a following money value will get.
         if ((tt.isValue("НОМИНАЛ", None) or tt.isValue("ЦЕНА", None)
              or tt.isValue("СТОИМОСТЬ", None))
                 or tt.isValue("СТОИТЬ", None)):
             typ0 = FundsItemTyp.PRICE
             continue
         if (tt.isValue("НОМИНАЛЬНАЯ", None) or tt.isValue("ОБЩАЯ", None)):
             continue
         if (tt.isValue("СОСТАВЛЯТЬ", None)):
             continue
         re = tt.getReferent()
         if (isinstance(re, OrganizationReferent)):
             return FundsItemToken._new428(t, tt, FundsItemTyp.ORG, re)
         if (isinstance(re, MoneyReferent)):
             # Money is a SUM unless a price keyword was seen earlier or it
             # is followed by "ЗА АКЦИЯ" ("per share").
             if (typ0 == FundsItemTyp.UNDEFINED):
                 typ0 = FundsItemTyp.SUM
             if ((tt.next0_ is not None and tt.next0_.isValue("ЗА", None)
                  and tt.next0_.next0_ is not None)
                     and ((tt.next0_.next0_.isValue("АКЦИЯ", None)
                           or tt.next0_.next0_.isValue("АКЦІЯ", None)))):
                 typ0 = FundsItemTyp.PRICE
             res = FundsItemToken._new428(t, tt, typ0, re)
             return res
         # Any other referent stops the scan (the function returns None).
         if (re is not None):
             break
         npt = NounPhraseHelper.tryParse(tt, NounPhraseParseAttr.NO, 0)
         # "ПАКЕТ <noun phrase>": analyse the noun phrase after "ПАКЕТ".
         if (npt is not None and npt.noun.isValue("ПАКЕТ", None)):
             npt = NounPhraseHelper.tryParse(npt.end_token.next0_,
                                             NounPhraseParseAttr.NO, 0)
         if (npt is not None):
             res = None
             if (npt.noun.isValue("АКЦІЯ", None)
                     or npt.noun.isValue("АКЦИЯ", None)):
                 res = FundsItemToken._new430(t, npt.end_token,
                                              FundsItemTyp.NOUN,
                                              FundsKind.STOCK)
                 # If the first adjective is a known share type, keep the
                 # normalised phrase text as string_val.
                 if (len(npt.adjectives) > 0):
                     for v in FundsItemToken.__m_act_types:
                         if (npt.adjectives[0].isValue(v, None)):
                             res.string_val = npt.getNormalCaseText(
                                 None, True, MorphGender.UNDEFINED,
                                 False).lower()
                             # Replace this particular normalised form with
                             # the fixed wording below.
                             if (res.string_val == "голосовавшая акция"):
                                 res.string_val = "голосующая акция"
                             break
             elif (((npt.noun.isValue("БУМАГА", None)
                     or npt.noun.isValue("ПАПІР", None)))
                   and npt.end_token.previous is not None and
                   ((npt.end_token.previous.isValue("ЦЕННЫЙ", None)
                     or npt.end_token.previous.isValue("ЦІННИЙ", None)))):
                 res = FundsItemToken._new431(t, npt.end_token,
                                              FundsItemTyp.NOUN,
                                              FundsKind.STOCK,
                                              "ценные бумаги")
             elif (((npt.noun.isValue("КАПИТАЛ", None)
                     or npt.noun.isValue("КАПІТАЛ", None)))
                   and len(npt.adjectives) > 0
                   and ((npt.adjectives[0].isValue("УСТАВНОЙ", None)
                         or npt.adjectives[0].isValue("УСТАВНЫЙ", None)
                         or npt.adjectives[0].isValue("СТАТУТНИЙ", None)))):
                 res = FundsItemToken._new430(t, npt.end_token,
                                              FundsItemTyp.NOUN,
                                              FundsKind.CAPITAL)
             if (res is not None):
                 # Try to attach an organization referent that directly
                 # follows the noun phrase.
                 rt = res.kit.processReferent(
                     OrganizationAnalyzer.ANALYZER_NAME,
                     res.end_token.next0_)
                 if (rt is not None):
                     res.ref = rt.referent
                     res.end_token = rt.end_token
                 return res
         # After a COUNT item a bare share-type word may stand for
         # "<type> share"; accept it only when funds/stock context is found
         # nearby.
         if (prev is not None and prev.typ == FundsItemTyp.COUNT):
             val = None
             for v in FundsItemToken.__m_act_types:
                 if (tt.isValue(v, None)):
                     val = v
                     break
             if (val is not None):
                 cou = 0
                 ok = False
                 # Look back (at most 100 tokens) for a token carrying a
                 # FundsReferent.
                 ttt = tt.previous
                 first_pass2767 = True
                 while True:
                     if first_pass2767: first_pass2767 = False
                     else: ttt = ttt.previous
                     if (not (ttt is not None)): break
                     cou += 1
                     if ((cou) > 100):
                         break
                     refs = ttt.getReferents()
                     if (refs is None):
                         continue
                     for r in refs:
                         if (isinstance(r, FundsReferent)):
                             ok = True
                             break
                     if (ok):
                         break
                 cou = 0
                 if (not ok):
                     # Otherwise look ahead (at most 100 tokens) for an item
                     # of kind STOCK, parsing recursively at each token.
                     ttt = tt.next0_
                     while ttt is not None:
                         cou += 1
                         if ((cou) > 100):
                             break
                         fi = FundsItemToken.tryParse(ttt, None)
                         if (fi is not None and fi.kind == FundsKind.STOCK):
                             ok = True
                             break
                         ttt = ttt.next0_
                 if (ok):
                     res = FundsItemToken._new433(t, tt, FundsKind.STOCK,
                                                  FundsItemTyp.NOUN)
                     # Drop the last two characters of the matched word and
                     # append "ая акция".
                     res.string_val = "{0}ая акция".format(
                         val[0:0 + len(val) - 2].lower())
                     return res
         if (isinstance(tt, NumberToken)):
             num = NumberHelper.tryParseNumberWithPostfix(tt)
             if (num is not None):
                 # A number with postfix preceded by "НА" is not accepted.
                 if (tt.previous is not None
                         and tt.previous.isValue("НА", None)):
                     break
                 if (num.ex_typ == NumberExType.PERCENT):
                     res = FundsItemToken._new434(t, num.end_token,
                                                  FundsItemTyp.PERCENT, num)
                     # NOTE: from here on the parameter t is reused as a
                     # cursor after the percentage.
                     t = num.end_token.next0_
                     # "<percent> + <number>" / "<percent> ПЛЮС <number>".
                     if (t is not None and
                         ((t.isChar('+') or t.isValue("ПЛЮС", None)))
                             and (isinstance(t.next0_, NumberToken))):
                         res.end_token = t.next0_
                         t = res.end_token.next0_
                     # Step over "-<lower-case word>" glued to the hyphen.
                     if ((t is not None and t.is_hiphen
                          and t.next0_ is not None)
                             and t.next0_.chars.is_all_lower
                             and not t.is_whitespace_after):
                         t = t.next0_.next0_
                     # Include a trailing "ДОЛЯ" / "ЧАСТКА" in the item.
                     if (t is not None
                             and ((t.isValue("ДОЛЯ", None)
                                   or t.isValue("ЧАСТКА", None)))):
                         res.end_token = t
                     return res
                 # Numbers with any other postfix are rejected.
                 break
             # Plain number: a COUNT item, optionally including "ШТУКА".
             t1 = tt
             if (t1.next0_ is not None
                     and t1.next0_.isValue("ШТУКА", None)):
                 t1 = t1.next0_
             return FundsItemToken._new434(
                 t, t1, FundsItemTyp.COUNT,
                 Utils.asObjectOrNull(tt, NumberToken))
         # Nothing recognised at this token: stop scanning.
         break
     return None
Ejemplo n.º 25
0
 def get_normal_case_text(self,
                          mc: 'MorphClass' = None,
                          num: 'MorphNumber' = MorphNumber.UNDEFINED,
                          gender: 'MorphGender' = MorphGender.UNDEFINED,
                          keep_chars: bool = False) -> str:
     """Return the normalised text of this item.

     The text is chosen among the NounPhraseItemTextVar entries of
     ``self.morph.items``; when none yields a value, the method falls back
     to the normal-case text of the underlying token(s).

     Args:
         mc: requested morphological class; only ``mc.is_adjective`` and
             ``mc.value`` are read here.
         num: requested number (SINGULAR / PLURAL influence the choice).
         gender: requested gender, used when building adjective forms.
         keep_chars: passed to ``__corr_chars`` and to the fallbacks;
             selects GetTextAttr.KEEPREGISTER for the MiscHelper calls.

     Returns:
         The chosen text, or "?" when no text could be obtained.
     """
     # A single referent token supplies its own normalised text.
     if ((isinstance(self.begin_token, ReferentToken))
             and self.begin_token == self.end_token):
         return self.begin_token.get_normal_case_text(
             mc, num, gender, keep_chars)
     res = None
     # max_coef: undef_coef of the last variant accepted so far.
     # def_coef: best score among variants with undef_coef == 0
     #           (-1 until such a variant has been accepted).
     max_coef = 0
     def_coef = -1
     for it in self.morph.items:
         v = Utils.asObjectOrNull(it, NounPhraseItemTextVar)
         if (v is None):
             continue
         # Skip a variant with positive undef_coef if its coefficient is
         # below the last accepted one, or if a zero-coefficient variant
         # has already been scored.
         if (v.undef_coef > 0
                 and (((v.undef_coef < max_coef) or def_coef >= 0))):
             continue
         if (num == MorphNumber.SINGULAR
                 and v.single_number_value is not None):
             # Singular requested and a singular form is stored. For a
             # neuter/feminine adjective, ask MorphologyService for the
             # nominative singular word form of that value.
             if (mc is not None and ((gender == MorphGender.NEUTER
                                      or gender == MorphGender.FEMINIE))
                     and mc.is_adjective):
                 bi = MorphBaseInfo._new401(MorphClass._new53(mc.value),
                                            gender, MorphNumber.SINGULAR,
                                            MorphCase.NOMINATIVE,
                                            self.morph.language)
                 str0_ = MorphologyService.get_wordform(
                     v.single_number_value, bi)
                 if (str0_ is not None):
                     res = str0_
             else:
                 res = v.single_number_value
             if (v.undef_coef == 0):
                 break
             max_coef = v.undef_coef
             continue
         if (Utils.isNullOrEmpty(v.normal_value)):
             continue
         # A value starting with a digit, requested as an adjective: try to
         # turn the number into its adjective form.
         if (str.isdigit(v.normal_value[0]) and mc is not None
                 and mc.is_adjective):
             val = 0
             wrapval402 = RefOutArgWrapper(0)
             inoutres403 = Utils.tryParseInt(v.normal_value, wrapval402)
             val = wrapval402.value
             if (inoutres403):
                 str0_ = NumberHelper.get_number_adjective(
                     val, gender,
                     (MorphNumber.SINGULAR if num == MorphNumber.SINGULAR
                      or val == 1 else MorphNumber.PLURAL))
                 if (str0_ is not None):
                     res = str0_
                     if (v.undef_coef == 0):
                         break
                     max_coef = v.undef_coef
                     continue
         res1 = it.normal_value
         # Suppletive plurals are mapped to their singular words when the
         # singular is requested.
         if (num == MorphNumber.SINGULAR):
             if (res1 == "ДЕТИ"):
                 res1 = "РЕБЕНОК"
             elif (res1 == "ЛЮДИ"):
                 res1 = "ЧЕЛОВЕК"
         max_coef = v.undef_coef
         # Variants with positive undef_coef are taken provisionally; the
         # loop continues looking for a better one.
         if (v.undef_coef > 0):
             res = res1
             continue
         # Score a zero-coefficient variant: +1 when it equals the source
         # term in nominative singular (not applied for adjective requests),
         # +3 when plural is requested and the variant's number includes
         # plural.
         def_co = 0
         if (mc is not None and mc.is_adjective and v.undef_coef == 0):
             pass
         elif (
             ((isinstance(self.begin_token, TextToken))
              and res1 == self.begin_token.term and it.case_.is_nominative)
                 and it.number == MorphNumber.SINGULAR):
             def_co = 1
         if (num == MorphNumber.PLURAL and
             ((v.number) & (MorphNumber.PLURAL)) == (MorphNumber.PLURAL)):
             def_co += 3
         if (res is None or def_co > def_coef):
             res = res1
             def_coef = def_co
             # Any positive score ends the search.
             if (def_co > 0):
                 break
     if (res is not None):
         return self.__corr_chars(res, keep_chars)
     # No variant produced a value (res is None here): fall back to the
     # tokens themselves.
     if (res is None and self.begin_token == self.end_token):
         res = self.begin_token.get_normal_case_text(
             mc, num, gender, keep_chars)
     elif (res is None):
         res = self.begin_token.get_normal_case_text(
             mc, num, gender, keep_chars)
         if (res is None):
             # First token gives nothing: use the text of the whole span.
             res = MiscHelper.get_text_value_of_meta_token(
                 self, (GetTextAttr.KEEPREGISTER
                        if keep_chars else GetTextAttr.NO))
         else:
             # Normalised first token followed by the text of the rest.
             res = "{0} {1}".format(
                 res,
                 MiscHelper.get_text_value(
                     self.begin_token.next0_, self.end_token,
                     (GetTextAttr.KEEPREGISTER
                      if keep_chars else GetTextAttr.NO)))
     return Utils.ifNotNull(res, "?")
Ejemplo n.º 26
0
 def __try_attach(t: 'Token', prev: typing.List['DateItemToken'],
                  detail_regime: bool) -> 'DateItemToken':
     from pullenti.ner.measure.internal.MeasureToken import MeasureToken
     if (t is None):
         return None
     nt = Utils.asObjectOrNull(t, NumberToken)
     begin = t
     end = t
     is_in_brack = False
     if ((BracketHelper.can_be_start_of_sequence(t, False, False)
          and t.next0_ is not None and (isinstance(t.next0_, NumberToken)))
             and BracketHelper.can_be_end_of_sequence(
                 t.next0_.next0_, False, None, False)):
         nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
         end = t.next0_.next0_
         is_in_brack = True
     if ((t.is_newline_before and BracketHelper.is_bracket(t, False) and
          (isinstance(t.next0_, NumberToken)))
             and BracketHelper.is_bracket(t.next0_.next0_, False)):
         nt = (Utils.asObjectOrNull(t.next0_, NumberToken))
         end = t.next0_.next0_
         is_in_brack = True
     if (nt is not None):
         if (nt.int_value is None):
             return None
         if (nt.typ == NumberSpellingType.WORDS):
             if (nt.morph.class0_.is_noun
                     and not nt.morph.class0_.is_adjective):
                 if (t.next0_ is not None
                         and ((t.next0_.is_value("КВАРТАЛ", None)
                               or t.next0_.is_value("ПОЛУГОДИЕ", None)
                               or t.next0_.is_value("ПІВРІЧЧЯ", None)))):
                     pass
                 else:
                     return None
         if (NumberHelper.try_parse_age(nt) is not None):
             return None
         tt = None
         res = DateItemToken._new628(begin, end,
                                     DateItemToken.DateItemType.NUMBER,
                                     nt.int_value, nt.morph)
         if ((res.int_value == 20 and (isinstance(nt.next0_, NumberToken))
              and nt.next0_.int_value is not None)
                 and nt.next0_.length_char == 2 and prev is not None):
             num = 2000 + nt.next0_.int_value
             if ((num < 2030) and len(prev) > 0 and prev[len(prev) - 1].typ
                     == DateItemToken.DateItemType.MONTH):
                 ok = False
                 if (nt.whitespaces_after_count == 1):
                     ok = True
                 elif (nt.is_newline_after and nt.is_newline_after):
                     ok = True
                 if (ok):
                     nt = (Utils.asObjectOrNull(nt.next0_, NumberToken))
                     res.end_token = nt
                     res.int_value = num
         if (res.int_value == 20 or res.int_value == 201):
             tt = t.next0_
             if (tt is not None and tt.is_char('_')):
                 while tt is not None:
                     if (not tt.is_char('_')):
                         break
                     tt = tt.next0_
                 tt = DateItemToken.__test_year_rus_word(tt, False)
                 if (tt is not None):
                     res.int_value = 0
                     res.end_token = tt
                     res.typ = DateItemToken.DateItemType.YEAR
                     return res
         if (res.int_value <= 12 and t.next0_ is not None
                 and (t.whitespaces_after_count < 3)):
             tt = t.next0_
             if (tt.is_value("ЧАС", None)):
                 if (((isinstance(t.previous, TextToken))
                      and not t.previous.chars.is_letter
                      and not t.is_whitespace_before)
                         and (isinstance(t.previous.previous, NumberToken))
                         and not t.previous.is_whitespace_before):
                     pass
                 else:
                     res.typ = DateItemToken.DateItemType.HOUR
                     res.end_token = tt
                     tt = tt.next0_
                     if (tt is not None and tt.is_char('.')):
                         res.end_token = tt
                         tt = tt.next0_
             first_pass3072 = True
             while True:
                 if first_pass3072: first_pass3072 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.is_value("УТРО", "РАНОК")):
                     res.end_token = tt
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.is_value("ВЕЧЕР", "ВЕЧІР")):
                     res.end_token = tt
                     res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.is_value("ДЕНЬ", None)):
                     res.end_token = tt
                     if (res.int_value < 10):
                         res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.is_value("НОЧЬ", "НІЧ")):
                     res.end_token = tt
                     if (res.int_value == 12):
                         res.int_value = 0
                     elif (res.int_value > 9):
                         res.int_value += 12
                     res.typ = DateItemToken.DateItemType.HOUR
                     return res
                 if (tt.is_comma or tt.morph.class0_.is_adverb):
                     continue
                 break
             if (res.typ == DateItemToken.DateItemType.HOUR):
                 return res
         can_be_year_ = True
         if (prev is not None and len(prev) > 0 and prev[len(prev) - 1].typ
                 == DateItemToken.DateItemType.MONTH):
             pass
         elif ((prev is not None and len(prev) >= 4 and
                prev[len(prev) - 1].typ == DateItemToken.DateItemType.DELIM)
               and prev[len(prev) - 2].can_by_month):
             pass
         elif (nt.next0_ is not None
               and ((nt.next0_.is_value("ГОД", None)
                     or nt.next0_.is_value("РІК", None)))):
             if (res.int_value < 1000):
                 can_be_year_ = False
         tt = DateItemToken.__test_year_rus_word(nt.next0_, False)
         if (tt is not None and DateItemToken.__is_new_age(tt.next0_)):
             res.typ = DateItemToken.DateItemType.YEAR
             res.end_token = tt
         elif (can_be_year_):
             if (res.can_be_year
                     or res.typ == DateItemToken.DateItemType.NUMBER):
                 tt = DateItemToken.__test_year_rus_word(
                     nt.next0_, res.is_newline_before)
                 if ((tt) is not None):
                     if ((tt.is_value("Г", None)
                          and not tt.is_whitespace_before
                          and t.previous is not None)
                             and ((t.previous.is_value("КОРПУС", None)
                                   or t.previous.is_value("КОРП", None)))):
                         pass
                     elif (
                         (((nt.next0_.is_value("Г", None) and
                            (t.whitespaces_before_count < 3) and t.previous
                            is not None) and t.previous.is_value("Я", None)
                           and t.previous.previous is not None)
                          and t.previous.previous.is_char_of("\\/")
                          and t.previous.previous.previous is not None)
                             and t.previous.previous.previous.is_value(
                                 "А", None)):
                         return None
                     elif (nt.next0_.length_char == 1
                           and not res.can_be_year
                           and ((prev is None or
                                 ((len(prev) > 0 and prev[len(prev) - 1].typ
                                   != DateItemToken.DateItemType.DELIM))))):
                         pass
                     else:
                         res.end_token = tt
                         res.typ = DateItemToken.DateItemType.YEAR
                         res.lang = tt.morph.language
             elif (tt is not None and (nt.whitespaces_after_count < 2)
                   and (nt.end_char - nt.begin_char) == 1):
                 res.end_token = tt
                 res.typ = DateItemToken.DateItemType.YEAR
                 res.lang = tt.morph.language
         if (nt.previous is not None):
             if (nt.previous.is_value("В", "У")
                     or nt.previous.is_value("К", None)
                     or nt.previous.is_value("ДО", None)):
                 tt = DateItemToken.__test_year_rus_word(nt.next0_, False)
                 if ((tt) is not None):
                     ok = False
                     if ((res.int_value < 100)
                             and (isinstance(tt, TextToken)) and
                         ((tt.term == "ГОДА" or tt.term == "РОКИ"))):
                         pass
                     else:
                         ok = True
                         if (nt.previous.is_value("ДО", None)
                                 and nt.next0_.is_value("Г", None)):
                             cou = 0
                             ttt = nt.previous.previous
                             while ttt is not None and (cou < 10):
                                 mt = MeasureToken.try_parse(
                                     ttt, None, False, False, False, False)
                                 if (mt is not None
                                         and mt.end_char > nt.end_char):
                                     ok = False
                                     break
                                 ttt = ttt.previous
                                 cou += 1
                     if (ok):
                         res.end_token = tt
                         res.typ = DateItemToken.DateItemType.YEAR
                         res.lang = tt.morph.language
                         res.begin_token = nt.previous
             elif (((nt.previous.is_value("IN", None)
                     or nt.previous.is_value("SINCE", None)))
                   and res.can_be_year):
                 uu = (NumbersWithUnitToken.try_parse(
                     nt, None, False, False, False, False)
                       if nt.previous.is_value("IN", None) else None)
                 if (uu is not None and len(uu.units) > 0):
                     pass
                 else:
                     res.typ = DateItemToken.DateItemType.YEAR
                     res.begin_token = nt.previous
             elif (nt.previous.is_value("NEL", None)
                   or nt.previous.is_value("DEL", None)):
                 if (res.can_be_year):
                     res.typ = DateItemToken.DateItemType.YEAR
                     res.lang = MorphLang.IT
                     res.begin_token = nt.previous
             elif (nt.previous.is_value("IL", None) and res.can_be_day):
                 res.lang = MorphLang.IT
                 res.begin_token = nt.previous
         t1 = res.end_token.next0_
         if (t1 is not None):
             if (t1.is_value("ЧАС", "ГОДИНА") or t1.is_value("HOUR", None)):
                 if ((((prev is not None and len(prev) == 2
                        and prev[0].can_be_hour)
                       and prev[1].typ == DateItemToken.DateItemType.DELIM
                       and not prev[1].is_whitespace_after)
                      and not prev[1].is_whitespace_after
                      and res.int_value >= 0) and (res.int_value < 59)):
                     prev[0].typ = DateItemToken.DateItemType.HOUR
                     res.typ = DateItemToken.DateItemType.MINUTE
                     res.end_token = t1
                 elif (res.int_value < 24):
                     if (t1.next0_ is not None and t1.next0_.is_char('.')):
                         t1 = t1.next0_
                     res.typ = DateItemToken.DateItemType.HOUR
                     res.end_token = t1
             elif ((res.int_value < 60)
                   and ((t1.is_value("МИНУТА", "ХВИЛИНА") or t1.is_value(
                       "МИН", None) or t.is_value("MINUTE", None)))):
                 if (t1.next0_ is not None and t1.next0_.is_char('.')):
                     t1 = t1.next0_
                 res.typ = DateItemToken.DateItemType.MINUTE
                 res.end_token = t1
             elif (
                 (res.int_value < 60) and
                 ((t1.is_value("СЕКУНДА", None) or t1.is_value("СЕК", None)
                   or t1.is_value("SECOND", None)))):
                 if (t1.next0_ is not None and t1.next0_.is_char('.')):
                     t1 = t1.next0_
                 res.typ = DateItemToken.DateItemType.SECOND
                 res.end_token = t1
             elif ((res.int_value < 30)
                   and ((t1.is_value("ВЕК", "ВІК")
                         or t1.is_value("СТОЛЕТИЕ", "СТОЛІТТЯ")))):
                 res.typ = DateItemToken.DateItemType.CENTURY
                 res.end_token = t1
             elif (res.int_value <= 4 and t1.is_value("КВАРТАЛ", None)):
                 res.typ = DateItemToken.DateItemType.QUARTAL
                 res.end_token = t1
             elif (res.int_value <= 2
                   and ((t1.is_value("ПОЛУГОДИЕ", None)
                         or t1.is_value("ПІВРІЧЧЯ", None)))):
                 res.typ = DateItemToken.DateItemType.HALFYEAR
                 res.end_token = t1
         return res
     t0 = Utils.asObjectOrNull(t, TextToken)
     if (t0 is None):
         return None
     txt = t0.get_source_text()
     if ((txt[0] == 'I' or txt[0] == 'X' or txt[0] == 'Х')
             or txt[0] == 'V'):
         lat = NumberHelper.try_parse_roman(t)
         if (lat is not None and lat.end_token.next0_ is not None
                 and lat.int_value is not None):
             val = lat.int_value
             tt = lat.end_token.next0_
             if (tt.is_value("КВАРТАЛ", None) and val > 0 and val <= 4):
                 return DateItemToken._new629(
                     t, tt, DateItemToken.DateItemType.QUARTAL, val)
             if (tt.is_value("ПОЛУГОДИЕ", "ПІВРІЧЧЯ") and val > 0
                     and val <= 2):
                 return DateItemToken._new629(
                     t, lat.end_token.next0_,
                     DateItemToken.DateItemType.HALFYEAR, val)
             if (tt.is_value("ВЕК", "ВІК")
                     or tt.is_value("СТОЛЕТИЕ", "СТОЛІТТЯ")):
                 return DateItemToken._new629(
                     t, lat.end_token.next0_,
                     DateItemToken.DateItemType.CENTURY, val)
             if (tt.is_value("В", None) and tt.next0_ is not None
                     and tt.next0_.is_char('.')):
                 if (prev is not None and len(prev) > 0
                         and prev[len(prev) - 1].typ
                         == DateItemToken.DateItemType.POINTER):
                     return DateItemToken._new629(
                         t, tt.next0_, DateItemToken.DateItemType.CENTURY,
                         val)
                 if (DateItemToken.__is_new_age(tt.next0_.next0_)):
                     return DateItemToken._new629(
                         t, tt.next0_, DateItemToken.DateItemType.CENTURY,
                         val)
             if (tt.is_hiphen):
                 lat2 = NumberHelper.try_parse_roman(tt.next0_)
                 if (lat2 is not None and lat2.int_value is not None
                         and lat2.end_token.next0_ is not None):
                     if (lat2.end_token.next0_.is_value("ВЕК", "ВІК")
                             or lat2.end_token.next0_.is_value(
                                 "СТОЛЕТИЕ", "СТОЛІТТЯ")):
                         ddd = DateItemToken.try_attach(
                             tt.next0_, None, False)
                         return DateItemToken._new634(
                             t, lat.end_token,
                             DateItemToken.DateItemType.CENTURY, val,
                             ((ddd.new_age if ddd is not None else 0)))
     if (t is not None and t.is_value("НАПРИКІНЦІ", None)):
         return DateItemToken._new635(t, t,
                                      DateItemToken.DateItemType.POINTER,
                                      "конец")
     if (t is not None and t.is_value("ДОНЕДАВНА", None)):
         return DateItemToken._new635(t, t,
                                      DateItemToken.DateItemType.POINTER,
                                      "сегодня")
     if (prev is None):
         if (t is not None):
             if (t.is_value("ОКОЛО", "БІЛЯ")
                     or t.is_value("ПРИМЕРНО", "ПРИБЛИЗНО")
                     or t.is_value("ABOUT", None)):
                 return DateItemToken._new635(
                     t, t, DateItemToken.DateItemType.POINTER, "около")
         if (t.is_value("ОК", None) or t.is_value("OK", None)):
             if (t.next0_ is not None and t.next0_.is_char('.')):
                 return DateItemToken._new635(
                     t, t.next0_, DateItemToken.DateItemType.POINTER,
                     "около")
             return DateItemToken._new635(
                 t, t, DateItemToken.DateItemType.POINTER, "около")
     tok = DateItemToken.M_SEASONS.try_parse(t, TerminParseAttr.NO)
     if ((tok is not None and
          (Utils.valToEnum(tok.termin.tag, DatePointerType))
          == DatePointerType.SUMMER and t.morph.language.is_ru)
             and (isinstance(t, TextToken))):
         str0_ = t.term
         if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
             tok = (None)
     if (tok is not None):
         return DateItemToken._new629(
             t, tok.end_token, DateItemToken.DateItemType.POINTER,
             Utils.valToEnum(tok.termin.tag, DatePointerType))
     npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
     if (npt is not None):
         tok = DateItemToken.M_SEASONS.try_parse(npt.end_token,
                                                 TerminParseAttr.NO)
         if ((tok is not None and
              (Utils.valToEnum(tok.termin.tag, DatePointerType))
              == DatePointerType.SUMMER and t.morph.language.is_ru)
                 and (isinstance(t, TextToken))):
             str0_ = t.term
             if (str0_ != "ЛЕТОМ" and str0_ != "ЛЕТА" and str0_ != "ЛЕТО"):
                 tok = (None)
         if (tok is not None):
             return DateItemToken._new629(
                 t, tok.end_token, DateItemToken.DateItemType.POINTER,
                 Utils.valToEnum(tok.termin.tag, DatePointerType))
         typ_ = DateItemToken.DateItemType.NUMBER
         if (npt.noun.is_value("КВАРТАЛ", None)):
             typ_ = DateItemToken.DateItemType.QUARTAL
         elif (npt.end_token.is_value("ПОЛУГОДИЕ", None)
               or npt.end_token.is_value("ПІВРІЧЧЯ", None)):
             typ_ = DateItemToken.DateItemType.HALFYEAR
         elif (npt.end_token.is_value("НАЧАЛО", None)
               or npt.end_token.is_value("ПОЧАТОК", None)):
             return DateItemToken._new635(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "начало")
         elif (npt.end_token.is_value("СЕРЕДИНА", None)):
             return DateItemToken._new635(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "середина")
         elif (npt.end_token.is_value("КОНЕЦ", None)
               or npt.end_token.is_value("КІНЕЦЬ", None)
               or npt.end_token.is_value("НАПРИКІНЕЦЬ", None)):
             return DateItemToken._new635(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "конец")
         elif (npt.end_token.is_value("ВРЕМЯ", None)
               and len(npt.adjectives) > 0
               and npt.end_token.previous.is_value("НАСТОЯЩЕЕ", None)):
             return DateItemToken._new635(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "сегодня")
         elif (npt.end_token.is_value("ЧАС", None)
               and len(npt.adjectives) > 0
               and npt.end_token.previous.is_value("ДАНИЙ", None)):
             return DateItemToken._new635(
                 t, npt.end_token, DateItemToken.DateItemType.POINTER,
                 "сегодня")
         if (typ_ != DateItemToken.DateItemType.NUMBER or detail_regime):
             delta = 0
             if (len(npt.adjectives) > 0):
                 if (npt.adjectives[0].is_value("ПОСЛЕДНИЙ", "ОСТАННІЙ")):
                     return DateItemToken._new629(
                         t0, npt.end_token, typ_,
                         (4 if typ_ == DateItemToken.DateItemType.QUARTAL
                          else 2))
                 if (npt.adjectives[0].is_value("ПРЕДЫДУЩИЙ", "ПОПЕРЕДНІЙ")
                         or npt.adjectives[0].is_value("ПРОШЛЫЙ", None)):
                     delta = -1
                 elif (npt.adjectives[0].is_value("СЛЕДУЮЩИЙ", None)
                       or npt.adjectives[0].is_value("ПОСЛЕДУЮЩИЙ", None)
                       or npt.adjectives[0].is_value("НАСТУПНИЙ", None)):
                     delta = 1
                 else:
                     return None
             cou = 0
             tt = t.previous
             first_pass3073 = True
             while True:
                 if first_pass3073: first_pass3073 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (cou > 200):
                     break
                 dr = Utils.asObjectOrNull(tt.get_referent(),
                                           DateRangeReferent)
                 if (dr is None):
                     continue
                 if (typ_ == DateItemToken.DateItemType.QUARTAL):
                     ii = dr.quarter_number
                     if (ii < 1):
                         continue
                     ii += delta
                     if ((ii < 1) or ii > 4):
                         continue
                     return DateItemToken._new629(t0, npt.end_token, typ_,
                                                  ii)
                 if (typ_ == DateItemToken.DateItemType.HALFYEAR):
                     ii = dr.halfyear_number
                     if (ii < 1):
                         continue
                     ii += delta
                     if ((ii < 1) or ii > 2):
                         continue
                     return DateItemToken._new629(t0, npt.end_token, typ_,
                                                  ii)
     term = t0.term
     if (not str.isalnum(term[0])):
         if (t0.is_char_of(".\\/:") or t0.is_hiphen):
             return DateItemToken._new635(t0, t0,
                                          DateItemToken.DateItemType.DELIM,
                                          term)
         elif (t0.is_char(',')):
             return DateItemToken._new635(t0, t0,
                                          DateItemToken.DateItemType.DELIM,
                                          term)
         else:
             return None
     if (term == "O" or term == "О"):
         if ((isinstance(t.next0_, NumberToken))
                 and not t.is_whitespace_after
                 and len(t.next0_.value) == 1):
             return DateItemToken._new629(t, t.next0_,
                                          DateItemToken.DateItemType.NUMBER,
                                          t.next0_.int_value)
     if (str.isalpha(term[0])):
         inf = DateItemToken.M_MONTHES.try_parse(t, TerminParseAttr.NO)
         if (inf is not None and inf.termin.tag is None):
             inf = DateItemToken.M_MONTHES.try_parse(
                 inf.end_token.next0_, TerminParseAttr.NO)
         if (inf is not None and (isinstance(inf.termin.tag, int))):
             return DateItemToken._new653(inf.begin_token, inf.end_token,
                                          DateItemToken.DateItemType.MONTH,
                                          inf.termin.tag, inf.termin.lang)
     return None
Ejemplo n.º 27
0
 def try_parse(t : 'Token', add_units : 'TerminCollection', can_be_set : bool=True, can_units_absent : bool=False, is_resctriction : bool=False, is_subval : bool=False) -> 'MeasureToken':
     if (not (isinstance(t, TextToken))): 
         return None
     if (t.is_table_control_char): 
         return None
     t0 = t
     whd = None
     minmax = 0
     wrapminmax1625 = RefOutArgWrapper(minmax)
     tt = NumbersWithUnitToken._is_min_or_max(t0, wrapminmax1625)
     minmax = wrapminmax1625.value
     if (tt is not None): 
         t = tt.next0_
     npt = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS), NounPhraseParseAttr), 0, None)
     if (npt is None): 
         whd = NumbersWithUnitToken._try_parsewhl(t)
         if (whd is not None): 
             npt = NounPhraseToken(t0, whd.end_token)
         elif (t0.is_value("КПД", None)): 
             npt = NounPhraseToken(t0, t0)
         elif ((isinstance(t0, TextToken)) and t0.length_char > 3 and t0.get_morph_class_in_dictionary().is_undefined): 
             npt = NounPhraseToken(t0, t0)
         elif (t0.is_value("T", None) and t0.chars.is_all_lower): 
             npt = NounPhraseToken(t0, t0)
             t = t0
             if (t.next0_ is not None and t.next0_.is_char('=')): 
                 npt.end_token = t.next0_
         elif ((isinstance(t0, TextToken)) and t0.chars.is_letter and is_subval): 
             if (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None): 
                 return None
             npt = NounPhraseToken(t0, t0)
             t = t0.next0_
             while t is not None: 
                 if (t.whitespaces_before_count > 2): 
                     break
                 elif (not (isinstance(t, TextToken))): 
                     break
                 elif (not t.chars.is_letter): 
                     br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
                     if (br is not None): 
                         t = br.end_token
                         npt.end_token = t
                     else: 
                         break
                 elif (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None): 
                     break
                 else: 
                     npt.end_token = t
                 t = t.next0_
         else: 
             return None
     elif (NumberHelper.try_parse_real_number(t, True, False) is not None): 
         return None
     else: 
         dtok = DateItemToken.try_attach(t, None, False)
         if (dtok is not None): 
             return None
     t1 = npt.end_token
     t = npt.end_token
     name_ = MetaToken._new509(npt.begin_token, npt.end_token, npt.morph)
     units = None
     units2 = None
     internals_ = list()
     not0_ = False
     tt = t1.next0_
     first_pass3305 = True
     while True:
         if first_pass3305: first_pass3305 = False
         else: tt = tt.next0_
         if (not (tt is not None)): break
         if (tt.is_newline_before): 
             break
         if (tt.is_table_control_char): 
             break
         wrapminmax1617 = RefOutArgWrapper(minmax)
         tt2 = NumbersWithUnitToken._is_min_or_max(tt, wrapminmax1617)
         minmax = wrapminmax1617.value
         if (tt2 is not None): 
             tt = tt2
             t = tt
             t1 = t
             continue
         if ((tt.is_value("БЫТЬ", None) or tt.is_value("ДОЛЖЕН", None) or tt.is_value("ДОЛЖНЫЙ", None)) or tt.is_value("МОЖЕТ", None) or ((tt.is_value("СОСТАВЛЯТЬ", None) and not tt.get_morph_class_in_dictionary().is_adjective))): 
             t = tt
             t1 = t
             if (tt.previous.is_value("НЕ", None)): 
                 not0_ = True
             continue
         www = NumbersWithUnitToken._try_parsewhl(tt)
         if (www is not None): 
             whd = www
             tt = www.end_token
             t = tt
             t1 = t
             continue
         if (tt.is_value("ПРИ", None)): 
             mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False)
             if (mt1 is not None): 
                 internals_.append(mt1)
                 tt = mt1.end_token
                 t = tt
                 t1 = t
                 continue
             n1 = NumbersWithUnitToken.try_parse(tt.next0_, add_units, False, False, False, False)
             if (n1 is not None and len(n1.units) > 0): 
                 mt1 = MeasureToken._new1612(n1.begin_token, n1.end_token, n1)
                 internals_.append(mt1)
                 tt = mt1.end_token
                 t = tt
                 t1 = t
                 continue
         if (tt.is_value("ПО", None) and tt.next0_ is not None and tt.next0_.is_value("U", None)): 
             tt = tt.next0_
             t = tt
             t1 = t
             continue
         if (len(internals_) > 0): 
             if (tt.is_char(':')): 
                 break
             mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False)
             if (mt1 is not None and mt1.reliable): 
                 internals_.append(mt1)
                 tt = mt1.end_token
                 t = tt
                 t1 = t
                 continue
         if ((isinstance(tt, NumberToken)) and tt.typ == NumberSpellingType.WORDS): 
             npt3 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.PARSENUMERICASADJECTIVE, 0, None)
             if (npt3 is not None): 
                 tt = npt3.end_token
                 t1 = tt
                 if (len(internals_) == 0): 
                     name_.end_token = t1
                 continue
         if (((tt.is_hiphen and not tt.is_whitespace_before and not tt.is_whitespace_after) and (isinstance(tt.next0_, NumberToken)) and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper): 
             t = tt.next0_
             tt = t
             t1 = tt
             if (len(internals_) == 0): 
                 name_.end_token = t1
             continue
         if (((isinstance(tt, NumberToken)) and not tt.is_whitespace_before and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper): 
             t = tt
             t1 = t
             if (len(internals_) == 0): 
                 name_.end_token = t1
             continue
         if ((((isinstance(tt, NumberToken)) and not tt.is_whitespace_after and tt.next0_.is_hiphen) and not tt.next0_.is_whitespace_after and (isinstance(tt.next0_.next0_, TextToken))) and tt.next0_.next0_.length_char > 2): 
             tt = tt.next0_.next0_
             t = tt
             t1 = t
             npt1 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
             if (npt1 is not None and npt1.end_char > tt.end_char): 
                 tt = npt1.end_token
                 t = tt
                 t1 = t
             if (len(internals_) == 0): 
                 name_.end_token = t1
             continue
         if ((isinstance(tt, NumberToken)) and tt.previous is not None): 
             if (tt.previous.is_value("USB", None)): 
                 t = tt
                 t1 = t
                 if (len(internals_) == 0): 
                     name_.end_token = t1
                 ttt = tt.next0_
                 while ttt is not None: 
                     if (ttt.is_whitespace_before): 
                         break
                     if (ttt.is_char_of(",:")): 
                         break
                     tt = ttt
                     t = tt
                     t1 = t
                     if (len(internals_) == 0): 
                         name_.end_token = t1
                     ttt = ttt.next0_
                 continue
         mt0 = NumbersWithUnitToken.try_parse(tt, add_units, False, False, False, False)
         if (mt0 is not None): 
             npt1 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSENUMERICASADJECTIVE) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), 0, None)
             if (npt1 is not None and npt1.end_char > mt0.end_char): 
                 tt = npt1.end_token
                 t = tt
                 t1 = t
                 if (len(internals_) == 0): 
                     name_.end_token = t1
                 continue
             break
         if (((tt.is_comma or tt.is_char('('))) and tt.next0_ is not None): 
             www = NumbersWithUnitToken._try_parsewhl(tt.next0_)
             if (www is not None): 
                 whd = www
                 tt = www.end_token
                 t = tt
                 t1 = t
                 if (tt.next0_ is not None and tt.next0_.is_comma): 
                     tt = tt.next0_
                     t1 = tt
                 if (tt.next0_ is not None and tt.next0_.is_char(')')): 
                     tt = tt.next0_
                     t1 = tt
                     continue
             uu = UnitToken.try_parse_list(tt.next0_, add_units, False)
             if (uu is not None): 
                 t = uu[len(uu) - 1].end_token
                 t1 = t
                 units = uu
                 if (tt.is_char('(') and t1.next0_ is not None and t1.next0_.is_char(')')): 
                     tt = t1.next0_
                     t = tt
                     t1 = t
                     continue
                 elif (t1.next0_ is not None and t1.next0_.is_char('(')): 
                     uu = UnitToken.try_parse_list(t1.next0_.next0_, add_units, False)
                     if (uu is not None and uu[len(uu) - 1].end_token.next0_ is not None and uu[len(uu) - 1].end_token.next0_.is_char(')')): 
                         units2 = uu
                         tt = uu[len(uu) - 1].end_token.next0_
                         t = tt
                         t1 = t
                         continue
                     www = NumbersWithUnitToken._try_parsewhl(t1.next0_)
                     if (www is not None): 
                         whd = www
                         tt = www.end_token
                         t = tt
                         t1 = t
                         continue
                 if (uu is not None and len(uu) > 0 and not uu[0].is_doubt): 
                     break
                 if (t1.next0_ is not None): 
                     if (t1.next0_.is_table_control_char or t1.is_newline_after): 
                         break
                 units = (None)
         if (BracketHelper.can_be_start_of_sequence(tt, False, False) and not (isinstance(tt.next0_, NumberToken))): 
             br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
             if (br is not None): 
                 tt = br.end_token
                 t = tt
                 t1 = t
                 continue
         if (tt.is_value("НЕ", None) and tt.next0_ is not None): 
             mc = tt.next0_.get_morph_class_in_dictionary()
             if (mc.is_adverb or mc.is_misc): 
                 break
             continue
         if (tt.is_value("ЯМЗ", None)): 
             pass
         npt2 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS) | (NounPhraseParseAttr.PARSEPRONOUNS), NounPhraseParseAttr), 0, None)
         if (npt2 is None): 
             if (tt.morph.class0_.is_preposition or tt.morph.class0_.is_conjunction): 
                 to = NumbersWithUnitToken.M_TERMINS.try_parse(tt, TerminParseAttr.NO)
                 if (to is not None): 
                     if ((isinstance(to.end_token.next0_, TextToken)) and to.end_token.next0_.is_letters): 
                         pass
                     else: 
                         break
                 t1 = tt
                 continue
             mc = tt.get_morph_class_in_dictionary()
             if (((isinstance(tt, TextToken)) and tt.chars.is_letter and tt.length_char > 1) and (((tt.chars.is_all_upper or mc.is_adverb or mc.is_undefined) or mc.is_adjective))): 
                 uu = UnitToken.try_parse_list(tt, add_units, False)
                 if (uu is not None): 
                     if (uu[0].length_char > 1 or len(uu) > 1): 
                         units = uu
                         t = uu[len(uu) - 1].end_token
                         t1 = t
                         break
                 t = tt
                 t1 = t
                 if (len(internals_) == 0): 
                     name_.end_token = tt
                 continue
             if (tt.is_comma): 
                 continue
             if (tt.is_char('.')): 
                 if (not MiscHelper.can_be_start_of_sentence(tt.next0_)): 
                     continue
                 uu = UnitToken.try_parse_list(tt.next0_, add_units, False)
                 if (uu is not None): 
                     if (uu[0].length_char > 2 or len(uu) > 1): 
                         units = uu
                         t = uu[len(uu) - 1].end_token
                         t1 = t
                         break
             break
         tt = npt2.end_token
         t = tt
         t1 = t
         if (len(internals_) > 0): 
             pass
         elif (t.is_value("ПРЕДЕЛ", None) or t.is_value("ГРАНИЦА", None) or t.is_value("ДИАПАЗОН", None)): 
             pass
         elif (t.chars.is_letter): 
             name_.end_token = t1
     t11 = t1
     t1 = t1.next0_
     first_pass3306 = True
     while True:
         if first_pass3306: first_pass3306 = False
         else: t1 = t1.next0_
         if (not (t1 is not None)): break
         if (t1.is_table_control_char): 
             pass
         elif (t1.is_char_of(":,_")): 
             if (is_resctriction): 
                 return None
             www = NumbersWithUnitToken._try_parsewhl(t1.next0_)
             if (www is not None): 
                 whd = www
                 t = www.end_token
                 t1 = t
                 continue
             uu = UnitToken.try_parse_list(t1.next0_, add_units, False)
             if (uu is not None): 
                 if (uu[0].length_char > 1 or len(uu) > 1): 
                     units = uu
                     t = uu[len(uu) - 1].end_token
                     t1 = t
                     continue
             if (t1.is_char(':')): 
                 li = list()
                 ttt = t1.next0_
                 first_pass3307 = True
                 while True:
                     if first_pass3307: first_pass3307 = False
                     else: ttt = ttt.next0_
                     if (not (ttt is not None)): break
                     if (ttt.is_hiphen or ttt.is_table_control_char): 
                         continue
                     if ((isinstance(ttt, TextToken)) and not ttt.chars.is_letter): 
                         continue
                     mt1 = MeasureToken.try_parse(ttt, add_units, True, True, False, True)
                     if (mt1 is None): 
                         break
                     li.append(mt1)
                     ttt = mt1.end_token
                     if (ttt.next0_ is not None and ttt.next0_.is_char(';')): 
                         ttt = ttt.next0_
                     if (ttt.is_char(';')): 
                         pass
                     elif (ttt.is_newline_after and mt1.is_newline_before): 
                         pass
                     else: 
                         break
                 if (len(li) > 1): 
                     res0 = MeasureToken._new1618(t0, li[len(li) - 1].end_token, li, True)
                     if (internals_ is not None and len(internals_) > 0): 
                         res0.internal_ex = internals_[0]
                     nam = MiscHelper.get_text_value_of_meta_token(name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
                     li[0].begin_token = t0
                     for v in li: 
                         v.name = "{0} ({1})".format(nam, Utils.ifNotNull(v.name, "")).strip()
                         if (v.nums is not None and len(v.nums.units) == 0 and units is not None): 
                             v.nums.units = units
                     return res0
         elif (t1.is_hiphen and t1.is_whitespace_after and t1.is_whitespace_before): 
             pass
         elif (t1.is_hiphen and t1.next0_ is not None and t1.next0_.is_char('(')): 
             pass
         else: 
             break
     if (t1 is None): 
         return None
     mts = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, not0_, True, is_resctriction)
     if (mts is None): 
         if (units is not None and len(units) > 0): 
             if (t1 is None or t1.previous.is_char(':')): 
                 mts = list()
                 if (t1 is None): 
                     t1 = t11
                     while t1 is not None and t1.next0_ is not None: 
                         pass
                         t1 = t1.next0_
                 else: 
                     t1 = t1.previous
                 mts.append(NumbersWithUnitToken._new1619(t0, t1, math.nan))
         if (mts is None): 
             return None
     mt = mts[0]
     if (mt.begin_token == mt.end_token and not (isinstance(mt.begin_token, NumberToken))): 
         return None
     if (not is_subval and name_.begin_token.morph.class0_.is_preposition): 
         name_.begin_token = name_.begin_token.next0_
     if (mt.whl is not None): 
         whd = mt.whl
     for kk in range(10):
         if (whd is not None and whd.end_token == name_.end_token): 
             name_.end_token = whd.begin_token.previous
             continue
         if (units is not None): 
             if (units[len(units) - 1].end_token == name_.end_token): 
                 name_.end_token = units[0].begin_token.previous
                 continue
         break
     if (len(mts) > 1 and len(internals_) == 0): 
         if (len(mt.units) == 0): 
             if (units is not None): 
                 for m in mts: 
                     m.units = units
         res1 = MeasureToken._new1620(t0, mts[len(mts) - 1].end_token, name_.morph, True)
         res1.name = MiscHelper.get_text_value_of_meta_token(name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
         k = 0
         while k < len(mts): 
             ttt = MeasureToken._new1612(mts[k].begin_token, mts[k].end_token, mts[k])
             if (whd is not None): 
                 nams = Utils.asObjectOrNull(whd.tag, list)
                 if (k < len(nams)): 
                     ttt.name = nams[k]
             res1.internals.append(ttt)
             k += 1
         tt1 = res1.end_token.next0_
         if (tt1 is not None and tt1.is_char('±')): 
             nn = NumbersWithUnitToken._try_parse(tt1, add_units, True, False, False)
             if (nn is not None and nn.plus_minus_percent): 
                 res1.end_token = nn.end_token
                 res1.nums = nn
                 if (len(nn.units) > 0 and units is None and len(mt.units) == 0): 
                     for m in mts: 
                         m.units = nn.units
         return res1
     if (not mt.is_whitespace_before): 
         if (mt.begin_token.previous is None): 
             return None
         if (mt.begin_token.previous.is_char_of(":),") or mt.begin_token.previous.is_table_control_char or mt.begin_token.previous.is_value("IP", None)): 
             pass
         elif (mt.begin_token.is_hiphen and len(mt.units) > 0 and not mt.units[0].is_doubt): 
             pass
         else: 
             return None
     if (len(mt.units) == 0 and units is not None): 
         mt.units = units
         if (mt.div_num is not None and len(units) > 1 and len(mt.div_num.units) == 0): 
             i = 1
             while i < len(units): 
                 if (units[i].pow0_ == -1): 
                     j = i
                     while j < len(units): 
                         mt.div_num.units.append(units[j])
                         units[j].pow0_ = (- units[j].pow0_)
                         j += 1
                     del mt.units[i:i+len(units) - i]
                     break
                 i += 1
     if ((minmax < 0) and mt.single_val is not None): 
         mt.from_val = mt.single_val
         mt.from_include = True
         mt.single_val = (None)
     if (minmax > 0 and mt.single_val is not None): 
         mt.to_val = mt.single_val
         mt.to_include = True
         mt.single_val = (None)
     if (len(mt.units) == 0): 
         units = UnitToken.try_parse_list(mt.end_token.next0_, add_units, True)
         if (units is None): 
             if (can_units_absent): 
                 pass
             else: 
                 return None
         else: 
             mt.units = units
     res = MeasureToken._new1622(t0, mt.end_token, name_.morph, internals_)
     if (((not t0.is_whitespace_before and t0.previous is not None and t0 == name_.begin_token) and t0.previous.is_hiphen and not t0.previous.is_whitespace_before) and (isinstance(t0.previous.previous, TextToken))): 
         name_.begin_token = res.begin_token = name_.begin_token.previous.previous
     res.name = MiscHelper.get_text_value_of_meta_token(name_, (GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE if not is_subval else GetTextAttr.NO))
     res.nums = mt
     for u in res.nums.units: 
         if (u.keyword is not None): 
             if (u.keyword.begin_char >= res.begin_char): 
                 res.reliable = True
     res.__parse_internals(add_units)
     if (len(res.internals) > 0 or not can_be_set): 
         return res
     t1 = res.end_token.next0_
     if (t1 is not None and t1.is_comma_and): 
         t1 = t1.next0_
     mts1 = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, False, False, False)
     if ((mts1 is not None and len(mts1) == 1 and (t1.whitespaces_before_count < 3)) and len(mts1[0].units) > 0 and not UnitToken.can_be_equals(mts[0].units, mts1[0].units)): 
         res.is_set = True
         res.nums = (None)
         res.internals.append(MeasureToken._new1612(mt.begin_token, mt.end_token, mt))
         res.internals.append(MeasureToken._new1612(mts1[0].begin_token, mts1[0].end_token, mts1[0]))
         res.end_token = mts1[0].end_token
     return res
Ejemplo n.º 28
0
 def __init__(self, sofa_ : 'SourceOfAnalysis'=None, only_tokenizing : bool=False, lang : 'MorphLang'=None, progress : EventHandler=None) -> None:
     """Initialise the container and, when a source is given, build its token chain.

     Args:
         sofa_: source of analysis; when None only the attribute defaults are
             set and the constructor returns immediately.
         only_tokenizing: when True, stop after the number tokens have been
             embedded (the morphology refinement pass and the statistics
             step are skipped).
         lang: language hint forwarded to ``Morphology.process`` and to the
             word-correction steps.
         progress: not used anywhere in this constructor.
     """
     # Attribute defaults; these are set even when no source is supplied.
     self._start_date = datetime.datetime(1, 1, 1, 0, 0, 0)
     self.corrected_tokens = None
     self.first_token = None
     self.__m_entities = list()
     self.ontology = None
     self.base_language = MorphLang()
     self.__m_sofa = None
     self.statistics = None
     self.__m_datas = dict()
     self.misc_data = dict()
     self.processor = None
     self.recurse_level = 0
     self._m_analyzer_stack = list()
     if sofa_ is None:
         return
     self.__m_sofa = sofa_
     self._start_date = datetime.datetime.now()

     # Step 1: wrap every morphology token into a TextToken and link the
     # tokens into a chain through ``next0_``.
     tokens = Morphology.process(sofa_.text, lang, None)
     last = None
     if tokens is not None:
         for mt in tokens:
             tt = TextToken(mt, self)
             if sofa_.correction_dict is not None:
                 wrap = RefOutArgWrapper(None)
                 found = Utils.tryGetValue(sofa_.correction_dict, mt.term, wrap)
                 corrected = wrap.value
                 if found:
                     ccc = Morphology.process(corrected, lang, None)
                     # The replacement is applied only when the corrected
                     # text is itself exactly one morphology token.
                     if ccc is not None and len(ccc) == 1:
                         tt1 = TextToken._new538(ccc[0], self, tt.term)
                         tt1.begin_char = tt.begin_char
                         tt1.end_char = tt.end_char
                         tt1.chars = tt.chars
                         tt = tt1
                         if self.corrected_tokens is None:
                             self.corrected_tokens = dict()
                         self.corrected_tokens[tt] = tt.getSourceText()
             if last is None:
                 self.first_token = tt
             else:
                 last.next0_ = tt
             last = tt

     # Step 2: optional clean-up passes selected by flags on the source.
     if sofa_.clear_dust:
         self.__clearDust()
     if sofa_.do_words_merging_by_morph:
         self.__correctWordsByMerging(lang)
     if sofa_.do_word_correction_by_morph:
         self.__correctWordsByMorph(lang)
     self.__mergeLetters()
     self.__defineBaseLanguage()

     # Step 3: embed number tokens and continue scanning after each one.
     t = self.first_token
     while t is not None:
         nt = NumberHelper._tryParseNumber(t)
         if nt is not None:
             self.embedToken(nt)
             t = nt
         t = t.next0_
     if only_tokenizing:
         return

     def _tail(tok):
         # Two-character slice of the source text ending at ``tok.end_char``.
         return sofa_.text[tok.end_char - 1:tok.end_char - 1 + 2]

     def _agreeing_neighbour(tok, forward):
         # Return the adjacent token (optionally across one comma/"and",
         # preposition or conjunction) that is known to the dictionary,
         # shares a morph-class bit with ``tok``, is longer than 4 chars
         # and has the same two-character tail; otherwise None.
         cand = tok.next0_ if forward else tok.previous
         if cand is not None and (cand.is_comma_and or cand.morph.class0_.is_preposition or cand.morph.class0_.is_conjunction):
             cand = cand.next0_ if forward else cand.previous
         if cand is None:
             return None
         if cand.getMorphClassInDictionary().is_undefined:
             return None
         if ((cand.morph.class0_.value) & (tok.morph.class0_.value)) == 0:
             return None
         if not (cand.length_char > 4):
             return None
         if _tail(cand) != _tail(tok):
             return None
         return cand

     # Step 4: for Cyrillic words longer than 4 chars that are missing from
     # the dictionary, refine the morphology using an agreeing neighbour
     # (the previous token is tried first, then the next one).
     t = self.first_token
     while t is not None:
         if not t.morph.class0_.is_preposition:
             mc = t.getMorphClassInDictionary()
             if mc.is_undefined and t.chars.is_cyrillic_letter and t.length_char > 4:
                 tte = _agreeing_neighbour(t, False)
                 if tte is None:
                     tte = _agreeing_neighbour(t, True)
                 if tte is not None:
                     # NOTE(review): presumably narrows t's morph variants to
                     # those compatible with the neighbour - confirm against
                     # removeItemsEx.
                     t.morph.removeItemsEx(tte.morph, tte.getMorphClassInDictionary())
         t = t.next0_
     self.__createStatistics()
Ejemplo n.º 29
0
 def try_attach_list(t: 'Token',
                     max_count: int = 20) -> typing.List['DateItemToken']:
     """Collect consecutive date items starting at token ``t``.

     Args:
         t: token at which the first date item must be attached.
         max_count: when positive, collection stops once this many items
             have been gathered.

     Returns:
         The list of collected items, or None when nothing usable was found
         (no first item, first item is a delimiter, the list became empty
         after trimming, or a two-item result is glued to its surroundings).
     """
     head = DateItemToken.try_attach(t, None, False)
     if head is None:
         return None
     kinds = DateItemToken.DateItemType
     if head.typ == kinds.DELIM:
         return None

     items = [head]
     prev = head
     cursor = head.end_token.next0_
     while cursor is not None:
         # Words listed in M_EMPTY_WORDS are skipped silently.
         if (isinstance(cursor, TextToken)
                 and cursor.check_value(DateItemToken.M_EMPTY_WORDS) is not None):
             cursor = cursor.next0_
             continue
         found = DateItemToken.try_attach(cursor, items, False)
         if found is None:
             # An unparsed token may be stepped over only when it does not
             # start a line, is not Latin, and is an adjective or pronoun.
             skippable = (not cursor.is_newline_before
                          and not cursor.chars.is_latin_letter
                          and cursor.morph is not None
                          and cursor.morph.check((MorphClass.ADJECTIVE)
                                                 | MorphClass.PRONOUN))
             if not skippable:
                 break
             cursor = cursor.next0_
             continue
         if cursor.is_newline_before:
             # A line break is tolerated only for "month / year" and
             # "day number / month" pairs.
             bridges = ((prev.typ == kinds.MONTH and found.can_be_year)
                        or (prev.typ == kinds.NUMBER and prev.can_be_day
                            and found.typ == kinds.MONTH))
             if not bridges:
                 break
         if found.can_be_year and found.typ == kinds.NUMBER:
             # Promote a plain number to a year after half-year / quarter,
             # or after a pointer when the value exceeds 1990.
             if (prev.typ == kinds.HALFYEAR or prev.typ == kinds.QUARTAL
                     or (prev.typ == kinds.POINTER
                         and found.int_value > 1990)):
                 found.typ = kinds.YEAR
         prev = found
         items.append(found)
         if max_count > 0 and len(items) >= max_count:
             break
         cursor = found.end_token.next0_

     # Drop delimiters left hanging at the end.
     while len(items) > 0 and items[-1].typ == kinds.DELIM:
         del items[-1]

     # A trailing number that carries a non-hour postfix is discarded,
     # unless it follows a ":" delimiter in a list longer than 3 items.
     if len(items) > 0 and items[-1].typ == kinds.NUMBER:
         nex = NumberHelper.try_parse_number_with_postfix(
             items[-1].begin_token)
         if nex is not None and nex.ex_typ != NumberExType.HOUR:
             after_colon = (len(items) > 3
                            and items[-2].typ == kinds.DELIM
                            and items[-2].string_value == ":")
             if not after_colon:
                 del items[-1]
     if len(items) == 0:
         return None

     # Remove the comma in an exact "MONTH , year" triple.
     idx = 1
     while idx < (len(items) - 1):
         current = items[idx]
         if (current.typ == kinds.DELIM and current.begin_token.is_comma
                 and idx == 1
                 and items[idx - 1].typ == kinds.MONTH
                 and items[idx + 1].can_be_year
                 and (idx + 1) == (len(items) - 1)):
             del items[idx]
         idx += 1

     # A trailing number that begins a longer noun phrase is not a date
     # part; remove it together with a delimiter that then becomes last.
     tail_item = items[-1]
     if tail_item.typ == kinds.NUMBER:
         npt = NounPhraseHelper.try_parse(tail_item.begin_token,
                                          NounPhraseParseAttr.NO, 0, None)
         if npt is not None and npt.end_char > tail_item.end_char:
             del items[-1]
             if len(items) > 0 and items[-1].typ == kinds.DELIM:
                 del items[-1]
     if len(items) == 0:
         return None

     # Reject a two-item result that has no whitespace before, between or
     # after its items.
     if (len(items) == 2
             and not items[0].is_whitespace_after
             and not items[0].is_whitespace_before
             and not items[1].is_whitespace_after):
         return None
     return items
Ejemplo n.º 30
0
 def try_attach(t: 'Token',
                must_has_prefix: bool = False) -> 'OrgItemEponymToken':
     """Try to recognise an eponym fragment of an organization name at ``t``.

     Handles, in order: a few fixed DATE referents and "age + capitalised
     Cyrillic word" for non-text tokens; the keywords "ИМЕНИ"/"ІМЕНІ" and
     the abbreviations "ИМ"/"ІМ"; a capitalised word directly after
     "ФОНД", "ХРАМ" or "ЦЕРКОВЬ"; then dates, persons, anniversaries,
     geo referents and person-item sequences following the keyword.

     Args:
         t: token at which the eponym is expected to start.
         must_has_prefix: when True, the result is None unless one of the
             ИМЕНИ / ИМ keywords was found.

     Returns:
         An OrgItemEponymToken with its ``eponyms`` filled, or None when
         nothing was recognised.
     """
     from pullenti.ner.org.internal.OrgItemNameToken import OrgItemNameToken
     tt = Utils.asObjectOrNull(t, TextToken)
     if (tt is None):
         # Non-text token: only a few fixed dates or "age + word" qualify.
         if (t is None):
             return None
         r1 = t.get_referent()
         if (r1 is not None and r1.type_name == "DATE"):
             str0_ = str(r1).upper()
             if ((str0_ == "1 МАЯ" or str0_ == "7 ОКТЯБРЯ"
                  or str0_ == "9 МАЯ") or str0_ == "8 МАРТА"):
                 dt = OrgItemEponymToken._new1797(t, t, list())
                 dt.eponyms.append(str0_)
                 return dt
         age = NumberHelper.try_parse_age(t)
         if ((age is not None and
              (((isinstance(age.end_token.next0_, TextToken)) or
                (isinstance(age.end_token.next0_, ReferentToken)))) and
              (age.whitespaces_after_count < 3))
                 and not age.end_token.next0_.chars.is_all_lower
                 and age.end_token.next0_.chars.is_cyrillic_letter):
             dt = OrgItemEponymToken._new1797(t, age.end_token.next0_,
                                              list())
             dt.eponyms.append("{0} {1}".format(
                 age.value,
                 dt.end_token.get_source_text().upper()))
             return dt
         return None
     # t1 - token where the name itself starts; full - the keyword was
     # written in full or as "ИМ."; has_name - a keyword was present.
     t1 = None
     full = False
     has_name = False
     if (tt.term == "ИМЕНИ" or tt.term == "ІМЕНІ"):
         t1 = t.next0_
         full = True
         has_name = True
     elif (((tt.term == "ИМ" or tt.term == "ІМ"))
           and tt.next0_ is not None):
         if (tt.next0_.is_char('.')):
             t1 = tt.next0_.next0_
             full = True
         elif ((isinstance(tt.next0_, TextToken)) and tt.chars.is_all_lower
               and not tt.next0_.chars.is_all_lower):
             t1 = tt.next0_
         has_name = True
     elif (tt.previous is not None
           and ((tt.previous.is_value("ФОНД", None)
                 or tt.previous.is_value("ХРАМ", None)
                 or tt.previous.is_value("ЦЕРКОВЬ", "ЦЕРКВА")))):
         # No keyword: accept a capitalised Cyrillic word separated by
         # exactly one whitespace from the preceding noun.
         if ((not tt.chars.is_cyrillic_letter
              or tt.morph.class0_.is_preposition
              or tt.morph.class0_.is_conjunction)
                 or not tt.chars.is_letter):
             return None
         if (tt.whitespaces_before_count != 1):
             return None
         if (tt.chars.is_all_lower):
             return None
         if (tt.morph.class0_.is_adjective):
             npt = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0,
                                              None)
             if (npt is not None and npt.begin_token != npt.end_token):
                 return None
         na = OrgItemNameToken.try_attach(tt, None, False, True)
         if (na is not None):
             if (na.is_empty_word or na.is_std_name or na.is_std_tail):
                 return None
         t1 = (tt)
     if (t1 is None or ((t1.is_newline_before and not full))):
         return None
     if (tt.previous is not None
             and tt.previous.morph.class0_.is_preposition):
         return None
     if (must_has_prefix and not has_name):
         return None
     # A DATE referent with a day but no year is taken as the eponym.
     r = t1.get_referent()
     if ((r is not None and r.type_name == "DATE" and full)
             and r.find_slot("DAY", None, True) is not None
             and r.find_slot("YEAR", None, True) is None):
         dt = OrgItemEponymToken._new1797(t, t1, list())
         dt.eponyms.append(str(r).upper())
         return dt
     # Optional "СВЯТОЙ" / "СВ." marker before the name.
     holy = False
     if ((t1.is_value("СВЯТОЙ", None) or t1.is_value("СВЯТИЙ", None)
          or t1.is_value("СВ", None)) or t1.is_value("СВЯТ", None)):
         t1 = t1.next0_
         holy = True
         if (t1 is not None and t1.is_char('.')):
             t1 = t1.next0_
     if (t1 is None):
         return None
     # For dictionary nouns/adjectives first try a multi-token PERSON
     # whose last token is the LASTNAME value.
     cl = t1.get_morph_class_in_dictionary()
     if (cl.is_noun or cl.is_adjective):
         rt = t1.kit.process_referent("PERSON", t1)
         if (rt is not None and rt.referent.type_name == "PERSON"
                 and rt.begin_token != rt.end_token):
             e0_ = rt.referent.get_string_value("LASTNAME")
             if (e0_ is not None):
                 if (rt.end_token.is_value(e0_, None)):
                     re = OrgItemEponymToken(t, rt.end_token)
                     re.eponyms.append(rt.end_token.get_source_text())
                     return re
     # Anniversary followed by a noun phrase: "<N>-ЛЕТ <noun phrase>".
     nt = NumberHelper.try_parse_anniversary(t1)
     if (nt is not None and nt.typ == NumberSpellingType.AGE):
         npt = NounPhraseHelper.try_parse(nt.end_token.next0_,
                                          NounPhraseParseAttr.NO, 0, None)
         if (npt is not None):
             s = "{0}-{1} {2}".format(
                 nt.value,
                 ("РОКІВ" if t.kit.base_language.is_ua else "ЛЕТ"),
                 MiscHelper.get_text_value(npt.begin_token, npt.end_token,
                                           GetTextAttr.NO))
             res = OrgItemEponymToken(t, npt.end_token)
             res.eponyms.append(s)
             return res
     its = OrgItemEponymToken.PersonItemToken.try_attach(t1)
     if (its is None):
         # No person items: a geo referent token may serve as the eponym.
         if ((isinstance(t1, ReferentToken))
                 and (isinstance(t1.get_referent(), GeoReferent))):
             s = MiscHelper.get_text_value(t1, t1, GetTextAttr.NO)
             re = OrgItemEponymToken(t, t1)
             re.eponyms.append(s)
             return re
         return None
     eponims = list()
     i = 0
     j = 0
     # A leading lower-case word is skipped; i may therefore be 1 below.
     if (its[i].typ == OrgItemEponymToken.PersonItemType.LOCASEWORD):
         i += 1
     if (i >= len(its)):
         return None
     if (not full):
         if (its[i].begin_token.morph.class0_.is_adjective and
                 not its[i].begin_token.morph.class0_.is_proper_surname):
             return None
     if (its[i].typ == OrgItemEponymToken.PersonItemType.INITIAL):
         # "I. [I.] Surname [and I. [I.] Surname ...]"
         i += 1
         while True:
             if ((i < len(its)) and its[i].typ
                     == OrgItemEponymToken.PersonItemType.INITIAL):
                 i += 1
             if (i >= len(its) or
                 ((its[i].typ != OrgItemEponymToken.PersonItemType.SURNAME
                   and
                   its[i].typ != OrgItemEponymToken.PersonItemType.NAME))):
                 break
             eponims.append(its[i].value)
             t1 = its[i].end_token
             if ((i + 2) >= len(its) or
                     its[i + 1].typ != OrgItemEponymToken.PersonItemType.AND
                     or its[i + 2].typ !=
                     OrgItemEponymToken.PersonItemType.INITIAL):
                 break
             i += 3
     elif (((i + 1) < len(its))
           and its[i].typ == OrgItemEponymToken.PersonItemType.NAME
           and its[i + 1].typ == OrgItemEponymToken.PersonItemType.SURNAME):
         # "Name Surname [and Name Surname]" - only surnames are stored.
         eponims.append(its[i + 1].value)
         t1 = its[i + 1].end_token
         i += 2
         if ((((i + 2) < len(its))
              and its[i].typ == OrgItemEponymToken.PersonItemType.AND
              and its[i + 1].typ == OrgItemEponymToken.PersonItemType.NAME)
                 and its[i + 2].typ
                 == OrgItemEponymToken.PersonItemType.SURNAME):
             eponims.append(its[i + 2].value)
             t1 = its[i + 2].end_token
     elif (its[i].typ == OrgItemEponymToken.PersonItemType.SURNAME):
         # Two final items with identical char info are merged into one
         # surname value.
         if (len(its) == (i + 2) and its[i].chars == its[i + 1].chars):
             its[i].value += (" " + its[i + 1].value)
             its[i].end_token = its[i + 1].end_token
             del its[i + 1]
         eponims.append(its[i].value)
         if (((i + 1) < len(its)) and its[i + 1].typ
                 == OrgItemEponymToken.PersonItemType.NAME):
             if ((i + 2) == len(its)):
                 i += 1
             elif (its[i + 2].typ !=
                   OrgItemEponymToken.PersonItemType.SURNAME):
                 i += 1
         elif (((i + 1) < len(its)) and its[i + 1].typ
               == OrgItemEponymToken.PersonItemType.INITIAL):
             if ((i + 2) == len(its)):
                 i += 1
             elif (its[i + 2].typ
                   == OrgItemEponymToken.PersonItemType.INITIAL
                   and (i + 3) == len(its)):
                 i += 2
         elif (((i + 2) < len(its))
               and its[i + 1].typ == OrgItemEponymToken.PersonItemType.AND
               and its[i + 2].typ
               == OrgItemEponymToken.PersonItemType.SURNAME):
             # The second surname is accepted unless its noun phrase has a
             # defined case that is not genitive.
             ok = True
             npt = NounPhraseHelper.try_parse(its[i + 2].begin_token,
                                              NounPhraseParseAttr.NO, 0,
                                              None)
             if (npt is not None and not npt.morph.case_.is_genitive
                     and not npt.morph.case_.is_undefined):
                 ok = False
             if (ok):
                 eponims.append(its[i + 2].value)
                 i += 2
         t1 = its[i].end_token
     elif (its[i].typ == OrgItemEponymToken.PersonItemType.NAME and holy):
         # Saint's name, optionally followed by a second non-initial item
         # with identical char info.
         t1 = its[i].end_token
         sec = False
         if (((i + 1) < len(its)) and its[i].chars == its[i + 1].chars
                 and its[i + 1].typ !=
                 OrgItemEponymToken.PersonItemType.INITIAL):
             sec = True
             t1 = its[i + 1].end_token
         if (sec):
             eponims.append("СВЯТ.{0} {1}".format(its[i].value,
                                                  its[i + 1].value))
         else:
             eponims.append("СВЯТ.{0}".format(its[i].value))
     elif (full and (i + 1) == len(its) and
           ((its[i].typ == OrgItemEponymToken.PersonItemType.NAME
             or its[i].typ == OrgItemEponymToken.PersonItemType.SURNAME))):
         t1 = its[i].end_token
         eponims.append(its[i].value)
     elif ((its[i].typ == OrgItemEponymToken.PersonItemType.NAME
            and len(its) == 3
            # Guard: i is 1 when a leading LOCASEWORD item was skipped, in
            # which case its[i + 2] would be out of range for 3 items.
            and (i + 2) < len(its)
            and its[i + 1].typ == OrgItemEponymToken.PersonItemType.NAME)
           and its[i + 2].typ == OrgItemEponymToken.PersonItemType.SURNAME):
         t1 = its[i + 2].end_token
         eponims.append("{0} {1} {2}".format(its[i].value, its[i + 1].value,
                                             its[i + 2].value))
         i += 2
     if (len(eponims) == 0):
         return None
     return OrgItemEponymToken._new1797(t, t1, eponims)