Python MetaToken Exemples, pullenti.ner.MetaToken.MetaToken Python Exemples

Exemple #1

0

Afficher le fichier

 def __str__(self) -> str:
     """Return a short display string for this item.

     The result is "?" when either name-boundary token is missing.
     Otherwise it is "<rank>: <text of the name span>", followed by
     " (<type_value>)" when a type value is set.
     """
     # Guard clauses: without both boundary tokens no span can be built.
     if self.begin_name_token is None:
         return "?"
     if self.end_name_token is None:
         return "?"
     name_span = MetaToken(self.begin_name_token, self.end_name_token)
     if self.type_value is not None:
         return "{0}: {1} ({2})".format(self.rank, str(name_span), self.type_value)
     return "{0}: {1}".format(self.rank, str(name_span))

Exemple #2

0

Afficher le fichier

Fichier : MiscLocationHelper.py Projet : pullenti/PullentiPython

 def try_attach_nord_west(t: 'Token') -> 'MetaToken':
     """Try to build a MetaToken for a term from ``__m_nords`` starting at ``t``.

     Args:
         t(Token): candidate first token; anything but a TextToken is rejected.

     Returns:
         A MetaToken covering ``t`` (with ``t.morph``), extended to a second
         ``__m_nords`` match when one follows a hyphen or an adjective;
         None when ``t`` is not a TextToken or does not match ``__m_nords``.
     """
     if not isinstance(t, TextToken):
         return None
     if MiscLocationHelper.__m_nords.try_parse(t, TerminParseAttr.NO) is None:
         return None
     res = MetaToken._new509(t, t, t.morph)
     # Pick the token where a second part may start.
     # NOTE(review): the original tested `not t.is_whitespace_after` twice in
     # the hyphen branch; the second test may have been meant for t.next0_ -
     # confirm against the upstream source.
     follower = None
     if t.next0_ is not None and t.next0_.is_hiphen and not t.is_whitespace_after:
         follower = t.next0_.next0_
     elif t.morph.class0_.is_adjective and t.whitespaces_after_count < 2:
         follower = t.next0_
     if follower is None:
         return res
     second = MiscLocationHelper.__m_nords.try_parse(follower, TerminParseAttr.NO)
     if second is not None:
         # Stretch the result over the second part and take over its morphology.
         res.end_token = second.end_token
         res.morph = second.morph
     return res

Exemple #3

0

Afficher le fichier

 def __deserialize_token(stream : Stream, kit : 'AnalysisKit', vers : int) -> 'Token':
     """Read a single token from ``stream``.

     A short type code is read first: 0 yields None, 1 a TextToken,
     2 a NumberToken, 3 a ReferentToken and any other value a MetaToken.
     The token then loads its own fields via ``_deserialize``. For MetaToken
     instances the nested token chain is read as well and its first and last
     elements become the begin/end tokens.

     Args:
         stream(Stream): source stream.
         kit(AnalysisKit): kit passed to every token constructor.
         vers(int): version value forwarded to the deserializers.

     Returns:
         The deserialized token, or None for type code 0.
     """
     # Imported locally, as in the original (presumably to avoid an import cycle).
     from pullenti.ner.MetaToken import MetaToken
     from pullenti.ner.ReferentToken import ReferentToken
     kind = SerializerHelper.deserialize_short(stream)
     if kind == 0:
         return None
     if kind == 1:
         token = TextToken(None, kit)
     elif kind == 2:
         token = NumberToken(None, None, None, NumberSpellingType.DIGIT, kit)
     elif kind == 3:
         token = ReferentToken(None, None, None, kit)
     else:
         token = MetaToken(None, None, kit)
     token._deserialize(stream, kit, vers)
     if not isinstance(token, MetaToken):
         return token
     child = SerializerHelper.deserialize_tokens(stream, kit, vers)
     if child is not None:
         token._m_begin_token = child
         # Walk to the end of the chain; the last visited node is the end token.
         while child is not None:
             token._m_end_token = child
             child = child.next0_
     return token

Exemple #4

0

Afficher le fichier

 def tryAttachNordWest(t : 'Token') -> 'MetaToken':
     """ Extract nouns and adjectives such as "северо-западное" (north-western), "южное" (southern)
     
     Args:
         t(Token): candidate first token
     
     Returns:
         MetaToken covering the match, or None when ``t`` is not a TextToken
         or does not match the ``__m_nords`` terms.
     """
     if not isinstance(t, TextToken):
         return None
     first_match = MiscLocationHelper.__m_nords.tryParse(t, TerminParseAttr.NO)
     if first_match is None:
         return None
     res = MetaToken._new561(t, t, t.morph)
     # NOTE(review): the original checked `not t.is_whitespace_after` twice in
     # the hyphen condition; one check is equivalent. The second may have been
     # intended for t.next0_ - confirm against the upstream source.
     hyphen_follows = (t.next0_ is not None and t.next0_.is_hiphen
                       and not t.is_whitespace_after)
     if hyphen_follows:
         candidate = t.next0_.next0_
     elif t.morph.class0_.is_adjective and t.whitespaces_after_count < 2:
         candidate = t.next0_
     else:
         candidate = None
     if candidate is not None:
         second_match = MiscLocationHelper.__m_nords.tryParse(candidate, TerminParseAttr.NO)
         if second_match is not None:
             # Extend the span and adopt the morphology of the second part.
             res.end_token = second_match.end_token
             res.morph = second_match.morph
     return res

Exemple #5

0

Afficher le fichier

 def tryParse(t: 'Token',
              add_units: 'TerminCollection',
              can_be_set: bool = True,
              can_units_absent: bool = False) -> 'MeasureToken':
     """ Extract a measure together with its name.

     The function first reads a name (a noun phrase or one of several
     fallbacks), then scans forward on the same line collecting name
     continuations, units and nested measures, and finally parses the
     numeric value(s) with NumbersWithUnitToken.tryParseMulti.

     Args:
         t(Token): first token of the candidate; must be a TextToken.
         add_units(TerminCollection): forwarded unchanged to the unit and
             number parsing helpers.
         can_be_set(bool): when False, the final check for a second value
             with different units (which marks the result as a set) is skipped.
         can_units_absent(bool): when True, a value without any units is
             still accepted.

     Returns:
         MeasureToken on success, otherwise None.
     """
     if (not ((isinstance(t, TextToken)))):
         return None
     if (t.is_table_control_char):
         return None
     # t0 keeps the original start; t may move past a min/max marker below.
     t0 = t
     whd = None
     minmax = 0
     # minmax is passed through a wrapper and read back after the call.
     wrapminmax1516 = RefOutArgWrapper(minmax)
     tt = NumbersWithUnitToken._isMinOrMax(t0, wrapminmax1516)
     minmax = wrapminmax1516.value
     if (tt is not None):
         t = tt.next0_
     # Try to read the name as a noun phrase starting at t.
     npt = NounPhraseHelper.tryParse(
         t,
         Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) |
                         (NounPhraseParseAttr.IGNOREBRACKETS),
                         NounPhraseParseAttr), 0)
     if (npt is None):
         # Fallbacks when there is no noun phrase: a _tryParseWHL match,
         # the literal "КПД", or a word longer than 3 chars whose
         # dictionary morph class is undefined.
         whd = NumbersWithUnitToken._tryParseWHL(t)
         if (whd is not None):
             npt = NounPhraseToken(t0, whd.end_token)
         elif (t0.isValue("КПД", None)):
             npt = NounPhraseToken(t0, t0)
         elif ((isinstance(t0, TextToken)) and t0.length_char > 3
               and t0.getMorphClassInDictionary().is_undefined):
             npt = NounPhraseToken(t0, t0)
         else:
             return None
     # A noun phrase that also parses as a real number or as a date item
     # is rejected.
     elif (NumberHelper.tryParseRealNumber(t, True) is not None):
         return None
     else:
         dtok = DateItemToken.tryAttach(t, None)
         if (dtok is not None):
             return None
     # t and t1 both track the last token consumed so far; name_ is the
     # span later used to produce the textual name.
     t1 = npt.end_token
     t = npt.end_token
     name_ = MetaToken._new561(npt.begin_token, npt.end_token, npt.morph)
     units = None
     # NOTE(review): units2 is assigned below but never read in this function.
     units2 = None
     internals_ = list()
     not0_ = False
     # Main scan over the tokens after the name. The first_pass flag makes
     # every `continue` advance tt to tt.next0_ before the next iteration.
     tt = t1.next0_
     first_pass3037 = True
     while True:
         if first_pass3037: first_pass3037 = False
         else: tt = tt.next0_
         if (not (tt is not None)): break
         # The scan never crosses a line break or a table control character.
         if (tt.is_newline_before):
             break
         if (tt.is_table_control_char):
             break
         # A min/max marker inside the sequence updates minmax and is consumed.
         wrapminmax1510 = RefOutArgWrapper(minmax)
         tt2 = NumbersWithUnitToken._isMinOrMax(tt, wrapminmax1510)
         minmax = wrapminmax1510.value
         if (tt2 is not None):
             tt = tt2
             t = tt
             t1 = t
             continue
         # Linking words are consumed; a preceding "НЕ" sets the not0_ flag
         # that is later passed to tryParseMulti.
         if ((tt.isValue("БЫТЬ", None) or tt.isValue("ДОЛЖЕН", None)
              or tt.isValue("ДОЛЖНЫЙ", None)) or tt.isValue("МОЖЕТ", None)
                 or
             ((tt.isValue("СОСТАВЛЯТЬ", None)
               and not tt.getMorphClassInDictionary().is_adjective))):
             t = tt
             t1 = t
             if (tt.previous.isValue("НЕ", None)):
                 not0_ = True
             continue
         www = NumbersWithUnitToken._tryParseWHL(tt)
         if (www is not None):
             whd = www
             tt = www.end_token
             t = tt
             t1 = t
             continue
         # Once nested measures exist, separators between them are skipped.
         if (len(internals_) > 0 and tt.is_comma_and):
             continue
         # After "ПРИ" (or after a previous nested measure) try to parse a
         # nested measure recursively; only reliable ones are kept.
         if (tt.isValue("ПРИ", None) or len(internals_) > 0):
             mt1 = MeasureToken.tryParse(tt.next0_, add_units, False, False)
             if (mt1 is not None and mt1.reliable):
                 internals_.append(mt1)
                 tt = mt1.end_token
                 t = tt
                 t1 = t
                 continue
         # A number spelled in words may be part of the name.
         if ((isinstance(tt, NumberToken))
                 and (tt).typ == NumberSpellingType.WORDS):
             npt3 = NounPhraseHelper.tryParse(
                 tt, NounPhraseParseAttr.PARSENUMERICASADJECTIVE, 0)
             if (npt3 is not None):
                 tt = npt3.end_token
                 t1 = tt
                 if (len(internals_) == 0):
                     name_.end_token = t1
                 continue
         # A number-with-unit starting here ends the name scan.
         mt0 = NumbersWithUnitToken.tryParse(tt, add_units, False, False)
         if (mt0 is not None):
             break
         # After a comma or "(": look for a _tryParseWHL match or a unit list.
         if (((tt.is_comma or tt.isChar('('))) and tt.next0_ is not None):
             www = NumbersWithUnitToken._tryParseWHL(tt.next0_)
             if (www is not None):
                 whd = www
                 tt = www.end_token
                 t = tt
                 t1 = t
                 if (tt.next0_ is not None and tt.next0_.is_comma):
                     tt = tt.next0_
                     t1 = tt
                 if (tt.next0_ is not None and tt.next0_.isChar(')')):
                     tt = tt.next0_
                     t1 = tt
                     continue
             uu = UnitToken.tryParseList(tt.next0_, add_units, False)
             if (uu is not None):
                 t = uu[len(uu) - 1].end_token
                 t1 = t
                 units = uu
                 # Units enclosed in brackets: consume the closing ")".
                 if (tt.isChar('(') and t1.next0_ is not None
                         and t1.next0_.isChar(')')):
                     tt = t1.next0_
                     t = tt
                     t1 = t
                     continue
                 # Units followed by a bracketed second unit list.
                 elif (t1.next0_ is not None and t1.next0_.isChar('(')):
                     uu = UnitToken.tryParseList(t1.next0_.next0_,
                                                 add_units, False)
                     if (uu is not None and uu[len(uu) - 1].end_token.next0_
                             is not None and
                             uu[len(uu) - 1].end_token.next0_.isChar(')')):
                         units2 = uu
                         tt = uu[len(uu) - 1].end_token.next0_
                         t = tt
                         t1 = t
                         continue
                 # Units whose first element is not doubtful end the scan.
                 if (uu is not None and len(uu) > 0 and not uu[0].is_doubt):
                     break
         # A bracketed sequence is consumed as a whole.
         if (BracketHelper.canBeStartOfSequence(tt, False, False)):
             br = BracketHelper.tryParse(tt, BracketParseAttr.NO, 100)
             if (br is not None):
                 tt = br.end_token
                 t = tt
                 t1 = t
                 continue
         # "НЕ" followed by an adverb or misc-class word ends the scan;
         # otherwise the particle is skipped.
         if (tt.isValue("НЕ", None) and tt.next0_ is not None):
             mc = tt.next0_.getMorphClassInDictionary()
             if (mc.is_adverb or mc.is_misc):
                 break
             continue
         # No-op branch (does nothing at runtime).
         if (tt.isValue("ЯМЗ", None)):
             pass
         npt2 = NounPhraseHelper.tryParse(
             tt,
             Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) |
                             (NounPhraseParseAttr.IGNOREBRACKETS),
                             NounPhraseParseAttr), 0)
         if (npt2 is None):
             # Prepositions and conjunctions are stepped over unless they
             # start an M_TERMINS match that is not followed by a word.
             if (tt.morph.class0_.is_preposition
                     or tt.morph.class0_.is_conjunction):
                 to = NumbersWithUnitToken.M_TERMINS.tryParse(
                     tt, TerminParseAttr.NO)
                 if (to is not None):
                     if ((isinstance(to.end_token.next0_, TextToken))
                             and to.end_token.next0_.is_letters):
                         pass
                     else:
                         break
                 t1 = tt
                 continue
             mc = tt.getMorphClassInDictionary()
             # Multi-letter word that is upper-case, an adverb, undefined or
             # an adjective: either a unit list (ends the scan) or part of
             # the name.
             if (((isinstance(tt, TextToken)) and tt.chars.is_letter
                  and tt.length_char > 1)
                     and (((tt.chars.is_all_upper or mc.is_adverb
                            or mc.is_undefined) or mc.is_adjective))):
                 uu = UnitToken.tryParseList(tt, add_units, False)
                 if (uu is not None):
                     if (uu[0].length_char > 2 or len(uu) > 1):
                         units = uu
                         t = uu[len(uu) - 1].end_token
                         t1 = t
                         break
                 t = tt
                 t1 = t
                 if (len(internals_) == 0):
                     name_.end_token = tt
                 continue
             if (tt.is_comma):
                 continue
             # A dot that cannot end a sentence is skipped; otherwise units
             # may follow it.
             if (tt.isChar('.')):
                 if (not MiscHelper.canBeStartOfSentence(tt.next0_)):
                     continue
                 uu = UnitToken.tryParseList(tt.next0_, add_units, False)
                 if (uu is not None):
                     if (uu[0].length_char > 2 or len(uu) > 1):
                         units = uu
                         t = uu[len(uu) - 1].end_token
                         t1 = t
                         break
             break
         # A further noun phrase: consume it and, unless nested measures
         # exist or it ends with a range word, extend the name over it.
         tt = npt2.end_token
         t = tt
         t1 = t
         if (len(internals_) > 0):
             pass
         elif (t.isValue("ПРЕДЕЛ", None) or t.isValue("ГРАНИЦА", None)
               or t.isValue("ДИАПАЗОН", None)):
             pass
         elif (t.chars.is_letter):
             name_.end_token = t1
     # Skip table control chars, ":", ",", "_" and space-surrounded hyphens
     # between the name and the value; stop at the first other token.
     t1 = t1.next0_
     first_pass3038 = True
     while True:
         if first_pass3038: first_pass3038 = False
         else: t1 = t1.next0_
         if (not (t1 is not None)): break
         if (t1.is_table_control_char):
             pass
         elif (t1.isCharOf(":,_")):
             www = NumbersWithUnitToken._tryParseWHL(t1.next0_)
             if (www is not None):
                 whd = www
                 t = www.end_token
                 t1 = t
                 continue
         elif (t1.is_hiphen and t1.is_whitespace_after
               and t1.is_whitespace_before):
             pass
         else:
             break
     if (t1 is None):
         return None
     # Parse the numeric value(s) starting at t1.
     mts = NumbersWithUnitToken.tryParseMulti(t1, add_units, False, not0_)
     if (mts is None):
         return None
     mt = mts[0]
     # Drop a leading preposition from the name span.
     if (name_.begin_token.morph.class0_.is_preposition):
         name_.begin_token = name_.begin_token.next0_
     # Several values and no nested measures: build a composite result
     # with one internal MeasureToken per value.
     if (len(mts) > 1 and len(internals_) == 0):
         if (len(mt.units) == 0):
             if (units is not None):
                 for m in mts:
                     m.units = units
         res1 = MeasureToken._new1511(t0, mts[len(mts) - 1].end_token,
                                      name_.morph, True)
         res1.name = MiscHelper.getTextValueOfMetaToken(
             name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
         k = 0
         while k < len(mts):
             ttt = MeasureToken._new1506(mts[k].begin_token,
                                         mts[k].end_token, mts[k])
             if (whd is not None):
                 # NOTE(review): if Utils.asObjectOrNull yields None (tag is
                 # not a list - presumably possible), len(nams) raises
                 # TypeError; confirm that whd.tag is always a list here.
                 nams = Utils.asObjectOrNull(whd.tag, list)
                 if (k < len(nams)):
                     ttt.name = nams[k]
             res1.internals.append(ttt)
             k += 1
         # An optional "±" part with plus_minus_percent set is attached.
         tt1 = res1.end_token.next0_
         if (tt1 is not None and tt1.isChar('±')):
             nn = NumbersWithUnitToken._tryParse(tt1, add_units, True,
                                                 False)
             if (nn is not None and nn.plus_minus_percent):
                 res1.end_token = nn.end_token
                 res1.nums = nn
         return res1
     # A value glued to the previous token is accepted only after ":", ")",
     # "," or a table control character.
     if (not mt.is_whitespace_before):
         if (mt.begin_token.previous is None):
             return None
         if (mt.begin_token.previous.isCharOf(":),")
                 or mt.begin_token.previous.is_table_control_char):
             pass
         else:
             return None
     # The value has no units of its own: use those found while scanning.
     if (len(mt.units) == 0 and units is not None):
         mt.units = units
         if (mt.div_num is not None and len(units) > 1
                 and len(mt.div_num.units) == 0):
             # From the first unit with power -1 (index >= 1) onwards, move
             # the units to div_num with their power sign flipped.
             i = 1
             while i < len(units):
                 if (units[i].pow0_ == -1):
                     j = i
                     while j < len(units):
                         mt.div_num.units.append(units[j])
                         units[j].pow0_ = (-units[j].pow0_)
                         j += 1
                     # The slice end equals len(units), i.e. the tail from i
                     # is removed (assumes mt.units has the same length as
                     # units - TODO confirm).
                     del mt.units[i:i + len(units) - i]
                     break
                 i += 1
     # Negative minmax turns a single value into a lower bound, positive
     # minmax into an upper bound (both inclusive).
     if ((minmax < 0) and mt.single_val is not None):
         mt.from_val = mt.single_val
         mt.from_include = True
         mt.single_val = (None)
     if (minmax > 0 and mt.single_val is not None):
         mt.to_val = mt.single_val
         mt.to_include = True
         mt.single_val = (None)
     # Still no units: look right after the value; fail unless the caller
     # allows absent units.
     if (len(mt.units) == 0):
         units = UnitToken.tryParseList(mt.end_token.next0_, add_units,
                                        True)
         if (units is None):
             if (can_units_absent):
                 pass
             else:
                 return None
         else:
             mt.units = units
     res = MeasureToken._new1513(t0, mt.end_token, name_.morph, internals_)
     # If the name starts at t0 and t0 is attached by a hyphen to a
     # preceding TextToken, pull that token into both name and result.
     if (((not t0.is_whitespace_before and t0.previous is not None
           and t0 == name_.begin_token) and t0.previous.is_hiphen
          and not t0.previous.is_whitespace_before)
             and (isinstance(t0.previous.previous, TextToken))):
         name_.begin_token = res.begin_token = name_.begin_token.previous.previous
     res.name = MiscHelper.getTextValueOfMetaToken(
         name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
     res.nums = mt
     # The result is marked reliable when some unit's keyword starts at or
     # after the beginning of the result span.
     for u in res.nums.units:
         if (u.keyword is not None):
             if (u.keyword.begin_char >= res.begin_char):
                 res.reliable = True
     res.__parseInternals(add_units)
     if (len(res.internals) > 0 or not can_be_set):
         return res
     # Set detection: a single further value (optionally after a comma/and
     # separator) with units that differ from the first value's units
     # turns the result into a set of two internal measures.
     t1 = res.end_token.next0_
     if (t1 is not None and t1.is_comma_and):
         t1 = t1.next0_
     mts1 = NumbersWithUnitToken.tryParseMulti(t1, add_units, False, False)
     if ((mts1 is not None and len(mts1) == 1 and
          (t1.whitespaces_before_count < 3)) and len(mts1[0].units) > 0
             and not UnitToken.canBeEquals(mts[0].units, mts1[0].units)):
         res.is_set = True
         res.nums = (None)
         res.internals.append(
             MeasureToken._new1506(mt.begin_token, mt.end_token, mt))
         res.internals.append(
             MeasureToken._new1506(mts1[0].begin_token, mts1[0].end_token,
                                   mts1[0]))
         res.end_token = mts1[0].end_token
     return res

Exemple #6

0

Afficher le fichier

 def try_parse(t : 'Token', add_units : 'TerminCollection', can_be_set : bool=True, can_units_absent : bool=False, is_resctriction : bool=False, is_subval : bool=False) -> 'MeasureToken':
     if (not (isinstance(t, TextToken))): 
         return None
     if (t.is_table_control_char): 
         return None
     t0 = t
     whd = None
     minmax = 0
     wrapminmax1625 = RefOutArgWrapper(minmax)
     tt = NumbersWithUnitToken._is_min_or_max(t0, wrapminmax1625)
     minmax = wrapminmax1625.value
     if (tt is not None): 
         t = tt.next0_
     npt = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS), NounPhraseParseAttr), 0, None)
     if (npt is None): 
         whd = NumbersWithUnitToken._try_parsewhl(t)
         if (whd is not None): 
             npt = NounPhraseToken(t0, whd.end_token)
         elif (t0.is_value("КПД", None)): 
             npt = NounPhraseToken(t0, t0)
         elif ((isinstance(t0, TextToken)) and t0.length_char > 3 and t0.get_morph_class_in_dictionary().is_undefined): 
             npt = NounPhraseToken(t0, t0)
         elif (t0.is_value("T", None) and t0.chars.is_all_lower): 
             npt = NounPhraseToken(t0, t0)
             t = t0
             if (t.next0_ is not None and t.next0_.is_char('=')): 
                 npt.end_token = t.next0_
         elif ((isinstance(t0, TextToken)) and t0.chars.is_letter and is_subval): 
             if (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None): 
                 return None
             npt = NounPhraseToken(t0, t0)
             t = t0.next0_
             while t is not None: 
                 if (t.whitespaces_before_count > 2): 
                     break
                 elif (not (isinstance(t, TextToken))): 
                     break
                 elif (not t.chars.is_letter): 
                     br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
                     if (br is not None): 
                         t = br.end_token
                         npt.end_token = t
                     else: 
                         break
                 elif (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None): 
                     break
                 else: 
                     npt.end_token = t
                 t = t.next0_
         else: 
             return None
     elif (NumberHelper.try_parse_real_number(t, True, False) is not None): 
         return None
     else: 
         dtok = DateItemToken.try_attach(t, None, False)
         if (dtok is not None): 
             return None
     t1 = npt.end_token
     t = npt.end_token
     name_ = MetaToken._new509(npt.begin_token, npt.end_token, npt.morph)
     units = None
     units2 = None
     internals_ = list()
     not0_ = False
     tt = t1.next0_
     first_pass3305 = True
     while True:
         if first_pass3305: first_pass3305 = False
         else: tt = tt.next0_
         if (not (tt is not None)): break
         if (tt.is_newline_before): 
             break
         if (tt.is_table_control_char): 
             break
         wrapminmax1617 = RefOutArgWrapper(minmax)
         tt2 = NumbersWithUnitToken._is_min_or_max(tt, wrapminmax1617)
         minmax = wrapminmax1617.value
         if (tt2 is not None): 
             tt = tt2
             t = tt
             t1 = t
             continue
         if ((tt.is_value("БЫТЬ", None) or tt.is_value("ДОЛЖЕН", None) or tt.is_value("ДОЛЖНЫЙ", None)) or tt.is_value("МОЖЕТ", None) or ((tt.is_value("СОСТАВЛЯТЬ", None) and not tt.get_morph_class_in_dictionary().is_adjective))): 
             t = tt
             t1 = t
             if (tt.previous.is_value("НЕ", None)): 
                 not0_ = True
             continue
         www = NumbersWithUnitToken._try_parsewhl(tt)
         if (www is not None): 
             whd = www
             tt = www.end_token
             t = tt
             t1 = t
             continue
         if (tt.is_value("ПРИ", None)): 
             mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False)
             if (mt1 is not None): 
                 internals_.append(mt1)
                 tt = mt1.end_token
                 t = tt
                 t1 = t
                 continue
             n1 = NumbersWithUnitToken.try_parse(tt.next0_, add_units, False, False, False, False)
             if (n1 is not None and len(n1.units) > 0): 
                 mt1 = MeasureToken._new1612(n1.begin_token, n1.end_token, n1)
                 internals_.append(mt1)
                 tt = mt1.end_token
                 t = tt
                 t1 = t
                 continue
         if (tt.is_value("ПО", None) and tt.next0_ is not None and tt.next0_.is_value("U", None)): 
             tt = tt.next0_
             t = tt
             t1 = t
             continue
         if (len(internals_) > 0): 
             if (tt.is_char(':')): 
                 break
             mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False)
             if (mt1 is not None and mt1.reliable): 
                 internals_.append(mt1)
                 tt = mt1.end_token
                 t = tt
                 t1 = t
                 continue
         if ((isinstance(tt, NumberToken)) and tt.typ == NumberSpellingType.WORDS): 
             npt3 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.PARSENUMERICASADJECTIVE, 0, None)
             if (npt3 is not None): 
                 tt = npt3.end_token
                 t1 = tt
                 if (len(internals_) == 0): 
                     name_.end_token = t1
                 continue
         if (((tt.is_hiphen and not tt.is_whitespace_before and not tt.is_whitespace_after) and (isinstance(tt.next0_, NumberToken)) and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper): 
             t = tt.next0_
             tt = t
             t1 = tt
             if (len(internals_) == 0): 
                 name_.end_token = t1
             continue
         if (((isinstance(tt, NumberToken)) and not tt.is_whitespace_before and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper): 
             t = tt
             t1 = t
             if (len(internals_) == 0): 
                 name_.end_token = t1
             continue
         if ((((isinstance(tt, NumberToken)) and not tt.is_whitespace_after and tt.next0_.is_hiphen) and not tt.next0_.is_whitespace_after and (isinstance(tt.next0_.next0_, TextToken))) and tt.next0_.next0_.length_char > 2): 
             tt = tt.next0_.next0_
             t = tt
             t1 = t
             npt1 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
             if (npt1 is not None and npt1.end_char > tt.end_char): 
                 tt = npt1.end_token
                 t = tt
                 t1 = t
             if (len(internals_) == 0): 
                 name_.end_token = t1
             continue
         if ((isinstance(tt, NumberToken)) and tt.previous is not None): 
             if (tt.previous.is_value("USB", None)): 
                 t = tt
                 t1 = t
                 if (len(internals_) == 0): 
                     name_.end_token = t1
                 ttt = tt.next0_
                 while ttt is not None: 
                     if (ttt.is_whitespace_before): 
                         break
                     if (ttt.is_char_of(",:")): 
                         break
                     tt = ttt
                     t = tt
                     t1 = t
                     if (len(internals_) == 0): 
                         name_.end_token = t1
                     ttt = ttt.next0_
                 continue
         mt0 = NumbersWithUnitToken.try_parse(tt, add_units, False, False, False, False)
         if (mt0 is not None): 
             npt1 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSENUMERICASADJECTIVE) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), 0, None)
             if (npt1 is not None and npt1.end_char > mt0.end_char): 
                 tt = npt1.end_token
                 t = tt
                 t1 = t
                 if (len(internals_) == 0): 
                     name_.end_token = t1
                 continue
             break
         if (((tt.is_comma or tt.is_char('('))) and tt.next0_ is not None): 
             www = NumbersWithUnitToken._try_parsewhl(tt.next0_)
             if (www is not None): 
                 whd = www
                 tt = www.end_token
                 t = tt
                 t1 = t
                 if (tt.next0_ is not None and tt.next0_.is_comma): 
                     tt = tt.next0_
                     t1 = tt
                 if (tt.next0_ is not None and tt.next0_.is_char(')')): 
                     tt = tt.next0_
                     t1 = tt
                     continue
             uu = UnitToken.try_parse_list(tt.next0_, add_units, False)
             if (uu is not None): 
                 t = uu[len(uu) - 1].end_token
                 t1 = t
                 units = uu
                 if (tt.is_char('(') and t1.next0_ is not None and t1.next0_.is_char(')')): 
                     tt = t1.next0_
                     t = tt
                     t1 = t
                     continue
                 elif (t1.next0_ is not None and t1.next0_.is_char('(')): 
                     uu = UnitToken.try_parse_list(t1.next0_.next0_, add_units, False)
                     if (uu is not None and uu[len(uu) - 1].end_token.next0_ is not None and uu[len(uu) - 1].end_token.next0_.is_char(')')): 
                         units2 = uu
                         tt = uu[len(uu) - 1].end_token.next0_
                         t = tt
                         t1 = t
                         continue
                     www = NumbersWithUnitToken._try_parsewhl(t1.next0_)
                     if (www is not None): 
                         whd = www
                         tt = www.end_token
                         t = tt
                         t1 = t
                         continue
                 if (uu is not None and len(uu) > 0 and not uu[0].is_doubt): 
                     break
                 if (t1.next0_ is not None): 
                     if (t1.next0_.is_table_control_char or t1.is_newline_after): 
                         break
                 units = (None)
         if (BracketHelper.can_be_start_of_sequence(tt, False, False) and not (isinstance(tt.next0_, NumberToken))): 
             br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
             if (br is not None): 
                 tt = br.end_token
                 t = tt
                 t1 = t
                 continue
         if (tt.is_value("НЕ", None) and tt.next0_ is not None): 
             mc = tt.next0_.get_morph_class_in_dictionary()
             if (mc.is_adverb or mc.is_misc): 
                 break
             continue
         if (tt.is_value("ЯМЗ", None)): 
             pass
         npt2 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS) | (NounPhraseParseAttr.PARSEPRONOUNS), NounPhraseParseAttr), 0, None)
         if (npt2 is None): 
             if (tt.morph.class0_.is_preposition or tt.morph.class0_.is_conjunction): 
                 to = NumbersWithUnitToken.M_TERMINS.try_parse(tt, TerminParseAttr.NO)
                 if (to is not None): 
                     if ((isinstance(to.end_token.next0_, TextToken)) and to.end_token.next0_.is_letters): 
                         pass
                     else: 
                         break
                 t1 = tt
                 continue
             mc = tt.get_morph_class_in_dictionary()
             if (((isinstance(tt, TextToken)) and tt.chars.is_letter and tt.length_char > 1) and (((tt.chars.is_all_upper or mc.is_adverb or mc.is_undefined) or mc.is_adjective))): 
                 uu = UnitToken.try_parse_list(tt, add_units, False)
                 if (uu is not None): 
                     if (uu[0].length_char > 1 or len(uu) > 1): 
                         units = uu
                         t = uu[len(uu) - 1].end_token
                         t1 = t
                         break
                 t = tt
                 t1 = t
                 if (len(internals_) == 0): 
                     name_.end_token = tt
                 continue
             if (tt.is_comma): 
                 continue
             if (tt.is_char('.')): 
                 if (not MiscHelper.can_be_start_of_sentence(tt.next0_)): 
                     continue
                 uu = UnitToken.try_parse_list(tt.next0_, add_units, False)
                 if (uu is not None): 
                     if (uu[0].length_char > 2 or len(uu) > 1): 
                         units = uu
                         t = uu[len(uu) - 1].end_token
                         t1 = t
                         break
             break
         tt = npt2.end_token
         t = tt
         t1 = t
         if (len(internals_) > 0): 
             pass
         elif (t.is_value("ПРЕДЕЛ", None) or t.is_value("ГРАНИЦА", None) or t.is_value("ДИАПАЗОН", None)): 
             pass
         elif (t.chars.is_letter): 
             name_.end_token = t1
     t11 = t1
     t1 = t1.next0_
     first_pass3306 = True
     while True:
         if first_pass3306: first_pass3306 = False
         else: t1 = t1.next0_
         if (not (t1 is not None)): break
         if (t1.is_table_control_char): 
             pass
         elif (t1.is_char_of(":,_")): 
             if (is_resctriction): 
                 return None
             www = NumbersWithUnitToken._try_parsewhl(t1.next0_)
             if (www is not None): 
                 whd = www
                 t = www.end_token
                 t1 = t
                 continue
             uu = UnitToken.try_parse_list(t1.next0_, add_units, False)
             if (uu is not None): 
                 if (uu[0].length_char > 1 or len(uu) > 1): 
                     units = uu
                     t = uu[len(uu) - 1].end_token
                     t1 = t
                     continue
             if (t1.is_char(':')): 
                 li = list()
                 ttt = t1.next0_
                 first_pass3307 = True
                 while True:
                     if first_pass3307: first_pass3307 = False
                     else: ttt = ttt.next0_
                     if (not (ttt is not None)): break
                     if (ttt.is_hiphen or ttt.is_table_control_char): 
                         continue
                     if ((isinstance(ttt, TextToken)) and not ttt.chars.is_letter): 
                         continue
                     mt1 = MeasureToken.try_parse(ttt, add_units, True, True, False, True)
                     if (mt1 is None): 
                         break
                     li.append(mt1)
                     ttt = mt1.end_token
                     if (ttt.next0_ is not None and ttt.next0_.is_char(';')): 
                         ttt = ttt.next0_
                     if (ttt.is_char(';')): 
                         pass
                     elif (ttt.is_newline_after and mt1.is_newline_before): 
                         pass
                     else: 
                         break
                 if (len(li) > 1): 
                     res0 = MeasureToken._new1618(t0, li[len(li) - 1].end_token, li, True)
                     if (internals_ is not None and len(internals_) > 0): 
                         res0.internal_ex = internals_[0]
                     nam = MiscHelper.get_text_value_of_meta_token(name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
                     li[0].begin_token = t0
                     for v in li: 
                         v.name = "{0} ({1})".format(nam, Utils.ifNotNull(v.name, "")).strip()
                         if (v.nums is not None and len(v.nums.units) == 0 and units is not None): 
                             v.nums.units = units
                     return res0
         elif (t1.is_hiphen and t1.is_whitespace_after and t1.is_whitespace_before): 
             pass
         elif (t1.is_hiphen and t1.next0_ is not None and t1.next0_.is_char('(')): 
             pass
         else: 
             break
     if (t1 is None): 
         return None
     mts = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, not0_, True, is_resctriction)
     if (mts is None): 
         if (units is not None and len(units) > 0): 
             if (t1 is None or t1.previous.is_char(':')): 
                 mts = list()
                 if (t1 is None): 
                     t1 = t11
                     while t1 is not None and t1.next0_ is not None: 
                         pass
                         t1 = t1.next0_
                 else: 
                     t1 = t1.previous
                 mts.append(NumbersWithUnitToken._new1619(t0, t1, math.nan))
         if (mts is None): 
             return None
     mt = mts[0]
     if (mt.begin_token == mt.end_token and not (isinstance(mt.begin_token, NumberToken))): 
         return None
     if (not is_subval and name_.begin_token.morph.class0_.is_preposition): 
         name_.begin_token = name_.begin_token.next0_
     if (mt.whl is not None): 
         whd = mt.whl
     for kk in range(10):
         if (whd is not None and whd.end_token == name_.end_token): 
             name_.end_token = whd.begin_token.previous
             continue
         if (units is not None): 
             if (units[len(units) - 1].end_token == name_.end_token): 
                 name_.end_token = units[0].begin_token.previous
                 continue
         break
     if (len(mts) > 1 and len(internals_) == 0): 
         if (len(mt.units) == 0): 
             if (units is not None): 
                 for m in mts: 
                     m.units = units
         res1 = MeasureToken._new1620(t0, mts[len(mts) - 1].end_token, name_.morph, True)
         res1.name = MiscHelper.get_text_value_of_meta_token(name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
         k = 0
         while k < len(mts): 
             ttt = MeasureToken._new1612(mts[k].begin_token, mts[k].end_token, mts[k])
             if (whd is not None): 
                 nams = Utils.asObjectOrNull(whd.tag, list)
                 if (k < len(nams)): 
                     ttt.name = nams[k]
             res1.internals.append(ttt)
             k += 1
         tt1 = res1.end_token.next0_
         if (tt1 is not None and tt1.is_char('±')): 
             nn = NumbersWithUnitToken._try_parse(tt1, add_units, True, False, False)
             if (nn is not None and nn.plus_minus_percent): 
                 res1.end_token = nn.end_token
                 res1.nums = nn
                 if (len(nn.units) > 0 and units is None and len(mt.units) == 0): 
                     for m in mts: 
                         m.units = nn.units
         return res1
     if (not mt.is_whitespace_before): 
         if (mt.begin_token.previous is None): 
             return None
         if (mt.begin_token.previous.is_char_of(":),") or mt.begin_token.previous.is_table_control_char or mt.begin_token.previous.is_value("IP", None)): 
             pass
         elif (mt.begin_token.is_hiphen and len(mt.units) > 0 and not mt.units[0].is_doubt): 
             pass
         else: 
             return None
     if (len(mt.units) == 0 and units is not None): 
         mt.units = units
         if (mt.div_num is not None and len(units) > 1 and len(mt.div_num.units) == 0): 
             i = 1
             while i < len(units): 
                 if (units[i].pow0_ == -1): 
                     j = i
                     while j < len(units): 
                         mt.div_num.units.append(units[j])
                         units[j].pow0_ = (- units[j].pow0_)
                         j += 1
                     del mt.units[i:i+len(units) - i]
                     break
                 i += 1
     if ((minmax < 0) and mt.single_val is not None): 
         mt.from_val = mt.single_val
         mt.from_include = True
         mt.single_val = (None)
     if (minmax > 0 and mt.single_val is not None): 
         mt.to_val = mt.single_val
         mt.to_include = True
         mt.single_val = (None)
     if (len(mt.units) == 0): 
         units = UnitToken.try_parse_list(mt.end_token.next0_, add_units, True)
         if (units is None): 
             if (can_units_absent): 
                 pass
             else: 
                 return None
         else: 
             mt.units = units
     res = MeasureToken._new1622(t0, mt.end_token, name_.morph, internals_)
     if (((not t0.is_whitespace_before and t0.previous is not None and t0 == name_.begin_token) and t0.previous.is_hiphen and not t0.previous.is_whitespace_before) and (isinstance(t0.previous.previous, TextToken))): 
         name_.begin_token = res.begin_token = name_.begin_token.previous.previous
     res.name = MiscHelper.get_text_value_of_meta_token(name_, (GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE if not is_subval else GetTextAttr.NO))
     res.nums = mt
     for u in res.nums.units: 
         if (u.keyword is not None): 
             if (u.keyword.begin_char >= res.begin_char): 
                 res.reliable = True
     res.__parse_internals(add_units)
     if (len(res.internals) > 0 or not can_be_set): 
         return res
     t1 = res.end_token.next0_
     if (t1 is not None and t1.is_comma_and): 
         t1 = t1.next0_
     mts1 = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, False, False, False)
     if ((mts1 is not None and len(mts1) == 1 and (t1.whitespaces_before_count < 3)) and len(mts1[0].units) > 0 and not UnitToken.can_be_equals(mts[0].units, mts1[0].units)): 
         res.is_set = True
         res.nums = (None)
         res.internals.append(MeasureToken._new1612(mt.begin_token, mt.end_token, mt))
         res.internals.append(MeasureToken._new1612(mts1[0].begin_token, mts1[0].end_token, mts1[0]))
         res.end_token = mts1[0].end_token
     return res

Exemple #7

0

Afficher le fichier

 def _tryParseWHL(t: 'Token') -> 'MetaToken':
     """Recognise dimension-name notations written like ГхШхВ.

     Two spellings are handled: a single five-character token whose second
     and fourth characters are 'Х', and a run of separate dimension words or
     abbreviations that may be separated by a multiplication sign, a comma
     or a slash.

     Args:
         t(Token): token at which the notation is expected to start

     Returns:
         MetaToken: created by MetaToken._new809 with the covered token range
         and the list of recognised dimension names; None when the start is
         not a TextToken or fewer than two names are recognised.
     """
     if not isinstance(t, TextToken):
         return None
     # A leading ':' or '-' is transparent: try again right after it.
     if t.isCharOf(":-"):
         nested = NumbersWithUnitToken._tryParseWHL(t.next0_)
         if nested is not None:
             return nested
     # A parenthesised notation is accepted only with its closing bracket,
     # and the result is widened to cover both brackets.
     if t.isCharOf("("):
         nested = NumbersWithUnitToken._tryParseWHL(t.next0_)
         if nested is not None:
             closing = nested.end_token.next0_
             if closing is not None and closing.isChar(')'):
                 nested.begin_token = t
                 nested.end_token = closing
                 return nested
     compact = t.term
     if len(compact) == 5 and compact[1] == 'Х' and compact[3] == 'Х':
         # Compact form: the letters at positions 0, 2 and 4 name the dimensions.
         by_letter = {
             'Г': "ГЛУБИНА",
             'В': "ВЫСОТА",
             'Ш': "ШИРИНА",
             'Д': "ДЛИНА",
         }
         names = []
         for letter in compact[0::2]:
             if letter not in by_letter:
                 return None
             names.append(by_letter[letter])
         return MetaToken._new809(t, t, names)
     first = t
     last = t
     names = []
     cur = t
     while cur is not None:
         if not isinstance(cur, TextToken):
             break
         # Only the first word may be preceded by more than one whitespace.
         if cur.whitespaces_before_count > 1 and cur != first:
             break
         term = cur.term
         if (cur.isValue("ДЛИНА", None) or cur.isValue("ДЛИННА", None)
                 or term in ("Д", "ДЛ", "ДЛИН")):
             name = "ДЛИНА"
         elif (cur.isValue("ШИРИНА", None) or cur.isValue("ШИРОТА", None)
                 or term in ("Ш", "ШИР", "ШИРИН")):
             name = "ШИРИНА"
         elif cur.isValue("ГЛУБИНА", None) or term in ("Г", "ГЛ", "ГЛУБ"):
             name = "ГЛУБИНА"
         elif cur.isValue("ВЫСОТА", None) or term in ("В", "ВЫС"):
             name = "ВЫСОТА"
         else:
             break
         names.append(name)
         last = cur
         # A dot right after an abbreviation belongs to it.
         following = cur.next0_
         if following is not None and following.isChar('.'):
             cur = following
             last = cur
         following = cur.next0_
         if following is None:
             break
         # Step over one separator between dimension names, if present.
         if (MeasureHelper.isMultChar(following) or following.is_comma
                 or following.isCharOf("\\/")):
             cur = following
         cur = cur.next0_
     if len(names) < 2:
         return None
     return MetaToken._new809(first, last, names)

Exemple #8

0

Afficher le fichier

 def __tryParseThesis(t: 'Token') -> 'ReferentToken':
     """Try to read an assertion "<noun phrase> <verb> ..." starting at t.

     An optional introductory phrase (starting with a preposition, adverb or
     misc word and closed by a comma) may precede the noun phrase. The noun
     phrase becomes the term of the definition; the text from the verb up to
     the end of the sentence becomes its value, with the introductory phrase
     appended after a comma when it is longer than 10 characters.

     Args:
         t(Token): first token of the candidate sentence

     Returns:
         ReferentToken: wraps a DefinitionReferent of kind
         DefinitionKind.ASSERTATION and spans from t to the last token of the
         sentence; None when the pattern does not match, when the value text
         is shorter than 15 characters, or when no term text can be
         extracted.
     """
     if t is None:
         return None
     t0 = t
     tt = t
     mc = tt.getMorphClassInDictionary()
     preamb = None
     if mc.is_conjunction:
         return None
     if t.isValue("LET", None):
         return None
     if mc.is_preposition or mc.is_misc or mc.is_adverb:
         if not MiscHelper.isEngArticle(tt):
             # Walk over the introductory phrase; it must end with a comma
             # before a sentence start or a verb is met.
             tt = tt.next0_
             first_pass2871 = True
             while True:
                 if first_pass2871: first_pass2871 = False
                 else: tt = tt.next0_
                 if tt is None: break
                 if tt.is_comma:
                     break
                 if tt.isChar('('):
                     br = BracketHelper.tryParse(tt, BracketParseAttr.NO,
                                                 100)
                     if br is not None:
                         tt = br.end_token
                         continue
                 if MiscHelper.canBeStartOfSentence(tt):
                     break
                 npt0 = NounPhraseHelper.tryParse(
                     tt,
                     Utils.valToEnum(
                         (NounPhraseParseAttr.PARSENUMERICASADJECTIVE) |
                         (NounPhraseParseAttr.REFERENTCANBENOUN),
                         NounPhraseParseAttr), 0)
                 if npt0 is not None:
                     tt = npt0.end_token
                     continue
                 if tt.getMorphClassInDictionary().is_verb:
                     break
             if tt is None or not tt.is_comma or tt.next0_ is None:
                 return None
             preamb = MetaToken(t0, tt.previous)
             tt = tt.next0_
     # t1 is the first token of the subject (the term being described).
     t1 = tt
     mc = tt.getMorphClassInDictionary()
     npt = NounPhraseHelper.tryParse(
         tt,
         Utils.valToEnum((NounPhraseParseAttr.PARSENUMERICASADJECTIVE) |
                         (NounPhraseParseAttr.REFERENTCANBENOUN) |
                         (NounPhraseParseAttr.PARSEADVERBS),
                         NounPhraseParseAttr), 0)
     if npt is None and isinstance(tt, TextToken):
         # Fallback: accept a single upper-case word, or a non-lower-case
         # word that is a proper name or follows an introductory phrase.
         if tt.chars.is_all_upper:
             npt = NounPhraseToken(tt, tt)
         elif not tt.chars.is_all_lower:
             if mc.is_proper or preamb is not None:
                 npt = NounPhraseToken(tt, tt)
     if npt is None:
         return None
     if mc.is_personal_pronoun:
         return None
     # t2 must be a verb in the same sentence, right after the subject.
     t2 = npt.end_token.next0_
     if (t2 is None or MiscHelper.canBeStartOfSentence(t2)
             or not isinstance(t2, TextToken)):
         return None
     if not t2.getMorphClassInDictionary().is_verb:
         return None
     t3 = t2
     tt = t2.next0_
     # Skip the remaining verbs of the verb group.
     while tt is not None:
         if not tt.getMorphClassInDictionary().is_verb:
             break
         tt = tt.next0_
     # Find t3, the last token of the sentence (end of text or a sentence
     # terminator followed by a possible sentence start).
     first_pass2872 = True
     while True:
         if first_pass2872: first_pass2872 = False
         else: tt = tt.next0_
         if tt is None: break
         if tt.next0_ is None:
             t3 = tt
             break
         if tt.isCharOf(".;!?"):
             if MiscHelper.canBeStartOfSentence(tt.next0_):
                 t3 = tt
                 break
         if not isinstance(tt, TextToken):
             continue
         if BracketHelper.canBeStartOfSequence(tt, False, False):
             br = BracketHelper.tryParse(tt, BracketParseAttr.NO, 100)
             if br is not None:
                 tt = br.end_token
                 continue
     tt = t3
     if t3.isCharOf(";.!?"):
         # The terminator itself is not part of the value text.
         tt = tt.previous
     txt = MiscHelper.getTextValue(
         t2, tt,
         Utils.valToEnum(
             (GetTextAttr.KEEPREGISTER) | (GetTextAttr.KEEPQUOTES),
             GetTextAttr))
     if txt is None or len(txt) < 15:
         return None
     if t0 != t1:
         # Append the introductory phrase (without its comma) to the value.
         tt = t1.previous
         if tt.is_comma:
             tt = tt.previous
         txt0 = MiscHelper.getTextValue(
             t0, tt,
             Utils.valToEnum(
                 (GetTextAttr.KEEPREGISTER) | (GetTextAttr.KEEPQUOTES),
                 GetTextAttr))
         if txt0 is not None and len(txt0) > 10:
             if t0.chars.is_capital_upper:
                 txt0 = str.lower(txt0[0]) + txt0[1:]
             txt = "{0}, {1}".format(txt, txt0)
     tt = t1
     if MiscHelper.isEngArticle(tt):
         tt = tt.next0_
     nam = MiscHelper.getTextValue(tt, t2.previous, GetTextAttr.KEEPQUOTES)
     # getTextValue may yield None (its result is None-checked above for the
     # value text); without a term there is nothing to define, so give up
     # instead of failing on nam.startswith.
     if nam is None:
         return None
     if nam.startswith("SO-CALLED"):
         nam = nam[9:].strip()
     dr = DefinitionReferent()
     dr.kind = DefinitionKind.ASSERTATION
     dr.addSlot(DefinitionReferent.ATTR_TERMIN, nam, False, 0)
     dr.addSlot(DefinitionReferent.ATTR_VALUE, txt, False, 0)
     return ReferentToken(dr, t0, t3)

Exemple #9

0

Afficher le fichier

Fichier : SentItem.py Projet : pullenti/PullentiPython

 def parse_near_items(t : 'Token', t1 : 'Token', lev : int, prev : typing.List['SentItem']) -> typing.List['SentItem']:
     """Build the sentence-item variants that can start at token t.

     The token is tried, in order, as: a referent, a delimiter, a
     conjunction, a number (optionally after a preposition and optionally
     followed by a noun phrase), a noun phrase and/or verb phrase, an adverb,
     and finally a lone adjective.

     Args:
         t(Token): token at which the item starts
         t1(Token): right boundary; nothing is parsed when t begins after
             the end of t1
         lev(int): current nesting level; parsing is refused above 100
         prev(typing.List[SentItem]): previously built items, searched
             backwards for a ONEOF attribute when t is an adverb of type OTHER

     Returns:
         typing.List[SentItem]: the alternative items found at t, or None
         when nothing is recognised.
     """
     if lev > 100:
         return None
     if t is None or t.begin_char > t1.end_char:
         return None
     res = list()
     if isinstance(t, ReferentToken):
         res.append(SentItem(Utils.asObjectOrNull(t, MetaToken)))
         return res
     delim = DelimToken.try_parse(t)
     if delim is not None:
         res.append(SentItem(delim))
         return res
     conj = ConjunctionHelper.try_parse(t)
     if conj is not None:
         res.append(SentItem(conj))
         return res
     prep_ = PrepositionHelper.try_parse(t)
     # t111 is where a number may start: t itself, or the token right after
     # the preposition.
     t111 = (t if prep_ is None else prep_.end_token.next0_)
     if isinstance(t111, NumberToken) and (t111.morph.class0_.is_adjective and not t111.morph.class0_.is_noun):
         t111 = None
     num = (None if t111 is None else NumbersWithUnitToken.try_parse(t111, None, False, False, False, False))
     if num is not None:
         if len(num.units) == 0:
             # A number without units: look for the noun phrase it counts.
             npt1 = NounPhraseHelper.try_parse(num.end_token.next0_, SentItem.__m_npt_attrs, 0, None)
             if npt1 is None and num.end_token.next0_ is not None and num.end_token.next0_.is_value("РАЗ", None):
                 npt1 = NounPhraseToken(num.end_token.next0_, num.end_token.next0_)
                 npt1.noun = MetaToken(num.end_token.next0_, num.end_token.next0_)
             if npt1 is not None and prep_ is not None:
                 if npt1.noun.end_token.is_value("РАЗ", None):
                     npt1.morph.remove_items(prep_.next_case, False)
                 elif ((npt1.morph.case_) & prep_.next_case).is_undefined:
                     # The noun phrase case does not agree with the preposition.
                     npt1 = None
                 else:
                     npt1.morph.remove_items(prep_.next_case, False)
             if (npt1 is not None and npt1.end_token.is_value("ОНИ", None) and npt1.preposition is not None) and npt1.preposition.normal == "ИЗ":
                 npt1.morph = MorphCollection(num.end_token.morph)
                 npt1.preposition = None
                 nn = str(num)
                 si1 = SentItem(npt1)
                 if nn == "1" and isinstance(num.end_token, NumberToken) and num.end_token.end_token.is_value("ОДИН", None):
                     a = SemAttribute._new2946(SemAttributeType.ONEOF, num.end_token.end_token.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False))
                     aex = SemAttributeEx._new2945(num, a)
                     si1.attrs = list()
                     si1.attrs.append(aex)
                 else:
                     si1.quant = SemQuantity(nn, num.begin_token, num.end_token)
                 if prep_ is not None:
                     si1.prep = prep_.normal
                 res.append(si1)
                 return res
             if npt1 is not None:
                 si1 = SentItem._new2948(npt1, SemQuantity(str(num), num.begin_token, num.end_token))
                 if prep_ is not None:
                     si1.prep = prep_.normal
                 if npt1.end_token.is_value("РАЗ", None):
                     si1.typ = SentItemType.FORMULA
                 if ((npt1.morph.number) & (MorphNumber.PLURAL)) == (MorphNumber.UNDEFINED) and si1.quant.spelling != "1":
                     # The noun is not plural although the quantity is not 1:
                     # force plural for the forms checked below.
                     ok = False
                     if si1.quant.spelling.endswith("1"):
                         ok = True
                     elif si1.typ == SentItemType.FORMULA:
                         ok = True
                     elif si1.quant.spelling.endswith("2") and npt1.morph.case_.is_genitive:
                         ok = True
                     elif si1.quant.spelling.endswith("3") and npt1.morph.case_.is_genitive:
                         ok = True
                     elif si1.quant.spelling.endswith("4") and npt1.morph.case_.is_genitive:
                         ok = True
                     if ok:
                         npt1.morph = MorphCollection()
                         npt1.morph.number = MorphNumber.PLURAL
                 res.append(si1)
                 return res
         # Plain number (with units, or without a following noun phrase).
         num.begin_token = t
         num.morph = MorphCollection(num.end_token.morph)
         si = SentItem(num)
         if prep_ is not None:
             si.prep = prep_.normal
         res.append(si)
         if si.prep == "НА":
             aa = AdverbToken.try_parse(si.end_token.next0_)
             if aa is not None and (aa.typ == SemAttributeType.LESS or aa.typ == SemAttributeType.GREAT):
                 si.add_attr(aa)
                 si.end_token = aa.end_token
         return res
     mc = t.get_morph_class_in_dictionary()
     adv = AdverbToken.try_parse(t)
     npt = NounPhraseHelper.try_parse(t, SentItem.__m_npt_attrs, 0, None)
     if npt is not None and isinstance(npt.end_token, TextToken) and npt.end_token.term == "БЫЛИ":
         npt = None
     if npt is not None and adv is not None:
         if adv.end_char > npt.end_char:
             npt = None
         elif adv.end_char == npt.end_char:
             # Same span: keep both readings.
             res.append(SentItem(npt))
             res.append(SentItem(adv))
             return res
     if npt is not None and len(npt.adjectives) == 0:
         if npt.end_token.is_value("КОТОРЫЙ", None) and t.previous is not None and t.previous.is_comma_and:
             res1 = SentItem.__parse_subsent(npt, t1, lev + 1, prev)
             if res1 is not None:
                 return res1
         if npt.end_token.is_value("СКОЛЬКО", None):
             tt1 = npt.end_token.next0_
             if tt1 is not None and tt1.is_value("ВСЕГО", None):
                 tt1 = tt1.next0_
             npt1 = NounPhraseHelper.try_parse(tt1, NounPhraseParseAttr.NO, 0, None)
             if npt1 is not None and not npt1.morph.case_.is_undefined and prep_ is not None:
                 if ((prep_.next_case) & npt1.morph.case_).is_undefined:
                     npt1 = None
                 else:
                     npt1.morph.remove_items(prep_.next_case, False)
             if npt1 is not None:
                 # Merge: the following noun phrase absorbs the original one
                 # as an adjective.
                 npt1.begin_token = npt.begin_token
                 npt1.preposition = npt.preposition
                 npt1.adjectives.append(MetaToken(npt.end_token, npt.end_token))
                 npt = npt1
         if npt.end_token.morph.class0_.is_adjective:
             if VerbPhraseHelper.try_parse(t, True, False, False) is not None:
                 npt = None
     vrb = None
     if npt is not None and len(npt.adjectives) > 0:
         vrb = VerbPhraseHelper.try_parse(t, True, False, False)
         if vrb is not None and vrb.first_verb.is_participle:
             npt = None
     elif adv is None or npt is not None:
         vrb = VerbPhraseHelper.try_parse(t, True, False, False)
     if npt is not None:
         res.append(SentItem(npt))
     if vrb is not None and not vrb.first_verb.is_participle and not vrb.first_verb.is_dee_participle:
         # Collect the dictionary verb forms; each one yields its own variant.
         vars0_ = list()
         for wf in vrb.first_verb.morph.items:
             if wf.class0_.is_verb and isinstance(wf, MorphWordForm) and wf.is_in_dictionary:
                 vars0_.append(Utils.asObjectOrNull(wf, MorphWordForm))
         if len(vars0_) < 2:
             res.append(SentItem(vrb))
         else:
             vrb.first_verb.verb_morph = vars0_[0]
             res.append(SentItem(vrb))
             i = 1
             while i < len(vars0_):
                 vrb = VerbPhraseHelper.try_parse(t, False, False, False)
                 if vrb is None:
                     break
                 vrb.first_verb.verb_morph = vars0_[i]
                 res.append(SentItem(vrb))
                 i += 1
             if vars0_[0].misc.mood == MorphMood.IMPERATIVE and vars0_[1].misc.mood != MorphMood.IMPERATIVE:
                 rr = res[0]
                 res[0] = res[1]
                 res[1] = rr
         return res
     if vrb is not None:
         res1 = SentItem.__parse_participles(vrb, t1, lev + 1)
         if res1 is not None:
             res.extend(res1)
     if len(res) > 0:
         return res
     if adv is not None:
         if adv.typ == SemAttributeType.OTHER:
             npt1 = NounPhraseHelper.try_parse(adv.end_token.next0_, SentItem.__m_npt_attrs, 0, None)
             if npt1 is not None and npt1.end_token.is_value("ОНИ", None) and npt1.preposition is not None:
                 si1 = SentItem(npt1)
                 a = SemAttribute._new2946(SemAttributeType.OTHER, adv.end_token.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False))
                 # The attribute originates from the adverb. `num` is always
                 # None at this point (every non-None path returned above),
                 # so pass `adv`, as the backward search below does.
                 aex = SemAttributeEx._new2945(adv, a)
                 si1.attrs = list()
                 si1.attrs.append(aex)
                 if prep_ is not None:
                     si1.prep = prep_.normal
                 res.append(si1)
                 return res
             # Otherwise attach OTHER to the nearest previous item that
             # carries a ONEOF attribute.
             for i in range(len(prev) - 1, -1, -1):
                 if prev[i].attrs is not None:
                     for a in prev[i].attrs:
                         if a.attr.typ == SemAttributeType.ONEOF:
                             si1 = SentItem(prev[i].source)
                             aa = SemAttribute._new2946(SemAttributeType.OTHER, adv.end_token.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False))
                             aex = SemAttributeEx._new2945(adv, aa)
                             si1.attrs = list()
                             si1.attrs.append(aex)
                             if prep_ is not None:
                                 si1.prep = prep_.normal
                             si1.begin_token = adv.begin_token
                             si1.end_token = adv.end_token
                             res.append(si1)
                             return res
         res.append(SentItem(adv))
         return res
     if mc.is_adjective:
         # A lone adjective is wrapped as a one-token noun phrase.
         npt = NounPhraseToken._new2953(t, t, MorphCollection(t.morph))
         npt.noun = MetaToken(t, t)
         res.append(SentItem(npt))
         return res
     return None

Exemple #10

0

Afficher le fichier

Fichier : DecreeHelper.py Projet : MihaJjDa/APCLtask

 def checkNds(t : 'Token', nds : float=18, nds_mustbe_money : bool=False) -> 'MetaToken':
     """Check that the VAT (НДС) written after a money amount is correct.

     Args:
         t(Token): token of the amount after which the VAT is expected
         nds(float): expected VAT rate in percent; non-positive values
             disable the check
         nds_mustbe_money(bool): when True, a VAT mention without a money
             amount is reported as an error

     Returns:
         MetaToken: created by MetaToken._new809 with the offending token
         range and an error message; None when nothing wrong is detected or
         the check does not apply.
     """
     if t is None or nds <= 0:
         return None
     base_money = Utils.asObjectOrNull(t.getReferent(), MoneyReferent)
     if base_money is None:
         return None
     seen_nds = False
     seen_total = False
     included = False
     nds_money = None
     nds_first = None
     nds_last = None
     # Scan forward for the НДС keyword, an optional percent rate and the
     # VAT amount itself; stop at the first token that fits none of these.
     cur = t.next0_
     while cur is not None:
         if cur.isValue("НДС", None):
             seen_nds = True
             nds_first = nds_last = cur
         elif isinstance(cur, ReferentToken):
             nds_money = Utils.asObjectOrNull(cur.getReferent(), MoneyReferent)
             break
         else:
             pct = None
             if isinstance(cur, NumberToken):
                 pct = NumberHelper.tryParseNumberWithPostfix(cur)
             if pct is not None and pct.ex_typ == NumberExType.PERCENT:
                 # A differing rate is an error only once НДС was mentioned.
                 if math.fabs(pct.real_value - nds) > .0001 and seen_nds:
                     return MetaToken._new809(cur, pct.end_token, "Размер НДС должен быть {0}%, а не {1}%".format(nds, pct.real_value))
                 nds_last = pct.end_token
                 cur = nds_last
             elif cur.isValue("ВСЕГО", None):
                 seen_total = True
             elif cur.isValue("ТОМ", None) or cur.isValue("ЧИСЛО", None) or cur.isValue("ВКЛЮЧАЯ", None):
                 included = True
             elif (cur.isValue("КРОМЕ", None) or cur.isValue("ТОГО", None) or cur.isValue("РАЗМЕР", None)
                   or cur.isValue("СУММА", None) or cur.isValue("СТАВКА", None)):
                 pass
             elif (cur.isValue("Т", None) and cur.next0_ is not None and cur.next0_.isChar('.')
                   and cur.next0_.next0_ is not None and cur.next0_.next0_.isValue("Ч", None)
                   and cur.next0_.next0_.next0_ is not None and cur.next0_.next0_.next0_.isChar('.')):
                 # Abbreviation "Т.Ч.": jump to its final dot.
                 included = True
                 cur = cur.next0_.next0_.next0_
             elif not cur.chars.is_letter or cur.morph.class0_.is_preposition:
                 pass
             else:
                 break
         cur = cur.next0_
     if not seen_nds:
         return None
     if nds_money is None:
         if nds_mustbe_money:
             return MetaToken._new809(nds_first, nds_last, "Размер НДС должен быть в денежном выражении")
         return None
     if seen_total:
         return None
     rate = nds / (100)
     expected = base_money.real_value * rate
     if included:
         expected /= (1) + rate
     # Round the expected VAT to two decimals (half goes up).
     scaled = expected * (100)
     remainder = (scaled - math.floor(scaled)) / (100)
     expected -= remainder
     if remainder >= .005:
         expected += .01
     delta = abs(expected - nds_money.real_value)
     # A difference below 1 is tolerated when neither amount has a fractional rest.
     if delta > .011 and not (delta < 1 and nds_money.rest == 0 and base_money.rest == 0):
         mr = MoneyReferent._new811(nds_money.currency, expected)
         return MetaToken._new809(t, cur, "Размер НДС должен быть {0}, а не {1}".format(DecreeHelper.__outMoney(mr), DecreeHelper.__outMoney(nds_money)))
     if included:
         return None
     # Look further for a grand total ("ВСЕГО") and its amount; seen_total
     # is necessarily False when this point is reached.
     total_money = None
     cur = cur.next0_
     while cur is not None:
         if isinstance(cur, ReferentToken):
             total_money = Utils.asObjectOrNull(cur.getReferent(), MoneyReferent)
             break
         if not cur.chars.is_letter or cur.morph.class0_.is_preposition:
             pass
         elif cur.isValue("ВСЕГО", None):
             seen_total = True
         elif not (cur.isValue("НДС", None) or cur.isValue("ВМЕСТЕ", None)):
             break
         cur = cur.next0_
     if total_money is not None and seen_total:
         expected = base_money.real_value + nds_money.real_value
         delta = abs(expected - total_money.real_value)
         if delta > .01:
             mr = MoneyReferent._new811(nds_money.currency, expected)
             err = "Всего с НДС должно быть {0}, а не {1}".format(DecreeHelper.__outMoney(mr), DecreeHelper.__outMoney(total_money))
             return MetaToken._new809(t, cur, err)
     return None

Exemple #11

0

Afficher le fichier

Fichier : NumbersWithUnitToken.py Projet : pullenti/PullentiPython

 def _try_parsewhl(t : 'Token') -> 'MetaToken':
     """Recognise a sequence of dimension names (length, width, height, ...).

     Handles a single five-character token whose second and fourth
     characters are a Cyrillic or Latin X, and a run of separate dimension
     words or abbreviations optionally separated by a multiplication sign,
     a comma or a slash.

     Args:
         t(Token): token at which the notation is expected to start

     Returns:
         MetaToken: created by MetaToken._new836 with the covered token range
         and the list of recognised dimension names; None when the start is
         not a TextToken or fewer than two names are recognised.
     """
     if not isinstance(t, TextToken):
         return None
     # A leading ':' or '-' is transparent: try again right after it.
     if t.is_char_of(":-"):
         inner = NumbersWithUnitToken._try_parsewhl(t.next0_)
         if inner is not None:
             return inner
     # A parenthesised notation is accepted only with its closing bracket,
     # and the result is widened to cover both brackets.
     if t.is_char_of("("):
         inner = NumbersWithUnitToken._try_parsewhl(t.next0_)
         if inner is not None:
             after = inner.end_token.next0_
             if after is not None and after.is_char(')'):
                 inner.begin_token = t
                 inner.end_token = after
                 return inner
     compact = t.term
     if len(compact) == 5 and compact[1] in ('Х', 'X') and compact[3] in ('Х', 'X'):
         # Compact form: the letters at positions 0, 2 and 4 name the dimensions.
         names = []
         for letter in compact[0::2]:
             if letter == 'Г':
                 names.append("ГЛУБИНА")
             elif letter in ('В', 'H', 'Н'):
                 names.append("ВЫСОТА")
             elif letter in ('Ш', 'B', 'W'):
                 names.append("ШИРИНА")
             elif letter in ('Д', 'L'):
                 names.append("ДЛИНА")
             elif letter == 'D':
                 names.append("ДИАМЕТР")
             else:
                 return None
         return MetaToken._new836(t, t, names)
     first = t
     last = t
     names = []
     cur = t
     while cur is not None:
         if not isinstance(cur, TextToken):
             break
         # Only the first word may be preceded by more than one whitespace.
         if cur.whitespaces_before_count > 1 and cur != first:
             break
         term = cur.term
         # A trailing X glued to the abbreviation is dropped before matching.
         if term.endswith(("X", "Х")):
             term = term[:-1]
         if (cur.is_value("ДЛИНА", None) or cur.is_value("ДЛИННА", None)
                 or term in ("Д", "ДЛ", "ДЛИН", "L")):
             name = "ДЛИНА"
         elif (cur.is_value("ШИРИНА", None) or cur.is_value("ШИРОТА", None)
                 or term in ("Ш", "ШИР", "ШИРИН", "W", "B")):
             name = "ШИРИНА"
         elif cur.is_value("ГЛУБИНА", None) or term in ("Г", "ГЛ", "ГЛУБ"):
             name = "ГЛУБИНА"
         elif cur.is_value("ВЫСОТА", None) or term in ("В", "ВЫС", "H", "Н"):
             name = "ВЫСОТА"
         elif cur.is_value("ДИАМЕТР", None) or term in ("D", "ДИАМ"):
             name = "ДИАМЕТР"
         else:
             break
         names.append(name)
         last = cur
         # A dot right after an abbreviation belongs to it.
         following = cur.next0_
         if following is not None and following.is_char('.'):
             cur = following
             last = cur
         following = cur.next0_
         if following is None:
             break
         # Step over one separator between dimension names, if present.
         if (MeasureHelper.is_mult_char(following) or following.is_comma
                 or following.is_char_of("\\/")):
             cur = following
         cur = cur.next0_
     if len(names) < 2:
         return None
     return MetaToken._new836(first, last, names)