def createRefenetsTokensWithRegister(self, ad: 'AnalyzerData', name: str, regist: bool = True) -> typing.List['ReferentToken']:
    """Build referent tokens for every unit plus one for the measure itself.

    Args:
        ad: analyzer data passed to each unit's ``createReferentWithRegister``
            and, when ``regist`` is true and ``ad`` is not None, used to
            register the resulting MeasureReferent.
        name: optional value stored in the ATTR_NAME slot (skipped when None).
        regist: whether to register the measure referent in ``ad``.

    Returns:
        A list with one ReferentToken per unit, then the tokens produced by
        ``div_num`` (if any), and finally the token of the measure itself.

    Note:
        When ``not0_`` is set and one of the bounds is missing, the from/to
        bounds (values and include flags) are swapped on ``self`` in place.
    """
    tokens = [
        ReferentToken(unit.createReferentWithRegister(ad), unit.begin_token, unit.end_token)
        for unit in self.units
    ]
    measure = MeasureReferent()
    template = "1"
    if self.single_val is None:
        # Range form: the template is assembled from the lower and upper bound.
        if self.not0_ and (self.from_val is None or self.to_val is None):
            self.from_include, self.to_include = self.to_include, self.from_include
            self.from_val, self.to_val = self.to_val, self.from_val
        if self.from_val is not None:
            measure.addValue(self.from_val)
            template = "[1" if self.from_include else "]1"
            upper_index = 2
        else:
            template = "]"
            upper_index = 1
        if self.to_val is not None:
            measure.addValue(self.to_val)
            closing = ']' if self.to_include else '['
            template = "{0} .. {1}{2}".format(template, upper_index, closing)
        else:
            template += " .. ["
    else:
        # Single value, optionally with a +/- tolerance or an "about" marker.
        measure.addValue(self.single_val)
        if self.plus_minus is not None:
            percent_mark = "%" if self.plus_minus_percent else ""
            template = "[1 ±2{0}]".format(percent_mark)
            measure.addValue(self.plus_minus)
        elif self.about:
            template = "~1"
    measure.template = template
    for unit_token in tokens:
        measure.addSlot(MeasureReferent.ATTR_UNIT, unit_token.referent, False, 0)
    if name is not None:
        measure.addSlot(MeasureReferent.ATTR_NAME, name, False, 0)
    if self.div_num is not None:
        divisor_tokens = self.div_num.createRefenetsTokensWithRegister(ad, None, True)
        tokens.extend(divisor_tokens)
        # The last token of the divisor list is the divisor measure itself.
        measure.addSlot(MeasureReferent.ATTR_REF, divisor_tokens[-1].referent, False, 0)
    kind = UnitToken.calcKind(self.units)
    if kind != MeasureKind.UNDEFINED:
        measure.kind = kind
    if regist and ad is not None:
        measure = Utils.asObjectOrNull(ad.registerReferent(measure), MeasureReferent)
    tokens.append(ReferentToken(measure, self.begin_token, self.end_token))
    return tokens
def processOntologyItem(self, begin: 'Token') -> 'ReferentToken':
    """Turn an ontology entry starting at ``begin`` into a unit referent token.

    Returns None when ``begin`` is not a TextToken. If ``UnitToken.tryParse``
    recognizes a unit, the token wraps that unit's referent (created with a
    None register argument); otherwise a fresh UnitReferent named after the
    source text of ``begin`` is wrapped over the single token.
    """
    if isinstance(begin, TextToken):
        parsed_unit = UnitToken.tryParse(begin, None, None, False)
        if parsed_unit is None:
            # Unknown unit: fall back to a referent named by the raw text.
            fallback = UnitReferent()
            fallback.addSlot(UnitReferent.ATTR_NAME, begin.getSourceText(), False, 0)
            return ReferentToken(fallback, begin, begin)
        return ReferentToken(
            parsed_unit.createReferentWithRegister(None),
            parsed_unit.begin_token,
            parsed_unit.end_token,
        )
    return None
def process_ontology_item(self, begin: 'Token') -> 'ReferentToken':
    """Turn an ontology entry starting at ``begin`` into a unit referent token.

    Returns None when ``begin`` is not a TextToken. If ``UnitToken.try_parse``
    recognizes a unit, the token wraps that unit's referent (created with a
    None register argument); otherwise a fresh UnitReferent named after the
    source text of ``begin`` is wrapped over the single token.
    """
    if isinstance(begin, TextToken):
        parsed_unit = UnitToken.try_parse(begin, None, None, False)
        if parsed_unit is None:
            # Unknown unit: fall back to a referent named by the raw text.
            fallback = UnitReferent()
            fallback.add_slot(UnitReferent.ATTR_NAME, begin.get_source_text(), False, 0)
            return ReferentToken(fallback, begin, begin)
        return ReferentToken(
            parsed_unit.create_referent_with_register(None),
            parsed_unit.begin_token,
            parsed_unit.end_token,
        )
    return None
def __parse_internals(self, add_units : 'TerminCollection') -> None:
    """Try to attach one nested measure that follows this token.

    Only acts when the token after ``end_token`` is a slash/backslash or the
    word "ПРИ". First a full MeasureToken is tried from the token after that
    separator; failing that, a bare number-with-units is accepted provided it
    has units and they cannot be equal to the units of ``self.nums``. On
    success the item is appended to ``internals`` and ``end_token`` is moved.
    """
    separator = self.end_token.next0_
    if separator is None:
        return
    if not (separator.is_char_of("\\/") or separator.is_value("ПРИ", None)):
        return
    start = separator.next0_
    nested = MeasureToken.try_parse(start, add_units, True, False, False, False)
    if nested is not None:
        self.internals.append(nested)
        self.end_token = nested.end_token
        return
    numbers = NumbersWithUnitToken.try_parse(start, add_units, False, False, False, False)
    if numbers is None or len(numbers.units) == 0:
        return
    # Same units as the main value would mean it is not a separate parameter.
    if UnitToken.can_be_equals(self.nums.units, numbers.units):
        return
    self.internals.append(MeasureToken._new1612(numbers.begin_token, numbers.end_token, numbers))
    self.end_token = numbers.end_token
# tryParse: parse a named measurement ("<name> ... <numbers with units>") starting at token t.
#
# Parameters:
#   t                - start token; the function returns None unless it is a TextToken
#                      that is not a table control character.
#   add_units        - passed through to the UnitToken / NumbersWithUnitToken parsers.
#   can_be_set       - when False the result is returned right after the internals
#                      are parsed, without looking for a second value with other units.
#   can_units_absent - when False, a value for which no units are found yields None.
#
# Outline of what the code does:
#   1. An optional leading min/max marker is consumed via NumbersWithUnitToken._isMinOrMax;
#      its result is kept in `minmax`.
#   2. The name is a noun phrase (NounPhraseHelper.tryParse). Without a noun phrase the
#      fallbacks are _tryParseWHL, the literal "КПД", or a word longer than 3 characters
#      whose dictionary morph class is undefined; otherwise None. With a noun phrase, the
#      function returns None if a real number or a date item starts at the same position.
#   3. The main loop walks the following tokens until a newline, a table control character
#      or a parsable number-with-units, collecting verbs ("БЫТЬ", "ДОЛЖЕН", ...; a preceding
#      "НЕ" sets not0_), nested measures after "ПРИ" (into internals_), unit lists
#      (units / units2), bracket sequences and further noun phrases that extend the name.
#   4. Separators ":", ",", "_" and a spaced hyphen are skipped, then
#      NumbersWithUnitToken.tryParseMulti parses the value(s); None if it fails.
#   5. With several values and no internals a composite MeasureToken is returned with one
#      internal token per value (named from whd.tag when available).
#   6. For a single value: minmax < 0 turns single_val into an inclusive from_val,
#      minmax > 0 into an inclusive to_val; missing units are taken from `units` or
#      parsed after the value.
#   7. `reliable` is set when a unit keyword starts at or after the result's begin_char.
#   8. If can_be_set and no internals were found, a following single number-with-units
#      with different units turns the result into a set (is_set = True, nums = None).
#
# NOTE(review): `units2` is assigned but never read in the visible code.
def tryParse(t: 'Token', add_units: 'TerminCollection', can_be_set: bool = True, can_units_absent: bool = False) -> 'MeasureToken': """ Extract a measure together with its name Args: t(Token): """ if (not ((isinstance(t, TextToken)))): return None if (t.is_table_control_char): return None t0 = t whd = None minmax = 0 wrapminmax1516 = RefOutArgWrapper(minmax) tt = NumbersWithUnitToken._isMinOrMax(t0, wrapminmax1516) minmax = wrapminmax1516.value if (tt is not None): t = tt.next0_ npt = NounPhraseHelper.tryParse( t, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS), NounPhraseParseAttr), 0) if (npt is None): whd = NumbersWithUnitToken._tryParseWHL(t) if (whd is not None): npt = NounPhraseToken(t0, whd.end_token) elif (t0.isValue("КПД", None)): npt = NounPhraseToken(t0, t0) elif ((isinstance(t0, TextToken)) and t0.length_char > 3 and t0.getMorphClassInDictionary().is_undefined): npt = NounPhraseToken(t0, t0) else: return None elif (NumberHelper.tryParseRealNumber(t, True) is not None): return None else: dtok = DateItemToken.tryAttach(t, None) if (dtok is not None): return None t1 = npt.end_token t = npt.end_token name_ = MetaToken._new561(npt.begin_token, npt.end_token, npt.morph) units = None units2 = None internals_ = list() not0_ = False tt = t1.next0_ first_pass3037 = True while True: if first_pass3037: first_pass3037 = False else: tt = tt.next0_ if (not (tt is not None)): break if (tt.is_newline_before): break if (tt.is_table_control_char): break wrapminmax1510 = RefOutArgWrapper(minmax) tt2 = NumbersWithUnitToken._isMinOrMax(tt, wrapminmax1510) minmax = wrapminmax1510.value if (tt2 is not None): tt = tt2 t = tt t1 = t continue if ((tt.isValue("БЫТЬ", None) or tt.isValue("ДОЛЖЕН", None) or tt.isValue("ДОЛЖНЫЙ", None)) or tt.isValue("МОЖЕТ", None) or ((tt.isValue("СОСТАВЛЯТЬ", None) and not tt.getMorphClassInDictionary().is_adjective))): t = tt t1 = t if (tt.previous.isValue("НЕ", None)): not0_ = True continue www = 
NumbersWithUnitToken._tryParseWHL(tt) if (www is not None): whd = www tt = www.end_token t = tt t1 = t continue if (len(internals_) > 0 and tt.is_comma_and): continue if (tt.isValue("ПРИ", None) or len(internals_) > 0): mt1 = MeasureToken.tryParse(tt.next0_, add_units, False, False) if (mt1 is not None and mt1.reliable): internals_.append(mt1) tt = mt1.end_token t = tt t1 = t continue if ((isinstance(tt, NumberToken)) and (tt).typ == NumberSpellingType.WORDS): npt3 = NounPhraseHelper.tryParse( tt, NounPhraseParseAttr.PARSENUMERICASADJECTIVE, 0) if (npt3 is not None): tt = npt3.end_token t1 = tt if (len(internals_) == 0): name_.end_token = t1 continue mt0 = NumbersWithUnitToken.tryParse(tt, add_units, False, False) if (mt0 is not None): break if (((tt.is_comma or tt.isChar('('))) and tt.next0_ is not None): www = NumbersWithUnitToken._tryParseWHL(tt.next0_) if (www is not None): whd = www tt = www.end_token t = tt t1 = t if (tt.next0_ is not None and tt.next0_.is_comma): tt = tt.next0_ t1 = tt if (tt.next0_ is not None and tt.next0_.isChar(')')): tt = tt.next0_ t1 = tt continue uu = UnitToken.tryParseList(tt.next0_, add_units, False) if (uu is not None): t = uu[len(uu) - 1].end_token t1 = t units = uu if (tt.isChar('(') and t1.next0_ is not None and t1.next0_.isChar(')')): tt = t1.next0_ t = tt t1 = t continue elif (t1.next0_ is not None and t1.next0_.isChar('(')): uu = UnitToken.tryParseList(t1.next0_.next0_, add_units, False) if (uu is not None and uu[len(uu) - 1].end_token.next0_ is not None and uu[len(uu) - 1].end_token.next0_.isChar(')')): units2 = uu tt = uu[len(uu) - 1].end_token.next0_ t = tt t1 = t continue if (uu is not None and len(uu) > 0 and not uu[0].is_doubt): break if (BracketHelper.canBeStartOfSequence(tt, False, False)): br = BracketHelper.tryParse(tt, BracketParseAttr.NO, 100) if (br is not None): tt = br.end_token t = tt t1 = t continue if (tt.isValue("НЕ", None) and tt.next0_ is not None): mc = tt.next0_.getMorphClassInDictionary() if 
(mc.is_adverb or mc.is_misc): break continue if (tt.isValue("ЯМЗ", None)): pass npt2 = NounPhraseHelper.tryParse( tt, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS), NounPhraseParseAttr), 0) if (npt2 is None): if (tt.morph.class0_.is_preposition or tt.morph.class0_.is_conjunction): to = NumbersWithUnitToken.M_TERMINS.tryParse( tt, TerminParseAttr.NO) if (to is not None): if ((isinstance(to.end_token.next0_, TextToken)) and to.end_token.next0_.is_letters): pass else: break t1 = tt continue mc = tt.getMorphClassInDictionary() if (((isinstance(tt, TextToken)) and tt.chars.is_letter and tt.length_char > 1) and (((tt.chars.is_all_upper or mc.is_adverb or mc.is_undefined) or mc.is_adjective))): uu = UnitToken.tryParseList(tt, add_units, False) if (uu is not None): if (uu[0].length_char > 2 or len(uu) > 1): units = uu t = uu[len(uu) - 1].end_token t1 = t break t = tt t1 = t if (len(internals_) == 0): name_.end_token = tt continue if (tt.is_comma): continue if (tt.isChar('.')): if (not MiscHelper.canBeStartOfSentence(tt.next0_)): continue uu = UnitToken.tryParseList(tt.next0_, add_units, False) if (uu is not None): if (uu[0].length_char > 2 or len(uu) > 1): units = uu t = uu[len(uu) - 1].end_token t1 = t break break tt = npt2.end_token t = tt t1 = t if (len(internals_) > 0): pass elif (t.isValue("ПРЕДЕЛ", None) or t.isValue("ГРАНИЦА", None) or t.isValue("ДИАПАЗОН", None)): pass elif (t.chars.is_letter): name_.end_token = t1 t1 = t1.next0_ first_pass3038 = True while True: if first_pass3038: first_pass3038 = False else: t1 = t1.next0_ if (not (t1 is not None)): break if (t1.is_table_control_char): pass elif (t1.isCharOf(":,_")): www = NumbersWithUnitToken._tryParseWHL(t1.next0_) if (www is not None): whd = www t = www.end_token t1 = t continue elif (t1.is_hiphen and t1.is_whitespace_after and t1.is_whitespace_before): pass else: break if (t1 is None): return None mts = NumbersWithUnitToken.tryParseMulti(t1, add_units, False, 
not0_) if (mts is None): return None mt = mts[0] if (name_.begin_token.morph.class0_.is_preposition): name_.begin_token = name_.begin_token.next0_ if (len(mts) > 1 and len(internals_) == 0): if (len(mt.units) == 0): if (units is not None): for m in mts: m.units = units res1 = MeasureToken._new1511(t0, mts[len(mts) - 1].end_token, name_.morph, True) res1.name = MiscHelper.getTextValueOfMetaToken( name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE) k = 0 while k < len(mts): ttt = MeasureToken._new1506(mts[k].begin_token, mts[k].end_token, mts[k]) if (whd is not None): nams = Utils.asObjectOrNull(whd.tag, list) if (k < len(nams)): ttt.name = nams[k] res1.internals.append(ttt) k += 1 tt1 = res1.end_token.next0_ if (tt1 is not None and tt1.isChar('±')): nn = NumbersWithUnitToken._tryParse(tt1, add_units, True, False) if (nn is not None and nn.plus_minus_percent): res1.end_token = nn.end_token res1.nums = nn return res1 if (not mt.is_whitespace_before): if (mt.begin_token.previous is None): return None if (mt.begin_token.previous.isCharOf(":),") or mt.begin_token.previous.is_table_control_char): pass else: return None if (len(mt.units) == 0 and units is not None): mt.units = units if (mt.div_num is not None and len(units) > 1 and len(mt.div_num.units) == 0): i = 1 while i < len(units): if (units[i].pow0_ == -1): j = i while j < len(units): mt.div_num.units.append(units[j]) units[j].pow0_ = (-units[j].pow0_) j += 1 del mt.units[i:i + len(units) - i] break i += 1 if ((minmax < 0) and mt.single_val is not None): mt.from_val = mt.single_val mt.from_include = True mt.single_val = (None) if (minmax > 0 and mt.single_val is not None): mt.to_val = mt.single_val mt.to_include = True mt.single_val = (None) if (len(mt.units) == 0): units = UnitToken.tryParseList(mt.end_token.next0_, add_units, True) if (units is None): if (can_units_absent): pass else: return None else: mt.units = units res = MeasureToken._new1513(t0, mt.end_token, name_.morph, internals_) if (((not 
t0.is_whitespace_before and t0.previous is not None and t0 == name_.begin_token) and t0.previous.is_hiphen and not t0.previous.is_whitespace_before) and (isinstance(t0.previous.previous, TextToken))): name_.begin_token = res.begin_token = name_.begin_token.previous.previous res.name = MiscHelper.getTextValueOfMetaToken( name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE) res.nums = mt for u in res.nums.units: if (u.keyword is not None): if (u.keyword.begin_char >= res.begin_char): res.reliable = True res.__parseInternals(add_units) if (len(res.internals) > 0 or not can_be_set): return res t1 = res.end_token.next0_ if (t1 is not None and t1.is_comma_and): t1 = t1.next0_ mts1 = NumbersWithUnitToken.tryParseMulti(t1, add_units, False, False) if ((mts1 is not None and len(mts1) == 1 and (t1.whitespaces_before_count < 3)) and len(mts1[0].units) > 0 and not UnitToken.canBeEquals(mts[0].units, mts1[0].units)): res.is_set = True res.nums = (None) res.internals.append( MeasureToken._new1506(mt.begin_token, mt.end_token, mt)) res.internals.append( MeasureToken._new1506(mts1[0].begin_token, mts1[0].end_token, mts1[0])) res.end_token = mts1[0].end_token return res
# try_parse: parse a named measurement ("<name> ... <numbers with units>") starting at token t.
#
# Parameters:
#   t                - start token; the function returns None unless it is a TextToken
#                      that is not a table control character.
#   add_units        - passed through to the UnitToken / NumbersWithUnitToken parsers.
#   can_be_set       - when False the result is returned right after the internals
#                      are parsed, without looking for a second value with other units.
#   can_units_absent - when False, a value for which no units are found yields None.
#   is_resctriction  - when True, meeting one of ":", ",", "_" after the name yields None;
#                      the flag is also forwarded to NumbersWithUnitToken.try_parse_multi.
#                      It is passed as True for measures parsed after "ПРИ".
#   is_subval        - enables the plain-letter-word name fallback, keeps a leading
#                      preposition in the name and selects GetTextAttr.NO for the name
#                      text; passed as True for the items of a ":"-introduced list.
#
# Outline of what the code does:
#   1. An optional leading min/max marker is consumed via NumbersWithUnitToken._is_min_or_max;
#      its result is kept in `minmax`.
#   2. The name is a noun phrase (NounPhraseHelper.try_parse). Without one the fallbacks are
#      _try_parsewhl, the literal "КПД", a word longer than 3 characters with an undefined
#      dictionary morph class, a lower-case "T" (optionally followed by "="), or - with
#      is_subval - a run of letter words / bracket groups that stops before a
#      number-with-units. With a noun phrase, a real number or a date item starting at the
#      same position yields None.
#   3. The main loop walks the following tokens until a newline, a table control character
#      or a parsable number-with-units, collecting verbs ("БЫТЬ", "ДОЛЖЕН", ...; a preceding
#      "НЕ" sets not0_), nested measures after "ПРИ" (into internals_), identifier-like
#      number fragments that extend the name, unit lists (units / units2), bracket
#      sequences and further noun phrases.
#   4. Separators are skipped; after ":" a list of sub-measures (parsed with is_subval=True)
#      may be returned as a composite whose items are named "<name> (<item name>)".
#   5. NumbersWithUnitToken.try_parse_multi parses the value(s). If it fails but units were
#      found and the position is after ":" (or the text ended), a placeholder value built
#      with math.nan is used; otherwise None.
#   6. With several values and no internals a composite MeasureToken is returned with one
#      internal token per value (named from whd.tag when available).
#   7. For a single value: minmax < 0 turns single_val into an inclusive from_val,
#      minmax > 0 into an inclusive to_val; missing units are taken from `units` or
#      parsed after the value.
#   8. `reliable` is set when a unit keyword starts at or after the result's begin_char.
#   9. If can_be_set and no internals were found, a following single number-with-units
#      with different units turns the result into a set (is_set = True, nums = None).
#
# NOTE(review): `units2` is assigned but never read in the visible code.
# NOTE(review): this block relies on `math` being imported at file level - confirm.
def try_parse(t : 'Token', add_units : 'TerminCollection', can_be_set : bool=True, can_units_absent : bool=False, is_resctriction : bool=False, is_subval : bool=False) -> 'MeasureToken': if (not (isinstance(t, TextToken))): return None if (t.is_table_control_char): return None t0 = t whd = None minmax = 0 wrapminmax1625 = RefOutArgWrapper(minmax) tt = NumbersWithUnitToken._is_min_or_max(t0, wrapminmax1625) minmax = wrapminmax1625.value if (tt is not None): t = tt.next0_ npt = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS), NounPhraseParseAttr), 0, None) if (npt is None): whd = NumbersWithUnitToken._try_parsewhl(t) if (whd is not None): npt = NounPhraseToken(t0, whd.end_token) elif (t0.is_value("КПД", None)): npt = NounPhraseToken(t0, t0) elif ((isinstance(t0, TextToken)) and t0.length_char > 3 and t0.get_morph_class_in_dictionary().is_undefined): npt = NounPhraseToken(t0, t0) elif (t0.is_value("T", None) and t0.chars.is_all_lower): npt = NounPhraseToken(t0, t0) t = t0 if (t.next0_ is not None and t.next0_.is_char('=')): npt.end_token = t.next0_ elif ((isinstance(t0, TextToken)) and t0.chars.is_letter and is_subval): if (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None): return None npt = NounPhraseToken(t0, t0) t = t0.next0_ while t is not None: if (t.whitespaces_before_count > 2): break elif (not (isinstance(t, TextToken))): break elif (not t.chars.is_letter): br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100) if (br is not None): t = br.end_token npt.end_token = t else: break elif (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None): break else: npt.end_token = t t = t.next0_ else: return None elif (NumberHelper.try_parse_real_number(t, True, False) is not None): return None else: dtok = DateItemToken.try_attach(t, None, False) if (dtok is not None): return None t1 = npt.end_token t = npt.end_token name_ = 
MetaToken._new509(npt.begin_token, npt.end_token, npt.morph) units = None units2 = None internals_ = list() not0_ = False tt = t1.next0_ first_pass3305 = True while True: if first_pass3305: first_pass3305 = False else: tt = tt.next0_ if (not (tt is not None)): break if (tt.is_newline_before): break if (tt.is_table_control_char): break wrapminmax1617 = RefOutArgWrapper(minmax) tt2 = NumbersWithUnitToken._is_min_or_max(tt, wrapminmax1617) minmax = wrapminmax1617.value if (tt2 is not None): tt = tt2 t = tt t1 = t continue if ((tt.is_value("БЫТЬ", None) or tt.is_value("ДОЛЖЕН", None) or tt.is_value("ДОЛЖНЫЙ", None)) or tt.is_value("МОЖЕТ", None) or ((tt.is_value("СОСТАВЛЯТЬ", None) and not tt.get_morph_class_in_dictionary().is_adjective))): t = tt t1 = t if (tt.previous.is_value("НЕ", None)): not0_ = True continue www = NumbersWithUnitToken._try_parsewhl(tt) if (www is not None): whd = www tt = www.end_token t = tt t1 = t continue if (tt.is_value("ПРИ", None)): mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False) if (mt1 is not None): internals_.append(mt1) tt = mt1.end_token t = tt t1 = t continue n1 = NumbersWithUnitToken.try_parse(tt.next0_, add_units, False, False, False, False) if (n1 is not None and len(n1.units) > 0): mt1 = MeasureToken._new1612(n1.begin_token, n1.end_token, n1) internals_.append(mt1) tt = mt1.end_token t = tt t1 = t continue if (tt.is_value("ПО", None) and tt.next0_ is not None and tt.next0_.is_value("U", None)): tt = tt.next0_ t = tt t1 = t continue if (len(internals_) > 0): if (tt.is_char(':')): break mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False) if (mt1 is not None and mt1.reliable): internals_.append(mt1) tt = mt1.end_token t = tt t1 = t continue if ((isinstance(tt, NumberToken)) and tt.typ == NumberSpellingType.WORDS): npt3 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.PARSENUMERICASADJECTIVE, 0, None) if (npt3 is not None): tt = npt3.end_token t1 = tt if (len(internals_) == 
0): name_.end_token = t1 continue if (((tt.is_hiphen and not tt.is_whitespace_before and not tt.is_whitespace_after) and (isinstance(tt.next0_, NumberToken)) and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper): t = tt.next0_ tt = t t1 = tt if (len(internals_) == 0): name_.end_token = t1 continue if (((isinstance(tt, NumberToken)) and not tt.is_whitespace_before and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper): t = tt t1 = t if (len(internals_) == 0): name_.end_token = t1 continue if ((((isinstance(tt, NumberToken)) and not tt.is_whitespace_after and tt.next0_.is_hiphen) and not tt.next0_.is_whitespace_after and (isinstance(tt.next0_.next0_, TextToken))) and tt.next0_.next0_.length_char > 2): tt = tt.next0_.next0_ t = tt t1 = t npt1 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None) if (npt1 is not None and npt1.end_char > tt.end_char): tt = npt1.end_token t = tt t1 = t if (len(internals_) == 0): name_.end_token = t1 continue if ((isinstance(tt, NumberToken)) and tt.previous is not None): if (tt.previous.is_value("USB", None)): t = tt t1 = t if (len(internals_) == 0): name_.end_token = t1 ttt = tt.next0_ while ttt is not None: if (ttt.is_whitespace_before): break if (ttt.is_char_of(",:")): break tt = ttt t = tt t1 = t if (len(internals_) == 0): name_.end_token = t1 ttt = ttt.next0_ continue mt0 = NumbersWithUnitToken.try_parse(tt, add_units, False, False, False, False) if (mt0 is not None): npt1 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSENUMERICASADJECTIVE) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), 0, None) if (npt1 is not None and npt1.end_char > mt0.end_char): tt = npt1.end_token t = tt t1 = t if (len(internals_) == 0): name_.end_token = t1 continue break if (((tt.is_comma or tt.is_char('('))) and tt.next0_ is not None): www = NumbersWithUnitToken._try_parsewhl(tt.next0_) if (www is not None): whd = www tt = www.end_token t = tt t1 = t if 
(tt.next0_ is not None and tt.next0_.is_comma): tt = tt.next0_ t1 = tt if (tt.next0_ is not None and tt.next0_.is_char(')')): tt = tt.next0_ t1 = tt continue uu = UnitToken.try_parse_list(tt.next0_, add_units, False) if (uu is not None): t = uu[len(uu) - 1].end_token t1 = t units = uu if (tt.is_char('(') and t1.next0_ is not None and t1.next0_.is_char(')')): tt = t1.next0_ t = tt t1 = t continue elif (t1.next0_ is not None and t1.next0_.is_char('(')): uu = UnitToken.try_parse_list(t1.next0_.next0_, add_units, False) if (uu is not None and uu[len(uu) - 1].end_token.next0_ is not None and uu[len(uu) - 1].end_token.next0_.is_char(')')): units2 = uu tt = uu[len(uu) - 1].end_token.next0_ t = tt t1 = t continue www = NumbersWithUnitToken._try_parsewhl(t1.next0_) if (www is not None): whd = www tt = www.end_token t = tt t1 = t continue if (uu is not None and len(uu) > 0 and not uu[0].is_doubt): break if (t1.next0_ is not None): if (t1.next0_.is_table_control_char or t1.is_newline_after): break units = (None) if (BracketHelper.can_be_start_of_sequence(tt, False, False) and not (isinstance(tt.next0_, NumberToken))): br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100) if (br is not None): tt = br.end_token t = tt t1 = t continue if (tt.is_value("НЕ", None) and tt.next0_ is not None): mc = tt.next0_.get_morph_class_in_dictionary() if (mc.is_adverb or mc.is_misc): break continue if (tt.is_value("ЯМЗ", None)): pass npt2 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS) | (NounPhraseParseAttr.PARSEPRONOUNS), NounPhraseParseAttr), 0, None) if (npt2 is None): if (tt.morph.class0_.is_preposition or tt.morph.class0_.is_conjunction): to = NumbersWithUnitToken.M_TERMINS.try_parse(tt, TerminParseAttr.NO) if (to is not None): if ((isinstance(to.end_token.next0_, TextToken)) and to.end_token.next0_.is_letters): pass else: break t1 = tt continue mc = tt.get_morph_class_in_dictionary() if (((isinstance(tt, 
TextToken)) and tt.chars.is_letter and tt.length_char > 1) and (((tt.chars.is_all_upper or mc.is_adverb or mc.is_undefined) or mc.is_adjective))): uu = UnitToken.try_parse_list(tt, add_units, False) if (uu is not None): if (uu[0].length_char > 1 or len(uu) > 1): units = uu t = uu[len(uu) - 1].end_token t1 = t break t = tt t1 = t if (len(internals_) == 0): name_.end_token = tt continue if (tt.is_comma): continue if (tt.is_char('.')): if (not MiscHelper.can_be_start_of_sentence(tt.next0_)): continue uu = UnitToken.try_parse_list(tt.next0_, add_units, False) if (uu is not None): if (uu[0].length_char > 2 or len(uu) > 1): units = uu t = uu[len(uu) - 1].end_token t1 = t break break tt = npt2.end_token t = tt t1 = t if (len(internals_) > 0): pass elif (t.is_value("ПРЕДЕЛ", None) or t.is_value("ГРАНИЦА", None) or t.is_value("ДИАПАЗОН", None)): pass elif (t.chars.is_letter): name_.end_token = t1 t11 = t1 t1 = t1.next0_ first_pass3306 = True while True: if first_pass3306: first_pass3306 = False else: t1 = t1.next0_ if (not (t1 is not None)): break if (t1.is_table_control_char): pass elif (t1.is_char_of(":,_")): if (is_resctriction): return None www = NumbersWithUnitToken._try_parsewhl(t1.next0_) if (www is not None): whd = www t = www.end_token t1 = t continue uu = UnitToken.try_parse_list(t1.next0_, add_units, False) if (uu is not None): if (uu[0].length_char > 1 or len(uu) > 1): units = uu t = uu[len(uu) - 1].end_token t1 = t continue if (t1.is_char(':')): li = list() ttt = t1.next0_ first_pass3307 = True while True: if first_pass3307: first_pass3307 = False else: ttt = ttt.next0_ if (not (ttt is not None)): break if (ttt.is_hiphen or ttt.is_table_control_char): continue if ((isinstance(ttt, TextToken)) and not ttt.chars.is_letter): continue mt1 = MeasureToken.try_parse(ttt, add_units, True, True, False, True) if (mt1 is None): break li.append(mt1) ttt = mt1.end_token if (ttt.next0_ is not None and ttt.next0_.is_char(';')): ttt = ttt.next0_ if (ttt.is_char(';')): pass 
elif (ttt.is_newline_after and mt1.is_newline_before): pass else: break if (len(li) > 1): res0 = MeasureToken._new1618(t0, li[len(li) - 1].end_token, li, True) if (internals_ is not None and len(internals_) > 0): res0.internal_ex = internals_[0] nam = MiscHelper.get_text_value_of_meta_token(name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE) li[0].begin_token = t0 for v in li: v.name = "{0} ({1})".format(nam, Utils.ifNotNull(v.name, "")).strip() if (v.nums is not None and len(v.nums.units) == 0 and units is not None): v.nums.units = units return res0 elif (t1.is_hiphen and t1.is_whitespace_after and t1.is_whitespace_before): pass elif (t1.is_hiphen and t1.next0_ is not None and t1.next0_.is_char('(')): pass else: break if (t1 is None): return None mts = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, not0_, True, is_resctriction) if (mts is None): if (units is not None and len(units) > 0): if (t1 is None or t1.previous.is_char(':')): mts = list() if (t1 is None): t1 = t11 while t1 is not None and t1.next0_ is not None: pass t1 = t1.next0_ else: t1 = t1.previous mts.append(NumbersWithUnitToken._new1619(t0, t1, math.nan)) if (mts is None): return None mt = mts[0] if (mt.begin_token == mt.end_token and not (isinstance(mt.begin_token, NumberToken))): return None if (not is_subval and name_.begin_token.morph.class0_.is_preposition): name_.begin_token = name_.begin_token.next0_ if (mt.whl is not None): whd = mt.whl for kk in range(10): if (whd is not None and whd.end_token == name_.end_token): name_.end_token = whd.begin_token.previous continue if (units is not None): if (units[len(units) - 1].end_token == name_.end_token): name_.end_token = units[0].begin_token.previous continue break if (len(mts) > 1 and len(internals_) == 0): if (len(mt.units) == 0): if (units is not None): for m in mts: m.units = units res1 = MeasureToken._new1620(t0, mts[len(mts) - 1].end_token, name_.morph, True) res1.name = MiscHelper.get_text_value_of_meta_token(name_, 
GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE) k = 0 while k < len(mts): ttt = MeasureToken._new1612(mts[k].begin_token, mts[k].end_token, mts[k]) if (whd is not None): nams = Utils.asObjectOrNull(whd.tag, list) if (k < len(nams)): ttt.name = nams[k] res1.internals.append(ttt) k += 1 tt1 = res1.end_token.next0_ if (tt1 is not None and tt1.is_char('±')): nn = NumbersWithUnitToken._try_parse(tt1, add_units, True, False, False) if (nn is not None and nn.plus_minus_percent): res1.end_token = nn.end_token res1.nums = nn if (len(nn.units) > 0 and units is None and len(mt.units) == 0): for m in mts: m.units = nn.units return res1 if (not mt.is_whitespace_before): if (mt.begin_token.previous is None): return None if (mt.begin_token.previous.is_char_of(":),") or mt.begin_token.previous.is_table_control_char or mt.begin_token.previous.is_value("IP", None)): pass elif (mt.begin_token.is_hiphen and len(mt.units) > 0 and not mt.units[0].is_doubt): pass else: return None if (len(mt.units) == 0 and units is not None): mt.units = units if (mt.div_num is not None and len(units) > 1 and len(mt.div_num.units) == 0): i = 1 while i < len(units): if (units[i].pow0_ == -1): j = i while j < len(units): mt.div_num.units.append(units[j]) units[j].pow0_ = (- units[j].pow0_) j += 1 del mt.units[i:i+len(units) - i] break i += 1 if ((minmax < 0) and mt.single_val is not None): mt.from_val = mt.single_val mt.from_include = True mt.single_val = (None) if (minmax > 0 and mt.single_val is not None): mt.to_val = mt.single_val mt.to_include = True mt.single_val = (None) if (len(mt.units) == 0): units = UnitToken.try_parse_list(mt.end_token.next0_, add_units, True) if (units is None): if (can_units_absent): pass else: return None else: mt.units = units res = MeasureToken._new1622(t0, mt.end_token, name_.morph, internals_) if (((not t0.is_whitespace_before and t0.previous is not None and t0 == name_.begin_token) and t0.previous.is_hiphen and not t0.previous.is_whitespace_before) and 
(isinstance(t0.previous.previous, TextToken))): name_.begin_token = res.begin_token = name_.begin_token.previous.previous res.name = MiscHelper.get_text_value_of_meta_token(name_, (GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE if not is_subval else GetTextAttr.NO)) res.nums = mt for u in res.nums.units: if (u.keyword is not None): if (u.keyword.begin_char >= res.begin_char): res.reliable = True res.__parse_internals(add_units) if (len(res.internals) > 0 or not can_be_set): return res t1 = res.end_token.next0_ if (t1 is not None and t1.is_comma_and): t1 = t1.next0_ mts1 = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, False, False, False) if ((mts1 is not None and len(mts1) == 1 and (t1.whitespaces_before_count < 3)) and len(mts1[0].units) > 0 and not UnitToken.can_be_equals(mts[0].units, mts1[0].units)): res.is_set = True res.nums = (None) res.internals.append(MeasureToken._new1612(mt.begin_token, mt.end_token, mt)) res.internals.append(MeasureToken._new1612(mts1[0].begin_token, mts1[0].end_token, mts1[0])) res.end_token = mts1[0].end_token return res
# __calc_genetive: assign self.coef for a link from item `from0_` to item `to`.
# Returns None; the result is the side effect on self.coef (and, in one branch,
# self.can_be_pacient).
#
# What the visible code does, in order:
#   * Nothing is done when from0_.source.can_be_noun is false.
#   * If either side is a FORMULA item, the other side must be a NOUN item; the
#     coefficient is then PARAMS.transitive_coef (on the `to`-is-formula path this
#     requires a genitive `from_morph` case, and an undefined case gives 0).
#   * If from0_ wraps a NumbersWithUnitToken, it must directly follow `to`
#     (order == to.order + 1); when the kind computed from its units matches a keyword
#     of `to` (UnitsHelper.check_keyword) the coefficient is PARAMS.next_model * 3.
#   * `non_gen_text` is set when there is no preposition, from0_ is not a verb phrase
#     and from0_ does not directly follow `to`.
#   * to.source.dr_groups is scanned: transitive groups without a preposition,
#     WHATTODO questions with a verb-phrase `from0_`, and the `nexts` table keyed by
#     `from_prep` can each set the coefficient (transitive_coef or next_model) and return.
#   * Otherwise, with no preposition and non_gen_text false, genitive / instrumental /
#     dative cases give transitive_coef or ng_link (halved under the conditions in the
#     code), and a verb-phrase `from0_` gives 0.1.
#   * Finally, a NumbersWithUnitToken `to` whose end token is "ЧЕМ" sets
#     transitive_coef * 2.
#
# NOTE(review): the identifiers `genetive` and `can_be_pacient` look like misspellings of
# "genitive" / "patient"; they are kept because other code may reference them.
def __calc_genetive(self) -> None: if (not self.from0_.source.can_be_noun): return if (self.from0_.source.typ == SentItemType.FORMULA): if (self.to.source.typ != SentItemType.NOUN): return self.coef = SemanticService.PARAMS.transitive_coef return frmorph = self.from_morph if (self.to.source.typ == SentItemType.FORMULA): if (self.from0_.source.typ != SentItemType.NOUN): return if (frmorph.case_.is_genitive): self.coef = SemanticService.PARAMS.transitive_coef elif (frmorph.case_.is_undefined): self.coef = (0) return if (isinstance(self.from0_.source.source, NumbersWithUnitToken)): if (self.from0_.order != (self.to.order + 1)): return num = Utils.asObjectOrNull(self.from0_.source.source, NumbersWithUnitToken) ki = UnitToken.calc_kind(num.units) if (ki != MeasureKind.UNDEFINED): if (UnitsHelper.check_keyword(ki, self.to.source.source)): self.coef = (SemanticService.PARAMS.next_model * (3)) return if (isinstance(self.to.source.source, NumbersWithUnitToken)): return non_gen_text = False if (Utils.isNullOrEmpty(self.from_prep) and not (isinstance(self.from0_.source.source, VerbPhraseToken))): if (self.from0_.order != (self.to.order + 1)): non_gen_text = True if (self.to.source.dr_groups is not None): for gr in self.to.source.dr_groups: if (gr.cm.transitive and Utils.isNullOrEmpty(self.from_prep)): ok = False if (isinstance(self.to.source.source, VerbPhraseToken)): if (frmorph.case_.is_accusative): ok = True self.can_be_pacient = True elif (frmorph.case_.is_genitive and self.from0_.order == (self.to.order + 1)): ok = True if (ok): self.coef = SemanticService.PARAMS.transitive_coef return if ((((gr.cm.questions) & (QuestionType.WHATTODO))) != (QuestionType.UNDEFINED) and (isinstance(self.from0_.source.source, VerbPhraseToken))): self.coef = SemanticService.PARAMS.transitive_coef return if (gr.cm.nexts is not None): if (self.from_prep in gr.cm.nexts): cas = gr.cm.nexts[self.from_prep] if (not ((cas) & frmorph.case_).is_undefined): if (Utils.isNullOrEmpty(self.from_prep) and 
self.from0_.order != (self.to.order + 1) and ((cas) & frmorph.case_).is_genitive): pass else: self.coef = SemanticService.PARAMS.next_model return if (non_gen_text or not Utils.isNullOrEmpty(self.from_prep)): return cas0 = frmorph.case_ if (cas0.is_genitive or cas0.is_instrumental or cas0.is_dative): if ((isinstance(self.to.source.source, NumbersWithUnitToken)) and cas0.is_genitive): self.coef = SemanticService.PARAMS.transitive_coef else: self.coef = SemanticService.PARAMS.ng_link if (cas0.is_nominative or self.from0_.source.typ == SentItemType.PARTBEFORE): self.coef /= (2) if (not cas0.is_genitive): self.coef /= (2) elif (isinstance(self.from0_.source.source, VerbPhraseToken)): self.coef = 0.1 if ((isinstance(self.to.source.source, NumbersWithUnitToken)) and self.to.source.end_token.is_value("ЧЕМ", None)): self.coef = (SemanticService.PARAMS.transitive_coef * (2))
def _tryParse(t: 'Token', add_units: 'TerminCollection', second: bool, can_omit_number: bool) -> 'NumbersWithUnitToken':
    """Parse one numeric value or range, with optional units, starting at ``t``.

    Args:
        t: first token to examine; ``None`` yields ``None``.
        add_units: extra unit dictionary, passed through to
            ``UnitToken.tryParseList``.
        second: ``True`` when the function parses the continuation of a value
            that has already been started (the function calls itself this way
            for the right-hand part of a range or a tolerance).
        can_omit_number: when ``True`` a single one-token unit longer than
            three characters is accepted without a number and gets value 1.

    Returns:
        A ``NumbersWithUnitToken`` covering the recognised text, or ``None``
        when nothing could be recognised.
    """
    if (t is None):
        return None
    # Skip leading commas / "and" and the conjunction "НО" ("but").
    while t is not None:
        if (t.is_comma_and or t.isValue("НО", None)):
            t = t.next0_
        else:
            break
    t0 = t
    about_ = False
    # The out-value of the min/max probe is not needed here, only the token.
    wrapmin_max1523 = RefOutArgWrapper(0)
    ttt = NumbersWithUnitToken._isMinOrMax(t, wrapmin_max1523)
    if (ttt is not None):
        t = ttt.next0_
        if (t is None):
            return None
    if (t is None):
        return None
    # Approximation markers: "~", "ОКОЛО" ("about"), "ПРИМЕРНО" ("approximately").
    if (t.isChar('~') or t.isValue("ОКОЛО", None) or t.isValue("ПРИМЕРНО", None)):
        t = t.next0_
        about_ = True
        if (t is None):
            return None
    # Parenthesised value: parse the inside and require the closing bracket.
    if (t0.isChar('(')):
        mt0 = NumbersWithUnitToken._tryParse(t.next0_, add_units, False, False)
        if (mt0 is not None and mt0.end_token.next0_ is not None and mt0.end_token.next0_.isChar(')')):
            if (second):
                # As a continuation only a symmetric range (-x .. x) is accepted.
                if (mt0.from_val is not None and mt0.to_val is not None and mt0.from_val == (-mt0.to_val)):
                    pass
                else:
                    return None
            mt0.begin_token = t0
            mt0.end_token = mt0.end_token.next0_
            uu = UnitToken.tryParseList(mt0.end_token.next0_, add_units, False)
            if (uu is not None and len(mt0.units) == 0):
                mt0.units = uu
                mt0.end_token = uu[len(uu) - 1].end_token
            return mt0
    plusminus = False
    unit_before = False
    dty = NumbersWithUnitToken.DiapTyp.UNDEFINED
    uni = None
    # Range keyword from the termin dictionary, or a comparison sign.
    tok = NumbersWithUnitToken.M_TERMINS.tryParse(t, TerminParseAttr.NO)
    if (tok is not None):
        t = tok.end_token.next0_
        dty = (Utils.valToEnum(tok.termin.tag, NumbersWithUnitToken.DiapTyp))
        if (not tok.is_whitespace_after):
            if (t is None):
                return None
            if (t.isCharOf(":")):
                pass
            elif (isinstance(t, NumberToken)):
                pass
            elif (t.is_comma and t.next0_ is not None and t.next0_.isValue("ЧЕМ", None)):
                # "..., ЧЕМ" ("than"), optionally followed by a preposition.
                t = t.next0_.next0_
                if (t is not None and t.morph.class0_.is_preposition):
                    t = t.next0_
            else:
                return None
        if (t is not None and t.isChar('(')):
            # Units given in brackets right after the keyword.
            uni = UnitToken.tryParseList(t.next0_, add_units, False)
            if (uni is not None):
                t = uni[len(uni) - 1].end_token.next0_
                while t is not None:
                    if (t.isCharOf("):")):
                        t = t.next0_
                    else:
                        break
                mt0 = NumbersWithUnitToken._tryParse(t, add_units, False, can_omit_number)
                if (mt0 is not None and len(mt0.units) == 0):
                    mt0.begin_token = t0
                    mt0.units = uni
                    return mt0
    elif (t.isChar('<')):
        dty = NumbersWithUnitToken.DiapTyp.LS
        t = t.next0_
        if (t is not None and t.isChar('=')):
            t = t.next0_
            dty = NumbersWithUnitToken.DiapTyp.LE
    elif (t.isChar('>')):
        dty = NumbersWithUnitToken.DiapTyp.GT
        t = t.next0_
        if (t is not None and t.isChar('=')):
            t = t.next0_
            dty = NumbersWithUnitToken.DiapTyp.GE
    elif (t.isChar('≤')):
        dty = NumbersWithUnitToken.DiapTyp.LE
        t = t.next0_
    elif (t.isChar('≥')):
        dty = NumbersWithUnitToken.DiapTyp.GE
        t = t.next0_
    if (t is not None and t.isChar(':')):
        t = t.next0_
    # Sign / separator in front of the number.
    if (t is not None):
        if (t.isChar('+') or t.isValue("ПЛЮС", None)):
            t = t.next0_
            if (t is not None and not t.is_whitespace_before):
                # "+-" and "+/-" both mean plus-minus.
                if (t.is_hiphen):
                    t = t.next0_
                    plusminus = True
                elif ((t.isCharOf("\\/") and t.next0_ is not None and not t.is_newline_after) and t.next0_.is_hiphen):
                    t = t.next0_.next0_
                    plusminus = True
        elif (second and ((t.isCharOf("\\/÷…~")))):
            t = t.next0_
        elif ((t.is_hiphen and t == t0 and not second) and NumbersWithUnitToken.M_TERMINS.tryParse(t.next0_, TerminParseAttr.NO) is not None):
            tok = NumbersWithUnitToken.M_TERMINS.tryParse(t.next0_, TerminParseAttr.NO)
            t = tok.end_token.next0_
            dty = (Utils.valToEnum(tok.termin.tag, NumbersWithUnitToken.DiapTyp))
        elif (t.is_hiphen and t == t0 and ((t.is_whitespace_after or second))):
            t = t.next0_
        elif (t.isChar('±')):
            t = t.next0_
            plusminus = True
        elif ((second and t.isChar('.') and t.next0_ is not None) and t.next0_.isChar('.')):
            # ".." or "..." used as a range separator.
            t = t.next0_.next0_
            if (t is not None and t.isChar('.')):
                t = t.next0_
    if (t is None):
        return None
    num = NumberHelper.tryParseRealNumber(t, True)
    if (num is None):
        # No number here: maybe the unit is written before the number.
        uni = UnitToken.tryParseList(t, add_units, False)
        if (uni is not None):
            unit_before = True
            t = uni[len(uni) - 1].end_token.next0_
            delim = False
            while t is not None:
                if (t.isCharOf(":,")):
                    delim = True
                    t = t.next0_
                else:
                    break
            if (not delim):
                if (t is None or not t.is_whitespace_before):
                    return None
                if (t.next0_ is not None and t.is_hiphen and t.is_whitespace_after):
                    delim = True
                    t = t.next0_
            num = NumberHelper.tryParseRealNumber(t, True)
    res = None
    rval = 0
    if (num is None):
        # Special termins carry both a value (tag) and a unit name (tag2).
        tt = NumbersWithUnitToken.M_SPEC.tryParse(t, TerminParseAttr.NO)
        if (tt is not None):
            rval = (tt.termin.tag)
            unam = tt.termin.tag2
            for u in UnitsHelper.UNITS:
                if (u.fullname_cyr == unam):
                    uni = list()
                    uni.append(UnitToken._new1517(t, t, u))
                    break
            if (uni is None):
                return None
            res = NumbersWithUnitToken._new1519(t0, tt.end_token, about_)
            t = tt.end_token.next0_
        else:
            if (not can_omit_number):
                return None
            if ((uni is not None and len(uni) == 1 and uni[0].begin_token == uni[0].end_token) and uni[0].length_char > 3):
                rval = (1)
                res = NumbersWithUnitToken._new1519(t0, uni[len(uni) - 1].end_token, about_)
                t = res.end_token.next0_
            else:
                return None
    else:
        if ((t == t0 and t0.is_hiphen and not t.is_whitespace_before) and not t.is_whitespace_after and (num.real_value < 0)):
            return None
        t = num.end_token.next0_
        res = NumbersWithUnitToken._new1519(t0, num.end_token, about_)
        rval = num.real_value
    if (uni is None):
        uni = UnitToken.tryParseList(t, add_units, False)
        if (uni is not None):
            if ((plusminus and second and len(uni) == 1) and uni[0].unit == UnitsHelper.UPERCENT):
                # "± N %": the percent belongs to the tolerance, real units may follow.
                res.end_token = uni[len(uni) - 1].end_token
                res.plus_minus_percent = True
                tt1 = uni[0].end_token.next0_
                uni = UnitToken.tryParseList(tt1, add_units, False)
                if (uni is not None):
                    res.units = uni
                    res.end_token = uni[len(uni) - 1].end_token
            else:
                res.units = uni
                res.end_token = uni[len(uni) - 1].end_token
            t = res.end_token.next0_
    else:
        res.units = uni
        if (len(uni) > 1):
            uni1 = UnitToken.tryParseList(t, add_units, False)
            if (((uni1 is not None and uni1[0].unit == uni[0].unit and (len(uni1) < len(uni))) and uni[len(uni1)].pow0_ == -1 and uni1[len(uni1) - 1].end_token.next0_ is not None) and uni1[len(uni1) - 1].end_token.next0_.isCharOf("/\\")):
                num2 = NumbersWithUnitToken._tryParse(uni1[len(uni1) - 1].end_token.next0_.next0_, add_units, False, False)
                # The nested parse may return a number without units, so the
                # list has to be checked for emptiness before indexing it.
                if (num2 is not None and num2.units is not None and len(num2.units) > 0 and num2.units[0].unit == uni[len(uni1)].unit):
                    res.units = uni1
                    res.div_num = num2
                    res.end_token = num2.end_token
    if (dty != NumbersWithUnitToken.DiapTyp.UNDEFINED):
        if (dty == NumbersWithUnitToken.DiapTyp.GE or dty == NumbersWithUnitToken.DiapTyp.FROM):
            res.from_include = True
            res.from_val = rval
        elif (dty == NumbersWithUnitToken.DiapTyp.GT):
            res.from_include = False
            res.from_val = rval
        elif (dty == NumbersWithUnitToken.DiapTyp.LE or dty == NumbersWithUnitToken.DiapTyp.TO):
            res.to_include = True
            res.to_val = rval
        elif (dty == NumbersWithUnitToken.DiapTyp.LS):
            res.to_include = False
            res.to_val = rval
    if (not second):
        # Skip a min/max marker standing between the two parts of a range.
        wrapiii1522 = RefOutArgWrapper(0)
        ttt = NumbersWithUnitToken._isMinOrMax(t, wrapiii1522)
        iii = wrapiii1522.value
        if (ttt is not None and iii > 0):
            t = ttt.next0_
    # Try to read a continuation (second bound, tolerance or divisor).
    next0__ = (None if second or plusminus or ((t is not None and t.is_newline_before)) else NumbersWithUnitToken._tryParse(t, add_units, True, False))
    if (next0__ is not None and ((next0__.to_val is not None or next0__.single_val is not None)) and next0__.from_val is None):
        if (len(next0__.units) > 0):
            if (len(res.units) == 0):
                res.units = next0__.units
            elif (not UnitToken.canBeEquals(res.units, next0__.units)):
                next0__ = (None)
        elif (len(res.units) > 0 and not unit_before and not next0__.plus_minus_percent):
            next0__ = (None)
        if (next0__ is not None):
            res.end_token = next0__.end_token
        if (next0__ is not None and next0__.to_val is not None):
            res.to_val = next0__.to_val
            res.to_include = next0__.to_include
        elif (next0__ is not None and next0__.single_val is not None):
            if (next0__.begin_token.isCharOf("/\\")):
                res.div_num = next0__
                res.single_val = rval
                return res
            elif (next0__.plus_minus_percent):
                res.single_val = rval
                res.plus_minus = next0__.single_val
                res.plus_minus_percent = True
                res.to_include = True
            else:
                res.to_val = next0__.single_val
                res.to_include = True
        if (next0__ is not None):
            if (res.from_val is None):
                res.from_val = rval
                res.from_include = True
            return res
    elif ((next0__ is not None and next0__.from_val is not None and next0__.to_val is not None) and next0__.to_val == (-next0__.from_val)):
        # The continuation is a symmetric range, i.e. a "± x" tolerance.
        if (len(next0__.units) == 1 and next0__.units[0].unit == UnitsHelper.UPERCENT and len(res.units) > 0):
            res.single_val = rval
            res.plus_minus = next0__.to_val
            res.plus_minus_percent = True
            res.end_token = next0__.end_token
            return res
        if (len(next0__.units) == 0):
            res.single_val = rval
            res.plus_minus = next0__.to_val
            res.end_token = next0__.end_token
            return res
        res.from_val = (next0__.from_val + rval)
        res.from_include = True
        res.to_val = (next0__.to_val + rval)
        res.to_include = True
        res.end_token = next0__.end_token
        if (len(next0__.units) > 0):
            res.units = next0__.units
        return res
    if (dty == NumbersWithUnitToken.DiapTyp.UNDEFINED):
        if (plusminus and ((not res.plus_minus_percent or not second))):
            res.from_include = True
            res.from_val = (-rval)
            res.to_include = True
            res.to_val = rval
        else:
            res.single_val = rval
            res.plus_minus_percent = plusminus
    return res
def tryParseMulti(
        t: 'Token', add_units: 'TerminCollection',
        can_omit_number: bool = False, not0__: bool = False) -> typing.List['NumbersWithUnitToken']:
    """Parse one value or a product of up to three values (``A x B x C``) at ``t``.

    Returns a list of ``NumbersWithUnitToken`` items, or ``None`` when ``t``
    is ``None``, is a ``ReferentToken``, or no value can be parsed.  Units
    found on a later factor are copied to earlier factors that have none.
    """
    if t is None or isinstance(t, ReferentToken):
        return None

    # "( ... )": accept the bracketed list only when the bracket is closed.
    if t.isChar('('):
        bracketed = NumbersWithUnitToken.tryParseMulti(t.next0_, add_units, False, can_omit_number)
        if bracketed is not None:
            closing = bracketed[len(bracketed) - 1].end_token.next0_
            if closing is not None and closing.isChar(')'):
                bracketed[len(bracketed) - 1].end_token = closing
                return bracketed

    first = NumbersWithUnitToken.tryParse(t, add_units, can_omit_number, not0__)
    if first is None:
        return None

    if first.whitespaces_after_count < 2 and MeasureHelper.isMultChar(first.end_token.next0_):
        factor2 = NumbersWithUnitToken.tryParse(first.end_token.next0_.next0_, add_units, not0__, False)
        if factor2 is not None:
            factor3 = None
            if factor2.whitespaces_after_count < 2 and MeasureHelper.isMultChar(factor2.end_token.next0_):
                factor3 = NumbersWithUnitToken.tryParse(factor2.end_token.next0_.next0_, add_units, False, False)
            if factor3 is None:
                # Text glued to the second factor must be plain punctuation.
                follower = factor2.end_token.next0_
                if (follower is not None and not follower.is_whitespace_before
                        and not follower.isCharOf(",.;")):
                    return None
            elif len(factor3.units) > 0 and len(factor2.units) == 0:
                factor2.units = factor3.units
            if len(factor2.units) > 0 and len(first.units) == 0:
                first.units = factor2.units
            product = [first, factor2]
            if factor3 is not None:
                product.append(factor3)
            return product

    # A unit glued to the multiplication sign, followed by a number.
    if (not first.is_whitespace_after
            and MeasureHelper.isMultCharEnd(first.end_token.next0_)
            and isinstance(first.end_token.next0_.next0_, NumberToken)
            and len(first.units) == 0):
        unit_text = first.end_token.next0_.term
        unit_text = unit_text[:-1]
        found = UnitsHelper.TERMINS.tryAttachStr(unit_text, None)
        if found is not None and len(found) > 0:
            glued = first.end_token.next0_
            first.units.append(UnitToken._new1517(glued, glued, Utils.asObjectOrNull(found[0].tag, Unit)))
            first.end_token = first.end_token.next0_
            tail = NumbersWithUnitToken.tryParseMulti(first.end_token.next0_, add_units, False, False)
            if tail is not None:
                tail.insert(0, first)
                return tail

    return [first]
def _try_parse(t : 'Token', add_units : 'TerminCollection', second : bool, can_omit_number : bool, can_be_nan : bool) -> 'NumbersWithUnitToken':
    """Parse one numeric value or range, with optional units, starting at ``t``.

    Args:
        t: first token to examine; ``None`` yields ``None``.
        add_units: extra unit dictionary, passed through to
            ``UnitToken.try_parse_list``.
        second: ``True`` when the function parses the continuation of a value
            that has already been started (the function calls itself this way
            for the right-hand part of a range or a tolerance).
        can_omit_number: when ``True`` a single one-token unit longer than
            three characters is accepted without a number and gets value 1.
        can_be_nan: when ``True`` and a keyword or sign was seen, a missing
            number is tolerated and the value becomes ``math.nan``.

    Returns:
        A ``NumbersWithUnitToken`` covering the recognised text, or ``None``
        when nothing could be recognised.
    """
    if (t is None):
        return None
    # Skip leading commas / "and" and the conjunction "НО" ("but").
    while t is not None:
        if (t.is_comma_and or t.is_value("НО", None)):
            t = t.next0_
        else:
            break
    t0 = t
    about_ = False
    has_keyw = False
    # The out-value of the min/max probe is not needed here, only the token.
    wrapmin_max1633 = RefOutArgWrapper(0)
    ttt = NumbersWithUnitToken._is_min_or_max(t, wrapmin_max1633)
    if (ttt is not None):
        t = ttt.next0_
        if (t is None):
            return None
    if (t is None):
        return None
    # Approximation markers: "~", "ОКОЛО" ("about"), "ПРИМЕРНО" ("approximately").
    if (t.is_char('~') or t.is_value("ОКОЛО", None) or t.is_value("ПРИМЕРНО", None)):
        t = t.next0_
        about_ = True
        has_keyw = True
        if (t is None):
            return None
    # "В ПРЕДЕЛАХ" / "В ДИАПАЗОНЕ" ("within" / "in the range"): skip both words.
    # The noun is the token AFTER "В", so both alternatives test t.next0_.
    if (t.is_value("В", None) and t.next0_ is not None):
        if (t.next0_.is_value("ПРЕДЕЛ", None) or t.next0_.is_value("ДИАПАЗОН", None)):
            t = t.next0_.next0_
            if (t is None):
                return None
    # Parenthesised value: parse the inside and require the closing bracket.
    if (t0.is_char('(')):
        mt0 = NumbersWithUnitToken._try_parse(t.next0_, add_units, False, False, False)
        if (mt0 is not None and mt0.end_token.next0_ is not None and mt0.end_token.next0_.is_char(')')):
            if (second):
                # As a continuation only a symmetric range (-x .. x) is accepted.
                if (mt0.from_val is not None and mt0.to_val is not None and mt0.from_val == (- mt0.to_val)):
                    pass
                else:
                    return None
            mt0.begin_token = t0
            mt0.end_token = mt0.end_token.next0_
            uu = UnitToken.try_parse_list(mt0.end_token.next0_, add_units, False)
            if (uu is not None and len(mt0.units) == 0):
                mt0.units = uu
                mt0.end_token = uu[len(uu) - 1].end_token
            return mt0
    plusminus = False
    unit_before = False
    is_age_ = False
    dty = NumbersWithUnitToken.DiapTyp.UNDEFINED
    whd = None
    uni = None
    # Range keyword from the termin dictionary, or a comparison sign.
    tok = (None if NumbersWithUnitToken.M_TERMINS is None else NumbersWithUnitToken.M_TERMINS.try_parse(t, TerminParseAttr.NO))
    if (tok is not None):
        # "СТАРШЕ" / "МЛАДШЕ" ("older" / "younger") mark the value as an age.
        if (tok.end_token.is_value("СТАРШЕ", None) or tok.end_token.is_value("МЛАДШЕ", None)):
            is_age_ = True
        t = tok.end_token.next0_
        dty = (Utils.valToEnum(tok.termin.tag, NumbersWithUnitToken.DiapTyp))
        has_keyw = True
        if (not tok.is_whitespace_after):
            if (t is None):
                return None
            if (isinstance(t, NumberToken)):
                if (tok.begin_token == tok.end_token and not tok.chars.is_all_lower):
                    return None
            elif (t.is_comma and t.next0_ is not None and t.next0_.is_value("ЧЕМ", None)):
                # "..., ЧЕМ" ("than"), optionally followed by a preposition.
                t = t.next0_.next0_
                if (t is not None and t.morph.class0_.is_preposition):
                    t = t.next0_
            elif (t.is_char_of(":,(") or t.is_table_control_char):
                pass
            else:
                return None
        if (t is not None and t.is_char('(')):
            # Units given in brackets right after the keyword.
            uni = UnitToken.try_parse_list(t.next0_, add_units, False)
            if (uni is not None):
                t = uni[len(uni) - 1].end_token.next0_
                while t is not None:
                    if (t.is_char_of("):")):
                        t = t.next0_
                    else:
                        break
                mt0 = NumbersWithUnitToken._try_parse(t, add_units, False, can_omit_number, False)
                if (mt0 is not None and len(mt0.units) == 0):
                    mt0.begin_token = t0
                    mt0.units = uni
                    return mt0
            whd = NumbersWithUnitToken._try_parsewhl(t)
            if (whd is not None):
                t = whd.end_token.next0_
        elif (t is not None and t.is_value("IP", None)):
            uni = UnitToken.try_parse_list(t, add_units, False)
            if (uni is not None):
                t = uni[len(uni) - 1].end_token.next0_
        if ((t is not None and t.is_hiphen and t.is_whitespace_before) and t.is_whitespace_after):
            t = t.next0_
    elif (t.is_char('<')):
        dty = NumbersWithUnitToken.DiapTyp.LS
        t = t.next0_
        has_keyw = True
        if (t is not None and t.is_char('=')):
            t = t.next0_
            dty = NumbersWithUnitToken.DiapTyp.LE
    elif (t.is_char('>')):
        dty = NumbersWithUnitToken.DiapTyp.GT
        t = t.next0_
        has_keyw = True
        if (t is not None and t.is_char('=')):
            t = t.next0_
            dty = NumbersWithUnitToken.DiapTyp.GE
    elif (t.is_char('≤')):
        dty = NumbersWithUnitToken.DiapTyp.LE
        has_keyw = True
        t = t.next0_
    elif (t.is_char('≥')):
        dty = NumbersWithUnitToken.DiapTyp.GE
        has_keyw = True
        t = t.next0_
    elif (t.is_value("IP", None)):
        uni = UnitToken.try_parse_list(t, add_units, False)
        if (uni is not None):
            t = uni[len(uni) - 1].end_token.next0_
    elif (t.is_value("ЗА", None) and (isinstance(t.next0_, NumberToken))):
        dty = NumbersWithUnitToken.DiapTyp.GE
        t = t.next0_
    # Skip separators, "ЧЕМ" ("than") and table control characters.
    while t is not None and ((t.is_char_of(":,") or t.is_value("ЧЕМ", None) or t.is_table_control_char)):
        t = t.next0_
    # Sign / separator in front of the number.
    if (t is not None):
        if (t.is_char('+') or t.is_value("ПЛЮС", None)):
            t = t.next0_
            if (t is not None and not t.is_whitespace_before):
                # "+-" and "+/-" both mean plus-minus.
                if (t.is_hiphen):
                    t = t.next0_
                    plusminus = True
                elif ((t.is_char_of("\\/") and t.next0_ is not None and not t.is_newline_after) and t.next0_.is_hiphen):
                    t = t.next0_.next0_
                    plusminus = True
        elif (second and ((t.is_char_of("\\/÷…~")))):
            t = t.next0_
        elif ((t.is_hiphen and t == t0 and not second) and NumbersWithUnitToken.M_TERMINS.try_parse(t.next0_, TerminParseAttr.NO) is not None):
            tok = NumbersWithUnitToken.M_TERMINS.try_parse(t.next0_, TerminParseAttr.NO)
            t = tok.end_token.next0_
            dty = (Utils.valToEnum(tok.termin.tag, NumbersWithUnitToken.DiapTyp))
        elif (t.is_hiphen and t == t0 and ((t.is_whitespace_after or second))):
            t = t.next0_
        elif (t.is_char('±')):
            t = t.next0_
            plusminus = True
            has_keyw = True
        elif ((second and t.is_char('.') and t.next0_ is not None) and t.next0_.is_char('.')):
            # ".." or "..." used as a range separator.
            t = t.next0_.next0_
            if (t is not None and t.is_char('.')):
                t = t.next0_
    num = NumberHelper.try_parse_real_number(t, True, False)
    if (num is None):
        # No number here: maybe the unit is written before the number.
        uni = UnitToken.try_parse_list(t, add_units, False)
        if (uni is not None):
            unit_before = True
            t = uni[len(uni) - 1].end_token.next0_
            delim = False
            while t is not None:
                if (t.is_char_of(":,")):
                    delim = True
                    t = t.next0_
                elif (t.is_hiphen and t.is_whitespace_after):
                    delim = True
                    t = t.next0_
                else:
                    break
            if (not delim):
                if (t is None):
                    if (has_keyw and can_be_nan):
                        pass
                    else:
                        return None
                elif (not t.is_whitespace_before):
                    return None
                # t may legitimately be None here (keyword + can_be_nan), so
                # it must be checked before it is dereferenced.
                if (t is not None and t.next0_ is not None and t.is_hiphen and t.is_whitespace_after):
                    delim = True
                    t = t.next0_
            num = NumberHelper.try_parse_real_number(t, True, False)
    res = None
    rval = 0
    if (num is None):
        # Special termins carry both a value (tag) and a unit name (tag2).
        tt = NumbersWithUnitToken.M_SPEC.try_parse(t, TerminParseAttr.NO)
        if (tt is not None):
            rval = (tt.termin.tag)
            unam = tt.termin.tag2
            for u in UnitsHelper.UNITS:
                if (u.fullname_cyr == unam):
                    uni = list()
                    uni.append(UnitToken._new1626(t, t, u))
                    break
            if (uni is None):
                return None
            res = NumbersWithUnitToken._new1628(t0, tt.end_token, about_)
            t = tt.end_token.next0_
        else:
            if (not can_omit_number and not has_keyw and not can_be_nan):
                return None
            if ((uni is not None and len(uni) == 1 and uni[0].begin_token == uni[0].end_token) and uni[0].length_char > 3):
                rval = (1)
                res = NumbersWithUnitToken._new1628(t0, uni[len(uni) - 1].end_token, about_)
                t = res.end_token.next0_
            elif (has_keyw and can_be_nan):
                # Keyword without a number: the value is "not a number".
                rval = math.nan
                res = NumbersWithUnitToken._new1628(t0, t0, about_)
                if (t is not None):
                    res.end_token = t.previous
                else:
                    # Ran off the end of the chain: extend to the last token.
                    t = t0
                    while t is not None:
                        res.end_token = t
                        t = t.next0_
            else:
                return None
    else:
        if ((t == t0 and t0.is_hiphen and not t.is_whitespace_before) and not t.is_whitespace_after and (num.real_value < 0)):
            # A glued leading hyphen: re-read the number without it.
            num = NumberHelper.try_parse_real_number(t.next0_, True, False)
            if (num is None):
                return None
        if (t == t0 and (isinstance(t, NumberToken)) and t.morph.class0_.is_adjective):
            nn = Utils.asObjectOrNull(t.end_token, TextToken)
            if (nn is None):
                return None
            norm = nn.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
            if ((norm.endswith("Ь") or norm == "ЧЕТЫРЕ" or norm == "ТРИ") or norm == "ДВА"):
                pass
            else:
                # NOTE(review): the "КОКО" prefix presumably builds an artificial
                # word to probe the morphology of the ending - confirm.
                mi = MorphologyService.get_word_base_info("КОКО" + nn.term, None, False, False)
                if (mi.class0_.is_adjective):
                    return None
        t = num.end_token.next0_
        res = NumbersWithUnitToken._new1628(t0, num.end_token, about_)
        rval = num.real_value
    if (uni is None):
        uni = UnitToken.try_parse_list(t, add_units, False)
        if (uni is not None):
            if ((plusminus and second and len(uni) >= 1) and uni[0].unit == UnitsHelper.UPERCENT):
                # "± N %": the percent belongs to the tolerance, real units may follow.
                res.end_token = uni[0].end_token
                res.plus_minus_percent = True
                tt1 = uni[0].end_token.next0_
                uni = UnitToken.try_parse_list(tt1, add_units, False)
                if (uni is not None):
                    res.units = uni
                    res.end_token = uni[len(uni) - 1].end_token
            else:
                res.units = uni
                res.end_token = uni[len(uni) - 1].end_token
            t = res.end_token.next0_
    else:
        res.units = uni
        if (len(uni) > 1):
            uni1 = UnitToken.try_parse_list(t, add_units, False)
            if (((uni1 is not None and uni1[0].unit == uni[0].unit and (len(uni1) < len(uni))) and uni[len(uni1)].pow0_ == -1 and uni1[len(uni1) - 1].end_token.next0_ is not None) and uni1[len(uni1) - 1].end_token.next0_.is_char_of("/\\")):
                num2 = NumbersWithUnitToken._try_parse(uni1[len(uni1) - 1].end_token.next0_.next0_, add_units, False, False, False)
                # The nested parse may return a number without units, so the
                # list has to be checked for emptiness before indexing it.
                if (num2 is not None and num2.units is not None and len(num2.units) > 0 and num2.units[0].unit == uni[len(uni1)].unit):
                    res.units = uni1
                    res.div_num = num2
                    res.end_token = num2.end_token
    res.whl = whd
    if (dty != NumbersWithUnitToken.DiapTyp.UNDEFINED):
        if (dty == NumbersWithUnitToken.DiapTyp.GE or dty == NumbersWithUnitToken.DiapTyp.FROM):
            res.from_include = True
            res.from_val = rval
        elif (dty == NumbersWithUnitToken.DiapTyp.GT):
            res.from_include = False
            res.from_val = rval
        elif (dty == NumbersWithUnitToken.DiapTyp.LE or dty == NumbersWithUnitToken.DiapTyp.TO):
            res.to_include = True
            res.to_val = rval
        elif (dty == NumbersWithUnitToken.DiapTyp.LS):
            res.to_include = False
            res.to_val = rval
    if (not second):
        # Skip a min/max marker standing between the two parts of a range.
        wrapiii1632 = RefOutArgWrapper(0)
        ttt = NumbersWithUnitToken._is_min_or_max(t, wrapiii1632)
        iii = wrapiii1632.value
        if (ttt is not None and iii > 0):
            t = ttt.next0_
    # Try to read a continuation (second bound, tolerance or divisor).
    next0__ = (None if second or plusminus or ((t is not None and ((t.is_table_control_char or t.is_newline_before)))) else NumbersWithUnitToken._try_parse(t, add_units, True, False, can_be_nan))
    if (next0__ is not None and (isinstance(t.previous, NumberToken))):
        # The previous number token ends with a multiplication sign, so what
        # follows is a factor, not a range bound.
        if (MeasureHelper.is_mult_char(t.previous.end_token)):
            next0__ = (None)
    if (next0__ is not None and ((next0__.to_val is not None or next0__.single_val is not None)) and next0__.from_val is None):
        # Asymmetric tolerance "N +a/-b".
        if ((((next0__.begin_token.is_char('+') and next0__.single_val is not None and not math.isnan(next0__.single_val)) and next0__.end_token.next0_ is not None and next0__.end_token.next0_.is_char_of("\\/")) and next0__.end_token.next0_.next0_ is not None and next0__.end_token.next0_.next0_.is_hiphen) and not has_keyw and not math.isnan(rval)):
            next2 = NumbersWithUnitToken._try_parse(next0__.end_token.next0_.next0_.next0_, add_units, True, False, False)
            if (next2 is not None and next2.single_val is not None and not math.isnan(next2.single_val)):
                res.from_val = (rval - next2.single_val)
                res.from_include = True
                res.to_val = (rval + next0__.single_val)
                res.to_include = True
                if (next2.units is not None and len(res.units) == 0):
                    res.units = next2.units
                res.end_token = next2.end_token
                return res
        if (len(next0__.units) > 0):
            if (len(res.units) == 0):
                res.units = next0__.units
            elif (not UnitToken.can_be_equals(res.units, next0__.units)):
                next0__ = (None)
        elif (len(res.units) > 0 and not unit_before and not next0__.plus_minus_percent):
            next0__ = (None)
        if (next0__ is not None):
            res.end_token = next0__.end_token
        if (next0__ is not None and next0__.to_val is not None):
            res.to_val = next0__.to_val
            res.to_include = next0__.to_include
        elif (next0__ is not None and next0__.single_val is not None):
            if (next0__.begin_token.is_char_of("/\\")):
                res.div_num = next0__
                res.single_val = rval
                return res
            elif (next0__.plus_minus_percent):
                res.single_val = rval
                res.plus_minus = next0__.single_val
                res.plus_minus_percent = True
                res.to_include = True
            else:
                res.to_val = next0__.single_val
                res.to_include = True
        if (next0__ is not None):
            if (res.from_val is None):
                res.from_val = rval
                res.from_include = True
            return res
    elif ((next0__ is not None and next0__.from_val is not None and next0__.to_val is not None) and next0__.to_val == (- next0__.from_val)):
        # The continuation is a symmetric range, i.e. a "± x" tolerance.
        if (len(next0__.units) == 1 and next0__.units[0].unit == UnitsHelper.UPERCENT and len(res.units) > 0):
            res.single_val = rval
            res.plus_minus = next0__.to_val
            res.plus_minus_percent = True
            res.end_token = next0__.end_token
            return res
        if (len(next0__.units) == 0):
            res.single_val = rval
            res.plus_minus = next0__.to_val
            res.end_token = next0__.end_token
            return res
        res.from_val = (next0__.from_val + rval)
        res.from_include = True
        res.to_val = (next0__.to_val + rval)
        res.to_include = True
        res.end_token = next0__.end_token
        if (len(next0__.units) > 0):
            res.units = next0__.units
        return res
    if (dty == NumbersWithUnitToken.DiapTyp.UNDEFINED):
        if (plusminus and ((not res.plus_minus_percent or not second))):
            res.from_include = True
            res.from_val = (- rval)
            res.to_include = True
            res.to_val = rval
        else:
            res.single_val = rval
            res.plus_minus_percent = plusminus
    if (is_age_):
        res.is_age = True
    return res
def try_parse_multi(t : 'Token', add_units : 'TerminCollection', can_omit_number : bool=False, not0__ : bool=False, can_be_non : bool=False, is_resctriction : bool=False) -> typing.List['NumbersWithUnitToken']:
    """Parse one value or a product of up to three values (``A x B x C``) at ``t``.

    Returns a list of ``NumbersWithUnitToken`` items, or ``None`` when ``t``
    is ``None``, is a ``ReferentToken``, or no value can be parsed.  Units
    found on a later factor are copied to earlier factors that have none.
    """
    if t is None or isinstance(t, ReferentToken):
        return None

    def token_after_mult_sign(item):
        # Token that follows a multiplication sign placed right after `item`;
        # the sign is either a separate token or the tail of a number token.
        if item.whitespaces_after_count >= 2:
            return None
        if MeasureHelper.is_mult_char(item.end_token.next0_):
            return item.end_token.next0_.next0_
        if isinstance(item.end_token, NumberToken) and MeasureHelper.is_mult_char(item.end_token.end_token):
            return item.end_token.next0_
        return None

    # "( ... )", optionally opened by a bracketed header that _try_parsewhl
    # recognises and that is stored in the first item's `whl`.
    start = t
    if start.is_char('('):
        header = NumbersWithUnitToken._try_parsewhl(start)
        if header is not None:
            start = header.end_token
        bracketed = NumbersWithUnitToken.try_parse_multi(start.next0_, add_units, False, can_omit_number, can_be_non, False)
        if bracketed is not None:
            bracketed[0].whl = header
            closing = bracketed[len(bracketed) - 1].end_token.next0_
            if closing is not None and closing.is_char_of(","):
                closing = closing.next0_
            if header is not None:
                return bracketed
            if closing is not None and closing.is_char(')'):
                bracketed[len(bracketed) - 1].end_token = closing
                return bracketed

    first = NumbersWithUnitToken.try_parse(t, add_units, can_omit_number, not0__, can_be_non, is_resctriction)
    if first is None:
        return None

    after_first = token_after_mult_sign(first)
    if after_first is not None:
        factor2 = NumbersWithUnitToken.try_parse(after_first, add_units, not0__, False, False, False)
        if factor2 is not None:
            factor3 = None
            after_second = token_after_mult_sign(factor2)
            if after_second is not None:
                factor3 = NumbersWithUnitToken.try_parse(after_second, add_units, False, False, False, False)
            if factor3 is None:
                # Text glued to the second factor must be plain punctuation.
                follower = factor2.end_token.next0_
                if (follower is not None and not follower.is_whitespace_before
                        and not follower.is_char_of(",.;")):
                    return None
            elif len(factor3.units) > 0 and len(factor2.units) == 0:
                factor2.units = factor3.units
            if len(factor2.units) > 0 and len(first.units) == 0:
                first.units = factor2.units
            product = [first, factor2]
            if factor3 is not None:
                product.append(factor3)
            return product

    # A unit glued to the multiplication sign, followed by a number.
    if (not first.is_whitespace_after
            and MeasureHelper.is_mult_char_end(first.end_token.next0_)
            and isinstance(first.end_token.next0_.next0_, NumberToken)
            and len(first.units) == 0):
        unit_text = first.end_token.next0_.term
        unit_text = unit_text[:-1]
        found = UnitsHelper.TERMINS.find_termins_by_string(unit_text, None)
        if found is not None and len(found) > 0:
            glued = first.end_token.next0_
            first.units.append(UnitToken._new1626(glued, glued, Utils.asObjectOrNull(found[0].tag, Unit)))
            first.end_token = first.end_token.next0_
            tail = NumbersWithUnitToken.try_parse_multi(first.end_token.next0_, add_units, False, False, False, False)
            if tail is not None:
                tail.insert(0, first)
                return tail

    return [first]