def __try_attach_speciality(t: 'Token', key_word_before: bool) -> 'TitleItemToken':
    """Try to read a numeric speciality code that starts at token ``t``.

    Up to three digit-number tokens are consumed, optionally separated by
    ``.``, ``,`` or a hyphen. The digits are concatenated; a code of exactly
    six digits is re-punctuated as ``NN.NN.NN``.

    Args:
        t: first token of the candidate code; ``None`` yields ``None``.
        key_word_before: True when the caller already saw an introducing
            keyword, which relaxes the checks.

    Returns:
        A ``TitleItemToken`` of type ``SPECIALITY`` spanning from ``t`` to the
        last token before the next line break, or ``None`` when the tokens do
        not form an acceptable code.
    """
    if t is None:
        return None
    # No keyword and not at the start of a line: accept only the strict
    # two-digit groups joined by tight dots.
    suspicious = not key_word_before and not t.is_newline_before
    code = None
    first = t
    digit_total = 0
    for group_no in range(3):
        number = Utils.asObjectOrNull(t, NumberToken)
        if number is None:
            break
        if number.typ != NumberSpellingType.DIGIT or number.morph.class0_.is_adjective:
            break
        if code is None:
            code = io.StringIO()
        if suspicious and t.length_char != 2:
            return None
        digits = number.get_source_text()
        digit_total += len(digits)
        code.write(digits)
        if t.next0_ is None:
            break
        t = t.next0_
        if not (t.is_char_of(".,") or t.is_hiphen):
            continue
        if suspicious and group_no < 2:
            if not t.is_char('.') or t.is_whitespace_after or t.is_whitespace_before:
                return None
        # Step over the separator when something follows it.
        if t.next0_ is not None:
            t = t.next0_
    if code is None or digit_total < 5:
        return None
    if digit_total == 6:
        # Insert the later dot first so the earlier index stays valid.
        Utils.insertStringIO(code, 4, '.')
        Utils.insertStringIO(code, 2, '.')
    elif not key_word_before:
        return None
    # Extend the end token to the rest of the line; a bracketed span found by
    # BracketHelper is swallowed as a whole.
    cursor = t.next0_
    while cursor is not None and not cursor.is_newline_before:
        bracket = BracketHelper.try_parse(cursor, BracketParseAttr.NO, 100)
        if bracket is not None:
            cursor = bracket.end_token
        t = cursor
        cursor = cursor.next0_
    return TitleItemToken._new2655(
        first, t, TitleItemToken.Types.SPECIALITY, Utils.toStringStringIO(code))
def attach_mail_users(t1: 'Token') -> typing.List['UriItemToken']:
    """Collect the user part(s) of a mail address, scanning backwards from ``t1``.

    Two shapes are handled:

    * ``t1`` is ``}``: a brace-enclosed group is read right-to-left. Each
      member is parsed recursively, ``;`` and ``,`` are skipped, and the scan
      succeeds when the opening ``{`` is reached.
    * otherwise: adjacent tokens (no whitespace after them) are prepended to
      the name while they are number tokens, or text tokens whose first
      character is a letter or one of ``.-_``.

    Args:
        t1: the last token of the user name (or the closing brace).

    Returns:
        A list of ``UriItemToken`` (the value of a plain name is lower-cased),
        or ``None`` when nothing suitable was found.
    """
    if t1 is None:
        return None
    if t1.is_char('}'):
        users = UriItemToken.attach_mail_users(t1.previous)
        if users is None:
            return None
        t1 = users[0].begin_token.previous
        while t1 is not None:
            if t1.is_char('{'):
                # The group is complete: the first member now starts at the brace.
                users[0].begin_token = t1
                return users
            if not t1.is_char_of(";,"):
                earlier = UriItemToken.attach_mail_users(t1)
                if earlier is None:
                    return None
                users.insert(0, earlier[0])
                t1 = earlier[0].begin_token
            t1 = t1.previous
        # Ran out of tokens without meeting the opening brace.
        return None
    name = io.StringIO()
    first = t1
    cur = t1
    while cur is not None and not cur.is_whitespace_after:
        if isinstance(cur, NumberToken):
            piece = Utils.asObjectOrNull(cur, NumberToken).get_source_text()
        else:
            text_token = Utils.asObjectOrNull(cur, TextToken)
            if text_token is None:
                break
            piece = text_token.get_source_text()
            lead = piece[0]
            if not lead.isalpha() and lead not in ".-_":
                break
        # The scan goes backwards, so every piece is put in front.
        Utils.insertStringIO(name, 0, piece)
        first = cur
        cur = cur.previous
    if name.tell() == 0:
        return None
    return [UriItemToken._new2706(first, t1, Utils.toStringStringIO(name).lower())]
def to_string(self, short_variant: bool, lang: 'MorphLang' = None, lev: int = 0) -> str:
    """Render the measure as text by filling ``self.template`` with its values.

    Each digit ``1``..``9`` in the template is replaced by the value with that
    ordinal; a string value ``"NaN"`` is shown as ``"?"``. The units text from
    ``self.out_units(lang)`` is appended.

    Args:
        short_variant: when False, the name, referenced measures and the kind
            are appended as well.
        lang: language passed through to nested ``to_string`` calls and to
            ``out_units``.
        lev: not used by this method.

    Returns:
        The rendered text.
    """
    out = Utils.newStringIO(self.template)
    values = []
    for slot in self.slots:
        if slot.type_name != MeasureReferent.ATTR_VALUE:
            continue
        if isinstance(slot.value, str):
            text = Utils.asObjectOrNull(slot.value, str)
            values.append("?" if text == "NaN" else text)
        elif isinstance(slot.value, Referent):
            values.append(slot.value.to_string(True, lang, 0))
    # Substitute right-to-left so that text already inserted is never rescanned.
    pos = out.tell() - 1
    while pos >= 0:
        symbol = Utils.getCharAtStringIO(out, pos)
        if symbol.isdigit():
            index = ord(symbol) - ord('1')
            if 0 <= index < len(values):
                Utils.removeStringIO(out, pos, 1)
                Utils.insertStringIO(out, pos, values[index])
        pos -= 1
    print(self.out_units(lang), end="", file=out)
    if not short_variant:
        name = self.get_string_value(MeasureReferent.ATTR_NAME)
        if name is not None:
            out.write(" - {0}".format(name))
        for slot in self.slots:
            if slot.type_name == MeasureReferent.ATTR_REF and isinstance(slot.value, MeasureReferent):
                out.write(" / {0}".format(slot.value.to_string(True, lang, 0)))
        kind = self.kind
        if kind != MeasureKind.UNDEFINED:
            out.write(" ({0})".format(Utils.enumToString(kind).upper()))
    return Utils.toStringStringIO(out)
def toString(self, short_variant: bool, lang: 'MorphLang' = None, lev: int = 0) -> str:
    """Render the measure as text by filling ``self.template`` with its values.

    Each digit ``1``..``9`` in the template is replaced by the value with that
    ordinal. The units follow: the first one as is, every further one as
    ``*unit``, or as ``/unit`` when its power attribute starts with ``-``
    (with ``<n>`` appended unless the power is exactly ``-1``).

    Args:
        short_variant: when False, the name, referenced measures and the kind
            are appended as well.
        lang: language passed through to nested ``toString`` calls.
        lev: not used by this method.

    Returns:
        The rendered text.
    """
    out = Utils.newStringIO(self.template)
    values = []
    for slot in self.slots:
        if slot.type_name != MeasureReferent.ATTR_VALUE:
            continue
        if isinstance(slot.value, str):
            values.append(Utils.asObjectOrNull(slot.value, str))
        elif isinstance(slot.value, Referent):
            values.append(slot.value.toString(True, lang, 0))
    # Substitute right-to-left so that text already inserted is never rescanned.
    pos = out.tell() - 1
    while pos >= 0:
        symbol = Utils.getCharAtStringIO(out, pos)
        if symbol.isdigit():
            index = ord(symbol) - ord('1')
            if 0 <= index < len(values):
                Utils.removeStringIO(out, pos, 1)
                Utils.insertStringIO(out, pos, values[index])
        pos -= 1
    units = self.units
    if len(units) > 0:
        print(units[0].toString(True, lang, 0), end="", file=out)
        for k in range(1, len(units)):
            power = units[k].getStringValue(UnitReferent.ATTR_POW)
            if not Utils.isNullOrEmpty(power) and power[0] == '-':
                # Negative power: the unit goes under a division sign.
                out.write("/{0}".format(units[k].toString(True, lang, 1)))
                if power != "-1":
                    out.write("<{0}>".format(power[1:]))
            else:
                out.write("*{0}".format(units[k].toString(True, lang, 0)))
    if not short_variant:
        name = self.getStringValue(MeasureReferent.ATTR_NAME)
        if name is not None:
            out.write(" - {0}".format(name))
        for slot in self.slots:
            if slot.type_name == MeasureReferent.ATTR_REF and isinstance(slot.value, MeasureReferent):
                out.write(" / {0}".format(slot.value.toString(True, lang, 0)))
        kind = self.kind
        if kind != MeasureKind.UNDEFINED:
            out.write(" ({0})".format(Utils.enumToString(kind).upper()))
    return Utils.toStringStringIO(out)
def correct_word_by_morph(self, word: str) -> str:
    """Suggest a correction for ``word`` assuming a single-character error.

    Three kinds of patterns are probed through ``__check_corr_var``, each
    stage only when the previous ones found nothing:

    1. one character (never the first) replaced by ``*``;
    2. ``*`` inserted before a character (never at the very start);
    3. one character (neither first nor last) removed.

    Args:
        word: the word to correct.

    Returns:
        The suggestion when exactly one distinct variant was found,
        otherwise ``None``.
    """
    candidates = []
    scratch = Utils.newStringIO(len(word))

    def load_word():
        # Reset the scratch buffer so it holds a fresh copy of the word.
        Utils.setLengthStringIO(scratch, 0)
        print(word, end="", file=scratch)

    def probe():
        # Check the current pattern and remember any new variant.
        found = self.__check_corr_var(Utils.toStringStringIO(scratch), self.m_root, 0)
        if found is not None and found not in candidates:
            candidates.append(found)

    for pos in range(1, len(word)):
        load_word()
        Utils.setCharAtStringIO(scratch, pos, '*')
        probe()
    if len(candidates) == 0:
        for pos in range(1, len(word)):
            load_word()
            Utils.insertStringIO(scratch, pos, '*')
            probe()
    if len(candidates) == 0:
        for pos in range(1, len(word) - 1):
            load_word()
            Utils.removeStringIO(scratch, pos, 1)
            probe()
    # An ambiguous result is as useless to the caller as no result.
    return candidates[0] if len(candidates) == 1 else None
def to_string(self, short_variant: bool, lang: 'MorphLang' = None, lev: int = 0) -> str:
    """Format the phone number for display.

    Layout: ``[+]country (area) xxx-xx-xx (доб.N)``. The ``+`` is omitted for
    country code ``"8"``. The leading digits of the number are bracketed as an
    area code: all but the last seven for 11+ digits, three for 9-10 digits,
    two for exactly 8 digits. A remainder longer than five digits gets dashes
    before its last four and last two digits. A missing number is shown as
    ``???-??-??``.

    Args:
        short_variant: not used by this method.
        lang: not used by this method.
        lev: not used by this method.

    Returns:
        The formatted phone number.
    """
    out = io.StringIO()
    country = self.country_code
    if country is not None:
        out.write("{0}{1} ".format("" if country == "8" else "+", country))
    digits = self.number
    if digits is not None:
        # Decide how many leading digits form the bracketed area code.
        if len(digits) >= 11:
            area_len = len(digits) - 7
        elif len(digits) >= 9:
            area_len = 3
        elif len(digits) == 8:
            area_len = 2
        else:
            area_len = 0
        if area_len > 0:
            out.write("({0}) ".format(digits[0:area_len]))
            digits = digits[area_len:]
    if digits is None:
        out.write("???-??-??")
    else:
        print(digits, end="", file=out)
        if len(digits) > 5:
            # tell() is re-read for the second call because the first insert
            # has already lengthened the buffer.
            Utils.insertStringIO(out, out.tell() - 4, '-')
            Utils.insertStringIO(out, out.tell() - 2, '-')
    if self.add_number is not None:
        out.write(" (доб.{0})".format(self.add_number))
    return Utils.toStringStringIO(out)
def toString(self, short_variant: bool, lang: 'MorphLang' = None, lev: int = 0) -> str:
    """Format the money amount as ``<value>[,<rest>] <currency>``.

    The value gets a ``.`` inserted before every group of three characters
    counted from the right. A missing value is shown as ``0`` when there is a
    positive rest, and as ``?`` otherwise. A positive rest is appended after a
    comma, zero-padded to two digits.

    Args:
        short_variant: not used by this method.
        lang: not used by this method.
        lev: not used by this method.

    Returns:
        The formatted amount followed by a space and ``self.currency``.
    """
    out = io.StringIO()
    amount = self.getStringValue(MoneyReferent.ATTR_VALUE)
    rest = self.rest
    if amount is None and not (rest > 0):
        out.write("?")
    else:
        print(Utils.ifNotNull(amount, "0"), end="", file=out)
        # Walk from the last character towards (but not onto) the first one.
        run = 0
        for pos in range(out.tell() - 1, 0, -1):
            run += 1
            if run == 3:
                Utils.insertStringIO(out, pos, '.')
                run = 0
    if rest > 0:
        out.write(",{0}".format("{:02d}".format(rest)))
    out.write(" {0}".format(self.currency))
    return Utils.toStringStringIO(out)
def __attach_uri_content(
        t0: 'Token', chars_: str, can_be_whitespaces: bool = False) -> 'UriItemToken':
    """Collect the textual content of a URI that starts at token ``t0``.

    A domain name is tried first via ``UriItemToken.attach_domain_name``; the
    token scan then continues after it. Number tokens and text tokens are
    appended while their first character is a letter or occurs in ``chars_``.

    Args:
        t0: first token of the candidate URI content.
        chars_: the non-letter characters that may appear inside the content.
        can_be_whitespaces: when True (and a domain was found), the content
            may continue across a whitespace gap under the conditions checked
            in the loop below.

    Returns:
        A ``UriItemToken`` spanning the collected tokens; the domain token
        itself (possibly ``None``) when no usable text followed it; ``None``
        when the domain value is shorter than 3 characters or the result is
        just ``WWW``.
    """
    txt = io.StringIO()
    t1 = t0
    dom = UriItemToken.attach_domain_name(t0, True, can_be_whitespaces)
    if (dom is not None):
        if (len(dom.value) < 3):
            return None
    # chr(0) means "no bracket currently open".
    open_char = chr(0)
    t = t0
    if (dom is not None):
        t = dom.end_token.next0_
    # for-loop emulation: step to the next token on every pass except the
    # first one, so that `continue` still advances.
    first_pass3411 = True
    while True:
        if first_pass3411: first_pass3411 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t != t0 and t.is_whitespace_before):
            # A whitespace gap ends the content unless gaps are allowed, the
            # gap is not a line break and a domain was recognized.
            if (t.is_newline_before or not can_be_whitespaces):
                break
            if (dom is None):
                break
            if (t.previous.is_hiphen):
                pass
            elif (t.previous.is_char_of(",;")):
                break
            elif (t.previous.is_char('.') and t.chars.is_letter and t.length_char == 2):
                # '.' followed by a two-letter token is accepted as a continuation.
                pass
            else:
                # Look ahead over the unbroken run of tokens: continue only if
                # it contains a known page extension or a slash/backslash.
                ok = False
                tt1 = t
                if (t.is_char_of("\\/")):
                    tt1 = t.next0_
                tt0 = tt1
                first_pass3412 = True
                while True:
                    if first_pass3412: first_pass3412 = False
                    else: tt1 = tt1.next0_
                    if (not (tt1 is not None)): break
                    if (tt1 != tt0 and tt1.is_whitespace_before):
                        break
                    if (isinstance(tt1, NumberToken)):
                        continue
                    if (not (isinstance(tt1, TextToken))):
                        break
                    term1 = tt1.term
                    if (((term1 == "HTM" or term1 == "HTML" or term1 == "SHTML") or term1 == "ASP" or term1 == "ASPX") or term1 == "JSP"):
                        ok = True
                        break
                    if (not tt1.chars.is_letter):
                        if (tt1.is_char_of("\\/")):
                            ok = True
                            break
                        if (not tt1.is_char_of(chars_)):
                            break
                    elif (not tt1.chars.is_latin_letter):
                        break
                if (not ok):
                    break
        if (isinstance(t, NumberToken)):
            nt = Utils.asObjectOrNull(t, NumberToken)
            print(nt.get_source_text(), end="", file=txt)
            t1 = t
            continue
        tt = Utils.asObjectOrNull(t, TextToken)
        if (tt is None):
            # Not a plain text token: a referent token is still accepted in
            # two special cases, otherwise the content ends here.
            rt = Utils.asObjectOrNull(t, ReferentToken)
            if (rt is not None and rt.begin_token.is_value("РФ", None)):
                # "РФ" is taken only right after a dot
                # (presumably the .рф top-level domain - TODO confirm).
                if (txt.tell() > 0 and Utils.getCharAtStringIO(
                        txt, txt.tell() - 1) == '.'):
                    print(rt.begin_token.get_source_text(), end="", file=txt)
                    t1 = t
                    continue
            if (rt is not None and rt.chars.is_latin_letter and rt.begin_token == rt.end_token):
                print(rt.begin_token.get_source_text(),
                      end="", file=txt)
                t1 = t
                continue
            break
        src = tt.get_source_text()
        ch = src[0]
        if (not str.isalpha(ch)):
            if (chars_.find(ch) < 0):
                break
            # Track brackets: a closing bracket without its matching opener
            # ends the content.
            if (ch == '(' or ch == '['):
                open_char = ch
            elif (ch == ')'):
                if (open_char != '('):
                    break
                open_char = (chr(0))
            elif (ch == ']'):
                if (open_char != '['):
                    break
                open_char = (chr(0))
        print(src, end="", file=txt)
        t1 = t
    if (txt.tell() == 0):
        return dom
    # Content without a single letter or digit is not a URI path; fall back
    # to the bare domain.
    i = 0
    i = 0
    while i < txt.tell():
        if (str.isalnum(Utils.getCharAtStringIO(txt, i))):
            break
        i += 1
    if (i >= txt.tell()):
        return dom
    # Drop one trailing '.' or '/' and move the end token back accordingly.
    if (Utils.getCharAtStringIO(txt, txt.tell() - 1) == '.' or Utils.getCharAtStringIO(txt, txt.tell() - 1) == '/'):
        Utils.setLengthStringIO(txt, txt.tell() - 1)
        t1 = t1.previous
    if (dom is not None):
        Utils.insertStringIO(txt, 0, dom.value)
    tmp = Utils.toStringStringIO(txt)
    # A leading double backslash is normalized to "//".
    if (tmp.startswith("\\\\")):
        Utils.replaceStringIO(txt, "\\\\", "//")
        tmp = Utils.toStringStringIO(txt)
    if (tmp.startswith("//")):
        tmp = tmp[2:]
    # A bare "WWW" is rejected (third argument presumably requests a
    # case-insensitive comparison - TODO confirm against Utils).
    if (Utils.compareStrings(tmp, "WWW", True) == 0):
        return None
    res = UriItemToken._new2706(t0, t1, Utils.toStringStringIO(txt))
    return res
def _to_string(self, short_variant: bool, lang: 'MorphLang', lev: int,
               from_range: int) -> str:
    """Render the date as text in Russian, Ukrainian or English.

    Relative dates (``self.is_relative``) are rendered from their slots, e.g.
    "сейчас", "текущий год", "3 дня назад". Absolute dates are rendered as
    ``[prefix] [pointer] [weekday,] [day] [month] [year] [hh:mm[:ss]]``, or as
    a Roman-numeral century, or as a quarter when nothing else is set.

    Args:
        short_variant: use the short year suffix ("г"/"р") and skip the extra
            text that ``DateRelHelper.append_to_string`` adds for relative
            dates.
        lang: output language; ``None`` falls back to ``MorphLang.RU``.
        lev: not used by this method.
        from_range: 0 for a standalone date; 1 when the date opens a range
            (prefix "с"/"з"/"from"); 2 when it closes one (prefix "по"/"to").

    Returns:
        The rendered text, or ``"?"`` when an absolute date has nothing to show.
    """
    from pullenti.ner.date.internal.DateRelHelper import DateRelHelper
    res = io.StringIO()
    p = self.pointer
    if lang is None:
        lang = MorphLang.RU
    if self.is_relative:
        if self.pointer == DatePointerType.TODAY:
            print("сейчас".format(), end="", file=res, flush=True)
            if not short_variant:
                DateRelHelper.append_to_string(self, res)
            return Utils.toStringStringIO(res)

        def slot_int(slot):
            # Parse the slot's string value through the out-argument wrapper.
            parsed = RefOutArgWrapper(0)
            Utils.tryParseInt(Utils.asObjectOrNull(slot.value, str), parsed)
            return parsed.value

        word = None
        val = 0
        back = False
        is_local_rel = self.get_string_value(
            DateReferent.ATTR_ISRELATIVE) == "true"
        for s in self.slots:
            if s.type_name == DateReferent.ATTR_CENTURY:
                word = "век"
                val = slot_int(s)
            elif s.type_name == DateReferent.ATTR_YEAR:
                word = "год"
                val = slot_int(s)
            elif s.type_name == DateReferent.ATTR_MONTH:
                word = "месяц"
                val = slot_int(s)
                if not is_local_rel and val >= 1 and val <= 12:
                    print(DateReferent.__m_month0[val - 1], end="", file=res)
            elif s.type_name == DateReferent.ATTR_DAY:
                word = "день"
                val = slot_int(s)
                if ((not is_local_rel and self.month > 0 and self.month <= 12)
                        and self.higher is not None
                        and self.higher.get_string_value(
                            DateReferent.ATTR_ISRELATIVE) != "true"):
                    print("{0} {1}".format(
                        val, DateReferent.__m_month[self.month - 1]),
                        end="", file=res, flush=True)
                elif not is_local_rel:
                    print("{0} число".format(val), end="", file=res, flush=True)
            elif s.type_name == DateReferent.ATTR_QUARTAL:
                word = "квартал"
                val = slot_int(s)
            elif s.type_name == DateReferent.ATTR_WEEK:
                word = "неделя"
                val = slot_int(s)
            elif s.type_name == DateReferent.ATTR_HOUR:
                word = "час"
                val = slot_int(s)
                if not is_local_rel:
                    print("{0}:{1}".format("{:02d}".format(val),
                                           "{:02d}".format(self.minute)),
                          end="", file=res, flush=True)
            elif s.type_name == DateReferent.ATTR_MINUTE:
                word = "минута"
                val = slot_int(s)
            elif s.type_name == DateReferent.ATTR_DAYOFWEEK:
                val = slot_int(s)
                if not is_local_rel:
                    print((DateReferent.__m_week_day_ex[val - 1]
                           if val >= 1 and val <= 7 else "?"),
                          end="", file=res)
                else:
                    # A negative value means a weekday in the past.
                    if val < 0:
                        val = (-val)
                        back = True
                    if val >= 0 and val <= 7:
                        # The adjective agrees in gender with the weekday name
                        # that is printed after it.
                        print("{0} {1}".format(
                            ((("прошлое" if back else "будущее"))
                             if val == 7 else
                             ((("прошлая" if back else "будущая"))
                              if (val == 3 or val == 6) else
                              (("прошлый" if back else "будущий")))),
                            DateReferent.__m_week_day_ex[val - 1]),
                            end="", file=res, flush=True)
                        # NOTE(review): this break is placed inside the
                        # innermost `if`; confirm against upstream.
                        break
        if word is not None and is_local_rel:
            if val == 0:
                print("{0} {1}".format(
                    ("текущая" if word == "неделя" or word == "минута"
                     else "текущий"), word),
                    end="", file=res, flush=True)
            elif val > 0 and not back:
                print("{0} {1} вперёд".format(
                    val,
                    MiscHelper.get_text_morph_var_by_case_and_number_ex(
                        word, None, MorphNumber.UNDEFINED, str(val))),
                    end="", file=res, flush=True)
            else:
                val = (-val)
                print("{0} {1} назад".format(
                    val,
                    MiscHelper.get_text_morph_var_by_case_and_number_ex(
                        word, None, MorphNumber.UNDEFINED, str(val))),
                    end="", file=res, flush=True)
        elif not is_local_rel and res.tell() == 0:
            print("{0} {1}".format(
                val,
                MiscHelper.get_text_morph_var_by_case_and_number_ex(
                    word, None, MorphNumber.UNDEFINED, str(val))),
                end="", file=res, flush=True)
        if not short_variant:
            DateRelHelper.append_to_string(self, res)
        # The range prefix is put in front of whatever was rendered above.
        if from_range == 1:
            Utils.insertStringIO(
                res, 0,
                "{0} ".format(("з" if lang.is_ua
                               else ("from" if lang.is_en else "с"))))
        elif from_range == 2:
            Utils.insertStringIO(res, 0, ("to " if lang.is_en else "по "))
        return Utils.toStringStringIO(res)
    if from_range == 1:
        print("{0} ".format(("з" if lang.is_ua
                             else ("from" if lang.is_en else "с"))),
              end="", file=res, flush=True)
    elif from_range == 2:
        print(("to " if lang.is_en else "по ").format(),
              end="", file=res, flush=True)
    if p != DatePointerType.NO:
        val = MetaDate.POINTER.convert_inner_value_to_outer_value(
            Utils.enumToString(p), lang)
        # Inside a range the pointer word is replaced by the grammatical form
        # that fits the prefix; English keeps the converted value.
        if from_range == 0 or lang.is_en:
            pass
        elif from_range == 1:
            if p == DatePointerType.BEGIN:
                val = ("початку" if lang.is_ua else "начала")
            elif p == DatePointerType.CENTER:
                val = ("середини" if lang.is_ua else "середины")
            elif p == DatePointerType.END:
                val = ("кінця" if lang.is_ua else "конца")
            elif p == DatePointerType.TODAY:
                val = ("цього часу" if lang.is_ua else "настоящего времени")
        elif from_range == 2:
            if p == DatePointerType.BEGIN:
                val = ("початок" if lang.is_ua else "начало")
            elif p == DatePointerType.CENTER:
                val = ("середину" if lang.is_ua else "середину")
            elif p == DatePointerType.END:
                val = ("кінець" if lang.is_ua else "конец")
            elif p == DatePointerType.TODAY:
                val = ("теперішній час" if lang.is_ua else "настоящее время")
        print("{0} ".format(val), end="", file=res, flush=True)
    if self.day_of_week > 0:
        if lang.is_en:
            print("{0}, ".format(
                DateReferent.__m_week_day_en[self.day_of_week - 1]),
                end="", file=res, flush=True)
        else:
            print("{0}, ".format(
                DateReferent.__m_week_day[self.day_of_week - 1]),
                end="", file=res, flush=True)
    y = self.year
    m = self.month
    d = self.day
    cent = self.century
    if y == 0 and cent != 0:
        is_bc = cent < 0
        if cent < 0:
            cent = (-cent)
        print(NumberHelper.get_number_roman(cent), end="", file=res)
        # The English suffix belongs to English output. The check used to
        # test lang.is_ua, which put "century" into Ukrainian text and made
        # the Ukrainian forms below unreachable.
        if lang.is_en:
            print(" century", end="", file=res)
        elif m > 0 or p != DatePointerType.NO or from_range == 1:
            print((" віка" if lang.is_ua else " века"), end="", file=res)
        else:
            print((" вік" if lang.is_ua else " век"), end="", file=res)
        if is_bc:
            # NOTE(review): English still gets the Russian era marker here;
            # the year branch below uses "BC" - confirm the intended text.
            print((" до н.е." if lang.is_ua else " до н.э."),
                  end="", file=res)
        return Utils.toStringStringIO(res)
    if d > 0:
        print(d, end="", file=res)
    if m > 0 and m <= 12:
        if (res.tell() > 0
                and Utils.getCharAtStringIO(res, res.tell() - 1) != ' '):
            print(' ', end="", file=res)
        # Two month tables per language: one used next to a day, pointer or
        # range prefix, the other for a standalone month.
        if lang.is_ua:
            print((DateReferent.__m_monthua[m - 1]
                   if d > 0 or p != DatePointerType.NO or from_range != 0
                   else DateReferent.__m_month0ua[m - 1]),
                  end="", file=res)
        elif lang.is_en:
            print(DateReferent.__m_monthen[m - 1], end="", file=res)
        else:
            print((DateReferent.__m_month[m - 1]
                   if d > 0 or p != DatePointerType.NO or from_range != 0
                   else DateReferent.__m_month0[m - 1]),
                  end="", file=res)
    if y != 0:
        is_bc = y < 0
        if y < 0:
            y = (-y)
        if (res.tell() > 0
                and Utils.getCharAtStringIO(res, res.tell() - 1) != ' '):
            print(' ', end="", file=res)
        if lang is not None and lang.is_en:
            print("{0}".format(y), end="", file=res, flush=True)
        elif short_variant:
            print("{0}{1}".format(y, ("р" if lang.is_ua else "г")),
                  end="", file=res, flush=True)
        elif m > 0 or p != DatePointerType.NO or from_range == 1:
            print("{0} {1}".format(y, ("року" if lang.is_ua else "года")),
                  end="", file=res, flush=True)
        else:
            print("{0} {1}".format(y, ("рік" if lang.is_ua else "год")),
                  end="", file=res, flush=True)
        if is_bc:
            print((" до н.е." if lang.is_ua
                   else ("BC" if lang.is_en else " до н.э.")),
                  end="", file=res)
    h = self.hour
    mi = self.minute
    se = self.second
    if h >= 0 and mi >= 0:
        if res.tell() > 0:
            print(' ', end="", file=res)
        print("{0}:{1}".format("{:02d}".format(h), "{:02d}".format(mi)),
              end="", file=res, flush=True)
        if se >= 0:
            print(":{0}".format("{:02d}".format(se)),
                  end="", file=res, flush=True)
    if res.tell() == 0:
        if self.quartal != 0:
            print("{0}-й квартал".format(self.quartal),
                  end="", file=res, flush=True)
    if res.tell() == 0:
        return "?"
    # Trim the separators left behind by the prefix/pointer/weekday parts.
    while (Utils.getCharAtStringIO(res, res.tell() - 1) == ' '
           or Utils.getCharAtStringIO(res, res.tell() - 1) == ','):
        Utils.setLengthStringIO(res, res.tell() - 1)
    if not short_variant and self.is_relative:
        DateRelHelper.append_to_string(self, res)
    return Utils.toStringStringIO(res).strip()
def toString(self, short_variant: bool, lang: 'MorphLang', lev: int = 0) -> str:
    """Render the instrument block as a one-line description.

    The text is assembled from: the kind (with the secondary kind in
    brackets), the number (``№N.sub.sub2.sub3``, or a rows/columns summary for
    a table, with ``min_number-`` inserted for a number range), a marker for
    expired blocks or blocks referring to a decree, and finally the quoted
    name/value (cut to 100 characters) or the quoted text of ``self.ref``.

    Args:
        short_variant: passed through when rendering ``self.ref``.
        lang: language used to convert the kind values and to render
            ``self.ref``.
        lev: current nesting level; ``self.ref`` is rendered only while it is
            below 30.

    Returns:
        The description, stripped of surrounding whitespace.
    """
    out = io.StringIO()
    kind = self.kind
    label = Utils.asObjectOrNull(
        MetaInstrumentBlock.GLOBAL_META.kind_feature.convertInnerValueToOuterValue(
            Utils.enumToString(kind), lang), str)
    if label is not None:
        print(label, end="", file=out)
        if self.kind2 != InstrumentKind.UNDEFINED:
            label = Utils.asObjectOrNull(
                MetaInstrumentBlock.GLOBAL_META.kind_feature.convertInnerValueToOuterValue(
                    Utils.enumToString(self.kind2), lang), str)
            if label is not None:
                out.write(" ({0})".format(label))
    if self.number > 0:
        if kind == InstrumentKind.TABLE:
            out.write(" {0} строк, {1} столбцов".format(len(self.children), self.number))
        else:
            out.write(" №{0}".format(self.number))
            # NOTE(review): each deeper sub-number is shown only when the
            # previous level is present; confirm this nesting against upstream.
            if self.sub_number > 0:
                out.write(".{0}".format(self.sub_number))
                if self.sub_number2 > 0:
                    out.write(".{0}".format(self.sub_number2))
                    if self.sub_number3 > 0:
                        out.write(".{0}".format(self.sub_number3))
            if self.min_number > 0:
                # Put "min-" in front of the last number component, i.e. right
                # after the nearest space or dot looking from the end.
                pos = out.tell() - 1
                while pos >= 0:
                    if Utils.getCharAtStringIO(out, pos) in (' ', '.'):
                        Utils.insertStringIO(out, pos + 1, "{0}-".format(self.min_number))
                        break
                    pos -= 1
    skip_ref = False
    if self.is_expired:
        out.write(" (утратить силу)")
        skip_ref = True
    elif (kind not in (InstrumentKind.EDITIONS, InstrumentKind.APPROVED)
            and isinstance(self.ref, DecreeReferent)):
        out.write(" (*)")
        skip_ref = True
    title = self.getStringValue(InstrumentBlockReferent.ATTR_NAME)
    if title is None:
        title = self.getStringValue(InstrumentBlockReferent.ATTR_VALUE)
    if title is not None:
        if len(title) > 100:
            title = title[0:100] + "..."
        out.write(" \"{0}\"".format(title))
    elif not skip_ref and isinstance(self.ref, Referent) and lev < 30:
        # The level cap stops runaway recursion through chained references.
        out.write(" \"{0}\"".format(self.ref.toString(short_variant, lang, lev + 1)))
    return Utils.toStringStringIO(out).strip()