def _addValue(self, begin : 'Token', end : 'Token') -> None:
     """Assemble a value string from the tokens ``begin`` .. ``end`` and store it as a slot.

     Number tokens contribute their source text; text tokens contribute their
     ``term``, except that a token accepted by the ``isCharOf`` separator check
     contributes a plain ``-``.  Afterwards every ``-`` that sits between two
     alphanumeric characters of which exactly one is a digit is removed.  The
     result is added under ``DenominationReferent.ATTR_VALUE`` and the
     ``__m_names`` attribute is reset to None.

     Args:
         begin: first token to read
         end: last token to read (the walk stops once ``previous`` equals it)
     """
     buf = io.StringIO()
     tok = begin
     while tok is not None and tok.previous != end:
         if isinstance(tok, NumberToken):
             print(tok.getSourceText(), end="", file=buf)
         elif isinstance(tok, TextToken):
             piece = tok.term
             if tok.isCharOf("-\\/"):
                 piece = "-"
             print(piece, end="", file=buf)
         tok = tok.next0_
     # The buffer may shrink while we scan, so its length is re-read every step.
     pos = 0
     while pos < buf.tell():
         if Utils.getCharAtStringIO(buf, pos) == '-' and 0 < pos < buf.tell() - 1:
             left = Utils.getCharAtStringIO(buf, pos - 1)
             right = Utils.getCharAtStringIO(buf, pos + 1)
             # drop the hyphen only on a digit/letter boundary (in either order)
             if left.isalnum() and right.isalnum() and left.isdigit() != right.isdigit():
                 Utils.removeStringIO(buf, pos, 1)
         pos += 1
     self.addSlot(DenominationReferent.ATTR_VALUE, Utils.toStringStringIO(buf), False, 0)
     self.__m_names = None
Example #2
0
 def correct_word(w: str) -> str:
     """Normalise a word: upper-case it and replace certain letters (such as Ё -> Е).

     Args:
         w(str): source word

     Returns:
         str: the corrected word (None when ``w`` is None)
     """
     if w is None:
         return None
     w = w.upper()
     # Only pay for the character-by-character rewrite when at least one
     # character of the word occurs in the substitution source alphabet.
     if any(LanguageHelper.__m_rus0.find(c) >= 0 for c in w):
         buf = io.StringIO()
         print(w, end="", file=buf)
         pos = 0
         while pos < buf.tell():
             idx = LanguageHelper.__m_rus0.find(Utils.getCharAtStringIO(buf, pos))
             if idx >= 0:
                 Utils.setCharAtStringIO(buf, pos, LanguageHelper.__m_rus1[idx])
             pos += 1
         w = Utils.toStringStringIO(buf)
     soft_hyphen = chr(0x00AD)
     if soft_hyphen in w:
         w = w.replace(soft_hyphen, '-')
     if w.startswith("АГЕНС"):
         w = "АГЕНТС" + w[5:]
     return w
Example #3
0
 def attach_url(t0: 'Token') -> 'UriItemToken':
     """Try to read a URL (host, optional port, path, query, fragment) starting at ``t0``.

     Args:
         t0: first token of the candidate URL

     Returns:
         A UriItemToken created by ``_new2706`` that spans ``t0`` .. the last
         consumed token and carries the assembled text; None when no domain
         name can be attached or the assembled text has no alphabetic character.
     """
     host = UriItemToken.attach_domain_name(t0, True, False)
     if host is None:
         return None
     buf = Utils.newStringIO(host.value)
     last = host.end_token
     follower = last.next0_
     if (follower is not None and follower.is_char(':')
             and isinstance(follower.next0_, NumberToken)):
         # "host:<number>" - the number token is appended after a colon
         last = follower.next0_
         print(":{0}".format(last.value), end="", file=buf, flush=True)
     elif (host.value == "vk.com" and follower is not None
           and follower.is_hiphen and follower.next0_ is not None):
         # "vk.com-<content>" is written out as "vk.com/<content>"
         last = follower.next0_
         part = UriItemToken.__attach_uri_content(last, ".-_+%", False)
         if part is not None:
             last = part.end_token
             print("/{0}".format(part.value), end="", file=buf, flush=True)
     # Path segments: each must be introduced by a '/' glued to the previous token.
     cur = last.next0_
     while cur is not None:
         if cur.is_whitespace_before or not cur.is_char('/'):
             break
         if cur.is_whitespace_after:
             # trailing slash belongs to the URL span but adds no text
             last = cur
             break
         part = UriItemToken.__attach_uri_content(cur.next0_, ".-_+%", False)
         if part is None:
             last = cur
             break
         last = part.end_token
         print("/{0}".format(part.value), end="", file=buf, flush=True)
         cur = last.next0_
     # Query string.
     follower = last.next0_
     if (follower is not None and follower.is_char('?')
             and not follower.is_whitespace_after
             and not last.is_whitespace_after):
         part = UriItemToken.__attach_uri_content(follower.next0_, ".-_+%=&", False)
         if part is not None:
             last = part.end_token
             print("?{0}".format(part.value), end="", file=buf, flush=True)
     # Fragment.
     follower = last.next0_
     if (follower is not None and follower.is_char('#')
             and not follower.is_whitespace_after
             and not last.is_whitespace_after):
         part = UriItemToken.__attach_uri_content(follower.next0_, ".-_+%", False)
         if part is not None:
             last = part.end_token
             print("#{0}".format(part.value), end="", file=buf, flush=True)
     # A result without a single letter is rejected.
     has_letter = any(
         str.isalpha(Utils.getCharAtStringIO(buf, k)) for k in range(buf.tell()))
     if not has_letter:
         return None
     return UriItemToken._new2706(t0, last, Utils.toStringStringIO(buf))
Example #4
0
 def attachisbn(t0: 'Token') -> 'UriItemToken':
     """Collect an ISBN-like run of digit groups, hyphens and a closing ``X`` from ``t0``.

     Args:
         t0: first token of the candidate

     Returns:
         A UriItemToken created by ``_new2706`` over the consumed tokens with the
         collected text, or None when the text holds fewer than 7 digits.
     """
     buf = io.StringIO()
     last = t0
     digit_total = 0
     cur = t0
     while cur is not None:
         if cur.is_table_control_char:
             break
         if cur.is_newline_before and cur != t0:
             # a line break is tolerated only directly after a hyphen
             before = cur.previous
             if before is None or not before.is_hiphen:
                 break
         if isinstance(cur, NumberToken):
             num = Utils.asObjectOrNull(cur, NumberToken)
             if (num.typ != NumberSpellingType.DIGIT
                     or not num.morph.class0_.is_undefined):
                 break
             src = num.get_source_text()
             print(src, end="", file=buf)
             digit_total += len(src)
             last = cur
             if digit_total > 13:
                 break
         else:
             word = Utils.asObjectOrNull(cur, TextToken)
             if word is None:
                 break
             sym = word.term
             if sym not in ("-", "Х", "X"):
                 break
             if sym == "Х":
                 # presumably the Cyrillic look-alike; it is stored as "X"
                 sym = "X"
             print(sym, end="", file=buf)
             last = cur
             if sym != "-":
                 # an X can only terminate the sequence
                 break
         cur = cur.next0_
     digits_in_text = sum(
         1 for k in range(buf.tell())
         if str.isdigit(Utils.getCharAtStringIO(buf, k)))
     if digits_in_text < 7:
         return None
     return UriItemToken._new2706(t0, last, Utils.toStringStringIO(buf))
Example #5
0
 def attachbbk(t0: 'Token') -> 'UriItemToken':
     """Collect a classification-code-like run of number and text tokens from ``t0``.

     Scanning stops at a line break, a table control character, a comma, a
     ``(`` not followed by a number token, a letter-initial token preceded by
     whitespace, or a number token that is not plain digits.

     Args:
         t0: first token of the candidate

     Returns:
         A UriItemToken created by ``_new2706`` over the consumed tokens, or None
         when fewer than 3 characters or fewer than 2 digits were collected.
         A trailing ``.`` is trimmed from the text and the end token is moved
         one token back.
     """
     buf = io.StringIO()
     last = t0
     digit_total = 0
     cur = t0
     while cur is not None:
         if (cur.is_newline_before and cur != t0) or cur.is_table_control_char:
             break
         if isinstance(cur, NumberToken):
             num = Utils.asObjectOrNull(cur, NumberToken)
             if (num.typ != NumberSpellingType.DIGIT
                     or not num.morph.class0_.is_undefined):
                 break
             src = num.get_source_text()
             print(src, end="", file=buf)
             digit_total += len(src)
             last = cur
         else:
             word = Utils.asObjectOrNull(cur, TextToken)
             if word is None or word.is_char(','):
                 break
             if word.is_char('(') and not isinstance(word.next0_, NumberToken):
                 break
             src = word.get_source_text()
             if str.isalpha(src[0]) and word.is_whitespace_before:
                 break
             print(src, end="", file=buf)
             last = cur
         cur = cur.next0_
     size = buf.tell()
     if size < 3 or digit_total < 2:
         return None
     if Utils.getCharAtStringIO(buf, size - 1) == '.':
         Utils.setLengthStringIO(buf, size - 1)
         last = last.previous
     return UriItemToken._new2706(t0, last, Utils.toStringStringIO(buf))
Example #6
0
 def to_string(self,
               short_variant: bool,
               lang: 'MorphLang' = None,
               lev: int = 0) -> str:
     """Render the measure as text: template with values filled in, then units.

     Digits ``1``..``9`` in ``self.template`` are placeholders that are replaced
     by the corresponding ATTR_VALUE slot (a string value ``"NaN"`` is shown as
     ``"?"``).  The result of ``out_units`` is appended afterwards.

     Args:
         short_variant: when False, the name, referenced measures and the kind
             are appended as well
         lang: language forwarded to ``out_units`` and nested ``to_string`` calls
         lev: not read by this method

     Returns:
         str: the assembled text
     """
     out = Utils.newStringIO(self.template)
     values = []
     for slot in self.slots:
         if slot.type_name != MeasureReferent.ATTR_VALUE:
             continue
         raw = slot.value
         if isinstance(raw, str):
             text = Utils.asObjectOrNull(raw, str)
             if text == "NaN":
                 text = "?"
             values.append(text)
         elif isinstance(raw, Referent):
             values.append(raw.to_string(True, lang, 0))
     # Right-to-left so that a substitution never shifts a position still to be read.
     for pos in range(out.tell() - 1, -1, -1):
         ch = Utils.getCharAtStringIO(out, pos)
         if not str.isdigit(ch):
             continue
         idx = ord(ch) - ord('1')
         if 0 <= idx < len(values):
             Utils.removeStringIO(out, pos, 1)
             Utils.insertStringIO(out, pos, values[idx])
     print(self.out_units(lang), end="", file=out)
     if short_variant:
         return Utils.toStringStringIO(out)
     name = self.get_string_value(MeasureReferent.ATTR_NAME)
     if name is not None:
         print(" - {0}".format(name), end="", file=out, flush=True)
     for slot in self.slots:
         if (slot.type_name == MeasureReferent.ATTR_REF
                 and isinstance(slot.value, MeasureReferent)):
             print(" / {0}".format(slot.value.to_string(True, lang, 0)),
                   end="",
                   file=out,
                   flush=True)
     kind_ = self.kind
     if kind_ != MeasureKind.UNDEFINED:
         print(" ({0})".format(Utils.enumToString(kind_).upper()),
               end="",
               file=out,
               flush=True)
     return Utils.toStringStringIO(out)
Example #7
0
 def toString(self, short_variant : bool, lang : 'MorphLang'=None, lev : int=0) -> str:
     """Render the measure as text: template with values filled in, then units.

     Digits ``1``..``9`` in ``self.template`` are placeholders replaced by the
     corresponding ATTR_VALUE slot.  Units follow: the first one as is, every
     further one prefixed with ``*``, or with ``/`` when its ATTR_POW value
     starts with ``-`` (a power other than ``-1`` is appended as ``<n>``).

     Args:
         short_variant: when False, the name, referenced measures and the kind
             are appended as well
         lang: language forwarded to nested ``toString`` calls
         lev: not read by this method

     Returns:
         str: the assembled text
     """
     out = Utils.newStringIO(self.template)
     values = []
     for slot in self.slots:
         if slot.type_name != MeasureReferent.ATTR_VALUE:
             continue
         raw = slot.value
         if isinstance(raw, str):
             values.append(Utils.asObjectOrNull(raw, str))
         elif isinstance(raw, Referent):
             values.append(raw.toString(True, lang, 0))
     # Right-to-left so that a substitution never shifts a position still to be read.
     for pos in range(out.tell() - 1, -1, -1):
         ch = Utils.getCharAtStringIO(out, pos)
         if not str.isdigit(ch):
             continue
         idx = ord(ch) - ord('1')
         if 0 <= idx < len(values):
             Utils.removeStringIO(out, pos, 1)
             Utils.insertStringIO(out, pos, values[idx])
     units = self.units
     if len(units) > 0:
         print(units[0].toString(True, lang, 0), end="", file=out)
         for k in range(1, len(units)):
             unit = units[k]
             power = unit.getStringValue(UnitReferent.ATTR_POW)
             if not Utils.isNullOrEmpty(power) and power[0] == '-':
                 print("/{0}".format(unit.toString(True, lang, 1)), end="", file=out, flush=True)
                 if power != "-1":
                     print("<{0}>".format(power[1:]), end="", file=out, flush=True)
             else:
                 print("*{0}".format(unit.toString(True, lang, 0)), end="", file=out, flush=True)
     if short_variant:
         return Utils.toStringStringIO(out)
     name = self.getStringValue(MeasureReferent.ATTR_NAME)
     if name is not None:
         print(" - {0}".format(name), end="", file=out, flush=True)
     for slot in self.slots:
         if slot.type_name == MeasureReferent.ATTR_REF and isinstance(slot.value, MeasureReferent):
             print(" / {0}".format(slot.value.toString(True, lang, 0)), end="", file=out, flush=True)
     kind_ = self.kind
     if kind_ != MeasureKind.UNDEFINED:
         print(" ({0})".format(Utils.enumToString(kind_).upper()), end="", file=out, flush=True)
     return Utils.toStringStringIO(out)
 def correct_word(w: str) -> str:
     """Return an upper-cased, normalised copy of ``w``.

     Every character found in ``LanguageHelper.__m_rus0`` is replaced by the
     character at the same index in ``LanguageHelper.__m_rus1``; soft hyphens
     (U+00AD) become ``-``; a leading "АГЕНС" is rewritten to "АГЕНТС".

     Args:
         w(str): source word

     Returns:
         str: the corrected word (None when ``w`` is None)
     """
     if w is None:
         return None
     word = w.upper()
     # Skip the buffer round-trip entirely when nothing needs substituting.
     if any(LanguageHelper.__m_rus0.find(c) >= 0 for c in word):
         buf = io.StringIO()
         print(word, end="", file=buf)
         pos = 0
         while pos < buf.tell():
             idx = LanguageHelper.__m_rus0.find(Utils.getCharAtStringIO(buf, pos))
             if idx >= 0:
                 Utils.setCharAtStringIO(buf, pos, LanguageHelper.__m_rus1[idx])
             pos += 1
         word = Utils.toStringStringIO(buf)
     soft_hyphen = chr(0x00AD)
     if soft_hyphen in word:
         word = word.replace(soft_hyphen, '-')
     if word.startswith("АГЕНС"):
         word = "АГЕНТС" + word[5:]
     return word
Example #9
0
 def attach_domain_name(t0: 'Token', check_: bool,
                        can_be_whitspaces: bool) -> 'UriItemToken':
     """Try to read a dotted host name (or a dotted numeric address) starting at ``t0``.

     Number tokens contribute their source text, text tokens their lower-cased
     ``term``; only letters and the characters ``.``, ``-`` and ``_`` are accepted.

     Args:
         t0: first token of the candidate
         check_: when True, the result is accepted only if it looks like a
             four-part numeric address or its last token (preceded by ``.``)
             is recognised by ``__m_std_groups``
         can_be_whitspaces: when True, whitespace inside the name is tolerated
             provided a look-ahead on the same line reaches a token recognised
             by ``__m_std_groups``

     Returns:
         A UriItemToken created by ``_new2706`` with the lower-cased text, or None
         when nothing was collected, the text starts with ``.``, contains two
         adjacent dots, contains no counted dot, or fails the ``check_`` test.
     """
     txt = io.StringIO()
     t1 = t0
     ip_count = 0
     is_ip = True
     t = t0
     first_pass3413 = True
     while True:
         # emulated for-loop: advance to the next token on every pass but the first
         if first_pass3413: first_pass3413 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_whitespace_before and t != t0):
             # Whitespace inside the name: normally stop here; with
             # can_be_whitspaces, look ahead for a recognised group first.
             ok = False
             if (not t.is_newline_before and can_be_whitspaces):
                 tt1 = t
                 first_pass3414 = True
                 while True:
                     if first_pass3414: first_pass3414 = False
                     else: tt1 = tt1.next0_
                     if (not (tt1 is not None)): break
                     # separators are skipped without further checks
                     if (tt1.is_char('.') or tt1.is_hiphen):
                         continue
                     if (tt1.is_whitespace_before):
                         if (tt1.is_newline_before):
                             break
                         # a gap is acceptable only right after '.' or a hyphen
                         if (tt1.previous is not None
                                 and ((tt1.previous.is_char('.')
                                       or tt1.previous.is_hiphen))):
                             pass
                         else:
                             break
                     if (not (isinstance(tt1, TextToken))):
                         break
                     if (UriItemToken.__m_std_groups.try_parse(
                             tt1, TerminParseAttr.NO) is not None):
                         ok = True
                         break
                     if (not tt1.chars.is_latin_letter):
                         break
             if (not ok):
                 break
         if (isinstance(t, NumberToken)):
             nt = Utils.asObjectOrNull(t, NumberToken)
             if (nt.int_value is None):
                 break
             print(nt.get_source_text(), end="", file=txt)
             t1 = t
             # count digit-spelled numbers in 0..255 as potential address parts
             if (nt.typ == NumberSpellingType.DIGIT and nt.int_value >= 0
                     and (nt.int_value < 256)):
                 ip_count += 1
             else:
                 is_ip = False
             continue
         tt = Utils.asObjectOrNull(t, TextToken)
         if (tt is None):
             break
         src = tt.term
         ch = src[0]
         if (not str.isalpha(ch)):
             # of the non-letters only '.', '-' and '_' may appear in the name
             if (".-_".find(ch) < 0):
                 break
             if (ch != '.'):
                 is_ip = False
             if (ch == '-'):
                 # special case: "vk.com" followed by a hyphen is returned as is,
                 # without consuming the hyphen
                 # (presumably a case-insensitive comparison - TODO confirm the
                 # meaning of the third compareStrings argument)
                 if (Utils.compareStrings(Utils.toStringStringIO(txt),
                                          "vk.com", True) == 0):
                     return UriItemToken._new2706(
                         t0, t1,
                         Utils.toStringStringIO(txt).lower())
         else:
             is_ip = False
         print(src.lower(), end="", file=txt)
         t1 = t
     if (txt.tell() == 0):
         return None
     # a numeric address needs exactly four accepted number tokens
     if (ip_count != 4):
         is_ip = False
     i = 0
     points = 0
     i = 0
     # Validate the dots: none at the start, no two adjacent; a trailing dot
     # is trimmed (and the end token moved one back) without being counted.
     while i < txt.tell():
         if (Utils.getCharAtStringIO(txt, i) == '.'):
             if (i == 0):
                 return None
             if (i >= (txt.tell() - 1)):
                 Utils.setLengthStringIO(txt, txt.tell() - 1)
                 t1 = t1.previous
                 break
             if (Utils.getCharAtStringIO(txt, i - 1) == '.'
                     or Utils.getCharAtStringIO(txt, i + 1) == '.'):
                 return None
             points += 1
         i += 1
     if (points == 0):
         return None
     # NOTE(review): uri_ is assigned here but not read again in this function.
     uri_ = Utils.toStringStringIO(txt)
     if (check_):
         ok = is_ip
         if (not is_ip):
             # NOTE(review): texts without a counted '.' already returned None
             # above, so this "localhost" comparison looks unreachable as True.
             if (Utils.toStringStringIO(txt) == "localhost"):
                 ok = True
         # otherwise accept when the final token follows a '.' and is a
         # recognised group according to __m_std_groups
         if (not ok and t1.previous is not None
                 and t1.previous.is_char('.')):
             if (UriItemToken.__m_std_groups.try_parse(
                     t1, TerminParseAttr.NO) is not None):
                 ok = True
         if (not ok):
             return None
     return UriItemToken._new2706(t0, t1,
                                  Utils.toStringStringIO(txt).lower())
Example #10
0
 def __try_attach_(self, pli : typing.List['PhoneItemToken'], ind : int, is_phone_before : bool, prev_phone : 'PhoneReferent', lev : int=0) -> 'ReferentToken':
     if (ind >= len(pli) or lev > 4): 
         return None
     country_code = None
     city_code = None
     j = ind
     if (prev_phone is not None and prev_phone._m_template is not None and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER): 
         tmp = io.StringIO()
         jj = j
         first_pass3391 = True
         while True:
             if first_pass3391: first_pass3391 = False
             else: jj += 1
             if (not (jj < len(pli))): break
             if (pli[jj].item_type == PhoneItemToken.PhoneItemType.NUMBER): 
                 print(len(pli[jj].value), end="", file=tmp)
             elif (pli[jj].item_type == PhoneItemToken.PhoneItemType.DELIM): 
                 if (pli[jj].value == " "): 
                     break
                 print(pli[jj].value, end="", file=tmp)
                 continue
             else: 
                 break
             templ0 = Utils.toStringStringIO(tmp)
             if (templ0 == prev_phone._m_template): 
                 if ((jj + 1) < len(pli)): 
                     if (pli[jj + 1].item_type == PhoneItemToken.PhoneItemType.PREFIX and (jj + 2) == len(pli)): 
                         pass
                     else: 
                         del pli[jj + 1:jj + 1+len(pli) - jj - 1]
                 break
     if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.COUNTRYCODE): 
         country_code = pli[j].value
         if (country_code != "8"): 
             cc = PhoneHelper.get_country_prefix(country_code)
             if (cc is not None and (len(cc) < len(country_code))): 
                 city_code = country_code[len(cc):]
                 country_code = cc
         j += 1
     elif ((j < len(pli)) and pli[j].can_be_country_prefix): 
         k = j + 1
         if ((k < len(pli)) and pli[k].item_type == PhoneItemToken.PhoneItemType.DELIM): 
             k += 1
         rrt = self.__try_attach_(pli, k, is_phone_before, None, lev + 1)
         if (rrt is not None): 
             if ((((is_phone_before and pli[j + 1].item_type == PhoneItemToken.PhoneItemType.DELIM and pli[j + 1].begin_token.is_hiphen) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and len(pli[j].value) == 3) and ((j + 2) < len(pli)) and pli[j + 2].item_type == PhoneItemToken.PhoneItemType.NUMBER) and len(pli[j + 2].value) == 3): 
                 pass
             else: 
                 country_code = pli[j].value
                 j += 1
     if (((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and ((pli[j].value[0] == '8' or pli[j].value[0] == '7'))) and country_code is None): 
         if (len(pli[j].value) == 1): 
             country_code = pli[j].value
             j += 1
         elif (len(pli[j].value) == 4): 
             country_code = pli[j].value[0:0+1]
             if (city_code is None): 
                 city_code = pli[j].value[1:]
             else: 
                 city_code += pli[j].value[1:]
             j += 1
         elif (len(pli[j].value) == 11 and j == (len(pli) - 1) and is_phone_before): 
             ph0 = PhoneReferent()
             if (pli[j].value[0] != '8'): 
                 ph0.country_code = pli[j].value[0:0+1]
             ph0.number = pli[j].value[1:1+3] + pli[j].value[4:]
             return ReferentToken(ph0, pli[0].begin_token, pli[j].end_token)
         elif (city_code is None and len(pli[j].value) > 3 and ((j + 1) < len(pli))): 
             sum0_ = 0
             for it in pli: 
                 if (it.item_type == PhoneItemToken.PhoneItemType.NUMBER): 
                     sum0_ += len(it.value)
             if (sum0_ == 11): 
                 city_code = pli[j].value[1:]
                 j += 1
     if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.CITYCODE): 
         if (city_code is None): 
             city_code = pli[j].value
         else: 
             city_code += pli[j].value
         j += 1
     if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM): 
         j += 1
     if ((country_code == "8" and city_code is None and ((j + 3) < len(pli))) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER): 
         if (len(pli[j].value) == 3 or len(pli[j].value) == 4): 
             city_code = pli[j].value
             j += 1
             if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM): 
                 j += 1
     normal_num_len = 0
     if (country_code == "421"): 
         normal_num_len = 9
     num = io.StringIO()
     templ = io.StringIO()
     part_length = list()
     delim = None
     ok = False
     additional = None
     std = False
     if (country_code is not None and ((j + 4) < len(pli)) and j > 0): 
         if (((((pli[j - 1].value == "-" or pli[j - 1].item_type == PhoneItemToken.PhoneItemType.COUNTRYCODE)) and pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER and pli[j + 1].item_type == PhoneItemToken.PhoneItemType.DELIM) and pli[j + 2].item_type == PhoneItemToken.PhoneItemType.NUMBER and pli[j + 3].item_type == PhoneItemToken.PhoneItemType.DELIM) and pli[j + 4].item_type == PhoneItemToken.PhoneItemType.NUMBER): 
             if ((((len(pli[j].value) + len(pli[j + 2].value)) == 6 or ((len(pli[j].value) == 4 and len(pli[j + 2].value) == 5)))) and ((len(pli[j + 4].value) == 4 or len(pli[j + 4].value) == 1))): 
                 print(pli[j].value, end="", file=num)
                 print(pli[j + 2].value, end="", file=num)
                 print(pli[j + 4].value, end="", file=num)
                 print("{0}{1}{2}{3}{4}".format(len(pli[j].value), pli[j + 1].value, len(pli[j + 2].value), pli[j + 3].value, len(pli[j + 4].value)), end="", file=templ, flush=True)
                 std = True
                 ok = True
                 j += 5
     first_pass3392 = True
     while True:
         if first_pass3392: first_pass3392 = False
         else: j += 1
         if (not (j < len(pli))): break
         if (std): 
             break
         if (pli[j].item_type == PhoneItemToken.PhoneItemType.DELIM): 
             if (pli[j].is_in_brackets): 
                 continue
             if (j > 0 and pli[j - 1].is_in_brackets): 
                 continue
             if (templ.tell() > 0): 
                 print(pli[j].value, end="", file=templ)
             if (delim is None): 
                 delim = pli[j].value
             elif (pli[j].value != delim): 
                 if ((len(part_length) == 2 and ((part_length[0] == 3 or part_length[0] == 4)) and city_code is None) and part_length[1] == 3): 
                     city_code = Utils.toStringStringIO(num)[0:0+part_length[0]]
                     Utils.removeStringIO(num, 0, part_length[0])
                     del part_length[0]
                     delim = pli[j].value
                     continue
                 if (is_phone_before and ((j + 1) < len(pli)) and pli[j + 1].item_type == PhoneItemToken.PhoneItemType.NUMBER): 
                     if (num.tell() < 6): 
                         continue
                     if (normal_num_len > 0 and (num.tell() + len(pli[j + 1].value)) == normal_num_len): 
                         continue
                 break
             else: 
                 continue
             ok = False
         elif (pli[j].item_type == PhoneItemToken.PhoneItemType.NUMBER): 
             if (num.tell() == 0 and pli[j].begin_token.previous is not None and pli[j].begin_token.previous.is_table_control_char): 
                 tt = pli[len(pli) - 1].end_token.next0_
                 if (tt is not None and tt.is_char_of(",.")): 
                     tt = tt.next0_
                 if (isinstance(tt, NumberToken)): 
                     return None
             if ((num.tell() + len(pli[j].value)) > 13): 
                 if (j > 0 and pli[j - 1].item_type == PhoneItemToken.PhoneItemType.DELIM): 
                     j -= 1
                 ok = True
                 break
             print(pli[j].value, end="", file=num)
             part_length.append(len(pli[j].value))
             print(len(pli[j].value), end="", file=templ)
             ok = True
             if (num.tell() > 10): 
                 j += 1
                 if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.ADDNUMBER): 
                     additional = pli[j].value
                     j += 1
                 break
         elif (pli[j].item_type == PhoneItemToken.PhoneItemType.ADDNUMBER): 
             additional = pli[j].value
             j += 1
             break
         else: 
             break
     if ((j == (len(pli) - 1) and pli[j].is_in_brackets and ((len(pli[j].value) == 3 or len(pli[j].value) == 4))) and additional is None): 
         additional = pli[j].value
         j += 1
     if ((j < len(pli)) and pli[j].item_type == PhoneItemToken.PhoneItemType.PREFIX and pli[j].is_in_brackets): 
         is_phone_before = True
         j += 1
     if ((country_code is None and city_code is not None and len(city_code) > 3) and (num.tell() < 8) and city_code[0] != '8'): 
         if ((len(city_code) + num.tell()) == 10): 
             pass
         else: 
             cc = PhoneHelper.get_country_prefix(city_code)
             if (cc is not None): 
                 if (len(cc) > 1 and (len(city_code) - len(cc)) > 1): 
                     country_code = cc
                     city_code = city_code[len(cc):]
     if (country_code is None and city_code is not None and city_code.startswith("00")): 
         cc = PhoneHelper.get_country_prefix(city_code[2:])
         if (cc is not None): 
             if (len(city_code) > (len(cc) + 3)): 
                 country_code = cc
                 city_code = city_code[len(cc) + 2:]
     if (num.tell() == 0 and city_code is not None): 
         if (len(city_code) == 10): 
             print(city_code[3:], end="", file=num)
             part_length.append(num.tell())
             city_code = city_code[0:0+3]
             ok = True
         elif (((len(city_code) == 9 or len(city_code) == 11 or len(city_code) == 8)) and ((is_phone_before or country_code is not None))): 
             print(city_code, end="", file=num)
             part_length.append(num.tell())
             city_code = (None)
             ok = True
     if (num.tell() < 4): 
         ok = False
     if (num.tell() < 7): 
         if (city_code is not None and (len(city_code) + num.tell()) > 7): 
             if (not is_phone_before and len(city_code) == 3): 
                 ii = 0
                 ii = 0
                 while ii < len(part_length): 
                     if (part_length[ii] == 3): 
                         pass
                     elif (part_length[ii] > 3): 
                         break
                     elif ((ii < (len(part_length) - 1)) or (part_length[ii] < 2)): 
                         break
                     ii += 1
                 if (ii >= len(part_length)): 
                     if (country_code == "61"): 
                         pass
                     else: 
                         ok = False
         elif (((num.tell() == 6 or num.tell() == 5)) and ((len(part_length) >= 1 and len(part_length) <= 3)) and is_phone_before): 
             if (pli[0].item_type == PhoneItemToken.PhoneItemType.PREFIX and pli[0].kind == PhoneKind.HOME): 
                 ok = False
         elif (prev_phone is not None and prev_phone.number is not None and ((len(prev_phone.number) == num.tell() or len(prev_phone.number) == (num.tell() + 3) or len(prev_phone.number) == (num.tell() + 4)))): 
             pass
         elif (num.tell() > 4 and prev_phone is not None and Utils.toStringStringIO(templ) == prev_phone._m_template): 
             ok = True
         else: 
             ok = False
     if (delim == "." and country_code is None and city_code is None): 
         ok = False
     if ((is_phone_before and country_code is None and city_code is None) and num.tell() > 10): 
         cc = PhoneHelper.get_country_prefix(Utils.toStringStringIO(num))
         if (cc is not None): 
             if ((num.tell() - len(cc)) == 9): 
                 country_code = cc
                 Utils.removeStringIO(num, 0, len(cc))
                 ok = True
     if (ok): 
         if (std): 
             pass
         elif (prev_phone is not None and prev_phone.number is not None and (((len(prev_phone.number) == num.tell() or len(prev_phone.number) == (num.tell() + 3) or len(prev_phone.number) == (num.tell() + 4)) or prev_phone._m_template == Utils.toStringStringIO(templ)))): 
             pass
         elif ((len(part_length) == 3 and part_length[0] == 3 and part_length[1] == 2) and part_length[2] == 2): 
             pass
         elif (len(part_length) == 3 and is_phone_before): 
             pass
         elif ((len(part_length) == 4 and (((part_length[0] + part_length[1]) == 3)) and part_length[2] == 2) and part_length[3] == 2): 
             pass
         elif ((len(part_length) == 4 and part_length[0] == 3 and part_length[1] == 3) and part_length[2] == 2 and part_length[3] == 2): 
             pass
         elif (len(part_length) == 5 and (part_length[1] + part_length[2]) == 4 and (part_length[3] + part_length[4]) == 4): 
             pass
         elif (len(part_length) > 4): 
             ok = False
         elif (len(part_length) > 3 and city_code is not None): 
             ok = False
         elif ((is_phone_before or city_code is not None or country_code is not None) or additional is not None): 
             ok = True
         else: 
             ok = False
             if (((num.tell() == 6 or num.tell() == 7)) and (len(part_length) < 4) and j > 0): 
                 next_ph = self.__get_next_phone(pli[j - 1].end_token.next0_, lev + 1)
                 if (next_ph is not None): 
                     d = len(next_ph.number) - num.tell()
                     if (d == 0 or d == 3 or d == 4): 
                         ok = True
     end = (pli[j - 1].end_token if j > 0 else None)
     if (end is None): 
         ok = False
     if ((ok and city_code is None and country_code is None) and prev_phone is None and not is_phone_before): 
         if (not end.is_whitespace_after and end.next0_ is not None): 
             tt = end.next0_
             if (tt.is_char_of(".,)") and tt.next0_ is not None): 
                 tt = tt.next0_
             if (not tt.is_whitespace_before): 
                 ok = False
     if (not ok): 
         return None
     if (templ.tell() > 0 and not str.isdigit(Utils.getCharAtStringIO(templ, templ.tell() - 1))): 
         Utils.setLengthStringIO(templ, templ.tell() - 1)
     if ((country_code is None and city_code is not None and len(city_code) > 3) and num.tell() > 6): 
         cc = PhoneHelper.get_country_prefix(city_code)
         if (cc is not None and ((len(cc) + 1) < len(city_code))): 
             country_code = cc
             city_code = city_code[len(cc):]
     if (pli[0].begin_token.previous is not None): 
         if (pli[0].begin_token.previous.is_value("ГОСТ", None) or pli[0].begin_token.previous.is_value("ТУ", None)): 
             return None
     ph = PhoneReferent()
     if (country_code is not None): 
         ph.country_code = country_code
     number = Utils.toStringStringIO(num)
     if ((city_code is None and num.tell() > 7 and len(part_length) > 0) and (part_length[0] < 5)): 
         city_code = number[0:0+part_length[0]]
         number = number[part_length[0]:]
     if (city_code is None and num.tell() == 11 and Utils.getCharAtStringIO(num, 0) == '8'): 
         city_code = number[1:1+3]
         number = number[4:]
     if (city_code is None and num.tell() == 10): 
         city_code = number[0:0+3]
         number = number[3:]
     if (city_code is not None): 
         number = (city_code + number)
     elif (country_code is None and prev_phone is not None): 
         ok1 = False
         if (len(prev_phone.number) >= (len(number) + 2)): 
             ok1 = True
         elif (templ.tell() > 0 and prev_phone._m_template is not None and LanguageHelper.ends_with(prev_phone._m_template, Utils.toStringStringIO(templ))): 
             ok1 = True
         if (ok1 and len(prev_phone.number) > len(number)): 
             number = (prev_phone.number[0:0+len(prev_phone.number) - len(number)] + number)
     if (ph.country_code is None and prev_phone is not None and prev_phone.country_code is not None): 
         if (len(prev_phone.number) == len(number)): 
             ph.country_code = prev_phone.country_code
     ok = False
     for d in number: 
         if (d != '0'): 
             ok = True
             break
     if (not ok): 
         return None
     if (country_code is not None): 
         if (len(number) < 7): 
             return None
     else: 
         s = PhoneHelper.get_country_prefix(number)
         if (s is not None): 
             num2 = number[len(s):]
             if (len(num2) >= 10 and len(num2) <= 11): 
                 number = num2
                 if (s != "7"): 
                     ph.country_code = s
         if (len(number) == 8 and prev_phone is None): 
             return None
     if (len(number) > 11): 
         if ((len(number) < 14) and ((country_code == "1" or country_code == "43"))): 
             pass
         else: 
             return None
     ph.number = number
     if (additional is not None): 
         ph.add_slot(PhoneReferent.ATTR_ADDNUMBER, additional, True, 0)
     if (not is_phone_before and end.next0_ is not None and not end.is_newline_after): 
         if (end.next0_.is_char_of("+=") or end.next0_.is_hiphen): 
             return None
     if (country_code is not None and country_code == "7"): 
         if (len(number) != 10): 
             return None
     ph._m_template = Utils.toStringStringIO(templ)
     if (j == (len(pli) - 1) and pli[j].item_type == PhoneItemToken.PhoneItemType.PREFIX and not pli[j].is_newline_before): 
         end = pli[j].end_token
         if (pli[j].kind != PhoneKind.UNDEFINED): 
             ph.kind = pli[j].kind
     res = ReferentToken(ph, pli[0].begin_token, end)
     if (pli[0].item_type == PhoneItemToken.PhoneItemType.PREFIX and pli[0].end_token.next0_.is_table_control_char): 
         res.begin_token = pli[1].begin_token
     return res
Example #11
0
 def get_variants(rus_or_lat: str) -> typing.List[str]:
     """ Build every transliteration of a word into the other alphabet.

     The direction is chosen from the first character of the upper-cased
     input: when it is Cyrillic, the ``rus`` side of each accord is matched
     against the input and the ``lat`` side is emitted; otherwise the roles
     are swapped.  The word is consumed left to right.  At each position only
     the accords with the longest matching prefix are kept, and the result is
     the cartesian product of those per-position alternatives.

     Args:
         rus_or_lat(str): source word in either alphabet

     Returns:
         typing.List[str]: all variants, in the order produced by varying the
             last position fastest; empty for a null/empty input or when some
             position matches no accord
     """
     # Local import keeps the block self-contained; the file-level import
     # list is not visible from here.
     import itertools

     res = list()
     if (Utils.isNullOrEmpty(rus_or_lat)):
         return res
     rus_or_lat = rus_or_lat.upper()
     is_rus = LanguageHelper.is_cyrillic_char(rus_or_lat[0])

     # stack[k] holds the alternative accords for the k-th consumed prefix.
     stack = list()
     pos = 0
     while pos < len(rus_or_lat):
         candidates = list()
         maxlen = 0
         for a in RusLatAccord.__get_accords():
             pref = a.rus if is_rus else a.lat
             if (len(pref) == 0):
                 continue
             if (len(pref) < maxlen):
                 continue
             if (not RusLatAccord.__is_pref(rus_or_lat, pos, pref)):
                 continue
             # Tail-only accords are rejected unless the prefix reaches the
             # end of the word.
             if (a.on_tail and (len(pref) + pos) < len(rus_or_lat)):
                 continue
             if (len(pref) > maxlen):
                 # A strictly longer match supersedes everything found so far.
                 maxlen = len(pref)
                 candidates.clear()
             candidates.append(a)
         if (len(candidates) == 0 or maxlen == 0):
             return res
         stack.append(candidates)
         pos += maxlen
     if (len(stack) == 0):
         return res

     # itertools.product varies the last position fastest, which is the same
     # enumeration order as the index "odometer" it replaces.
     for combo in itertools.product(*stack):
         # str() mirrors what print() did to each piece in the old code.
         text = "".join(str(a.lat if is_rus else a.rus) for a in combo)
         ok = True
         if (not is_rus):
             # Cyrillic output: drop variants where 'Й' starts the word or
             # does not follow a Cyrillic vowel.
             for k, ch in enumerate(text):
                 if (ch != 'Й'):
                     continue
                 if (k == 0 or not LanguageHelper.is_cyrillic_vowel(text[k - 1])):
                     ok = False
                     break
         if (ok):
             res.append(text)
     return res
Example #12
0
 def transliteral_correction(value: str,
                             prev_value: str,
                             always: bool = False) -> str:
     """ Transliteration correction: rewrite letters of a word so that it
     uses a single alphabet, based on counts of Cyrillic and Latin letters.
     
     Letters found in ``_m_cyr_chars`` / ``_m_lat_chars`` are counted as
     "questionable" (the two strings are used as index-parallel tables, so
     presumably they hold mutually look-alike letters - confirm against their
     definition); all other Cyrillic/Latin letters are counted as "pure".
     
     Args:
         value(str): the word to correct
         prev_value(str): preceding word; its first character breaks a tie
             when the word has only questionable letters of both alphabets
         always(bool): allow conversion even when the word has no pure
             letters of the target alphabet
     
     Returns:
         str: the corrected word, or ``value`` unchanged when no correction
             applies
     """
     pure_cyr = 0
     pure_lat = 0
     ques_cyr = 0
     ques_lat = 0
     udar_cyr = 0
     y = False
     udaren = False
     i = 0
     # Emulates a C-style for loop: `continue` still advances i.
     first_pass2897 = True
     while True:
         if first_pass2897: first_pass2897 = False
         else: i += 1
         if (not (i < len(value))): break
         ch = value[i]
         ui = UnicodeInfo.ALL_CHARS[ord(ch)]
         if (not ui.is_letter):
             if (ui.is_udaren):
                 # Non-letter flagged is_udaren (presumably a stress mark):
                 # remember it and keep scanning.
                 udaren = True
                 continue
             if (ui.is_apos and len(value) > 2):
                 # Apostrophe-like character: remove every occurrence and
                 # retry (note: `always` is reset to False for the retry).
                 return LanguageHelper.transliteral_correction(
                     value.replace("{0}".format(ch), ""), prev_value, False)
             # Any other non-letter: leave the word untouched.
             return value
         if (ui.is_cyrillic):
             if (LanguageHelper._m_cyr_chars.find(ch) >= 0):
                 ques_cyr += 1
             else:
                 pure_cyr += 1
         elif (ui.is_latin):
             if (LanguageHelper._m_lat_chars.find(ch) >= 0):
                 ques_lat += 1
             else:
                 pure_lat += 1
         elif (LanguageHelper.__m_udar_chars.find(ch) >= 0):
             udar_cyr += 1
         else:
             return value
         # Cyrillic soft sign followed by Latin 'I' is later merged into 'Ы'.
         if (ch == 'Ь' and ((i + 1) < len(value)) and value[i + 1] == 'I'):
             y = True
     to_rus = False
     to_lat = False
     # Unambiguous letters of both alphabets: genuinely mixed, do nothing.
     if (pure_lat > 0 and pure_cyr > 0):
         return value
     if (((pure_lat > 0 or always)) and ques_cyr > 0):
         to_lat = True
     elif (((pure_cyr > 0 or always)) and ques_lat > 0):
         to_rus = True
     elif (pure_cyr == 0 and pure_lat == 0):
         if (ques_cyr > 0 and ques_lat > 0):
             # Only questionable letters of both kinds: follow the alphabet
             # of the previous word, otherwise the majority.
             if (not Utils.isNullOrEmpty(prev_value)):
                 if (LanguageHelper.is_cyrillic_char(prev_value[0])):
                     to_rus = True
                 elif (LanguageHelper.is_latin_char(prev_value[0])):
                     to_lat = True
             if (not to_lat and not to_rus):
                 if (ques_cyr > ques_lat):
                     to_rus = True
                 elif (ques_cyr < ques_lat):
                     to_lat = True
     if (not to_rus and not to_lat):
         # No alphabet conversion chosen; a rewrite is still needed if the
         # word has a 'ЬI' pair, an is_udaren mark or an __m_udar_chars letter.
         if (not y and not udaren and udar_cyr == 0):
             return value
     tmp = Utils.newStringIO(value)
     i = 0
     # Same C-style loop emulation; the buffer length is re-read on every
     # pass because characters may be removed below.
     first_pass2898 = True
     while True:
         if first_pass2898: first_pass2898 = False
         else: i += 1
         if (not (i < tmp.tell())): break
         if (Utils.getCharAtStringIO(tmp, i) == 'Ь'
                 and ((i + 1) < tmp.tell())
                 and Utils.getCharAtStringIO(tmp, i + 1) == 'I'):
             Utils.setCharAtStringIO(tmp, i, 'Ы')
             Utils.removeStringIO(tmp, i + 1, 1)
             continue
         cod = ord(Utils.getCharAtStringIO(tmp, i))
         # U+0300..U+036F is the Unicode "Combining Diacritical Marks" block.
         if (cod >= 0x300 and (cod < 0x370)):
             Utils.removeStringIO(tmp, i, 1)
             # NOTE(review): after the removal the following character moves
             # to index i, and `continue` then advances past it, so it is
             # never inspected by this loop - confirm this is intended.
             continue
         if (to_rus):
             ii = LanguageHelper._m_lat_chars.find(
                 Utils.getCharAtStringIO(tmp, i))
             if (ii >= 0):
                 Utils.setCharAtStringIO(tmp, i,
                                         LanguageHelper._m_cyr_chars[ii])
             else:
                 ii = LanguageHelper.__m_udar_chars.find(
                     Utils.getCharAtStringIO(tmp, i))
                 if (((ii)) >= 0):
                     Utils.setCharAtStringIO(
                         tmp, i, LanguageHelper.__m_udar_cyr_chars[ii])
         elif (to_lat):
             ii = LanguageHelper._m_cyr_chars.find(
                 Utils.getCharAtStringIO(tmp, i))
             if (ii >= 0):
                 Utils.setCharAtStringIO(tmp, i,
                                         LanguageHelper._m_lat_chars[ii])
         else:
             # No alphabet change: only map __m_udar_chars letters to their
             # __m_udar_cyr_chars counterparts.
             ii = LanguageHelper.__m_udar_chars.find(
                 Utils.getCharAtStringIO(tmp, i))
             if (ii >= 0):
                 Utils.setCharAtStringIO(
                     tmp, i, LanguageHelper.__m_udar_cyr_chars[ii])
     return Utils.toStringStringIO(tmp)
Example #13
0
 def getNameEx(begin: 'Token',
               end: 'Token',
               cla: 'MorphClass',
               mc: 'MorphCase',
               gender: 'MorphGender' = MorphGender.UNDEFINED,
               ignore_brackets_and_hiphens: bool = False,
               ignore_geo_referent: bool = False) -> str:
     """ Build a name string from the tokens in the range begin..end.

     Text tokens contribute their term, or a normal form selected from their
     morphological word forms when any of cla / mc / gender is set; number
     tokens contribute their value; meta tokens are rendered recursively.

     Args:
         begin(Token): first token of the range
         end(Token): last token of the range (inclusive)
         cla(MorphClass): when cla.value != 0, only word forms whose class
             intersects it are considered
         mc(MorphCase): when defined, only word forms whose case intersects
             it are considered
         gender(MorphGender): when not UNDEFINED, preferred gender of the
             word form (dropped in a second attempt if nothing matched)
         ignore_brackets_and_hiphens(bool): skip bracket tokens and
             whitespace-adjacent hyphens; a bracketed run opened by one of
             "(<[" is rendered recursively and appended with its brackets
         ignore_geo_referent(bool): skip meta tokens (other than begin)
             whose referent has type_name "GEO"

     Returns:
         str: the composed name, or None when begin/end is None, the range is
             inverted, or nothing was produced
     """
     if (end is None or begin is None):
         return None
     # Reject a range whose first token ends after the last token begins.
     if (begin.end_char > end.begin_char and begin != end):
         return None
     res = io.StringIO()
     # Accumulates hyphen-joined leading words ("A-B-...") until the final
     # word of the compound is reached.
     prefix = None
     t = begin
     # Emulates a C-style for loop: every pass after the first (including
     # those reached via `continue`) advances t to t.next0_.
     first_pass2809 = True
     while True:
         if first_pass2809: first_pass2809 = False
         else: t = t.next0_
         if (not (t is not None and t.end_char <= end.end_char)): break
         # Hard cap on the output size.
         if (res.tell() > 1000):
             break
         if (t.is_table_control_char):
             continue
         if (ignore_brackets_and_hiphens):
             if (BracketHelper.isBracket(t, False)):
                 if (t == end):
                     break
                 if (t.isCharOf("(<[")):
                     br = BracketHelper.tryParse(t, BracketParseAttr.NO,
                                                 100)
                     if (br is not None and br.end_char <= end.end_char):
                         # Render the bracket contents without any
                         # morphological constraints.
                         tmp = ProperNameHelper.getNameEx(
                             br.begin_token.next0_, br.end_token.previous,
                             MorphClass.UNDEFINED, MorphCase.UNDEFINED,
                             MorphGender.UNDEFINED,
                             ignore_brackets_and_hiphens, False)
                         if (tmp is not None):
                             # A bracket pair that closes the range and wraps
                             # a single non-letter, non-referent token is
                             # dropped entirely.
                             if ((br.end_char == end.end_char
                                  and br.begin_token.next0_
                                  == br.end_token.previous and
                                  not br.begin_token.next0_.chars.is_letter)
                                     and not ((isinstance(
                                         br.begin_token.next0_,
                                         ReferentToken)))):
                                 pass
                             else:
                                 print(" {0}{1}{2}".format(
                                     t.getSourceText(), tmp,
                                     br.end_token.getSourceText()),
                                       end="",
                                       file=res,
                                       flush=True)
                         # Jump to the closing bracket; the loop header then
                         # steps past it.
                         t = br.end_token
                 continue
             if (t.is_hiphen):
                 if (t == end):
                     break
                 elif (t.is_whitespace_before or t.is_whitespace_after):
                     continue
         tt = Utils.asObjectOrNull(t, TextToken)
         if (tt is not None):
             if (not ignore_brackets_and_hiphens):
                 # Word followed by a hyphen and another text token: keep it
                 # as a prefix and skip the hyphen.
                 if ((tt.next0_ is not None and tt.next0_.is_hiphen and
                      (isinstance(tt.next0_.next0_, TextToken)))
                         and tt != end and tt.next0_ != end):
                     if (prefix is None):
                         prefix = tt.term
                     else:
                         prefix = "{0}-{1}".format(prefix, tt.term)
                     t = tt.next0_
                     if (t == end):
                         break
                     else:
                         continue
             s = None
             # Morphological normalisation is attempted only when at least
             # one constraint was supplied.
             if (cla.value != (0) or not mc.is_undefined
                     or gender != MorphGender.UNDEFINED):
                 for wff in tt.morph.items:
                     wf = Utils.asObjectOrNull(wff, MorphWordForm)
                     if (wf is None):
                         continue
                     if (cla.value != (0)):
                         if ((((wf.class0_.value) & (cla.value))) == 0):
                             continue
                     if (not mc.is_undefined):
                         if (((wf.case_) & mc).is_undefined):
                             continue
                     if (gender != MorphGender.UNDEFINED):
                         if ((((wf.gender) &
                               (gender))) == (MorphGender.UNDEFINED)):
                             continue
                     # Take the first match, but let a form whose normal case
                     # equals the term itself override it.
                     if (s is None or wf.normal_case == tt.term):
                         s = wf.normal_case
                 # Second attempt: same filters without the gender check.
                 if (s is None and gender != MorphGender.UNDEFINED):
                     for wff in tt.morph.items:
                         wf = Utils.asObjectOrNull(wff, MorphWordForm)
                         if (wf is None):
                             continue
                         if (cla.value != (0)):
                             if ((((wf.class0_.value) & (cla.value))) == 0):
                                 continue
                         if (not mc.is_undefined):
                             if (((wf.case_) & mc).is_undefined):
                                 continue
                         if (s is None or wf.normal_case == tt.term):
                             s = wf.normal_case
             if (s is None):
                 # Fallback: the raw term; for tokens flagged is_last_lower
                 # (longer than 2 chars) use the source text cut right after
                 # its last upper-case character.
                 s = tt.term
                 if (tt.chars.is_last_lower and tt.length_char > 2):
                     s = tt.getSourceText()
                     for i in range(len(s) - 1, -1, -1):
                         if (str.isupper(s[i])):
                             s = s[0:0 + i + 1]
                             break
             if (prefix is not None):
                 delim = "-"
                 if (ignore_brackets_and_hiphens):
                     delim = " "
                 s = "{0}{1}{2}".format(prefix, delim, s)
             prefix = (None)
             # Separator: a space before an alphanumeric word unless the
             # output already ends with '-'; otherwise a space only before a
             # token that can open a bracket sequence.
             if (res.tell() > 0 and len(s) > 0):
                 if (str.isalnum(s[0])):
                     ch0 = Utils.getCharAtStringIO(res, res.tell() - 1)
                     if (ch0 == '-'):
                         pass
                     else:
                         print(' ', end="", file=res)
                 elif (not ignore_brackets_and_hiphens
                       and BracketHelper.canBeStartOfSequence(
                           tt, False, False)):
                     print(' ', end="", file=res)
             print(s, end="", file=res)
         elif (isinstance(t, NumberToken)):
             if (res.tell() > 0):
                 # No space when the number is glued to a preceding '-'.
                 if (not t.is_whitespace_before and Utils.getCharAtStringIO(
                         res,
                         res.tell() - 1) == '-'):
                     pass
                 else:
                     print(' ', end="", file=res)
             nt = Utils.asObjectOrNull(t, NumberToken)
             # A single-word adjective numeral spelled in words keeps its
             # term; every other number is written as its value.
             if ((t.morph.class0_.is_adjective
                  and nt.typ == NumberSpellingType.WORDS
                  and nt.begin_token == nt.end_token)
                     and (isinstance(nt.begin_token, TextToken))):
                 print((nt.begin_token).term, end="", file=res)
             else:
                 print(nt.value, end="", file=res)
         elif (isinstance(t, MetaToken)):
             if ((ignore_geo_referent and t != begin
                  and t.getReferent() is not None)
                     and t.getReferent().type_name == "GEO"):
                 continue
             # Render the inner tokens of the meta token with the same options.
             s = ProperNameHelper.getNameEx(
                 (t).begin_token, (t).end_token, cla, mc, gender,
                 ignore_brackets_and_hiphens, ignore_geo_referent)
             if (not Utils.isNullOrEmpty(s)):
                 if (res.tell() > 0):
                     if (not t.is_whitespace_before
                             and Utils.getCharAtStringIO(
                                 res,
                                 res.tell() - 1) == '-'):
                         pass
                     else:
                         print(' ', end="", file=res)
                 print(s, end="", file=res)
         if (t == end):
             break
     if (res.tell() == 0):
         return None
     return Utils.toStringStringIO(res)
Example #14
0
 def __doCrLfCorrection(self, txt: str) -> str:
     """ Analyse text that was forcibly formatted (hard-wrapped) and replace
     the line breaks that look like mere wrapping with spaces.

     A first pass gathers statistics over long lines; if too few qualify or
     their average length is outside 50..100, the text is returned unchanged.
     A second pass rewrites qualifying line breaks and increments
     ``self.crlf_corrected_count`` for each one.
     
     Args:
         txt(str): source text

     Returns:
         str: the text with wrap-only line breaks replaced by spaces, or
             ``txt`` itself when no correction is applied
     """
     cou = 0
     total_len = 0
     i = 0
     # Pass 1. Emulates a C-style for loop: `continue` still advances i.
     first_pass3166 = True
     while True:
         if first_pass3166: first_pass3166 = False
         else: i += 1
         if (not (i < len(txt))): break
         ch = txt[i]
         # Only a CR or LF starts a measurement.
         if ((ord(ch)) != 0xD and (ord(ch)) != 0xA):
             continue
         len0_ = 0
         last_char = ch
         j = (i + 1)
         # Measure up to the next CR/LF; a TAB counts as 5 characters.
         while j < len(txt):
             ch = txt[j]
             if ((ord(ch)) == 0xD or (ord(ch)) == 0xA):
                 break
             elif ((ord(ch)) == 0x9):
                 len0_ += 5
             else:
                 last_char = ch
                 len0_ += 1
             j += 1
         if (j >= len(txt)):
             break
         # Short lines are not counted.
         if (len0_ < 30):
             continue
         if (last_char != '.' and last_char != ':' and last_char != ';'):
             # Do not count the line when the first non-whitespace character
             # after the break is a digit.
             next_is_dig = False
             k = j + 1
             while k < len(txt):
                 if (not Utils.isWhitespace(txt[k])):
                     if (str.isdigit(txt[k])):
                         next_is_dig = True
                     break
                 k += 1
             if (not next_is_dig):
                 cou += 1
                 total_len += len0_
         # NOTE(review): the loop increment then moves to j + 1, so the break
         # character at j is not itself used as the start of a measurement;
         # for LF-only text that appears to skip the next line - confirm.
         i = j
     # Require at least 4 counted lines with an average length in 50..100.
     if (cou < 4):
         return txt
     total_len = math.floor(total_len / cou)
     if ((total_len < 50) or total_len > 100):
         return txt
     tmp = Utils.newStringIO(txt)
     i = 0
     # Pass 2: from here on total_len holds the average line length.
     while i < tmp.tell():
         ch = Utils.getCharAtStringIO(tmp, i)
         len0_ = 0
         last_char = ch
         j = (i + 1)
         while j < tmp.tell():
             ch = Utils.getCharAtStringIO(tmp, j)
             if ((ord(ch)) == 0xD or (ord(ch)) == 0xA):
                 break
             elif ((ord(ch)) == 0x9):
                 len0_ += 5
             else:
                 last_char = ch
                 len0_ += 1
             j += 1
         if (j >= tmp.tell()):
             break
         # last_char becomes the nearest non-whitespace character before j.
         for jj in range(j - 1, -1, -1):
             last_char = Utils.getCharAtStringIO(tmp, jj)
             if (not Utils.isWhitespace(last_char)):
                 break
         else:
             # This value is overwritten immediately below.
             jj = -1
         not_single = False
         jj = (j + 1)
         # Treat a CR LF pair as one line break.
         if ((jj < tmp.tell())
                 and (ord(Utils.getCharAtStringIO(tmp, j))) == 0xD
                 and (ord(Utils.getCharAtStringIO(tmp, jj))) == 0xA):
             jj += 1
         # not_single: another CR/LF is met before any non-whitespace
         # character (presumably Utils.isWhitespace is true for CR/LF -
         # otherwise the second test below could never fire).
         while jj < tmp.tell():
             ch = Utils.getCharAtStringIO(tmp, jj)
             if (not Utils.isWhitespace(ch)):
                 break
             if ((ord(ch)) == 0xD or (ord(ch)) == 0xA):
                 not_single = True
                 break
             jj += 1
         # Replace the break when it is a single one, the line length is
         # within (average - 20, average + 10) and the line does not end
         # with '.', ':' or ';'.
         if (((not not_single and len0_ >
               (total_len - 20) and (len0_ < (total_len + 10)))
              and last_char != '.' and last_char != ':')
                 and last_char != ';'):
             Utils.setCharAtStringIO(tmp, j, ' ')
             self.crlf_corrected_count += 1
             if ((j + 1) < tmp.tell()):
                 ch = Utils.getCharAtStringIO(tmp, j + 1)
                 if ((ord(ch)) == 0xA):
                     Utils.setCharAtStringIO(tmp, j + 1, ' ')
                     j += 1
         # Net effect of the two statements below: continue from index j.
         i = (j - 1)
         i += 1
     return Utils.toStringStringIO(tmp)
Example #15
0
 def __toFullString(self, last_name_first : bool, lang : 'MorphLang') -> str:
     """ Compose the full display name of the person.

     Resolution order:
       1. the longest ATTR_IDENTITY slot value, if any;
       2. last name combined with the first and middle names found for it
          (each followed by '.' when ``__isInitial`` accepts it, otherwise
          by a space), plus a parenthesised Latin spelling when the last
          name starts with a Cyrillic letter and a Latin last-name slot
          exists;
       3. first name, with middle name, shortest king title and nickname;
       4. the literal "?".

     Args:
         last_name_first(bool): put the last name before the other names;
             forced to True when the "NAMETYPE" value is "china"
         lang(MorphLang): not used by this method

     Returns:
         str: the display name
     """
     identity = None
     for slot in self.slots:
         if (slot.type_name == PersonReferent.ATTR_IDENTITY):
             candidate = str(slot.value)
             if (identity is None or len(candidate) > len(identity)):
                 identity = candidate
     if (identity is not None):
         return MiscHelper.convertFirstCharUpperAndOtherLower(identity)

     if (self.getStringValue("NAMETYPE") == "china"):
         last_name_first = True

     surname = self.getStringValue(PersonReferent.ATTR_LASTNAME)
     if (surname is None):
         first = self.getStringValue(PersonReferent.ATTR_FIRSTNAME)
         if (first is None):
             return "?"
         middle = self.__findForSurname(PersonReferent.ATTR_MIDDLENAME, first, False)
         if (middle is not None):
             first = "{0} {1}".format(first, middle)
         first = MiscHelper.convertFirstCharUpperAndOtherLower(first)
         nick = self.getStringValue(PersonReferent.ATTR_NICKNAME)
         title = self.__findShortestKingTitul(False)
         if (title is not None):
             first = "{0} {1}".format(title, first)
         if (nick is not None):
             first = "{0} {1}".format(first, nick)
         return first

     text = ""
     if (last_name_first):
         text += "{0} ".format(surname)
     name = self.__findForSurname(PersonReferent.ATTR_FIRSTNAME, surname, False)
     if (name is not None):
         text += "{0}".format(name)
         text += ('.' if PersonReferent.__isInitial(name) else ' ')
         # The middle name is looked up only when a first name exists.
         name = self.__findForSurname(PersonReferent.ATTR_MIDDLENAME, surname, False)
         if (name is not None):
             text += "{0}".format(name)
             text += ('.' if PersonReferent.__isInitial(name) else ' ')
     if (not last_name_first):
         text += str(surname)
     elif (text.endswith(' ')):
         # Drop the separator left after the last written part.
         text = text[:-1]

     # Guard: an empty last name has no first character to classify.
     if (surname and LanguageHelper.isCyrillicChar(surname[0])):
         latin_surname = None
         for slot in self.slots:
             if (slot.type_name == PersonReferent.ATTR_LASTNAME):
                 value = Utils.asObjectOrNull(slot.value, str)
                 # asObjectOrNull may return None for a non-string slot
                 # value; skip it instead of failing on len(None).
                 if (value and LanguageHelper.isLatinChar(value[0])):
                     latin_surname = value
                     break
         if (latin_surname is not None):
             latin_name = self.__findForSurname(PersonReferent.ATTR_FIRSTNAME, latin_surname, False)
             if (latin_name is None):
                 text += " ({0})".format(latin_surname)
             elif (PersonReferent.SHOW_LASTNAME_ON_FIRST_POSITION):
                 text += " ({0} {1})".format(latin_surname, latin_name)
             else:
                 text += " ({0} {1})".format(latin_name, latin_surname)
     return MiscHelper.convertFirstCharUpperAndOtherLower(text)
Example #16
0
 def toString(self,
              short_variant: bool,
              lang: 'MorphLang',
              lev: int = 0) -> str:
     """ Render a one-line description of this block.

     The text is assembled from: the outer names of ``kind`` (and ``kind2``
     in parentheses), the numbering (row/column counts for a table, otherwise
     "№number.sub.sub2.sub3" with an optional "min-" range prefix), a status
     mark, and finally the quoted name/value or the quoted referenced object.

     Args:
         short_variant(bool): passed through to the referenced object
         lang(MorphLang): language used to convert kind values
         lev(int): recursion depth; the reference is not rendered at 30+

     Returns:
         str: the description with surrounding whitespace stripped
     """
     kind = self.kind
     text = ""

     label = Utils.asObjectOrNull(
         MetaInstrumentBlock.GLOBAL_META.kind_feature.convertInnerValueToOuterValue(
             Utils.enumToString(kind), lang),
         str)
     if label is not None:
         text += str(label)
         if self.kind2 != InstrumentKind.UNDEFINED:
             label = Utils.asObjectOrNull(
                 MetaInstrumentBlock.GLOBAL_META.kind_feature.convertInnerValueToOuterValue(
                     Utils.enumToString(self.kind2), lang),
                 str)
             if label is not None:
                 text += " ({0})".format(label)

     if self.number > 0:
         if kind == InstrumentKind.TABLE:
             text += " {0} строк, {1} столбцов".format(
                 len(self.children), self.number)
         else:
             text += " №{0}".format(self.number)
             # Each deeper sub-number is shown only if the previous one is.
             if self.sub_number > 0:
                 text += ".{0}".format(self.sub_number)
                 if self.sub_number2 > 0:
                     text += ".{0}".format(self.sub_number2)
                     if self.sub_number3 > 0:
                         text += ".{0}".format(self.sub_number3)
             if self.min_number > 0:
                 # Insert "min-" right after the last space or dot so that
                 # the final number component reads as a range.
                 cut = max(text.rfind(' '), text.rfind('.'))
                 if cut >= 0:
                     text = (text[:cut + 1]
                             + "{0}-".format(self.min_number)
                             + text[cut + 1:])

     skip_ref = False
     if self.is_expired:
         text += " (утратить силу)"
         skip_ref = True
     elif (kind != InstrumentKind.EDITIONS
           and kind != InstrumentKind.APPROVED
           and isinstance(self.ref, DecreeReferent)):
         text += " (*)"
         skip_ref = True

     title = self.getStringValue(InstrumentBlockReferent.ATTR_NAME)
     if title is None:
         title = self.getStringValue(InstrumentBlockReferent.ATTR_VALUE)

     if title is not None:
         # Long titles are cut to 100 characters plus an ellipsis.
         if len(title) > 100:
             title = title[:100] + "..."
         text += " \"{0}\"".format(title)
     elif not skip_ref and isinstance(self.ref, Referent) and lev < 30:
         text += " \"{0}\"".format(
             self.ref.toString(short_variant, lang, lev + 1))

     return text.strip()
Example #17
0
 def _attachNumber(t: 'Token',
                   ignore_region: bool = False) -> 'TransItemToken':
     if (t is None):
         return None
     t0 = t
     t1 = t
     if (t.isValue("НА", None)):
         npt = NounPhraseHelper.tryParse(t.next0_, NounPhraseParseAttr.NO,
                                         0)
         if (npt is not None and npt.noun.isValue("ФОН", None)):
             t = npt.end_token.next0_
     res = None
     first_pass3135 = True
     while True:
         if first_pass3135: first_pass3135 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_newline_before):
             break
         if (t != t0 and t.whitespaces_before_count > 1):
             break
         if (t.getReferent() is not None):
             break
         if (t.is_hiphen):
             continue
         nt = Utils.asObjectOrNull(t, NumberToken)
         if (nt is not None):
             if (nt.typ != NumberSpellingType.DIGIT
                     or nt.morph.class0_.is_adjective):
                 break
             if (res is None):
                 res = io.StringIO()
             elif (str.isdigit(Utils.getCharAtStringIO(res,
                                                       res.tell() - 1))):
                 print(' ', end="", file=res)
             print(nt.getSourceText(), end="", file=res)
             t1 = t
             continue
         tt = Utils.asObjectOrNull(t, TextToken)
         if (tt is None):
             break
         if (not tt.chars.is_letter):
             break
         if (not tt.chars.is_all_upper and tt.is_whitespace_before):
             break
         if (tt.length_char > 3):
             break
         if (res is None):
             res = io.StringIO()
         print(tt.term, end="", file=res)
         t1 = t
     if (res is None or (res.tell() < 5)):
         return None
     re = TransItemToken._new2526(t0, t1, TransItemToken.Typs.NUMBER,
                                  Utils.toStringStringIO(res))
     if (not ignore_region):
         k = 0
         i = res.tell() - 1
         while i > 4:
             if (not str.isdigit(Utils.getCharAtStringIO(res, i))):
                 if (Utils.getCharAtStringIO(res, i) == ' '
                         and ((k == 2 or k == 3))):
                     re.alt_value = re.value[i + 1:]
                     re.value = re.value[0:0 + i]
                 break
             i -= 1
             k += 1
     re.value = re.value.replace(" ", "")
     if (ignore_region):
         re.alt_value = MiscHelper.createCyrLatAlternative(re.value)
     return re
Example #18
0
 def toString(self,
              short_variant: bool,
              lang: 'MorphLang' = None,
              lev: int = 0) -> str:
     """Compose the single-line text form of this address.

     The parts are emitted in a fixed order: the bracketed detail, the
     streets (or the metro when there are no streets), kilometer, house,
     corpus, building, the remaining numbered parts, the FIAS GUIDs, the
     BTI code, the geo objects and finally the zip code.

     Args:
         short_variant(bool): not used by this method
         lang(MorphLang): language passed to the detail value conversion
             and to the nested toString calls
         lev(int): nesting level; geo objects are rendered with lev + 1

     Returns:
         str: the assembled text with surrounding whitespace stripped
     """
     out = io.StringIO()

     def put(piece) -> None:
         # Appends exactly what print(piece, end="", file=out) would append.
         out.write(str(piece))

     def shown_number(number):
         # A stored value of "0" is displayed as "Б/Н".
         return "Б/Н" if number == "0" else number

     # Optional "[detail, parameter]" prefix.
     detail = self.getStringValue(AddressReferent.ATTR_DETAIL)
     if detail is not None:
         converted = MetaAddress._global_meta.detail_feature.convertInnerValueToOuterValue(
             detail, lang)
         detail = Utils.asObjectOrNull(converted, str)
     if detail is not None:
         put("[" + detail.lower())
         detail_param = self.getStringValue(AddressReferent.ATTR_DETAILPARAM)
         if detail_param is not None:
             put(", {0}".format(detail_param))
         put(']')

     # Streets, comma separated; the metro is used only when there are none.
     streets = self.streets
     if len(streets) > 0:
         if out.tell() > 0:
             put(' ')
         for index, street in enumerate(streets):
             if index > 0:
                 put(", ")
             put(street.toString(True, lang, 0))
     else:
         metro = self.metro
         if metro is not None:
             if out.tell() > 0:
                 put(' ')
             put(Utils.ifNotNull(metro, ""))

     kilometer = self.kilometer
     if kilometer is not None:
         put(" {0}км.".format(kilometer))

     house = self.house
     if house is not None:
         house_kind = self.house_type
         if house_kind == AddressHouseType.ESTATE:
             put(" влад.")
         elif house_kind == AddressHouseType.HOUSEESTATE:
             put(" домовл.")
         else:
             put(" д.")
         put(shown_number(house))

     corpus = self.corpus
     if corpus is not None:
         put(" корп.{0}".format(shown_number(corpus)))

     building = self.building
     if building is not None:
         building_kind = self.building_type
         if building_kind == AddressBuildingType.CONSTRUCTION:
             put(" сооруж.")
         elif building_kind == AddressBuildingType.LITER:
             put(" лит.")
         else:
             put(" стр.")
         put(shown_number(building))

     # Parts that are just "<label><value>", in output order.
     labelled_parts = (
         (" под.{0}", "potch"),
         (" эт.{0}", "floor0_"),
         (" кв.{0}", "flat"),
         (" корп.(кв.?){0}", "corpus_or_flat"),
         (" оф.{0}", "office"),
         (" блок {0}", "block"),
         (" уч.{0}", "plot"),
         (" бокс {0}", "box"),
         (" а\\я{0}", "post_office_box"),
         (" ГСП-{0}", "csp"),
     )
     for template, attr_name in labelled_parts:
         part = getattr(self, attr_name)
         if part is not None:
             put(template.format(part))

     # FIAS: the first referent's GUID, then the GUIDs of the other FIAS slots.
     kladr = self.getSlotValue(AddressReferent.ATTR_FIAS)
     if isinstance(kladr, Referent):
         put(" (ФИАС: {0}".format(
             Utils.ifNotNull(kladr.getStringValue("GUID"), "?")))
         for slot in self.slots:
             if slot.type_name != AddressReferent.ATTR_FIAS:
                 continue
             if not isinstance(slot.value, Referent):
                 continue
             if slot.value != kladr:
                 put(", {0}".format(
                     Utils.ifNotNull(slot.value.getStringValue("GUID"), "?")))
         put(')')

     bti = self.getStringValue(AddressReferent.ATTR_BTI)
     if bti is not None:
         put(" (БТИ {0})".format(bti))

     # Geo objects: drop one trailing space, then separate with ';' unless
     # the buffer is empty or ends with the closing ']' of the detail.
     for geo in self.geos:
         if (out.tell() > 0
                 and Utils.getCharAtStringIO(out, out.tell() - 1) == ' '):
             Utils.setLengthStringIO(out, out.tell() - 1)
         if (out.tell() > 0
                 and Utils.getCharAtStringIO(out, out.tell() - 1) != ']'):
             put(';')
         put(" {0}".format(geo.toString(True, lang, lev + 1)))

     zip_code = self.zip0_
     if zip_code is not None:
         put("; {0}".format(zip_code))
     return Utils.toStringStringIO(out).strip()
Example #19
0
 def _to_string(self, short_variant: bool, lang: 'MorphLang', lev: int,
                from_range: int) -> str:
     """Render this date as text in Russian, Ukrainian or English.

     Relative dates (is_relative) are rendered from their slots and
     returned early; all other dates are rendered from the pointer, day of
     week, century / day / month / year, time and quartal values.

     Args:
         short_variant(bool): use the short year suffix ("г" / "р") and skip
             the DateRelHelper.append_to_string call
         lang(MorphLang): output language; MorphLang.RU is used when None
         lev(int): nesting level (not used by this method)
         from_range(int): 0 for a standalone date; 1 adds the range-start
             prefix ("с" / "з" / "from"); 2 adds the range-end prefix
             ("по" / "to")

     Returns:
         str: the rendered text, or "?" when nothing was rendered
     """
     from pullenti.ner.date.internal.DateRelHelper import DateRelHelper

     def parse_int(raw) -> int:
         # Wraps the out-argument parsing idiom; the holder starts at 0
         # (presumably left untouched when the value cannot be parsed).
         holder = RefOutArgWrapper(0)
         Utils.tryParseInt(Utils.asObjectOrNull(raw, str), holder)
         return holder.value

     res = io.StringIO()
     p = self.pointer
     if (lang is None):
         lang = MorphLang.RU
     if (self.is_relative):
         if (self.pointer == DatePointerType.TODAY):
             print("сейчас", end="", file=res)
             if (not short_variant):
                 DateRelHelper.append_to_string(self, res)
             return Utils.toStringStringIO(res)
         word = None
         val = 0
         back = False
         is_local_rel = self.get_string_value(
             DateReferent.ATTR_ISRELATIVE) == "true"
         # Later slots overwrite word/val, so the last matching slot decides
         # the unit used by the "вперёд"/"назад" phrase below.
         for s in self.slots:
             if (s.type_name == DateReferent.ATTR_CENTURY):
                 word = "век"
                 val = parse_int(s.value)
             elif (s.type_name == DateReferent.ATTR_YEAR):
                 word = "год"
                 val = parse_int(s.value)
             elif (s.type_name == DateReferent.ATTR_MONTH):
                 word = "месяц"
                 val = parse_int(s.value)
                 if (not is_local_rel and val >= 1 and val <= 12):
                     print(DateReferent.__m_month0[val - 1],
                           end="",
                           file=res)
             elif (s.type_name == DateReferent.ATTR_DAY):
                 word = "день"
                 val = parse_int(s.value)
                 if ((not is_local_rel and self.month > 0
                      and self.month <= 12) and self.higher is not None
                         and self.higher.get_string_value(
                             DateReferent.ATTR_ISRELATIVE) != "true"):
                     print("{0} {1}".format(
                         val, DateReferent.__m_month[self.month - 1]),
                           end="",
                           file=res)
                 elif (not is_local_rel):
                     print("{0} число".format(val), end="", file=res)
             elif (s.type_name == DateReferent.ATTR_QUARTAL):
                 word = "квартал"
                 val = parse_int(s.value)
             elif (s.type_name == DateReferent.ATTR_WEEK):
                 word = "неделя"
                 val = parse_int(s.value)
             elif (s.type_name == DateReferent.ATTR_HOUR):
                 word = "час"
                 val = parse_int(s.value)
                 if (not is_local_rel):
                     print("{0}:{1}".format("{:02d}".format(val),
                                            "{:02d}".format(self.minute)),
                           end="",
                           file=res)
             elif (s.type_name == DateReferent.ATTR_MINUTE):
                 word = "минута"
                 val = parse_int(s.value)
             elif (s.type_name == DateReferent.ATTR_DAYOFWEEK):
                 val = parse_int(s.value)
                 if (not is_local_rel):
                     print((DateReferent.__m_week_day_ex[val - 1]
                            if val >= 1 and val <= 7 else "?"),
                           end="",
                           file=res)
                 else:
                     # A negative value means a day of week in the past.
                     if (val < 0):
                         val = (-val)
                         back = True
                     # NOTE(review): val == 0 passes this check and indexes
                     # __m_week_day_ex[-1]; confirm whether 0 can occur here.
                     if (val >= 0 and val <= 7):
                         # The adjective form is chosen per weekday number
                         # (7, then 3 and 6, then all others).
                         if (val == 7):
                             adjective = ("прошлое" if back else "будущее")
                         elif (val == 3 or val == 6):
                             adjective = ("прошлая" if back else "будущая")
                         else:
                             adjective = ("прошлый" if back else "будущий")
                         print("{0} {1}".format(
                             adjective,
                             DateReferent.__m_week_day_ex[val - 1]),
                               end="",
                               file=res)
                         break
         if (word is not None and is_local_rel):
             if (val == 0):
                 print("{0} {1}".format(
                     ("текущая" if word == "неделя" or word == "минута" else
                      "текущий"), word),
                       end="",
                       file=res)
             elif (val > 0 and not back):
                 print("{0} {1} вперёд".format(
                     val,
                     MiscHelper.get_text_morph_var_by_case_and_number_ex(
                         word, None, MorphNumber.UNDEFINED, str(val))),
                       end="",
                       file=res)
             else:
                 val = (-val)
                 print("{0} {1} назад".format(
                     val,
                     MiscHelper.get_text_morph_var_by_case_and_number_ex(
                         word, None, MorphNumber.UNDEFINED, str(val))),
                       end="",
                       file=res)
         elif (not is_local_rel and res.tell() == 0):
             print("{0} {1}".format(
                 val,
                 MiscHelper.get_text_morph_var_by_case_and_number_ex(
                     word, None, MorphNumber.UNDEFINED, str(val))),
                   end="",
                   file=res)
         if (not short_variant):
             DateRelHelper.append_to_string(self, res)
         # The range prefix is inserted at the front of the finished text.
         if (from_range == 1):
             Utils.insertStringIO(
                 res, 0, "{0} ".format(("з" if lang.is_ua else
                                        ("from" if lang.is_en else "с"))))
         elif (from_range == 2):
             Utils.insertStringIO(res, 0, ("to " if lang.is_en else "по "))
         return Utils.toStringStringIO(res)
     if (from_range == 1):
         print("{0} ".format(("з" if lang.is_ua else
                              ("from" if lang.is_en else "с"))),
               end="",
               file=res)
     elif (from_range == 2):
         print(("to " if lang.is_en else "по "), end="", file=res)
     if (p != DatePointerType.NO):
         val = MetaDate.POINTER.convert_inner_value_to_outer_value(
             Utils.enumToString(p), lang)
         # Inside a range (and not in English) the pointer word is replaced
         # by the form that fits the range prefix.
         if (from_range == 0 or lang.is_en):
             pass
         elif (from_range == 1):
             if (p == DatePointerType.BEGIN):
                 val = ("початку" if lang.is_ua else "начала")
             elif (p == DatePointerType.CENTER):
                 val = ("середини" if lang.is_ua else "середины")
             elif (p == DatePointerType.END):
                 val = ("кінця" if lang.is_ua else "конца")
             elif (p == DatePointerType.TODAY):
                 val = ("цього часу"
                        if lang.is_ua else "настоящего времени")
         elif (from_range == 2):
             if (p == DatePointerType.BEGIN):
                 val = ("початок" if lang.is_ua else "начало")
             elif (p == DatePointerType.CENTER):
                 val = ("середину" if lang.is_ua else "середину")
             elif (p == DatePointerType.END):
                 val = ("кінець" if lang.is_ua else "конец")
             elif (p == DatePointerType.TODAY):
                 val = ("теперішній час"
                        if lang.is_ua else "настоящее время")
         print("{0} ".format(val), end="", file=res)
     if (self.day_of_week > 0):
         if (lang.is_en):
             print("{0}, ".format(
                 DateReferent.__m_week_day_en[self.day_of_week - 1]),
                   end="",
                   file=res)
         else:
             print("{0}, ".format(
                 DateReferent.__m_week_day[self.day_of_week - 1]),
                   end="",
                   file=res)
     y = self.year
     m = self.month
     d = self.day
     cent = self.century
     if (y == 0 and cent != 0):
         # Century-only date: Roman numeral plus the word for "century".
         is_bc = cent < 0
         if (cent < 0):
             cent = (-cent)
         print(NumberHelper.get_number_roman(cent), end="", file=res)
         # The second word form is used when a month, a pointer or a
         # range-start prefix is present.
         dependent_form = (m > 0 or p != DatePointerType.NO
                           or from_range == 1)
         # Fix: " century" used to be printed for Ukrainian, which left
         # English with the Russian word and made the Ukrainian forms
         # unreachable. Precedence is Ukrainian, English, then Russian,
         # as elsewhere in this method.
         if (lang.is_ua):
             print((" віка" if dependent_form else " вік"),
                   end="",
                   file=res)
         elif (lang.is_en):
             print(" century", end="", file=res)
         else:
             print((" века" if dependent_form else " век"),
                   end="",
                   file=res)
         if (is_bc):
             print((" до н.е." if lang.is_ua else
                    (" BC" if lang.is_en else " до н.э.")),
                   end="",
                   file=res)
         return Utils.toStringStringIO(res)
     if (d > 0):
         print(d, end="", file=res)
     if (m > 0 and m <= 12):
         if (res.tell() > 0
                 and Utils.getCharAtStringIO(res,
                                             res.tell() - 1) != ' '):
             print(' ', end="", file=res)
         # The first month table is used when a day, a pointer or a range
         # prefix is present; otherwise the "0" table is used.
         if (lang.is_ua):
             print((DateReferent.__m_monthua[m - 1]
                    if d > 0 or p != DatePointerType.NO or from_range != 0
                    else DateReferent.__m_month0ua[m - 1]),
                   end="",
                   file=res)
         elif (lang.is_en):
             print(DateReferent.__m_monthen[m - 1], end="", file=res)
         else:
             print((DateReferent.__m_month[m - 1]
                    if d > 0 or p != DatePointerType.NO or from_range != 0
                    else DateReferent.__m_month0[m - 1]),
                   end="",
                   file=res)
     if (y != 0):
         is_bc = y < 0
         if (y < 0):
             y = (-y)
         if (res.tell() > 0
                 and Utils.getCharAtStringIO(res,
                                             res.tell() - 1) != ' '):
             print(' ', end="", file=res)
         if (lang is not None and lang.is_en):
             print("{0}".format(y), end="", file=res)
         elif (short_variant):
             print("{0}{1}".format(y, ("р" if lang.is_ua else "г")),
                   end="",
                   file=res)
         elif (m > 0 or p != DatePointerType.NO or from_range == 1):
             print("{0} {1}".format(y, ("року" if lang.is_ua else "года")),
                   end="",
                   file=res)
         else:
             print("{0} {1}".format(y, ("рік" if lang.is_ua else "год")),
                   end="",
                   file=res)
         if (is_bc):
             print((" до н.е." if lang.is_ua else
                    ("BC" if lang.is_en else " до н.э.")),
                   end="",
                   file=res)
     h = self.hour
     mi = self.minute
     se = self.second
     if (h >= 0 and mi >= 0):
         if (res.tell() > 0):
             print(' ', end="", file=res)
         print("{0}:{1}".format("{:02d}".format(h), "{:02d}".format(mi)),
               end="",
               file=res)
         if (se >= 0):
             print(":{0}".format("{:02d}".format(se)), end="", file=res)
     # The quartal is shown only when nothing else was rendered.
     if (res.tell() == 0):
         if (self.quartal != 0):
             print("{0}-й квартал".format(self.quartal),
                   end="",
                   file=res)
     if (res.tell() == 0):
         return "?"
     # Trim trailing spaces and commas left by the pieces above.
     while Utils.getCharAtStringIO(
             res,
             res.tell() - 1) == ' ' or Utils.getCharAtStringIO(
                 res,
                 res.tell() - 1) == ',':
         Utils.setLengthStringIO(res, res.tell() - 1)
     # NOTE(review): the is_relative branch above always returns, so this
     # condition looks unreachable unless is_relative changes between reads.
     if (not short_variant and self.is_relative):
         DateRelHelper.append_to_string(self, res)
     return Utils.toStringStringIO(res).strip()
 def try_attach(self, t : 'Token', for_ontology : bool=False) -> 'ReferentToken':
     """Try to build a denomination referent starting at token t.

     Tokens after t are accumulated into a buffer (slashes, backslashes,
     underscores and hyphens become '-') until a token that cannot belong
     to the denomination is met.

     Args:
         t(Token): first token of the candidate
         for_ontology(bool): when True, whitespace between tokens does not
             stop the scan and the final validity checks are skipped

     Returns:
         ReferentToken: the attached referent, or None when nothing fits
     """
     if t is None:
         return None
     special = self.__try_attach_spec(t)
     if special is not None:
         return special
     # An all-lowercase start is accepted only when it is glued to a
     # following number and is itself preceded by nothing, by whitespace
     # or by ',' / ':'.
     if t.chars.is_all_lower:
         if t.is_whitespace_after or not isinstance(t.next0_, NumberToken):
             return None
         if (t.previous is not None and not t.is_whitespace_before
                 and not t.previous.is_char_of(",:")):
             return None
     buf = io.StringIO()
     last_tok = t
     ends_with_sep = False
     valid = True
     digit_parts = 0
     symbol_parts = 0
     cur = last_tok.next0_
     while cur is not None:
         if cur.is_whitespace_before and not for_ontology:
             break
         if cur.is_char_of("/\\_") or cur.is_hiphen:
             # Separators are normalised to '-' and do not move last_tok.
             ends_with_sep = True
             buf.write('-')
         else:
             ends_with_sep = False
             number = Utils.asObjectOrNull(cur, NumberToken)
             if number is not None:
                 if number.typ != NumberSpellingType.DIGIT:
                     break
                 last_tok = number
                 buf.write(str(number.get_source_text()))
                 digit_parts += 1
             else:
                 text = Utils.asObjectOrNull(cur, TextToken)
                 if text is None:
                     break
                 if text.length_char > 3:
                     valid = False
                     break
                 if not str.isalpha(text.term[0]):
                     if (text.is_char_of(",:")
                             or BracketHelper.can_be_end_of_sequence(text, False, None, False)):
                         break
                     if not text.is_char_of("+*&^#@!"):
                         valid = False
                         break
                     symbol_parts += 1
                 last_tok = text
                 buf.write(str(text.get_source_text()))
         cur = cur.next0_
     if not for_ontology:
         length = buf.tell()
         if length < 1 or not valid or ends_with_sep or length > 12:
             return None
         if Utils.getCharAtStringIO(buf, buf.tell() - 1) == '!':
             return None
         # At least one number or special symbol must be present.
         if digit_parts + symbol_parts == 0:
             return None
         if not self.__check_attach(t, last_tok):
             return None
     referent = DenominationReferent()
     referent._add_value(t, last_tok)
     return ReferentToken(referent, t, last_tok)
Example #21
0
 def __doTransliteralCorrection(txt: io.StringIO, info: io.StringIO) -> int:
     """ Perform transliteration correction of a text buffer
     
     Each run of letters is examined as one word. Letters found in
     __m_lat_chars are replaced by the letter at the same index of
     __m_rus_chars when the word also contains Cyrillic letters, or when it
     has no other Latin letters and the previous word counted as Russian.
     
     Args:
         txt(io.StringIO): text being corrected (modified in place)
         info(io.StringIO): receives a description of the replacements (may be None)
     
     Returns:
         int: number of replacements
     """
     replaced = 0
     prev_word_rus = False
     pos = 0
     while pos < txt.tell():
         if not str.isalpha(Utils.getCharAtStringIO(txt, pos)):
             pos += 1
             continue
         # Classify every letter of the word that starts at pos.
         cyr_count = 0
         lat_only_count = 0
         ambiguous_count = 0
         word_end = pos
         while word_end < txt.tell():
             letter = Utils.getCharAtStringIO(txt, word_end)
             if not str.isalpha(letter):
                 break
             if 0x400 <= ord(letter) < 0x500:
                 cyr_count += 1
             elif SourceOfAnalysis.__m_lat_chars.find(letter) >= 0:
                 ambiguous_count += 1
             else:
                 lat_only_count += 1
             word_end += 1
         mixed_word = ambiguous_count > 0 and cyr_count > 0
         after_rus_word = (ambiguous_count > 0 and lat_only_count == 0
                           and prev_word_rus)
         if not (mixed_word or after_rus_word):
             prev_word_rus = cyr_count > 0
         else:
             if info is not None:
                 # Log line: "<original word>: a->b c->d ...".
                 if info.tell() > 0:
                     info.write("\r\n")
                 for k in range(pos, word_end):
                     info.write(str(Utils.getCharAtStringIO(txt, k)))
                 info.write(": ")
             for k in range(pos, word_end):
                 idx = SourceOfAnalysis.__m_lat_chars.find(
                     Utils.getCharAtStringIO(txt, k))
                 if idx < 0:
                     continue
                 if info is not None:
                     info.write("{0}->{1} ".format(
                         Utils.getCharAtStringIO(txt, k),
                         SourceOfAnalysis.__m_rus_chars[idx]))
                 Utils.setCharAtStringIO(
                     txt, k, SourceOfAnalysis.__m_rus_chars[idx])
             replaced += ambiguous_count
             prev_word_rus = True
         # Continue after the character that ended the word.
         pos = word_end + 1
     return replaced
Example #22
0
 def __attach_uri_content(
         t0: 'Token',
         chars_: str,
         can_be_whitespaces: bool = False) -> 'UriItemToken':
     """Collect the URI content that starts at t0 into a UriItemToken.

     Args:
         t0(Token): first token of the candidate content
         chars_(str): non-letter characters that may appear in the content
         can_be_whitespaces(bool): allow the content to continue across
             whitespace (never across a newline) under the checks below

     Returns:
         UriItemToken: the collected item; the domain item alone when no
         further usable text was collected; None when the domain value is
         shorter than 3 characters or the content is just "WWW"
     """
     txt = io.StringIO()
     t1 = t0
     # presumably parses a leading domain name at t0 — confirm against
     # UriItemToken.attach_domain_name
     dom = UriItemToken.attach_domain_name(t0, True, can_be_whitespaces)
     if (dom is not None):
         if (len(dom.value) < 3):
             return None
     # Currently open bracket ('(' or '['); chr(0) means none is open.
     open_char = chr(0)
     t = t0
     if (dom is not None):
         t = dom.end_token.next0_
     # Emulates "for (...; t is not None; t = t.next0_)": the flag skips the
     # advance on the first pass so that "continue" still advances.
     first_pass3411 = True
     while True:
         if first_pass3411: first_pass3411 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t != t0 and t.is_whitespace_before):
             # Whitespace inside the content: stop on a newline, when
             # whitespace is not allowed, or when no domain was found.
             if (t.is_newline_before or not can_be_whitespaces):
                 break
             if (dom is None):
                 break
             if (t.previous.is_hiphen):
                 pass
             elif (t.previous.is_char_of(",;")):
                 break
             elif (t.previous.is_char('.') and t.chars.is_letter
                   and t.length_char == 2):
                 pass
             else:
                 # Look ahead up to the next whitespace: continue only if a
                 # known page extension or a slash is found there.
                 ok = False
                 tt1 = t
                 if (t.is_char_of("\\/")):
                     tt1 = t.next0_
                 tt0 = tt1
                 first_pass3412 = True
                 while True:
                     if first_pass3412: first_pass3412 = False
                     else: tt1 = tt1.next0_
                     if (not (tt1 is not None)): break
                     if (tt1 != tt0 and tt1.is_whitespace_before):
                         break
                     if (isinstance(tt1, NumberToken)):
                         continue
                     if (not (isinstance(tt1, TextToken))):
                         break
                     term1 = tt1.term
                     if (((term1 == "HTM" or term1 == "HTML" or term1
                           == "SHTML") or term1 == "ASP" or term1 == "ASPX")
                             or term1 == "JSP"):
                         ok = True
                         break
                     if (not tt1.chars.is_letter):
                         if (tt1.is_char_of("\\/")):
                             ok = True
                             break
                         if (not tt1.is_char_of(chars_)):
                             break
                     elif (not tt1.chars.is_latin_letter):
                         break
                 if (not ok):
                     break
         if (isinstance(t, NumberToken)):
             nt = Utils.asObjectOrNull(t, NumberToken)
             print(nt.get_source_text(), end="", file=txt)
             t1 = t
             continue
         tt = Utils.asObjectOrNull(t, TextToken)
         if (tt is None):
             # Not a text token: a referent token is accepted only in the
             # two cases below, anything else ends the content.
             rt = Utils.asObjectOrNull(t, ReferentToken)
             if (rt is not None and rt.begin_token.is_value("РФ", None)):
                 # "РФ" is taken only directly after a dot.
                 if (txt.tell() > 0 and Utils.getCharAtStringIO(
                         txt,
                         txt.tell() - 1) == '.'):
                     print(rt.begin_token.get_source_text(),
                           end="",
                           file=txt)
                     t1 = t
                     continue
             if (rt is not None and rt.chars.is_latin_letter
                     and rt.begin_token == rt.end_token):
                 print(rt.begin_token.get_source_text(), end="", file=txt)
                 t1 = t
                 continue
             break
         src = tt.get_source_text()
         ch = src[0]
         if (not str.isalpha(ch)):
             # Non-letters must be listed in chars_; closing brackets must
             # match the bracket recorded in open_char.
             if (chars_.find(ch) < 0):
                 break
             if (ch == '(' or ch == '['):
                 open_char = ch
             elif (ch == ')'):
                 if (open_char != '('):
                     break
                 open_char = (chr(0))
             elif (ch == ']'):
                 if (open_char != '['):
                     break
                 open_char = (chr(0))
         print(src, end="", file=txt)
         t1 = t
     if (txt.tell() == 0):
         return dom
     # The collected text must contain at least one letter or digit.
     i = 0
     i = 0
     while i < txt.tell():
         if (str.isalnum(Utils.getCharAtStringIO(txt, i))):
             break
         i += 1
     if (i >= txt.tell()):
         return dom
     # Drop one trailing '.' or '/' and step the end token back with it.
     if (Utils.getCharAtStringIO(txt,
                                 txt.tell() - 1) == '.'
             or Utils.getCharAtStringIO(txt,
                                        txt.tell() - 1) == '/'):
         Utils.setLengthStringIO(txt, txt.tell() - 1)
         t1 = t1.previous
     if (dom is not None):
         Utils.insertStringIO(txt, 0, dom.value)
     tmp = Utils.toStringStringIO(txt)
     # A leading double backslash is rewritten to "//" in the buffer.
     if (tmp.startswith("\\\\")):
         Utils.replaceStringIO(txt, "\\\\", "//")
         tmp = Utils.toStringStringIO(txt)
     # The "//" prefix is ignored only for the "WWW" check; the stored value
     # keeps it.
     if (tmp.startswith("//")):
         tmp = tmp[2:]
     if (Utils.compareStrings(tmp, "WWW", True) == 0):
         return None
     res = UriItemToken._new2706(t0, t1, Utils.toStringStringIO(txt))
     return res
Example #23
0
 def _ToString(self, short_variant : bool, lang : 'MorphLang', lev : int, from_range : int) -> str:
     """Render this date as text in Russian, Ukrainian or English.

     Args:
         short_variant(bool): use the short year suffix ("г" / "р")
         lang(MorphLang): output language; MorphLang.RU is used when None
         lev(int): nesting level (not used by this method)
         from_range(int): 0 for a standalone date; 1 adds the range-start
             prefix ("с" / "з" / "from"); 2 adds the range-end prefix
             ("по" / "to")

     Returns:
         str: the rendered text, or "?" when nothing was rendered
     """
     res = io.StringIO()
     p = self.pointer
     if (lang is None):
         lang = MorphLang.RU
     if (from_range == 1):
         print("{0} ".format(("з" if lang.is_ua else ("from" if lang.is_en else "с"))), end="", file=res)
     elif (from_range == 2):
         print(("to " if lang.is_en else "по "), end="", file=res)
     if (p != DatePointerType.NO):
         val = MetaDate.POINTER.convertInnerValueToOuterValue(Utils.enumToString(p), lang)
         # Inside a range (and not in English) the pointer word is replaced
         # by the form that fits the range prefix.
         if (from_range == 0 or lang.is_en):
             pass
         elif (from_range == 1):
             if (p == DatePointerType.BEGIN):
                 val = ("початку" if lang.is_ua else "начала")
             elif (p == DatePointerType.CENTER):
                 val = ("середини" if lang.is_ua else "середины")
             elif (p == DatePointerType.END):
                 val = ("кінця" if lang.is_ua else "конца")
             elif (p == DatePointerType.TODAY):
                 val = ("цього часу" if lang.is_ua else "настоящего времени")
         elif (from_range == 2):
             if (p == DatePointerType.BEGIN):
                 val = ("початок" if lang.is_ua else "начало")
             elif (p == DatePointerType.CENTER):
                 val = ("середину" if lang.is_ua else "середину")
             elif (p == DatePointerType.END):
                 val = ("кінець" if lang.is_ua else "конец")
             elif (p == DatePointerType.TODAY):
                 val = ("теперішній час" if lang.is_ua else "настоящее время")
         print("{0} ".format(val), end="", file=res)
     if (self.day_of_week > 0):
         if (lang.is_en):
             print("{0}, ".format(DateReferent.__m_week_day_en[self.day_of_week - 1]), end="", file=res)
         else:
             print("{0}, ".format(DateReferent.__m_week_day[self.day_of_week - 1]), end="", file=res)
     y = self.year
     m = self.month
     d = self.day
     cent = self.century
     if (y == 0 and cent != 0):
         # Century-only date: Roman numeral plus the word for "century".
         is_bc = cent < 0
         if (cent < 0):
             cent = (- cent)
         print(NumberHelper.getNumberRoman(cent), end="", file=res)
         # The second word form is used when a month, a pointer or a
         # range-start prefix is present.
         dependent_form = (m > 0 or p != DatePointerType.NO or from_range == 1)
         # Fix: " century" used to be printed for Ukrainian, which left
         # English with the Russian word and made the Ukrainian forms
         # unreachable. Precedence is Ukrainian, English, then Russian,
         # as elsewhere in this method.
         if (lang.is_ua):
             print((" віка" if dependent_form else " вік"), end="", file=res)
         elif (lang.is_en):
             print(" century", end="", file=res)
         else:
             print((" века" if dependent_form else " век"), end="", file=res)
         if (is_bc):
             print((" до н.е." if lang.is_ua else (" BC" if lang.is_en else " до н.э.")), end="", file=res)
         return Utils.toStringStringIO(res)
     if (d > 0):
         print(d, end="", file=res)
     if (m > 0 and m <= 12):
         if (res.tell() > 0 and Utils.getCharAtStringIO(res, res.tell() - 1) != ' '):
             print(' ', end="", file=res)
         # The first month table is used when a day, a pointer or a range
         # prefix is present; otherwise the "0" table is used.
         if (lang.is_ua):
             print((DateReferent.__m_monthua[m - 1] if d > 0 or p != DatePointerType.NO or from_range != 0 else DateReferent.__m_month0ua[m - 1]), end="", file=res)
         elif (lang.is_en):
             print(DateReferent.__m_monthen[m - 1], end="", file=res)
         else:
             print((DateReferent.__m_month[m - 1] if d > 0 or p != DatePointerType.NO or from_range != 0 else DateReferent.__m_month0[m - 1]), end="", file=res)
     if (y != 0):
         is_bc = y < 0
         if (y < 0):
             y = (- y)
         if (res.tell() > 0 and Utils.getCharAtStringIO(res, res.tell() - 1) != ' '):
             print(' ', end="", file=res)
         if (lang is not None and lang.is_en):
             print("{0}".format(y), end="", file=res)
         elif (short_variant):
             print("{0}{1}".format(y, ("р" if lang.is_ua else "г")), end="", file=res)
         elif (m > 0 or p != DatePointerType.NO or from_range == 1):
             print("{0} {1}".format(y, ("року" if lang.is_ua else "года")), end="", file=res)
         else:
             print("{0} {1}".format(y, ("рік" if lang.is_ua else "год")), end="", file=res)
         if (is_bc):
             print((" до н.е." if lang.is_ua else ("BC" if lang.is_en else " до н.э.")), end="", file=res)
     h = self.hour
     mi = self.minute
     se = self.second
     if (h >= 0 and mi >= 0):
         if (res.tell() > 0):
             print(' ', end="", file=res)
         print("{0}:{1}".format("{:02d}".format(h), "{:02d}".format(mi)), end="", file=res)
         if (se >= 0):
             print(":{0}".format("{:02d}".format(se)), end="", file=res)
     if (res.tell() == 0):
         return "?"
     # Trim trailing spaces and commas left by the pieces above.
     while Utils.getCharAtStringIO(res, res.tell() - 1) == ' ' or Utils.getCharAtStringIO(res, res.tell() - 1) == ',':
         Utils.setLengthStringIO(res, res.tell() - 1)
     return Utils.toStringStringIO(res).strip()