def __find_for_surname(self,
                       attr_name: str,
                       surname: str,
                       find_shortest: bool = False) -> str:
     """Return a slot value of type ``attr_name`` in the same script as ``surname``.

     A slot value matches when the Cyrillic-ness of its first character is
     equal to the Cyrillic-ness of the first character of ``surname``.

     Args:
         attr_name: slot type name to look for in ``self.slots``.
         surname: reference string; only its first character is inspected.
         find_shortest: when True the shortest matching value is returned,
             otherwise the first matching value wins.

     Returns:
         The selected value converted with ``str()``, or None when no slot
         matches or ``surname`` is empty.
     """
     # An empty surname has no first character to derive the script from.
     if (not surname):
         return None
     rus = LanguageHelper.is_cyrillic_char(surname[0])
     res = None
     for a in self.slots:
         if (a.type_name != attr_name):
             continue
         v = str(a.value)
         # Skip empty values instead of failing on v[0].
         if (not v):
             continue
         if (LanguageHelper.is_cyrillic_char(v[0]) != rus):
             continue
         if (res is None or (find_shortest and len(v) < len(res))):
             res = v
     return res
 def to_string(self,
               short_variant: bool,
               lang: 'MorphLang' = None,
               lev: int = 0) -> str:
     """Compose the textual representation of this weapon referent.

     The text is assembled in this order: shortest type (lower-cased),
     brand, model, quoted name (plus one alternative name written in the
     other script, in parentheses) and the number.

     Args:
         short_variant: not used by this implementation.
         lang: not used by this implementation.
         lev: not used by this implementation.

     Returns:
         The assembled string.
     """
     out = io.StringIO()
     # The shortest ATTR_TYPE value opens the description.
     shortest_type = None
     for slot in self.slots:
         if (slot.type_name != WeaponReferent.ATTR_TYPE):
             continue
         candidate = slot.value
         if (shortest_type is None or len(candidate) < len(shortest_type)):
             shortest_type = candidate
     if (shortest_type is not None):
         out.write(str(shortest_type.lower()))
     brand = self.get_string_value(WeaponReferent.ATTR_BRAND)
     if (brand is not None):
         out.write(" {0}".format(
             MiscHelper.convert_first_char_upper_and_other_lower(brand)))
     model = self.get_string_value(WeaponReferent.ATTR_MODEL)
     if (model is not None):
         out.write(" {0}".format(model))
     name = self.get_string_value(WeaponReferent.ATTR_NAME)
     if (name is not None):
         out.write(" \"{0}\"".format(
             MiscHelper.convert_first_char_upper_and_other_lower(name)))
         # Append the first other name whose script (Cyrillic or not)
         # differs from the main name's script.
         for slot in self.slots:
             if (slot.type_name != WeaponReferent.ATTR_NAME):
                 continue
             if (name == (slot.value)):
                 continue
             if (LanguageHelper.is_cyrillic_char(name[0]) ==
                     LanguageHelper.is_cyrillic_char(slot.value[0])):
                 continue
             out.write(" ({0})".format(
                 MiscHelper.convert_first_char_upper_and_other_lower(
                     slot.value)))
             break
     number = self.get_string_value(WeaponReferent.ATTR_NUMBER)
     if (number is not None):
         out.write(", номер {0}".format(number))
     return Utils.toStringStringIO(out)
Example #3
0
 def correct_word_by_morph(self, word: str, lang: 'MorphLang') -> str:
     """Delegate word correction to the engine chosen by script and language.

     Args:
         word: the word to correct; its first character decides between
             the Cyrillic engines and the English engine.
         lang: preferred language, may be None.

     Returns:
         Whatever the selected engine's ``correct_word_by_morph`` returns.
     """
     # Non-Cyrillic words always go to the English engine.
     if (not LanguageHelper.is_cyrillic_char(word[0])):
         return self.__m_engine_en.correct_word_by_morph(word)
     if (lang is None):
         return self.__m_engine_ru.correct_word_by_morph(word)
     if (self.__m_engine_ru.language.is_ru and lang.is_ru):
         return self.__m_engine_ru.correct_word_by_morph(word)
     if (self.__m_engine_ua.language.is_ua and lang.is_ua):
         return self.__m_engine_ua.correct_word_by_morph(word)
     if (self.__m_engine_by.language.is_by and lang.is_by):
         return self.__m_engine_by.correct_word_by_morph(word)
     if (self.__m_engine_kz.language.is_kz and lang.is_kz):
         return self.__m_engine_kz.correct_word_by_morph(word)
     # No engine matched the requested language: fall back to Russian.
     return self.__m_engine_ru.correct_word_by_morph(word)
 def set_shortest_canonical_text(
         self, ignore_termins_with_notnull_tags: bool = False) -> None:
     """Store the shortest Cyrillic canonical text among ``self.termins``.

     The stored text is first reset to None. Termins without terms, and
     termins whose canonical text does not start with a Cyrillic
     character, are ignored.

     Args:
         ignore_termins_with_notnull_tags: when True, termins whose ``tag``
             is not None are ignored as well.
     """
     self.__m_canonic_text = (None)
     for termin in self.termins:
         if ((ignore_termins_with_notnull_tags and termin.tag is not None)
                 or len(termin.terms) == 0):
             continue
         text = termin.canonic_text
         if (not LanguageHelper.is_cyrillic_char(text[0])):
             continue
         # Keep the first candidate, then only strictly shorter ones.
         if (self.__m_canonic_text is None
                 or len(text) < len(self.__m_canonic_text)):
             self.__m_canonic_text = text
 def get_all_wordforms(self, word: str,
                       lang: 'MorphLang') -> typing.List['MorphWordForm']:
     """Delegate word-form enumeration to a shared ``InnerMorphology`` engine.

     Args:
         word: the word; its first character decides between the Cyrillic
             engines and the English engine.
         lang: preferred language, may be None.

     Returns:
         Whatever the selected engine's ``get_all_wordforms`` returns.
     """
     # Non-Cyrillic words always go to the English engine.
     if (not LanguageHelper.is_cyrillic_char(word[0])):
         return InnerMorphology.M_ENGINE_EN.get_all_wordforms(word)
     if (lang is None):
         return InnerMorphology.M_ENGINE_RU.get_all_wordforms(word)
     if (InnerMorphology.M_ENGINE_RU.language.is_ru and lang.is_ru):
         return InnerMorphology.M_ENGINE_RU.get_all_wordforms(word)
     if (InnerMorphology.M_ENGINE_UA.language.is_ua and lang.is_ua):
         return InnerMorphology.M_ENGINE_UA.get_all_wordforms(word)
     if (InnerMorphology.M_ENGINE_BY.language.is_by and lang.is_by):
         return InnerMorphology.M_ENGINE_BY.get_all_wordforms(word)
     if (InnerMorphology.M_ENGINE_KZ.language.is_kz and lang.is_kz):
         return InnerMorphology.M_ENGINE_KZ.get_all_wordforms(word)
     # No engine matched the requested language: fall back to Russian.
     return InnerMorphology.M_ENGINE_RU.get_all_wordforms(word)
Example #6
0
 def get_all_wordforms(self, word: str,
                       lang: 'MorphLang') -> typing.List['MorphWordForm']:
     """Delegate word-form enumeration to one of this object's engines.

     Args:
         word: the word; its first character decides between the Cyrillic
             engines and the English engine.
         lang: preferred language, may be None.

     Returns:
         Whatever the selected engine's ``get_all_wordforms`` returns.
     """
     if (not LanguageHelper.is_cyrillic_char(word[0])):
         return self.__m_engine_en.get_all_wordforms(word)
     selected = None
     if (lang is not None):
         # The first engine whose own language flag and the requested
         # language flag are both set wins.
         if (self.__m_engine_ru.language.is_ru and lang.is_ru):
             selected = self.__m_engine_ru
         elif (self.__m_engine_ua.language.is_ua and lang.is_ua):
             selected = self.__m_engine_ua
         elif (self.__m_engine_by.language.is_by and lang.is_by):
             selected = self.__m_engine_by
         elif (self.__m_engine_kz.language.is_kz and lang.is_kz):
             selected = self.__m_engine_kz
     if (selected is None):
         # Russian is the default for Cyrillic words.
         selected = self.__m_engine_ru
     return selected.get_all_wordforms(word)
 def correct_word_by_morph(self, word: str, lang: 'MorphLang') -> str:
     """Delegate word correction to a shared ``InnerMorphology`` engine.

     Args:
         word: the word to correct; its first character decides between
             the Cyrillic engines and the English engine.
         lang: preferred language, may be None.

     Returns:
         Whatever the selected engine's ``correct_word_by_morph`` returns.
     """
     if (not LanguageHelper.is_cyrillic_char(word[0])):
         return InnerMorphology.M_ENGINE_EN.correct_word_by_morph(word)
     selected = None
     if (lang is not None):
         # The first engine whose own language flag and the requested
         # language flag are both set wins.
         if (InnerMorphology.M_ENGINE_RU.language.is_ru and lang.is_ru):
             selected = InnerMorphology.M_ENGINE_RU
         elif (InnerMorphology.M_ENGINE_UA.language.is_ua and lang.is_ua):
             selected = InnerMorphology.M_ENGINE_UA
         elif (InnerMorphology.M_ENGINE_BY.language.is_by and lang.is_by):
             selected = InnerMorphology.M_ENGINE_BY
         elif (InnerMorphology.M_ENGINE_KZ.language.is_kz and lang.is_kz):
             selected = InnerMorphology.M_ENGINE_KZ
     if (selected is None):
         # Russian is the default for Cyrillic words.
         selected = InnerMorphology.M_ENGINE_RU
     return selected.correct_word_by_morph(word)
 def __correct_model(self) -> None:
     """Extend ``self.value`` with a number/letter suffix that follows the token.

     When the token after ``self.end_token`` (optionally preceded by a
     separator token) is a number, the number and any directly attached
     single letters are appended to ``self.value`` after a hyphen, and
     ``self.alt_value`` is rebuilt from the new value. Afterwards a
     trailing "-<number>" or "/<number>" written without spaces is appended
     too. ``self.end_token`` is moved over every consumed token.
     """
     nxt = self.end_token.next0_
     if (nxt is None or nxt.whitespaces_before_count > 2):
         return
     if (nxt.is_value(":\\/.", None) or nxt.is_hiphen):
         nxt = nxt.next0_
     if (isinstance(nxt, NumberToken)):
         suffix = io.StringIO()
         suffix.write(str(nxt.value))
         latin_model = LanguageHelper.is_latin_char(self.value[0])
         self.end_token = nxt
         cur = nxt.next0_
         while cur is not None:
             # Only single-letter text tokens can continue the suffix.
             if (not isinstance(cur, TextToken) or cur.length_char != 1
                     or not cur.chars.is_letter):
                 break
             # The letter must be glued to what precedes it, or follow a hyphen.
             if (cur.is_whitespace_before and not
                     (cur.previous is not None and cur.previous.is_hiphen)):
                 break
             letter = cur.term[0]
             self.end_token = cur
             # Bring the letter to the script of the model value when a
             # counterpart exists (chr(0) signals "no counterpart").
             if (LanguageHelper.is_latin_char(letter) and not latin_model):
                 mapped = LanguageHelper.get_cyr_for_lat(letter)
                 if (mapped != (chr(0))):
                     letter = mapped
             elif (LanguageHelper.is_cyrillic_char(letter) and latin_model):
                 mapped = LanguageHelper.get_lat_for_cyr(letter)
                 if (mapped != (chr(0))):
                     letter = mapped
             suffix.write(str(letter))
             cur = cur.next0_
         self.value = "{0}-{1}".format(self.value, Utils.toStringStringIO(suffix))
         self.alt_value = MiscHelper.create_cyr_lat_alternative(self.value)
     last = self.end_token
     if (last.is_whitespace_after or last.next0_ is None):
         return
     sep = last.next0_
     if (not (sep.is_hiphen or sep.is_char_of("\\/"))):
         return
     if (sep.is_whitespace_after or not isinstance(sep.next0_, NumberToken)):
         return
     self.end_token = sep.next0_
     self.value = "{0}-{1}".format(self.value, self.end_token.value)
     if (self.alt_value is not None):
         self.alt_value = "{0}-{1}".format(self.alt_value, self.end_token.value)
 def get_wordform(self, word: str, cla: 'MorphClass', gender: 'MorphGender',
                  cas: 'MorphCase', num: 'MorphNumber', lang: 'MorphLang',
                  add_info: 'MorphWordForm') -> str:
     """Delegate word-form generation to a shared ``InnerMorphology`` engine.

     Args:
         word: the source word; its first character decides between the
             Cyrillic engines and the English engine.
         cla: passed through to the engine's ``get_wordform``.
         gender: passed through to the engine's ``get_wordform``.
         cas: passed through to the engine's ``get_wordform``.
         num: passed through to the engine's ``get_wordform``.
         lang: preferred language; None selects the Russian engine for
             Cyrillic words, the same fallback ``get_all_wordforms`` and
             ``correct_word_by_morph`` use.
         add_info: passed through to the engine's ``get_wordform``.

     Returns:
         Whatever the selected engine's ``get_wordform`` returns.
     """
     if (not LanguageHelper.is_cyrillic_char(word[0])):
         return InnerMorphology.M_ENGINE_EN.get_wordform(
             word, cla, gender, cas, num, add_info)
     engine = InnerMorphology.M_ENGINE_RU
     # Guard against lang being None before reading its language flags.
     if (lang is not None):
         if (InnerMorphology.M_ENGINE_RU.language.is_ru and lang.is_ru):
             engine = InnerMorphology.M_ENGINE_RU
         elif (InnerMorphology.M_ENGINE_UA.language.is_ua and lang.is_ua):
             engine = InnerMorphology.M_ENGINE_UA
         elif (InnerMorphology.M_ENGINE_BY.language.is_by and lang.is_by):
             engine = InnerMorphology.M_ENGINE_BY
         elif (InnerMorphology.M_ENGINE_KZ.language.is_kz and lang.is_kz):
             engine = InnerMorphology.M_ENGINE_KZ
     return engine.get_wordform(word, cla, gender, cas, num, add_info)
Example #10
0
 def get_wordform(self, word: str, cla: 'MorphClass', gender: 'MorphGender',
                  cas: 'MorphCase', num: 'MorphNumber', lang: 'MorphLang',
                  add_info: 'MorphWordForm') -> str:
     """Delegate word-form generation to one of this object's engines.

     Args:
         word: the source word; its first character decides between the
             Cyrillic engines and the English engine.
         cla: passed through to the engine's ``get_wordform``.
         gender: passed through to the engine's ``get_wordform``.
         cas: passed through to the engine's ``get_wordform``.
         num: passed through to the engine's ``get_wordform``.
         lang: preferred language; None selects the Russian engine for
             Cyrillic words, the same fallback ``get_all_wordforms`` and
             ``correct_word_by_morph`` use.
         add_info: passed through to the engine's ``get_wordform``.

     Returns:
         Whatever the selected engine's ``get_wordform`` returns.
     """
     if (not LanguageHelper.is_cyrillic_char(word[0])):
         return self.__m_engine_en.get_wordform(word, cla, gender, cas, num,
                                                add_info)
     engine = self.__m_engine_ru
     # Guard against lang being None before reading its language flags.
     if (lang is not None):
         if (self.__m_engine_ru.language.is_ru and lang.is_ru):
             engine = self.__m_engine_ru
         elif (self.__m_engine_ua.language.is_ua and lang.is_ua):
             engine = self.__m_engine_ua
         elif (self.__m_engine_by.language.is_by and lang.is_by):
             engine = self.__m_engine_by
         elif (self.__m_engine_kz.language.is_kz and lang.is_kz):
             engine = self.__m_engine_kz
     return engine.get_wordform(word, cla, gender, cas, num, add_info)
Example #11
0
 def process(self, word: str) -> typing.List['MorphWordForm']:
     """ Process a single word.

     The word is looked up by walking the tree that starts at
     ``self.m_root``. When the collected forms do not look sufficient,
     extra variants taken from the reversed tree ``self.m_root_reverce``
     are added (they receive an ``undef_coef``).

     Args:
         word(str): the word; it must be in upper case

     Returns:
         The list of word forms after ``MorphEngine.__sort``, or None when
         nothing was found.
     """
     if (Utils.isNullOrEmpty(word)):
         return None
     res = None
     # A multi-character word that contains no vowel at all is not analysed.
     if (len(word) > 1 and not any(
             LanguageHelper.is_cyrillic_vowel(c)
             or LanguageHelper.is_latin_vowel(c) for c in word)):
         return None
     mvs = []
     tn = self.m_root
     i = 0
     # Descend the tree one character at a time; at every node apply its
     # rules to the not-yet-consumed tail of the word.
     while i <= len(word):
         if (tn.lazy_pos > 0):
             self.__load_tree_node(tn)
         if (tn.rules is not None):
             word_begin = None
             word_end = None
             if (i == 0):
                 word_end = word
             elif (i < len(word)):
                 word_end = word[i:]
             else:
                 word_end = ""
             if (res is None):
                 res = list()
             for r in tn.rules:
                 wrapmvs20 = RefOutArgWrapper(None)
                 inoutres21 = Utils.tryGetValue(r.variants, word_end,
                                                wrapmvs20)
                 mvs = wrapmvs20.value
                 if (inoutres21):
                     # The consumed head is computed lazily, once per node.
                     if (word_begin is None):
                         if (i == len(word)):
                             word_begin = word
                         elif (i > 0):
                             word_begin = word[0:0 + i]
                         else:
                             word_begin = ""
                     r.process_result(res, word_begin, mvs)
         if (tn.nodes is None or i >= len(word)):
             break
         ch = ord(word[i])
         wraptn22 = RefOutArgWrapper(None)
         inoutres23 = Utils.tryGetValue(tn.nodes, ch, wraptn22)
         tn = wraptn22.value
         if (not inoutres23):
             break
         i += 1
     # Decide whether variants from the reversed tree must be tried too.
     need_test_unknown_vars = True
     if (res is not None):
         for r in res:
             if ((r.class0_.is_pronoun or r.class0_.is_noun
                  or r.class0_.is_adjective)
                     or (r.class0_.is_misc and r.class0_.is_conjunction)
                     or r.class0_.is_preposition):
                 need_test_unknown_vars = False
             elif (r.class0_.is_adverb and r.normal_case is not None):
                 if (not LanguageHelper.ends_with_ex(
                         r.normal_case, "О", "А", None, None)):
                     need_test_unknown_vars = False
                 elif (r.normal_case == "МНОГО"):
                     need_test_unknown_vars = False
             elif (r.class0_.is_verb and len(res) > 1):
                 ok = False
                 for rr in res:
                     if (rr != r and rr.class0_ != r.class0_):
                         ok = True
                         break
                 if (ok and not LanguageHelper.ends_with(word, "ИМ")):
                     need_test_unknown_vars = False
     if (need_test_unknown_vars
             and LanguageHelper.is_cyrillic_char(word[0])):
         # Cyrillic words need at least two vowels and two non-vowels.
         gl = sum(1 for c in word if LanguageHelper.is_cyrillic_vowel(c))
         sog = len(word) - gl
         if ((gl < 2) or (sog < 2)):
             need_test_unknown_vars = False
     if (need_test_unknown_vars and res is not None and len(res) == 1):
         if (res[0].class0_.is_verb):
             if ("н.вр." in res[0].misc.attrs
                     and "нес.в." in res[0].misc.attrs
                     and not "страд.з." in res[0].misc.attrs):
                 need_test_unknown_vars = False
             elif ("б.вр." in res[0].misc.attrs
                   and "сов.в." in res[0].misc.attrs):
                 need_test_unknown_vars = False
             elif (res[0].normal_case is not None
                   and LanguageHelper.ends_with(res[0].normal_case, "СЯ")):
                 need_test_unknown_vars = False
         if (res[0].class0_.is_undefined
                 and "прдктв." in res[0].misc.attrs):
             need_test_unknown_vars = False
     if (need_test_unknown_vars):
         if (self.m_root_reverce is None):
             return res
         tn = self.m_root_reverce
         tn0 = None
         # Walk the reversed tree from the last character backwards until
         # a node with reverse variants is reached; ``i`` is only read
         # afterwards when such a node was found.
         for i in range(len(word) - 1, -1, -1):
             if (tn.lazy_pos > 0):
                 self.__load_tree_node(tn)
             ch = ord(word[i])
             if (tn.nodes is None):
                 break
             wrapnext24 = RefOutArgWrapper(None)
             inoutres25 = Utils.tryGetValue(tn.nodes, ch, wrapnext24)
             next0_ = wrapnext24.value
             if (not inoutres25):
                 break
             tn = next0_
             if (tn.lazy_pos > 0):
                 self.__load_tree_node(tn)
             if (tn.reverce_variants is not None):
                 tn0 = tn
                 break
         if (tn0 is not None):
             # The part of the word before the matched tail must contain a
             # vowel, unless the match starts within the first four characters.
             glas = i < 4
             while i >= 0:
                 if (LanguageHelper.is_cyrillic_vowel(word[i])
                         or LanguageHelper.is_latin_vowel(word[i])):
                     glas = True
                     break
                 i -= 1
             if (glas):
                 for mv in tn0.reverce_variants:
                     if (((not mv.class0_.is_verb
                           and not mv.class0_.is_adjective
                           and not mv.class0_.is_noun)
                          and not mv.class0_.is_proper_surname
                          and not mv.class0_.is_proper_geo)
                             and not mv.class0_.is_proper_secname):
                         continue
                     ok = False
                     # res is still None when no tree node had rules;
                     # iterate an empty tuple then instead of failing.
                     for rr in (res if res is not None else ()):
                         if (rr.is_in_dictionary):
                             if (rr.class0_ == mv.class0_
                                     or rr.class0_.is_noun):
                                 ok = True
                                 break
                             if (not mv.class0_.is_adjective
                                     and rr.class0_.is_verb):
                                 ok = True
                                 break
                     if (ok):
                         continue
                     if (len(mv.tail) > 0 and
                             not LanguageHelper.ends_with(word, mv.tail)):
                         continue
                     r = MorphWordForm(mv, word)
                     if (not MorphWordForm._has_morph_equals(res, r)):
                         r.undef_coef = mv.coef
                         if (res is None):
                             res = list()
                         res.append(r)
     if (word == "ПРИ" and res is not None):
         # Iterate backwards so deleting by index stays valid.
         for i in range(len(res) - 1, -1, -1):
             if (res[i].class0_.is_proper_geo):
                 del res[i]
     if (res is None or len(res) == 0):
         return None
     MorphEngine.__sort(res, word)
     for v in res:
         if (v.normal_case is None):
             v.normal_case = word
         if (v.class0_.is_verb):
             if (v.normal_full is None
                     and LanguageHelper.ends_with(v.normal_case, "ТЬСЯ")):
                 # Drop the last two characters of the normal case.
                 v.normal_full = v.normal_case[0:0 + len(v.normal_case) - 2]
         v.language = self.language
         if (v.class0_.is_preposition):
             v.normal_case = LanguageHelper.normalize_preposition(
                 v.normal_case)
     mc = MorphClass()
     # Backward scan: drop some non-dictionary adjective forms and
     # accumulate the classes of the dictionary forms in ``mc``.
     for i in range(len(res) - 1, -1, -1):
         if (not res[i].is_in_dictionary and res[i].class0_.is_adjective
                 and len(res) > 1):
             if ("к.ф." in res[i].misc.attrs
                     or "неизм." in res[i].misc.attrs):
                 del res[i]
                 continue
         if (res[i].is_in_dictionary):
             mc.value |= res[i].class0_.value
     if (mc == MorphClass.VERB and len(res) > 1):
         for r in res:
             if (r.undef_coef > (100)
                     and r.class0_ == MorphClass.ADJECTIVE):
                 r.undef_coef = (0)
     if (len(res) == 0):
         return None
     return res
Example #12
0
 def to_string(self, short_variant : bool, lang : 'MorphLang', lev : int=0) -> str:
     """Compose the textual representation of this transport referent.

     The text is assembled in this order: shortest type (or a default word
     derived from ``self.kind``), brand, model, quoted name (plus one
     alternative name in the other script), class, number with region,
     route points and, unless ``short_variant`` is set, geo and org.

     Args:
         short_variant: when True the geo and org parts are omitted.
         lang: forwarded to ``to_string`` of route points that are referents.
         lev: not used by this implementation.

     Returns:
         The assembled string.
     """
     out = io.StringIO()
     shortest_type = None
     for slot in self.slots:
         if (slot.type_name != TransportReferent.ATTR_TYPE):
             continue
         candidate = slot.value
         if (shortest_type is None or (len(candidate) < len(shortest_type))):
             shortest_type = candidate
     # Without an explicit type fall back to a word for the transport kind.
     if (shortest_type is not None):
         out.write(str(shortest_type))
     elif (self.kind == TransportKind.AUTO):
         out.write("автомобиль")
     elif (self.kind == TransportKind.FLY):
         out.write("самолет")
     elif (self.kind == TransportKind.SHIP):
         out.write("судно")
     elif (self.kind == TransportKind.SPACE):
         out.write("космический корабль")
     else:
         out.write(str(Utils.enumToString(self.kind)))
     brand = self.get_string_value(TransportReferent.ATTR_BRAND)
     if (brand is not None):
         out.write(" {0}".format(MiscHelper.convert_first_char_upper_and_other_lower(brand)))
     model = self.get_string_value(TransportReferent.ATTR_MODEL)
     if (model is not None):
         out.write(" {0}".format(MiscHelper.convert_first_char_upper_and_other_lower(model)))
     name = self.get_string_value(TransportReferent.ATTR_NAME)
     if (name is not None):
         out.write(" \"{0}\"".format(MiscHelper.convert_first_char_upper_and_other_lower(name)))
         # Append the first other name whose script (Cyrillic or not)
         # differs from the main name's script.
         for slot in self.slots:
             if (slot.type_name != TransportReferent.ATTR_NAME):
                 continue
             if (name == (slot.value)):
                 continue
             if (LanguageHelper.is_cyrillic_char(name[0]) == LanguageHelper.is_cyrillic_char(slot.value[0])):
                 continue
             out.write(" ({0})".format(MiscHelper.convert_first_char_upper_and_other_lower(slot.value)))
             break
     class_name = self.get_string_value(TransportReferent.ATTR_CLASS)
     if (class_name is not None):
         out.write(" класса \"{0}\"".format(MiscHelper.convert_first_char_upper_and_other_lower(class_name)))
     number = self.get_string_value(TransportReferent.ATTR_NUMBER)
     if (number is not None):
         out.write(", номер {0}".format(number))
         # The region is written right after the number, with no separator.
         region = self.get_string_value(TransportReferent.ATTR_NUMBER_REGION)
         if (region is not None):
             out.write(str(region))
     if (self.find_slot(TransportReferent.ATTR_ROUTEPOINT, None, True) is not None):
         out.write(" (")
         need_separator = False
         for slot in self.slots:
             if (slot.type_name != TransportReferent.ATTR_ROUTEPOINT):
                 continue
             if (need_separator):
                 out.write(" - ")
             need_separator = True
             if (isinstance(slot.value, Referent)):
                 out.write(str(slot.value.to_string(True, lang, 0)))
             else:
                 out.write(str(slot.value))
         out.write(")")
     if (not short_variant):
         for attr in (TransportReferent.ATTR_GEO, TransportReferent.ATTR_ORG):
             extra = self.get_string_value(attr)
             if (extra is not None):
                 out.write("; {0}".format(extra))
     return Utils.toStringStringIO(out)
Example #13
0
 def get_variants(rus_or_lat: str) -> typing.List[str]:
     """Enumerate transliteration variants of a Cyrillic or Latin string.

     The upper-cased input is cut, left to right, into the longest prefixes
     known to ``RusLatAccord.__get_accords()``; every combination of the
     matching accords then yields one candidate in the other script.

     Args:
         rus_or_lat: source text; the first character decides whether it
             is treated as Cyrillic or not.

     Returns:
         The list of variants. It is empty when the input is empty or when
         some position of the input cannot be matched by any accord.
     """
     variants = list()
     if (Utils.isNullOrEmpty(rus_or_lat)):
         return variants
     rus_or_lat = rus_or_lat.upper()
     is_rus = LanguageHelper.is_cyrillic_char(rus_or_lat[0])
     # columns[k] holds the alternative accords for the k-th matched chunk.
     columns = list()
     pos = 0
     while pos < len(rus_or_lat):
         longest = list()
         best_len = 0
         for acc in RusLatAccord.__get_accords():
             prefix = (acc.rus if is_rus else acc.lat)
             if (len(prefix) == 0):
                 continue
             if (len(prefix) < best_len):
                 continue
             if (not RusLatAccord.__is_pref(rus_or_lat, pos, prefix)):
                 continue
             # Tail-only accords must reach the very end of the input.
             if (acc.on_tail and (len(prefix) + pos) < len(rus_or_lat)):
                 continue
             if (len(prefix) > best_len):
                 best_len = len(prefix)
                 longest.clear()
             longest.append(acc)
         if (len(longest) == 0 or best_len == 0):
             return variants
         columns.append(longest)
         pos += best_len
     if (len(columns) == 0):
         return variants
     # picks[k] is the index of the accord currently chosen in columns[k].
     picks = [0] * len(columns)
     buf = io.StringIO()
     while True:
         Utils.setLengthStringIO(buf, 0)
         for column, pick in zip(columns, picks):
             chosen = column[pick]
             buf.write(str(chosen.lat if is_rus else chosen.rus))
         valid = True
         if (not is_rus):
             # In Cyrillic output 'Й' is accepted only right after a vowel.
             k = 0
             while k < buf.tell():
                 if (Utils.getCharAtStringIO(buf, k) == 'Й'):
                     if (k == 0 or not LanguageHelper.is_cyrillic_vowel(
                             Utils.getCharAtStringIO(buf, k - 1))):
                         valid = False
                         break
                 k += 1
         if (valid):
             variants.append(Utils.toStringStringIO(buf))
         # Advance the picks like an odometer, last column first.
         exhausted = True
         for k in range(len(picks) - 1, -1, -1):
             picks[k] += 1
             if (picks[k] < len(columns[k])):
                 exhausted = False
                 break
             picks[k] = 0
         if (exhausted):
             break
     return variants
Example #14
0
 def parse(t : 'Token', max_char : int, prev : 'LineToken') -> 'LineToken':
     """Parse one line of a (possible) list starting at token ``t``.

     Tokens are consumed until a ':' or an item-terminating ';' is met,
     a new line begins that does not continue the current one, or
     ``max_char`` is exceeded. The result is then marked as a list head or
     list item according to its first token.

     Args:
         t: first token of the line.
         max_char: tokens whose ``end_char`` is greater are not consumed.
         prev: the previously parsed line, or None; when it is a list item
             or list head, a leading number or single letter marks the new
             line as a list item too.

     Returns:
         The parsed ``ListHelper.LineToken``, or None when ``t`` is None or
         ends beyond ``max_char``.
     """
     from pullenti.morph.LanguageHelper import LanguageHelper
     from pullenti.ner.NumberToken import NumberToken
     from pullenti.ner.TextToken import TextToken
     from pullenti.ner.core.BracketHelper import BracketHelper
     from pullenti.ner.core.BracketParseAttr import BracketParseAttr
     from pullenti.ner.decree.DecreeReferent import DecreeReferent
     if (t is None or t.end_char > max_char):
         return None
     res = ListHelper.LineToken(t, t)
     while t is not None and t.end_char <= max_char:
         if (t.is_char(':')):
             if (res.is_newline_before and res.begin_token.is_value("ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                 res.is_list_head = True
             res.end_token = t
             break
         if (t.is_char(';')):
             # A ';' right after a decree reference does not end the item
             # when it is glued to the next token or another decree
             # reference follows.
             if (t.previous is not None and (isinstance(t.previous.get_referent(), DecreeReferent))):
                 if (not t.is_whitespace_after
                         or (t.next0_ is not None and (isinstance(t.next0_.get_referent(), DecreeReferent)))):
                     t = t.next0_
                     continue
             res.is_list_item = True
             res.end_token = t
             break
         if (t.is_char('(')):
             # Jump over a bracketed sequence as a whole.
             br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
             if (br is not None):
                 t = br.end_token
                 res.end_token = t
                 t = t.next0_
                 continue
         if (t.is_newline_before and t != res.begin_token):
             # A new line ends the current one unless it looks like a continuation.
             next0__ = True
             if (t.previous.is_comma or t.previous.is_and or t.is_char_of("(")):
                 next0__ = False
             elif (t.chars.is_letter or (isinstance(t, NumberToken))):
                 if (t.chars.is_all_lower):
                     next0__ = False
                 elif (t.previous.chars.is_letter):
                     next0__ = False
             if (next0__):
                 break
         res.end_token = t
         t = t.next0_
     if (res.begin_token.is_hiphen):
         res.is_list_item = (res.begin_token.next0_ is not None and not res.begin_token.next0_.is_hiphen)
     elif (res.begin_token.is_char_of("·")):
         res.is_list_item = True
         res.begin_token = res.begin_token.next0_
     elif (res.begin_token.next0_ is not None and ((res.begin_token.next0_.is_char(')') or ((prev is not None and ((prev.is_list_item or prev.is_list_head))))))):
         if (res.begin_token.length_char == 1 or (isinstance(res.begin_token, NumberToken))):
             res.is_list_item = True
             if ((isinstance(res.begin_token, NumberToken)) and res.begin_token.int_value is not None):
                 res.number = res.begin_token.int_value
             elif ((isinstance(res.begin_token, TextToken)) and res.begin_token.length_char == 1):
                 # A single letter is numbered by its offset from the
                 # first capital letter of its alphabet.
                 te = res.begin_token.term
                 if (LanguageHelper.is_cyrillic_char(te[0])):
                     res.number = ((ord(te[0])) - (ord('А')))
                 elif (LanguageHelper.is_latin_char(te[0])):
                     res.number = ((ord(te[0])) - (ord('A')))
     return res
Example #15
0
 def __to_full_string(self, last_name_first: bool,
                      lang: 'MorphLang') -> str:
     """Build the full display name of the person.

     Order of preference:
       1. the longest ATTR_IDENTITY value, if any;
       2. last name combined with first and middle name (initials get a
          trailing dot), followed by a Latin last-name variant in
          parentheses when the main last name is Cyrillic;
       3. first name with middle name, title and nickname;
       4. the string "?".

     Args:
         last_name_first: put the last name before the first name; forced
             to True when the "NAMETYPE" value is "china".
         lang: not used by this implementation.

     Returns:
         The composed name.
     """
     id0_ = None
     for a in self.slots:
         if (a.type_name == PersonReferent.ATTR_IDENTITY):
             s = str(a.value)
             if (id0_ is None or len(s) > len(id0_)):
                 id0_ = s
     if (id0_ is not None):
         return MiscHelper.convert_first_char_upper_and_other_lower(id0_)
     sss = self.get_string_value("NAMETYPE")
     if (sss == "china"):
         last_name_first = True
     n = self.get_string_value(PersonReferent.ATTR_LASTNAME)
     if (n is not None):
         res = io.StringIO()
         if (last_name_first):
             res.write("{0} ".format(n))
         s = self.__find_for_surname(PersonReferent.ATTR_FIRSTNAME, n,
                                     False)
         if (s is not None):
             res.write("{0}".format(s))
             # Initials are closed with a dot, full names with a space.
             if (PersonReferent.__is_initial(s)):
                 res.write('.')
             else:
                 res.write(' ')
             s = self.__find_for_surname(PersonReferent.ATTR_MIDDLENAME, n,
                                         False)
             if (s is not None):
                 res.write("{0}".format(s))
                 if (PersonReferent.__is_initial(s)):
                     res.write('.')
                 else:
                     res.write(' ')
         if (not last_name_first):
             res.write(str(n))
         elif (Utils.getCharAtStringIO(res, res.tell() - 1) == ' '):
             # Drop the trailing space left after the last written part.
             Utils.setLengthStringIO(res, res.tell() - 1)
         if (LanguageHelper.is_cyrillic_char(n[0])):
             # Look for a Latin spelling of the last name.
             nl = None
             for sl in self.slots:
                 if (sl.type_name == PersonReferent.ATTR_LASTNAME):
                     ss = Utils.asObjectOrNull(sl.value, str)
                     # NOTE(review): asObjectOrNull presumably yields None
                     # for non-str values, so check before calling len().
                     if (ss is not None and len(ss) > 0
                             and LanguageHelper.is_latin_char(ss[0])):
                         nl = ss
                         break
             if (nl is not None):
                 nal = self.__find_for_surname(
                     PersonReferent.ATTR_FIRSTNAME, nl, False)
                 if (nal is None):
                     res.write(" ({0})".format(nl))
                 elif (PersonReferent.SHOW_LASTNAME_ON_FIRST_POSITION):
                     res.write(" ({0} {1})".format(nl, nal))
                 else:
                     res.write(" ({0} {1})".format(nal, nl))
         return MiscHelper.convert_first_char_upper_and_other_lower(
             Utils.toStringStringIO(res))
     else:
         n = self.get_string_value(PersonReferent.ATTR_FIRSTNAME)
         if ((n) is not None):
             s = self.__find_for_surname(PersonReferent.ATTR_MIDDLENAME, n,
                                         False)
             if (s is not None):
                 n = "{0} {1}".format(n, s)
             n = MiscHelper.convert_first_char_upper_and_other_lower(n)
             nik = self.get_string_value(PersonReferent.ATTR_NICKNAME)
             tit = self.__find_shortest_king_titul(False)
             if (tit is not None):
                 n = "{0} {1}".format(tit, n)
             if (nik is not None):
                 n = "{0} {1}".format(n, nik)
             return n
     return "?"