Example #1
0
 def process(self, word: str
             ) -> typing.Optional[typing.List['MorphWordForm']]:
     """Run the morphological analysis of a single word.

     The word is first walked character by character through the tree
     rooted at ``self.m_root``; every rule attached to a visited node
     may add word forms to the result. If the collected forms do not
     switch the guessing off, the word is additionally walked backwards
     through ``self.m_root_reverce`` and the variants stored there are
     appended as forms with an ``undef_coef``.

     Args:
         word(str): the word to analyse; it must be in upper case

     Returns:
         The list of word forms ordered by ``MorphEngine.__sort``, or
         None when the word is null/empty, when a word longer than one
         character has no Cyrillic or Latin vowel, or when no form is
         left. NOTE: when guessing is requested but
         ``self.m_root_reverce`` is None, the forms collected so far are
         returned as is (not sorted, not post-processed, possibly an
         empty list or None).
     """
     if (Utils.isNullOrEmpty(word)):
         return None
     if (len(word) > 1):
         # A multi-character word without any vowel is not analysed.
         if (not any(LanguageHelper.is_cyrillic_vowel(ch)
                     or LanguageHelper.is_latin_vowel(ch) for ch in word)):
             return None
     res = None
     tn = self.m_root
     i = 0
     # Forward walk: position i splits the word into the part already
     # consumed by the tree (word[:i]) and the remaining tail (word[i:]).
     while i <= len(word):
         if (tn.lazy_pos > 0):
             self.__load_tree_node(tn)
         if (tn.rules is not None):
             word_begin = word[:i]
             word_end = word[i:]
             if (res is None):
                 res = list()
             for r in tn.rules:
                 wrapmvs = RefOutArgWrapper(None)
                 if (Utils.tryGetValue(r.variants, word_end, wrapmvs)):
                     r.process_result(res, word_begin, wrapmvs.value)
         if (tn.nodes is None or i >= len(word)):
             break
         # Child nodes are keyed by the character code.
         wraptn = RefOutArgWrapper(None)
         found = Utils.tryGetValue(tn.nodes, ord(word[i]), wraptn)
         tn = wraptn.value
         if (not found):
             break
         i += 1
     # Decide whether the forms found so far are convincing enough to
     # skip the guessing by the word ending.
     need_test_unknown_vars = True
     if (res is not None):
         for r in res:
             if ((r.class0_.is_pronoun or r.class0_.is_noun
                  or r.class0_.is_adjective)
                     or (r.class0_.is_misc and r.class0_.is_conjunction)
                     or r.class0_.is_preposition):
                 need_test_unknown_vars = False
             elif (r.class0_.is_adverb and r.normal_case is not None):
                 if (not LanguageHelper.ends_with_ex(
                         r.normal_case, "О", "А", None, None)):
                     need_test_unknown_vars = False
                 elif (r.normal_case == "МНОГО"):
                     need_test_unknown_vars = False
             elif (r.class0_.is_verb and len(res) > 1):
                 # A verb counts only when a form of another class
                 # accompanies it and the word does not end with "ИМ".
                 ok = False
                 for rr in res:
                     if (rr != r and rr.class0_ != r.class0_):
                         ok = True
                         break
                 if (ok and not LanguageHelper.ends_with(word, "ИМ")):
                     need_test_unknown_vars = False
     if (need_test_unknown_vars
             and LanguageHelper.is_cyrillic_char(word[0])):
         # Guessing needs at least two Cyrillic vowels and at least two
         # other characters in the word.
         vowels = sum(1 for ch in word
                      if LanguageHelper.is_cyrillic_vowel(ch))
         others = len(word) - vowels
         if ((vowels < 2) or (others < 2)):
             need_test_unknown_vars = False
     if (need_test_unknown_vars and res is not None and len(res) == 1):
         only = res[0]
         if (only.class0_.is_verb):
             attrs = only.misc.attrs
             if ("н.вр." in attrs and "нес.в." in attrs
                     and not "страд.з." in attrs):
                 need_test_unknown_vars = False
             elif ("б.вр." in attrs and "сов.в." in attrs):
                 need_test_unknown_vars = False
             elif (only.normal_case is not None
                   and LanguageHelper.ends_with(only.normal_case, "СЯ")):
                 need_test_unknown_vars = False
         if (only.class0_.is_undefined
                 and "прдктв." in only.misc.attrs):
             need_test_unknown_vars = False
     if (need_test_unknown_vars):
         if (self.m_root_reverce is None):
             return res
         # Backward walk from the last character until a node carrying
         # reversed variants is met; i keeps the index where it stopped.
         tn = self.m_root_reverce
         tn0 = None
         for i in range(len(word) - 1, -1, -1):
             if (tn.lazy_pos > 0):
                 self.__load_tree_node(tn)
             if (tn.nodes is None):
                 break
             wrapnext = RefOutArgWrapper(None)
             found = Utils.tryGetValue(tn.nodes, ord(word[i]), wrapnext)
             if (not found):
                 break
             tn = wrapnext.value
             if (tn.lazy_pos > 0):
                 self.__load_tree_node(tn)
             if (tn.reverce_variants is not None):
                 tn0 = tn
                 break
         if (tn0 is not None):
             # The variants are accepted when the walk stopped within
             # the first four characters or a vowel exists at or before
             # the stop position.
             glas = i < 4 or any(
                 LanguageHelper.is_cyrillic_vowel(ch)
                 or LanguageHelper.is_latin_vowel(ch)
                 for ch in word[:i + 1])
             if (glas):
                 # res is still None when the forward walk met no rules;
                 # the loops below iterate over it, so start from an
                 # empty list instead of failing with TypeError. An empty
                 # list still ends up as a None result further down.
                 if (res is None):
                     res = list()
                 for mv in tn0.reverce_variants:
                     cls = mv.class0_
                     if (not (cls.is_verb or cls.is_adjective
                              or cls.is_noun or cls.is_proper_surname
                              or cls.is_proper_geo
                              or cls.is_proper_secname)):
                         continue
                     # Skip the variant when a dictionary form already
                     # covers it.
                     ok = False
                     for rr in res:
                         if (rr.is_in_dictionary):
                             if (rr.class0_ == mv.class0_
                                     or rr.class0_.is_noun):
                                 ok = True
                                 break
                             if (not mv.class0_.is_adjective
                                     and rr.class0_.is_verb):
                                 ok = True
                                 break
                     if (ok):
                         continue
                     if (len(mv.tail) > 0 and
                             not LanguageHelper.ends_with(word, mv.tail)):
                         continue
                     r = MorphWordForm(mv, word)
                     if (not MorphWordForm._has_morph_equals(res, r)):
                         r.undef_coef = mv.coef
                         res.append(r)
     if (word == "ПРИ" and res is not None):
         # Special case: proper-geo forms are dropped for this word.
         # Iterating backwards keeps the indices valid while deleting.
         for i in range(len(res) - 1, -1, -1):
             if (res[i].class0_.is_proper_geo):
                 del res[i]
     if (res is None or len(res) == 0):
         return None
     MorphEngine.__sort(res, word)
     for v in res:
         if (v.normal_case is None):
             v.normal_case = word
         if (v.class0_.is_verb):
             if (v.normal_full is None
                     and LanguageHelper.ends_with(v.normal_case, "ТЬСЯ")):
                 # normal_full is normal_case without its last two
                 # characters.
                 v.normal_full = v.normal_case[0:len(v.normal_case) - 2]
         v.language = self.language
         if (v.class0_.is_preposition):
             v.normal_case = LanguageHelper.normalize_preposition(
                 v.normal_case)
     # Drop non-dictionary adjectives marked "к.ф." or "неизм." while
     # other forms remain, and accumulate the classes of the dictionary
     # forms into mc.
     mc = MorphClass()
     for i in range(len(res) - 1, -1, -1):
         if (not res[i].is_in_dictionary and res[i].class0_.is_adjective
                 and len(res) > 1):
             if ("к.ф." in res[i].misc.attrs
                     or "неизм." in res[i].misc.attrs):
                 del res[i]
                 continue
         if (res[i].is_in_dictionary):
             mc.value |= res[i].class0_.value
     if (mc == MorphClass.VERB and len(res) > 1):
         # Only verbs came from the dictionary: reset the coefficient of
         # adjectives whose undef_coef exceeds 100.
         for r in res:
             if (r.undef_coef > 100
                     and r.class0_ == MorphClass.ADJECTIVE):
                 r.undef_coef = 0
     if (len(res) == 0):
         return None
     return res
Example #2
0
 def get_normal_case_text(self,
                          mc: 'MorphClass' = None,
                          num: 'MorphNumber' = MorphNumber.UNDEFINED,
                          gender: 'MorphGender' = MorphGender.UNDEFINED,
                          keep_chars: bool = False) -> str:
     """Build the normalized text of this token.

     The first item of ``self.morph.items`` that passes the class and
     gender filters and is a ``MorphWordForm`` supplies the text. If no
     such item exists and no accepted item had a defined case, the text
     is requested from ``MorphologyService.get_wordform`` (singular
     only) or falls back to ``self.term``.

     Args:
         mc: required morphological class; for a preposition class the
             normalized preposition of ``self.term`` is returned at once
         num: required number
         gender: required gender
         keep_chars: when True, the result is lower-cased or capitalized
             according to ``self.chars``

     Returns:
         The normalized text, or None when an accepted item had a
         defined case but no accepted item was a word form.
     """
     from pullenti.ner.core.MiscHelper import MiscHelper

     def restore_case(text):
         # Re-apply the character case of the token when requested.
         if (keep_chars):
             if (self.chars.is_all_lower):
                 return text.lower()
             if (self.chars.is_capital_upper):
                 return MiscHelper.convert_first_char_upper_and_other_lower(
                     text)
         return text

     if (mc is not None and mc.is_preposition):
         return LanguageHelper.normalize_preposition(self.term)

     case_seen = False
     for item in self.morph.items:
         # Class filter: the item must share a class with mc.
         if (mc is not None and not mc.is_undefined):
             common = (item.class0_) & mc
             if (common.is_undefined):
                 continue
             if (common.is_misc and not common.is_proper
                     and mc != item.class0_):
                 continue
         form = Utils.asObjectOrNull(item, MorphWordForm)
         # Gender filter: a mismatch is tolerated only for a masculine
         # request, either via normal_full or for a personal pronoun.
         take_full = False
         if (gender != MorphGender.UNDEFINED
                 and ((item.gender) & (gender)) == (MorphGender.UNDEFINED)):
             masculine = gender == MorphGender.MASCULINE
             if (masculine
                     and (item.gender != MorphGender.UNDEFINED
                          or item.number == MorphNumber.PLURAL)
                     and form is not None
                     and form.normal_full is not None):
                 take_full = True
             elif (not (masculine and item.class0_.is_personal_pronoun)):
                 continue
         if (not item.case_.is_undefined):
             case_seen = True
         if (form is None):
             continue

         if (num == MorphNumber.SINGULAR
                 and item.number == MorphNumber.PLURAL
                 and form.normal_full is not None):
             size = len(form.normal_case)
             # Keep normal_case when it is normal_full plus a trailing
             # "СЯ"; otherwise use normal_full.
             if (size == (len(form.normal_full) + 2) and size > 4
                     and form.normal_case[size - 2] == 'С'
                     and form.normal_case[size - 1] == 'Я'):
                 text = form.normal_case
             else:
                 text = form.normal_full
         elif (take_full):
             text = form.normal_full
         else:
             text = Utils.ifNotNull(form.normal_case, self.term)

         if (num == MorphNumber.SINGULAR and mc is not None
                 and mc == MorphClass.NOUN and text == "ДЕТИ"):
             text = "РЕБЕНОК"
         return restore_case(text)

     if (case_seen):
         return None

     fallback = None
     if (num == MorphNumber.SINGULAR and mc is not None):
         info = MorphBaseInfo._new492(MorphClass._new53(mc.value), gender,
                                      MorphNumber.SINGULAR,
                                      self.morph.language)
         fallback = MorphologyService.get_wordform(self.term, info)
     if (fallback is None):
         fallback = self.term
     return restore_case(fallback)