def process(self, word: str) -> typing.List['MorphWordForm']:
    """Run morphological analysis for a single word.

    The word is first matched against the forward tree (``self.m_root``):
    every rule met along the path may add word forms to the result. Unless
    those forms already rule it out, the reversed tree
    (``self.m_root_reverce``) is then walked from the end of the word and its
    ``reverce_variants`` are added as extra word forms carrying an
    ``undef_coef``. Finally the forms are passed to ``MorphEngine.__sort``
    and their ``normal_case`` / ``normal_full`` / ``language`` are filled in.

    Args:
        word(str): the word to analyse; it must be in upper case

    Returns:
        The list of word forms, or None when the word is empty, when it is
        longer than one character and contains no vowel, or when no form
        was found.
    """
    if Utils.isNullOrEmpty(word):
        return None
    word_len = len(word)
    # A multi-character word without a single vowel is not analysed at all.
    if word_len > 1 and not any(
            LanguageHelper.is_cyrillic_vowel(ch) or LanguageHelper.is_latin_vowel(ch)
            for ch in word):
        return None

    # --- Forward tree: split the word at every depth into begin + end. ---
    res = None
    tn = self.m_root
    i = 0
    while i <= word_len:
        if tn.lazy_pos > 0:
            self.__load_tree_node(tn)
        if tn.rules is not None:
            if res is None:
                res = list()
            word_begin = word[:i]
            word_end = word[i:]
            for r in tn.rules:
                wrapmvs = RefOutArgWrapper(None)
                if Utils.tryGetValue(r.variants, word_end, wrapmvs):
                    r.process_result(res, word_begin, wrapmvs.value)
        if tn.nodes is None or i >= word_len:
            break
        wraptn = RefOutArgWrapper(None)
        found = Utils.tryGetValue(tn.nodes, ord(word[i]), wraptn)
        tn = wraptn.value
        if not found:
            break
        i += 1

    # --- Decide whether the reversed tree has to be consulted as well. ---
    need_test_unknown_vars = True
    if res is not None:
        for r in res:
            if ((r.class0_.is_pronoun or r.class0_.is_noun or r.class0_.is_adjective)
                    or (r.class0_.is_misc and r.class0_.is_conjunction)
                    or r.class0_.is_preposition):
                need_test_unknown_vars = False
            elif r.class0_.is_adverb and r.normal_case is not None:
                if not LanguageHelper.ends_with_ex(r.normal_case, "О", "А", None, None):
                    need_test_unknown_vars = False
                elif r.normal_case == "МНОГО":
                    need_test_unknown_vars = False
            elif r.class0_.is_verb and len(res) > 1:
                # A verb counts only if some other form has a different class.
                ok = False
                for rr in res:
                    if rr != r and rr.class0_ != r.class0_:
                        ok = True
                        break
                if ok and not LanguageHelper.ends_with(word, "ИМ"):
                    need_test_unknown_vars = False
    if need_test_unknown_vars and LanguageHelper.is_cyrillic_char(word[0]):
        # Cyrillic words need at least two vowels and two non-vowels.
        gl = sum(1 for ch in word if LanguageHelper.is_cyrillic_vowel(ch))
        sog = word_len - gl
        if gl < 2 or sog < 2:
            need_test_unknown_vars = False
    if need_test_unknown_vars and res is not None and len(res) == 1:
        if res[0].class0_.is_verb:
            if ("н.вр." in res[0].misc.attrs and "нес.в." in res[0].misc.attrs
                    and "страд.з." not in res[0].misc.attrs):
                need_test_unknown_vars = False
            elif "б.вр." in res[0].misc.attrs and "сов.в." in res[0].misc.attrs:
                need_test_unknown_vars = False
            elif (res[0].normal_case is not None
                    and LanguageHelper.ends_with(res[0].normal_case, "СЯ")):
                need_test_unknown_vars = False
        if res[0].class0_.is_undefined and "прдктв." in res[0].misc.attrs:
            need_test_unknown_vars = False

    # --- Reversed tree: walk from the last character towards the first. ---
    if need_test_unknown_vars:
        if self.m_root_reverce is None:
            return res
        tn = self.m_root_reverce
        tn0 = None
        # After the loop ``i`` is the index where the walk stopped,
        # or -1 when the whole word was consumed without a break.
        i = word_len - 1
        while i >= 0:
            if tn.lazy_pos > 0:
                self.__load_tree_node(tn)
            if tn.nodes is None:
                break
            wrapnext = RefOutArgWrapper(None)
            if not Utils.tryGetValue(tn.nodes, ord(word[i]), wrapnext):
                break
            tn = wrapnext.value
            if tn.lazy_pos > 0:
                self.__load_tree_node(tn)
            if tn.reverce_variants is not None:
                tn0 = tn
                break
            i -= 1
        if tn0 is not None:
            # Accept when the stop index is below 4 or the part of the word
            # up to and including that index contains a vowel.
            glas = i < 4 or any(
                LanguageHelper.is_cyrillic_vowel(ch) or LanguageHelper.is_latin_vowel(ch)
                for ch in word[:i + 1])
            if glas:
                # The forward pass may have found no rules at all, leaving
                # ``res`` as None; create the list before iterating over it
                # so the loops below cannot fail on a None value.
                if res is None:
                    res = list()
                for mv in tn0.reverce_variants:
                    if not (mv.class0_.is_verb or mv.class0_.is_adjective
                            or mv.class0_.is_noun or mv.class0_.is_proper_surname
                            or mv.class0_.is_proper_geo or mv.class0_.is_proper_secname):
                        continue
                    # Skip the variant when a dictionary form already covers it.
                    ok = False
                    for rr in res:
                        if rr.is_in_dictionary:
                            if rr.class0_ == mv.class0_ or rr.class0_.is_noun:
                                ok = True
                                break
                            if not mv.class0_.is_adjective and rr.class0_.is_verb:
                                ok = True
                                break
                    if ok:
                        continue
                    if len(mv.tail) > 0 and not LanguageHelper.ends_with(word, mv.tail):
                        continue
                    r = MorphWordForm(mv, word)
                    if not MorphWordForm._has_morph_equals(res, r):
                        r.undef_coef = mv.coef
                        res.append(r)

    # --- Post-processing of the collected forms. ---
    if word == "ПРИ" and res is not None:
        # Iterate backwards so deletions do not shift pending indexes.
        for i in range(len(res) - 1, -1, -1):
            if res[i].class0_.is_proper_geo:
                del res[i]
    if res is None or len(res) == 0:
        return None
    MorphEngine.__sort(res, word)
    for v in res:
        if v.normal_case is None:
            v.normal_case = word
        if v.class0_.is_verb:
            if v.normal_full is None and LanguageHelper.ends_with(v.normal_case, "ТЬСЯ"):
                # Drop the trailing two characters ("СЯ").
                v.normal_full = v.normal_case[:-2]
        v.language = self.language
        if v.class0_.is_preposition:
            v.normal_case = LanguageHelper.normalize_preposition(v.normal_case)
    mc = MorphClass()
    for i in range(len(res) - 1, -1, -1):
        if not res[i].is_in_dictionary and res[i].class0_.is_adjective and len(res) > 1:
            if "к.ф." in res[i].misc.attrs or "неизм." in res[i].misc.attrs:
                del res[i]
                continue
        if res[i].is_in_dictionary:
            # Accumulate the classes of all dictionary forms.
            mc.value |= res[i].class0_.value
    if mc == MorphClass.VERB and len(res) > 1:
        for r in res:
            if r.undef_coef > 100 and r.class0_ == MorphClass.ADJECTIVE:
                r.undef_coef = 0
    if len(res) == 0:
        return None
    return res
def get_normal_case_text(self, mc: 'MorphClass' = None, num: 'MorphNumber' = MorphNumber.UNDEFINED, gender: 'MorphGender' = MorphGender.UNDEFINED, keep_chars: bool = False) -> str:
    """Build the normalised text of this token.

    The first item of ``self.morph.items`` that passes the class and gender
    filters and is a ``MorphWordForm`` supplies the text (its ``normal_full``
    or ``normal_case``). When no such word form exists, the result is None if
    any accepted item had a defined case; otherwise it falls back to
    ``MorphologyService.get_wordform`` (only for a singular request with a
    class given) and finally to ``self.term``.

    Args:
        mc(MorphClass): class filter; a preposition class short-circuits to
            the normalised preposition of ``self.term``
        num(MorphNumber): requested number
        gender(MorphGender): requested gender
        keep_chars(bool): when True, re-apply the token's letter case
            (all lower, or first letter capitalised) to the result

    Returns:
        str: the normalised text, or None as described above
    """
    from pullenti.ner.core.MiscHelper import MiscHelper

    def restore_case(text):
        # Re-apply the original letter case only when the caller asked for it.
        if not keep_chars:
            return text
        if self.chars.is_all_lower:
            return text.lower()
        if self.chars.is_capital_upper:
            return MiscHelper.convert_first_char_upper_and_other_lower(text)
        return text

    if mc is not None and mc.is_preposition:
        return LanguageHelper.normalize_preposition(self.term)

    has_defined_case = False
    for item in self.morph.items:
        # Class filter: the item must share a class with the requested one.
        if mc is not None and not mc.is_undefined:
            common = item.class0_ & mc
            if common.is_undefined:
                continue
            if common.is_misc and not common.is_proper and mc != item.class0_:
                continue

        form = Utils.asObjectOrNull(item, MorphWordForm)

        # Gender filter: applies only when the genders do not intersect.
        prefer_full = False
        if (gender != MorphGender.UNDEFINED
                and (item.gender & gender) == MorphGender.UNDEFINED):
            masculine = gender == MorphGender.MASCULINE
            if (masculine
                    and (item.gender != MorphGender.UNDEFINED
                         or item.number == MorphNumber.PLURAL)
                    and form is not None
                    and form.normal_full is not None):
                prefer_full = True
            elif not (masculine and item.class0_.is_personal_pronoun):
                continue

        if not item.case_.is_undefined:
            has_defined_case = True
        if form is None:
            continue

        if (num == MorphNumber.SINGULAR
                and item.number == MorphNumber.PLURAL
                and form.normal_full is not None):
            case_len = len(form.normal_case)
            # Keep normal_case when it is exactly normal_full plus a
            # trailing "СЯ"; otherwise use normal_full.
            keep_case_form = (case_len == len(form.normal_full) + 2
                              and case_len > 4
                              and form.normal_case[case_len - 2] == 'С'
                              and form.normal_case[case_len - 1] == 'Я')
            text = form.normal_case if keep_case_form else form.normal_full
        elif prefer_full:
            text = form.normal_full
        else:
            text = Utils.ifNotNull(form.normal_case, self.term)

        if (num == MorphNumber.SINGULAR and mc is not None
                and mc == MorphClass.NOUN and text == "ДЕТИ"):
            text = "РЕБЕНОК"
        return restore_case(text)

    if has_defined_case:
        return None

    # No word form was usable: ask the morphology service, then fall back
    # to the raw term.
    fallback = None
    if num == MorphNumber.SINGULAR and mc is not None:
        info = MorphBaseInfo._new492(MorphClass._new53(mc.value), gender,
                                     MorphNumber.SINGULAR, self.morph.language)
        fallback = MorphologyService.get_wordform(self.term, info)
    if fallback is None:
        fallback = self.term
    return restore_case(fallback)