def __deserialize_item(self, stream: Stream) -> 'MorphBaseInfo': from pullenti.ner.core.internal.SerializerHelper import SerializerHelper ty = stream.readbyte() res = (MorphBaseInfo() if ty == 0 else MorphWordForm()) res.class0_ = MorphClass._new53( SerializerHelper.deserialize_short(stream)) res.case_ = MorphCase._new29( SerializerHelper.deserialize_short(stream)) res.gender = Utils.valToEnum( SerializerHelper.deserialize_short(stream), MorphGender) res.number = Utils.valToEnum( SerializerHelper.deserialize_short(stream), MorphNumber) res.language = MorphLang._new56( SerializerHelper.deserialize_short(stream)) if (ty == 0): return res wf = Utils.asObjectOrNull(res, MorphWordForm) wf.normal_case = SerializerHelper.deserialize_string(stream) wf.normal_full = SerializerHelper.deserialize_string(stream) wf.undef_coef = SerializerHelper.deserialize_short(stream) cou = SerializerHelper.deserialize_int(stream) i = 0 while i < cou: if (wf.misc is None): wf.misc = MorphMiscInfo() wf.misc.attrs.append(SerializerHelper.deserialize_string(stream)) i += 1 return res
def __init__(self, source: 'MorphCollection' = None) -> None: super().__init__() self.__m_class = MorphClass() self.__m_gender = MorphGender.UNDEFINED self.__m_number = MorphNumber.UNDEFINED self.__m_case = MorphCase() self.__m_language = MorphLang() self.__m_voice = MorphVoice.UNDEFINED self.__m_need_recalc = True self.__m_items = None if (source is None): return for it in source.items: mi = None if (isinstance(it, MorphWordForm)): wf = MorphWordForm() wf.copy_from_word_form(Utils.asObjectOrNull(it, MorphWordForm)) mi = (wf) else: mi = MorphBaseInfo() mi.copy_from(it) if (self.__m_items is None): self.__m_items = list() self.__m_items.append(mi) self.__m_class = MorphClass._new53(source.__m_class.value) self.__m_gender = source.__m_gender self.__m_case = MorphCase._new29(source.__m_case.value) self.__m_number = source.__m_number self.__m_language = MorphLang._new56(source.__m_language.value) self.__m_voice = source.__m_voice self.__m_need_recalc = False
def clone(self) -> 'MorphCollection': """ Создать копию """ res = MorphCollection() if (self.__m_items is not None): res.__m_items = list() try: res.__m_items.extend(self.__m_items) except Exception as ex: pass if (not self.__m_need_recalc): res.__m_class = MorphClass._new53(self.__m_class.value) res.__m_gender = self.__m_gender res.__m_case = MorphCase._new29(self.__m_case.value) res.__m_number = self.__m_number res.__m_language = MorphLang._new56(self.__m_language.value) res.__m_need_recalc = False res.__m_voice = self.__m_voice return res
def _deserialize(self, stream: Stream) -> None: from pullenti.ner.core.internal.SerializerHelper import SerializerHelper self.__m_class = MorphClass._new53( SerializerHelper.deserialize_short(stream)) self.__m_case = MorphCase._new29( SerializerHelper.deserialize_short(stream)) self.__m_gender = (Utils.valToEnum( SerializerHelper.deserialize_short(stream), MorphGender)) self.__m_number = (Utils.valToEnum( SerializerHelper.deserialize_short(stream), MorphNumber)) self.__m_voice = (Utils.valToEnum( SerializerHelper.deserialize_short(stream), MorphVoice)) self.__m_language = MorphLang._new56( SerializerHelper.deserialize_short(stream)) cou = SerializerHelper.deserialize_int(stream) self.__m_items = list() i = 0 while i < cou: it = self.__deserialize_item(stream) if (it is not None): self.__m_items.append(it) i += 1 self.__m_need_recalc = False
def get_normal_case_text(self, mc: 'MorphClass' = None, num: 'MorphNumber' = MorphNumber.UNDEFINED, gender: 'MorphGender' = MorphGender.UNDEFINED, keep_chars: bool = False) -> str: if ((isinstance(self.begin_token, ReferentToken)) and self.begin_token == self.end_token): return self.begin_token.get_normal_case_text( mc, num, gender, keep_chars) res = None max_coef = 0 def_coef = -1 for it in self.morph.items: v = Utils.asObjectOrNull(it, NounPhraseItemTextVar) if (v is None): continue if (v.undef_coef > 0 and (((v.undef_coef < max_coef) or def_coef >= 0))): continue if (num == MorphNumber.SINGULAR and v.single_number_value is not None): if (mc is not None and ((gender == MorphGender.NEUTER or gender == MorphGender.FEMINIE)) and mc.is_adjective): bi = MorphBaseInfo._new401(MorphClass._new53(mc.value), gender, MorphNumber.SINGULAR, MorphCase.NOMINATIVE, self.morph.language) str0_ = MorphologyService.get_wordform( v.single_number_value, bi) if (str0_ is not None): res = str0_ else: res = v.single_number_value if (v.undef_coef == 0): break max_coef = v.undef_coef continue if (Utils.isNullOrEmpty(v.normal_value)): continue if (str.isdigit(v.normal_value[0]) and mc is not None and mc.is_adjective): val = 0 wrapval402 = RefOutArgWrapper(0) inoutres403 = Utils.tryParseInt(v.normal_value, wrapval402) val = wrapval402.value if (inoutres403): str0_ = NumberHelper.get_number_adjective( val, gender, (MorphNumber.SINGULAR if num == MorphNumber.SINGULAR or val == 1 else MorphNumber.PLURAL)) if (str0_ is not None): res = str0_ if (v.undef_coef == 0): break max_coef = v.undef_coef continue res1 = it.normal_value if (num == MorphNumber.SINGULAR): if (res1 == "ДЕТИ"): res1 = "РЕБЕНОК" elif (res1 == "ЛЮДИ"): res1 = "ЧЕЛОВЕК" max_coef = v.undef_coef if (v.undef_coef > 0): res = res1 continue def_co = 0 if (mc is not None and mc.is_adjective and v.undef_coef == 0): pass elif ( ((isinstance(self.begin_token, TextToken)) and res1 == self.begin_token.term and it.case_.is_nominative) and it.number == MorphNumber.SINGULAR): def_co = 1 if (num == MorphNumber.PLURAL and ((v.number) & (MorphNumber.PLURAL)) == (MorphNumber.PLURAL)): def_co += 3 if (res is None or def_co > def_coef): res = res1 def_coef = def_co if (def_co > 0): break if (res is not None): return self.__corr_chars(res, keep_chars) if (res is None and self.begin_token == self.end_token): res = self.begin_token.get_normal_case_text( mc, num, gender, keep_chars) elif (res is None): res = self.begin_token.get_normal_case_text( mc, num, gender, keep_chars) if (res is None): res = MiscHelper.get_text_value_of_meta_token( self, (GetTextAttr.KEEPREGISTER if keep_chars else GetTextAttr.NO)) else: res = "{0} {1}".format( res, MiscHelper.get_text_value( self.begin_token.next0_, self.end_token, (GetTextAttr.KEEPREGISTER if keep_chars else GetTextAttr.NO))) return Utils.ifNotNull(res, "?")
def get_normal_case_text(self, mc: 'MorphClass' = None, num: 'MorphNumber' = MorphNumber.UNDEFINED, gender: 'MorphGender' = MorphGender.UNDEFINED, keep_chars: bool = False) -> str: from pullenti.ner.core.MiscHelper import MiscHelper empty = True if (mc is not None and mc.is_preposition): return LanguageHelper.normalize_preposition(self.term) for it in self.morph.items: if (mc is not None and not mc.is_undefined): cc = (it.class0_) & mc if (cc.is_undefined): continue if (cc.is_misc and not cc.is_proper and mc != it.class0_): continue wf = Utils.asObjectOrNull(it, MorphWordForm) normal_full = False if (gender != MorphGender.UNDEFINED): if (((it.gender) & (gender)) == (MorphGender.UNDEFINED)): if ((gender == MorphGender.MASCULINE and ((it.gender != MorphGender.UNDEFINED or it.number == MorphNumber.PLURAL)) and wf is not None) and wf.normal_full is not None): normal_full = True elif (gender == MorphGender.MASCULINE and it.class0_.is_personal_pronoun): pass else: continue if (not it.case_.is_undefined): empty = False if (wf is not None): res = None if (num == MorphNumber.SINGULAR and it.number == MorphNumber.PLURAL and wf.normal_full is not None): le = len(wf.normal_case) if ((le == (len(wf.normal_full) + 2) and le > 4 and wf.normal_case[le - 2] == 'С') and wf.normal_case[le - 1] == 'Я'): res = wf.normal_case else: res = (wf.normal_full if normal_full else wf.normal_full) else: res = (wf.normal_full if normal_full else (Utils.ifNotNull(wf.normal_case, self.term))) if (num == MorphNumber.SINGULAR and mc is not None and mc == MorphClass.NOUN): if (res == "ДЕТИ"): res = "РЕБЕНОК" if (keep_chars): if (self.chars.is_all_lower): res = res.lower() elif (self.chars.is_capital_upper): res = MiscHelper.convert_first_char_upper_and_other_lower( res) return res if (not empty): return None te = None if (num == MorphNumber.SINGULAR and mc is not None): bi = MorphBaseInfo._new492(MorphClass._new53(mc.value), gender, MorphNumber.SINGULAR, self.morph.language) vars0_ = MorphologyService.get_wordform(self.term, bi) if (vars0_ is not None): te = vars0_ if (te is None): te = self.term if (keep_chars): if (self.chars.is_all_lower): return te.lower() elif (self.chars.is_capital_upper): return MiscHelper.convert_first_char_upper_and_other_lower(te) return te