def __init__(self, source: 'MorphCollection' = None) -> None:
    """Build an empty collection, or copy the state of ``source``.

    Args:
        source(MorphCollection): optional collection to copy. When it is
            None the new collection keeps its undefined defaults and stays
            flagged for recalculation.
    """
    super().__init__()
    # Defaults: nothing is known yet, aggregates must be recalculated.
    self.__m_class = MorphClass()
    self.__m_gender = MorphGender.UNDEFINED
    self.__m_number = MorphNumber.UNDEFINED
    self.__m_case = MorphCase()
    self.__m_language = MorphLang()
    self.__m_voice = MorphVoice.UNDEFINED
    self.__m_need_recalc = True
    self.__m_items = None
    if source is None:
        return
    # Copy every variant into a fresh object of the matching kind.
    for original in source.items:
        if isinstance(original, MorphWordForm):
            clone = MorphWordForm()
            clone.copy_from_word_form(
                Utils.asObjectOrNull(original, MorphWordForm))
        else:
            clone = MorphBaseInfo()
            clone.copy_from(original)
        if self.__m_items is None:
            self.__m_items = list()
        self.__m_items.append(clone)
    # Take over the already computed aggregates, so no recalculation is needed.
    self.__m_class = MorphClass._new53(source.__m_class.value)
    self.__m_gender = source.__m_gender
    self.__m_case = MorphCase._new29(source.__m_case.value)
    self.__m_number = source.__m_number
    self.__m_language = MorphLang._new56(source.__m_language.value)
    self.__m_voice = source.__m_voice
    self.__m_need_recalc = False
def add(self, val: str, shortval: str, gen: 'MorphGender',
        add_other_gender_var: bool = False) -> None:
    """Register a value variant for the given gender.

    Args:
        val(str): the value to add; None is ignored.
        shortval(str): short value stored together with the variant.
        gen(MorphGender): gender of the variant. Any gender other than
            masculine or feminine makes the value be added for both.
        add_other_gender_var(bool): also try to add the word form of the
            opposite gender obtained from the morphology service.
    """
    if val is None:
        return
    if self.head is None:
        self.head = val[0:0 + 3] if len(val) > 3 else val

    is_gendered = gen == MorphGender.MASCULINE or gen == MorphGender.FEMINIE
    if not is_gendered:
        # Unknown gender: store the value for both genders.
        self.add(val, shortval, MorphGender.MASCULINE, False)
        self.add(val, shortval, MorphGender.FEMINIE, False)
        return

    # Do not store the same (value, gender) pair twice.
    if any(known.value == val and known.gender == gen for known in self.items):
        return
    self.items.append(
        PersonMorphCollection.PersonMorphVariant._new2591(val, gen, shortval))
    if not add_other_gender_var:
        return

    if gen == MorphGender.FEMINIE:
        opposite = MorphGender.MASCULINE
    else:
        opposite = MorphGender.FEMINIE
    other_form = MorphologyService.get_wordform(
        val, MorphBaseInfo._new193(MorphClass._new2572(True), opposite))
    if other_form is not None:
        self.items.append(
            PersonMorphCollection.PersonMorphVariant._new2591(
                other_form, opposite, shortval))
def __deserialize_item(self, stream: Stream) -> 'MorphBaseInfo':
    """Read one morphology item from ``stream``.

    The first byte selects the kind: 0 yields a ``MorphBaseInfo``, any other
    value a ``MorphWordForm`` that carries extra fields after the common part.

    Args:
        stream(Stream): source positioned at the start of an item.

    Returns:
        MorphBaseInfo: the restored item.
    """
    from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
    read_short = SerializerHelper.deserialize_short
    read_string = SerializerHelper.deserialize_string

    kind = stream.readbyte()
    if kind == 0:
        item = MorphBaseInfo()
    else:
        item = MorphWordForm()
    # Common part, read in the fixed stored order.
    item.class0_ = MorphClass._new53(read_short(stream))
    item.case_ = MorphCase._new29(read_short(stream))
    item.gender = Utils.valToEnum(read_short(stream), MorphGender)
    item.number = Utils.valToEnum(read_short(stream), MorphNumber)
    item.language = MorphLang._new56(read_short(stream))
    if kind == 0:
        return item

    # Word-form specific tail.
    word_form = Utils.asObjectOrNull(item, MorphWordForm)
    word_form.normal_case = read_string(stream)
    word_form.normal_full = read_string(stream)
    word_form.undef_coef = read_short(stream)
    attr_count = SerializerHelper.deserialize_int(stream)
    for _ in range(attr_count):
        # The misc container is created only when there is something to store.
        if word_form.misc is None:
            word_form.misc = MorphMiscInfo()
        word_form.misc.attrs.append(read_string(stream))
    return item
def __remove_items_morph_case(self, cas: 'MorphCase') -> None:
    """Restrict the stored variants to the cases present in ``cas``.

    Variants sharing no case with ``cas`` are deleted. Variants that only
    partly match are replaced by a copy whose case is narrowed, so the
    original objects are never modified.

    Args:
        cas(MorphCase): mask of the cases to keep.
    """
    if self.__m_items is None:
        return
    if len(self.__m_items) == 0:
        # No variants to filter: narrow the aggregate case directly.
        self.__m_case = (self.__m_case) & cas
    # Walk backwards so deletions do not shift the indexes still to visit.
    for pos in reversed(range(len(self.__m_items))):
        current = self.__m_items[pos]
        narrowed = (current.case_) & cas
        if narrowed.is_undefined:
            del self.__m_items[pos]
            self.__m_need_recalc = True
            continue
        if narrowed != current.case_:
            if isinstance(current, MorphWordForm):
                replacement = MorphWordForm()
                replacement.copy_from_word_form(
                    Utils.asObjectOrNull(current, MorphWordForm))
            else:
                replacement = MorphBaseInfo()
                replacement.copy_from(current)
            replacement.case_ = (replacement.case_) & cas
            self.__m_items[pos] = replacement
            self.__m_need_recalc = True
    self.__m_need_recalc = True
def get_morph_variant(self, cas : 'MorphCase', plural : bool) -> str:
    """ Generate the text of the noun group in the requested case and number

    Args:
        cas(MorphCase): requested case
        plural(bool): requested number (True for plural)

    Returns:
        str: resulting string (None when the group has neither adjectives nor a noun)
    """
    target = MorphBaseInfo._new499(cas, MorphLang.RU)
    target.number = (MorphNumber.PLURAL if plural else MorphNumber.SINGULAR)

    def render(part):
        # Only a part made of exactly one text token is inflected;
        # anything else keeps its plain text value.
        text = MiscHelper.get_text_value_of_meta_token(part, GetTextAttr.NO)
        if part.begin_token == part.end_token and isinstance(part.begin_token, TextToken):
            inflected = MorphologyService.get_wordform(text, target)
            if inflected is not None:
                text = inflected
        return text

    parts = list(self.adjectives)
    if self.noun is not None:
        parts.append(self.noun)
    res = None
    for part in parts:
        text = render(part)
        res = (text if res is None else "{0} {1}".format(res, text))
    return res
def getMorphVariant(self, cas: 'MorphCase', plural: bool) -> str:
    """ Generate the text of the noun group in the requested case and number

    Args:
        cas(MorphCase): requested case
        plural(bool): requested number (True for plural)

    Returns:
        str: resulting string (None when the group has neither adjectives nor a noun)
    """
    wanted = MorphBaseInfo._new551(cas, MorphLang.RU)
    if plural:
        wanted.number = MorphNumber.PLURAL
    else:
        wanted.number = MorphNumber.SINGULAR

    # Adjectives come first, the noun (when present) closes the phrase.
    members = list(self.adjectives)
    if self.noun is not None:
        members.append(self.noun)

    res = None
    for member in members:
        word = MiscHelper.getTextValueOfMetaToken(member, GetTextAttr.NO)
        single_text_token = (member.begin_token == member.end_token
                             and isinstance(member.begin_token, TextToken))
        if single_text_token:
            # Inflect only simple one-token members; keep the rest as is.
            form = Morphology.getWordform(word, wanted)
            if form is not None:
                word = form
        if res is None:
            res = word
        else:
            res = "{0} {1}".format(res, word)
    return res
def __try_parse_en(first: 'Token', typ: 'NounPhraseParseAttr', max_char_pos: int) -> 'NounPhraseToken':
    """Try to parse a noun phrase written in Latin letters starting at ``first``.

    Tokens are collected one by one while they look like adjectives or nouns;
    the last collected item becomes the noun and all preceding ones become
    adjectives of the result.

    Args:
        first(Token): token to start from.
        typ(NounPhraseParseAttr): flags; MULTILINES, REFERENTCANBENOUN and
            PARSEPRONOUNS are consulted here.
        max_char_pos(int): when positive, tokens starting after this
            character position are not considered.

    Returns:
        NounPhraseToken: the parsed phrase, or None when ``first`` is None or
        no item could be collected.
    """
    # NOTE(review): the block nesting below was restored from a copy whose
    # indentation had been lost - confirm against the canonical file.
    if (first is None):
        return None
    items = None
    has_article = False
    has_prop = False
    has_misc = False
    # A preposition right before the phrase relaxes the checks for the first item.
    if (first.previous is not None and first.previous.morph.class0_.is_preposition and (first.whitespaces_before_count < 3)):
        has_prop = True
    t = first
    first_pass3048 = True
    while True:
        # Emulates a for-loop header: advance on every pass except the first,
        # so that `continue` below still moves to the next token.
        if first_pass3048: first_pass3048 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (max_char_pos > 0 and t.begin_char > max_char_pos):
            break
        if (not t.chars.is_latin_letter):
            break
        # A wide gap ends the phrase unless multi-line parsing is requested
        # or the previous token is an article.
        if (t != first and t.whitespaces_before_count > 2):
            if ((((typ) & (NounPhraseParseAttr.MULTILINES))) != (NounPhraseParseAttr.NO)):
                pass
            elif (MiscHelper.is_eng_article(t.previous)):
                pass
            else:
                break
        tt = Utils.asObjectOrNull(t, TextToken)
        # A leading article is remembered and skipped.
        if (t == first and tt is not None):
            if (MiscHelper.is_eng_article(tt)):
                has_article = True
                continue
        if (isinstance(t, ReferentToken)):
            if ((((typ) & (NounPhraseParseAttr.REFERENTCANBENOUN))) == (NounPhraseParseAttr.NO)):
                break
        elif (tt is None):
            break
        # Skip the "SO-CALL..." prefix (SO, hyphen, CALL...).
        if ((t.is_value("SO", None) and t.next0_ is not None and t.next0_.is_hiphen) and t.next0_.next0_ is not None):
            if (t.next0_.next0_.is_value("CALL", None)):
                t = t.next0_.next0_
                continue
        mc = t.get_morph_class_in_dictionary()
        if (mc.is_conjunction or mc.is_preposition):
            break
        if (mc.is_pronoun or mc.is_personal_pronoun):
            if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (NounPhraseParseAttr.NO)):
                break
        elif (mc.is_misc):
            if (t.is_value("THIS", None) or t.is_value("THAT", None)):
                has_misc = True
                if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (NounPhraseParseAttr.NO)):
                    break
        is_adj = False
        # The first item after an article / preposition / THIS-THAT and any
        # referent token are accepted without the part-of-speech checks.
        if (((has_article or has_prop or has_misc)) and items is None):
            pass
        elif (isinstance(t, ReferentToken)):
            pass
        else:
            if (not mc.is_noun and not mc.is_adjective):
                if (mc.is_undefined and has_article):
                    pass
                elif (items is None and mc.is_undefined and t.chars.is_capital_upper):
                    pass
                elif (mc.is_pronoun):
                    pass
                elif (tt.term.endswith("EAN")):
                    is_adj = True
                elif (MiscHelper.is_eng_adj_suffix(tt.next0_)):
                    pass
                else:
                    break
            if (mc.is_verb):
                if (t.next0_ is not None and t.next0_.morph.class0_.is_verb and (t.whitespaces_after_count < 2)):
                    pass
                elif (t.chars.is_capital_upper and not MiscHelper.can_be_start_of_sentence(t)):
                    pass
                elif ((t.chars.is_capital_upper and mc.is_noun and (isinstance(t.next0_, TextToken))) and t.next0_.chars.is_capital_upper):
                    pass
                elif (isinstance(t, ReferentToken)):
                    pass
                else:
                    break
        if (items is None):
            items = list()
        it = NounPhraseItem(t, t)
        if (mc.is_noun):
            it.can_be_noun = True
        if (mc.is_adjective or mc.is_pronoun or is_adj):
            it.can_be_adj = True
        items.append(it)
        t = it.end_token
        # After the very first item, step over an adjective suffix
        # (two tokens are skipped: t.next0_ and the one after it).
        if (len(items) == 1):
            if (MiscHelper.is_eng_adj_suffix(t.next0_)):
                mc.is_noun = False
                mc.is_adjective = True
                t = t.next0_.next0_
    if (items is None):
        return None
    # The last item is the noun; its morphology (minus verb readings) becomes
    # the morphology of the whole phrase.
    noun = items[len(items) - 1]
    res = NounPhraseToken(first, noun.end_token)
    res.noun = (noun)
    res.morph = MorphCollection()
    for v in noun.end_token.morph.items:
        if (v.class0_.is_verb):
            continue
        if (v.class0_.is_proper and noun.begin_token.chars.is_all_lower):
            continue
        if (isinstance(v, MorphWordForm)):
            wf = MorphWordForm()
            wf.copy_from_word_form(Utils.asObjectOrNull(v, MorphWordForm))
            # With an article the number is forced to singular.
            if (has_article and v.number != MorphNumber.SINGULAR):
                wf.number = MorphNumber.SINGULAR
            res.morph.add_item(wf)
        else:
            bi = MorphBaseInfo()
            bi.copy_from(v)
            if (has_article and v.number != MorphNumber.SINGULAR):
                bi.number = MorphNumber.SINGULAR
            res.morph.add_item(bi)
    if (res.morph.items_count == 0 and has_article):
        res.morph.add_item(MorphBaseInfo._new192(MorphClass.NOUN, MorphNumber.SINGULAR))
    # Every item before the noun is reported as an adjective.
    i = 0
    while i < (len(items) - 1):
        res.adjectives.append(items[i])
        i += 1
    return res
def __getNameWithoutBrackets(begin: 'Token', end: 'Token', normalize_first_noun_group: bool = False, normal_first_group_single: bool = False, ignore_geo_referent: bool = False) -> str:
    """ Get the string value between the tokens, excluding quotes and brackets

    Args:
        begin(Token): first token
        end(Token): last token
        normalize_first_noun_group(bool): whether to normalize the first noun group (nominative case)
        normal_first_group_single(bool): whether to bring the first noun group to singular number
        ignore_geo_referent(bool): ignore geographical entities inside

    Returns:
        str: the resulting text, or None when only '*' and whitespace remain
    """
    # NOTE(review): the block nesting below was restored from a copy whose
    # indentation had been lost - confirm against the canonical file.
    res = None
    # Drop an enclosing bracket/quote pair.
    if (BracketHelper.canBeStartOfSequence(begin, False, False) and BracketHelper.canBeEndOfSequence(end, False, begin, False)):
        begin = begin.next0_
        end = end.previous
    if (normalize_first_noun_group and not begin.morph.class0_.is_preposition):
        npt = NounPhraseHelper.tryParse(begin, NounPhraseParseAttr.REFERENTCANBENOUN, 0)
        # Reject a phrase that is a lone noun unknown to the dictionary,
        # or one that runs past `end`.
        if (npt is not None):
            if (npt.noun.getMorphClassInDictionary().is_undefined and len(npt.adjectives) == 0):
                npt = (None)
        if (npt is not None and npt.end_token.end_char > end.end_char):
            npt = (None)
        if (npt is not None):
            res = npt.getNormalCaseText(None, normal_first_group_single, MorphGender.UNDEFINED, False)
            te = npt.end_token.next0_
            # ", <word that is both verb and adjective>" right after the phrase:
            # append that word inflected to agree with the phrase.
            if (((te is not None and te.next0_ is not None and te.is_comma) and (isinstance(te.next0_, TextToken)) and te.next0_.end_char <= end.end_char) and te.next0_.morph.class0_.is_verb and te.next0_.morph.class0_.is_adjective):
                for it in te.next0_.morph.items:
                    if (it.gender == npt.morph.gender or (((it.gender) & (npt.morph.gender))) != (MorphGender.UNDEFINED)):
                        if (not ((it.case_) & npt.morph.case_).is_undefined):
                            if (it.number == npt.morph.number or (((it.number) & (npt.morph.number))) != (MorphNumber.UNDEFINED)):
                                var = (te.next0_).term
                                if (isinstance(it, MorphWordForm)):
                                    var = (it).normal_case
                                bi = MorphBaseInfo._new549(MorphClass.ADJECTIVE, npt.morph.gender, npt.morph.number, npt.morph.language)
                                var = Morphology.getWordform(var, bi)
                                if (var is not None):
                                    res = "{0}, {1}".format(res, var)
                                    te = te.next0_.next0_
                                break
            # Append the rest of the span as is.
            if (te is not None and te.end_char <= end.end_char):
                s = ProperNameHelper.getNameEx(te, end, MorphClass.UNDEFINED, MorphCase.UNDEFINED, MorphGender.UNDEFINED, True, ignore_geo_referent)
                if (not Utils.isNullOrEmpty(s)):
                    # No separating space before a tail that starts with punctuation.
                    if (not str.isalnum(s[0])):
                        res = "{0}{1}".format(res, s)
                    else:
                        res = "{0} {1}".format(res, s)
        elif ((isinstance(begin, TextToken)) and begin.chars.is_cyrillic_letter):
            # No noun phrase: normalize just the first Cyrillic word if the
            # dictionary knows it.
            mm = begin.getMorphClassInDictionary()
            if (not mm.is_undefined):
                res = begin.getNormalCaseText(mm, False, MorphGender.UNDEFINED, False)
                if (begin.end_char < end.end_char):
                    res = "{0} {1}".format(res, ProperNameHelper.getNameEx(begin.next0_, end, MorphClass.UNDEFINED, MorphCase.UNDEFINED, MorphGender.UNDEFINED, True, False))
    if (res is None):
        res = ProperNameHelper.getNameEx(begin, end, MorphClass.UNDEFINED, MorphCase.UNDEFINED, MorphGender.UNDEFINED, True, ignore_geo_referent)
    if (not Utils.isNullOrEmpty(res)):
        # Count trailing '*' and whitespace characters and cut them off.
        k = 0
        i = len(res) - 1
        while i >= 0:
            if (res[i] == '*' or Utils.isWhitespace(res[i])):
                pass
            else:
                break
            i -= 1
            k += 1
        if (k > 0):
            if (k == len(res)):
                return None
            res = res[0:0 + len(res) - k]
    return res
def get_normal_case_text(self, mc: 'MorphClass' = None, num: 'MorphNumber' = MorphNumber.UNDEFINED, gender: 'MorphGender' = MorphGender.UNDEFINED, keep_chars: bool = False) -> str:
    """Return the normalized text of this item.

    The value is chosen among the ``NounPhraseItemTextVar`` entries of
    ``self.morph.items``; when none yields a value, the normal text of the
    begin token (plus the remaining source text) is used.

    Args:
        mc(MorphClass): requested part of speech, may be None.
        num(MorphNumber): requested number.
        gender(MorphGender): requested gender.
        keep_chars(bool): keep the original character case.

    Returns:
        str: the normalized text; "?" when nothing could be produced.
    """
    # NOTE(review): the block nesting below was restored from a copy whose
    # indentation had been lost - confirm against the canonical file.
    # A single referent token normalizes itself.
    if ((isinstance(self.begin_token, ReferentToken)) and self.begin_token == self.end_token):
        return self.begin_token.get_normal_case_text(mc, num, gender, keep_chars)
    res = None
    max_coef = 0
    def_coef = -1
    for it in self.morph.items:
        v = Utils.asObjectOrNull(it, NounPhraseItemTextVar)
        if (v is None):
            continue
        # Skip a variant with a positive undef_coef that is lower than the last
        # recorded one, or once a zero-coefficient variant has been chosen.
        if (v.undef_coef > 0 and (((v.undef_coef < max_coef) or def_coef >= 0))):
            continue
        if (num == MorphNumber.SINGULAR and v.single_number_value is not None):
            if (mc is not None and ((gender == MorphGender.NEUTER or gender == MorphGender.FEMINIE)) and mc.is_adjective):
                # Adjective in neuter/feminine: inflect the singular value.
                bi = MorphBaseInfo._new401(MorphClass._new53(mc.value), gender, MorphNumber.SINGULAR, MorphCase.NOMINATIVE, self.morph.language)
                str0_ = MorphologyService.get_wordform(v.single_number_value, bi)
                if (str0_ is not None):
                    res = str0_
            else:
                res = v.single_number_value
            if (v.undef_coef == 0):
                break
            max_coef = v.undef_coef
            continue
        if (Utils.isNullOrEmpty(v.normal_value)):
            continue
        # A value starting with a digit, requested as an adjective, is turned
        # into a number adjective.
        if (str.isdigit(v.normal_value[0]) and mc is not None and mc.is_adjective):
            val = 0
            wrapval402 = RefOutArgWrapper(0)
            inoutres403 = Utils.tryParseInt(v.normal_value, wrapval402)
            val = wrapval402.value
            if (inoutres403):
                str0_ = NumberHelper.get_number_adjective(val, gender, (MorphNumber.SINGULAR if num == MorphNumber.SINGULAR or val == 1 else MorphNumber.PLURAL))
                if (str0_ is not None):
                    res = str0_
                    if (v.undef_coef == 0):
                        break
                    max_coef = v.undef_coef
                    continue
        res1 = it.normal_value
        # Two hard-coded plural words are replaced by their singular counterparts.
        if (num == MorphNumber.SINGULAR):
            if (res1 == "ДЕТИ"):
                res1 = "РЕБЕНОК"
            elif (res1 == "ЛЮДИ"):
                res1 = "ЧЕЛОВЕК"
        max_coef = v.undef_coef
        if (v.undef_coef > 0):
            res = res1
            continue
        # Score a zero-coefficient variant: +1 when it equals the source term in
        # nominative singular, +3 when plural is requested and the variant is plural.
        def_co = 0
        if (mc is not None and mc.is_adjective and v.undef_coef == 0):
            pass
        elif (((isinstance(self.begin_token, TextToken)) and res1 == self.begin_token.term and it.case_.is_nominative) and it.number == MorphNumber.SINGULAR):
            def_co = 1
        if (num == MorphNumber.PLURAL and ((v.number) & (MorphNumber.PLURAL)) == (MorphNumber.PLURAL)):
            def_co += 3
        if (res is None or def_co > def_coef):
            res = res1
            def_coef = def_co
            if (def_co > 0):
                break
    if (res is not None):
        return self.__corr_chars(res, keep_chars)
    # Nothing chosen from the variants: fall back to the begin token.
    if (res is None and self.begin_token == self.end_token):
        res = self.begin_token.get_normal_case_text(mc, num, gender, keep_chars)
    elif (res is None):
        res = self.begin_token.get_normal_case_text(mc, num, gender, keep_chars)
        if (res is None):
            res = MiscHelper.get_text_value_of_meta_token(self, (GetTextAttr.KEEPREGISTER if keep_chars else GetTextAttr.NO))
        else:
            res = "{0} {1}".format(res, MiscHelper.get_text_value(self.begin_token.next0_, self.end_token, (GetTextAttr.KEEPREGISTER if keep_chars else GetTextAttr.NO)))
    return Utils.ifNotNull(res, "?")
def _tryParseStreet(sli : typing.List['StreetItemToken'], ext_onto_regim : bool=False, for_metro : bool=False) -> 'AddressItemToken':
    """Try to build a street address item from a sequence of street item tokens.

    The first NOUN item (street type word) is located, the items before and
    after it are counted to decide on which side the name lies, then number,
    age, adjective and name are collected and turned into a StreetReferent.

    Args:
        sli(list of StreetItemToken): candidate items; the list may be
            modified in place (items can be deleted from it).
        ext_onto_regim(bool): passed on to nested calls and used when counting
            number items after the noun.
        for_metro(bool): the street is a metro station; the type slot is then
            written as the literal metro type instead of the noun text.

    Returns:
        AddressItemToken: token of type STREET, or None when no street is recognized.
    """
    # NOTE(review): the block nesting below was restored from a copy whose
    # indentation had been lost - confirm against the canonical file.
    if (sli is None or len(sli) == 0):
        return None
    # --- 1. Find the noun item (index i) and handle special single-noun cases.
    i = 0
    while i < len(sli):
        if (i == 0 and sli[i].typ == StreetItemType.FIX and ((len(sli) == 1 or sli[1].typ != StreetItemType.NOUN))):
            return StreetDefineHelper.__tryParseFix(sli)
        elif (sli[i].typ == StreetItemType.NOUN):
            # Two nouns in a row (street + microdistrict): merge into the second one.
            if ((i == 0 and sli[i].termin.canonic_text == "УЛИЦА" and ((i + 2) < len(sli))) and sli[i + 1].typ == StreetItemType.NOUN and sli[i + 1].termin.canonic_text == "МИКРОРАЙОН"):
                sli[i + 1].begin_token = sli[i].begin_token
                del sli[i]
            if (sli[i].termin.canonic_text == "МЕТРО"):
                if ((i + 1) < len(sli)):
                    # Parse everything after the metro word as a metro name.
                    sli1 = list()
                    ii = i + 1
                    while ii < len(sli):
                        sli1.append(sli[ii])
                        ii += 1
                    str1 = StreetDefineHelper._tryParseStreet(sli1, ext_onto_regim, True)
                    if (str1 is not None):
                        str1.begin_token = sli[i].begin_token
                        str1.is_doubt = sli[i].is_abridge
                        if (sli[i + 1].is_in_brackets):
                            str1.is_doubt = False
                        return str1
                elif (i == 1 and sli[0].typ == StreetItemType.NAME):
                    for_metro = True
                    break
                if (i == 0 and len(sli) > 0):
                    for_metro = True
                    break
                return None
            # A lone noun that is itself a complete address element.
            if (i == 0 and (i + 1) >= len(sli) and ((sli[i].termin.canonic_text == "ВОЕННЫЙ ГОРОДОК" or sli[i].termin.canonic_text == "ПРОМЗОНА"))):
                stri0 = StreetReferent()
                stri0.addSlot(StreetReferent.ATTR_TYP, "микрорайон", False, 0)
                stri0.addSlot(StreetReferent.ATTR_NAME, sli[i].termin.canonic_text, False, 0)
                return AddressItemToken._new85(AddressItemToken.ItemType.STREET, sli[0].begin_token, sli[0].end_token, stri0, True)
            if (i == 0 and (i + 1) >= len(sli) and sli[i].termin.canonic_text == "МИКРОРАЙОН"):
                stri0 = StreetReferent()
                stri0.addSlot(StreetReferent.ATTR_TYP, sli[i].termin.canonic_text.lower(), False, 0)
                return AddressItemToken._new85(AddressItemToken.ItemType.STREET, sli[0].begin_token, sli[0].end_token, stri0, True)
            # This noun directly followed by a number with postfix is rejected.
            if (sli[i].termin.canonic_text == "ПЛОЩАДЬ" or sli[i].termin.canonic_text == "ПЛОЩА"):
                tt = sli[i].end_token.next0_
                if (tt is not None and ((tt.is_hiphen or tt.isChar(':')))):
                    tt = tt.next0_
                nex = NumberHelper.tryParseNumberWithPostfix(tt)
                if (nex is not None):
                    return None
            break
        i += 1
    if (i >= len(sli)):
        # No noun at all.
        return StreetDefineHelper.__tryDetectNonNoun(sli, ext_onto_regim, for_metro)
    name = None
    number = None
    age = None
    adj = None
    noun = sli[i]
    alt_noun = None
    is_micro_raion = (noun.termin.canonic_text == "МИКРОРАЙОН" or noun.termin.canonic_text == "МІКРОРАЙОН" or noun.termin.canonic_text == "КВАРТАЛ") or LanguageHelper.endsWith(noun.termin.canonic_text, "ГОРОДОК")
    # --- 2. Count the usable items before and after the noun.
    before = 0
    after = 0
    j = 0
    while j < i:
        if ((sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX) or sli[j].typ == StreetItemType.STDADJECTIVE or sli[j].typ == StreetItemType.STDPARTOFNAME):
            before += 1
        elif (sli[j].typ == StreetItemType.NUMBER):
            if (sli[j].is_newline_after):
                return None
            # NOTE(review): unlike the "after" loop below, `number` is not
            # checked for None here - confirm it cannot be None.
            if (sli[j].number.morph.class0_.is_adjective):
                before += 1
            elif (is_micro_raion):
                before += 1
            # NOTE(review): this reads sli[i] (the noun) while the mirrored
            # "after" loop reads sli[j] - possibly a typo, confirm.
            elif (sli[i].number_has_prefix):
                before += 1
        else:
            before += 1
        j += 1
    j = (i + 1)
    while j < len(sli):
        if ((sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX) or sli[j].typ == StreetItemType.STDADJECTIVE or sli[j].typ == StreetItemType.STDPARTOFNAME):
            after += 1
        elif (sli[j].typ == StreetItemType.NUMBER):
            if (sli[j].number is not None and sli[j].number.morph.class0_.is_adjective):
                after += 1
            elif (is_micro_raion):
                after += 1
            elif (sli[j].number_has_prefix):
                after += 1
            elif (ext_onto_regim):
                after += 1
        elif (sli[j].typ == StreetItemType.NOUN):
            break
        else:
            after += 1
        j += 1
    # --- 3. Choose the index range [n0, n1] that holds the name parts;
    # rli collects the items that make up the result.
    rli = list()
    if (before > after):
        if (noun.termin.canonic_text == "МЕТРО"):
            return None
        tt = sli[0].begin_token
        # A single non-adjective word right before the noun needs extra
        # evidence (house after, end of text, geo object before, ...).
        if (tt == sli[0].end_token and noun.begin_token == sli[0].end_token.next0_):
            if (not tt.morph.class0_.is_adjective and not ((isinstance(tt, NumberToken)))):
                if ((sli[0].is_newline_before or not MiscLocationHelper.checkGeoObjectBefore(sli[0].begin_token) or noun.morph.case_.is_genitive) or noun.morph.case_.is_instrumental):
                    ok = False
                    if (AddressItemToken.checkHouseAfter(noun.end_token.next0_, False, True)):
                        ok = True
                    elif (noun.end_token.next0_ is None):
                        ok = True
                    elif (noun.is_newline_after and MiscLocationHelper.checkGeoObjectBefore(sli[0].begin_token)):
                        ok = True
                    if (not ok):
                        if ((noun.chars.is_latin_letter and noun.chars.is_capital_upper and sli[0].chars.is_latin_letter) and sli[0].chars.is_capital_upper):
                            ok = True
                    if (not ok):
                        return None
        n0 = 0
        n1 = (i - 1)
    elif (i == 1 and sli[0].typ == StreetItemType.NUMBER):
        # Number, noun, then the name parts.
        if (not sli[0].is_whitespace_after):
            return None
        number = (sli[0].value if sli[0].number is None else str(sli[0].number.int_value))
        if (sli[0].is_number_km):
            number += "км"
        n0 = (i + 1)
        n1 = (len(sli) - 1)
        rli.append(sli[0])
        rli.append(sli[i])
    elif (after > before):
        n0 = (i + 1)
        n1 = (len(sli) - 1)
        rli.append(sli[i])
    elif (after == 0):
        return None
    elif ((len(sli) > 2 and ((sli[0].typ == StreetItemType.NAME or sli[0].typ == StreetItemType.STDADJECTIVE or sli[0].typ == StreetItemType.STDNAME)) and sli[1].typ == StreetItemType.NOUN) and sli[2].typ == StreetItemType.NUMBER):
        # name + noun + number: decide whether the number belongs to the street.
        n0 = 0
        n1 = 0
        num = False
        tt2 = sli[2].end_token.next0_
        if (sli[2].is_number_km):
            num = True
        elif (sli[0].begin_token.previous is not None and sli[0].begin_token.previous.isValue("КИЛОМЕТР", None)):
            sli[2].is_number_km = True
            num = True
        elif (sli[2].begin_token.previous.is_comma):
            pass
        elif (sli[2].begin_token != sli[2].end_token):
            num = True
        elif (AddressItemToken.checkHouseAfter(sli[2].end_token.next0_, False, True)):
            num = True
        elif (sli[2].morph.class0_.is_adjective and (sli[2].whitespaces_before_count < 2)):
            if (sli[2].end_token.next0_ is None or sli[2].end_token.is_comma or sli[2].is_newline_after):
                num = True
        if (num):
            number = (sli[2].value if sli[2].number is None else str(sli[2].number.int_value))
            if (sli[2].is_number_km):
                number += "км"
            rli.append(sli[2])
        else:
            # Drop the number and everything after it.
            del sli[2:2+len(sli) - 2]
    else:
        return None
    # --- 4. Walk the chosen range and pick number / age / adjective / name.
    sec_number = None
    j = n0
    first_pass2732 = True
    while True:
        # Emulates a for-loop header so that `continue` still advances j.
        if first_pass2732: first_pass2732 = False
        else: j += 1
        if (not (j <= n1)): break
        if (sli[j].typ == StreetItemType.NUMBER):
            if (age is not None or ((sli[j].is_newline_before and j > 0))):
                break
            if (number is not None):
                # A second number is accepted only next to a standard name.
                if (name is not None and name.typ == StreetItemType.STDNAME):
                    sec_number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value))
                    if (sli[j].is_number_km):
                        sec_number += "км"
                    rli.append(sli[j])
                    continue
                if (((j + 1) < len(sli)) and sli[j + 1].typ == StreetItemType.STDNAME):
                    sec_number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value))
                    if (sli[j].is_number_km):
                        sec_number += "км"
                    rli.append(sli[j])
                    continue
                break
            # Plain digits (not an ordinal adjective) need positional evidence.
            if (sli[j].number is not None and sli[j].number.typ == NumberSpellingType.DIGIT and not sli[j].number.morph.class0_.is_adjective):
                if (sli[j].whitespaces_before_count > 2 and j > 0):
                    break
                if (sli[j].number is not None and sli[j].number.int_value > 20):
                    if (j > n0):
                        if (((j + 1) < len(sli)) and sli[j + 1].typ == StreetItemType.NOUN):
                            pass
                        else:
                            break
                if (j == n0 and n0 > 0):
                    pass
                elif (j == n0 and n0 == 0 and sli[j].whitespaces_after_count == 1):
                    pass
                elif (sli[j].number_has_prefix):
                    pass
                elif (j == n1 and ((n1 + 1) < len(sli)) and sli[n1 + 1].typ == StreetItemType.NOUN):
                    pass
                else:
                    break
            number = (sli[j].value if sli[j].number is None else str(sli[j].number.int_value))
            if (sli[j].is_number_km):
                number += "км"
            rli.append(sli[j])
        elif (sli[j].typ == StreetItemType.AGE):
            if (number is not None or age is not None):
                break
            age = str(sli[j].number.int_value)
            rli.append(sli[j])
        elif (sli[j].typ == StreetItemType.STDADJECTIVE):
            if (adj is not None):
                return None
            adj = sli[j]
            rli.append(sli[j])
        elif (sli[j].typ == StreetItemType.NAME or sli[j].typ == StreetItemType.STDNAME or sli[j].typ == StreetItemType.FIX):
            if (name is not None):
                if (j > 1 and sli[j - 2].typ == StreetItemType.NOUN):
                    break
                elif (i < j):
                    break
                else:
                    return None
            name = sli[j]
            rli.append(sli[j])
        elif (sli[j].typ == StreetItemType.STDPARTOFNAME and j == n1):
            if (name is not None):
                break
            name = sli[j]
            rli.append(sli[j])
        elif (sli[j].typ == StreetItemType.NOUN):
            # A leading street noun followed by another noun: the second noun
            # wins and the first one is kept as the alternative type.
            if ((sli[0] == noun and ((noun.termin.canonic_text == "УЛИЦА" or noun.termin.canonic_text == "ВУЛИЦЯ")) and j > 0) and name is None):
                alt_noun = noun
                noun = sli[j]
                rli.append(sli[j])
            else:
                break
    if (((n1 < i) and number is None and ((i + 1) < len(sli))) and sli[i + 1].typ == StreetItemType.NUMBER and sli[i + 1].number_has_prefix):
        # Name before the noun, prefixed number right after it.
        number = (sli[i + 1].value if sli[i + 1].number is None else str(sli[i + 1].number.int_value))
        rli.append(sli[i + 1])
    elif ((((i < n0) and ((name is not None or adj is not None)) and (j < len(sli))) and sli[j].typ == StreetItemType.NOUN and ((noun.termin.canonic_text == "УЛИЦА" or noun.termin.canonic_text == "ВУЛИЦЯ"))) and (((sli[j].termin.canonic_text == "ПЛОЩАДЬ" or sli[j].termin.canonic_text == "БУЛЬВАР" or sli[j].termin.canonic_text == "ПЛОЩА") or sli[j].termin.canonic_text == "МАЙДАН" or (j + 1) == len(sli)))):
        alt_noun = noun
        noun = sli[j]
        rli.append(sli[j])
    # --- 5. Reject results that have too little evidence.
    if (name is None):
        if (number is None and adj is None):
            return None
        if (noun.is_abridge):
            if (is_micro_raion):
                pass
            elif (noun.termin is not None and ((noun.termin.canonic_text == "ПРОЕЗД" or noun.termin.canonic_text == "ПРОЇЗД"))):
                pass
            elif (adj is None or adj.is_abridge):
                return None
        if (adj is not None and adj.is_abridge):
            return None
    if (not sli[i] in rli):
        rli.append(sli[i])
    # --- 6. Create the referent and the result token spanning all used items.
    street = StreetReferent()
    if (not for_metro):
        street.addSlot(StreetReferent.ATTR_TYP, noun.termin.canonic_text.lower(), False, 0)
        if (noun.alt_termin is not None):
            if (noun.alt_termin.canonic_text == "ПРОСПЕКТ" and number is not None):
                pass
            else:
                street.addSlot(StreetReferent.ATTR_TYP, noun.alt_termin.canonic_text.lower(), False, 0)
    else:
        street.addSlot(StreetReferent.ATTR_TYP, "метро", False, 0)
    res = AddressItemToken._new82(AddressItemToken.ItemType.STREET, rli[0].begin_token, rli[0].end_token, street)
    for r in rli:
        if (res.begin_char > r.begin_char):
            res.begin_token = r.begin_token
        if (res.end_char < r.end_char):
            res.end_token = r.end_token
    if (for_metro and noun in rli and noun.termin.canonic_text == "МЕТРО"):
        rli.remove(noun)
    # Initial doubt estimate based on the noun.
    if (noun.is_abridge and (noun.length_char < 4)):
        res.is_doubt = True
    elif (noun.noun_is_doubt_coef > 0):
        res.is_doubt = True
        if ((name is not None and name.end_char > noun.end_char and noun.chars.is_all_lower) and not name.chars.is_all_lower and not ((isinstance(name.begin_token, ReferentToken)))):
            npt2 = NounPhraseHelper.tryParse(name.begin_token, NounPhraseParseAttr.NO, 0)
            if (npt2 is not None and npt2.end_char > name.end_char):
                pass
            elif (AddressItemToken.checkHouseAfter(res.end_token.next0_, False, False)):
                res.is_doubt = False
            elif (name.chars.is_capital_upper and noun.noun_is_doubt_coef == 1):
                res.is_doubt = False
    # --- 7. Build the name strings (base plus up to two alternatives).
    name_base = io.StringIO()
    name_alt = io.StringIO()
    name_alt2 = None
    gen = noun.termin.gender
    adj_gen = MorphGender.UNDEFINED
    if (number is not None):
        street.number = number
        if (sec_number is not None):
            street.sec_number = sec_number
    if (age is not None):
        if (street.number is None):
            street.number = age
        else:
            street.sec_number = age
    if (name is not None and name.value is not None):
        # The item already carries a ready value.
        if (street.kind == StreetKind.ROAD):
            for r in rli:
                if (r.typ == StreetItemType.NAME and r != name):
                    print(r.value, end="", file=name_alt)
                    break
        if (name.alt_value is not None and name_alt.tell() == 0):
            print("{0} {1}".format(Utils.toStringStringIO(name_base), name.alt_value), end="", file=name_alt, flush=True)
        print(" {0}".format(name.value), end="", file=name_base, flush=True)
    elif (name is not None):
        # Decide whether the last word of the name behaves like an adjective.
        is_adj = False
        if (isinstance(name.end_token, TextToken)):
            for wf in name.end_token.morph.items:
                if ((isinstance(wf, MorphWordForm)) and (wf).is_in_dictionary):
                    is_adj = (wf.class0_.is_adjective | wf.class0_.is_proper_geo)
                    adj_gen = wf.gender
                    break
                elif (wf.class0_.is_adjective | wf.class0_.is_proper_geo):
                    is_adj = True
        if (is_adj):
            # tmp holds the leading words; vars0_ holds candidate normal forms
            # of the last word.
            tmp = io.StringIO()
            vars0_ = list()
            t = name.begin_token
            while t is not None:
                tt = Utils.asObjectOrNull(t, TextToken)
                if (tt is None):
                    break
                if (tmp.tell() > 0):
                    print(' ', end="", file=tmp)
                if (t == name.end_token):
                    # is_padez: the last word may be in an oblique case and
                    # then has to be normalized.
                    is_padez = False
                    if (not noun.is_abridge):
                        if (not noun.morph.case_.is_undefined and not noun.morph.case_.is_nominative):
                            is_padez = True
                        elif (noun.termin.canonic_text == "ШОССЕ" or noun.termin.canonic_text == "ШОСЕ"):
                            is_padez = True
                    if (res.begin_token.previous is not None and res.begin_token.previous.morph.class0_.is_preposition):
                        is_padez = True
                    if (not is_padez):
                        print(tt.term, end="", file=tmp)
                        break
                    for wf in tt.morph.items:
                        if (((wf.class0_.is_adjective or wf.class0_.is_proper_geo)) and (((wf.gender) & (gen))) != (MorphGender.UNDEFINED)):
                            if (noun.morph.case_.is_undefined or not ((wf.case_) & noun.morph.case_).is_undefined):
                                wff = Utils.asObjectOrNull(wf, MorphWordForm)
                                if (wff is None):
                                    continue
                                if (gen == MorphGender.MASCULINE and "ОЙ" in wff.normal_case):
                                    continue
                                if (not wff.normal_case in vars0_):
                                    vars0_.append(wff.normal_case)
                    if (not tt.term in vars0_ and Utils.indexOfList(sli, name, 0) > Utils.indexOfList(sli, noun, 0)):
                        vars0_.append(tt.term)
                    if (len(vars0_) == 0):
                        vars0_.append(tt.term)
                    break
                if (not tt.is_hiphen):
                    print(tt.term, end="", file=tmp)
                t = t.next0_
            if (len(vars0_) == 0):
                print(" {0}".format(Utils.toStringStringIO(tmp)), end="", file=name_base, flush=True)
            else:
                head = Utils.toStringStringIO(name_base)
                print(" {0}{1}".format(Utils.toStringStringIO(tmp), vars0_[0]), end="", file=name_base, flush=True)
                if (len(vars0_) > 1):
                    Utils.setLengthStringIO(name_alt, 0)
                    print("{0} {1}{2}".format(head, Utils.toStringStringIO(tmp), vars0_[1]), end="", file=name_alt, flush=True)
                if (len(vars0_) > 2):
                    name_alt2 = "{0} {1}{2}".format(head, Utils.toStringStringIO(tmp), vars0_[2])
        else:
            # Not adjective-like: join the terms of the name tokens.
            str_nam = None
            nits = list()
            has_adj = False
            has_proper_name = False
            t = name.begin_token
            while t is not None:
                if (t.morph.class0_.is_adjective or t.morph.class0_.is_conjunction):
                    has_adj = True
                if ((isinstance(t, TextToken)) and not t.is_hiphen):
                    if (name.termin is not None):
                        nits.append(name.termin.canonic_text)
                        break
                    elif (not t.chars.is_letter and len(nits) > 0):
                        nits[len(nits) - 1] += (t).term
                    else:
                        nits.append((t).term)
                        if (t == name.begin_token and t.getMorphClassInDictionary().is_proper_name):
                            has_proper_name = True
                elif ((isinstance(t, ReferentToken)) and name.termin is None):
                    nits.append(t.getSourceText().upper())
                if (t == name.end_token):
                    break
                t = t.next0_
            # Parts are sorted unless an adjective/conjunction or a leading
            # proper name was seen.
            if (not has_adj and not has_proper_name):
                nits.sort()
            str_nam = Utils.joinStrings(" ", list(nits))
            if (has_proper_name and len(nits) == 2):
                Utils.setLengthStringIO(name_alt, 0)
                print("{0} {1}".format(Utils.toStringStringIO(name_base), nits[1]), end="", file=name_alt, flush=True)
            print(" {0}".format(str_nam), end="", file=name_base, flush=True)
    # --- 8. Inflect the standard adjective to agree with the name / noun.
    adj_str = None
    adj_can_be_initial = False
    if (adj is not None):
        if (adj_gen == MorphGender.UNDEFINED and name is not None and (((name.morph.number) & (MorphNumber.PLURAL))) == (MorphNumber.UNDEFINED)):
            if (name.morph.gender == MorphGender.FEMINIE or name.morph.gender == MorphGender.MASCULINE or name.morph.gender == MorphGender.NEUTER):
                adj_gen = name.morph.gender
        if (name is not None and (((name.morph.number) & (MorphNumber.PLURAL))) != (MorphNumber.UNDEFINED)):
            s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new209(MorphClass.ADJECTIVE, MorphNumber.PLURAL))
        elif (adj_gen != MorphGender.UNDEFINED):
            s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, adj_gen))
        elif ((((adj.morph.gender) & (gen))) == (MorphGender.UNDEFINED)):
            s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, adj.morph.gender))
        else:
            s = Morphology.getWordform(adj.termin.canonic_text, MorphBaseInfo._new210(MorphClass.ADJECTIVE, gen))
        adj_str = s
        # A short capitalized abbreviation ending with '.' before the name may
        # be an initial rather than an adjective.
        if (name is not None and (Utils.indexOfList(sli, adj, 0) < Utils.indexOfList(sli, name, 0))):
            if (adj.end_token.isChar('.') and adj.length_char <= 3 and not adj.begin_token.chars.is_all_lower):
                adj_can_be_initial = True
    # --- 9. Store the name slots.
    s1 = Utils.toStringStringIO(name_base).strip()
    s2 = Utils.toStringStringIO(name_alt).strip()
    if (len(s1) < 3):
        if (street.number is not None):
            if (adj_str is not None):
                if (adj.is_abridge):
                    return None
                street.addSlot(StreetReferent.ATTR_NAME, adj_str, False, 0)
        elif (adj_str is None):
            if (len(s1) < 1):
                return None
            if (is_micro_raion):
                street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
                if (not Utils.isNullOrEmpty(s2)):
                    street.addSlot(StreetReferent.ATTR_NAME, s2, False, 0)
            else:
                return None
        else:
            if (adj.is_abridge):
                return None
            street.addSlot(StreetReferent.ATTR_NAME, adj_str, False, 0)
    elif (adj_can_be_initial):
        street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
        street.addSlot(StreetReferent.ATTR_NAME, MiscHelper.getTextValue(adj.begin_token, name.end_token, GetTextAttr.NO), False, 0)
        street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0)
    elif (adj_str is None):
        street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
    else:
        street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0)
    if (name_alt.tell() > 0):
        s1 = Utils.toStringStringIO(name_alt).strip()
        if (adj_str is None):
            street.addSlot(StreetReferent.ATTR_NAME, s1, False, 0)
        else:
            street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, s1), False, 0)
    if (name_alt2 is not None):
        if (adj_str is None):
            # NOTE(review): the guard tests `noun` but the body dereferences
            # `alt_noun`, which may still be None here - confirm the intent.
            if (for_metro and noun is not None):
                street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(alt_noun.termin.canonic_text, name_alt2.strip()), False, 0)
            else:
                street.addSlot(StreetReferent.ATTR_NAME, name_alt2.strip(), False, 0)
        else:
            street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(adj_str, name_alt2.strip()), False, 0)
    if (name is not None and name.alt_value2 is not None):
        street.addSlot(StreetReferent.ATTR_NAME, name.alt_value2, False, 0)
    if ((name is not None and adj is None and name.exist_street is not None) and not for_metro):
        for n in name.exist_street.names:
            street.addSlot(StreetReferent.ATTR_NAME, n, False, 0)
    if (alt_noun is not None and not for_metro):
        street.addSlot(StreetReferent.ATTR_TYP, alt_noun.termin.canonic_text.lower(), False, 0)
    # --- 10. Post-processing of doubt flag and special noun types.
    if (noun.termin.canonic_text == "ПЛОЩАДЬ" or noun.termin.canonic_text == "КВАРТАЛ" or noun.termin.canonic_text == "ПЛОЩА"):
        # These nouns are doubtful unless the context supports them.
        res.is_doubt = True
        if (name is not None and name.is_in_dictionary):
            res.is_doubt = False
        elif (alt_noun is not None or for_metro):
            res.is_doubt = False
        elif (res.begin_token.previous is None or MiscLocationHelper.checkGeoObjectBefore(res.begin_token.previous)):
            if (res.end_token.next0_ is None or AddressItemToken.checkHouseAfter(res.end_token.next0_, False, True)):
                res.is_doubt = False
    if (LanguageHelper.endsWith(noun.termin.canonic_text, "ГОРОДОК")):
        # The noun text becomes part of the name; the type is replaced.
        for s in street.slots:
            if (s.type_name == StreetReferent.ATTR_TYP):
                street.uploadSlot(s, "микрорайон")
            elif (s.type_name == StreetReferent.ATTR_NAME):
                street.uploadSlot(s, "{0} {1}".format(noun.termin.canonic_text, s.value))
        if (street.findSlot(StreetReferent.ATTR_NAME, None, True) is None):
            street.addSlot(StreetReferent.ATTR_NAME, noun.termin.canonic_text, False, 0)
    # One more noun right after the result, followed by a house: absorb it
    # as the type and move the former types into the names.
    t1 = res.end_token.next0_
    if (t1 is not None and t1.is_comma):
        t1 = t1.next0_
    non = StreetItemToken.tryParse(t1, None, False, None, False)
    if (non is not None and non.typ == StreetItemType.NOUN and len(street.typs) > 0):
        if (AddressItemToken.checkHouseAfter(non.end_token.next0_, False, True)):
            street._correct()
            nams = street.names
            for t in street.typs:
                for n in nams:
                    street.addSlot(StreetReferent.ATTR_NAME, "{0} {1}".format(t.upper(), n), False, 0)
            street.addSlot(StreetReferent.ATTR_TYP, non.termin.canonic_text.lower(), False, 0)
            res.end_token = non.end_token
    if (res.is_doubt):
        if (noun.is_road):
            if (street.number is not None and Utils.endsWithString(street.number, "КМ", True)):
                res.is_doubt = False
            elif (AddressItemToken.checkKmAfter(res.end_token.next0_)):
                res.is_doubt = False
            elif (AddressItemToken.checkKmBefore(res.begin_token.previous)):
                res.is_doubt = False
        elif (noun.termin.canonic_text == "ПРОЕЗД" and street.findSlot(StreetReferent.ATTR_NAME, "ПРОЕКТИРУЕМЫЙ", True) is not None):
            res.is_doubt = False
        # A street referent right before (commas / "and" skipped) removes the doubt.
        tt0 = res.begin_token.previous
        first_pass2733 = True
        while True:
            if first_pass2733: first_pass2733 = False
            else: tt0 = tt0.previous
            if (not (tt0 is not None)): break
            if (tt0.isCharOf(",,") or tt0.is_comma_and):
                continue
            str0 = Utils.asObjectOrNull(tt0.getReferent(), StreetReferent)
            if (str0 is not None):
                res.is_doubt = False
            break
    # This noun without a number: take a number that directly follows.
    if (noun.termin.canonic_text == "КВАРТАЛ" and (res.whitespaces_after_count < 2) and number is None):
        ait = AddressItemToken.tryParse(res.end_token.next0_, None, False, True, None)
        if (ait is not None and ait.typ == AddressItemToken.ItemType.NUMBER and ait.value is not None):
            street.addSlot(StreetReferent.ATTR_NUMBER, ait.value, False, 0)
            res.end_token = ait.end_token
    return res
def getNormalCaseText(self, mc: 'MorphClass' = None, single_number: bool = False, gender: 'MorphGender' = MorphGender.UNDEFINED, keep_chars: bool = False) -> str:
    """Return the normalized text of this noun phrase.

    Args:
        mc: optional morphological class; only its ``is_adjective`` flag is
            consulted here (it is also forwarded to the token-level call).
        single_number: prefer the stored singular value of a variant.
        gender: gender passed to the word-form / number-adjective helpers.
        keep_chars: forwarded to ``__corrChars`` and to the token-level call.

    Returns:
        The chosen normalized text, or ``"?"`` when nothing could be chosen.
    """
    head = self.begin_token
    # A phrase consisting of one referent token delegates to that token.
    if isinstance(head, ReferentToken) and head == self.end_token:
        return head.getNormalCaseText(mc, single_number, gender, keep_chars)

    singular_forms = {"ДЕТИ": "РЕБЕНОК", "ЛЮДИ": "ЧЕЛОВЕК"}
    best = None
    top_coef = 0
    sure_rank = -1
    for item in self.morph.items:
        var = Utils.asObjectOrNull(item, NounPhraseItemTextVar)
        coef = var.undef_coef
        # Skip uncertain variants that are weaker than one already seen, or
        # that arrive after a certain (coef == 0) variant was accepted.
        if coef > 0 and (coef < top_coef or sure_rank >= 0):
            continue

        if single_number and var.single_number_value is not None:
            if (mc is not None
                    and gender in (MorphGender.NEUTER, MorphGender.FEMINIE)
                    and mc.is_adjective):
                info = MorphBaseInfo._new467(
                    MorphClass(mc), gender, MorphNumber.SINGULAR,
                    MorphCase.NOMINATIVE, self.morph.language)
                form = Morphology.getWordform(var.single_number_value, info)
                if form is not None:
                    best = form
            else:
                best = var.single_number_value
            if coef == 0:
                break
            top_coef = coef
            continue

        if Utils.isNullOrEmpty(var.normal_value):
            continue

        # A numeric value requested as an adjective is spelled out.
        if str.isdigit(var.normal_value[0]) and mc is not None and mc.is_adjective:
            parsed = RefOutArgWrapper(0)
            if Utils.tryParseInt(var.normal_value, parsed):
                val = parsed.value
                number_kind = (MorphNumber.SINGULAR
                               if single_number or val == 1
                               else MorphNumber.PLURAL)
                form = NumberHelper.getNumberAdjective(val, gender, number_kind)
                if form is not None:
                    best = form
                    if coef == 0:
                        break
                    top_coef = coef
                    continue

        text = item.normal_value
        if single_number:
            text = singular_forms.get(text, text)
        top_coef = coef
        if coef > 0:
            best = text
            continue

        # Rank 1: the variant equals the first token's own term and is
        # nominative singular (not applied to a certain adjective request).
        rank = 0
        if (not (mc is not None and mc.is_adjective and coef == 0)
                and isinstance(self.begin_token, TextToken)
                and text == self.begin_token.term
                and item.case_.is_nominative
                and item.number == MorphNumber.SINGULAR):
            rank = 1
        if best is None or rank > sure_rank:
            best = text
            sure_rank = rank
            if rank > 0:
                break

    if best is not None:
        return self.__corrChars(best, keep_chars)
    # Nothing chosen: a one-token phrase falls back to the token itself.
    if self.begin_token == self.end_token:
        best = self.begin_token.getNormalCaseText(mc, single_number, gender, keep_chars)
    return Utils.ifNotNull(best, "?")
def __TryAttach(t: 'Token', prev: 'OrgItemNameToken', ext_onto: bool) -> 'OrgItemNameToken':
    """Try to build one organization-name item starting at token ``t``.

    Args:
        t: first token to analyse; ``None`` yields ``None``.
        prev: the previously attached name item, or ``None``. Several
            branches compare its ``chars`` / ``morph.case_`` with the
            candidate.
        ext_onto: when true, any parsed noun phrase is accepted as an item
            (see the second ``elif`` of the noun-phrase chain).

    Returns:
        A new ``OrgItemNameToken`` or ``None`` when nothing suitable starts
        at ``t``.

    NOTE(review): the function takes no ``self`` and calls itself through
    ``OrgItemNameToken.__TryAttach``, so it is presumably declared as a
    static method outside this view - confirm.
    """
    if (t is None):
        return None
    r = t.getReferent()
    # A token that already carries a referent is accepted in two cases only.
    if (r is not None):
        if (r.type_name == "DENOMINATION"):
            return OrgItemNameToken._new1689(
                t, t, r.toString(True, t.kit.base_language, 0), True)
        if ((isinstance(r, GeoReferent)) and t.chars.is_latin_letter):
            # Latin geo name: glue it in front of the following Latin item.
            res2 = OrgItemNameToken.__TryAttach(t.next0_, prev, ext_onto)
            if (res2 is not None and res2.chars.is_latin_letter):
                res2.begin_token = t
                res2.value = "{0} {1}".format(
                    MiscHelper.getTextValueOfMetaToken(
                        Utils.asObjectOrNull(t, MetaToken), GetTextAttr.NO),
                    res2.value)
                res2.is_in_dictionary = False
                return res2
        return None
    tt = Utils.asObjectOrNull(t, TextToken)
    if (tt is None):
        return None
    res = None
    # Dictionary lookups: standard tails, standard names, English items.
    # For tails and English items a leading comma is skipped.
    tok = OrgItemNameToken.__m_std_tails.tryParse(t, TerminParseAttr.NO)
    if (tok is None and t.isChar(',')):
        tok = OrgItemNameToken.__m_std_tails.tryParse(
            t.next0_, TerminParseAttr.NO)
    if (tok is not None):
        return OrgItemNameToken._new1690(t, tok.end_token,
                                         tok.termin.canonic_text,
                                         tok.termin.tag is None,
                                         tok.termin.tag is not None,
                                         tok.morph)
    tok = OrgItemNameToken.__m_std_names.tryParse(t, TerminParseAttr.NO)
    if ((tok) is not None):
        return OrgItemNameToken._new1691(t, tok.end_token,
                                         tok.termin.canonic_text, True)
    eng = OrgItemEngItem.tryAttach(t, False)
    if (eng is None and t.isChar(',')):
        eng = OrgItemEngItem.tryAttach(t.next0_, False)
    if (eng is not None):
        return OrgItemNameToken._new1692(t, eng.end_token, eng.full_value,
                                         True)
    # A lower-case token may only continue an all-lower or capitalized item.
    if (tt.chars.is_all_lower and prev is not None):
        if (not prev.chars.is_all_lower and not prev.chars.is_capital_upper):
            return None
    # ", <noun phrase> И <noun phrase>": both phrases must match prev in
    # chars and share a case, and neither may start an organization type.
    if (tt.isChar(',') and prev is not None):
        npt1 = NounPhraseHelper.tryParse(t.next0_, NounPhraseParseAttr.NO, 0)
        if (npt1 is None or npt1.chars != prev.chars
                or ((npt1.morph.case_) & prev.morph.case_).is_undefined):
            return None
        ty = OrgItemTypeToken.tryAttach(t.next0_, False, None)
        if (ty is not None):
            return None
        if (npt1.end_token.next0_ is None or not
                npt1.end_token.next0_.isValue("И", None)):
            return None
        t1 = npt1.end_token.next0_
        npt2 = NounPhraseHelper.tryParse(t1.next0_, NounPhraseParseAttr.NO, 0)
        if (npt2 is None or npt2.chars != prev.chars
                or ((npt2.morph.case_) & npt1.morph.case_ & prev.morph.case_).is_undefined):
            return None
        ty = OrgItemTypeToken.tryAttach(t1.next0_, False, None)
        if (ty is not None):
            return None
        # Only the first phrase is returned; the caller continues from it.
        res = OrgItemNameToken._new1693(
            npt1.begin_token, npt1.end_token, npt1.morph,
            npt1.getNormalCaseText(None, False, MorphGender.UNDEFINED, False))
        res.is_noun_phrase = True
        res.is_after_conjunction = True
        if (prev.preposition is not None):
            res.preposition = prev.preposition
        return res
    # "&" / "AND" / "UND" after a previous item: value is prefixed with "& ".
    if (((tt.isChar('&') or tt.isValue("AND", None) or tt.isValue("UND", None))) and prev is not None):
        if ((isinstance(tt.next0_, TextToken)) and tt.length_char == 1
                and tt.next0_.chars.is_latin_letter):
            res = OrgItemNameToken._new1694(tt, tt.next0_, tt.next0_.chars)
            res.is_after_conjunction = True
            res.value = ("& " + (tt.next0_).term)
            return res
        res = OrgItemNameToken.tryAttach(tt.next0_, None, ext_onto, False)
        if (res is None or res.chars != prev.chars):
            return None
        res.is_after_conjunction = True
        res.value = ("& " + res.value)
        return res
    if (not tt.chars.is_letter):
        return None
    # Derivate info for the last word of prev, used below to decide whether
    # the following noun phrase can depend on it (expl_ok).
    expinf = None
    if (prev is not None and prev.end_token.getMorphClassInDictionary().is_noun):
        wo = prev.end_token.getNormalCaseText(MorphClass.NOUN, True,
                                              MorphGender.UNDEFINED, False)
        expinf = Explanatory.findDerivates(wo, True,
                                           prev.end_token.morph.language)
    npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
    if (npt is not None and npt.internal_noun is not None):
        npt = (None)
    expl_ok = False
    if (npt is not None and expinf is not None):
        for ei in expinf:
            if (ei.nexts is not None and "" in ei.nexts):
                mc = ei.nexts[""]
                if (not ((mc) & npt.morph.case_).is_undefined):
                    expl_ok = True
                    break
            if (ei.transitive > 0):
                if (npt.morph.case_.is_genitive):
                    expl_ok = True
                    break
    # Main chain: noun phrase / conjunction / preposition / English "OF".
    if (npt is not None and ((expl_ok or npt.morph.case_.is_genitive or
                              ((prev is not None and not (
                                  (prev.morph.case_) & npt.morph.case_).is_undefined))))):
        mc = npt.begin_token.getMorphClassInDictionary()
        if (mc.is_verb or mc.is_pronoun):
            return None
        if (mc.is_adverb):
            # An adverb is tolerated only when followed by a hyphen.
            if (npt.begin_token.next0_ is not None and npt.begin_token.next0_.is_hiphen):
                pass
            else:
                return None
        if (mc.is_preposition):
            return None
        if (mc.is_noun and npt.chars.is_all_lower):
            ca = npt.morph.case_
            if ((not ca.is_dative and not ca.is_genitive and not ca.is_instrumental) and not ca.is_prepositional):
                return None
        res = OrgItemNameToken._new1693(
            npt.begin_token, npt.end_token, npt.morph,
            npt.getNormalCaseText(None, False, MorphGender.UNDEFINED, False))
        res.is_noun_phrase = True
        # Optionally absorb a directly following dependent phrase.
        if ((npt.end_token.whitespaces_after_count < 2) and (isinstance(npt.end_token.next0_, TextToken))):
            npt2 = NounPhraseHelper.tryParse(npt.end_token.next0_,
                                             NounPhraseParseAttr.NO, 0)
            if (npt2 is not None and npt2.morph.case_.is_genitive and npt2.chars.is_all_lower):
                typ = OrgItemTypeToken.tryAttach(npt.end_token.next0_, True,
                                                 None)
                epo = OrgItemEponymToken.tryAttach(npt.end_token.next0_,
                                                   False)
                rtt = t.kit.processReferent("PERSONPROPERTY",
                                            npt.end_token.next0_)
                if (typ is None and epo is None and ((rtt is None or rtt.morph.number == MorphNumber.PLURAL))):
                    res.end_token = npt2.end_token
                    res.value = "{0} {1}".format(
                        res.value,
                        MiscHelper.getTextValueOfMetaToken(
                            npt2, GetTextAttr.NO))
            elif (npt.end_token.next0_.is_comma and (isinstance(npt.end_token.next0_.next0_, TextToken))):
                # ", <word that is both adjective and verb> <phrase>"
                tt2 = npt.end_token.next0_.next0_
                mv2 = tt2.getMorphClassInDictionary()
                if (mv2.is_adjective and mv2.is_verb):
                    bi = MorphBaseInfo._new1696(npt.morph.case_,
                                                npt.morph.gender,
                                                npt.morph.number)
                    if (tt2.morph.checkAccord(bi, False)):
                        npt2 = NounPhraseHelper.tryParse(
                            tt2.next0_, NounPhraseParseAttr.NO, 0)
                        if (npt2 is not None and ((npt2.morph.case_.is_dative or npt2.morph.case_.is_genitive)) and npt2.chars.is_all_lower):
                            res.end_token = npt2.end_token
                            res.value = "{0} {1}".format(
                                res.value,
                                MiscHelper.getTextValue(
                                    npt.end_token.next0_, res.end_token,
                                    GetTextAttr.NO))
        if (expl_ok):
            res.is_after_conjunction = True
    elif (npt is not None and ((((prev is not None and prev.is_noun_phrase and npt.morph.case_.is_instrumental)) or ext_onto))):
        res = OrgItemNameToken._new1693(
            npt.begin_token, npt.end_token, npt.morph,
            npt.getNormalCaseText(None, False, MorphGender.UNDEFINED, False))
        res.is_noun_phrase = True
    elif (tt.is_and):
        # Conjunction: the following item must be a noun phrase sharing a
        # case with prev.
        res = OrgItemNameToken.tryAttach(tt.next0_, prev, ext_onto, False)
        if (res is None or not res.is_noun_phrase or prev is None):
            return None
        if (((prev.morph.case_) & res.morph.case_).is_undefined):
            return None
        if (prev.morph.number != MorphNumber.UNDEFINED and res.morph.number != MorphNumber.UNDEFINED):
            if ((((prev.morph.number) & (res.morph.number))) == (MorphNumber.UNDEFINED)):
                if (prev.chars != res.chars):
                    return None
                ty = OrgItemTypeToken.tryAttach(res.end_token.next0_, False,
                                                None)
                if (ty is not None):
                    return None
        # NOTE(review): reads and re-assigns the same value; looks like a
        # translation artifact with no visible effect - confirm.
        ci = res.chars
        res.chars = ci
        res.is_after_conjunction = True
        return res
    elif (((tt.term == "ПО" or tt.term == "ПРИ" or tt.term == "ЗА") or tt.term == "С" or tt.term == "В") or tt.term == "НА"):
        # Preposition + noun phrase; each preposition demands its own case.
        npt = NounPhraseHelper.tryParse(t.next0_, NounPhraseParseAttr.NO, 0)
        if (npt is not None):
            if (OrgItemNameToken.__m_vervot_words.tryParse(
                    npt.end_token, TerminParseAttr.NO) is not None):
                return None
            ok = False
            if (tt.term == "ПО"):
                ok = npt.morph.case_.is_dative
            elif (tt.term == "С"):
                ok = npt.morph.case_.is_instrumental
            elif (tt.term == "ЗА"):
                ok = (npt.morph.case_.is_genitive | npt.morph.case_.is_instrumental)
            elif (tt.term == "НА"):
                ok = npt.morph.case_.is_prepositional
            elif (tt.term == "В"):
                ok = (npt.morph.case_.is_dative | npt.morph.case_.is_prepositional)
                if (ok):
                    # "В" is kept only before "СФЕРА" / "ОБЛАСТЬ".
                    ok = False
                    if (t.next0_.isValue("СФЕРА", None) or t.next0_.isValue("ОБЛАСТЬ", None)):
                        ok = True
            elif (tt.term == "ПРИ"):
                ok = npt.morph.case_.is_prepositional
                if (ok):
                    # Reject when an org type or a whole organization follows.
                    if (OrgItemTypeToken.tryAttach(tt.next0_, True, None) is not None):
                        ok = False
                    else:
                        rt = tt.kit.processReferent(
                            OrganizationAnalyzer.ANALYZER_NAME, tt.next0_)
                        if (rt is not None):
                            ok = False
                s = npt.noun.getNormalCaseText(None, False,
                                               MorphGender.UNDEFINED, False)
                if (s == "ПОДДЕРЖКА" or s == "УЧАСТИЕ"):
                    ok = False
            else:
                ok = npt.morph.case_.is_prepositional
            if (ok):
                res = OrgItemNameToken._new1698(
                    t, npt.end_token, npt.morph,
                    npt.getNormalCaseText(None, True, MorphGender.UNDEFINED,
                                          False), npt.chars)
                res.is_noun_phrase = True
                res.preposition = tt.term
                # After "ДЕЛО" / "ВОПРОС" append following genitive items,
                # continuing across commas / conjunctions.
                if (((res.value == "ДЕЛО" or res.value == "ВОПРОС")) and not res.is_newline_after):
                    res2 = OrgItemNameToken.__TryAttach(
                        res.end_token.next0_, res, ext_onto)
                    if (res2 is not None and res2.morph.case_.is_genitive):
                        res.value = "{0} {1}".format(res.value, res2.value)
                        res.end_token = res2.end_token
                        ttt = res2.end_token.next0_
                        while ttt is not None:
                            if (not ttt.is_comma_and):
                                break
                            res3 = OrgItemNameToken.__TryAttach(
                                ttt.next0_, res2, ext_onto)
                            if (res3 is None):
                                break
                            res.value = "{0} {1}".format(
                                res.value, res3.value)
                            res.end_token = res3.end_token
                            if (ttt.is_and):
                                break
                            ttt = res.end_token
                            ttt = ttt.next0_
        if (res is None):
            return None
    elif (tt.term == "OF"):
        # English "OF [article] Name ...": extend over following Latin
        # words until a preposition, a non-Latin token or a wide gap.
        t1 = tt.next0_
        if (t1 is not None and MiscHelper.isEngArticle(t1)):
            t1 = t1.next0_
        if (t1 is not None and t1.chars.is_latin_letter and not t1.chars.is_all_lower):
            res = OrgItemNameToken._new1699(t, t1, t1.chars, t1.morph)
            ttt = t1.next0_
            first_pass3046 = True
            while True:
                if first_pass3046: first_pass3046 = False
                else: ttt = ttt.next0_
                if (not (ttt is not None)): break
                if (ttt.whitespaces_before_count > 2):
                    break
                if (MiscHelper.isEngAdjSuffix(ttt)):
                    ttt = ttt.next0_
                    continue
                if (not ttt.chars.is_latin_letter):
                    break
                if (ttt.morph.class0_.is_preposition):
                    break
                res.end_token = ttt
                t1 = res.end_token
            res.value = MiscHelper.getTextValue(t, t1,
                                                GetTextAttr.IGNOREARTICLES)
            res.preposition = tt.term
            return res
    # Fallback: take the single text token itself as the item.
    if (res is None):
        if (tt.chars.is_latin_letter and tt.length_char == 1):
            pass
        elif (tt.chars.is_all_lower or (tt.length_char < 2)):
            if (not tt.chars.is_latin_letter or
                    prev is None or not prev.chars.is_latin_letter):
                return None
        if (tt.chars.is_cyrillic_letter):
            mc = tt.getMorphClassInDictionary()
            if (mc.is_verb or mc.is_adverb):
                return None
        elif (tt.chars.is_latin_letter and not tt.is_whitespace_after):
            if (not tt.is_whitespace_after and (tt.length_char < 5)):
                if (isinstance(tt.next0_, NumberToken)):
                    return None
        res = OrgItemNameToken._new1700(tt, tt, tt.term, tt.morph)
        # Absorb tightly attached continuations: "A-B", "A/B", "A.B".
        t = tt.next0_
        while t is not None:
            if ((((t.is_hiphen or t.isCharOf("\\/"))) and t.next0_ is not None and (isinstance(t.next0_, TextToken))) and not t.is_whitespace_before and not t.is_whitespace_after):
                t = t.next0_
                res.end_token = t
                res.value = "{0}{1}{2}".format(
                    res.value, ('.' if t.previous.isChar('.') else '-'),
                    (t).term)
            elif (t.isChar('.')):
                if (not t.is_whitespace_after and not t.is_whitespace_before and (isinstance(t.next0_, TextToken))):
                    res.end_token = t.next0_
                    t = t.next0_
                    res.value = "{0}.{1}".format(res.value, (t).term)
                elif ((t.next0_ is not None and not t.is_newline_after and t.next0_.chars.is_latin_letter) and tt.chars.is_latin_letter):
                    res.end_token = t
                else:
                    break
            else:
                break
            t = t.next0_
    # Mark the item as "in dictionary" when any letter token inside it
    # (other than conjunctions / prepositions) has a dictionary word form.
    t0 = res.begin_token
    while t0 is not None:
        tt = Utils.asObjectOrNull(t0, TextToken)
        if ((tt) is not None and tt.is_letters):
            if (not tt.morph.class0_.is_conjunction and not tt.morph.class0_.is_preposition):
                for mf in tt.morph.items:
                    if ((mf).is_in_dictionary):
                        res.is_in_dictionary = True
        if (t0 == res.end_token):
            break
        t0 = t0.next0_
    # Single all-upper token glued to a number (optionally via a hyphen).
    if (res.begin_token == res.end_token and res.begin_token.chars.is_all_upper):
        if (res.end_token.next0_ is not None and not res.end_token.is_whitespace_after):
            t1 = res.end_token.next0_
            if (t1.next0_ is not None and not t1.is_whitespace_after and t1.is_hiphen):
                t1 = t1.next0_
            if (isinstance(t1, NumberToken)):
                res.value += str((t1).value)
                res.end_token = t1
    # Single token ending in lower-case letters: cut the value after the
    # last upper-case character of the source text.
    if (res.begin_token == res.end_token and res.begin_token.chars.is_last_lower):
        src = res.begin_token.getSourceText()
        for i in range(len(src) - 1, -1, -1):
            if (str.isupper(src[i])):
                res.value = src[0:0 + i + 1]
                break
    return res
def get_normal_case_text(self, mc: 'MorphClass' = None, num: 'MorphNumber' = MorphNumber.UNDEFINED, gender: 'MorphGender' = MorphGender.UNDEFINED, keep_chars: bool = False) -> str:
    """Return the normalized form of this text token.

    Args:
        mc: optional morphological class used to filter word forms; a
            preposition class short-circuits to the normalized preposition.
        num: requested number; ``SINGULAR`` switches to the full normal
            form for plural word forms.
        gender: requested gender used to filter word forms.
        keep_chars: when true, the result is lower-cased or capitalized
            according to ``self.chars``.

    Returns:
        The normalized text; ``None`` when word forms with a defined case
        exist but none produced a result.
    """
    from pullenti.ner.core.MiscHelper import MiscHelper

    def _restore_case(text):
        # Re-apply the token's own casing when the caller asked for it.
        if keep_chars:
            if self.chars.is_all_lower:
                return text.lower()
            if self.chars.is_capital_upper:
                return MiscHelper.convert_first_char_upper_and_other_lower(text)
        return text

    if mc is not None and mc.is_preposition:
        return LanguageHelper.normalize_preposition(self.term)

    no_case_seen = True
    for item in self.morph.items:
        if mc is not None and not mc.is_undefined:
            common = (item.class0_) & mc
            if common.is_undefined:
                continue
            if common.is_misc and not common.is_proper and mc != item.class0_:
                continue

        form = Utils.asObjectOrNull(item, MorphWordForm)
        use_full = False
        if (gender != MorphGender.UNDEFINED
                and ((item.gender) & (gender)) == (MorphGender.UNDEFINED)):
            # Gender mismatch: tolerated only for a masculine request when a
            # full normal form exists, or for personal pronouns.
            if (gender == MorphGender.MASCULINE
                    and (item.gender != MorphGender.UNDEFINED
                         or item.number == MorphNumber.PLURAL)
                    and form is not None
                    and form.normal_full is not None):
                use_full = True
            elif not (gender == MorphGender.MASCULINE
                      and item.class0_.is_personal_pronoun):
                continue

        if not item.case_.is_undefined:
            no_case_seen = False
        if form is None:
            continue

        if (num == MorphNumber.SINGULAR
                and item.number == MorphNumber.PLURAL
                and form.normal_full is not None):
            size = len(form.normal_case)
            # Keep normal_case when it is normal_full plus a trailing "СЯ".
            if (size == (len(form.normal_full) + 2) and size > 4
                    and form.normal_case[size - 2] == 'С'
                    and form.normal_case[size - 1] == 'Я'):
                text = form.normal_case
            else:
                text = form.normal_full
        elif use_full:
            text = form.normal_full
        else:
            text = Utils.ifNotNull(form.normal_case, self.term)

        if (num == MorphNumber.SINGULAR and mc is not None
                and mc == MorphClass.NOUN and text == "ДЕТИ"):
            text = "РЕБЕНОК"
        # The first usable word form wins.
        return _restore_case(text)

    if not no_case_seen:
        return None

    # No usable word form: ask the morphology service, else use the term.
    text = None
    if num == MorphNumber.SINGULAR and mc is not None:
        info = MorphBaseInfo._new492(MorphClass._new53(mc.value), gender,
                                     MorphNumber.SINGULAR, self.morph.language)
        text = MorphologyService.get_wordform(self.term, info)
    if text is None:
        text = self.term
    return _restore_case(text)
def try_attach_territory(
        li: typing.List['TerrItemToken'],
        ad: 'AnalyzerData',
        attach_always: bool = False,
        cits: typing.List['CityItemToken'] = None,
        exists: typing.List['GeoReferent'] = None) -> 'ReferentToken':
    """Try to assemble a territory referent from a run of territory items.

    Args:
        li: parsed territory items; ``None`` or empty yields ``None``. Some
            items may be modified (see the "РАЙОН / ОКРУГ / КРАЙ" branch).
        ad: analyzer data; here it is only forwarded to the two private
            helpers called at the top.
        attach_always: relaxes several plausibility checks below.
        cits: optional city items; only ``cits[0].typ`` and the list length
            are inspected.
        exists: optional known geo referents; a candidate name found among
            them is accepted even when it looks like a dictionary word.

    Returns:
        A ``ReferentToken`` wrapping a ``GeoReferent``, or ``None``.

    NOTE(review): there is no ``self`` and helpers are reached through
    ``TerrAttachHelper.``, so this is presumably a static method - confirm.
    """
    if (li is None or len(li) == 0):
        return None
    ex_obj = None      # item backed by an ontology entry
    new_name = None    # item that is a free (non-ontology) name
    adj_list = list()  # adjective-like / dictionary-geo termin items
    noun = None        # the type noun item
    add_noun = None    # extra noun item appended to the result
    rt = TerrAttachHelper.__try_attach_moscowao(li, ad)
    if (rt is not None):
        return rt
    if (li[0].termin_item is not None and li[0].termin_item.canonic_text == "ТЕРРИТОРИЯ"):
        res2 = TerrAttachHelper.__try_attach_pure_terr(li, ad)
        return res2
    # Two items carrying rzd / rzd_dir, in either order.
    if (len(li) == 2):
        if (li[0].rzd is not None and li[1].rzd_dir is not None):
            rzd = GeoReferent()
            rzd._add_name(li[1].rzd_dir)
            rzd._add_typ_ter(li[0].kit.base_language)
            rzd.add_slot(GeoReferent.ATTR_REF, li[0].rzd.referent, False, 0)
            rzd.add_ext_referent(li[0].rzd)
            return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
        if (li[1].rzd is not None and li[0].rzd_dir is not None):
            rzd = GeoReferent()
            rzd._add_name(li[0].rzd_dir)
            rzd._add_typ_ter(li[0].kit.base_language)
            rzd.add_slot(GeoReferent.ATTR_REF, li[1].rzd.referent, False, 0)
            rzd.add_ext_referent(li[1].rzd)
            return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
    can_be_city_before = False
    adj_terr_before = False
    if (cits is not None):
        if (cits[0].typ == CityItemToken.ItemType.CITY):
            can_be_city_before = True
        elif (cits[0].typ == CityItemToken.ItemType.NOUN and len(cits) > 1):
            can_be_city_before = True
    # Pass 1: classify items into ex_obj / new_name / noun / adj_list.
    # After the loop k is the number of items consumed.
    k = 0
    k = 0
    while k < len(li):
        if (li[k].onto_item is not None):
            if (ex_obj is not None or new_name is not None):
                break
            if (noun is not None):
                if (k == 1):
                    if (noun.termin_item.canonic_text == "РАЙОН" or noun.termin_item.canonic_text == "ОБЛАСТЬ" or noun.termin_item.canonic_text == "СОЮЗ"):
                        if (isinstance(li[k].onto_item.referent, GeoReferent)):
                            if (li[k].onto_item.referent.is_state):
                                break
                        # Require supporting context around the pair.
                        ok = False
                        tt = li[k].end_token.next0_
                        if (tt is None):
                            ok = True
                        elif (tt.is_char_of(",.")):
                            ok = True
                        if (not ok):
                            ok = MiscLocationHelper.check_geo_object_before(
                                li[0].begin_token)
                        if (not ok):
                            adr = AddressItemToken.try_parse(
                                tt, None, False, False, None)
                            if (adr is not None):
                                if (adr.typ == AddressItemToken.ItemType.STREET):
                                    ok = True
                        if (not ok):
                            break
                    if (li[k].onto_item is not None):
                        if (noun.begin_token.is_value("МО", None) or noun.begin_token.is_value("ЛО", None)):
                            return None
            ex_obj = li[k]
        elif (li[k].termin_item is not None):
            if (noun is not None):
                break
            if (li[k].termin_item.is_always_prefix and k > 0):
                break
            if (k > 0 and li[k].is_doubt):
                if (li[k].begin_token == li[k].end_token and li[k].begin_token.is_value("ЗАО", None)):
                    break
            if (li[k].termin_item.is_adjective or li[k].is_geo_in_dictionary):
                adj_list.append(li[k])
            else:
                if (ex_obj is not None):
                    geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent,
                                                GeoReferent)
                    if (geo_ is None):
                        break
                    if (ex_obj.is_adjective and ((li[k].termin_item.canonic_text == "СОЮЗ" or li[k].termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                        str0_ = str(ex_obj.onto_item)
                        if (not li[k].termin_item.canonic_text in str0_):
                            return None
                    if (li[k].termin_item.canonic_text == "РАЙОН" or li[k].termin_item.canonic_text == "ОКРУГ" or li[k].termin_item.canonic_text == "КРАЙ"):
                        # Collect the ontology object's types; if this noun
                        # is not among them, treat li[0] as a plain
                        # adjective name instead of an ontology object.
                        tmp = io.StringIO()
                        for s in geo_.slots:
                            if (s.type_name == GeoReferent.ATTR_TYPE):
                                print("{0};".format(s.value), end="",
                                      file=tmp, flush=True)
                        if (not li[k].termin_item.canonic_text in Utils.toStringStringIO(tmp).upper()):
                            if (k != 1 or new_name is not None):
                                break
                            new_name = li[0]
                            new_name.is_adjective = True
                            new_name.onto_item = (None)
                            ex_obj = (None)
                noun = li[k]
                if (k == 0):
                    tt = TerrItemToken.try_parse(
                        li[k].begin_token.previous, None, True, False, None)
                    if (tt is not None and tt.morph.class0_.is_adjective):
                        adj_terr_before = True
        else:
            if (ex_obj is not None):
                break
            if (new_name is not None):
                break
            new_name = li[k]
        k += 1
    name = None
    alt_name = None
    full_name = None
    # NOTE(review): morph_ is assigned below but never read in this function.
    morph_ = None
    # Pass 2: validate the combination and compute the name(s).
    if (ex_obj is not None):
        if (ex_obj.is_adjective and not ex_obj.morph.language.is_en and noun is
                None):
            # A bare adjective ontology item needs extra justification.
            if (attach_always and ex_obj.end_token.next0_ is not None):
                npt = NounPhraseHelper.try_parse(ex_obj.begin_token,
                                                 NounPhraseParseAttr.NO, 0,
                                                 None)
                if (ex_obj.end_token.next0_.is_comma_and):
                    pass
                elif (npt is None):
                    pass
                else:
                    str0_ = StreetItemToken.try_parse(
                        ex_obj.end_token.next0_, None, False, None, False)
                    if (str0_ is not None):
                        if (str0_.typ == StreetItemType.NOUN and str0_.end_token == npt.end_token):
                            return None
            else:
                cit = CityItemToken.try_parse(ex_obj.end_token.next0_, None,
                                              False, None)
                if (cit is not None and ((cit.typ == CityItemToken.ItemType.NOUN or cit.typ == CityItemToken.ItemType.CITY))):
                    npt = NounPhraseHelper.try_parse(
                        ex_obj.begin_token, NounPhraseParseAttr.NO, 0, None)
                    if (npt is not None and npt.end_token == cit.end_token):
                        pass
                    else:
                        return None
                elif (ex_obj.begin_token.is_value("ПОДНЕБЕСНЫЙ", None)):
                    pass
                else:
                    return None
        if (noun is None and ex_obj.can_be_city):
            cit0 = CityItemToken.try_parse_back(
                ex_obj.begin_token.previous)
            if (cit0 is not None and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                return None
        if (ex_obj.is_doubt and noun is None):
            # Doubtful item without a noun: look for confirming context.
            ok2 = False
            if (TerrAttachHelper.__can_be_geo_after(
                    ex_obj.end_token.next0_)):
                ok2 = True
            elif (not ex_obj.can_be_surname and not ex_obj.can_be_city):
                if ((ex_obj.end_token.next0_ is not None and ex_obj.end_token.next0_.is_char(')') and ex_obj.begin_token.previous is not None) and ex_obj.begin_token.previous.is_char('(')):
                    ok2 = True
                elif (ex_obj.chars.is_latin_letter and ex_obj.begin_token.previous is not None):
                    if (ex_obj.begin_token.previous.is_value("IN", None)):
                        ok2 = True
                    elif (ex_obj.begin_token.previous.is_value(
                            "THE", None) and ex_obj.begin_token.previous.previous is not None and ex_obj.begin_token.previous.previous.is_value(
                            "IN", None)):
                        ok2 = True
            if (not ok2):
                cit0 = CityItemToken.try_parse_back(
                    ex_obj.begin_token.previous)
                if (cit0 is not None and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                    pass
                elif (MiscLocationHelper.check_geo_object_before(
                        ex_obj.begin_token.previous)):
                    pass
                else:
                    return None
        name = ex_obj.onto_item.canonic_text
        morph_ = ex_obj.morph
    elif (new_name is not None):
        if (noun is None):
            return None
        # Items must not be split by a line break (unless a bracket opens).
        j = 1
        while j < k:
            if (li[j].is_newline_before and not li[0].is_newline_before):
                if (BracketHelper.can_be_start_of_sequence(
                        li[j].begin_token, False, False)):
                    pass
                else:
                    return None
            j += 1
        morph_ = noun.morph
        if (new_name.is_adjective):
            if (noun.termin_item.acronym == "АО"):
                if (noun.begin_token != noun.end_token):
                    return None
                if (new_name.morph.gender != MorphGender.FEMINIE):
                    return None
            # Geo referent immediately before the run (skipping one
            # comma / conjunction), if any.
            geo_before = None
            tt0 = li[0].begin_token.previous
            if (tt0 is not None and tt0.is_comma_and):
                tt0 = tt0.previous
            if (not li[0].is_newline_before and tt0 is not None):
                geo_before = (Utils.asObjectOrNull(tt0.get_referent(),
                                                   GeoReferent))
            if (Utils.indexOfList(li, noun, 0) < Utils.indexOfList(
                    li, new_name, 0)):
                # Order: noun first, then adjective name.
                if (noun.termin_item.is_state):
                    return None
                if (new_name.can_be_surname and geo_before is None):
                    if (((noun.morph.case_) & new_name.morph.case_).is_undefined):
                        return None
                if (MiscHelper.is_exists_in_dictionary(
                        new_name.begin_token, new_name.end_token,
                        (MorphClass.ADJECTIVE) | MorphClass.PRONOUN | MorphClass.VERB)):
                    if (noun.begin_token != new_name.begin_token):
                        if (geo_before is None):
                            if (len(li) == 2 and TerrAttachHelper.__can_be_geo_after(
                                    li[1].end_token.next0_)):
                                pass
                            elif (len(li) == 3 and li[2].termin_item is not None and TerrAttachHelper.__can_be_geo_after(
                                    li[2].end_token.next0_)):
                                pass
                            elif (new_name.is_geo_in_dictionary):
                                pass
                            elif (new_name.end_token.is_newline_after):
                                pass
                            else:
                                return None
                        npt = NounPhraseHelper.try_parse(
                            new_name.end_token,
                            NounPhraseParseAttr.PARSEPRONOUNS, 0, None)
                        if (npt is not None and npt.end_token != new_name.end_token):
                            if (len(li) >= 3 and li[2].termin_item is not None and npt.end_token == li[2].end_token):
                                add_noun = li[2]
                            else:
                                return None
                        rtp = new_name.kit.process_referent(
                            "PERSON", new_name.begin_token)
                        if (rtp is not None):
                            return None
                name = ProperNameHelper.get_name_ex(
                    new_name.begin_token, new_name.end_token,
                    MorphClass.ADJECTIVE, MorphCase.UNDEFINED,
                    noun.termin_item.gender, False, False)
            else:
                # Order: adjective name first, then noun.
                ok = False
                if (((k + 1) < len(li)) and li[k].termin_item is None and li[k + 1].termin_item is not None):
                    ok = True
                elif ((k < len(li)) and li[k].onto_item is not None):
                    ok = True
                elif (k == len(li) and not new_name.is_adj_in_dictionary):
                    ok = True
                elif (MiscLocationHelper.check_geo_object_before(
                        li[0].begin_token) or can_be_city_before):
                    ok = True
                elif (MiscLocationHelper.check_geo_object_after(
                        li[k - 1].end_token, False)):
                    ok = True
                elif (len(li) == 3 and k == 2):
                    cit = CityItemToken.try_parse(li[2].begin_token, None,
                                                  False, None)
                    if (cit is not None):
                        if (cit.typ == CityItemToken.ItemType.CITY or cit.typ == CityItemToken.ItemType.NOUN):
                            ok = True
                elif (len(li) == 2):
                    ok = TerrAttachHelper.__can_be_geo_after(
                        li[len(li) - 1].end_token.next0_)
                if (not ok and not li[0].is_newline_before and not li[0].chars.is_all_lower):
                    rt00 = li[0].kit.process_referent(
                        "PERSONPROPERTY", li[0].begin_token.previous)
                    if (rt00 is not None):
                        ok = True
                if (noun.termin_item is not None and noun.termin_item.is_strong and new_name.is_adjective):
                    ok = True
                if (noun.is_doubt and len(adj_list) == 0 and geo_before is None):
                    return None
                name = ProperNameHelper.get_name_ex(
                    new_name.begin_token, new_name.end_token,
                    MorphClass.ADJECTIVE, MorphCase.UNDEFINED,
                    noun.termin_item.gender, False, False)
                if (not ok and not attach_always):
                    if (MiscHelper.is_exists_in_dictionary(
                            new_name.begin_token, new_name.end_token,
                            (MorphClass.ADJECTIVE) | MorphClass.PRONOUN | MorphClass.VERB)):
                        # A dictionary word is accepted only when an
                        # already known geo referent bears this name.
                        if (exists is not None):
                            for e0_ in exists:
                                if (e0_.find_slot(GeoReferent.ATTR_NAME, name, True) is not None):
                                    ok = True
                                    break
                        if (not ok):
                            return None
                full_name = "{0} {1}".format(
                    ProperNameHelper.get_name_ex(li[0].begin_token,
                                                 noun.begin_token.previous,
                                                 MorphClass.ADJECTIVE,
                                                 MorphCase.UNDEFINED,
                                                 noun.termin_item.gender,
                                                 False, False),
                    noun.termin_item.canonic_text)
        else:
            # Non-adjective free name next to a noun.
            if (not attach_always or ((noun.termin_item is not None and noun.termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                is_latin = noun.chars.is_latin_letter and new_name.chars.is_latin_letter
                if (Utils.indexOfList(li, noun, 0) > Utils.indexOfList(
                        li, new_name, 0)):
                    if (not is_latin):
                        return None
                if (not new_name.is_district_name and not BracketHelper.can_be_start_of_sequence(
                        new_name.begin_token, False, False)):
                    if (len(adj_list) == 0 and MiscHelper.is_exists_in_dictionary(
                            new_name.begin_token, new_name.end_token,
                            (MorphClass.NOUN) | MorphClass.PRONOUN)):
                        if (len(li) == 2 and noun.is_city_region and (noun.whitespaces_after_count < 2)):
                            pass
                        else:
                            return None
                    if (not is_latin):
                        if ((noun.termin_item.is_region and not attach_always and ((not adj_terr_before or new_name.is_doubt))) and not noun.is_city_region and not noun.termin_item.is_specific_prefix):
                            if (not MiscLocationHelper.check_geo_object_before(
                                    noun.begin_token)):
                                if (not noun.is_doubt and noun.begin_token != noun.end_token):
                                    pass
                                elif ((noun.termin_item.is_always_prefix and len(li) == 2 and li[0] == noun) and li[1] == new_name):
                                    pass
                                else:
                                    return None
                        if (noun.is_doubt and len(adj_list) == 0):
                            if (noun.termin_item.acronym == "МО" or noun.termin_item.acronym == "ЛО"):
                                if (k == (len(li) - 1) and li[k].termin_item is not None):
                                    # Consume one more item as extra noun.
                                    add_noun = li[k]
                                    k += 1
                                elif (len(li) == 2 and noun == li[0] and str(new_name).endswith("совет")):
                                    pass
                                else:
                                    return None
                            else:
                                return None
                        pers = new_name.kit.process_referent(
                            "PERSON", new_name.begin_token)
                        if (pers is not None):
                            return None
            name = MiscHelper.get_text_value(new_name.begin_token,
                                             new_name.end_token,
                                             GetTextAttr.NO)
            if (new_name.begin_token != new_name.end_token):
                # Cut the name before an inner word that is itself a type
                # term overlapping textually with the noun.
                ttt = new_name.begin_token.next0_
                while ttt is not None and ttt.end_char <= new_name.end_char:
                    if (ttt.chars.is_letter):
                        ty = TerrItemToken.try_parse(
                            ttt, None, False, False, None)
                        if ((ty is not None and ty.termin_item is not None and noun is not None) and
                                ((noun.termin_item.canonic_text in ty.termin_item.canonic_text or ty.termin_item.canonic_text in noun.termin_item.canonic_text))):
                            name = MiscHelper.get_text_value(
                                new_name.begin_token, ttt.previous,
                                GetTextAttr.NO)
                            break
                    ttt = ttt.next0_
            if (len(adj_list) > 0):
                npt = NounPhraseHelper.try_parse(adj_list[0].begin_token,
                                                 NounPhraseParseAttr.NO, 0,
                                                 None)
                if (npt is not None and npt.end_token == noun.end_token):
                    alt_name = "{0} {1}".format(
                        npt.get_normal_case_text(None, MorphNumber.UNDEFINED,
                                                 MorphGender.UNDEFINED,
                                                 False), name)
    else:
        # Neither ontology object nor free name: a lone noun may still
        # refer to a geo referent found next to it or earlier in the text.
        if ((len(li) == 1 and noun is not None and noun.end_token.next0_ is not None) and (isinstance(
                noun.end_token.next0_.get_referent(), GeoReferent))):
            g = Utils.asObjectOrNull(noun.end_token.next0_.get_referent(),
                                     GeoReferent)
            if (noun.termin_item is not None):
                tyy = noun.termin_item.canonic_text.lower()
                ooo = False
                if (g.find_slot(GeoReferent.ATTR_TYPE, tyy, True) is not None):
                    ooo = True
                elif (tyy.endswith("район") and g.find_slot(
                        GeoReferent.ATTR_TYPE, "район", True) is not None):
                    ooo = True
                if (ooo):
                    return ReferentToken._new734(g, noun.begin_token,
                                                 noun.end_token.next0_,
                                                 noun.begin_token.morph)
        if ((len(li) == 1 and noun == li[0] and li[0].termin_item is not None) and TerrItemToken.try_parse(li[0].end_token.next0_, None, True, False, None) is None and TerrItemToken.try_parse(li[0].begin_token.previous, None, True, False, None) is None):
            if (li[0].morph.number == MorphNumber.PLURAL):
                return None
            cou = 0
            str0_ = li[0].termin_item.canonic_text.lower()
            # Walk backwards (budget 500; a line break costs 10) to the
            # nearest geo referent.
            tt = li[0].begin_token.previous
            first_pass3158 = True
            while True:
                if first_pass3158: first_pass3158 = False
                else: tt = tt.previous
                if (not (tt is not None)): break
                if (tt.is_newline_after):
                    cou += 10
                else:
                    cou += 1
                if (cou > 500):
                    break
                g = Utils.asObjectOrNull(tt.get_referent(), GeoReferent)
                if (g is None):
                    continue
                # Then look forwards with the same budget: any territory
                # item ahead cancels the back-reference.
                ok = True
                cou = 0
                tt = li[0].end_token.next0_
                first_pass3159 = True
                while True:
                    if first_pass3159: first_pass3159 = False
                    else: tt = tt.next0_
                    if (not (tt is not None)):
                        break
                    if (tt.is_newline_before):
                        cou += 10
                    else:
                        cou += 1
                    if (cou > 500):
                        break
                    tee = TerrItemToken.try_parse(tt, None, True, False, None)
                    if (tee is None):
                        continue
                    ok = False
                    break
                if (ok):
                    # Check the found referent and up to two of its parents.
                    ii = 0
                    while g is not None and (ii < 3):
                        if (g.find_slot(GeoReferent.ATTR_TYPE, str0_, True) is not None):
                            return ReferentToken._new734(
                                g, li[0].begin_token, li[0].end_token,
                                noun.begin_token.morph)
                        g = g.higher
                        ii += 1
                break
        return None
    # Build (or reuse) the resulting geo referent.
    ter = None
    if (ex_obj is not None and (isinstance(ex_obj.tag, GeoReferent))):
        ter = (Utils.asObjectOrNull(ex_obj.tag, GeoReferent))
    else:
        ter = GeoReferent()
        if (ex_obj is not None):
            geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent,
                                        GeoReferent)
            if (geo_ is not None and not geo_.is_city):
                ter._merge_slots2(geo_, li[0].kit.base_language)
            else:
                ter._add_name(name)
            if (noun is None and ex_obj.can_be_city):
                ter._add_typ_city(li[0].kit.base_language)
            else:
                pass
        elif (new_name is not None):
            ter._add_name(name)
            if (alt_name is not None):
                ter._add_name(alt_name)
        if (noun is not None):
            if (noun.termin_item.canonic_text == "АО"):
                ter._add_typ(
                    ("АВТОНОМНИЙ ОКРУГ" if li[0].kit.base_language.is_ua else "АВТОНОМНЫЙ ОКРУГ"))
            elif (noun.termin_item.canonic_text == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ" or noun.termin_item.canonic_text == "МУНІЦИПАЛЬНЕ ЗБОРИ"):
                ter._add_typ(("МУНІЦИПАЛЬНЕ УТВОРЕННЯ" if li[0].kit.base_language.is_ua else "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ"))
            elif (noun.termin_item.acronym == "МО" and add_noun is not None):
                ter._add_typ(add_noun.termin_item.canonic_text)
            else:
                if (noun.termin_item.canonic_text == "СОЮЗ" and ex_obj is not None and ex_obj.end_char > noun.end_char):
                    return ReferentToken._new734(ter, ex_obj.begin_token,
                                                 ex_obj.end_token,
                                                 ex_obj.morph)
                ter._add_typ(noun.termin_item.canonic_text)
            if (noun.termin_item.is_region and ter.is_state):
                ter._add_typ_reg(li[0].kit.base_language)
    if (ter.is_state and ter.is_region):
        for a in adj_list:
            if (a.termin_item.is_region):
                ter._add_typ_reg(li[0].kit.base_language)
                break
    if (ter.is_state):
        if (full_name is not None):
            ter._add_name(full_name)
    # Wrap the referent over the consumed items and set its morphology.
    res = ReferentToken(ter, li[0].begin_token, li[k - 1].end_token)
    if (noun is not None and noun.morph.class0_.is_noun):
        res.morph = noun.morph
    else:
        res.morph = MorphCollection()
        ii = 0
        while ii < k:
            for v in li[ii].morph.items:
                bi = MorphBaseInfo()
                bi.copy_from(v)
                if (noun is not None):
                    # With a type noun present, adjective forms are
                    # re-labelled as nouns.
                    if (bi.class0_.is_adjective):
                        bi.class0_ = MorphClass.NOUN
                res.morph.add_item(bi)
            ii += 1
    if (li[0].termin_item is not None and li[0].termin_item.is_specific_prefix):
        res.begin_token = li[0].end_token.next0_
    if (add_noun is not None and add_noun.end_char > res.end_char):
        res.end_token = add_noun.end_token
    # Include a directly preceding "АР" token when the referent has a
    # "республика" / "республіка" type.
    if ((isinstance(res.begin_token.previous, TextToken)) and (res.whitespaces_before_count < 2)):
        tt = Utils.asObjectOrNull(res.begin_token.previous, TextToken)
        if (tt.term == "АР"):
            for ty in ter.typs:
                if ("республика" in ty or "республіка" in ty):
                    res.begin_token = tt
                    break
    return res
def __get_name_without_brackets(begin: 'Token', end: 'Token', normalize_first_noun_group: bool = False, normal_first_group_single: bool = False, ignore_geo_referent: bool = False) -> str:
    """Build a name string from the tokens ``begin``..``end``.

    Steps, in order:

    1. If ``begin``/``end`` are accepted by ``BracketHelper`` as a
       start/end of a bracket sequence, both are dropped from the span.
    2. With ``normalize_first_noun_group`` set (and ``begin`` not a
       preposition) the leading noun phrase is put into its normal case
       (singular when ``normal_first_group_single`` is set); a following
       ", <word that is both verb and adjective>" that agrees with the
       phrase is appended, and the remainder of the span is added via
       ``ProperNameHelper.get_name_ex``.
    3. Otherwise, a leading Cyrillic text token known to the dictionary
       is normalised on its own and the rest of the span is appended.
    4. If nothing was produced, the whole span goes through
       ``ProperNameHelper.get_name_ex``.
    5. Trailing ``*`` and whitespace characters are cut off.

    Returns the resulting text, or ``None`` when only ``*``/whitespace
    characters remain.  A null/empty intermediate result is returned
    unchanged.
    """
    # The end check is evaluated only when the start check succeeded.
    opens_sequence = BracketHelper.can_be_start_of_sequence(begin, False, False)
    if opens_sequence and BracketHelper.can_be_end_of_sequence(end, False, begin, False):
        begin = begin.next0_
        end = end.previous

    name = None
    if normalize_first_noun_group and not begin.morph.class0_.is_preposition:
        phrase = NounPhraseHelper.try_parse(begin, NounPhraseParseAttr.REFERENTCANBENOUN, 0, None)
        # Reject a phrase whose noun is unknown to the dictionary and has no
        # adjectives, or one that runs past the requested span.
        if phrase is not None and (
                (phrase.noun.get_morph_class_in_dictionary().is_undefined
                 and len(phrase.adjectives) == 0)
                or phrase.end_token.end_char > end.end_char):
            phrase = None
        if phrase is not None:
            if normal_first_group_single:
                wanted_number = MorphNumber.SINGULAR
            else:
                wanted_number = MorphNumber.UNDEFINED
            name = phrase.get_normal_case_text(None, wanted_number, MorphGender.UNDEFINED, False)

            tail = phrase.end_token.next0_
            if (tail is not None and tail.next0_ is not None and tail.is_comma
                    and isinstance(tail.next0_, TextToken)
                    and tail.next0_.end_char <= end.end_char
                    and tail.next0_.morph.class0_.is_verb
                    and tail.next0_.morph.class0_.is_adjective):
                # Look for the first morphological variant of the word after
                # the comma that agrees with the phrase in gender, case and
                # number; only that variant is tried.
                for form in tail.next0_.morph.items:
                    if not (form.gender == phrase.morph.gender
                            or ((form.gender) & (phrase.morph.gender)) != (MorphGender.UNDEFINED)):
                        continue
                    if ((form.case_) & phrase.morph.case_).is_undefined:
                        continue
                    if not (form.number == phrase.morph.number
                            or ((form.number) & (phrase.morph.number)) != (MorphNumber.UNDEFINED)):
                        continue
                    word = tail.next0_.term
                    if isinstance(form, MorphWordForm):
                        word = form.normal_case
                    target = MorphBaseInfo._new492(MorphClass.ADJECTIVE,
                                                   phrase.morph.gender,
                                                   phrase.morph.number,
                                                   phrase.morph.language)
                    word = MorphologyService.get_wordform(word, target)
                    if word is not None:
                        name = "{0}, {1}".format(name, word)
                        tail = tail.next0_.next0_
                    break

            if tail is not None and tail.end_char <= end.end_char:
                rest = ProperNameHelper.get_name_ex(tail, end, MorphClass.UNDEFINED,
                                                    MorphCase.UNDEFINED,
                                                    MorphGender.UNDEFINED, True,
                                                    ignore_geo_referent)
                if not Utils.isNullOrEmpty(rest):
                    # A space is inserted only before an alphanumeric start.
                    if str.isalnum(rest[0]):
                        name = "{0} {1}".format(name, rest)
                    else:
                        name = "{0}{1}".format(name, rest)
    elif isinstance(begin, TextToken) and begin.chars.is_cyrillic_letter:
        dict_class = begin.get_morph_class_in_dictionary()
        if not dict_class.is_undefined:
            name = begin.get_normal_case_text(dict_class, MorphNumber.UNDEFINED,
                                              MorphGender.UNDEFINED, False)
            if begin.end_char < end.end_char:
                name = "{0} {1}".format(
                    name,
                    ProperNameHelper.get_name_ex(begin.next0_, end,
                                                 MorphClass.UNDEFINED,
                                                 MorphCase.UNDEFINED,
                                                 MorphGender.UNDEFINED, True,
                                                 False))

    if name is None:
        name = ProperNameHelper.get_name_ex(begin, end, MorphClass.UNDEFINED,
                                            MorphCase.UNDEFINED,
                                            MorphGender.UNDEFINED, True,
                                            ignore_geo_referent)
    if Utils.isNullOrEmpty(name):
        return name

    # Walk back over trailing '*' and whitespace characters.
    keep = len(name)
    while keep > 0 and (name[keep - 1] == '*' or Utils.isWhitespace(name[keep - 1])):
        keep -= 1
    if keep == 0:
        return None
    if keep < len(name):
        name = name[0:keep]
    return name
def _createReferentToken(p : 'PersonReferent', begin : 'Token', end : 'Token', morph_ : 'MorphCollection', attrs : typing.List['PersonAttrToken'], ad : 'PersonAnalyzerData', for_attribute : bool, after_be_predicate : bool) -> 'ReferentToken':
    """Wrap person ``p`` into a ReferentToken, extending the span and filling slots.

    Starting from the span ``begin``..``end`` the function:

    * applies the leading attribute tokens ``attrs`` to ``p`` (age, attribute
      slots, gender from prefixes) and moves ``begin`` back to cover them;
    * rebuilds ``morph_`` as a fresh collection forced to singular number;
    * scans the tokens after ``end`` for trailing attributes, a bracketed
      last name, a nickname, and moves ``end`` forward over what it accepts;
    * scans further for contact referents (PHONE / URI / ADDRESS), identity
      documents and ORGANIZATION referents and records them on ``p``.

    Args:
        p: person being wrapped; ``None`` makes the function return ``None``.
        begin: first token of the person span (may be moved backwards).
        end: last token of the person span (may be moved forwards).
        morph_: morphology of the span; only its ``items`` are copied.
        attrs: attribute tokens found before the person, or ``None``.
        ad: analyzer data, or ``None``; supplies ``local_ontology`` and the
            ``overflow_level`` counter.
        for_attribute: when true (or when switched on by the preceding
            token check below) the token is returned without the tail scans.
        after_be_predicate: not referenced anywhere in this body.

    Returns:
        A ReferentToken built by ``ReferentToken._new2329`` from ``p``, the
        final ``begin``/``end``, the rebuilt morphology and
        ``p._m_person_identity_typ``; ``None`` if ``p`` is ``None``.
    """
    from pullenti.ner.person.internal.PersonIdentityToken import PersonIdentityToken
    if (p is None):
        return None
    # Set when a BESTREGARDS attribute precedes the person; it relaxes several
    # checks in the tail scans below.
    has_prefix = False
    if (attrs is not None):
        for a in attrs:
            if (a.typ == PersonAttrTerminType.BESTREGARDS):
                has_prefix = True
            else:
                # Extend the span to the left so that it covers the attribute.
                if (a.begin_char < begin.begin_char):
                    begin = a.begin_token
                if (a.typ != PersonAttrTerminType.PREFIX):
                    if (a.age is not None):
                        p.addSlot(PersonReferent.ATTR_AGE, a.age, False, 0)
                    if (a.prop_ref is None):
                        p.addSlot(PersonReferent.ATTR_ATTR, a.value, False, 0)
                    else:
                        p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
                # PREFIX attributes only contribute a gender.
                elif (a.gender == MorphGender.FEMINIE and not p.is_female):
                    p.is_female = True
                elif (a.gender == MorphGender.MASCULINE and not p.is_male):
                    p.is_male = True
    elif ((isinstance(begin.previous, TextToken)) and (begin.whitespaces_before_count < 3)):
        # No attributes given: a directly preceding "ИП" token is attached as
        # an "индивидуальный предприниматель" property and included in the span.
        if ((begin.previous).term == "ИП"):
            a = PersonAttrToken(begin.previous, begin.previous)
            a.prop_ref = PersonPropertyReferent()
            a.prop_ref.name = "индивидуальный предприниматель"
            p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
            begin = begin.previous
    # Copy the morphology: every variant becomes singular, and an undefined
    # gender is taken from the person when the person's gender is unambiguous.
    m0 = MorphCollection()
    for it in morph_.items:
        bi = MorphBaseInfo(it)
        bi.number = MorphNumber.SINGULAR
        if (bi.gender == MorphGender.UNDEFINED):
            if (p.is_male and not p.is_female):
                bi.gender = MorphGender.MASCULINE
            if (not p.is_male and p.is_female):
                bi.gender = MorphGender.FEMINIE
        m0.addItem(bi)
    morph_ = m0
    # If the copied morphology has no case, borrow it from the first attribute.
    if ((attrs is not None and len(attrs) > 0 and not attrs[0].morph.case_.is_undefined) and morph_.case_.is_undefined):
        morph_.case_ = attrs[0].morph.case_
        if (attrs[0].morph.number == MorphNumber.SINGULAR):
            morph_.number = MorphNumber.SINGULAR
        if (p.is_male and not p.is_female):
            morph_.gender = MorphGender.MASCULINE
        elif (p.is_female):
            morph_.gender = MorphGender.FEMINIE
    # A preceding "ИМЕНИ"/"ІМЕНІ", or "ИМ"/"ІМ" (optionally followed by '.'),
    # switches the call into for_attribute mode.
    if (begin.previous is not None):
        ttt = begin.previous
        if (ttt.isValue("ИМЕНИ", "ІМЕНІ")):
            for_attribute = True
        else:
            if (ttt.isChar('.') and ttt.previous is not None):
                ttt = ttt.previous
            if (ttt.whitespaces_after_count < 3):
                if (ttt.isValue("ИМ", "ІМ")):
                    for_attribute = True
    if (for_attribute):
        return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
    # The person continues a "<person>, <person> and ..." list: walk back to the
    # first person of the list and, if that one starts with a property referent
    # followed by ':' or in plural, share that property with this person.
    if ((begin.previous is not None and begin.previous.is_comma_and and (isinstance(begin.previous.previous, ReferentToken))) and (isinstance(begin.previous.previous.getReferent(), PersonReferent))):
        rt00 = Utils.asObjectOrNull(begin.previous.previous, ReferentToken)
        ttt = rt00
        while ttt is not None:
            if (ttt.previous is None or not ((isinstance(ttt.previous.previous, ReferentToken)))):
                break
            if (not ttt.previous.is_comma_and or not ((isinstance(ttt.previous.previous.getReferent(), PersonReferent)))):
                break
            rt00 = (Utils.asObjectOrNull(ttt.previous.previous, ReferentToken))
            ttt = (rt00)
        if (isinstance(rt00.begin_token.getReferent(), PersonPropertyReferent)):
            ok = False
            if ((rt00.begin_token).end_token.next0_ is not None and (rt00.begin_token).end_token.next0_.isChar(':')):
                ok = True
            elif (rt00.begin_token.morph.number == MorphNumber.PLURAL):
                ok = True
            if (ok):
                p.addSlot(PersonReferent.ATTR_ATTR, rt00.begin_token.getReferent(), False, 0)
    # overflow_level is incremented here and decremented before the final
    # return; once it exceeds 10 the tail scans are skipped altogether.
    if (ad is not None):
        if (ad.overflow_level > 10):
            return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
        ad.overflow_level += 1
    attrs1 = None
    has_position = False
    open_br = False
    # ---- First tail scan: attributes, bracketed last name, nickname ----
    t = end.next0_
    first_pass3095 = True
    while True:
        if first_pass3095: first_pass3095 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.is_table_control_char):
            break
        if (t.is_newline_before):
            if (t.newlines_before_count > 2):
                break
            if (attrs1 is not None and len(attrs1) > 0):
                break
            ml = MailLine.parse(t, 0)
            if (ml is not None and ml.typ == MailLine.Types.FROM):
                break
            if (t.chars.is_capital_upper):
                # A capitalised token on a new line is accepted only if it is
                # an attribute that either ends its line / table cell, follows
                # a BESTREGARDS prefix, or consists of several
                # whitespace-separated tokens.
                attr1 = PersonAttrToken.tryAttach(t, (None if ad is None else ad.local_ontology), PersonAttrToken.PersonAttrAttachAttrs.NO)
                ok1 = False
                if (attr1 is not None):
                    if (has_prefix or attr1.is_newline_after or ((attr1.end_token.next0_ is not None and attr1.end_token.next0_.is_table_control_char))):
                        ok1 = True
                    else:
                        tt2 = t.next0_
                        while tt2 is not None and tt2.end_char <= attr1.end_char:
                            if (tt2.is_whitespace_before):
                                ok1 = True
                            tt2 = tt2.next0_
                else:
                    ttt = PersonHelper.__correctTailAttributes(p, t)
                    if (ttt is not None and ttt != t):
                        t = ttt
                        end = t
                        continue
                if (not ok1):
                    break
        if (t.is_hiphen or t.isCharOf("_>|")):
            continue
        if (t.isValue("МОДЕЛЬ", None)):
            break
        tt = PersonHelper.__correctTailAttributes(p, t)
        if (tt != t and tt is not None):
            t = tt
            end = t
            continue
        # NOTE(review): is_be is never set to True anywhere in this function,
        # so the "not is_be" tests below always pass.
        is_be = False
        if (t.isChar('(') and t == end.next0_):
            # "(Lastname)" right after the person: add it as another last name.
            open_br = True
            t = t.next0_
            if (t is None):
                break
            pit1 = PersonItemToken.tryAttach(t, None, PersonItemToken.ParseAttr.NO, None)
            if ((pit1 is not None and t.chars.is_capital_upper and pit1.end_token.next0_ is not None) and (isinstance(t, TextToken)) and pit1.end_token.next0_.isChar(')')):
                if (pit1.lastname is not None):
                    inf = MorphBaseInfo._new2321(MorphCase.NOMINATIVE)
                    if (p.is_male):
                        inf.gender = Utils.valToEnum((inf.gender) | (MorphGender.MASCULINE), MorphGender)
                    if (p.is_female):
                        inf.gender = Utils.valToEnum((inf.gender) | (MorphGender.FEMINIE), MorphGender)
                    sur = PersonIdentityToken.createLastname(pit1, inf)
                    if (sur is not None):
                        p._addFioIdentity(sur, None, None)
                        t = pit1.end_token.next0_
                        end = t
                        continue
        # Skip one connecting token (comma, "be" verb, "and", hyphen, period).
        elif (t.is_comma):
            t = t.next0_
            if ((isinstance(t, TextToken)) and (t).isValue("WHO", None)):
                continue
        elif ((isinstance(t, TextToken)) and (t).is_verb_be):
            t = t.next0_
        elif (t.is_and and t.is_whitespace_after and not t.is_newline_after):
            if (t == end.next0_):
                break
            t = t.next0_
        elif (t.is_hiphen and t == end.next0_):
            t = t.next0_
        elif (t.isChar('.') and t == end.next0_ and has_prefix):
            t = t.next0_
        ttt2 = PersonHelper.createNickname(p, t)
        if (ttt2 is not None):
            end = ttt2
            t = end
            continue
        if (t is None):
            break
        attr = None
        attr = PersonAttrToken.tryAttach(t, (None if ad is None else ad.local_ontology), PersonAttrToken.PersonAttrAttachAttrs.NO)
        if (attr is None):
            # Not an attribute; the scan ends here unless this is a GEO
            # referent inside an open bracket.
            if ((t is not None and t.getReferent() is not None and t.getReferent().type_name == "GEO") and attrs1 is not None and open_br):
                continue
            if ((t.chars.is_capital_upper and open_br and t.next0_ is not None) and t.next0_.isChar(')')):
                if (p.findSlot(PersonReferent.ATTR_LASTNAME, None, True) is None):
                    p.addSlot(PersonReferent.ATTR_LASTNAME, t.getSourceText().upper(), False, 0)
                    t = t.next0_
                    end = t
            # A singular "КОТОРЫЙ" form fixes the person's gender.
            if (t is not None and t.isValue("КОТОРЫЙ", None) and t.morph.number == MorphNumber.SINGULAR):
                if (not p.is_female and t.morph.gender == MorphGender.FEMINIE):
                    p.is_female = True
                    p._correctData()
                elif (not p.is_male and t.morph.gender == MorphGender.MASCULINE):
                    p.is_male = True
                    p._correctData()
            break
        if (attr.morph.number == MorphNumber.PLURAL):
            break
        if (attr.typ == PersonAttrTerminType.BESTREGARDS):
            break
        if (attr.is_doubt):
            if (has_prefix):
                pass
            elif (t.is_newline_before and attr.is_newline_after):
                pass
            elif (t.previous is not None and ((t.previous.is_hiphen or t.previous.isChar(':')))):
                pass
            else:
                break
        # The attribute must share at least one case with the person.
        if (not morph_.case_.is_undefined and not attr.morph.case_.is_undefined):
            if (((morph_.case_) & attr.morph.case_).is_undefined and not is_be):
                break
        if (open_br):
            if (PersonAnalyzer._tryAttachPerson(t, ad, False, 0, True) is not None):
                break
        if (attrs1 is None):
            if (t.previous.is_comma and t.previous == end.next0_):
                ttt = attr.end_token.next0_
                if (ttt is not None):
                    if (ttt.morph.class0_.is_verb):
                        if (MiscHelper.canBeStartOfSentence(begin)):
                            pass
                        else:
                            break
            attrs1 = list()
        attrs1.append(attr)
        if (attr.typ == PersonAttrTerminType.POSITION or attr.typ == PersonAttrTerminType.KING):
            if (not is_be):
                has_position = True
        elif (attr.typ != PersonAttrTerminType.PREFIX):
            # Of the remaining types only OTHER with an age is tolerated;
            # anything else discards all trailing attributes collected so far.
            if (attr.typ == PersonAttrTerminType.OTHER and attr.age is not None):
                pass
            else:
                attrs1 = (None)
                break
        t = attr.end_token
    # A trailing position competes with a leading POSITION attribute: unless
    # spacing/punctuation ties it to this person, drop the trailing attributes
    # when the token after them is not a period.
    if (attrs1 is not None and has_position and attrs is not None):
        te1 = attrs[len(attrs) - 1].end_token.next0_
        te2 = attrs1[0].begin_token
        if (te1.whitespaces_after_count > te2.whitespaces_before_count and (te2.whitespaces_before_count < 2)):
            pass
        elif (attrs1[0].age is not None):
            pass
        elif (((te1.is_hiphen or te1.isChar(':'))) and not attrs1[0].is_newline_before and ((te2.previous.is_comma or te2.previous == end))):
            pass
        else:
            for a in attrs:
                if (a.typ == PersonAttrTerminType.POSITION):
                    te = attrs1[len(attrs1) - 1].end_token
                    if (te.next0_ is not None):
                        if (not te.next0_.isChar('.')):
                            attrs1 = (None)
                            break
    # The last trailing attribute may belong to a following person instead;
    # compare the whitespace on both sides of it to decide.
    if (attrs1 is not None and not has_prefix):
        attr = attrs1[len(attrs1) - 1]
        ok = False
        if (attr.end_token.next0_ is not None and attr.end_token.next0_.chars.is_capital_upper):
            ok = True
        else:
            rt = PersonAnalyzer._tryAttachPerson(attr.begin_token, ad, False, -1, False)
            if (rt is not None and (isinstance(rt.referent, PersonReferent))):
                ok = True
        if (ok):
            if (attr.begin_token.whitespaces_before_count > attr.end_token.whitespaces_after_count):
                attrs1 = (None)
            elif (attr.begin_token.whitespaces_before_count == attr.end_token.whitespaces_after_count):
                rt1 = PersonAnalyzer._tryAttachPerson(attr.begin_token, ad, False, -1, False)
                if (rt1 is not None):
                    attrs1 = (None)
    # Apply the surviving trailing attributes and extend the span over them.
    if (attrs1 is not None):
        for a in attrs1:
            if (a.typ != PersonAttrTerminType.PREFIX):
                if (a.age is not None):
                    p.addSlot(PersonReferent.ATTR_AGE, a.age, True, 0)
                elif (a.prop_ref is None):
                    p.addSlot(PersonReferent.ATTR_ATTR, a.value, False, 0)
                else:
                    p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
                end = a.end_token
                if (a.gender != MorphGender.UNDEFINED and not p.is_female and not p.is_male):
                    if (a.gender == MorphGender.MASCULINE and not p.is_male):
                        p.is_male = True
                        p._correctData()
                    elif (a.gender == MorphGender.FEMINIE and not p.is_female):
                        p.is_female = True
                        p._correctData()
        if (open_br):
            if (end.next0_ is not None and end.next0_.isChar(')')):
                end = end.next0_
    # ---- Second tail scan: contacts, identity documents, organizations ----
    # crlf_cou counts line starts seen since the last accepted contact/document.
    crlf_cou = 0
    t = end.next0_
    first_pass3096 = True
    while True:
        if first_pass3096: first_pass3096 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.is_table_control_char):
            break
        if (t.is_newline_before):
            ml = MailLine.parse(t, 0)
            if (ml is not None and ml.typ == MailLine.Types.FROM):
                break
            crlf_cou += 1
        if (t.isCharOf(":,(") or t.is_hiphen):
            continue
        if (t.isChar('.') and t == end.next0_):
            continue
        r = t.getReferent()
        if (r is not None):
            if (r.type_name == "PHONE" or r.type_name == "URI" or r.type_name == "ADDRESS"):
                ty = r.getStringValue("SCHEME")
                # Only these URI schemes are accepted as contacts.
                if (r.type_name == "URI"):
                    if ((ty != "mailto" and ty != "skype" and ty != "ICQ") and ty != "http"):
                        break
                p._addContact(r)
                end = t
                crlf_cou = 0
                continue
        if (isinstance(r, PersonIdentityReferent)):
            p.addSlot(PersonReferent.ATTR_IDDOC, r, False, 0)
            end = t
            crlf_cou = 0
            continue
        if (r is not None and r.type_name == "ORGANIZATION"):
            if (t.next0_ is not None and t.next0_.morph.class0_.is_verb):
                break
            if (begin.previous is not None and begin.previous.morph.class0_.is_verb):
                break
            if (t.whitespaces_after_count == 1):
                break
            # Link the organization as a "сотрудник" property unless one of the
            # person's attributes already references it.  `end` is not moved.
            exist = False
            for s in p.slots:
                if (s.type_name == PersonReferent.ATTR_ATTR and (isinstance(s.value, PersonPropertyReferent))):
                    pr = Utils.asObjectOrNull(s.value, PersonPropertyReferent)
                    if (pr.findSlot(PersonPropertyReferent.ATTR_REF, r, True) is not None):
                        exist = True
                        break
                elif (s.type_name == PersonReferent.ATTR_ATTR and (isinstance(s.value, PersonAttrToken))):
                    pr = Utils.asObjectOrNull(s.value, PersonAttrToken)
                    if (pr.referent.findSlot(PersonPropertyReferent.ATTR_REF, r, True) is not None):
                        exist = True
                        break
            if (not exist):
                pat = PersonAttrToken(t, t)
                pat.prop_ref = PersonPropertyReferent._new2291("сотрудник")
                pat.prop_ref.addSlot(PersonPropertyReferent.ATTR_REF, r, False, 0)
                p.addSlot(PersonReferent.ATTR_ATTR, pat, False, 0)
            continue
        if (r is not None):
            break
        # Plain tokens are skipped only after a BESTREGARDS prefix and for
        # fewer than two line starts.
        if (not has_prefix or crlf_cou >= 2):
            break
        rt = t.kit.processReferent("PERSON", t)
        if (rt is not None):
            break
    if (ad is not None):
        ad.overflow_level -= 1
    return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
def getNormalCaseText(self, mc: 'MorphClass' = None, single_number: bool = False, gender: 'MorphGender' = MorphGender.UNDEFINED, keep_chars: bool = False) -> str:
    """Return the normalised form of this token's text.

    The first morphological variant in ``self.morph.items`` that passes the
    class and gender filters and is a ``MorphWordForm`` supplies the result.
    If no such variant exists, and none of the variants that passed the
    filters carried a defined case, the result is derived from ``self.term``.

    Args:
        mc: required morphological class; ``None`` or an undefined class
            disables the class filter.  A preposition class short-circuits to
            ``LanguageHelper.normalizePreposition(self.term)``.
        single_number: ask for the singular form.
        gender: required gender; ``MorphGender.UNDEFINED`` disables the
            gender filter.
        keep_chars: re-apply the token's original casing (all-lower or
            capitalised) to the result.

    Returns:
        The normalised text, or ``None`` when at least one variant that
        passed the filters has a defined case but none of them is a word form.
    """
    from pullenti.ner.core.MiscHelper import MiscHelper

    def restore_case(text):
        # Re-apply the casing of the source token when requested.
        if keep_chars:
            if self.chars.is_all_lower:
                return text.lower()
            if self.chars.is_capital_upper:
                return MiscHelper.convertFirstCharUpperAndOtherLower(text)
        return text

    if mc is not None and mc.is_preposition:
        return LanguageHelper.normalizePreposition(self.term)

    empty = True
    for it in self.morph.items:
        # Class filter: the variant must share a class bit with `mc`.
        if mc is not None and not mc.is_undefined:
            cc = (it.class0_.value) & (mc.value)
            if cc == 0:
                continue
            if (MorphClass.isMiscInt(cc) and not MorphClass.isProperInt(cc)
                    and mc.value != it.class0_.value):
                continue
        wf = Utils.asObjectOrNull(it, MorphWordForm)
        normal_full = False
        # Gender filter: a mismatching variant is still accepted for a
        # masculine request if it has a full normal form, or if it is a
        # personal pronoun.
        if gender != MorphGender.UNDEFINED:
            if ((it.gender) & (gender)) == (MorphGender.UNDEFINED):
                if (gender == MorphGender.MASCULINE
                        and (it.gender != MorphGender.UNDEFINED
                             or it.number == MorphNumber.PLURAL)
                        and wf is not None and wf.normal_full is not None):
                    normal_full = True
                elif gender == MorphGender.MASCULINE and it.class0_.is_personal_pronoun:
                    pass
                else:
                    continue
        if not it.case_.is_undefined:
            empty = False
        if wf is None:
            continue

        if single_number and it.number == MorphNumber.PLURAL and wf.normal_full is not None:
            # Keep normal_case when it is normal_full plus a two-letter "СЯ"
            # ending; otherwise use normal_full.  normal_case may be None
            # (the other branch guards against it), so treat that as
            # "no match" instead of failing in len().
            normal_case = wf.normal_case
            le = 0 if normal_case is None else len(normal_case)
            if (le == (len(wf.normal_full) + 2) and le > 4
                    and normal_case[le - 2] == 'С' and normal_case[le - 1] == 'Я'):
                res = normal_case
            else:
                res = wf.normal_full
        elif normal_full:
            res = wf.normal_full
        else:
            res = Utils.ifNotNull(wf.normal_case, self.term)

        if single_number and mc is not None and mc == MorphClass.NOUN:
            # Irregular singular that the form tables do not supply.
            if res == "ДЕТИ":
                res = "РЕБЕНОК"
        return restore_case(res)

    if not empty:
        return None

    # No usable variant: try to obtain a singular form from the term itself.
    te = None
    if single_number and mc is not None:
        bi = MorphBaseInfo._new549(MorphClass(mc), gender, MorphNumber.SINGULAR, self.morph.language)
        te = Morphology.getWordform(self.term, bi)
    if self.chars.is_cyrillic_letter and te is None and len(self.term) > 3:
        # Heuristic on the last two letters: drop a trailing "ОМ"/"АМ", or a
        # single final vowel that follows a non-vowel.
        ch0 = self.term[len(self.term) - 1]
        ch1 = self.term[len(self.term) - 2]
        if ch0 == 'М' and (ch1 == 'О' or ch1 == 'А'):
            te = self.term[0:0 + len(self.term) - 2]
        elif not LanguageHelper.isCyrillicVowel(ch1) and LanguageHelper.isCyrillicVowel(ch0):
            te = self.term[0:0 + len(self.term) - 1]
    if te is None:
        te = self.term
    return restore_case(te)