def addAllAbridges(self, tail_len: int = 0, max_first_len: int = 0, min_first_len: int = 0) -> None:
    """Add every abbreviation of the first term that is cut right after a non-vowel.

    Cut positions run from the next-to-last character of the first term's
    canonical text down to its first character; a position is used only when
    the character there is not a Cyrillic vowel.

    Args:
        tail_len: when 0, each abbreviation is built as a ``Termin.Abridge``
            from the prefix plus the canonical texts of the remaining terms.
            Otherwise the last ``tail_len`` characters are kept as a tail and
            each abbreviation is registered as "<prefix>-<tail>" via
            ``addAbridge``.
        max_first_len: only used with a tail; cut positions with index
            >= this value are skipped (0 disables the limit).
        min_first_len: only used without a tail; scanning stops at the first
            cut position with index below ``min_first_len - 1`` (0 disables
            the limit).
    """
    if len(self.terms) < 1:
        return
    first_text = self.terms[0].canonical_text

    if tail_len != 0:
        suffix = first_text[len(first_text) - tail_len:]
        # Drop the tail together with the single character that precedes it.
        head = first_text[:len(first_text) - tail_len - 1]
        for pos in reversed(range(len(head) - 1)):
            if max_first_len > 0 and pos >= max_first_len:
                continue
            if not LanguageHelper.isCyrillicVowel(head[pos]):
                self.addAbridge("{0}-{1}".format(head[:pos + 1], suffix))
        return

    for pos in reversed(range(len(first_text) - 1)):
        if LanguageHelper.isCyrillicVowel(first_text[pos]):
            continue
        if min_first_len > 0 and pos < min_first_len - 1:
            break
        abridge = Termin.Abridge()
        abridge.addPart(first_text[:pos + 1], False)
        for term in self.terms[1:]:
            abridge.addPart(term.canonical_text, False)
        if self.abridges is None:
            self.abridges = list()
        self.abridges.append(abridge)
def can_has_ref(self, r: 'Referent') -> bool:
    """Check whether referent ``r`` may be used as this property's ATTR_REF.

    The decision depends on ``self.name`` and on the kind of ``r``:
    a ``GeoReferent`` is matched against state/region/city flags, a referent
    whose ``type_name`` is "ORGANIZATION" is accepted unless the name is one
    of the excluded titles or a slot check fails; anything else is rejected.
    """
    title = self.name
    if title is None or r is None:
        return False

    if isinstance(r, GeoReferent):
        geo = Utils.asObjectOrNull(r, GeoReferent)
        if LanguageHelper.ends_with_ex(title, "президент", "губернатор", None, None):
            return geo.is_state or geo.is_region
        if title in ("мэр", "градоначальник"):
            return geo.is_city
        return title == "глава"

    if r.type_name != "ORGANIZATION":
        return False
    if LanguageHelper.ends_with(title, "губернатор") or title in ("мэр", "градоначальник", "президент"):
        return False
    # A minister needs an organization carrying a "министерство" slot value.
    if "министр" in title and r.find_slot(None, "министерство", True) is None:
        return False
    # A director is not attached to an organization carrying a "суд" slot value.
    if title.endswith("директор") and r.find_slot(None, "суд", True) is not None:
        return False
    return True
def __init__(self, v : 'MorphRuleVariant'=None, word : str=None) -> None:
    """Create a word form, optionally initialised from a rule variant.

    Args:
        v: rule variant to copy from; when None only the defaults are set.
        word: the source word; when given, ``normal_case`` / ``normal_full``
            are rebuilt by replacing the variant's ``tail`` at the end of
            ``word`` with the variant's ``normal_tail`` / ``full_normal_tail``.
    """
    super().__init__(None)
    self.normal_full = None
    self.normal_case = None
    self.misc = None
    self.undef_coef = 0
    self.tag = None
    if v is None:
        return

    v.copy_to(self)
    self.misc = v.misc_info
    self.tag = v

    def rebuild(new_tail):
        # Strip the variant's tail from the word (only if the word really ends
        # with it) and attach the requested normal-form tail.
        stem = word
        if LanguageHelper.ends_with(word, v.tail):
            stem = word[:len(word) - len(v.tail)]
        return (stem + new_tail) if len(new_tail) > 0 else stem

    if v.normal_tail is not None and word is not None:
        self.normal_case = rebuild(v.normal_tail)
    if v.full_normal_tail is not None and word is not None:
        self.normal_full = rebuild(v.full_normal_tail)
def _DelSurnameEnd(s : str) -> str:
    """Strip an ending from an upper-case surname.

    Strings shorter than 3 characters are returned unchanged. A final
    "А"/"У"/"Е" is dropped, a final "ОМ"/"ЫМ" is dropped, and a final
    "Я"/"Ю" preceded by "Н" or "Л" is replaced with "Ь". Anything else is
    returned as is.
    """
    if len(s) < 3:
        return s
    if LanguageHelper.endsWithEx(s, "А", "У", "Е", None):
        return s[:-1]
    if LanguageHelper.endsWith(s, "ОМ") or LanguageHelper.endsWith(s, "ЫМ"):
        return s[:-2]
    if LanguageHelper.endsWithEx(s, "Я", "Ю", None, None):
        if s[-2] in ('Н', 'Л'):
            return s[:-1] + "Ь"
    return s
def __findForSurname(self, attr_name : str, surname : str, find_shortest : bool=False) -> str:
    """Find a slot value of ``attr_name`` written in the same script as ``surname``.

    A value matches when its first character is Cyrillic exactly when the
    first character of ``surname`` is Cyrillic.

    Args:
        attr_name: slot type name to look through.
        surname: surname whose first character selects the script.
        find_shortest: when True return the shortest matching value,
            otherwise the first matching one.

    Returns:
        The selected value as a string, or None when nothing matches or
        ``surname`` is empty/None.
    """
    # An empty surname has no first character to compare scripts with;
    # indexing it would raise IndexError.
    if not surname:
        return None
    rus = LanguageHelper.isCyrillicChar(surname[0])
    res = None
    for a in self.slots:
        if a.type_name != attr_name:
            continue
        v = str(a.value)
        # Skip empty values instead of failing on v[0].
        if not v:
            continue
        if LanguageHelper.isCyrillicChar(v[0]) != rus:
            continue
        if res is None or (find_shortest and len(v) < len(res)):
            res = v
    return res
def __correctModel(self) -> None:
    """Extend this token over a model suffix that directly follows it.

    Looks at the token after ``end_token``. If (after an optional separator)
    it is a number, the number and any immediately following single-letter
    tokens are appended to ``value`` as "<value>-<suffix>", ``alt_value`` is
    rebuilt from the new value, and ``end_token`` is moved forward.
    Afterwards, a hyphen or slash/backslash immediately followed by a number
    is appended to both ``value`` and ``alt_value`` in the same way.
    """
    tt = self.end_token.next0_
    # Nothing follows, or the next token is separated by more than 2 whitespaces.
    if (tt is None or tt.whitespaces_before_count > 2):
        return
    # Step over one separator token.
    # NOTE(review): isValue is given the whole string ":\/." while the later check
    # uses isCharOf("\\/") - confirm that isValue is really the intended call here.
    if (tt.isValue(":\\/.", None) or tt.is_hiphen):
        tt = tt.next0_
    if (isinstance(tt, NumberToken)):
        # tmp accumulates the suffix: the number followed by single letters.
        tmp = io.StringIO()
        print((tt).value, end="", file=tmp)
        # Script of the current value, judged by its first character.
        is_lat = LanguageHelper.isLatinChar(self.value[0])
        self.end_token = tt
        tt = tt.next0_
        # Emulates "for (tt = tt.next; tt != null; tt = tt.next)".
        first_pass3157 = True
        while True:
            if first_pass3157: first_pass3157 = False
            else: tt = tt.next0_
            if (not (tt is not None)): break
            if ((isinstance(tt, TextToken)) and tt.length_char == 1 and tt.chars.is_letter):
                # The letter must be glued to the previous token or follow a hyphen.
                if (not tt.is_whitespace_before or ((tt.previous is not None and tt.previous.is_hiphen))):
                    ch = (tt).term[0]
                    self.end_token = tt
                    ch2 = chr(0)
                    # Convert the letter to the script of the value when a counterpart
                    # is returned (chr(0) is treated as "no counterpart").
                    if (LanguageHelper.isLatinChar(ch) and not is_lat):
                        ch2 = LanguageHelper.getCyrForLat(ch)
                        if (ch2 != (chr(0))):
                            ch = ch2
                    elif (LanguageHelper.isCyrillicChar(ch) and is_lat):
                        ch2 = LanguageHelper.getLatForCyr(ch)
                        if (ch2 != (chr(0))):
                            ch = ch2
                    print(ch, end="", file=tmp)
                    continue
            # Any other token ends the suffix.
            break
        self.value = "{0}-{1}".format(self.value, Utils.toStringStringIO(tmp))
        self.alt_value = MiscHelper.createCyrLatAlternative(self.value)
    # Optional trailing "-<number>" / "/<number>" / "\<number>" with no whitespace around the separator.
    if (not self.end_token.is_whitespace_after and self.end_token.next0_ is not None and ((self.end_token.next0_.is_hiphen or self.end_token.next0_.isCharOf("\\/")))):
        if (not self.end_token.next0_.is_whitespace_after and (isinstance(self.end_token.next0_.next0_, NumberToken))):
            self.end_token = self.end_token.next0_.next0_
            self.value = "{0}-{1}".format(self.value, (self.end_token).value)
            if (self.alt_value is not None):
                self.alt_value = "{0}-{1}".format(self.alt_value, (self.end_token).value)
def _mergeSlots2(self, obj : 'Referent', lang : 'MorphLang') -> None:
    """Merge the slots of ``obj`` into this referent, filtering names by language.

    Name and type slots of ``obj`` are copied only when their script agrees
    with ``lang`` (Latin first character for English, non-Latin otherwise)
    and they do not end with " ССР"; all other slots are copied as is.
    After copying, redundant type slots and duplicate ATTR_HIGHER slots are
    removed, and ``_mergeExtReferents(obj)`` is called.

    Args:
        obj: referent whose slots are merged in.
        lang: language used to filter name/type slots.
    """
    # Always True here: slot counts are carried over.
    merge_statistic = True
    for s in obj.slots:
        if (s.type_name == GeoReferent.ATTR_NAME or s.type_name == GeoReferent.ATTR_TYPE):
            nam = s.value
            # Latin-script values are kept only for English, others only for non-English.
            if (LanguageHelper.isLatinChar(nam[0])):
                if (not lang.is_en):
                    continue
            elif (lang.is_en):
                continue
            if (LanguageHelper.endsWith(nam, " ССР")):
                continue
        self.addSlot(s.type_name, s.value, False, (s.count if merge_statistic else 0))
    # If filtering left this referent without any name, take all names from obj.
    if (self.findSlot(GeoReferent.ATTR_NAME, None, True) is None and obj.findSlot(GeoReferent.ATTR_NAME, None, True) is not None):
        for s in obj.slots:
            if (s.type_name == GeoReferent.ATTR_NAME):
                self.addSlot(s.type_name, s.value, False, (s.count if merge_statistic else 0))
    # Same fallback for types.
    if (self.findSlot(GeoReferent.ATTR_TYPE, None, True) is None and obj.findSlot(GeoReferent.ATTR_TYPE, None, True) is not None):
        for s in obj.slots:
            if (s.type_name == GeoReferent.ATTR_TYPE):
                self.addSlot(s.type_name, s.value, False, (s.count if merge_statistic else 0))
    if (self.is_territory):
        # Drop the "территория" type when an alpha2 code or a state-like type is present.
        if (((self.alpha2 is not None or self.findSlot(GeoReferent.ATTR_TYPE, "государство", True) is not None or self.findSlot(GeoReferent.ATTR_TYPE, "держава", True) is not None) or self.findSlot(GeoReferent.ATTR_TYPE, "империя", True) is not None or self.findSlot(GeoReferent.ATTR_TYPE, "імперія", True) is not None) or self.findSlot(GeoReferent.ATTR_TYPE, "state", True) is not None):
            s = self.findSlot(GeoReferent.ATTR_TYPE, "территория", True)
            if (s is not None):
                self.slots.remove(s)
    if (self.is_state):
        # Remove the first "region" type slot; the break right after remove()
        # keeps the iteration over the mutated list safe.
        for s in self.slots:
            if (s.type_name == GeoReferent.ATTR_TYPE and ((str(s.value) == "регион" or str(s.value) == "регіон" or str(s.value) == "region"))):
                self.slots.remove(s)
                break
    if (self.is_city):
        # s is the generic "city" type slot (all three lookups are evaluated eagerly).
        s = Utils.ifNotNull(self.findSlot(GeoReferent.ATTR_TYPE, "город", True), Utils.ifNotNull(self.findSlot(GeoReferent.ATTR_TYPE, "місто", True), self.findSlot(GeoReferent.ATTR_TYPE, "city", True)))
        if (s is not None):
            # The generic slot is removed when another type slot also passes __isCity.
            for ss in self.slots:
                if (ss.type_name == GeoReferent.ATTR_TYPE and ss != s and
                    GeoReferent.__isCity(ss.value)):
                    self.slots.remove(s)
                    break
    # Keep only the first ATTR_HIGHER slot.
    has = False
    i = 0
    while i < len(self.slots):
        if (self.slots[i].type_name == GeoReferent.ATTR_HIGHER):
            if (not has):
                has = True
            else:
                del self.slots[i]
                # Compensate for the removed element before the increment below.
                i -= 1
        i += 1
    self._mergeExtReferents(obj)
def __getName(self, cyr : bool) -> str:
    """Pick a display name from the ATTR_NAME slots.

    Args:
        cyr: True to prefer names starting with a Cyrillic character,
            False to prefer names starting with any other character.

    Returns:
        The chosen name, with "МОЛДОВА" and "БЕЛАРУСЬ" replaced by
        "МОЛДАВИЯ" and "БЕЛОРУССИЯ", or "?" when there are no usable names.
    """
    name = None
    # Pass 0 considers only names in the requested script; pass 1 (run only
    # if pass 0 found nothing) considers every name.
    for i in range(2):
        for s in self.slots:
            if (s.type_name == GeoReferent.ATTR_NAME):
                v = str(s.value)
                if (Utils.isNullOrEmpty(v)):
                    continue
                if (i == 0):
                    if (not LanguageHelper.isCyrillicChar(v[0])):
                        if (cyr):
                            continue
                    elif (not cyr):
                        continue
                if (name is None):
                    name = v
                elif (len(name) > len(v)):
                    # A shorter candidate replaces the current name unless it is very
                    # short (< 4) while the current one is short (< 10), or the current
                    # name ends with 'В'.
                    if ((len(v) < 4) and (len(name) < 10)):
                        pass
                    elif (name[len(name) - 1] == 'В'):
                        pass
                    else:
                        name = v
                elif ((len(name) < 4) and len(v) >= 4 and (len(v) < 10)):
                    # A very short current name gives way to a candidate of 4..9 characters.
                    name = v
        if (name is not None):
            break
    if (name == "МОЛДОВА"):
        name = "МОЛДАВИЯ"
    elif (name == "БЕЛАРУСЬ"):
        name = "БЕЛОРУССИЯ"
    return Utils.ifNotNull(name, "?")
def try_parse(t: 'Token') -> 'PrepositionToken':
    """Try to extract a preposition starting at the given token.

    Args:
        t(Token): the start token

    Returns:
        PrepositionToken: the result, or None when ``t`` is not a text token
        or is neither an ontology match nor a dictionary preposition
    """
    if not isinstance(t, TextToken):
        return None

    # Ontology entries take precedence over the dictionary.
    hit = PrepositionHelper.__m_ontology.try_parse(t, TerminParseAttr.NO)
    if hit is not None:
        return PrepositionToken._new529(t, hit.end_token, hit.termin.canonic_text, hit.termin.tag)

    if not t.get_morph_class_in_dictionary().is_preposition:
        return None

    result = PrepositionToken(t, t)
    result.normal = t.get_normal_case_text(MorphClass.PREPOSITION, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
    result.next_case = LanguageHelper.get_case_after_preposition(result.normal)

    # Hyphenated pair "<prep>-<prep>" with no whitespace after the first one.
    hyphen = t.next0_
    if hyphen is not None and hyphen.is_hiphen and not t.is_whitespace_after:
        second = hyphen.next0_
        if isinstance(second, TextToken) and second.get_morph_class_in_dictionary().is_preposition:
            result.end_token = second
    return result
def tryParse(t : 'Token', typ : 'NounPhraseParseAttr'=NounPhraseParseAttr.NO, max_char_pos : int=0) -> 'NounPhraseToken':
    """Try to build a noun phrase starting at the given token.

    Args:
        t(Token): the start token
        typ(NounPhraseParseAttr): parse options (may be a bit mask)
        max_char_pos(int): maximum text position to parse up to; 0 means no limit

    Returns:
        NounPhraseToken: the noun phrase, or None
    """
    from pullenti.ner.core._NounPraseHelperInt import _NounPraseHelperInt

    direct = _NounPraseHelperInt.tryParse(t, typ, max_char_pos)
    if direct is not None:
        return direct

    # The preposition fallback runs only when explicitly requested.
    if not (((typ) & (NounPhraseParseAttr.PARSEPREPOSITION)) != (NounPhraseParseAttr.NO)):
        return None
    if not (isinstance(t, TextToken) and t.morph.class0_.is_preposition and t.whitespaces_after_count < 3):
        return None

    phrase = _NounPraseHelperInt.tryParse(t.next0_, typ, max_char_pos)
    if phrase is None:
        return None

    governed = LanguageHelper.getCaseAfterPreposition(t.lemma)
    phrase.preposition = t
    phrase.begin_token = t
    if not (governed & phrase.morph.case_).is_undefined:
        # Keep only the morphology variants compatible with the governed case.
        phrase.morph.removeItems(governed, False)
    elif t.morph.class0_.is_adverb:
        return None
    return phrase
def toString(self, short_variant: bool, lang: 'MorphLang' = None, lev: int = 0) -> str:
    """Build the textual form of the unit.

    Args:
        short_variant: use the ATTR_NAME slot when True, ATTR_FULLNAME otherwise.
        lang: when given, the first pass accepts only a value for which
            ``LanguageHelper.isCyrillic`` agrees with ``lang.is_ru``.
        lev: when greater than 0 the power is not appended.

    Returns:
        The name (or "?"), followed by the power; a power starting with '-'
        is wrapped in angle brackets. For the full variant of an unknown
        unit the text is prefixed with "(?)".
    """
    wanted = UnitReferent.ATTR_NAME if short_variant else UnitReferent.ATTR_FULLNAME

    name = None
    for attempt in (0, 1):
        for slot in self.slots:
            if slot.type_name != wanted:
                continue
            candidate = Utils.asObjectOrNull(slot.value, str)
            # The language filter applies on the first pass only.
            if lang is not None and attempt == 0:
                if lang.is_ru != LanguageHelper.isCyrillic(candidate):
                    continue
            name = candidate
            break
        if name is not None:
            break
    if name is None:
        name = self.getStringValue(UnitReferent.ATTR_NAME)

    power = self.getStringValue(UnitReferent.ATTR_POW)
    if Utils.isNullOrEmpty(power) or lev > 0:
        return Utils.ifNotNull(name, "?")

    if power[0] != '-':
        text = "{0}{1}".format(name, power)
    else:
        text = "{0}<{1}>".format(name, power)
    if not short_variant and self.is_unknown:
        text = "(?)" + text
    return text
def __find_in_tree(self, key : str, lang : 'MorphLang') -> typing.List['Termin']:
    """Look up the termins stored in the character tree under ``key``.

    The tree is walked one character code at a time. If the walk stops at a
    space, the termins of the node reached so far are filtered: a termin is
    kept when it has as many terms as ``key`` has space-separated words and
    every word after the first is among the variants of the matching term.

    Returns:
        The list of termins (possibly None), or None when ``key`` is None or
        the walk fails.
    """
    if key is None:
        return None
    node = self.__get_root(lang, ((lang is None or lang.is_undefined)) and LanguageHelper.is_latin(key))
    for symbol in key:
        code = ord(symbol)
        child = None
        if node.children is not None:
            holder = RefOutArgWrapper(None)
            Utils.tryGetValue(node.children, code, holder)
            child = holder.value
        if child is not None:
            node = child
            continue

        # No child for this character: only a space (code 32) allows a
        # multi-word match against the termins of the current node.
        if code != 32 or node.termins is None:
            return None
        words = Utils.splitString(key, ' ', False)
        found = None
        for termin in node.termins:
            if len(termin.terms) != len(words):
                continue
            if all(words[k] in termin.terms[k].variants for k in range(1, len(words))):
                if found is None:
                    found = list()
                found.append(termin)
        return found
    return node.termins
def get_doc_types(name : str, name2 : str) -> typing.List[str]:
    """Return the contract types in which a participant role can occur.

    Args:
        name: upper-case role name (e.g. "АРЕНДАТОР").
        name2: optional second role; when given, only the contract types
            common to both roles are returned.

    Returns:
        A list of contract type names; empty when ``name`` is None or unknown.
    """
    res = list()
    if name is None:
        return res

    types_by_role = {
        "АРЕНДОДАТЕЛЬ": ("ДОГОВОР АРЕНДЫ", "ДОГОВОР СУБАРЕНДЫ"),
        "АРЕНДАТОР": ("ДОГОВОР АРЕНДЫ",),
        "СУБАРЕНДАТОР": ("ДОГОВОР СУБАРЕНДЫ",),
        "НАЙМОДАТЕЛЬ": ("ДОГОВОР НАЙМА",),
        "НАНИМАТЕЛЬ": ("ДОГОВОР НАЙМА",),
        "АГЕНТ": ("АГЕНТСКИЙ ДОГОВОР",),
        "ПРИНЦИПАЛ": ("АГЕНТСКИЙ ДОГОВОР",),
        "ПРОДАВЕЦ": ("ДОГОВОР КУПЛИ-ПРОДАЖИ",),
        "ПОКУПАТЕЛЬ": ("ДОГОВОР КУПЛИ-ПРОДАЖИ",),
        "ЗАКАЗЧИК": ("ДОГОВОР УСЛУГ",),
        "ИСПОЛНИТЕЛЬ": ("ДОГОВОР УСЛУГ",),
        "ПОСТАВЩИК": ("ДОГОВОР ПОСТАВКИ",),
        "ЛИЦЕНЗИАР": ("ЛИЦЕНЗИОННЫЙ ДОГОВОР",),
        "ЛИЦЕНЗИАТ": ("ЛИЦЕНЗИОННЫЙ ДОГОВОР",),
        "СТРАХОВЩИК": ("ДОГОВОР СТРАХОВАНИЯ",),
        "СТРАХОВАТЕЛЬ": ("ДОГОВОР СТРАХОВАНИЯ",),
    }
    if name in types_by_role:
        res.extend(types_by_role[name])
    elif LanguageHelper.ends_with(name, "ПОДРЯДЧИК"):
        # Any "...ПОДРЯДЧИК" role is treated like a service contractor.
        res.append("ДОГОВОР УСЛУГ")

    if name2 is None:
        return res
    # Keep only the types that the second role can take part in as well.
    other = ParticipantToken.get_doc_types(name2, None)
    return [doc_type for doc_type in res if doc_type in other]
def __try_attach_moscowao(li: typing.List['TerrItemToken'], ad: 'AnalyzerData') -> 'ReferentToken':
    """Try to build an administrative-district referent from the first item.

    The first item must carry a termin flagged ``is_moscow_region``. A
    doubtful item is accepted only when a city check on the following token
    succeeds or the following tokens parse as an address list starting with
    a street.

    Returns:
        A ReferentToken spanning the first item, or None.
    """
    head = li[0]
    if head.termin_item is None or not head.termin_item.is_moscow_region:
        return None

    if head.is_doubt:
        follower = head.end_token.next0_
        confirmed = False
        if CityAttachHelper.check_city_after(follower):
            confirmed = True
        else:
            addr_items = AddressItemToken.try_parse_list(follower, None, 2)
            if addr_items is not None and len(addr_items) > 0 and addr_items[0].typ == AddressItemToken.ItemType.STREET:
                confirmed = True
        if not confirmed:
            return None

    region = GeoReferent()
    type_name = "АДМИНИСТРАТИВНЫЙ ОКРУГ"
    region._add_typ(type_name)
    title = head.termin_item.canonic_text
    if LanguageHelper.ends_with(title, type_name):
        # Cut the type and the one character in front of it off the name.
        title = title[:len(title) - len(type_name) - 1].strip()
    region._add_name(title)
    return ReferentToken(region, head.begin_token, head.end_token)
def to_string(self, short_variant: bool, lang: 'MorphLang' = None, lev: int = 0) -> str:
    """Build the textual form of the weapon.

    The text consists of the shortest ATTR_TYPE value in lower case, then,
    when present, the brand, the model, the quoted name (plus one alternative
    name written in the other script, in parentheses) and the number.
    """
    out = io.StringIO()

    shortest_type = None
    for slot in self.slots:
        if slot.type_name == WeaponReferent.ATTR_TYPE:
            candidate = slot.value
            if shortest_type is None or len(candidate) < len(shortest_type):
                shortest_type = candidate
    if shortest_type is not None:
        out.write(shortest_type.lower())

    brand = self.get_string_value(WeaponReferent.ATTR_BRAND)
    if brand is not None:
        out.write(" {0}".format(MiscHelper.convert_first_char_upper_and_other_lower(brand)))

    model = self.get_string_value(WeaponReferent.ATTR_MODEL)
    if model is not None:
        out.write(" {0}".format(model))

    name = self.get_string_value(WeaponReferent.ATTR_NAME)
    if name is not None:
        out.write(" \"{0}\"".format(MiscHelper.convert_first_char_upper_and_other_lower(name)))
        # Add the first other name whose first character differs in Cyrillic-ness.
        for slot in self.slots:
            if slot.type_name != WeaponReferent.ATTR_NAME or name == slot.value:
                continue
            if LanguageHelper.is_cyrillic_char(name[0]) != LanguageHelper.is_cyrillic_char(slot.value[0]):
                out.write(" ({0})".format(MiscHelper.convert_first_char_upper_and_other_lower(slot.value)))
                break

    number = self.get_string_value(WeaponReferent.ATTR_NUMBER)
    if number is not None:
        out.write(", номер {0}".format(number))
    return Utils.toStringStringIO(out)
def mergeSlots(self, obj: 'Referent', merge_statistic: bool = True) -> None:
    """Take over missing or more complete data from another phone referent.

    The country code is copied when this referent has none, and the number
    is replaced by the other number when that one ends with the current
    number. Nothing happens when ``obj`` is not a ``PhoneReferent``.
    """
    other = Utils.asObjectOrNull(obj, PhoneReferent)
    if other is None:
        return
    if other.country_code is not None and self.country_code is None:
        self.country_code = other.country_code
    if other.number is None:
        return
    if LanguageHelper.endsWith(other.number, self.number):
        self.number = other.number
def find(self, key : str) -> 'Termin':
    """Find the first termin registered under ``key``.

    A key starting with a Latin character is looked up in the English tree;
    any other key is looked up in the Russian tree and, when that lookup
    returns None, in the Ukrainian tree.

    Returns:
        The first termin found, or None.
    """
    if Utils.isNullOrEmpty(key):
        return None
    if LanguageHelper.is_latin_char(key[0]):
        matches = self.__find_in_tree(key, MorphLang.EN)
    else:
        matches = self.__find_in_tree(key, MorphLang.RU)
        if matches is None:
            matches = self.__find_in_tree(key, MorphLang.UA)
    if matches is not None and len(matches) > 0:
        return matches[0]
    return None
def get_lemma(self) -> str:
    """Return the lemma (a morphological normalization variant) of the token.

    Returns:
        The stored lemma when one is set; otherwise a normal form chosen
        from ``word_forms`` (with a few ending corrections); otherwise
        ``term``, or "?" when ``term`` is None.
    """
    if (self.__m_lemma is not None):
        return self.__m_lemma
    res = None
    if (self.word_forms is not None and len(self.word_forms) > 0):
        # A single word form is taken as is.
        if (len(self.word_forms) == 1):
            res = (Utils.ifNotNull(self.word_forms[0].normal_full, self.word_forms[0].normal_case))
        # For tokens that are not all lower case, try proper-noun readings first.
        if (res is None and not self.char_info.is_all_lower):
            for m in self.word_forms:
                if (m.class0_.is_proper_surname):
                    s = Utils.ifNotNull(m.normal_full, Utils.ifNotNull(m.normal_case, ""))
                    if (LanguageHelper.ends_with_ex( s, "ОВ", "ЕВ", None, None)):
                        res = s
                        break
                elif (m.class0_.is_proper_name and m.is_in_dictionary):
                    # NOTE: returns immediately, skipping the ending corrections below.
                    return m.normal_case
        # Otherwise pick the form that __compare_forms ranks best.
        if (res is None):
            best = None
            for m in self.word_forms:
                if (best is None):
                    best = m
                elif (self.__compare_forms(best, m) > 0):
                    best = m
            res = (Utils.ifNotNull(best.normal_full, best.normal_case))
    if (res is not None):
        # Ending corrections applied to the chosen normal form.
        if (LanguageHelper.ends_with_ex(res, "АНЫЙ", "ЕНЫЙ", None, None)):
            res = (res[0:0 + len(res) - 3] + "ННЫЙ")
        elif (LanguageHelper.ends_with(res, "ЙСЯ")):
            res = res[0:0 + len(res) - 2]
        elif (LanguageHelper.ends_with(res, "АНИЙ") and res == self.term):
            # Keep the form when any word form is in the dictionary,
            # otherwise replace the final character with "Е".
            for wf in self.word_forms:
                if (wf.is_in_dictionary):
                    return res
            return res[0:0 + len(res) - 1] + "Е"
        return res
    return Utils.ifNotNull(self.term, "?")
def remove_items_by_preposition(self, prep: 'Token') -> None:
    """Remove the items whose case does not agree with the given preposition.

    Nothing is done when ``prep`` is not a text token or when the case
    governed by the preposition has no intersection with ``self.case_``.

    Args:
        prep(Token): the preposition token
    """
    from pullenti.ner.TextToken import TextToken
    if isinstance(prep, TextToken):
        governed = LanguageHelper.get_case_after_preposition(prep.lemma)
        if not (governed & self.case_).is_undefined:
            self.remove_items(governed, False)
def correctWordByMorph(self, word: str, lang: 'MorphLang') -> str:
    """Correct a word using the morphology engine selected by script and language.

    A word not starting with a Cyrillic character goes to the English
    engine. Otherwise the first engine (RU, UA, BY, KZ) whose own language
    flag and the matching flag of ``lang`` are both set is used, with the
    Russian engine as the fallback.
    """
    if not LanguageHelper.isCyrillicChar(word[0]):
        return InnerMorphology.M_ENGINE_EN.correctWordByMorph(word)
    if lang is not None:
        candidates = (
            (InnerMorphology.M_ENGINE_RU, "is_ru"),
            (InnerMorphology.M_ENGINE_UA, "is_ua"),
            (InnerMorphology.M_ENGINE_BY, "is_by"),
            (InnerMorphology.M_ENGINE_KZ, "is_kz"),
        )
        for engine, flag in candidates:
            if getattr(engine.language, flag) and getattr(lang, flag):
                return engine.correctWordByMorph(word)
    return InnerMorphology.M_ENGINE_RU.correctWordByMorph(word)
def add(self, tail: str, var: 'MorphRuleVariant') -> None:
    """Register a rule variant under the given tail.

    The tail is first passed through ``LanguageHelper.correctWord``; the
    variant is appended to the list stored in ``self.variants`` for that
    tail (the list is created on first use), and the variant's ``tail`` and
    ``rule`` attributes are pointed back at the corrected tail and this rule.

    Args:
        tail: word ending the variant applies to.
        var: the variant to register.
    """
    tail = LanguageHelper.correctWord(tail)
    holder = RefOutArgWrapper(None)
    if Utils.tryGetValue(self.variants, tail, holder):
        li = holder.value
    else:
        # First variant for this tail: start a new list.
        li = list()
        self.variants[tail] = li
    var.tail = tail
    li.append(var)
    var.rule = self
def correct_word_by_morph(self, word: str, lang: 'MorphLang') -> str:
    """Correct a word using the morphology engine selected by script and language.

    A word not starting with a Cyrillic character goes to the English
    engine. Otherwise the first engine (RU, UA, BY, KZ) whose own language
    flag and the matching flag of ``lang`` are both set is used, with the
    Russian engine as the fallback.
    """
    if not LanguageHelper.is_cyrillic_char(word[0]):
        return self.__m_engine_en.correct_word_by_morph(word)
    if lang is not None:
        candidates = (
            (self.__m_engine_ru, "is_ru"),
            (self.__m_engine_ua, "is_ua"),
            (self.__m_engine_by, "is_by"),
            (self.__m_engine_kz, "is_kz"),
        )
        for engine, flag in candidates:
            if getattr(engine.language, flag) and getattr(lang, flag):
                return engine.correct_word_by_morph(word)
    return self.__m_engine_ru.correct_word_by_morph(word)
def get_all_wordforms(self, word: str, lang: 'MorphLang') -> typing.List['MorphWordForm']:
    """Return all word forms from the engine selected by script and language.

    A word not starting with a Cyrillic character goes to the English
    engine. Otherwise the first engine (RU, UA, BY, KZ) whose own language
    flag and the matching flag of ``lang`` are both set is used, with the
    Russian engine as the fallback.
    """
    if not LanguageHelper.is_cyrillic_char(word[0]):
        return self.__m_engine_en.get_all_wordforms(word)
    if lang is not None:
        candidates = (
            (self.__m_engine_ru, "is_ru"),
            (self.__m_engine_ua, "is_ua"),
            (self.__m_engine_by, "is_by"),
            (self.__m_engine_kz, "is_kz"),
        )
        for engine, flag in candidates:
            if getattr(engine.language, flag) and getattr(lang, flag):
                return engine.get_all_wordforms(word)
    return self.__m_engine_ru.get_all_wordforms(word)
def getAllWordforms(self, word: str, lang: 'MorphLang') -> typing.List['MorphWordForm']:
    """Return all word forms from the engine selected by script and language.

    A word not starting with a Cyrillic character goes to the English
    engine. Otherwise the first engine (RU, UA, BY, KZ) whose own language
    flag and the matching flag of ``lang`` are both set is used, with the
    Russian engine as the fallback.
    """
    if not LanguageHelper.isCyrillicChar(word[0]):
        return InnerMorphology.M_ENGINE_EN.getAllWordforms(word)
    if lang is not None:
        candidates = (
            (InnerMorphology.M_ENGINE_RU, "is_ru"),
            (InnerMorphology.M_ENGINE_UA, "is_ua"),
            (InnerMorphology.M_ENGINE_BY, "is_by"),
            (InnerMorphology.M_ENGINE_KZ, "is_kz"),
        )
        for engine, flag in candidates:
            if getattr(engine.language, flag) and getattr(lang, flag):
                return engine.getAllWordforms(word)
    return InnerMorphology.M_ENGINE_RU.getAllWordforms(word)
def canBeGeneralFor(self, obj: 'Referent') -> bool:
    """Tell whether this phone is a more general form of ``obj``.

    ``obj`` must first pass ``__canBeEqual`` (within one text, ignoring a
    missing additional number). This phone must not carry a country code
    that ``obj`` lacks. If only ``obj`` has an additional number the answer
    is True, if only this phone has one the answer is False; otherwise the
    number of ``obj`` must end with this phone's number.
    """
    if not self.__canBeEqual(obj, Referent.EqualType.WITHINONETEXT, True):
        return False
    other = Utils.asObjectOrNull(obj, PhoneReferent)
    if self.country_code is not None and other.country_code is None:
        return False

    mine = self.add_number
    theirs = other.add_number
    if mine is None and theirs is not None:
        return True
    if mine is not None and theirs is None:
        return False
    return True if LanguageHelper.endsWith(other.number, self.number) else False
def __canBeEqual(self, obj: 'Referent', typ: 'EqualType', ignore_add_number: bool) -> bool:
    """Compare this phone with another referent.

    Args:
        obj: referent to compare with; anything but a PhoneReferent gives False.
        typ: comparison mode; unless it is DIFFERENTTEXTS, one number being
            the ending of the other counts as a match.
        ignore_add_number: when True, additional numbers are compared only
            if both are present; when False they must be equal (or both absent).

    Returns:
        True when the two phones may denote the same number.
    """
    other = Utils.asObjectOrNull(obj, PhoneReferent)
    if other is None:
        return False

    # Country codes conflict only when both are known.
    if other.country_code is not None and self.country_code is not None:
        if other.country_code != self.country_code:
            return False

    mine = self.add_number
    theirs = other.add_number
    if ignore_add_number:
        if mine is not None and theirs is not None and theirs != mine:
            return False
    elif mine is not None or theirs is not None:
        if mine != theirs:
            return False

    if self.number is None or other.number is None:
        return False
    if self.number == other.number:
        return True
    if typ != Referent.EqualType.DIFFERENTTEXTS:
        if LanguageHelper.endsWith(self.number, other.number) or LanguageHelper.endsWith(other.number, self.number):
            return True
    return False
def set_shortest_canonical_text(self, ignore_termins_with_notnull_tags: bool = False) -> None:
    """Set the canonical text to the shortest Cyrillic canonical text of the termins.

    The stored text is reset to None first. Termins without terms, termins
    whose canonical text does not start with a Cyrillic character and,
    optionally, termins with a non-None tag are ignored. Among texts of
    equal length the first one wins.

    Args:
        ignore_termins_with_notnull_tags: skip termins whose ``tag`` is set.
    """
    self.__m_canonic_text = None
    for termin in self.termins:
        if ignore_termins_with_notnull_tags and termin.tag is not None:
            continue
        if len(termin.terms) == 0:
            continue
        text = termin.canonic_text
        if not LanguageHelper.is_cyrillic_char(text[0]):
            continue
        current = self.__m_canonic_text
        if current is None or len(text) < len(current):
            self.__m_canonic_text = text
def __FindInTree(self, key: str, lang: 'MorphLang') -> typing.List['Termin']:
    """Walk the character tree along ``key`` and return the termins found there.

    Returns:
        The ``termins`` of the node reached after the last character, or
        None when ``key`` is None or some character has no child node.
    """
    if key is None:
        return None
    node = self.__getRoot(lang, ((lang is None or lang.is_undefined)) and LanguageHelper.isLatin(key))
    for symbol in key:
        if node.children is None:
            return None
        holder = RefOutArgWrapper(None)
        if not Utils.tryGetValue(node.children, ord(symbol), holder):
            return None
        node = holder.value
    return node.termins
def __remove_from_tree(self, key : str, t : 'Termin') -> None:
    """Remove termin ``t`` from the tree node addressed by ``key``.

    Nothing is done when ``key`` is None, when the path for ``key`` does not
    exist in the tree, or when the node reached does not hold ``t``.
    """
    if key is None:
        return
    node = self.__get_root(t.lang, t.lang.is_undefined and LanguageHelper.is_latin(key))
    for symbol in key:
        if node.children is None:
            return
        holder = RefOutArgWrapper(None)
        if not Utils.tryGetValue(node.children, ord(symbol), holder):
            return
        node = holder.value
    if node.termins is not None and t in node.termins:
        node.termins.remove(t)
def create_question(li : 'NGItem') -> str:
    """Build the question word (with its preposition) matching the item's case.

    The result starts with the lower-cased preposition and a space when the
    item has a preposition. The case is taken from the item's morphology
    and, when it intersects with the case governed by the preposition,
    narrowed to that intersection. The first matching case among genitive,
    instrumental, dative, accusative and prepositional adds its question
    word; if none matches nothing is added.
    """
    prep = li.source.prep
    question = (Utils.ifNotNull(prep, "")).lower()
    if len(question) > 0:
        question += " "

    case = li.source.source.morph.case_
    if not Utils.isNullOrEmpty(prep):
        governed = LanguageHelper.get_case_after_preposition(prep)
        if not governed.is_undefined and not (governed & case).is_undefined:
            case = case & governed

    question_words = (
        ("is_genitive", "чего"),
        ("is_instrumental", "чем"),
        ("is_dative", "чему"),
        ("is_accusative", "что"),
        ("is_prepositional", "чём"),
    )
    for flag, word in question_words:
        if getattr(case, flag):
            question += word
            break
    return question