def __str__(self) -> str:
    """Build a debug string of the form ``<typ>: <value> <alt_value><marker>``.

    The value part is ``Utils.ifNotNull(self.value, <ref text>)`` where the
    ref text is ``str(self.ref)`` or an empty string when ``self.ref`` is
    None. The marker is ``[int]`` when ``self.is_internal`` is truthy.

    Returns:
        str: the formatted representation.
    """
    kind_text = Utils.enumToString(self.typ)
    ref_text = "" if self.ref is None else str(self.ref)
    main_text = Utils.ifNotNull(self.value, ref_text)
    alt_text = Utils.ifNotNull(self.alt_value, "")
    if self.is_internal:
        marker = "[int]"
    else:
        marker = ""
    return "{0}: {1} {2}{3}".format(kind_text, main_text, alt_text, marker)
def to_string(self, short_variant: bool, lang: 'MorphLang' = None, lev: int = 0) -> str:
    """Format the definition as ``[kind] termin (note) = value``.

    The note is read from ``ATTR_TERMIN_ADD`` and, if that is None, from
    ``ATTR_MISC``; the parenthesised part is omitted when both are None.

    Args:
        short_variant: not used by this implementation.
        lang: not used by this implementation.
        lev: not used by this implementation.

    Returns:
        str: the formatted text.
    """
    note = self.get_string_value(DefinitionReferent.ATTR_TERMIN_ADD)
    if note is None:
        note = self.get_string_value(DefinitionReferent.ATTR_MISC)
    note_part = " ({0})".format(note) if note is not None else ""
    return "[{0}] {1}{2} = {3}".format(
        Utils.enumToString(self.kind),
        Utils.ifNotNull(self.termin, "?"),
        note_part,
        Utils.ifNotNull(self.value, "?"))
def __str__(self) -> str:
    """Return ``<normal text> <morph>``, with `` / <internal noun>`` appended when present.

    The normal text comes from ``self.getNormalCaseText(...)`` passed through
    ``Utils.ifNotNull`` with ``"?"`` as the fallback.
    """
    has_inner = self.internal_noun is not None
    head = Utils.ifNotNull(
        self.getNormalCaseText(None, False, MorphGender.UNDEFINED, False), "?")
    morph_text = str(self.morph)
    if has_inner:
        return "{0} {1} / {2}".format(head, morph_text, str(self.internal_noun))
    return "{0} {1}".format(head, morph_text)
def get_normal_case_text_without_adjective(self, adj_index : int) -> str:
    """Join the normal-case text of every adjective except one, followed by the noun.

    Args:
        adj_index: position in ``self.adjectives`` of the adjective to omit.

    Returns:
        str: each kept adjective followed by a single space, then the noun text.
    """
    buf = io.StringIO()
    for position, adjective in enumerate(self.adjectives):
        if position == adj_index:
            continue
        adj_text = adjective.get_normal_case_text(
            (MorphClass.ADJECTIVE) | MorphClass.PRONOUN,
            MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
        buf.write("{0} ".format(Utils.ifNotNull(adj_text, "?")))
    noun_text = self.noun.get_normal_case_text(
        (MorphClass.NOUN) | MorphClass.PRONOUN,
        MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
    if noun_text is None:
        # Second attempt without a morphological class restriction.
        noun_text = self.noun.get_normal_case_text(
            None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
    print(Utils.ifNotNull(noun_text, str(self.noun)), end="", file=buf)
    return Utils.toStringStringIO(buf)
def deserialize_derivate_group(str0_: 'ByteArrayWrapper', dg: 'DerivateGroup', pos: int) -> None:
    """Fill ``dg`` from the serialized stream ``str0_``.

    Read order: flag word, ``questions``, ``questions_ref``, ``prefix``,
    the counted word list, the counted ``nexts`` entries and finally the
    counted ``nexts_ref`` entries.

    Args:
        str0_: source wrapper exposing ``deserialize_short`` / ``deserialize_string``.
        dg: group object populated in place.
        pos: position argument forwarded unchanged to every read call.
    """
    flags = str0_.deserialize_short(pos)
    if flags & 1:
        dg.is_dummy = True
    if flags & 2:
        dg.not_generate = True
    # Later bits win when several are set, exactly as a chain of plain ifs.
    if flags & 4:
        dg.m_transitive = 0
    if flags & 8:
        dg.m_transitive = 1
    if flags & 0x10:
        dg.m_rev_agent_case = 0
    if flags & 0x20:
        dg.m_rev_agent_case = 1
    if flags & 0x40:
        dg.m_rev_agent_case = 2
    dg.questions = Utils.valToEnum(str0_.deserialize_short(pos), NextModelQuestion)
    dg.questions_ref = Utils.valToEnum(str0_.deserialize_short(pos), NextModelQuestion)
    dg.prefix = str0_.deserialize_string(pos)

    for _ in range(str0_.deserialize_short(pos)):
        word = DerivateWord(dg)
        word.spelling = str0_.deserialize_string(pos)
        word.class0_ = MorphClass()
        word.class0_.value = str0_.deserialize_short(pos)
        word.lang = MorphLang._new10(str0_.deserialize_short(pos))
        word.attrs.value = str0_.deserialize_short(pos)
        dg.words.append(word)

    for _ in range(str0_.deserialize_short(pos)):
        key = Utils.ifNotNull(str0_.deserialize_string(pos), "")
        case = MorphCase()
        case.value = str0_.deserialize_short(pos)
        if dg.nexts is None:
            dg.nexts = dict()
        dg.nexts[key] = case

    for _ in range(str0_.deserialize_short(pos)):
        key = Utils.ifNotNull(str0_.deserialize_string(pos), "")
        case = MorphCase()
        case.value = str0_.deserialize_short(pos)
        if dg.nexts_ref is None:
            dg.nexts_ref = dict()
        dg.nexts_ref[key] = case
def _mergeSlots2(self, obj : 'Referent', lang : 'MorphLang') -> None:
    """Copy the slots of ``obj`` into this referent, then prune redundant slots.

    NOTE(review): this method was stored on two collapsed physical lines; the
    nesting below is reconstructed from syntax and should be confirmed
    against the upstream source (in particular the ``elif (lang.is_en)``
    pairing and the level of the first ``addSlot`` call).

    Args:
        obj: referent whose ``slots`` are read and added through ``self.addSlot``.
        lang: only the ``is_en`` flag is read, to filter NAME/TYPE values.
    """
    # Always True here, so the source slot's own count is always forwarded.
    merge_statistic = True
    for s in obj.slots:
        if (s.type_name == GeoReferent.ATTR_NAME or s.type_name == GeoReferent.ATTR_TYPE):
            nam = s.value
            # Latin-initial values are skipped unless lang.is_en; other values are
            # skipped when lang.is_en. NOTE(review): nam[0] assumes a non-empty value.
            if (LanguageHelper.isLatinChar(nam[0])):
                if (not lang.is_en):
                    continue
            elif (lang.is_en):
                continue
            if (LanguageHelper.endsWith(nam, " ССР")):
                continue
        self.addSlot(s.type_name, s.value, False, (s.count if merge_statistic else 0))
    # If filtering left this referent without any NAME, take every NAME from obj unfiltered.
    if (self.findSlot(GeoReferent.ATTR_NAME, None, True) is None and obj.findSlot(GeoReferent.ATTR_NAME, None, True) is not None):
        for s in obj.slots:
            if (s.type_name == GeoReferent.ATTR_NAME):
                self.addSlot(s.type_name, s.value, False, (s.count if merge_statistic else 0))
    # Same fallback for TYPE slots.
    if (self.findSlot(GeoReferent.ATTR_TYPE, None, True) is None and obj.findSlot(GeoReferent.ATTR_TYPE, None, True) is not None):
        for s in obj.slots:
            if (s.type_name == GeoReferent.ATTR_TYPE):
                self.addSlot(s.type_name, s.value, False, (s.count if merge_statistic else 0))
    if (self.is_territory):
        # With alpha2 set or a state/empire type present, drop the "территория" type slot.
        if (((self.alpha2 is not None or self.findSlot(GeoReferent.ATTR_TYPE, "государство", True) is not None or self.findSlot(GeoReferent.ATTR_TYPE, "держава", True) is not None) or self.findSlot(GeoReferent.ATTR_TYPE, "империя", True) is not None or self.findSlot(GeoReferent.ATTR_TYPE, "імперія", True) is not None) or self.findSlot(GeoReferent.ATTR_TYPE, "state", True) is not None):
            s = self.findSlot(GeoReferent.ATTR_TYPE, "территория", True)
            if (s is not None):
                self.slots.remove(s)
    if (self.is_state):
        # Remove the first region-type slot; the break keeps removal-during-iteration safe.
        for s in self.slots:
            if (s.type_name == GeoReferent.ATTR_TYPE and ((str(s.value) == "регион" or str(s.value) == "регіон" or str(s.value) == "region"))):
                self.slots.remove(s)
                break
    if (self.is_city):
        # s is the generic city type slot ("город" / "місто" / "city"), if any.
        s = Utils.ifNotNull(self.findSlot(GeoReferent.ATTR_TYPE, "город", True), Utils.ifNotNull(self.findSlot(GeoReferent.ATTR_TYPE, "місто", True), self.findSlot(GeoReferent.ATTR_TYPE, "city", True)))
        if (s is not None):
            # When another TYPE slot also satisfies __isCity, the generic slot s is removed.
            for ss in self.slots:
                if (ss.type_name == GeoReferent.ATTR_TYPE and ss != s and
                        GeoReferent.__isCity(ss.value)):
                    self.slots.remove(s)
                    break
    # Keep only the first ATTR_HIGHER slot and delete any further ones.
    has = False
    i = 0
    while i < len(self.slots):
        if (self.slots[i].type_name == GeoReferent.ATTR_HIGHER):
            if (not has):
                has = True
            else:
                del self.slots[i]
                i -= 1
        i += 1
    self._mergeExtReferents(obj)
def __calc_actant(self) -> float:
    """Compute ``self.coef`` for this link and return it.

    NOTE(review): the source of this method was collapsed onto one line; the
    nesting of the two ``self.coef /= (2)`` statements and of the returns
    inside the loop is reconstructed and should be confirmed upstream.

    Returns:
        float: -1 when ``can_be_participle`` is set or the verb has no
        ``verb_morph``; 0 when ``from_prep`` is None or the source case is
        undefined for a matching group; a value derived from
        ``SemanticService.PARAMS.next_model`` when the cases intersect;
        0.1 otherwise.
    """
    if (self.can_be_participle):
        self.coef = -1
        return self.coef
    vf2 = self.to_verb.last_verb.verb_morph
    if (vf2 is None):
        # This path returns -1 without storing it in self.coef.
        return -1
    if (self.from_prep is None):
        self.coef = 0
        return self.coef
    fm = self.from0_.source.source.morph
    grs = DerivateService.find_derivates(Utils.ifNotNull(vf2.normal_full, vf2.normal_case), True, None)
    if (grs is not None):
        for gr in grs:
            # Only groups whose 'nexts' table has an entry for this preposition are considered.
            if (gr.cm.nexts is None or not self.from_prep in gr.cm.nexts):
                continue
            cas = gr.cm.nexts[self.from_prep]
            if (not ((cas) & fm.case_).is_undefined):
                self.coef = SemanticService.PARAMS.next_model
                # Null-or-empty preposition: halve the coefficient, and halve it again for nominative.
                if (Utils.isNullOrEmpty(self.from_prep)):
                    if (fm.case_.is_nominative):
                        self.coef /= (2)
                    self.coef /= (2)
                return self.coef
            if (self.from0_.source.source.morph.case_.is_undefined):
                self.coef = 0
                return self.coef
    self.coef = 0.1
    return self.coef
def __serialize_morph_tree_node(res: io.IOBase, tn: 'MorphTreeNode') -> None:
    """Recursively write the tree node ``tn`` to the seekable stream ``res``.

    Written in order: rule ids followed by a 0 short, reverse variants
    followed by a None string, child nodes (key, end-offset placeholder,
    child body) followed by a 0xFFFF short.

    NOTE(review): the source was collapsed onto one line; placing the three
    terminator writes at function level (outside the ``if`` blocks) is a
    reconstruction — confirm against the matching deserializer.

    Args:
        res: output stream; must support ``tell`` and ``seek``.
        tn: node whose ``rules``, ``reverce_variants`` and ``nodes`` are written.
    """
    if (tn.rules is not None):
        for r in tn.rules:
            MorphSerializeHelper.__serialize_short(res, r._id0_)
    MorphSerializeHelper.__serialize_short(res, 0)
    if (tn.reverce_variants is not None):
        for v in tn.reverce_variants:
            MorphSerializeHelper.__serialize_string(
                res, Utils.ifNotNull(v.tail, ""))
            # No-op branch kept exactly as in the source.
            if (v.rule is not None):
                pass
            MorphSerializeHelper.__serialize_short(
                res, (0 if v.rule is None else v.rule._id0_))
            MorphSerializeHelper.__serialize_short(res, v.coef)
            MorphSerializeHelper.__serialize_morph_rule_variant(res, v)
    MorphSerializeHelper.__serialize_string(res, None)
    if (tn.nodes is not None):
        for n in tn.nodes.items():
            MorphSerializeHelper.__serialize_short(res, n[0])
            # Reserve an int slot, write the child, then back-patch the slot
            # with the stream position that follows the child.
            p0 = res.tell()
            MorphSerializeHelper.__serialize_int(res, 0)
            MorphSerializeHelper.__serialize_morph_tree_node(res, n[1])
            p1 = res.tell()
            res.seek(p0, io.SEEK_SET)
            MorphSerializeHelper.__serialize_int(res, p1)
            res.seek(p1, io.SEEK_SET)
    MorphSerializeHelper.__serialize_short(res, 0xFFFF)
def try_attach_by_referent(self, referent : 'Referent', item : 'IntOntologyItem'=None, must_be_single : bool=False) -> typing.List['Referent']:
    """Collect referents reachable through ``item`` that can equal ``referent``.

    Args:
        referent: referent to match; None yields None.
        item: ontology item to look up; when None it is created with
            ``referent.create_ontology_item()``.
        must_be_single: when set and several referents matched, every pair
            must satisfy ``can_be_equals(..., ReferentsEqualType.FORMERGING)``,
            otherwise None is returned.

    Returns:
        list of distinct matching referents, or None when nothing matched.
    """
    if referent is None:
        return None
    if item is None:
        item = referent.create_ontology_item()
    if item is None:
        return None
    candidates = self.try_attach_by_item(item)
    if candidates is None:
        return None

    matched = None
    for onto_item in candidates:
        found = Utils.ifNotNull(onto_item.referent, Utils.asObjectOrNull(onto_item.tag, Referent))
        if found is None:
            continue
        if not referent.can_be_equals(found, ReferentsEqualType.WITHINONETEXT):
            continue
        if matched is None:
            matched = list()
        if found not in matched:
            matched.append(found)

    if must_be_single and matched is not None and len(matched) > 1:
        total = len(matched)
        for first in range(total - 1):
            for second in range(first + 1, total):
                if not matched[first].can_be_equals(matched[second], ReferentsEqualType.FORMERGING):
                    return None
    return matched
def find_derivates(t : 'Token') -> typing.List['DerivateGroup']:
    """Look up derivate groups for a token, dispatching on the token's type.

    A ``NounPhraseToken`` is first replaced by the end token of its noun and
    restricts word forms to ``MorphClass.NOUN``. Text tokens are searched by
    each word form's normal form; verb phrases by their last verb; verb
    phrase items by ``normal_case`` and then ``normal_full``; the number
    token with value "1" by the word "ОДИН"; other meta tokens by their end
    token.

    Args:
        t: token to analyse.

    Returns:
        the groups found, or None.
    """
    required_class = None
    if isinstance(t, NounPhraseToken):
        t = t.noun.end_token
        required_class = MorphClass.NOUN

    if isinstance(t, TextToken):
        for form in t.morph.items:
            if not isinstance(form, MorphWordForm):
                continue
            if required_class is not None and ((required_class) & form.class0_).is_undefined:
                continue
            groups = DerivateService.find_derivates(Utils.ifNotNull(form.normal_full, form.normal_case), True, None)
            if groups is not None and len(groups) > 0:
                return groups
        return None

    if isinstance(t, VerbPhraseToken):
        return SemanticHelper.find_derivates(t.last_verb)

    if isinstance(t, VerbPhraseItemToken):
        verb_item = Utils.asObjectOrNull(t, VerbPhraseItemToken)
        groups = None
        if verb_item.verb_morph is not None:
            groups = DerivateService.find_derivates(verb_item.verb_morph.normal_case, True, t.morph.language)
            # Retry with the full normal form when the first lookup gave nothing usable.
            retry = groups is None or (
                len(groups) == 0
                and verb_item.verb_morph.normal_full is not None
                and verb_item.verb_morph.normal_case != verb_item.verb_morph.normal_full)
            if retry:
                groups = DerivateService.find_derivates(verb_item.verb_morph.normal_full, True, t.morph.language)
        return groups

    if isinstance(t, NumberToken) and t.value == "1":
        return DerivateService.find_derivates("ОДИН", True, MorphLang.RU)

    if isinstance(t, MetaToken):
        return SemanticHelper.find_derivates(t.end_token)
    return None
def __str__(self) -> str:
    """Return ``<kind>: <typ>`` followed by ``; <part>`` for every entry of ``self.parts``.

    ``typ`` is passed through ``Utils.ifNotNull`` with ``"?"`` as fallback;
    each part is rendered with ``to_string(True, None, 0)``.
    """
    out = io.StringIO()
    out.write("{0}: {1}".format(Utils.enumToString(self.kind), Utils.ifNotNull(self.typ, "?")))
    if self.parts is not None:
        for part in self.parts:
            out.write("; {0}".format(part.to_string(True, None, 0)))
    return Utils.toStringStringIO(out)
def get_wordform(word: str, morph_info: 'MorphBaseInfo') -> str:
    """ Get the spelling variant of a word form

    Args:
        word(str): the word
        morph_info(MorphBaseInfo): morphological information

    Returns:
        str: the spelling variant; ``word`` itself is returned when
        ``morph_info`` is None or ``word`` is null/empty
    """
    if morph_info is None or Utils.isNullOrEmpty(word):
        return word
    word_class = morph_info.class0_
    if word_class.is_undefined:
        # Fall back to the class reported by the morphology for the word itself.
        base_info = Morphology.get_word_base_info(word, None, False, False)
        if base_info is not None:
            word_class = base_info.class0_
    # The word is upper-cased as soon as it contains any lower-case character.
    if any(ch.islower() for ch in word):
        word = word.upper()
    variant = Morphology.__m_inner.get_wordform(
        word, word_class, morph_info.gender, morph_info.case_,
        morph_info.number, morph_info.language,
        Utils.asObjectOrNull(morph_info, MorphWordForm))
    return Utils.ifNotNull(variant, word)
def create_specific_processor(spec_analyzer_names: str) -> 'Processor':
    """ Create a processor with the standard analyzers plus the named specific ones.

    Args:
        spec_analyzer_names(str): one or more names separated by comma,
            semicolon or space; with an empty list only non-specific
            analyzers are added

    Returns:
        Processor: the processor instance, or None when the service has not
        been initialized
    """
    from pullenti.ner.Processor import Processor
    if not ProcessorService.__m_inited:
        return None
    processor = Processor()
    wanted = list(
        Utils.splitString((Utils.ifNotNull(spec_analyzer_names, "")),
                          ',' + ';' + ' ', False))
    for template in ProcessorService.__m_analizer_instances:
        analyzer = template.clone()
        if analyzer is None:
            continue
        # Specific analyzers are included only when explicitly requested by name.
        if analyzer.is_specific and analyzer.name not in wanted:
            continue
        processor.add_analyzer(analyzer)
    return processor
def to_string(self, short_variant: bool, lang: 'MorphLang', lev: int = 0) -> str:
    """Render the participant as its capitalised type plus the referenced entity.

    With an ``ATTR_REF`` referent the output is ``Type: <ref>`` and, for the
    long variant, the delegate is appended as `` (в лице <delegate>)``.
    Without one, a delegate alone is rendered as ``Type: в лице <delegate>``.

    Args:
        short_variant: selects the short rendering.
        lang: forwarded to the nested ``to_string`` calls.
        lev: nesting level; delegates are rendered with ``lev + 1``.

    Returns:
        str: the formatted text.
    """
    out = io.StringIO()
    title = MiscHelper.convert_first_char_upper_and_other_lower(
        Utils.ifNotNull(self.typ, "?"))
    print(title, end="", file=out)
    organization = Utils.asObjectOrNull(
        self.get_slot_value(InstrumentParticipantReferent.ATTR_REF), Referent)
    delegate = Utils.asObjectOrNull(
        self.get_slot_value(InstrumentParticipantReferent.ATTR_DELEGATE), Referent)
    if organization is None:
        if delegate is not None:
            out.write(": в лице {0}".format(
                delegate.to_string(short_variant, lang, lev + 1)))
    else:
        out.write(": {0}".format(organization.to_string(short_variant, lang, 0)))
        if delegate is not None and not short_variant:
            out.write(" (в лице {0})".format(
                delegate.to_string(True, lang, lev + 1)))
    return Utils.toStringStringIO(out)
def __parse_subsent(npt : 'NounPhraseToken', t1 : 'Token', lev : int, prev : typing.List['SentItem']) -> typing.List['SentItem']:
    """Build SUBSENT items for the sentence variants that follow ``npt``.

    Parsing is attempted only when, scanning ``prev`` from the end, a CONJ or
    DELIM item is met before any VERB item.

    Args:
        npt: noun phrase that starts each candidate sub-sentence.
        t1: end boundary forwarded to ``Sentence.parse_variants``.
        lev: current nesting level; the nested parse uses ``lev + 1``.
        prev: items already parsed before ``npt``; may be None.

    Returns:
        list of SUBSENT items, one per distinct end position, or None when
        the precondition on ``prev`` fails.
    """
    allowed = False
    if prev is not None:
        for item in reversed(prev):
            if item.typ == SentItemType.CONJ or item.typ == SentItemType.DELIM:
                allowed = True
                break
            if item.typ == SentItemType.VERB:
                break
    if not allowed:
        return None

    variants = Utils.ifNotNull(
        Sentence.parse_variants(npt.end_token.next0_, t1, lev + 1, 20, SentItemType.SUBSENT),
        list())
    seen_ends = list()
    result = list()
    for sent in variants:
        sent.items.insert(0, SentItem(npt))
        sent.calc_coef(True)
        sent.trunc_oborot(False)
        end_char = sent.items[-1].end_token.end_char
        if end_char in seen_ends:
            # Keep only the first variant for each end position.
            continue
        seen_ends.append(end_char)
        sent.calc_coef(False)
        wrapper = SentItem(npt)
        wrapper.typ = SentItemType.SUBSENT
        wrapper.sub_typ = SentItemSubtype.WICH
        wrapper.sub_sent = sent
        wrapper.result = sent.items[0].result
        wrapper.end_token = sent.items[-1].end_token
        result.append(wrapper)
    return result
def deserializeDerivateGroup(str0_ : 'ByteArrayWrapper', dg : 'DerivateGroup') -> None:
    """Fill ``dg`` from the serialized stream ``str0_``.

    Read order: flag word, ``prefix``, the counted word list and the counted
    ``nexts`` entries.

    Args:
        str0_: source wrapper exposing ``deserializeShort`` / ``deserializeString``.
        dg: group object populated in place.
    """
    flags = str0_.deserializeShort()
    if flags & 1:
        dg.is_dummy = True
    if flags & 2:
        dg.not_generate = True
    # Bit 8 is applied after bit 4, so it wins when both are set.
    if flags & 4:
        dg.m_transitive = 0
    if flags & 8:
        dg.m_transitive = 1
    dg.prefix = str0_.deserializeString()

    for _ in range(str0_.deserializeShort()):
        word = DerivateWord(dg)
        word.spelling = str0_.deserializeString()
        word.class0_ = MorphClass()
        word.class0_.value = str0_.deserializeShort()
        word.lang = MorphLang._new5(str0_.deserializeShort())
        word.attrs.value = str0_.deserializeShort()
        dg.words.append(word)

    for _ in range(str0_.deserializeShort()):
        key = Utils.ifNotNull(str0_.deserializeString(), "")
        case = MorphCase()
        case.value = str0_.deserializeShort()
        if dg.nexts is None:
            dg.nexts = dict()
        dg.nexts[key] = case
def __getName(self, cyr : bool) -> str:
    """Pick one display name from the ATTR_NAME slots.

    Two passes are made over the slots: the first applies the script filter
    controlled by ``cyr``, the second accepts any name. The second pass runs
    only when the first found nothing.

    NOTE(review): the source was collapsed onto one line; pairing
    ``elif (not cyr)`` with the outer ``isCyrillicChar`` test is a
    reconstruction — confirm upstream.

    Args:
        cyr: when truthy, names whose first character is not Cyrillic are
            skipped in the first pass; when falsy, names whose first
            character is Cyrillic are skipped.

    Returns:
        str: the chosen name ("МОЛДОВА" and "БЕЛАРУСЬ" are replaced by
        "МОЛДАВИЯ" and "БЕЛОРУССИЯ"), passed through ``Utils.ifNotNull``
        with ``"?"`` as fallback.
    """
    name = None
    for i in range(2):
        for s in self.slots:
            if (s.type_name == GeoReferent.ATTR_NAME):
                v = str(s.value)
                if (Utils.isNullOrEmpty(v)):
                    continue
                # Script filter, first pass only.
                if (i == 0):
                    if (not LanguageHelper.isCyrillicChar(v[0])):
                        if (cyr):
                            continue
                    elif (not cyr):
                        continue
                if (name is None):
                    name = v
                elif (len(name) > len(v)):
                    # A shorter candidate replaces the current name unless it is very short
                    # (< 4) while the current one is < 10 chars, or the current name ends with 'В'.
                    if ((len(v) < 4) and (len(name) < 10)):
                        pass
                    elif (name[len(name) - 1] == 'В'):
                        pass
                    else:
                        name = v
                # A very short current name (< 4) is replaced by a candidate of 4..9 chars.
                elif ((len(name) < 4) and len(v) >= 4 and (len(v) < 10)):
                    name = v
        if (name is not None):
            break
    if (name == "МОЛДОВА"):
        name = "МОЛДАВИЯ"
    elif (name == "БЕЛАРУСЬ"):
        name = "БЕЛОРУССИЯ"
    return Utils.ifNotNull(name, "?")
def try_parse(t : 'Token', prev : 'WeaponItemToken', after_conj : bool, attach_high : bool=False) -> 'WeaponItemToken':
    """Parse a weapon item starting at ``t``.

    First delegates to ``WeaponItemToken.__try_parse``. If that fails, ``t``
    is parsed as a noun phrase and parsing is retried from the phrase's noun,
    so that leading adjectives can be folded into ``alt_value``. For NAME
    items a following parenthesised text that matches the name
    (Cyrillic/Latin comparison) is attached as ``alt_value``.

    NOTE(review): the source was collapsed onto one line; the levels of
    ``res.begin_token = t`` / ``return res`` / ``return None`` in the
    fallback branch are reconstructed — confirm upstream.

    Args:
        t: first token to analyse.
        prev: previously parsed item, forwarded to ``__try_parse``.
        after_conj: forwarded to ``__try_parse``.
        attach_high: forwarded to ``__try_parse``.

    Returns:
        WeaponItemToken: the parsed item, or None.
    """
    res = WeaponItemToken.__try_parse(t, prev, after_conj, attach_high)
    if (res is None):
        npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
        # Retry only when the noun starts after the phrase start (the phrase has leading words).
        if (npt is not None and npt.noun.begin_char > npt.begin_char):
            res = WeaponItemToken.__try_parse(npt.noun.begin_token, prev, after_conj, attach_high)
            if (res is not None):
                if (res.typ == WeaponItemToken.Typs.NOUN):
                    str0_ = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                    # Hard-coded correction of one normalised phrase.
                    if (str0_ == "РУЧНОЙ ГРАНАТ"):
                        str0_ = "РУЧНАЯ ГРАНАТА"
                    if ((Utils.ifNotNull(str0_, "")).endswith(res.value)):
                        if (res.alt_value is None):
                            res.alt_value = str0_
                        else:
                            # Keep only the part before the noun and prepend it to the existing alt value.
                            str0_ = str0_[0:0+len(str0_) - len(res.value)].strip()
                            res.alt_value = "{0} {1}".format(str0_, res.alt_value)
                        res.begin_token = t
                        return res
        return None
    if (res.typ == WeaponItemToken.Typs.NAME):
        br = BracketHelper.try_parse(res.end_token.next0_, BracketParseAttr.NO, 100)
        if (br is not None and br.is_char('(')):
            alt = MiscHelper.get_text_value_of_meta_token(br, GetTextAttr.NO)
            if (MiscHelper.can_be_equal_cyr_and_latss(res.value, alt)):
                res.alt_value = alt
                res.end_token = br.end_token
    return res
def __ToString(self, short_variant : bool, lang : 'MorphLang', out_cladr : bool, lev : int) -> str:
    """Render this geo referent as text.

    NOTE(review): the source was collapsed onto one line; the nesting below
    is reconstructed from syntax and should be confirmed upstream.

    Args:
        short_variant: when truthy only the name is produced (no type prefix,
            no FIAS/BTI suffix, no higher object).
        lang: language; ``lang.is_en`` is passed as the flag of ``__getName``
            and ``lang`` is forwarded to nested rendering.
            NOTE(review): ``__getName`` names that flag ``cyr`` — confirm
            the intended polarity.
        out_cladr: when truthy (long variant only) FIAS and BTI data are appended.
        lev: recursion depth; the higher object is appended only while ``lev < 10``.

    Returns:
        str: the formatted text.
    """
    # Non-state unions: the type followed by "; <member>" for each referenced referent.
    if (self.is_union and not self.is_state):
        res = io.StringIO()
        print(self.getStringValue(GeoReferent.ATTR_TYPE), end="", file=res)
        for s in self.slots:
            if (s.type_name == GeoReferent.ATTR_REF and (isinstance(s.value, Referent))):
                print("; {0}".format((s.value).toString(True, lang, 0)), end="", file=res, flush=True)
        return Utils.toStringStringIO(res)
    name = MiscHelper.convertFirstCharUpperAndOtherLower(self.__getName(lang is not None and lang.is_en))
    if (not short_variant):
        if (not self.is_state):
            # Objects that are both city and region get no type prefix.
            if (self.is_city and self.is_region):
                pass
            else:
                typ = self.getStringValue(GeoReferent.ATTR_TYPE)
                if (typ is not None):
                    if (not self.is_city):
                        # For non-cities only the last word of the type is used.
                        i = typ.rfind(' ')
                        if (i > 0):
                            typ = typ[i + 1:]
                    name = "{0} {1}".format(typ, name)
    if (not short_variant and out_cladr):
        kladr = self.getSlotValue(GeoReferent.ATTR_FIAS)
        if (isinstance(kladr, Referent)):
            name = "{0} (ФИАС: {1})".format(name, Utils.ifNotNull((kladr).getStringValue("GUID"), "?"))
        bti = self.getStringValue(GeoReferent.ATTR_BTI)
        if (bti is not None):
            name = "{0} (БТИ {1})".format(name, bti)
    if (not short_variant and self.higher is not None and (lev < 10)):
        # The higher object is appended when it is a city containing this region,
        # or when this is a city without an explicit "город"/"місто" type slot.
        if (((self.higher.is_city and self.is_region)) or ((self.findSlot(GeoReferent.ATTR_TYPE, "город", True) is None and self.findSlot(GeoReferent.ATTR_TYPE, "місто", True) is None and self.is_city))):
            return "{0}; {1}".format(name, self.higher.__ToString(False, lang, False, lev + 1))
    return name
def convert_adverb_to_adjective(adverb: str, bi: 'MorphBaseInfo') -> str:
    """ Convert an adverb to an adjective (currently for Russian only)

    Args:
        adverb(str): the adverb; its last character is compared with the
            upper-case Cyrillic letters 'О' and 'Е'
        bi(MorphBaseInfo): gender, number and case of the result; may be None

    Returns:
        str: None when ``adverb`` is None or shorter than 4 characters; the
        adverb unchanged when it does not end with 'О' or 'Е'; otherwise the
        adjective candidate (stem + "ИЙ" or "ЫЙ"), inflected according to
        ``bi`` when ``bi`` is given and an inflected form is available
    """
    if adverb is None or len(adverb) < 4:
        return None
    if adverb[-1] not in ('О', 'Е'):
        return adverb
    stem = adverb[:-1]
    var1 = stem + "ИЙ"
    var2 = stem + "ЫЙ"
    bi1 = Morphology.get_word_base_info(var1, None, False, False)
    bi2 = Morphology.get_word_base_info(var2, None, False, False)
    # get_word_base_info may return None (other callers check for it), so a
    # missing result is treated as "not an adjective" instead of raising.
    first_is_adjective = bi1 is not None and bi1.class0_.is_adjective
    second_is_adjective = bi2 is not None and bi2.class0_.is_adjective
    # The "ИЙ" variant is the default; "ЫЙ" is taken only when it alone is an adjective.
    var = var2 if (not first_is_adjective and second_is_adjective) else var1
    if bi is None:
        return var
    return Utils.ifNotNull(
        Morphology.__m_inner.get_wordform(var, MorphClass.ADJECTIVE,
                                          bi.gender, bi.case_, bi.number,
                                          MorphLang.UNKNOWN, None), var)
def toString(self, short_variant: bool, lang: 'MorphLang' = None, lev: int = 0) -> str:
    """Render the unit name, optionally followed by its power.

    The name is taken from ``ATTR_NAME`` slots (short variant) or
    ``ATTR_FULLNAME`` slots (long variant). When ``lang`` is given, a first
    pass accepts only names whose ``LanguageHelper.isCyrillic`` result
    equals ``lang.is_ru``; a second pass accepts the first candidate.

    Args:
        short_variant: selects the short or the full name.
        lang: optional language preference.
        lev: nesting level; for ``lev > 0`` the power is not appended.

    Returns:
        str: the name (``"?"`` fallback via ``Utils.ifNotNull``), with the
        power appended directly or as ``<power>`` when it starts with '-',
        and a ``(?)`` prefix for unknown units in the long variant.
    """
    wanted_attr = UnitReferent.ATTR_NAME if short_variant else UnitReferent.ATTR_FULLNAME
    chosen = None
    for attempt in range(2):
        check_script = lang is not None and attempt == 0
        for slot in self.slots:
            if slot.type_name != wanted_attr:
                continue
            candidate = Utils.asObjectOrNull(slot.value, str)
            if check_script and lang.is_ru != LanguageHelper.isCyrillic(candidate):
                continue
            chosen = candidate
            break
        if chosen is not None:
            break
    if chosen is None:
        chosen = self.getStringValue(UnitReferent.ATTR_NAME)
    power = self.getStringValue(UnitReferent.ATTR_POW)
    if Utils.isNullOrEmpty(power) or lev > 0:
        return Utils.ifNotNull(chosen, "?")
    if power[0] != '-':
        text = "{0}{1}".format(chosen, power)
    else:
        text = "{0}<{1}>".format(chosen, power)
    if not short_variant and self.is_unknown:
        text = "(?)" + text
    return text
def _set_morph(obj : 'SemObject', wf : 'MorphWordForm') -> None:
    """Copy morphological attributes from the word form ``wf`` into ``obj.morph``.

    Does nothing when ``wf`` is None. ``normal_full`` is set from
    ``Utils.ifNotNull(wf.normal_full, wf.normal_case)``.

    Args:
        obj: object whose ``morph`` attribute is updated in place.
        wf: source word form, or None.
    """
    if wf is None:
        return
    target = obj.morph
    target.normal_case = wf.normal_case
    target.normal_full = Utils.ifNotNull(wf.normal_full, wf.normal_case)
    target.number = wf.number
    target.gender = wf.gender
    target.misc = wf.misc
def get_keyword(mt : 'MetaToken') -> str:
    """Return the keyword text for a verb phrase or noun phrase token.

    For a ``VerbPhraseToken`` the keyword is the ``normal_full`` form of the
    last verb (``normal_case`` as fallback via ``Utils.ifNotNull``). For a
    ``NounPhraseToken`` it is the singular normal-case text of the noun's
    end token.

    Args:
        mt: token to inspect.

    Returns:
        str: the keyword, or None for other token types and for a verb
        phrase whose last verb has no ``verb_morph``.
    """
    vpt = Utils.asObjectOrNull(mt, VerbPhraseToken)
    if vpt is not None:
        verb_morph = vpt.last_verb.verb_morph
        # verb_morph can be None (other code checks it before use); report
        # "no keyword" instead of raising AttributeError.
        if verb_morph is None:
            return None
        return Utils.ifNotNull(verb_morph.normal_full, verb_morph.normal_case)
    npt = Utils.asObjectOrNull(mt, NounPhraseToken)
    if npt is not None:
        return npt.noun.end_token.get_normal_case_text(
            MorphClass.NOUN, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
    return None
def __str__(self) -> str:
    """Return a debug string: name, optional ``<power>``, ``?`` for doubt, optional keyword.

    The name is ``Utils.ifNotNull(self.unknown_name, <fallback>)`` where the
    fallback is ``str(self.ext_onto)`` when set and ``str(self.unit)``
    otherwise.
    """
    if self.ext_onto is None:
        fallback = str(self.unit)
    else:
        fallback = str(self.ext_onto)
    text = Utils.ifNotNull(self.unknown_name, fallback)
    if self.pow0_ != 1:
        text = "{0}<{1}>".format(text, self.pow0_)
    if self.is_doubt:
        text += "?"
    if self.keyword is not None:
        keyword_text = self.keyword.getNormalCaseText(None, False, MorphGender.UNDEFINED, False)
        text = "{0} (<-{1})".format(text, keyword_text)
    return text
def __init__(self, source: 'MorphToken', kit_: 'AnalysisKit', bchar: int = -1, echar: int = -1) -> None:
    """Create a text token from a morphological token.

    NOTE(review): the source was collapsed onto one line; the nesting below
    is reconstructed from syntax and should be confirmed upstream.

    Args:
        source: morphological token supplying term, lemma, character info,
            word forms and language; may be None.
        kit_: analysis kit forwarded to the base constructor.
        bchar: begin position; a negative value means "take it from source"
            (0 when source is None).
        echar: end position; same convention as ``bchar``.
    """
    super().__init__(kit_, (bchar if bchar >= 0 else (0 if source is None else source.begin_char)), (echar if echar >= 0 else (0 if source is None else source.end_char)))
    self.term = None
    self.lemma = None
    self.term0 = None
    self.invariant_prefix_length_of_morph_vars = 0
    self.max_length_of_morph_vars = 0
    if (source is None):
        return
    self.chars = source.char_info
    self.term = source.term
    self.lemma = (Utils.ifNotNull(source.get_lemma(), self.term))
    # Longest of the term and every normal_case / normal_full among the word forms.
    self.max_length_of_morph_vars = (len(self.term))
    self.morph = MorphCollection()
    if (source.word_forms is not None):
        for wf in source.word_forms:
            self.morph.add_item(wf)
            if (wf.normal_case is not None and (self.max_length_of_morph_vars < len(wf.normal_case))):
                self.max_length_of_morph_vars = (len(wf.normal_case))
            if (wf.normal_full is not None and (self.max_length_of_morph_vars < len(wf.normal_full))):
                self.max_length_of_morph_vars = (len(wf.normal_full))
    # Length of the leading part of the term that every word form's
    # normal_case and normal_full share character by character.
    i = 0
    while i < len(self.term):
        ch = self.term[i]
        j = 0
        j = 0
        while j < self.morph.items_count:
            wf = Utils.asObjectOrNull(self.morph.get_indexer_item(j), MorphWordForm)
            if (wf.normal_case is not None):
                if (i >= len(wf.normal_case)):
                    break
                if (wf.normal_case[i] != ch):
                    break
            if (wf.normal_full is not None):
                if (i >= len(wf.normal_full)):
                    break
                if (wf.normal_full[i] != ch):
                    break
            j += 1
        # An early exit of the inner loop means some form diverges at position i.
        if (j < self.morph.items_count):
            break
        self.invariant_prefix_length_of_morph_vars = ((i + 1))
        i += 1
    if (self.morph.language.is_undefined and not source.language.is_undefined):
        self.morph.language = source.language
def year(self) -> int:
    """Return the ``ATTR_YEAR`` string value parsed as an integer.

    Returns:
        int: the parsed year, or 0 when ``Utils.tryParseInt`` reports failure.
    """
    holder = RefOutArgWrapper(0)
    raw_year = Utils.ifNotNull(self.getStringValue(BookLinkReferent.ATTR_YEAR), "")
    parsed_ok = Utils.tryParseInt(raw_year, holder)
    return holder.value if parsed_ok else 0
def refresh_variants(self) -> None:
    """Rebuild ``variants``, ``variants_key`` and ``variants_list`` from the current variants.

    All variants held in ``variants_list`` are flattened, the three
    containers are cleared, and the variants are regrouped by
    ``Utils.ifNotNull(v.tail, "")``. The key and list containers are then
    refilled in the iteration order of ``self.variants``.
    """
    flat = list()
    for group in self.variants_list:
        flat += group
    self.variants.clear()
    self.variants_key.clear()
    self.variants_list.clear()
    for variant in flat:
        key = Utils.ifNotNull(variant.tail, "")
        holder = RefOutArgWrapper(None)
        if Utils.tryGetValue(self.variants, key, holder):
            bucket = holder.value
        else:
            bucket = list()
            self.variants[key] = bucket
        bucket.append(variant)
    for key, bucket in self.variants.items():
        self.variants_key.append(key)
        self.variants_list.append(bucket)
def normal(self) -> str:
    """ Normalized value

    Without ``verb_morph`` the stored ``__m_normal`` is returned. Otherwise
    the stored value is used for a non-adjective with a defined case when it
    is set; a pure adjective (adjective and not verb) gives
    ``Utils.ifNotNull(normal_full, normal_case)``; everything else gives
    ``normal_case``.
    """
    form = self.verb_morph
    if form is None:
        return self.__m_normal
    is_adjective = form.class0_.is_adjective
    if not is_adjective and not form.case_.is_undefined and self.__m_normal is not None:
        return self.__m_normal
    if is_adjective and not form.class0_.is_verb:
        return Utils.ifNotNull(form.normal_full, form.normal_case)
    return form.normal_case
def toString(self, short_variant : bool, lang : 'MorphLang'=None, lev : int=0) -> str:
    """Render the referent as ``<scheme><separator><value>``.

    The separator is a space for "ISBN", "ББК" and "УДК", ``://`` for
    "http", "ftp" and "https", and ``:`` for any other scheme. Without a
    scheme, ``self.value`` is returned as is.

    Args:
        short_variant: not used by this implementation.
        lang: not used by this implementation.
        lev: not used by this implementation.

    Returns:
        str: the formatted text; ``self.value`` unchanged (possibly None)
        when no scheme is set.
    """
    scheme = self.scheme
    if scheme is None:
        return self.value
    if scheme in ("ISBN", "ББК", "УДК"):
        separator = " "
    elif scheme in ("http", "ftp", "https"):
        separator = "://"
    else:
        separator = ":"
    return "{0}{1}{2}".format(scheme, separator, Utils.ifNotNull(self.value, "?"))
def tryParse(t : 'Token') -> 'DefinitionWithNumericToken':
    """ Extract a definition containing a number, starting from the given token

    The sentence starting at ``t`` is scanned for the first number token
    (not ``t`` itself, not adjective-like, at most 2 whitespaces after it)
    that is followed by a noun phrase.

    NOTE(review): the source was collapsed onto one line; the nesting below
    is reconstructed from syntax and should be confirmed upstream.

    Args:
        t(Token): the token; must be able to start a sentence

    Returns:
        DefinitionWithNumericToken: the parsed definition, or None when no
        suitable number + noun phrase is found before the next sentence start
        or the number has no ``int_value``
    """
    if (not MiscHelper.canBeStartOfSentence(t)):
        return None
    tt = t
    noun_ = None
    num = None
    # Emulated for-loop: tt is advanced at the top of every iteration except the
    # first, so that 'continue' still moves on to the next token.
    first_pass2886 = True
    while True:
        if first_pass2886: first_pass2886 = False
        else: tt = tt.next0_
        if (not (tt is not None)): break
        # Reaching the next sentence start without a match aborts the search.
        if (tt != t and MiscHelper.canBeStartOfSentence(tt)):
            return None
        if (not ((isinstance(tt, NumberToken)))):
            continue
        if (tt.whitespaces_after_count > 2 or tt == t):
            continue
        if (tt.morph.class0_.is_adjective):
            continue
        nn = NounPhraseHelper.tryParse(tt.next0_, NounPhraseParseAttr.NO, 0)
        if (nn is None):
            continue
        num = (Utils.asObjectOrNull(tt, NumberToken))
        noun_ = nn
        break
    if (num is None or num.int_value is None):
        return None
    res = DefinitionWithNumericToken(t, noun_.end_token)
    res.number = num.int_value
    res.number_begin_char = num.begin_char
    res.number_end_char = num.end_char
    res.noun = noun_.getNormalCaseText(None, True, MorphGender.UNDEFINED, False)
    # The 'res is not None' test is always true here; kept exactly as in the source.
    res.nouns_genetive = (Utils.ifNotNull(noun_.getMorphVariant(MorphCase.GENITIVE, True), (res.noun if res is not None else None)))
    # Text before the number, then the "number + noun phrase" substring.
    res.text = MiscHelper.getTextValue(t, num.previous, Utils.valToEnum((GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER), GetTextAttr))
    if (num.is_whitespace_before):
        res.text += " "
    res.number_substring = MiscHelper.getTextValue(num, noun_.end_token, Utils.valToEnum((GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER), GetTextAttr))
    res.text += res.number_substring
    # Extend the result up to the token before the next sentence start.
    tt = noun_.end_token
    while tt is not None:
        if (MiscHelper.canBeStartOfSentence(tt)):
            break
        res.end_token = tt
        tt = tt.next0_
    if (res.end_token != noun_.end_token):
        if (noun_.is_whitespace_after):
            res.text += " "
        res.text += MiscHelper.getTextValue(noun_.end_token.next0_, res.end_token, Utils.valToEnum((GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER), GetTextAttr))
    return res