def find_derivates(t : 'Token') -> typing.List['DerivateGroup']:
    """Return the derivate groups found for a token, or None.

    The lookup depends on the kind of token:

    * NounPhraseToken - replaced by the end token of its head noun; only
      word forms whose class intersects MorphClass.NOUN are then tried.
    * TextToken - every MorphWordForm of the token is tried in order with
      ``Utils.ifNotNull(normal_full, normal_case)``; the first non-empty
      result is returned, otherwise None.
    * VerbPhraseToken - delegated to its ``last_verb``.
    * VerbPhraseItemToken - looked up by ``verb_morph.normal_case`` and,
      under the condition spelled out below, retried with ``normal_full``.
    * NumberToken with value "1" - looked up as the word "ОДИН".
    * Any other MetaToken - delegated to its ``end_token``.
    """
    noun_only = None
    if isinstance(t, NounPhraseToken):
        t = t.noun.end_token
        noun_only = MorphClass.NOUN

    if isinstance(t, TextToken):
        for form in t.morph.items:
            if not isinstance(form, MorphWordForm):
                continue
            # When we came from a noun phrase, skip readings that are not nouns.
            if noun_only is not None and (noun_only & form.class0_).is_undefined:
                continue
            found = DerivateService.find_derivates(
                Utils.ifNotNull(form.normal_full, form.normal_case), True, None)
            if found is not None and len(found) > 0:
                return found
        return None

    if isinstance(t, VerbPhraseToken):
        return SemanticHelper.find_derivates(t.last_verb)

    if isinstance(t, VerbPhraseItemToken):
        item = Utils.asObjectOrNull(t, VerbPhraseItemToken)
        verb_form = item.verb_morph
        if verb_form is None:
            return None
        found = DerivateService.find_derivates(verb_form.normal_case, True, t.morph.language)
        # Retry with the full normal form when the first lookup gave None, or
        # gave an empty list and a distinct full form is available.
        needs_retry = found is None or (
            len(found) == 0
            and verb_form.normal_full is not None
            and verb_form.normal_case != verb_form.normal_full)
        if needs_retry:
            found = DerivateService.find_derivates(verb_form.normal_full, True, t.morph.language)
        return found

    if isinstance(t, NumberToken) and t.value == "1":
        return DerivateService.find_derivates("ОДИН", True, MorphLang.RU)

    # NumberToken with another value falls through to the generic MetaToken test.
    if isinstance(t, MetaToken):
        return SemanticHelper.find_derivates(t.end_token)
    return None
def __init__(self, mt : 'MetaToken') -> None:
    """Initialise the item from the parsed token ``mt``.

    All attributes first receive their defaults; ``typ`` (and, for noun and
    verb phrases, ``prep`` / ``dr_groups`` / ``dr_groups2``) are then filled
    in according to the concrete class of ``mt``.
    """
    self.source = None
    self.prep = None
    self.typ = SentItemType.UNDEFINED
    self.sub_typ = SentItemSubtype.UNDEFINED
    self.sub_sent = None
    self.plural = -1
    self.dr_groups = None
    self.dr_groups2 = None
    self.part_verb_typ = NGLinkType.UNDEFINED
    self.participle_coef = 1
    self.quant = None
    self.attrs = None
    self.can_be_question = False
    self.result = None
    self.result_verb_last = None
    self.__m_res_graph = None
    self.res_frag = None
    self.result_list = None
    self.result_list_or = False
    self.__m_begin_token = None
    self.__m_end_token = None
    self.source = mt

    if isinstance(mt, NounPhraseToken):
        phrase = Utils.asObjectOrNull(mt, NounPhraseToken)
        self.prep = "" if phrase.preposition is None else phrase.preposition.normal
        self.typ = SentItemType.NOUN
        lemma = phrase.noun.get_normal_case_text(
            MorphClass.NOUN, MorphNumber.SINGULAR, MorphGender.MASCULINE, False)
        if lemma is not None:
            self.dr_groups = DerivateService.find_derivates(lemma, True, None)
        return

    # Token kinds that only determine the item type, tested in this order.
    plain_kinds = (
        ((ReferentToken, NumbersWithUnitToken), SentItemType.NOUN),
        (AdverbToken, SentItemType.ADVERB),
        (ConjunctionToken, SentItemType.CONJ),
        (DelimToken, SentItemType.DELIM),
    )
    for token_class, item_type in plain_kinds:
        if isinstance(mt, token_class):
            self.typ = item_type
            return

    if not isinstance(mt, VerbPhraseToken):
        return

    verbs = Utils.asObjectOrNull(mt, VerbPhraseToken)
    head_form = verbs.first_verb.verb_morph
    if head_form is None:
        lemma = None
    else:
        lemma = Utils.ifNotNull(head_form.normal_full, head_form.normal_case)
    if lemma is not None:
        self.dr_groups = DerivateService.find_derivates(lemma, True, None)

    if verbs.first_verb != verbs.last_verb:
        # Separate lookup for the last verb of a multi-verb phrase.
        tail_form = verbs.last_verb.verb_morph
        if tail_form is None:
            lemma = verbs.get_normal_case_text(
                None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
        else:
            lemma = Utils.ifNotNull(tail_form.normal_full, tail_form.normal_case)
        self.dr_groups2 = DerivateService.find_derivates(lemma, True, None)
    else:
        self.dr_groups2 = self.dr_groups

    self.prep = "" if verbs.preposition is None else verbs.preposition.normal
    self.typ = SentItemType.VERB
def __calc_actant(self) -> float:
    """Compute the coefficient of this link treated as a verb actant.

    Stores the outcome in ``self.coef`` and returns it, except when the last
    verb has no ``verb_morph``: then -1 is returned and ``self.coef`` is left
    untouched.
    """
    if self.can_be_participle:
        self.coef = -1
        return self.coef

    last_form = self.to_verb.last_verb.verb_morph
    if last_form is None:
        return -1

    if self.from_prep is None:
        self.coef = 0
        return self.coef

    source_morph = self.from0_.source.source.morph
    lemma = Utils.ifNotNull(last_form.normal_full, last_form.normal_case)
    groups = DerivateService.find_derivates(lemma, True, None)
    for group in (groups if groups is not None else ()):
        governed = group.cm.nexts
        if governed is None or self.from_prep not in governed:
            continue
        allowed_case = governed[self.from_prep]
        if not (allowed_case & source_morph.case_).is_undefined:
            # The case required after this preposition is compatible with the source.
            self.coef = SemanticService.PARAMS.next_model
            if Utils.isNullOrEmpty(self.from_prep):
                if source_morph.case_.is_nominative:
                    self.coef /= 2
                self.coef /= 2
            return self.coef
        if source_morph.case_.is_undefined:
            self.coef = 0
            return self.coef

    self.coef = 0.1
    return self.coef
def create_noun_group(gr : 'SemGraph', npt : 'NounPhraseToken') -> 'SemObject':
    """Create the semantic object for the head noun of a noun phrase.

    The object is typed as noun / pronoun / personal pronoun, its morphology
    is filled from the head noun (multi-token noun, text token, referent
    token or number token), adjectives and an internal noun are attached
    with DETAIL links, and the object is appended to ``gr.objects``.

    Returns the new object, or None when the head is a ReferentToken
    without a referent (nothing is added to the graph in that case).
    """
    head = npt.noun
    first_tok = head.begin_token
    node = SemObject(gr)
    node.tokens.append(head)

    head_class = head.morph.class0_
    if head_class.is_personal_pronoun:
        node.typ = SemObjectType.PERSONALPRONOUN
    elif head_class.is_pronoun:
        node.typ = SemObjectType.PRONOUN
    else:
        node.typ = SemObjectType.NOUN

    if head.begin_token != head.end_token:
        # Multi-token head: take the texts from the head, the grammar from the phrase.
        node.morph.normal_case = head.get_normal_case_text(
            None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
        node.morph.normal_full = head.get_normal_case_text(
            None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
        node.morph.class0_ = MorphClass.NOUN
        node.morph.number = npt.morph.number
        node.morph.gender = npt.morph.gender
        node.morph.case_ = npt.morph.case_
    elif isinstance(first_tok, TextToken):
        # Use the first word form that agrees with the phrase morphology.
        for form in first_tok.morph.items:
            if form.check_accord(npt.morph, False, False) and isinstance(form, MorphWordForm):
                CreateHelper._set_morph(node, Utils.asObjectOrNull(form, MorphWordForm))
                break
        if node.morph.normal_case is None:
            node.morph.normal_case = first_tok.get_normal_case_text(
                None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
            node.morph.normal_full = first_tok.get_normal_case_text(
                None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
        groups = DerivateService.find_derivates(node.morph.normal_full, True, None)
        if groups is not None and len(groups) > 0:
            node.concept = groups[0]
    elif isinstance(first_tok, ReferentToken):
        referent = first_tok.referent
        if referent is None:
            return None
        node.morph.normal_case = str(referent)
        node.morph.normal_full = node.morph.normal_case
        node.concept = referent
    elif isinstance(first_tok, NumberToken):
        number = Utils.asObjectOrNull(first_tok, NumberToken)
        node.morph.gender = first_tok.morph.gender
        node.morph.number = first_tok.morph.number
        if number.int_value is not None:
            node.morph.normal_case = NumberHelper.get_number_adjective(
                number.int_value, first_tok.morph.gender, first_tok.morph.number)
            node.morph.normal_full = NumberHelper.get_number_adjective(
                number.int_value, MorphGender.MASCULINE, MorphNumber.SINGULAR)
        else:
            node.morph.normal_case = first_tok.get_source_text().upper()
            node.morph.normal_full = node.morph.normal_case

    first_tok.tag = node

    for adj in npt.adjectives:
        # With several nouns in the phrase only the first adjective is attached.
        if npt.multi_nouns and adj != npt.adjectives[0]:
            break
        adj_node = CreateHelper.create_npt_adj(gr, npt, adj)
        if adj_node is not None:
            gr.add_link(SemLinkType.DETAIL, node, adj_node, "какой", False, None)

    if npt.internal_noun is not None:
        inner_node = CreateHelper.create_noun_group(gr, npt.internal_noun)
        if inner_node is not None:
            gr.add_link(SemLinkType.DETAIL, node, inner_node, None, False, None)

    gr.objects.append(node)
    return node
def create_adverb(gr : 'SemGraph', adv : 'AdverbToken') -> 'SemObject':
    """Create an ADVERB semantic object for ``adv`` and register it in ``gr``.

    Both normal forms are set from ``adv.spelling``; the negation flag is
    copied from the token; the first derivate group found for the spelling,
    if any, becomes the object's concept.
    """
    node = SemObject(gr)
    gr.objects.append(node)
    node.tokens.append(adv)
    node.typ = SemObjectType.ADVERB
    node.not0_ = adv.not0_

    node.morph.normal_full = adv.spelling
    node.morph.normal_case = node.morph.normal_full

    groups = DerivateService.find_derivates(node.morph.normal_full, True, None)
    if groups is None or len(groups) == 0:
        return node
    node.concept = groups[0]
    return node
def create_npt_adj(gr : 'SemGraph', npt : 'NounPhraseToken', a : 'MetaToken') -> 'SemObject':
    """Create the semantic object for one adjective-like member of a noun phrase.

    * Pronoun member - a PRONOUN / PERSONALPRONOUN object whose morphology is
      taken from the first word form compatible with the phrase case.
    * Non-verb member - an ADJECTIVE object whose morphology is taken from
      the first adjective word form that agrees with the phrase; its concept
      is the first derivate group found for the full normal form.
    * Verb member (not a pronoun) - nothing is created and None is returned.

    A created object is appended to ``gr.objects`` before being returned.
    """
    is_pronoun = a.morph.class0_.is_pronoun
    if not is_pronoun and a.morph.class0_.is_verb:
        return None

    node = SemObject(gr)
    gr.objects.append(node)
    node.tokens.append(a)

    if is_pronoun:
        if a.begin_token.morph.class0_.is_personal_pronoun:
            node.typ = SemObjectType.PERSONALPRONOUN
        else:
            node.typ = SemObjectType.PRONOUN
        for item in a.begin_token.morph.items:
            form = Utils.asObjectOrNull(item, MorphWordForm)
            if form is None:
                continue
            if not npt.morph.case_.is_undefined and (npt.morph.case_ & form.case_).is_undefined:
                continue
            CreateHelper._set_morph(node, form)
            if node.morph.normal_full == "КАКОВ":
                node.morph.normal_full = "КАКОЙ"
            break
        if node.morph.normal_full is None:
            node.morph.normal_case = a.get_normal_case_text(
                None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
            node.morph.normal_full = node.morph.normal_case
        return node

    node.typ = SemObjectType.ADJECTIVE
    for form in a.begin_token.morph.items:
        if (form.check_accord(npt.morph, False, False)
                and form.class0_.is_adjective
                and isinstance(form, MorphWordForm)):
            CreateHelper._set_morph(node, Utils.asObjectOrNull(form, MorphWordForm))
            break
    if node.morph.normal_case is None:
        # No agreeing adjective form: fall back to the token's own texts and morphology.
        node.morph.normal_case = a.get_normal_case_text(
            MorphClass.ADJECTIVE, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
        node.morph.normal_full = a.get_normal_case_text(
            MorphClass.ADJECTIVE, MorphNumber.SINGULAR, MorphGender.MASCULINE, False)
        CreateHelper._set_morph0(node, a.begin_token.morph)
    groups = DerivateService.find_derivates(node.morph.normal_full, True, None)
    if groups is not None and len(groups) > 0:
        node.concept = groups[0]
    return node
def create_verb_group(gr : 'SemGraph', vpt : 'VerbPhraseToken') -> 'SemObject':
    """Create semantic objects for the items of a verb phrase.

    Walks ``vpt.items`` in order. Adverb items become either attributes
    (when AdverbToken reports a defined attribute type) or adverb objects;
    both are buffered and attached to the next verb object that is created.
    Every other item becomes a VERB or PARTICIPLE object appended to
    ``gr.objects``. Consecutive verb objects are chained with DETAIL links
    labelled "что делать".

    Returns the first verb object, or None when no verb object was created.
    """
    sems = list()
    attrs = list()
    adverbs = list()
    i = 0
    first_pass3439 = True
    while True:
        # Emulated "for (i = 0; i < len; i++)" loop so that `continue` still advances i.
        if first_pass3439: first_pass3439 = False
        else: i += 1
        if (not (i < len(vpt.items))): break
        v = vpt.items[i]
        if (v.is_adverb):
            adv = AdverbToken.try_parse(v.begin_token)
            if (adv is None):
                continue
            if (adv.typ != SemAttributeType.UNDEFINED):
                # Adverb with a defined attribute type: keep it as a pending attribute.
                attrs.append(SemAttribute._new2912(adv.not0_, adv.typ, adv.spelling))
                continue
            adverb = CreateHelper.create_adverb(gr, adv)
            if (len(attrs) > 0):
                adverb.attrs.extend(attrs)
            attrs.clear()
            adverbs.append(adverb)
            continue
        if (v.normal == "БЫТЬ"):
            # Skip "БЫТЬ" when a non-adverb item follows it later in the phrase.
            j = 0
            j = (i + 1)
            while j < len(vpt.items):
                if (not vpt.items[j].is_adverb):
                    break
                j += 1
            if (j < len(vpt.items)):
                continue
        sem = SemObject(gr)
        gr.objects.append(sem)
        sem.tokens.append(v)
        v.tag = (sem)
        CreateHelper._set_morph(sem, v.verb_morph)
        sem.morph.normal_full = v.normal
        sem.morph.normal_case = sem.morph.normal_full
        if (v.is_participle or v.is_dee_participle):
            sem.typ = SemObjectType.PARTICIPLE
            # normal_full: verb reading of the token (falls back to the current normal_case);
            # normal_case: adjective reading of the token.
            sem.morph.normal_full = (Utils.ifNotNull(v.end_token.get_normal_case_text(MorphClass.VERB, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False), (sem.morph.normal_case if sem is not None and sem.morph is not None else None)))
            sem.morph.normal_case = v.end_token.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
            if (sem.morph.normal_case == sem.morph.normal_full and v.normal.endswith("Й")):
                # Both forms coincide and look adjectival: take a pure-verb spelling
                # of the same language from the derivate groups as normal_full.
                grs2 = DerivateService.find_derivates(v.normal, True, None)
                if (grs2 is not None):
                    for g in grs2:
                        for w in g.words:
                            if (w.lang == v.end_token.morph.language and w.class0_.is_verb and not w.class0_.is_adjective):
                                sem.morph.normal_full = w.spelling
                                break
            elif (sem.morph.normal_case == sem.morph.normal_full and v.is_participle and sem.morph.normal_full.endswith("Ь")):
                # Both forms coincide and look like an infinitive: try to find
                # an adjectival ("...Й") form for normal_case.
                for it in v.end_token.morph.items:
                    wf = Utils.asObjectOrNull(it, MorphWordForm)
                    if (wf is None):
                        continue
                    if (wf.normal_case.endswith("Й") or ((wf.normal_full is not None and wf.normal_full.endswith("Й")))):
                        sem.morph.normal_case = (Utils.ifNotNull(wf.normal_full, wf.normal_case))
                        break
                if (sem.morph.normal_case == sem.morph.normal_full):
                    grs2 = DerivateService.find_derivates(sem.morph.normal_case, True, None)
                    if (grs2 is not None):
                        for g in grs2:
                            for w in g.words:
                                if (w.lang == v.end_token.morph.language and w.class0_.is_verb and w.class0_.is_adjective):
                                    sem.morph.normal_case = w.spelling
                                    break
                            # Only the first derivate group is inspected here.
                            break
        else:
            sem.typ = SemObjectType.VERB
        if (v.verb_morph is not None and v.verb_morph.contains_attr("возвр.", None)):
            # Verb form carries the "возвр." attribute: drop the trailing "СЯ"/"СЬ".
            if (sem.morph.normal_full.endswith("СЯ") or sem.morph.normal_full.endswith("СЬ")):
                sem.morph.normal_full = sem.morph.normal_full[0:0+len(sem.morph.normal_full) - 2]
        grs = DerivateService.find_derivates(sem.morph.normal_full, True, None)
        if (grs is not None and len(grs) > 0):
            sem.concept = (grs[0])
            if (v.verb_morph is not None and v.verb_morph.misc.aspect == MorphAspect.IMPERFECTIVE):
                # For an imperfective verb, use the first pure-verb word of the
                # group when that word is perfective.
                for w in grs[0].words:
                    if (w.class0_.is_verb and not w.class0_.is_adjective):
                        if (w.aspect == MorphAspect.PERFECTIVE):
                            sem.morph.normal_full = w.spelling
                            break
        sem.not0_ = v.not0_
        sems.append(sem)
        # Flush attributes and adverbs collected before this verb.
        if (len(attrs) > 0):
            sem.attrs.extend(attrs)
            attrs.clear()
        if (len(adverbs) > 0):
            for a in adverbs:
                gr.add_link(SemLinkType.DETAIL, sem, a, "как", False, None)
        adverbs.clear()
    if (len(sems) == 0):
        return None
    # Attributes and adverbs that followed the last verb are attached to it.
    if (len(attrs) > 0):
        sems[len(sems) - 1].attrs.extend(attrs)
    if (len(adverbs) > 0):
        sem = sems[len(sems) - 1]
        for a in adverbs:
            gr.add_link(SemLinkType.DETAIL, sem, a, "как", False, None)
    # Chain the verbs from the last one back to the first.
    for i in range(len(sems) - 1, 0, -1):
        gr.add_link(SemLinkType.DETAIL, sems[i - 1], sems[i], "что делать", False, None)
    return sems[0]
def process(self, kit : 'AnalysisKit') -> None:
    """Main analysis routine: extract keywords from the token chain of ``kit``.

    NOTE(review): the original (Russian) comment here read "main function for
    extracting phones", but the code below only builds KeywordReferent
    objects - the comment looks like a copy-paste leftover.

    Steps visible in the code:
      1. run DenominationAnalyzer unless an active one is already configured;
      2. first pass - register PREDICATE keywords for verbs and OBJECT
         keywords for the words of noun phrases, plus a combined OBJECT
         keyword for phrases that produced more than one word keyword;
      3. second pass - union two neighbouring OBJECT keywords (linked by
         "OF" or by a genitive);
      4. optionally sort referents by rank and create an annotation.
    """
    ad = kit.get_analyzer_data(self)
    has_denoms = False
    for a in kit.processor.analyzers:
        if ((isinstance(a, DenominationAnalyzer)) and not a.ignore_this_analyzer):
            has_denoms = True
    if (not has_denoms):
        a = DenominationAnalyzer()
        a.process(kit)
    li = list()
    tmp = io.StringIO()
    tmp2 = list()
    # Count the tokens; the total is passed to the rank helpers together with the position.
    max0_ = 0
    t = kit.first_token
    while t is not None:
        max0_ += 1
        t = t.next0_
    cur = 0
    t = kit.first_token
    first_pass3292 = True
    while True:
        # Emulated for-loop so that `continue` still advances t and cur.
        if first_pass3292: first_pass3292 = False
        else: t = t.next0_; cur += 1
        if (not (t is not None)): break
        r = t.get_referent()
        if (r is not None):
            # Token already carries a referent: handled by the helper.
            t = self.__add_referents(ad, t, cur, max0_)
            continue
        if (not (isinstance(t, TextToken))):
            continue
        if (not t.chars.is_letter or (t.length_char < 3)):
            continue
        term = t.term
        if (term == "ЕСТЬ"):
            # "ЕСТЬ" is only kept when the previous text token is a verb.
            if ((isinstance(t.previous, TextToken)) and t.previous.morph.class0_.is_verb):
                pass
            else:
                continue
        npt = None
        npt = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.ADJECTIVECANBELAST) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), 0, None)
        if (npt is None):
            # Not a noun phrase: a verb may become a PREDICATE keyword.
            mc = t.get_morph_class_in_dictionary()
            if (mc.is_verb and not mc.is_preposition):
                if (t.is_verb_be):
                    continue
                if (t.is_value("МОЧЬ", None) or t.is_value("WOULD", None)):
                    continue
                kref = KeywordReferent._new1595(KeywordType.PREDICATE)
                norm = t.get_normal_case_text(MorphClass.VERB, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                if (norm is None):
                    norm = t.lemma
                if (norm.endswith("ЬСЯ")):
                    # Drop the trailing "СЯ" (two characters), keeping "...Ь".
                    norm = norm[0:0+len(norm) - 2]
                kref.add_slot(KeywordReferent.ATTR_VALUE, norm, False, 0)
                drv = DerivateService.find_derivates(norm, True, t.morph.language)
                KeywordAnalyzer.__add_normals(kref, drv, norm)
                kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
                KeywordAnalyzer.__set_rank(kref, cur, max0_)
                rt1 = ReferentToken._new734(ad.register_referent(kref), t, t, t.morph)
                kit.embed_token(rt1)
                t = (rt1)
                continue
            continue
        if (npt.internal_noun is not None):
            continue
        # Skip "(prep) ... ЦЕЛОМ / ЧАСТНОСТИ" and "С ... СТОРОНЫ" phrases entirely.
        if (npt.end_token.is_value("ЦЕЛОМ", None) or npt.end_token.is_value("ЧАСТНОСТИ", None)):
            if (npt.preposition is not None):
                t = npt.end_token
                continue
        if (npt.end_token.is_value("СТОРОНЫ", None) and npt.preposition is not None and npt.preposition.normal == "С"):
            t = npt.end_token
            continue
        if (npt.begin_token == npt.end_token):
            # Single-token phrase: ignore prepositions and the adverb "ПОТОМ".
            mc = t.get_morph_class_in_dictionary()
            if (mc.is_preposition):
                continue
            elif (mc.is_adverb):
                if (t.is_value("ПОТОМ", None)):
                    continue
            else:
                pass
        li.clear()
        t0 = t
        tt = t
        first_pass3293 = True
        while True:
            # Inner emulated for-loop over the tokens of the noun phrase.
            if first_pass3293: first_pass3293 = False
            else: tt = tt.next0_
            if (not (tt is not None and tt.end_char <= npt.end_char)): break
            if (not (isinstance(tt, TextToken))):
                continue
            if (tt.is_value("NATURAL", None)):
                # No-op branch (apparently a leftover debugging hook).
                pass
            if ((tt.length_char < 3) or not tt.chars.is_letter):
                continue
            mc = tt.get_morph_class_in_dictionary()
            if ((mc.is_preposition or mc.is_pronoun or mc.is_personal_pronoun) or mc.is_conjunction):
                # Function words are skipped, except the word "ОТНОШЕНИЕ".
                if (tt.is_value("ОТНОШЕНИЕ", None)):
                    pass
                else:
                    continue
            if (mc.is_misc):
                if (MiscHelper.is_eng_article(tt)):
                    continue
            kref = KeywordReferent._new1595(KeywordType.OBJECT)
            norm = tt.lemma
            kref.add_slot(KeywordReferent.ATTR_VALUE, norm, False, 0)
            if (norm != "ЕСТЬ"):
                drv = DerivateService.find_derivates(norm, True, tt.morph.language)
                KeywordAnalyzer.__add_normals(kref, drv, norm)
            kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
            KeywordAnalyzer.__set_rank(kref, cur, max0_)
            rt1 = ReferentToken._new734(kref, tt, tt, tt.morph)
            kit.embed_token(rt1)
            # Track the first and last embedded tokens of the phrase (t0 .. t).
            if (tt == t and len(li) == 0):
                t0 = (rt1)
            t = (rt1)
            li.append(kref)
        if (len(li) > 1):
            # Several word keywords: build a combined keyword for the whole phrase.
            kref = KeywordReferent._new1595(KeywordType.OBJECT)
            Utils.setLengthStringIO(tmp, 0)
            tmp2.clear()
            has_norm = False
            for kw in li:
                s = kw.get_string_value(KeywordReferent.ATTR_VALUE)
                if (tmp.tell() > 0):
                    print(' ', end="", file=tmp)
                print(s, end="", file=tmp)
                n = kw.get_string_value(KeywordReferent.ATTR_NORMAL)
                if (n is not None):
                    has_norm = True
                    tmp2.append(n)
                else:
                    tmp2.append(s)
                kref.add_slot(KeywordReferent.ATTR_REF, kw, False, 0)
            val = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
            kref.add_slot(KeywordReferent.ATTR_VALUE, val, False, 0)
            # The normalised value is the sorted, space-joined list of word normals.
            Utils.setLengthStringIO(tmp, 0)
            tmp2.sort()
            for s in tmp2:
                if (tmp.tell() > 0):
                    print(' ', end="", file=tmp)
                print(s, end="", file=tmp)
            norm = Utils.toStringStringIO(tmp)
            if (norm != val):
                kref.add_slot(KeywordReferent.ATTR_NORMAL, norm, False, 0)
            kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
            KeywordAnalyzer.__set_rank(kref, cur, max0_)
            rt1 = ReferentToken._new734(kref, t0, t, npt.morph)
            kit.embed_token(rt1)
            t = (rt1)
    # Second pass: union neighbouring OBJECT keywords.
    cur = 0
    t = kit.first_token
    first_pass3294 = True
    while True:
        if first_pass3294: first_pass3294 = False
        else: t = t.next0_; cur += 1
        if (not (t is not None)): break
        kw = Utils.asObjectOrNull(t.get_referent(), KeywordReferent)
        if (kw is None or kw.typ != KeywordType.OBJECT):
            continue
        if (t.next0_ is None or kw.child_words > 2):
            continue
        t1 = t.next0_
        if (t1.is_value("OF", None) and (t1.whitespaces_after_count < 3) and t1.next0_ is not None):
            # "X OF [article] Y": step over "OF" and an optional English article.
            t1 = t1.next0_
            if ((isinstance(t1, TextToken)) and MiscHelper.is_eng_article(t1) and t1.next0_ is not None):
                t1 = t1.next0_
        elif (not t1.morph.case_.is_genitive or t.whitespaces_after_count > 1):
            continue
        kw2 = Utils.asObjectOrNull(t1.get_referent(), KeywordReferent)
        if (kw2 is None):
            continue
        if (kw == kw2):
            continue
        if (kw2.typ != KeywordType.OBJECT or (kw.child_words + kw2.child_words) > 3):
            continue
        kw_un = KeywordReferent()
        kw_un._union(kw, kw2, MiscHelper.get_text_value(t1, t1, GetTextAttr.NO))
        kw_un = (Utils.asObjectOrNull(ad.register_referent(kw_un), KeywordReferent))
        KeywordAnalyzer.__set_rank(kw_un, cur, max0_)
        rt1 = ReferentToken._new734(kw_un, t, t1, t.morph)
        kit.embed_token(rt1)
        t = (rt1)
    if (KeywordAnalyzer.SORT_KEYWORDS_BY_RANK):
        # Highest rank first.
        all0_ = list(ad.referents)
        all0_.sort(key=operator.attrgetter('rank'), reverse=True)
        ad.referents = all0_
    if (KeywordAnalyzer.ANNOTATION_MAX_SENTENCES > 0):
        ano = AutoannoSentToken.create_annotation(kit, KeywordAnalyzer.ANNOTATION_MAX_SENTENCES)
        if (ano is not None):
            ad.register_referent(ano)
def __calc_pacient(self, noplural: bool) -> float:
    """Compute the coefficient of this link treated as the verb's patient.

    Most paths store the result in ``self.coef`` and return it; several
    rejection paths return a bare -1 without touching ``self.coef``.
    ``self.plural`` is set to 1 / 0 on the passive-voice paths.
    A link with a non-empty ``from_prep`` is always rejected (-1).

    noplural: when true, a plural passive verb requires the source to be
        plural or to agree morphologically with the verb form.
    """
    if (not Utils.isNullOrEmpty(self.from_prep)):
        self.coef = -1
        return self.coef
    vf = self.to_verb.first_verb.verb_morph
    if (vf is None):
        return -1
    vf2 = self.to_verb.last_verb.verb_morph
    if (vf2 is None):
        return -1
    morph_ = self.from_morph
    if (vf2.misc.voice == MorphVoice.PASSIVE or self.to_verb.last_verb.morph.contains_attr("страд.з.", None)):
        # Passive voice: the source has to agree with the first verb form.
        if (vf.number == MorphNumber.PLURAL):
            if (noplural):
                if (self.from_is_plural):
                    pass
                elif (not NGLink.__check_morph_accord(morph_, False, vf)):
                    return -1
                elif (len(morph_.items) > 0 and not vf.case_.is_undefined):
                    # Need at least one plural reading whose case is compatible with the verb's.
                    ok = False
                    for it in morph_.items:
                        if (((it.number) & (MorphNumber.PLURAL)) == (MorphNumber.PLURAL)):
                            if (not it.case_.is_undefined and ((it.case_) & vf.case_).is_undefined):
                                continue
                            ok = True
                            break
                    if (not ok):
                        self.coef = -1
                        return self.coef
            self.coef = SemanticService.PARAMS.verb_plural
            self.plural = 1
        else:
            if (vf.number == MorphNumber.SINGULAR):
                self.plural = 0
                if (self.from_is_plural):
                    return -1
            if (not NGLink.__check_morph_accord(morph_, False, vf)):
                return -1
            self.coef = SemanticService.PARAMS.morph_accord
        return self.coef
    # Collect control-model flags of the last verb from its derivate groups.
    is_trans = False
    is_ref_dative = False
    grs = DerivateService.find_derivates(Utils.ifNotNull(vf2.normal_full, vf2.normal_case), True, None)
    if (grs is not None):
        for gr in grs:
            if (gr.cm.transitive):
                is_trans = True
            if (gr.cm_rev.agent is not None and not gr.cm_rev.agent.case_.is_nominative):
                is_ref_dative = True
    if (NGLink.__is_rev_verb(vf2)):
        if (not Utils.isNullOrEmpty(self.from_prep)):
            return -1
        if (not morph_.case_.is_undefined):
            if (is_ref_dative):
                if (morph_.case_.is_nominative):
                    self.coef = SemanticService.PARAMS.transitive_coef
                    return self.coef
            elif (morph_.case_.is_instrumental):
                self.coef = SemanticService.PARAMS.transitive_coef
                return self.coef
            return -1
        self.coef = 0
        return self.coef
    if (vf2 != vf and not is_trans):
        # Multi-verb phrase: transitivity may also come from the first verb.
        grs = DerivateService.find_derivates(Utils.ifNotNull(vf.normal_full, vf.normal_case), True, None)
        if (grs is not None):
            for gr in grs:
                if (gr.cm.transitive):
                    is_trans = True
    if (is_trans):
        if (not Utils.isNullOrEmpty(self.from_prep)):
            return -1
        if (not morph_.case_.is_undefined):
            if (morph_.case_.is_accusative):
                # Each additional possible case halves the coefficient.
                self.coef = SemanticService.PARAMS.transitive_coef
                if (morph_.case_.is_dative):
                    self.coef /= (2)
                if (morph_.case_.is_genitive):
                    self.coef /= (2)
                if (morph_.case_.is_instrumental):
                    self.coef /= (2)
                return self.coef
            else:
                return -1
    if (vf2.normal_case == "БЫТЬ"):
        if (not Utils.isNullOrEmpty(self.from_prep)):
            return -1
        if (morph_.case_.is_instrumental):
            self.coef = SemanticService.PARAMS.transitive_coef
            return self.coef
        if (morph_.case_.is_nominative):
            # Full coefficient only when the source stands after the verb phrase.
            if (self.from0_.source.begin_token.begin_char > self.to_verb.end_char):
                self.coef = SemanticService.PARAMS.transitive_coef
                return self.coef
            else:
                self.coef = SemanticService.PARAMS.transitive_coef / (2)
                return self.coef
        if (morph_.case_.is_undefined):
            self.coef = SemanticService.PARAMS.transitive_coef / (2)
            return self.coef
    return -1
def __calc_agent(self, noplural: bool) -> float:
    """Compute the coefficient of this link treated as the verb's agent.

    Every path stores the result in ``self.coef`` and returns it; -1 is used
    on all rejection paths. ``self.plural`` is set to 1 / 0 when the number
    of the first verb form is plural / singular.
    A link with a non-empty ``from_prep``, an imperative first verb or an
    infinitive ("инф.") first verb is rejected.

    noplural: when true, a plural verb requires the source to be plural or
        to agree morphologically with the verb form.
    """
    if (not Utils.isNullOrEmpty(self.from_prep)):
        self.coef = -1
        return self.coef
    vf = self.to_verb.first_verb.verb_morph
    if (vf is None):
        self.coef = -1
        return self.coef
    vf2 = self.to_verb.last_verb.verb_morph
    if (vf2 is None):
        self.coef = -1
        return self.coef
    if (vf.misc.mood == MorphMood.IMPERATIVE):
        self.coef = -1
        return self.coef
    morph_ = self.from_morph
    if (vf2.misc.voice == MorphVoice.PASSIVE or self.to_verb.last_verb.morph.contains_attr("страд.з.", None)):
        # Passive voice: only an instrumental source is accepted as agent.
        if (not morph_.case_.is_undefined):
            if (morph_.case_.is_instrumental):
                self.coef = SemanticService.PARAMS.transitive_coef
                if (vf2.case_.is_instrumental):
                    self.coef /= (2)
                return self.coef
            self.coef = -1
            return self.coef
        self.coef = 0
        return self.coef
    if ("инф." in vf.misc.attrs):
        self.coef = -1
        return self.coef
    if (NGLink.__is_rev_verb(vf2)):
        # Agent case comes from the first derivate group that declares a cm_rev agent.
        ag_case = MorphCase.UNDEFINED
        grs = DerivateService.find_derivates(Utils.ifNotNull(vf2.normal_full, vf2.normal_case), True, None)
        if (grs is not None):
            for gr in grs:
                if (gr.cm_rev.agent is not None):
                    ag_case = gr.cm_rev.agent.case_
                    break
        if (not morph_.case_.is_undefined):
            if (ag_case.is_dative):
                if (morph_.case_.is_dative):
                    self.coef = SemanticService.PARAMS.transitive_coef
                    if (morph_.case_.is_genitive):
                        self.coef /= (2)
                    return self.coef
                self.coef = -1
                return self.coef
            if (ag_case.is_instrumental):
                if (morph_.case_.is_instrumental):
                    if (morph_.case_.is_nominative):
                        self.coef = 0
                        return self.coef
                    self.coef = SemanticService.PARAMS.transitive_coef
                    return self.coef
                self.coef = -1
                return self.coef
            if (not morph_.case_.is_nominative):
                self.coef = -1
                return self.coef
            # A nominative source falls through to the number/agreement checks below.
        else:
            self.coef = 0
            return self.coef
    if (vf.number == MorphNumber.PLURAL):
        if (not morph_.case_.is_undefined):
            if (vf.case_.is_undefined):
                if (not morph_.case_.is_nominative):
                    self.coef = -1
                    return self.coef
            elif (((vf.case_) & morph_.case_).is_undefined):
                self.coef = -1
                return self.coef
        if (noplural):
            if (self.from_is_plural):
                pass
            elif (((morph_.number) & (MorphNumber.PLURAL)) == (MorphNumber.UNDEFINED)):
                self.coef = -1
                return self.coef
            elif (not NGLink.__check_morph_accord(morph_, False, vf)):
                self.coef = -1
                return self.coef
            elif (len(morph_.items) > 0 and not vf.case_.is_undefined):
                # Need at least one plural reading whose case is compatible with the verb's.
                ok = False
                for it in morph_.items:
                    if (((it.number) & (MorphNumber.PLURAL)) == (MorphNumber.PLURAL)):
                        if (not it.case_.is_undefined and ((it.case_) & vf.case_).is_undefined):
                            continue
                        ok = True
                        break
                if (not ok):
                    self.coef = -1
                    return self.coef
        self.plural = 1
        self.coef = SemanticService.PARAMS.verb_plural
        if (vf2.normal_case == "БЫТЬ"):
            # Halved when the case is unknown and the source stands after the verb phrase.
            if (morph_.case_.is_undefined and self.from0_.source.begin_token.begin_char > self.to_verb.end_char):
                self.coef /= (2)
    else:
        if (vf.number == MorphNumber.SINGULAR):
            self.plural = 0
            if (self.from_is_plural):
                self.coef = -1
                return self.coef
        if (not NGLink.__check_morph_accord(morph_, False, vf)):
            self.coef = -1
            return self.coef
        if (not morph_.case_.is_undefined):
            if (not morph_.case_.is_nominative):
                # A non-nominative source is tolerated only when the first verb is a participle.
                if (self.to_verb.first_verb.is_participle):
                    pass
                else:
                    self.coef = -1
                    return self.coef
        if (vf.misc.person != MorphPerson.UNDEFINED):
            if (((vf.misc.person) & (MorphPerson.THIRD)) == (MorphPerson.UNDEFINED)):
                # Verb is not third person: the source must carry the matching person attribute.
                if (((vf.misc.person) & (MorphPerson.FIRST)) == (MorphPerson.FIRST)):
                    if (not morph_.contains_attr("1 л.", None)):
                        self.coef = -1
                        return self.coef
                if (((vf.misc.person) & (MorphPerson.SECOND)) == (MorphPerson.SECOND)):
                    if (not morph_.contains_attr("2 л.", None)):
                        self.coef = -1
                        return self.coef
        self.coef = SemanticService.PARAMS.morph_accord
        if (morph_.case_.is_undefined):
            self.coef /= (4)
    return self.coef
def __try_parse_ru(t: 'Token', can_be_partition: bool, can_be_adj_partition: bool, force_parse: bool) -> 'VerbPhraseToken':
    """Try to parse a Russian verb phrase starting at token ``t``.

    Consecutive text tokens are classified with a local code ``ty``:
    0 - not part of a verb phrase (stops the scan), 1 - verb,
    2 - adverb, 3 - adjective/participle. A "НЕ" token is remembered and
    attached to the next item as negation. Trailing adverb items are
    removed from the result.

    can_be_partition: allows participle readings and a leading preposition.
    can_be_adj_partition: additionally accepts adjectives as items.
    force_parse: accepts a word that is both verb and noun in the dictionary.

    Returns the VerbPhraseToken, or None when no verb/participle item was
    found or the leading preposition does not fit the case of the first item.
    """
    res = None
    t0 = t
    not0_ = None
    has_verb = False
    verb_be_before = False
    prep = None
    first_pass3070 = True
    while True:
        # Emulated for-loop so that `continue` still advances t.
        if first_pass3070: first_pass3070 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (not (isinstance(t, TextToken))):
            break
        tt = Utils.asObjectOrNull(t, TextToken)
        is_participle = False
        if (tt.term == "НЕ"):
            # Remember the negation particle for the next item.
            not0_ = t
            continue
        ty = 0
        norm = None
        mc = tt.get_morph_class_in_dictionary()
        if (tt.term == "НЕТ"):
            if (has_verb):
                break
            ty = 1
        elif (tt.term == "ДОПУСТИМО"):
            ty = 3
        elif (mc.is_adverb and not mc.is_verb):
            ty = 2
        elif (tt.is_pure_verb or tt.is_verb_be):
            ty = 1
            if (has_verb):
                # A second finite verb ends the phrase unless it follows a "be" verb.
                if (not tt.morph.contains_attr("инф.", None)):
                    if (verb_be_before):
                        pass
                    else:
                        break
        elif (mc.is_verb):
            # Ambiguous dictionary class: decide whether the word is used as a verb here.
            if (mc.is_preposition or mc.is_misc or mc.is_pronoun):
                pass
            elif (mc.is_noun):
                if (tt.term == "СТАЛИ" or tt.term == "СТЕКЛО" or tt.term == "БЫЛИ"):
                    ty = 1
                elif (not tt.chars.is_all_lower and not MiscHelper.can_be_start_of_sentence(tt)):
                    ty = 1
                elif (mc.is_adjective and can_be_partition):
                    ty = 1
                elif (force_parse):
                    ty = 1
            elif (mc.is_proper):
                if (tt.chars.is_all_lower):
                    ty = 1
            else:
                ty = 1
            if (mc.is_adjective):
                is_participle = True
            if (not tt.morph.case_.is_undefined):
                is_participle = True
            if (not can_be_partition and is_participle):
                break
            if (has_verb):
                # After a verb only an infinitive or a non-participle may continue the phrase.
                if (tt.morph.contains_attr("инф.", None)):
                    pass
                elif (not is_participle):
                    pass
                else:
                    break
        elif ((mc.is_adjective and tt.morph.contains_attr("к.ф.", None) and tt.term.endswith("О")) and NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None) is None):
            # Short-form adjective ending in "О" that does not start a noun phrase: treated as an adverb.
            ty = 2
        elif (mc.is_adjective and ((can_be_partition or can_be_adj_partition))):
            if (tt.morph.contains_attr("к.ф.", None) and not can_be_adj_partition):
                break
            norm = tt.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.SINGULAR, MorphGender.MASCULINE, False)
            if (norm.endswith("ЙШИЙ")):
                pass
            else:
                # Accept as participle when the derivate groups contain both this
                # spelling as a verbal adjective and a pure verb.
                grs = DerivateService.find_derivates(norm, True, None)
                if (grs is not None and len(grs) > 0):
                    hverb = False
                    hpart = False
                    for gr in grs:
                        for w in gr.words:
                            if (w.class0_.is_adjective and w.class0_.is_verb):
                                if (w.spelling == norm):
                                    hpart = True
                            elif (w.class0_.is_verb):
                                hverb = True
                    if (hpart and hverb):
                        ty = 3
                    elif (can_be_adj_partition):
                        ty = 3
                    if (ty != 3 and not Utils.isNullOrEmpty(grs[0].prefix) and norm.startswith(grs[0].prefix)):
                        # Retry the same test with the group's prefix stripped from the word.
                        hverb = False
                        hpart = False
                        norm1 = norm[len(grs[0].prefix):]
                        grs = DerivateService.find_derivates(norm1, True, None)
                        if (grs is not None and len(grs) > 0):
                            for gr in grs:
                                for w in gr.words:
                                    if (w.class0_.is_adjective and w.class0_.is_verb):
                                        if (w.spelling == norm1):
                                            hpart = True
                                    elif (w.class0_.is_verb):
                                        hverb = True
                        if (hpart and hverb):
                            ty = 3
        if (ty == 0 and t == t0 and can_be_partition):
            # Nothing recognised at the very first token: it may be a preposition before a participle.
            prep = PrepositionHelper.try_parse(t)
            if (prep is not None):
                t = prep.end_token
                continue
        if (ty == 0):
            break
        if (res is None):
            res = VerbPhraseToken(t0, t)
        res.end_token = t
        it = VerbPhraseItemToken._new603(t, t, MorphCollection(t.morph))
        if (not0_ is not None):
            it.begin_token = not0_
            it.not0_ = True
            not0_ = (None)
        it.is_adverb = ty == 2
        if (prep is not None and not t.morph.case_.is_undefined and len(res.items) == 0):
            # The first item must be compatible with the case required by the preposition.
            if (((prep.next_case) & t.morph.case_).is_undefined):
                return None
            it.morph.remove_items(prep.next_case, False)
            res.preposition = prep
        if (norm is None):
            norm = t.get_normal_case_text((MorphClass.ADJECTIVE if ty == 3 else (MorphClass.ADVERB if ty == 2 else MorphClass.VERB)), MorphNumber.SINGULAR, MorphGender.MASCULINE, False)
            if (ty == 1 and not tt.morph.case_.is_undefined):
                # Declinable verb form: request the nominative singular masculine word form.
                # NOTE(review): the "КК" prefix passed to get_wordform is stripped from the
                # result again; its meaning is not visible here - confirm against MorphologyService.
                mi = MorphWordForm._new604(MorphCase.NOMINATIVE, MorphNumber.SINGULAR, MorphGender.MASCULINE)
                for mit in tt.morph.items:
                    if (isinstance(mit, MorphWordForm)):
                        mi.misc = mit.misc
                        break
                nnn = MorphologyService.get_wordform("КК" + t.term, mi)
                if (nnn is not None):
                    norm = nnn[2:]
        it.normal = norm
        res.items.append(it)
        if (not has_verb and ((ty == 1 or ty == 3))):
            # The phrase morphology is taken from its first verb/participle item.
            res.morph = it.morph
            has_verb = True
        if (ty == 1 or ty == 3):
            if (ty == 1 and tt.is_verb_be):
                verb_be_before = True
            else:
                verb_be_before = False
    if (not has_verb):
        return None
    # Drop trailing adverbs so that the phrase ends on a verb/participle item.
    for i in range(len(res.items) - 1, 0, -1):
        if (res.items[i].is_adverb):
            del res.items[i]
            res.end_token = res.items[i - 1].end_token
        else:
            break
    return res