def try_parse(t: 'Token') -> 'PrepositionToken':
    """ Try to extract a preposition starting at the given token.

    The helper's termin ontology (``__m_ontology``) is consulted first; when
    it yields nothing, the token's dictionary morphology class decides.

    Args:
        t(Token): starting token

    Returns:
        PrepositionToken: the extracted preposition, or None when ``t`` is
        not a TextToken or is not a preposition
    """
    if not isinstance(t, TextToken):
        return None

    # First choice: a match in the ontology.
    hit = PrepositionHelper.__m_ontology.try_parse(t, TerminParseAttr.NO)
    if hit is not None:
        return PrepositionToken._new529(
            t, hit.end_token, hit.termin.canonic_text, hit.termin.tag)

    # Otherwise the dictionary must classify the word as a preposition.
    if not t.get_morph_class_in_dictionary().is_preposition:
        return None

    prep = PrepositionToken(t, t)
    prep.normal = t.get_normal_case_text(
        MorphClass.PREPOSITION, MorphNumber.UNDEFINED,
        MorphGender.UNDEFINED, False)
    prep.next_case = LanguageHelper.get_case_after_preposition(prep.normal)

    # "prep-prep" written without a space before the hyphen: extend the
    # result over the second preposition as well.
    glued_hyphen = (t.next0_ is not None
                    and t.next0_.is_hiphen
                    and not t.is_whitespace_after)
    if glued_hyphen:
        tail = t.next0_.next0_
        if (isinstance(tail, TextToken)
                and tail.get_morph_class_in_dictionary().is_preposition):
            prep.end_token = tail
    return prep
def remove_items_by_preposition(self, prep: 'Token') -> None:
    """ Remove the items whose case does not agree with the preposition.

    Nothing happens when ``prep`` is not a TextToken, or when the case
    governed by the preposition has no intersection with ``self.case_``.

    Args:
        prep(Token): the preposition token; its ``lemma`` is used to look
            up the governed case
    """
    from pullenti.ner.TextToken import TextToken
    if isinstance(prep, TextToken):
        governed_case = LanguageHelper.get_case_after_preposition(prep.lemma)
        # Filter only when the governed case overlaps the current case.
        if not (governed_case & self.case_).is_undefined:
            self.remove_items(governed_case, False)
def create_question(li : 'NGItem') -> str:
    """ Build the question string for an item.

    The result is the lower-cased preposition of ``li.source`` (followed by
    a space, when there is one) plus a question word selected by the
    grammatical case of ``li.source.source.morph``. When a preposition is
    present and the case it governs overlaps the morphological case, the
    case is narrowed to that overlap before the word is selected. When no
    case flag matches, only the preposition prefix is returned.

    Args:
        li(NGItem): the item to build the question for

    Returns:
        str: the question text
    """
    question = (Utils.ifNotNull(li.source.prep, "")).lower()
    if question:
        question += " "

    case = li.source.source.morph.case_
    if not Utils.isNullOrEmpty(li.source.prep):
        governed = LanguageHelper.get_case_after_preposition(li.source.prep)
        if not governed.is_undefined and not (governed & case).is_undefined:
            case = case & governed

    # Ordered by priority: the first case flag that is set wins.
    words_by_flag = (
        ("is_genitive", "чего"),
        ("is_instrumental", "чем"),
        ("is_dative", "чему"),
        ("is_accusative", "что"),
        ("is_prepositional", "чём"),
    )
    for flag_name, word in words_by_flag:
        if getattr(case, flag_name):
            return question + word
    return question
def try_parse(t: 'Token', items: typing.List['NounPhraseItem'], attrs: 'NounPhraseParseAttr') -> 'NounPhraseItem':
    """ Try to build a noun-phrase item (adjective and/or noun candidate) starting at token t.

    The function first applies a series of rejection checks to the token, then
    walks the token's morphological variants and records those that can act as
    an adjective (``adj_morph``) or as a noun (``noun_morph``), checking each
    against the already collected ``items`` via
    ``NounPhraseItem.try_accord_variant``.

    Args:
        t(Token): token to start from; None yields None
        items(typing.List[NounPhraseItem]): items collected so far for the
            phrase; may be None or empty
        attrs(NounPhraseParseAttr): parsing flags; this function tests
            IGNOREPARTICIPLES, REFERENTCANBENOUN, PARSENUMERICASADJECTIVE,
            PARSEPRONOUNS and MULTINOUNS

    Returns:
        NounPhraseItem: the built item, or None when the token is rejected.
        The returned item may have neither ``can_be_adj`` nor ``can_be_noun``
        set; callers have to inspect the flags.
    """
    if (t is None):
        return None
    # t is reassigned inside the loop below; t0 always keeps the starting token.
    t0 = t
    _can_be_surname = False
    _is_doubt_adj = False
    rt = Utils.asObjectOrNull(t, ReferentToken)
    # Referent token wrapping exactly one text token: parse the inner token
    # and stretch a successful result over the wrapper.
    if (rt is not None and rt.begin_token == rt.end_token and (isinstance(rt.begin_token, TextToken))):
        res = NounPhraseItem.try_parse(rt.begin_token, items, attrs)
        if (res is not None):
            res.begin_token = res.end_token = t
            res.can_be_noun = True
            return res
    # Any other referent token becomes a noun item with one noun variant per
    # morphological item, whose normal value is the referent's string form.
    if (rt is not None):
        res = NounPhraseItem(t, t)
        for m in t.morph.items:
            v = NounPhraseItemTextVar(m, None)
            v.normal_value = str(t.get_referent())
            res.noun_morph.append(v)
        res.can_be_noun = True
        return res
    # No-op branch: number tokens get no special pre-check here.
    if (isinstance(t, NumberToken)):
        pass
    has_legal_verb = False
    # Rejection checks that apply to text tokens only.
    if (isinstance(t, TextToken)):
        if (not t.chars.is_letter):
            return None
        str0_ = t.term
        # Terms whose last character is 'А' or 'О': inspect the dictionary word forms.
        if (str0_[len(str0_) - 1] == 'А' or str0_[len(str0_) - 1] == 'О'):
            for wf in t.morph.items:
                if ((isinstance(wf, MorphWordForm)) and wf.is_in_dictionary):
                    if (wf.class0_.is_verb):
                        mc = t.get_morph_class_in_dictionary()
                        # A verb form is rejected when the dictionary class is not a noun,
                        # IGNOREPARTICIPLES is not set, and the ends_with_ex check on
                        # "ОГО"/"ЕГО" fails; otherwise it is remembered as a legal verb.
                        if (not mc.is_noun and (((attrs) & (NounPhraseParseAttr.IGNOREPARTICIPLES))) == (NounPhraseParseAttr.NO)):
                            if (not LanguageHelper.ends_with_ex(
                                    str0_, "ОГО", "ЕГО", None, None)):
                                return None
                        has_legal_verb = True
                    if (wf.class0_.is_adverb):
                        # An adverb form not followed by a hyphen is rejected,
                        # except for a fixed list of terms.
                        if (t.next0_ is None or not t.next0_.is_hiphen):
                            if ((str0_ == "ВСЕГО" or str0_ == "ДОМА" or str0_ == "НЕСКОЛЬКО") or str0_ == "МНОГО" or str0_ == "ПОРЯДКА"):
                                pass
                            else:
                                return None
                    if (wf.class0_.is_adjective):
                        # Adjective form carrying the "к.ф." attribute (presumably "short form" —
                        # TODO confirm against the morphology tag set): mark the adjective reading
                        # as doubtful unless the dictionary class is exactly ADJECTIVE.
                        if (wf.contains_attr("к.ф.", None)):
                            if (t.get_morph_class_in_dictionary() == MorphClass.ADJECTIVE):
                                pass
                            else:
                                _is_doubt_adj = True
        mc0 = t.morph.class0_
        # Possible surname that is not written all in lower case.
        if (mc0.is_proper_surname and not t.chars.is_all_lower):
            for wf in t.morph.items:
                if (wf.class0_.is_proper_surname and wf.number != MorphNumber.PLURAL):
                    wff = Utils.asObjectOrNull(wf, MorphWordForm)
                    if (wff is None):
                        continue
                    s = Utils.ifNotNull((Utils.ifNotNull(
                        wff.normal_full, wff.normal_case)), "")
                    # Normal form passing the "ИН"/"ЕН"/"ЫН" suffix check: flagged as a
                    # possible surname when the form is not in the dictionary,
                    # otherwise the whole token is rejected.
                    if (LanguageHelper.ends_with_ex(
                            s, "ИН", "ЕН", "ЫН", None)):
                        if (not wff.is_in_dictionary):
                            _can_be_surname = True
                        else:
                            return None
                    if (wff.is_in_dictionary and LanguageHelper.ends_with(s, "ОВ")):
                        _can_be_surname = True
        # Possible proper name that is not written all in lower case.
        if (mc0.is_proper_name and not t.chars.is_all_lower):
            for wff in t.morph.items:
                wf = Utils.asObjectOrNull(wff, MorphWordForm)
                if (wf is None):
                    continue
                if (wf.normal_case == "ГОР"):
                    continue
                if (wf.class0_.is_proper_name and wf.is_in_dictionary):
                    if (wf.normal_case is None or not wf.normal_case.startswith("ЛЮБ")):
                        # A dictionary proper name is tolerated when the token is an adjective with
                        # the "неизм." attribute, or REFERENTCANBENOUN is set; otherwise it is
                        # accepted only when the first collected item is a standard adjective.
                        if (mc0.is_adjective and t.morph.contains_attr("неизм.", None)):
                            pass
                        elif (
                                (((attrs) & (NounPhraseParseAttr.REFERENTCANBENOUN))
                                 ) == (NounPhraseParseAttr.REFERENTCANBENOUN)):
                            pass
                        else:
                            if (items is None or (len(items) < 1)):
                                return None
                            if (not items[0].is_std_adjective):
                                return None
        # Single-variant adjective carrying the "в.ср.ст." attribute is rejected.
        if (mc0.is_adjective and t.morph.items_count == 1):
            if (t.morph.get_indexer_item(0).contains_attr(
                    "в.ср.ст.", None)):
                return None
        mc1 = t.get_morph_class_in_dictionary()
        # Pure verb by dictionary with no case information is rejected.
        if (mc1 == MorphClass.VERB and t.morph.case_.is_undefined):
            return None
        # With IGNOREPARTICIPLES set, a verb (neither noun nor proper) that has a verb variant
        # with the "дейст.з." attribute is rejected unless the term ends with "СЯ".
        if (((((attrs) & (NounPhraseParseAttr.IGNOREPARTICIPLES))) == (NounPhraseParseAttr.IGNOREPARTICIPLES) and t.morph.class0_.is_verb and not t.morph.class0_.is_noun) and not t.morph.class0_.is_proper):
            for wf in t.morph.items:
                if (wf.class0_.is_verb):
                    if (wf.contains_attr("дейст.з.", None)):
                        if (LanguageHelper.ends_with(t.term, "СЯ")):
                            pass
                        else:
                            return None
    # Set only by the "БИЗНЕС" special case at the bottom of the loop.
    t1 = None
    # At most two passes. Pass 0 may move t to the token after a hyphen (compound written
    # as "t0-t"); pass 1 retries with t1 if it was set, otherwise with t0 itself.
    for k in range(2):
        t = (Utils.ifNotNull(t1, t0))
        if (k == 0):
            if (((isinstance(t0, TextToken)) and t0.next0_ is not None and t0.next0_.is_hiphen) and t0.next0_.next0_ is not None):
                if (not t0.is_whitespace_after and not t0.morph.class0_.is_pronoun and not (isinstance(t0.next0_.next0_, NumberToken))):
                    if (not t0.next0_.is_whitespace_after):
                        t = t0.next0_.next0_
                    elif (t0.next0_.next0_.chars.is_all_lower and LanguageHelper.ends_with(t0.term, "О")):
                        t = t0.next0_.next0_
        # The item always begins at t0; t is its (tentative) end token.
        it = NounPhraseItem._new404(t0, t, _can_be_surname)
        if (t0 == t and (isinstance(t0,
                                    ReferentToken))):
            it.can_be_noun = True
            it.morph = MorphCollection(t0.morph)
        can_be_prepos = False
        # Classify every morphological variant of t as adjective and/or noun candidate.
        for v in t.morph.items:
            wf = Utils.asObjectOrNull(v, MorphWordForm)
            # Verb variant that has a case is recorded as an adjective variant
            # without any agreement check.
            if (v.class0_.is_verb and not v.case_.is_undefined):
                it.can_be_adj = True
                it.adj_morph.append(NounPhraseItemTextVar(v, t))
                continue
            if (v.class0_.is_preposition):
                can_be_prepos = True
            # Adjective-like variants: adjectives, non-personal pronouns without the "неизм."
            # attribute, and noun variants of number tokens.
            if (v.class0_.is_adjective or ((v.class0_.is_pronoun and not v.class0_.is_personal_pronoun and not v.contains_attr("неизм.", None))) or ((v.class0_.is_noun and (isinstance(t, NumberToken))))):
                if (NounPhraseItem.try_accord_variant(
                        items, (0 if items is None else len(items)), v, False)):
                    # NOTE: is_doub is assigned but not read anywhere in this function.
                    is_doub = False
                    if (v.contains_attr("к.ф.", None)):
                        continue
                    # Variant with the "собир." attribute on a non-number token: rejects the whole
                    # token when the form is in the dictionary, otherwise skips the variant.
                    if (v.contains_attr("собир.", None) and not (isinstance(t, NumberToken))):
                        if (wf is not None and wf.is_in_dictionary):
                            return None
                        continue
                    if (v.contains_attr("сравн.", None)):
                        continue
                    ok = True
                    if (isinstance(t, TextToken)):
                        s = t.term
                        # Terms excluded from the adjective reading.
                        if (s == "ПРАВО" or s == "ПРАВА"):
                            ok = False
                        elif (LanguageHelper.ends_with(s, "ОВ") and t.get_morph_class_in_dictionary().is_noun):
                            ok = False
                    elif (isinstance(t, NumberToken)):
                        if (v.class0_.is_noun and t.morph.class0_.is_adjective):
                            ok = False
                        elif (t.morph.class0_.is_noun and ((
                                (attrs) & (NounPhraseParseAttr.PARSENUMERICASADJECTIVE))) == (NounPhraseParseAttr.NO)):
                            ok = False
                    if (ok):
                        it.adj_morph.append(NounPhraseItemTextVar(v, t))
                        it.can_be_adj = True
                        if (_is_doubt_adj and t0 == t):
                            it.is_doubt_adjective = True
                        if (has_legal_verb and wf is not None and wf.is_in_dictionary):
                            it.can_be_noun = True
                        # A pronoun word form is additionally recorded as a noun variant.
                        if (wf is not None and wf.class0_.is_pronoun):
                            it.can_be_noun = True
                            it.noun_morph.append(
                                NounPhraseItemTextVar(v, t))
            # Decide whether this variant may act as the noun of the phrase.
            can_be_noun_ = False
            if (isinstance(t, NumberToken)):
                pass
            elif (v.class0_.is_noun or ((wf is not None and wf.normal_case == "САМ"))):
                can_be_noun_ = True
            elif (v.class0_.is_personal_pronoun):
                if (items is None or len(items) == 0):
                    can_be_noun_ = True
                else:
                    # A preceding verb item allows the pronoun only when it is the sole item
                    # and the variant is not nominative; otherwise the token is rejected.
                    for it1 in items:
                        if (it1.is_verb):
                            if (len(items) == 1 and not
                                    v.case_.is_nominative):
                                can_be_noun_ = True
                            else:
                                return None
                    if (len(items) == 1):
                        if (items[0].can_be_adj_for_personal_pronoun):
                            can_be_noun_ = True
            # Selected pronouns (matched by normal form) may act as a noun when there are no
            # items, or the single item can be an adjective for a personal pronoun.
            elif (
                    (v.class0_.is_pronoun and ((items is None or len(items) == 0 or ((len(items) == 1 and items[0].can_be_adj_for_personal_pronoun)))) and wf is not None) and (((((wf.normal_case == "ТОТ" or wf.normal_full == "ТО" or wf.normal_case == "ТО") or wf.normal_case == "ЭТО" or wf.normal_case == "ВСЕ") or wf.normal_case == "ЧТО" or wf.normal_case == "КТО") or wf.normal_full == "КОТОРЫЙ" or wf.normal_case == "КОТОРЫЙ"))):
                if (wf.normal_case == "ВСЕ"):
                    # "ВСЕ" directly followed by "РАВНО" rejects the token.
                    if (t.next0_ is not None and t.next0_.is_value("РАВНО", None)):
                        return None
                can_be_noun_ = True
            # "КОТОРЫЙ" that did not match the branch above is rejected when
            # PARSEPRONOUNS is not set.
            elif (wf is not None and ((Utils.ifNotNull(
                    wf.normal_full, wf.normal_case))) == "КОТОРЫЙ" and (((attrs) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (NounPhraseParseAttr.NO)):
                return None
            elif (v.class0_.is_proper and (isinstance(t, TextToken))):
                if (t.length_char > 4 or v.class0_.is_proper_name):
                    can_be_noun_ = True
            if (can_be_noun_):
                added = False
                # MULTINOUNS mode: when every item after the first has conj_before set,
                # try the agreement check with its last argument True.
                if (items is not None and len(items) > 1 and (((attrs) & (NounPhraseParseAttr.MULTINOUNS))) != (NounPhraseParseAttr.NO)):
                    ok1 = True
                    ii = 1
                    while ii < len(items):
                        if (not items[ii].conj_before):
                            ok1 = False
                            break
                        ii += 1
                    if (ok1):
                        if (NounPhraseItem.try_accord_variant(
                                items, (0 if items is None else len(items)), v, True)):
                            it.noun_morph.append(
                                NounPhraseItemTextVar(v, t))
                            it.can_be_noun = True
                            it.multi_nouns = True
                            added = True
                if (not added):
                    if (NounPhraseItem.try_accord_variant(
                            items, (0 if items is None else len(items)), v, False)):
                        it.noun_morph.append(NounPhraseItemTextVar(v, t))
                        it.can_be_noun = True
                        # Personal pronoun with the "неizm."-style invariable attribute ("неизм.")
                        # and no adjective reading so far: also add it as an adjective variant
                        # with all cases and undefined number.
                        if (v.class0_.is_personal_pronoun and t.morph.contains_attr("неизм.", None) and not it.can_be_adj):
                            itt = NounPhraseItemTextVar(v, t)
                            itt.case_ = MorphCase.ALL_CASES
                            itt.number = MorphNumber.UNDEFINED
                            if (itt.normal_value is None):
                                pass
                            it.adj_morph.append(itt)
                            it.can_be_adj = True
                    # Agreement failed, but the first item's first adjective variant is plural,
                    # shares a case with v and is not a verb: accept the noun if a noun phrase
                    # without preposition and with a compatible case follows after a comma/"and".
                    # NOTE(review): items is dereferenced without a None check here; presumably
                    # try_accord_variant succeeds for None/empty items so this is not reached
                    # in that case — confirm.
                    elif ((len(items) > 0 and len(items[0].adj_morph)
                           > 0 and items[0].adj_morph[0].number == MorphNumber.PLURAL) and not ((items[0].adj_morph[0].case_) & v.case_).is_undefined and not items[0].adj_morph[0].class0_.is_verb):
                        if (t.next0_ is not None and t.next0_.is_comma_and and (isinstance(t.next0_.next0_, TextToken))):
                            npt2 = NounPhraseHelper.try_parse(
                                t.next0_.next0_, attrs, 0, None)
                            if (npt2 is not None and npt2.preposition is None and not ((npt2.morph.case_) & v.case_ & items[0].adj_morph[0].case_
                                                                                        ).is_undefined):
                                it.noun_morph.append(
                                    NounPhraseItemTextVar(v, t))
                                it.can_be_noun = True
        # The item spans more than t0: let every variant account for the t0 prefix.
        if (t0 != t):
            for v in it.adj_morph:
                v.correct_prefix(Utils.asObjectOrNull(t0, TextToken), False)
            for v in it.noun_morph:
                v.correct_prefix(Utils.asObjectOrNull(t0, TextToken), True)
        # Second pass produced a pure noun: extend the item over the following part and
        # append that part's normal text to noun values that contain no '-' yet.
        if (k == 1 and it.can_be_noun and not it.can_be_adj):
            if (t1 is not None):
                it.end_token = t1
            else:
                it.end_token = t0.next0_.next0_
            for v in it.noun_morph:
                if (v.normal_value is not None and (v.normal_value.find('-') < 0)):
                    v.normal_value = "{0}-{1}".format(
                        v.normal_value, it.end_token.get_normal_case_text(
                            None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False))
        if (it.can_be_adj):
            if (NounPhraseItem.__m_std_adjectives.try_parse(
                    it.begin_token, TerminParseAttr.NO) is not None):
                it.is_std_adjective = True
        # The token can be both a preposition and a noun: look ahead to disambiguate.
        if (can_be_prepos and it.can_be_noun):
            if (items is not None and len(items) > 0):
                # With preceding items: reject if a noun phrase with a preposition parsed
                # from t extends beyond t.
                npt1 = NounPhraseHelper.try_parse(
                    t, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.PARSEPRONOUNS) | (NounPhraseParseAttr.PARSEVERBS), NounPhraseParseAttr), 0, None)
                if (npt1 is not None and npt1.end_char > t.end_char):
                    return None
            else:
                # Without preceding items: reject if the following noun phrase has a case
                # compatible with the one returned for this token's lemma as a preposition.
                npt1 = NounPhraseHelper.try_parse(
                    t.next0_, Utils.valToEnum((NounPhraseParseAttr.PARSEPRONOUNS) | (NounPhraseParseAttr.PARSEVERBS), NounPhraseParseAttr), 0, None)
                if (npt1 is not None):
                    mc = LanguageHelper.get_case_after_preposition(t.lemma)
                    if (not ((mc) & npt1.morph.case_).is_undefined):
                        return None
        # Success exit; on the second pass the item is returned unconditionally.
        if (it.can_be_noun or it.can_be_adj or k == 1):
            if (it.begin_token.morph.class0_.is_pronoun):
                # Pronoun item: try to absorb a following particle, optionally attached
                # through a hyphen that has no whitespace on either side.
                tt2 = it.end_token.next0_
                if ((tt2 is not None and tt2.is_hiphen and not tt2.is_whitespace_after) and not tt2.is_whitespace_before):
                    tt2 = tt2.next0_
                if (isinstance(tt2, TextToken)):
                    ss = tt2.term
                    if ((ss == "ЖЕ" or ss == "БЫ" or ss == "ЛИ") or ss == "Ж"):
                        it.end_token = tt2
                    # These particles also become part of the adjective variants' values.
                    elif (ss == "НИБУДЬ" or ss == "ЛИБО" or (((ss == "ТО" and tt2.previous.is_hiphen)) and it.can_be_adj)):
                        it.end_token = tt2
                        for m in it.adj_morph:
                            m.normal_value = "{0}-{1}".format(
                                m.normal_value, ss)
                            if (m.single_number_value is not None):
                                m.single_number_value = "{0}-{1}".format(
                                    m.single_number_value, ss)
            return it
        # First pass gave neither noun nor adjective on the plain token.
        if (t0 == t):
            # Special case: "БИЗНЕС" followed by a token with the same chars info is retried
            # on the second pass together with that following token.
            if (t0.is_value("БИЗНЕС", None) and t0.next0_ is not None and t0.next0_.chars == t0.chars):
                t1 = t0.next0_
                continue
            return it
    # Not reached through the loop above: the second pass always returns.
    return None