예제 #1
0
 def _create_referent_token(p : 'PersonReferent', begin : 'Token', end : 'Token', morph_ : 'MorphCollection', attrs : typing.List['PersonAttrToken'], ad : 'PersonAnalyzerData', for_attribute : bool, after_be_predicate : bool) -> 'ReferentToken':
     """Wrap person ``p`` into a ReferentToken, absorbing the attributes found around it.

     The function works in the following stages:

     1. Apply the leading attributes ``attrs`` to ``p`` (age / attribute slots,
        gender hints) and move ``begin`` left so that the span covers them.
     2. Rebuild ``morph_`` as a singular-number copy, filling in gender and
        case from ``p`` and from the first leading attribute.
     3. Return early when the person is used as an attribute
        (``for_attribute`` or preceded by "ИМЕНИ" / "ИМ.").
     4. Inherit a shared property from the head of a preceding
        comma/"and"-separated list of persons.
     5. Scan forward from ``end`` for trailing attributes, bracketed or
        Latin spellings of the name and nicknames, moving ``end`` right.
     6. Scan forward again for contacts, identity documents and
        organizations that can be attached to ``p``.

     Args:
         p: person referent being wrapped; it is modified in place.
         begin: first token of the person's name.
         end: last token of the person's name.
         morph_: morphology of the name; only read, a modified copy is used.
         attrs: attributes found before the name, or None.
         ad: analyzer data used for its ``local_ontology`` and as a
             recursion counter (``overflow_level``); may be None.
         for_attribute: when True, no trailing scan is performed.
         after_be_predicate: not read anywhere in this function body.

     Returns:
         None when ``p`` is None, otherwise the token built by
         ``ReferentToken._new2484`` over the final ``begin``..``end`` span.

     NOTE(review): there is no ``self`` parameter and the body calls
     ``PersonHelper.__correct_tail_attributes`` - presumably this is a static
     method of ``PersonHelper``; confirm against the enclosing class.
     """
     if (p is None): 
         return None
     # Set when one of the leading attributes is of type BESTREGARDS; relaxes several checks below.
     has_prefix = False
     if (attrs is not None): 
         for a in attrs: 
             if (a.typ == PersonAttrTerminType.BESTREGARDS): 
                 has_prefix = True
             else: 
                 # Extend the span to the left so that it covers the attribute
                 # (and the opening bracket when the attribute is parenthesized).
                 if (a.begin_char < begin.begin_char): 
                     begin = a.begin_token
                     if ((a.end_token.next0_ is not None and a.end_token.next0_.is_char(')') and begin.previous is not None) and begin.previous.is_char('(')): 
                         begin = begin.previous
                 if (a.typ != PersonAttrTerminType.PREFIX): 
                     if (a.age is not None): 
                         p.add_slot(PersonReferent.ATTR_AGE, a.age, False, 0)
                     # Without a property referent only the plain value is stored,
                     # otherwise the whole attribute token is stored.
                     if (a.prop_ref is None): 
                         p.add_slot(PersonReferent.ATTR_ATTR, a.value, False, 0)
                     else: 
                         p.add_slot(PersonReferent.ATTR_ATTR, a, False, 0)
                 elif (a.gender == MorphGender.FEMINIE and not p.is_female): 
                     # PREFIX attributes contribute only a gender hint.
                     p.is_female = True
                 elif (a.gender == MorphGender.MASCULINE and not p.is_male): 
                     p.is_male = True
     elif ((isinstance(begin.previous, TextToken)) and (begin.whitespaces_before_count < 3)): 
         # No leading attributes: a preceding "ИП" is turned into the property
         # "индивидуальный предприниматель" (individual entrepreneur).
         if (begin.previous.term == "ИП"): 
             a = PersonAttrToken(begin.previous, begin.previous)
             a.prop_ref = PersonPropertyReferent()
             a.prop_ref.name = "индивидуальный предприниматель"
             p.add_slot(PersonReferent.ATTR_ATTR, a, False, 0)
             begin = begin.previous
     # Copy the morphology item by item, forcing singular number and filling
     # an undefined gender from the person's gender flags when unambiguous.
     m0 = MorphCollection()
     for it in morph_.items: 
         bi = MorphBaseInfo()
         bi.copy_from(it)
         bi.number = MorphNumber.SINGULAR
         if (bi.gender == MorphGender.UNDEFINED): 
             if (p.is_male and not p.is_female): 
                 bi.gender = MorphGender.MASCULINE
             if (not p.is_male and p.is_female): 
                 bi.gender = MorphGender.FEMINIE
         m0.add_item(bi)
     morph_ = m0
     # When the name itself has no case, borrow case (and number/gender) using the first attribute.
     if ((attrs is not None and len(attrs) > 0 and not attrs[0].morph.case_.is_undefined) and morph_.case_.is_undefined): 
         morph_.case_ = attrs[0].morph.case_
         if (attrs[0].morph.number == MorphNumber.SINGULAR): 
             morph_.number = MorphNumber.SINGULAR
         if (p.is_male and not p.is_female): 
             morph_.gender = MorphGender.MASCULINE
         elif (p.is_female): 
             morph_.gender = MorphGender.FEMINIE
     # A preceding "ИМЕНИ" or abbreviated "ИМ" / "ИМ." switches to attribute mode.
     if (begin.previous is not None): 
         ttt = begin.previous
         if (ttt.is_value("ИМЕНИ", "ІМЕНІ")): 
             for_attribute = True
         else: 
             if (ttt.is_char('.') and ttt.previous is not None): 
                 ttt = ttt.previous
             if (ttt.whitespaces_after_count < 3): 
                 if (ttt.is_value("ИМ", "ІМ")): 
                     for_attribute = True
     if (for_attribute): 
         # Attribute mode: no trailing attributes or contacts are collected.
         return ReferentToken._new2484(p, begin, end, morph_, p._m_person_identity_typ)
     # The person follows "<person>, " / "<person> and ": walk back to the first
     # person of that list and inherit its leading property when it applies to the whole list.
     if ((begin.previous is not None and begin.previous.is_comma_and and (isinstance(begin.previous.previous, ReferentToken))) and (isinstance(begin.previous.previous.get_referent(), PersonReferent))): 
         rt00 = Utils.asObjectOrNull(begin.previous.previous, ReferentToken)
         ttt = rt00
         while ttt is not None: 
             if (ttt.previous is None or not (isinstance(ttt.previous.previous, ReferentToken))): 
                 break
             if (not ttt.previous.is_comma_and or not (isinstance(ttt.previous.previous.get_referent(), PersonReferent))): 
                 break
             rt00 = (Utils.asObjectOrNull(ttt.previous.previous, ReferentToken))
             ttt = (rt00)
         if (isinstance(rt00.begin_token.get_referent(), PersonPropertyReferent)): 
             # The property is shared only if it is followed by ':' or is in plural.
             ok = False
             if (rt00.begin_token.end_token.next0_ is not None and rt00.begin_token.end_token.next0_.is_char(':')): 
                 ok = True
             elif (rt00.begin_token.morph.number == MorphNumber.PLURAL): 
                 ok = True
             if (ok): 
                 p.add_slot(PersonReferent.ATTR_ATTR, rt00.begin_token.get_referent(), False, 0)
     # Recursion guard: the scans below call back into the person analyzer.
     # The counter is decremented again after the second scan.
     if (ad is not None): 
         if (ad.overflow_level > 10): 
             return ReferentToken._new2484(p, begin, end, morph_, p._m_person_identity_typ)
         ad.overflow_level += 1
     attrs1 = None  # trailing attributes collected by the scan below
     has_position = False  # set when a trailing POSITION or KING attribute is found
     open_br = False  # set when '(' directly follows the name
     # First forward scan: trailing attributes, bracketed variants, nicknames.
     # The first_pass flag emulates a for-loop whose step is "t = t.next0_".
     t = end.next0_
     first_pass3365 = True
     while True:
         if first_pass3365: first_pass3365 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char): 
             break
         if (t.is_newline_before): 
             # Crossing a line break is allowed only in restricted situations.
             if (t.newlines_before_count > 2): 
                 break
             if (attrs1 is not None and len(attrs1) > 0): 
                 break
             ml = MailLine.parse(t, 0, 0)
             if (ml is not None and ml.typ == MailLine.Types.FROM): 
                 break
             if (t.chars.is_capital_upper): 
                 attr1 = PersonAttrToken.try_attach(t, (None if ad is None else ad.local_ontology), PersonAttrToken.PersonAttrAttachAttrs.NO)
                 ok1 = False
                 if (attr1 is not None): 
                     if (has_prefix or attr1.is_newline_after or ((attr1.end_token.next0_ is not None and attr1.end_token.next0_.is_table_control_char))): 
                         ok1 = True
                     else: 
                         # Accept the attribute if one of its tokens after the first is preceded by whitespace.
                         tt2 = t.next0_
                         while tt2 is not None and tt2.end_char <= attr1.end_char: 
                             if (tt2.is_whitespace_before): 
                                 ok1 = True
                             tt2 = tt2.next0_
                 else: 
                     ttt = PersonHelper.__correct_tail_attributes(p, t)
                     if (ttt is not None and ttt != t): 
                         t = ttt
                         end = t
                         continue
                 if (not ok1): 
                     break
         # Separator characters are skipped.
         if (t.is_hiphen or t.is_char_of("_>|")): 
             continue
         if (t.is_value("МОДЕЛЬ", None)): 
             break
         # If the helper returns a different token, the span is extended up to it.
         tt = PersonHelper.__correct_tail_attributes(p, t)
         if (tt != t and tt is not None): 
             t = tt
             end = t
             continue
         # NOTE(review): is_be is never assigned True in this function, so every
         # "not is_be" test below is always true.
         is_be = False
         if (t.is_char('(') and t == end.next0_): 
             # Bracket directly after the name: "(Lastname)" or a Latin spelling of the name.
             open_br = True
             t = t.next0_
             if (t is None): 
                 break
             pit1 = PersonItemToken.try_attach(t, None, PersonItemToken.ParseAttr.NO, None)
             if ((pit1 is not None and t.chars.is_capital_upper and pit1.end_token.next0_ is not None) and (isinstance(t, TextToken)) and pit1.end_token.next0_.is_char(')')): 
                 if (pit1.lastname is not None): 
                     inf = MorphBaseInfo._new2476(MorphCase.NOMINATIVE)
                     if (p.is_male): 
                         inf.gender = Utils.valToEnum((inf.gender) | (MorphGender.MASCULINE), MorphGender)
                     if (p.is_female): 
                         inf.gender = Utils.valToEnum((inf.gender) | (MorphGender.FEMINIE), MorphGender)
                     sur = PersonIdentityToken.create_lastname(pit1, inf)
                     if (sur is not None): 
                         p._add_fio_identity(sur, None, None)
                         t = pit1.end_token.next0_
                         end = t
                         continue
             if ((isinstance(t, TextToken)) and t.chars.is_latin_letter): 
                 pits = PersonItemToken.try_attach_list(t, None, PersonItemToken.ParseAttr.CANBELATIN, 10)
                 if (((pits is not None and len(pits) >= 2 and len(pits) <= 3) and pits[0].chars.is_latin_letter and pits[1].chars.is_latin_letter) and pits[len(pits) - 1].end_token.next0_ is not None and pits[len(pits) - 1].end_token.next0_.is_char(')')): 
                     # Collect the Latin items in a scratch referent; they are copied
                     # to p only if every item matches one of p's name slots.
                     pr2 = PersonReferent()
                     cou = 0
                     for pi0_ in pits: 
                         for si in p.slots: 
                             if (si.type_name == PersonReferent.ATTR_FIRSTNAME or si.type_name == PersonReferent.ATTR_MIDDLENAME or si.type_name == PersonReferent.ATTR_LASTNAME): 
                                 if (MiscHelper.can_be_equal_cyr_and_latss(str(si.value), pi0_.value)): 
                                     cou += 1
                                     pr2.add_slot(si.type_name, pi0_.value, False, 0)
                                     break
                     if (cou == len(pits)): 
                         for si in pr2.slots: 
                             p.add_slot(si.type_name, si.value, False, 0)
                         t = pits[len(pits) - 1].end_token.next0_
                         end = t
                         continue
         elif (t.is_comma): 
             t = t.next0_
             if ((isinstance(t, TextToken)) and t.is_value("WHO", None)): 
                 continue
             if ((isinstance(t, TextToken)) and t.chars.is_latin_letter): 
                 # Same Latin-spelling check as in the bracket branch, but no closing ')' is required.
                 pits = PersonItemToken.try_attach_list(t, None, PersonItemToken.ParseAttr.CANBELATIN, 10)
                 if ((pits is not None and len(pits) >= 2 and len(pits) <= 3) and pits[0].chars.is_latin_letter and pits[1].chars.is_latin_letter): 
                     pr2 = PersonReferent()
                     cou = 0
                     for pi0_ in pits: 
                         for si in p.slots: 
                             if (si.type_name == PersonReferent.ATTR_FIRSTNAME or si.type_name == PersonReferent.ATTR_MIDDLENAME or si.type_name == PersonReferent.ATTR_LASTNAME): 
                                 if (MiscHelper.can_be_equal_cyr_and_latss(str(si.value), pi0_.value)): 
                                     cou += 1
                                     pr2.add_slot(si.type_name, pi0_.value, False, 0)
                                     break
                     if (cou == len(pits)): 
                         for si in pr2.slots: 
                             p.add_slot(si.type_name, si.value, False, 0)
                         t = pits[len(pits) - 1].end_token
                         end = t
                         continue
         elif ((isinstance(t, TextToken)) and t.is_verb_be): 
             t = t.next0_
         elif (t.is_and and t.is_whitespace_after and not t.is_newline_after): 
             # A conjunction directly after the name stops the scan.
             if (t == end.next0_): 
                 break
             t = t.next0_
         elif (t.is_hiphen and t == end.next0_): 
             # NOTE(review): looks unreachable - every hyphen already hits "continue" above.
             t = t.next0_
         elif (t.is_char('.') and t == end.next0_ and has_prefix): 
             t = t.next0_
         ttt2 = PersonHelper.create_nickname(p, t)
         if (ttt2 is not None): 
             end = ttt2
             t = end
             continue
         # The branches above may have advanced t past the last token.
         if (t is None): 
             break
         attr = None
         attr = PersonAttrToken.try_attach(t, (None if ad is None else ad.local_ontology), PersonAttrToken.PersonAttrAttachAttrs.NO)
         if (attr is None): 
             # No attribute here: handle a few special cases, then stop scanning.
             if ((t is not None and t.get_referent() is not None and t.get_referent().type_name == "GEO") and attrs1 is not None and open_br): 
                 continue
             # A capitalized token right before ')' becomes the last name if none is set yet.
             if ((t.chars.is_capital_upper and open_br and t.next0_ is not None) and t.next0_.is_char(')')): 
                 if (p.find_slot(PersonReferent.ATTR_LASTNAME, None, True) is None): 
                     p.add_slot(PersonReferent.ATTR_LASTNAME, t.get_source_text().upper(), False, 0)
                     t = t.next0_
                     end = t
             # A singular "КОТОРЫЙ" supplies a gender flag the person does not have yet.
             if (t is not None and t.is_value("КОТОРЫЙ", None) and t.morph.number == MorphNumber.SINGULAR): 
                 if (not p.is_female and t.morph.gender == MorphGender.FEMINIE): 
                     p.is_female = True
                     p._correct_data()
                 elif (not p.is_male and t.morph.gender == MorphGender.MASCULINE): 
                     p.is_male = True
                     p._correct_data()
             break
         if (attr.morph.number == MorphNumber.PLURAL): 
             break
         if (attr.typ == PersonAttrTerminType.BESTREGARDS): 
             break
         if (attr.is_doubt): 
             # A doubtful attribute is kept only in the three contexts below.
             if (has_prefix): 
                 pass
             elif (t.is_newline_before and attr.is_newline_after): 
                 pass
             elif (t.previous is not None and ((t.previous.is_hiphen or t.previous.is_char(':')))): 
                 pass
             else: 
                 break
         # The attribute's case must be compatible with the case of the name.
         if (not morph_.case_.is_undefined and not attr.morph.case_.is_undefined): 
             if (((morph_.case_) & attr.morph.case_).is_undefined and not is_be): 
                 break
         if (open_br): 
             # Inside brackets: stop if a person can be attached at this token.
             if (PersonAnalyzer._try_attach_person(t, ad, False, 0, True) is not None): 
                 break
         if (attrs1 is None): 
             # First trailing attribute right after "name," and followed by a verb:
             # accepted only when begin can be the start of a sentence.
             if (t.previous.is_comma and t.previous == end.next0_): 
                 ttt = attr.end_token.next0_
                 if (ttt is not None): 
                     if (ttt.morph.class0_.is_verb): 
                         if (MiscHelper.can_be_start_of_sentence(begin)): 
                             pass
                         else: 
                             break
             attrs1 = list()
         attrs1.append(attr)
         if (attr.typ == PersonAttrTerminType.POSITION or attr.typ == PersonAttrTerminType.KING): 
             if (not is_be): 
                 has_position = True
         elif (attr.typ != PersonAttrTerminType.PREFIX): 
             if (attr.typ == PersonAttrTerminType.OTHER and attr.age is not None): 
                 pass
             else: 
                 # Any other attribute type discards all collected trailing attributes.
                 attrs1 = (None)
                 break
         t = attr.end_token
     # Both leading and trailing attributes exist: unless one of the three
     # exceptions holds, drop the trailing ones when a leading POSITION exists
     # and the trailing group is followed by something other than '.'.
     if (attrs1 is not None and has_position and attrs is not None): 
         # NOTE(review): attrs is not checked for emptiness and te1 is not checked for None here.
         te1 = attrs[len(attrs) - 1].end_token.next0_
         te2 = attrs1[0].begin_token
         if (te1.whitespaces_after_count > te2.whitespaces_before_count and (te2.whitespaces_before_count < 2)): 
             pass
         elif (attrs1[0].age is not None): 
             pass
         elif (((te1.is_hiphen or te1.is_char(':'))) and not attrs1[0].is_newline_before and ((te2.previous.is_comma or te2.previous == end))): 
             pass
         else: 
             for a in attrs: 
                 if (a.typ == PersonAttrTerminType.POSITION): 
                     te = attrs1[len(attrs1) - 1].end_token
                     if (te.next0_ is not None): 
                         if (not te.next0_.is_char('.')): 
                             attrs1 = (None)
                             break
     # The last trailing attribute may actually start another person: compare
     # the whitespace before and after it to decide whether to drop the trailing attributes.
     if (attrs1 is not None and not has_prefix): 
         attr = attrs1[len(attrs1) - 1]
         ok = False
         if (attr.end_token.next0_ is not None and attr.end_token.next0_.chars.is_capital_upper): 
             ok = True
         else: 
             rt = PersonAnalyzer._try_attach_person(attr.begin_token, ad, False, -1, False)
             if (rt is not None and (isinstance(rt.referent, PersonReferent))): 
                 ok = True
         if (ok): 
             if (attr.begin_token.whitespaces_before_count > attr.end_token.whitespaces_after_count): 
                 attrs1 = (None)
             elif (attr.begin_token.whitespaces_before_count == attr.end_token.whitespaces_after_count): 
                 rt1 = PersonAnalyzer._try_attach_person(attr.begin_token, ad, False, -1, False)
                 if (rt1 is not None): 
                     attrs1 = (None)
     # Apply the surviving trailing attributes to the person and extend the span.
     if (attrs1 is not None): 
         for a in attrs1: 
             if (a.typ != PersonAttrTerminType.PREFIX): 
                 if (a.age is not None): 
                     p.add_slot(PersonReferent.ATTR_AGE, a.age, True, 0)
                 elif (a.prop_ref is None): 
                     p.add_slot(PersonReferent.ATTR_ATTR, a.value, False, 0)
                 else: 
                     p.add_slot(PersonReferent.ATTR_ATTR, a, False, 0)
                 end = a.end_token
                 # Gender from the attribute is used only when the person has none at all.
                 if (a.gender != MorphGender.UNDEFINED and not p.is_female and not p.is_male): 
                     if (a.gender == MorphGender.MASCULINE and not p.is_male): 
                         p.is_male = True
                         p._correct_data()
                     elif (a.gender == MorphGender.FEMINIE and not p.is_female): 
                         p.is_female = True
                         p._correct_data()
         if (open_br): 
             # Include the closing bracket in the span.
             if (end.next0_ is not None and end.next0_.is_char(')')): 
                 end = end.next0_
     # Second forward scan: contacts, identity documents and organizations.
     crlf_cou = 0  # line breaks seen since the last accepted item
     t = end.next0_
     first_pass3366 = True
     while True:
         if first_pass3366: first_pass3366 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char): 
             break
         if (t.is_newline_before): 
             ml = MailLine.parse(t, 0, 0)
             if (ml is not None and ml.typ == MailLine.Types.FROM): 
                 break
             crlf_cou += 1
         # Punctuation between the name and a contact is skipped.
         if (t.is_char_of(":,(") or t.is_hiphen): 
             continue
         if (t.is_char('.') and t == end.next0_): 
             continue
         r = t.get_referent()
         if (r is not None): 
             if (r.type_name == "PHONE" or r.type_name == "URI" or r.type_name == "ADDRESS"): 
                 ty = r.get_string_value("SCHEME")
                 if (r.type_name == "URI"): 
                     # Only these URI schemes are accepted as contacts.
                     if ((ty != "mailto" and ty != "skype" and ty != "ICQ") and ty != "http"): 
                         break
                 p._add_contact(r)
                 end = t
                 crlf_cou = 0
                 continue
         if (isinstance(r, PersonIdentityReferent)): 
             p.add_slot(PersonReferent.ATTR_IDDOC, r, False, 0)
             end = t
             crlf_cou = 0
             continue
         if (r is not None and r.type_name == "ORGANIZATION"): 
             if (t.next0_ is not None and t.next0_.morph.class0_.is_verb): 
                 break
             if (begin.previous is not None and begin.previous.morph.class0_.is_verb): 
                 break
             if (t.whitespaces_after_count == 1): 
                 break
             # Check whether some attribute of the person already references this organization.
             exist = False
             for s in p.slots: 
                 if (s.type_name == PersonReferent.ATTR_ATTR and (isinstance(s.value, PersonPropertyReferent))): 
                     pr = Utils.asObjectOrNull(s.value, PersonPropertyReferent)
                     if (pr.find_slot(PersonPropertyReferent.ATTR_REF, r, True) is not None): 
                         exist = True
                         break
                 elif (s.type_name == PersonReferent.ATTR_ATTR and (isinstance(s.value, PersonAttrToken))): 
                     pr = Utils.asObjectOrNull(s.value, PersonAttrToken)
                     if (pr.referent.find_slot(PersonPropertyReferent.ATTR_REF, r, True) is not None): 
                         exist = True
                         break
             if (not exist): 
                 # Otherwise add a generic "сотрудник" (employee) property referencing it.
                 pat = PersonAttrToken(t, t)
                 pat.prop_ref = PersonPropertyReferent._new2446("сотрудник")
                 pat.prop_ref.add_slot(PersonPropertyReferent.ATTR_REF, r, False, 0)
                 p.add_slot(PersonReferent.ATTR_ATTR, pat, False, 0)
             # Note: "end" is not advanced in this branch.
             continue
         # Any other referent ends the scan.
         if (r is not None): 
             break
         # Plain tokens are passed over only after a prefix, and for fewer than two line breaks.
         if (not has_prefix or crlf_cou >= 2): 
             break
         rt = t.kit.process_referent("PERSON", t)
         if (rt is not None): 
             break
     # Undo the increment made before the first scan.
     if (ad is not None): 
         ad.overflow_level -= 1
     # "НА ИМЯ <person>" preceded (optionally through a comma) by an identity document.
     if (begin.is_value("НА", None) and begin.next0_ is not None and begin.next0_.is_value("ИМЯ", None)): 
         t0 = begin.previous
         if (t0 is not None and t0.is_comma): 
             t0 = t0.previous
         if (t0 is not None and (isinstance(t0.get_referent(), PersonIdentityReferent))): 
             p.add_slot(PersonReferent.ATTR_IDDOC, t0.get_referent(), False, 0)
     return ReferentToken._new2484(p, begin, end, morph_, p._m_person_identity_typ)
예제 #2
0
class TextToken(Token):
    """Text token: an input token produced after morphological analysis.

    Instance attributes assigned by this class:

    * ``term`` - the term taken from the source morph token
      (None when the token was created without a source).
    * ``lemma`` - the source token's lemma, falling back to ``term``.
    * ``term0`` - extra string set only by the ``_new470`` / ``_new473``
      factory helpers.
    * ``invariant_prefix_length_of_morph_vars`` - number of leading characters
      of ``term`` shared by the normal forms of all morphological variants.
    * ``max_length_of_morph_vars`` - maximum length among ``term`` and all
      normal forms of the variants.

    The last two values are used by ``is_value`` to reject a candidate
    quickly by its length.
    """

    def __init__(self,
                 source: 'MorphToken',
                 kit_: 'AnalysisKit',
                 bchar: int = -1,
                 echar: int = -1) -> None:
        """Create the token from a morph token.

        Args:
            source: morph token to copy term, lemma, chars and word forms
                from; may be None, then only the defaults are set.
            kit_: analysis kit passed to the base class.
            bchar: begin position; a negative value means "take it from
                ``source``" (or 0 when ``source`` is None).
            echar: end position, with the same convention as ``bchar``.
        """
        super().__init__(kit_, (bchar if bchar >= 0 else
                                (0 if source is None else source.begin_char)),
                         (echar if echar >= 0 else
                          (0 if source is None else source.end_char)))
        self.term = None
        self.lemma = None
        self.term0 = None
        self.invariant_prefix_length_of_morph_vars = 0
        self.max_length_of_morph_vars = 0
        if (source is None):
            return
        self.chars = source.char_info
        self.term = source.term
        self.lemma = (Utils.ifNotNull(source.get_lemma(), self.term))
        self.max_length_of_morph_vars = (len(self.term))
        self.morph = MorphCollection()
        if (source.word_forms is not None):
            for wf in source.word_forms:
                self.morph.add_item(wf)
                # Track the longest normal form among all variants.
                if (wf.normal_case is not None and
                    (self.max_length_of_morph_vars < len(wf.normal_case))):
                    self.max_length_of_morph_vars = (len(wf.normal_case))
                if (wf.normal_full is not None and
                    (self.max_length_of_morph_vars < len(wf.normal_full))):
                    self.max_length_of_morph_vars = (len(wf.normal_full))
        # Length of the prefix of term on which every normal form agrees:
        # stop at the first position where some variant is shorter or differs.
        i = 0
        while i < len(self.term):
            ch = self.term[i]
            j = 0
            while j < self.morph.items_count:
                wf = Utils.asObjectOrNull(self.morph.get_indexer_item(j),
                                          MorphWordForm)
                if (wf.normal_case is not None):
                    if (i >= len(wf.normal_case)):
                        break
                    if (wf.normal_case[i] != ch):
                        break
                if (wf.normal_full is not None):
                    if (i >= len(wf.normal_full)):
                        break
                    if (wf.normal_full[i] != ch):
                        break
                j += 1
            # The inner loop was left early: some variant disagrees at i.
            if (j < self.morph.items_count):
                break
            self.invariant_prefix_length_of_morph_vars = ((i + 1))
            i += 1
        if (self.morph.language.is_undefined
                and not source.language.is_undefined):
            self.morph.language = source.language

    def __str__(self) -> str:
        """Return the term followed by ", <variant>" for every morph item."""
        res = Utils.newStringIO(self.term)
        for l_ in self.morph.items:
            # No per-call flush: the target is an in-memory buffer.
            print(", {0}".format(str(l_)), end="", file=res)
        return Utils.toStringStringIO(res)

    def check_value(self, dict0_: typing.List[tuple]) -> object:
        """Try to find this token in a dictionary.

        The term is tried first, then ``normal_case`` and ``normal_full`` of
        every word form, in the order of ``self.morph.items``.

        Args:
            dict0_: container queried through ``Utils.tryGetValue``
                (presumably a mapping from string to value despite the
                annotation - confirm against callers); may be None.

        Returns:
            The value of the first key found, or None when nothing matches
            or ``dict0_`` is None.
        """
        if (dict0_ is None):
            return None
        res = None
        wrapres2868 = RefOutArgWrapper(None)
        inoutres2869 = Utils.tryGetValue(dict0_, self.term, wrapres2868)
        res = wrapres2868.value
        if (inoutres2869):
            return res
        if (self.morph is not None):
            for it in self.morph.items:
                mf = Utils.asObjectOrNull(it, MorphWordForm)
                if (mf is not None):
                    if (mf.normal_case is not None):
                        wrapres2864 = RefOutArgWrapper(None)
                        inoutres2865 = Utils.tryGetValue(
                            dict0_, mf.normal_case, wrapres2864)
                        res = wrapres2864.value
                        if (inoutres2865):
                            return res
                    # Skip the second lookup when both normal forms coincide.
                    if (mf.normal_full is not None
                            and mf.normal_case != mf.normal_full):
                        wrapres2866 = RefOutArgWrapper(None)
                        inoutres2867 = Utils.tryGetValue(
                            dict0_, mf.normal_full, wrapres2866)
                        res = wrapres2866.value
                        if (inoutres2867):
                            return res
        return None

    def get_source_text(self) -> str:
        """Return the source text as provided by the base class."""
        return super().get_source_text()

    def is_value(self, term_: str, termua: str = None) -> bool:
        """Check whether the token equals ``term_`` in any of its normal forms.

        Args:
            term_: value to compare with the term and with ``normal_case`` /
                ``normal_full`` of every word form.
            termua: alternative value that is tried first when the token's
                language is Ukrainian (``morph.language.is_ua``).

        Returns:
            True on a match, False otherwise (including ``term_`` is None).
        """
        if (termua is not None and self.morph.language.is_ua):
            if (self.is_value(termua, None)):
                return True
        if (term_ is None):
            return False
        # Quick rejections by length before comparing strings.
        if (self.invariant_prefix_length_of_morph_vars > len(term_)):
            return False
        if (self.max_length_of_morph_vars >= len(self.term)
                and (self.max_length_of_morph_vars < len(term_))):
            return False
        if (term_ == self.term):
            return True
        for wf in self.morph.items:
            if ((isinstance(wf, MorphWordForm)) and
                ((wf.normal_case == term_ or wf.normal_full == term_))):
                return True
        return False

    @property
    def is_and(self) -> bool:
        """Whether this is the coordinating conjunction AND (in all languages).

        A single '&' character also counts when the token is not classified
        as a conjunction.
        """
        if (not self.morph.class0_.is_conjunction):
            if (self.length_char == 1 and self.is_char('&')):
                return True
            return False
        val = self.term
        if (val == "И" or val == "AND" or val == "UND"):
            return True
        if (self.morph.language.is_ua):
            if (val == "І" or val == "ТА"):
                return True
        return False

    @property
    def is_or(self) -> bool:
        """Whether this is the coordinating conjunction OR (in all languages)."""
        if (not self.morph.class0_.is_conjunction):
            return False
        val = self.term
        if (val == "ИЛИ" or val == "ЛИБО" or val == "OR"):
            return True
        if (self.morph.language.is_ua):
            if (val == "АБО"):
                return True
        return False

    @property
    def is_letters(self) -> bool:
        """Whether the first character of the term is alphabetic.

        Returns False for a missing or empty term instead of raising.
        """
        term = self.term
        # term is None for a token built without a source, and may be empty
        # after deserialization; indexing it would raise.
        if (not term):
            return False
        return term[0].isalpha()

    def get_morph_class_in_dictionary(self) -> 'MorphClass':
        """Return the union of classes of all word forms found in the dictionary."""
        res = MorphClass()
        for wf in self.morph.items:
            if ((isinstance(wf, MorphWordForm)) and wf.is_in_dictionary):
                res |= wf.class0_
        return res

    def get_normal_case_text(self,
                             mc: 'MorphClass' = None,
                             num: 'MorphNumber' = MorphNumber.UNDEFINED,
                             gender: 'MorphGender' = MorphGender.UNDEFINED,
                             keep_chars: bool = False) -> str:
        """Return the normalized text of the token.

        The first morph item compatible with the requested class and gender
        that is a word form determines the result.

        Args:
            mc: required morphological class; None or undefined means any.
                For a preposition the normalized preposition is returned.
            num: requested number; SINGULAR enables the plural-to-singular
                handling below.
            gender: requested gender; UNDEFINED means any.
            keep_chars: when True, re-apply the token's letter case
                (all lower, or first upper) to the result.

        Returns:
            The normalized string. None is returned when no word form was
            selected although some compatible item had a defined case.
        """
        from pullenti.ner.core.MiscHelper import MiscHelper
        empty = True
        if (mc is not None and mc.is_preposition):
            return LanguageHelper.normalize_preposition(self.term)
        for it in self.morph.items:
            if (mc is not None and not mc.is_undefined):
                cc = (it.class0_) & mc
                if (cc.is_undefined):
                    continue
                if (cc.is_misc and not cc.is_proper and mc != it.class0_):
                    continue
            wf = Utils.asObjectOrNull(it, MorphWordForm)
            normal_full = False
            if (gender != MorphGender.UNDEFINED):
                if (((it.gender) & (gender)) == (MorphGender.UNDEFINED)):
                    # Gender mismatch: for masculine the full normal form can
                    # still be used; personal pronouns are let through.
                    if ((gender == MorphGender.MASCULINE and
                         ((it.gender != MorphGender.UNDEFINED or it.number
                           == MorphNumber.PLURAL)) and wf is not None)
                            and wf.normal_full is not None):
                        normal_full = True
                    elif (gender == MorphGender.MASCULINE
                          and it.class0_.is_personal_pronoun):
                        pass
                    else:
                        continue
            if (not it.case_.is_undefined):
                empty = False
            if (wf is not None):
                res = None
                if (num == MorphNumber.SINGULAR
                        and it.number == MorphNumber.PLURAL
                        and wf.normal_full is not None):
                    ncase = wf.normal_case
                    # Keep normal_case when it is normal_full plus a two-letter
                    # "СЯ" ending; normal_case may be None (the other branch
                    # guards it too), so check before measuring it.
                    if (ncase is not None
                            and len(ncase) == (len(wf.normal_full) + 2)
                            and len(ncase) > 4 and ncase.endswith("СЯ")):
                        res = ncase
                    else:
                        res = wf.normal_full
                else:
                    res = (wf.normal_full if normal_full else
                           (Utils.ifNotNull(wf.normal_case, self.term)))
                if (num == MorphNumber.SINGULAR and mc is not None
                        and mc == MorphClass.NOUN):
                    # Irregular singular: "ДЕТИ" (children) -> "РЕБЕНОК" (child).
                    if (res == "ДЕТИ"):
                        res = "РЕБЕНОК"
                if (keep_chars):
                    if (self.chars.is_all_lower):
                        res = res.lower()
                    elif (self.chars.is_capital_upper):
                        res = MiscHelper.convert_first_char_upper_and_other_lower(
                            res)
                return res
        if (not empty):
            return None
        # No usable item: ask the morphology service for a singular form of
        # the raw term, otherwise fall back to the term itself.
        te = None
        if (num == MorphNumber.SINGULAR and mc is not None):
            bi = MorphBaseInfo._new492(MorphClass._new53(mc.value), gender,
                                       MorphNumber.SINGULAR,
                                       self.morph.language)
            vars0_ = MorphologyService.get_wordform(self.term, bi)
            if (vars0_ is not None):
                te = vars0_
        if (te is None):
            te = self.term
        if (keep_chars):
            if (self.chars.is_all_lower):
                return te.lower()
            elif (self.chars.is_capital_upper):
                return MiscHelper.convert_first_char_upper_and_other_lower(te)
        return te

    @staticmethod
    def get_source_text_tokens(begin: 'Token',
                               end: 'Token') -> typing.List['TextToken']:
        """Collect all text tokens between ``begin`` and ``end`` inclusive.

        Meta tokens are expanded recursively through their ``begin_token`` /
        ``end_token``; tokens of other kinds are skipped.
        """
        from pullenti.ner.MetaToken import MetaToken
        res = list()
        t = begin
        while t is not None and t != end.next0_ and t.end_char <= end.end_char:
            if (isinstance(t, TextToken)):
                res.append(Utils.asObjectOrNull(t, TextToken))
            elif (isinstance(t, MetaToken)):
                res.extend(
                    TextToken.get_source_text_tokens(t.begin_token,
                                                     t.end_token))
            t = t.next0_
        return res

    @property
    def is_pure_verb(self) -> bool:
        """Whether this token is a pure verb.

        True for a few fixed modal words, otherwise only when every
        dictionary word form is a verb (short-form adjectives are tolerated)
        and at least one verb form has no case.
        """
        ret = False
        if ((self.is_value("МОЖНО", None) or self.is_value("МОЖЕТ", None)
             or self.is_value("ДОЛЖНЫЙ", None))
                or self.is_value("НУЖНО", None)):
            return True
        for it in self.morph.items:
            if ((isinstance(it, MorphWordForm)) and it.is_in_dictionary):
                if (it.class0_.is_verb and it.case_.is_undefined):
                    ret = True
                elif (not it.class0_.is_verb):
                    # "к.ф." marks a short-form adjective, which is allowed.
                    if (it.class0_.is_adjective
                            and it.contains_attr("к.ф.", None)):
                        pass
                    else:
                        return False
        return ret

    @property
    def is_verb_be(self) -> bool:
        """Whether this is a verb like БЫТЬ (to be), ЯВЛЯТЬСЯ and so on."""
        if ((self.is_value("БЫТЬ", None) or self.is_value("ЕСТЬ", None)
             or self.is_value("ЯВЛЯТЬ", None)) or self.is_value("BE", None)):
            return True
        if (self.term == "IS" or self.term == "WAS" or self.term == "BECAME"):
            return True
        if (self.term == "Є"):
            return True
        return False

    def _serialize(self, stream: Stream) -> None:
        """Write the base token data, then term, lemma and both length fields."""
        from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
        super()._serialize(stream)
        SerializerHelper.serialize_string(stream, self.term)
        SerializerHelper.serialize_string(stream, self.lemma)
        SerializerHelper.serialize_short(
            stream, self.invariant_prefix_length_of_morph_vars)
        SerializerHelper.serialize_short(stream, self.max_length_of_morph_vars)

    def _deserialize(self, stream: Stream, kit_: 'AnalysisKit',
                     vers: int) -> None:
        """Read the fields in the same order in which ``_serialize`` writes them."""
        from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
        super()._deserialize(stream, kit_, vers)
        self.term = SerializerHelper.deserialize_string(stream)
        self.lemma = SerializerHelper.deserialize_string(stream)
        self.invariant_prefix_length_of_morph_vars = SerializerHelper.deserialize_short(
            stream)
        self.max_length_of_morph_vars = SerializerHelper.deserialize_short(
            stream)

    @staticmethod
    def _new470(_arg1: 'MorphToken', _arg2: 'AnalysisKit', _arg3: int,
                _arg4: int, _arg5: str) -> 'TextToken':
        """Create a token and set its ``term0`` to ``_arg5``."""
        res = TextToken(_arg1, _arg2, _arg3, _arg4)
        res.term0 = _arg5
        return res

    @staticmethod
    def _new473(_arg1: 'MorphToken', _arg2: 'AnalysisKit', _arg3: int,
                _arg4: int, _arg5: 'CharsInfo', _arg6: str) -> 'TextToken':
        """Create a token and set its ``chars`` to ``_arg5`` and ``term0`` to ``_arg6``."""
        res = TextToken(_arg1, _arg2, _arg3, _arg4)
        res.chars = _arg5
        res.term0 = _arg6
        return res