def process_referent(self, begin: 'Token',
                      end: 'Token') -> 'ReferentToken':
     """Try to recognise a person-property attribute starting at ``begin``.

     Args:
         begin: token where the attribute is expected to start.
         end: not used by this implementation.

     Returns:
         A ReferentToken built from the attached attribute's property
         referent, or None when nothing was attached or the attached
         attribute carries no property referent.
     """
     attached = PersonAttrToken.try_attach(
         begin, None, PersonAttrToken.PersonAttrAttachAttrs.INPROCESS)
     # Guard clause: nothing attached, or attached without a property referent.
     if (attached is None or attached.prop_ref is None):
         return None
     return ReferentToken._new2595(attached.prop_ref, attached.begin_token,
                                   attached.end_token, attached.morph,
                                   attached)
Exemple #2
0
 def isPersonSayOrAttrAfter(t : 'Token') -> bool:
     """Tell whether the text at ``t`` looks like a continuation that follows a person.

     Returns True when any of the following holds (checked in this order):
       * the tail-attribute corrector moves past ``t``;
       * after an optional comma, a Latin-letter token equals SAY, ASK or WHO;
       * ``t`` is a '.' followed by a text token that is a (personal) pronoun
         of feminine or masculine gender;
       * after one more optional comma, a person attribute can be attached.

     Returns False for ``None`` and when none of the checks succeed.
     """
     if t is None:
         return False
     tail_end = PersonHelper.__correctTailAttributes(None, t)
     if tail_end is not None and tail_end != t:
         return True
     # Skip a single comma (first time).
     if t.is_comma and t.next0_ is not None:
         t = t.next0_
     if t.chars.is_latin_letter and (t.isValue("SAY", None) or t.isValue("ASK", None) or t.isValue("WHO", None)):
         return True
     if t.isChar('.'):
         following = t.next0_
         if isinstance(following, TextToken):
             word_class = following.morph.class0_
             if word_class.is_pronoun or word_class.is_personal_pronoun:
                 if following.morph.gender == MorphGender.FEMINIE or following.morph.gender == MorphGender.MASCULINE:
                     return True
     # Skip a single comma again: the token reached above may itself be a comma.
     if t.is_comma and t.next0_ is not None:
         t = t.next0_
     attached = PersonAttrToken.tryAttach(t, None, PersonAttrToken.PersonAttrAttachAttrs.NO)
     return attached is not None
Exemple #3
0
 def _createReferentToken(p : 'PersonReferent', begin : 'Token', end : 'Token', morph_ : 'MorphCollection', attrs : typing.List['PersonAttrToken'], ad : 'PersonAnalyzerData', for_attribute : bool, after_be_predicate : bool) -> 'ReferentToken':
     """Build the ReferentToken for person ``p`` and enrich ``p`` from the surrounding text.

     The function proceeds in stages:
       1. fold the attributes preceding the name (``attrs``) into ``p`` and
          widen ``begin`` to cover them (or pick up a preceding "ИП" token
          when ``attrs`` is None);
       2. rebuild the morphology as singular, filling an undefined gender
          from ``p`` and an undefined case from the first attribute;
       3. return immediately in "named after" contexts (``for_attribute``
          or a preceding ИМЕНИ / ИМ. token);
       4. inherit a shared property from a preceding comma/and separated
          chain of person referents;
       5. scan forward from ``end`` for trailing attributes, nicknames and
          a bracketed surname, then validate and apply them;
       6. scan forward for contacts, identity documents and organizations.

     Args:
         p: person referent being completed; None yields None.
         begin: first token of the person; may be moved backwards.
         end: last token of the person; may be moved forwards.
         morph_: source morphology; a rebuilt copy is used for the result.
         attrs: attributes found before the name; may be None or empty.
         ad: analyzer data (local ontology and recursion counter); may be None.
         for_attribute: when True, return right after the morphology stage.
         after_be_predicate: not used inside this function.

     Returns:
         The resulting ReferentToken, or None when ``p`` is None.
     """
     from pullenti.ner.person.internal.PersonIdentityToken import PersonIdentityToken
     if (p is None):
         return None
     # ---- Stage 1: attributes located before the name ----
     has_prefix = False
     if (attrs is not None):
         for a in attrs:
             if (a.typ == PersonAttrTerminType.BESTREGARDS):
                 has_prefix = True
             else:
                 if (a.begin_char < begin.begin_char):
                     begin = a.begin_token
                 if (a.typ != PersonAttrTerminType.PREFIX):
                     if (a.age is not None):
                         p.addSlot(PersonReferent.ATTR_AGE, a.age, False, 0)
                     if (a.prop_ref is None):
                         p.addSlot(PersonReferent.ATTR_ATTR, a.value, False, 0)
                     else:
                         p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
                 elif (a.gender == MorphGender.FEMINIE and not p.is_female):
                     p.is_female = True
                 elif (a.gender == MorphGender.MASCULINE and not p.is_male):
                     p.is_male = True
     elif ((isinstance(begin.previous, TextToken)) and (begin.whitespaces_before_count < 3)):
         # No attributes supplied: a directly preceding "ИП" becomes an attribute.
         if ((begin.previous).term == "ИП"):
             a = PersonAttrToken(begin.previous, begin.previous)
             a.prop_ref = PersonPropertyReferent()
             a.prop_ref.name = "индивидуальный предприниматель"
             p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
             begin = begin.previous
     # ---- Stage 2: rebuild morphology (always singular) ----
     m0 = MorphCollection()
     for it in morph_.items:
         bi = MorphBaseInfo(it)
         bi.number = MorphNumber.SINGULAR
         if (bi.gender == MorphGender.UNDEFINED):
             if (p.is_male and not p.is_female):
                 bi.gender = MorphGender.MASCULINE
             if (not p.is_male and p.is_female):
                 bi.gender = MorphGender.FEMINIE
         m0.addItem(bi)
     morph_ = m0
     if ((attrs is not None and len(attrs) > 0 and not attrs[0].morph.case_.is_undefined) and morph_.case_.is_undefined):
         # Case is unknown for the name itself: borrow it from the first attribute.
         morph_.case_ = attrs[0].morph.case_
         if (attrs[0].morph.number == MorphNumber.SINGULAR):
             morph_.number = MorphNumber.SINGULAR
         if (p.is_male and not p.is_female):
             morph_.gender = MorphGender.MASCULINE
         elif (p.is_female):
             morph_.gender = MorphGender.FEMINIE
     # ---- Stage 3: "named after ..." contexts stop the processing here ----
     if (begin.previous is not None):
         ttt = begin.previous
         if (ttt.isValue("ИМЕНИ", "ІМЕНІ")):
             for_attribute = True
         else:
             if (ttt.isChar('.') and ttt.previous is not None):
                 ttt = ttt.previous
             if (ttt.whitespaces_after_count < 3):
                 if (ttt.isValue("ИМ", "ІМ")):
                     for_attribute = True
     if (for_attribute):
         return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
     # ---- Stage 4: inherit a property from a preceding list of persons ----
     if ((begin.previous is not None and begin.previous.is_comma_and and (isinstance(begin.previous.previous, ReferentToken))) and (isinstance(begin.previous.previous.getReferent(), PersonReferent))):
         rt00 = Utils.asObjectOrNull(begin.previous.previous, ReferentToken)
         ttt = rt00
         # Walk back to the first person of the comma/and separated chain.
         while ttt is not None:
             if (ttt.previous is None or not ((isinstance(ttt.previous.previous, ReferentToken)))):
                 break
             if (not ttt.previous.is_comma_and or not ((isinstance(ttt.previous.previous.getReferent(), PersonReferent)))):
                 break
             rt00 = (Utils.asObjectOrNull(ttt.previous.previous, ReferentToken))
             ttt = (rt00)
         if (isinstance(rt00.begin_token.getReferent(), PersonPropertyReferent)):
             ok = False
             if ((rt00.begin_token).end_token.next0_ is not None and (rt00.begin_token).end_token.next0_.isChar(':')):
                 ok = True
             elif (rt00.begin_token.morph.number == MorphNumber.PLURAL):
                 ok = True
             if (ok):
                 p.addSlot(PersonReferent.ATTR_ATTR, rt00.begin_token.getReferent(), False, 0)
     # Recursion guard: the level is incremented here and decremented before the final return.
     if (ad is not None):
         if (ad.overflow_level > 10):
             return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
         ad.overflow_level += 1
     # ---- Stage 5: attributes located after the name ----
     attrs1 = None
     has_position = False
     open_br = False
     t = end.next0_
     first_pass3095 = True
     while True:
         if first_pass3095: first_pass3095 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char):
             break
         if (t.is_newline_before):
             if (t.newlines_before_count > 2):
                 break
             if (attrs1 is not None and len(attrs1) > 0):
                 break
             ml = MailLine.parse(t, 0)
             if (ml is not None and ml.typ == MailLine.Types.FROM):
                 break
             if (t.chars.is_capital_upper):
                 # A capitalised token on a new line is accepted only as an attribute
                 # under the conditions below, or as a tail attribute.
                 attr1 = PersonAttrToken.tryAttach(t, (None if ad is None else ad.local_ontology), PersonAttrToken.PersonAttrAttachAttrs.NO)
                 ok1 = False
                 if (attr1 is not None):
                     if (has_prefix or attr1.is_newline_after or ((attr1.end_token.next0_ is not None and attr1.end_token.next0_.is_table_control_char))):
                         ok1 = True
                     else:
                         tt2 = t.next0_
                         while tt2 is not None and tt2.end_char <= attr1.end_char:
                             if (tt2.is_whitespace_before):
                                 ok1 = True
                             tt2 = tt2.next0_
                 else:
                     ttt = PersonHelper.__correctTailAttributes(p, t)
                     if (ttt is not None and ttt != t):
                         t = ttt
                         end = t
                         continue
                 if (not ok1):
                     break
         if (t.is_hiphen or t.isCharOf("_>|")):
             continue
         if (t.isValue("МОДЕЛЬ", None)):
             break
         tt = PersonHelper.__correctTailAttributes(p, t)
         if (tt != t and tt is not None):
             t = tt
             end = t
             continue
         # NOTE(review): is_be is never set to True anywhere in this function.
         is_be = False
         if (t.isChar('(') and t == end.next0_):
             # "(Surname)" right after the person: register it as one more surname.
             open_br = True
             t = t.next0_
             if (t is None):
                 break
             pit1 = PersonItemToken.tryAttach(t, None, PersonItemToken.ParseAttr.NO, None)
             if ((pit1 is not None and t.chars.is_capital_upper and pit1.end_token.next0_ is not None) and (isinstance(t, TextToken)) and pit1.end_token.next0_.isChar(')')):
                 if (pit1.lastname is not None):
                     inf = MorphBaseInfo._new2321(MorphCase.NOMINATIVE)
                     if (p.is_male):
                         inf.gender = Utils.valToEnum((inf.gender) | (MorphGender.MASCULINE), MorphGender)
                     if (p.is_female):
                         inf.gender = Utils.valToEnum((inf.gender) | (MorphGender.FEMINIE), MorphGender)
                     sur = PersonIdentityToken.createLastname(pit1, inf)
                     if (sur is not None):
                         p._addFioIdentity(sur, None, None)
                         t = pit1.end_token.next0_
                         end = t
                         continue
         elif (t.is_comma):
             t = t.next0_
             if ((isinstance(t, TextToken)) and (t).isValue("WHO", None)):
                 continue
         elif ((isinstance(t, TextToken)) and (t).is_verb_be):
             t = t.next0_
         elif (t.is_and and t.is_whitespace_after and not t.is_newline_after):
             if (t == end.next0_):
                 break
             t = t.next0_
         # NOTE(review): hyphens are skipped by the `continue` earlier in this
         # loop body, so this branch looks unreachable.
         elif (t.is_hiphen and t == end.next0_):
             t = t.next0_
         elif (t.isChar('.') and t == end.next0_ and has_prefix):
             t = t.next0_
         ttt2 = PersonHelper.createNickname(p, t)
         if (ttt2 is not None):
             end = ttt2
             t = end
             continue
         if (t is None):
             break
         attr = None
         attr = PersonAttrToken.tryAttach(t, (None if ad is None else ad.local_ontology), PersonAttrToken.PersonAttrAttachAttrs.NO)
         if (attr is None):
             if ((t is not None and t.getReferent() is not None and t.getReferent().type_name == "GEO") and attrs1 is not None and open_br):
                 continue
             if ((t.chars.is_capital_upper and open_br and t.next0_ is not None) and t.next0_.isChar(')')):
                 if (p.findSlot(PersonReferent.ATTR_LASTNAME, None, True) is None):
                     p.addSlot(PersonReferent.ATTR_LASTNAME, t.getSourceText().upper(), False, 0)
                     t = t.next0_
                     end = t
             # A singular relative pronoun ("КОТОРЫЙ") may reveal the gender.
             if (t is not None and t.isValue("КОТОРЫЙ", None) and t.morph.number == MorphNumber.SINGULAR):
                 if (not p.is_female and t.morph.gender == MorphGender.FEMINIE):
                     p.is_female = True
                     p._correctData()
                 elif (not p.is_male and t.morph.gender == MorphGender.MASCULINE):
                     p.is_male = True
                     p._correctData()
             break
         if (attr.morph.number == MorphNumber.PLURAL):
             break
         if (attr.typ == PersonAttrTerminType.BESTREGARDS):
             break
         if (attr.is_doubt):
             # A doubtful attribute is kept only in the contexts listed here.
             if (has_prefix):
                 pass
             elif (t.is_newline_before and attr.is_newline_after):
                 pass
             elif (t.previous is not None and ((t.previous.is_hiphen or t.previous.isChar(':')))):
                 pass
             else:
                 break
         if (not morph_.case_.is_undefined and not attr.morph.case_.is_undefined):
             # The attribute must share at least one case with the person.
             if (((morph_.case_) & attr.morph.case_).is_undefined and not is_be):
                 break
         if (open_br):
             if (PersonAnalyzer._tryAttachPerson(t, ad, False, 0, True) is not None):
                 break
         if (attrs1 is None):
             if (t.previous.is_comma and t.previous == end.next0_):
                 ttt = attr.end_token.next0_
                 if (ttt is not None):
                     if (ttt.morph.class0_.is_verb):
                         if (MiscHelper.canBeStartOfSentence(begin)):
                             pass
                         else:
                             break
             attrs1 = list()
         attrs1.append(attr)
         if (attr.typ == PersonAttrTerminType.POSITION or attr.typ == PersonAttrTerminType.KING):
             if (not is_be):
                 has_position = True
         elif (attr.typ != PersonAttrTerminType.PREFIX):
             if (attr.typ == PersonAttrTerminType.OTHER and attr.age is not None):
                 pass
             else:
                 attrs1 = (None)
                 break
         t = attr.end_token
     # Positions on both sides of the name: decide whether the trailing ones
     # really belong to this person.
     # Fix: also require a non-empty ``attrs``; indexing the last element of an
     # empty list raised IndexError here.
     if (attrs1 is not None and has_position and attrs is not None and len(attrs) > 0):
         te1 = attrs[len(attrs) - 1].end_token.next0_
         te2 = attrs1[0].begin_token
         if (te1.whitespaces_after_count > te2.whitespaces_before_count and (te2.whitespaces_before_count < 2)):
             pass
         elif (attrs1[0].age is not None):
             pass
         elif (((te1.is_hiphen or te1.isChar(':'))) and not attrs1[0].is_newline_before and ((te2.previous.is_comma or te2.previous == end))):
             pass
         else:
             for a in attrs:
                 if (a.typ == PersonAttrTerminType.POSITION):
                     te = attrs1[len(attrs1) - 1].end_token
                     if (te.next0_ is not None):
                         if (not te.next0_.isChar('.')):
                             attrs1 = (None)
                             break
     if (attrs1 is not None and not has_prefix):
         # The last trailing attribute may actually open the next person.
         attr = attrs1[len(attrs1) - 1]
         ok = False
         if (attr.end_token.next0_ is not None and attr.end_token.next0_.chars.is_capital_upper):
             ok = True
         else:
             rt = PersonAnalyzer._tryAttachPerson(attr.begin_token, ad, False, -1, False)
             if (rt is not None and (isinstance(rt.referent, PersonReferent))):
                 ok = True
         if (ok):
             if (attr.begin_token.whitespaces_before_count > attr.end_token.whitespaces_after_count):
                 attrs1 = (None)
             elif (attr.begin_token.whitespaces_before_count == attr.end_token.whitespaces_after_count):
                 rt1 = PersonAnalyzer._tryAttachPerson(attr.begin_token, ad, False, -1, False)
                 if (rt1 is not None):
                     attrs1 = (None)
     if (attrs1 is not None):
         # Apply the accepted trailing attributes and extend ``end`` over them.
         for a in attrs1:
             if (a.typ != PersonAttrTerminType.PREFIX):
                 if (a.age is not None):
                     p.addSlot(PersonReferent.ATTR_AGE, a.age, True, 0)
                 elif (a.prop_ref is None):
                     p.addSlot(PersonReferent.ATTR_ATTR, a.value, False, 0)
                 else:
                     p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
                 end = a.end_token
                 if (a.gender != MorphGender.UNDEFINED and not p.is_female and not p.is_male):
                     if (a.gender == MorphGender.MASCULINE and not p.is_male):
                         p.is_male = True
                         p._correctData()
                     elif (a.gender == MorphGender.FEMINIE and not p.is_female):
                         p.is_female = True
                         p._correctData()
         if (open_br):
             if (end.next0_ is not None and end.next0_.isChar(')')):
                 end = end.next0_
     # ---- Stage 6: contacts, identity documents and organizations ----
     crlf_cou = 0
     t = end.next0_
     first_pass3096 = True
     while True:
         if first_pass3096: first_pass3096 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char):
             break
         if (t.is_newline_before):
             ml = MailLine.parse(t, 0)
             if (ml is not None and ml.typ == MailLine.Types.FROM):
                 break
             crlf_cou += 1
         if (t.isCharOf(":,(") or t.is_hiphen):
             continue
         if (t.isChar('.') and t == end.next0_):
             continue
         r = t.getReferent()
         if (r is not None):
             if (r.type_name == "PHONE" or r.type_name == "URI" or r.type_name == "ADDRESS"):
                 ty = r.getStringValue("SCHEME")
                 if (r.type_name == "URI"):
                     # Only these URI schemes are accepted as contacts.
                     if ((ty != "mailto" and ty != "skype" and ty != "ICQ") and ty != "http"):
                         break
                 p._addContact(r)
                 end = t
                 crlf_cou = 0
                 continue
         if (isinstance(r, PersonIdentityReferent)):
             p.addSlot(PersonReferent.ATTR_IDDOC, r, False, 0)
             end = t
             crlf_cou = 0
             continue
         if (r is not None and r.type_name == "ORGANIZATION"):
             if (t.next0_ is not None and t.next0_.morph.class0_.is_verb):
                 break
             if (begin.previous is not None and begin.previous.morph.class0_.is_verb):
                 break
             if (t.whitespaces_after_count == 1):
                 break
             # Skip the organization when an existing attribute already references it.
             exist = False
             for s in p.slots:
                 if (s.type_name == PersonReferent.ATTR_ATTR and (isinstance(s.value, PersonPropertyReferent))):
                     pr = Utils.asObjectOrNull(s.value, PersonPropertyReferent)
                     if (pr.findSlot(PersonPropertyReferent.ATTR_REF, r, True) is not None):
                         exist = True
                         break
                 elif (s.type_name == PersonReferent.ATTR_ATTR and (isinstance(s.value, PersonAttrToken))):
                     pr = Utils.asObjectOrNull(s.value, PersonAttrToken)
                     if (pr.referent.findSlot(PersonPropertyReferent.ATTR_REF, r, True) is not None):
                         exist = True
                         break
             if (not exist):
                 # Attach the organization through a generic "сотрудник" property.
                 pat = PersonAttrToken(t, t)
                 pat.prop_ref = PersonPropertyReferent._new2291("сотрудник")
                 pat.prop_ref.addSlot(PersonPropertyReferent.ATTR_REF, r, False, 0)
                 p.addSlot(PersonReferent.ATTR_ATTR, pat, False, 0)
             continue
         if (r is not None):
             break
         # Plain text is skipped only after a BESTREGARDS prefix and within two line breaks.
         if (not has_prefix or crlf_cou >= 2):
             break
         rt = t.kit.processReferent("PERSON", t)
         if (rt is not None):
             break
     if (ad is not None):
         ad.overflow_level -= 1
     return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
 def __try_parse(t: 'Token', prev: 'PersonIdToken') -> 'PersonIdToken':
     """Parse one element of a personal identity document description at ``t``.

     Recognised, in this order:
       * the "СВИДЕТЕЛЬСТВО ..." phrase about registration of an individual
         entrepreneur (returned as a KEYWORD token);
       * an ontology term: KEYWORD (only this one is allowed when ``prev``
         is None), NUMBER, SERIA, CODE or ADDRESS, each followed by its value;
       * when ``prev`` is given: a bare number (NUMBER, or SERIA when it is
         short and followed by a number), a DATE / ORGANIZATION / ADDRESS
         referent, a letter token followed by a number (SERIA), or
         "ОТ" / "ВІД" followed by a date.

     Args:
         t: token to parse from; None yields None.
         prev: previously parsed element, or None for the first element.

     Returns:
         The parsed PersonIdToken or None when nothing matches.
     """
     # Fix: the recursive calls below pass ``next0_`` values that are None at
     # the end of the text; without this guard ``t.is_value`` raised
     # AttributeError. Every call site already treats None as "no match".
     if (t is None):
         return None
     if (t.is_value("СВИДЕТЕЛЬСТВО", None)):
         tt1 = t
         ip = False
         reg = False
         tt = t.next0_
         first_pass3372 = True
         while True:
             if first_pass3372: first_pass3372 = False
             else: tt = tt.next0_
             if (not (tt is not None)): break
             if (tt.is_comma_and or tt.morph.class0_.is_preposition):
                 continue
             if (tt.is_value("РЕГИСТРАЦИЯ", None)
                     or tt.is_value("РЕЕСТР", None)
                     or tt.is_value("ЗАРЕГИСТРИРОВАТЬ", None)):
                 reg = True
                 tt1 = tt
             elif (tt.is_value("ИНДИВИДУАЛЬНЫЙ", None)
                   or tt.is_value("ИП", None)):
                 ip = True
                 tt1 = tt
             elif ((tt.is_value("ВНЕСЕНИЕ", None)
                    or tt.is_value("ГОСУДАРСТВЕННЫЙ", None)
                    or tt.is_value("ЕДИНЫЙ", None))
                   or tt.is_value("ЗАПИСЬ", None)
                   or tt.is_value("ПРЕДПРИНИМАТЕЛЬ", None)):
                 tt1 = tt
             elif (tt.get_referent() is not None
                   and tt.get_referent().type_name == "DATERANGE"):
                 tt1 = tt
             else:
                 break
         # Both a registration word and an "individual" word must have been seen.
         if (reg and ip):
             return PersonIdToken._new2505(
                 t, tt1, PersonIdToken.Typs.KEYWORD,
                 "СВИДЕТЕЛЬСТВО О ГОСУДАРСТВЕННОЙ РЕГИСТРАЦИИ ФИЗИЧЕСКОГО ЛИЦА В КАЧЕСТВЕ ИНДИВИДУАЛЬНОГО ПРЕДПРИНИМАТЕЛЯ"
             )
     tok = PersonIdToken.M_ONTOLOGY.try_parse(t, TerminParseAttr.NO)
     if (tok is not None):
         ty = Utils.valToEnum(tok.termin.tag, PersonIdToken.Typs)
         res = PersonIdToken._new2505(tok.begin_token, tok.end_token, ty,
                                      tok.termin.canonic_text)
         if (prev is None):
             # The first element must be a keyword; extend it over a following
             # country / citizenship description.
             if (ty != PersonIdToken.Typs.KEYWORD):
                 return None
             t = tok.end_token.next0_
             first_pass3373 = True
             while True:
                 if first_pass3373: first_pass3373 = False
                 else: t = t.next0_
                 if (not (t is not None)): break
                 r = t.get_referent()
                 if (r is not None and (isinstance(r, GeoReferent))):
                     res.referent = r
                     res.end_token = t
                     continue
                 if (t.is_value("ГРАЖДАНИН", None) and t.next0_ is not None
                         and
                     (isinstance(t.next0_.get_referent(), GeoReferent))):
                     res.referent = t.next0_.get_referent()
                     res.end_token = t.next0_
                     t = res.end_token
                     continue
                 if (r is not None):
                     break
                 ait = PersonAttrToken.try_attach(
                     t, None, PersonAttrToken.PersonAttrAttachAttrs.NO)
                 if (ait is not None):
                     if (ait.referent is not None):
                         for s in ait.referent.slots:
                             if (s.type_name
                                     == PersonPropertyReferent.ATTR_REF and
                                 (isinstance(s.value, GeoReferent))):
                                 res.referent = (Utils.asObjectOrNull(
                                     s.value, Referent))
                     res.end_token = ait.end_token
                     break
                 if (t.is_value("ДАННЫЙ", None)):
                     res.end_token = t
                     continue
                 break
             # Only a state is kept as the keyword's referent.
             if ((isinstance(res.referent, GeoReferent))
                     and not res.referent.is_state):
                 res.referent = (None)
             return res
         if (ty == PersonIdToken.Typs.NUMBER):
             # Concatenate the number tokens that follow on the same line.
             tmp = io.StringIO()
             tt = tok.end_token.next0_
             if (tt is not None and tt.is_char(':')):
                 tt = tt.next0_
             while tt is not None:
                 if (tt.is_newline_before):
                     break
                 if (not (isinstance(tt, NumberToken))):
                     break
                 print(tt.get_source_text(), end="", file=tmp)
                 res.end_token = tt
                 tt = tt.next0_
             if (tmp.tell() < 1):
                 return None
             res.value = Utils.toStringStringIO(tmp)
             res.has_prefix = True
             return res
         if (ty == PersonIdToken.Typs.SERIA):
             tmp = io.StringIO()
             tt = tok.end_token.next0_
             if (tt is not None and tt.is_char(':')):
                 tt = tt.next0_
             next_num = False
             first_pass3374 = True
             while True:
                 if first_pass3374: first_pass3374 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.is_newline_before):
                     break
                 if (MiscHelper.check_number_prefix(tt) is not None):
                     next_num = True
                     break
                 if (not (isinstance(tt, NumberToken))):
                     # Non-numeric part: an upper-case roman number or a
                     # two-character text token.
                     if (not (isinstance(tt, TextToken))):
                         break
                     if (not tt.chars.is_all_upper):
                         break
                     nu = NumberHelper.try_parse_roman(tt)
                     if (nu is not None):
                         print(nu.get_source_text(), end="", file=tmp)
                         tt = nu.end_token
                     elif (tt.length_char != 2):
                         break
                     else:
                         print(tt.term, end="", file=tmp)
                         res.end_token = tt
                     if (tt.next0_ is not None and tt.next0_.is_hiphen):
                         tt = tt.next0_
                     continue
                 if (tmp.tell() >= 4):
                     break
                 print(tt.get_source_text(), end="", file=tmp)
                 res.end_token = tt
             if (tmp.tell() < 4):
                 # A short seria is accepted only when a number follows it.
                 if (tmp.tell() < 2):
                     return None
                 tt1 = res.end_token.next0_
                 if (tt1 is not None and tt1.is_comma):
                     tt1 = tt1.next0_
                 next0__ = PersonIdToken.__try_parse(tt1, res)
                 if (next0__ is not None
                         and next0__.typ == PersonIdToken.Typs.NUMBER):
                     pass
                 else:
                     return None
             res.value = Utils.toStringStringIO(tmp)
             res.has_prefix = True
             return res
         if (ty == PersonIdToken.Typs.CODE):
             # Extend over the following number tokens, skipping ':' and hyphens.
             tt = res.end_token.next0_
             first_pass3375 = True
             while True:
                 if first_pass3375: first_pass3375 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.is_char_of(":") or tt.is_hiphen):
                     continue
                 if (isinstance(tt, NumberToken)):
                     res.end_token = tt
                     continue
                 break
         if (ty == PersonIdToken.Typs.ADDRESS):
             if (isinstance(t.get_referent(), AddressReferent)):
                 res.referent = t.get_referent()
                 res.end_token = t
                 return res
             tt = res.end_token.next0_
             first_pass3376 = True
             while True:
                 if first_pass3376: first_pass3376 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if (tt.is_char_of(":") or tt.is_hiphen
                         or tt.morph.class0_.is_preposition):
                     continue
                 if (isinstance(tt.get_referent(), AddressReferent)):
                     res.referent = tt.get_referent()
                     res.end_token = tt
                 break
             if (res.referent is None):
                 return None
         return res
     elif (prev is None):
         return None
     # No ontology term matched and there is a previous element: try values
     # that appear without their own keyword.
     t0 = t
     t1 = MiscHelper.check_number_prefix(t0)
     if (t1 is not None):
         t = t1
     if (isinstance(t, NumberToken)):
         tmp = io.StringIO()
         res = PersonIdToken._new2507(t0, t, PersonIdToken.Typs.NUMBER)
         tt = t
         while tt is not None:
             if (tt.is_newline_before or not (isinstance(tt, NumberToken))):
                 break
             print(tt.get_source_text(), end="", file=tmp)
             res.end_token = tt
             tt = tt.next0_
         if (tmp.tell() < 4):
             # A short number right after a keyword is a seria when a number follows.
             if (tmp.tell() < 2):
                 return None
             if (prev is None or prev.typ != PersonIdToken.Typs.KEYWORD):
                 return None
             ne = PersonIdToken.__try_parse(res.end_token.next0_, prev)
             if (ne is not None and ne.typ == PersonIdToken.Typs.NUMBER):
                 res.typ = PersonIdToken.Typs.SERIA
             else:
                 return None
         res.value = Utils.toStringStringIO(tmp)
         if (t0 != t):
             res.has_prefix = True
         return res
     if (isinstance(t, ReferentToken)):
         r = t.get_referent()
         if (r is not None):
             if (r.type_name == "DATE"):
                 return PersonIdToken._new2508(t, t,
                                               PersonIdToken.Typs.DATE, r)
             if (r.type_name == "ORGANIZATION"):
                 return PersonIdToken._new2508(t, t, PersonIdToken.Typs.ORG,
                                               r)
             if (r.type_name == "ADDRESS"):
                 return PersonIdToken._new2508(t, t,
                                               PersonIdToken.Typs.ADDRESS,
                                               r)
     # A letter token (not all lower-case) right after a keyword is a seria
     # when a number follows it.
     if ((prev is not None and prev.typ == PersonIdToken.Typs.KEYWORD and
          (isinstance(t, TextToken))) and not t.chars.is_all_lower
             and t.chars.is_letter):
         rr = PersonIdToken.__try_parse(t.next0_, prev)
         if (rr is not None and rr.typ == PersonIdToken.Typs.NUMBER):
             return PersonIdToken._new2505(t, t, PersonIdToken.Typs.SERIA,
                                           t.term)
     if ((t is not None and t.is_value("ОТ", "ВІД") and
          (isinstance(t.next0_, ReferentToken)))
             and t.next0_.get_referent().type_name == "DATE"):
         return PersonIdToken._new2508(t, t.next0_, PersonIdToken.Typs.DATE,
                                       t.next0_.get_referent())
     return None
Exemple #5
0
 def __calc_rank_and_value(self, min_newlines_count: int) -> bool:
     self.rank = 0
     if (self.begin_token.chars.is_all_lower):
         self.rank -= 30
     words = 0
     up_words = 0
     notwords = 0
     line_number = 0
     tstart = self.begin_token
     tend = self.end_token
     t = self.begin_token
     first_pass3396 = True
     while True:
         if first_pass3396: first_pass3396 = False
         else: t = t.next0_
         if (not (t != self.end_token.next0_ and t is not None
                  and t.end_char <= self.end_token.end_char)):
             break
         if (t.is_newline_before):
             pass
         tit = TitleItemToken.try_attach(t)
         if (tit is not None):
             if (tit.typ == TitleItemToken.Types.THEME
                     or tit.typ == TitleItemToken.Types.TYPANDTHEME):
                 if (t != self.begin_token):
                     if (line_number > 0):
                         return False
                     notwords = 0
                     up_words = notwords
                     words = up_words
                     tstart = tit.end_token.next0_
                 t = tit.end_token
                 if (t.next0_ is None):
                     return False
                 if (t.next0_.chars.is_letter
                         and t.next0_.chars.is_all_lower):
                     self.rank += 20
                 else:
                     self.rank += 100
                 tstart = t.next0_
                 if (tit.typ == TitleItemToken.Types.TYPANDTHEME):
                     self.type_value = tit.value
                 continue
             if (tit.typ == TitleItemToken.Types.TYP):
                 if (t == self.begin_token):
                     if (tit.end_token.is_newline_after):
                         self.type_value = tit.value
                         self.rank += 5
                         tstart = tit.end_token.next0_
                 t = tit.end_token
                 words += 1
                 if (tit.begin_token != tit.end_token):
                     words += 1
                 if (tit.chars.is_all_upper):
                     up_words += 1
                 continue
             if (tit.typ == TitleItemToken.Types.DUST
                     or tit.typ == TitleItemToken.Types.SPECIALITY):
                 if (t == self.begin_token):
                     return False
                 self.rank -= 20
                 if (tit.typ == TitleItemToken.Types.SPECIALITY):
                     self.speciality = tit.value
                 t = tit.end_token
                 continue
             if (tit.typ == TitleItemToken.Types.CONSULTANT
                     or tit.typ == TitleItemToken.Types.BOSS
                     or tit.typ == TitleItemToken.Types.EDITOR):
                 t = tit.end_token
                 if (t.next0_ is not None and
                     ((t.next0_.is_char_of(":") or t.next0_.is_hiphen
                       or t.whitespaces_after_count > 4))):
                     self.rank -= 10
                 else:
                     self.rank -= 2
                 continue
             return False
         blt = BookLinkToken.try_parse(t, 0)
         if (blt is not None):
             if (blt.typ == BookLinkTyp.MISC or blt.typ == BookLinkTyp.N
                     or blt.typ == BookLinkTyp.PAGES):
                 self.rank -= 10
             elif (blt.typ == BookLinkTyp.N
                   or blt.typ == BookLinkTyp.PAGERANGE):
                 self.rank -= 20
         if (t == self.begin_token and BookLinkToken.try_parse_author(
                 t, FioTemplateType.UNDEFINED) is not None):
             self.rank -= 20
         if (t.is_newline_before and t != self.begin_token):
             line_number += 1
             if (line_number > 4):
                 return False
             if (t.chars.is_all_lower):
                 self.rank += 10
             elif (t.previous.is_char('.')):
                 self.rank -= 10
             elif (t.previous.is_char_of(",-")):
                 self.rank += 10
             else:
                 npt = NounPhraseHelper.try_parse(t.previous,
                                                  NounPhraseParseAttr.NO, 0,
                                                  None)
                 if (npt is not None and npt.end_char >= t.end_char):
                     self.rank += 10
         if (t != self.begin_token
                 and t.newlines_before_count > min_newlines_count):
             self.rank -= (t.newlines_before_count - min_newlines_count)
         bst = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
         if (bst is not None and bst.is_quote_type
                 and bst.end_token.end_char <= self.end_token.end_char):
             if (words == 0):
                 tstart = bst.begin_token
                 self.rank += 10
                 if (bst.end_token == self.end_token):
                     tend = self.end_token
                     self.rank += 10
         rli = t.get_referents()
         if (rli is not None):
             for r in rli:
                 if (isinstance(r, OrganizationReferent)):
                     if (t.is_newline_before):
                         self.rank -= 10
                     else:
                         self.rank -= 4
                     continue
                 if ((isinstance(r, GeoReferent))
                         or (isinstance(r, PersonReferent))):
                     if (t.is_newline_before):
                         self.rank -= 5
                         if (t.is_newline_after or t.next0_ is None):
                             self.rank -= 20
                         elif (t.next0_.is_hiphen
                               or (isinstance(t.next0_, NumberToken))
                               or (isinstance(t.next0_.get_referent(),
                                              DateReferent))):
                             self.rank -= 20
                         elif (t != self.begin_token):
                             self.rank -= 20
                     continue
                 if ((isinstance(r, GeoReferent))
                         or (isinstance(r, DenominationReferent))):
                     continue
                 if ((isinstance(r, UriReferent))
                         or (isinstance(r, PhoneReferent))):
                     return False
                 if (t.is_newline_before):
                     self.rank -= 4
                 else:
                     self.rank -= 2
                 if (t == self.begin_token and (isinstance(
                         self.end_token.get_referent(), PersonReferent))):
                     self.rank -= 10
             words += 1
             if (t.chars.is_all_upper):
                 up_words += 1
             if (t == self.begin_token):
                 if (t.is_newline_after):
                     self.rank -= 10
                 elif (t.next0_ is not None and t.next0_.is_char('.')
                       and t.next0_.is_newline_after):
                     self.rank -= 10
             continue
         if (isinstance(t, NumberToken)):
             if (t.typ == NumberSpellingType.WORDS):
                 words += 1
                 if (t.chars.is_all_upper):
                     up_words += 1
             else:
                 notwords += 1
             continue
         pat = PersonAttrToken.try_attach(
             t, None, PersonAttrToken.PersonAttrAttachAttrs.NO)
         if (pat is not None):
             if (t.is_newline_before):
                 if (not pat.morph.case_.is_undefined
                         and not pat.morph.case_.is_nominative):
                     pass
                 elif (pat.chars.is_all_upper):
                     pass
                 else:
                     self.rank -= 20
             elif (t.chars.is_all_lower):
                 self.rank -= 1
             while t is not None:
                 words += 1
                 if (t.chars.is_all_upper):
                     up_words += 1
                 if (t == pat.end_token):
                     break
                 t = t.next0_
             continue
         oitt = OrgItemTypeToken.try_attach(t, True, None)
         if (oitt is not None):
             if (oitt.morph.number != MorphNumber.PLURAL
                     and not oitt.is_doubt_root_word):
                 if (not oitt.morph.case_.is_undefined
                         and not oitt.morph.case_.is_nominative):
                     words += 1
                     if (t.chars.is_all_upper):
                         up_words += 1
                 else:
                     self.rank -= 4
                     if (t == self.begin_token):
                         self.rank -= 5
             else:
                 words += 1
                 if (t.chars.is_all_upper):
                     up_words += 1
             t = oitt.end_token
             continue
         tt = Utils.asObjectOrNull(t, TextToken)
         if (tt is not None):
             if (tt.is_char('©')):
                 self.rank -= 10
             if (tt.is_char('_')):
                 self.rank -= 1
             if (tt.chars.is_letter):
                 if (tt.length_char > 2):
                     words += 1
                     if (t.chars.is_all_upper):
                         up_words += 1
             elif (not tt.is_char(',')):
                 notwords += 1
             if (tt.is_pure_verb):
                 self.rank -= 30
                 words -= 1
                 break
             if (tt == self.end_token):
                 if (tt.morph.class0_.is_preposition
                         or tt.morph.class0_.is_conjunction):
                     self.rank -= 10
                 elif (tt.is_char('.')):
                     self.rank += 5
             elif (tt.is_char_of("._")):
                 self.rank -= 5
     self.rank += words
     self.rank -= notwords
     if ((words < 1) and (self.rank < 50)):
         return False
     if (tstart is None or tend is None):
         return False
     if (tstart.end_char > tend.end_char):
         return False
     tit1 = TitleItemToken.try_attach(self.end_token.next0_)
     if (tit1 is not None
             and ((tit1.typ == TitleItemToken.Types.TYP
                   or tit1.typ == TitleItemToken.Types.SPECIALITY))):
         if (tit1.end_token.is_newline_after):
             self.rank += 15
         else:
             self.rank += 10
         if (tit1.typ == TitleItemToken.Types.SPECIALITY):
             self.speciality = tit1.value
     if (up_words > 4 and up_words > (math.floor((0.8 * (words))))):
         if (tstart.previous is not None and
             (isinstance(tstart.previous.get_referent(), PersonReferent))):
             self.rank += (5 + up_words)
     self.begin_name_token = tstart
     self.end_name_token = tend
     return True
Exemple #6
0
 def kind(self) -> 'PersonPropertyKind':
     """Property category.

     Returns:
         The value produced by ``PersonAttrToken.check_kind`` for this
         object.
     """
     # The import is kept at function scope, as in the original code.
     from pullenti.ner.person.internal.PersonAttrToken import PersonAttrToken
     category = PersonAttrToken.check_kind(self)
     return category
Exemple #7
0
 def kind(self) -> 'PersonPropertyKind':
     """Return the kind reported by ``PersonAttrToken.checkKind`` for this object."""
     # The import is kept at function scope, as in the original code.
     from pullenti.ner.person.internal.PersonAttrToken import PersonAttrToken
     resolved_kind = PersonAttrToken.checkKind(self)
     return resolved_kind