def try_parse_author(
        t: 'Token',
        prev_pers_template: 'FioTemplateType' = FioTemplateType.UNDEFINED
) -> 'BookLinkToken':
    """Try to read an author item of a bibliographic reference starting at ``t``.

    Three shapes are recognised, in this order:

    1. a person (via ``PersonItemToken.try_parse_person``);
    2. a person wrapped in square brackets ``[ ... ]``;
    3. an "and others" marker ("И ДРУГИЕ", "И ДР", "ET AL", optionally
       followed by a dot).

    Args:
        t: first token to examine; ``None`` yields ``None``.
        prev_pers_template: name template of the previously parsed author,
            forwarded to the person parser.

    Returns:
        A ``BookLinkToken`` of type ``PERSON`` or ``ANDOTHERS``, or ``None``
        when nothing matched.
    """
    if t is None:
        return None

    person = PersonItemToken.try_parse_person(t, prev_pers_template)
    if person is not None:
        if person.data is not None:
            link = BookLinkToken._new327(t, person.end_token,
                                         BookLinkTyp.PERSON, person)
        else:
            link = BookLinkToken._new326(
                t, (t if person == t else person.end_token),
                BookLinkTyp.PERSON, person.referent)
        link.person_template = Utils.valToEnum(person.misc_attrs,
                                               FioTemplateType)

        # Look for a person-property token inside the person span.
        cur = person.begin_token
        while cur is not None and cur.end_char <= person.end_char:
            if isinstance(cur.get_referent(), PersonPropertyReferent):
                prop_token = Utils.asObjectOrNull(cur, ReferentToken)
                if not (prop_token.begin_token.chars.is_capital_upper
                        and cur != person.begin_token):
                    # A property that is first in the span or not
                    # capitalised rejects the whole candidate.
                    return None
                link.start_of_name = MiscHelper.get_text_value_of_meta_token(
                    prop_token, GetTextAttr.KEEPREGISTER)
                break
            cur = cur.next0_
        return link

    if t.is_char('['):
        inner = BookLinkToken.try_parse_author(t.next0_,
                                               FioTemplateType.UNDEFINED)
        if inner is not None:
            closing = inner.end_token.next0_
            if closing is not None and closing.is_char(']'):
                # Widen the result so that it covers both brackets.
                inner.begin_token = t
                inner.end_token = closing
                return inner

    if (t.is_value("И", None) or t.is_value("ET", None)) and t.next0_ is not None:
        follower = t.next0_
        if (follower.is_value("ДРУГИЕ", None) or follower.is_value("ДР", None)
                or follower.is_value("AL", None)):
            others = BookLinkToken._new328(t, follower, BookLinkTyp.ANDOTHERS)
            if follower.next0_ is not None and follower.next0_.is_char('.'):
                others.end_token = others.end_token.next0_
            return others

    return None
def createNickname(pr: 'PersonReferent', t: 'Token') -> 'Token':
    """Extract a nickname that follows a nickname keyword.

    Args:
        pr(PersonReferent): person that receives the ``ATTR_NICKNAME`` slots
        t(Token): start token

    Returns:
        Token: if not None, the last token of the nickname; the nickname
        itself is written into ``pr``
    """
    keyword_seen = False
    in_parens = False

    # Skip punctuation, prepositions, an opening parenthesis and the keyword.
    while t is not None:
        if t.is_hiphen or t.is_comma or t.isCharOf(".:;"):
            pass
        elif t.morph.class0_.is_preposition:
            pass
        elif t.isChar('('):
            in_parens = True
        elif ((t.isValue("ПРОЗВИЩЕ", "ПРІЗВИСЬКО") or t.isValue("КЛИЧКА", None)
               or t.isValue("ПСЕВДОНИМ", "ПСЕВДОНІМ"))
              or t.isValue("ПСЕВДО", None)
              or t.isValue("ПОЗЫВНОЙ", "ПОЗИВНИЙ")):
            keyword_seen = True
        else:
            break
        t = t.next0_

    if t is None or not keyword_seen:
        return None

    if BracketHelper.isBracket(t, True):
        # Quoted nickname, possibly followed by more quoted nicknames.
        seq = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
        if seq is None:
            return None
        nick = MiscHelper.getTextValue(seq.begin_token.next0_,
                                       seq.end_token.previous, GetTextAttr.NO)
        if nick is None:
            return None
        pr.addSlot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
        t = seq.end_token

        probe = t.next0_
        while probe is not None:
            if not probe.is_comma_and:
                if not BracketHelper.isBracket(probe, True):
                    break
                seq = BracketHelper.tryParse(probe, BracketParseAttr.NO, 100)
                if seq is None:
                    break
                nick = MiscHelper.getTextValue(seq.begin_token.next0_,
                                               seq.end_token.previous,
                                               GetTextAttr.NO)
                if nick is not None:
                    pr.addSlot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
                probe = seq.end_token
                t = probe
            probe = probe.next0_

        if in_parens and t.next0_ is not None and t.next0_.isChar(')'):
            t = t.next0_
        return t

    # Unquoted nickname: one or two person items.
    items = PersonItemToken.tryAttachList(t, None,
                                          PersonItemToken.ParseAttr.NO, 10)
    if items is None or not (len(items) == 1 or len(items) == 2):
        return None
    nick = MiscHelper.getTextValue(items[0].begin_token,
                                   items[len(items) - 1].end_token,
                                   GetTextAttr.NO)
    if nick is None:
        return None
    pr.addSlot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
    t = items[len(items) - 1].end_token
    if in_parens and t.next0_ is not None and t.next0_.isChar(')'):
        t = t.next0_
    return t
def _createReferentToken(p : 'PersonReferent', begin : 'Token', end : 'Token', morph_ : 'MorphCollection', attrs : typing.List['PersonAttrToken'], ad : 'PersonAnalyzerData', for_attribute : bool, after_be_predicate : bool) -> 'ReferentToken':
    """Build the ReferentToken for person ``p``, absorbing surrounding attributes.

    Visible steps:
      1. apply the leading attribute tokens ``attrs`` to ``p`` (or detect a
         preceding "ИП" text token when ``attrs`` is None);
      2. rebuild ``morph_`` as a new collection with singular number;
      3. return early for the "for attribute" case or when
         ``ad.overflow_level`` exceeds 10;
      4. scan the tokens after ``end`` for trailing attributes, nicknames and
         a parenthesised last name, extending ``end``;
      5. scan further for contacts (PHONE / URI / ADDRESS), identity documents
         and organizations, extending ``end``.

    Args:
        p: person being completed; ``None`` yields ``None``.
        begin: first token of the person; may be moved left.
        end: last token of the person; may be moved right.
        morph_: source morphology; its items are copied, not modified in place.
        attrs: attribute tokens found before the person, or ``None``.
        ad: analyzer data supplying ``local_ontology`` and ``overflow_level``,
            or ``None``.
        for_attribute: when true (or when "ИМЕНИ"/"ИМ" precedes ``begin``) the
            token is returned without scanning the tail.
        after_be_predicate: not read anywhere in this function.

    Returns:
        The result of ``ReferentToken._new2329(p, begin, end, morph_,
        p._m_person_identity_typ)``, or ``None`` when ``p`` is ``None``.

    NOTE(review): the indentation below was reconstructed from a
    whitespace-collapsed source; the nesting of a few statements (the
    ``elif`` chain after the '(' branch, the tail of the LASTNAME branch, the
    ``end = a.end_token`` line) is ambiguous there and should be confirmed
    against upstream.
    """
    from pullenti.ner.person.internal.PersonIdentityToken import PersonIdentityToken
    if (p is None):
        return None
    has_prefix = False
    # --- 1. leading attributes -------------------------------------------
    if (attrs is not None):
        for a in attrs:
            if (a.typ == PersonAttrTerminType.BESTREGARDS):
                has_prefix = True
            else:
                # an attribute standing before the name pulls `begin` left
                if (a.begin_char < begin.begin_char):
                    begin = a.begin_token
                if (a.typ != PersonAttrTerminType.PREFIX):
                    if (a.age is not None):
                        p.addSlot(PersonReferent.ATTR_AGE, a.age, False, 0)
                    if (a.prop_ref is None):
                        p.addSlot(PersonReferent.ATTR_ATTR, a.value, False, 0)
                    else:
                        p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
                elif (a.gender == MorphGender.FEMINIE and not p.is_female):
                    p.is_female = True
                elif (a.gender == MorphGender.MASCULINE and not p.is_male):
                    p.is_male = True
    elif ((isinstance(begin.previous, TextToken)) and (begin.whitespaces_before_count < 3)):
        # no attributes: a directly preceding "ИП" becomes a synthetic attribute
        if ((begin.previous).term == "ИП"):
            a = PersonAttrToken(begin.previous, begin.previous)
            a.prop_ref = PersonPropertyReferent()
            a.prop_ref.name = "индивидуальный предприниматель"
            p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
            begin = begin.previous
    # --- 2. morphology: copy items, force singular, fill gender -----------
    m0 = MorphCollection()
    for it in morph_.items:
        bi = MorphBaseInfo(it)
        bi.number = MorphNumber.SINGULAR
        if (bi.gender == MorphGender.UNDEFINED):
            if (p.is_male and not p.is_female):
                bi.gender = MorphGender.MASCULINE
            if (not p.is_male and p.is_female):
                bi.gender = MorphGender.FEMINIE
        m0.addItem(bi)
    morph_ = m0
    # case undefined so far: borrow case/number from the first attribute
    if ((attrs is not None and len(attrs) > 0 and not attrs[0].morph.case_.is_undefined) and morph_.case_.is_undefined):
        morph_.case_ = attrs[0].morph.case_
        if (attrs[0].morph.number == MorphNumber.SINGULAR):
            morph_.number = MorphNumber.SINGULAR
        if (p.is_male and not p.is_female):
            morph_.gender = MorphGender.MASCULINE
        elif (p.is_female):
            morph_.gender = MorphGender.FEMINIE
    # --- 3. early exits ----------------------------------------------------
    if (begin.previous is not None):
        ttt = begin.previous
        if (ttt.isValue("ИМЕНИ", "ІМЕНІ")):
            for_attribute = True
        else:
            # also accept the abbreviation "ИМ" with an optional dot
            if (ttt.isChar('.') and ttt.previous is not None):
                ttt = ttt.previous
            if (ttt.whitespaces_after_count < 3):
                if (ttt.isValue("ИМ", "ІМ")):
                    for_attribute = True
    if (for_attribute):
        return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
    # person list "X, Y and <this>": walk back to the first person of the list
    if ((begin.previous is not None and begin.previous.is_comma_and and (isinstance(begin.previous.previous, ReferentToken))) and (isinstance(begin.previous.previous.getReferent(), PersonReferent))):
        rt00 = Utils.asObjectOrNull(begin.previous.previous, ReferentToken)
        ttt = rt00
        while ttt is not None:
            if (ttt.previous is None or not ((isinstance(ttt.previous.previous, ReferentToken)))):
                break
            if (not ttt.previous.is_comma_and or not ((isinstance(ttt.previous.previous.getReferent(), PersonReferent)))):
                break
            rt00 = (Utils.asObjectOrNull(ttt.previous.previous, ReferentToken))
            ttt = (rt00)
        # share the first person's leading property if it is followed by ':'
        # or is plural
        if (isinstance(rt00.begin_token.getReferent(), PersonPropertyReferent)):
            ok = False
            if ((rt00.begin_token).end_token.next0_ is not None and (rt00.begin_token).end_token.next0_.isChar(':')):
                ok = True
            elif (rt00.begin_token.morph.number == MorphNumber.PLURAL):
                ok = True
            if (ok):
                p.addSlot(PersonReferent.ATTR_ATTR, rt00.begin_token.getReferent(), False, 0)
    if (ad is not None):
        # nesting guard; the counter is decremented before the final return
        if (ad.overflow_level > 10):
            return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
        ad.overflow_level += 1
    # --- 4. trailing attributes -------------------------------------------
    attrs1 = None
    has_position = False
    open_br = False
    t = end.next0_
    first_pass3095 = True
    while True:
        # emulated for-loop: advance `t` on every pass but the first
        if first_pass3095: first_pass3095 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.is_table_control_char):
            break
        if (t.is_newline_before):
            if (t.newlines_before_count > 2):
                break
            if (attrs1 is not None and len(attrs1) > 0):
                break
            ml = MailLine.parse(t, 0)
            if (ml is not None and ml.typ == MailLine.Types.FROM):
                break
            if (t.chars.is_capital_upper):
                attr1 = PersonAttrToken.tryAttach(t, (None if ad is None else ad.local_ontology), PersonAttrToken.PersonAttrAttachAttrs.NO)
                ok1 = False
                if (attr1 is not None):
                    if (has_prefix or attr1.is_newline_after or ((attr1.end_token.next0_ is not None and attr1.end_token.next0_.is_table_control_char))):
                        ok1 = True
                    else:
                        # accept if a later token inside the attribute has whitespace before it
                        tt2 = t.next0_
                        while tt2 is not None and tt2.end_char <= attr1.end_char:
                            if (tt2.is_whitespace_before):
                                ok1 = True
                            tt2 = tt2.next0_
                else:
                    ttt = PersonHelper.__correctTailAttributes(p, t)
                    if (ttt is not None and ttt != t):
                        t = ttt
                        end = t
                        continue
                if (not ok1):
                    break
        if (t.is_hiphen or t.isCharOf("_>|")):
            continue
        if (t.isValue("МОДЕЛЬ", None)):
            break
        tt = PersonHelper.__correctTailAttributes(p, t)
        if (tt != t and tt is not None):
            t = tt
            end = t
            continue
        # NOTE: is_be is never set to True in this block
        is_be = False
        if (t.isChar('(') and t == end.next0_):
            open_br = True
            t = t.next0_
            if (t is None):
                break
            # "(Lastname)" right after the person: register it as another last name
            pit1 = PersonItemToken.tryAttach(t, None, PersonItemToken.ParseAttr.NO, None)
            if ((pit1 is not None and t.chars.is_capital_upper and pit1.end_token.next0_ is not None) and (isinstance(t, TextToken)) and pit1.end_token.next0_.isChar(')')):
                if (pit1.lastname is not None):
                    inf = MorphBaseInfo._new2321(MorphCase.NOMINATIVE)
                    if (p.is_male):
                        inf.gender = Utils.valToEnum((inf.gender) | (MorphGender.MASCULINE), MorphGender)
                    if (p.is_female):
                        inf.gender = Utils.valToEnum((inf.gender) | (MorphGender.FEMINIE), MorphGender)
                    sur = PersonIdentityToken.createLastname(pit1, inf)
                    if (sur is not None):
                        p._addFioIdentity(sur, None, None)
                        t = pit1.end_token.next0_
                        end = t
                        continue
        elif (t.is_comma):
            t = t.next0_
            if ((isinstance(t, TextToken)) and (t).isValue("WHO", None)):
                continue
        elif ((isinstance(t, TextToken)) and (t).is_verb_be):
            t = t.next0_
        elif (t.is_and and t.is_whitespace_after and not t.is_newline_after):
            if (t == end.next0_):
                break
            t = t.next0_
        elif (t.is_hiphen and t == end.next0_):
            t = t.next0_
        elif (t.isChar('.') and t == end.next0_ and has_prefix):
            t = t.next0_
        # a nickname introduced by a keyword extends the person span
        ttt2 = PersonHelper.createNickname(p, t)
        if (ttt2 is not None):
            end = ttt2
            t = end
            continue
        if (t is None):
            break
        attr = None
        attr = PersonAttrToken.tryAttach(t, (None if ad is None else ad.local_ontology), PersonAttrToken.PersonAttrAttachAttrs.NO)
        if (attr is None):
            # no attribute here: handle a few special tokens, then stop scanning
            if ((t is not None and t.getReferent() is not None and t.getReferent().type_name == "GEO") and attrs1 is not None and open_br):
                continue
            if ((t.chars.is_capital_upper and open_br and t.next0_ is not None) and t.next0_.isChar(')')):
                if (p.findSlot(PersonReferent.ATTR_LASTNAME, None, True) is None):
                    p.addSlot(PersonReferent.ATTR_LASTNAME, t.getSourceText().upper(), False, 0)
                    t = t.next0_
                    end = t
            # a following singular "КОТОРЫЙ" reveals the gender
            if (t is not None and t.isValue("КОТОРЫЙ", None) and t.morph.number == MorphNumber.SINGULAR):
                if (not p.is_female and t.morph.gender == MorphGender.FEMINIE):
                    p.is_female = True
                    p._correctData()
                elif (not p.is_male and t.morph.gender == MorphGender.MASCULINE):
                    p.is_male = True
                    p._correctData()
            break
        if (attr.morph.number == MorphNumber.PLURAL):
            break
        if (attr.typ == PersonAttrTerminType.BESTREGARDS):
            break
        if (attr.is_doubt):
            # doubtful attributes are accepted only in these contexts
            if (has_prefix):
                pass
            elif (t.is_newline_before and attr.is_newline_after):
                pass
            elif (t.previous is not None and ((t.previous.is_hiphen or t.previous.isChar(':')))):
                pass
            else:
                break
        # the attribute's case must intersect the person's case
        if (not morph_.case_.is_undefined and not attr.morph.case_.is_undefined):
            if (((morph_.case_) & attr.morph.case_).is_undefined and not is_be):
                break
        if (open_br):
            if (PersonAnalyzer._tryAttachPerson(t, ad, False, 0, True) is not None):
                break
        if (attrs1 is None):
            if (t.previous.is_comma and t.previous == end.next0_):
                ttt = attr.end_token.next0_
                if (ttt is not None):
                    if (ttt.morph.class0_.is_verb):
                        if (MiscHelper.canBeStartOfSentence(begin)):
                            pass
                        else:
                            break
            attrs1 = list()
        attrs1.append(attr)
        if (attr.typ == PersonAttrTerminType.POSITION or attr.typ == PersonAttrTerminType.KING):
            if (not is_be):
                has_position = True
        elif (attr.typ != PersonAttrTerminType.PREFIX):
            if (attr.typ == PersonAttrTerminType.OTHER and attr.age is not None):
                pass
            else:
                # any other attribute type discards the collected tail
                attrs1 = (None)
                break
        t = attr.end_token
    # a position both before and after the name: decide whether to keep the tail
    if (attrs1 is not None and has_position and attrs is not None):
        te1 = attrs[len(attrs) - 1].end_token.next0_
        te2 = attrs1[0].begin_token
        if (te1.whitespaces_after_count > te2.whitespaces_before_count and (te2.whitespaces_before_count < 2)):
            pass
        elif (attrs1[0].age is not None):
            pass
        elif (((te1.is_hiphen or te1.isChar(':'))) and not attrs1[0].is_newline_before and ((te2.previous.is_comma or te2.previous == end))):
            pass
        else:
            for a in attrs:
                if (a.typ == PersonAttrTerminType.POSITION):
                    te = attrs1[len(attrs1) - 1].end_token
                    if (te.next0_ is not None):
                        if (not te.next0_.isChar('.')):
                            attrs1 = (None)
                            break
    # the tail may instead belong to a following person: compare the spacing
    if (attrs1 is not None and not has_prefix):
        attr = attrs1[len(attrs1) - 1]
        ok = False
        if (attr.end_token.next0_ is not None and attr.end_token.next0_.chars.is_capital_upper):
            ok = True
        else:
            rt = PersonAnalyzer._tryAttachPerson(attr.begin_token, ad, False, -1, False)
            if (rt is not None and (isinstance(rt.referent, PersonReferent))):
                ok = True
        if (ok):
            if (attr.begin_token.whitespaces_before_count > attr.end_token.whitespaces_after_count):
                attrs1 = (None)
            elif (attr.begin_token.whitespaces_before_count == attr.end_token.whitespaces_after_count):
                rt1 = PersonAnalyzer._tryAttachPerson(attr.begin_token, ad, False, -1, False)
                if (rt1 is not None):
                    attrs1 = (None)
    # commit the accepted trailing attributes to the person
    if (attrs1 is not None):
        for a in attrs1:
            if (a.typ != PersonAttrTerminType.PREFIX):
                if (a.age is not None):
                    p.addSlot(PersonReferent.ATTR_AGE, a.age, True, 0)
                elif (a.prop_ref is None):
                    p.addSlot(PersonReferent.ATTR_ATTR, a.value, False, 0)
                else:
                    p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
                end = a.end_token
                if (a.gender != MorphGender.UNDEFINED and not p.is_female and not p.is_male):
                    if (a.gender == MorphGender.MASCULINE and not p.is_male):
                        p.is_male = True
                        p._correctData()
                    elif (a.gender == MorphGender.FEMINIE and not p.is_female):
                        p.is_female = True
                        p._correctData()
        if (open_br):
            # swallow the closing parenthesis
            if (end.next0_ is not None and end.next0_.isChar(')')):
                end = end.next0_
    # --- 5. contacts, identity documents, organizations -------------------
    crlf_cou = 0
    t = end.next0_
    first_pass3096 = True
    while True:
        if first_pass3096: first_pass3096 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.is_table_control_char):
            break
        if (t.is_newline_before):
            ml = MailLine.parse(t, 0)
            if (ml is not None and ml.typ == MailLine.Types.FROM):
                break
            crlf_cou += 1
        if (t.isCharOf(":,(") or t.is_hiphen):
            continue
        if (t.isChar('.') and t == end.next0_):
            continue
        r = t.getReferent()
        if (r is not None):
            if (r.type_name == "PHONE" or r.type_name == "URI" or r.type_name == "ADDRESS"):
                ty = r.getStringValue("SCHEME")
                if (r.type_name == "URI"):
                    # only these URI schemes count as contacts
                    if ((ty != "mailto" and ty != "skype" and ty != "ICQ") and ty != "http"):
                        break
                p._addContact(r)
                end = t
                crlf_cou = 0
                continue
        if (isinstance(r, PersonIdentityReferent)):
            p.addSlot(PersonReferent.ATTR_IDDOC, r, False, 0)
            end = t
            crlf_cou = 0
            continue
        if (r is not None and r.type_name == "ORGANIZATION"):
            if (t.next0_ is not None and t.next0_.morph.class0_.is_verb):
                break
            if (begin.previous is not None and begin.previous.morph.class0_.is_verb):
                break
            if (t.whitespaces_after_count == 1):
                break
            # skip if an existing attribute already references this organization
            exist = False
            for s in p.slots:
                if (s.type_name == PersonReferent.ATTR_ATTR and (isinstance(s.value, PersonPropertyReferent))):
                    pr = Utils.asObjectOrNull(s.value, PersonPropertyReferent)
                    if (pr.findSlot(PersonPropertyReferent.ATTR_REF, r, True) is not None):
                        exist = True
                        break
                elif (s.type_name == PersonReferent.ATTR_ATTR and (isinstance(s.value, PersonAttrToken))):
                    pr = Utils.asObjectOrNull(s.value, PersonAttrToken)
                    if (pr.referent.findSlot(PersonPropertyReferent.ATTR_REF, r, True) is not None):
                        exist = True
                        break
            if (not exist):
                # otherwise record the person as "сотрудник" of the organization
                pat = PersonAttrToken(t, t)
                pat.prop_ref = PersonPropertyReferent._new2291("сотрудник")
                pat.prop_ref.addSlot(PersonPropertyReferent.ATTR_REF, r, False, 0)
                p.addSlot(PersonReferent.ATTR_ATTR, pat, False, 0)
            continue
        if (r is not None):
            break
        # plain text is skipped only after a BESTREGARDS prefix and for fewer
        # than two line breaks
        if (not has_prefix or crlf_cou >= 2):
            break
        rt = t.kit.processReferent("PERSON", t)
        if (rt is not None):
            break
    if (ad is not None):
        ad.overflow_level -= 1
    return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
def create_nickname(pr: 'PersonReferent', t: 'Token') -> 'Token':
    """Extract one or more nicknames that follow a nickname keyword.

    Args:
        pr(PersonReferent): person that receives the ``ATTR_NICKNAME`` slots
        t(Token): start token

    Returns:
        Token: the last token consumed by the nickname(s), or None when no
        keyword or no nickname was found
    """
    keyword_seen = False
    in_parens = False

    # Skip punctuation, prepositions, an opening parenthesis and the keyword.
    while t is not None:
        if t.is_hiphen or t.is_comma or t.is_char_of(".:;"):
            pass
        elif t.morph.class0_.is_preposition:
            pass
        elif t.is_char('('):
            in_parens = True
        elif ((t.is_value("ПРОЗВИЩЕ", "ПРІЗВИСЬКО") or t.is_value("КЛИЧКА", None)
               or t.is_value("ПСЕВДОНИМ", "ПСЕВДОНІМ"))
              or t.is_value("ПСЕВДО", None)
              or t.is_value("ПОЗЫВНОЙ", "ПОЗИВНИЙ")):
            keyword_seen = True
        else:
            break
        t = t.next0_

    if t is None or not keyword_seen:
        return None

    if BracketHelper.is_bracket(t, True):
        # Quoted nickname, possibly followed by more quoted nicknames.
        seq = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
        if seq is None:
            return None
        nick = MiscHelper.get_text_value(seq.begin_token.next0_,
                                         seq.end_token.previous,
                                         GetTextAttr.NO)
        if nick is None:
            return None
        pr.add_slot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
        t = seq.end_token

        probe = t.next0_
        while probe is not None:
            if not probe.is_comma_and:
                if not BracketHelper.is_bracket(probe, True):
                    break
                seq = BracketHelper.try_parse(probe, BracketParseAttr.NO, 100)
                if seq is None:
                    break
                nick = MiscHelper.get_text_value(seq.begin_token.next0_,
                                                 seq.end_token.previous,
                                                 GetTextAttr.NO)
                if nick is not None:
                    pr.add_slot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
                probe = seq.end_token
                t = probe
            probe = probe.next0_

        if in_parens and t.next0_ is not None and t.next0_.is_char(')'):
            t = t.next0_
        return t

    # Unquoted nicknames: a run of person items or single-token referents,
    # optionally separated by commas / "and".
    last = None
    while t is not None:
        if t.is_comma_and:
            t = t.next0_
            continue
        if last is not None and t.chars.is_all_lower:
            break
        if t.whitespaces_before_count > 2:
            break

        items = PersonItemToken.try_attach_list(
            t, None, PersonItemToken.ParseAttr.NO, 10)
        nick = None
        if items is not None and (len(items) == 1 or len(items) == 2):
            nick = MiscHelper.get_text_value(items[0].begin_token,
                                             items[len(items) - 1].end_token,
                                             GetTextAttr.NO)

        if nick is not None:
            pr.add_slot(PersonReferent.ATTR_NICKNAME, nick, False, 0)
            t = items[len(items) - 1].end_token
        elif (isinstance(t, ReferentToken) and not t.chars.is_all_lower
              and t.begin_token == t.end_token):
            val = MiscHelper.get_text_value_of_meta_token(
                Utils.asObjectOrNull(t, ReferentToken), GetTextAttr.NO)
            pr.add_slot(PersonReferent.ATTR_NICKNAME, val, False, 0)
        else:
            break

        if in_parens and t.next0_ is not None and t.next0_.is_char(')'):
            t = t.next0_
        last = t
        t = t.next0_
    return last
def __tryParse(t: 'Token', is_in_lit: bool, max_char: int = 0) -> typing.List['ReferentToken']: if (t is None): return None is_bracket_regime = False if (t.previous is not None and t.previous.isChar('(')): is_bracket_regime = True blt = BookLinkToken.tryParse(t, 0) if (blt is None): blt = BookLinkToken.tryParseAuthor(t, FioTemplateType.UNDEFINED) if (blt is None and not is_bracket_regime): return None t0 = t coef = 0 is_electr_res = False decree = None regtyp = BookLinkAnalyzer.RegionTyp.UNDEFINED num = None spec_see = None book_prev = None if (is_bracket_regime): regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS elif (blt.typ == BookLinkTyp.PERSON): if (not is_in_lit): return None regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS elif (blt.typ == BookLinkTyp.NUMBER): num = blt.value t = blt.end_token.next0_ if (t is None or t.is_newline_before): return None if (not t.is_whitespace_before): if (isinstance(t, NumberToken)): n = (t).value if ((((n == "3" or n == "0")) and not t.is_whitespace_after and (isinstance(t.next0_, TextToken))) and t.next0_.chars.is_all_lower): pass else: return None elif (not ((isinstance(t, TextToken))) or t.chars.is_all_lower): r = t.getReferent() if (isinstance(r, PersonReferent)): pass elif (is_in_lit and r is not None and r.type_name == "DECREE"): pass else: return None first_pass2757 = True while True: if first_pass2757: first_pass2757 = False else: t = t.next0_ if (not (t is not None)): break if (isinstance(t, NumberToken)): break if (not ((isinstance(t, TextToken)))): break if (BracketHelper.canBeStartOfSequence(t, True, False)): break if (not t.chars.is_letter): continue bbb = BookLinkToken.tryParse(t, 0) if (bbb is not None): if (bbb.typ == BookLinkTyp.TAMZE): spec_see = bbb t = bbb.end_token.next0_ break if (bbb.typ == BookLinkTyp.SEE): t = bbb.end_token continue break if (spec_see is not None and spec_see.typ == BookLinkTyp.TAMZE): coef += 1 max0_ = 1000 tt = t0 while tt is not None and max0_ > 0: if (isinstance(tt.getReferent(), 
BookLinkRefReferent)): book_prev = (tt.getReferent()).book break tt = tt.previous max0_ -= 1 blt1 = BookLinkToken.tryParseAuthor(t, FioTemplateType.UNDEFINED) if (blt1 is not None and blt1.typ == BookLinkTyp.PERSON): regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS else: ok = False tt = t first_pass2758 = True while True: if first_pass2758: first_pass2758 = False else: tt = (None if tt is None else tt.next0_) if (not (tt is not None)): break if (tt.is_newline_before): break if (is_in_lit and tt.getReferent() is not None and tt.getReferent().type_name == "DECREE"): ok = True decree = tt break bbb = BookLinkToken.tryParse(tt, 0) if (bbb is None): continue if (bbb.typ == BookLinkTyp.ELECTRONRES): is_electr_res = True ok = True break if (bbb.typ == BookLinkTyp.DELIMETER): tt = bbb.end_token.next0_ if (BookLinkToken.tryParseAuthor( tt, FioTemplateType.UNDEFINED) is not None): ok = True break bbb = BookLinkToken.tryParse(tt, 0) if (bbb is not None): if (bbb.typ == BookLinkTyp.EDITORS or bbb.typ == BookLinkTyp.TRANSLATE or bbb.typ == BookLinkTyp.SOSTAVITEL): ok = True break if (not ok and not is_in_lit): if (BookLinkToken.checkLinkBefore(t0, num)): pass else: return None regtyp = BookLinkAnalyzer.RegionTyp.NAME else: return None res = BookLinkReferent() corr_authors = list() t00 = t blt00 = None start_of_name = None prev_pers_templ = FioTemplateType.UNDEFINED if (regtyp == BookLinkAnalyzer.RegionTyp.AUTHORS): first_pass2759 = True while True: if first_pass2759: first_pass2759 = False else: t = t.next0_ if (not (t is not None)): break if (max_char > 0 and t.begin_char >= max_char): break if (t.isCharOf(".;") or t.is_comma_and): continue if (t.isChar('/')): break if ((t.isChar('(') and t.next0_ is not None and t.next0_.isValue("EDS", None)) and t.next0_.next0_ is not None and t.next0_.next0_.isChar(')')): t = t.next0_.next0_.next0_ break blt = BookLinkToken.tryParseAuthor(t, prev_pers_templ) if (blt is None and t.previous is not None and t.previous.is_and): blt = 
BookLinkToken.tryParseAuthor( t.previous, FioTemplateType.UNDEFINED) if (blt is None): if ((isinstance(t.getReferent(), OrganizationReferent)) and blt00 is not None): bbb2 = BookLinkToken.tryParse(t.next0_, 0) if (bbb2 is not None): if (bbb2.typ == BookLinkTyp.YEAR): res.addSlot(BookLinkReferent.ATTR_AUTHOR, t.getReferent(), False, 0) res.year = int(bbb2.value) coef += .5 t = bbb2.end_token.next0_ break if (blt.typ == BookLinkTyp.PERSON): tt2 = blt.end_token.next0_ bbb2 = BookLinkToken.tryParse(tt2, 0) if (bbb2 is not None): if (bbb2.typ == BookLinkTyp.YEAR): res.year = int(bbb2.value) coef += .5 blt.end_token = bbb2.end_token blt00 = (None) if (blt00 is not None and ((blt00.end_token.next0_ == blt.begin_token or blt.begin_token.previous.isChar('.')))): tt11 = blt.end_token.next0_ nex = BookLinkToken.tryParse(tt11, 0) if (nex is not None and nex.typ == BookLinkTyp.ANDOTHERS): pass else: if (tt11 is None): break if (tt11.isChar('/') and tt11.next0_ is not None and tt11.next0_.isChar('/')): break if (tt11.isChar(':')): break if ((str(blt).find('.') < 0) and str(blt00).find('.') > 0): break if ((isinstance(tt11, TextToken)) and tt11.chars.is_all_lower): break if (tt11.isCharOf(",.;") and tt11.next0_ is not None): tt11 = tt11.next0_ nex = BookLinkToken.tryParse(tt11, 0) if (nex is not None and nex.typ != BookLinkTyp.PERSON and nex.typ != BookLinkTyp.ANDOTHERS): break elif ( (blt00 is not None and blt00.person_template != FioTemplateType.UNDEFINED and blt.person_template != blt00.person_template) and blt.person_template == FioTemplateType.NAMESURNAME): if (blt.end_token.next0_ is None or not blt.end_token.next0_.is_comma_and): break if (BookLinkToken.tryParseAuthor( blt.end_token.next0_.next0_, FioTemplateType.UNDEFINED) is not None): pass else: break if (blt00 is None and blt.person_template == FioTemplateType.NAMESURNAME): tt = blt.end_token.next0_ if (tt is not None and tt.is_hiphen): tt = tt.next0_ if (isinstance(tt, NumberToken)): break 
BookLinkAnalyzer.__addAuthor(res, blt) coef += 1 t = blt.end_token if (isinstance(t.getReferent(), PersonReferent)): corr_authors.append( Utils.asObjectOrNull(t, ReferentToken)) blt00 = blt prev_pers_templ = blt.person_template start_of_name = blt.start_of_name if ((start_of_name) is not None): t = t.next0_ break continue if (blt.typ == BookLinkTyp.ANDOTHERS): coef += .5 t = blt.end_token.next0_ res.authors_and_other = True break break if (t is None): return None if ((t.is_newline_before and t != t0 and num is None) and res.findSlot( BookLinkReferent.ATTR_AUTHOR, None, True) is None): return None if (start_of_name is None): if (t.chars.is_all_lower): coef -= (1) if (t.chars.is_latin_letter and not is_electr_res and num is None): if (res.getSlotValue(BookLinkReferent.ATTR_AUTHOR) is None): return None tn0 = t tn1 = None uri = None next_num = None wrapnn393 = RefOutArgWrapper(0) inoutres394 = Utils.tryParseInt(Utils.ifNotNull(num, ""), wrapnn393) nn = wrapnn393.value if (inoutres394): next_num = str((nn + 1)) br = (BracketHelper.tryParse( t, Utils.valToEnum( (BracketParseAttr.CANCONTAINSVERBS) | (BracketParseAttr.CANBEMANYLINES), BracketParseAttr), 100) if BracketHelper.canBeStartOfSequence(t, True, False) else None) if (br is not None): t = t.next0_ pages = None first_pass2760 = True while True: if first_pass2760: first_pass2760 = False else: t = t.next0_ if (not (t is not None)): break if (max_char > 0 and t.begin_char >= max_char): break if (br is not None and br.end_token == t): tn1 = t break tit = TitleItemToken.tryAttach(t) if (tit is not None): if ((tit.typ == TitleItemToken.Types.TYP and tn0 == t and br is None) and BracketHelper.canBeStartOfSequence( tit.end_token.next0_, True, False)): br = BracketHelper.tryParse(tit.end_token.next0_, BracketParseAttr.NO, 100) if (br is not None): coef += (1) if (num is not None): coef += 1 tn0 = br.begin_token tn1 = br.end_token res.typ = tit.value.lower() t = br.end_token.next0_ break if (t.is_newline_before and t != 
tn0): if (br is not None and (t.end_char < br.end_char)): pass elif (not MiscHelper.canBeStartOfSentence(t)): pass else: if (t.newlines_before_count > 1): break if ((isinstance(t, NumberToken)) and num is not None and (t).int_value is not None): if (num == str(((t).int_value - 1))): break elif (num is not None): pass else: nnn = NounPhraseHelper.tryParse( t.previous, Utils.valToEnum( ((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.PARSEADVERBS) | (NounPhraseParseAttr.PARSENUMERICASADJECTIVE)) | (NounPhraseParseAttr.MULTILINES), NounPhraseParseAttr), 0) if (nnn is not None and nnn.end_char >= t.end_char): pass else: break if (t.isCharOf(".;") and t.whitespaces_after_count > 0): tit = TitleItemToken.tryAttach(t.next0_) if ((tit) is not None): if (tit.typ == TitleItemToken.Types.TYP): break stop = True words = 0 notwords = 0 tt = t.next0_ first_pass2761 = True while True: if first_pass2761: first_pass2761 = False else: tt = tt.next0_ if (not (tt is not None)): break blt0 = BookLinkToken.tryParse(tt, 0) if (blt0 is None): if (tt.is_newline_before): break if ((isinstance(tt, TextToken)) and not tt.getMorphClassInDictionary().is_undefined ): words += 1 else: notwords += 1 if (words > 6 and words > (notwords * 4)): stop = False break continue if ((blt0.typ == BookLinkTyp.DELIMETER or blt0.typ == BookLinkTyp.TRANSLATE or blt0.typ == BookLinkTyp.TYPE) or blt0.typ == BookLinkTyp.GEO or blt0.typ == BookLinkTyp.PRESS): stop = False break if (br is not None and br.end_token.previous.end_char > t.end_char): stop = False if (stop): break if (t == decree): t = t.next0_ break blt = BookLinkToken.tryParse(t, 0) if (blt is None): tn1 = t continue if (blt.typ == BookLinkTyp.DELIMETER): break if (((blt.typ == BookLinkTyp.MISC or blt.typ == BookLinkTyp.TRANSLATE or blt.typ == BookLinkTyp.NAMETAIL) or blt.typ == BookLinkTyp.TYPE or blt.typ == BookLinkTyp.VOLUME) or blt.typ == BookLinkTyp.PAGERANGE or blt.typ == BookLinkTyp.PAGES): coef += 1 break if (blt.typ == 
BookLinkTyp.GEO or blt.typ == BookLinkTyp.PRESS): if (t.previous.is_hiphen or t.previous.isCharOf(".;") or blt.add_coef > 0): break if (blt.typ == BookLinkTyp.YEAR): if (t.previous is not None and t.previous.is_comma): break if (blt.typ == BookLinkTyp.ELECTRONRES): is_electr_res = True break if (blt.typ == BookLinkTyp.URL): if (t == tn0 or t.previous.isCharOf(":.")): is_electr_res = True break tn1 = t if (tn1 is None and start_of_name is None): if (is_electr_res): uri_re = BookLinkReferent() rt0 = ReferentToken(uri_re, t00, t) rts0 = list() bref0 = BookLinkRefReferent._new389(uri_re) if (num is not None): bref0.number = num rt01 = ReferentToken(bref0, t0, rt0.end_token) ok = False while t is not None: if (t.is_newline_before): break blt0 = BookLinkToken.tryParse(t, 0) if (blt0 is not None): if (isinstance(blt0.ref, UriReferent)): uri_re.addSlot( BookLinkReferent.ATTR_URL, Utils.asObjectOrNull(blt0.ref, UriReferent), False, 0) ok = True t = blt0.end_token rt0.end_token = rt01.end_token = t t = t.next0_ if (ok): rts0.append(rt01) rts0.append(rt0) return rts0 if (decree is not None and num is not None): rts0 = list() bref0 = BookLinkRefReferent._new389(decree.getReferent()) if (num is not None): bref0.number = num rt01 = ReferentToken(bref0, t0, decree) t = decree.next0_ while t is not None: if (t.is_newline_before): break if (isinstance(t, TextToken)): if ((t).is_pure_verb): return None rt01.end_token = t t = t.next0_ rts0.append(rt01) return rts0 if (book_prev is not None): tt = t while tt is not None and ((tt.isCharOf(",.") or tt.is_hiphen)): tt = tt.next0_ blt0 = BookLinkToken.tryParse(tt, 0) if (blt0 is not None and blt0.typ == BookLinkTyp.PAGERANGE): rts0 = list() bref0 = BookLinkRefReferent._new389(book_prev) if (num is not None): bref0.number = num bref0.pages = blt0.value rt00 = ReferentToken(bref0, t0, blt0.end_token) rts0.append(rt00) return rts0 return None if (br is not None and ((tn1 == br.end_token or tn1 == br.end_token.previous))): tn0 = tn0.next0_ 
tn1 = tn1.previous if (start_of_name is None): while tn0 is not None: if (tn0.isCharOf(":,~")): tn0 = tn0.next0_ else: break while tn1 is not None and tn1.begin_char > tn0.begin_char: if (tn1.isCharOf(".;,:(~") or tn1.is_hiphen or tn1.isValue("РЕД", None)): pass else: break tn1 = tn1.previous nam = MiscHelper.getTextValue( tn0, tn1, Utils.valToEnum( (GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER), GetTextAttr)) if (start_of_name is not None): if (nam is None or (len(nam) < 3)): nam = start_of_name else: nam = "{0}{1}{2}".format( start_of_name, (" " if tn0.is_whitespace_before else ""), nam) if (nam is None): return None res.name = nam if (num is None and not is_in_lit): if (len(nam) < 20): return None coef -= (2) if (len(nam) > 500): coef -= (math.floor(len(nam) / 500)) if (is_bracket_regime): coef -= 1 if (len(nam) > 200): if (num is None): return None if (res.findSlot(BookLinkReferent.ATTR_AUTHOR, None, True) is None and not BookLinkToken.checkLinkBefore(t0, num)): return None en = 0 ru = 0 ua = 0 cha = 0 nocha = 0 chalen = 0 lt0 = tn0 lt1 = tn1 if (tn1 is None): if (t is None): return None lt0 = t0 lt1 = t tn1 = t.previous tt = lt0 while tt is not None and tt.end_char <= lt1.end_char: if ((isinstance(tt, TextToken)) and tt.chars.is_letter): if (tt.chars.is_latin_letter): en += 1 elif (tt.morph.language.is_ua): ua += 1 elif (tt.morph.language.is_ru): ru += 1 if (tt.length_char > 2): cha += 1 chalen += tt.length_char elif (not ((isinstance(tt, ReferentToken)))): nocha += 1 tt = tt.next0_ if (ru > (ua + en)): res.lang = "RU" elif (ua > (ru + en)): res.lang = "UA" elif (en > (ru + ua)): res.lang = "EN" if (nocha > 3 and nocha > cha and start_of_name is None): if (nocha > (math.floor(chalen / 3))): coef -= (2) if (res.lang == "EN"): tt = tn0.next0_ first_pass2762 = True while True: if first_pass2762: first_pass2762 = False else: tt = tt.next0_ if (not (tt is not None and (tt.end_char < tn1.end_char))): break if (tt.is_comma and tt.next0_ is not None and ((not 
tt.next0_.chars.is_all_lower or (isinstance(tt.next0_, ReferentToken))))): if (tt.next0_.next0_ is not None and tt.next0_.next0_.is_comma_and): if (isinstance(tt.next0_, ReferentToken)): pass else: continue nam = MiscHelper.getTextValue( tn0, tt.previous, Utils.valToEnum((GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER), GetTextAttr)) if (nam is not None and len(nam) > 15): res.name = nam break rt = ReferentToken(res, t00, tn1) authors = True edits = False br = (None) first_pass2763 = True while True: if first_pass2763: first_pass2763 = False else: t = t.next0_ if (not (t is not None)): break if (max_char > 0 and t.begin_char >= max_char): break if (BracketHelper.canBeStartOfSequence(t, False, False)): br = BracketHelper.tryParse(t, BracketParseAttr.CANBEMANYLINES, 100) if (br is not None and br.length_char > 300): br = (None) blt = BookLinkToken.tryParse(t, 0) if (t.is_newline_before and not t.isChar('/') and not t.previous.isChar('/')): if (blt is not None and blt.typ == BookLinkTyp.NUMBER): break if (t.previous.isCharOf(":")): pass elif (blt is not None and (( ((blt.typ == BookLinkTyp.DELIMETER or blt.typ == BookLinkTyp.PAGERANGE or blt.typ == BookLinkTyp.PAGES) or blt.typ == BookLinkTyp.GEO or blt.typ == BookLinkTyp.PRESS) or blt.typ == BookLinkTyp.N))): pass elif (num is not None and BookLinkToken.tryParseAuthor( t, FioTemplateType.UNDEFINED) is not None): pass elif (num is not None and blt is not None and blt.typ != BookLinkTyp.NUMBER): pass elif (br is not None and (t.end_char < br.end_char) and t.begin_char > br.begin_char): pass else: ok = False mmm = 50 tt = t.next0_ while tt is not None and mmm > 0: if (tt.is_newline_before): blt2 = BookLinkToken.tryParse(tt, 0) if (blt2 is not None and blt2.typ == BookLinkTyp.NUMBER and blt2.value == next_num): ok = True break if (blt2 is not None): if (blt2.typ == BookLinkTyp.PAGES or blt2.typ == BookLinkTyp.GEO or blt2.typ == BookLinkTyp.PRESS): ok = True break tt = tt.next0_ mmm -= 1 if (not ok): npt = 
NounPhraseHelper.tryParse( t.previous, Utils.valToEnum( ((NounPhraseParseAttr.MULTILINES) | (NounPhraseParseAttr.PARSEADVERBS) | (NounPhraseParseAttr.PARSEPREPOSITION)) | (NounPhraseParseAttr.PARSEVERBS) | (NounPhraseParseAttr.PARSEPRONOUNS), NounPhraseParseAttr), 0) if (npt is not None and npt.end_char >= t.end_char): ok = True if (not ok): break rt.end_token = t if (blt is not None): rt.end_token = blt.end_token if (t.isCharOf(".,") or t.is_hiphen): continue if (t.isValue("С", None)): pass if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and blt is not None and blt.typ == BookLinkTyp.EDITORS): edits = True t = blt.end_token coef += 1 continue if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and blt is not None and blt.typ == BookLinkTyp.SOSTAVITEL): edits = False t = blt.end_token coef += 1 continue if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and authors): blt2 = BookLinkToken.tryParseAuthor(t, prev_pers_templ) if (blt2 is not None and blt2.typ == BookLinkTyp.PERSON): prev_pers_templ = blt2.person_template if (not edits): BookLinkAnalyzer.__addAuthor(res, blt2) coef += 1 t = blt2.end_token continue if (blt2 is not None and blt2.typ == BookLinkTyp.ANDOTHERS): if (not edits): res.authors_and_other = True coef += 1 t = blt2.end_token continue authors = False if (blt is None): continue if (blt.typ == BookLinkTyp.ELECTRONRES or blt.typ == BookLinkTyp.URL): is_electr_res = True if (blt.typ == BookLinkTyp.ELECTRONRES): coef += 1.5 else: coef += .5 if (isinstance(blt.ref, UriReferent)): res.addSlot(BookLinkReferent.ATTR_URL, Utils.asObjectOrNull(blt.ref, UriReferent), False, 0) elif (blt.typ == BookLinkTyp.YEAR): if (res.year == 0): res.year = int(blt.value) coef += .5 elif (blt.typ == BookLinkTyp.DELIMETER): coef += 1 if (blt.length_char == 2): regtyp = BookLinkAnalyzer.RegionTyp.SECOND else: regtyp = BookLinkAnalyzer.RegionTyp.FIRST elif ( (((blt.typ == BookLinkTyp.MISC or blt.typ == BookLinkTyp.TYPE or blt.typ == BookLinkTyp.PAGES) or blt.typ == BookLinkTyp.NAMETAIL or 
blt.typ == BookLinkTyp.TRANSLATE) or blt.typ == BookLinkTyp.PRESS or blt.typ == BookLinkTyp.VOLUME) or blt.typ == BookLinkTyp.N): coef += 1 elif (blt.typ == BookLinkTyp.PAGERANGE): pages = blt coef += 1 if (is_bracket_regime and blt.end_token.next0_ is not None and blt.end_token.next0_.isChar(')')): coef += (2) if (res.name is not None and res.findSlot(BookLinkReferent.ATTR_AUTHOR, None, True) is not None): coef = (10) elif (blt.typ == BookLinkTyp.GEO and ((regtyp == BookLinkAnalyzer.RegionTyp.SECOND or regtyp == BookLinkAnalyzer.RegionTyp.FIRST))): coef += 1 elif (blt.typ == BookLinkTyp.GEO and t.previous is not None and t.previous.isChar('.')): coef += 1 elif (blt.typ == BookLinkTyp.ANDOTHERS): coef += 1 if (authors): res.authors_and_other = True coef += blt.add_coef t = blt.end_token if ((coef < 2.5) and num is not None): if (BookLinkToken.checkLinkBefore(t0, num)): coef += (2) elif (BookLinkToken.checkLinkAfter(rt.end_token, num)): coef += (1) if (rt.length_char > 500): return None if (is_in_lit): coef += 1 if (coef < 2.5): if (is_electr_res and uri is not None): pass elif (coef >= 2 and is_in_lit): pass else: return None for rr in corr_authors: pits0 = PersonItemToken.tryAttachList( rr.begin_token, None, PersonItemToken.ParseAttr.CANINITIALBEDIGIT, 10) if (pits0 is None or (len(pits0) < 2)): continue if (pits0[0].typ == PersonItemToken.ItemType.VALUE): exi = False for i in range(len(rr.referent.slots) - 1, -1, -1): s = rr.referent.slots[i] if (s.type_name == PersonReferent.ATTR_LASTNAME): ln = Utils.asObjectOrNull(s.value, str) if (ln is None): continue if (ln == pits0[0].value): exi = True continue if (ln.find('-') > 0): ln = ln[0:0 + ln.find('-')] if (pits0[0].begin_token.isValue(ln, None)): del rr.referent.slots[i] if (not exi): rr.referent.addSlot(PersonReferent.ATTR_LASTNAME, pits0[0].value, False, 0) rts = list() bref = BookLinkRefReferent._new389(res) if (num is not None): bref.number = num rt1 = ReferentToken(bref, t0, rt.end_token) if (pages is not 
None): if (pages.value is not None): bref.pages = pages.value rt.end_token = pages.begin_token.previous rts.append(rt1) rts.append(rt) return rts
def parse(t0: 'Token', lev_: int) -> 'MailLine':
    """Parse one text line starting at ``t0`` into a classified MailLine.

    The line extends from ``t0`` up to (not including) the next token that
    has ``is_newline_before`` set.  While scanning it, leading ``>`` / ``|``
    text tokens are counted into ``res.lev``, a match of
    ``MailLine.M_FROM_WORDS`` followed by ``:`` marks the line as FROM, and
    person / geo / address / person-property referents as well as referents
    whose ``type_name`` is PHONE, URI or ORGANIZATION are collected into
    ``res.refs``.

    If the type is still UNDEFINED, three detectors run in order and the
    first one that succeeds sets ``res.typ``:

    1. HELLO - at least one ``M_HELLO_WORDS`` match and fewer than three
       unrecognised tokens;
    2. BESTREGARDS - at least one ``M_REGARD_WORDS`` match (the tokens
       that belong to the formula get their ``tag`` attribute set);
    3. FROM - forwarded-message markers or a "date ... sender ... wrote"
       header, which may extend the line over the following one.

    Args:
        t0(Token): first token of the line
        lev_(int): recursion depth; the look-ahead for a date that stands
            alone on its line is performed only while ``lev_ < 5``

    Returns:
        MailLine: the parsed line, or None when ``t0`` is None
    """
    if t0 is None:
        return None
    res = MailLine(t0, t0)

    # ---- Pass 1: find the end of the line, quote level, header, referents.
    # The "stepped" flag emulates a C-style for-loop: every `continue`
    # must still advance to the next token.
    in_quote_prefix = True
    t = t0
    stepped = False
    while True:
        if stepped:
            t = t.next0_
        stepped = True
        if t is None:
            break
        if t.is_newline_before and t0 != t:
            break
        res.end_token = t
        if t.is_table_control_char or t.is_hiphen:
            continue
        if in_quote_prefix:
            if isinstance(t, TextToken) and t.isCharOf(">|"):
                res.lev += 1
            else:
                in_quote_prefix = False
                tok = MailLine.M_FROM_WORDS.tryParse(t, TerminParseAttr.NO)
                if (tok is not None and tok.end_token.next0_ is not None
                        and tok.end_token.next0_.isChar(':')):
                    res.typ = MailLine.Types.FROM
                    t = tok.end_token.next0_
                    continue
        if isinstance(t, ReferentToken):
            r = t.getReferent()
            if r is not None:
                if (isinstance(r, PersonReferent)
                        or isinstance(r, GeoReferent)
                        or isinstance(r, AddressReferent)
                        or r.type_name == "PHONE"
                        or r.type_name == "URI"
                        or isinstance(r, PersonPropertyReferent)
                        or r.type_name == "ORGANIZATION"):
                    res.refs.append(r)

    # ---- Pass 2: greeting line.
    if res.typ == MailLine.Types.UNDEFINED:
        # Skip leading non-letter tokens (and hyphens).
        t = t0
        while t is not None and t.end_char < res.end_char:
            if not t.is_hiphen and t.chars.is_letter:
                break
            t = t.next0_
        hello_count = 0
        other_count = 0
        last_comma = None
        stepped = False
        while True:
            if stepped:
                t = t.next0_
            stepped = True
            if t is None or not (t.end_char < res.end_char):
                break
            if isinstance(t.getReferent(), PersonReferent):
                continue
            if isinstance(t, TextToken):
                if not t.chars.is_letter:
                    # Any non-letter text token is remembered here, not
                    # only commas.
                    last_comma = t
                    continue
                tok = MailLine.M_HELLO_WORDS.tryParse(t, TerminParseAttr.NO)
                if tok is not None:
                    hello_count += 1
                    t = tok.end_token
                    continue
                if (t.isValue("ВСЕ", None) or t.isValue("ALL", None)
                        or t.isValue("TEAM", None)):
                    continue
                pit = PersonItemToken.tryAttach(
                    t, None, PersonItemToken.ParseAttr.NO, None)
                if pit is not None:
                    t = pit.end_token
                    continue
            other_count += 1
            if other_count > 3:
                if hello_count > 0 and last_comma is not None:
                    # Cut the line at the last separator and forgive the
                    # unrecognised tokens counted so far.
                    res.end_token = last_comma
                    other_count = 0
                break
        if other_count < 3 and hello_count > 0:
            res.typ = MailLine.Types.HELLO

    # ---- Pass 3: closing formula.
    if res.typ == MailLine.Types.UNDEFINED:
        ok_words = 0
        t = t0
        stepped = False
        while True:
            if stepped:
                t = t.next0_
            stepped = True
            if t is None or not (t.end_char <= res.end_char):
                break
            if not isinstance(t, TextToken):
                continue
            if t.isChar('<'):
                br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
                if br is not None:
                    t = br.end_token
                    continue
            if not t.is_letters or t.is_table_control_char:
                continue
            tok = MailLine.M_REGARD_WORDS.tryParse(t, TerminParseAttr.NO)
            if tok is not None:
                ok_words += 1
                while t is not None and t.end_char <= tok.end_char:
                    t.tag = tok.termin
                    t = t.next0_
                t = tok.end_token
                if (isinstance(t.next0_, TextToken)
                        and t.next0_.morph.case_.is_genitive):
                    # Tag a following chain of genitive noun phrases.
                    t = t.next0_
                    gen_stepped = False
                    while True:
                        if gen_stepped:
                            t = t.next0_
                        gen_stepped = True
                        # The None check matters when the phrase is the
                        # very last thing in the text.
                        if t is None or not (t.end_char <= res.end_char):
                            break
                        if t.morph.class0_.is_conjunction:
                            continue
                        npt1 = NounPhraseHelper.tryParse(
                            t, NounPhraseParseAttr.NO, 0)
                        if npt1 is None:
                            break
                        if not npt1.morph.case_.is_genitive:
                            break
                        while t.end_char < npt1.end_char:
                            t.tag = t
                            t = t.next0_
                        t.tag = t
                    if t is None:
                        # Ran off the end of the text: nothing left to scan.
                        break
                continue
            if ((t.morph.class0_.is_preposition
                 or t.morph.class0_.is_conjunction
                 or t.morph.class0_.is_misc)
                    or t.isValue("C", None)):
                continue
            if (ok_words > 0 and t.previous is not None
                    and t.previous.is_comma
                    and t.previous.begin_char > t0.begin_char
                    and not t.chars.is_all_lower):
                # Formula already seen and a non-lowercase word follows a
                # comma: end the line at that comma.
                res.end_token = t.previous
                break
            npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
            if npt is None:
                if (res.end_char - t.end_char) > 10:
                    ok_words = 0
                break
            tok = MailLine.M_REGARD_WORDS.tryParse(
                npt.end_token, TerminParseAttr.NO)
            if tok is not None and isinstance(npt.end_token, TextToken):
                if npt.end_token.term == "ДЕЛ":
                    tok = None
            if tok is None:
                if npt.noun.isValue("НАДЕЖДА", None):
                    t.tag = t
                elif (ok_words > 0 and t.isValue("NICE", None)
                        and (res.end_char - npt.end_char) < 13):
                    t.tag = t
                else:
                    ok_words = 0
                break
            ok_words += 1
            while t is not None and t.end_char <= tok.end_char:
                t.tag = tok.termin
                t = t.next0_
            t = tok.end_token
        if ok_words > 0:
            res.typ = MailLine.Types.BESTREGARDS

    # ---- Pass 4: forwarded-message marker or "date ... sender wrote" header.
    if res.typ == MailLine.Types.UNDEFINED:
        # Stop at the first non-text token or the first letter token.
        t = t0
        while t is not None and t.end_char < res.end_char:
            if not isinstance(t, TextToken):
                break
            elif not t.is_hiphen and t.chars.is_letter:
                break
            t = t.next0_
        if t is not None:
            if ((t.isValue("ПЕРЕСЫЛАЕМОЕ", None)
                 or t.isValue("ПЕРЕАДРЕСОВАННОЕ", None))
                    and t.next0_ is not None
                    and t.next0_.isValue("СООБЩЕНИЕ", None)):
                res.typ = MailLine.Types.FROM
                res.must_be_first_line = True
            elif ((t.isValue("НАЧАЛО", None) and t.next0_ is not None
                   and (t.next0_.isValue("ПЕРЕСЫЛАЕМОЕ", None)
                        or t.next0_.isValue("ПЕРЕАДРЕСОВАННОЕ", None)))
                    and t.next0_.next0_ is not None
                    and t.next0_.next0_.isValue("СООБЩЕНИЕ", None)):
                res.typ = MailLine.Types.FROM
                res.must_be_first_line = True
            elif (t.isValue("ORIGINAL", None) and t.next0_ is not None
                    and (t.next0_.isValue("MESSAGE", None)
                         or t.next0_.isValue("APPOINTMENT", None))):
                res.typ = MailLine.Types.FROM
                res.must_be_first_line = True
            elif (t.isValue("ПЕРЕСЛАНО", None) and t.next0_ is not None
                    and t.next0_.isValue("ПОЛЬЗОВАТЕЛЕМ", None)):
                res.typ = MailLine.Types.FROM
                res.must_be_first_line = True
            elif ((t.getReferent() is not None
                   and t.getReferent().type_name == "DATE")
                    or (t.isValue("IL", None) and t.next0_ is not None
                        and t.next0_.isValue("GIORNO", None))
                    or (t.isValue("ON", None)
                        and isinstance(t.next0_, ReferentToken)
                        and t.next0_.getReferent().type_name == "DATE")):
                has_from = False
                first_ref = t.getReferent()
                has_date = (first_ref is not None
                            and first_ref.type_name == "DATE")
                if t.is_newline_after and lev_ < 5:
                    res1 = MailLine.parse(t.next0_, lev_ + 1)
                    if res1 is not None and res1.typ == MailLine.Types.HELLO:
                        res.typ = MailLine.Types.FROM
                # NOTE(review): this look-ahead is not limited by lev_, so a
                # long run of consecutive lines that all reach this branch
                # recurses once per line - confirm that is acceptable.
                next0__ = MailLine.parse(res.end_token.next0_, lev_ + 1)
                if next0__ is not None:
                    if next0__.typ != MailLine.Types.UNDEFINED:
                        next0__ = None
                # The header may continue on the next line, but only if that
                # line has no type of its own.
                tmax = res.end_char
                if next0__ is not None:
                    tmax = next0__.end_char
                br1 = None
                while t is not None and t.end_char <= tmax:
                    ref = t.getReferent()
                    if t.isValue("ОТ", None) or t.isValue("FROM", None):
                        has_from = True
                    elif (ref is not None
                            and (ref.type_name == "URI"
                                 or isinstance(ref, PersonReferent))):
                        if ref.type_name == "URI" and has_date:
                            if br1 is not None:
                                has_from = True
                                next0__ = None
                            if (t.previous.isChar('<')
                                    and t.next0_ is not None
                                    and t.next0_.isChar('>')):
                                t = t.next0_
                                if (t.next0_ is not None
                                        and t.next0_.isChar(':')):
                                    t = t.next0_
                                if t.is_newline_after:
                                    has_from = True
                                    next0__ = None
                        # Look for a "wrote" verb in the rest of this line.
                        t = t.next0_
                        while t is not None and t.end_char <= res.end_char:
                            if (t.isValue("HA", None)
                                    and t.next0_ is not None
                                    and t.next0_.isValue("SCRITTO", None)):
                                has_from = True
                                break
                            elif ((t.isValue("НАПИСАТЬ", None)
                                   or t.isValue("WROTE", None))
                                    and (res.end_char - t.end_char) < 10):
                                has_from = True
                                break
                            t = t.next0_
                        if has_from:
                            res.typ = MailLine.Types.FROM
                            # t is None when the sender was the last token
                            # of the text; next0__ can only be non-None then
                            # if that token lies on the following line, so
                            # the header spans it.
                            if (next0__ is not None
                                    and (t is None
                                         or t.end_char >= next0__.begin_char)):
                                res.end_token = next0__.end_token
                        break
                    elif (br1 is None and not t.isChar('<')
                            and BracketHelper.canBeStartOfSequence(
                                t, True, False)):
                        br1 = BracketHelper.tryParse(
                            t, BracketParseAttr.NO, 100)
                        if br1 is not None:
                            t = br1.end_token
                    t = t.next0_
            else:
                has_uri = False
                while t is not None and t.end_char < res.end_char:
                    ref = t.getReferent()
                    if (ref is not None
                            and (ref.type_name == "URI"
                                 or isinstance(ref, PersonReferent))):
                        has_uri = True
                    elif t.isValue("ПИСАТЬ", None) and has_uri:
                        if t.next0_ is not None and t.next0_.isChar('('):
                            res.typ = MailLine.Types.FROM
                            break
                    t = t.next0_
    return res