def __calc_newline_between_coef(fr1: 'FragToken', fr2: 'FragToken') -> int:
    """Score how strongly the newline between fragments fr1 and fr2 separates them.

    Returns 1 when the break looks like a real fragment boundary, -1 when the
    two fragments appear to be one logical unit, 0 when neutral.
    """
    # More than one newline (a blank line) is a strong separator.
    if (fr1.newlines_after_count > 1):
        return 1
    # If a bracket/quote sequence opened inside fr1 runs on into fr2
    # (parsed with CANBEMANYLINES), the fragments belong together.
    tt = fr1.begin_token
    while tt is not None and tt.end_char <= fr1.end_char:
        if (BracketHelper.can_be_start_of_sequence(tt, False, False)):
            br = BracketHelper.try_parse(tt, BracketParseAttr.CANBEMANYLINES, 100)
            if (br is not None and br.end_char >= fr2.begin_char):
                return -1
        tt = tt.next0_
    t = fr1.end_token
    # Terminal punctuation at the end of fr1 suggests a genuine break.
    if (t.is_char_of(":;.")):
        return 1
    # A trailing preposition/conjunction means the sentence continues in fr2.
    if ((isinstance(t, TextToken)) and ((t.morph.class0_.is_preposition or t.morph.class0_.is_conjunction))):
        return -1
    t1 = fr2.begin_token
    # NOTE(review): the nesting below is reconstructed from a flattened source;
    # the bracket-start check is assumed to apply only when t1 is a TextToken,
    # with the NumberToken case as the alternative branch — verify against the
    # original Pullenti source.
    if (isinstance(t1, TextToken)):
        # fr2 starting in lowercase reads as a continuation of fr1.
        if (t1.chars.is_all_lower):
            return -1
        if (BracketHelper.can_be_start_of_sequence(t1, False, False)):
            if (t.chars.is_all_lower):
                return -1
    elif (isinstance(t1, NumberToken)):
        if (t.chars.is_all_lower):
            return -1
    # Lowercase ending joined to a fragment terminated by ';' — list item style.
    if (t.chars.is_all_lower):
        if (fr2.end_token.is_char(';')):
            return -1
    return 0
def try_parse(t : 'Token', prev : 'WeaponItemToken', after_conj : bool, attach_high : bool=False) -> 'WeaponItemToken':
    """Public entry point: parse a weapon item starting at token t.

    First tries a direct parse; on failure re-parses from the head noun of a
    noun phrase covering t. For a successfully parsed NAME item, a bracketed
    alias immediately after it may be attached as alt_value.
    """
    res = WeaponItemToken.__try_parse(t, prev, after_conj, attach_high)
    if (res is None):
        # Direct parse failed — try from the noun of a noun phrase whose
        # noun starts strictly after the phrase start (i.e. has modifiers).
        npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
        if (npt is not None and npt.noun.begin_char > npt.begin_char):
            res = WeaponItemToken.__try_parse(npt.noun.begin_token, prev, after_conj, attach_high)
            if (res is not None):
                if (res.typ == WeaponItemToken.Typs.NOUN):
                    str0_ = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                    # Fix a normalization artefact of the Russian morphology.
                    if (str0_ == "РУЧНОЙ ГРАНАТ"):
                        str0_ = "РУЧНАЯ ГРАНАТА"
                    # NOTE(review): nesting reconstructed from flattened source —
                    # the re-parse is assumed to be accepted (begin extended to t)
                    # only when the normalized phrase ends with the parsed value;
                    # verify against the original Pullenti source.
                    if ((Utils.ifNotNull(str0_, "")).endswith(res.value)):
                        if (res.alt_value is None):
                            res.alt_value = str0_
                        else:
                            # Keep only the modifier part of the phrase in front
                            # of the existing alternative value.
                            str0_ = str0_[0:0+len(str0_) - len(res.value)].strip()
                            res.alt_value = "{0} {1}".format(str0_, res.alt_value)
                        res.begin_token = t
                        return res
        return None
    if (res.typ == WeaponItemToken.Typs.NAME):
        # A parenthesised Cyrillic/Latin transliteration right after the name
        # becomes its alternative value.
        br = BracketHelper.try_parse(res.end_token.next0_, BracketParseAttr.NO, 100)
        if (br is not None and br.is_char('(')):
            alt = MiscHelper.get_text_value_of_meta_token(br, GetTextAttr.NO)
            if (MiscHelper.can_be_equal_cyr_and_latss(res.value, alt)):
                res.alt_value = alt
                res.end_token = br.end_token
    return res
def __try_attach_speciality(t: 'Token', key_word_before: bool) -> 'TitleItemToken':
    """Try to read a speciality code (e.g. "01.02.03") starting at t.

    key_word_before indicates that a speciality keyword was already seen;
    without it, the match is treated as "suspicious" (susp) and only a strict
    NN.NN.NN digit layout is accepted. Returns a SPECIALITY TitleItemToken
    or None.
    """
    if (t is None):
        return None
    susp = False
    if (not key_word_before):
        # No keyword and not at line start — demand the strict layout.
        if (not t.is_newline_before):
            susp = True
    val = None
    t0 = t
    dig_count = 0
    # Collect up to three digit groups separated by '.', ',' or hyphen.
    for i in range(3):
        nt = Utils.asObjectOrNull(t, NumberToken)
        if (nt is None):
            break
        if (nt.typ != NumberSpellingType.DIGIT or nt.morph.class0_.is_adjective):
            break
        if (val is None):
            val = io.StringIO()
        # Suspicious mode: every group must be exactly two characters.
        if (susp and t.length_char != 2):
            return None
        digs = nt.get_source_text()
        dig_count += len(digs)
        print(digs, end="", file=val)
        if (t.next0_ is None):
            break
        t = t.next0_
        if (t.is_char_of(".,") or t.is_hiphen):
            if (susp and (i < 2)):
                # Suspicious mode: only a tightly attached '.' may separate groups.
                if (not t.is_char('.') or t.is_whitespace_after or t.is_whitespace_before):
                    return None
            if (t.next0_ is not None):
                t = t.next0_
    # Need at least 5 digits overall to look like a speciality code.
    if (val is None or (dig_count < 5)):
        return None
    if (dig_count != 6):
        # Non-canonical length is only accepted after an explicit keyword.
        if (not key_word_before):
            return None
    else:
        # Canonical 6 digits: re-insert the dots to form NN.NN.NN.
        Utils.insertStringIO(val, 4, '.')
        Utils.insertStringIO(val, 2, '.')
    # Extend the token span to the end of the line, skipping bracketed text.
    # (first_pass emulates a C# for-loop: increment runs before each
    # iteration except the first.)
    tt = t.next0_
    first_pass3395 = True
    while True:
        if first_pass3395:
            first_pass3395 = False
        else:
            tt = tt.next0_
        if (not (tt is not None)):
            break
        if (tt.is_newline_before):
            break
        br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
        if (br is not None):
            tt = br.end_token
            t = tt
            continue
        t = tt
    return TitleItemToken._new2655(t0, t, TitleItemToken.Types.SPECIALITY, Utils.toStringStringIO(val))
def __check_detail(rt: 'ReferentToken') -> None:
    """If a parenthesised remark immediately follows rt, store its text in
    rt.referent.detail and extend rt over the closing bracket."""
    tail = rt.end_token.next0_
    # Nothing follows, or the next token is separated by a wide gap — bail out.
    if tail is None or rt.end_token.whitespaces_after_count > 2:
        return
    if not tail.is_char('('):
        return
    br = BracketHelper.try_parse(tail, BracketParseAttr.NO, 100)
    if br is None:
        return
    # Text strictly between the brackets becomes the detail attribute.
    rt.referent.detail = MiscHelper.get_text_value(br.begin_token.next0_, br.end_token.previous, GetTextAttr.NO)
    rt.end_token = br.end_token
def _add_name(self, begin: 'Token', end: 'Token') -> 'Termin':
    """Record the text between *begin* and *end* as the title-page NAME slot
    and return an upper-cased Termin for it, or None if the span is empty."""
    # Peel off a quote/bracket pair that encloses the entire span.
    if BracketHelper.can_be_start_of_sequence(begin, True, False):
        quoted = BracketHelper.try_parse(begin, BracketParseAttr.NO, 100)
        if quoted is not None and quoted.end_token == end:
            begin = begin.next0_
            end = end.previous
    attrs = Utils.valToEnum((GetTextAttr.KEEPREGISTER) | (GetTextAttr.KEEPQUOTES), GetTextAttr)
    val = MiscHelper.get_text_value(begin, end, attrs)
    if val is None:
        return None
    # Trim one trailing period, but leave an ellipsis ("..") untouched.
    if val.endswith(".") and not val.endswith(".."):
        val = val[:len(val) - 1].strip()
    self.add_slot(TitlePageReferent.ATTR_NAME, val, False, 0)
    return Termin(val.upper())
def try_attach_org(t: 'Token', can_be_cyr: bool = False) -> 'ReferentToken':
    """Try to parse an English-style organization ("Xxx Yyy Ltd", "Bank of ...").

    Scans capitalized words until an organization-type item (OrgItemEngItem)
    is found, then builds an OrganizationReferent with name, type(s) and an
    optional attached geo object. Returns a ReferentToken or None.

    NOTE(review): the statement nesting in this function was reconstructed
    from a flattened (single-line) source — verify the marked spots against
    the original Pullenti source.
    """
    from pullenti.ner.org.internal.OrgItemNameToken import OrgItemNameToken
    if (t is None):
        return None
    br = False
    # Allow the whole organization to be enclosed in parentheses.
    if (t.is_char('(') and t.next0_ is not None):
        t = t.next0_
        br = True
    if (isinstance(t, NumberToken)):
        # A number may start the name only as a capitalized adjective word
        # ("First National ...").
        if (t.typ == NumberSpellingType.WORDS and t.morph.class0_.is_adjective and t.chars.is_capital_upper):
            pass
        else:
            return None
    else:
        if (t.chars.is_all_lower):
            return None
        if ((t.length_char < 3) and not t.chars.is_letter):
            return None
        if (not t.chars.is_latin_letter):
            if (not can_be_cyr or not t.chars.is_cyrillic_letter):
                return None
    t0 = t
    t1 = t0
    nam_wo = 0  # count of plain word tokens in the name
    tok = None
    geo_ = None
    add_typ = None
    # Scan forward for the org-type item; first_pass emulates a C# for-loop
    # (the increment runs before every iteration except the first).
    first_pass3312 = True
    while True:
        if first_pass3312:
            first_pass3312 = False
        else:
            t = t.next0_
        if (not (t is not None)):
            break
        if (t != t0 and t.whitespaces_before_count > 1):
            break
        if (t.is_char(')')):
            break
        if (t.is_char('(') and t.next0_ is not None):
            # "(GeoName)" — remember the geo object and skip it.
            if ((isinstance(t.next0_.get_referent(), GeoReferent)) and t.next0_.next0_ is not None and t.next0_.next0_.is_char(')')):
                geo_ = (Utils.asObjectOrNull(t.next0_.get_referent(), GeoReferent))
                t = t.next0_.next0_
                continue
            # "(org type)" — remember it as an additional type.
            typ = OrgItemTypeToken.try_attach(t.next0_, True, None)
            if ((typ is not None and typ.end_token.next0_ is not None and typ.end_token.next0_.is_char(')')) and typ.chars.is_latin_letter):
                add_typ = typ
                t = typ.end_token.next0_
                continue
            # "(Word)" — a single capitalized word stays part of the name.
            if (((isinstance(t.next0_, TextToken)) and t.next0_.next0_ is not None and t.next0_.next0_.is_char(')')) and t.next0_.chars.is_capital_upper):
                t = t.next0_.next0_
                t1 = t
                continue
            break
        # Try the org-type item here, optionally skipping one or two
        # punctuation tokens ("Xxx, Ltd.").
        tok = OrgItemEngItem.try_attach(t, can_be_cyr)
        if (tok is None and t.is_char_of(".,") and t.next0_ is not None):
            tok = OrgItemEngItem.try_attach(t.next0_, can_be_cyr)
            if (tok is None and t.next0_.is_char_of(",.")):
                tok = OrgItemEngItem.try_attach(t.next0_.next0_, can_be_cyr)
        if (tok is not None):
            # A one-letter type after a Cyrillic start is too unreliable.
            if (tok.length_char == 1 and t0.chars.is_cyrillic_letter):
                return None
            break
        if (t.is_hiphen and not t.is_whitespace_after and not t.is_whitespace_before):
            continue
        if (t.is_char_of("&+") or t.is_and):
            continue
        if (t.is_char('.')):
            if (t.previous is not None and t.previous.length_char == 1):
                continue  # abbreviation dot ("I. B. M.")
            elif (MiscHelper.can_be_start_of_sentence(t.next0_)):
                break
        if (not t.chars.is_latin_letter):
            if (not can_be_cyr or not t.chars.is_cyrillic_letter):
                break
        if (t.chars.is_all_lower):
            if (t.morph.class0_.is_preposition or t.morph.class0_.is_conjunction):
                continue
            if (br):
                continue  # inside brackets lowercase words are tolerated
            break
        mc = t.get_morph_class_in_dictionary()
        # NOTE(review): the "OF" check is assumed to sit at loop level, not
        # nested under the verb check — verify.
        if (mc.is_verb):
            if (t.next0_ is not None and t.next0_.morph.class0_.is_preposition):
                break
        if (t.next0_ is not None and t.next0_.is_value("OF", None)):
            break
        if (isinstance(t, TextToken)):
            nam_wo += 1
        t1 = t
    if (tok is None):
        return None
    if (t0 == tok.begin_token):
        # The type itself starts the span: accept only "TYPE (Name)" form.
        br2 = BracketHelper.try_parse(tok.end_token.next0_, BracketParseAttr.NO, 100)
        if (br2 is not None):
            org1 = OrganizationReferent()
            if (tok.short_value is not None):
                org1.add_type_str(tok.short_value)
            org1.add_type_str(tok.full_value)
            nam1 = MiscHelper.get_text_value(br2.begin_token, br2.end_token, GetTextAttr.NO)
            if (nam1 is not None):
                org1.add_name(nam1, True, None)
            return ReferentToken(org1, t0, br2.end_token)
        return None
    org0_ = OrganizationReferent()
    te = tok.end_token
    if (tok.is_bank):
        t1 = tok.end_token
    # "... Company Ltd" — a second type item directly after "company".
    if (tok.full_value == "company" and (tok.whitespaces_after_count < 3)):
        tok1 = OrgItemEngItem.try_attach(tok.end_token.next0_, can_be_cyr)
        if (tok1 is not None):
            t1 = tok.end_token
            tok = tok1
            te = tok.end_token
    if (tok.full_value == "company"):
        if (nam_wo == 0):
            return None
    nam = MiscHelper.get_text_value(t0, t1, GetTextAttr.IGNOREARTICLES)
    if (nam == "STOCK" and tok.full_value == "company"):
        return None
    alt_nam = None
    if (Utils.isNullOrEmpty(nam)):
        return None
    # Name containing "(...)": keep the full text as alternative name and
    # collapse the bracketed part out of the main name.
    if (nam.find('(') > 0):
        i1 = nam.find('(')
        i2 = nam.find(')')
        if (i1 < i2):
            alt_nam = nam
            tai = None
            if ((i2 + 1) < len(nam)):
                tai = nam[i2:].strip()
            nam = nam[0:0 + i1].strip()
            if (tai is not None):
                nam = "{0} {1}".format(nam, tai)
    if (tok.is_bank):
        org0_.add_type_str(("bank" if tok.kit.base_language.is_en else "банк"))
        org0_.add_profile(OrgProfile.FINANCE)
        # "Bank OF Xxx" — pull the post-"OF" words into the name.
        if ((t1.next0_ is not None and t1.next0_.is_value("OF", None) and t1.next0_.next0_ is not None) and t1.next0_.next0_.chars.is_latin_letter):
            nam0 = OrgItemNameToken.try_attach(t1.next0_, None, False, False)
            if (nam0 is not None):
                te = nam0.end_token
            else:
                te = t1.next0_.next0_
            nam = MiscHelper.get_text_value(t0, te, GetTextAttr.NO)
            if (isinstance(te.get_referent(), GeoReferent)):
                org0_._add_geo_object(Utils.asObjectOrNull(te.get_referent(), GeoReferent))
        elif (t0 == t1):
            return None
    else:
        if (tok.short_value is not None):
            org0_.add_type_str(tok.short_value)
        org0_.add_type_str(tok.full_value)
    if (Utils.isNullOrEmpty(nam)):
        return None
    org0_.add_name(nam, True, None)
    if (alt_nam is not None):
        org0_.add_name(alt_nam, True, None)
    res = ReferentToken(org0_, t0, te)
    # Absorb a further type item after trailing punctuation ("Xxx Ltd, Inc.").
    t = te
    while t.next0_ is not None:
        if (t.next0_.is_char_of(",.")):
            t = t.next0_
        else:
            break
    if (t.whitespaces_after_count < 2):
        tok = OrgItemEngItem.try_attach(t.next0_, can_be_cyr)
        if (tok is not None):
            if (tok.short_value is not None):
                org0_.add_type_str(tok.short_value)
            org0_.add_type_str(tok.full_value)
            res.end_token = tok.end_token
    if (geo_ is not None):
        org0_._add_geo_object(geo_)
    if (add_typ is not None):
        org0_.add_type(add_typ, False)
    if (not br):
        return res
    # The span was opened with '(' — require (and include) the closing ')'.
    t = res.end_token
    if (t.next0_ is None or t.next0_.is_char(')')):
        res.end_token = t.next0_
    else:
        return None
    return res
def __try_parse(t: 'Token', lev: int) -> 'BookLinkToken':
    """Parse one bibliographic-reference element at token t.

    Recognizes, in order: a bracketed element "[...]", a quoted number,
    referents (person/geo/date/press-org/uri), dictionary terms, the "//"
    delimiter, page/volume numbers, place abbreviations ("М.", "СПб" ...),
    translation marks, "там же", "см." cross-references and "(year)".
    *lev* limits recursion depth (max 3). Returns a BookLinkToken or None.

    NOTE(review): nesting reconstructed from a flattened source — verify the
    marked spots against the original Pullenti source.
    """
    if (t is None or lev > 3):
        return None
    if (t.is_char('[')):
        # "[...]": parse the inside and absorb the brackets.
        re = BookLinkToken.__try_parse(t.next0_, lev + 1)
        if (re is not None and re.end_token.next0_ is not None and re.end_token.next0_.is_char(']')):
            re.begin_token = t
            re.end_token = re.end_token.next0_
            return re
        if (re is not None and re.end_token.is_char(']')):
            re.begin_token = t
            return re
        if (re is not None):
            if (re.typ == BookLinkTyp.SOSTAVITEL or re.typ == BookLinkTyp.EDITORS):
                return re
    # A short quoted sequence ending with a number -> a NUMBER element.
    br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
    if (br is not None):
        if ((isinstance(br.end_token.previous, NumberToken)) and (br.length_char < 30)):
            return BookLinkToken._new329(t, br.end_token, BookLinkTyp.NUMBER, MiscHelper.get_text_value(br.begin_token.next0_, br.end_token.previous, GetTextAttr.NO))
    t0 = t
    if (isinstance(t, ReferentToken)):
        if (isinstance(t.get_referent(), PersonReferent)):
            return BookLinkToken.try_parse_author(t, FioTemplateType.UNDEFINED)
        if (isinstance(t.get_referent(), GeoReferent)):
            return BookLinkToken._new326(t, t, BookLinkTyp.GEO, t.get_referent())
        if (isinstance(t.get_referent(), DateReferent)):
            dr = Utils.asObjectOrNull(t.get_referent(), DateReferent)
            # A bare year (single slot), or a year after a comma, is a YEAR element.
            if (len(dr.slots) == 1 and dr.year > 0):
                return BookLinkToken._new329(t, t, BookLinkTyp.YEAR, str(dr.year))
            if (dr.year > 0 and t.previous is not None and t.previous.is_comma):
                return BookLinkToken._new329(t, t, BookLinkTyp.YEAR, str(dr.year))
        if (isinstance(t.get_referent(), OrganizationReferent)):
            org0_ = Utils.asObjectOrNull(t.get_referent(), OrganizationReferent)
            if (org0_.kind == OrganizationKind.PRESS):
                return BookLinkToken._new326(t, t, BookLinkTyp.PRESS, org0_)
        if (isinstance(t.get_referent(), UriReferent)):
            uri = Utils.asObjectOrNull(t.get_referent(), UriReferent)
            if ((uri.scheme == "http" or uri.scheme == "https" or uri.scheme == "ftp") or uri.scheme is None):
                return BookLinkToken._new326(t, t, BookLinkTyp.URL, uri)
    # Dictionary terms (type markers, "электронный ресурс", etc.).
    tok_ = BookLinkToken.__m_termins.try_parse(t, TerminParseAttr.NO)
    if (tok_ is not None):
        typ_ = Utils.valToEnum(tok_.termin.tag, BookLinkTyp)
        ok = True
        if (typ_ == BookLinkTyp.TYPE or typ_ == BookLinkTyp.NAMETAIL or typ_ == BookLinkTyp.ELECTRONRES):
            # These markers are only valid right after '.', ':', '[' or a hyphen.
            if (t.previous is not None and ((t.previous.is_char_of(".:[") or t.previous.is_hiphen))):
                pass
            else:
                ok = False
        if (ok):
            return BookLinkToken._new329(t, tok_.end_token, typ_, tok_.termin.canonic_text)
        if (typ_ == BookLinkTyp.ELECTRONRES):
            # Look ahead for an URL belonging to the electronic-resource mark.
            tt = tok_.end_token.next0_
            first_pass3019 = True
            while True:
                if first_pass3019:
                    first_pass3019 = False
                else:
                    tt = tt.next0_
                if (not (tt is not None)):
                    break
                if ((isinstance(tt, TextToken)) and not tt.chars.is_letter):
                    continue
                if (isinstance(tt.get_referent(), UriReferent)):
                    return BookLinkToken._new326(t, tt, BookLinkTyp.ELECTRONRES, tt.get_referent())
                break
    if (t.is_char('/')):
        res = BookLinkToken._new329(t, t, BookLinkTyp.DELIMETER, "/")
        if (t.next0_ is not None and t.next0_.is_char('/')):
            res.end_token = t.next0_
            res.value = "//"
        # NOTE(review): this whitespace check is assumed to apply to the
        # whole '/' branch (not only to "//") — verify.
        if (not t.is_whitespace_before and not t.is_whitespace_after):
            # A tight '/' must be followed within 3 tokens by a real
            # reference element, otherwise it is not a delimiter.
            coo = 3
            no = True
            tt = t.next0_
            while tt is not None and coo > 0:
                vvv = BookLinkToken.try_parse(tt, lev + 1)
                if (vvv is not None and vvv.typ != BookLinkTyp.NUMBER):
                    no = False
                    break
                tt = tt.next0_
                coo -= 1
            if (no):
                return None
        return res
    if ((isinstance(t, NumberToken)) and t.int_value is not None and t.typ == NumberSpellingType.DIGIT):
        res = BookLinkToken._new329(t, t, BookLinkTyp.NUMBER, str(t.value))
        val = t.int_value
        # A plausible publication year.
        if (val >= 1930 and (val < 2030)):
            res.typ = BookLinkTyp.YEAR
        if (t.next0_ is not None and t.next0_.is_char('.')):
            res.end_token = t.next0_
        elif ((t.next0_ is not None and t.next0_.length_char == 1 and not t.next0_.chars.is_letter) and t.next0_.is_whitespace_after):
            res.end_token = t.next0_
        elif (isinstance(t.next0_, TextToken)):
            term = t.next0_.term
            # "123 с." / "123 p." — a page count.
            if (((term == "СТР" or term == "C" or term == "С") or term == "P" or term == "S") or term == "PAGES"):
                res.end_token = t.next0_
                res.typ = BookLinkTyp.PAGES
                res.value = str(t.value)
        return res
    if (isinstance(t, TextToken)):
        term = t.term
        # "с. 12-15", "т. 3", "vol. 2" etc. — a page/volume range marker.
        if ((((( ((term == "СТР" or term == "C" or term == "С") or term == "ТОМ" or term == "T") or term == "Т" or term == "P") or term == "PP" or term == "V") or term == "VOL" or term == "S") or term == "СТОР" or t.is_value("PAGE", None)) or t.is_value("СТРАНИЦА", "СТОРІНКА")):
            tt = t.next0_
            while tt is not None:
                if (tt.is_char_of(".:~")):
                    tt = tt.next0_
                else:
                    break
            if (isinstance(tt, NumberToken)):
                res = BookLinkToken._new328(t, tt, BookLinkTyp.PAGERANGE)
                tt0 = tt
                tt1 = tt
                # Absorb "N, N" / "N-N" continuations.
                tt = tt.next0_
                first_pass3020 = True
                while True:
                    if first_pass3020:
                        first_pass3020 = False
                    else:
                        tt = tt.next0_
                    if (not (tt is not None)):
                        break
                    if (tt.is_char_of(",") or tt.is_hiphen):
                        if (isinstance(tt.next0_, NumberToken)):
                            tt = tt.next0_
                            res.end_token = tt
                            tt1 = tt
                            continue
                    break
                res.value = MiscHelper.get_text_value(tt0, tt1, GetTextAttr.NO)
                return res
        # Place-of-publication abbreviations: "М.:", "СПб.", "К.:", ...
        if ((term == "M" or term == "М" or term == "СПБ") or term == "K" or term == "К"):
            if (t.next0_ is not None and t.next0_.is_char_of(":;")):
                re = BookLinkToken._new328(t, t.next0_, BookLinkTyp.GEO)
                return re
            if (t.next0_ is not None and t.next0_.is_char_of(".")):
                res = BookLinkToken._new328(t, t.next0_, BookLinkTyp.GEO)
                if (t.next0_.next0_ is not None and t.next0_.next0_.is_char_of(":;")):
                    res.end_token = t.next0_.next0_
                elif (t.next0_.next0_ is not None and (isinstance(t.next0_.next0_, NumberToken))):
                    pass
                elif (t.next0_.next0_ is not None and t.next0_.next0_.is_comma and (isinstance(t.next0_.next0_.next0_, NumberToken))):
                    pass
                else:
                    return None
                return res
        # "пер. с ..." — translation mark.
        if (term == "ПЕР" or term == "ПЕРЕВ" or term == "ПЕРЕВОД"):
            tt = t
            if (tt.next0_ is not None and tt.next0_.is_char('.')):
                tt = tt.next0_
            if (tt.next0_ is not None and ((tt.next0_.is_value("C", None) or tt.next0_.is_value("С", None)))):
                tt = tt.next0_
            if (tt.next0_ is None or tt.whitespaces_after_count > 2):
                return None
            re = BookLinkToken._new328(t, tt.next0_, BookLinkTyp.TRANSLATE)
            return re
        # "там же" — ibid.
        if (term == "ТАМ" or term == "ТАМЖЕ"):
            res = BookLinkToken._new328(t, t, BookLinkTyp.TAMZE)
            if (t.next0_ is not None and t.next0_.is_value("ЖЕ", None)):
                res.end_token = t.next0_
            return res
        # "см.", "напр.", "see" — cross-reference introducer.
        if (((term == "СМ" or term == "CM" or term == "НАПР") or term == "НАПРИМЕР" or term == "SEE") or term == "ПОДРОБНЕЕ" or term == "ПОДРОБНО"):
            res = BookLinkToken._new328(t, t, BookLinkTyp.SEE)
            t = t.next0_
            first_pass3021 = True
            while True:
                if first_pass3021:
                    first_pass3021 = False
                else:
                    t = t.next0_
                if (not (t is not None)):
                    break
                if (t.is_char_of(".:") or t.is_value("ALSO", None)):
                    res.end_token = t
                    continue
                if (t.is_value("В", None) or t.is_value("IN", None)):
                    res.end_token = t
                    continue
                vvv = BookLinkToken.__try_parse(t, lev + 1)
                if (vvv is not None and vvv.typ == BookLinkTyp.SEE):
                    res.end_token = vvv.end_token
                    break
                break
            return res
        if (term == "БОЛЕЕ"):
            vvv = BookLinkToken.__try_parse(t.next0_, lev + 1)
            if (vvv is not None and vvv.typ == BookLinkTyp.SEE):
                vvv.begin_token = t
                return vvv
        # "№ 5" — a numbered element.
        no = MiscHelper.check_number_prefix(t)
        if (isinstance(no, NumberToken)):
            return BookLinkToken._new328(t, no, BookLinkTyp.N)
        # "в 3 т." — volume count.
        if (((term == "B" or term == "В")) and (isinstance(t.next0_, NumberToken)) and (isinstance(t.next0_.next0_, TextToken))):
            term2 = t.next0_.next0_.term
            if (((term2 == "Т" or term2 == "T" or term2.startswith("ТОМ")) or term2 == "TT" or term2 == "ТТ") or term2 == "КН" or term2.startswith("КНИГ")):
                return BookLinkToken._new328(t, t.next0_.next0_, BookLinkTyp.VOLUME)
    if (t.is_char('(')):
        # "(1987)" — a year in parentheses, not in the future.
        if (((isinstance(t.next0_, NumberToken)) and t.next0_.int_value is not None and t.next0_.next0_ is not None) and t.next0_.next0_.is_char(')')):
            num = t.next0_.int_value
            if (num > 1900 and num <= 2040):
                if (num <= datetime.datetime.now().year):
                    return BookLinkToken._new329(t, t.next0_.next0_, BookLinkTyp.YEAR, str(num))
        if (((isinstance(t.next0_, ReferentToken)) and (isinstance(t.next0_.get_referent(), DateReferent)) and t.next0_.next0_ is not None) and t.next0_.next0_.is_char(')')):
            num = t.next0_.get_referent().year
            if (num > 0):
                return BookLinkToken._new329(t, t.next0_.next0_, BookLinkTyp.YEAR, str(num))
    return None
def try_create_canonic_decree_ref_uri(t: 'Token') -> 'CanonicDecreeRefUri':
    """Build the canonical character span (CanonicDecreeRefUri) for a decree or
    decree-part reference held in ReferentToken t.

    For a DecreeReferent the span is trimmed to the essential type/number/name
    part (dropping brackets, "(далее ...)" tails, "утв. ..." wrappers). For a
    DecreePartReferent the span is narrowed to the most significant part item
    (article/clause/appendix), optionally extended over a "N-M" range.
    Returns None when no canonical span can be produced.

    NOTE(review): the statement nesting in this function was reconstructed
    from a flattened (single-line) source — verify the marked spots against
    the original Pullenti source.
    """
    if (not (isinstance(t, ReferentToken))):
        return None
    dr = Utils.asObjectOrNull(t.get_referent(), DecreeReferent)
    res = None
    if (dr is not None):
        if (dr.kind == DecreeKind.PUBLISHER):
            return None
        res = CanonicDecreeRefUri._new833(t.kit.sofa.text, dr, t.begin_char, t.end_char)
        # Fully parenthesised reference — keep as is.
        if ((t.previous is not None and t.previous.is_char('(') and t.next0_ is not None) and t.next0_.is_char(')')):
            return res
        if (t.misc_attrs != 0):
            return res
        rt = Utils.asObjectOrNull(t, ReferentToken)
        # Brackets inside the token itself — take the inner span.
        if (rt.begin_token.is_char('(') and rt.end_token.is_char(')')):
            res = CanonicDecreeRefUri._new833(t.kit.sofa.text, dr, rt.begin_token.next0_.begin_char, rt.end_token.previous.end_char)
            return res
        # When followed by ", and <another decree>", pre-parse its items to
        # align this reference's span with the enumeration pattern.
        next_decree_items = None
        if ((t.next0_ is not None and t.next0_.is_comma_and and (isinstance(t.next0_.next0_, ReferentToken))) and (isinstance(t.next0_.next0_.get_referent(), DecreeReferent))):
            next_decree_items = DecreeToken.try_attach_list(t.next0_.next0_.begin_token, None, 10, False)
            if (next_decree_items is not None and len(next_decree_items) > 1):
                # Cut the item list at the first line break.
                i = 0
                while i < (len(next_decree_items) - 1):
                    if (next_decree_items[i].is_newline_after):
                        del next_decree_items[i + 1:i + 1 + len(next_decree_items) - i - 1]
                        break
                    i += 1
        was_typ = False
        was_num = False
        # Walk the tokens of the reference; first_pass emulates a C# for-loop.
        tt = t.begin_token
        first_pass3090 = True
        while True:
            if first_pass3090:
                first_pass3090 = False
            else:
                tt = tt.next0_
            if (not (tt is not None and tt.end_char <= t.end_char)):
                break
            # Leading '(' — shift the span start past it.
            if (tt.begin_char == t.begin_char and tt.is_char('(') and tt.next0_ is not None):
                res.begin_char = tt.next0_.begin_char
            # "(далее ..." — drop the alias tail and stop.
            if (tt.is_char('(') and tt.next0_ is not None and tt.next0_.is_value("ДАЛЕЕ", None)):
                if (res.end_char >= tt.begin_char):
                    res.end_char = tt.previous.end_char
                break
            # Trailing ')' — trim it (and an unbalanced inner '(' tail).
            if (tt.end_char == t.end_char and tt.is_char(')')):
                res.end_char = tt.previous.end_char
                tt1 = tt.previous
                while tt1 is not None and tt1.begin_char >= res.begin_char:
                    if (tt1.is_char('(') and tt1.previous is not None):
                        if (res.begin_char < tt1.previous.begin_char):
                            res.end_char = tt1.previous.end_char
                    tt1 = tt1.previous
            li = DecreeToken.try_attach_list(tt, None, 10, False)
            if (li is not None and len(li) > 0):
                # "TYP TERR" pair gives the geo-qualified type text.
                ii = 0
                while ii < (len(li) - 1):
                    if (li[ii].typ == DecreeToken.ItemType.TYP and li[ii + 1].typ == DecreeToken.ItemType.TERR):
                        res.type_with_geo = MiscHelper.get_text_value(li[ii].begin_token, li[ii + 1].end_token, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVESINGLE)
                    ii += 1
                # Align with the following enumerated decree when its item
                # pattern matches a suffix of this list.
                if ((next_decree_items is not None and len(next_decree_items) > 1 and (len(next_decree_items) < len(li))) and next_decree_items[0].typ != DecreeToken.ItemType.TYP):
                    d = len(li) - len(next_decree_items)
                    j = 0
                    j = 0
                    while j < len(next_decree_items):
                        if (next_decree_items[j].typ != li[d + j].typ):
                            break
                        j += 1
                    if (j >= len(next_decree_items)):
                        del li[0:0 + d]
                        res.begin_char = li[0].begin_char
                elif ((next_decree_items is not None and len(next_decree_items) == 1 and next_decree_items[0].typ == DecreeToken.ItemType.NAME) and len(li) == 2 and li[1].typ == DecreeToken.ItemType.NAME):
                    res.begin_char = li[1].begin_char
                    res.end_char = li[1].end_char
                    break
                elif ((next_decree_items is not None and len(next_decree_items) == 1 and next_decree_items[0].typ == DecreeToken.ItemType.NUMBER) and li[len(li) - 1].typ == DecreeToken.ItemType.NUMBER):
                    res.begin_char = li[len(li) - 1].begin_char
                    res.end_char = li[len(li) - 1].end_char
                # Scan the items, tracking type/number flags and trimming at a
                # free-standing NAME item.
                i = 0
                first_pass3091 = True
                while True:
                    if first_pass3091:
                        first_pass3091 = False
                    else:
                        i += 1
                    if (not (i < len(li))):
                        break
                    l_ = li[i]
                    if (l_.begin_char > t.end_char):
                        del li[i:i + len(li) - i]
                        break
                    if (l_.typ == DecreeToken.ItemType.NAME):
                        if (not was_num):
                            if (dr.kind == DecreeKind.CONTRACT):
                                continue
                            if (((i + 1) < len(li)) and ((li[i + 1].typ == DecreeToken.ItemType.DATE or li[i + 1].typ == DecreeToken.ItemType.NUMBER))):
                                continue
                            ee = l_.begin_token.previous.end_char
                            if (ee > res.begin_char and (ee < res.end_char)):
                                res.end_char = ee
                        break
                    if (l_.typ == DecreeToken.ItemType.NUMBER):
                        was_num = True
                    if (i == 0):
                        if (l_.typ == DecreeToken.ItemType.TYP):
                            was_typ = True
                        elif (l_.typ == DecreeToken.ItemType.OWNER or l_.typ == DecreeToken.ItemType.ORG):
                            if (((i + 1) < len(li)) and ((li[1].typ == DecreeToken.ItemType.DATE or li[1].typ == DecreeToken.ItemType.NUMBER))):
                                was_typ = True
                    if (was_typ):
                        # "утв./утвержденный <item>" — mark adopted and start
                        # the span at the item itself.
                        tt0 = l_.begin_token.previous
                        if (tt0 is not None and tt0.is_char('.')):
                            tt0 = tt0.previous
                        if (tt0 is not None and ((tt0.is_value("УТВЕРЖДЕННЫЙ", None) or tt0.is_value("УТВЕРДИТЬ", None) or tt0.is_value("УТВ", None)))):
                            if (l_.begin_char > res.begin_char):
                                res.begin_char = l_.begin_char
                                if (res.end_char < res.begin_char):
                                    res.end_char = t.end_char
                            res.is_adopted = True
                if (len(li) > 0):
                    tt = li[len(li) - 1].end_token
                    if (tt.is_char(')')):
                        tt = tt.previous
                continue
            if (was_typ):
                na = DecreeToken.try_attach_name(tt, dr.typ0, True, False)
                if (na is not None and tt.begin_char > t.begin_char):
                    tt1 = na.end_token.next0_
                    if (tt1 is not None and tt1.is_char_of(",()")):
                        tt1 = tt1.next0_
                    if (tt1 is not None and (tt1.end_char < t.end_char)):
                        if (tt1.is_value("УТВЕРЖДЕННЫЙ", None) or tt1.is_value("УТВЕРДИТЬ", None) or tt1.is_value("УТВ", None)):
                            tt = tt1
                            continue
                    if (tt.previous is not None and tt.previous.is_char(':') and na.end_char <= res.end_char):
                        res.begin_char = tt.begin_char
                        break
                if (tt.previous.end_char > res.begin_char):
                    res.end_char = tt.previous.end_char
                break
        return res
    dpr = Utils.asObjectOrNull(t.get_referent(), DecreePartReferent)
    if (dpr is None):
        return None
    # Second element of a "part - part" range: already covered by the first.
    if ((t.previous is not None and t.previous.is_hiphen and (isinstance(t.previous.previous, ReferentToken))) and (isinstance(t.previous.previous.get_referent(), DecreePartReferent))):
        if (DecreePartReferent.create_range_referent(Utils.asObjectOrNull(t.previous.previous.get_referent(), DecreePartReferent), dpr) is not None):
            return None
    t1 = t
    has_diap = False
    diap_ref = None
    # "part - part" range starting here — merge into one range referent.
    if ((t.next0_ is not None and t.next0_.is_hiphen and (isinstance(t.next0_.next0_, ReferentToken))) and (isinstance(t.next0_.next0_.get_referent(), DecreePartReferent))):
        diap = DecreePartReferent.create_range_referent(Utils.asObjectOrNull(dpr, DecreePartReferent), Utils.asObjectOrNull(t.next0_.next0_.get_referent(), DecreePartReferent))
        if (diap is not None):
            dpr = diap
            has_diap = True
            t1 = t.next0_.next0_
            diap_ref = (Utils.asObjectOrNull(t1, ReferentToken))
    res = CanonicDecreeRefUri._new835(t.kit.sofa.text, dpr, t.begin_char, t1.end_char, has_diap)
    if ((t.previous is not None and t.previous.is_char('(') and t1.next0_ is not None) and t1.next0_.is_char(')')):
        return res
    # Trim the span around an embedded decree reference.
    tt = t.begin_token
    while tt is not None and tt.end_char <= t.end_char:
        if (isinstance(tt.get_referent(), DecreeReferent)):
            if (tt.begin_char > t.begin_char):
                res.end_char = tt.previous.end_char
                if (tt.previous.morph.class0_.is_preposition and tt.previous.previous is not None):
                    res.end_char = tt.previous.previous.end_char
            elif (tt.end_char < t.end_char):
                res.begin_char = tt.begin_char
            break
        tt = tt.next0_
    has_same_before = DecreeHelper.__has_same_decree(t, dpr, True)
    has_same_after = DecreeHelper.__has_same_decree(t, dpr, False)
    # Find the two highest-ranked part types present in the part referent.
    ptmin = PartToken.ItemType.PREFIX
    ptmin2 = PartToken.ItemType.PREFIX
    max0_ = 0
    max2 = 0
    for s in dpr.slots:
        pt = PartToken._get_type_by_attr_name(s.type_name)
        if (pt == PartToken.ItemType.PREFIX):
            continue
        co = PartToken._get_rank(pt)
        if (co < 1):
            # PART combined with CLAUSE ranks like PARAGRAPH.
            if (pt == PartToken.ItemType.PART and dpr.find_slot(DecreePartReferent.ATTR_CLAUSE, None, True) is not None):
                co = PartToken._get_rank(PartToken.ItemType.PARAGRAPH)
            else:
                continue
        if (co > max0_):
            max2 = max0_
            ptmin2 = ptmin
            max0_ = co
            ptmin = pt
        elif (co > max2):
            max2 = co
            ptmin2 = pt
    # Phase 1: narrow the span to the top-ranked part item.
    if (ptmin != PartToken.ItemType.PREFIX):
        tt = t.begin_token
        while tt is not None and tt.end_char <= res.end_char:
            if (tt.begin_char >= res.begin_char):
                pt = PartToken.try_attach(tt, None, False, False)
                if (pt is not None and pt.typ == ptmin):
                    res.begin_char = pt.begin_char
                    res.end_char = pt.end_char
                    # "приложение ... к" — drop the dangling "к".
                    if (pt.typ == PartToken.ItemType.APPENDIX and pt.end_token.is_value("К", None) and pt.begin_token != pt.end_token):
                        res.end_char = pt.end_token.previous.end_char
                    if (pt.end_char == t.end_char):
                        # Followed by ", and <another part>": narrow to the value only.
                        if ((t.next0_ is not None and t.next0_.is_comma_and and (isinstance(t.next0_.next0_, ReferentToken))) and (isinstance(t.next0_.next0_.get_referent(), DecreePartReferent))):
                            tt1 = t.next0_.next0_.begin_token
                            ok = True
                            if (tt1.chars.is_letter):
                                ok = False
                            if (ok):
                                for v in pt.values:
                                    res.begin_char = v.begin_char
                                    res.end_char = v.end_char
                                    break
                    if (not has_diap):
                        return res
                    break
            tt = tt.next0_
        # Extend the span over the second half of a range.
        if (has_diap and diap_ref is not None):
            tt = diap_ref.begin_token
            while tt is not None and tt.end_char <= diap_ref.end_char:
                if (tt.is_char(',')):
                    break
                if (tt != diap_ref.begin_token and tt.is_whitespace_before):
                    break
                res.end_char = tt.end_char
                tt = tt.next0_
            # NOTE(review): this return is assumed to be inside the has_diap
            # branch (otherwise the phases below would be unreachable) — verify.
            return res
    # Phase 2: a sibling reference to the same decree exists nearby.
    if (((has_same_before or has_same_after)) and ptmin != PartToken.ItemType.PREFIX):
        tt = t.begin_token
        first_pass3092 = True
        while True:
            if first_pass3092:
                first_pass3092 = False
            else:
                tt = tt.next0_
            if (not (tt is not None and tt.end_char <= res.end_char)):
                break
            if (tt.begin_char >= res.begin_char):
                pt = (PartToken.try_attach(tt, None, False, False) if not has_same_before else None)
                if (pt is not None):
                    if (pt.typ == ptmin):
                        for v in pt.values:
                            res.begin_char = v.begin_char
                            res.end_char = v.end_char
                            return res
                    tt = pt.end_token
                    continue
                # A bare number at the span start: absorb dotted sub-numbers
                # ("5.1.2") and a numeric range tail ("5.1-5.4").
                if ((isinstance(tt, NumberToken)) and tt.begin_char == res.begin_char):
                    res.end_char = tt.end_char
                    while tt is not None and tt.next0_ is not None:
                        if (not tt.next0_.is_char('.') or tt.is_whitespace_after or tt.next0_.is_whitespace_after):
                            break
                        if (not (isinstance(tt.next0_.next0_, NumberToken))):
                            break
                        tt = tt.next0_.next0_
                        res.end_char = tt.end_char
                    if (tt.next0_ is not None and tt.next0_.is_hiphen):
                        if (isinstance(tt.next0_.next0_, NumberToken)):
                            tt = tt.next0_.next0_
                            res.end_char = tt.end_char
                            while tt is not None and tt.next0_ is not None:
                                if (not tt.next0_.is_char('.') or tt.is_whitespace_after or tt.next0_.is_whitespace_after):
                                    break
                                if (not (isinstance(tt.next0_.next0_, NumberToken))):
                                    break
                                tt = tt.next0_.next0_
                                res.end_char = tt.end_char
                        elif (tt.next0_.next0_ is not None and (isinstance(tt.next0_.next0_.get_referent(), DecreePartReferent)) and has_diap):
                            res.end_char = tt.next0_.next0_.begin_token.end_char
                    return res
                # A short quoted name at the span start.
                if (BracketHelper.can_be_start_of_sequence(tt, True, False) and tt.begin_char == res.begin_char and has_same_before):
                    br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
                    if (br is not None and br.end_token.previous == tt.next0_):
                        res.end_char = br.end_char
                        return res
        return res
    # Phase 3: no sibling reference — use the full part-item list.
    if (not has_same_before and not has_same_after and ptmin != PartToken.ItemType.PREFIX):
        tt = t.begin_token
        while tt is not None and tt.end_char <= res.end_char:
            if (tt.begin_char >= res.begin_char):
                pts = PartToken.try_attach_list(tt, False, 40)
                if (pts is None or len(pts) == 0):
                    break
                i = 0
                while i < len(pts):
                    if (pts[i].typ == ptmin):
                        res.begin_char = pts[i].begin_char
                        res.end_char = pts[i].end_char
                        tt = pts[i].end_token
                        if (tt.next0_ is not None and tt.next0_.is_hiphen):
                            if (isinstance(tt.next0_.next0_, NumberToken)):
                                res.end_char = tt.next0_.next0_.end_char
                            elif (tt.next0_.next0_ is not None and (isinstance(tt.next0_.next0_.get_referent(), DecreePartReferent)) and has_diap):
                                res.end_char = tt.next0_.next0_.begin_token.end_char
                        return res
                    i += 1
            tt = tt.next0_
    return res
def __try_parse(t : 'Token', prev : 'WeaponItemToken', after_conj : bool, attach_high : bool=False) -> 'WeaponItemToken':
    """Try to recognize a single weapon-related item starting at token *t*.

    Tries, in order: a bracketed recursive parse, the static weapon ontology
    (M_ONTOLOGY), a numeric model prefix, short all-uppercase model codes,
    capitalized name/brand words following a previous weapon item, and the
    keyword-driven branches for brand ("МАРКА"), caliber ("КАЛИБР"),
    and producer ("ПРОИЗВОДСТВО").

    :param t: first token of the candidate item (may be None).
    :param prev: previously recognized item, used as left context
        (e.g. a NOUN before a capitalized word suggests BRAND).
    :param after_conj: passed through recursion; not read directly here.
    :param attach_high: passed through recursion; not read directly here.
    :return: a WeaponItemToken covering the matched span, or None.

    NOTE(review): auto-generated (transpiled) code; comments added, logic
    untouched. The ``first_passNNNN`` flag emulates a do-while loop.
    """
    if (t is None): 
        return None
    # An opening bracket/quote: parse the inside and absorb the brackets.
    if (BracketHelper.is_bracket(t, True)): 
        wit = WeaponItemToken.__try_parse(t.next0_, prev, after_conj, attach_high)
        if (wit is not None): 
            if (wit.end_token.next0_ is None): 
                wit.begin_token = t
                return wit
            if (BracketHelper.is_bracket(wit.end_token.next0_, True)): 
                wit.begin_token = t
                wit.end_token = wit.end_token.next0_
                return wit
    # Main path: match against the static weapon terminology ontology.
    tok = WeaponItemToken.M_ONTOLOGY.try_parse(t, TerminParseAttr.NO)
    if (tok is not None): 
        res = WeaponItemToken(t, tok.end_token)
        res.typ = (Utils.valToEnum(tok.termin.tag, WeaponItemToken.Typs))
        if (res.typ == WeaponItemToken.Typs.NOUN): 
            res.value = tok.termin.canonic_text
            if (tok.termin.tag2 is not None): 
                # tag2 marks ambiguous terms — presumably doubtful matches; TODO confirm.
                res.is_doubt = True
            # Greedily attach following BRAND items and dictionary adjectives
            # to the noun (do-while over tt).
            tt = res.end_token.next0_
            first_pass3426 = True
            while True:
                if first_pass3426: first_pass3426 = False
                else: tt = tt.next0_
                if (not (tt is not None)): break
                if (tt.whitespaces_before_count > 2): 
                    break
                wit = WeaponItemToken.__try_parse(tt, None, False, False)
                if (wit is not None): 
                    if (wit.typ == WeaponItemToken.Typs.BRAND): 
                        res.__inner_tokens.append(wit)
                        tt = wit.end_token
                        res.end_token = tt
                        continue
                    break
                if (not (isinstance(tt, TextToken))): 
                    break
                mc = tt.get_morph_class_in_dictionary()
                if (mc == MorphClass.ADJECTIVE): 
                    # Fold a trailing adjective into an alternative value
                    # ("<adjectives> <noun>").
                    if (res.alt_value is None): 
                        res.alt_value = res.value
                    if (res.alt_value.endswith(res.value)): 
                        res.alt_value = res.alt_value[0:0+len(res.alt_value) - len(res.value)]
                    res.alt_value = "{0}{1} {2}".format(res.alt_value, tt.term, res.value)
                    res.end_token = tt
                    continue
                break
            return res
        if (res.typ == WeaponItemToken.Typs.BRAND or res.typ == WeaponItemToken.Typs.NAME): 
            res.value = tok.termin.canonic_text
            return res
        if (res.typ == WeaponItemToken.Typs.MODEL): 
            res.value = tok.termin.canonic_text
            # tag2 may carry a list of extra termins describing the model's
            # sub-items; attach each as an inner token.
            if (isinstance(tok.termin.tag2, list)): 
                li = Utils.asObjectOrNull(tok.termin.tag2, list)
                for to in li: 
                    wit = WeaponItemToken._new2758(t, tok.end_token, Utils.valToEnum(to.tag, WeaponItemToken.Typs), to.canonic_text, tok.begin_token == tok.end_token)
                    res.__inner_tokens.append(wit)
                    if (to.additional_vars is not None and len(to.additional_vars) > 0): 
                        wit.alt_value = to.additional_vars[0].canonic_text
            res.__correct_model()
            return res
    # "№ 123"-style prefix → NUMBER item.
    nnn = MiscHelper.check_number_prefix(t)
    if (nnn is not None): 
        tit = TransItemToken._attach_number(nnn, True)
        if (tit is not None): 
            res = WeaponItemToken._new2759(t, tit.end_token, WeaponItemToken.Typs.NUMBER)
            res.value = tit.value
            res.alt_value = tit.alt_value
            return res
    # Short all-uppercase letter code (e.g. "АК") — candidate MODEL.
    if (((isinstance(t, TextToken)) and t.chars.is_letter and t.chars.is_all_upper) and (t.length_char < 4)): 
        if ((t.next0_ is not None and ((t.next0_.is_hiphen or t.next0_.is_char('.'))) and (t.next0_.whitespaces_after_count < 2)) and (isinstance(t.next0_.next0_, NumberToken)): 
            res = WeaponItemToken._new2760(t, t.next0_, WeaponItemToken.Typs.MODEL, True)
            res.value = t.term
            res.__correct_model()
            return res
        if ((isinstance(t.next0_, NumberToken)) and not t.is_whitespace_after): 
            res = WeaponItemToken._new2760(t, t, WeaponItemToken.Typs.MODEL, True)
            res.value = t.term
            res.__correct_model()
            return res
        # "СП" followed by a model/brand — expand to "service pistol" noun.
        if (t.term == "СП" and (t.whitespaces_after_count < 3) and (isinstance(t.next0_, TextToken))): 
            pp = WeaponItemToken.__try_parse(t.next0_, None, False, False)
            if (pp is not None and ((pp.typ == WeaponItemToken.Typs.MODEL or pp.typ == WeaponItemToken.Typs.BRAND))): 
                res = WeaponItemToken._new2759(t, t, WeaponItemToken.Typs.NOUN)
                res.value = "ПИСТОЛЕТ"
                res.alt_value = "СЛУЖЕБНЫЙ ПИСТОЛЕТ"
                return res
    # Capitalized word after a previous weapon item (or after a comma/and
    # in a list) — NAME, possibly reclassified as BRAND/MODEL below.
    if (((isinstance(t, TextToken)) and t.chars.is_letter and not t.chars.is_all_lower) and t.length_char > 2): 
        ok = False
        if (prev is not None and ((prev.typ == WeaponItemToken.Typs.NOUN or prev.typ == WeaponItemToken.Typs.MODEL or prev.typ == WeaponItemToken.Typs.BRAND))): 
            ok = True
        elif (prev is None and t.previous is not None and t.previous.is_comma_and): 
            ok = True
        if (ok): 
            res = WeaponItemToken._new2760(t, t, WeaponItemToken.Typs.NAME, True)
            res.value = t.term
            # Hyphenated continuation with matching capitalization: "X-Y".
            if ((t.next0_ is not None and t.next0_.is_hiphen and (isinstance(t.next0_.next0_, TextToken))) and t.next0_.next0_.chars == t.chars): 
                res.value = "{0}-{1}".format(res.value, t.next0_.next0_.term)
                res.end_token = t.next0_.next0_
            if (prev is not None and prev.typ == WeaponItemToken.Typs.NOUN): 
                res.typ = WeaponItemToken.Typs.BRAND
            # A trailing "-<number>" (or glued number) makes it a MODEL.
            if (res.end_token.next0_ is not None and res.end_token.next0_.is_hiphen and (isinstance(res.end_token.next0_.next0_, NumberToken))): 
                res.typ = WeaponItemToken.Typs.MODEL
                res.__correct_model()
            elif (not res.end_token.is_whitespace_after and (isinstance(res.end_token.next0_, NumberToken))): 
                res.typ = WeaponItemToken.Typs.MODEL
                res.__correct_model()
            return res
    # Keyword "МАРКА" (brand): recurse, or take a quoted/capitalized value.
    if (t.is_value("МАРКА", None)): 
        res = WeaponItemToken.__try_parse(t.next0_, prev, after_conj, False)
        if (res is not None and res.typ == WeaponItemToken.Typs.BRAND): 
            res.begin_token = t
            return res
        if (BracketHelper.can_be_start_of_sequence(t.next0_, True, False)): 
            br = BracketHelper.try_parse(t.next0_, BracketParseAttr.NO, 100)
            if (br is not None): 
                return WeaponItemToken._new2764(t, br.end_token, WeaponItemToken.Typs.BRAND, MiscHelper.get_text_value(br.begin_token, br.end_token, GetTextAttr.NO))
        if (((isinstance(t, TextToken)) and (isinstance(t.next0_, TextToken)) and t.next0_.length_char > 1) and not t.next0_.chars.is_all_lower): 
            return WeaponItemToken._new2764(t, t.next0_, WeaponItemToken.Typs.BRAND, t.term)
    # Keyword "КАЛИБР" (caliber, RU/UA variants) followed by a number+unit.
    if (t.is_value("КАЛИБР", "КАЛІБР")): 
        tt1 = t.next0_
        if (tt1 is not None and ((tt1.is_hiphen or tt1.is_char(':')))): 
            tt1 = tt1.next0_
        num = NumbersWithUnitToken.try_parse(tt1, None, False, False, False, False)
        if (num is not None and num.single_val is not None): 
            return WeaponItemToken._new2764(t, num.end_token, WeaponItemToken.Typs.CALIBER, NumberHelper.double_to_string(num.single_val))
    # Bare number: caliber if the unit is "мм" or "КАЛИБР" follows.
    if (isinstance(t, NumberToken)): 
        num = NumbersWithUnitToken.try_parse(t, None, False, False, False, False)
        if (num is not None and num.single_val is not None): 
            if (len(num.units) == 1 and num.units[0].unit is not None and num.units[0].unit.name_cyr == "мм"): 
                return WeaponItemToken._new2764(t, num.end_token, WeaponItemToken.Typs.CALIBER, NumberHelper.double_to_string(num.single_val))
            if (num.end_token.next0_ is not None and num.end_token.next0_.is_value("КАЛИБР", "КАЛІБР")): 
                return WeaponItemToken._new2764(t, num.end_token.next0_, WeaponItemToken.Typs.CALIBER, NumberHelper.double_to_string(num.single_val))
    # Keyword "ПРОИЗВОДСТВО" (producer): an org or geo referent follows.
    if (t.is_value("ПРОИЗВОДСТВО", "ВИРОБНИЦТВО")): 
        tt1 = t.next0_
        if (tt1 is not None and ((tt1.is_hiphen or tt1.is_char(':')))): 
            tt1 = tt1.next0_
        if (isinstance(tt1, ReferentToken)): 
            if ((isinstance(tt1.get_referent(), OrganizationReferent)) or (isinstance(tt1.get_referent(), GeoReferent))): 
                return WeaponItemToken._new2769(t, tt1, WeaponItemToken.Typs.DEVELOPER, tt1.get_referent())
    return None
def __try_parse_ru(first: 'Token', typ: 'NounPhraseParseAttr', max_char_pos: int, def_noun: 'NounPhraseItem' = None) -> 'NounPhraseToken':
    """Parse a Russian noun phrase starting at *first*.

    Collects a run of NounPhraseItem candidates (adjectives and noun
    candidates, optionally joined by commas/conjunctions), chooses the head
    noun, reconciles the morphology of adjectives with the noun, and returns
    a NounPhraseToken — or None if no acceptable phrase is found.

    :param first: first token of the candidate phrase.
    :param typ: bit flags (NounPhraseParseAttr) controlling optional
        behaviors: prepositions, adverbs, pronouns, verbs, multi-line, etc.
    :param max_char_pos: hard right boundary (0 = unlimited).
    :param def_noun: optional externally supplied head-noun item appended
        to the collected items.
    :return: parsed NounPhraseToken or None.

    NOTE(review): auto-generated (transpiled) code; comments added, logic
    untouched. ``first_passNNNN`` flags emulate C# do-while loops; slice
    deletes like ``del items[i:i + len(items) - i]`` truncate the list at i.
    """
    if (first is None): 
        return None
    items = None
    adverbs = None
    prep = None
    kak = False
    t0 = first
    # Optional "КАК <prep> ..." ("such as ...") prefix when preposition
    # parsing is enabled.
    if ((((typ) & (NounPhraseParseAttr.PARSEPREPOSITION))) != (NounPhraseParseAttr.NO) and t0.is_value("КАК", None)): 
        t0 = t0.next0_
        prep = PrepositionHelper.try_parse(t0)
        if (prep is not None): 
            t0 = prep.end_token.next0_
        kak = True
    internal_noun_prase = None
    conj_before = False
    t = t0
    # --- Phase 1: scan tokens, collecting candidate items -----------------
    first_pass3041 = True
    while True:
        if first_pass3041: first_pass3041 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (max_char_pos > 0 and t.begin_char > max_char_pos): 
            break
        # Pure conjunction ("и"/"или"): allow one between items.
        if ((t.morph.class0_.is_conjunction and not t.morph.class0_.is_adjective and not t.morph.class0_.is_pronoun) and not t.morph.class0_.is_noun): 
            if (conj_before): 
                break
            if ((((typ) & (NounPhraseParseAttr.CANNOTHASCOMMAAND))) != (NounPhraseParseAttr.NO)): 
                break
            if (items is not None and ((t.is_and or t.is_or))): 
                conj_before = True
                # Skip "и\\/или" and "и(или)" composite connectives.
                if ((t.next0_ is not None and t.next0_.is_char_of("\\/") and t.next0_.next0_ is not None) and t.next0_.next0_.is_or): 
                    t = t.next0_.next0_
                if (((t.next0_ is not None and t.next0_.is_char('(') and t.next0_.next0_ is not None) and t.next0_.next0_.is_or and t.next0_.next0_.next0_ is not None) and t.next0_.next0_.next0_.is_char(')')): 
                    t = t.next0_.next0_.next0_
                continue
            break
        elif (t.is_comma): 
            if (conj_before or items is None): 
                break
            if ((((typ) & (NounPhraseParseAttr.CANNOTHASCOMMAAND))) != (NounPhraseParseAttr.NO)): 
                break
            mc = t.previous.get_morph_class_in_dictionary()
            # Don't join items across a surname boundary.
            if (mc.is_proper_surname or mc.is_proper_secname): 
                break
            conj_before = True
            # "КАК ..., ТАК И <prep>" correlative construction.
            if (kak and t.next0_ is not None and t.next0_.is_value("ТАК", None)): 
                t = t.next0_
                if (t.next0_ is not None and t.next0_.is_and): 
                    t = t.next0_
                pr = PrepositionHelper.try_parse(t.next0_)
                if (pr is not None): 
                    t = pr.end_token
            if (items[len(items) - 1].can_be_noun and items[len(items) - 1].end_token.morph.class0_.is_pronoun): 
                break
            continue
        elif (t.is_char('(')): 
            # A bracketed insertion inside the phrase is skipped whole.
            if (items is None): 
                return None
            brr = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
            if (brr is None): 
                break
            if (brr.length_char > 100): 
                break
            t = brr.end_token
            continue
        if (isinstance(t, ReferentToken)): 
            if ((((typ) & (NounPhraseParseAttr.REFERENTCANBENOUN))) == (NounPhraseParseAttr.NO)): 
                break
        elif (t.chars.is_latin_letter): 
            break
        it = NounPhraseItem.try_parse(t, items, typ)
        if (it is None or ((not it.can_be_adj and not it.can_be_noun))): 
            # Unknown capitalized long word right after items: accept as noun.
            if (((it is not None and items is not None and t.chars.is_capital_upper) and (t.whitespaces_before_count < 3) and t.length_char > 3) and not t.get_morph_class_in_dictionary().is_noun and not t.get_morph_class_in_dictionary().is_adjective): 
                it.can_be_noun = True
                items.append(it)
                break
            if ((((typ) & (NounPhraseParseAttr.PARSEADVERBS))) != (NounPhraseParseAttr.NO) and (isinstance(t, TextToken)) and t.morph.class0_.is_adverb): 
                if (adverbs is None): 
                    adverbs = list()
                adverbs.append(Utils.asObjectOrNull(t, TextToken))
                continue
            break
        it.conj_before = conj_before
        conj_before = False
        if (not it.can_be_adj and not it.can_be_noun): 
            break
        # Newline inside the phrase: allowed only with MULTILINES or when
        # capitalization is consistent with a wrapped phrase.
        if (t.is_newline_before and t != first): 
            if ((((typ) & (NounPhraseParseAttr.MULTILINES))) != (NounPhraseParseAttr.NO)): 
                pass
            elif (items is not None and t.chars != items[len(items) - 1].chars): 
                if (t.chars.is_all_lower and items[len(items) - 1].chars.is_capital_upper): 
                    pass
                else: 
                    break
        if (items is None): 
            items = list()
        else: 
            it0 = items[len(items) - 1]
            # Personal pronoun followed by another pronoun, or after a verb:
            # restrict what may continue the phrase.
            if (it0.can_be_noun and it0.is_personal_pronoun): 
                if (it.is_pronoun): 
                    break
                if ((it0.begin_token.previous is not None and it0.begin_token.previous.get_morph_class_in_dictionary().is_verb and not it0.begin_token.previous.get_morph_class_in_dictionary().is_adjective) and not it0.begin_token.previous.get_morph_class_in_dictionary().is_preposition): 
                    if (t.morph.case_.is_nominative or t.morph.case_.is_accusative): 
                        pass
                    else: 
                        break
            if (it.can_be_noun and it.is_verb): 
                if (it0.previous is None): 
                    pass
                elif ((isinstance(it0.previous, TextToken)) and not it0.previous.chars.is_letter): 
                    pass
                else: 
                    break
        items.append(it)
        t = it.end_token
        # Don't continue past a line break onto what looks like a surname.
        if (t.is_newline_after and not t.chars.is_all_lower): 
            mc = t.get_morph_class_in_dictionary()
            if (mc.is_proper_surname): 
                break
            if (t.morph.class0_.is_proper_surname and mc.is_undefined): 
                break
    if (items is None): 
        return None
    # --- Phase 2: "adj AND <other phrase>" — share the noun ----------------
    tt1 = None
    if (len(items) == 1 and items[0].can_be_adj): 
        and0_ = False
        tt1 = items[0].end_token.next0_
        first_pass3042 = True
        while True:
            if first_pass3042: first_pass3042 = False
            else: tt1 = tt1.next0_
            if (not (tt1 is not None)): break
            if (tt1.is_and or tt1.is_or): 
                and0_ = True
                break
            if (tt1.is_comma or tt1.is_value("НО", None) or tt1.is_value("ТАК", None)): 
                continue
            break
        if (and0_): 
            if (items[0].can_be_noun and items[0].is_personal_pronoun): 
                and0_ = False
        if (and0_): 
            tt2 = tt1.next0_
            if (tt2 is not None and tt2.morph.class0_.is_preposition): 
                tt2 = tt2.next0_
            npt1 = _NounPraseHelperInt.__try_parse_ru(tt2, typ, max_char_pos, None)
            if (npt1 is not None and len(npt1.adjectives) > 0): 
                ok1 = False
                # The lone adjective must agree with the right phrase's noun.
                for av in items[0].adj_morph: 
                    for v in npt1.noun.noun_morph: 
                        if (v.check_accord(av, False, False)): 
                            items[0].morph.add_item(av)
                            ok1 = True
                if (ok1): 
                    npt1.begin_token = items[0].begin_token
                    npt1.end_token = tt1.previous
                    npt1.adjectives.clear()
                    npt1.adjectives.append(items[0])
                    return npt1
    if (def_noun is not None): 
        items.append(def_noun)
    # --- Phase 3: internal phrase after the items ("<adjs> <prep-phrase> noun")
    last1 = items[len(items) - 1]
    check = True
    for it in items: 
        if (not it.can_be_adj): 
            check = False
            break
        elif (it.can_be_noun and it.is_personal_pronoun): 
            check = False
            break
    tt1 = last1.end_token.next0_
    if ((tt1 is not None and check and ((tt1.morph.class0_.is_preposition or tt1.morph.case_.is_instrumental))) and (tt1.whitespaces_before_count < 2)): 
        inp = NounPhraseHelper.try_parse(tt1, Utils.valToEnum((typ) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), max_char_pos, None)
        if (inp is not None): 
            tt1 = inp.end_token.next0_
            npt1 = _NounPraseHelperInt.__try_parse_ru(tt1, typ, max_char_pos, None)
            if (npt1 is not None): 
                ok = True
                ii = 0
                first_pass3043 = True
                while True:
                    if first_pass3043: first_pass3043 = False
                    else: ii += 1
                    if (not (ii < len(items))): break
                    it = items[ii]
                    if (NounPhraseItem.try_accord_adj_and_noun(it, Utils.asObjectOrNull(npt1.noun, NounPhraseItem))): 
                        continue
                    if (ii > 0): 
                        inp2 = NounPhraseHelper.try_parse(it.begin_token, typ, max_char_pos, None)
                        if (inp2 is not None and inp2.end_token == inp.end_token): 
                            # Re-anchor: items from ii belong to inp2, drop them.
                            del items[ii:ii + len(items) - ii]
                            inp = inp2
                            break
                    ok = False
                    break
                if (ok): 
                    if (npt1.morph.case_.is_genitive and not inp.morph.case_.is_instrumental): 
                        ok = False
                if (ok): 
                    i = 0
                    while i < len(items): 
                        npt1.adjectives.insert(i, items[i])
                        i += 1
                    npt1.internal_noun = inp
                    # Narrow npt1's morphology by removing the adjectives' variants.
                    mmm = MorphCollection(npt1.morph)
                    for it in items: 
                        mmm.remove_items(it.adj_morph[0], False)
                    if (mmm.gender != MorphGender.UNDEFINED or mmm.number != MorphNumber.UNDEFINED or not mmm.case_.is_undefined): 
                        npt1.morph = mmm
                    if (adverbs is not None): 
                        if (npt1.adverbs is None): 
                            npt1.adverbs = adverbs
                        else: 
                            npt1.adverbs[0:0] = adverbs
                    npt1.begin_token = first
                    return npt1
            if (tt1 is not None and tt1.morph.class0_.is_noun and not tt1.morph.case_.is_genitive): 
                it = NounPhraseItem.try_parse(tt1, items, typ)
                if (it is not None and it.can_be_noun): 
                    internal_noun_prase = inp
                    inp.begin_token = items[0].end_token.next0_
                    items.append(it)
    # --- Phase 4: participle (verb-adjective) with its own arguments -------
    i = 0
    first_pass3044 = True
    while True:
        if first_pass3044: first_pass3044 = False
        else: i += 1
        if (not (i < len(items))): break
        if (items[i].can_be_adj and items[i].begin_token.morph.class0_.is_verb): 
            it = items[i].begin_token
            if (not it.get_morph_class_in_dictionary().is_verb): 
                continue
            if (it.is_value("УПОЛНОМОЧЕННЫЙ", None)): 
                continue
            if ((((typ) & (NounPhraseParseAttr.PARSEVERBS))) == (NounPhraseParseAttr.NO)): 
                continue
            inp = _NounPraseHelperInt.__try_parse_ru(items[i].end_token.next0_, NounPhraseParseAttr.NO, max_char_pos, None)
            if (inp is None): 
                continue
            if (inp.anafor is not None and i == (len(items) - 1) and NounPhraseItem.try_accord_adj_and_noun(items[i], Utils.asObjectOrNull(inp.noun, NounPhraseItem))): 
                inp.begin_token = first
                ii = 0
                while ii < len(items): 
                    inp.adjectives.insert(ii, items[ii])
                    ii += 1
                return inp
            if (inp.end_token.whitespaces_after_count > 3): 
                continue
            npt1 = _NounPraseHelperInt.__try_parse_ru(inp.end_token.next0_, NounPhraseParseAttr.NO, max_char_pos, None)
            if (npt1 is None): 
                continue
            ok = True
            j = 0
            while j <= i: 
                if (not NounPhraseItem.try_accord_adj_and_noun(items[j], Utils.asObjectOrNull(npt1.noun, NounPhraseItem))): 
                    ok = False
                    break
                j += 1
            if (not ok): 
                continue
            verb = VerbPhraseHelper.try_parse(it, True, False, False)
            if (verb is None): 
                continue
            # Prefer the reading where the inner phrase attaches to the verb.
            vlinks = SemanticHelper.try_create_links(verb, inp, None)
            nlinks = SemanticHelper.try_create_links(inp, npt1, None)
            if (len(vlinks) == 0 and len(nlinks) > 0): 
                continue
            j = 0
            while j <= i: 
                npt1.adjectives.insert(j, items[j])
                j += 1
            items[i].end_token = inp.end_token
            mmm = MorphCollection(npt1.morph)
            bil = list()
            j = 0
            while j <= i: 
                bil.clear()
                for m in items[j].adj_morph: 
                    bil.append(m)
                mmm.remove_items_list_cla(bil, None)
                j += 1
            if (mmm.gender != MorphGender.UNDEFINED or mmm.number != MorphNumber.UNDEFINED or not mmm.case_.is_undefined): 
                npt1.morph = mmm
            if (adverbs is not None): 
                if (npt1.adverbs is None): 
                    npt1.adverbs = adverbs
                else: 
                    npt1.adverbs[0:0] = adverbs
            npt1.begin_token = first
            return npt1
    # --- Phase 5: adjective AFTER the noun (ADJECTIVECANBELAST) ------------
    ok2 = False
    if ((len(items) == 1 and (((typ) & (NounPhraseParseAttr.ADJECTIVECANBELAST))) != (NounPhraseParseAttr.NO) and (items[0].whitespaces_after_count < 3)) and not items[0].is_adverb): 
        if (not items[0].can_be_adj): 
            ok2 = True
        elif (items[0].is_personal_pronoun and items[0].can_be_noun): 
            ok2 = True
    if (ok2): 
        it = NounPhraseItem.try_parse(items[0].end_token.next0_, None, typ)
        if (it is not None and it.can_be_adj and it.begin_token.chars.is_all_lower): 
            ok2 = True
            if (it.is_adverb or it.is_verb): 
                ok2 = False
            if (it.is_pronoun and items[0].is_pronoun): 
                ok2 = False
            if (it.can_be_adj_for_personal_pronoun and items[0].is_personal_pronoun): 
                ok2 = True
            if (ok2 and NounPhraseItem.try_accord_adj_and_noun(it, items[0])): 
                npt1 = _NounPraseHelperInt.__try_parse_ru(it.begin_token, typ, max_char_pos, None)
                if (npt1 is not None and ((npt1.end_char > it.end_char or len(npt1.adjectives) > 0))): 
                    pass
                else: 
                    items.insert(0, it)
    # --- Phase 6: choose the head noun (rightmost viable candidate) --------
    noun = None
    adj_after = None
    for i in range(len(items) - 1, -1, -1): 
        if (items[i].can_be_noun): 
            if (items[i].conj_before): 
                continue
            if (i > 0 and not items[i - 1].can_be_adj): 
                continue
            if (i > 0 and items[i - 1].can_be_noun): 
                if (items[i - 1].is_doubt_adjective): 
                    continue
                if (items[i - 1].is_pronoun and items[i].is_pronoun): 
                    if (items[i].is_pronoun and items[i - 1].can_be_adj_for_personal_pronoun): 
                        pass
                    else: 
                        continue
            noun = items[i]
            # Truncate: items left of the noun become its adjectives.
            del items[i:i + len(items) - i]
            if (adj_after is not None): 
                items.append(adj_after)
            elif (len(items) > 0 and items[0].can_be_noun and not items[0].can_be_adj): 
                noun = items[0]
                items.clear()
            break
    if (noun is None): 
        return None
    # --- Phase 7: build the result and reconcile morphology ----------------
    res = NounPhraseToken._new466(first, noun.end_token, prep)
    if (adverbs is not None): 
        for a in adverbs: 
            if (a.begin_char < noun.begin_char): 
                if (len(items) == 0 and prep is None): 
                    return None
                if (res.adverbs is None): 
                    res.adverbs = list()
                res.adverbs.append(a)
    res.noun = (noun)
    res.multi_nouns = noun.multi_nouns
    if (kak): 
        res.multi_nouns = True
    res.internal_noun = internal_noun_prase
    for v in noun.noun_morph: 
        noun.morph.add_item(v)
    res.morph = noun.morph
    # A nominative reading directly after a preposition is impossible; drop it.
    if (res.morph.case_.is_nominative and first.previous is not None and first.previous.morph.class0_.is_preposition): 
        res.morph.case_ = (res.morph.case_) ^ MorphCase.NOMINATIVE
    if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (NounPhraseParseAttr.NO) and ((res.morph.class0_.is_pronoun or res.morph.class0_.is_personal_pronoun))): 
        return None
    stat = None
    if (len(items) > 1): 
        stat = dict()
    need_update_morph = False
    # Keep only noun morphology variants that every adjective agrees with.
    if (len(items) > 0): 
        ok_list = list()
        is_num_not = False
        for vv in noun.noun_morph: 
            i = 0
            v = vv
            i = 0
            while i < len(items): 
                ok = False
                for av in items[i].adj_morph: 
                    if (v.check_accord(av, False, False)): 
                        ok = True
                        if (not ((av.case_) & v.case_).is_undefined and av.case_ != v.case_): 
                            v.case_ = av.case_ = (av.case_) & v.case_
                        break
                if (not ok): 
                    # Numeric adjective ("пять ...") forces plural reading.
                    if (items[i].can_be_numeric_adj and items[i].try_accord_var(v, False)): 
                        ok = True
                        v1 = NounPhraseItemTextVar()
                        v1.copy_from_item(v)
                        v1.number = MorphNumber.PLURAL
                        is_num_not = True
                        v1.case_ = MorphCase()
                        for a in items[i].adj_morph: 
                            v1.case_ = (v1.case_) | a.case_
                        v = v1
                    else: 
                        break
                i += 1
            if (i >= len(items)): 
                ok_list.append(v)
        if (len(ok_list) > 0 and (((len(ok_list) < res.morph.items_count) or is_num_not))): 
            res.morph = MorphCollection()
            for v in ok_list: 
                res.morph.add_item(v)
            if (not is_num_not): 
                noun.morph = res.morph
    # Attach each adjective, record normal-form-ending statistics in `stat`.
    i = 0
    first_pass3045 = True
    while True:
        if first_pass3045: first_pass3045 = False
        else: i += 1
        if (not (i < len(items))): break
        for av in items[i].adj_morph: 
            for v in noun.noun_morph: 
                if (v.check_accord(av, False, False)): 
                    if (not ((av.case_) & v.case_).is_undefined and av.case_ != v.case_): 
                        v.case_ = av.case_ = (av.case_) & v.case_
                        need_update_morph = True
                    items[i].morph.add_item(av)
                    if (stat is not None and av.normal_value is not None and len(av.normal_value) > 1): 
                        last = av.normal_value[len(av.normal_value) - 1]
                        if (not last in stat): 
                            stat[last] = 1
                        else: 
                            stat[last] += 1
        if (items[i].is_pronoun or items[i].is_personal_pronoun): 
            res.anafor = items[i].begin_token
            if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (NounPhraseParseAttr.NO)): 
                continue
        tt = Utils.asObjectOrNull(items[i].begin_token, TextToken)
        if (tt is not None and not tt.term.startswith("ВЫСШ")): 
            err = False
            for wf in tt.morph.items: 
                if (wf.class0_.is_adjective): 
                    # "прев." = superlative attribute; "к.ф." = short form.
                    if (wf.contains_attr("прев.", None)): 
                        if ((((typ) & (NounPhraseParseAttr.IGNOREADJBEST))) != (NounPhraseParseAttr.NO)): 
                            err = True
                    if (wf.contains_attr("к.ф.", None) and tt.morph.class0_.is_personal_pronoun): 
                        return None
            if (err): 
                continue
        if (res.morph.case_.is_nominative): 
            v = MiscHelper.get_text_value_of_meta_token(items[i], GetTextAttr.KEEPQUOTES)
            if (not Utils.isNullOrEmpty(v)): 
                if (items[i].get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False) != v): 
                    wf = NounPhraseItemTextVar(items[i].morph, None)
                    wf.normal_value = v
                    wf.class0_ = MorphClass.ADJECTIVE
                    wf.case_ = res.morph.case_
                    if (res.morph.case_.is_prepositional or res.morph.gender == MorphGender.NEUTER or res.morph.gender == MorphGender.FEMINIE): 
                        items[i].morph.add_item(wf)
                    else: 
                        items[i].morph.insert_item(0, wf)
        res.adjectives.append(items[i])
        if (items[i].end_char > res.end_char): 
            res.end_token = items[i].end_token
    # Reject phrases whose adjectives are separated by large gaps with
    # inconsistent capitalization.
    i = 0
    first_pass3046 = True
    while True:
        if first_pass3046: first_pass3046 = False
        else: i += 1
        if (not (i < (len(res.adjectives) - 1))): break
        if (res.adjectives[i].whitespaces_after_count > 5): 
            if (res.adjectives[i].chars != res.adjectives[i + 1].chars): 
                if (not res.adjectives[i + 1].chars.is_all_lower): 
                    return None
                if (res.adjectives[i].chars.is_all_upper and res.adjectives[i + 1].chars.is_capital_upper): 
                    return None
                if (res.adjectives[i].chars.is_capital_upper and res.adjectives[i + 1].chars.is_all_upper): 
                    return None
            if (res.adjectives[i].whitespaces_after_count > 10): 
                if (res.adjectives[i].newlines_after_count == 1): 
                    if (res.adjectives[i].chars.is_capital_upper and i == 0 and res.adjectives[i + 1].chars.is_all_lower): 
                        continue
                    if (res.adjectives[i].chars == res.adjectives[i + 1].chars): 
                        continue
                return None
    if (need_update_morph): 
        noun.morph = MorphCollection()
        for v in noun.noun_morph: 
            noun.morph.add_item(v)
        res.morph = noun.morph
    # Validate punctuation pattern between listed adjectives (commas/ands).
    if (len(res.adjectives) > 0): 
        if (noun.begin_token.previous is not None): 
            if (noun.begin_token.previous.is_comma_and): 
                if (res.adjectives[0].begin_char > noun.begin_char): 
                    pass
                else: 
                    return None
        zap = 0
        and0_ = 0
        cou = 0
        last_and = False
        i = 0
        while i < (len(res.adjectives) - 1): 
            te = res.adjectives[i].end_token.next0_
            if (te is None): 
                return None
            if (te.is_char('(')): 
                pass
            elif (te.is_comma): 
                zap += 1
                last_and = False
            elif (te.is_and or te.is_or): 
                and0_ += 1
                last_and = True
            if (not res.adjectives[i].begin_token.morph.class0_.is_pronoun): 
                cou += 1
            i += 1
        if ((zap + and0_) > 0): 
            if (and0_ > 1): 
                return None
            elif (and0_ == 1 and not last_and): 
                return None
            if ((zap + and0_) != cou): 
                if (and0_ == 1): 
                    pass
                else: 
                    return None
            last = Utils.asObjectOrNull(res.adjectives[len(res.adjectives) - 1], NounPhraseItem)
            if (last.is_pronoun and not last_and): 
                return None
    # Reorder ambiguous adjective normal forms by ending-letter frequency.
    if (stat is not None): 
        for adj in items: 
            if (adj.morph.items_count > 1): 
                w1 = Utils.asObjectOrNull(adj.morph.get_indexer_item(0), NounPhraseItemTextVar)
                w2 = Utils.asObjectOrNull(adj.morph.get_indexer_item(1), NounPhraseItemTextVar)
                if ((len(w1.normal_value) < 2) or (len(w2.normal_value) < 2)): 
                    break
                l1 = w1.normal_value[len(w1.normal_value) - 1]
                l2 = w2.normal_value[len(w2.normal_value) - 1]
                i1 = 0
                i2 = 0
                wrapi1468 = RefOutArgWrapper(0)
                Utils.tryGetValue(stat, l1, wrapi1468)
                i1 = wrapi1468.value
                wrapi2467 = RefOutArgWrapper(0)
                Utils.tryGetValue(stat, l2, wrapi2467)
                i2 = wrapi2467.value
                if (i1 < i2): 
                    adj.morph.remove_item(1)
                    adj.morph.insert_item(0, w2)
    # Reject a phrase starting with a verb form that actually continues
    # a preceding clause ("..., <verb-form> <noun>" in instrumental).
    if (res.begin_token.get_morph_class_in_dictionary().is_verb and len(items) > 0): 
        if (not res.begin_token.chars.is_all_lower or res.begin_token.previous is None): 
            pass
        elif (res.begin_token.previous.morph.class0_.is_preposition): 
            pass
        else: 
            comma = False
            tt = res.begin_token.previous
            first_pass3047 = True
            while True:
                if first_pass3047: first_pass3047 = False
                else: tt = tt.previous
                if (not (tt is not None and tt.end_char <= res.end_char)): break
                if (tt.morph.class0_.is_adverb): 
                    continue
                if (tt.is_char_of(".;")): 
                    break
                if (tt.is_comma): 
                    comma = True
                    continue
                if (tt.is_value("НЕ", None)): 
                    continue
                if (((tt.morph.class0_.is_noun or tt.morph.class0_.is_proper)) and comma): 
                    for it in res.begin_token.morph.items: 
                        if (it.class0_.is_verb and (isinstance(it, MorphWordForm))): 
                            if (tt.morph.check_accord(it, False, False)): 
                                if (res.morph.case_.is_instrumental): 
                                    return None
                    break
    # A single-token phrase that is only an adverb in the dictionary is
    # rejected unless context makes a noun reading plausible.
    if (res.begin_token == res.end_token): 
        mc = res.begin_token.get_morph_class_in_dictionary()
        if (mc.is_adverb): 
            if (res.begin_token.previous is not None and res.begin_token.previous.morph.class0_.is_preposition): 
                pass
            elif (mc.is_noun and not mc.is_preposition and not mc.is_conjunction): 
                pass
            elif (res.begin_token.is_value("ВЕСЬ", None)): 
                pass
            else: 
                return None
    if (def_noun is not None and def_noun.end_token == res.end_token and len(res.adjectives) > 0): 
        res.end_token = res.adjectives[len(res.adjectives) - 1].end_token
    return res
def __try_parse(t: 'Token', is_in_lit: bool, max_char: int = 0) -> typing.List['ReferentToken']: if (t is None): return None is_bracket_regime = False if (t.previous is not None and t.previous.is_char('(')): is_bracket_regime = True blt = BookLinkToken.try_parse(t, 0) if (blt is None): blt = BookLinkToken.try_parse_author(t, FioTemplateType.UNDEFINED) if (blt is None and not is_bracket_regime): return None t0 = t coef = 0 is_electr_res = False decree = None regtyp = BookLinkAnalyzer.RegionTyp.UNDEFINED num = None spec_see = None book_prev = None if (is_bracket_regime): regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS elif (blt.typ == BookLinkTyp.PERSON): if (not is_in_lit): return None regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS elif (blt.typ == BookLinkTyp.NUMBER): num = blt.value t = blt.end_token.next0_ if (t is None or t.is_newline_before): return None if (not t.is_whitespace_before): if (isinstance(t, NumberToken)): n = t.value if ((((n == "3" or n == "0")) and not t.is_whitespace_after and (isinstance(t.next0_, TextToken))) and t.next0_.chars.is_all_lower): pass else: return None elif (not (isinstance(t, TextToken)) or t.chars.is_all_lower): r = t.get_referent() if (isinstance(r, PersonReferent)): pass elif (is_in_lit and r is not None and r.type_name == "DECREE"): pass else: return None first_pass3025 = True while True: if first_pass3025: first_pass3025 = False else: t = t.next0_ if (not (t is not None)): break if (isinstance(t, NumberToken)): break if (not (isinstance(t, TextToken))): break if (BracketHelper.can_be_start_of_sequence(t, True, False)): break if (not t.chars.is_letter): continue bbb = BookLinkToken.try_parse(t, 0) if (bbb is not None): if (bbb.typ == BookLinkTyp.TAMZE): spec_see = bbb t = bbb.end_token.next0_ break if (bbb.typ == BookLinkTyp.SEE): t = bbb.end_token continue break if (spec_see is not None and spec_see.typ == BookLinkTyp.TAMZE): coef += 1 max0_ = 1000 tt = t0 while tt is not None and max0_ > 0: if (isinstance(tt.get_referent(), 
BookLinkRefReferent)): book_prev = tt.get_referent().book break tt = tt.previous max0_ -= 1 blt1 = BookLinkToken.try_parse_author(t, FioTemplateType.UNDEFINED) if (blt1 is not None and blt1.typ == BookLinkTyp.PERSON): regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS else: ok = False tt = t first_pass3026 = True while True: if first_pass3026: first_pass3026 = False else: tt = (None if tt is None else tt.next0_) if (not (tt is not None)): break if (tt.is_newline_before): break if (is_in_lit and tt.get_referent() is not None and tt.get_referent().type_name == "DECREE"): ok = True decree = tt break bbb = BookLinkToken.try_parse(tt, 0) if (bbb is None): continue if (bbb.typ == BookLinkTyp.ELECTRONRES): is_electr_res = True ok = True break if (bbb.typ == BookLinkTyp.DELIMETER): tt = bbb.end_token.next0_ if (BookLinkToken.try_parse_author( tt, FioTemplateType.UNDEFINED) is not None): ok = True break bbb = BookLinkToken.try_parse(tt, 0) if (bbb is not None): if (bbb.typ == BookLinkTyp.EDITORS or bbb.typ == BookLinkTyp.TRANSLATE or bbb.typ == BookLinkTyp.SOSTAVITEL): ok = True break if (not ok and not is_in_lit): if (BookLinkToken.check_link_before(t0, num)): pass else: return None regtyp = BookLinkAnalyzer.RegionTyp.NAME else: return None res = BookLinkReferent() corr_authors = list() t00 = t blt00 = None start_of_name = None prev_pers_templ = FioTemplateType.UNDEFINED if (regtyp == BookLinkAnalyzer.RegionTyp.AUTHORS): first_pass3027 = True while True: if first_pass3027: first_pass3027 = False else: t = t.next0_ if (not (t is not None)): break if (max_char > 0 and t.begin_char >= max_char): break if (t.is_char_of(".;") or t.is_comma_and): continue if (t.is_char('/')): break if ((t.is_char('(') and t.next0_ is not None and t.next0_.is_value("EDS", None)) and t.next0_.next0_ is not None and t.next0_.next0_.is_char(')')): t = t.next0_.next0_.next0_ break blt = BookLinkToken.try_parse_author(t, prev_pers_templ) if (blt is None and t.previous is not None and t.previous.is_and): blt 
= BookLinkToken.try_parse_author( t.previous, FioTemplateType.UNDEFINED) if (blt is None): if ((isinstance(t.get_referent(), OrganizationReferent)) and blt00 is not None): bbb2 = BookLinkToken.try_parse(t.next0_, 0) if (bbb2 is not None): if (bbb2.typ == BookLinkTyp.YEAR): res.add_slot(BookLinkReferent.ATTR_AUTHOR, t.get_referent(), False, 0) res.year = int(bbb2.value) coef += 0.5 t = bbb2.end_token.next0_ break if (blt.typ == BookLinkTyp.PERSON): tt2 = blt.end_token.next0_ bbb2 = BookLinkToken.try_parse(tt2, 0) if (bbb2 is not None): if (bbb2.typ == BookLinkTyp.YEAR): res.year = int(bbb2.value) coef += 0.5 blt.end_token = bbb2.end_token blt00 = (None) if (blt00 is not None and ((blt00.end_token.next0_ == blt.begin_token or blt.begin_token.previous.is_char('.')))): tt11 = blt.end_token.next0_ nex = BookLinkToken.try_parse(tt11, 0) if (nex is not None and nex.typ == BookLinkTyp.ANDOTHERS): pass else: if (tt11 is None): break if (tt11.is_char('/') and tt11.next0_ is not None and tt11.next0_.is_char('/')): break if (tt11.is_char(':')): break if ((str(blt).find('.') < 0) and str(blt00).find('.') > 0): break if ((isinstance(tt11, TextToken)) and tt11.chars.is_all_lower): break if (tt11.is_char_of(",.;") and tt11.next0_ is not None): tt11 = tt11.next0_ nex = BookLinkToken.try_parse(tt11, 0) if (nex is not None and nex.typ != BookLinkTyp.PERSON and nex.typ != BookLinkTyp.ANDOTHERS): break elif ( (blt00 is not None and blt00.person_template != FioTemplateType.UNDEFINED and blt.person_template != blt00.person_template) and blt.person_template == FioTemplateType.NAMESURNAME): if (blt.end_token.next0_ is None or not blt.end_token.next0_.is_comma_and): break if (BookLinkToken.try_parse_author( blt.end_token.next0_.next0_, FioTemplateType.UNDEFINED) is not None): pass else: break if (blt00 is None and blt.person_template == FioTemplateType.NAMESURNAME): tt = blt.end_token.next0_ if (tt is not None and tt.is_hiphen): tt = tt.next0_ if (isinstance(tt, NumberToken)): break 
BookLinkAnalyzer.__add_author(res, blt) coef += 1 t = blt.end_token if (isinstance(t.get_referent(), PersonReferent)): corr_authors.append( Utils.asObjectOrNull(t, ReferentToken)) blt00 = blt prev_pers_templ = blt.person_template start_of_name = blt.start_of_name if ((start_of_name) is not None): t = t.next0_ break continue if (blt.typ == BookLinkTyp.ANDOTHERS): coef += 0.5 t = blt.end_token.next0_ res.authors_and_other = True break break if (t is None): return None if ((t.is_newline_before and t != t0 and num is None) and res.find_slot(BookLinkReferent.ATTR_AUTHOR, None, True) is None): return None if (start_of_name is None): if (t.chars.is_all_lower): coef -= (1) if (t.chars.is_latin_letter and not is_electr_res and num is None): if (res.get_slot_value(BookLinkReferent.ATTR_AUTHOR) is None): return None tn0 = t tn1 = None uri = None next_num = None nn = 0 wrapnn376 = RefOutArgWrapper(0) inoutres377 = Utils.tryParseInt(Utils.ifNotNull(num, ""), wrapnn376) nn = wrapnn376.value if (inoutres377): next_num = str((nn + 1)) br = (BracketHelper.try_parse( t, Utils.valToEnum( (BracketParseAttr.CANCONTAINSVERBS) | (BracketParseAttr.CANBEMANYLINES), BracketParseAttr), 100) if BracketHelper.can_be_start_of_sequence(t, True, False) else None) if (br is not None): t = t.next0_ pages = None first_pass3028 = True while True: if first_pass3028: first_pass3028 = False else: t = t.next0_ if (not (t is not None)): break if (max_char > 0 and t.begin_char >= max_char): break if (br is not None and br.end_token == t): tn1 = t break tit = TitleItemToken.try_attach(t) if (tit is not None): if ((tit.typ == TitleItemToken.Types.TYP and tn0 == t and br is None) and BracketHelper.can_be_start_of_sequence( tit.end_token.next0_, True, False)): br = BracketHelper.try_parse(tit.end_token.next0_, BracketParseAttr.NO, 100) if (br is not None): coef += (1) if (num is not None): coef += 1 tn0 = br.begin_token tn1 = br.end_token res.typ = tit.value.lower() t = br.end_token.next0_ break if 
(t.is_newline_before and t != tn0): if (br is not None and (t.end_char < br.end_char)): pass elif (not MiscHelper.can_be_start_of_sentence(t)): pass else: if (t.newlines_before_count > 1): break if ((isinstance(t, NumberToken)) and num is not None and t.int_value is not None): if (num == str((t.int_value - 1))): break elif (num is not None): pass else: nnn = NounPhraseHelper.try_parse( t.previous, Utils.valToEnum( ((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.PARSEADVERBS) | (NounPhraseParseAttr.PARSENUMERICASADJECTIVE)) | (NounPhraseParseAttr.MULTILINES), NounPhraseParseAttr), 0, None) if (nnn is not None and nnn.end_char >= t.end_char): pass else: break if (t.is_char_of(".;") and t.whitespaces_after_count > 0): tit = TitleItemToken.try_attach(t.next0_) if ((tit) is not None): if (tit.typ == TitleItemToken.Types.TYP): break stop = True words = 0 notwords = 0 tt = t.next0_ first_pass3029 = True while True: if first_pass3029: first_pass3029 = False else: tt = tt.next0_ if (not (tt is not None)): break blt0 = BookLinkToken.try_parse(tt, 0) if (blt0 is None): if (tt.is_newline_before): break if ((isinstance(tt, TextToken)) and not tt. 
get_morph_class_in_dictionary().is_undefined): words += 1 else: notwords += 1 if (words > 6 and words > (notwords * 4)): stop = False break continue if ((blt0.typ == BookLinkTyp.DELIMETER or blt0.typ == BookLinkTyp.TRANSLATE or blt0.typ == BookLinkTyp.TYPE) or blt0.typ == BookLinkTyp.GEO or blt0.typ == BookLinkTyp.PRESS): stop = False break if (br is not None and br.end_token.previous.end_char > t.end_char): stop = False if (stop): break if (t == decree): t = t.next0_ break blt = BookLinkToken.try_parse(t, 0) if (blt is None): tn1 = t continue if (blt.typ == BookLinkTyp.DELIMETER): break if (((blt.typ == BookLinkTyp.MISC or blt.typ == BookLinkTyp.TRANSLATE or blt.typ == BookLinkTyp.NAMETAIL) or blt.typ == BookLinkTyp.TYPE or blt.typ == BookLinkTyp.VOLUME) or blt.typ == BookLinkTyp.PAGERANGE or blt.typ == BookLinkTyp.PAGES): coef += 1 break if (blt.typ == BookLinkTyp.GEO or blt.typ == BookLinkTyp.PRESS): if (t.previous.is_hiphen or t.previous.is_char_of(".;") or blt.add_coef > 0): break if (blt.typ == BookLinkTyp.YEAR): if (t.previous is not None and t.previous.is_comma): break if (blt.typ == BookLinkTyp.ELECTRONRES): is_electr_res = True break if (blt.typ == BookLinkTyp.URL): if (t == tn0 or t.previous.is_char_of(":.")): is_electr_res = True break tn1 = t if (tn1 is None and start_of_name is None): if (is_electr_res): uri_re = BookLinkReferent() rt0 = ReferentToken(uri_re, t00, t) rts0 = list() bref0 = BookLinkRefReferent._new372(uri_re) if (num is not None): bref0.number = num rt01 = ReferentToken(bref0, t0, rt0.end_token) ok = False while t is not None: if (t.is_newline_before): break blt0 = BookLinkToken.try_parse(t, 0) if (blt0 is not None): if (isinstance(blt0.ref, UriReferent)): uri_re.add_slot( BookLinkReferent.ATTR_URL, Utils.asObjectOrNull(blt0.ref, UriReferent), False, 0) ok = True t = blt0.end_token rt0.end_token = rt01.end_token = t t = t.next0_ if (ok): rts0.append(rt01) rts0.append(rt0) return rts0 if (decree is not None and num is not None): rts0 = 
list() bref0 = BookLinkRefReferent._new372(decree.get_referent()) if (num is not None): bref0.number = num rt01 = ReferentToken(bref0, t0, decree) t = decree.next0_ while t is not None: if (t.is_newline_before): break if (isinstance(t, TextToken)): if (t.is_pure_verb): return None rt01.end_token = t t = t.next0_ rts0.append(rt01) return rts0 if (book_prev is not None): tt = t while tt is not None and ((tt.is_char_of(",.") or tt.is_hiphen)): tt = tt.next0_ blt0 = BookLinkToken.try_parse(tt, 0) if (blt0 is not None and blt0.typ == BookLinkTyp.PAGERANGE): rts0 = list() bref0 = BookLinkRefReferent._new372(book_prev) if (num is not None): bref0.number = num bref0.pages = blt0.value rt00 = ReferentToken(bref0, t0, blt0.end_token) rts0.append(rt00) return rts0 return None if (br is not None and ((tn1 == br.end_token or tn1 == br.end_token.previous))): tn0 = tn0.next0_ tn1 = tn1.previous if (start_of_name is None): while tn0 is not None: if (tn0.is_char_of(":,~")): tn0 = tn0.next0_ else: break while tn1 is not None and tn1.begin_char > tn0.begin_char: if (tn1.is_char_of(".;,:(~") or tn1.is_hiphen or tn1.is_value("РЕД", None)): pass else: break tn1 = tn1.previous nam = MiscHelper.get_text_value( tn0, tn1, Utils.valToEnum( (GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER), GetTextAttr)) if (start_of_name is not None): if (nam is None or (len(nam) < 3)): nam = start_of_name else: nam = "{0}{1}{2}".format( start_of_name, (" " if tn0.is_whitespace_before else ""), nam) if (nam is None): return None res.name = nam if (num is None and not is_in_lit): if (len(nam) < 20): return None coef -= (2) if (len(nam) > 500): coef -= (math.floor(len(nam) / 500)) if (is_bracket_regime): coef -= 1 if (len(nam) > 200): if (num is None): return None if (res.find_slot(BookLinkReferent.ATTR_AUTHOR, None, True) is None and not BookLinkToken.check_link_before(t0, num)): return None en = 0 ru = 0 ua = 0 cha = 0 nocha = 0 chalen = 0 lt0 = tn0 lt1 = tn1 if (tn1 is None): if (t is None): return None 
lt0 = t0 lt1 = t tn1 = t.previous tt = lt0 while tt is not None and tt.end_char <= lt1.end_char: if ((isinstance(tt, TextToken)) and tt.chars.is_letter): if (tt.chars.is_latin_letter): en += 1 elif (tt.morph.language.is_ua): ua += 1 elif (tt.morph.language.is_ru): ru += 1 if (tt.length_char > 2): cha += 1 chalen += tt.length_char elif (not (isinstance(tt, ReferentToken))): nocha += 1 tt = tt.next0_ if (ru > (ua + en)): res.lang = "RU" elif (ua > (ru + en)): res.lang = "UA" elif (en > (ru + ua)): res.lang = "EN" if (nocha > 3 and nocha > cha and start_of_name is None): if (nocha > (math.floor(chalen / 3))): coef -= (2) if (res.lang == "EN"): tt = tn0.next0_ first_pass3030 = True while True: if first_pass3030: first_pass3030 = False else: tt = tt.next0_ if (not (tt is not None and (tt.end_char < tn1.end_char))): break if (tt.is_comma and tt.next0_ is not None and ((not tt.next0_.chars.is_all_lower or (isinstance(tt.next0_, ReferentToken))))): if (tt.next0_.next0_ is not None and tt.next0_.next0_.is_comma_and): if (isinstance(tt.next0_, ReferentToken)): pass else: continue nam = MiscHelper.get_text_value( tn0, tt.previous, Utils.valToEnum((GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER), GetTextAttr)) if (nam is not None and len(nam) > 15): res.name = nam break rt = ReferentToken(res, t00, tn1) authors = True edits = False br = (None) first_pass3031 = True while True: if first_pass3031: first_pass3031 = False else: t = t.next0_ if (not (t is not None)): break if (max_char > 0 and t.begin_char >= max_char): break if (BracketHelper.can_be_start_of_sequence(t, False, False)): br = BracketHelper.try_parse(t, BracketParseAttr.CANBEMANYLINES, 100) if (br is not None and br.length_char > 300): br = (None) blt = BookLinkToken.try_parse(t, 0) if (t.is_newline_before and not t.is_char('/') and not t.previous.is_char('/')): if (blt is not None and blt.typ == BookLinkTyp.NUMBER): break if (t.previous.is_char_of(":")): pass elif (blt is not None and (( ((blt.typ == 
BookLinkTyp.DELIMETER or blt.typ == BookLinkTyp.PAGERANGE or blt.typ == BookLinkTyp.PAGES) or blt.typ == BookLinkTyp.GEO or blt.typ == BookLinkTyp.PRESS) or blt.typ == BookLinkTyp.N))): pass elif (num is not None and BookLinkToken.try_parse_author( t, FioTemplateType.UNDEFINED) is not None): pass elif (num is not None and blt is not None and blt.typ != BookLinkTyp.NUMBER): pass elif (br is not None and (t.end_char < br.end_char) and t.begin_char > br.begin_char): pass else: ok = False mmm = 50 tt = t.next0_ while tt is not None and mmm > 0: if (tt.is_newline_before): blt2 = BookLinkToken.try_parse(tt, 0) if (blt2 is not None and blt2.typ == BookLinkTyp.NUMBER and blt2.value == next_num): ok = True break if (blt2 is not None): if (blt2.typ == BookLinkTyp.PAGES or blt2.typ == BookLinkTyp.GEO or blt2.typ == BookLinkTyp.PRESS): ok = True break tt = tt.next0_ mmm -= 1 if (not ok): npt = NounPhraseHelper.try_parse( t.previous, Utils.valToEnum( ((NounPhraseParseAttr.MULTILINES) | (NounPhraseParseAttr.PARSEADVERBS) | (NounPhraseParseAttr.PARSEPREPOSITION)) | (NounPhraseParseAttr.PARSEVERBS) | (NounPhraseParseAttr.PARSEPRONOUNS), NounPhraseParseAttr), 0, None) if (npt is not None and npt.end_char >= t.end_char): ok = True if (not ok): break rt.end_token = t if (blt is not None): rt.end_token = blt.end_token if (t.is_char_of(".,") or t.is_hiphen): continue if (t.is_value("С", None)): pass if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and blt is not None and blt.typ == BookLinkTyp.EDITORS): edits = True t = blt.end_token coef += 1 continue if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and blt is not None and blt.typ == BookLinkTyp.SOSTAVITEL): edits = False t = blt.end_token coef += 1 continue if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and authors): blt2 = BookLinkToken.try_parse_author(t, prev_pers_templ) if (blt2 is not None and blt2.typ == BookLinkTyp.PERSON): prev_pers_templ = blt2.person_template if (not edits): BookLinkAnalyzer.__add_author(res, blt2) coef += 1 t 
= blt2.end_token continue if (blt2 is not None and blt2.typ == BookLinkTyp.ANDOTHERS): if (not edits): res.authors_and_other = True coef += 1 t = blt2.end_token continue authors = False if (blt is None): continue if (blt.typ == BookLinkTyp.ELECTRONRES or blt.typ == BookLinkTyp.URL): is_electr_res = True if (blt.typ == BookLinkTyp.ELECTRONRES): coef += 1.5 else: coef += 0.5 if (isinstance(blt.ref, UriReferent)): res.add_slot(BookLinkReferent.ATTR_URL, Utils.asObjectOrNull(blt.ref, UriReferent), False, 0) elif (blt.typ == BookLinkTyp.YEAR): if (res.year == 0): res.year = int(blt.value) coef += 0.5 elif (blt.typ == BookLinkTyp.DELIMETER): coef += 1 if (blt.length_char == 2): regtyp = BookLinkAnalyzer.RegionTyp.SECOND else: regtyp = BookLinkAnalyzer.RegionTyp.FIRST elif ( (((blt.typ == BookLinkTyp.MISC or blt.typ == BookLinkTyp.TYPE or blt.typ == BookLinkTyp.PAGES) or blt.typ == BookLinkTyp.NAMETAIL or blt.typ == BookLinkTyp.TRANSLATE) or blt.typ == BookLinkTyp.PRESS or blt.typ == BookLinkTyp.VOLUME) or blt.typ == BookLinkTyp.N): coef += 1 elif (blt.typ == BookLinkTyp.PAGERANGE): pages = blt coef += 1 if (is_bracket_regime and blt.end_token.next0_ is not None and blt.end_token.next0_.is_char(')')): coef += (2) if (res.name is not None and res.find_slot(BookLinkReferent.ATTR_AUTHOR, None, True) is not None): coef = (10) elif (blt.typ == BookLinkTyp.GEO and ((regtyp == BookLinkAnalyzer.RegionTyp.SECOND or regtyp == BookLinkAnalyzer.RegionTyp.FIRST))): coef += 1 elif (blt.typ == BookLinkTyp.GEO and t.previous is not None and t.previous.is_char('.')): coef += 1 elif (blt.typ == BookLinkTyp.ANDOTHERS): coef += 1 if (authors): res.authors_and_other = True coef += blt.add_coef t = blt.end_token if ((coef < 2.5) and num is not None): if (BookLinkToken.check_link_before(t0, num)): coef += (2) elif (BookLinkToken.check_link_after(rt.end_token, num)): coef += (1) if (rt.length_char > 500): return None if (is_in_lit): coef += 1 if (coef < 2.5): if (is_electr_res and uri is not 
None): pass elif (coef >= 2 and is_in_lit): pass else: return None for rr in corr_authors: pits0 = PersonItemToken.try_attach_list( rr.begin_token, None, PersonItemToken.ParseAttr.CANINITIALBEDIGIT, 10) if (pits0 is None or (len(pits0) < 2)): continue if (pits0[0].typ == PersonItemToken.ItemType.VALUE): exi = False for i in range(len(rr.referent.slots) - 1, -1, -1): s = rr.referent.slots[i] if (s.type_name == PersonReferent.ATTR_LASTNAME): ln = Utils.asObjectOrNull(s.value, str) if (ln is None): continue if (ln == pits0[0].value): exi = True continue if (ln.find('-') > 0): ln = ln[0:0 + ln.find('-')] if (pits0[0].begin_token.is_value(ln, None)): del rr.referent.slots[i] if (not exi): rr.referent.add_slot(PersonReferent.ATTR_LASTNAME, pits0[0].value, False, 0) rts = list() bref = BookLinkRefReferent._new372(res) if (num is not None): bref.number = num rt1 = ReferentToken(bref, t0, rt.end_token) if (pages is not None): if (pages.value is not None): bref.pages = pages.value rt.end_token = pages.begin_token.previous rts.append(rt1) rts.append(rt) return rts
def create_nickname(pr : 'PersonReferent', t : 'Token') -> 'Token':
    """Attach a nickname/alias found at token `t` to person referent `pr`.

    Skips leading punctuation and prepositions, requires one of the keyword
    tokens ("ПРОЗВИЩЕ"/"КЛИЧКА"/"ПСЕВДОНИМ"/"ПСЕВДО"/"ПОЗЫВНОЙ" and their
    Ukrainian variants), then reads the nickname value either from quoted
    fragments or from a short sequence of person-like items, adding each
    value as an ATTR_NICKNAME slot on `pr`.

    :param pr: person referent to receive ATTR_NICKNAME slots
    :param t: token at (or just before) the nickname keyword
    :return: the last consumed token, or None when nothing was attached
    """
    has_keyw = False
    is_br = False  # saw an opening '(' before the keyword
    # first_passNNNN emulates a C#-style for-loop: advance t only after
    # the first iteration
    first_pass3367 = True
    while True:
        if first_pass3367: first_pass3367 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.is_hiphen or t.is_comma or t.is_char_of(".:;")):
            continue
        if (t.morph.class0_.is_preposition):
            continue
        if (t.is_char('(')):
            is_br = True
            continue
        if ((t.is_value("ПРОЗВИЩЕ", "ПРІЗВИСЬКО") or t.is_value("КЛИЧКА", None) or t.is_value("ПСЕВДОНИМ", "ПСЕВДОНІМ")) or t.is_value("ПСЕВДО", None) or t.is_value("ПОЗЫВНОЙ", "ПОЗИВНИЙ")):
            has_keyw = True
            continue
        break
    if (not has_keyw or t is None):
        return None
    if (BracketHelper.is_bracket(t, True)):
        # quoted form: «Nick» («Nick2», «Nick3» ...)
        br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
        if (br is not None):
            ni = MiscHelper.get_text_value(br.begin_token.next0_, br.end_token.previous, GetTextAttr.NO)
            if (ni is not None):
                pr.add_slot(PersonReferent.ATTR_NICKNAME, ni, False, 0)
                t = br.end_token
                # consume further comma/and-separated quoted nicknames
                tt = t.next0_
                first_pass3368 = True
                while True:
                    if first_pass3368: first_pass3368 = False
                    else: tt = tt.next0_
                    if (not (tt is not None)): break
                    if (tt.is_comma_and):
                        continue
                    if (not BracketHelper.is_bracket(tt, True)):
                        break
                    br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
                    if (br is None):
                        break
                    ni = MiscHelper.get_text_value(br.begin_token.next0_, br.end_token.previous, GetTextAttr.NO)
                    if (ni is not None):
                        pr.add_slot(PersonReferent.ATTR_NICKNAME, ni, False, 0)
                    tt = br.end_token
                    t = tt
                # close the surrounding '(' if one was opened
                if (is_br and t.next0_ is not None and t.next0_.is_char(')')):
                    t = t.next0_
                return t
    else:
        # unquoted form: one or two person-item tokens, or a single
        # capitalized referent token
        ret = None
        first_pass3369 = True
        while True:
            if first_pass3369: first_pass3369 = False
            else: t = t.next0_
            if (not (t is not None)): break
            if (t.is_comma_and):
                continue
            if (ret is not None and t.chars.is_all_lower):
                break
            if (t.whitespaces_before_count > 2):
                break
            pli = PersonItemToken.try_attach_list(t, None, PersonItemToken.ParseAttr.NO, 10)
            if (pli is not None and ((len(pli) == 1 or len(pli) == 2))):
                ni = MiscHelper.get_text_value(pli[0].begin_token, pli[len(pli) - 1].end_token, GetTextAttr.NO)
                if (ni is not None):
                    pr.add_slot(PersonReferent.ATTR_NICKNAME, ni, False, 0)
                    t = pli[len(pli) - 1].end_token
                    if (is_br and t.next0_ is not None and t.next0_.is_char(')')):
                        t = t.next0_
                    ret = t
                continue
            if ((isinstance(t, ReferentToken)) and not t.chars.is_all_lower and t.begin_token == t.end_token):
                val = MiscHelper.get_text_value_of_meta_token(Utils.asObjectOrNull(t, ReferentToken), GetTextAttr.NO)
                pr.add_slot(PersonReferent.ATTR_NICKNAME, val, False, 0)
                if (is_br and t.next0_ is not None and t.next0_.is_char(')')):
                    t = t.next0_
                ret = t
                continue
            break
        return ret
    return None
def process(self, kit: 'AnalysisKit') -> None:
    """Analyzer pass: find weapon mentions in `kit`, register them as
    referents, then embed further occurrences matched by the collected
    model strings and names.

    Stage 1 walks the token chain, parses weapon item sequences and
    attaches them via self.__try_attach, registering/embedding each
    referent while filling:
      * models / objs_by_model - model string (and a "BRAND MODEL"
        variant) -> list of referents carrying that model;
      * obj_by_names - NAME slot value -> referent.
    Stage 2 re-walks the chain and embeds ReferentTokens for repeated
    mentions of those models/names (including bracket-quoted names).

    :param kit: the analysis kit whose token chain is processed in place
    """
    ad = kit.get_analyzer_data(self)
    models = TerminCollection()
    # model string -> list of weapon referents having that model
    objs_by_model = dict()
    obj_by_names = TerminCollection()
    t = kit.first_token
    # first_passNNNN emulates a C#-style for-loop: advance t only after
    # the first iteration
    first_pass3428 = True
    while True:
        if first_pass3428: first_pass3428 = False
        else: t = t.next0_
        if (not (t is not None)): break
        its = WeaponItemToken.try_parse_list(t, 10)
        if (its is None):
            continue
        rts = self.__try_attach(its, False)
        if (rts is not None):
            for rt in rts:
                rt.referent = ad.register_referent(rt.referent)
                kit.embed_token(rt)
                t = (rt)
                for s in rt.referent.slots:
                    if (s.type_name == WeaponReferent.ATTR_MODEL):
                        mod = str(s.value)
                        # k == 0: bare model; k == 1: brand-prefixed model
                        for k in range(2):
                            # skip models starting with a digit
                            if (not str.isdigit(mod[0])):
                                li = [ ]
                                wrapli2804 = RefOutArgWrapper(None)
                                inoutres2805 = Utils.tryGetValue(objs_by_model, mod, wrapli2804)
                                li = wrapli2804.value
                                if (not inoutres2805):
                                    li = list()
                                    objs_by_model[mod] = li
                                if (not rt.referent in li):
                                    li.append(rt.referent)
                                models.add_string(mod, li, None, False)
                            if (k > 0):
                                break
                            brand = rt.referent.get_string_value(WeaponReferent.ATTR_BRAND)
                            if (brand is None):
                                break
                            mod = "{0} {1}".format(brand, mod)
                    elif (s.type_name == WeaponReferent.ATTR_NAME):
                        obj_by_names.add(Termin._new100(str(s.value), rt.referent))
    if (len(objs_by_model) == 0 and len(obj_by_names.termins) == 0):
        return
    # second sweep: embed repeated mentions of collected models/names
    t = kit.first_token
    first_pass3429 = True
    while True:
        if first_pass3429: first_pass3429 = False
        else: t = t.next0_
        if (not (t is not None)): break
        br = BracketHelper.try_parse(t, BracketParseAttr.NO, 10)
        if (br is not None):
            # a known name fully enclosed in brackets/quotes
            toks = obj_by_names.try_parse(t.next0_, TerminParseAttr.NO)
            if (toks is not None and toks.end_token.next0_ == br.end_token):
                rt0 = ReferentToken(Utils.asObjectOrNull(toks.termin.tag, Referent), br.begin_token, br.end_token)
                kit.embed_token(rt0)
                t = (rt0)
                continue
        if (not (isinstance(t, TextToken))):
            continue
        if (not t.chars.is_letter):
            continue
        tok = models.try_parse(t, TerminParseAttr.NO)
        if (tok is None):
            if (not t.chars.is_all_lower):
                tok = obj_by_names.try_parse(t, TerminParseAttr.NO)
        if (tok is None):
            continue
        # a match glued to following text is accepted only before , . )
        # or a closing bracket
        if (not tok.is_whitespace_after):
            if (tok.end_token.next0_ is None or not tok.end_token.next0_.is_char_of(",.)")):
                if (not BracketHelper.is_bracket(tok.end_token.next0_, False)):
                    continue
        tr = None
        li = Utils.asObjectOrNull(tok.termin.tag, list)
        if (li is not None and len(li) == 1):
            # model is unambiguous only when exactly one referent has it
            tr = li[0]
        else:
            tr = (Utils.asObjectOrNull(tok.termin.tag, Referent))
        if (tr is not None):
            # pull in a preceding brand token, if any
            tit = WeaponItemToken.try_parse(tok.begin_token.previous, None, False, True)
            if (tit is not None and tit.typ == WeaponItemToken.Typs.BRAND):
                tr.add_slot(WeaponReferent.ATTR_BRAND, tit.value, False, 0)
                tok.begin_token = tit.begin_token
            rt0 = ReferentToken(tr, tok.begin_token, tok.end_token)
            kit.embed_token(rt0)
            t = (rt0)
            continue
def parse(t : 'Token', max_char : int=0, prev : 'InstrToken'=None) -> 'InstrToken':
    """Parse one structural fragment (heading/line) of a legal-document
    token stream starting at `t` and classify it as an InstrToken.

    The returned token's `typ` is an ILTypes value set by the branches
    below: PERSON, TYP, REGNUMBER, ORGANIZATION, GEO, DIRECTIVE, APPENDIX,
    APPROVED, DATE, QUESTION or UNDEFINED.

    :param t: first token of the fragment (None -> None)
    :param max_char: hard right boundary by character position (0 = none)
    :param prev: previously parsed fragment, used as context (e.g. a
        preceding PERSON or APPENDIX influences classification)
    :return: an InstrToken covering the fragment, or None
    """
    from pullenti.ner.instrument.internal.InstrToken1 import InstrToken1
    is_start_of_line = False
    t00 = t
    if (t is not None):
        is_start_of_line = t00.is_newline_before
    # skip table control characters (except 0x1F), tracking line starts
    while t is not None:
        if (t.is_table_control_char and not t.is_char(chr(0x1F))):
            if (t.is_newline_after and not is_start_of_line):
                is_start_of_line = True
            t = t.next0_
        else:
            break
    if (t is None):
        return None
    if (t.is_newline_before):
        is_start_of_line = True
    if (is_start_of_line):
        # a table-of-contents heading at the start of a line
        if ((t.is_value("СОДЕРЖИМОЕ", "ВМІСТ") or t.is_value("СОДЕРЖАНИЕ", "ЗМІСТ") or t.is_value("ОГЛАВЛЕНИЕ", "ЗМІСТ")) or ((t.is_value("СПИСОК", None) and t.next0_ is not None and t.next0_.is_value("РАЗДЕЛ", None)))):
            cont = InstrToken1.parse(t, True, None, 0, None, False, 0, False, False)
            if (cont is not None and cont.typ == InstrToken1.Types.INDEX):
                return InstrToken(t, cont.end_token)
    t0 = t
    t1 = None
    has_word = False
    # main scan over the fragment; first_passNNNN emulates a C#-style
    # for-loop (advance t only after the first iteration)
    first_pass3255 = True
    while True:
        if first_pass3255: first_pass3255 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.is_newline_before and t != t0):
            break
        if (max_char > 0 and t.begin_char > max_char):
            break
        if (is_start_of_line and t == t0):
            # "ГЛАВА <person>" heading
            if (t.is_value("ГЛАВА", None)):
                next0__ = InstrToken.parse(t.next0_, 0, None)
                if (next0__ is not None and next0__.typ == ILTypes.PERSON):
                    next0__.begin_token = t
                    return next0__
            tt = None
            if ((isinstance(t.get_referent(), PersonReferent)) or (isinstance(t.get_referent(), PersonPropertyReferent)) or (isinstance(t.get_referent(), InstrumentParticipantReferent))):
                return InstrToken.__correct_person(InstrToken._new1511(t00, t, ILTypes.PERSON, t))
            is_ref = False
            if (isinstance(t.get_referent(), PersonPropertyReferent)):
                tt = t.next0_
                is_ref = True
            elif (prev is not None and prev.typ == ILTypes.PERSON):
                rt = t.kit.process_referent(PersonAnalyzer.ANALYZER_NAME, t)
                if (rt is not None):
                    if (isinstance(rt.referent, PersonReferent)):
                        return InstrToken._new1512(t00, rt.end_token, ILTypes.PERSON)
                    tt = rt.end_token.next0_
            # look ahead (a few lines) for a person referent
            cou = 0
            t11 = (None if tt is None else tt.previous)
            first_pass3256 = True
            while True:
                if first_pass3256: first_pass3256 = False
                else: tt = tt.next0_
                if (not (tt is not None)): break
                if (tt.is_table_control_char):
                    continue
                re = tt.get_referent()
                if (isinstance(re, PersonReferent)):
                    return InstrToken._new1511(t00, tt, ILTypes.PERSON, tt)
                if (isinstance(re, GeoReferent)):
                    t11 = tt
                    continue
                if (re is not None):
                    break
                if (DecreeToken.is_keyword(tt, False) is not None):
                    break
                if (tt.is_newline_before):
                    cou += 1
                    if (cou > 4):
                        break
            if (tt is None and is_ref):
                return InstrToken._new1511(t00, Utils.ifNotNull(t11, t), ILTypes.PERSON, t)
            # decree-related items at the start of the fragment
            dt = DecreeToken.try_attach(t, None, False)
            if (dt is not None):
                if (dt.typ == DecreeToken.ItemType.TYP and not t.chars.is_all_lower):
                    if (t != t0):
                        break
                    # a document-type heading counts only when no verb
                    # follows on the same line
                    has_verb_ = False
                    tt = dt.end_token
                    while tt is not None:
                        if (tt.is_newline_before):
                            break
                        elif ((isinstance(tt, TextToken)) and tt.is_pure_verb):
                            has_verb_ = True
                            break
                        tt = tt.next0_
                    if (not has_verb_):
                        res2 = InstrToken._new1515(t0, dt.end_token, ILTypes.TYP, Utils.ifNotNull(dt.full_value, dt.value))
                        if (res2.value == "ДОПОЛНИТЕЛЬНОЕ СОГЛАШЕНИЕ" or res2.value == "ДОДАТКОВА УГОДА"):
                            if (res2.begin_char > 500 and res2.newlines_before_count > 1):
                                res2.typ = ILTypes.APPENDIX
                        return res2
                if (dt.typ == DecreeToken.ItemType.NUMBER):
                    if (t != t0):
                        break
                    return InstrToken._new1515(t0, dt.end_token, ILTypes.REGNUMBER, dt.value)
                if (dt.typ == DecreeToken.ItemType.ORG):
                    if (t != t0):
                        break
                    return InstrToken._new1517(t0, dt.end_token, ILTypes.ORGANIZATION, dt.ref, dt.value)
                if (dt.typ == DecreeToken.ItemType.TERR):
                    if (t != t0):
                        break
                    re = InstrToken._new1517(t0, dt.end_token, ILTypes.GEO, dt.ref, dt.value)
                    # absorb trailing "КРЕМЛЬ" / "ДОМ СОВЕТОВ" location tails
                    t1 = re.end_token.next0_
                    if (t1 is not None and t1.is_char(',')):
                        t1 = t1.next0_
                    if (t1 is not None and t1.is_value("КРЕМЛЬ", None)):
                        re.end_token = t1
                    elif ((t1 is not None and t1.is_value("ДОМ", "БУДИНОК") and t1.next0_ is not None) and t1.next0_.is_value("СОВЕТ", "РАД")):
                        re.end_token = t1.next0_
                        if (t1.next0_.next0_ is not None and (isinstance(t1.next0_.next0_.get_referent(), GeoReferent))):
                            re.end_token = t1.next0_.next0_
                    return re
                if (dt.typ == DecreeToken.ItemType.OWNER):
                    if (t != t0):
                        break
                    if (dt.ref is not None and str(dt.ref.referent).startswith("агент")):
                        dt = (None)
                    if (dt is not None):
                        res1 = InstrToken._new1517(t0, dt.end_token, ILTypes.PERSON, dt.ref, dt.value)
                        return InstrToken.__correct_person(res1)
        # skip bracketed sequences wholesale
        if (BracketHelper.can_be_start_of_sequence(t, False, False)):
            br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
            if (br is not None):
                t1 = br.end_token
                t = t1
                continue
        if (t.next0_ is not None and BracketHelper.can_be_end_of_sequence(t.next0_, False, None, False)):
            t1 = t.next0_
            t = t1
            continue
        if (isinstance(t, TextToken)):
            if (t.is_char('_')):
                t1 = t
                continue
        r = t.get_referent()
        if (isinstance(r, DateReferent)):
            tt = t
            if (tt.next0_ is not None and tt.next0_.is_char_of(",;")):
                tt = tt.next0_
            if (not t.is_newline_before and not tt.is_newline_after):
                t1 = tt
                continue
            if (not has_word):
                return InstrToken._new1511(t, tt, ILTypes.DATE, t)
            if (t != t0):
                break
        # NOTE(review): placement at loop level inferred from statement
        # order; has_word is only read in the DATE branch above
        has_word = True
        if (isinstance(r, InstrumentParticipantReferent)):
            # a participant containing an org/bank/uri is not a person line
            tt = t.begin_token
            first_pass3257 = True
            while True:
                if first_pass3257: first_pass3257 = False
                else: tt = tt.next0_
                if (not (tt is not None and (tt.end_char < t.end_char))): break
                rr = tt.get_referent()
                if (rr is None):
                    continue
                if ((isinstance(rr, OrganizationReferent)) or (isinstance(rr, BankDataReferent)) or (isinstance(rr, UriReferent))):
                    r = (None)
                    break
        if ((isinstance(r, PersonReferent)) or (isinstance(r, PersonPropertyReferent)) or (isinstance(r, InstrumentParticipantReferent))):
            if (t != t0):
                break
            if (isinstance(r, InstrumentParticipantReferent)):
                pass
            res1 = InstrToken._new1511(t, t, ILTypes.PERSON, t)
            return InstrToken.__correct_person(res1)
        if (isinstance(r, OrganizationReferent)):
            if (t != t0):
                break
            return InstrToken._new1511(t, t, ILTypes.ORGANIZATION, t)
        if (isinstance(r, DecreePartReferent)):
            dpr = Utils.asObjectOrNull(r, DecreePartReferent)
            if (dpr.appendix is not None):
                if (t.is_newline_before or is_start_of_line):
                    if (t.is_newline_after or t.whitespaces_before_count > 30):
                        return InstrToken._new1515(t, t, ILTypes.APPENDIX, "ПРИЛОЖЕНИЕ")
                    # the rest of the line must be noun phrases only
                    ok = True
                    tt = t.next0_
                    first_pass3258 = True
                    while True:
                        if first_pass3258: first_pass3258 = False
                        else: tt = tt.next0_
                        if (not (tt is not None)): break
                        if (tt.is_newline_before):
                            break
                        npt = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
                        if (npt is not None):
                            tt = npt.end_token
                            continue
                        ok = False
                        break
                    if (ok):
                        return InstrToken._new1515(t, t, ILTypes.APPENDIX, "ПРИЛОЖЕНИЕ")
        if ((isinstance(r, DecreeReferent)) and r.kind == DecreeKind.PUBLISHER and t == t0):
            res1 = InstrToken._new1512(t, t, ILTypes.APPROVED)
            tt = t.next0_
            first_pass3259 = True
            while True:
                if first_pass3259: first_pass3259 = False
                else: tt = tt.next0_
                if (not (tt is not None)): break
                if (tt.is_char_of(",;")):
                    continue
                if ((isinstance(tt.get_referent(), DecreeReferent)) and tt.get_referent().kind == DecreeKind.PUBLISHER):
                    # NOTE(review): assigns t, not tt - verify upstream
                    res1.end_token = t
                else:
                    break
            return res1
        # "ЗА <person>" at line start
        if (t.is_value("ЗА", None) and t.next0_ is not None and t.is_newline_before):
            rr = t.next0_.get_referent()
            if ((isinstance(rr, PersonReferent)) or (isinstance(rr, PersonPropertyReferent)) or (isinstance(rr, InstrumentParticipantReferent))):
                if (t != t0):
                    break
                res1 = InstrToken._new1511(t, t.next0_, ILTypes.PERSON, t.next0_)
                t = t.next0_.next0_
                if ((isinstance(rr, InstrumentParticipantReferent)) and t is not None):
                    r = t.get_referent()
                    if ((r) is not None):
                        if ((isinstance(r, PersonReferent)) or (isinstance(r, PersonPropertyReferent))):
                            res1.end_token = t
                            res1.ref = (t)
                return res1
        # directive keywords ("ПРИКАЗ", "ПОСТАНОВЛЕНИЕ", ...)
        ii = 0
        while ii < len(InstrToken._m_directives):
            if (t.is_value(InstrToken._m_directives[ii], None)):
                if (t.next0_ is not None and t.next0_.is_value("СЛЕДУЮЩЕЕ", "НАСТУПНЕ")):
                    if (t != t0):
                        break
                    t11 = t.next0_
                    ok = False
                    if (t11.next0_ is not None and t11.next0_.is_char_of(":.") and t11.next0_.is_newline_after):
                        ok = True
                        t11 = t11.next0_
                    if (ok):
                        return InstrToken._new1515(t, t11, ILTypes.DIRECTIVE, InstrToken._m_directives_norm[ii])
                if (t.is_newline_after or ((t.next0_ is not None and t.next0_.is_char(':') and t.next0_.is_newline_after))):
                    if (t != t0):
                        break
                    if (not t.is_newline_before):
                        if ((InstrToken._m_directives_norm[ii] != "ПРИКАЗ" and InstrToken._m_directives_norm[ii] != "ПОСТАНОВЛЕНИЕ" and InstrToken._m_directives_norm[ii] != "НАКАЗ") and InstrToken._m_directives_norm[ii] != "ПОСТАНОВУ"):
                            break
                    return InstrToken._new1515(t, (t if t.is_newline_after else t.next0_), ILTypes.DIRECTIVE, InstrToken._m_directives_norm[ii])
                break
            ii += 1
        # directive spelled out letter by letter ("П Р И К А З")
        if (t.is_newline_before and t.chars.is_letter and t.length_char == 1):
            for d in InstrToken._m_directives:
                t11 = MiscHelper.try_attach_word_by_letters(d, t, True)
                if (t11 is not None):
                    if (t11.next0_ is not None and t11.next0_.is_char(':')):
                        t11 = t11.next0_
                    return InstrToken._new1512(t, t11, ILTypes.DIRECTIVE)
        tte = (t.begin_token if isinstance(t, MetaToken) else t)
        term = (tte.term if isinstance(tte, TextToken) else None)
        # appendix-like headings ("ПРИЛОЖЕНИЕ", "МНЕНИЕ", "АКТ", ...)
        if (is_start_of_line and not tte.chars.is_all_lower and t == t0):
            npt = NounPhraseHelper.try_parse(tte, NounPhraseParseAttr.NO, 0, None)
            if (npt is not None and ((term == "ПРИЛОЖЕНИЯ" or term == "ДОДАТКИ"))):
                # if (tte.Next != null && tte.Next.IsChar(':'))
                npt = (None)
            if (npt is not None and npt.morph.case_.is_nominative and (isinstance(npt.end_token, TextToken))):
                term1 = npt.end_token.term
                if (((term1 == "ПРИЛОЖЕНИЕ" or term1 == "ДОДАТОК" or term1 == "МНЕНИЕ") or term1 == "ДУМКА" or term1 == "АКТ") or term1 == "ФОРМА" or term == "ЗАЯВКА"):
                    # skip a following number / single letter
                    tt1 = npt.end_token.next0_
                    dt1 = DecreeToken.try_attach(tt1, None, False)
                    if (dt1 is not None and dt1.typ == DecreeToken.ItemType.NUMBER):
                        tt1 = dt1.end_token.next0_
                    elif (isinstance(tt1, NumberToken)):
                        tt1 = tt1.next0_
                    elif ((isinstance(tt1, TextToken)) and tt1.length_char == 1 and tt1.chars.is_letter):
                        tt1 = tt1.next0_
                    ok = True
                    if (tt1 is None):
                        ok = False
                    elif (tt1.is_value("В", "У")):
                        ok = False
                    elif (tt1.is_value("К", None) and tt1.is_newline_before):
                        return InstrToken._new1515(t, t, ILTypes.APPENDIX, term1)
                    elif (not tt1.is_newline_before and InstrToken._check_entered(tt1) is not None):
                        ok = False
                    elif (tt1 == t.next0_ and ((tt1.is_char(':') or ((tt1.is_value("НА", None) and term1 != "ЗАЯВКА"))))):
                        ok = False
                    if (ok):
                        br = BracketHelper.try_parse(tt1, BracketParseAttr.NO, 100)
                        if (br is not None):
                            tt1 = br.end_token.next0_
                            if (br.end_token.next0_ is None or not br.end_token.is_newline_after or br.end_token.next0_.is_char_of(";,")):
                                ok = False
                    if (tt1 is not None and tt1.is_value("ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                        ok = False
                    if (prev is not None and prev.typ == ILTypes.APPENDIX):
                        ok = False
                    if (ok):
                        # no table control chars may precede the heading
                        cou = 0
                        ttt = tte.previous
                        while ttt is not None and (cou < 300):
                            if (ttt.is_table_control_char):
                                if (not ttt.is_char(chr(0x1F))):
                                    if (ttt == tte.previous and ttt.is_char(chr(0x1E))):
                                        pass
                                    else:
                                        ok = False
                                        break
                            ttt = ttt.previous; cou += 1
                    if (ok):
                        it1 = InstrToken1.parse(t, True, None, 0, None, False, 0, False, False)
                        if (it1 is not None):
                            if (it1.has_verb):
                                ok = False
                    if (ok and t.previous is not None):
                        # the nearest preceding significant token must not
                        # be ';' or ':'
                        ttp = t.previous
                        first_pass3260 = True
                        while True:
                            if first_pass3260: first_pass3260 = False
                            else: ttp = ttp.previous
                            if (not (ttp is not None)): break
                            if (ttp.is_table_control_char and not ttp.is_char(chr(0x1F))):
                                continue
                            if (BracketHelper.is_bracket(ttp, False) and not BracketHelper.can_be_end_of_sequence(ttp, False, None, False)):
                                continue
                            if (ttp.is_char_of(";:")):
                                ok = False
                            break
                    if ((ok and t.previous is not None and (t.newlines_before_count < 3)) and not t.is_newline_after):
                        # inspect the previous line(s) for another
                        # "ПРИЛОЖЕНИЕ" heading
                        lines = 0
                        ttp = t.previous
                        first_pass3261 = True
                        while True:
                            if first_pass3261: first_pass3261 = False
                            else: ttp = ttp.previous
                            if (not (ttp is not None)): break
                            if (not ttp.is_newline_before):
                                continue
                            while ttp is not None and (ttp.end_char < t.begin_char):
                                if (isinstance(ttp, NumberToken)):
                                    pass
                                elif ((isinstance(ttp, TextToken)) and ttp.length_char > 1):
                                    if (ttp.is_value("ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                                        ok = False
                                    break
                                else:
                                    break
                                ttp = ttp.next0_
                            lines += 1
                            if (lines > 1):
                                break
                    if (ok and ((term1 != "ПРИЛОЖЕНИЕ" and term1 != "ДОДАТОК" and term1 != "МНЕНИЕ"))):
                        if (t.newlines_before_count < 3):
                            ok = False
                    if (ok):
                        return InstrToken._new1515(t, t, ILTypes.APPENDIX, term1)
        # "ОСОБОЕ МНЕНИЕ" / "ДОПОЛНИТЕЛЬНОЕ СОГЛАШЕНИЕ" headings
        app = False
        if ((((term == "ОСОБОЕ" or term == "ОСОБЛИВЕ")) and t.next0_ is not None and t.next0_.is_value("МНЕНИЕ", "ДУМКА")) and t == t0 and is_start_of_line):
            app = True
        if ((((term == "ДОПОЛНИТЕЛЬНОЕ" or term == "ДОДАТКОВА")) and t.next0_ is not None and t.next0_.is_value("СОГЛАШЕНИЕ", "УГОДА")) and t == t0 and is_start_of_line):
            app = True
        if (app):
            tt = t.next0_
            while tt is not None:
                if (tt.is_newline_before):
                    break
                elif (tt.get_morph_class_in_dictionary() == MorphClass.VERB):
                    app = False
                    break
                tt = tt.next0_
            if (app):
                return InstrToken._new1512(t, t.next0_, ILTypes.APPENDIX)
        # "УТВЕРЖДЕНО ..." approval lines
        if (not t.chars.is_all_lower and t == t0):
            tt = InstrToken._check_approved(t)
            if (tt is not None):
                if (tt.next0_ is not None and (isinstance(tt.next0_.get_referent(), DecreeReferent))):
                    return InstrToken._new1511(t, tt, ILTypes.APPROVED, tt.next0_.get_referent())
                dt1 = DecreeToken.try_attach(tt.next0_, None, False)
                if (dt1 is not None and dt1.typ == DecreeToken.ItemType.TYP):
                    return InstrToken._new1512(t, tt, ILTypes.APPROVED)
        t1 = t
        is_start_of_line = False
    if (t1 is None):
        return None
    # no specific class matched: build an UNDEFINED token and compute
    # word/verb statistics over its span
    res = InstrToken._new1512(t00, t1, ILTypes.UNDEFINED)
    res.no_words = True
    t = t0
    first_pass3262 = True
    while True:
        if first_pass3262: first_pass3262 = False
        else: t = t.next0_
        if (not (t is not None and t.end_char <= t1.end_char)): break
        if (not (isinstance(t, TextToken))):
            if (isinstance(t, ReferentToken)):
                res.no_words = False
            continue
        if (not t.chars.is_letter):
            continue
        res.no_words = False
        if (t.is_pure_verb):
            res.has_verb = True
    if (t0.is_value("ВОПРОС", "ПИТАННЯ") and t0.next0_ is not None and t0.next0_.is_char_of(":.")):
        res.typ = ILTypes.QUESTION
    return res
def __correct_person(res : 'InstrToken') -> 'InstrToken':
    """Post-process an InstrToken that may denote a person / signature area.

    Extends ``res`` rightwards over tokens that plausibly belong to the same
    signature block (repeated referents, ontology terms, runs of ``_ / \\``
    used as signature lines), then either keeps the typing or downgrades it
    to ``ILTypes.UNDEFINED``.  ``res`` is mutated in place and returned.
    """
    # Count of "signature filler" characters (_ / \) seen so far.
    spec_chars = 0
    if (not res.is_pure_person):
        # Not a pure person fragment - drop the typing and stop.
        res.typ = ILTypes.UNDEFINED
        return res
    t = res.end_token.next0_
    # Generated do-while emulation: first iteration keeps t, later ones advance.
    first_pass3254 = True
    while True:
        if first_pass3254: first_pass3254 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if ((isinstance(t, ReferentToken)) and (isinstance(res.ref, ReferentToken))):
            # A repeated occurrence of the same referent (or one contained in
            # the participant referent) lets us extend the span.
            ok = False
            if (t.get_referent() == res.ref.referent): ok = True
            ip = Utils.asObjectOrNull(res.ref.referent, InstrumentParticipantReferent)
            if (ip is not None and ip._contains_ref(t.get_referent())): ok = True
            if (not ok and t.previous is not None and t.previous.is_table_control_char):
                # After a table-cell separator a PERSON may follow a person property.
                if ((isinstance(res.ref.referent, PersonPropertyReferent)) and (isinstance(t.get_referent(), PersonReferent))):
                    ok = True
                    res.ref = (t)
            if (ok):
                res.end_token = t
                continue
        tok = InstrToken.__m_ontology.try_parse(t, TerminParseAttr.NO)
        if (tok is not None):
            # Stop before a new line starting with "ПОДПИСЬ/ПІДПИС СТОРОНА"
            # (a new signature block for the other party).
            if ((((tok.termin.canonic_text == "ПОДПИСЬ" or tok.termin.canonic_text == "ПІДПИС")) and t.is_newline_before and t.next0_ is not None) and t.next0_.is_value("СТОРОНА", None)):
                break
            t = tok.end_token
            res.end_token = t
            continue
        if (t.is_char(',')): continue
        # Table control chars inside a line are transparent.
        if (t.is_table_control_char and not t.is_newline_before): continue
        if (t.is_char_of("_/\\")):
            # Signature underline characters.
            res.end_token = t
            spec_chars += 1
            continue
        if (t.is_char('(') and t.next0_ is not None):
            # A bracketed ontology term - swallow the whole bracket sequence.
            tok = InstrToken.__m_ontology.try_parse(t.next0_, TerminParseAttr.NO)
            if ((tok) is not None):
                br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
                if (br is not None):
                    t = br.end_token
                    res.end_token = t
                    continue
        break
    rt0 = Utils.asObjectOrNull(res.ref, ReferentToken)
    if (rt0 is not None and (isinstance(rt0.referent, InstrumentParticipantReferent))):
        # For a participant referent, re-anchor res.ref on the first
        # person / person-property token found inside the span.
        tt = res.begin_token
        while tt is not None and tt.end_char <= res.end_char:
            if ((isinstance(tt.get_referent(), PersonReferent)) or (isinstance(tt.get_referent(), PersonPropertyReferent))):
                res.ref = (tt)
                return res
            elif ((isinstance(tt, TextToken)) and tt.is_char_of("_/\\")):
                spec_chars += 1
            elif (isinstance(tt, MetaToken)):
                # Look inside composite tokens as well.
                ttt = tt.begin_token
                while ttt is not None and ttt.end_char <= tt.end_char:
                    if ((isinstance(ttt.get_referent(), PersonReferent)) or (isinstance(ttt.get_referent(), PersonPropertyReferent))):
                        res.ref = (ttt)
                        return res
                    elif ((isinstance(ttt, TextToken)) and ttt.is_char_of("_/\\")):
                        spec_chars += 1
                    ttt = ttt.next0_
            tt = tt.next0_
    # Too few signature characters: presumably not a signature line, so the
    # typing is dropped.  NOTE(review): threshold 10 looks empirical - confirm.
    if (spec_chars < 10): res.typ = ILTypes.UNDEFINED
    return res
def try_parse(t : 'Token', loc_onto : 'IntOntologyCollection') -> 'NamedItemToken':
    """Try to read a named-entity item (type word and/or proper name) at ``t``.

    Tries, in order: an already-attached referent token; a known type term
    (``__m_types``); a known name term (``__m_names``); a "north/west"-style
    location adjective; an adjective+noun phrase; a bracketed name; finally a
    bare capitalized word.  Returns ``None`` when nothing matches.
    """
    if (t is None): return None
    if (isinstance(t, ReferentToken)):
        # Reuse referents produced by other analyzers.
        r = t.get_referent()
        if ((r.type_name == "PERSON" or r.type_name == "PERSONPROPERTY" or (isinstance(r, GeoReferent))) or r.type_name == "ORGANIZATION"):
            return NamedItemToken._new1758(t, t, r, t.morph)
        return None
    typ = NamedItemToken.__m_types.try_parse(t, TerminParseAttr.NO)
    nam = NamedItemToken.__m_names.try_parse(t, TerminParseAttr.NO)
    if (typ is not None):
        # A known type word ("monument", "square", ...) starts the item.
        if (not (isinstance(t, TextToken))): return None
        res = NamedItemToken._new1759(typ.begin_token, typ.end_token, typ.morph, typ.chars)
        res.kind = (Utils.valToEnum(typ.termin.tag, NamedEntityKind))
        res.type_value = typ.termin.canonic_text
        # The same span may also be a well-known name of the same kind.
        if ((nam is not None and nam.end_token == typ.end_token and not t.chars.is_all_lower) and (Utils.valToEnum(nam.termin.tag, NamedEntityKind)) == res.kind):
            res.name_value = nam.termin.canonic_text
            res.is_wellknown = True
        return res
    if (nam is not None):
        # A known name without a preceding type word.
        if (nam.begin_token.chars.is_all_lower): return None
        res = NamedItemToken._new1759(nam.begin_token, nam.end_token, nam.morph, nam.chars)
        res.kind = (Utils.valToEnum(nam.termin.tag, NamedEntityKind))
        res.name_value = nam.termin.canonic_text
        # Only trust it when the term is not glued to neighbouring text.
        ok = True
        if (not t.is_whitespace_before and t.previous is not None): ok = False
        elif (not t.is_whitespace_after and t.next0_ is not None):
            if (t.next0_.is_char_of(",.;!?") and t.next0_.is_whitespace_after): pass
            else: ok = False
        if (ok):
            res.is_wellknown = True
            res.type_value = (Utils.asObjectOrNull(nam.termin.tag2, str))
        return res
    # Compass-point adjective ("северо-западный ...") introducing a location.
    adj = MiscLocationHelper.try_attach_nord_west(t)
    if (adj is not None):
        if (adj.morph.class0_.is_noun):
            # Noun form is only accepted for "... ВОСТОК" with a qualifier.
            if (adj.end_token.is_value("ВОСТОК", None)):
                if (adj.begin_token == adj.end_token): return None
                re = NamedItemToken._new1761(t, adj.end_token, adj.morph)
                re.kind = NamedEntityKind.LOCATION
                re.name_value = MiscHelper.get_text_value(t, adj.end_token, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
                re.is_wellknown = True
                return re
            return None
        if (adj.whitespaces_after_count > 2): return None
        # Adjective directly before an attached GEO referent.
        if ((isinstance(adj.end_token.next0_, ReferentToken)) and (isinstance(adj.end_token.next0_.get_referent(), GeoReferent))):
            re = NamedItemToken._new1761(t, adj.end_token.next0_, adj.end_token.next0_.morph)
            re.kind = NamedEntityKind.LOCATION
            re.name_value = MiscHelper.get_text_value(t, adj.end_token.next0_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
            re.is_wellknown = True
            re.ref = adj.end_token.next0_.get_referent()
            return re
        # Otherwise recurse after the adjective and prepend its normal form.
        res = NamedItemToken.try_parse(adj.end_token.next0_, loc_onto)
        if (res is not None and res.kind == NamedEntityKind.LOCATION):
            s = adj.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.SINGULAR, res.morph.gender, False)
            if (s is not None):
                if (res.name_value is None): res.name_value = s.upper()
                else: res.name_value = "{0} {1}".format(s.upper(), res.name_value)
                res.type_value = (None)
                res.begin_token = t
                res.chars = t.chars
                res.is_wellknown = True
                return res
    if (t.chars.is_capital_upper and not MiscHelper.can_be_start_of_sentence(t)):
        # Capitalized adjective(s) + recognizable noun: build the name from
        # the normalized adjectives.
        npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
        if (npt is not None and len(npt.adjectives) > 0):
            test = NamedItemToken.try_parse(npt.noun.begin_token, loc_onto)
            if (test is not None and test.end_token == npt.end_token and test.type_value is not None):
                test.begin_token = t
                tmp = io.StringIO()
                for a in npt.adjectives:
                    s = a.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.SINGULAR, test.morph.gender, False)
                    if (tmp.tell() > 0): print(' ', end="", file=tmp)
                    print(s, end="", file=tmp)
                test.name_value = Utils.toStringStringIO(tmp)
                test.chars = t.chars
                if (test.kind == NamedEntityKind.LOCATION): test.is_wellknown = True
                return test
    if ((BracketHelper.is_bracket(t, True) and t.next0_ is not None and t.next0_.chars.is_letter) and not t.next0_.chars.is_all_lower):
        # Quoted/bracketed name: «Name» - take the bracket content as the name.
        br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
        if (br is not None):
            res = NamedItemToken(t, br.end_token)
            res.is_in_bracket = True
            res.name_value = MiscHelper.get_text_value(t, br.end_token, GetTextAttr.NO)
            nam = NamedItemToken.__m_names.try_parse(t.next0_, TerminParseAttr.NO)
            if (nam is not None and nam.end_token == br.end_token.previous):
                res.kind = (Utils.valToEnum(nam.termin.tag, NamedEntityKind))
                res.is_wellknown = True
                res.name_value = nam.termin.canonic_text
            return res
    if (((isinstance(t, TextToken)) and t.chars.is_letter and not t.chars.is_all_lower) and t.length_char > 2):
        # Bare capitalized word: keep it verbatim when it ends like an
        # indeclinable form (О/И/Ы), otherwise normalize the case.
        res = NamedItemToken._new1761(t, t, t.morph)
        str0_ = t.term
        if (str0_.endswith("О") or str0_.endswith("И") or str0_.endswith("Ы")):
            res.name_value = str0_
        else:
            res.name_value = t.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
        res.chars = t.chars
        # Tightly hyphenated pair of same-alphabet words forms one name.
        if (((not t.is_whitespace_after and t.next0_ is not None and t.next0_.is_hiphen) and (isinstance(t.next0_.next0_, TextToken)) and not t.next0_.next0_.is_whitespace_after) and t.chars.is_cyrillic_letter == t.next0_.next0_.chars.is_cyrillic_letter):
            res.end_token = t.next0_.next0_
            t = res.end_token
            res.name_value = "{0}-{1}".format(res.name_value, t.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False))
        return res
    return None
def __try_parse_thesis(t: 'Token') -> 'ReferentToken':
    """Try to parse an assertion-style thesis: "[preamble,] SUBJECT verb ... .".

    On success returns a ReferentToken wrapping a DefinitionReferent of kind
    ASSERTATION with the subject in ATTR_TERMIN and the statement text in
    ATTR_VALUE; otherwise returns ``None``.
    """
    if (t is None): return None
    t0 = t
    tt = t
    mc = tt.get_morph_class_in_dictionary()
    # Optional preamble span before the subject (set below).
    preamb = None
    if (mc.is_conjunction): return None
    if (t.is_value("LET", None)): return None
    if (mc.is_preposition or mc.is_misc or mc.is_adverb):
        # Sentence starts with a preposition/adverb (but not an English
        # article): skip a preamble up to the first comma.
        if (not MiscHelper.is_eng_article(tt)):
            tt = tt.next0_
            first_pass3131 = True
            while True:
                if first_pass3131: first_pass3131 = False
                else: tt = tt.next0_
                if (not (tt is not None)): break
                if (tt.is_comma): break
                if (tt.is_char('(')):
                    br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
                    if (br is not None):
                        tt = br.end_token
                        continue
                if (MiscHelper.can_be_start_of_sentence(tt)): break
                npt0 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSENUMERICASADJECTIVE) | (NounPhraseParseAttr.REFERENTCANBENOUN), NounPhraseParseAttr), 0, None)
                if (npt0 is not None):
                    tt = npt0.end_token
                    continue
                if (tt.get_morph_class_in_dictionary().is_verb): break
            # Preamble must end exactly on a comma with something after it.
            if (tt is None or not tt.is_comma or tt.next0_ is None): return None
            preamb = MetaToken(t0, tt.previous)
            tt = tt.next0_
    # t1 = start of the subject noun phrase.
    t1 = tt
    mc = tt.get_morph_class_in_dictionary()
    npt = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSENUMERICASADJECTIVE) | (NounPhraseParseAttr.REFERENTCANBENOUN) | (NounPhraseParseAttr.PARSEADVERBS), NounPhraseParseAttr), 0, None)
    if (npt is None and (isinstance(tt, TextToken))):
        # Fall back to a single token subject for ALL-CAPS or proper words.
        if (tt.chars.is_all_upper): npt = NounPhraseToken(tt, tt)
        elif (not tt.chars.is_all_lower):
            if (mc.is_proper or preamb is not None): npt = NounPhraseToken(tt, tt)
    if (npt is None): return None
    if (mc.is_personal_pronoun): return None
    # t2 must be a verb right after the subject.
    t2 = npt.end_token.next0_
    if (t2 is None or MiscHelper.can_be_start_of_sentence(t2) or not (isinstance(t2, TextToken))): return None
    if (not t2.get_morph_class_in_dictionary().is_verb): return None
    t3 = t2
    # Skip the full verb group.
    tt = t2.next0_
    while tt is not None:
        if (not tt.get_morph_class_in_dictionary().is_verb): break
        tt = tt.next0_
    # Scan forward to the end of the sentence (t3).
    first_pass3132 = True
    while True:
        if first_pass3132: first_pass3132 = False
        else: tt = tt.next0_
        if (not (tt is not None)): break
        if (tt.next0_ is None):
            t3 = tt
            break
        if (tt.is_char_of(".;!?")):
            if (MiscHelper.can_be_start_of_sentence(tt.next0_)):
                t3 = tt
                break
        if (not (isinstance(tt, TextToken))): continue
        if (BracketHelper.can_be_start_of_sequence(tt, False, False)):
            br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
            if (br is not None):
                tt = br.end_token
                continue
    # Statement text = [t2 .. t3] without the trailing punctuation.
    tt = t3
    if (t3.is_char_of(";.!?")): tt = tt.previous
    txt = MiscHelper.get_text_value(t2, tt, Utils.valToEnum((GetTextAttr.KEEPREGISTER) | (GetTextAttr.KEEPQUOTES), GetTextAttr))
    if (txt is None or (len(txt) < 15)): return None
    if (t0 != t1):
        # Append the preamble text to the statement, lower-casing its first
        # letter when it was only capitalized as sentence start.
        tt = t1.previous
        if (tt.is_comma): tt = tt.previous
        txt0 = MiscHelper.get_text_value(t0, tt, Utils.valToEnum((GetTextAttr.KEEPREGISTER) | (GetTextAttr.KEEPQUOTES), GetTextAttr))
        if (txt0 is not None and len(txt0) > 10):
            if (t0.chars.is_capital_upper): txt0 = ((str.lower(txt0[0])) + txt0[1:])
            txt = "{0}, {1}".format(txt, txt0)
    tt = t1
    if (MiscHelper.is_eng_article(tt)): tt = tt.next0_
    nam = MiscHelper.get_text_value(tt, t2.previous, GetTextAttr.KEEPQUOTES)
    # Strip the "SO-CALLED " prefix (9 chars) from the subject.
    if (nam.startswith("SO-CALLED")): nam = nam[9:].strip()
    dr = DefinitionReferent()
    dr.kind = DefinitionKind.ASSERTATION
    dr.add_slot(DefinitionReferent.ATTR_TERMIN, nam, False, 0)
    dr.add_slot(DefinitionReferent.ATTR_VALUE, txt, False, 0)
    return ReferentToken(dr, t0, t3)
def parse(t : 'Token', max_char : int, prev : 'LineToken') -> 'LineToken':
    """Cut off one logical line (possibly a list item) starting at ``t``.

    Scans up to ``max_char``, stopping at ``:`` after "ПРИЛОЖЕНИЕ/ДОДАТОК"
    (list head), at ``;`` (list item) and at hard line breaks; afterwards
    classifies the line as a list item by its leading bullet/number/letter.
    ``prev`` is the previously parsed line, used for list continuation.
    """
    # Local imports are kept at function scope to avoid import cycles.
    from pullenti.morph.LanguageHelper import LanguageHelper
    from pullenti.ner.NumberToken import NumberToken
    from pullenti.ner.TextToken import TextToken
    from pullenti.ner.core.BracketHelper import BracketHelper
    from pullenti.ner.core.BracketParseAttr import BracketParseAttr
    from pullenti.ner.decree.DecreeReferent import DecreeReferent
    if (t is None or t.end_char > max_char): return None
    res = ListHelper.LineToken(t, t)
    # Generated do-while: first iteration keeps t, later ones advance it.
    first_pass3272 = True
    while True:
        if first_pass3272: first_pass3272 = False
        else: t = t.next0_
        if (not (t is not None and t.end_char <= max_char)): break
        if (t.is_char(':')):
            # "ПРИЛОЖЕНИЕ:" on its own line introduces a list.
            if (res.is_newline_before and res.begin_token.is_value("ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                res.is_list_head = True
                res.end_token = t
                break
        if (t.is_char(';')):
            if (not t.is_whitespace_after): pass
            # A ';' between two decree referents does not end the item.
            if (t.previous is not None and (isinstance(t.previous.get_referent(), DecreeReferent))):
                if (not t.is_whitespace_after): continue
                if (t.next0_ is not None and (isinstance(t.next0_.get_referent(), DecreeReferent))): continue
            res.is_list_item = True
            res.end_token = t
            break
        if (t.is_char('(')):
            # Swallow complete bracket sequences.
            br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
            if (br is not None):
                t = br.end_token
                res.end_token = t
                continue
        if (t.is_newline_before and t != res.begin_token):
            # Decide whether the newline really starts a new logical line.
            next0__ = True
            if (t.previous.is_comma or t.previous.is_and or t.is_char_of("(")): next0__ = False
            elif (t.chars.is_letter or (isinstance(t, NumberToken))):
                if (t.chars.is_all_lower): next0__ = False
                elif (t.previous.chars.is_letter): next0__ = False
            if (next0__): break
        res.end_token = t
    # Classify the leading bullet / number / single letter.
    if (res.begin_token.is_hiphen):
        res.is_list_item = (res.begin_token.next0_ is not None and not res.begin_token.next0_.is_hiphen)
    elif (res.begin_token.is_char_of("·")):
        res.is_list_item = True
        res.begin_token = res.begin_token.next0_
    elif (res.begin_token.next0_ is not None and ((res.begin_token.next0_.is_char(')') or ((prev is not None and ((prev.is_list_item or prev.is_list_head))))))):
        if (res.begin_token.length_char == 1 or (isinstance(res.begin_token, NumberToken))):
            res.is_list_item = True
            if ((isinstance(res.begin_token, NumberToken)) and res.begin_token.int_value is not None):
                res.number = res.begin_token.int_value
            elif ((isinstance(res.begin_token, TextToken)) and res.begin_token.length_char == 1):
                # Single letter bullets: number = 0-based offset from 'А'/'A'.
                te = res.begin_token.term
                if (LanguageHelper.is_cyrillic_char(te[0])):
                    res.number = ((ord(te[0])) - (ord('А')))
                elif (LanguageHelper.is_latin_char(te[0])):
                    res.number = ((ord(te[0])) - (ord('A')))
    return res
def __calc_rank_and_value(self, min_newlines_count: int) -> bool:
    """Score this candidate title span and locate its name boundaries.

    Walks the tokens of the span, adjusting ``self.rank`` with heuristic
    bonuses/penalties (title keywords, book-link markers, referents, person
    attributes, verbs, punctuation), counting words vs non-words, and
    tracking the name sub-span in ``tstart``/``tend``.  Returns False when
    the span is rejected outright; on True sets ``begin_name_token`` /
    ``end_name_token``.
    """
    self.rank = 0
    if (self.begin_token.chars.is_all_lower): self.rank -= 30
    words = 0
    up_words = 0          # words written in ALL CAPS
    notwords = 0          # non-letter tokens (penalized at the end)
    line_number = 0       # newlines seen inside the span (max 4 allowed)
    tstart = self.begin_token
    tend = self.end_token
    t = self.begin_token
    # Generated do-while over the span's tokens.
    first_pass3396 = True
    while True:
        if first_pass3396: first_pass3396 = False
        else: t = t.next0_
        if (not (t != self.end_token.next0_ and t is not None and t.end_char <= self.end_token.end_char)): break
        if (t.is_newline_before): pass
        tit = TitleItemToken.try_attach(t)
        if (tit is not None):
            if (tit.typ == TitleItemToken.Types.THEME or tit.typ == TitleItemToken.Types.TYPANDTHEME):
                # Explicit theme marker: restart word counters after it.
                if (t != self.begin_token):
                    if (line_number > 0): return False
                    notwords = 0
                    up_words = notwords
                    words = up_words
                    tstart = tit.end_token.next0_
                t = tit.end_token
                if (t.next0_ is None): return False
                if (t.next0_.chars.is_letter and t.next0_.chars.is_all_lower): self.rank += 20
                else: self.rank += 100
                tstart = t.next0_
                if (tit.typ == TitleItemToken.Types.TYPANDTHEME): self.type_value = tit.value
                continue
            if (tit.typ == TitleItemToken.Types.TYP):
                if (t == self.begin_token):
                    if (tit.end_token.is_newline_after):
                        self.type_value = tit.value
                        self.rank += 5
                        tstart = tit.end_token.next0_
                t = tit.end_token
                words += 1
                if (tit.begin_token != tit.end_token): words += 1
                if (tit.chars.is_all_upper): up_words += 1
                continue
            if (tit.typ == TitleItemToken.Types.DUST or tit.typ == TitleItemToken.Types.SPECIALITY):
                if (t == self.begin_token): return False
                self.rank -= 20
                if (tit.typ == TitleItemToken.Types.SPECIALITY): self.speciality = tit.value
                t = tit.end_token
                continue
            if (tit.typ == TitleItemToken.Types.CONSULTANT or tit.typ == TitleItemToken.Types.BOSS or tit.typ == TitleItemToken.Types.EDITOR):
                t = tit.end_token
                if (t.next0_ is not None and ((t.next0_.is_char_of(":") or t.next0_.is_hiphen or t.whitespaces_after_count > 4))): self.rank -= 10
                else: self.rank -= 2
                continue
            return False
        blt = BookLinkToken.try_parse(t, 0)
        if (blt is not None):
            # Bibliographic markers penalize a title candidate.
            # NOTE(review): BookLinkTyp.N is tested in both arms, so the
            # elif can never match on N - looks like a translation artifact.
            if (blt.typ == BookLinkTyp.MISC or blt.typ == BookLinkTyp.N or blt.typ == BookLinkTyp.PAGES): self.rank -= 10
            elif (blt.typ == BookLinkTyp.N or blt.typ == BookLinkTyp.PAGERANGE): self.rank -= 20
        if (t == self.begin_token and BookLinkToken.try_parse_author(t, FioTemplateType.UNDEFINED) is not None): self.rank -= 20
        if (t.is_newline_before and t != self.begin_token):
            line_number += 1
            if (line_number > 4): return False
            # Reward line wraps that look like a continuing phrase.
            if (t.chars.is_all_lower): self.rank += 10
            elif (t.previous.is_char('.')): self.rank -= 10
            elif (t.previous.is_char_of(",-")): self.rank += 10
            else:
                npt = NounPhraseHelper.try_parse(t.previous, NounPhraseParseAttr.NO, 0, None)
                if (npt is not None and npt.end_char >= t.end_char): self.rank += 10
        if (t != self.begin_token and t.newlines_before_count > min_newlines_count):
            # Penalize unusually large vertical gaps.
            self.rank -= (t.newlines_before_count - min_newlines_count)
        bst = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
        if (bst is not None and bst.is_quote_type and bst.end_token.end_char <= self.end_token.end_char):
            # A leading quoted sequence is a strong title signal.
            if (words == 0):
                tstart = bst.begin_token
                self.rank += 10
                if (bst.end_token == self.end_token):
                    tend = self.end_token
                    self.rank += 10
        rli = t.get_referents()
        if (rli is not None):
            # Attached referents mostly count against a title.
            for r in rli:
                if (isinstance(r, OrganizationReferent)):
                    if (t.is_newline_before): self.rank -= 10
                    else: self.rank -= 4
                    continue
                if ((isinstance(r, GeoReferent)) or (isinstance(r, PersonReferent))):
                    if (t.is_newline_before): self.rank -= 5
                    if (t.is_newline_after or t.next0_ is None): self.rank -= 20
                    elif (t.next0_.is_hiphen or (isinstance(t.next0_, NumberToken)) or (isinstance(t.next0_.get_referent(), DateReferent))): self.rank -= 20
                    elif (t != self.begin_token): self.rank -= 20
                    continue
                # NOTE(review): GeoReferent is already handled above, so this
                # branch effectively fires only for DenominationReferent.
                if ((isinstance(r, GeoReferent)) or (isinstance(r, DenominationReferent))): continue
                if ((isinstance(r, UriReferent)) or (isinstance(r, PhoneReferent))): return False
                if (t.is_newline_before): self.rank -= 4
                else: self.rank -= 2
                if (t == self.begin_token and (isinstance(self.end_token.get_referent(), PersonReferent))): self.rank -= 10
            words += 1
            if (t.chars.is_all_upper): up_words += 1
            if (t == self.begin_token):
                if (t.is_newline_after): self.rank -= 10
                elif (t.next0_ is not None and t.next0_.is_char('.') and t.next0_.is_newline_after): self.rank -= 10
            continue
        if (isinstance(t, NumberToken)):
            # Spelled-out numbers count as words, digits as non-words.
            if (t.typ == NumberSpellingType.WORDS):
                words += 1
                if (t.chars.is_all_upper): up_words += 1
            else: notwords += 1
            continue
        pat = PersonAttrToken.try_attach(t, None, PersonAttrToken.PersonAttrAttachAttrs.NO)
        if (pat is not None):
            # A person attribute (job title etc.) at line start is suspicious
            # unless it is in an oblique case or ALL CAPS.
            if (t.is_newline_before):
                if (not pat.morph.case_.is_undefined and not pat.morph.case_.is_nominative): pass
                elif (pat.chars.is_all_upper): pass
                else: self.rank -= 20
            elif (t.chars.is_all_lower): self.rank -= 1
            while t is not None:
                words += 1
                if (t.chars.is_all_upper): up_words += 1
                if (t == pat.end_token): break
                t = t.next0_
            continue
        oitt = OrgItemTypeToken.try_attach(t, True, None)
        if (oitt is not None):
            if (oitt.morph.number != MorphNumber.PLURAL and not oitt.is_doubt_root_word):
                if (not oitt.morph.case_.is_undefined and not oitt.morph.case_.is_nominative):
                    words += 1
                    if (t.chars.is_all_upper): up_words += 1
                else:
                    # Nominative organization type word - likely a header, not a title.
                    self.rank -= 4
                    if (t == self.begin_token): self.rank -= 5
            else:
                words += 1
                if (t.chars.is_all_upper): up_words += 1
            t = oitt.end_token
            continue
        tt = Utils.asObjectOrNull(t, TextToken)
        if (tt is not None):
            if (tt.is_char('©')): self.rank -= 10
            if (tt.is_char('_')): self.rank -= 1
            if (tt.chars.is_letter):
                if (tt.length_char > 2):
                    words += 1
                    if (t.chars.is_all_upper): up_words += 1
            elif (not tt.is_char(',')): notwords += 1
            if (tt.is_pure_verb):
                # A finite verb means this is a sentence, not a title.
                self.rank -= 30
                words -= 1
                break
            if (tt == self.end_token):
                if (tt.morph.class0_.is_preposition or tt.morph.class0_.is_conjunction): self.rank -= 10
                elif (tt.is_char('.')): self.rank += 5
            elif (tt.is_char_of("._")): self.rank -= 5
    self.rank += words
    self.rank -= notwords
    if ((words < 1) and (self.rank < 50)): return False
    if (tstart is None or tend is None): return False
    if (tstart.end_char > tend.end_char): return False
    # A type/speciality marker right after the span supports the candidate.
    tit1 = TitleItemToken.try_attach(self.end_token.next0_)
    if (tit1 is not None and ((tit1.typ == TitleItemToken.Types.TYP or tit1.typ == TitleItemToken.Types.SPECIALITY))):
        if (tit1.end_token.is_newline_after): self.rank += 15
        else: self.rank += 10
        if (tit1.typ == TitleItemToken.Types.SPECIALITY): self.speciality = tit1.value
    # Mostly ALL-CAPS text right after a person referent: boost.
    if (up_words > 4 and up_words > (math.floor((0.8 * (words))))):
        if (tstart.previous is not None and (isinstance(tstart.previous.get_referent(), PersonReferent))):
            self.rank += (5 + up_words)
    self.begin_name_token = tstart
    self.end_name_token = tend
    return True
def __try_attach_contract_ground(t : 'Token', ip : 'InstrumentParticipantReferent', can_be_passport : bool=False) -> 'Token':
    """Parse the "acting on the basis of ..." clause after a contract party.

    Recognizes grounds such as a decree referent, a charter ("УСТАВ"), an
    identity document, or a power of attorney ("ДОВЕРЕННОСТЬ"), stores the
    result into ``ip.ground`` and returns the last consumed token (a
    ReferentToken for an embedded power-of-attorney decree), or ``None``.
    """
    ok = False
    # Generated do-while over following tokens.
    first_pass3289 = True
    while True:
        if first_pass3289: first_pass3289 = False
        else: t = t.next0_
        if (not (t is not None)): break
        # Skip connective punctuation and prepositions.
        if (t.is_char(',') or t.morph.class0_.is_preposition): continue
        if (t.is_char('(')):
            br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
            if (br is not None):
                t = br.end_token
                continue
        if (t.is_value("ОСНОВАНИЕ", None) or t.is_value("ДЕЙСТВОВАТЬ", None) or t.is_value("ДЕЙСТВУЮЩИЙ", None)):
            # "на основании / действующий ..." - the ground should follow.
            ok = True
            if (t.next0_ is not None and t.next0_.is_char('(')):
                br = BracketHelper.try_parse(t.next0_, BracketParseAttr.NO, 100)
                # Only skip short parentheticals here.
                if (br is not None and (br.length_char < 10)): t = br.end_token
            continue
        dr = Utils.asObjectOrNull(t.get_referent(), DecreeReferent)
        if (dr is not None):
            # A decree referent is itself the ground.
            ip.ground = dr
            return t
        pir = Utils.asObjectOrNull(t.get_referent(), PersonIdentityReferent)
        if (pir is not None and can_be_passport):
            # NOTE(review): the condition accepts identity documents whose
            # type does NOT contain "паспорт" - confirm this inversion is
            # intended.
            if (pir.typ is not None and not "паспорт" in pir.typ):
                ip.ground = pir
                return t
        if (t.is_value("УСТАВ", None)):
            # Charter: store its normalized noun form as a string ground.
            ip.ground = t.get_normal_case_text(MorphClass.NOUN, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
            return t
        if (t.is_value("ДОВЕРЕННОСТЬ", None)):
            # Power of attorney: try to pick up its date/number tokens.
            dts = DecreeToken.try_attach_list(t.next0_, None, 10, False)
            if (dts is None):
                # No direct attributes: scan ahead (max ~200 chars) past
                # notary wording for a date/number.
                has_spec = False
                ttt = t.next0_
                first_pass3290 = True
                while True:
                    if first_pass3290: first_pass3290 = False
                    else: ttt = ttt.next0_
                    if (not (ttt is not None and ((ttt.end_char - t.end_char) < 200))): break
                    if (ttt.is_comma): continue
                    if (ttt.is_value("УДОСТОВЕРИТЬ", None) or ttt.is_value("УДОСТОВЕРЯТЬ", None)):
                        has_spec = True
                        continue
                    dt = DecreeToken.try_attach(ttt, None, False)
                    if (dt is not None):
                        if (dt.typ == DecreeToken.ItemType.DATE or dt.typ == DecreeToken.ItemType.NUMBER):
                            dts = DecreeToken.try_attach_list(ttt, None, 10, False)
                        break
                    npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                    if (npt is not None):
                        if (npt.end_token.is_value("НОТАРИУС", None)):
                            ttt = npt.end_token
                            has_spec = True
                            continue
                    if (ttt.get_referent() is not None):
                        if (has_spec): continue
                    break
            if (dts is not None and len(dts) > 0):
                # Build a synthetic DecreeReferent for the power of attorney,
                # register it and embed it as a new token.
                t0 = t
                dr = DecreeReferent()
                dr.typ = "ДОВЕРЕННОСТЬ"
                for d in dts:
                    if (d.typ == DecreeToken.ItemType.DATE):
                        dr._add_date(d)
                        t = d.end_token
                    elif (d.typ == DecreeToken.ItemType.NUMBER):
                        dr._add_number(d)
                        t = d.end_token
                    else: break
                ad = t.kit.get_analyzer_data_by_analyzer_name(InstrumentAnalyzer.ANALYZER_NAME)
                ip.ground = ad.register_referent(dr)
                rt = ReferentToken(Utils.asObjectOrNull(ip.ground, Referent), t0, t)
                t.kit.embed_token(rt)
                return rt
            # No attributes found: keep the plain string ground.
            ip.ground = "ДОВЕРЕННОСТЬ"
            return t
        # Any other token terminates the scan.
        break
    return None
def __correct_tail_attributes(p : 'PersonReferent', t0 : 'Token') -> 'Token':
    """Attach trailing birth/death attributes to a person referent.

    Handles patterns after a person mention such as ", родился <date>",
    "умер <date>", "дата рождения <date>", an age for a deceased person, and
    a parenthesized date or date-range "(1920-1985)".  ``p`` may be ``None``
    (dry run); slots are added only when ``p`` is given.  Returns the last
    token consumed (``t0`` when nothing matched).
    """
    res = t0
    t = t0
    if (t is not None and t.is_char(',')): t = t.next0_
    born = False
    die = False
    # Explicit born/died verbs (Russian/Ukrainian/English).
    if (t is not None and ((t.is_value("РОДИТЬСЯ", "НАРОДИТИСЯ") or t.is_value("BORN", None)))):
        t = t.next0_
        born = True
    elif (t is not None and ((t.is_value("УМЕРЕТЬ", "ПОМЕРТИ") or t.is_value("СКОНЧАТЬСЯ", None) or t.is_value("DIED", None)))):
        t = t.next0_
        die = True
    elif ((t is not None and t.is_value("ДАТА", None) and t.next0_ is not None) and t.next0_.is_value("РОЖДЕНИЕ", "НАРОДЖЕННЯ")):
        t = t.next0_.next0_
        born = True
    # Skip connective prepositions / hyphens / colon before the date.
    while t is not None:
        if (t.morph.class0_.is_preposition or t.is_hiphen or t.is_char(':')): t = t.next0_
        else: break
    if (t is not None and t.get_referent() is not None):
        r = t.get_referent()
        if (r.type_name == "DATE"):
            t1 = t
            # "<date> р." / "<date> рождения" marks a birth date.
            if (t.next0_ is not None and ((t.next0_.is_value("Р", None) or t.next0_.is_value("РОЖДЕНИЕ", "НАРОДЖЕННЯ")))):
                born = True
                t1 = t.next0_
                if (t1.next0_ is not None and t1.next0_.is_char('.')): t1 = t1.next0_
            if (born):
                if (p is not None): p.add_slot(PersonReferent.ATTR_BORN, r, False, 0)
                res = t1
                t = t1
            elif (die):
                if (p is not None): p.add_slot(PersonReferent.ATTR_DIE, r, False, 0)
                res = t1
                t = t1
    if (die and t is not None):
        # "... умер в возрасте N лет" - attach the age.
        ag = NumberHelper.try_parse_age(t.next0_)
        if (ag is not None):
            if (p is not None): p.add_slot(PersonReferent.ATTR_AGE, str(ag.value), False, 0)
            t = ag.end_token.next0_
            res = ag.end_token
    if (t is None): return res
    if (t.is_char('(')):
        # Parenthesized "(род. <date>)" or "(<daterange>)" after the name.
        br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
        if (br is not None):
            t1 = t.next0_
            born = False
            if (t1.is_value("РОД", None)):
                born = True
                t1 = t1.next0_
                if (t1 is not None and t1.is_char('.')): t1 = t1.next0_
            if (isinstance(t1, ReferentToken)):
                r = t1.get_referent()
                if (r.type_name == "DATERANGE" and t1.next0_ == br.end_token):
                    # Life span: FROM = birth, TO = death.
                    bd = Utils.asObjectOrNull(r.get_slot_value("FROM"), Referent)
                    to = Utils.asObjectOrNull(r.get_slot_value("TO"), Referent)
                    if (bd is not None and to is not None):
                        if (p is not None):
                            p.add_slot(PersonReferent.ATTR_BORN, bd, False, 0)
                            p.add_slot(PersonReferent.ATTR_DIE, to, False, 0)
                        res = br.end_token
                        t = res
                elif (r.type_name == "DATE" and t1.next0_ == br.end_token):
                    if (p is not None): p.add_slot(PersonReferent.ATTR_BORN, r, False, 0)
                    res = br.end_token
                    t = res
    return res
def process(self, kit: 'AnalysisKit') -> None:
    """Run the book-link analyzer over the whole token chain of ``kit``.

    First pass: finds inline parenthesized references and line-start
    bibliography entries, registering BookLink/BookLinkRef referents and
    embedding them as tokens; numbered entries are indexed in
    ``refs_by_num``.  Second pass: resolves short inline references like
    "[3]" against that index.
    """
    ad = kit.get_analyzer_data(self)
    # Countdown heuristic: >0 means we are inside a literature list.
    is_lit_block = 0
    # Maps an entry number (str) to the list of entries carrying it.
    refs_by_num = dict()
    rts = []
    t = kit.first_token
    # Pass 1: full references.
    first_pass3022 = True
    while True:
        if first_pass3022: first_pass3022 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.is_char('(')):
            # Inline reference in parentheses (70..400 chars, line/sentence end).
            br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
            if (br is not None and br.length_char > 70 and (br.length_char < 400)):
                if (br.is_newline_after or ((br.end_token.next0_ is not None and br.end_token.next0_.is_char_of(".;")))):
                    rts = BookLinkAnalyzer.__try_parse(t.next0_, False, br.end_char)
                    if (rts is not None and len(rts) >= 1):
                        if (len(rts) > 1):
                            # rts[1] is the book itself; register and embed it first.
                            rts[1].referent = ad.register_referent(rts[1].referent)
                            kit.embed_token(rts[1])
                            rts[0].referent.book = Utils.asObjectOrNull(rts[1].referent, BookLinkReferent)
                            if (rts[0].begin_char == rts[1].begin_char): rts[0].begin_token = rts[1]
                            if (rts[0].end_char == rts[1].end_char): rts[0].end_token = rts[1]
                        # rts[0] is the reference; widen it to the whole bracket.
                        rts[0].begin_token = t
                        rts[0].end_token = br.end_token
                        rts[0].referent.typ = BookLinkRefType.INLINE
                        rts[0].referent = ad.register_referent(rts[0].referent)
                        kit.embed_token(rts[0])
                        t = (rts[0])
                        # NOTE(review): placement of this 'continue' is
                        # reconstructed from flattened source; on a failed
                        # parse the token falls through to the checks below.
                        continue
        if (not t.is_newline_before): continue
        if (is_lit_block <= 0):
            # Detect a "Литература:"-style heading starting a list.
            tt = BookLinkToken.parse_start_of_lit_block(t)
            if (tt is not None):
                is_lit_block = 5
                t = tt
                continue
        rts = BookLinkAnalyzer.__try_parse(t, is_lit_block > 0, 0)
        if (rts is None or (len(rts) < 1)):
            # A line that is not an entry weakens the in-list evidence.
            is_lit_block -= 1
            if (is_lit_block < 0): is_lit_block = 0
            continue
        is_lit_block += 1
        if (is_lit_block > 5): is_lit_block = 5
        if (len(rts) > 1):
            rts[1].referent = ad.register_referent(rts[1].referent)
            kit.embed_token(rts[1])
            rts[0].referent.book = Utils.asObjectOrNull(rts[1].referent, BookLinkReferent)
            if (rts[0].begin_char == rts[1].begin_char): rts[0].begin_token = rts[1]
            if (rts[0].end_char == rts[1].end_char): rts[0].end_token = rts[1]
        re = Utils.asObjectOrNull(rts[0].referent, BookLinkRefReferent)
        re = (Utils.asObjectOrNull(ad.register_referent(re), BookLinkRefReferent))
        rts[0].referent = (re)
        kit.embed_token(rts[0])
        t = (rts[0])
        if (re.number is not None):
            # Index the entry under its list number for pass 2.
            li = []
            wrapli368 = RefOutArgWrapper(None)
            inoutres369 = Utils.tryGetValue(refs_by_num, re.number, wrapli368)
            li = wrapli368.value
            if (not inoutres369):
                li = list()
                refs_by_num[re.number] = li
            li.append(re)
    # Pass 2: short inline references resolved via refs_by_num.
    t = kit.first_token
    first_pass3023 = True
    while True:
        if first_pass3023: first_pass3023 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (not (isinstance(t, TextToken))): continue
        rt = BookLinkAnalyzer.__try_parse_short_inline(t)
        if (rt is None): continue
        re = Utils.asObjectOrNull(rt.referent, BookLinkRefReferent)
        li = []
        wrapli370 = RefOutArgWrapper(None)
        inoutres371 = Utils.tryGetValue(refs_by_num, Utils.ifNotNull(re.number, ""), wrapli370)
        li = wrapli370.value
        if (not inoutres371): continue
        # Pick the last entry with this number occurring before t.
        i = 0
        i = 0
        while i < len(li):
            if (t.begin_char < li[i].occurrence[0].begin_char): break
            i += 1
        if (i >= len(li)): continue
        re.book = li[i].book
        if (re.pages is None): re.pages = li[i].pages
        re.typ = BookLinkRefType.INLINE
        re = (Utils.asObjectOrNull(ad.register_referent(re), BookLinkRefReferent))
        rt.referent = (re)
        kit.embed_token(rt)
        t = (rt)
def try_attach_requisites(t : 'Token', cur : 'InstrumentParticipantReferent', other : 'InstrumentParticipantReferent', cant_be_empty : bool=False) -> 'ReferentToken':
    """Collect the requisites block (addresses, banks, IDs, signatures) of a
    contract party.

    Starting at ``t``, extends a ReferentToken for ``cur`` over tokens that
    look like requisites, adding encountered referents to ``cur``; stops on
    requisites belonging to ``other``, table boundaries, appendix headers or
    too much unrecognized text.  Returns the span or ``None``.
    """
    if (t is None or cur is None): return None
    if (t.is_table_control_char): return None
    err = 0          # consecutive unrecognized text tokens
    spec_chars = 0   # signature filler chars (_ / \)
    rt = None
    t0 = t
    # Detect whether the block lives inside a table cell (look ahead max 300).
    is_in_tab_cell = False
    cou = 0
    tt = t.next0_
    while tt is not None and (cou < 300):
        if (tt.is_table_control_char):
            is_in_tab_cell = True
            break
        tt = tt.next0_; cou += 1
    # Generated do-while over the requisites tokens.
    first_pass3286 = True
    while True:
        if first_pass3286: first_pass3286 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.begin_char == 8923): pass
        if (t.is_table_control_char):
            # Table boundary: close the span (possibly creating an empty one).
            if (t != t0):
                if (rt is not None): rt.end_token = t.previous
                elif (not cant_be_empty): rt = ReferentToken(cur, t0, t.previous)
                break
            else: continue
        # "М.П." (seal mark) and punctuation extend the span.
        if ((t.is_char_of(":.") or t.is_value("М", None) or t.is_value("M", None)) or t.is_value("П", None)):
            if (rt is not None): rt.end_token = t
            continue
        pp = ParticipantToken.try_attach_to_exist(t, cur, other)
        if (pp is not None):
            # A participant mention: ours extends the span, theirs ends it.
            if (pp.referent != cur): break
            if (rt is None): rt = ReferentToken(cur, t, t)
            rt.end_token = pp.end_token
            err = 0
            continue
        if (t.is_newline_before):
            # An appendix header terminates the requisites block.
            iii = InstrToken.parse(t, 0, None)
            if (iii is not None):
                if (iii.typ == ILTypes.APPENDIX): break
        if (t.whitespaces_before_count > 25 and not is_in_tab_cell):
            # Large gaps outside a table usually separate blocks.
            if (t != t0):
                if (t.previous is not None and t.previous.is_char_of(",;")): pass
                elif (t.newlines_before_count > 1): break
        if ((isinstance(t.get_referent(), PersonReferent)) or (isinstance(t.get_referent(), OrganizationReferent))):
            # A person/org not belonging to cur means the next party started.
            if (not cur._contains_ref(t.get_referent())): break
        if ((t.is_char_of(";:,.") or t.is_hiphen or t.morph.class0_.is_preposition) or t.morph.class0_.is_conjunction): continue
        if (t.is_char_of("_/\\")):
            # Signature lines; enough of them alone can form a span.
            spec_chars += 1
            if (spec_chars > 10 and rt is None): rt = ReferentToken(cur, t0, t)
            if (rt is not None): rt.end_token = t
            continue
        if (t.is_newline_before and (isinstance(t, NumberToken))): break
        if (t.is_value("ОФИС", None)):
            # Office designation: quoted value or a single capitalized word.
            if (BracketHelper.can_be_start_of_sequence(t.next0_, True, False)):
                br = BracketHelper.try_parse(t.next0_, BracketParseAttr.NO, 100)
                if (br is not None):
                    t = br.end_token
                    continue
            if ((isinstance(t.next0_, TextToken)) and not t.next0_.chars.is_all_lower):
                t = t.next0_
                continue
        r = t.get_referent()
        if ((((isinstance(r, PersonReferent)) or (isinstance(r, AddressReferent)) or (isinstance(r, UriReferent))) or (isinstance(r, OrganizationReferent)) or (isinstance(r, PhoneReferent))) or (isinstance(r, PersonIdentityReferent)) or (isinstance(r, BankDataReferent))):
            # A requisite referent: attach to cur unless it belongs to other.
            if (other is not None and other.find_slot(None, r, True) is not None):
                if (not (isinstance(r, UriReferent))): break
            if (rt is None): rt = ReferentToken(cur, t, t)
            if (cur.find_slot(InstrumentParticipantReferent.ATTR_DELEGATE, r, True) is not None): pass
            else: cur.add_slot(InstrumentParticipantReferent.ATTR_REF, r, False, 0)
            rt.end_token = t
            err = 0
        else:
            # Unrecognized text: tolerate much more noise inside a table cell.
            # NOTE(review): nesting of the thresholds reconstructed from
            # flattened source (300 in-cell vs 4 otherwise).
            if ((isinstance(t, TextToken)) and t.length_char > 1):
                err += 1
                if (is_in_tab_cell and rt is not None):
                    if (err > 300): break
                elif (err > 4): break
    return rt
def try_attach(t: 'Token') -> 'TitleItemToken':
    """Try to recognize a title-page item (theme, type, translation mark,
    section header, speciality code, dissertation type, ...) starting at ``t``.

    Returns a TitleItemToken describing the matched span and its Types value,
    or None when nothing matches.  The literals compared against are Russian/
    Ukrainian words ("ТЕМА" = theme, "ПЕРЕВОД" = translation, "СЕКЦИЯ" =
    section, "ДИССЕРТАЦИЯ" = dissertation, etc.).
    """
    tt = Utils.asObjectOrNull(t, TextToken)
    if (tt is not None): 
        t1 = tt
        if (tt.term == "ТЕМА"): 
            # "ТЕМА" may be followed by a type item -> combined TYPANDTHEME
            tit = TitleItemToken.try_attach(tt.next0_)
            if (tit is not None and tit.typ == TitleItemToken.Types.TYP): 
                t1 = tit.end_token
                if (t1.next0_ is not None and t1.next0_.is_char(':')): 
                    t1 = t1.next0_
                return TitleItemToken._new2655(t, t1, TitleItemToken.Types.TYPANDTHEME, tit.value)
            if (tt.next0_ is not None and tt.next0_.is_char(':')): 
                t1 = tt.next0_
            return TitleItemToken(tt, t1, TitleItemToken.Types.THEME)
        if (tt.term == "ПО" or tt.term == "НА"): 
            # prepositional form: "по теме"/"на тему" (= "on the theme")
            if (tt.next0_ is not None and tt.next0_.is_value("ТЕМА", None)): 
                t1 = tt.next0_
                if (t1.next0_ is not None and t1.next0_.is_char(':')): 
                    t1 = t1.next0_
                return TitleItemToken(tt, t1, TitleItemToken.Types.THEME)
        if (tt.term == "ПЕРЕВОД" or tt.term == "ПЕР"): 
            # "перевод с ..." (= "translation from ...")
            tt2 = tt.next0_
            if (tt2 is not None and tt2.is_char('.')): 
                tt2 = tt2.next0_
            if (isinstance(tt2, TextToken)): 
                if (tt2.term == "C" or tt2.term == "С"): 
                    tt2 = tt2.next0_
                    if (isinstance(tt2, TextToken)): 
                        return TitleItemToken(t, tt2, TitleItemToken.Types.TRANSLATE)
        if (tt.term == "СЕКЦИЯ" or tt.term == "SECTION" or tt.term == "СЕКЦІЯ"): 
            # section header: swallow the quoted name or the rest of the line
            t1 = tt.next0_
            if (t1 is not None and t1.is_char(':')): 
                t1 = t1.next0_
            br = BracketHelper.try_parse(t1, BracketParseAttr.NO, 100)
            if (br is not None): 
                t1 = br.end_token
            elif (t1 != tt.next0_): 
                while t1 is not None: 
                    if (t1.is_newline_after): 
                        break
                    t1 = t1.next0_
                if (t1 is None): 
                    return None
            if (t1 != tt.next0_): 
                return TitleItemToken(tt, t1, TitleItemToken.Types.DUST)
        t1 = (None)
        # speciality: "специальность ..." possibly behind a preposition,
        # or a line-initial '/'
        if (tt.is_value("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ")): 
            t1 = tt.next0_
        elif (tt.morph.class0_.is_preposition and tt.next0_ is not None and tt.next0_.is_value("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ")): 
            t1 = tt.next0_.next0_
        elif (tt.is_char('/') and tt.is_newline_before): 
            t1 = tt.next0_
        if (t1 is not None): 
            if (t1.is_char_of(":") or t1.is_hiphen): 
                t1 = t1.next0_
            spec = TitleItemToken.__try_attach_speciality(t1, True)
            if (spec is not None): 
                spec.begin_token = t
                return spec
    # speciality without an introducing keyword
    sss = TitleItemToken.__try_attach_speciality(t, False)
    if (sss is not None): 
        return sss
    if (isinstance(t, ReferentToken)): 
        return None
    npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
    if (npt is not None): 
        s = npt.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
        tok = TitleItemToken.M_TERMINS.try_parse(npt.end_token, TerminParseAttr.NO)
        if (tok is not None): 
            ty = Utils.valToEnum(tok.termin.tag, TitleItemToken.Types)
            if (ty == TitleItemToken.Types.TYP): 
                tit = TitleItemToken.try_attach(tok.end_token.next0_)
                if (tit is not None and tit.typ == TitleItemToken.Types.THEME): 
                    return TitleItemToken._new2655(npt.begin_token, tit.end_token, TitleItemToken.Types.TYPANDTHEME, s)
                # bare "РАБОТА"/"ПРОЕКТ" ("work"/"project") is too generic
                if (s == "РАБОТА" or s == "РОБОТА" or s == "ПРОЕКТ"): 
                    return None
                t1 = tok.end_token
                if (s == "ДИССЕРТАЦИЯ" or s == "ДИСЕРТАЦІЯ"): 
                    # refine the dissertation type from the following degree
                    # description ("доктор/кандидат наук", "магистр", ...)
                    err = 0
                    ttt = t1.next0_
                    first_pass3394 = True
                    while True:
                        if first_pass3394: first_pass3394 = False
                        else: ttt = ttt.next0_
                        if (not (ttt is not None)): break
                        if (ttt.morph.class0_.is_preposition): 
                            continue
                        if (ttt.is_value("СОИСКАНИЕ", "")): 
                            continue
                        npt1 = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                        if (npt1 is not None and npt1.noun.is_value("СТЕПЕНЬ", "СТУПІНЬ")): 
                            ttt = npt1.end_token
                            t1 = ttt
                            continue
                        rt = t1.kit.process_referent("PERSON", ttt)
                        if (rt is not None and (isinstance(rt.referent, PersonPropertyReferent))): 
                            ppr = Utils.asObjectOrNull(rt.referent, PersonPropertyReferent)
                            if (ppr.name == "доктор наук"): 
                                t1 = rt.end_token
                                s = "ДОКТОРСКАЯ ДИССЕРТАЦИЯ"
                                break
                            elif (ppr.name == "кандидат наук"): 
                                t1 = rt.end_token
                                s = "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"
                                break
                            elif (ppr.name == "магистр"): 
                                t1 = rt.end_token
                                s = "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ"
                                break
                        if (ttt.is_value("ДОКТОР", None) or ttt.is_value("КАНДИДАТ", None) or ttt.is_value("МАГИСТР", "МАГІСТР")): 
                            t1 = ttt
                            npt1 = NounPhraseHelper.try_parse(ttt.next0_, NounPhraseParseAttr.NO, 0, None)
                            if (npt1 is not None and npt1.end_token.is_value("НАУК", None)): 
                                t1 = npt1.end_token
                                s = ("МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ" if ttt.is_value("МАГИСТР", "МАГІСТР") else ("ДОКТОРСКАЯ ДИССЕРТАЦИЯ" if ttt.is_value("ДОКТОР", None) else "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"))
                            break
                        err += 1
                        if (err > 3): 
                            break
                if (t1.next0_ is not None and t1.next0_.is_char('.')): 
                    t1 = t1.next0_
                if (s.endswith("ОТЧЕТ") and t1.next0_ is not None and t1.next0_.is_value("О", None)): 
                    # "отчет о ..." (= "report on ...") — absorb the object
                    npt1 = NounPhraseHelper.try_parse(t1.next0_, NounPhraseParseAttr.PARSEPREPOSITION, 0, None)
                    if (npt1 is not None and npt1.morph.case_.is_prepositional): 
                        t1 = npt1.end_token
                return TitleItemToken._new2655(npt.begin_token, t1, ty, s)
    # a dictionary term directly at t
    tok1 = TitleItemToken.M_TERMINS.try_parse(t, TerminParseAttr.NO)
    if (tok1 is not None): 
        t1 = tok1.end_token
        re = TitleItemToken(tok1.begin_token, t1, Utils.valToEnum(tok1.termin.tag, TitleItemToken.Types))
        return re
    # the same term wrapped in brackets/quotes
    if (BracketHelper.can_be_start_of_sequence(t, False, False)): 
        tok1 = TitleItemToken.M_TERMINS.try_parse(t.next0_, TerminParseAttr.NO)
        if (tok1 is not None and BracketHelper.can_be_end_of_sequence(tok1.end_token.next0_, False, None, False)): 
            t1 = tok1.end_token.next0_
            return TitleItemToken(tok1.begin_token, t1, Utils.valToEnum(tok1.termin.tag, TitleItemToken.Types))
    return None
def try_attach(t : 'Token', p1 : 'InstrumentParticipantReferent'=None, p2 : 'InstrumentParticipantReferent'=None, is_contract : bool=False) -> 'ParticipantToken':
    """Try to recognize a contract-participant designation starting at ``t``.

    Handles several surface forms: a list of same-typed referents jointly
    named "СТОРОНЫ" ("the parties") -> NAMEDASPARTS; a person/organization
    referent followed by "именуемый в дальнейшем ..." ("hereinafter referred
    to as ...") -> NAMEDAS; and a bare role word ("СТОРОНА 1", purchaser/
    seller-style terms from M_ONTOLOGY, or the types of ``p1``/``p2``)
    -> PURE or NAMEDAS.

    p1, p2: already-known participants whose ``typ`` strings are accepted
        (with tolerance of one spelling error) as role words.
    is_contract: enables contract-specific heuristics.
    Returns a ParticipantToken or None.
    """
    if (t is None): 
        return None
    tt = t
    br = False
    if (p1 is None and p2 is None and is_contract): 
        r1 = t.get_referent()
        # case: "<ref>, <ref> (and <ref> ...) ... именуемые ... СТОРОНЫ"
        if ((r1 is not None and t.next0_ is not None and t.next0_.is_comma_and) and (isinstance(t.next0_.next0_, ReferentToken))): 
            r2 = t.next0_.next0_.get_referent()
            if (r1.type_name == r2.type_name): 
                ttt = t.next0_.next0_.next0_
                refs = list()
                refs.append(r1)
                refs.append(r2)
                # collect further same-typed referents in the enumeration
                first_pass3282 = True
                while True:
                    if first_pass3282: first_pass3282 = False
                    else: ttt = ttt.next0_
                    if (not (ttt is not None)): break
                    if ((ttt.is_comma_and and ttt.next0_ is not None and ttt.next0_.get_referent() is not None) and ttt.next0_.get_referent().type_name == r1.type_name): 
                        ttt = ttt.next0_
                        if (not ttt.get_referent() in refs): 
                            refs.append(ttt.get_referent())
                        continue
                    break
                # now expect a plural "СТОРОНА" noun phrase closing the list
                first_pass3283 = True
                while True:
                    if first_pass3283: first_pass3283 = False
                    else: ttt = ttt.next0_
                    if (not (ttt is not None)): break
                    if (ttt.is_comma or ttt.morph.class0_.is_preposition): 
                        continue
                    if ((ttt.is_value("ИМЕНОВАТЬ", None) or ttt.is_value("ДАЛЬНЕЙШИЙ", None) or ttt.is_value("ДАЛЕЕ", None)) or ttt.is_value("ТЕКСТ", None)): 
                        continue
                    if (ttt.is_value("ДОГОВАРИВАТЬСЯ", None)): 
                        continue
                    npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                    if (npt is not None and npt.noun.is_value("СТОРОНА", None) and npt.morph.number != MorphNumber.SINGULAR): 
                        re = ParticipantToken._new1573(t, npt.end_token, ParticipantToken.Kinds.NAMEDASPARTS)
                        re.parts = refs
                        return re
                    break
        if ((isinstance(r1, OrganizationReferent)) or (isinstance(r1, PersonReferent))): 
            has_br = False
            has_named = False
            if (isinstance(r1, PersonReferent)): 
                # skip persons introduced as "лицо" or after "выдать/выдавать"
                if (t.previous is not None and t.previous.is_value("ЛИЦО", None)): 
                    return None
                elif (t.previous is not None and ((t.previous.is_value("ВЫДАВАТЬ", None) or t.previous.is_value("ВЫДАТЬ", None)))): 
                    return None
            # look INSIDE the referent token for "(... именуемый ... <role>)"
            ttt = t.begin_token
            while ttt is not None and (ttt.end_char < t.end_char): 
                if (ttt.is_char('(')): 
                    has_br = True
                elif ((ttt.is_value("ИМЕНОВАТЬ", None) or ttt.is_value("ДАЛЬНЕЙШИЙ", None) or ttt.is_value("ДАЛЕЕ", None)) or ttt.is_value("ТЕКСТ", None)): 
                    has_named = True
                elif ((ttt.is_comma or ttt.morph.class0_.is_preposition or ttt.is_hiphen) or ttt.is_char(':')): 
                    pass
                elif (isinstance(ttt, ReferentToken)): 
                    pass
                elif (has_br or has_named): 
                    npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0, None)
                    if (npt is None): 
                        break
                    if (has_br): 
                        if (npt.end_token.next0_ is None or not npt.end_token.next0_.is_char(')')): 
                            break
                    if (not has_named): 
                        if (ParticipantToken.M_ONTOLOGY.try_parse(ttt, TerminParseAttr.NO) is None): 
                            break
                    re = ParticipantToken._new1573(t, t, ParticipantToken.Kinds.NAMEDAS)
                    re.typ = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                    re.parts = list()
                    re.parts.append(r1)
                    return re
                ttt = ttt.next0_
            # look AFTER the referent for "именуемый ... <role>" / "... СТОРОНЫ"
            has_br = False
            has_named = False
            end_side = None
            brr = None
            add_refs = None
            ttt = t.next0_
            first_pass3284 = True
            while True:
                if first_pass3284: first_pass3284 = False
                else: ttt = ttt.next0_
                if (not (ttt is not None)): break
                # "<number> СТОРОНЫ" closes the participant description
                if ((isinstance(ttt, NumberToken)) and (isinstance(ttt.next0_, TextToken)) and ttt.next0_.term == "СТОРОНЫ"): 
                    ttt = ttt.next0_
                    end_side = ttt
                    if (ttt.next0_ is not None and ttt.next0_.is_comma): 
                        ttt = ttt.next0_
                    if (ttt.next0_ is not None and ttt.next0_.is_and): 
                        break
                if (brr is not None and ttt.begin_char > brr.end_char): 
                    brr = (None)
                if (BracketHelper.can_be_start_of_sequence(ttt, False, False)): 
                    brr = BracketHelper.try_parse(ttt, BracketParseAttr.NO, 100)
                    # very short parenthesized inserts are skipped entirely
                    if (brr is not None and (brr.length_char < 7) and ttt.is_char('(')): 
                        ttt = brr.end_token
                        brr = (None)
                        continue
                elif ((ttt.is_value("ИМЕНОВАТЬ", None) or ttt.is_value("ДАЛЬНЕЙШИЙ", None) or ttt.is_value("ДАЛЕЕ", None)) or ttt.is_value("ТЕКСТ", None)): 
                    has_named = True
                elif ((ttt.is_comma or ttt.morph.class0_.is_preposition or ttt.is_hiphen) or ttt.is_char(':')): 
                    pass
                elif (brr is not None or has_named): 
                    # inside brackets or after "именуемый": extract the role
                    if (BracketHelper.can_be_start_of_sequence(ttt, True, False)): 
                        ttt = ttt.next0_
                    npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0, None)
                    typ22 = None
                    if (npt is not None): 
                        ttt = npt.end_token
                        if (npt.end_token.is_value("ДОГОВОР", None)): 
                            continue
                    else: 
                        ttok = None
                        if (isinstance(ttt, MetaToken)): 
                            ttok = ParticipantToken.M_ONTOLOGY.try_parse(ttt.begin_token, TerminParseAttr.NO)
                        if (ttok is not None): 
                            typ22 = ttok.termin.canonic_text
                        elif (has_named and ttt.morph.class0_.is_adjective): 
                            typ22 = ttt.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
                        elif (brr is not None): 
                            continue
                        else: 
                            break
                    if (BracketHelper.can_be_end_of_sequence(ttt.next0_, True, None, False)): 
                        ttt = ttt.next0_
                    if (brr is not None): 
                        if (ttt.next0_ is None): 
                            ttt = brr.end_token
                            continue
                        ttt = ttt.next0_
                    if (not has_named and typ22 is None): 
                        if (ParticipantToken.M_ONTOLOGY.try_parse(npt.begin_token, TerminParseAttr.NO) is None): 
                            break
                    re = ParticipantToken._new1573(t, ttt, ParticipantToken.Kinds.NAMEDAS)
                    re.typ = (Utils.ifNotNull(typ22, npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)))
                    re.parts = list()
                    re.parts.append(r1)
                    return re
                elif ((ttt.is_value("ЗАРЕГИСТРИРОВАННЫЙ", None) or ttt.is_value("КАЧЕСТВО", None) or ttt.is_value("ПРОЖИВАЮЩИЙ", None)) or ttt.is_value("ЗАРЕГ", None)): 
                    # registration/residence boilerplate — keep scanning
                    pass
                elif (ttt.get_referent() == r1): 
                    pass
                elif ((isinstance(ttt.get_referent(), PersonIdentityReferent)) or (isinstance(ttt.get_referent(), AddressReferent))): 
                    # identity documents / addresses become extra parts
                    if (add_refs is None): 
                        add_refs = list()
                    add_refs.append(ttt.get_referent())
                else: 
                    prr = ttt.kit.process_referent("PERSONPROPERTY", ttt)
                    if (prr is not None): 
                        ttt = prr.end_token
                        continue
                    if (isinstance(ttt.get_referent(), GeoReferent)): 
                        continue
                    npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                    if (npt is not None): 
                        if ((npt.noun.is_value("МЕСТО", None) or npt.noun.is_value("ЖИТЕЛЬСТВО", None) or npt.noun.is_value("ПРЕДПРИНИМАТЕЛЬ", None)) or npt.noun.is_value("ПОЛ", None) or npt.noun.is_value("РОЖДЕНИЕ", None)): 
                            ttt = npt.end_token
                            continue
                    if (ttt.is_newline_before): 
                        break
                    if (ttt.length_char < 3): 
                        continue
                    mc = ttt.get_morph_class_in_dictionary()
                    if (mc.is_adverb or mc.is_adjective): 
                        continue
                    if (ttt.chars.is_all_upper): 
                        continue
                    break
            if (end_side is not None or ((add_refs is not None and t.previous is not None and t.previous.is_and))): 
                re = ParticipantToken._new1573(t, Utils.ifNotNull(end_side, t), ParticipantToken.Kinds.NAMEDAS)
                re.typ = (None)
                re.parts = list()
                re.parts.append(r1)
                if (add_refs is not None): 
                    re.parts.extend(add_refs)
                return re
    # case: "<role term> [-|:] <person/org referent>"
    too = ParticipantToken.M_ONTOLOGY.try_parse(t, TerminParseAttr.NO)
    if (too is not None): 
        if ((isinstance(t.previous, TextToken)) and t.previous.is_value("ЛИЦО", None)): 
            too = (None)
    if (too is not None and too.termin.tag is not None and too.termin.canonic_text != "СТОРОНА"): 
        tt1 = too.end_token.next0_
        if (tt1 is not None): 
            if (tt1.is_hiphen or tt1.is_char(':')): 
                tt1 = tt1.next0_
            if (isinstance(tt1, ReferentToken)): 
                r1 = tt1.get_referent()
                if ((isinstance(r1, PersonReferent)) or (isinstance(r1, OrganizationReferent))): 
                    re = ParticipantToken._new1573(t, tt1, ParticipantToken.Kinds.NAMEDAS)
                    re.typ = too.termin.canonic_text
                    re.parts = list()
                    re.parts.append(r1)
                    return re
    # case: a bare role word, matched against the ontology and against the
    # known types of p1/p2 (with one-error tolerance)
    add_typ1 = (None if p1 is None else p1.typ)
    add_typ2 = (None if p2 is None else p2.typ)
    if (BracketHelper.can_be_start_of_sequence(tt, False, False) and tt.next0_ is not None): 
        br = True
        tt = tt.next0_
    term1 = None
    term2 = None
    if (add_typ1 is not None and add_typ1.find(' ') > 0 and not add_typ1.startswith("СТОРОНА")): 
        term1 = Termin(add_typ1)
    if (add_typ2 is not None and add_typ2.find(' ') > 0 and not add_typ2.startswith("СТОРОНА")): 
        term2 = Termin(add_typ2)
    named = False
    typ_ = None
    t1 = None
    t0 = tt
    first_pass3285 = True
    while True:
        if first_pass3285: first_pass3285 = False
        else: tt = tt.next0_
        if (not (tt is not None)): break
        if (tt.morph.class0_.is_preposition and typ_ is not None): 
            continue
        if (tt.is_char_of("(:)") or tt.is_hiphen): 
            continue
        if (tt.is_table_control_char): 
            break
        if (tt.is_newline_before and tt != t0): 
            if (isinstance(tt, NumberToken)): 
                break
            if ((isinstance(tt, TextToken)) and (isinstance(tt.previous, TextToken))): 
                # a duplicated word across a line break ends the scan
                if (tt.previous.is_value(tt.term, None)): 
                    break
        if (BracketHelper.is_bracket(tt, False)): 
            continue
        tok = (ParticipantToken.M_ONTOLOGY.try_parse(tt, TerminParseAttr.NO) if ParticipantToken.M_ONTOLOGY is not None else None)
        if (tok is not None and (isinstance(tt.previous, TextToken))): 
            if (tt.previous.is_value("ЛИЦО", None)): 
                return None
        if (tok is None): 
            # no ontology hit: try the externally supplied types
            if (add_typ1 is not None and ((MiscHelper.is_not_more_than_one_error(add_typ1, tt) or (((isinstance(tt, MetaToken)) and tt.begin_token.is_value(add_typ1, None)))))): 
                if (typ_ is not None): 
                    if (not ParticipantToken.__is_types_equal(add_typ1, typ_)): 
                        break
                typ_ = add_typ1
                t1 = tt
                continue
            if (add_typ2 is not None and ((MiscHelper.is_not_more_than_one_error(add_typ2, tt) or (((isinstance(tt, MetaToken)) and tt.begin_token.is_value(add_typ2, None)))))): 
                if (typ_ is not None): 
                    if (not ParticipantToken.__is_types_equal(add_typ2, typ_)): 
                        break
                typ_ = add_typ2
                t1 = tt
                continue
            if (tt.chars.is_letter): 
                if (term1 is not None): 
                    tok1 = term1.try_parse(tt, TerminParseAttr.NO)
                    if (tok1 is not None): 
                        if (typ_ is not None): 
                            if (not ParticipantToken.__is_types_equal(add_typ1, typ_)): 
                                break
                        typ_ = add_typ1
                        tt = tok1.end_token
                        t1 = tt
                        continue
                if (term2 is not None): 
                    tok2 = term2.try_parse(tt, TerminParseAttr.NO)
                    if (tok2 is not None): 
                        if (typ_ is not None): 
                            if (not ParticipantToken.__is_types_equal(add_typ2, typ_)): 
                                break
                        typ_ = add_typ2
                        tt = tok2.end_token
                        t1 = tt
                        continue
                # after "именуемый": accept a capitalized dictionary noun
                if (named and tt.get_morph_class_in_dictionary().is_noun): 
                    if (not tt.chars.is_all_lower or BracketHelper.is_bracket(tt.previous, True)): 
                        if (DecreeToken.is_keyword(tt, False) is None): 
                            val = tt.get_normal_case_text(MorphClass.NOUN, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                            if (typ_ is not None): 
                                if (not ParticipantToken.__is_types_equal(typ_, val)): 
                                    break
                            typ_ = val
                            t1 = tt
                            continue
                if (named and typ_ is None and is_contract): 
                    # contract-specific fallback: capitalized Cyrillic word
                    if ((isinstance(tt, TextToken)) and tt.chars.is_cyrillic_letter and tt.chars.is_capital_upper): 
                        dc = tt.get_morph_class_in_dictionary()
                        if (dc.is_undefined or dc.is_noun): 
                            dt = DecreeToken.try_attach(tt, None, False)
                            ok = True
                            if (dt is not None): 
                                ok = False
                            elif (tt.is_value("СТОРОНА", None)): 
                                ok = False
                            if (ok): 
                                typ_ = tt.lemma
                                t1 = tt
                                continue
                        if (dc.is_adjective): 
                            npt = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
                            if (npt is not None and len(npt.adjectives) > 0 and npt.noun.get_morph_class_in_dictionary().is_noun): 
                                typ_ = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                                t1 = npt.end_token
                                continue
            if (tt == t): 
                break
            if ((isinstance(tt, NumberToken)) or tt.is_char('.')): 
                break
            # NOTE(review): reconstructed dedent — short tokens are tolerated
            # once a type is known; any other unmatched token stops the scan
            if (tt.length_char < 4): 
                if (typ_ is not None): 
                    continue
            break
        if (tok.termin.tag is None): 
            # a "named" marker term ("именуемый"-like) from the ontology
            named = True
        else: 
            if (typ_ is not None): 
                break
            if (tok.termin.canonic_text == "СТОРОНА"): 
                # "СТОРОНА <n>" ("party N") must carry a number
                tt1 = tt.next0_
                if (tt1 is not None and tt1.is_hiphen): 
                    tt1 = tt1.next0_
                if (not (isinstance(tt1, NumberToken))): 
                    break
                if (tt1.is_newline_before): 
                    break
                typ_ = "{0} {1}".format(tok.termin.canonic_text, tt1.value)
                t1 = tt1
            else: 
                typ_ = tok.termin.canonic_text
                t1 = tok.end_token
                break
        tt = tok.end_token
    if (typ_ is None): 
        return None
    if (not named and t1 != t and not typ_.startswith("СТОРОНА")): 
        if (not ParticipantToken.__is_types_equal(typ_, add_typ1) and not ParticipantToken.__is_types_equal(typ_, add_typ2)): 
            return None
    # absorb a closing bracket/quote around the role word
    if (BracketHelper.can_be_end_of_sequence(t1.next0_, False, None, False)): 
        t1 = t1.next0_
        if (not t.is_whitespace_before and BracketHelper.can_be_start_of_sequence(t.previous, False, False)): 
            t = t.previous
    elif (BracketHelper.can_be_start_of_sequence(t, False, False) and BracketHelper.can_be_end_of_sequence(t1.next0_, True, t, True)): 
        t1 = t1.next0_
    if (br and t1.next0_ is not None and BracketHelper.can_be_end_of_sequence(t1.next0_, False, None, False)): 
        t1 = t1.next0_
    res = ParticipantToken._new1578(t, t1, (ParticipantToken.Kinds.NAMEDAS if named else ParticipantToken.Kinds.PURE), typ_)
    if (t.is_char(':')): 
        res.begin_token = t.next0_
    return res
def try_parse_number_with_postfix(t: 'Token') -> 'NumberExToken':
    """Parse a number followed by a unit/currency postfix starting at ``t``.

    Recognizes, among others: a currency sign before the number; "(NN) <money
    postfix>"; adjective words that fuse a numeral and a unit; "N с половиной"
    (= "N and a half"); a parenthesized verification copy of the number
    ("1000 (одна тысяча) ..."), including the "(NN целых MM сотых)" fraction
    spelling; the bare 'р'/'р.' ruble abbreviation when money context is
    nearby; and glued dimension postfixes like "СМХ"/"MX"/"ММХ".

    Returns a NumberExToken carrying value, type and (possibly) an alternative
    real value from the verification copy, or None.
    """
    if (t is None): 
        return None
    t0 = t
    # currency sign immediately before the number (e.g. "$100")
    is_dollar = None
    if (t.length_char == 1 and t.next0_ is not None): 
        is_dollar = NumberHelper._is_money_char(t)
        if ((is_dollar) is not None): 
            t = t.next0_
    nt = Utils.asObjectOrNull(t, NumberToken)
    if (nt is None): 
        # "(NN) <money postfix>" without a preceding number
        if ((not (isinstance(t.previous, NumberToken)) and t.is_char('(') and (isinstance(t.next0_, NumberToken))) and t.next0_.next0_ is not None and t.next0_.next0_.is_char(')')): 
            toks1 = NumberExHelper._m_postfixes.try_parse(t.next0_.next0_.next0_, TerminParseAttr.NO)
            if (toks1 is not None and (Utils.valToEnum(toks1.termin.tag, NumberExType)) == NumberExType.MONEY): 
                nt0 = Utils.asObjectOrNull(t.next0_, NumberToken)
                res = NumberExToken._new405(t, toks1.end_token, nt0.value, nt0.typ, NumberExType.MONEY, nt0.real_value, toks1.begin_token.morph)
                return NumberExHelper.__correct_money(res, toks1.begin_token)
        # adjective fusing numeral + unit in a single word: split the term at
        # every position and look both halves up in the dictionaries
        tt = Utils.asObjectOrNull(t, TextToken)
        if (tt is None or not tt.morph.class0_.is_adjective): 
            return None
        val = tt.term
        i = 4
        first_pass3038 = True
        while True:
            if first_pass3038: first_pass3038 = False
            else: i += 1
            if (not (i < (len(val) - 5))): break
            v = val[0:0 + i]
            li = NumberHelper._m_nums.find_termins_by_string(v, tt.morph.language)
            if (li is None): 
                continue
            vv = val[i:]
            lii = NumberExHelper._m_postfixes.find_termins_by_string(vv, tt.morph.language)
            if (lii is not None and len(lii) > 0): 
                re = NumberExToken._new406(t, t, str(li[0].tag), NumberSpellingType.WORDS, Utils.valToEnum(lii[0].tag, NumberExType), t.morph)
                NumberExHelper.__correct_ext_types(re)
                return re
            break
        return None
    if (t.next0_ is None and is_dollar is None): 
        return None
    f = nt.real_value
    if (math.isnan(f)): 
        return None
    t1 = nt.next0_
    # extend to a real number when a decimal separator / digit group follows
    if (((t1 is not None and t1.is_char_of(",."))) or (((isinstance(t1, NumberToken)) and (t1.whitespaces_before_count < 3)))): 
        d = 0
        tt11 = NumberHelper.try_parse_real_number(nt, False, False)
        if (tt11 is not None): 
            t1 = tt11.end_token.next0_
            f = tt11.real_value
    if (t1 is None): 
        if (is_dollar is None): 
            return None
    elif ((t1.next0_ is not None and t1.next0_.is_value("С", "З") and t1.next0_.next0_ is not None) and t1.next0_.next0_.is_value("ПОЛОВИНА", None)): 
        # "N с половиной" (= "N and a half")
        f += 0.5
        t1 = t1.next0_.next0_
    if (t1 is not None and t1.is_hiphen and t1.next0_ is not None): 
        t1 = t1.next0_
    det = False
    altf = f
    # skip a "-00" kopeck-style tail
    if (((isinstance(t1, NumberToken)) and t1.previous is not None and t1.previous.is_hiphen) and t1.int_value == 0 and t1.length_char == 2): 
        t1 = t1.next0_
    # parenthesized verification copy: "1000 (одна тысяча ...)"
    if ((t1 is not None and t1.next0_ is not None and t1.is_char('(')) and (((isinstance(t1.next0_, NumberToken)) or t1.next0_.is_value("НОЛЬ", None))) and t1.next0_.next0_ is not None): 
        nt1 = Utils.asObjectOrNull(t1.next0_, NumberToken)
        val = 0
        if (nt1 is not None): 
            val = nt1.real_value
        if (math.floor(f) == math.floor(val)): 
            ttt = t1.next0_.next0_
            if (ttt.is_char(')')): 
                t1 = ttt.next0_
                det = True
                if ((isinstance(t1, NumberToken)) and t1.int_value is not None and t1.int_value == 0): 
                    t1 = t1.next0_
            elif (((((isinstance(ttt, NumberToken)) and (ttt.real_value < 100) and ttt.next0_ is not None) and ttt.next0_.is_char('/') and ttt.next0_.next0_ is not None) and ttt.next0_.next0_.get_source_text() == "100" and ttt.next0_.next0_.next0_ is not None) and ttt.next0_.next0_.next0_.is_char(')')): 
                # "(NN MM/100)" fraction form: MM must match the decimal rest
                rest = NumberExHelper.__get_decimal_rest100(f)
                if (ttt.int_value is not None and rest == ttt.int_value): 
                    t1 = ttt.next0_.next0_.next0_.next0_
                    det = True
            elif ((ttt.is_value("ЦЕЛЫХ", None) and (isinstance(ttt.next0_, NumberToken)) and ttt.next0_.next0_ is not None) and ttt.next0_.next0_.next0_ is not None and ttt.next0_.next0_.next0_.is_char(')')): 
                # "(NN целых MM десятых/сотых/...)" spelled-out fraction
                num2 = Utils.asObjectOrNull(ttt.next0_, NumberToken)
                altf = num2.real_value
                if (ttt.next0_.next0_.is_value("ДЕСЯТЫЙ", None)): 
                    altf /= (10)
                elif (ttt.next0_.next0_.is_value("СОТЫЙ", None)): 
                    altf /= (100)
                elif (ttt.next0_.next0_.is_value("ТЫСЯЧНЫЙ", None)): 
                    altf /= (1000)
                elif (ttt.next0_.next0_.is_value("ДЕСЯТИТЫСЯЧНЫЙ", None)): 
                    altf /= (10000)
                elif (ttt.next0_.next0_.is_value("СТОТЫСЯЧНЫЙ", None)): 
                    altf /= (100000)
                elif (ttt.next0_.next0_.is_value("МИЛЛИОННЫЙ", None)): 
                    altf /= (1000000)
                if (altf < 1): 
                    altf += val
                t1 = ttt.next0_.next0_.next0_.next0_
                det = True
            else: 
                toks1 = NumberExHelper._m_postfixes.try_parse(ttt, TerminParseAttr.NO)
                if (toks1 is not None): 
                    if ((Utils.valToEnum(toks1.termin.tag, NumberExType)) == NumberExType.MONEY): 
                        if (toks1.end_token.next0_ is not None and toks1.end_token.next0_.is_char(')')): 
                            res = NumberExToken._new407(t, toks1.end_token.next0_, nt.value, nt.typ, NumberExType.MONEY, f, altf, toks1.begin_token.morph)
                            return NumberExHelper.__correct_money(res, toks1.begin_token)
                # the whole bracketed part may itself be a number+postfix
                res2 = NumberExHelper.try_parse_number_with_postfix(t1.next0_)
                if (res2 is not None and res2.end_token.next0_ is not None and res2.end_token.next0_.is_char(')')): 
                    res2.begin_token = t
                    res2.end_token = res2.end_token.next0_
                    res2.alt_real_value = res2.real_value
                    res2.real_value = f
                    NumberExHelper.__correct_ext_types(res2)
                    if (res2.whitespaces_after_count < 2): 
                        toks2 = NumberExHelper._m_postfixes.try_parse(res2.end_token.next0_, TerminParseAttr.NO)
                        if (toks2 is not None): 
                            if ((Utils.valToEnum(toks2.termin.tag, NumberExType)) == NumberExType.MONEY): 
                                res2.end_token = toks2.end_token
                    return res2
        elif (nt1 is not None and nt1.typ == NumberSpellingType.WORDS and nt.typ == NumberSpellingType.DIGIT): 
            # the copy is spelled in words but does not match numerically —
            # keep it as the alternative value
            altf = nt1.real_value
            ttt = t1.next0_.next0_
            if (ttt.is_char(')')): 
                t1 = ttt.next0_
                det = True
    if (not det): 
        altf = f
    # skip a "(сумма ...)" parenthesized remark
    if ((t1 is not None and t1.is_char('(') and t1.next0_ is not None) and t1.next0_.is_value("СУММА", None)): 
        br = BracketHelper.try_parse(t1, BracketParseAttr.NO, 100)
        if (br is not None): 
            t1 = br.end_token.next0_
    if (is_dollar is not None): 
        # currency-sign form: result type is MONEY; handle "-миллионный",
        # glued "M"/"BN" magnitude suffixes
        te = None
        if (t1 is not None): 
            te = t1.previous
        else: 
            t1 = t0
            while t1 is not None: 
                if (t1.next0_ is None): 
                    te = t1
                t1 = t1.next0_
        if (te is None): 
            return None
        if (te.is_hiphen and te.next0_ is not None): 
            if (te.next0_.is_value("МИЛЛИОННЫЙ", None)): 
                f *= (1000000)
                altf *= (1000000)
                te = te.next0_
            elif (te.next0_.is_value("МИЛЛИАРДНЫЙ", None)): 
                f *= (1000000000)
                altf *= (1000000000)
                te = te.next0_
        if (not te.is_whitespace_after and (isinstance(te.next0_, TextToken))): 
            if (te.next0_.is_value("M", None)): 
                f *= (1000000)
                altf *= (1000000)
                te = te.next0_
            elif (te.next0_.is_value("BN", None)): 
                f *= (1000000000)
                altf *= (1000000000)
                te = te.next0_
        return NumberExToken._new408(t0, te, "", nt.typ, NumberExType.MONEY, f, altf, is_dollar)
    if (t1 is None or ((t1.is_newline_before and not det))): 
        return None
    toks = NumberExHelper._m_postfixes.try_parse(t1, TerminParseAttr.NO)
    if ((toks is None and det and (isinstance(t1, NumberToken))) and t1.value == "0"): 
        toks = NumberExHelper._m_postfixes.try_parse(t1.next0_, TerminParseAttr.NO)
    if (toks is None and t1.is_char('р')): 
        # bare 'р' ruble abbreviation: accept only when money context
        # ("сумма", "наличный", "баланс" or a MONEY referent) is nearby
        cou = 10
        ttt = t0.previous
        first_pass3039 = True
        while True:
            if first_pass3039: first_pass3039 = False
            else: 
                ttt = ttt.previous
                cou -= 1
            if (not (ttt is not None and cou > 0)): break
            if (ttt.is_value("СУММА", None) or ttt.is_value("НАЛИЧНЫЙ", None) or ttt.is_value("БАЛАНС", None)): 
                pass
            elif (ttt.get_referent() is not None and ttt.get_referent().type_name == "MONEY"): 
                pass
            else: 
                continue
            toks = TerminToken._new409(t1, t1, NumberExHelper._m_postfixes.find_termins_by_canonic_text("RUB")[0])
            if (t1.next0_ is not None and t1.next0_.is_char('.')): 
                toks.end_token = t1.next0_
            ty = Utils.valToEnum(toks.termin.tag, NumberExType)
            return NumberExToken._new410(t, toks.end_token, nt.value, nt.typ, ty, f, altf, toks.begin_token.morph, "RUB")
    if (toks is not None): 
        t1 = toks.end_token
        # absorb an abbreviating dot after the postfix
        if (not t1.is_char('.') and t1.next0_ is not None and t1.next0_.is_char('.')): 
            if ((isinstance(t1, TextToken)) and t1.is_value(toks.termin.terms[0].canonical_text, None)): 
                pass
            elif (not t1.chars.is_letter): 
                pass
            else: 
                t1 = t1.next0_
        if (toks.termin.canonic_text == "LTL"): 
            return None
        if (toks.begin_token == t1): 
            # a free-standing preposition/conjunction is not a real postfix
            if (t1.morph.class0_.is_preposition or t1.morph.class0_.is_conjunction): 
                if (t1.is_whitespace_before and t1.is_whitespace_after): 
                    return None
        ty = Utils.valToEnum(toks.termin.tag, NumberExType)
        res = NumberExToken._new407(t, t1, nt.value, nt.typ, ty, f, altf, toks.begin_token.morph)
        if (ty != NumberExType.MONEY): 
            NumberExHelper.__correct_ext_types(res)
            return res
        return NumberExHelper.__correct_money(res, toks.begin_token)
    pfx = NumberExHelper.__attach_spec_postfix(t1)
    if (pfx is not None): 
        pfx.begin_token = t
        pfx.value = nt.value
        pfx.typ = nt.typ
        pfx.real_value = f
        pfx.alt_real_value = altf
        return pfx
    # "N <preposition> <number+postfix>" — inherit the unit without extending
    if (t1.next0_ is not None and ((t1.morph.class0_.is_preposition or t1.morph.class0_.is_conjunction))): 
        if (t1.is_value("НА", None)): 
            pass
        else: 
            nn = NumberExHelper.try_parse_number_with_postfix(t1.next0_)
            if (nn is not None): 
                return NumberExToken._new412(t, t, nt.value, nt.typ, nn.ex_typ, f, altf, nn.ex_typ2, nn.ex_typ_param)
    # glued dimension postfixes like "10смх20" (cm/m/mm followed by 'х')
    if (not t1.is_whitespace_after and (isinstance(t1.next0_, NumberToken)) and (isinstance(t1, TextToken))): 
        term = t1.term
        ty = NumberExType.UNDEFINED
        if (term == "СМХ" or term == "CMX"): 
            ty = NumberExType.SANTIMETER
        elif (term == "MX" or term == "МХ"): 
            ty = NumberExType.METER
        elif (term == "MMX" or term == "ММХ"): 
            ty = NumberExType.MILLIMETER
        if (ty != NumberExType.UNDEFINED): 
            return NumberExToken._new413(t, t1, nt.value, nt.typ, ty, f, altf, True)
    return None
def get_name_ex(begin: 'Token', end: 'Token', cla: 'MorphClass', mc: 'MorphCase', gender: 'MorphGender'=MorphGender.UNDEFINED, ignore_brackets_and_hiphens: bool=False, ignore_geo_referent: bool=False) -> str:
    """Build the text of the token span ``begin..end``, optionally normalizing
    each word to the requested morphological class/case/gender.

    cla/mc/gender: when non-empty, each TextToken is replaced by the normal
        form of a word form matching these constraints (falling back to the
        raw term).
    ignore_brackets_and_hiphens: when True, bracketed fragments are rendered
        recursively and non-spaced hyphens are dropped; when False, hyphenated
        pairs are joined with '-' via the ``prefix`` accumulator.
    ignore_geo_referent: when True, embedded GEO referent tokens (except at
        the very start) are skipped.
    Returns the assembled string, or None for an empty/invalid span.
    """
    if (end is None or begin is None): 
        return None
    if (begin.end_char > end.begin_char and begin != end): 
        return None
    res = io.StringIO()
    prefix = None
    t = begin
    first_pass3064 = True
    while True:
        if first_pass3064: first_pass3064 = False
        else: t = t.next0_
        if (not (t is not None and t.end_char <= end.end_char)): break
        # hard cap on output size
        if (res.tell() > 1000): 
            break
        if (t.is_table_control_char): 
            continue
        if (ignore_brackets_and_hiphens): 
            if (BracketHelper.is_bracket(t, False)): 
                if (t == end): 
                    break
                if (t.is_char_of("(<[")): 
                    # render the bracketed fragment recursively, keeping the
                    # original bracket characters around it
                    br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
                    if (br is not None and br.end_char <= end.end_char): 
                        tmp = ProperNameHelper.get_name_ex(br.begin_token.next0_, br.end_token.previous, MorphClass.UNDEFINED, MorphCase.UNDEFINED, MorphGender.UNDEFINED, ignore_brackets_and_hiphens, False)
                        if (tmp is not None): 
                            # skip a trailing one-token non-letter insert
                            if ((br.end_char == end.end_char and br.begin_token.next0_ == br.end_token.previous and not br.begin_token.next0_.chars.is_letter) and not (isinstance(br.begin_token.next0_, ReferentToken))): 
                                pass
                            else: 
                                print(" {0}{1}{2}".format(t.get_source_text(), tmp, br.end_token.get_source_text()), end="", file=res, flush=True)
                        t = br.end_token
                continue
            if (t.is_hiphen): 
                if (t == end): 
                    break
                elif (t.is_whitespace_before or t.is_whitespace_after): 
                    continue
        tt = Utils.asObjectOrNull(t, TextToken)
        if (tt is not None): 
            if (not ignore_brackets_and_hiphens): 
                # accumulate the left parts of hyphenated compounds in
                # ``prefix`` and join them to the final part below
                if ((tt.next0_ is not None and tt.next0_.is_hiphen and (isinstance(tt.next0_.next0_, TextToken))) and tt != end and tt.next0_ != end): 
                    if (prefix is None): 
                        prefix = tt.term
                    else: 
                        prefix = "{0}-{1}".format(prefix, tt.term)
                    t = tt.next0_
                    if (t == end): 
                        break
                    else: 
                        continue
            s = None
            if (cla.value != (0) or not mc.is_undefined or gender != MorphGender.UNDEFINED): 
                # pick a normal form satisfying class/case/gender, preferring
                # one whose normal form equals the surface term
                for wff in tt.morph.items: 
                    wf = Utils.asObjectOrNull(wff, MorphWordForm)
                    if (wf is None): 
                        continue
                    if (cla.value != (0)): 
                        if ((((wf.class0_.value) & (cla.value))) == 0): 
                            continue
                    if (not mc.is_undefined): 
                        if (((wf.case_) & mc).is_undefined): 
                            continue
                    if (gender != MorphGender.UNDEFINED): 
                        if (((wf.gender) & (gender)) == (MorphGender.UNDEFINED)): 
                            continue
                    if (s is None or wf.normal_case == tt.term): 
                        s = wf.normal_case
                if (s is None and gender != MorphGender.UNDEFINED): 
                    # retry without the gender constraint
                    for wff in tt.morph.items: 
                        wf = Utils.asObjectOrNull(wff, MorphWordForm)
                        if (wf is None): 
                            continue
                        if (cla.value != (0)): 
                            if ((((wf.class0_.value) & (cla.value))) == 0): 
                                continue
                        if (not mc.is_undefined): 
                            if (((wf.case_) & mc).is_undefined): 
                                continue
                        if (s is None or wf.normal_case == tt.term): 
                            s = wf.normal_case
            if (s is None): 
                s = tt.term
                if (tt.chars.is_last_lower and tt.length_char > 2): 
                    # mixed-case token (e.g. an abbreviation with a lowercase
                    # tail): cut at the last uppercase letter of the source
                    s = tt.get_source_text()
                    for i in range(len(s) - 1, -1, -1): 
                        if (str.isupper(s[i])): 
                            s = s[0:0 + i + 1]
                            break
            if (prefix is not None): 
                delim = "-"
                if (ignore_brackets_and_hiphens): 
                    delim = " "
                s = "{0}{1}{2}".format(prefix, delim, s)
            prefix = (None)
            if (res.tell() > 0 and len(s) > 0): 
                # insert a separating space unless gluing to a hyphen
                if (str.isalnum(s[0])): 
                    ch0 = Utils.getCharAtStringIO(res, res.tell() - 1)
                    if (ch0 == '-'): 
                        pass
                    else: 
                        print(' ', end="", file=res)
                elif (not ignore_brackets_and_hiphens and BracketHelper.can_be_start_of_sequence(tt, False, False)): 
                    print(' ', end="", file=res)
            print(s, end="", file=res)
        elif (isinstance(t, NumberToken)): 
            if (res.tell() > 0): 
                if (not t.is_whitespace_before and Utils.getCharAtStringIO(res, res.tell() - 1) == '-'): 
                    pass
                else: 
                    print(' ', end="", file=res)
            nt = Utils.asObjectOrNull(t, NumberToken)
            # keep the original word for single-word adjectival numerals
            if ((t.morph.class0_.is_adjective and nt.typ == NumberSpellingType.WORDS and nt.begin_token == nt.end_token) and (isinstance(nt.begin_token, TextToken))): 
                print(nt.begin_token.term, end="", file=res)
            else: 
                print(nt.value, end="", file=res)
        elif (isinstance(t, MetaToken)): 
            if ((ignore_geo_referent and t != begin and t.get_referent() is not None) and t.get_referent().type_name == "GEO"): 
                continue
            # recurse into composite tokens with the same constraints
            s = ProperNameHelper.get_name_ex(t.begin_token, t.end_token, cla, mc, gender, ignore_brackets_and_hiphens, ignore_geo_referent)
            if (not Utils.isNullOrEmpty(s)): 
                if (res.tell() > 0): 
                    if (not t.is_whitespace_before and Utils.getCharAtStringIO(res, res.tell() - 1) == '-'): 
                        pass
                    else: 
                        print(' ', end="", file=res)
                print(s, end="", file=res)
        if (t == end): 
            break
    if (res.tell() == 0): 
        return None
    return Utils.toStringStringIO(res)
def try_parse(t : 'Token', add_units : 'TerminCollection', can_be_set : bool=True, can_units_absent : bool=False, is_resctriction : bool=False, is_subval : bool=False) -> 'MeasureToken':
    """Try to parse a measured value ("name ... number(s) unit(s)") starting at *t*.

    Parameters:
        t: first token of the candidate fragment; must be a plain TextToken.
        add_units: user-supplied unit terms forwarded to the number/unit parsers.
        can_be_set: if True, a second number+unit group after a comma/"and" may
            turn the result into a set of two internal measures.
        can_units_absent: if True, a number without any explicit unit is still accepted.
        is_resctriction: (spelling kept from the original generator) restriction
            mode; hitting a ':'/','/'_' separator then aborts with None.
        is_subval: parsing a sub-value inside an enclosing measure; changes how
            the name text is extracted and allows bare word-run names.

    Returns:
        A MeasureToken covering name, numbers and units, or None.

    NOTE(review): machine-converted (C# -> Python) Pullenti code.  The source
    arrived with its line structure collapsed; the indentation below was
    reconstructed from statement order and the converter's idioms and should be
    verified against the original generated file before relying on exact
    control flow.
    """
    # Only text tokens can start a measure.
    if (not (isinstance(t, TextToken))): 
        return None
    if (t.is_table_control_char): 
        return None
    t0 = t
    whd = None
    minmax = 0
    # Optional leading "min"/"max" marker; RefOutArgWrapper emulates a C# ref/out arg.
    wrapminmax1625 = RefOutArgWrapper(minmax)
    tt = NumbersWithUnitToken._is_min_or_max(t0, wrapminmax1625)
    minmax = wrapminmax1625.value
    if (tt is not None): 
        t = tt.next0_
    # The measure name is normally a noun phrase.
    npt = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS), NounPhraseParseAttr), 0, None)
    if (npt is None): 
        # No noun phrase: accept a few special name forms instead.
        whd = NumbersWithUnitToken._try_parsewhl(t)
        if (whd is not None): 
            # Width/height/length-style header.
            npt = NounPhraseToken(t0, whd.end_token)
        elif (t0.is_value("КПД", None)): 
            # "КПД" = efficiency (Russian abbreviation).
            npt = NounPhraseToken(t0, t0)
        elif ((isinstance(t0, TextToken)) and t0.length_char > 3 and t0.get_morph_class_in_dictionary().is_undefined): 
            # Unknown long word: usable as a name.
            npt = NounPhraseToken(t0, t0)
        elif (t0.is_value("T", None) and t0.chars.is_all_lower): 
            # Lower-case "t" (e.g. temperature variable), possibly followed by '='.
            npt = NounPhraseToken(t0, t0)
            t = t0
            if (t.next0_ is not None and t.next0_.is_char('=')): 
                npt.end_token = t.next0_
        elif ((isinstance(t0, TextToken)) and t0.chars.is_letter and is_subval): 
            # Sub-value mode: a bare run of words (and bracketed fragments)
            # may serve as the name, up to the first number-with-unit.
            if (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None): 
                return None
            npt = NounPhraseToken(t0, t0)
            t = t0.next0_
            while t is not None:
                if (t.whitespaces_before_count > 2): 
                    break
                elif (not (isinstance(t, TextToken))): 
                    break
                elif (not t.chars.is_letter): 
                    br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
                    if (br is not None): 
                        t = br.end_token
                        npt.end_token = t
                    else: 
                        break
                elif (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None): 
                    break
                else: 
                    npt.end_token = t
                t = t.next0_
        else: 
            return None
    elif (NumberHelper.try_parse_real_number(t, True, False) is not None): 
        # A bare real number here cannot be a measure name.
        return None
    else: 
        # Nor can a date fragment.
        dtok = DateItemToken.try_attach(t, None, False)
        if (dtok is not None): 
            return None
    t1 = npt.end_token
    t = npt.end_token
    name_ = MetaToken._new509(npt.begin_token, npt.end_token, npt.morph)
    units = None
    units2 = None
    internals_ = list()
    not0_ = False
    # Scan forward from the name: extend it, collect internal measures
    # ("ПРИ ..." = "at/under ..."), bracketed unit lists, W/H/L headers, etc.
    # (first_passNNNN emulates a C# do/while loop.)
    tt = t1.next0_
    first_pass3305 = True
    while True:
        if first_pass3305: first_pass3305 = False
        else: tt = tt.next0_
        if (not (tt is not None)): break
        if (tt.is_newline_before): 
            break
        if (tt.is_table_control_char): 
            break
        # Embedded "min"/"max" marker inside the name.
        wrapminmax1617 = RefOutArgWrapper(minmax)
        tt2 = NumbersWithUnitToken._is_min_or_max(tt, wrapminmax1617)
        minmax = wrapminmax1617.value
        if (tt2 is not None): 
            tt = tt2
            t = tt
            t1 = t
            continue
        # Modal/copula verbs ("быть"=be, "должен/должный"=must, "может"=may,
        # "составлять"=amount to) are skipped; a preceding "НЕ" (= not) negates.
        if ((tt.is_value("БЫТЬ", None) or tt.is_value("ДОЛЖЕН", None) or tt.is_value("ДОЛЖНЫЙ", None)) or tt.is_value("МОЖЕТ", None) or ((tt.is_value("СОСТАВЛЯТЬ", None) and not tt.get_morph_class_in_dictionary().is_adjective))): 
            t = tt
            t1 = t
            if (tt.previous.is_value("НЕ", None)): 
                not0_ = True
            continue
        # Width/height/length construction directly after the name.
        www = NumbersWithUnitToken._try_parsewhl(tt)
        if (www is not None): 
            whd = www
            tt = www.end_token
            t = tt
            t1 = t
            continue
        if (tt.is_value("ПРИ", None)): 
            # "ПРИ" = "at/under": a condition measure, stored as internal.
            mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False)
            if (mt1 is not None): 
                internals_.append(mt1)
                tt = mt1.end_token
                t = tt
                t1 = t
                continue
            n1 = NumbersWithUnitToken.try_parse(tt.next0_, add_units, False, False, False, False)
            if (n1 is not None and len(n1.units) > 0): 
                mt1 = MeasureToken._new1612(n1.begin_token, n1.end_token, n1)
                internals_.append(mt1)
                tt = mt1.end_token
                t = tt
                t1 = t
                continue
        # "ПО U" (= "over U", presumably voltage context — TODO confirm).
        if (tt.is_value("ПО", None) and tt.next0_ is not None and tt.next0_.is_value("U", None)): 
            tt = tt.next0_
            t = tt
            t1 = t
            continue
        if (len(internals_) > 0): 
            # Once internals exist, ':' terminates the scan; otherwise try to
            # collect further reliable internal measures.
            if (tt.is_char(':')): 
                break
            mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False)
            if (mt1 is not None and mt1.reliable): 
                internals_.append(mt1)
                tt = mt1.end_token
                t = tt
                t1 = t
                continue
        # Number spelled in words acting as an adjective extends the name.
        if ((isinstance(tt, NumberToken)) and tt.typ == NumberSpellingType.WORDS): 
            npt3 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.PARSENUMERICASADJECTIVE, 0, None)
            if (npt3 is not None): 
                tt = npt3.end_token
                t1 = tt
                if (len(internals_) == 0): 
                    name_.end_token = t1
                continue
        # "ABC-123"-style designation glued to an all-upper word.
        if (((tt.is_hiphen and not tt.is_whitespace_before and not tt.is_whitespace_after) and (isinstance(tt.next0_, NumberToken)) and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper): 
            t = tt.next0_
            tt = t
            t1 = tt
            if (len(internals_) == 0): 
                name_.end_token = t1
            continue
        # Number glued directly to an all-upper word ("ABC123").
        if (((isinstance(tt, NumberToken)) and not tt.is_whitespace_before and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper): 
            t = tt
            t1 = t
            if (len(internals_) == 0): 
                name_.end_token = t1
            continue
        # "123-word" designation: jump over the hyphenated tail.
        if ((((isinstance(tt, NumberToken)) and not tt.is_whitespace_after and tt.next0_.is_hiphen) and not tt.next0_.is_whitespace_after and (isinstance(tt.next0_.next0_, TextToken))) and tt.next0_.next0_.length_char > 2): 
            tt = tt.next0_.next0_
            t = tt
            t1 = t
            npt1 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
            if (npt1 is not None and npt1.end_char > tt.end_char): 
                tt = npt1.end_token
                t = tt
                t1 = t
            if (len(internals_) == 0): 
                name_.end_token = t1
            continue
        if ((isinstance(tt, NumberToken)) and tt.previous is not None): 
            if (tt.previous.is_value("USB", None)): 
                # "USB 2.0"-like tail: absorb the glued token run into the name.
                t = tt
                t1 = t
                if (len(internals_) == 0): 
                    name_.end_token = t1
                ttt = tt.next0_
                while ttt is not None:
                    if (ttt.is_whitespace_before): 
                        break
                    if (ttt.is_char_of(",:")): 
                        break
                    tt = ttt
                    t = tt
                    t1 = t
                    if (len(internals_) == 0): 
                        name_.end_token = t1
                    ttt = ttt.next0_
                continue
            mt0 = NumbersWithUnitToken.try_parse(tt, add_units, False, False, False, False)
            if (mt0 is not None): 
                # A real number+unit begins here: the name scan is over unless
                # a larger noun phrase still swallows it.
                npt1 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSENUMERICASADJECTIVE) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), 0, None)
                if (npt1 is not None and npt1.end_char > mt0.end_char): 
                    tt = npt1.end_token
                    t = tt
                    t1 = t
                    if (len(internals_) == 0): 
                        name_.end_token = t1
                    continue
                break
        if (((tt.is_comma or tt.is_char('('))) and tt.next0_ is not None): 
            # After a comma or '(': W/H/L header or an explicit unit list.
            www = NumbersWithUnitToken._try_parsewhl(tt.next0_)
            if (www is not None): 
                whd = www
                tt = www.end_token
                t = tt
                t1 = t
                if (tt.next0_ is not None and tt.next0_.is_comma): 
                    tt = tt.next0_
                    t1 = tt
                if (tt.next0_ is not None and tt.next0_.is_char(')')): 
                    tt = tt.next0_
                    t1 = tt
                continue
            uu = UnitToken.try_parse_list(tt.next0_, add_units, False)
            if (uu is not None): 
                t = uu[len(uu) - 1].end_token
                t1 = t
                units = uu
                if (tt.is_char('(') and t1.next0_ is not None and t1.next0_.is_char(')')): 
                    tt = t1.next0_
                    t = tt
                    t1 = t
                    continue
                elif (t1.next0_ is not None and t1.next0_.is_char('(')): 
                    # A second unit list in parentheses (alternative units).
                    uu = UnitToken.try_parse_list(t1.next0_.next0_, add_units, False)
                    if (uu is not None and uu[len(uu) - 1].end_token.next0_ is not None and uu[len(uu) - 1].end_token.next0_.is_char(')')): 
                        units2 = uu
                        tt = uu[len(uu) - 1].end_token.next0_
                        t = tt
                        t1 = t
                        continue
                    www = NumbersWithUnitToken._try_parsewhl(t1.next0_)
                    if (www is not None): 
                        whd = www
                        tt = www.end_token
                        t = tt
                        t1 = t
                        continue
                if (uu is not None and len(uu) > 0 and not uu[0].is_doubt): 
                    break
                if (t1.next0_ is not None): 
                    if (t1.next0_.is_table_control_char or t1.is_newline_after): 
                        break
                # Doubtful units: discard and keep scanning.
                units = (None)
        # Bracketed fragment (not starting a number) is absorbed into the name.
        if (BracketHelper.can_be_start_of_sequence(tt, False, False) and not (isinstance(tt.next0_, NumberToken))): 
            br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
            if (br is not None): 
                tt = br.end_token
                t = tt
                t1 = t
                continue
        # "НЕ" (= not) followed by an adverb/misc word terminates the scan.
        if (tt.is_value("НЕ", None) and tt.next0_ is not None): 
            mc = tt.next0_.get_morph_class_in_dictionary()
            if (mc.is_adverb or mc.is_misc): 
                break
            continue
        if (tt.is_value("ЯМЗ", None)): 
            # "ЯМЗ" — special-cased engine brand; deliberately no action here.
            pass
        npt2 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS) | (NounPhraseParseAttr.PARSEPRONOUNS), NounPhraseParseAttr), 0, None)
        if (npt2 is None): 
            if (tt.morph.class0_.is_preposition or tt.morph.class0_.is_conjunction): 
                # A known range term ("не менее" etc.) right after a preposition
                # ends the name; otherwise skip the function word.
                to = NumbersWithUnitToken.M_TERMINS.try_parse(tt, TerminParseAttr.NO)
                if (to is not None): 
                    if ((isinstance(to.end_token.next0_, TextToken)) and to.end_token.next0_.is_letters): 
                        pass
                    else: 
                        break
                t1 = tt
                continue
            mc = tt.get_morph_class_in_dictionary()
            if (((isinstance(tt, TextToken)) and tt.chars.is_letter and tt.length_char > 1) and (((tt.chars.is_all_upper or mc.is_adverb or mc.is_undefined) or mc.is_adjective))): 
                # A lone word may itself be the unit list; otherwise extend the name.
                uu = UnitToken.try_parse_list(tt, add_units, False)
                if (uu is not None): 
                    if (uu[0].length_char > 1 or len(uu) > 1): 
                        units = uu
                        t = uu[len(uu) - 1].end_token
                        t1 = t
                        break
                t = tt
                t1 = t
                if (len(internals_) == 0): 
                    name_.end_token = tt
                continue
            if (tt.is_comma): 
                continue
            if (tt.is_char('.')): 
                # Abbreviation dot vs sentence end; a following unit list stops the scan.
                if (not MiscHelper.can_be_start_of_sentence(tt.next0_)): 
                    continue
                uu = UnitToken.try_parse_list(tt.next0_, add_units, False)
                if (uu is not None): 
                    if (uu[0].length_char > 2 or len(uu) > 1): 
                        units = uu
                        t = uu[len(uu) - 1].end_token
                        t1 = t
                        break
            break
        # Noun phrase found: extend the name over it.
        tt = npt2.end_token
        t = tt
        t1 = t
        if (len(internals_) > 0): 
            pass
        elif (t.is_value("ПРЕДЕЛ", None) or t.is_value("ГРАНИЦА", None) or t.is_value("ДИАПАЗОН", None)): 
            # "предел"=limit, "граница"=boundary, "диапазон"=range — keep name as is.
            pass
        elif (t.chars.is_letter): 
            name_.end_token = t1
    # Skip separators between the name and the numeric part, possibly
    # harvesting units, W/H/L headers, or a ':'-introduced sub-value list.
    t11 = t1
    t1 = t1.next0_
    first_pass3306 = True
    while True:
        if first_pass3306: first_pass3306 = False
        else: t1 = t1.next0_
        if (not (t1 is not None)): break
        if (t1.is_table_control_char): 
            pass
        elif (t1.is_char_of(":,_")): 
            if (is_resctriction): 
                return None
            www = NumbersWithUnitToken._try_parsewhl(t1.next0_)
            if (www is not None): 
                whd = www
                t = www.end_token
                t1 = t
                continue
            uu = UnitToken.try_parse_list(t1.next0_, add_units, False)
            if (uu is not None): 
                if (uu[0].length_char > 1 or len(uu) > 1): 
                    units = uu
                    t = uu[len(uu) - 1].end_token
                    t1 = t
                    continue
            if (t1.is_char(':')): 
                # "name: sub1; sub2; ..." — parse semicolon-separated sub-measures.
                li = list()
                ttt = t1.next0_
                first_pass3307 = True
                while True:
                    if first_pass3307: first_pass3307 = False
                    else: ttt = ttt.next0_
                    if (not (ttt is not None)): break
                    if (ttt.is_hiphen or ttt.is_table_control_char): 
                        continue
                    if ((isinstance(ttt, TextToken)) and not ttt.chars.is_letter): 
                        continue
                    mt1 = MeasureToken.try_parse(ttt, add_units, True, True, False, True)
                    if (mt1 is None): 
                        break
                    li.append(mt1)
                    ttt = mt1.end_token
                    if (ttt.next0_ is not None and ttt.next0_.is_char(';')): 
                        ttt = ttt.next0_
                    if (ttt.is_char(';')): 
                        pass
                    elif (ttt.is_newline_after and mt1.is_newline_before): 
                        pass
                    else: 
                        break
                if (len(li) > 1): 
                    # Wrap the sub-measures; each gets "<name> (<subname>)".
                    res0 = MeasureToken._new1618(t0, li[len(li) - 1].end_token, li, True)
                    if (internals_ is not None and len(internals_) > 0): 
                        res0.internal_ex = internals_[0]
                    nam = MiscHelper.get_text_value_of_meta_token(name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
                    li[0].begin_token = t0
                    for v in li: 
                        v.name = "{0} ({1})".format(nam, Utils.ifNotNull(v.name, "")).strip()
                        if (v.nums is not None and len(v.nums.units) == 0 and units is not None): 
                            v.nums.units = units
                    return res0
        elif (t1.is_hiphen and t1.is_whitespace_after and t1.is_whitespace_before): 
            pass
        elif (t1.is_hiphen and t1.next0_ is not None and t1.next0_.is_char('(')): 
            pass
        else: 
            break
    if (t1 is None): 
        return None
    # Parse the actual number(s); fall back to NaN when only units were seen.
    mts = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, not0_, True, is_resctriction)
    if (mts is None): 
        if (units is not None and len(units) > 0): 
            if (t1 is None or t1.previous.is_char(':')): 
                mts = list()
                if (t1 is None): 
                    # Unreachable after the None check above — kept from the
                    # original converted code.
                    t1 = t11
                    while t1 is not None and t1.next0_ is not None:
                        pass
                        t1 = t1.next0_
                else: 
                    t1 = t1.previous
                mts.append(NumbersWithUnitToken._new1619(t0, t1, math.nan))
        if (mts is None): 
            return None
    mt = mts[0]
    if (mt.begin_token == mt.end_token and not (isinstance(mt.begin_token, NumberToken))): 
        return None
    if (not is_subval and name_.begin_token.morph.class0_.is_preposition): 
        name_.begin_token = name_.begin_token.next0_
    if (mt.whl is not None): 
        whd = mt.whl
    # Trim trailing W/H/L headers and unit lists off the name (bounded loop).
    for kk in range(10):
        if (whd is not None and whd.end_token == name_.end_token): 
            name_.end_token = whd.begin_token.previous
            continue
        if (units is not None): 
            if (units[len(units) - 1].end_token == name_.end_token): 
                name_.end_token = units[0].begin_token.previous
                continue
        break
    if (len(mts) > 1 and len(internals_) == 0): 
        # Several number groups: emit one wrapper with per-group internals.
        if (len(mt.units) == 0): 
            if (units is not None): 
                for m in mts: 
                    m.units = units
        res1 = MeasureToken._new1620(t0, mts[len(mts) - 1].end_token, name_.morph, True)
        res1.name = MiscHelper.get_text_value_of_meta_token(name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
        k = 0
        while k < len(mts):
            ttt = MeasureToken._new1612(mts[k].begin_token, mts[k].end_token, mts[k])
            if (whd is not None): 
                # whd.tag presumably holds per-dimension names (W/H/L) — TODO confirm.
                nams = Utils.asObjectOrNull(whd.tag, list)
                if (k < len(nams)): 
                    ttt.name = nams[k]
            res1.internals.append(ttt)
            k += 1
        tt1 = res1.end_token.next0_
        # Optional trailing tolerance: "± value".
        if (tt1 is not None and tt1.is_char('±')): 
            nn = NumbersWithUnitToken._try_parse(tt1, add_units, True, False, False)
            if (nn is not None and nn.plus_minus_percent): 
                res1.end_token = nn.end_token
                res1.nums = nn
                if (len(nn.units) > 0 and units is None and len(mt.units) == 0): 
                    for m in mts: 
                        m.units = nn.units
        return res1
    if (not mt.is_whitespace_before): 
        # Number glued to the previous char is only valid after a few separators.
        if (mt.begin_token.previous is None): 
            return None
        if (mt.begin_token.previous.is_char_of(":),") or mt.begin_token.previous.is_table_control_char or mt.begin_token.previous.is_value("IP", None)): 
            pass
        elif (mt.begin_token.is_hiphen and len(mt.units) > 0 and not mt.units[0].is_doubt): 
            pass
        else: 
            return None
    if (len(mt.units) == 0 and units is not None): 
        mt.units = units
    # Distribute negative-power units (denominator) onto the divisor number.
    if (mt.div_num is not None and len(units) > 1 and len(mt.div_num.units) == 0): 
        i = 1
        while i < len(units):
            if (units[i].pow0_ == -1): 
                j = i
                while j < len(units):
                    mt.div_num.units.append(units[j])
                    units[j].pow0_ = (- units[j].pow0_)
                    j += 1
                del mt.units[i:i+len(units) - i]
                break
            i += 1
    # "min X" -> lower bound, "max X" -> upper bound.
    if ((minmax < 0) and mt.single_val is not None): 
        mt.from_val = mt.single_val
        mt.from_include = True
        mt.single_val = (None)
    if (minmax > 0 and mt.single_val is not None): 
        mt.to_val = mt.single_val
        mt.to_include = True
        mt.single_val = (None)
    if (len(mt.units) == 0): 
        # Last chance: units right after the number; otherwise honor can_units_absent.
        units = UnitToken.try_parse_list(mt.end_token.next0_, add_units, True)
        if (units is None): 
            if (can_units_absent): 
                pass
            else: 
                return None
        else: 
            mt.units = units
    res = MeasureToken._new1622(t0, mt.end_token, name_.morph, internals_)
    # Re-attach a hyphen-glued prefix word to the name ("Т-образный ...").
    if (((not t0.is_whitespace_before and t0.previous is not None and t0 == name_.begin_token) and t0.previous.is_hiphen and not t0.previous.is_whitespace_before) and (isinstance(t0.previous.previous, TextToken))): 
        name_.begin_token = res.begin_token = name_.begin_token.previous.previous
    res.name = MiscHelper.get_text_value_of_meta_token(name_, (GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE if not is_subval else GetTextAttr.NO))
    res.nums = mt
    for u in res.nums.units: 
        if (u.keyword is not None): 
            if (u.keyword.begin_char >= res.begin_char): 
                res.reliable = True
    res.__parse_internals(add_units)
    if (len(res.internals) > 0 or not can_be_set): 
        return res
    # Optionally recognize a set "X unit1, Y unit2" with differing units.
    t1 = res.end_token.next0_
    if (t1 is not None and t1.is_comma_and): 
        t1 = t1.next0_
    mts1 = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, False, False, False)
    if ((mts1 is not None and len(mts1) == 1 and (t1.whitespaces_before_count < 3)) and len(mts1[0].units) > 0 and not UnitToken.can_be_equals(mts[0].units, mts1[0].units)): 
        res.is_set = True
        res.nums = (None)
        res.internals.append(MeasureToken._new1612(mt.begin_token, mt.end_token, mt))
        res.internals.append(MeasureToken._new1612(mts1[0].begin_token, mts1[0].end_token, mts1[0]))
        res.end_token = mts1[0].end_token
    return res
def try_parse_list(t : 'Token', max_count : int=10) -> typing.List['WeaponItemToken']:
    """Parse a chain of related weapon items starting at *t*.

    Starting from a first WeaponItemToken, repeatedly tries to attach follow-up
    items (models, brands, numbers, ...) across hyphens, commas, brackets and
    wrapped ReferentTokens, then merges adjacent MODEL items into one value.

    Parameters:
        t: first token of the candidate chain.
        max_count: soft cap on the number of collected items (<=0 disables it).
    Returns:
        List of WeaponItemToken, or None if no valid chain starts at *t*.

    NOTE(review): machine-converted Pullenti code; indentation reconstructed
    from a whitespace-collapsed source — verify against the generated original.
    """
    tr = WeaponItemToken.try_parse(t, None, False, False)
    if (tr is None): 
        return None
    # A chain cannot start with a bare class or date item.
    if (tr.typ == WeaponItemToken.Typs.CLASS or tr.typ == WeaponItemToken.Typs.DATE): 
        return None
    tr0 = tr
    res = list()
    if (len(tr.__inner_tokens) > 0): 
        res.extend(tr.__inner_tokens)
        if (res[0].begin_char > tr.begin_char): 
            res[0].begin_token = tr.begin_token
    res.append(tr)
    t = tr.end_token.next0_
    if (tr.typ == WeaponItemToken.Typs.NOUN): 
        # After a noun item, skip ':' and hyphens before the detail items.
        while t is not None:
            if (t.is_char(':') or t.is_hiphen): 
                pass
            else: 
                break
            t = t.next0_
    # NOTE(review): and_conj is never set True in this block — presumably a
    # vestige of the C#->Python conversion; the break below is dead code.
    and_conj = False
    first_pass3425 = True
    while True:
        if first_pass3425: first_pass3425 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (max_count > 0 and len(res) >= max_count): 
            break
        if (t.is_char(':')): 
            continue
        if (tr0.typ == WeaponItemToken.Typs.NOUN): 
            if (t.is_hiphen and t.next0_ is not None): 
                t = t.next0_
        tr = WeaponItemToken.try_parse(t, tr0, False, False)
        # Fallback 1: after a closing bracket (for MODEL/BRAND chains).
        if (tr is None): 
            if (BracketHelper.can_be_end_of_sequence(t, True, None, False) and t.next0_ is not None): 
                if (tr0.typ == WeaponItemToken.Typs.MODEL or tr0.typ == WeaponItemToken.Typs.BRAND): 
                    tt1 = t.next0_
                    if (tt1 is not None and tt1.is_comma): 
                        tt1 = tt1.next0_
                    tr = WeaponItemToken.try_parse(tt1, tr0, False, False)
        # Fallback 2: look inside a single-token ReferentToken wrapper.
        if (tr is None and (isinstance(t, ReferentToken))): 
            rt = Utils.asObjectOrNull(t, ReferentToken)
            if (rt.begin_token == rt.end_token and (isinstance(rt.begin_token, TextToken))): 
                tr = WeaponItemToken.try_parse(rt.begin_token, tr0, False, False)
                if (tr is not None and tr.begin_token == tr.end_token): 
                    # Re-anchor the result on the wrapper token itself.
                    tr.begin_token = tr.end_token = t
        # Fallback 3: skip a parenthesized group; only a NUMBER may follow it.
        if (tr is None and t.is_char('(')): 
            br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
            if (br is not None): 
                tt = br.end_token.next0_
                if (tt is not None and tt.is_comma): 
                    tt = tt.next0_
                tr = WeaponItemToken.try_parse(tt, tr0, False, False)
                if (tr is not None and tr.typ == WeaponItemToken.Typs.NUMBER): 
                    pass
                else: 
                    tr = (None)
        # Fallback 4: hyphen continuation of a brand/model.
        if (tr is None and t.is_hiphen): 
            if (tr0.typ == WeaponItemToken.Typs.BRAND or tr0.typ == WeaponItemToken.Typs.MODEL): 
                tr = WeaponItemToken.try_parse(t.next0_, tr0, False, False)
        # Fallback 5: comma continuation; only a NUMBER item may be attached.
        if (tr is None and t.is_comma): 
            if ((tr0.typ == WeaponItemToken.Typs.NAME or tr0.typ == WeaponItemToken.Typs.BRAND or tr0.typ == WeaponItemToken.Typs.MODEL) or tr0.typ == WeaponItemToken.Typs.CLASS or tr0.typ == WeaponItemToken.Typs.DATE): 
                tr = WeaponItemToken.try_parse(t.next0_, tr0, True, False)
                if (tr is not None): 
                    if (tr.typ == WeaponItemToken.Typs.NUMBER): 
                        pass
                    else: 
                        tr = (None)
        if (tr is None): 
            break
        # Across a line break only NUMBER items may continue the chain.
        if (t.is_newline_before): 
            if (tr.typ != WeaponItemToken.Typs.NUMBER): 
                break
        if (len(tr.__inner_tokens) > 0): 
            res.extend(tr.__inner_tokens)
        res.append(tr)
        tr0 = tr
        t = tr.end_token
        if (and_conj): 
            break
    # Merge adjacent MODEL items into a single hyphen/space-joined value.
    i = 0
    while i < (len(res) - 1):
        if (res[i].typ == WeaponItemToken.Typs.MODEL and res[i + 1].typ == WeaponItemToken.Typs.MODEL): 
            res[i].end_token = res[i + 1].end_token
            res[i].value = "{0}{1}{2}".format(res[i].value, ('-' if res[i].end_token.next0_ is not None and res[i].end_token.next0_.is_hiphen else ' '), res[i + 1].value)
            del res[i + 1]
            i -= 1
        i += 1
    return res
def parse_variants(t0 : 'Token', t1 : 'Token', lev : int, max_count : int=0, regime : 'SentItemType'=SentItemType.UNDEFINED) -> typing.List['Sentence']:
    """Enumerate alternative readings of the token span [t0..t1] as Sentences.

    Walks the span collecting SentItems; when exactly one item is possible at a
    position it is appended to the running sentence, and when several are
    possible the function recurses once per alternative (memoizing the suffix
    variants by end position) and returns the cross product.

    Parameters:
        t0, t1: inclusive token span to analyze.
        lev: recursion depth guard (hard cap 100).
        max_count: when > 0, stop branching once len(res) exceeds it.
        regime: context hint; DELIM/VERB items may terminate the walk.
    Returns:
        List of Sentence variants, or None if nothing could be parsed.

    NOTE(review): machine-converted Pullenti code; indentation reconstructed
    from a whitespace-collapsed source — verify against the generated original.
    """
    from pullenti.semantic.internal.SentItem import SentItem
    if ((t0 is None or t1 is None or t0.end_char > t1.end_char) or lev > 100): 
        return None
    res = list()
    sent = Sentence()
    t = t0
    # first_passNNNN emulates a C# do/while loop.
    first_pass3463 = True
    while True:
        if first_pass3463: first_pass3463 = False
        else: t = t.next0_
        if (not (t is not None and t.end_char <= t1.end_char)): break
        # Skip parenthesized fragments wholesale.
        if (t.is_char('(')): 
            br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
            if (br is not None): 
                t = br.end_token
                continue
        items_ = SentItem.parse_near_items(t, t1, lev + 1, sent.items)
        if (items_ is None or len(items_) == 0): 
            continue
        if (len(items_) == 1 or ((max_count > 0 and len(res) > max_count))): 
            # Unambiguous (or branching budget exhausted): take the first item.
            sent.items.append(items_[0])
            t = items_[0].end_token
            if (regime != SentItemType.UNDEFINED): 
                it = items_[0]
                if (it.can_be_noun): 
                    pass
                elif (it.typ == SentItemType.DELIM): 
                    break
                elif (it.typ == SentItemType.VERB): 
                    if (regime == SentItemType.PARTBEFORE): 
                        break
            continue
        # Ambiguity: recurse for each alternative's suffix, memoized by the
        # alternative's end position (tryGetValue emulates C# TryGetValue).
        m_nexts = dict()
        for it in items_: 
            nexts = None
            wrapnexts2942 = RefOutArgWrapper(None)
            inoutres2943 = Utils.tryGetValue(m_nexts, it.end_token.end_char, wrapnexts2942)
            nexts = wrapnexts2942.value
            if (not inoutres2943): 
                nexts = Sentence.parse_variants(it.end_token.next0_, t1, lev + 1, max_count, SentItemType.UNDEFINED)
                m_nexts[it.end_token.end_char] = nexts
            if (nexts is None or len(nexts) == 0): 
                # No suffix variants: the sentence ends with this alternative.
                se = Sentence()
                for itt in sent.items: 
                    itt1 = SentItem(None)
                    itt1.copy_from(itt)
                    se.items.append(itt1)
                itt0 = SentItem(None)
                itt0.copy_from(it)
                se.items.append(itt0)
                res.append(se)
            else: 
                # Cross product: prefix copy + this alternative + each suffix.
                for sn in nexts: 
                    se = Sentence()
                    for itt in sent.items: 
                        itt1 = SentItem(None)
                        itt1.copy_from(itt)
                        se.items.append(itt1)
                    itt0 = SentItem(None)
                    itt0.copy_from(it)
                    se.items.append(itt0)
                    for itt in sn.items: 
                        itt1 = SentItem(None)
                        itt1.copy_from(itt)
                        se.items.append(itt1)
                    res.append(se)
        # Branching handled everything to the right: done.
        return res
    if (len(sent.items) == 0): 
        return None
    res.append(sent)
    return res