def process(self, kit: 'AnalysisKit') -> None:
    """Main entry point: find denominations in the token chain and embed them.

    The chain is scanned at most twice.  On each pass a token is tried, in
    order, against the special cases, the local ontology of the analyzer
    data, the external ontology of the kit (first pass only) and finally
    the generic ``try_attach`` (first pass only).  The second pass is run
    only when the first one produced new denominations through
    ``try_attach``.

    Args:
        kit(AnalysisKit): container with the token chain to process.
    """
    ad = Utils.asObjectOrNull(kit.get_analyzer_data(self), AnalyzerDataWithOntology)
    for k in range(2):
        detect_new_denoms = False
        t = kit.first_token
        advance = False
        while True:
            if advance:
                t = t.next0_
            advance = True
            if t is None:
                break
            # A candidate must follow whitespace, a comma, or a token that
            # BracketHelper accepts as the start of a sequence.
            if not t.is_whitespace_before:
                prev = t.previous
                if prev is None:
                    continue
                if not (prev.is_char_of(",")
                        or BracketHelper.can_be_start_of_sequence(prev, False, False)):
                    continue
            rt0 = self.__try_attach_spec(t)
            if rt0 is not None:
                rt0.referent = ad.register_referent(rt0.referent)
                kit.embed_token(rt0)
                t = rt0
                continue
            if not t.chars.is_letter:
                continue
            if not self.__can_be_start_of_denom(t):
                continue
            # Already known denomination (local ontology): embed a clone.
            ot = ad.local_ontology.try_attach(t, None, False)
            if ot is not None and isinstance(ot[0].item.referent, DenominationReferent):
                if self.__check_attach(ot[0].begin_token, ot[0].end_token):
                    cl = Utils.asObjectOrNull(ot[0].item.referent.clone(), DenominationReferent)
                    cl.occurrence.clear()
                    rt = ReferentToken(cl, ot[0].begin_token, ot[0].end_token)
                    kit.embed_token(rt)
                    t = rt
                    continue
            # Everything below runs on the first pass only.
            if k > 0:
                continue
            if t is not None and t.kit.ontology is not None:
                ot = t.kit.ontology.attach_token(DenominationReferent.OBJ_TYPENAME, t)
                if ot is not None:
                    if self.__check_attach(ot[0].begin_token, ot[0].end_token):
                        dr = DenominationReferent()
                        dr.merge_slots(ot[0].item.referent, True)
                        rt = ReferentToken(ad.register_referent(dr), ot[0].begin_token, ot[0].end_token)
                        kit.embed_token(rt)
                        t = rt
                        continue
            rt0 = self.try_attach(t, False)
            if rt0 is not None:
                rt0.referent = ad.register_referent(rt0.referent)
                kit.embed_token(rt0)
                detect_new_denoms = True
                t = rt0
                # Stop the pass once the local ontology has grown past 1000 items.
                if len(ad.local_ontology.items) > 1000:
                    break
        if not detect_new_denoms:
            break
def __tryAttachSpec(self, t: 'Token') -> 'ReferentToken':
    """Handle some special cases that start with a digit number.

    Two shapes are recognised: the number ``1`` optionally followed by a
    hyphen and then the letter C (Latin or Cyrillic) without whitespace
    before it, and a digit number glued to a following letter token that
    is not all lower-case.

    Args:
        t(Token): token to start from; may be None.

    Returns:
        ReferentToken with a new DenominationReferent, or None.
    """
    if t is None:
        return None
    first = t
    num = Utils.asObjectOrNull(t, NumberToken)
    is_digit_number = num is not None and num.typ == NumberSpellingType.DIGIT
    if is_digit_number and num.value == "1":
        if t.next0_ is not None and t.next0_.is_hiphen:
            # Step onto the hyphen; note that t stays there for the
            # generic check below as well.
            t = t.next0_
        follower = t.next0_
        if isinstance(follower, TextToken) and not follower.is_whitespace_before:
            if follower.isValue("C", None) or follower.isValue("С", None):
                dr = DenominationReferent()
                dr.addSlot(DenominationReferent.ATTR_VALUE, "1С", False, 0)
                dr.addSlot(DenominationReferent.ATTR_VALUE, "1C", False, 0)
                return ReferentToken(dr, first, follower)
    if not is_digit_number:
        return None
    follower = t.next0_
    if not isinstance(follower, TextToken) or t.is_whitespace_after:
        return None
    if follower.chars.is_all_lower or not follower.chars.is_letter:
        return None
    dr = DenominationReferent()
    dr.addSlot(DenominationReferent.ATTR_VALUE,
               "{0}{1}".format(num.getSourceText(), follower.term), False, 0)
    return ReferentToken(dr, first, follower)
def createRefenetsTokensWithRegister(
        self, ad: 'AnalyzerData', name: str,
        regist: bool = True) -> typing.List['ReferentToken']:
    """Build referent tokens for the units and for the measure itself.

    The returned list holds one token per unit, then the tokens produced
    by ``div_num`` (if set), and finally the token of the MeasureReferent
    that spans this object.

    Args:
        ad(AnalyzerData): passed to the units' ``createReferentWithRegister``
            and, when ``regist`` is true and it is not None, used to
            register the measure.
        name(str): optional value for the NAME slot.
        regist(bool): whether to register the measure in ``ad``.

    Note:
        When ``not0_`` is set and one of the bounds is missing, the
        from/to bounds of ``self`` are swapped in place.
    """
    res = [ReferentToken(u.createReferentWithRegister(ad), u.begin_token, u.end_token)
           for u in self.units]
    mr = MeasureReferent()
    templ = "1"
    if self.single_val is None:
        if self.not0_ and (self.from_val is None or self.to_val is None):
            self.from_include, self.to_include = self.to_include, self.from_include
            self.from_val, self.to_val = self.to_val, self.from_val
        num = 1
        if self.from_val is None:
            templ = "]"
        else:
            mr.addValue(self.from_val)
            templ = "[1" if self.from_include else "]1"
            num += 1
        if self.to_val is None:
            templ += " .. ["
        else:
            mr.addValue(self.to_val)
            closing = ']' if self.to_include else '['
            templ = "{0} .. {1}{2}".format(templ, num, closing)
    else:
        mr.addValue(self.single_val)
        if self.plus_minus is not None:
            percent = "%" if self.plus_minus_percent else ""
            templ = "[1 ±2{0}]".format(percent)
            mr.addValue(self.plus_minus)
        elif self.about:
            templ = "~1"
    mr.template = templ
    for unit_tok in res:
        mr.addSlot(MeasureReferent.ATTR_UNIT, unit_tok.referent, False, 0)
    if name is not None:
        mr.addSlot(MeasureReferent.ATTR_NAME, name, False, 0)
    if self.div_num is not None:
        dn = self.div_num.createRefenetsTokensWithRegister(ad, None, True)
        res.extend(dn)
        # The last token of the divisor is its own measure.
        mr.addSlot(MeasureReferent.ATTR_REF, dn[-1].referent, False, 0)
    ki = UnitToken.calcKind(self.units)
    if ki != MeasureKind.UNDEFINED:
        mr.kind = ki
    if regist and ad is not None:
        mr = Utils.asObjectOrNull(ad.registerReferent(mr), MeasureReferent)
    res.append(ReferentToken(mr, self.begin_token, self.end_token))
    return res
def process(self, kit: 'AnalysisKit') -> None:
    """Scan the token chain and embed every good attribute that parses.

    Progress is reported through ``_on_progress`` each time the scan
    crosses another 100000-character boundary; a false result from it
    stops the scan.

    Args:
        kit(AnalysisKit): container with the text and its token chain.
    """
    ad = kit.get_analyzer_data(self)
    delta = 100000
    # Number of delta-sized parts in the text, at least one.
    parts = (len(kit.sofa.text) + delta - 1) // delta
    if parts == 0:
        parts = 1
    cur = 0
    next_pos = 0
    t = kit.first_token
    advance = False
    while True:
        if advance:
            t = t.next0_
        advance = True
        if t is None:
            break
        if t.begin_char > next_pos:
            next_pos += delta
            cur += 1
            if not self._on_progress(cur, parts, kit):
                break
        at = GoodAttrToken.try_parse(t, None, True, True)
        if at is None:
            continue
        attr = at._create_attr()
        if attr is None:
            # Nothing to embed: resume after the parsed span.
            t = at.end_token
            continue
        rt = ReferentToken(attr, at.begin_token, at.end_token)
        rt.referent = ad.register_referent(attr)
        kit.embed_token(rt)
        t = rt
def process_ontology_item(self, begin: 'Token') -> 'ReferentToken':
    """Turn an ontology item that starts at *begin* into a unit referent token.

    Args:
        begin(Token): first token of the item.

    Returns:
        None when *begin* is not a TextToken.  Otherwise the token of the
        unit parsed by ``UnitToken.try_parse`` or, if parsing fails, a
        one-token UnitReferent named after the source text of *begin*.
    """
    if isinstance(begin, TextToken):
        parsed = UnitToken.try_parse(begin, None, None, False)
        if parsed is None:
            unit = UnitReferent()
            unit.add_slot(UnitReferent.ATTR_NAME, begin.get_source_text(), False, 0)
            return ReferentToken(unit, begin, begin)
        return ReferentToken(parsed.create_referent_with_register(None),
                             parsed.begin_token, parsed.end_token)
    return None
def processOntologyItem(self, begin: 'Token') -> 'ReferentToken':
    """Turn an ontology item that starts at *begin* into a unit referent token.

    Args:
        begin(Token): first token of the item.

    Returns:
        None when *begin* is not a TextToken.  Otherwise the token of the
        unit parsed by ``UnitToken.tryParse`` or, if parsing fails, a
        one-token UnitReferent named after the source text of *begin*.
    """
    if isinstance(begin, TextToken):
        parsed = UnitToken.tryParse(begin, None, None, False)
        if parsed is None:
            unit = UnitReferent()
            unit.addSlot(UnitReferent.ATTR_NAME, begin.getSourceText(), False, 0)
            return ReferentToken(unit, begin, begin)
        return ReferentToken(parsed.createReferentWithRegister(None),
                             parsed.begin_token, parsed.end_token)
    return None
def __try_parse_short_inline(t: 'Token') -> 'ReferentToken':
    """Parse a short inline book-link reference that starts at *t*.

    Recognised shapes, as far as the code shows:
      * ``[`` parsed by BookLinkToken as a NUMBER (not at line start);
      * ``(`` + SEE token + optional ``,:.`` + ``[N])`` or ``N)``;
      * ``(`` + NUMBER token + optional comma + PAGERANGE token + ``)``.

    Args:
        t(Token): first token; may be None.

    Returns:
        ReferentToken with a BookLinkRefReferent, or None.
    """
    if t is None:
        return None
    if t.is_char('[') and not t.is_newline_before:
        bb = BookLinkToken.try_parse(t, 0)
        if bb is not None and bb.typ == BookLinkTyp.NUMBER:
            ref = BookLinkRefReferent()
            ref.number = bb.value
            return ReferentToken(ref, t, bb.end_token)
    if not t.is_char('('):
        return None
    bbb = BookLinkToken.try_parse(t.next0_, 0)
    if bbb is None:
        return None
    if bbb.typ == BookLinkTyp.SEE:
        tt = bbb.end_token.next0_
        # Skip punctuation between the "see" word and the number.
        while tt is not None and tt.is_char_of(",:."):
            tt = tt.next0_
        if tt is None:
            return None
        if tt.is_char('['):
            num = tt.next0_
            close_sq = num.next0_ if isinstance(num, NumberToken) else None
            # The original tested `close_sq is not None` twice and then
            # dereferenced the token after it unguarded; check that token.
            close_par = close_sq.next0_ if close_sq is not None else None
            if (close_sq is not None and close_sq.is_char(']')
                    and close_par is not None and close_par.is_char(')')):
                ref = BookLinkRefReferent()
                ref.number = str(num.value)
                return ReferentToken(ref, t, close_par)
        if isinstance(tt, NumberToken) and tt.next0_ is not None and tt.next0_.is_char(')'):
            ref = BookLinkRefReferent()
            ref.number = str(tt.value)
            return ReferentToken(ref, t, tt.next0_)
        return None
    if bbb.typ == BookLinkTyp.NUMBER:
        tt1 = bbb.end_token.next0_
        if tt1 is not None and tt1.is_comma:
            tt1 = tt1.next0_
        bbb2 = BookLinkToken.try_parse(tt1, 0)
        if (bbb2 is not None and bbb2.typ == BookLinkTyp.PAGERANGE
                and bbb2.end_token.next0_ is not None
                and bbb2.end_token.next0_.is_char(')')):
            ref = BookLinkRefReferent()
            ref.number = bbb.value
            ref.pages = bbb2.value
            return ReferentToken(ref, t, bbb2.end_token.next0_)
    return None
def __analizeSubsidiary(self, bfi: 'BusinessFactItem') -> 'ReferentToken':
    """Build a business fact linking an organization before *bfi* to one after it.

    The token right after *bfi* must refer to an organization.  Tokens
    before *bfi* are walked backwards, ignoring ``(``, ``%``, verbs and
    numbers, until a token referring to an organization is found.

    Args:
        bfi(BusinessFactItem): the key item of the fact.

    Returns:
        ReferentToken spanning from the found organization to the token
        after *bfi*, or None.
    """
    after = bfi.end_token.next0_
    if after is None or not isinstance(after.getReferent(), OrganizationReferent):
        return None
    owner = None
    t = bfi.begin_token.previous
    while t is not None:
        ignorable = (t.isChar('(') or t.isChar('%')
                     or t.morph.class0_.is_verb
                     or isinstance(t, NumberToken))
        if not ignorable:
            owner = Utils.asObjectOrNull(t.getReferent(), OrganizationReferent)
            if owner is not None:
                break
        t = t.previous
    if owner is None:
        return None
    bfr = BusinessFactReferent._new436(bfi.base_kind)
    bfr.who = owner
    bfr.whom = after.getReferent()
    return ReferentToken(bfr, t, after)
def __try_attach_moscowao(li: typing.List['TerrItemToken'], ad: 'AnalyzerData') -> 'ReferentToken':
    """Create a GeoReferent for an item whose termin is flagged ``is_moscow_region``.

    A doubtful item is accepted only when a city follows it or the
    following address items start with a street.

    Args:
        li: territory items; only ``li[0]`` is examined.
        ad(AnalyzerData): not used by this function.

    Returns:
        ReferentToken covering ``li[0]``, or None.
    """
    head = li[0]
    if head.termin_item is None or not head.termin_item.is_moscow_region:
        return None
    if head.is_doubt:
        after = head.end_token.next0_
        confirmed = CityAttachHelper.check_city_after(after)
        if not confirmed:
            ali = AddressItemToken.try_parse_list(head.end_token.next0_, None, 2)
            if ali is not None and len(ali) > 0 and ali[0].typ == AddressItemToken.ItemType.STREET:
                confirmed = True
        if not confirmed:
            return None
    reg = GeoReferent()
    typ = "АДМИНИСТРАТИВНЫЙ ОКРУГ"
    reg._add_typ(typ)
    name = head.termin_item.canonic_text
    if LanguageHelper.ends_with(name, typ):
        # Cut the type words and the one character in front of them.
        name = name[:len(name) - len(typ) - 1].strip()
    reg._add_name(name)
    return ReferentToken(reg, head.begin_token, head.end_token)
def processReferent1(self, begin: 'Token', end: 'Token') -> 'ReferentToken':
    """Run the title-page processing from *begin* and wrap its result.

    Args:
        begin(Token): first token to process.
        end(Token): optional last token; its ``end_char`` (or 0 when None)
            is passed to ``TitlePageAnalyzer._process``.

    Returns:
        ReferentToken from *begin* to the end token reported through the
        out-argument wrapper, or None when nothing was produced.
    """
    end_holder = RefOutArgWrapper(None)
    max_char = end.end_char if end is not None else 0
    tpr = TitlePageAnalyzer._process(begin, max_char, begin.kit, end_holder)
    last = end_holder.value
    if tpr is not None:
        return ReferentToken(tpr, begin, last)
    return None
def __try_attach(self, pli: typing.List['PhoneItemToken'], ind: int, is_phone_before: bool, prev_phone: 'PhoneReferent') -> typing.List['ReferentToken']:
    """Attach a phone and up to five alternate numbers that follow it.

    Each alternate is a copy of the previous phone whose number tail is
    replaced by the alternate value.  An additional number found after
    the last phone is assigned to every phone in the result.

    Returns:
        List of referent tokens (the main phone first), or None when the
        main phone cannot be attached.
    """
    rt = self.__try_attach_(pli, ind, is_phone_before, prev_phone, 0)
    if rt is None:
        return None
    res = [rt]
    for _ in range(5):
        ph0 = Utils.asObjectOrNull(rt.referent, PhoneReferent)
        if ph0.add_number is not None:
            return res
        alt = PhoneItemToken.try_attach_alternate(rt.end_token.next0_, ph0, pli)
        if alt is None:
            break
        ph = PhoneReferent()
        for slot in rt.referent.slots:
            ph.add_slot(slot.type_name, slot.value, False, 0)
        num = ph.number
        if num is None or len(num) <= len(alt.value):
            break
        # Keep the head of the number and substitute the alternate tail.
        ph.number = num[:len(num) - len(alt.value)] + alt.value
        ph._m_template = ph0._m_template
        rt = ReferentToken(ph, alt.begin_token, alt.end_token)
        res.append(rt)
    add = PhoneItemToken.try_attach_additional(rt.end_token.next0_)
    if add is not None:
        for item in res:
            item.referent.add_number = add.value
        res[-1].end_token = add.end_token
    return res
def process_referent(self, begin: 'Token', end: 'Token') -> 'ReferentToken':
    """Try to attach a person attribute at *begin* and wrap its property referent.

    Args:
        begin(Token): token to start from.
        end(Token): not used by this method.

    Returns:
        ReferentToken for the attribute's ``prop_ref``, or None when no
        attribute with a property referent is found.
    """
    pat = PersonAttrToken.try_attach(
        begin, None, PersonAttrToken.PersonAttrAttachAttrs.INPROCESS)
    if pat is None or pat.prop_ref is None:
        return None
    return ReferentToken._new2595(pat.prop_ref, pat.begin_token, pat.end_token,
                                  pat.morph, pat)
def __add_referents(self, ad: 'AnalyzerData', t: 'Token', cur: int, max0_: int) -> 'Token':
    """Wrap a referent token into a keyword referent token where applicable.

    Non-referent tokens, tokens without a referent, phone / URI / bank
    data referents and DATE / DATERANGE / BOOKLINKREF referents are
    returned unchanged.  For other referents a KeywordReferent is
    registered in *ad* and embedded over *t*; nested referent tokens are
    processed recursively first.

    Returns:
        The embedded keyword token, or *t* itself when nothing was done.
    """
    if not isinstance(t, ReferentToken):
        return t
    r = t.get_referent()
    if r is None:
        return t
    if isinstance(r, DenominationReferent):
        dr = Utils.asObjectOrNull(r, DenominationReferent)
        kw = KeywordReferent._new1595(KeywordType.REFERENT)
        for slot in dr.slots:
            if slot.type_name == DenominationReferent.ATTR_VALUE:
                kw.add_slot(KeywordReferent.ATTR_NORMAL, slot.value, False, 0)
        kw.add_slot(KeywordReferent.ATTR_REF, dr, False, 0)
        wrapped = ReferentToken(ad.register_referent(kw), t, t)
        t.kit.embed_token(wrapped)
        return wrapped
    if isinstance(r, (PhoneReferent, UriReferent, BankDataReferent)):
        return t
    if isinstance(r, MoneyReferent):
        mr = Utils.asObjectOrNull(r, MoneyReferent)
        kw = KeywordReferent._new1595(KeywordType.OBJECT)
        kw.add_slot(KeywordReferent.ATTR_NORMAL, mr.currency, False, 0)
        wrapped = ReferentToken(ad.register_referent(kw), t, t)
        t.kit.embed_token(wrapped)
        return wrapped
    if r.type_name in ("DATE", "DATERANGE", "BOOKLINKREF"):
        return t
    # Process referent tokens nested inside t before wrapping t itself.
    inner = t.begin_token
    while inner is not None and inner.end_char <= t.end_char:
        if isinstance(inner, ReferentToken):
            self.__add_referents(ad, inner, cur, max0_)
        inner = inner.next0_
    kref = KeywordReferent._new1595(KeywordType.REFERENT)
    norm = r.get_string_value("ALPHA2") if r.type_name == "GEO" else None
    if norm is None:
        norm = r.to_string(True, None, 0)
    if norm is not None:
        kref.add_slot(KeywordReferent.ATTR_NORMAL, norm.upper(), False, 0)
    kref.add_slot(KeywordReferent.ATTR_REF, t.get_referent(), False, 0)
    KeywordAnalyzer.__set_rank(kref, cur, max0_)
    rt1 = ReferentToken(ad.register_referent(kref), t, t)
    t.kit.embed_token(rt1)
    return rt1
def process_ontology_item(self, begin: 'Token') -> 'ReferentToken':
    """Build a good-attribute referent from an ontology item.

    The item may start with one of the Latin words KEYWORD, CHARACTER,
    PROPER or MODEL, which sets the attribute type.  The rest is split on
    ``;`` and each part is stored as a VALUE slot.  If no type was given
    and the last part does not start with an all-lower-case token, the
    type becomes PROPER.

    Args:
        begin(Token): first token of the item; may be None.

    Returns:
        ReferentToken for the attribute, or None when there is nothing
        after the optional type word.
    """
    if begin is None:
        return None
    ga = GoodAttributeReferent()
    if begin.chars.is_latin_letter:
        type_words = (
            ("KEYWORD", GoodAttrType.KEYWORD),
            ("CHARACTER", GoodAttrType.CHARACTER),
            ("PROPER", GoodAttrType.PROPER),
            ("MODEL", GoodAttrType.MODEL),
        )
        for word, kind in type_words:
            if begin.is_value(word, None):
                ga.typ = kind
                begin = begin.next0_
                break
        if begin is None:
            return None
    res = ReferentToken(ga, begin, begin)
    t = begin
    while t is not None:
        if t.is_char(';'):
            ga.add_slot(
                GoodAttributeReferent.ATTR_VALUE,
                MiscHelper.get_text_value(begin, t.previous, GetTextAttr.NO),
                False, 0)
            begin = t.next0_
        else:
            res.end_token = t
        t = t.next0_
    # A trailing ';' leaves begin as None: there is no last part to add
    # and no token to inspect (the original dereferenced None here).
    if begin is not None:
        if res.end_char > begin.begin_char:
            ga.add_slot(
                GoodAttributeReferent.ATTR_VALUE,
                MiscHelper.get_text_value(begin, res.end_token, GetTextAttr.NO),
                False, 0)
        if ga.typ == GoodAttrType.UNDEFINED and not begin.chars.is_all_lower:
            ga.typ = GoodAttrType.PROPER
    return res
def __try4(li: typing.List['CityItemToken']) -> 'ReferentToken':
    """Build a GeoReferent from a city-type noun followed by an organization.

    The first item must be a NOUN other than "ГОРОД" / "МІСТО" / "CITY"
    and must be either not doubtful or preceded by a geo object.  The
    organization comes from ``li[1].org_ref`` or, failing that, from
    ``AddressItemToken.tryAttachOrg`` on the token after the noun.

    Returns:
        ReferentToken from the noun to the organization, or None.
    """
    if len(li) == 0:
        return None
    noun = li[0]
    if noun.typ != CityItemToken.ItemType.NOUN:
        return None
    if noun.value == "ГОРОД" or noun.value == "МІСТО" or noun.value == "CITY":
        return None
    if noun.doubtful and not noun.geo_object_before:
        return None
    if len(li) > 1 and li[1].org_ref is not None:
        geo_ = GeoReferent()
        geo_._addTyp(noun.value)
        geo_._addOrgReferent(li[1].org_ref.referent)
        geo_.addExtReferent(li[1].org_ref)
        return ReferentToken(geo_, noun.begin_token, li[1].end_token)
    aid = AddressItemToken.tryAttachOrg(noun.end_token.next0_)
    if aid is None:
        return None
    geo_ = GeoReferent()
    geo_._addTyp(noun.value)
    geo_._addOrgReferent(aid.referent)
    geo_.addExtReferent(aid.ref_token)
    return ReferentToken(geo_, noun.begin_token, aid.end_token)
def try_attach_stateusaterritory(t: 'Token') -> 'ReferentToken':
    """Attach a geo abbreviation from ``TerrItemToken._m_geo_abbrs`` at *t*.

    Args:
        t(Token): token to start from; must begin with a Latin letter.

    Returns:
        ReferentToken holding a clone (with cleared occurrences) of the
        GeoReferent tagged on the matched termin; a following ``.`` is
        included in the span.  None when nothing matches.
    """
    if t is None or not t.chars.is_latin_letter:
        return None
    tok = TerrItemToken._m_geo_abbrs.try_parse(t, TerminParseAttr.NO)
    if tok is None:
        return None
    geo = Utils.asObjectOrNull(tok.termin.tag, GeoReferent)
    if geo is None:
        return None
    following = tok.end_token.next0_
    if following is not None and following.is_char('.'):
        tok.end_token = tok.end_token.next0_
    copy = geo.clone()
    copy.occurrence.clear()
    return ReferentToken(copy, tok.begin_token, tok.end_token)
def tryAttachToExist(t: 'Token', p1: 'InstrumentParticipant',
                     p2: 'InstrumentParticipant') -> 'ReferentToken':
    """Attach the token at *t* to one of two already known participants.

    If no PURE participant token is parsed at *t*, the referent of *t*
    (person, geo or organization) is looked up in *p1* / *p2*.  Otherwise
    the parsed type is compared with the types of *p1* / *p2*; a
    preceding "ОТ" is then included in the span.  After a following ``:``
    the requisites are tried, and trailing organization tokens already
    present among the participant's slots are absorbed into the span.

    Args:
        t(Token): token to start from; may be None.
        p1(InstrumentParticipant): first participant, may be None.
        p2(InstrumentParticipant): second participant, may be None.

    Returns:
        ReferentToken for the matched participant, or None.
    """
    if t is None:
        return None
    pp = ParticipantToken.tryAttach(t, p1, p2, False)
    p = None
    rt = None
    if pp is None or pp.kind != ParticipantToken.Kinds.PURE:
        pers = t.getReferent()
        if isinstance(pers, (PersonReferent, GeoReferent, OrganizationReferent)):
            if p1 is not None and p1._containsRef(pers):
                p = p1
            elif p2 is not None and p2._containsRef(pers):
                p = p2
            if p is not None:
                rt = ReferentToken(p, t, t)
    else:
        if p1 is not None and ParticipantToken.__isTypesEqual(pp.typ, p1.typ):
            p = p1
        elif p2 is not None and ParticipantToken.__isTypesEqual(pp.typ, p2.typ):
            p = p2
        if p is not None:
            rt = ReferentToken(p, pp.begin_token, pp.end_token)
            # rt exists only in this branch, so the "ОТ" extension is
            # kept inside the guard.
            prev = rt.begin_token.previous
            if prev is not None and prev.isValue("ОТ", None):
                rt.begin_token = rt.begin_token.previous
    if rt is None:
        return None
    if rt.end_token.next0_ is not None and rt.end_token.next0_.isChar(':'):
        other = p2 if p == p1 else p1
        rt1 = ParticipantToken.tryAttachRequisites(
            rt.end_token.next0_.next0_, p, other, False)
        if rt1 is not None:
            rt1.begin_token = rt.begin_token
            return rt1
        rt.end_token = rt.end_token.next0_
    while (rt.end_token.next0_ is not None
           and isinstance(rt.end_token.next0_.getReferent(), OrganizationReferent)):
        org0_ = Utils.asObjectOrNull(rt.end_token.next0_.getReferent(), OrganizationReferent)
        if rt.referent.findSlot(None, org0_, True) is None:
            break
        rt.end_token = rt.end_token.next0_
    return rt
def __create_refs(
        its: typing.List['DateExItemToken']
) -> typing.List['ReferentToken']:
    """Create a chain of DateReferent tokens from date items.

    Each produced date gets the previously produced one as ``higher``.
    An HOUR item absorbs an immediately following non-relative MINUTE
    item.  Items of other, unlisted types are skipped.  Every used item
    gets its ``src`` set to the created referent.

    Returns:
        List of referent tokens (possibly empty); the ``tag`` of the
        first token is set to the last created DateReferent.
    """
    kinds = DateExToken.DateExItemTokenType
    res = []
    own = None
    i = -1
    while True:
        i += 1
        if i >= len(its):
            break
        it = its[i]
        d = DateReferent()
        if it.is_value_relate:
            d.is_relative = True
        if own is not None:
            d.higher = own
        if it.typ == kinds.DAY:
            d.day = it.value
        elif it.typ == kinds.DAYOFWEEK:
            d.day_of_week = it.value
        elif it.typ == kinds.HOUR:
            d.hour = it.value
            nxt = i + 1
            if (nxt < len(its) and its[nxt].typ == kinds.MINUTE
                    and not its[nxt].is_value_relate):
                d.minute = its[nxt].value
                # The minute item is consumed together with the hour.
                i = nxt
        elif it.typ == kinds.MINUTE:
            d.minute = it.value
        elif it.typ == kinds.MONTH:
            d.month = it.value
        elif it.typ == kinds.QUARTAL:
            d.quartal = it.value
        elif it.typ == kinds.WEEK:
            d.week = it.value
        elif it.typ == kinds.YEAR:
            d.year = it.value
        else:
            continue
        res.append(ReferentToken(d, it.begin_token, it.end_token))
        own = d
        it.src = d
    if len(res) > 0:
        res[0].tag = own
    return res
def __analizeGet2(self, t: 'Token') -> 'ReferentToken':
    """Build a HAVE business fact around the token *t*.

    The owned object ("slave") is a FundsReferent found before *t* or,
    failing that, a funds or organization referent on the second token
    after *t*.  The owner ("master") is the person or organization on the
    token right after *t*.

    Args:
        t(Token): the key token; may be None.

    Returns:
        ReferentToken with the BusinessFactReferent, or None when either
        side of the fact is missing.
    """
    if t is None:
        return None
    tt = t.previous
    ts = t
    if tt is not None and tt.is_comma:
        tt = tt.previous
    bef = self.__FindRefBefore(tt)
    master = None
    slave = None
    if bef is not None and isinstance(bef.referent, FundsReferent):
        slave = bef.referent
        ts = bef.begin_token
    tt = t.next0_
    if tt is None:
        return None
    te = tt
    r = tt.getReferent()
    if isinstance(r, (PersonReferent, OrganizationReferent)):
        master = r
        if slave is None and tt.next0_ is not None:
            r = tt.next0_.getReferent()
            if isinstance(r, (FundsReferent, OrganizationReferent)):
                # Keep the referent as is.  The original cast it to
                # FundsReferent, which turned an organization into None
                # and made the company-ownership branch below unreachable.
                slave = r
                te = tt.next0_
    if master is None or slave is None:
        return None
    bfr = BusinessFactReferent._new436(BusinessFactKind.HAVE)
    bfr.who = master
    if isinstance(slave, OrganizationReferent):
        bfr._addWhat(slave)
        bfr.typ = "владение компанией"
    elif isinstance(slave, FundsReferent):
        bfr._addWhat(slave)
        bfr.typ = "владение ценными бумагами"
    else:
        return None
    return ReferentToken(bfr, ts, te)
def __analizeFinance(self, bfi: 'BusinessFactItem') -> 'ReferentToken':
    """Build a FINANCE (or GET, when funds are mentioned) business fact.

    The actor is the organization or person found before *bfi*.  The
    tokens after *bfi*, up to a line break or ``.``, are scanned for the
    first organization (the counterparty), the first money amount and
    the first funds referent.

    Returns:
        ReferentToken from the actor to the counterparty, or None when
        either of them is missing.
    """
    bef = self.__FindRefBefore(bfi.begin_token.previous)
    if bef is None:
        return None
    if not isinstance(bef.referent, OrganizationReferent) and not isinstance(bef.referent, PersonReferent):
        return None
    whom = None
    amount = None
    funds = None
    cur = bfi.end_token.next0_
    while cur is not None:
        if cur.is_newline_before or cur.isChar('.'):
            break
        r = cur.getReferent()
        if isinstance(r, OrganizationReferent):
            if whom is None:
                whom = Utils.asObjectOrNull(cur, ReferentToken)
        elif isinstance(r, MoneyReferent):
            if amount is None:
                amount = Utils.asObjectOrNull(r, MoneyReferent)
        elif isinstance(r, FundsReferent):
            if funds is None:
                funds = Utils.asObjectOrNull(r, FundsReferent)
        cur = cur.next0_
    if whom is None:
        return None
    bfr = BusinessFactReferent()
    if funds is not None:
        bfr.kind = BusinessFactKind.GET
        bfr.typ = "покупка ценных бумаг"
    else:
        bfr.kind = BusinessFactKind.FINANCE
    bfr.who = bef.referent
    bfr.whom = whom.referent
    if funds is not None:
        bfr._addWhat(funds)
    if amount is not None:
        bfr.addSlot(BusinessFactReferent.ATTR_MISC, amount, False, 0)
    self.__findDate(bfr, bef.begin_token)
    return ReferentToken(bfr, bef.begin_token, whom.end_token)
def process(self, kit: 'AnalysisKit') -> None:
    """Extract goods: one GoodReferent per line that yields attributes.

    Only tokens at the start of a line are considered (a leading
    non-letter token is stepped over).  The attributes parsed there are
    registered and embedded, then the whole span is embedded as a good.
    All goods are appended to ``ad.referents`` at the end.  Progress is
    reported through ``_on_progress`` every 100000 characters; a false
    result from it stops the scan.

    Args:
        kit(AnalysisKit): container with the text and its token chain.
    """
    ad = kit.get_analyzer_data(self)
    delta = 100000
    parts = (len(kit.sofa.text) + delta - 1) // delta
    if parts == 0:
        parts = 1
    cur = 0
    next_pos = 0
    goods_ = []
    t = kit.first_token
    advance = False
    while True:
        if advance:
            t = t.next0_
        advance = True
        if t is None:
            break
        if not t.is_newline_before:
            continue
        if t.begin_char > next_pos:
            next_pos += delta
            cur += 1
            if not self._on_progress(cur, parts, kit):
                break
        if not t.chars.is_letter and t.next0_ is not None:
            t = t.next0_
        rts = GoodAttrToken.try_parse_list(t)
        if rts is None or len(rts) == 0:
            continue
        good = GoodReferent()
        for attr_tok in rts:
            attr_tok.referent = ad.register_referent(attr_tok.referent)
            # Do not add the same registered attribute twice.
            if good.find_slot(GoodReferent.ATTR_ATTR, attr_tok.referent, True) is None:
                good.add_slot(GoodReferent.ATTR_ATTR, attr_tok.referent, False, 0)
            kit.embed_token(attr_tok)
        goods_.append(good)
        whole = ReferentToken(good, rts[0], rts[-1])
        kit.embed_token(whole)
        t = whole
    for g in goods_:
        ad.referents.append(g)
def create_referents(et: 'DateExToken') -> typing.List['ReferentToken']:
    """Create referent tokens for a date expression token.

    For a non-range token (or one without ``items_to``) the tokens built
    from ``items_from`` are returned, or None when there are none.  For a
    range, the tokens of both bounds are followed by a token with a
    DateRangeReferent, which is also stored in the ``tag`` of the first
    token.
    """
    if not et.is_diap or len(et.items_to) == 0:
        li = DateRelHelper.__create_refs(et.items_from)
        return li if li else None
    li_fr = DateRelHelper.__create_refs(et.items_from)
    li_to = DateRelHelper.__create_refs(et.items_to)
    ra = DateRangeReferent()
    # The tag of the first token of each bound holds its final DateReferent.
    if len(li_fr) > 0:
        ra.date_from = Utils.asObjectOrNull(li_fr[0].tag, DateReferent)
    if len(li_to) > 0:
        ra.date_to = Utils.asObjectOrNull(li_to[0].tag, DateReferent)
    res = [*li_fr, *li_to, ReferentToken(ra, et.begin_token, et.end_token)]
    res[0].tag = ra
    return res
def tryAttachStateUSATerritory(t: 'Token') -> 'ReferentToken':
    """Attach an abbreviation of a state.

    The abbreviation is looked up in ``TerrItemToken._m_geo_abbrs``; a
    following ``.`` is included in the span.  The result holds a clone
    (with cleared occurrences) of the GeoReferent tagged on the termin.

    Args:
        t(Token): token to start from; must begin with a Latin letter.
    """
    if t is None or not t.chars.is_latin_letter:
        return None
    tok = TerrItemToken._m_geo_abbrs.tryParse(t, TerminParseAttr.NO)
    if tok is None:
        return None
    geo = Utils.asObjectOrNull(tok.termin.tag, GeoReferent)
    if geo is None:
        return None
    following = tok.end_token.next0_
    if following is not None and following.isChar('.'):
        tok.end_token = tok.end_token.next0_
    copy = geo.clone()
    copy.occurrence.clear()
    return ReferentToken(copy, tok.begin_token, tok.end_token)
def __deserialize_token(stream: Stream, kit: 'AnalysisKit', vers: int) -> 'Token':
    """Read one token from *stream*.

    A leading short selects the token class: 0 means "no token", 1 a
    TextToken, 2 a NumberToken, 3 a ReferentToken, anything else a
    MetaToken.  For meta tokens (ReferentToken is checked with the same
    ``isinstance``) the inner token chain is read as well and its first
    and last tokens become the begin / end tokens.

    Returns:
        The deserialized token, or None for type code 0.
    """
    from pullenti.ner.MetaToken import MetaToken
    from pullenti.ner.ReferentToken import ReferentToken
    kind = SerializerHelper.deserialize_short(stream)
    if kind == 0:
        return None
    if kind == 1:
        tok = TextToken(None, kit)
    elif kind == 2:
        tok = NumberToken(None, None, None, NumberSpellingType.DIGIT, kit)
    elif kind == 3:
        tok = ReferentToken(None, None, None, kit)
    else:
        tok = MetaToken(None, None, kit)
    tok._deserialize(stream, kit, vers)
    if not isinstance(tok, MetaToken):
        return tok
    inner = SerializerHelper.deserialize_tokens(stream, kit, vers)
    if inner is not None:
        tok._m_begin_token = inner
        # Walk to the end of the chain to find the last inner token.
        while inner is not None:
            tok._m_end_token = inner
            inner = inner.next0_
    return tok
def __try_attach(toks: typing.List['NamedItemToken']) -> 'ReferentToken':
    """Assemble a NamedEntityReferent from a run of named-item tokens.

    The run is scanned for one type item, a sequence of name items of a
    compatible kind and an optional item whose ``ref`` is acceptable for
    that kind.  The combination is then validated, and the referent gets
    TYPE, NAME and REF slots accordingly.

    Returns:
        ReferentToken from the first item to the last consumed one, or
        None when the combination is rejected or the kind is undefined.
    """
    typ = None
    ref_item = None
    nams = None
    ki = NamedEntityKind.UNDEFINED
    i = 0
    while i < len(toks):
        item = toks[i]
        if item.type_value is not None:
            if nams is not None and item.name_value is not None:
                break
            if typ is None:
                typ = item
                ki = typ.kind
            elif typ.kind != item.kind:
                break
        if item.name_value is not None:
            if (typ is not None and item.kind != NamedEntityKind.UNDEFINED
                    and item.kind != typ.kind):
                break
            if nams is None:
                nams = []
            elif nams[0].is_wellknown != item.is_wellknown:
                break
            if ki == NamedEntityKind.UNDEFINED:
                ki = item.kind
            nams.append(item)
        if item.type_value is None and item.name_value is None:
            break
        if ref_item is None and NamedEntityAnalyzer.__can_be_ref(ki, item.ref):
            ref_item = item
        i += 1
    # The item that stopped the scan may still supply the reference.
    if i < len(toks) and toks[i].ref is not None:
        if NamedEntityAnalyzer.__can_be_ref(ki, toks[i].ref):
            ref_item = toks[i]
            i += 1
    ok = False
    if typ is not None:
        if nams is None:
            ok = ref_item is not None
        elif nams[0].begin_char < typ.end_char and not nams[0].is_wellknown:
            # The name does not start after the type.
            if ref_item is not None:
                ok = True
            elif ((nams[0].chars.is_capital_upper
                   and not MiscHelper.can_be_start_of_sentence(nams[0].begin_token)
                   and typ.morph.number != MorphNumber.PLURAL)
                  and typ.morph.case_.is_nominative):
                ok = True
        else:
            ok = True
    elif nams is not None:
        if len(nams) == 1 and nams[0].chars.is_all_lower:
            pass
        elif nams[0].is_wellknown:
            ok = True
    if not ok or ki == NamedEntityKind.UNDEFINED:
        return None
    nam = NamedEntityReferent._new1765(ki)
    if typ is not None:
        nam.add_slot(NamedEntityReferent.ATTR_TYPE, typ.type_value.lower(), False, 0)
    if nams is not None:
        if len(nams) == 1 and nams[0].is_wellknown and nams[0].type_value is not None:
            nam.add_slot(NamedEntityReferent.ATTR_TYPE, nams[0].type_value.lower(), False, 0)
        if typ is not None and typ.end_char < nams[0].begin_char:
            as_written = MiscHelper.get_text_value(
                nams[0].begin_token, nams[-1].end_token, GetTextAttr.NO)
            nam.add_slot(NamedEntityReferent.ATTR_NAME, as_written, False, 0)
        # Space-separated name values of all name items.
        tmp = io.StringIO()
        for n in nams:
            if tmp.tell() > 0:
                tmp.write(' ')
            tmp.write(str(n.name_value))
        nam.add_slot(NamedEntityReferent.ATTR_NAME, Utils.toStringStringIO(tmp), False, 0)
    if ref_item is not None:
        nam.add_slot(NamedEntityReferent.ATTR_REF, ref_item.ref, False, 0)
    return ReferentToken(nam, toks[0].begin_token, toks[i - 1].end_token)
def try_parse(t: 'Token') -> 'ReferentToken':
    """Try to parse a money amount that starts at *t*.

    Two paths exist.  When the number with postfix at *t* is not money,
    the shape "<number> <hyphen or preposition> <number><currency>" is
    tried, including the variant where the amount is repeated in
    brackets.  Otherwise a MoneyReferent is built from the parsed number
    (VALUE / REST and the alternative ALTVALUE / ALTREST), an equal
    amount repeated after it is absorbed into the span, and a preceding
    number token may be included when the alternative value is the
    larger one.

    Args:
        t(Token): token to start from; may be None.

    Returns:
        ReferentToken with a MoneyReferent, or None.
    """
    if t is None:
        return None
    if not isinstance(t, NumberToken) and t.length_char != 1:
        return None
    nex = NumberHelper.try_parse_number_with_postfix(t)
    if nex is None or nex.ex_typ != NumberExType.MONEY:
        if not (isinstance(t, NumberToken) and isinstance(t.next0_, TextToken)
                and isinstance(t.next0_.next0_, NumberToken)):
            return None
        sep = t.next0_
        if not (sep.is_hiphen or sep.morph.class0_.is_preposition):
            return None
        res1 = NumberHelper.try_parse_number_with_postfix(sep.next0_)
        if res1 is None or res1.ex_typ != NumberExType.MONEY:
            return None
        res0 = MoneyReferent()
        if ((sep.is_hiphen and res1.real_value == 0 and res1.end_token.next0_ is not None)
                and res1.end_token.next0_.is_char('(')):
            nex2 = NumberHelper.try_parse_number_with_postfix(res1.end_token.next0_.next0_)
            if ((nex2 is not None and nex2.ex_typ_param == res1.ex_typ_param
                 and nex2.end_token.next0_ is not None)
                    and nex2.end_token.next0_.is_char(')')):
                if nex2.value == t.value:
                    res0.currency = nex2.ex_typ_param
                    res0.add_slot(MoneyReferent.ATTR_VALUE, nex2.value, True, 0)
                    return ReferentToken(res0, t, nex2.end_token.next0_)
                if isinstance(t.previous, NumberToken):
                    # Compare numerically.  The original added t.value (a
                    # string, see the find('.') calls below) to a number
                    # here; the millions branch already uses real_value.
                    if nex2.real_value == (t.previous.real_value * 1000) + t.real_value:
                        res0.currency = nex2.ex_typ_param
                        res0.add_slot(MoneyReferent.ATTR_VALUE, nex2.value, True, 0)
                        return ReferentToken(res0, t.previous, nex2.end_token.next0_)
                    elif isinstance(t.previous.previous, NumberToken):
                        if nex2.real_value == ((t.previous.previous.real_value * 1000000)
                                               + (t.previous.real_value * 1000)
                                               + t.real_value):
                            res0.currency = nex2.ex_typ_param
                            res0.add_slot(MoneyReferent.ATTR_VALUE, nex2.value, True, 0)
                            return ReferentToken(res0, t.previous.previous, nex2.end_token.next0_)
        res0.currency = res1.ex_typ_param
        res0.add_slot(MoneyReferent.ATTR_VALUE, t.value, False, 0)
        return ReferentToken(res0, t, t)
    res = MoneyReferent()
    res.currency = nex.ex_typ_param
    # VALUE keeps the integer part only; the fraction goes to REST.
    val = nex.value
    dot = val.find('.')
    if dot > 0:
        val = val[:dot]
    res.add_slot(MoneyReferent.ATTR_VALUE, val, True, 0)
    rest = math.floor(round((nex.real_value - res.value) * 100, 6))
    if rest != 0:
        res.add_slot(MoneyReferent.ATTR_REST, str(rest), True, 0)
    if nex.real_value != nex.alt_real_value:
        if math.floor(res.value) != math.floor(nex.alt_real_value):
            val = NumberHelper.double_to_string(nex.alt_real_value)
            dot = val.find('.')
            if dot > 0:
                val = val[:dot]
            res.add_slot(MoneyReferent.ATTR_ALTVALUE, val, True, 0)
        rest = math.floor(round((nex.alt_real_value - math.floor(nex.alt_real_value)) * 100, 6))
        if rest != res.rest and rest != 0:
            res.add_slot(MoneyReferent.ATTR_ALTREST, str(rest), True, 0)
    if nex.alt_rest_money > 0:
        res.add_slot(MoneyReferent.ATTR_ALTREST, str(nex.alt_rest_money), True, 0)
    t1 = nex.end_token
    if t1.next0_ is not None and t1.next0_.is_char('('):
        # The same amount repeated in brackets is absorbed into the span.
        rt = MoneyAnalyzer.try_parse(t1.next0_.next0_)
        if ((rt is not None and rt.referent.can_be_equals(res, ReferentsEqualType.WITHINONETEXT)
             and rt.end_token.next0_ is not None)
                and rt.end_token.next0_.is_char(')')):
            t1 = rt.end_token.next0_
        else:
            rt = MoneyAnalyzer.try_parse(t1.next0_)
            if rt is not None and rt.referent.can_be_equals(res, ReferentsEqualType.WITHINONETEXT):
                t1 = rt.end_token
    if res.alt_value is not None and res.alt_value > res.value:
        if t.whitespaces_before_count == 1 and isinstance(t.previous, NumberToken):
            delt = math.floor(res.alt_value - res.value)
            if ((res.value < 1000 and (delt % 1000) == 0)
                    or (res.value < 1000000 and (delt % 1000000) == 0)):
                # The preceding number is part of the amount: take the
                # alternative value as the main one.
                t = t.previous
                res.add_slot(MoneyReferent.ATTR_VALUE,
                             res.get_string_value(MoneyReferent.ATTR_ALTVALUE), True, 0)
                res.add_slot(MoneyReferent.ATTR_ALTVALUE, None, True, 0)
    return ReferentToken(res, t, t1)
def _createReferentToken(p : 'PersonReferent', begin : 'Token', end : 'Token', morph_ : 'MorphCollection', attrs : typing.List['PersonAttrToken'], ad : 'PersonAnalyzerData', for_attribute : bool, after_be_predicate : bool) -> 'ReferentToken':
    """Build the ReferentToken for person ``p`` and absorb the attributes around it.

    The function copies the leading attributes ``attrs`` into ``p``, normalises
    the morphology, then walks the tokens after ``end`` collecting trailing
    attributes (positions, age, bracketed surname, nickname) and finally
    trailing contacts, identity documents and organizations. ``begin`` and
    ``end`` are moved as tokens are absorbed.

    Args:
        p(PersonReferent): person being built; its slots and gender flags are modified.
        begin(Token): first token of the person's name.
        end(Token): last token of the person's name.
        morph_(MorphCollection): morphology of the name; a singular copy is used.
        attrs(typing.List[PersonAttrToken]): attributes found before the name, may be None.
        ad(PersonAnalyzerData): analyzer data, may be None; supplies ``local_ontology``
            and the ``overflow_level`` counter.
        for_attribute(bool): when True the token is returned without looking for a tail.
        after_be_predicate(bool): not referenced in this function body.

    Returns:
        ReferentToken: token for ``p`` spanning ``begin``..``end``, or None when ``p`` is None.
    """
    from pullenti.ner.person.internal.PersonIdentityToken import PersonIdentityToken
    if (p is None):
        return None
    # --- 1. Leading attributes ------------------------------------------------
    has_prefix = False
    if (attrs is not None):
        for a in attrs:
            if (a.typ == PersonAttrTerminType.BESTREGARDS):
                # A BESTREGARDS attribute only sets the has_prefix flag.
                has_prefix = True
            else:
                # The span starts at the earliest attribute.
                if (a.begin_char < begin.begin_char):
                    begin = a.begin_token
                if (a.typ != PersonAttrTerminType.PREFIX):
                    if (a.age is not None):
                        p.addSlot(PersonReferent.ATTR_AGE, a.age, False, 0)
                    if (a.prop_ref is None):
                        p.addSlot(PersonReferent.ATTR_ATTR, a.value, False, 0)
                    else:
                        p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
                # A PREFIX attribute contributes only its gender.
                elif (a.gender == MorphGender.FEMINIE and not p.is_female):
                    p.is_female = True
                elif (a.gender == MorphGender.MASCULINE and not p.is_male):
                    p.is_male = True
    elif ((isinstance(begin.previous, TextToken)) and (begin.whitespaces_before_count < 3)):
        # No attributes given: a preceding "ИП" becomes an
        # "индивидуальный предприниматель" (individual entrepreneur) attribute.
        if ((begin.previous).term == "ИП"):
            a = PersonAttrToken(begin.previous, begin.previous)
            a.prop_ref = PersonPropertyReferent()
            a.prop_ref.name = "индивидуальный предприниматель"
            p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
            begin = begin.previous
    # --- 2. Morphology: singular copy, gender taken from the person if unset ---
    m0 = MorphCollection()
    for it in morph_.items:
        bi = MorphBaseInfo(it)
        bi.number = MorphNumber.SINGULAR
        if (bi.gender == MorphGender.UNDEFINED):
            if (p.is_male and not p.is_female):
                bi.gender = MorphGender.MASCULINE
            if (not p.is_male and p.is_female):
                bi.gender = MorphGender.FEMINIE
        m0.addItem(bi)
    morph_ = m0
    # If the name has no case, borrow case (and number/gender) from the first attribute.
    if ((attrs is not None and len(attrs) > 0 and not attrs[0].morph.case_.is_undefined) and morph_.case_.is_undefined):
        morph_.case_ = attrs[0].morph.case_
        if (attrs[0].morph.number == MorphNumber.SINGULAR):
            morph_.number = MorphNumber.SINGULAR
        if (p.is_male and not p.is_female):
            morph_.gender = MorphGender.MASCULINE
        elif (p.is_female):
            morph_.gender = MorphGender.FEMINIE
    # --- 3. "ИМЕНИ" / "ИМ." before the name forces for_attribute ----------------
    # NOTE(review): presumably the "named after <person>" construction - confirm.
    if (begin.previous is not None):
        ttt = begin.previous
        if (ttt.isValue("ИМЕНИ", "ІМЕНІ")):
            for_attribute = True
        else:
            if (ttt.isChar('.') and ttt.previous is not None):
                ttt = ttt.previous
            if (ttt.whitespaces_after_count < 3):
                if (ttt.isValue("ИМ", "ІМ")):
                    for_attribute = True
    if (for_attribute):
        return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
    # --- 4. Enumeration "<person>, <person> and <person>" -----------------------
    if ((begin.previous is not None and begin.previous.is_comma_and and (isinstance(begin.previous.previous, ReferentToken))) and (isinstance(begin.previous.previous.getReferent(), PersonReferent))):
        rt00 = Utils.asObjectOrNull(begin.previous.previous, ReferentToken)
        # Walk back to the first person of the comma/and-separated chain.
        ttt = rt00
        while ttt is not None:
            if (ttt.previous is None or not ((isinstance(ttt.previous.previous, ReferentToken)))):
                break
            if (not ttt.previous.is_comma_and or not ((isinstance(ttt.previous.previous.getReferent(), PersonReferent)))):
                break
            rt00 = (Utils.asObjectOrNull(ttt.previous.previous, ReferentToken))
            ttt = (rt00)
        # If that first person starts with a property that is followed by ':'
        # or is plural, the same property is added to this person too.
        if (isinstance(rt00.begin_token.getReferent(), PersonPropertyReferent)):
            ok = False
            if ((rt00.begin_token).end_token.next0_ is not None and (rt00.begin_token).end_token.next0_.isChar(':')):
                ok = True
            elif (rt00.begin_token.morph.number == MorphNumber.PLURAL):
                ok = True
            if (ok):
                p.addSlot(PersonReferent.ATTR_ATTR, rt00.begin_token.getReferent(), False, 0)
    # --- 5. Recursion guard -----------------------------------------------------
    # overflow_level is incremented here and decremented before the final return;
    # above 10 the tail analysis is skipped.
    if (ad is not None):
        if (ad.overflow_level > 10):
            return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
        ad.overflow_level += 1
    # --- 6. Trailing attributes after the name ----------------------------------
    attrs1 = None
    has_position = False
    open_br = False
    t = end.next0_
    first_pass3095 = True
    while True:
        # Emulated for-loop: every `continue` below advances t to t.next0_.
        if first_pass3095: first_pass3095 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.is_table_control_char):
            break
        if (t.is_newline_before):
            if (t.newlines_before_count > 2):
                break
            if (attrs1 is not None and len(attrs1) > 0):
                break
            ml = MailLine.parse(t, 0)
            if (ml is not None and ml.typ == MailLine.Types.FROM):
                break
            if (t.chars.is_capital_upper):
                # A capitalised attribute on a new line is accepted only under
                # the conditions that set ok1 below.
                attr1 = PersonAttrToken.tryAttach(t, (None if ad is None else ad.local_ontology), PersonAttrToken.PersonAttrAttachAttrs.NO)
                ok1 = False
                if (attr1 is not None):
                    if (has_prefix or attr1.is_newline_after or ((attr1.end_token.next0_ is not None and attr1.end_token.next0_.is_table_control_char))):
                        ok1 = True
                    else:
                        # Accept when the attribute spans more than one word.
                        tt2 = t.next0_
                        while tt2 is not None and tt2.end_char <= attr1.end_char:
                            if (tt2.is_whitespace_before):
                                ok1 = True
                            tt2 = tt2.next0_
                else:
                    ttt = PersonHelper.__correctTailAttributes(p, t)
                    if (ttt is not None and ttt != t):
                        t = ttt
                        end = t
                        continue
                if (not ok1):
                    break
        if (t.is_hiphen or t.isCharOf("_>|")):
            continue
        if (t.isValue("МОДЕЛЬ", None)):
            break
        tt = PersonHelper.__correctTailAttributes(p, t)
        if (tt != t and tt is not None):
            t = tt
            end = t
            continue
        is_be = False  # never set to True in this function
        if (t.isChar('(') and t == end.next0_):
            # Bracket right after the name: may hold another surname.
            open_br = True
            t = t.next0_
            if (t is None):
                break
            pit1 = PersonItemToken.tryAttach(t, None, PersonItemToken.ParseAttr.NO, None)
            if ((pit1 is not None and t.chars.is_capital_upper and pit1.end_token.next0_ is not None) and (isinstance(t, TextToken)) and pit1.end_token.next0_.isChar(')')):
                if (pit1.lastname is not None):
                    inf = MorphBaseInfo._new2321(MorphCase.NOMINATIVE)
                    if (p.is_male):
                        inf.gender = Utils.valToEnum((inf.gender) | (MorphGender.MASCULINE), MorphGender)
                    if (p.is_female):
                        inf.gender = Utils.valToEnum((inf.gender) | (MorphGender.FEMINIE), MorphGender)
                    sur = PersonIdentityToken.createLastname(pit1, inf)
                    if (sur is not None):
                        p._addFioIdentity(sur, None, None)
                        t = pit1.end_token.next0_
                        end = t
                        continue
        elif (t.is_comma):
            t = t.next0_
            if ((isinstance(t, TextToken)) and (t).isValue("WHO", None)):
                continue
        elif ((isinstance(t, TextToken)) and (t).is_verb_be):
            t = t.next0_
        elif (t.is_and and t.is_whitespace_after and not t.is_newline_after):
            if (t == end.next0_):
                break
            t = t.next0_
        elif (t.is_hiphen and t == end.next0_):
            t = t.next0_
        elif (t.isChar('.') and t == end.next0_ and has_prefix):
            t = t.next0_
        ttt2 = PersonHelper.createNickname(p, t)
        if (ttt2 is not None):
            end = ttt2
            t = end
            continue
        if (t is None):
            break
        attr = None
        attr = PersonAttrToken.tryAttach(t, (None if ad is None else ad.local_ontology), PersonAttrToken.PersonAttrAttachAttrs.NO)
        if (attr is None):
            # Not an attribute: handle a few special tails, then stop scanning.
            if ((t is not None and t.getReferent() is not None and t.getReferent().type_name == "GEO") and attrs1 is not None and open_br):
                continue
            if ((t.chars.is_capital_upper and open_br and t.next0_ is not None) and t.next0_.isChar(')')):
                # "(Word)" after the name is stored as the surname if none is set yet.
                if (p.findSlot(PersonReferent.ATTR_LASTNAME, None, True) is None):
                    p.addSlot(PersonReferent.ATTR_LASTNAME, t.getSourceText().upper(), False, 0)
                    t = t.next0_
                    end = t
            # The relative pronoun "КОТОРЫЙ" (singular) reveals the person's gender.
            if (t is not None and t.isValue("КОТОРЫЙ", None) and t.morph.number == MorphNumber.SINGULAR):
                if (not p.is_female and t.morph.gender == MorphGender.FEMINIE):
                    p.is_female = True
                    p._correctData()
                elif (not p.is_male and t.morph.gender == MorphGender.MASCULINE):
                    p.is_male = True
                    p._correctData()
            break
        if (attr.morph.number == MorphNumber.PLURAL):
            break
        if (attr.typ == PersonAttrTerminType.BESTREGARDS):
            break
        if (attr.is_doubt):
            # A doubtful attribute is kept only in these supporting contexts.
            if (has_prefix):
                pass
            elif (t.is_newline_before and attr.is_newline_after):
                pass
            elif (t.previous is not None and ((t.previous.is_hiphen or t.previous.isChar(':')))):
                pass
            else:
                break
        # The attribute's case must be compatible with the name's case.
        if (not morph_.case_.is_undefined and not attr.morph.case_.is_undefined):
            if (((morph_.case_) & attr.morph.case_).is_undefined and not is_be):
                break
        if (open_br):
            # Inside brackets: stop if another person starts here.
            if (PersonAnalyzer._tryAttachPerson(t, ad, False, 0, True) is not None):
                break
        if (attrs1 is None):
            if (t.previous.is_comma and t.previous == end.next0_):
                # "<name>, <attr> <verb>": rejected unless `begin` can start a sentence.
                ttt = attr.end_token.next0_
                if (ttt is not None):
                    if (ttt.morph.class0_.is_verb):
                        if (MiscHelper.canBeStartOfSentence(begin)):
                            pass
                        else:
                            break
            attrs1 = list()
        attrs1.append(attr)
        if (attr.typ == PersonAttrTerminType.POSITION or attr.typ == PersonAttrTerminType.KING):
            if (not is_be):
                has_position = True
        elif (attr.typ != PersonAttrTerminType.PREFIX):
            if (attr.typ == PersonAttrTerminType.OTHER and attr.age is not None):
                pass
            else:
                # Any other attribute type invalidates the whole collected tail.
                attrs1 = (None)
                break
        t = attr.end_token
    # --- 7. Decide whether the collected tail really belongs to this person ------
    if (attrs1 is not None and has_position and attrs is not None):
        # NOTE(review): attrs may be an empty list here, in which case
        # attrs[len(attrs) - 1] reads attrs[-1] and raises IndexError - confirm
        # callers never pass an empty list.
        te1 = attrs[len(attrs) - 1].end_token.next0_
        te2 = attrs1[0].begin_token
        if (te1.whitespaces_after_count > te2.whitespaces_before_count and (te2.whitespaces_before_count < 2)):
            pass
        elif (attrs1[0].age is not None):
            pass
        elif (((te1.is_hiphen or te1.isChar(':'))) and not attrs1[0].is_newline_before and ((te2.previous.is_comma or te2.previous == end))):
            pass
        else:
            # A POSITION already stands before the name: drop the tail unless it
            # is at the end of the text or followed by '.'.
            for a in attrs:
                if (a.typ == PersonAttrTerminType.POSITION):
                    te = attrs1[len(attrs1) - 1].end_token
                    if (te.next0_ is not None):
                        if (not te.next0_.isChar('.')):
                            attrs1 = (None)
                            break
    if (attrs1 is not None and not has_prefix):
        # The tail may actually be the leading attribute of the NEXT person.
        attr = attrs1[len(attrs1) - 1]
        ok = False
        if (attr.end_token.next0_ is not None and attr.end_token.next0_.chars.is_capital_upper):
            ok = True
        else:
            rt = PersonAnalyzer._tryAttachPerson(attr.begin_token, ad, False, -1, False)
            if (rt is not None and (isinstance(rt.referent, PersonReferent))):
                ok = True
        if (ok):
            # Whitespace decides: the tail goes to whichever side it is closer to.
            if (attr.begin_token.whitespaces_before_count > attr.end_token.whitespaces_after_count):
                attrs1 = (None)
            elif (attr.begin_token.whitespaces_before_count == attr.end_token.whitespaces_after_count):
                rt1 = PersonAnalyzer._tryAttachPerson(attr.begin_token, ad, False, -1, False)
                if (rt1 is not None):
                    attrs1 = (None)
    if (attrs1 is not None):
        # Commit the accepted trailing attributes to the person.
        for a in attrs1:
            if (a.typ != PersonAttrTerminType.PREFIX):
                if (a.age is not None):
                    p.addSlot(PersonReferent.ATTR_AGE, a.age, True, 0)
                elif (a.prop_ref is None):
                    p.addSlot(PersonReferent.ATTR_ATTR, a.value, False, 0)
                else:
                    p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
                end = a.end_token
                if (a.gender != MorphGender.UNDEFINED and not p.is_female and not p.is_male):
                    if (a.gender == MorphGender.MASCULINE and not p.is_male):
                        p.is_male = True
                        p._correctData()
                    elif (a.gender == MorphGender.FEMINIE and not p.is_female):
                        p.is_female = True
                        p._correctData()
        if (open_br):
            # Include the closing bracket in the span.
            if (end.next0_ is not None and end.next0_.isChar(')')):
                end = end.next0_
    # --- 8. Trailing contacts, identity documents and organizations -------------
    crlf_cou = 0  # line breaks seen since the last absorbed item
    t = end.next0_
    first_pass3096 = True
    while True:
        if first_pass3096: first_pass3096 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.is_table_control_char):
            break
        if (t.is_newline_before):
            ml = MailLine.parse(t, 0)
            if (ml is not None and ml.typ == MailLine.Types.FROM):
                break
            crlf_cou += 1
        if (t.isCharOf(":,(") or t.is_hiphen):
            continue
        if (t.isChar('.') and t == end.next0_):
            continue
        r = t.getReferent()
        if (r is not None):
            if (r.type_name == "PHONE" or r.type_name == "URI" or r.type_name == "ADDRESS"):
                ty = r.getStringValue("SCHEME")
                # Only URIs with these schemes are treated as personal contacts.
                if (r.type_name == "URI"):
                    if ((ty != "mailto" and ty != "skype" and ty != "ICQ") and ty != "http"):
                        break
                p._addContact(r)
                end = t
                crlf_cou = 0
                continue
        if (isinstance(r, PersonIdentityReferent)):
            p.addSlot(PersonReferent.ATTR_IDDOC, r, False, 0)
            end = t
            crlf_cou = 0
            continue
        if (r is not None and r.type_name == "ORGANIZATION"):
            if (t.next0_ is not None and t.next0_.morph.class0_.is_verb):
                break
            if (begin.previous is not None and begin.previous.morph.class0_.is_verb):
                break
            if (t.whitespaces_after_count == 1):
                break
            # Skip the organization if an existing attribute already refers to it.
            exist = False
            for s in p.slots:
                if (s.type_name == PersonReferent.ATTR_ATTR and (isinstance(s.value, PersonPropertyReferent))):
                    pr = Utils.asObjectOrNull(s.value, PersonPropertyReferent)
                    if (pr.findSlot(PersonPropertyReferent.ATTR_REF, r, True) is not None):
                        exist = True
                        break
                elif (s.type_name == PersonReferent.ATTR_ATTR and (isinstance(s.value, PersonAttrToken))):
                    pr = Utils.asObjectOrNull(s.value, PersonAttrToken)
                    if (pr.referent.findSlot(PersonPropertyReferent.ATTR_REF, r, True) is not None):
                        exist = True
                        break
            if (not exist):
                # Otherwise link it through a generic "сотрудник" (employee) property.
                # Note that `end` is not moved over the organization token.
                pat = PersonAttrToken(t, t)
                pat.prop_ref = PersonPropertyReferent._new2291("сотрудник")
                pat.prop_ref.addSlot(PersonPropertyReferent.ATTR_REF, r, False, 0)
                p.addSlot(PersonReferent.ATTR_ATTR, pat, False, 0)
            continue
        if (r is not None):
            break
        # Plain text is skipped only when has_prefix is set and fewer than two
        # line breaks have passed since the last absorbed item.
        if (not has_prefix or crlf_cou >= 2):
            break
        rt = t.kit.processReferent("PERSON", t)
        if (rt is not None):
            break
    if (ad is not None):
        ad.overflow_level -= 1
    return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
def process(self, kit: 'AnalysisKit') -> None:
    """Find weapon mentions in ``kit`` and embed them as referent tokens.

    Pass 1 attaches weapons from parsed item lists and indexes their models
    (alone and as "<brand> <model>") and names. Pass 2 re-scans the text and
    links bare occurrences of those models/names to the known referents.

    Args:
        kit(AnalysisKit): analysis container whose token chain is modified.
    """
    data = kit.getAnalyzerData(self)
    model_terms = TerminCollection()
    refs_by_model = dict()
    name_terms = TerminCollection()

    def remember_model(key, referent):
        # Register ``referent`` under the model string ``key``;
        # keys starting with a digit are not indexed.
        if key[0].isdigit():
            return
        holder = RefOutArgWrapper(None)
        found = Utils.tryGetValue(refs_by_model, key, holder)
        bucket = holder.value
        if not found:
            bucket = list()
            refs_by_model[key] = bucket
        if referent not in bucket:
            bucket.append(referent)
        model_terms.addStr(key, bucket, None, False)

    def link_mention(tok):
        # Try to link ``tok`` to a known weapon; returns the embedded
        # ReferentToken, or None when nothing was embedded.
        bracket = BracketHelper.tryParse(tok, BracketParseAttr.NO, 10)
        if bracket is not None:
            # A known name that fills the whole bracket sequence.
            inner = name_terms.tryParse(tok.next0_, TerminParseAttr.NO)
            if inner is not None and inner.end_token.next0_ == bracket.end_token:
                quoted = ReferentToken(
                    Utils.asObjectOrNull(inner.termin.tag, Referent),
                    bracket.begin_token, bracket.end_token)
                kit.embedToken(quoted)
                return quoted
        if not isinstance(tok, TextToken) or not tok.chars.is_letter:
            return None
        hit = model_terms.tryParse(tok, TerminParseAttr.NO)
        if hit is None and not tok.chars.is_all_lower:
            # Names are tried only for tokens that are not all lower-case.
            hit = name_terms.tryParse(tok, TerminParseAttr.NO)
        if hit is None:
            return None
        if not hit.is_whitespace_after:
            # Without whitespace, the match must be followed by ',', '.', ')'
            # or a bracket.
            follower = hit.end_token.next0_
            if follower is None or not follower.isCharOf(",.)"):
                if not BracketHelper.isBracket(follower, False):
                    return None
        # A model term carries a list of referents (used only when it has
        # exactly one entry); a name term carries the referent itself.
        candidates = Utils.asObjectOrNull(hit.termin.tag, list)
        if candidates is not None and len(candidates) == 1:
            target = candidates[0]
        else:
            target = Utils.asObjectOrNull(hit.termin.tag, Referent)
        if target is None:
            return None
        # A brand right before the match is added to the referent and the span.
        brand_tok = WeaponItemToken.tryParse(hit.begin_token.previous, None, False, True)
        if brand_tok is not None and brand_tok.typ == WeaponItemToken.Typs.BRAND:
            target.addSlot(WeaponReferent.ATTR_BRAND, brand_tok.value, False, 0)
            hit.begin_token = brand_tok.begin_token
        mention = ReferentToken(target, hit.begin_token, hit.end_token)
        kit.embedToken(mention)
        return mention

    # Pass 1: attach weapons and build the model/name indexes.
    cur = kit.first_token
    while cur is not None:
        items = WeaponItemToken.tryParseList(cur, 10)
        attached = self.__tryAttach(items, False) if items is not None else None
        if attached is not None:
            for rt in attached:
                rt.referent = data.registerReferent(rt.referent)
                kit.embedToken(rt)
                cur = rt
                for slot in rt.referent.slots:
                    if slot.type_name == WeaponReferent.ATTR_MODEL:
                        key = str(slot.value)
                        remember_model(key, rt.referent)
                        brand = rt.referent.getStringValue(WeaponReferent.ATTR_BRAND)
                        if brand is not None:
                            remember_model("{0} {1}".format(brand, key), rt.referent)
                    elif slot.type_name == WeaponReferent.ATTR_NAME:
                        name_terms.add(Termin._new117(str(slot.value), rt.referent))
        cur = cur.next0_

    if len(refs_by_model) == 0 and len(name_terms.termins) == 0:
        return

    # Pass 2: link repeated mentions of the indexed models and names.
    cur = kit.first_token
    while cur is not None:
        embedded = link_mention(cur)
        if embedded is not None:
            cur = embedded
        cur = cur.next0_
def __tryAttach(self, its: typing.List['WeaponItemToken'], attach: bool) -> typing.List['ReferentToken']:
    """Assemble one WeaponReferent from a list of parsed weapon items.

    Items are consumed left to right until one contradicts what was already
    collected. If no explicit noun was found, type/brand/model may be
    inherited from a weapon referent earlier in the text. Without a reliable
    noun the result is accepted only with extra evidence (model, a
    non-doubtful brand, or a weapon-related word among the preceding tokens).

    Args:
        its(typing.List[WeaponItemToken]): parsed items, in text order.
        attach(bool): not used by this function body.

    Returns:
        typing.List[ReferentToken]: a one-element list, or None when the items
        do not form an acceptable weapon.
    """
    # Robustness: the original indexed its[0] unconditionally.
    if not its:
        return None
    weapon = WeaponReferent()
    last_token = None
    noun = None
    brand = None
    model = None
    kinds = WeaponItemToken.Typs

    def already_has(attr, value=None):
        # True when the referent already holds ``attr`` (optionally with ``value``).
        return weapon.findSlot(attr, value, True) is not None

    def add_values(attr, item):
        # Store the item's value and, if present, its alternative value.
        weapon.addSlot(attr, item.value, False, 0)
        if item.alt_value is not None:
            weapon.addSlot(attr, item.alt_value, False, 0)

    def inherit_from(previous, optional_attrs):
        # Copy every TYPE slot from ``previous``; copy ``optional_attrs`` only
        # if this referent had no such slot before the copy started.
        pending = []
        for slot in previous.slots:
            if slot.type_name == WeaponReferent.ATTR_TYPE:
                weapon.addSlot(slot.type_name, slot.value, False, 0)
            elif slot.type_name in optional_attrs:
                if not already_has(slot.type_name):
                    pending.append(slot)
        for slot in pending:
            weapon.addSlot(slot.type_name, slot.value, False, 0)

    for item in its:
        kind = item.typ
        if kind == kinds.NOUN:
            if len(its) == 1:
                # A lone noun is never a weapon on its own.
                return None
            if already_has(WeaponReferent.ATTR_TYPE) and not already_has(WeaponReferent.ATTR_TYPE, item.value):
                break
            if not item.is_internal:
                noun = item
            add_values(WeaponReferent.ATTR_TYPE, item)
        elif kind == kinds.BRAND:
            if already_has(WeaponReferent.ATTR_BRAND) and not already_has(WeaponReferent.ATTR_BRAND, item.value):
                break
            # An explicit brand removes the doubt from the noun.
            if not item.is_internal and noun is not None and noun.is_doubt:
                noun.is_doubt = False
            brand = item
            weapon.addSlot(WeaponReferent.ATTR_BRAND, item.value, False, 0)
        elif kind == kinds.MODEL:
            if already_has(WeaponReferent.ATTR_MODEL) and not already_has(WeaponReferent.ATTR_MODEL, item.value):
                break
            model = item
            add_values(WeaponReferent.ATTR_MODEL, item)
        elif kind == kinds.NAME:
            if already_has(WeaponReferent.ATTR_NAME):
                break
            add_values(WeaponReferent.ATTR_NAME, item)
        elif kind == kinds.NUMBER:
            if already_has(WeaponReferent.ATTR_NUMBER):
                break
            add_values(WeaponReferent.ATTR_NUMBER, item)
        elif kind == kinds.DATE:
            if already_has(WeaponReferent.ATTR_DATE):
                break
            weapon.addSlot(WeaponReferent.ATTR_DATE, item.ref, True, 0)
        else:
            # Other item kinds are skipped without extending the span.
            continue
        last_token = item.end_token

    # FIX: if no item contributed, there is no end token. The original could
    # still reach the final ReferentToken(...) with a None end token when a
    # preceding weapon referent was found.
    if last_token is None:
        return None

    has_good_noun = False if noun is None else not noun.is_doubt
    first_token = its[0].begin_token
    if noun is None:
        # Walk back over non-letter tokens and conjunctions; a weapon referent
        # found there lends its type, brand and model.
        tok = first_token.previous
        while tok is not None:
            previous = Utils.asObjectOrNull(tok.getReferent(), WeaponReferent)
            if previous is not None:
                inherit_from(previous, (WeaponReferent.ATTR_BRAND, WeaponReferent.ATTR_MODEL))
                has_good_noun = True
                break
            if not (isinstance(tok, TextToken) and (not tok.chars.is_letter or tok.morph.class0_.is_conjunction)):
                break
            tok = tok.previous
    if noun is None and model is not None:
        # Wider search (up to 100 tokens back) for a weapon with the same model.
        tok = first_token.previous
        steps = 0
        while tok is not None and steps < 100:
            previous = Utils.asObjectOrNull(tok.getReferent(), WeaponReferent)
            if previous is not None and previous.findSlot(WeaponReferent.ATTR_MODEL, model.value, True) is not None:
                inherit_from(previous, (WeaponReferent.ATTR_BRAND,))
                has_good_noun = True
                break
            tok = tok.previous
            steps += 1

    if not has_good_noun:
        if noun is not None:
            # A doubtful noun needs a model or a non-doubtful brand.
            if model is None and (brand is None or brand.is_doubt):
                return None
        else:
            if model is None:
                return None
            # A bare model needs a weapon-related word within the 20 tokens
            # before the last consumed token.
            confirmed = False
            tok = last_token.previous
            steps = 0
            while tok is not None and steps < 20:
                if (tok.isValue("ОРУЖИЕ", None) or tok.isValue("ВООРУЖЕНИЕ", None)
                        or tok.isValue("ВЫСТРЕЛ", None) or tok.isValue("ВЫСТРЕЛИТЬ", None)):
                    confirmed = True
                    break
                tok = tok.previous
                steps += 1
            if not confirmed:
                return None
    return [ReferentToken(weapon, first_token, last_token)]
def try_attach(t: 'Token') -> 'ReferentToken':
    """Try to recognise an identity document starting at token ``t``.

    A keyword item must start at ``t``; the following items (number, series,
    date, address, issuing organization, ...) are then collected and folded
    into a ``PersonIdentityReferent``.

    Args:
        t(Token): first token of the candidate; ``None`` is accepted.

    Returns:
        ReferentToken for the document, or None when no keyword starts at
        ``t`` or no number/series follows it.
    """
    if t is None or not t.chars.is_letter:
        return None
    noun = PersonIdToken.__try_parse(t, None)
    if noun is None:
        return None
    kinds = PersonIdToken.Typs

    # Collect the items that follow the keyword.
    parts = list()
    tok = noun.end_token.next0_
    while tok is not None:
        if tok.is_table_control_char:
            break
        if not tok.is_char_of(",:"):
            # Each item is parsed in the context of the previous one.
            context = parts[-1] if parts else noun
            idt = PersonIdToken.__try_parse(tok, context)
            if idt is None:
                # These two words are skipped; anything else ends the scan.
                if not (tok.is_value("ОТДЕЛ", None) or tok.is_value("ОТДЕЛЕНИЕ", None)):
                    break
            elif idt.typ == kinds.KEYWORD:
                # A new keyword starts another document.
                break
            else:
                parts.append(idt)
                tok = idt.end_token
        tok = tok.next0_
    if not parts:
        return None

    # The document number is formed from the first one or two items.
    first = parts[0]
    second = parts[1] if len(parts) > 1 else None
    if first.typ == kinds.NUMBER:
        if second is not None and second.typ == kinds.NUMBER and second.has_prefix:
            number, consumed = first.value + second.value, 2
        else:
            number, consumed = first.value, 1
    elif first.typ == kinds.SERIA and second is not None and second.typ == kinds.NUMBER:
        number, consumed = first.value + second.value, 2
    elif first.typ == kinds.SERIA and len(first.value) > 5:
        number, consumed = first.value, 1
    else:
        return None

    pid = PersonIdentityReferent()
    pid.typ = noun.value.lower()
    pid.number = number
    if isinstance(noun.referent, GeoReferent):
        pid.state = noun.referent

    # Item kinds that fill a slot, each at most once.
    single_slots = (
        (kinds.DATE, PersonIdentityReferent.ATTR_DATE),
        (kinds.ADDRESS, PersonIdentityReferent.ATTR_ADDRESS),
        (kinds.ORG, PersonIdentityReferent.ATTR_ORG),
    )
    while consumed < len(parts):
        part = parts[consumed]
        # VIDAN and CODE items are consumed without filling a slot.
        if part.typ != kinds.VIDAN and part.typ != kinds.CODE:
            attr = None
            if part.referent is not None:
                for kind, slot_name in single_slots:
                    if part.typ == kind:
                        attr = slot_name
                        break
            # Stop at an unsupported item or at a repeated slot.
            if attr is None or pid.find_slot(attr, None, True) is not None:
                break
            pid.add_slot(attr, part.referent, False, 0)
        consumed += 1
    return ReferentToken(pid, noun.begin_token, parts[consumed - 1].end_token)