def process(self, kit: 'AnalysisKit') -> None:
    """Find measure expressions in the text and embed them as referent tokens.

    The work is done in three steps:

    1. When ``kit.ontology`` is present, the NAME / FULLNAME slot values of
       its unit referents whose ``_m_unit`` is unset are gathered into an
       additional termin collection that is handed to the measure parser.
    2. The token chain is walked from ``kit.first_token``; wherever a
       ``MeasureToken`` is parsed, the referent tokens it produces are
       embedded into the chain one after another.
    3. When ``kit.ontology`` is present, every registered unit referent is
       linked to the first ontology item that shares one of its names.

    Args:
        kit: analysis container supplying the token chain, the optional
            external ontology and this analyzer's data storage.
    """

    def _is_name_slot(slot) -> bool:
        # Only the short and the full unit names take part in the matching.
        return (slot.type_name == UnitReferent.ATTR_NAME
                or slot.type_name == UnitReferent.ATTR_FULLNAME)

    ad = kit.getAnalyzerData(self)

    # Step 1: extra unit names coming from the external ontology.
    extra_units = None
    if kit.ontology is not None:
        extra_units = TerminCollection()
        for item in kit.ontology.items:
            onto_unit = Utils.asObjectOrNull(item.referent, UnitReferent)
            if onto_unit is None or onto_unit._m_unit is not None:
                continue
            for slot in onto_unit.slots:
                if _is_name_slot(slot):
                    extra_units.add(Termin._new117(
                        Utils.asObjectOrNull(slot.value, str), onto_unit))

    # Step 2: scan the text and embed every recognised measure.
    t = kit.first_token
    while t is not None:
        measure = MeasureToken.tryParseMinimal(t, extra_units, False)
        if measure is None:
            measure = MeasureToken.tryParse(t, extra_units, True, False)
        produced = None
        if measure is not None:
            produced = measure.createRefenetsTokensWithRegister(ad, True)
        if produced is not None:
            for idx, rt in enumerate(produced):
                t.kit.embedToken(rt)
                # Scanning resumes after the token that was just embedded.
                t = rt
                # Tokens still waiting to be embedded may share a boundary
                # with the one just embedded; re-point that boundary at the
                # embedded token itself.
                for later_idx in range(idx + 1, len(produced)):
                    later = produced[later_idx]
                    if later.begin_token == rt.begin_token:
                        later.begin_token = t
                    if later.end_token == rt.end_token:
                        later.end_token = t
        t = t.next0_

    # Step 3: attach ontology items to the registered units.
    if kit.ontology is not None:
        for registered in ad.referents:
            unit = Utils.asObjectOrNull(registered, UnitReferent)
            if unit is None:
                continue
            for item in kit.ontology.items:
                onto_unit = Utils.asObjectOrNull(item.referent, UnitReferent)
                if onto_unit is None:
                    continue
                shares_name = any(
                    _is_name_slot(slot)
                    and unit.findSlot(None, slot.value, True) is not None
                    for slot in onto_unit.slots
                )
                if shares_name:
                    # Only the first matching ontology item is kept.
                    unit.ontology_items = list()
                    unit.ontology_items.append(item)
                    break
def process(self, kit: 'AnalysisKit') -> None:
    """Extract weapon referents and then link their repeated mentions.

    First pass: weapon item lists are parsed token by token; every referent
    token obtained from them is registered and embedded, and its model
    strings (plain and brand-prefixed) and names are indexed.

    Second pass (skipped when nothing was indexed): the text is scanned
    again and places matching an indexed model or name are embedded as
    tokens of the corresponding referent.

    Args:
        kit: analysis container supplying the token chain and this
            analyzer's data storage.
    """
    ad = kit.getAnalyzerData(self)
    models = TerminCollection()
    objs_by_model = dict()
    obj_by_names = TerminCollection()

    def _index_model(model_name, owner) -> None:
        # Model strings that start with a digit are not indexed.
        if model_name[0].isdigit():
            return
        holder = RefOutArgWrapper(None)
        known = Utils.tryGetValue(objs_by_model, model_name, holder)
        owners = holder.value
        if not known:
            owners = list()
            objs_by_model[model_name] = owners
        if owner not in owners:
            owners.append(owner)
        models.addStr(model_name, owners, None, False)

    def _link_mention(cur):
        # Tries to embed a referent token at ``cur``; returns the token the
        # scan has to advance from (the embedded token, or ``cur`` itself).
        br = BracketHelper.tryParse(cur, BracketParseAttr.NO, 10)
        if br is not None:
            # A known name that fills the whole bracket sequence.
            toks = obj_by_names.tryParse(cur.next0_, TerminParseAttr.NO)
            if toks is not None and toks.end_token.next0_ == br.end_token:
                bracketed = ReferentToken(
                    Utils.asObjectOrNull(toks.termin.tag, Referent),
                    br.begin_token, br.end_token)
                kit.embedToken(bracketed)
                return bracketed
        if not isinstance(cur, TextToken):
            return cur
        if not cur.chars.is_letter:
            return cur
        tok = models.tryParse(cur, TerminParseAttr.NO)
        if tok is None and not cur.chars.is_all_lower:
            # Names are only tried for tokens that are not all-lowercase.
            tok = obj_by_names.tryParse(cur, TerminParseAttr.NO)
        if tok is None:
            return cur
        if not tok.is_whitespace_after:
            follower = tok.end_token.next0_
            if follower is None or not follower.isCharOf(",.)"):
                if not BracketHelper.isBracket(follower, False):
                    return cur
        owners = Utils.asObjectOrNull(tok.termin.tag, list)
        if owners is not None and len(owners) == 1:
            # A model string is only usable when it identifies one referent.
            target = owners[0]
        else:
            target = Utils.asObjectOrNull(tok.termin.tag, Referent)
        if target is None:
            return cur
        tit = WeaponItemToken.tryParse(tok.begin_token.previous, None, False, True)
        if tit is not None and tit.typ == WeaponItemToken.Typs.BRAND:
            # A brand right before the match is stored and absorbed.
            target.addSlot(WeaponReferent.ATTR_BRAND, tit.value, False, 0)
            tok.begin_token = tit.begin_token
        mention = ReferentToken(target, tok.begin_token, tok.end_token)
        kit.embedToken(mention)
        return mention

    # First pass: extraction and indexing.
    t = kit.first_token
    while t is not None:
        its = WeaponItemToken.tryParseList(t, 10)
        rts = None if its is None else self.__tryAttach(its, False)
        if rts is not None:
            for rt in rts:
                rt.referent = ad.registerReferent(rt.referent)
                kit.embedToken(rt)
                t = rt
                for slot in rt.referent.slots:
                    if slot.type_name == WeaponReferent.ATTR_MODEL:
                        mod = str(slot.value)
                        _index_model(mod, rt.referent)
                        brand = rt.referent.getStringValue(
                            WeaponReferent.ATTR_BRAND)
                        if brand is not None:
                            _index_model("{0} {1}".format(brand, mod),
                                         rt.referent)
                    elif slot.type_name == WeaponReferent.ATTR_NAME:
                        obj_by_names.add(
                            Termin._new117(str(slot.value), rt.referent))
        t = t.next0_

    nothing_indexed = (len(objs_by_model) == 0
                       and len(obj_by_names.termins) == 0)
    if nothing_indexed:
        return

    # Second pass: link further mentions of what was indexed.
    t = kit.first_token
    while t is not None:
        t = _link_mention(t).next0_
def process(self, kit: 'AnalysisKit') -> None:
    """Extract transport referents and then link their repeated mentions.

    First pass: transport item lists are parsed token by token. Before a new
    referent is registered, up to 1000 preceding tokens are inspected for an
    already embedded ``TransportReferent`` that contains every slot of the
    new one; when found, that earlier referent is reused. Model strings
    (plain and brand-prefixed) and names of each referent are indexed.

    Second pass (skipped when nothing was indexed): the text is scanned
    again and places matching an indexed model or name are embedded as
    tokens of the corresponding referent.

    Args:
        kit: analysis container supplying the token chain and this
            analyzer's data storage.
    """
    ad = kit.get_analyzer_data(self)
    models = TerminCollection()
    objs_by_model = dict()
    obj_by_names = TerminCollection()

    def _find_earlier_equivalent(rt, start):
        # Walks backwards from ``start`` and returns the first transport
        # referent holding all slots of ``rt.referent`` (None when absent).
        steps = 0
        tt = start
        while tt is not None and steps < 1000:
            tr = Utils.asObjectOrNull(tt.get_referent(), TransportReferent)
            if tr is not None and all(
                    tr.find_slot(s.type_name, s.value, True) is not None
                    for s in rt.referent.slots):
                return tr
            tt = tt.previous
            steps += 1
        return None

    def _index_model(model_name, owner) -> None:
        # Model strings that start with a digit are not indexed.
        if model_name[0].isdigit():
            return
        holder = RefOutArgWrapper(None)
        known = Utils.tryGetValue(objs_by_model, model_name, holder)
        owners = holder.value
        if not known:
            owners = list()
            objs_by_model[model_name] = owners
        if owner not in owners:
            owners.append(owner)
        models.add_string(model_name, owners, None, False)

    def _link_mention(cur):
        # Tries to embed a referent token at ``cur``; returns the token the
        # scan has to advance from (the embedded token, or ``cur`` itself).
        br = BracketHelper.try_parse(cur, BracketParseAttr.NO, 10)
        if br is not None:
            # A known name that fills the whole bracket sequence.
            toks = obj_by_names.try_parse(cur.next0_, TerminParseAttr.NO)
            if toks is not None and toks.end_token.next0_ == br.end_token:
                bracketed = ReferentToken(
                    Utils.asObjectOrNull(toks.termin.tag, Referent),
                    br.begin_token, br.end_token)
                kit.embed_token(bracketed)
                return bracketed
        if not isinstance(cur, TextToken):
            return cur
        if not cur.chars.is_letter:
            return cur
        tok = models.try_parse(cur, TerminParseAttr.NO)
        if tok is None and not cur.chars.is_all_lower:
            # Names are only tried for tokens that are not all-lowercase.
            tok = obj_by_names.try_parse(cur, TerminParseAttr.NO)
        if tok is None:
            return cur
        if not tok.is_whitespace_after:
            follower = tok.end_token.next0_
            if follower is None or not follower.is_char_of(",.)"):
                if not BracketHelper.is_bracket(follower, False):
                    return cur
        owners = Utils.asObjectOrNull(tok.termin.tag, list)
        if owners is not None and len(owners) == 1:
            # A model string is only usable when it identifies one referent.
            target = owners[0]
        else:
            target = Utils.asObjectOrNull(tok.termin.tag, Referent)
        if target is None:
            return cur
        tit = TransItemToken.try_parse(tok.begin_token.previous, None, False, True)
        if tit is not None and tit.typ == TransItemToken.Typs.BRAND:
            # A brand right before the match is stored and absorbed.
            target.add_slot(TransportReferent.ATTR_BRAND, tit.value, False, 0)
            tok.begin_token = tit.begin_token
        mention = ReferentToken(target, tok.begin_token, tok.end_token)
        kit.embed_token(mention)
        return mention

    # First pass: extraction, de-duplication and indexing.
    t = kit.first_token
    while t is not None:
        its = TransItemToken.try_parse_list(t, 10)
        rts = None if its is None else self.__try_attach(its, False)
        if rts is not None:
            for rt in rts:
                earlier = _find_earlier_equivalent(rt, t.previous)
                if earlier is not None:
                    rt.referent = earlier
                rt.referent = ad.register_referent(rt.referent)
                kit.embed_token(rt)
                t = rt
                for slot in rt.referent.slots:
                    if slot.type_name == TransportReferent.ATTR_MODEL:
                        mod = str(slot.value)
                        _index_model(mod, rt.referent)
                        brand = rt.referent.get_string_value(
                            TransportReferent.ATTR_BRAND)
                        if brand is not None:
                            _index_model("{0} {1}".format(brand, mod),
                                         rt.referent)
                    elif slot.type_name == TransportReferent.ATTR_NAME:
                        obj_by_names.add(
                            Termin._new100(str(slot.value), rt.referent))
        t = t.next0_

    nothing_indexed = (len(objs_by_model) == 0
                       and len(obj_by_names.termins) == 0)
    if nothing_indexed:
        return

    # Second pass: link further mentions of what was indexed.
    t = kit.first_token
    while t is not None:
        t = _link_mention(t).next0_
class IntOntologyCollection:
    """Internal ontology dictionary, essentially a layer over TerminCollection.

    Original author's note: the reason it was introduced is no longer
    remembered, but something still needs it.
    """

    class OntologyTermin(Termin):
        """Termin that additionally stores the ontology item owning it."""

        def __init__(self) -> None:
            super().__init__(None, None, False)
            self.owner = None

        @staticmethod
        def _new489(_arg1: 'IntOntologyItem', _arg2: object) -> 'OntologyTermin':
            """Create a termin with ``owner`` and ``tag`` already assigned."""
            termin = IntOntologyCollection.OntologyTermin()
            termin.owner = _arg1
            termin.tag = _arg2
            return termin

    def __init__(self) -> None:
        self.is_ext_ontology = False
        self.__m_items = list()
        self.__m_termins = TerminCollection()

    @property
    def items(self) -> typing.List['IntOntologyItem']:
        """Items held by the collection (the internal list itself)."""
        return self.__m_items

    def add_item(self, di: 'IntOntologyItem') -> None:
        """Add an item and index all of its termins.

        Termins that are not ``OntologyTermin`` instances are copied into
        new ``OntologyTermin`` objects, which replace them in ``di.termins``.

        Args:
            di: item to add; its ``owner`` is set to this collection.
        """
        self.__m_items.append(di)
        di.owner = self
        for pos, term in enumerate(di.termins):
            if isinstance(term, IntOntologyCollection.OntologyTermin):
                term.owner = di
                self.__m_termins.add(term)
                continue
            wrapped = IntOntologyCollection.OntologyTermin._new489(di, term.tag)
            term.copy_to(wrapped)
            self.__m_termins.add(wrapped)
            di.termins[pos] = wrapped

    def add_referent(self, referent: 'Referent') -> bool:
        """Add a referent through the ontology item it creates.

        When the referent already has an item in this collection, the item
        is rebuilt only if the freshly created one has a different number
        of termins.

        Args:
            referent: entity to add.

        Returns:
            False when ``referent`` is None or produces no ontology item,
            True otherwise.
        """
        if referent is None:
            return False
        if (referent._int_ontology_item is not None
                and referent._int_ontology_item.owner == self):
            fresh = referent.create_ontology_item()
            if (fresh is None
                    or len(fresh.termins) == len(referent._int_ontology_item.termins)):
                return True
            # The termin set changed: drop the stale item before re-adding.
            for stale in referent._int_ontology_item.termins:
                self.__m_termins.remove(stale)
            pos = Utils.indexOfList(self.__m_items, referent._int_ontology_item, 0)
            if pos >= 0:
                del self.__m_items[pos]
        else:
            fresh = referent.create_ontology_item()
        if fresh is None:
            return False
        fresh.referent = referent
        referent._int_ontology_item = fresh
        self.add_item(fresh)
        return True

    def add_termin(self, di: 'IntOntologyItem', t: 'Termin') -> None:
        """Index a copy of termin ``t`` as belonging to item ``di``."""
        wrapped = IntOntologyCollection.OntologyTermin._new489(di, t.tag)
        t.copy_to(wrapped)
        self.__m_termins.add(wrapped)

    def add(self, t: 'Termin') -> None:
        """Index a stand-alone termin as is."""
        self.__m_termins.add(t)

    def find_termin_by_canonic_text(self, text: str) -> typing.List['Termin']:
        """Delegate to the termin collection's lookup by canonic text."""
        return self.__m_termins.find_termins_by_canonic_text(text)

    def try_attach(self, t: 'Token', referent_type_name: str = None,
                   can_be_geo_object: bool = False) -> typing.List['IntOntologyToken']:
        """Parse all indexed termins starting at token ``t``.

        Args:
            t: token to start from.
            referent_type_name: when given, items whose referent has another
                type name are skipped.
            can_be_geo_object: selects ``TerminParseAttr.CANBEGEOOBJECT``
                instead of ``TerminParseAttr.NO`` for the parsing.

        Returns:
            Tokens found, at most one per ontology item, or None when there
            are none.
        """
        mode = (TerminParseAttr.CANBEGEOOBJECT if can_be_geo_object
                else TerminParseAttr.NO)
        tts = self.__m_termins.try_parse_all(t, mode)
        if tts is None:
            return None
        res = list()
        seen_items = list()
        for tt in tts:
            di = None
            if isinstance(tt.termin, IntOntologyCollection.OntologyTermin):
                di = tt.termin.owner
            if di is not None:
                if (di.referent is not None and referent_type_name is not None
                        and di.referent.type_name != referent_type_name):
                    continue
                if di in seen_items:
                    continue
                seen_items.append(di)
            res.append(IntOntologyToken._new491(
                tt.begin_token, tt.end_token, di, tt.termin, tt.morph))
        return res if len(res) > 0 else None

    def try_attach_by_item(self, item: 'IntOntologyItem') -> typing.List['IntOntologyItem']:
        """Collect the owners of indexed termins matching ``item``'s termins.

        Returns:
            List of distinct owners, or None when ``item`` is None or
            nothing matched.
        """
        if item is None:
            return None
        res = None
        for term in item.termins:
            found = self.__m_termins.find_termins_by_termin(term)
            if found is None:
                continue
            for tt in found:
                if not isinstance(tt, IntOntologyCollection.OntologyTermin):
                    continue
                owner = tt.owner
                if res is None:
                    res = list()
                if owner not in res:
                    res.append(owner)
        return res

    def try_attach_by_referent(self, referent: 'Referent',
                               item: 'IntOntologyItem' = None,
                               must_be_single: bool = False) -> typing.List['Referent']:
        """Find referents of the collection that can equal ``referent``.

        Args:
            referent: entity to look for.
            item: its ontology item; created from ``referent`` when None.
            must_be_single: when True and several referents were found,
                None is returned unless every pair of them can be merged.

        Returns:
            List of distinct matching referents, or None.
        """
        if referent is None:
            return None
        if item is None:
            item = referent.create_ontology_item()
        if item is None:
            return None
        candidates = self.try_attach_by_item(item)
        if candidates is None:
            return None
        res = None
        for oi in candidates:
            r = Utils.ifNotNull(oi.referent, Utils.asObjectOrNull(oi.tag, Referent))
            if r is None:
                continue
            if not referent.can_be_equals(r, ReferentsEqualType.WITHINONETEXT):
                continue
            if res is None:
                res = list()
            if r not in res:
                res.append(r)
        if must_be_single and res is not None and len(res) > 1:
            for i in range(len(res) - 1):
                for j in range(i + 1, len(res)):
                    if not res[i].can_be_equals(res[j], ReferentsEqualType.FORMERGING):
                        return None
        return res

    def remove(self, r: 'Referent') -> None:
        """Remove the first item bound to referent ``r`` and its termins."""
        for pos, oi in enumerate(self.__m_items):
            if oi.referent == r:
                oi.referent = None
                r._int_ontology_item = None
                del self.__m_items[pos]
                for term in oi.termins:
                    self.__m_termins.remove(term)
                break

    @staticmethod
    def _new2815(_arg1: bool) -> 'IntOntologyCollection':
        """Create a collection with ``is_ext_ontology`` preset."""
        res = IntOntologyCollection()
        res.is_ext_ontology = _arg1
        return res
class IntOntologyCollection:
    """Ontology dictionary."""

    class OntologyTermin(Termin):
        """Termin that additionally stores the ontology item owning it."""

        def __init__(self) -> None:
            super().__init__(None, None, False)
            self.owner = None

        @staticmethod
        def _new546(_arg1: 'IntOntologyItem', _arg2: object) -> 'OntologyTermin':
            """Create a termin with ``owner`` and ``tag`` already assigned."""
            termin = IntOntologyCollection.OntologyTermin()
            termin.owner = _arg1
            termin.tag = _arg2
            return termin

    def __init__(self) -> None:
        self.is_ext_ontology = False
        self.__m_items = list()
        self.__m_termins = TerminCollection()

    @property
    def items(self) -> typing.List['IntOntologyItem']:
        """List of ontology items (the internal list itself)."""
        return self.__m_items

    def addItem(self, di: 'IntOntologyItem') -> None:
        """Add an item.

        Note: the termins of the item must not be changed after it has been
        added. Termins that are not ``OntologyTermin`` instances are copied
        into new ``OntologyTermin`` objects, which replace them in
        ``di.termins``.

        Args:
            di(IntOntologyItem): item to add; its ``owner`` is set to this
                collection.
        """
        self.__m_items.append(di)
        di.owner = self
        for pos, term in enumerate(di.termins):
            if isinstance(term, IntOntologyCollection.OntologyTermin):
                term.owner = di
                self.__m_termins.add(term)
                continue
            wrapped = IntOntologyCollection.OntologyTermin._new546(di, term.tag)
            term.copyTo(wrapped)
            self.__m_termins.add(wrapped)
            di.termins[pos] = wrapped

    def addReferent(self, referent: 'Referent') -> bool:
        """Add an entity to the ontology.

        When the referent already has an item in this collection, the item
        is rebuilt only if the freshly created one has a different number
        of termins.

        Args:
            referent(Referent): entity to add.

        Returns:
            False when ``referent`` is None or produces no ontology item,
            True otherwise.
        """
        if referent is None:
            return False
        if (referent._int_ontology_item is not None
                and referent._int_ontology_item.owner == self):
            fresh = referent.createOntologyItem()
            if (fresh is None
                    or len(fresh.termins) == len(referent._int_ontology_item.termins)):
                return True
            # The termin set changed: drop the stale item before re-adding.
            for stale in referent._int_ontology_item.termins:
                self.__m_termins.remove(stale)
            pos = Utils.indexOfList(self.__m_items, referent._int_ontology_item, 0)
            if pos >= 0:
                del self.__m_items[pos]
        else:
            fresh = referent.createOntologyItem()
        if fresh is None:
            return False
        fresh.referent = referent
        referent._int_ontology_item = fresh
        self.addItem(fresh)
        return True

    def addTermin(self, di: 'IntOntologyItem', t: 'Termin') -> None:
        """Add a termin to an existing item.

        Args:
            di(IntOntologyItem): item that becomes the owner of the copy.
            t(Termin): termin whose copy is indexed.
        """
        wrapped = IntOntologyCollection.OntologyTermin._new546(di, t.tag)
        t.copyTo(wrapped)
        self.__m_termins.add(wrapped)

    def add(self, t: 'Termin') -> None:
        """Add a stand-alone termin.

        Note: the properties of the termin must not be changed after it has
        been added.

        Args:
            t(Termin): termin to index as is.
        """
        self.__m_termins.add(t)

    def findTerminByCanonicText(self, text: str) -> typing.List['Termin']:
        """Delegate to the termin collection's lookup by canonic text."""
        return self.__m_termins.findTerminByCanonicText(text)

    def tryAttach(self, t: 'Token', referent_type_name: str = None,
                  can_be_geo_object: bool = False) -> typing.List['IntOntologyToken']:
        """Attach starting from the given position.

        Args:
            t(Token): token to start from.
            referent_type_name(str): when given, items whose referent has
                another type name are skipped.
            can_be_geo_object(bool): when True a geographical object may
                occur inside (e.g. "Ministry of the Russian Federation
                for ...").

        Returns:
            Tokens found, at most one per ontology item, or None when there
            are none.
        """
        mode = (TerminParseAttr.CANBEGEOOBJECT if can_be_geo_object
                else TerminParseAttr.NO)
        tts = self.__m_termins.tryParseAll(t, mode)
        if tts is None:
            return None
        res = list()
        seen_items = list()
        for tt in tts:
            di = None
            if isinstance(tt.termin, IntOntologyCollection.OntologyTermin):
                di = tt.termin.owner
            if di is not None:
                if (di.referent is not None and referent_type_name is not None
                        and di.referent.type_name != referent_type_name):
                    continue
                if di in seen_items:
                    continue
                seen_items.append(di)
            res.append(IntOntologyToken._new548(
                tt.begin_token, tt.end_token, di, tt.termin, tt.morph))
        return res if len(res) > 0 else None

    def tryAttachByItem(self, item: 'IntOntologyItem') -> typing.List['IntOntologyItem']:
        """Find similar ontology objects.

        Args:
            item(IntOntologyItem): item whose termins are looked up.

        Returns:
            List of distinct owners of the matching termins, or None when
            ``item`` is None or nothing matched.
        """
        if item is None:
            return None
        res = None
        for term in item.termins:
            found = self.__m_termins.tryAttach(term)
            if found is None:
                continue
            for tt in found:
                if not isinstance(tt, IntOntologyCollection.OntologyTermin):
                    continue
                owner = tt.owner
                if res is None:
                    res = list()
                if owner not in res:
                    res.append(owner)
        return res

    def tryAttachByReferent(self, referent: 'Referent',
                            item: 'IntOntologyItem' = None,
                            must_be_single: bool = False) -> typing.List['Referent']:
        """Find equivalent entities through ontology objects.

        Args:
            referent(Referent): entity to look for.
            item(IntOntologyItem): its ontology item; created from
                ``referent`` when None.
            must_be_single(bool): when True and several referents were
                found, None is returned unless every pair of them can be
                merged.

        Returns:
            List of distinct matching referents, or None.
        """
        if referent is None:
            return None
        if item is None:
            item = referent.createOntologyItem()
        if item is None:
            return None
        candidates = self.tryAttachByItem(item)
        if candidates is None:
            return None
        res = None
        for oi in candidates:
            r = Utils.ifNotNull(oi.referent, Utils.asObjectOrNull(oi.tag, Referent))
            if r is None:
                continue
            if not referent.canBeEquals(r, Referent.EqualType.WITHINONETEXT):
                continue
            if res is None:
                res = list()
            if r not in res:
                res.append(r)
        if must_be_single and res is not None and len(res) > 1:
            for i in range(len(res) - 1):
                for j in range(i + 1, len(res)):
                    if not res[i].canBeEquals(res[j], Referent.EqualType.FORMERGING):
                        return None
        return res

    def remove(self, r: 'Referent') -> None:
        """Remove everything associated with the entity.

        Only the first item bound to ``r`` is removed, together with its
        termins.

        Args:
            r(Referent): entity whose item is to be removed.
        """
        for pos, oi in enumerate(self.__m_items):
            if oi.referent == r:
                oi.referent = None
                r._int_ontology_item = None
                del self.__m_items[pos]
                for term in oi.termins:
                    self.__m_termins.remove(term)
                break

    @staticmethod
    def _new2649(_arg1: bool) -> 'IntOntologyCollection':
        """Create a collection with ``is_ext_ontology`` preset."""
        res = IntOntologyCollection()
        res.is_ext_ontology = _arg1
        return res