Example #1
0
 def process(self, kit: 'AnalysisKit') -> None:
     """Find measures in the token chain and embed them as referent tokens.

     The method runs in three phases:

     1. When the kit has an ontology, the NAME / FULLNAME slot values of its
        ``UnitReferent`` items whose ``_m_unit`` is unset are collected into
        an additional termin collection handed to the measure parsers.
     2. The token chain is walked from ``kit.first_token``; at each position
        a measure is parsed, the referent tokens created from it are
        embedded, and the walk resumes after the last embedded token.
     3. When the kit has an ontology, every ``UnitReferent`` found in the
        analyzer data gets ``ontology_items`` set to a one-element list with
        the first ontology item whose unit shares a NAME / FULLNAME value.

     Args:
         kit: analysis kit supplying the token chain, the optional ontology
             and this analyzer's data.
     """
     analyzer_data = kit.getAnalyzerData(self)
     name_attrs = (UnitReferent.ATTR_NAME, UnitReferent.ATTR_FULLNAME)

     # Phase 1: user-defined unit names coming from the ontology.
     extra_units = None
     if kit.ontology is not None:
         extra_units = TerminCollection()
         for item in kit.ontology.items:
             unit = Utils.asObjectOrNull(item.referent, UnitReferent)
             if unit is None or unit._m_unit is not None:
                 continue
             for slot in unit.slots:
                 if slot.type_name in name_attrs:
                     unit_name = Utils.asObjectOrNull(slot.value, str)
                     extra_units.add(Termin._new117(unit_name, unit))

     # Phase 2: parse measures and embed the produced referent tokens.
     token = kit.first_token
     while token is not None:
         measure = MeasureToken.tryParseMinimal(token, extra_units, False)
         if measure is None:
             measure = MeasureToken.tryParse(token, extra_units, True, False)
         ref_tokens = None
         if measure is not None:
             ref_tokens = measure.createRefenetsTokensWithRegister(
                 analyzer_data, True)
         if ref_tokens is not None:
             for pos, ref_token in enumerate(ref_tokens):
                 token.kit.embedToken(ref_token)
                 token = ref_token
                 # Tokens still waiting to be embedded may start or end on
                 # the same boundary as the one just embedded; re-point
                 # them at the embedded token itself.
                 for later_pos in range(pos + 1, len(ref_tokens)):
                     later = ref_tokens[later_pos]
                     if later.begin_token == ref_token.begin_token:
                         later.begin_token = token
                     if later.end_token == ref_token.end_token:
                         later.end_token = token
         token = token.next0_

     # Phase 3: link extracted units to the matching ontology item.
     if kit.ontology is not None:
         for referent in analyzer_data.referents:
             unit = Utils.asObjectOrNull(referent, UnitReferent)
             if unit is None:
                 continue
             for item in kit.ontology.items:
                 onto_unit = Utils.asObjectOrNull(item.referent, UnitReferent)
                 if onto_unit is None:
                     continue
                 shares_name = any(
                     slot.type_name in name_attrs
                     and unit.findSlot(None, slot.value, True) is not None
                     for slot in onto_unit.slots)
                 if shares_name:
                     unit.ontology_items = [item]
                     break
Example #2
0
 def process(self, kit: 'AnalysisKit') -> None:
     """Extract weapon referents, then tag further mentions of them.

     First pass: at every token a list of ``WeaponItemToken`` is parsed and
     attached; the resulting referents are registered in the analyzer data
     and embedded into the kit. For each MODEL slot the model string (and,
     when the referent has a BRAND, the "brand model" string) is remembered
     unless it starts with a digit; for each NAME slot the name is
     remembered.

     Second pass (skipped when nothing was remembered): a referent token is
     embedded where a remembered name fills a bracket sequence recognized by
     ``BracketHelper``, or where a letter text token starts a remembered
     model or name.

     Args:
         kit: analysis kit supplying the token chain and this analyzer's
             data.
     """
     analyzer_data = kit.getAnalyzerData(self)
     model_termins = TerminCollection()
     referents_by_model = dict()
     name_termins = TerminCollection()

     token = kit.first_token
     while token is not None:
         items = WeaponItemToken.tryParseList(token, 10)
         ref_tokens = None
         if items is not None:
             ref_tokens = self.__tryAttach(items, False)
         if ref_tokens is not None:
             for ref_token in ref_tokens:
                 ref_token.referent = analyzer_data.registerReferent(
                     ref_token.referent)
                 kit.embedToken(ref_token)
                 token = ref_token
                 for slot in ref_token.referent.slots:
                     if slot.type_name == WeaponReferent.ATTR_MODEL:
                         model = str(slot.value)
                         with_brand = False
                         # Two variants at most: the bare model, then the
                         # model prefixed with the brand (if there is one).
                         while True:
                             if not model[0].isdigit():
                                 found = RefOutArgWrapper(None)
                                 known = Utils.tryGetValue(
                                     referents_by_model, model, found)
                                 owners = found.value
                                 if not known:
                                     owners = list()
                                     referents_by_model[model] = owners
                                 if ref_token.referent not in owners:
                                     owners.append(ref_token.referent)
                                 model_termins.addStr(model, owners, None,
                                                      False)
                             if with_brand:
                                 break
                             brand = ref_token.referent.getStringValue(
                                 WeaponReferent.ATTR_BRAND)
                             if brand is None:
                                 break
                             model = "{0} {1}".format(brand, model)
                             with_brand = True
                     elif slot.type_name == WeaponReferent.ATTR_NAME:
                         name_termins.add(
                             Termin._new117(str(slot.value),
                                            ref_token.referent))
         token = token.next0_

     if not referents_by_model and len(name_termins.termins) == 0:
         return

     def _embed_mention(current):
         """Embed a referent token at ``current`` if a known weapon is
         mentioned there; return the token the scan continues from."""
         bracket = BracketHelper.tryParse(current, BracketParseAttr.NO, 10)
         if bracket is not None:
             named = name_termins.tryParse(current.next0_,
                                           TerminParseAttr.NO)
             if (named is not None
                     and named.end_token.next0_ == bracket.end_token):
                 quoted = ReferentToken(
                     Utils.asObjectOrNull(named.termin.tag, Referent),
                     bracket.begin_token, bracket.end_token)
                 kit.embedToken(quoted)
                 return quoted
         if not isinstance(current, TextToken):
             return current
         if not current.chars.is_letter:
             return current
         match = model_termins.tryParse(current, TerminParseAttr.NO)
         if match is None and not current.chars.is_all_lower:
             match = name_termins.tryParse(current, TerminParseAttr.NO)
         if match is None:
             return current
         if not match.is_whitespace_after:
             # Glued to the next token: accept only ",", ".", ")" or a
             # bracket right after the match.
             follower = match.end_token.next0_
             punctuated = (follower is not None
                           and follower.isCharOf(",.)"))
             if (not punctuated
                     and not BracketHelper.isBracket(follower, False)):
                 return current
         tagged = Utils.asObjectOrNull(match.termin.tag, list)
         if tagged is not None and len(tagged) == 1:
             referent = tagged[0]
         else:
             referent = Utils.asObjectOrNull(match.termin.tag, Referent)
         if referent is None:
             return current
         brand_item = WeaponItemToken.tryParse(match.begin_token.previous,
                                               None, False, True)
         if (brand_item is not None
                 and brand_item.typ == WeaponItemToken.Typs.BRAND):
             referent.addSlot(WeaponReferent.ATTR_BRAND, brand_item.value,
                              False, 0)
             match.begin_token = brand_item.begin_token
         embedded = ReferentToken(referent, match.begin_token,
                                  match.end_token)
         kit.embedToken(embedded)
         return embedded

     token = kit.first_token
     while token is not None:
         token = _embed_mention(token).next0_
Example #3
0
 def process(self, kit : 'AnalysisKit') -> None:
     """Extract transport referents, then tag further mentions of them.

     First pass: at every token a list of ``TransItemToken`` is parsed and
     attached. Before a new referent is registered, up to 1000 preceding
     tokens are inspected; if one of them carries a ``TransportReferent``
     for which ``find_slot`` succeeds on every slot of the new referent, that
     earlier referent is used instead. The referent is then registered and
     its token embedded. For each MODEL slot the model string (and, when
     the referent has a BRAND, the "brand model" string) is remembered
     unless it starts with a digit; for each NAME slot the name is
     remembered.

     Second pass (skipped when nothing was remembered): a referent token is
     embedded where a remembered name fills a bracket sequence recognized by
     ``BracketHelper``, or where a letter text token starts a remembered
     model or name.

     Args:
         kit: analysis kit supplying the token chain and this analyzer's
             data.
     """
     analyzer_data = kit.get_analyzer_data(self)
     model_termins = TerminCollection()
     referents_by_model = dict()
     name_termins = TerminCollection()

     token = kit.first_token
     while token is not None:
         items = TransItemToken.try_parse_list(token, 10)
         ref_tokens = None
         if items is not None:
             ref_tokens = self.__try_attach(items, False)
         if ref_tokens is not None:
             for ref_token in ref_tokens:
                 # Look back for an already known referent that covers
                 # every slot of the new one.
                 probe = token.previous
                 steps = 0
                 while probe is not None and steps < 1000:
                     earlier = Utils.asObjectOrNull(probe.get_referent(),
                                                    TransportReferent)
                     if earlier is not None and all(
                             earlier.find_slot(slot.type_name, slot.value,
                                               True) is not None
                             for slot in ref_token.referent.slots):
                         ref_token.referent = earlier
                         break
                     probe = probe.previous
                     steps += 1
                 ref_token.referent = analyzer_data.register_referent(
                     ref_token.referent)
                 kit.embed_token(ref_token)
                 token = ref_token
                 for slot in ref_token.referent.slots:
                     if slot.type_name == TransportReferent.ATTR_MODEL:
                         model = str(slot.value)
                         with_brand = False
                         # Two variants at most: the bare model, then the
                         # model prefixed with the brand (if there is one).
                         while True:
                             if not model[0].isdigit():
                                 found = RefOutArgWrapper(None)
                                 known = Utils.tryGetValue(
                                     referents_by_model, model, found)
                                 owners = found.value
                                 if not known:
                                     owners = list()
                                     referents_by_model[model] = owners
                                 if ref_token.referent not in owners:
                                     owners.append(ref_token.referent)
                                 model_termins.add_string(model, owners,
                                                          None, False)
                             if with_brand:
                                 break
                             brand = ref_token.referent.get_string_value(
                                 TransportReferent.ATTR_BRAND)
                             if brand is None:
                                 break
                             model = "{0} {1}".format(brand, model)
                             with_brand = True
                     elif slot.type_name == TransportReferent.ATTR_NAME:
                         name_termins.add(
                             Termin._new100(str(slot.value),
                                            ref_token.referent))
         token = token.next0_

     if not referents_by_model and len(name_termins.termins) == 0:
         return

     def _embed_mention(current):
         """Embed a referent token at ``current`` if a known transport is
         mentioned there; return the token the scan continues from."""
         bracket = BracketHelper.try_parse(current, BracketParseAttr.NO, 10)
         if bracket is not None:
             named = name_termins.try_parse(current.next0_,
                                            TerminParseAttr.NO)
             if (named is not None
                     and named.end_token.next0_ == bracket.end_token):
                 quoted = ReferentToken(
                     Utils.asObjectOrNull(named.termin.tag, Referent),
                     bracket.begin_token, bracket.end_token)
                 kit.embed_token(quoted)
                 return quoted
         if not isinstance(current, TextToken):
             return current
         if not current.chars.is_letter:
             return current
         match = model_termins.try_parse(current, TerminParseAttr.NO)
         if match is None and not current.chars.is_all_lower:
             match = name_termins.try_parse(current, TerminParseAttr.NO)
         if match is None:
             return current
         if not match.is_whitespace_after:
             # Glued to the next token: accept only ",", ".", ")" or a
             # bracket right after the match.
             follower = match.end_token.next0_
             punctuated = (follower is not None
                           and follower.is_char_of(",.)"))
             if (not punctuated
                     and not BracketHelper.is_bracket(follower, False)):
                 return current
         tagged = Utils.asObjectOrNull(match.termin.tag, list)
         if tagged is not None and len(tagged) == 1:
             referent = tagged[0]
         else:
             referent = Utils.asObjectOrNull(match.termin.tag, Referent)
         if referent is None:
             return current
         brand_item = TransItemToken.try_parse(match.begin_token.previous,
                                               None, False, True)
         if (brand_item is not None
                 and brand_item.typ == TransItemToken.Typs.BRAND):
             referent.add_slot(TransportReferent.ATTR_BRAND,
                               brand_item.value, False, 0)
             match.begin_token = brand_item.begin_token
         embedded = ReferentToken(referent, match.begin_token,
                                  match.end_token)
         kit.embed_token(embedded)
         return embedded

     token = kit.first_token
     while token is not None:
         token = _embed_mention(token).next0_
Example #4
0
class IntOntologyCollection:
    """Internal ontology dictionary, essentially a layer over TerminCollection.

    (Original author's note: the reason it was introduced is no longer
    remembered, but it is needed for something.)

    The collection keeps a list of ontology items and a ``TerminCollection``
    with their termins; termins owned by an item are stored as
    ``OntologyTermin`` so that a matched termin leads back to its item.
    """

    class OntologyTermin(Termin):
        """Termin that remembers the ontology item it belongs to."""

        def __init__(self) -> None:
            super().__init__(None, None, False)
            # IntOntologyItem owning this termin (set by the collection).
            self.owner = None

        @staticmethod
        def _new489(_arg1 : 'IntOntologyItem', _arg2 : object) -> 'OntologyTermin':
            """Create a termin with the given owner and tag."""
            res = IntOntologyCollection.OntologyTermin()
            res.owner = _arg1
            res.tag = _arg2
            return res

    def __init__(self) -> None:
        self.is_ext_ontology = False
        self.__m_items = list()
        self.__m_termins = TerminCollection()

    @property
    def items(self) -> typing.List['IntOntologyItem']:
        """The list of ontology items (the internal list, not a copy)."""
        return self.__m_items

    def add_item(self, di : 'IntOntologyItem') -> None:
        """Add an item together with all of its termins.

        The item's owner is set to this collection. Termins that already
        are ``OntologyTermin`` are re-owned by the item and added as is; any
        other termin is copied into a new ``OntologyTermin`` which replaces
        it in ``di.termins``.

        Args:
            di: the item to add.
        """
        self.__m_items.append(di)
        di.owner = self
        for i, termin in enumerate(di.termins):
            if isinstance(termin, IntOntologyCollection.OntologyTermin):
                termin.owner = di
                self.__m_termins.add(termin)
            else:
                nt = IntOntologyCollection.OntologyTermin._new489(di, termin.tag)
                termin.copy_to(nt)
                self.__m_termins.add(nt)
                # Same-length in-place replacement, safe while enumerating.
                di.termins[i] = nt

    def add_referent(self, referent : 'Referent') -> bool:
        """Add a referent to the ontology via its ontology item.

        If the referent already has an item owned by this collection, a
        fresh item is created; when that fails or has the same number of
        termins, nothing is changed. Otherwise the old item and its termins
        are removed and the fresh item is added.

        Args:
            referent: the referent to add.

        Returns:
            False when ``referent`` is None or no ontology item can be
            created for it, True otherwise.
        """
        if referent is None:
            return False
        current = referent._int_ontology_item
        if current is not None and current.owner == self:
            oi = referent.create_ontology_item()
            if oi is None or len(oi.termins) == len(current.termins):
                return True
            for t in current.termins:
                self.__m_termins.remove(t)
            i = Utils.indexOfList(self.__m_items, current, 0)
            if i >= 0:
                del self.__m_items[i]
        else:
            oi = referent.create_ontology_item()
        if oi is None:
            return False
        oi.referent = referent
        referent._int_ontology_item = oi
        self.add_item(oi)
        return True

    def add_termin(self, di : 'IntOntologyItem', t : 'Termin') -> None:
        """Add a copy of termin ``t``, owned by item ``di``, to the termins.

        The copy is only put into the termin collection; ``di.termins`` is
        not modified here.
        """
        nt = IntOntologyCollection.OntologyTermin._new489(di, t.tag)
        t.copy_to(nt)
        self.__m_termins.add(nt)

    def add(self, t : 'Termin') -> None:
        """Add a stand-alone termin (not bound to any item) as is."""
        self.__m_termins.add(t)

    def find_termin_by_canonic_text(self, text : str) -> typing.List['Termin']:
        """Delegate to ``TerminCollection.find_termins_by_canonic_text``."""
        return self.__m_termins.find_termins_by_canonic_text(text)

    def try_attach(self, t : 'Token', referent_type_name : str=None, can_be_geo_object : bool=False) -> typing.Optional[typing.List['IntOntologyToken']]:
        """Match termins starting at token ``t``.

        Args:
            t: the token to start from.
            referent_type_name: when given, matches whose item has a
                referent of a different type name are skipped.
            can_be_geo_object: selects ``TerminParseAttr.CANBEGEOOBJECT``
                instead of ``TerminParseAttr.NO`` for parsing.

        Returns:
            A list of ontology tokens, at most one per item (matches without
            an item are all kept), or None when nothing matched.
        """
        attr = TerminParseAttr.CANBEGEOOBJECT if can_be_geo_object else TerminParseAttr.NO
        tts = self.__m_termins.try_parse_all(t, attr)
        if tts is None:
            return None
        res = list()
        seen_items = list()
        for tt in tts:
            di = None
            if isinstance(tt.termin, IntOntologyCollection.OntologyTermin):
                di = tt.termin.owner
            if di is not None:
                if (di.referent is not None and referent_type_name is not None
                        and di.referent.type_name != referent_type_name):
                    continue
                if di in seen_items:
                    continue
                seen_items.append(di)
            res.append(IntOntologyToken._new491(tt.begin_token, tt.end_token, di, tt.termin, tt.morph))
        return res if res else None

    def try_attach_by_item(self, item : 'IntOntologyItem') -> typing.Optional[typing.List['IntOntologyItem']]:
        """Find items of this collection that share a termin with ``item``.

        Returns:
            A list of distinct owner items, or None when ``item`` is None or
            no ``OntologyTermin`` was found for any of its termins.
        """
        if item is None:
            return None
        res = None
        for t in item.termins:
            li = self.__m_termins.find_termins_by_termin(t)
            if li is None:
                continue
            for tt in li:
                if not isinstance(tt, IntOntologyCollection.OntologyTermin):
                    continue
                if res is None:
                    res = list()
                if tt.owner not in res:
                    res.append(tt.owner)
        return res

    def try_attach_by_referent(self, referent : 'Referent', item : 'IntOntologyItem'=None, must_be_single : bool=False) -> typing.Optional[typing.List['Referent']]:
        """Find referents of this collection that may equal ``referent``.

        Args:
            referent: the referent to look up.
            item: its ontology item; created from the referent when None.
            must_be_single: when True and several referents are found, they
                must all be pairwise mergeable, otherwise None is returned.

        Returns:
            A list of distinct matching referents, or None when nothing was
            found (or the ``must_be_single`` check failed).
        """
        if referent is None:
            return None
        if item is None:
            item = referent.create_ontology_item()
        if item is None:
            return None
        li = self.try_attach_by_item(item)
        if li is None:
            return None
        res = None
        for oi in li:
            # An item refers either to a referent or carries one in its tag.
            r = Utils.ifNotNull(oi.referent, Utils.asObjectOrNull(oi.tag, Referent))
            if r is None:
                continue
            if referent.can_be_equals(r, ReferentsEqualType.WITHINONETEXT):
                if res is None:
                    res = list()
                if r not in res:
                    res.append(r)
        if must_be_single and res is not None and len(res) > 1:
            for i, left in enumerate(res):
                for right in res[i + 1:]:
                    if not left.can_be_equals(right, ReferentsEqualType.FORMERGING):
                        return None
        return res

    def remove(self, r : 'Referent') -> None:
        """Remove the first item bound to referent ``r`` and its termins.

        The link between the item and the referent is cleared in both
        directions. Nothing happens when ``r`` is None or is not found.
        """
        if r is None:
            # Items without a referent would otherwise compare equal to None
            # and the back-link reset below would fail on None.
            return
        for i, oi in enumerate(self.__m_items):
            if oi.referent == r:
                oi.referent = None
                r._int_ontology_item = None
                del self.__m_items[i]
                for t in oi.termins:
                    self.__m_termins.remove(t)
                break

    @staticmethod
    def _new2815(_arg1 : bool) -> 'IntOntologyCollection':
        """Create a collection with ``is_ext_ontology`` preset."""
        res = IntOntologyCollection()
        res.is_ext_ontology = _arg1
        return res
class IntOntologyCollection:
    """Ontology dictionary.

    The collection keeps a list of ontology items and a ``TerminCollection``
    with their termins; termins owned by an item are stored as
    ``OntologyTermin`` so that a matched termin leads back to its item.
    """

    class OntologyTermin(Termin):
        """Termin that remembers the ontology item it belongs to."""

        def __init__(self) -> None:
            super().__init__(None, None, False)
            # IntOntologyItem owning this termin (set by the collection).
            self.owner = None

        @staticmethod
        def _new546(_arg1 : 'IntOntologyItem', _arg2 : object) -> 'OntologyTermin':
            """Create a termin with the given owner and tag."""
            res = IntOntologyCollection.OntologyTermin()
            res.owner = _arg1
            res.tag = _arg2
            return res

    def __init__(self) -> None:
        self.is_ext_ontology = False
        self.__m_items = list()
        self.__m_termins = TerminCollection()

    @property
    def items(self) -> typing.List['IntOntologyItem']:
        """The list of ontology items (the internal list, not a copy)."""
        return self.__m_items

    def addItem(self, di : 'IntOntologyItem') -> None:
        """Add an item (note: its termins must not be changed afterwards).

        The item's owner is set to this collection. Termins that already
        are ``OntologyTermin`` are re-owned by the item and added as is; any
        other termin is copied into a new ``OntologyTermin`` which replaces
        it in ``di.termins``.

        Args:
            di(IntOntologyItem): the item to add.
        """
        self.__m_items.append(di)
        di.owner = self
        for i, termin in enumerate(di.termins):
            if isinstance(termin, IntOntologyCollection.OntologyTermin):
                termin.owner = di
                self.__m_termins.add(termin)
            else:
                nt = IntOntologyCollection.OntologyTermin._new546(di, termin.tag)
                termin.copyTo(nt)
                self.__m_termins.add(nt)
                # Same-length in-place replacement, safe while enumerating.
                di.termins[i] = nt

    def addReferent(self, referent : 'Referent') -> bool:
        """Add an entity to the ontology via its ontology item.

        If the referent already has an item owned by this collection, a
        fresh item is created; when that fails or has the same number of
        termins, nothing is changed. Otherwise the old item and its termins
        are removed and the fresh item is added.

        Args:
            referent(Referent): the entity to add.

        Returns:
            False when ``referent`` is None or no ontology item can be
            created for it, True otherwise.
        """
        if referent is None:
            return False
        current = referent._int_ontology_item
        if current is not None and current.owner == self:
            oi = referent.createOntologyItem()
            if oi is None or len(oi.termins) == len(current.termins):
                return True
            for t in current.termins:
                self.__m_termins.remove(t)
            i = Utils.indexOfList(self.__m_items, current, 0)
            if i >= 0:
                del self.__m_items[i]
        else:
            oi = referent.createOntologyItem()
        if oi is None:
            return False
        oi.referent = referent
        referent._int_ontology_item = oi
        self.addItem(oi)
        return True

    def addTermin(self, di : 'IntOntologyItem', t : 'Termin') -> None:
        """Add a termin to an existing item.

        A copy of ``t`` owned by ``di`` is put into the termin collection;
        ``di.termins`` is not modified here.

        Args:
            di(IntOntologyItem): the owning item.
            t(Termin): the termin to copy.
        """
        nt = IntOntologyCollection.OntologyTermin._new546(di, t.tag)
        t.copyTo(nt)
        self.__m_termins.add(nt)

    def add(self, t : 'Termin') -> None:
        """Add a stand-alone termin (its properties must not be changed afterwards).

        Args:
            t(Termin): the termin to add as is.
        """
        self.__m_termins.add(t)

    def findTerminByCanonicText(self, text : str) -> typing.List['Termin']:
        """Delegate to ``TerminCollection.findTerminByCanonicText``."""
        return self.__m_termins.findTerminByCanonicText(text)

    def tryAttach(self, t : 'Token', referent_type_name : str=None, can_be_geo_object : bool=False) -> typing.Optional[typing.List['IntOntologyToken']]:
        """Match termins starting at the given position.

        Args:
            t(Token): the token to start from.
            referent_type_name(str): when given, matches whose item has a
                referent of a different type name are skipped.
            can_be_geo_object(bool): when True a geographic object may occur
                inside (e.g. "Ministry of the Russian Federation for ...").

        Returns:
            A list of ontology tokens, at most one per item (matches without
            an item are all kept), or None when nothing matched.
        """
        attr = TerminParseAttr.CANBEGEOOBJECT if can_be_geo_object else TerminParseAttr.NO
        tts = self.__m_termins.tryParseAll(t, attr)
        if tts is None:
            return None
        res = list()
        seen_items = list()
        for tt in tts:
            di = None
            if isinstance(tt.termin, IntOntologyCollection.OntologyTermin):
                di = tt.termin.owner
            if di is not None:
                if (di.referent is not None and referent_type_name is not None
                        and di.referent.type_name != referent_type_name):
                    continue
                if di in seen_items:
                    continue
                seen_items.append(di)
            res.append(IntOntologyToken._new548(tt.begin_token, tt.end_token, di, tt.termin, tt.morph))
        return res if res else None

    def tryAttachByItem(self, item : 'IntOntologyItem') -> typing.Optional[typing.List['IntOntologyItem']]:
        """Find similar ontology objects.

        Args:
            item(IntOntologyItem): the item whose termins are looked up.

        Returns:
            A list of distinct owner items, or None when ``item`` is None or
            no ``OntologyTermin`` was found for any of its termins.
        """
        if item is None:
            return None
        res = None
        for t in item.termins:
            li = self.__m_termins.tryAttach(t)
            if li is None:
                continue
            for tt in li:
                if not isinstance(tt, IntOntologyCollection.OntologyTermin):
                    continue
                if res is None:
                    res = list()
                if tt.owner not in res:
                    res.append(tt.owner)
        return res

    def tryAttachByReferent(self, referent : 'Referent', item : 'IntOntologyItem'=None, must_be_single : bool=False) -> typing.Optional[typing.List['Referent']]:
        """Find equivalent entities through ontology objects.

        Args:
            referent(Referent): the entity to look up.
            item(IntOntologyItem): its ontology item; created from the
                referent when None.
            must_be_single(bool): when True and several entities are found,
                they must all be pairwise mergeable, otherwise None is
                returned.

        Returns:
            A list of distinct matching entities, or None when nothing was
            found (or the ``must_be_single`` check failed).
        """
        if referent is None:
            return None
        if item is None:
            item = referent.createOntologyItem()
        if item is None:
            return None
        li = self.tryAttachByItem(item)
        if li is None:
            return None
        res = None
        for oi in li:
            # An item refers either to a referent or carries one in its tag.
            r = Utils.ifNotNull(oi.referent, Utils.asObjectOrNull(oi.tag, Referent))
            if r is None:
                continue
            if referent.canBeEquals(r, Referent.EqualType.WITHINONETEXT):
                if res is None:
                    res = list()
                if r not in res:
                    res.append(r)
        if must_be_single and res is not None and len(res) > 1:
            for i, left in enumerate(res):
                for right in res[i + 1:]:
                    if not left.canBeEquals(right, Referent.EqualType.FORMERGING):
                        return None
        return res

    def remove(self, r : 'Referent') -> None:
        """Remove everything associated with the entity.

        The first item bound to ``r`` is deleted together with its termins,
        and the link between the item and the entity is cleared in both
        directions. Nothing happens when ``r`` is None or is not found.

        Args:
            r(Referent): the entity to remove.
        """
        if r is None:
            # Items without a referent would otherwise compare equal to None
            # and the back-link reset below would fail on None.
            return
        for i, oi in enumerate(self.__m_items):
            if oi.referent == r:
                oi.referent = None
                r._int_ontology_item = None
                del self.__m_items[i]
                for t in oi.termins:
                    self.__m_termins.remove(t)
                break

    @staticmethod
    def _new2649(_arg1 : bool) -> 'IntOntologyCollection':
        """Create a collection with ``is_ext_ontology`` preset."""
        res = IntOntologyCollection()
        res.is_ext_ontology = _arg1
        return res