def cleaner(self):
    """Delete rows purged by old history entries, old history, and private IPs.

    Rows of Item, IP, Domain and URL whose ``purge`` refers to a history id
    beyond the newest ``self.cfg.DiffCount()`` entries are deleted, then
    history entries beyond the newest ``self.cfg.HistoryCount()`` are deleted,
    and finally IP rows matching loopback/private-range LIKE patterns are
    removed. Every step logs the number of affected rows.
    """
    logger.info('Dump cleaner run')

    # Ids of history entries that fall outside the newest DiffCount() ones.
    stale_history = (History
                     .select(History.id)
                     .order_by(History.id.desc())
                     .offset(self.cfg.DiffCount()))
    for report, model in (('Item deleted: %d', Item),
                          ('IP deleted: %d', IP),
                          ('Domain deleted: %d', Domain),
                          ('URL deleted: %d', URL)):
        removed = model.delete().where(model.purge << stale_history).execute()
        logger.info(report, removed)

    # Ids of history entries that fall outside the newest HistoryCount() ones.
    expired_history = (History
                       .select(History.id)
                       .order_by(History.id.desc())
                       .offset(self.cfg.HistoryCount()))
    removed = History.delete().where(History.id << expired_history).execute()
    logger.info('History deleted: %d', removed)

    # LIKE patterns: 0.x, loopback, 192.168.x, 10.x and 172.16.x - 172.31.x.
    like_patterns = ['0.%', '127.%', '192.168.%', '10.%']
    like_patterns.extend('172.%d.%%' % octet for octet in range(16, 32))
    for pattern in like_patterns:
        matched = IP.delete().where(IP.ip % pattern).execute()
        if matched:
            logger.info('IP error LIKE %s, count %d', pattern, matched)
    def check_diff(self):
        """Tell whether the newest history entry added or purged any record.

        Counts distinct IPs, domains and URLs (joined with Item) whose ``add``
        or ``purge`` equals the newest history id. When any count is non-zero
        the history entry is flagged with ``dump=True``.

        :return: ``True`` if something changed, ``False`` otherwise.
        :raises IndexError: if the history query returns no rows.
        """
        recent_ids = [
            row.id for row in (History
                               .select(History.id)
                               .order_by(History.id.desc())
                               .limit(self.cfg.DiffCount()))
        ]
        newest = recent_ids[0]

        def changed(model, column, marker):
            # Distinct values of `column` whose add/purge marker is `newest`.
            return (model.select(fn.Count(fn.Distinct(column)))
                    .join(Item).where(marker == newest).scalar())

        # All six counts are executed up front, add before purge per model.
        counts = [
            changed(IP, IP.ip, IP.add),
            changed(IP, IP.ip, IP.purge),
            changed(Domain, Domain.domain, Domain.add),
            changed(Domain, Domain.domain, Domain.purge),
            changed(URL, URL.url, URL.add),
            changed(URL, URL.url, URL.purge),
        ]

        if not any(counts):
            return False
        History.update(dump=True).where(History.id == newest).execute()
        return True
Beispiel #3
0
 def cleaner(self):
     """Delete Item, IP, Domain and URL rows purged by old history entries.

     A row is removed when its ``purge`` refers to a history id that lies
     beyond the newest ``self.cfg.DiffCount()`` history entries.
     """
     logger.info('cleaner run')
     stale_ids = (History
                  .select(History.id)
                  .order_by(History.id.desc())
                  .offset(self.cfg.DiffCount()))
     for model in (Item, IP, Domain, URL):
         model.delete().where(model.purge << stale_ids).execute()
 def _ip_rollback_sql(self, rollback, bt):
     """Build the IP query with the first ``rollback`` history ids undone.

     Selected rows were not added in ``self.idx_list[:rollback]`` and are
     either not purged or were purged within that same slice.

     :param rollback: number of leading ``self.idx_list`` entries to undo.
     :param bt: ``'ignore'`` for no block-type filter, or one of ``'ip'``,
         ``'default'``, ``'domain'``, ``'domain-mask'`` to join Item and
         filter on ``Item.blockType``.
     :return: the select query, or ``None`` for any other ``bt``.
     """
     undone = self.idx_list[:rollback]
     not_added = ~(IP.add << undone)
     still_listed = (IP.purge >> None) | (IP.purge << undone)
     query = IP.select(fn.Distinct(IP.ip), IP.mask)

     if bt == 'ignore':
         return query.where(not_added & still_listed)
     if bt in ('ip', 'default', 'domain', 'domain-mask'):
         return query.join(Item).where(
             (Item.blockType == bt) & not_added & still_listed)
     return None
    def _ip_diff_sql(self, diff, bt, stat):
        """Build the query for IPs added or purged in one history entry.

        :param diff: index into ``self.idx_list`` of the history entry.
        :param bt: ``'ignore'`` for no block-type filter, or one of ``'ip'``,
            ``'default'``, ``'domain'``, ``'domain-mask'`` to join Item and
            filter on ``Item.blockType``.
        :param stat: truthy selects on ``IP.add``, falsy on ``IP.purge``.
        :return: the select query, or ``None`` for any other ``bt``.
        """
        by_block_type = bt in ('ip', 'default', 'domain', 'domain-mask')
        if not by_block_type and bt != 'ignore':
            return None

        marker = IP.add if stat else IP.purge
        in_entry = marker == self.idx_list[diff]
        query = IP.select(fn.Distinct(IP.ip), IP.mask)
        if by_block_type:
            return query.join(Item).where(Item.blockType == bt, in_entry)
        return query.where(in_entry)
Beispiel #6
0
    def check_diff(self):
        """Tell whether the newest diff-flagged history entry changed anything.

        Counts distinct IPs, domains and URLs (joined with Item) whose ``add``
        or ``purge`` equals the newest history id having ``diff`` set. When
        every count is zero the entry's ``diff`` flag is cleared.

        :return: ``True`` if something changed, ``False`` otherwise.
        :raises IndexError: if the history query returns no rows.
        """
        recent_ids = [
            row.id for row in (History
                               .select(History.id)
                               .where(History.diff == True)
                               .order_by(History.id.desc())
                               .limit(self.cfg.DiffCount()))
        ]
        newest = recent_ids[0]

        def changed(model, column, marker):
            # Distinct values of `column` whose add/purge marker is `newest`.
            return (model.select(fn.Count(fn.Distinct(column)))
                    .join(Item).where(marker == newest).scalar())

        # All six counts are executed up front, add before purge per model.
        counts = [
            changed(IP, IP.ip, IP.add),
            changed(IP, IP.ip, IP.purge),
            changed(Domain, Domain.domain, Domain.add),
            changed(Domain, Domain.domain, Domain.purge),
            changed(URL, URL.url, URL.add),
            changed(URL, URL.url, URL.purge),
        ]

        if any(counts):
            return True
        History.update(diff=False).where(History.id == newest).execute()
        return False
    def statistics_show(self, diff=0, stdout=False):
        """Build a plain-text report of one history entry's changes and totals.

        The report starts with the stored ``lastDumpDate``, then lists the
        URLs, IPs and domains added and deleted in history entry
        ``self.idx_list[diff]`` (via the ``_*_diff_sql`` helpers with
        ``bt='ignore'``). It ends with distinct counts of URLs, IPs, domains
        and items that were not added in ``self.idx_list[:diff]`` and are
        either not purged or purged within that slice.

        :param diff: index into ``self.idx_list`` of the entry to report on.
        :param stdout: when true, print the report instead of returning it.
        :return: ``False`` after printing when ``stdout`` is true, otherwise
            the report text.
        """

        def ip_line(row):
            # Subnets are shown in CIDR form. Every entry ends with a newline;
            # the subnet form used to lack one and ran into the next entry.
            if row.mask < 32:
                return row.ip + '/' + str(row.mask) + '\n'
            return row.ip + '\n'

        last_dump = Dump.get(Dump.param == 'lastDumpDate').value
        date_time = datetime.fromtimestamp(int(last_dump)).strftime('%Y-%m-%d %H:%M:%S')

        parts = ['vigruzki.rkn.gov.ru update: ' + date_time + '\n']

        parts.append('\nURLs added: \n\n')
        parts.extend(row.url + '\n' for row in self._url_diff_sql(diff, 'ignore', 1))

        parts.append('\nIPs added: \n\n')
        parts.extend(ip_line(row) for row in self._ip_diff_sql(diff, 'ignore', 1))

        parts.append('\nDOMAINs added: \n\n')
        parts.extend(row.domain + '\n' for row in self._domain_diff_sql(diff, 'ignore', 1))

        parts.append('\nURLs deleted: \n\n')
        parts.extend(row.url + '\n' for row in self._url_diff_sql(diff, 'ignore', 0))

        parts.append('\nIPs deleted: \n\n')
        parts.extend(ip_line(row) for row in self._ip_diff_sql(diff, 'ignore', 0))

        parts.append('\nDOMAINs deleted: \n\n')
        parts.extend(row.domain + '\n' for row in self._domain_diff_sql(diff, 'ignore', 0))

        rb_list = self.idx_list[:diff]
        domain_count = Domain.select(fn.Count(fn.Distinct(Domain.domain)))\
            .where(~(Domain.add << rb_list) & ((Domain.purge >> None) | (Domain.purge << rb_list))).scalar()
        url_count = URL.select(fn.Count(fn.Distinct(URL.url)))\
            .where(~(URL.add << rb_list) & ((URL.purge >> None) | (URL.purge << rb_list))).scalar()
        ip_count = IP.select(fn.Count(fn.Distinct(IP.ip)))\
            .where(~(IP.add << rb_list) & ((IP.purge >> None) | (IP.purge << rb_list))).scalar()
        id_count = Item.select(fn.Count(fn.Distinct(Item.content_id)))\
            .where(~(Item.add << rb_list) & ((Item.purge >> None) | (Item.purge << rb_list))).scalar()

        parts.append('\nURLs count: ' + str(url_count) + '\n')
        parts.append('IPs count: ' + str(ip_count) + '\n')
        parts.append('DOMAINs count: ' + str(domain_count) + '\n')
        parts.append('Item count: ' + str(id_count) + '\n')

        message = ''.join(parts)
        if stdout:
            print(message)
            return False
        return message
    def _ip_dedup_sql(self, diff, bt, stat):
        """Build the de-duplicated query for IPs added or purged in one entry.

        Starting from the IPs added (``stat`` truthy) or purged (``stat``
        falsy) in history entry ``self.idx_list[diff]``, the query excludes
        every address that also appears in another IP row matching the
        "duplicate" filter of the chosen branch.

        :param diff: index into ``self.idx_list`` of the history entry.
        :param bt: ``'ignore'`` for no block-type filter, or one of ``'ip'``,
            ``'default'``, ``'domain'``, ``'domain-mask'`` to join Item and
            filter on ``Item.blockType``.
        :param stat: truthy for added IPs, falsy for purged IPs.
        :return: the select query; implicitly ``None`` for any other ``bt``.
        """
        # The "add" branches exclude the entry itself (diff + 1 ids); the
        # "purge" branches only exclude the ids before it.
        rb_list_add = self.idx_list[:diff+1]
        rb_list_purge = self.idx_list[:diff]
        if stat and bt == 'ignore':
            # IPs added in this entry.
            ip_diff_sql = IP.select(fn.Distinct(IP.ip)).where(IP.add == self.idx_list[diff])
            # Of those, the ones also present in a row not added within
            # rb_list_add that is unpurged or purged within rb_list_add.
            # NOTE(review): only this branch also accepts `purge << rb_list_add`;
            # the block-type "add" branch below checks `purge IS NULL` only --
            # confirm whether the difference is intentional.
            ip_dup_sql = IP.select(fn.Distinct(IP.ip))\
                .where(~(IP.add << rb_list_add) & ((IP.purge >> None) | (IP.purge << rb_list_add)) &
                       (IP.ip << ip_diff_sql))
            ip_dedup_sql = IP.select(fn.Distinct(IP.ip), IP.mask).where((IP.add == self.idx_list[diff]) &
                                                                        ~(IP.ip << ip_dup_sql))
            return ip_dedup_sql
        elif not stat and bt == 'ignore':
            # IPs purged in this entry.
            ip_diff_sql = IP.select(fn.Distinct(IP.ip)).where(IP.purge == self.idx_list[diff])
            # Of those, the ones still present in an unpurged row that was not
            # added within rb_list_purge.
            ip_dup_sql = IP.select(fn.Distinct(IP.ip))\
                .where(~(IP.add << rb_list_purge) & (IP.purge >> None) &
                       (IP.ip << ip_diff_sql))
            ip_dedup_sql = IP.select(fn.Distinct(IP.ip), IP.mask).where((IP.purge == self.idx_list[diff]) &
                                                                        ~(IP.ip << ip_dup_sql))
            return ip_dedup_sql
        elif stat and (bt == 'ip' or bt == 'default' or bt == 'domain' or bt == 'domain-mask'):
            # Same as the 'ignore' add branch, restricted to items of block
            # type `bt`, with duplicates limited to unpurged rows.
            ip_diff_sql = IP.select(fn.Distinct(IP.ip)).join(Item)\
                .where(Item.blockType == bt, IP.add == self.idx_list[diff])
            ip_dup_sql = IP.select(fn.Distinct(IP.ip)).join(Item)\
                .where((Item.blockType == bt) & ~(IP.add << rb_list_add) &
                       (IP.purge >> None) & (IP.ip << ip_diff_sql))

            ip_dedup_sql = IP.select(fn.Distinct(IP.ip), IP.mask).join(Item)\
                .where((Item.blockType == bt) & (IP.add == self.idx_list[diff]) &
                       ~(IP.ip << ip_dup_sql))
            return ip_dedup_sql
        elif not stat and (bt == 'ip' or bt == 'default' or bt == 'domain' or bt == 'domain-mask'):
            # Same as the 'ignore' purge branch, restricted to items of block
            # type `bt`.
            ip_diff_sql = IP.select(fn.Distinct(IP.ip)).join(Item)\
                .where(Item.blockType == bt, IP.purge == self.idx_list[diff])
            ip_dup_sql = IP.select(fn.Distinct(IP.ip)).join(Item)\
                .where((Item.blockType == bt) & ~(IP.add << rb_list_purge) &
                       (IP.purge >> None) & (IP.ip << ip_diff_sql))
            ip_dedup_sql = IP.select(fn.Distinct(IP.ip), IP.mask).join(Item)\
                .where((Item.blockType == bt) & (IP.purge == self.idx_list[diff]) &
                       ~(IP.ip << ip_dup_sql))
            return ip_dedup_sql
Beispiel #9
0
    def parse_dump(self):
        """Synchronise the database with ``dump.xml`` found in ``self.path_py``.

        Steps, in order:

        1. Store the dump's ``updateTime`` and ``updateTimeUrgently`` (epoch
           seconds via ``time.mktime``) in the Dump table.
        2. Mark Item/Domain/URL/IP rows as purged (``purge=self.code_id``) for
           content ids that are active in the database but absent in the dump.
        3. Insert Item/URL/Domain/IP rows (``add=self.code_id``) for content
           ids present in the dump but not active in the database.
        4. For every dump entry whose hash differs from the stored one, update
           the Item fields and reconcile its URLs, domains, IPs and subnets.
        5. Run ``check_diff()`` and then ``cleaner()``.

        :return: ``0`` if dump.xml does not exist, ``1`` if ``check_diff()``
            reported changes, ``2`` otherwise.
        """
        if not os.path.exists(self.path_py + '/dump.xml'):
            # Fixed placeholder: the former 's%' is not a valid format spec,
            # so the path was never rendered in the log.
            logger.info('dump.xml not found: %s', self.path_py + '/dump.xml')
            return 0
        logger.info('dump.xml already exists.')
        tree_xml = ElementTree().parse(self.path_py + '/dump.xml')

        # Only the first 19 characters (date and time) are parsed; anything
        # after them in the attribute is ignored.
        dt = datetime.strptime(tree_xml.attrib['updateTime'][:19], '%Y-%m-%dT%H:%M:%S')
        update_time = int(time.mktime(dt.timetuple()))
        Dump.update(value=update_time).where(Dump.param == 'lastDumpDate').execute()
        logger.info('Got updateTime: %s.', update_time)

        dt = datetime.strptime(tree_xml.attrib['updateTimeUrgently'][:19], '%Y-%m-%dT%H:%M:%S')
        update_time_urgently = int(time.mktime(dt.timetuple()))
        Dump.update(value=update_time_urgently).where(Dump.param == 'lastDumpDateUrgently').execute()
        logger.info('Got updateTimeUrgently: %s.', update_time_urgently)

        # Compare the content ids in the dump with the active ones in the DB.
        list_xml = tree_xml.findall(".//*[@id]")
        id_set_dump = set()
        id_set_db = set()
        for content_xml in list_xml:
            id_set_dump.add(int(content_xml.attrib['id']))

        select_content_id_db = Item.select(Item.content_id).where(Item.purge >> None)
        for content_db in select_content_id_db:
            id_set_db.add(content_db.content_id)

        common_id_set = id_set_dump.intersection(id_set_db)
        delete_id_set = id_set_db.difference(common_id_set)
        add_id_set = id_set_dump.difference(common_id_set)

        # Entries that disappeared from the dump: mark everything as purged.
        if delete_id_set:
            with self.transact.atomic():
                for del_item in delete_id_set:
                    logger.info('Full delete Item, IP, Domain, URL id: %s.', del_item)

                    Item.update(purge=self.code_id).where(Item.content_id == del_item, Item.purge >> None).execute()
                    Domain.update(purge=self.code_id).where(Domain.content_id == del_item,
                                                            Domain.purge >> None).execute()
                    URL.update(purge=self.code_id).where(URL.content_id == del_item, URL.purge >> None).execute()
                    IP.update(purge=self.code_id).where(IP.content_id == del_item, IP.purge >> None).execute()

        # Entries that are new in the dump: insert the item and its resources.
        if add_id_set:
            include_time = str()
            urgency_type = int()
            entry_type = int()
            block_type = str()
            hash_value = str()
            with self.transact.atomic():
                for new_item in add_id_set:
                    logger.info('New Item, IP, Domain, URL id: %s.', new_item)
                    new_item_xml = tree_xml.find(".//content[@id='" + str(new_item) + "']")
                    for data_xml in new_item_xml.iter():
                        if data_xml.tag == 'content':
                            content_id = int(data_xml.attrib['id'])
                            # urgencyType and blockType are optional attributes.
                            urgency_type = int(data_xml.attrib.get('urgencyType', 0))
                            include_time = self.date_time_xml_to_db(data_xml.attrib['includeTime'])
                            block_type = data_xml.attrib.get('blockType', 'default')
                            entry_type = int(data_xml.attrib['entryType'])
                            hash_value = data_xml.attrib['hash']
                        if data_xml.tag == 'decision':
                            decision_date = data_xml.attrib['date']
                            decision_number = data_xml.attrib['number']
                            decision_org = data_xml.attrib['org']
                            # The Item row is created here, so the url/domain/ip
                            # branches below rely on <decision> coming first.
                            item_new = Item(content_id=content_id, includeTime=include_time,
                                            urgencyType=urgency_type, entryType=entry_type, blockType=block_type,
                                            hashRecord=hash_value, decision_date=decision_date,
                                            decision_num=decision_number, decision_org=decision_org,
                                            add=self.code_id)
                            item_new.save()
                        if data_xml.tag == 'url':
                            if not self.only_ascii(data_xml.text):
                                # Split on the first ':' only and keep later
                                # colons unescaped, so a port or a colon in the
                                # path/query no longer truncates the URL.
                                url_split = str(data_xml.text).split(':', 1)
                                url = url_split[0] + ':' + urllib.parse.quote(url_split[1], safe='/:')
                            else:
                                url = data_xml.text
                            URL.create(item=item_new.id, content_id=content_id, url=url, add=self.code_id)
                        if data_xml.tag == 'domain':
                            if not self.only_ascii(data_xml.text):
                                domain = (str(data_xml.text).encode('idna')).decode()
                            else:
                                domain = data_xml.text
                            Domain.create(item=item_new.id, content_id=content_id, domain=domain, add=self.code_id)
                        if data_xml.tag == 'ip':
                            ip = data_xml.text
                            IP.create(item=item_new.id, content_id=content_id, ip=ip, add=self.code_id)
                        if data_xml.tag == 'ipSubnet':
                            net = data_xml.text.split('/')
                            ip = net[0]
                            mask = net[1]
                            IP.create(item=item_new.id, content_id=content_id, ip=ip, mask=mask, add=self.code_id)

        # Entries present on both sides: reconcile those whose hash changed.
        url_db_set = set()
        url_xml_set = set()
        ip_db_set = set()
        ip_xml_set = set()
        sub_ip_xml_set = set()
        sub_ip_db_set = set()
        domain_db_set = set()
        domain_xml_set = set()
        data_update = False
        with self.transact.atomic():
            for item_xml in list_xml:
                for data_xml in item_xml.iter():
                    if data_xml.tag == 'content':
                        content_id = int(data_xml.attrib['id'])
                        hash_value = data_xml.attrib['hash']
                        item_db = Item.get(Item.content_id == content_id, Item.purge >> None)

                        if hash_value != item_db.hashRecord:
                            logger.info('Hashes not equal, update hash id: %s', content_id)
                            urgency_type = int(data_xml.attrib.get('urgencyType', 0))
                            include_time = self.date_time_xml_to_db(data_xml.attrib['includeTime'])
                            block_type = data_xml.attrib.get('blockType', 'default')
                            entry_type = int(data_xml.attrib['entryType'])
                            item_db.hashRecord = hash_value
                            data_update = True
                        else:
                            # Unchanged entry: skip its child elements entirely.
                            data_update = False
                            break

                    if data_xml.tag == 'decision':
                        decision_date = data_xml.attrib['date']
                        decision_number = data_xml.attrib['number']
                        decision_org = data_xml.attrib['org']
                        # Field changes are applied to item_db in memory and
                        # persisted by the single item_db.save() further down.
                        if str(item_db.includeTime) != include_time:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML includeTime: %s.', include_time)
                            logger.info('DB includeTime: %s.', item_db.includeTime)
                            item_db.includeTime = include_time
                        if item_db.urgencyType != urgency_type:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML urgencyType: %s.', urgency_type)
                            logger.info('DB urgencyType: %s.', item_db.urgencyType)
                            item_db.urgencyType = urgency_type
                        if item_db.blockType != block_type:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML blockType: %s.', block_type)
                            logger.info('DB blockType: %s.', item_db.blockType)
                            item_db.blockType = block_type
                        if item_db.entryType != entry_type:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML entryType: %s.', entry_type)
                            logger.info('DB entryType: %s.', item_db.entryType)
                            item_db.entryType = entry_type
                        if str(item_db.decision_date) != decision_date:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML date: %s.', decision_date)
                            logger.info('DB date: %s.', str(item_db.decision_date))
                            item_db.decision_date = decision_date
                        if item_db.decision_num != decision_number:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML number: %s.', decision_number)
                            logger.info('DB number: %s.', item_db.decision_num)
                            item_db.decision_num = decision_number
                        if item_db.decision_org != decision_org:
                            logger.info('content_id: %s.', content_id)
                            logger.info('XML org: %s.', decision_org)
                            logger.info('DB org: %s.', item_db.decision_org)
                            item_db.decision_org = decision_org

                    if data_xml.tag == 'url':
                        if not self.only_ascii(data_xml.text):
                            # Same normalisation as for newly inserted URLs.
                            url_split = str(data_xml.text).split(':', 1)
                            url = url_split[0] + ':' + urllib.parse.quote(url_split[1], safe='/:')
                        else:
                            url = data_xml.text
                        url_xml_set.add(url)

                    if data_xml.tag == 'domain':
                        if not self.only_ascii(data_xml.text):
                            domain = (str(data_xml.text).encode('idna')).decode()
                        else:
                            domain = data_xml.text
                        domain_xml_set.add(domain)

                    if data_xml.tag == 'ip':
                        ip_xml_set.add(data_xml.text)

                    if data_xml.tag == 'ipSubnet':
                        sub_ip_xml_set.add(data_xml.text)

                if data_update:
                    # URLs: purge the ones gone from the dump, add the new ones.
                    url_db = URL.select().where(URL.item == item_db.id, URL.purge >> None)

                    for url_item in url_db:
                        url_db_set.add(url_item.url)
                    if url_db_set != url_xml_set:
                        common_url_set = url_xml_set.intersection(url_db_set)
                        delete_url_set = url_db_set.difference(common_url_set)
                        add_url_set = url_xml_set.difference(common_url_set)
                        if delete_url_set:
                            logger.info('Delete id %s URL: %s', content_id, delete_url_set)
                            for delete_url in delete_url_set:
                                URL.update(purge=self.code_id).where(URL.item == item_db.id, URL.url == delete_url,
                                                                     URL.purge >> None).execute()
                        if add_url_set:
                            logger.info('Add id %s URL: %s', content_id, add_url_set)
                            for add_url in add_url_set:
                                URL.create(item=item_db.id, content_id=item_db.content_id, url=add_url,
                                           add=self.code_id)
                    url_db_set.clear()
                    url_xml_set.clear()

                    # Domains.
                    domain_db = Domain.select().where(Domain.item == item_db.id, Domain.purge >> None)

                    for domain_item in domain_db:
                        domain_db_set.add(domain_item.domain)
                    if domain_db_set != domain_xml_set:
                        common_domain_set = domain_xml_set.intersection(domain_db_set)
                        delete_domain_set = domain_db_set.difference(common_domain_set)
                        add_domain_set = domain_xml_set.difference(common_domain_set)
                        if delete_domain_set:
                            logger.info('Delete id %s Domain: %s', content_id, delete_domain_set)
                            for delete_domain in delete_domain_set:
                                Domain.update(purge=self.code_id).where(Domain.item == item_db.id,
                                                                        Domain.domain == delete_domain,
                                                                        Domain.purge >> None).execute()
                        if add_domain_set:
                            logger.info('Add id %s Domain: %s', content_id, add_domain_set)
                            for add_domain in add_domain_set:
                                Domain.create(item=item_db.id, content_id=item_db.content_id, domain=add_domain,
                                              add=self.code_id)
                    domain_db_set.clear()
                    domain_xml_set.clear()

                    # Single addresses (mask == 32).
                    ip_db = IP.select().where(IP.item == item_db.id, IP.mask == 32, IP.purge >> None)

                    for ip_item in ip_db:
                        ip_db_set.add(ip_item.ip)
                    if ip_db_set != ip_xml_set:
                        common_ip_set = ip_xml_set.intersection(ip_db_set)
                        delete_ip_set = ip_db_set.difference(common_ip_set)
                        add_ip_set = ip_xml_set.difference(common_ip_set)
                        if delete_ip_set:
                            logger.info('Delete id %s ip: %s', content_id, delete_ip_set)
                            for delete_ip in delete_ip_set:
                                IP.update(purge=self.code_id).where(IP.item == item_db.id, IP.ip == delete_ip,
                                                                    IP.mask == 32, IP.purge >> None).execute()
                        if add_ip_set:
                            logger.info('Add id %s ip: %s', content_id, add_ip_set)
                            for add_ip in add_ip_set:
                                IP.create(item=item_db.id, content_id=item_db.content_id, ip=add_ip,
                                          add=self.code_id)
                    ip_db_set.clear()
                    ip_xml_set.clear()

                    # Subnets (mask < 32), compared in "ip/mask" text form.
                    sub_ip_db = IP.select().where(IP.item == item_db.id, IP.mask < 32, IP.purge >> None)

                    for sub_ip_item in sub_ip_db:
                        sub_ip_db_set.add(str(sub_ip_item.ip) + '/' + str(sub_ip_item.mask))
                    if sub_ip_db_set != sub_ip_xml_set:
                        common_sub_ip_set = sub_ip_xml_set.intersection(sub_ip_db_set)
                        delete_sub_ip_set = sub_ip_db_set.difference(common_sub_ip_set)
                        add_sub_ip_set = sub_ip_xml_set.difference(common_sub_ip_set)
                        if delete_sub_ip_set:
                            logger.info('Delete id %s subnet: %s', content_id, delete_sub_ip_set)
                            for delete_sub_ip in delete_sub_ip_set:
                                del_subnet = str(delete_sub_ip).split('/')
                                del_ip = del_subnet[0]
                                del_mask = del_subnet[1]
                                IP.update(purge=self.code_id).where(IP.item == item_db.id, IP.ip == del_ip,
                                                                    IP.mask == del_mask, IP.purge >> None).execute()
                        if add_sub_ip_set:
                            logger.info('Add id %s subnet: %s', content_id, add_sub_ip_set)
                            for add_sub_ip in add_sub_ip_set:
                                add_subnet = str(add_sub_ip).split('/')
                                add_ip = add_subnet[0]
                                add_mask = add_subnet[1]
                                IP.create(item=item_db.id, content_id=item_db.content_id, ip=add_ip, mask=add_mask,
                                          add=self.code_id)
                    item_db.save()
                    sub_ip_db_set.clear()
                    sub_ip_xml_set.clear()

        # The cleaner runs whether or not anything changed.
        if self.check_diff():
            self.cleaner()
            return 1
        else:
            logger.info('no updates')
            self.cleaner()
            return 2
    def parse_dump(self):
        """Synchronise the database with ``dump.xml`` found in ``self.path_py``.

        Steps, in order:

        1. Store the ``updateTime`` / ``updateTimeUrgently`` attributes of the
           XML root in the ``Dump`` table (``lastDumpDate`` and
           ``lastDumpDateUrgently`` rows) as integer timestamps.
        2. Compare the ids present in the dump with the ``content_id`` of all
           non-purged ``Item`` rows.
        3. For ids that disappeared from the dump, mark the ``Item`` and its
           ``Domain``/``URL``/``IP`` rows as purged with ``self.code_id``.
        4. For ids that are new in the dump, create ``Item``, ``URL``,
           ``Domain`` and ``IP`` rows tagged with ``add=self.code_id``.
        5. For every element whose ``hash`` differs from the stored
           ``hashRecord``, update the changed ``Item`` fields and reconcile
           its URL / domain / IP / subnet rows against the dump.

        Returns:
            0 if ``dump.xml`` does not exist, 1 if ``self.check_diff()``
            reports changes (``self.cleaner()`` is run first), 2 otherwise.
        """
        dump_path = self.path_py + '/dump.xml'
        if not os.path.exists(dump_path):
            # The placeholder must be '%s'; the former 's%' made the logging
            # module fail while formatting, so the path was never reported.
            logger.info('dump.xml not found: %s', dump_path)
            return 0
        logger.info('dump.xml already exists.')
        tree_xml = ElementTree().parse(dump_path)

        # Only the first 19 characters (YYYY-MM-DDTHH:MM:SS) are parsed, so
        # anything following the seconds is ignored.
        dt = datetime.strptime(tree_xml.attrib['updateTime'][:19],
                               '%Y-%m-%dT%H:%M:%S')
        update_time = int(time.mktime(dt.timetuple()))
        Dump.update(value=update_time).where(
            Dump.param == 'lastDumpDate').execute()
        logger.info('Got updateTime: %s.', update_time)

        dt = datetime.strptime(tree_xml.attrib['updateTimeUrgently'][:19],
                               '%Y-%m-%dT%H:%M:%S')
        update_time_urgently = int(time.mktime(dt.timetuple()))
        Dump.update(value=update_time_urgently).where(
            Dump.param == 'lastDumpDateUrgently').execute()
        logger.info('Got updateTimeUrgently: %s.', update_time_urgently)

        # Every element carrying an ``id`` attribute is treated as an entry.
        list_xml = tree_xml.findall(".//*[@id]")
        id_set_dump = {
            int(content_xml.attrib['id']) for content_xml in list_xml
        }
        id_set_db = {
            content_db.content_id
            for content_db in Item.select(Item.content_id).where(
                Item.purge >> None)
        }

        delete_id_set = id_set_db - id_set_dump
        add_id_set = id_set_dump - id_set_db

        if delete_id_set:
            with self.transact.atomic():
                for del_item in delete_id_set:
                    logger.info('Full delete Item, IP, Domain, URL id: %s.',
                                del_item)

                    # Rows are not removed here, only stamped with the
                    # current code id in their ``purge`` column.
                    Item.update(purge=self.code_id).where(
                        Item.content_id == del_item,
                        Item.purge >> None).execute()
                    Domain.update(purge=self.code_id).where(
                        Domain.content_id == del_item,
                        Domain.purge >> None).execute()
                    URL.update(purge=self.code_id).where(
                        URL.content_id == del_item,
                        URL.purge >> None).execute()
                    IP.update(purge=self.code_id).where(
                        IP.content_id == del_item, IP.purge >> None).execute()

        if add_id_set:
            include_time = ''
            urgency_type = 0
            entry_type = 0
            block_type = ''
            hash_value = ''
            with self.transact.atomic():
                for new_item in add_id_set:
                    logger.info('New Item, IP, Domain, URL id: %s.', new_item)
                    new_item_xml = tree_xml.find(".//content[@id='" +
                                                 str(new_item) + "']")
                    for data_xml in new_item_xml.iter():
                        if data_xml.tag == 'content':
                            content_id = int(data_xml.attrib['id'])
                            # urgencyType and blockType are optional.
                            urgency_type = int(
                                data_xml.attrib.get('urgencyType', 0))
                            include_time = self.date_time_xml_to_db(
                                data_xml.attrib['includeTime'])
                            block_type = data_xml.attrib.get(
                                'blockType', 'default')
                            entry_type = int(data_xml.attrib['entryType'])
                            hash_value = data_xml.attrib['hash']
                        if data_xml.tag == 'decision':
                            # The Item row is created once the decision
                            # element is reached; the url/domain/ip branches
                            # below rely on ``item_new`` from this branch.
                            decision_date = data_xml.attrib['date']
                            decision_number = data_xml.attrib['number']
                            decision_org = data_xml.attrib['org']
                            item_new = Item(content_id=content_id,
                                            includeTime=include_time,
                                            urgencyType=urgency_type,
                                            entryType=entry_type,
                                            blockType=block_type,
                                            hashRecord=hash_value,
                                            decision_date=decision_date,
                                            decision_num=decision_number,
                                            decision_org=decision_org,
                                            add=self.code_id)
                            item_new.save()
                        if data_xml.tag == 'url':
                            if not self.only_ascii(data_xml.text):
                                # NOTE(review): only the first two
                                # ':'-separated pieces are kept, so a port
                                # and everything after it appear to be
                                # dropped for non-ASCII URLs - confirm.
                                url_split = str(data_xml.text).split(':')
                                url = url_split[0] + ':' + urllib.parse.quote(
                                    url_split[1])
                            else:
                                url = data_xml.text
                            URL.create(item=item_new.id,
                                       content_id=content_id,
                                       url=url,
                                       add=self.code_id)
                        if data_xml.tag == 'domain':
                            if not self.only_ascii(data_xml.text):
                                domain = (str(
                                    data_xml.text).encode('idna')).decode()
                            else:
                                domain = data_xml.text
                            Domain.create(item=item_new.id,
                                          content_id=content_id,
                                          domain=domain,
                                          add=self.code_id)
                        if data_xml.tag == 'ip':
                            IP.create(item=item_new.id,
                                      content_id=content_id,
                                      ip=data_xml.text,
                                      add=self.code_id)
                        if data_xml.tag == 'ipSubnet':
                            net = data_xml.text.split('/')
                            IP.create(item=item_new.id,
                                      content_id=content_id,
                                      ip=net[0],
                                      mask=net[1],
                                      add=self.code_id)

        # The XML-side sets live outside the loop and are emptied only after
        # an entry has been reconciled, exactly as before.
        url_xml_set = set()
        ip_xml_set = set()
        sub_ip_xml_set = set()
        domain_xml_set = set()
        data_update = False
        with self.transact.atomic():
            for item_xml in list_xml:
                for data_xml in item_xml.iter():
                    if data_xml.tag == 'content':
                        content_id = int(data_xml.attrib['id'])
                        hash_value = data_xml.attrib['hash']
                        item_db = Item.get(Item.content_id == content_id,
                                           Item.purge >> None)

                        if hash_value == item_db.hashRecord:
                            # Unchanged entry: skip its children entirely.
                            data_update = False
                            break

                        logger.info('Hashes not equal, update hash id: %s',
                                    content_id)
                        urgency_type = int(
                            data_xml.attrib.get('urgencyType', 0))
                        include_time = self.date_time_xml_to_db(
                            data_xml.attrib['includeTime'])
                        block_type = data_xml.attrib.get(
                            'blockType', 'default')
                        entry_type = int(data_xml.attrib['entryType'])
                        item_db.hashRecord = hash_value
                        data_update = True

                    if data_xml.tag == 'decision':
                        decision_date = data_xml.attrib['date']
                        decision_number = data_xml.attrib['number']
                        decision_org = data_xml.attrib['org']
                        # (model attribute, label used in the log, value from
                        # the dump, compare against str() of the DB value?)
                        tracked_fields = (
                            ('includeTime', 'includeTime', include_time,
                             True),
                            ('urgencyType', 'urgencyType', urgency_type,
                             False),
                            ('blockType', 'blockType', block_type, False),
                            ('entryType', 'entryType', entry_type, False),
                            ('decision_date', 'date', decision_date, True),
                            ('decision_num', 'number', decision_number,
                             False),
                            ('decision_org', 'org', decision_org, False),
                        )
                        for field, label, xml_value, as_text in tracked_fields:
                            db_value = getattr(item_db, field)
                            current = str(db_value) if as_text else db_value
                            if current != xml_value:
                                logger.info('content_id: %s.', content_id)
                                logger.info('XML %s: %s.', label, xml_value)
                                logger.info('DB %s: %s.', label, db_value)
                                # Persisted by item_db.save() further down.
                                setattr(item_db, field, xml_value)

                    if data_xml.tag == 'url':
                        if not self.only_ascii(data_xml.text):
                            # NOTE(review): same ':' splitting caveat as in
                            # the "new item" branch above.
                            url_split = str(data_xml.text).split(':')
                            url = url_split[0] + ':' + urllib.parse.quote(
                                url_split[1])
                        else:
                            url = data_xml.text
                        url_xml_set.add(url)

                    if data_xml.tag == 'domain':
                        if not self.only_ascii(data_xml.text):
                            domain = (str(
                                data_xml.text).encode('idna')).decode()
                        else:
                            domain = data_xml.text
                        domain_xml_set.add(domain)

                    if data_xml.tag == 'ip':
                        ip_xml_set.add(data_xml.text)

                    if data_xml.tag == 'ipSubnet':
                        sub_ip_xml_set.add(data_xml.text)

                if data_update:
                    # --- URLs ---
                    url_db_set = {
                        url_item.url
                        for url_item in URL.select().where(
                            URL.item == item_db.id, URL.purge >> None)
                    }
                    delete_url_set = url_db_set - url_xml_set
                    add_url_set = url_xml_set - url_db_set
                    if delete_url_set:
                        logger.info('Delete id %s URL: %s', content_id,
                                    delete_url_set)
                        for delete_url in delete_url_set:
                            URL.update(purge=self.code_id).where(
                                URL.item == item_db.id,
                                URL.url == delete_url,
                                URL.purge >> None).execute()
                    if add_url_set:
                        logger.info('Add id %s URL: %s', content_id,
                                    add_url_set)
                        for add_url in add_url_set:
                            URL.create(item=item_db.id,
                                       content_id=item_db.content_id,
                                       url=add_url,
                                       add=self.code_id)
                    url_xml_set.clear()

                    # --- Domains ---
                    domain_db_set = {
                        domain_item.domain
                        for domain_item in Domain.select().where(
                            Domain.item == item_db.id, Domain.purge >> None)
                    }
                    delete_domain_set = domain_db_set - domain_xml_set
                    add_domain_set = domain_xml_set - domain_db_set
                    if delete_domain_set:
                        logger.info('Delete id %s Domain: %s', content_id,
                                    delete_domain_set)
                        for delete_domain in delete_domain_set:
                            Domain.update(purge=self.code_id).where(
                                Domain.item == item_db.id,
                                Domain.domain == delete_domain,
                                Domain.purge >> None).execute()
                    if add_domain_set:
                        logger.info('Add id %s Domain: %s', content_id,
                                    add_domain_set)
                        for add_domain in add_domain_set:
                            Domain.create(item=item_db.id,
                                          content_id=item_db.content_id,
                                          domain=add_domain,
                                          add=self.code_id)
                    domain_xml_set.clear()

                    # --- Single addresses (rows stored with mask 32) ---
                    ip_db_set = {
                        ip_item.ip
                        for ip_item in IP.select().where(
                            IP.item == item_db.id, IP.mask == 32,
                            IP.purge >> None)
                    }
                    delete_ip_set = ip_db_set - ip_xml_set
                    add_ip_set = ip_xml_set - ip_db_set
                    if delete_ip_set:
                        logger.info('Delete id %s ip: %s', content_id,
                                    delete_ip_set)
                        for delete_ip in delete_ip_set:
                            IP.update(purge=self.code_id).where(
                                IP.item == item_db.id, IP.ip == delete_ip,
                                IP.mask == 32, IP.purge >> None).execute()
                    if add_ip_set:
                        logger.info('Add id %s ip: %s', content_id,
                                    add_ip_set)
                        for add_ip in add_ip_set:
                            IP.create(item=item_db.id,
                                      content_id=item_db.content_id,
                                      ip=add_ip,
                                      add=self.code_id)
                    ip_xml_set.clear()

                    # --- Subnets (rows stored with mask below 32) ---
                    sub_ip_db_set = {
                        str(sub_ip_item.ip) + '/' + str(sub_ip_item.mask)
                        for sub_ip_item in IP.select().where(
                            IP.item == item_db.id, IP.mask < 32,
                            IP.purge >> None)
                    }
                    delete_sub_ip_set = sub_ip_db_set - sub_ip_xml_set
                    add_sub_ip_set = sub_ip_xml_set - sub_ip_db_set
                    if delete_sub_ip_set:
                        logger.info('Delete id %s subnet: %s', content_id,
                                    delete_sub_ip_set)
                        for delete_sub_ip in delete_sub_ip_set:
                            del_subnet = str(delete_sub_ip).split('/')
                            IP.update(purge=self.code_id).where(
                                IP.item == item_db.id,
                                IP.ip == del_subnet[0],
                                IP.mask == del_subnet[1],
                                IP.purge >> None).execute()
                    if add_sub_ip_set:
                        logger.info('Add id %s subnet: %s', content_id,
                                    add_sub_ip_set)
                        for add_sub_ip in add_sub_ip_set:
                            add_subnet = str(add_sub_ip).split('/')
                            IP.create(item=item_db.id,
                                      content_id=item_db.content_id,
                                      ip=add_subnet[0],
                                      mask=add_subnet[1],
                                      add=self.code_id)
                    # Write the hash and any changed Item fields.
                    item_db.save()
                    sub_ip_xml_set.clear()

        if self.check_diff():
            self.cleaner()
            return 1
        logger.info('no updates')
        return 2