Beispiel #1
0
    def _normalization_delete_record(self):
        """
        Нормализация удаленных и вновь добавленных доменов. То есть если домен был удален и зарегистрирован,
        у него должна быть одна история
        :return:
        """
        cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

        sql = """SELECT DISTINCT domain_id AS domain_id FROM domain_history
WHERE domain_id NOT IN (SELECT id FROM domain)"""
        cursor.execute(sql)
        data = cursor.fetchall()

        count_deleted_domain = len(data)
        current_domain = 0
        count_not_update = 0
        count_update = 0

        if self.show_log:
            BColor.ok("All deleted domain is %s" % count_deleted_domain)

        for row in data:
            if current_domain % 10000 == 1:
                if self.show_log:
                    updated_percent = round(count_update /
                                            (current_domain / 100))
                    BColor.process(
                        "Current domain %s/%s (updated %s percent)" %
                        (current_domain, count_deleted_domain,
                         updated_percent))

                self.connection.commit()

            sql = "SELECT DISTINCT domain_name FROM domain_history WHERE domain_id = %s" % (
                row['domain_id'])
            cursor.execute(sql)
            domain_history = cursor.fetchone()

            sql = "SELECT id FROM domain WHERE domain_name = '%s'" % (
                domain_history['domain_name'])
            cursor.execute(sql)
            domain = cursor.fetchone()

            if domain:
                sql_update = "UPDATE domain_history SET domain_id = %s WHERE domain_id = %s" % (
                    domain['id'], row['domain_id'])
                cursor.execute(sql_update)
                count_update += 1
            else:
                count_not_update += 1

            current_domain += 1
Beispiel #2
0
    def download_data_for_current_date() -> str:
        """
        Download every file the parser needs and return the working directory.

        Three zone archives (RU, SU, RF) are taken from ru-tld.ru, and the BGP
        full-view RIB dump is taken from http://archive.routeviews.org.
        Background on the full-view data:
        http://phpsuxx.blogspot.com/2011/12/full-bgp.html
        http://phpsuxx.blogspot.com/2011/12/libbgpdump-debian-6-squeeze.html

        Sources for other zones can be found at
        http://csa.ee/databases-zone-files/

        The RIB URL is built from yesterday's date (today minus one day) and
        points at the ``0600`` dump of that day.  All files are fetched in
        parallel, one worker thread per file, and waiting for the whole set is
        limited to 1800 seconds.

        :return: the directory returned by ``Downloader.create_data_dir()``
        :rtype: str
        """
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        rib_url = (
            'http://archive.routeviews.org/bgpdata/%s/RIBS/rib.%s.0600.bz2' %
            (yesterday.strftime("%Y.%m"), yesterday.strftime("%Y%m%d")))

        downloads = [
            {
                'url': 'https://ru-tld.ru/files/RU_Domains_ru-tld.ru.gz',
                'file_name': 'ru_domains.gz',
            },
            {
                'url': 'https://ru-tld.ru/files/SU_Domains_ru-tld.ru.gz',
                'file_name': 'su_domains.gz',
            },
            {
                'url': 'https://ru-tld.ru/files/RF_Domains_ru-tld.ru.gz',
                'file_name': 'rf_domains.gz',
            },
            {
                'url': rib_url,
                'file_name': 'rib.bz2',
            },
        ]

        path = Downloader.create_data_dir()

        pool = concurrent.futures.ThreadPoolExecutor(
            max_workers=len(downloads))
        with pool:
            # Remember which description each future belongs to so the
            # completion message can name the right url and file.
            pending = {}
            for entry in downloads:
                pending[pool.submit(Downloader.download, path, entry)] = entry

            finished = concurrent.futures.as_completed(pending, timeout=1800)
            for task in finished:
                entry = pending[task]
                target_name = entry['file_name']
                source_url = entry['url']
                # result() re-raises whatever the download raised.
                size = task.result()
                BColor.ok("Download url %s to %s, size is %i" %
                          (source_url, target_name, size))

        return path
    def _normalization_delete_record(self):
        """
        Нормализация удаленных и вновь добавленных доменов. То есть если домен был удален и зарегистрирован,
        у него должна быть одна история
        :return:
        """
        cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

        if self.show_log:
            BColor.ok("Select deleted domain from domain_history")

        sql = """SELECT DISTINCT domain_id AS domain_id FROM domain_history
WHERE domain_id NOT IN (SELECT id FROM domain)"""
        cursor.execute(sql)
        data = cursor.fetchall()

        count_deleted_domain = len(data)
        current_domain = 0
        count_not_update = 0
        count_update = 0

        if self.show_log:
            BColor.ok("All deleted domain is %s" % count_deleted_domain)

        for row in data:
            if self.show_log:
                BColor.process("Current domain %s/%s" % (current_domain, count_deleted_domain))
                BColor.ok("Updated %s, not updated %s" % (count_update, count_not_update))

            sql = "SELECT DISTINCT domain_name FROM domain_history WHERE domain_id = %s" % (row['domain_id'])
            BColor.warning(sql)
            cursor.execute(sql)
            domain_history = cursor.fetchone()

            sql = "SELECT id FROM domain WHERE domain_name = '%s'" % (domain_history['domain_name'])
            BColor.warning(sql)
            cursor.execute(sql)
            domain = cursor.fetchone()

            if domain:
                if self.show_log:
                    BColor.warning("Domain %s (%s) has new domain_id = %s" % (domain_history['domain_name'],
                                                                              row['domain_id'],
                                                                              domain['id']))

                sql_update = "UPDATE domain_history SET domain_id = %s WHERE domain_id = %s" % (domain['id'],
                                                                                                row['domain_id'])
                cursor.execute(sql_update)
                count_update += 1
            else:
                count_not_update += 1

            current_domain += 1
    try:
        if check_prog_run(PROGRAM_NAME):
            BColor.error("Program %s already running" % PROGRAM_NAME)
            sys.exit(1)

        parser = argparse.ArgumentParser(add_help=True, version='1.0')

        parser.add_argument('-d', '--dir', type=str, help="Do`t download data, use exist from dir", action="store")
        parser.add_argument('-s', '--show_verbose', help="Show verbose log", action="count")
        parser.add_argument('-u', '--update_statistic', help="Update statistic after update domain", action="count")
        parser.add_argument('-D', '--delete_old', type=bool, help="Do`t delete removed domains", action="store")
        parser.add_argument('-n', '--name_server', type=str, help="Set name server", action="store")
        args = parser.parse_args()

        if args.show_verbose:
            BColor.ok("Use verbose")

        if not args.dir:
            BColor.process("Download files")
            path = Downloader.download_data_for_current_date()
            BColor.ok("Path to work dir %s" % path)

            BColor.process("Unzip file")
            converter = Converter(path, delete_work_dir=(not args.show_verbose))

            BColor.process("Parsing rib file")
            converter.parce_file_rib_file_to()

            BColor.process("Get AS list")
            as_list_text = converter.convert_rib_to_net_as()
Beispiel #5
0
if __name__ == "__main__":
    try:
        if check_prog_run(PROGRAM_NAME):
            BColor.error("Program %s already running" % PROGRAM_NAME)
            sys.exit(1)

        parser = argparse.ArgumentParser(add_help=True, version='1.0')

        parser.add_argument('-d', '--dir', type=str, help="Do`t download data, use exist from dir", action="store")
        parser.add_argument('-s', '--show_verbose', help="Show verbose log", action="count")
        parser.add_argument('-D', '--delete_old', type=bool, help="Do`t delete removed domains", action="store")
        parser.add_argument('-n', '--name_server', type=str, help="Set name server", action="store")
        args = parser.parse_args()

        if args.show_verbose:
            BColor.ok("Use verbose")

        if not args.dir:
            BColor.process("Download files")
            path = Downloader.download_data_for_current_date()
            BColor.ok("Path to work dir %s" % path)

            BColor.process("Unzip file")
            converter = Converter(path, delete_work_dir=(not args.show_verbose))

            BColor.process("Parsing rib file")
            converter.parce_file_rib_file_to()

            BColor.process("Get AS list")
            as_list_text = converter.convert_rib_to_net_as()