def _normalization_delete_record(self): """ Нормализация удаленных и вновь добавленных доменов. То есть если домен был удален и зарегистрирован, у него должна быть одна история :return: """ cursor = self.connection.cursor(MySQLdb.cursors.DictCursor) sql = """SELECT DISTINCT domain_id AS domain_id FROM domain_history WHERE domain_id NOT IN (SELECT id FROM domain)""" cursor.execute(sql) data = cursor.fetchall() count_deleted_domain = len(data) current_domain = 0 count_not_update = 0 count_update = 0 if self.show_log: BColor.ok("All deleted domain is %s" % count_deleted_domain) for row in data: if current_domain % 10000 == 1: if self.show_log: updated_percent = round(count_update / (current_domain / 100)) BColor.process( "Current domain %s/%s (updated %s percent)" % (current_domain, count_deleted_domain, updated_percent)) self.connection.commit() sql = "SELECT DISTINCT domain_name FROM domain_history WHERE domain_id = %s" % ( row['domain_id']) cursor.execute(sql) domain_history = cursor.fetchone() sql = "SELECT id FROM domain WHERE domain_name = '%s'" % ( domain_history['domain_name']) cursor.execute(sql) domain = cursor.fetchone() if domain: sql_update = "UPDATE domain_history SET domain_id = %s WHERE domain_id = %s" % ( domain['id'], row['domain_id']) cursor.execute(sql_update) count_update += 1 else: count_not_update += 1 current_domain += 1
def download_data_for_current_date() -> str: """ Скачивает все необходимы файлы для парсинга С http://archive.routeviews.org информацию по fullview, подробно описывает Павел в своем блоге http://phpsuxx.blogspot.com/2011/12/full-bgp.html http://phpsuxx.blogspot.com/2011/12/libbgpdump-debian-6-squeeze.html для остальных зоне можно посмотреть http://csa.ee/databases-zone-files/ :rtype: unicode """ now_date = datetime.date.today() delta = datetime.timedelta(days=1) now_date = now_date - delta files_list = [{ 'url': 'https://ru-tld.ru/files/RU_Domains_ru-tld.ru.gz', 'file_name': 'ru_domains.gz' }, { 'url': 'https://ru-tld.ru/files/SU_Domains_ru-tld.ru.gz', 'file_name': 'su_domains.gz' }, { 'url': 'https://ru-tld.ru/files/RF_Domains_ru-tld.ru.gz', 'file_name': 'rf_domains.gz' }, { 'url': 'http://archive.routeviews.org/bgpdata/%s/RIBS/rib.%s.0600.bz2' % (now_date.strftime("%Y.%m"), now_date.strftime("%Y%m%d")), 'file_name': 'rib.bz2' }] path = Downloader.create_data_dir() with concurrent.futures.ThreadPoolExecutor( max_workers=len(files_list)) as executor: future_to_download = { executor.submit(Downloader.download, path, item): item for item in files_list } for future in concurrent.futures.as_completed(future_to_download, timeout=1800): item = future_to_download[future] file_name = item['file_name'] url = item['url'] array_data = future.result() BColor.ok("Download url %s to %s, size is %i" % (url, file_name, array_data)) return path
def _normalization_delete_record(self): """ Нормализация удаленных и вновь добавленных доменов. То есть если домен был удален и зарегистрирован, у него должна быть одна история :return: """ cursor = self.connection.cursor(MySQLdb.cursors.DictCursor) if self.show_log: BColor.ok("Select deleted domain from domain_history") sql = """SELECT DISTINCT domain_id AS domain_id FROM domain_history WHERE domain_id NOT IN (SELECT id FROM domain)""" cursor.execute(sql) data = cursor.fetchall() count_deleted_domain = len(data) current_domain = 0 count_not_update = 0 count_update = 0 if self.show_log: BColor.ok("All deleted domain is %s" % count_deleted_domain) for row in data: if self.show_log: BColor.process("Current domain %s/%s" % (current_domain, count_deleted_domain)) BColor.ok("Updated %s, not updated %s" % (count_update, count_not_update)) sql = "SELECT DISTINCT domain_name FROM domain_history WHERE domain_id = %s" % (row['domain_id']) BColor.warning(sql) cursor.execute(sql) domain_history = cursor.fetchone() sql = "SELECT id FROM domain WHERE domain_name = '%s'" % (domain_history['domain_name']) BColor.warning(sql) cursor.execute(sql) domain = cursor.fetchone() if domain: if self.show_log: BColor.warning("Domain %s (%s) has new domain_id = %s" % (domain_history['domain_name'], row['domain_id'], domain['id'])) sql_update = "UPDATE domain_history SET domain_id = %s WHERE domain_id = %s" % (domain['id'], row['domain_id']) cursor.execute(sql_update) count_update += 1 else: count_not_update += 1 current_domain += 1
try: if check_prog_run(PROGRAM_NAME): BColor.error("Program %s already running" % PROGRAM_NAME) sys.exit(1) parser = argparse.ArgumentParser(add_help=True, version='1.0') parser.add_argument('-d', '--dir', type=str, help="Do`t download data, use exist from dir", action="store") parser.add_argument('-s', '--show_verbose', help="Show verbose log", action="count") parser.add_argument('-u', '--update_statistic', help="Update statistic after update domain", action="count") parser.add_argument('-D', '--delete_old', type=bool, help="Do`t delete removed domains", action="store") parser.add_argument('-n', '--name_server', type=str, help="Set name server", action="store") args = parser.parse_args() if args.show_verbose: BColor.ok("Use verbose") if not args.dir: BColor.process("Download files") path = Downloader.download_data_for_current_date() BColor.ok("Path to work dir %s" % path) BColor.process("Unzip file") converter = Converter(path, delete_work_dir=(not args.show_verbose)) BColor.process("Parsing rib file") converter.parce_file_rib_file_to() BColor.process("Get AS list") as_list_text = converter.convert_rib_to_net_as()
if __name__ == "__main__": try: if check_prog_run(PROGRAM_NAME): BColor.error("Program %s already running" % PROGRAM_NAME) sys.exit(1) parser = argparse.ArgumentParser(add_help=True, version='1.0') parser.add_argument('-d', '--dir', type=str, help="Do`t download data, use exist from dir", action="store") parser.add_argument('-s', '--show_verbose', help="Show verbose log", action="count") parser.add_argument('-D', '--delete_old', type=bool, help="Do`t delete removed domains", action="store") parser.add_argument('-n', '--name_server', type=str, help="Set name server", action="store") args = parser.parse_args() if args.show_verbose: BColor.ok("Use verbose") if not args.dir: BColor.process("Download files") path = Downloader.download_data_for_current_date() BColor.ok("Path to work dir %s" % path) BColor.process("Unzip file") converter = Converter(path, delete_work_dir=(not args.show_verbose)) BColor.process("Parsing rib file") converter.parce_file_rib_file_to() BColor.process("Get AS list") as_list_text = converter.convert_rib_to_net_as()