def download_data_for_current_date():
    """
    Download all files needed for parsing.

    From R01, per-zone domain dumps:
        https://partner.r01.ru/zones/ru_domains.gz
        https://partner.r01.ru/zones/su_domains.gz
        https://partner.r01.ru/zones/rf_domains.gz

    From http://archive.routeviews.org the BGP full-view dump, described in
    detail by Pavel in his blog:
        http://phpsuxx.blogspot.com/2011/12/full-bgp.html
        http://phpsuxx.blogspot.com/2011/12/libbgpdump-debian-6-squeeze.html

    Dumps for other zones: http://csa.ee/databases-zone-files/

    :return: path to the directory the files were downloaded into
    :rtype: unicode
    """
    # RouteViews publishes RIB dumps with a delay, so take yesterday's date.
    now_date = datetime.date.today() - datetime.timedelta(days=1)

    files_list = [
        {'url': 'https://partner.r01.ru/zones/ru_domains.gz',
         'file_name': 'ru_domains.gz'},
        {'url': 'https://partner.r01.ru/zones/su_domains.gz',
         'file_name': 'su_domains.gz'},
        {'url': 'https://partner.r01.ru/zones/rf_domains.gz',
         'file_name': 'rf_domains.gz'},
        {'url': 'http://archive.routeviews.org/bgpdata/%s/RIBS/rib.%s.0600.bz2'
                % (now_date.strftime("%Y.%m"), now_date.strftime("%Y%m%d")),
         'file_name': 'rib.bz2'},
    ]

    path = Downloader.create_data_dir()
    for item in files_list:
        path_file = os.path.abspath(os.path.join(path, item['file_name']))
        BColor.process("Download %s to %s " % (item['url'], path_file))

        # BUG FIX: the old code ran shutil.rmtree() on a *file* path with
        # ignore_errors=True, which silently failed (rmtree only removes
        # directories), so a stale file was never deleted.
        if os.path.exists(path_file):
            os.remove(path_file)

        Downloader.download_file(item['url'], path_file)
        if os.path.getsize(path_file) == 0:
            error_message = "Can`t download file %s to %s" % (item['url'], path_file)
            BColor.error(error_message)
            raise Exception(error_message)

    return path
def download_data_for_current_date() -> str:
    """
    Download all files needed for parsing.

    From http://archive.routeviews.org the BGP full-view dump, described in
    detail by Pavel in his blog:
        http://phpsuxx.blogspot.com/2011/12/full-bgp.html
        http://phpsuxx.blogspot.com/2011/12/libbgpdump-debian-6-squeeze.html

    Dumps for other zones: http://csa.ee/databases-zone-files/

    :return: path to the directory the files were downloaded into
    """
    # RouteViews publishes RIB dumps with a delay, so take yesterday's date.
    now_date = datetime.date.today()
    delta = datetime.timedelta(days=1)
    now_date = now_date - delta

    files_list = [{
        'url': 'https://ru-tld.ru/files/RU_Domains_ru-tld.ru.gz',
        'file_name': 'ru_domains.gz'
    }, {
        'url': 'https://ru-tld.ru/files/SU_Domains_ru-tld.ru.gz',
        'file_name': 'su_domains.gz'
    }, {
        'url': 'https://ru-tld.ru/files/RF_Domains_ru-tld.ru.gz',
        'file_name': 'rf_domains.gz'
    }, {
        'url': 'http://archive.routeviews.org/bgpdata/%s/RIBS/rib.%s.0600.bz2'
               % (now_date.strftime("%Y.%m"), now_date.strftime("%Y%m%d")),
        'file_name': 'rib.bz2'
    }]

    path = Downloader.create_data_dir()

    # Fetch all files concurrently, one worker thread per file.
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=len(files_list)) as executor:
        future_to_download = {
            executor.submit(Downloader.download, path, item): item
            for item in files_list
        }
        # as_completed() raises if the whole batch exceeds the 30 min timeout.
        for future in concurrent.futures.as_completed(future_to_download,
                                                      timeout=1800):
            item = future_to_download[future]
            file_name = item['file_name']
            url = item['url']
            # result() re-raises any worker exception; on success it is the
            # downloaded file size returned by Downloader.download.
            array_data = future.result()
            BColor.ok("Download url %s to %s, size is %i"
                      % (url, file_name, array_data))

    return path
def _normalization_delete_record(self):
    """
    Normalize deleted and re-registered domains: when a domain was deleted
    and later registered again, its old history rows are re-pointed at the
    new domain id so the domain keeps a single continuous history.

    :return: None
    """
    cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

    # History rows whose domain no longer exists in `domain`.
    sql = """SELECT DISTINCT domain_id AS domain_id
FROM domain_history WHERE domain_id NOT IN (SELECT id FROM domain)"""
    cursor.execute(sql)
    data = cursor.fetchall()

    count_deleted_domain = len(data)
    current_domain = 0
    count_not_update = 0
    count_update = 0

    if self.show_log:
        BColor.ok("All deleted domain is %s" % count_deleted_domain)

    for row in data:
        # Commit and report progress every 10000 processed rows.
        if current_domain % 10000 == 1:
            if self.show_log:
                updated_percent = round(count_update / (current_domain / 100))
                BColor.process(
                    "Current domain %s/%s (updated %s percent)"
                    % (current_domain, count_deleted_domain, updated_percent))
            self.connection.commit()

        # SAFETY FIX: use parameterized queries instead of %-interpolating
        # values straight into the SQL text.
        cursor.execute(
            "SELECT DISTINCT domain_name FROM domain_history "
            "WHERE domain_id = %s", (row['domain_id'],))
        domain_history = cursor.fetchone()

        cursor.execute("SELECT id FROM domain WHERE domain_name = %s",
                       (domain_history['domain_name'],))
        domain = cursor.fetchone()

        if domain:
            # The domain exists again under a new id: re-point its history.
            cursor.execute(
                "UPDATE domain_history SET domain_id = %s "
                "WHERE domain_id = %s",
                (domain['id'], row['domain_id']))
            count_update += 1
        else:
            count_not_update += 1

        current_domain += 1
def download(path: str, item: dict):
    """
    Download a single file described by *item* into directory *path*.

    :param path: destination directory
    :param item: dict with keys 'url' and 'file_name'
    :raises Exception: when the downloaded file is empty
    :return: size of the downloaded file in bytes
    """
    file_name = item['file_name']
    url = item['url']
    path_file = os.path.abspath(os.path.join(path, file_name))
    BColor.process("Download %s to %s " % (url, path_file))

    # BUG FIX: shutil.rmtree() on a regular file with ignore_errors=True
    # silently does nothing, so the stale file was never removed.
    if os.path.exists(path_file):
        os.remove(path_file)

    Downloader.download_file(url, path_file)

    size = os.path.getsize(path_file)
    if size == 0:
        error_message = "Can`t download file %s to %s" % (url, path_file)
        BColor.error(error_message)
        raise Exception(error_message)

    return size
def unzip_file(path_file):
    """
    Unpack a gzip archive by running an external gunzip process.

    :type path_file: unicode
    :return: True on success, False when gunzip exits with a non-zero code
    """
    runner = SubprocessRunner(command=Gunzip(path_file).get_command())
    runner.run()
    runner.wait(write_output_in_log=False)

    return_code = runner.process.returncode
    if return_code == 0:
        return True

    BColor.error("unzip p.process.returncode = %s" % return_code)
    return False
def unzip_file(path_file: str) -> bool:
    """
    Unpack a gzip archive by running an external gunzip process.

    :param path_file: path of the .gz file to unpack in place
    :return: True on success, False when gunzip exits with a non-zero code
    """
    command = Gunzip(path_file).get_command()
    proc = SubprocessRunner(command=command)
    proc.run()
    proc.wait(write_output_in_log=False)

    succeeded = proc.process.returncode == 0
    if not succeeded:
        BColor.error("unzip p.process.returncode = %s" % proc.process.returncode)
    return succeeded
def update_all_statistic(self):
    """
    Recalculate every statistic, wait for all background statistic
    processes to finish, then compute the grouped NS statistic and report
    the total elapsed time.

    :return: None
    """
    start_time = datetime.datetime.now()

    self.update_as_count_statistic()
    self.update_a_domain_old_count_statistic()
    self.update_ns_domain_old_count_statistic()
    self.update_as_domain_old_count_statistic()
    self.update_registrant_count_statistic()
    self.update_ns_count_statistic()
    self.update_mx_count_statistic()
    self.update_domain_count_statistic()
    self.update_a_count_statistic()
    self.update_cname_count_statistic()

    # beget statistic
    self.update_beget_statistic()
    self.update_provider_statistic('netangels', 44128)
    self.update_provider_statistic('timeweb', 9123)

    # BUG FIX: iterate over a copy — removing items from self.process_list
    # while iterating it skipped every other process, so some processes
    # were never joined and never removed from the list.
    for process in list(self.process_list):
        try:
            # timeout 2 days
            process.join(1728000)
            self.process_list.remove(process)
        except KeyboardInterrupt:
            return

    # Wait separately for the NS-server statistic so it can be grouped.
    self.update_ns_domain_group_count_statistic()

    for process in list(self.process_list):
        try:
            # timeout 2 days
            process.join(1728000)
            self.process_list.remove(process)
        except KeyboardInterrupt:
            return

    diff = datetime.datetime.now() - start_time
    BColor.process("Statistic done to %i second" % diff.seconds)
def delete_not_updated_today(count_all_domain=False):
    """
    Delete domains that were not refreshed today and reset the per-day
    `load_today` flag (with table triggers toggled around the bulk UPDATE).

    :type count_all_domain: bool|dict -- False to process all zones at once,
        or a mapping of zone key -> number of domains in today's zone file
        (then each TLD is processed separately)
    :return: None
    """
    connection = get_mysql_connection()
    cursor = connection.cursor(MySQLdb.cursors.DictCursor)

    sql_trigger_enable = "SET @TRIGGER_DISABLED = 0"
    sql_trigger_disable = "SET @TRIGGER_DISABLED = 1"

    if not count_all_domain:
        sql = "DELETE FROM domain WHERE load_today = 'N'"
        BColor.process(sql)
        cursor.execute(sql)

        cursor.execute(sql_trigger_disable)
        sql = "UPDATE domain SET load_today = 'N'"
        BColor.process(sql)
        cursor.execute(sql)
        cursor.execute(sql_trigger_enable)
    else:
        for key_tld, tld_count_in_file in count_all_domain.items():
            tld = PREFIX_LIST_ZONE[key_tld]

            # SAFETY FIX: parameterized query instead of %-interpolation.
            cursor.execute(
                "SELECT count(*) as domain_count FROM domain WHERE tld = %s",
                (tld,))
            count_in_base = cursor.fetchone()
            BColor.process("Count zone (%s) in file %s, in base %s"
                           % (str(key_tld), str(tld_count_in_file),
                              str(count_in_base['domain_count'])))

            sql = "DELETE FROM domain WHERE load_today = 'N' AND tld = '%s'" % tld
            BColor.process(sql)
            cursor.execute(sql)

            cursor.execute(sql_trigger_disable)
            sql = "UPDATE domain SET load_today = 'N' WHERE tld = '%s'" % tld
            BColor.process(sql)
            cursor.execute(sql)
            cursor.execute(sql_trigger_enable)

    connection.commit()
    connection.close()
def download_file(url, data_dir):
    """
    Download *url* into *data_dir* by running an external wget process.

    :type url: unicode
    :type data_dir: unicode
    :rtype: bool
    """
    runner = SubprocessRunner(command=Wget(url, data_dir).get_command())
    runner.run()
    runner.wait(write_output_in_log=False)

    return_code = runner.process.returncode
    if return_code == 0:
        return True

    BColor.error("wget p.process.returncode = %s" % return_code)
    return False
def delete_not_updated_today():
    """
    Delete rpki rows that were not reloaded today, then reset the
    `load_today` flag for the next run.

    :return: None
    """
    connection = get_mysql_connection()
    cursor = connection.cursor(MySQLdb.cursors.DictCursor)

    delete_sql = "DELETE FROM rpki WHERE load_today = 'N'"
    BColor.process(delete_sql)
    cursor.execute(delete_sql)

    # Reset the flag with triggers switched off (presumably so table
    # triggers skip this bulk update — mirrors the domain-table variant).
    cursor.execute("SET @TRIGGER_DISABLED = 1")
    reset_sql = "UPDATE rpki SET load_today = 'N'"
    BColor.process(reset_sql)
    cursor.execute(reset_sql)
    cursor.execute("SET @TRIGGER_DISABLED = 0")

    connection.commit()
    connection.close()
def download_data_for_current_date():
    """
    Download all files needed for parsing.

    From R01, per-zone domain dumps:
        https://partner.r01.ru/zones/ru_domains.gz
        https://partner.r01.ru/zones/su_domains.gz
        https://partner.r01.ru/zones/rf_domains.gz

    From http://archive.routeviews.org the BGP full-view dump, described in
    detail by Pavel in his blog:
        http://phpsuxx.blogspot.com/2011/12/full-bgp.html
        http://phpsuxx.blogspot.com/2011/12/libbgpdump-debian-6-squeeze.html

    Dumps for other zones: http://csa.ee/databases-zone-files/

    :return: path to the directory the files were downloaded into
    :rtype: unicode
    """
    # RouteViews publishes RIB dumps with a delay, so take yesterday's date.
    now_date = datetime.date.today() - datetime.timedelta(days=1)

    files_list = [{'url': 'https://partner.r01.ru/zones/ru_domains.gz',
                   'file_name': 'ru_domains.gz'},
                  {'url': 'https://partner.r01.ru/zones/su_domains.gz',
                   'file_name': 'su_domains.gz'},
                  {'url': 'https://partner.r01.ru/zones/rf_domains.gz',
                   'file_name': 'rf_domains.gz'},
                  {'url': 'http://archive.routeviews.org/bgpdata/%s/RIBS/rib.%s.0600.bz2'
                          % (now_date.strftime("%Y.%m"), now_date.strftime("%Y%m%d")),
                   'file_name': 'rib.bz2'}]

    path = Downloader.create_data_dir()
    for item in files_list:
        path_file = os.path.abspath(os.path.join(path, item['file_name']))
        BColor.process("Download %s to %s " % (item['url'], path_file))

        # BUG FIX: shutil.rmtree() on a regular file with ignore_errors=True
        # silently fails, so the stale file was never removed.
        if os.path.exists(path_file):
            os.remove(path_file)

        Downloader.download_file(item['url'], path_file)
        if os.path.getsize(path_file) == 0:
            error_message = "Can`t download file %s to %s" % (item['url'], path_file)
            BColor.error(error_message)
            raise Exception(error_message)

    return path
def run(self):
    """
    Resolve DNS data for domains pulled from the shared work queue.

    Each queue item is a dict {'line': <TSV row from a zone file>,
    'prefix': <zone prefix>}.  For every domain the worker resolves DNS
    records, builds an UPDATE statement and executes it, re-connecting and
    retrying once when MySQL fails.  Per-worker throughput statistics are
    pushed to self.queue_statistic before returning.

    :return: 0 on success (including an empty queue), 1 on a fatal error
    """
    self.write_to_file(BColor.process("Process %s running" % self.number))
    added_domains = 0
    re_prefix = re.compile(r'\s*')
    start_time = datetime.now()

    try:
        self._connect_mysql()
        cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)
        # rpki = RpkiChecker()

        while not self.queue.empty():
            domain_data = self.queue.get(timeout=5)
            try:
                # Zone-file layout: 0=domain, 1=registrant, 2=register date,
                # 3=register end date, 4=free date, 5=delegated flag
                data = domain_data['line'].split("\t")
                domain = re.sub(re_prefix, '', data[0])
                delegated = re.sub(re_prefix, '', data[5])

                if delegated == '1':
                    delegated = 'Y'
                    domain_dns_data_array = self.get_ns_record(domain)
                    as_array = self._get_asn_array(domain_dns_data_array)
                    # try:
                    #     status = rpki.check_ip(domain_dns_data_array['a'][0], as_array[0])
                    #     rpki_status = status['code']
                    # except:
                    #     rpki_status = -2
                    rpki_status = -2
                else:
                    delegated = 'N'
                    domain_dns_data_array = {}
                    as_array = {}
                    rpki_status = -2

                register_info = {
                    'registrant': re.sub(re_prefix, '', data[1]),
                    'register_date': re.sub(re_prefix, '', data[2]),
                    'register_end_date': re.sub(re_prefix, '', data[3]),
                    'free_date': re.sub(re_prefix, '', data[4]),
                    'delegated': delegated,
                    'domain': domain,
                    'prefix': domain_data['prefix']
                }

                run_sql = self._update_domain_row(domain_dns_data_array,
                                                  as_array, register_info,
                                                  rpki_status)
                # Strip artifacts of bytes-to-str formatting in the SQL text.
                run_sql = run_sql.replace("b\'", '')
                run_sql = run_sql.replace("\'\'", '\'')
                self.write_to_file(run_sql + ";", sql=True)

                try:
                    cursor.execute(run_sql)
                    self.connection.commit()
                except Exception:
                    self.write_to_file(
                        BColor.error("MySQL exceptions (SQL %s)" % run_sql))
                    self.write_to_file(BColor.error(traceback.format_exc()))
                    # try again after re-connecting
                    time.sleep(5)
                    self._connect_mysql()
                    cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)
                    cursor.execute(run_sql)
                    self.connection.commit()

                added_domains += 1

                # READ http://habrahabr.ru/post/178637/
                data = None
                domain = None
                delegated = None
                domain_dns_data_array = None
                as_array = None
                register_info = None
                domain_id = None
                run_sql = None
            except Exception:
                data = domain_data['line'].split("\t")
                domain = re.sub(re_prefix, '', data[0])
                self.write_to_file(
                    BColor.error("Domain %s work failed process number %i"
                                 % (domain, self.number)))
                self.write_to_file(BColor.error(traceback.format_exc()))

        diff = datetime.now() - start_time
        # BUG FIX: guard against ZeroDivisionError when no domain was handled.
        performance = diff.seconds / added_domains if added_domains else 0
        self.queue_statistic.put({'time_diff': diff.seconds,
                                  'performance': performance,
                                  'count': added_domains})
        self.write_to_file(
            BColor.process(
                "Process %i done, processed %i domain (performance %f)"
                % (self.number, added_domains, performance),
                pid=self.number))
        self.connection.close()
        return 0
    except queue.Empty:
        diff = datetime.now() - start_time
        # BUG FIX: same ZeroDivisionError guard on the empty-queue exit path.
        performance = diff.seconds / added_domains if added_domains else 0
        self.queue_statistic.put({'time_diff': diff.seconds,
                                  'performance': performance,
                                  'count': added_domains})
        self.write_to_file(
            BColor.process(
                "Process %i done queue is Empty = %i, processed %i domain (performance %f)"
                % (self.number, self.queue.empty(), added_domains, performance),
                pid=self.number))
        return 0
    except Exception:
        self.write_to_file(
            BColor.error("Process failed %i" % self.number, pid=self.number))
        self.write_to_file(BColor.error(traceback.format_exc()))
        return 1
def load_prefix_list_from_var(prefix_list): """ Загрузка данных из переменной :return: """ subnet_list_tree = SubnetTree.SubnetTree() for index in prefix_list: subnet_list_tree[as_bytes(index)] = as_bytes(prefix_list[index]) return subnet_list_tree if __name__ == "__main__": try: if check_prog_run(PROGRAM_NAME): BColor.error("Program %s already running" % PROGRAM_NAME) sys.exit(1) parser = argparse.ArgumentParser(add_help=True, version='1.0') parser.add_argument('-d', '--dir', type=str, help="Do`t download data, use exist from dir", action="store") parser.add_argument('-s', '--show_verbose', help="Show verbose log", action="count") parser.add_argument('-u', '--update_statistic', help="Update statistic after update domain", action="count") parser.add_argument('-D', '--delete_old', type=bool, help="Do`t delete removed domains", action="store") parser.add_argument('-n', '--name_server', type=str, help="Set name server", action="store") args = parser.parse_args() if args.show_verbose: BColor.ok("Use verbose") if not args.dir:
def load_prefix_list_from_var(prefix_list): """ Загрузка данных из переменной :return: """ subnet_list_tree = SubnetTree.SubnetTree() for index in prefix_list: subnet_list_tree[as_bytes(index)] = as_bytes(prefix_list[index]) return subnet_list_tree if __name__ == "__main__": try: if check_prog_run(PROGRAM_NAME): BColor.error("Program %s already running" % PROGRAM_NAME) sys.exit(1) parser = argparse.ArgumentParser(add_help=True, version='1.0') parser.add_argument('-d', '--dir', type=str, help="Do`t download data, use exist from dir", action="store") parser.add_argument('-s', '--show_verbose', help="Show verbose log", action="count") parser.add_argument('-D', '--delete_old', type=bool, help="Do`t delete removed domains", action="store") parser.add_argument('-n', '--name_server', type=str, help="Set name server", action="store") args = parser.parse_args() if args.show_verbose: BColor.ok("Use verbose") if not args.dir: BColor.process("Download files")
def run(self):
    """
    Resolve DNS data for every domain assigned to this worker.

    self.domains holds dicts {'line': <TSV row from a zone file>,
    'prefix': <zone prefix>}.  For each domain the worker resolves DNS
    records, inserts a new `domain` row or updates the existing one, and
    retries the statement once after re-connecting when MySQL fails.

    :return: 0 on success, 1 on a fatal error
    """
    try:
        self.write_to_file(BColor.process("Process %s running, need work %s domains" % (self.number, len(self.domains))))
        added_domains = 0
        re_prefix = re.compile(r'\s*')
        self._connect_mysql()
        cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

        for domain_data in self.domains:
            try:
                # Zone-file layout: 0=domain, 1=registrant, 2=register date,
                # 3=register end date, 4=free date, 5=delegated flag
                data = domain_data['line'].split("\t")
                domain = re.sub(re_prefix, '', data[0])
                delegated = re.sub(re_prefix, '', data[5])

                if delegated == '1':
                    delegated = 'Y'
                    domain_dns_data_array = self._get_ns_record(domain)
                    as_array = self._get_asn_array(domain_dns_data_array)
                else:
                    # Not delegated: no DNS data to resolve.
                    delegated = 'N'
                    domain_dns_data_array = {}
                    as_array = {}

                register_info = {'registrant': re.sub(re_prefix, '', data[1]),
                                 'register_date': re.sub(re_prefix, '', data[2]),
                                 'register_end_date': re.sub(re_prefix, '', data[3]),
                                 'free_date': re.sub(re_prefix, '', data[4]),
                                 'delegated': delegated,
                                 'domain': domain,
                                 'prefix': domain_data['prefix']}

                # INSERT for new domains, UPDATE for known ones.
                cursor.execute("SELECT id FROM domain WHERE domain_name = LOWER('%s')" % domain)
                domain_id = cursor.fetchone()
                if not domain_id:
                    run_sql = self._insert_domain(domain_dns_data_array, as_array, register_info)
                else:
                    run_sql = self._update_domain(domain_dns_data_array, as_array, domain_id['id'], register_info)

                self.write_to_file(run_sql + ";", sql=True)
                try:
                    cursor.execute(run_sql)
                    self.connection.commit()
                except:
                    self.write_to_file(BColor.error("MySQL exceptions (SQL %s)" % run_sql))
                    self.write_to_file(BColor.error(traceback.format_exc()))

                    # try again after re-connecting
                    time.sleep(5)
                    self._connect_mysql()
                    cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)
                    cursor.execute(run_sql)
                    self.connection.commit()

                added_domains += 1
                if (added_domains % 1000) == 0:
                    self.write_to_file(BColor.process("Thread %d success resolved %d domains"
                                                      % (self.number, added_domains),
                                                      pid=self.number))

                # Drop references to help the GC between iterations.
                # USE http://habrahabr.ru/post/178637/
                data = None
                domain = None
                delegated = None
                domain_dns_data_array = None
                as_array = None
                register_info = None
                domain_id = None
                run_sql = None
            except:
                # Log the failed domain and continue with the next one.
                data = domain_data['line'].split("\t")
                domain = re.sub(re_prefix, '', data[0])
                self.write_to_file(BColor.error("Domain %s work failed process number %s" % (domain, self.number)))
                self.write_to_file(BColor.error(traceback.format_exc()))

        self.write_to_file(BColor.process("Process %s done " % self.number))
        self.connection.close()
        return 0
    except:
        self.write_to_file(BColor.error("Process failed %s" % self.number))
        self.write_to_file(BColor.error(traceback.format_exc()))
        return 1
# -*- coding: utf-8 -*- from __future__ import unicode_literals __author__ = 'Alexey Y Manikin' import sys from config.main import * PROGRAM_NAME = 'update_statistic' CURRENT_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, CURRENT_DIR) logfile = os.path.join(CURRENT_DIR, '%s.debug' % PROGRAM_NAME) import traceback from helpers.helpersCollor import BColor from classes.statistic import Statistic if __name__ == "__main__": try: statistic = Statistic() statistic.update_all_statistic() except Exception as e: BColor.error("Got an exception: %s" % e.message) print(traceback.format_exc())
def run(self):
    """
    Resolve DNS data for every domain assigned to this worker.

    self.domains holds dicts {'line': <TSV row from a zone file>,
    'prefix': <zone prefix>}.  For each domain the worker resolves DNS
    records, inserts a new `domain` row or updates the existing one, and
    retries the statement once after re-connecting when MySQL fails.

    :return: 0 on success, 1 on a fatal error
    """
    try:
        self.write_to_file(
            BColor.process("Process %s running, need work %s domains"
                           % (self.number, len(self.domains))))
        added_domains = 0
        re_prefix = re.compile(r'\s*')
        self._connect_mysql()
        cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)
        # rpki = RpkiChecker()

        for domain_data in self.domains:
            try:
                # Zone-file layout: 0=domain, 1=registrant, 2=register date,
                # 3=register end date, 4=free date, 5=delegated flag
                data = domain_data['line'].split("\t")
                domain = re.sub(re_prefix, '', data[0])
                delegated = re.sub(re_prefix, '', data[5])

                if delegated == '1':
                    delegated = 'Y'
                    domain_dns_data_array = self.get_ns_record(domain)
                    as_array = self._get_asn_array(domain_dns_data_array)
                    # try:
                    #     status = rpki.check_ip(domain_dns_data_array['a'][0], as_array[0])
                    #     rpki_status = status['code']
                    # except:
                    #     rpki_status = -2
                    rpki_status = -2
                else:
                    # Not delegated: no DNS data to resolve.
                    delegated = 'N'
                    domain_dns_data_array = {}
                    as_array = {}
                    rpki_status = -2

                register_info = {
                    'registrant': re.sub(re_prefix, '', data[1]),
                    'register_date': re.sub(re_prefix, '', data[2]),
                    'register_end_date': re.sub(re_prefix, '', data[3]),
                    'free_date': re.sub(re_prefix, '', data[4]),
                    'delegated': delegated,
                    'domain': domain,
                    'prefix': domain_data['prefix']
                }

                # INSERT for new domains, UPDATE for known ones.
                cursor.execute(
                    "SELECT id FROM domain WHERE domain_name = LOWER('%s')"
                    % domain)
                domain_id = cursor.fetchone()
                if not domain_id:
                    run_sql = self._insert_domain(domain_dns_data_array,
                                                  as_array, register_info,
                                                  rpki_status, cursor)
                else:
                    run_sql = self._update_domain(domain_dns_data_array,
                                                  as_array, domain_id['id'],
                                                  register_info, rpki_status,
                                                  cursor)

                # Strip artifacts of bytes-to-str formatting in the SQL text.
                run_sql = run_sql.replace("b\'", '')
                run_sql = run_sql.replace("\'\'", '\'')
                self.write_to_file(run_sql + ";", sql=True)

                try:
                    cursor.execute(run_sql)
                    self.connection.commit()
                except Exception:
                    self.write_to_file(
                        BColor.error("MySQL exceptions (SQL %s)" % run_sql))
                    self.write_to_file(BColor.error(traceback.format_exc()))
                    # try again after re-connecting
                    time.sleep(5)
                    self._connect_mysql()
                    cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)
                    cursor.execute(run_sql)
                    self.connection.commit()

                added_domains += 1
                if (added_domains % 1000) == 0:
                    self.write_to_file(
                        BColor.process(
                            "Thread %d success resolved %d domains"
                            % (self.number, added_domains),
                            pid=self.number))

                # Drop references to help the GC between iterations.
                # READ http://habrahabr.ru/post/178637/
                data = None
                domain = None
                delegated = None
                domain_dns_data_array = None
                as_array = None
                register_info = None
                domain_id = None
                run_sql = None
            except Exception:
                # Log the failed domain and continue with the next one.
                pprint.pprint(domain_data)
                data = domain_data['line'].split("\t")
                domain = re.sub(re_prefix, '', data[0])
                self.write_to_file(
                    BColor.error("Domain %s work failed process number %s"
                                 % (domain, self.number)))
                self.write_to_file(BColor.error(traceback.format_exc()))

        self.write_to_file(BColor.process("Process %s done " % self.number))
        self.connection.close()
        return 0
    except Exception:
        self.write_to_file(BColor.error("Process failed %s" % self.number))
        self.write_to_file(BColor.error(traceback.format_exc()))
        return 1
def _normalization_delete_record(self):
    """
    Normalize deleted and re-registered domains: when a domain was removed
    and later registered again, its old history rows are re-pointed at the
    new domain id so the domain keeps a single continuous history.

    :return: None
    """
    cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

    if self.show_log:
        BColor.ok("Select deleted domain from domain_history")

    # History rows whose domain no longer exists in `domain`.
    cursor.execute("""SELECT DISTINCT domain_id AS domain_id
FROM domain_history WHERE domain_id NOT IN (SELECT id FROM domain)""")
    deleted_rows = cursor.fetchall()

    total_deleted = len(deleted_rows)
    processed = 0
    updated = 0
    skipped = 0

    if self.show_log:
        BColor.ok("All deleted domain is %s" % total_deleted)

    for deleted in deleted_rows:
        if self.show_log:
            BColor.process("Current domain %s/%s" % (processed, total_deleted))
            BColor.ok("Updated %s, not updated %s" % (updated, skipped))

        history_sql = "SELECT DISTINCT domain_name FROM domain_history WHERE domain_id = %s" % (deleted['domain_id'])
        BColor.warning(history_sql)
        cursor.execute(history_sql)
        history_row = cursor.fetchone()

        lookup_sql = "SELECT id FROM domain WHERE domain_name = '%s'" % (history_row['domain_name'])
        BColor.warning(lookup_sql)
        cursor.execute(lookup_sql)
        existing = cursor.fetchone()

        if existing:
            # The domain exists again under a new id: re-point its history.
            if self.show_log:
                BColor.warning("Domain %s (%s) has new domain_id = %s"
                               % (history_row['domain_name'],
                                  deleted['domain_id'], existing['id']))
            cursor.execute("UPDATE domain_history SET domain_id = %s WHERE domain_id = %s"
                           % (existing['id'], deleted['domain_id']))
            updated += 1
        else:
            skipped += 1

        processed += 1
def start_load_and_resolver_domain(net_array, work_path, delete_old=True,
                                   count_thread=COUNT_THREAD, verbose=False,
                                   count_cycle=2, resolve_dns='127.0.0.1'):
    """
    Start the resolver processes over the zone files in *work_path*.

    :param net_array: unicode|list
    :type work_path: unicode
    :type delete_old: bool
    :type count_thread: int
    :type verbose: bool
    :type count_cycle: int
    :type resolve_dns: unicode
    :return: None
    """
    if verbose:
        log_path = os.path.abspath(os.path.join(work_path, 'log'))
        if not os.path.exists(log_path):
            os.makedirs(log_path)
    else:
        log_path = False

    # Split all domains round-robin into count_thread * count_cycle buckets.
    count_array_data = count_thread * count_cycle
    data_for_process = []
    for thread_number in range(0, count_array_data):
        data_for_process.append([])

    counter_all = {}
    for prefix in PREFIX_LIST_ZONE.keys():
        BColor.process("Load prefix_list %s " % prefix)
        file_prefix = os.path.join(work_path, prefix + "_domains")

        # BUG FIX: the zone file was opened and never closed (resource
        # leak); a context manager guarantees it is closed.
        with open(file_prefix) as file_domain_data:
            BColor.process("Load file %s " % file_prefix)
            counter_all[prefix] = 0
            for line in file_domain_data:
                data_for_process[counter_all[prefix] % count_array_data].append({
                    'line': line,
                    'prefix': prefix
                })
                counter_all[prefix] += 1

        BColor.process("All load zone %s - %s" % (prefix, counter_all[prefix]))

    process_list = []
    for i in range(0, count_array_data):
        BColor.process("Start process to work %s %s" % (i, len(data_for_process[i])))
        resolver = Resolver(i, data_for_process[i], resolve_dns, net_array, log_path)
        resolver.daemon = True
        process_list.append(resolver)
        resolver.start()

        # After each batch of count_thread workers, wait for them to finish.
        if i != 0 and i % count_thread == 0:
            BColor.process("Wait for threads finish...")
            for process in process_list:
                try:
                    # timeout 2 days
                    process.join(1728000)
                except KeyboardInterrupt:
                    BColor.warning("Interrupted by user")
                    return
            process_list = []

    if len(process_list):
        for process in process_list:
            try:
                # timeout 2 days
                process.join(1728000)
            except KeyboardInterrupt:
                BColor.warning("Interrupted by user")
                return

    if delete_old:
        Resolver.delete_not_updated_today(counter_all)
def start_load_and_resolver_domain(net_array: SubnetTree.SubnetTree,
                                   work_path: str,
                                   delete_old: bool = True,
                                   count_thread: int = COUNT_THREAD,
                                   verbose: bool = False,
                                   resolve_dns: str = '127.0.0.1') -> None:
    """
    Start the resolver processes over the zone files in *work_path*.

    Loads every zone file onto a shared queue, spawns count_thread Resolver
    processes plus a Status reporter, waits for completion, aggregates the
    per-process statistics and optionally purges domains that were not
    refreshed today.

    :return: None
    """
    if verbose:
        log_path = os.path.abspath(os.path.join(work_path, 'log'))
        if not os.path.exists(log_path):
            os.makedirs(log_path)
    else:
        log_path = False

    # Push every domain line onto a shared queue.
    # Example line from a zone file:
    # 0--2.RU  REGRU-RU  15.06.2019  15.06.2020  16.07.2020  1
    queue_data = multiprocessing.Queue(MAX_DOMAIN_COUNT)
    queue_statistic = multiprocessing.Queue(count_thread + 5)

    counter_all = {}
    for prefix in PREFIX_LIST_ZONE.keys():
        BColor.process("Load prefix_list %s " % prefix)
        file_prefix = os.path.join(work_path, prefix + "_domains")

        # BUG FIX: the zone file was opened and never closed (resource
        # leak); a context manager guarantees it is closed.
        with open(file_prefix) as file_domain_data:
            BColor.process("Load file %s " % file_prefix)
            counter_all[prefix] = 0
            for line in file_domain_data:
                queue_data.put({'line': line, 'prefix': prefix})
                counter_all[prefix] += 1

        BColor.process("All load zone %s - %s" % (prefix, counter_all[prefix]))

    # Start the domain-parsing processes.
    start_time = datetime.now()
    registrant_mutex = multiprocessing.Lock()
    process_list = []
    dist_ip = multiprocessing.Manager().dict()

    for i in range(0, count_thread):
        resolver = Resolver(i, queue_data, resolve_dns, net_array, log_path,
                            registrant_mutex, queue_statistic, dist_ip)
        resolver.daemon = True
        process_list.append(resolver)
        resolver.start()

    # Progress reporter for the parsing stage.
    status_prefix = os.path.join(work_path, "status_parse_domain")
    process_status = Status(queue_data, status_prefix)
    process_status.daemon = True
    process_status.start()

    if len(process_list):
        for process in process_list:
            try:
                # timeout 2 days
                process.join(1728000)
            except KeyboardInterrupt:
                BColor.warning("Interrupted by user")
                return

    process_status.join(10)
    queue_data.close()

    diff = datetime.now() - start_time
    all_time = 0
    all_count = 0
    while not queue_statistic.empty():
        statistic_data = queue_statistic.get()
        all_time += statistic_data['time_diff']
        all_count += statistic_data['count']

    # BUG FIX: guard against ZeroDivisionError when no domain was processed.
    if all_count:
        performance_per_process = all_time / all_count
        performance = diff.seconds / all_count
    else:
        performance_per_process = 0
        performance = 0
    BColor.process(
        "Performance %f per process, total time %i per process, total count %i, performance %f, all time %i"
        % (performance_per_process, all_time, all_count, performance, diff.seconds))

    # Purge domains that were not refreshed during this run.
    if delete_old:
        Resolver.delete_not_updated_today(counter_all)
def delete_not_updated_today(count_all_domain=False):
    """
    Delete domains that were not refreshed today and reset the per-day
    `load_today` flag.

    When *count_all_domain* is a dict of zone -> count from today's zone
    file, a zone is only purged if the database already holds at least that
    many domains — this protects against wiping a zone after a truncated or
    partial zone file.

    :type count_all_domain: bool|dict
    :return: None
    """
    connection = get_mysql_connection()
    cursor = connection.cursor(MySQLdb.cursors.DictCursor)

    sql_trigger_enable = "SET @TRIGGER_DISABLED = 0"
    sql_trigger_disable = "SET @TRIGGER_DISABLED = 1"

    if not count_all_domain:
        sql = "DELETE FROM domain WHERE load_today = 'N'"
        BColor.process(sql)
        cursor.execute(sql)

        cursor.execute(sql_trigger_disable)
        sql = "UPDATE domain SET load_today = 'N'"
        BColor.process(sql)
        cursor.execute(sql)
        cursor.execute(sql_trigger_enable)
    else:
        # BUG FIX: dict.iteritems() is Python 2 only and raises
        # AttributeError on Python 3; items() works on both call sites'
        # expectations (the sibling implementation already uses items()).
        for key_tld, tld_count_in_file in count_all_domain.items():
            cursor.execute("SELECT count(*) as domain_count FROM domain WHERE tld = '%s'"
                           % str(key_tld))
            count_in_base = cursor.fetchone()
            BColor.process("Count zone (%s) in file %s, in base %s"
                           % (str(key_tld), str(tld_count_in_file),
                              str(count_in_base['domain_count'])))

            if count_in_base and int(count_in_base['domain_count']) >= int(tld_count_in_file):
                sql = "DELETE FROM domain WHERE load_today = 'N' AND tld = '%s'" % str(key_tld)
                BColor.process(sql)
                cursor.execute(sql)

                cursor.execute(sql_trigger_disable)
                sql = "UPDATE domain SET load_today = 'N' WHERE tld = '%s'" % str(key_tld)
                BColor.process(sql)
                cursor.execute(sql)
                cursor.execute(sql_trigger_enable)
            else:
                # Suspicious zone file: refuse to delete and report instead.
                BColor.error("TLD %s - count in file %s, count in base %s"
                             % (str(key_tld), str(count_in_base), str(tld_count_in_file)))

    connection.commit()
    connection.close()
import traceback
from helpers.helpers import check_prog_run
from classes.resolver import Resolver
from helpers.helpersCollor import BColor


def print_log(log_flag, text):
    """
    Print *text* to the console when logging is enabled.

    :type log_flag: bool
    :type text: unicode
    :return: None
    """
    if log_flag:
        print(text)


if __name__ == "__main__":
    show_log = True
    try:
        if check_prog_run(PROGRAM_NAME):
            BColor.error("Program %s already running" % PROGRAM_NAME)
            sys.exit(1)

        Resolver.delete_not_updated_today()
    except Exception as e:
        # BUG FIX: `print x` is Python 2-only syntax and Exception has no
        # `.message` attribute in Python 3; use print() and format the
        # exception object itself (matches the sibling py3 module).
        BColor.error("Got an exception: %s" % e)
        print(traceback.format_exc())
def start_load_and_resolver_domain(net_array, work_path, delete_old=True,
                                   count=COUNT_THREAD, verbose=False,
                                   count_cycle=10, resolve_dns='127.0.0.1'):
    """
    Start the resolver processes over the zone files in *work_path*.

    :param net_array: unicode|list
    :type work_path: unicode
    :type delete_old: bool
    :type count: int
    :type verbose: bool
    :type count_cycle: int
    :type resolve_dns: unicode
    :return: None
    """
    if verbose:
        log_path = os.path.abspath(os.path.join(work_path, 'log'))
        if not os.path.exists(log_path):
            os.makedirs(log_path)
    else:
        log_path = False

    # Split all domains round-robin into count * count_cycle buckets.
    count_array_data = count * count_cycle
    data_for_process = []
    for thread_number in range(0, count_array_data):
        data_for_process.append([])

    counter_all = {}
    for prefix in PREFIX_LIST:
        BColor.process("Load prefix_list %s " % prefix)
        file_prefix = os.path.join(work_path, prefix + "_domains")

        # BUG FIX: the zone file was opened and never closed (resource
        # leak); a context manager guarantees it is closed.
        with open(file_prefix) as file_rib_data:
            BColor.process("Load file %s " % file_prefix)
            counter_all[prefix] = 0
            for line in file_rib_data:
                data_for_process[counter_all[prefix] % count_array_data].append(
                    {'line': line, 'prefix': prefix})
                counter_all[prefix] += 1

        BColor.process("All load zone %s - %s" % (prefix, counter_all[prefix]))

    process_list = []
    for i in range(0, count_array_data):
        BColor.process("Start process to work %s %s" % (i, len(data_for_process[i])))
        resolver = Resolver(i, data_for_process[i], resolve_dns, net_array, log_path)
        resolver.daemon = True
        process_list.append(resolver)
        resolver.start()

        # After each batch of `count` workers, wait for them to finish.
        if i != 0 and i % count == 0:
            BColor.process("Wait for threads finish...")
            for process in process_list:
                try:
                    # timeout 2 days
                    process.join(1728000)
                except KeyboardInterrupt:
                    BColor.warning("Interrupted by user")
                    return
            process_list = []

    if len(process_list):
        for process in process_list:
            try:
                # timeout 2 days
                process.join(1728000)
            except KeyboardInterrupt:
                BColor.warning("Interrupted by user")
                return

    if delete_old:
        Resolver.delete_not_updated_today(counter_all)
logfile = os.path.join(CURRENT_DIR, '%s.debug' % PROGRAM_NAME)

import traceback
from helpers.helpers import check_prog_run
from classes.resolver import Resolver
from helpers.helpersCollor import BColor


def print_log(log_flag, text):
    """
    Print *text* to the console when logging is enabled.

    :type log_flag: bool
    :type text: unicode
    :return: None
    """
    if log_flag:
        print(text)


if __name__ == "__main__":
    show_log = True
    try:
        if check_prog_run(PROGRAM_NAME):
            BColor.error("Program %s already running" % PROGRAM_NAME)
            sys.exit(1)

        Resolver.delete_not_updated_today()
    except Exception as e:
        # BUG FIX: Exception has no `.message` attribute in Python 3 —
        # formatting the exception object itself is the portable spelling.
        BColor.error("Got an exception: %s" % e)
        print(traceback.format_exc())