def _normalization_delete_record(self):
    """
    Normalize deleted and re-added domains: if a domain was deleted and then
    registered again, it must keep a single history.
    :return:
    """
    cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

    if self.show_log:
        BColor.ok("Select deleted domain from domain_history")

    sql = """SELECT DISTINCT domain_id AS domain_id FROM domain_history
             WHERE domain_id NOT IN (SELECT id FROM domain)"""
    cursor.execute(sql)
    data = cursor.fetchall()

    count_deleted_domain = len(data)
    current_domain = 0
    count_not_update = 0
    count_update = 0

    if self.show_log:
        BColor.ok("All deleted domain is %s" % count_deleted_domain)

    for row in data:
        if self.show_log:
            BColor.process("Current domain %s/%s" % (current_domain, count_deleted_domain))
            BColor.ok("Updated %s, not updated %s" % (count_update, count_not_update))

        # Find the domain name the orphaned history rows belong to
        sql = "SELECT DISTINCT domain_name FROM domain_history WHERE domain_id = %s" % (row['domain_id'])
        BColor.warning(sql)
        cursor.execute(sql)
        domain_history = cursor.fetchone()

        # Check whether the same name was registered again under a new id
        sql = "SELECT id FROM domain WHERE domain_name = '%s'" % (domain_history['domain_name'])
        BColor.warning(sql)
        cursor.execute(sql)
        domain = cursor.fetchone()

        if domain:
            if self.show_log:
                BColor.warning("Domain %s (%s) has new domain_id = %s"
                               % (domain_history['domain_name'], row['domain_id'], domain['id']))

            # Re-attach the old history rows to the new domain id
            sql_update = "UPDATE domain_history SET domain_id = %s WHERE domain_id = %s" \
                         % (domain['id'], row['domain_id'])
            cursor.execute(sql_update)
            count_update += 1
        else:
            count_not_update += 1

        current_domain += 1
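# The method above builds its SQL with "%" string interpolation, which breaks on
# names containing quotes and is open to SQL injection. Below is a minimal sketch
# of the same two lookups using MySQLdb's parameterized execute(); the helper name
# `_find_new_domain_id` is illustrative and not part of the original class.
def _find_new_domain_id(self, old_domain_id):
    cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)
    cursor.execute("SELECT DISTINCT domain_name FROM domain_history WHERE domain_id = %s",
                   (old_domain_id,))
    history = cursor.fetchone()
    if history is None:
        return None
    cursor.execute("SELECT id FROM domain WHERE domain_name = %s",
                   (history['domain_name'],))
    domain = cursor.fetchone()
    return domain['id'] if domain else None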
def start_load_and_resolver_domain(net_array, work_path, delete_old=True, count=COUNT_THREAD,
                                   verbose=False, count_cycle=10, resolve_dns='127.0.0.1'):
    """
    Start the resolver processes
    :param net_array: unicode|list
    :type work_path: unicode
    :type delete_old: bool
    :type count: int
    :type verbose: bool
    :type count_cycle: int
    :type resolve_dns: unicode
    :return:
    """
    if verbose:
        log_path = os.path.abspath(os.path.join(work_path, 'log'))
        if not os.path.exists(log_path):
            os.makedirs(log_path)
    else:
        log_path = False

    # Split the domain files into count * count_cycle buckets, one per worker run
    count_array_data = count * count_cycle
    data_for_process = []
    for thread_number in range(0, count_array_data):
        data_for_process.append([])

    counter_all = {}
    for prefix in PREFIX_LIST:
        BColor.process("Load prefix_list %s " % prefix)
        file_prefix = os.path.join(work_path, prefix + "_domains")
        file_rib_data = open(file_prefix)

        BColor.process("Load file %s " % file_prefix)
        line = file_rib_data.readline()
        counter_all[prefix] = 0

        while line:
            data_for_process[counter_all[prefix] % count_array_data].append({'line': line, 'prefix': prefix})
            counter_all[prefix] += 1
            line = file_rib_data.readline()

        BColor.process("All load zone %s - %s" % (prefix, counter_all[prefix]))

    process_list = []
    for i in range(0, count_array_data):
        BColor.process("Start process to work %s %s" % (i, len(data_for_process[i])))
        resolver = Resolver(i, data_for_process[i], resolve_dns, net_array, log_path)
        resolver.daemon = True
        process_list.append(resolver)
        resolver.start()

        # After every `count` started processes, wait for the batch to finish
        if i != 0 and i % count == 0:
            BColor.process("Wait for threads finish...")
            for process in process_list:
                try:
                    # timeout 2 days
                    process.join(1728000)
                except KeyboardInterrupt:
                    BColor.warning("Interrupted by user")
                    return
            process_list = []

    if len(process_list):
        for process in process_list:
            try:
                # timeout 2 days
                process.join(1728000)
            except KeyboardInterrupt:
                BColor.warning("Interrupted by user")
                return

    if delete_old:
        Resolver.delete_not_updated_today(counter_all)
def start_load_and_resolver_domain(net_array, work_path, delete_old=True, count_thread=COUNT_THREAD,
                                   verbose=False, count_cycle=2, resolve_dns='127.0.0.1'):
    """
    Start the resolver processes
    :param net_array: unicode|list
    :type work_path: unicode
    :type delete_old: bool
    :type count_thread: int
    :type verbose: bool
    :type count_cycle: int
    :type resolve_dns: unicode
    :return:
    """
    if verbose:
        log_path = os.path.abspath(os.path.join(work_path, 'log'))
        if not os.path.exists(log_path):
            os.makedirs(log_path)
    else:
        log_path = False

    # Split the domain files into count_thread * count_cycle buckets, one per worker run
    count_array_data = count_thread * count_cycle
    data_for_process = []
    for thread_number in range(0, count_array_data):
        data_for_process.append([])

    counter_all = {}
    for prefix in PREFIX_LIST_ZONE.keys():
        BColor.process("Load prefix_list %s " % prefix)
        file_prefix = os.path.join(work_path, prefix + "_domains")
        file_domain_data = open(file_prefix)

        BColor.process("Load file %s " % file_prefix)
        line = file_domain_data.readline()
        counter_all[prefix] = 0

        while line:
            data_for_process[counter_all[prefix] % count_array_data].append({
                'line': line,
                'prefix': prefix
            })
            counter_all[prefix] += 1
            line = file_domain_data.readline()

        BColor.process("All load zone %s - %s" % (prefix, counter_all[prefix]))

    process_list = []
    for i in range(0, count_array_data):
        BColor.process("Start process to work %s %s" % (i, len(data_for_process[i])))
        resolver = Resolver(i, data_for_process[i], resolve_dns, net_array, log_path)
        resolver.daemon = True
        process_list.append(resolver)
        resolver.start()

        # After every `count_thread` started processes, wait for the batch to finish
        if i != 0 and i % count_thread == 0:
            BColor.process("Wait for threads finish...")
            for process in process_list:
                try:
                    # timeout 2 days
                    process.join(1728000)
                except KeyboardInterrupt:
                    BColor.warning("Interrupted by user")
                    return
            process_list = []

    if len(process_list):
        for process in process_list:
            try:
                # timeout 2 days
                process.join(1728000)
            except KeyboardInterrupt:
                BColor.warning("Interrupted by user")
                return

    if delete_old:
        Resolver.delete_not_updated_today(counter_all)
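# The loading loop above distributes file lines round-robin across
# count_thread * count_cycle buckets, one bucket per Resolver process.
# A minimal self-contained sketch of that distribution; the helper name
# `_split_round_robin` is illustrative only.
def _split_round_robin(lines, bucket_count):
    buckets = [[] for _ in range(bucket_count)]
    for index, line in enumerate(lines):
        buckets[index % bucket_count].append(line)
    return buckets

# _split_round_robin(["a", "b", "c", "d", "e"], 2) -> [["a", "c", "e"], ["b", "d"]]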
def start_load_and_resolver_domain(net_array: SubnetTree.SubnetTree, work_path: str, delete_old: bool = True,
                                   count_thread: int = COUNT_THREAD, verbose: bool = False,
                                   resolve_dns: str = '127.0.0.1') -> None:
    """
    Start the resolver processes
    :return:
    """
    if verbose:
        log_path = os.path.abspath(os.path.join(work_path, 'log'))
        if not os.path.exists(log_path):
            os.makedirs(log_path)
    else:
        log_path = False

    # Load every domain from the zone files into a shared queue.
    # Example of a line from such a file:
    # 0--2.RU REGRU-RU 15.06.2019 15.06.2020 16.07.2020 1
    queue_data = multiprocessing.Queue(MAX_DOMAIN_COUNT)
    queue_statistic = multiprocessing.Queue(count_thread + 5)
    counter_all = {}

    for prefix in PREFIX_LIST_ZONE.keys():
        BColor.process("Load prefix_list %s " % prefix)
        file_prefix = os.path.join(work_path, prefix + "_domains")
        file_domain_data = open(file_prefix)

        BColor.process("Load file %s " % file_prefix)
        line = file_domain_data.readline()
        counter_all[prefix] = 0

        while line:
            queue_data.put({'line': line, 'prefix': prefix})
            counter_all[prefix] += 1
            line = file_domain_data.readline()
            # if counter_all[prefix] > 10000:
            #     break

        BColor.process("All load zone %s - %s" % (prefix, counter_all[prefix]))

    # Start the domain parsing processes
    start_time = datetime.now()
    registrant_mutex = multiprocessing.Lock()
    process_list = []
    dist_ip = multiprocessing.Manager().dict()

    for i in range(0, count_thread):
        resolver = Resolver(i, queue_data, resolve_dns, net_array, log_path,
                            registrant_mutex, queue_statistic, dist_ip)
        resolver.daemon = True
        process_list.append(resolver)
        resolver.start()

    # Report progress of the domain parsing while the workers are running
    status_prefix = os.path.join(work_path, "status_parse_domain")
    process_status = Status(queue_data, status_prefix)
    process_status.daemon = True
    process_status.start()

    if len(process_list):
        for process in process_list:
            try:
                # timeout 2 days
                process.join(1728000)
            except KeyboardInterrupt:
                BColor.warning("Interrupted by user")
                return

    process_status.join(10)
    queue_data.close()

    # Aggregate the per-process statistics reported through queue_statistic
    diff = datetime.now() - start_time
    all_time = 0
    all_count = 0

    while not queue_statistic.empty():
        statistic_data = queue_statistic.get()
        all_time += statistic_data['time_diff']
        all_count += statistic_data['count']

    performance_per_process = all_time / all_count
    performance = diff.seconds / all_count
    BColor.process("Performance %f per process, total time %i per process, total count %i, "
                   "performance %f, all time %i"
                   % (performance_per_process, all_time, all_count, performance, diff.seconds))

    # After all domains are processed, delete domains that were not updated today
    if delete_old:
        Resolver.delete_not_updated_today(counter_all)
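# A minimal usage sketch for the entry point above. It assumes `work_path` already
# contains the per-zone files (e.g. "ru_domains" when PREFIX_LIST_ZONE has a "ru" key)
# and that the ASN subnet tree is normally built elsewhere; the paths and values
# below are illustrative only.
if __name__ == '__main__':
    asn_tree = SubnetTree.SubnetTree()
    asn_tree["192.0.2.0/24"] = "64496 192.0.2.0/24 EXAMPLE-AS"  # hypothetical mapping

    start_load_and_resolver_domain(net_array=asn_tree,
                                   work_path="/tmp/domain_data",
                                   delete_old=False,
                                   count_thread=4,
                                   verbose=True,
                                   resolve_dns='127.0.0.1')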