def _normalization_delete_record(self):
        """
        Нормализация удаленных и вновь добавленных доменов. То есть если домен был удален и зарегистрирован,
        у него должна быть одна история
        :return:
        """
        cursor = self.connection.cursor(MySQLdb.cursors.DictCursor)

        if self.show_log:
            BColor.ok("Select deleted domain from domain_history")

        sql = """SELECT DISTINCT domain_id AS domain_id FROM domain_history
WHERE domain_id NOT IN (SELECT id FROM domain)"""
        cursor.execute(sql)
        data = cursor.fetchall()

        count_deleted_domain = len(data)
        current_domain = 0
        count_not_update = 0
        count_update = 0

        if self.show_log:
            BColor.ok("All deleted domain is %s" % count_deleted_domain)

        for row in data:
            if self.show_log:
                BColor.process("Current domain %s/%s" %
                               (current_domain, count_deleted_domain))
                BColor.ok("Updated %s, not updated %s" %
                          (count_update, count_not_update))

            sql = "SELECT DISTINCT domain_name FROM domain_history WHERE domain_id = %s" % (
                row['domain_id'])
            BColor.warning(sql)
            cursor.execute(sql)
            domain_history = cursor.fetchone()

            sql = "SELECT id FROM domain WHERE domain_name = '%s'" % (
                domain_history['domain_name'])
            BColor.warning(sql)
            cursor.execute(sql)
            domain = cursor.fetchone()

            if domain:
                if self.show_log:
                    BColor.warning("Domain %s (%s) has new domain_id = %s" %
                                   (domain_history['domain_name'],
                                    row['domain_id'], domain['id']))

                sql_update = "UPDATE domain_history SET domain_id = %s WHERE domain_id = %s" % (
                    domain['id'], row['domain_id'])
                cursor.execute(sql_update)
                count_update += 1
            else:
                count_not_update += 1

            current_domain += 1

        # Persist the updates; MySQLdb connections do not autocommit by default
        self.connection.commit()
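To make the intended effect concrete, here is a self-contained sketch of the same normalization against an in-memory SQLite database (the original targets MySQL via MySQLdb; the schema is reduced to the two columns the method touches):

import sqlite3

# Demo schema: a live `domain` table and its `domain_history`.
conn = sqlite3.connect(":memory:")
cur = conn.cursor()
cur.execute("CREATE TABLE domain (id INTEGER PRIMARY KEY, domain_name TEXT)")
cur.execute("CREATE TABLE domain_history (domain_id INTEGER, domain_name TEXT)")

# The domain existed as id 1, was deleted, then re-registered as id 2.
cur.execute("INSERT INTO domain VALUES (2, 'example.ru')")
cur.execute("INSERT INTO domain_history VALUES (1, 'example.ru')")

# Re-point orphaned history rows at the new domain id.
orphans = cur.execute(
    "SELECT DISTINCT domain_id FROM domain_history "
    "WHERE domain_id NOT IN (SELECT id FROM domain)").fetchall()
for (old_id,) in orphans:
    name = cur.execute("SELECT DISTINCT domain_name FROM domain_history "
                       "WHERE domain_id = ?", (old_id,)).fetchone()[0]
    new_row = cur.execute("SELECT id FROM domain WHERE domain_name = ?",
                          (name,)).fetchone()
    if new_row:
        cur.execute("UPDATE domain_history SET domain_id = ? WHERE domain_id = ?",
                    (new_row[0], old_id))
conn.commit()
print(cur.execute("SELECT * FROM domain_history").fetchall())  # [(2, 'example.ru')]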
    def start_load_and_resolver_domain(net_array, work_path, delete_old=True, count=COUNT_THREAD, verbose=False,
                                       count_cycle=10, resolve_dns='127.0.0.1'):
        """
        Запускам процессы резолвинга

        :param net_array: unicode|list
        :type work_path: unicode
        :type delete_old: bool
        :type count: int
        :type verbose: bool
        :type count_cycle: int
        :type resolve_dns: unicode
        :return:
        """

        if verbose:
            log_path = os.path.abspath(os.path.join(work_path, 'log'))
            if not os.path.exists(log_path):
                os.makedirs(log_path)
        else:
            log_path = False

        count_array_data = count * count_cycle
        data_for_process = []
        for thread_number in range(0, count_array_data):
            data_for_process.append([])

        counter_all = {}

        for prefix in PREFIX_LIST:
            BColor.process("Load prefix_list %s " % prefix)
            file_prefix = os.path.join(work_path, prefix+"_domains")
            file_rib_data = open(file_prefix)

            BColor.process("Load file %s " % file_prefix)
            line = file_rib_data.readline()
            counter_all[prefix] = 0
            while line:
                data_for_process[counter_all[prefix] % count_array_data].append({'line': line, 'prefix': prefix})
                counter_all[prefix] += 1
                line = file_rib_data.readline()

            BColor.process("All load zone %s -  %s" % (prefix, counter_all[prefix]))

        process_list = []
        for i in range(0, count_array_data):
            BColor.process("Start process to work %s %s" % (i, len(data_for_process[i])))
            resolver = Resolver(i, data_for_process[i], resolve_dns, net_array, log_path)
            resolver.daemon = True
            process_list.append(resolver)
            resolver.start()

            if i != 0 and i % count == 0:
                BColor.process("Wait for threads finish...")
                for process in process_list:
                    try:
                        # timeout of 1728000 s = 20 days
                        process.join(1728000)
                    except KeyboardInterrupt:
                        BColor.warning("Interrupted by user")
                        return
                process_list = []

        if len(process_list):
            for process in process_list:
                try:
                    # timeout of 1728000 s = 20 days
                    process.join(1728000)
                except KeyboardInterrupt:
                    BColor.warning("Interrupted by user")
                    return

        if delete_old:
            Resolver.delete_not_updated_today(counter_all)
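A hedged invocation sketch for this variant, assuming the function is importable at module level (in the source it may be a static method of a resolver class); the network list, work path, and DNS address below are illustrative placeholders, not values from the source:

if __name__ == '__main__':
    # Hypothetical arguments; only the parameter names come from the function above.
    start_load_and_resolver_domain(
        net_array=['10.0.0.0/8'],            # assumed: list of networks
        work_path='/var/lib/domain-parser',  # placeholder path
        delete_old=True,
        count=COUNT_THREAD,                  # module-level constant used as default
        verbose=True,
        count_cycle=10,
        resolve_dns='127.0.0.1')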
Example #4
    def start_load_and_resolver_domain(net_array,
                                       work_path,
                                       delete_old=True,
                                       count_thread=COUNT_THREAD,
                                       verbose=False,
                                       count_cycle=2,
                                       resolve_dns='127.0.0.1'):
        """
        Запускам процессы резолвинга

        :param net_array: unicode|list
        :type work_path: unicode
        :type delete_old: bool
        :type count_thread: int
        :type verbose: bool
        :type count_cycle: int
        :type resolve_dns: unicode
        :return:
        """

        if verbose:
            log_path = os.path.abspath(os.path.join(work_path, 'log'))
            if not os.path.exists(log_path):
                os.makedirs(log_path)
        else:
            log_path = False

        count_array_data = count_thread * count_cycle
        data_for_process = []
        for thread_number in range(0, count_array_data):
            data_for_process.append([])

        counter_all = {}

        for prefix in PREFIX_LIST_ZONE.keys():
            BColor.process("Load prefix_list %s " % prefix)
            file_prefix = os.path.join(work_path, prefix + "_domains")
            file_domain_data = open(file_prefix)

            BColor.process("Load file %s " % file_prefix)
            line = file_domain_data.readline()
            counter_all[prefix] = 0
            while line:
                data_for_process[counter_all[prefix] %
                                 count_array_data].append({
                                     'line': line,
                                     'prefix': prefix
                                 })
                counter_all[prefix] += 1
                line = file_domain_data.readline()

            BColor.process("All load zone %s -  %s" %
                           (prefix, counter_all[prefix]))

        process_list = []
        for i in range(0, count_array_data):
            BColor.process("Start process to work %s %s" %
                           (i, len(data_for_process[i])))
            resolver = Resolver(i, data_for_process[i], resolve_dns, net_array,
                                log_path)
            resolver.daemon = True
            process_list.append(resolver)
            resolver.start()

            if i != 0 and i % count_thread == 0:
                BColor.process("Wait for threads finish...")
                for process in process_list:
                    try:
                        # timeout of 1728000 s = 20 days
                        process.join(1728000)
                    except KeyboardInterrupt:
                        BColor.warning("Interrupted by user")
                        return
                process_list = []

        if len(process_list):
            for process in process_list:
                try:
                    # timeout of 1728000 s = 20 days
                    process.join(1728000)
                except KeyboardInterrupt:
                    BColor.warning("Interrupted by user")
                    return

        if delete_old:
            Resolver.delete_not_updated_today(counter_all)
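The round-robin split used by both variants above is easy to see in isolation; a minimal sketch with three buckets:

# Line i lands in bucket i % (count_thread * count_cycle).
count_array_data = 3
data_for_process = [[] for _ in range(count_array_data)]
for i, line in enumerate(['a.ru', 'b.ru', 'c.ru', 'd.ru', 'e.ru']):
    data_for_process[i % count_array_data].append({'line': line, 'prefix': 'ru'})
print([len(bucket) for bucket in data_for_process])  # [2, 2, 1]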
Example #5
    def start_load_and_resolver_domain(net_array: SubnetTree.SubnetTree,
                                       work_path: str,
                                       delete_old: bool = True,
                                       count_thread: int = COUNT_THREAD,
                                       verbose: bool = False,
                                       resolve_dns: str = '127.0.0.1') -> None:
        """
        Запускам процессы резолвинга
        :return:
        """

        if verbose:
            log_path = os.path.abspath(os.path.join(work_path, 'log'))
            if not os.path.exists(log_path):
                os.makedirs(log_path)
        else:
            log_path = False

        # Feed every domain from the zone files into the work queue.
        # Example of a line from a source file:
        # 0--2.RU	REGRU-RU	15.06.2019	15.06.2020	16.07.2020	1
        queue_data = multiprocessing.Queue(MAX_DOMAIN_COUNT)
        queue_statistic = multiprocessing.Queue(count_thread + 5)
        counter_all = {}

        for prefix in PREFIX_LIST_ZONE.keys():
            BColor.process("Load prefix_list %s " % prefix)
            file_prefix = os.path.join(work_path, prefix + "_domains")
            BColor.process("Load file %s " % file_prefix)

            counter_all[prefix] = 0
            # Push every line onto the shared queue; the with-block closes the file.
            with open(file_prefix) as file_domain_data:
                for line in file_domain_data:
                    queue_data.put({'line': line, 'prefix': prefix})
                    counter_all[prefix] += 1

                    # if counter_all[prefix] > 10000:
                    #     break

            BColor.process("Loaded zone %s: %s lines" % (prefix, counter_all[prefix]))

        # Start the domain parsing processes
        start_time = datetime.now()
        registrant_mutex = multiprocessing.Lock()
        process_list = []

        dist_ip = multiprocessing.Manager().dict()
        for i in range(0, count_thread):
            resolver = Resolver(i, queue_data, resolve_dns, net_array,
                                log_path, registrant_mutex, queue_statistic,
                                dist_ip)
            resolver.daemon = True
            process_list.append(resolver)
            resolver.start()

        # Report progress of the domain parsing
        status_prefix = os.path.join(work_path, "status_parse_domain")
        process_status = Status(queue_data, status_prefix)
        process_status.daemon = True
        process_status.start()

        if len(process_list):
            for process in process_list:
                try:
                    # timeout of 1728000 s = 20 days
                    process.join(1728000)
                except KeyboardInterrupt:
                    BColor.warning("Interrupted by user")
                    return

        process_status.join(10)

        queue_data.close()
        diff = datetime.now() - start_time

        all_time = 0
        all_count = 0
        while not queue_statistic.empty():
            statistic_data = queue_statistic.get()
            all_time += statistic_data['time_diff']
            all_count += statistic_data['count']

        # Guard against division by zero when no domains were processed
        if all_count:
            performance_per_process = all_time / all_count
            performance = diff.seconds / all_count
            BColor.process(
                "Performance per process %f, total process time %i, total count %i, "
                "overall performance %f, wall time %i s"
                % (performance_per_process, all_time, all_count, performance, diff.seconds))

        # After all domains are processed, delete the domains that were not updated today
        if delete_old:
            Resolver.delete_not_updated_today(counter_all)
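A usage sketch for the queue-based variant; `SubnetTree` is from the pysubnettree package, and the path and network values are placeholder assumptions:

import SubnetTree

# Hypothetical setup; the mapping value is whatever Resolver expects per network.
tree = SubnetTree.SubnetTree()
tree['10.0.0.0/8'] = 'AS-EXAMPLE'

start_load_and_resolver_domain(net_array=tree,
                               work_path='/var/lib/domain-parser',  # placeholder
                               delete_old=True,
                               count_thread=COUNT_THREAD,
                               verbose=True,
                               resolve_dns='127.0.0.1')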