예제 #1
0
class View_Emd_start(multiprocessing.Process):
    """Worker process that runs ``ViewEmd.emdcalculate`` over a task's gray URLs.

    The constructor creates the ``MysqlOperate`` / ``Mongo_Operate`` handles
    and loads the task's URL iterators and title features.  ``run`` then
    feeds every gray URL to ``ViewEmd`` and keeps the ``task_result`` row in
    MySQL up to date.
    """

    def __init__(self, task_id, current_path, mysql_host, mysql_db, mysql_user,
                 mysql_password, mongo_db, mongo_host, mongo_port, mongo_user,
                 mongo_password, message_other_engine, write_process_pid,
                 remove_process_pid):
        """Create the database handles, store the callbacks and load task data.

        Args:
            task_id: Task identifier; used as the ``task_id`` key of the
                ``task_result`` row and passed to the callbacks.
            current_path: Accepted for interface compatibility only; the
                value is ignored and ``sys.path[0]`` is stored instead.
            mysql_host, mysql_db, mysql_user, mysql_password: Forwarded to
                ``MysqlOperate``.
            mongo_db, mongo_host, mongo_port, mongo_user, mongo_password:
                Forwarded to ``Mongo_Operate``.
            message_other_engine: Callable invoked as
                ``message_other_engine(6, ['00'], task_id)`` when the engine
                finishes; a return value of ``False`` stops the task.
            write_process_pid: Callable stored on the instance.
            remove_process_pid: Callable invoked with ``task_id`` when the
                engine finishes.
        """
        super(View_Emd_start, self).__init__()
        self.task_id = task_id
        self.task_start_time = ''
        # Placeholders; nothing in this class reads them.
        self.user_id = ''
        self.view_protected_objectid = ''
        self.view_gray_objectid = ''
        self.view_counterfeit_objectid = ''
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host, mysql_user,
                                         mysql_password)
        self.mongo_operate = Mongo_Operate(mongo_db, mongo_host, mongo_port,
                                           mongo_user, mongo_password)

        # NOTE(review): the ``current_path`` argument is not used here.
        self.current_path = sys.path[0]
        self.message_other_engine = message_other_engine
        # NOTE(review): this class never calls write_process_pid although
        # remove_process_pid is called on completion -- confirm the pid is
        # registered elsewhere.
        self.write_process_pid = write_process_pid
        self.remove_process_pid = remove_process_pid
        self.read_task_info()

    def read_task_info(self):
        """Read the task information (start time, URL iterators, features)."""
        self.task_start_time = self.mysql_handle.get_task_last_time(
            self.task_id)
        saved_urls_iters = self.mysql_handle.read_saved_urls(
            self.task_id, self.mongo_operate)
        self.get_protected_iter = saved_urls_iters['get_protected_iter']
        self.get_gray_iter = saved_urls_iters['get_gray_iter']
        self.get_counterfeit_iter = saved_urls_iters['get_counterfeit_iter']
        self.get_monitor_iter = saved_urls_iters['get_monitor_iter']

        self.protected_title_dict = self.mysql_handle.get_all_protected_feature(
            self.mongo_operate.get_web_title)

        self.counterfeit_title_dict = self.mysql_handle.get_all_counterfeit_feature(
            self.mongo_operate.get_web_title)

    def update_running_state(self, finish_num, view_find_num):
        """Update the detection progress in MySQL while the task is running.

        Args:
            finish_num: Written to the ``view_check_num`` column.
            view_find_num: Written to the ``view_find_num`` column.
        """
        table_name = 'task_result'
        # Each value is paired with a one-letter tag ('d' / 's'); presumably
        # a format code interpreted by require_post -- confirm there.
        fields = {
            'view_check_num': [finish_num, 'd'],
            'view_find_num': [view_find_num, 'd']
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's']
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')

    def engine_over_handle(self):
        """Notify the control engine that this engine is done and clean up."""
        send_result = self.message_other_engine(6, ['00'], self.task_id)
        if send_result is False:  # control engine no response, stop task
            self.mysql_handle.update_task_state(self.task_id,
                                                self.task_start_time, 0)
        self.remove_process_pid(self.task_id)
        sys.stdout.write('%s |*|engine win over|*|, task_id: %s\n' %
                         (time.ctime(), self.task_id))

    def update_finished_state(self, run_time, finish_num):
        """Update the detection state in MySQL once the task has completed.

        Args:
            run_time: Elapsed run time, written to ``view_emd_run_time``.
            finish_num: Unused; kept so existing callers keep working.
        """
        table_name = 'task_result'
        fields = {
            # Plain decimal 3: the former ``03`` spelling is an octal literal
            # on Python 2 and a SyntaxError on Python 3.
            'e_view_emd_state': [3, 'd'],
            'view_emd_run_time': [run_time, 's']
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's']
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')
        self.engine_over_handle()

    def run(self):
        """Process every gray URL, then record the finished state."""
        finish_num = 0
        view_find_num = 0
        start_time = time.time()

        view_emd = ViewEmd(self.mysql_handle, self.mongo_operate, self.task_id,
                           self.task_start_time, self.protected_title_dict,
                           self.counterfeit_title_dict)
        while True:
            # NOTE(review): the try block also covers the processing calls,
            # so a StopIteration raised inside them ends the loop as well.
            try:
                # Built-in next() works on both Python 2 and Python 3
                # iterators; the ``.next()`` method is Python 2 only.
                gray_url = next(self.get_gray_iter)
                view_find_flags = view_emd.emdcalculate(gray_url)
                finish_num += 1
                view_find_num += view_find_flags
                self.update_running_state(finish_num, view_find_num)
            except StopIteration:
                break
        run_time = int(time.time()) - int(start_time)

        self.update_finished_state(run_time, finish_num)
예제 #2
0
파일: title_start.py 프로젝트: wyl-hit/job
class Title_start(multiprocessing.Process):
    """Worker process that runs ``TitleMain.title_run`` over a task's gray URLs.

    Every gray URL is checked twice: first by a ``TitleMain`` built from the
    protected title/text features, then by one built from the counterfeit
    features.  Progress and the final state are written to the
    ``task_result`` row in MySQL.
    """

    def __init__(self, task_id, mysql_host, mysql_db, mysql_user,
                 mysql_password, mongo_db, mongo_host, mongo_port, mongo_user,
                 mongo_password, message_other_engine, write_process_pid,
                 remove_process_pid):
        """Create the database handles, store the callbacks and load task data.

        Args:
            task_id: Task identifier; used as the ``task_id`` key of the
                ``task_result`` row and passed to the callbacks.
            mysql_host, mysql_db, mysql_user, mysql_password: Forwarded to
                ``MysqlOperate``.
            mongo_db, mongo_host, mongo_port, mongo_user, mongo_password:
                Forwarded to ``Mongo_Operate``.
            message_other_engine: Callable invoked as
                ``message_other_engine(6, ['00'], task_id)`` when the engine
                finishes; a return value of ``False`` stops the task.
            write_process_pid: Callable invoked with ``task_id`` at the
                start of ``run``.
            remove_process_pid: Callable invoked with ``task_id`` when the
                engine finishes.
        """
        super(Title_start, self).__init__()
        self.task_id = task_id
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host, mysql_user,
                                         mysql_password)
        self.task_start_time = ''
        # Placeholders; nothing in this class reads them.
        self.user_id = ''
        self.gary_objectid = ''
        self.protected_list_id = []
        self.get_protect_dict = {}
        self.message_other_engine = message_other_engine
        self.write_process_pid = write_process_pid
        self.remove_process_pid = remove_process_pid
        self.mongo_operate = Mongo_Operate(mongo_db, mongo_host, mongo_port,
                                           mongo_user, mongo_password)
        self.read_task_info()
        self.run_start_time = 0
        self.title_check_num = 0  # number of checks performed
        self.title_find_num = 0  # number of phishing URLs detected

        # Progress is written to the database once per this many checks.
        self.once_update_num = 1

    def read_task_info(self):
        """Read the task information (start time, URL iterators, features)."""
        self.task_start_time = self.mysql_handle.get_task_last_time(
            self.task_id)
        saved_urls_iters = self.mysql_handle.read_saved_urls(
            self.task_id, self.mongo_operate)
        self.get_gray_iter = saved_urls_iters['get_gray_iter']
        self.get_monitor_iter = saved_urls_iters['get_monitor_iter']
        self.protected_title_dict = self.mysql_handle.get_all_protected_feature(
            self.mongo_operate.get_web_title)
        self.protected_text_dict = self.mysql_handle.get_all_protected_feature(
            self.mongo_operate.get_web_text)

        self.counterfeit_title_dict = self.mysql_handle.get_all_counterfeit_feature(
            self.mongo_operate.get_web_title)
        self.counterfeit_text_dict = self.mysql_handle.get_all_counterfeit_feature(
            self.mongo_operate.get_web_text)

    def update_running_state(self, title_check_num, title_find_num):
        """Update the detection progress in MySQL while the task is running.

        Args:
            title_check_num: Written to the ``title_check_num`` column.
            title_find_num: Written to the ``title_find_num`` column.
        """
        table_name = 'task_result'
        # Each value is paired with a one-letter tag ('d' / 's'); presumably
        # a format code interpreted by require_post -- confirm there.
        fields = {
            'title_check_num': [title_check_num, 'd'],
            'title_find_num': [title_find_num, 'd']
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's']
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')

    def update_finished_state(self):
        """Update the detection state in MySQL once the task has completed."""
        run_time = int(time.time()) - int(self.run_start_time)
        table_name = 'task_result'
        fields = {
            # Plain decimal 3: the former ``03`` spelling is an octal literal
            # on Python 2 and a SyntaxError on Python 3.
            'e_title_state': [3, 'd'],
            'title_run_time': [run_time, 's'],
            'title_check_num': [self.title_check_num, 'd'],
            'title_find_num': [self.title_find_num, 'd']
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's']
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')
        self.engine_over_handle()

    def engine_over_handle(self):
        """Notify the control engine that this engine is done and clean up."""
        # message to control
        send_result = self.message_other_engine(6, ['00'], self.task_id)
        if send_result is False:  # control engine no response, stop task
            self.mysql_handle.update_task_state(self.task_id,
                                                self.task_start_time, 0)
        sys.stdout.write('%s |*|engine win over|*|, task_id: %s\n' %
                         (time.ctime(), self.task_id))
        self.remove_process_pid(self.task_id)

    def _record_check(self, check_result, update_count):
        """Add one check result to the counters and flush progress if due.

        Args:
            check_result: Value returned by ``title_run``; added to
                ``title_find_num``.
            update_count: Checks recorded since the last database update.

        Returns:
            The new ``update_count`` (0 right after a database update).
        """
        self.title_find_num += check_result
        self.title_check_num += 1
        update_count += 1
        if update_count >= self.once_update_num:
            update_count = 0
            self.update_running_state(self.title_check_num,
                                      self.title_find_num)
        return update_count

    def run(self):
        """Check all gray URLs against both feature sets, then finish."""
        self.run_start_time = time.time()
        self.write_process_pid(self.task_id)
        sys.stdout.write('%s  |*|title engine start|*|, task_id: %s\n' %
                         (time.ctime(), self.task_id))

        title_main = TitleMain(self.task_id, self.task_start_time,
                               self.protected_title_dict,
                               self.protected_text_dict, self.mongo_operate,
                               self.mysql_handle)
        update_count = 0
        # The gray iterator can only be consumed once, so the URLs are kept
        # here for the second (counterfeit) pass.
        counterfeit_get_gray_iter = []
        while True:
            # NOTE(review): the try block also covers title_run and the
            # database update, so a StopIteration raised inside them ends
            # the loop as well.
            try:
                # Built-in next() works on both Python 2 and Python 3
                # iterators; the ``.next()`` method is Python 2 only.
                gray_url = next(self.get_gray_iter)
                counterfeit_get_gray_iter.append(gray_url)
                check_result = title_main.title_run(gray_url)
                update_count = self._record_check(check_result, update_count)
            except StopIteration:
                break
        title_main2 = TitleMain(self.task_id, self.task_start_time,
                                self.counterfeit_title_dict,
                                self.counterfeit_text_dict, self.mongo_operate,
                                self.mysql_handle, 'counterfeit')
        while True:
            # pop() takes from the end, so the second pass visits the URLs
            # in reverse order.
            # NOTE(review): an IndexError raised inside title_run also ends
            # this loop.
            try:
                gray_url = counterfeit_get_gray_iter.pop()
                check_result = title_main2.title_run(gray_url)
                update_count = self._record_check(check_result, update_count)
            except IndexError:
                break
        self.update_finished_state()
예제 #3
0
class View_Emd_start(multiprocessing.Process):
    """Worker process that runs ``ViewEmd.emdcalculate`` over a task's gray URLs.

    The constructor creates the ``MysqlOperate`` / ``Mongo_Operate`` handles
    and loads the task's URL iterators and title features.  ``run`` then
    feeds every gray URL to ``ViewEmd`` and keeps the ``task_result`` row in
    MySQL up to date.
    """

    def __init__(self, task_id, current_path, mysql_host, mysql_db, mysql_user, mysql_password,
                 mongo_db, mongo_host, mongo_port, mongo_user, mongo_password, message_other_engine,
                 write_process_pid, remove_process_pid):
        """Create the database handles, store the callbacks and load task data.

        Args:
            task_id: Task identifier; used as the ``task_id`` key of the
                ``task_result`` row and passed to the callbacks.
            current_path: Accepted for interface compatibility only; the
                value is ignored and ``sys.path[0]`` is stored instead.
            mysql_host, mysql_db, mysql_user, mysql_password: Forwarded to
                ``MysqlOperate``.
            mongo_db, mongo_host, mongo_port, mongo_user, mongo_password:
                Forwarded to ``Mongo_Operate``.
            message_other_engine: Callable invoked as
                ``message_other_engine(6, ['00'], task_id)`` when the engine
                finishes; a return value of ``False`` stops the task.
            write_process_pid: Callable stored on the instance.
            remove_process_pid: Callable invoked with ``task_id`` when the
                engine finishes.
        """
        super(View_Emd_start, self).__init__()
        self.task_id = task_id
        self.task_start_time = ''
        # Placeholders; nothing in this class reads them.
        self.user_id = ''
        self.view_protected_objectid = ''
        self.view_gray_objectid = ''
        self.view_counterfeit_objectid = ''
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host,
                                         mysql_user, mysql_password)
        self.mongo_operate = Mongo_Operate(
            mongo_db, mongo_host, mongo_port, mongo_user, mongo_password)

        # NOTE(review): the ``current_path`` argument is not used here.
        self.current_path = sys.path[0]
        self.message_other_engine = message_other_engine
        # NOTE(review): this class never calls write_process_pid although
        # remove_process_pid is called on completion -- confirm the pid is
        # registered elsewhere.
        self.write_process_pid = write_process_pid
        self.remove_process_pid = remove_process_pid
        self.read_task_info()

    def read_task_info(self):
        """Read the task information (start time, URL iterators, features)."""
        self.task_start_time = self.mysql_handle.get_task_last_time(
            self.task_id)
        saved_urls_iters = self.mysql_handle.read_saved_urls(
            self.task_id, self.mongo_operate)
        self.get_protected_iter = saved_urls_iters['get_protected_iter']
        self.get_gray_iter = saved_urls_iters['get_gray_iter']
        self.get_counterfeit_iter = saved_urls_iters['get_counterfeit_iter']
        self.get_monitor_iter = saved_urls_iters['get_monitor_iter']

        self.protected_title_dict = self.mysql_handle.get_all_protected_feature(
            self.mongo_operate.get_web_title)

        self.counterfeit_title_dict = self.mysql_handle.get_all_counterfeit_feature(
            self.mongo_operate.get_web_title)

    def update_running_state(self, finish_num, view_find_num):
        """Update the detection progress in MySQL while the task is running.

        Args:
            finish_num: Written to the ``view_check_num`` column.
            view_find_num: Written to the ``view_find_num`` column.
        """
        table_name = 'task_result'
        # Each value is paired with a one-letter tag ('d' / 's'); presumably
        # a format code interpreted by require_post -- confirm there.
        fields = {
            'view_check_num': [finish_num, 'd'], 'view_find_num': [view_find_num, 'd']}
        wheres = {'task_id': [self.task_id, 'd'],
                  'start_time': [self.task_start_time, 's']}
        self.mysql_handle.require_post(
            table_name, fields, wheres, 'update')

    def engine_over_handle(self):
        """Notify the control engine that this engine is done and clean up."""
        send_result = self.message_other_engine(6, ['00'], self.task_id)
        if send_result is False:  # control engine no response, stop task
            self.mysql_handle.update_task_state(
                self.task_id, self.task_start_time, 0)
        self.remove_process_pid(self.task_id)
        sys.stdout.write(
            '%s |*|engine win over|*|, task_id: %s\n' % (time.ctime(), self.task_id))

    def update_finished_state(self, run_time, finish_num):
        """Update the detection state in MySQL once the task has completed.

        Args:
            run_time: Elapsed run time, written to ``view_emd_run_time``.
            finish_num: Unused; kept so existing callers keep working.
        """
        table_name = 'task_result'
        # Plain decimal 3: the former ``03`` spelling is an octal literal on
        # Python 2 and a SyntaxError on Python 3.
        fields = {'e_view_emd_state': [3, 'd'],
                  'view_emd_run_time': [run_time, 's']}
        wheres = {'task_id': [self.task_id, 'd'],
                  'start_time': [self.task_start_time, 's']}
        self.mysql_handle.require_post(
            table_name, fields, wheres, 'update')
        self.engine_over_handle()

    def run(self):
        """Process every gray URL, then record the finished state."""
        finish_num = 0
        view_find_num = 0
        start_time = time.time()

        view_emd = ViewEmd(self.mysql_handle, self.mongo_operate, self.task_id, self.task_start_time,
                           self.protected_title_dict, self.counterfeit_title_dict)
        while True:
            # NOTE(review): the try block also covers the processing calls,
            # so a StopIteration raised inside them ends the loop as well.
            try:
                # Built-in next() works on both Python 2 and Python 3
                # iterators; the ``.next()`` method is Python 2 only.
                gray_url = next(self.get_gray_iter)
                view_find_flags = view_emd.emdcalculate(gray_url)
                finish_num += 1
                view_find_num += view_find_flags
                self.update_running_state(finish_num, view_find_num)
            except StopIteration:
                break
        run_time = int(time.time()) - int(start_time)

        self.update_finished_state(run_time, finish_num)
예제 #4
0
class StructureStart(multiprocessing.Process):
    """Worker process that compares the page structure of a task's gray URLs.

    For every gray URL the block tree is fetched from MongoDB and compared,
    via ``StructureCompare.once_compare``, with the trees of the protected
    and then the counterfeit URLs.  Matches, progress and the final state
    are written to MySQL.
    """

    def __init__(self, task_id, mysql_host, mysql_db, mysql_user,
                 mysql_password, mongo_db, mongo_host, mongo_port, mongo_user,
                 mongo_password, message_other_engine, write_process_pid,
                 remove_process_pid, structure_num_compare_k,
                 structure_num_compare_b, structure_area_compare_k,
                 structure_area_compare_b):
        """Create the database handles, store the settings and load task data.

        Args:
            task_id: Task identifier; used as the ``task_id`` key of the
                ``task_result`` row and passed to the callbacks.
            mysql_host, mysql_db, mysql_user, mysql_password: Forwarded to
                ``MysqlOperate``.
            mongo_db, mongo_host, mongo_port, mongo_user, mongo_password:
                Forwarded to ``Mongo_Operate`` and also kept as attributes.
            message_other_engine: Callable invoked as
                ``message_other_engine(6, ['00'], task_id)`` when the engine
                finishes; a return value of ``False`` stops the task.
            write_process_pid: Callable invoked with ``task_id`` at the
                start of ``run``.
            remove_process_pid: Callable invoked with ``task_id`` when the
                engine finishes.
            structure_num_compare_k, structure_num_compare_b,
            structure_area_compare_k, structure_area_compare_b: Forwarded
                unchanged to ``StructureCompare``.
        """
        super(StructureStart, self).__init__()
        self.task_id = task_id
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host, mysql_user,
                                         mysql_password)
        self.message_other_engine = message_other_engine
        self.write_process_pid = write_process_pid
        self.remove_process_pid = remove_process_pid
        self.structure_num_compare_k = structure_num_compare_k
        self.structure_num_compare_b = structure_num_compare_b
        self.structure_area_compare_k = structure_area_compare_k
        self.structure_area_compare_b = structure_area_compare_b
        self.mongo_db = mongo_db
        self.mongo_host = mongo_host
        self.mongo_port = mongo_port
        self.mongo_user = mongo_user
        self.mongo_password = mongo_password

        # Initialisation
        self.run_start_time = 0
        self.structure_check_num = 0  # number of checks performed
        self.structure_find_num = 0  # number of phishing URLs detected
        self.mongo_operate = Mongo_Operate(mongo_db, mongo_host, mongo_port,
                                           mongo_user, mongo_password)
        self.read_task_info()

    def read_task_info(self):
        """Read the task information (start time, URL iterators, trees)."""
        self.task_start_time = self.mysql_handle.get_task_last_time(
            self.task_id)
        saved_urls_iters = self.mysql_handle.read_saved_urls(
            self.task_id, self.mongo_operate)
        self.get_gray_iter = saved_urls_iters['get_gray_iter']
        self.get_monitor_iter = saved_urls_iters['get_monitor_iter']
        self.protected_dict = self.mysql_handle.get_all_protected_feature(
            self.mongo_operate.get_web_tree)
        self.counterfeit_dict = self.mysql_handle.get_all_counterfeit_feature(
            self.mongo_operate.get_web_tree)

    def update_running_state(self):
        """Update the detection progress in MySQL while the task is running."""
        table_name = 'task_result'
        # Each value is paired with a one-letter tag ('d' / 's'); presumably
        # a format code interpreted by require_post -- confirm there.
        fields = {
            'structure_check_num': [self.structure_check_num, 'd'],
            'structure_find_num': [self.structure_find_num, 'd'],
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's']
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')

    def update_finished_state(self):
        """Update the detection state in MySQL once the task has completed."""
        run_time = int(time.time()) - int(self.run_start_time)
        table_name = 'task_result'
        fields = {
            # Plain decimal 3: the former ``03`` spelling is an octal literal
            # on Python 2 and a SyntaxError on Python 3.
            'e_structure_state': [3, 'd'],
            'structure_run_time': [run_time, 's'],
            'structure_check_num': [self.structure_check_num, 'd'],
            'structure_find_num': [self.structure_find_num, 'd']
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's']
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')
        self.engine_over_handle()

    def engine_over_handle(self):
        """Notify the control engine that this engine is done and clean up."""
        # message to control
        send_result = self.message_other_engine(6, ['00'], self.task_id)
        if send_result is False:  # control engine no response, stop task
            self.mysql_handle.update_task_state(self.task_id,
                                                self.task_start_time, 0)
        sys.stdout.write('%s |*|engine win over|*|, task_id: %s\n' %
                         (time.ctime(), self.task_id))
        self.remove_process_pid(self.task_id)

    def _record_match(self, gray_url, **matched_url):
        """Count one structural match and store it in MySQL.

        Args:
            gray_url: The gray URL that matched.
            **matched_url: Exactly one keyword, ``source_url`` or
                ``counterfeit_url``, naming the URL it matched; forwarded to
                ``undate_gray_list_check_result``.
        """
        self.structure_find_num += 1
        self.mysql_handle.undate_gray_list_check_result(
            gray_url, 'structure', **matched_url)
        self.mysql_handle.undate_task_result_check_result(
            self.task_id, self.task_start_time, gray_url, 'structure')

    def run_structure_compare(self):
        """Compare every gray URL with the protected and counterfeit trees.

        A gray URL whose tree is missing (``False``) or empty is skipped and
        not counted in ``structure_check_num``.  For each reference set the
        search stops at the first match.
        """
        structure_compare = StructureCompare(self.structure_num_compare_k,
                                             self.structure_num_compare_b,
                                             self.structure_area_compare_k,
                                             self.structure_area_compare_b)
        while True:
            # NOTE(review): the try block also covers the comparison and
            # database calls, so a StopIteration raised inside them ends the
            # loop as well.
            try:
                # Built-in next() works on both Python 2 and Python 3
                # iterators; the ``.next()`` method is Python 2 only.
                gray_url = next(self.get_gray_iter)
                gray_block_list = self.mongo_operate.get_web_tree(
                    gray_url, 'gray')
                # mongo does not have a tree for this url
                if gray_block_list is False or gray_block_list == []:
                    continue
                # check against protected
                for protected_url, protected_block_list in \
                        self.protected_dict.items():
                    if protected_block_list == []:
                        continue
                    check_result = structure_compare.once_compare(
                        protected_block_list, gray_block_list)
                    if check_result == 1:
                        self._record_match(gray_url, source_url=protected_url)
                        break
                # check against counterfeit
                # NOTE(review): this runs even after a protected match, so
                # one gray URL can be counted twice -- confirm intended.
                for counterfeit_url, counterfeit_block_list in \
                        self.counterfeit_dict.items():
                    if counterfeit_block_list == []:
                        continue
                    check_result = structure_compare.once_compare(
                        counterfeit_block_list, gray_block_list)
                    if check_result == 1:
                        self._record_match(gray_url,
                                           counterfeit_url=counterfeit_url)
                        break
                self.structure_check_num += 1
                self.update_running_state()
            except StopIteration:
                break

    def run(self):
        """Register the pid, run the comparison and record the final state."""
        # write child process pid to engine pids
        self.write_process_pid(self.task_id)
        self.run_start_time = time.time()
        self.mysql_handle.update_engine_state(self.task_id,
                                              self.task_start_time,
                                              'structure', 2)
        self.run_structure_compare()
        self.update_finished_state()