class View_Emd_start(multiprocessing.Process):
    """Worker process that runs the view/EMD check over a task's gray URLs.

    For every URL produced by the task's gray-URL iterator it calls
    ``ViewEmd.emdcalculate`` and keeps the ``task_result`` row of the task
    up to date with the number of URLs checked and found.

    NOTE(review): a class with this same name and the same logic appears a
    second time in this file; confirm which definition is meant to be kept.
    """

    def __init__(self, task_id, current_path, mysql_host, mysql_db,
                 mysql_user, mysql_password, mongo_db, mongo_host, mongo_port,
                 mongo_user, mongo_password, message_other_engine,
                 write_process_pid, remove_process_pid):
        """Open the MySQL/Mongo handles and load the task information.

        ``message_other_engine``, ``write_process_pid`` and
        ``remove_process_pid`` are callables supplied by the caller.
        """
        super(View_Emd_start, self).__init__()
        self.task_id = task_id
        self.task_start_time = ''
        self.user_id = ''
        self.view_protected_objectid = ''
        self.view_gray_objectid = ''
        self.view_counterfeit_objectid = ''
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host, mysql_user,
                                         mysql_password)
        self.mongo_operate = Mongo_Operate(mongo_db, mongo_host, mongo_port,
                                           mongo_user, mongo_password)
        # NOTE(review): the ``current_path`` argument is ignored; the value
        # is always taken from sys.path[0]. Kept as in the original.
        self.current_path = sys.path[0]
        self.message_other_engine = message_other_engine
        # NOTE(review): write_process_pid is stored but never called by this
        # class, although remove_process_pid is called when the engine
        # finishes -- confirm the pid is registered by the caller.
        self.write_process_pid = write_process_pid
        self.remove_process_pid = remove_process_pid
        self.read_task_info()

    def read_task_info(self):
        """Read the task information (start time, URL iterators, titles)."""
        self.task_start_time = self.mysql_handle.get_task_last_time(
            self.task_id)
        saved_urls_iters = self.mysql_handle.read_saved_urls(
            self.task_id, self.mongo_operate)
        self.get_protected_iter = saved_urls_iters['get_protected_iter']
        self.get_gray_iter = saved_urls_iters['get_gray_iter']
        self.get_counterfeit_iter = saved_urls_iters['get_counterfeit_iter']
        self.get_monitor_iter = saved_urls_iters['get_monitor_iter']
        self.protected_title_dict = (
            self.mysql_handle.get_all_protected_feature(
                self.mongo_operate.get_web_title))
        self.counterfeit_title_dict = (
            self.mysql_handle.get_all_counterfeit_feature(
                self.mongo_operate.get_web_title))

    def update_running_state(self, finish_num, view_find_num):
        """Update the detection progress in MySQL while the task is running.

        Writes ``finish_num`` to ``view_check_num`` and ``view_find_num`` to
        ``view_find_num`` on the ``task_result`` row selected by task id and
        task start time.
        """
        table_name = 'task_result'
        fields = {
            'view_check_num': [finish_num, 'd'],
            'view_find_num': [view_find_num, 'd'],
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's'],
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')

    def engine_over_handle(self):
        """Tell the control engine this engine is done and drop the pid."""
        send_result = self.message_other_engine(6, ['00'], self.task_id)
        if send_result is False:
            # control engine no response, stop task
            self.mysql_handle.update_task_state(self.task_id,
                                                self.task_start_time, 0)
        self.remove_process_pid(self.task_id)
        sys.stdout.write('%s |*|engine win over|*|, task_id: %s\n' %
                         (time.ctime(), self.task_id))

    def update_finished_state(self, run_time, finish_num):
        """Update the detection state in MySQL once the task has completed.

        ``finish_num`` is accepted for interface compatibility but is not
        written by this method.
        """
        table_name = 'task_result'
        fields = {
            # Plain decimal 3: the former ``03`` octal spelling has the same
            # value but is a syntax error on Python 3.
            # presumably 3 is the "finished" state code -- TODO confirm
            'e_view_emd_state': [3, 'd'],
            'view_emd_run_time': [run_time, 's'],
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's'],
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')
        self.engine_over_handle()

    def run(self):
        """Process entry point: check every gray URL, then report completion."""
        finish_num = 0
        view_find_num = 0
        start_time = time.time()
        View_emd = ViewEmd(self.mysql_handle, self.mongo_operate,
                           self.task_id, self.task_start_time,
                           self.protected_title_dict,
                           self.counterfeit_title_dict)
        while True:
            # Only the iterator advance is guarded, so a StopIteration
            # leaking out of the per-URL work cannot silently end the run.
            try:
                gray_url = next(self.get_gray_iter)
            except StopIteration:
                break
            view_find_flags = View_emd.emdcalculate(gray_url)
            finish_num += 1
            view_find_num += view_find_flags
            self.update_running_state(finish_num, view_find_num)
        run_time = int(time.time()) - int(start_time)
        self.update_finished_state(run_time, finish_num)
class Title_start(multiprocessing.Process):
    """Worker process that runs the title check over a task's gray URLs.

    Every gray URL is checked twice with ``TitleMain.title_run``: first
    against the protected title/text features, then against the counterfeit
    ones. Progress is written to the ``task_result`` table.
    """

    def __init__(self, task_id, mysql_host, mysql_db, mysql_user,
                 mysql_password, mongo_db, mongo_host, mongo_port, mongo_user,
                 mongo_password, message_other_engine, write_process_pid,
                 remove_process_pid):
        """Open the MySQL/Mongo handles and load the task information.

        ``message_other_engine``, ``write_process_pid`` and
        ``remove_process_pid`` are callables supplied by the caller.
        """
        super(Title_start, self).__init__()
        self.task_id = task_id
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host, mysql_user,
                                         mysql_password)
        self.task_start_time = ''
        self.user_id = ''
        self.gary_objectid = ''
        self.protected_list_id = []
        self.get_protect_dict = {}
        self.message_other_engine = message_other_engine
        self.write_process_pid = write_process_pid
        self.remove_process_pid = remove_process_pid
        self.mongo_operate = Mongo_Operate(mongo_db, mongo_host, mongo_port,
                                           mongo_user, mongo_password)
        self.read_task_info()
        self.run_start_time = 0
        self.title_check_num = 0  # number of checks performed
        self.title_find_num = 0  # number of phishing URLs detected
        # Progress is flushed to the database every ``once_update_num``
        # checks.
        self.once_update_num = 1

    def read_task_info(self):
        """Read the task information (start time, URL iterators, features)."""
        self.task_start_time = self.mysql_handle.get_task_last_time(
            self.task_id)
        saved_urls_iters = self.mysql_handle.read_saved_urls(
            self.task_id, self.mongo_operate)
        self.get_gray_iter = saved_urls_iters['get_gray_iter']
        self.get_monitor_iter = saved_urls_iters['get_monitor_iter']
        self.protected_title_dict = (
            self.mysql_handle.get_all_protected_feature(
                self.mongo_operate.get_web_title))
        self.protected_text_dict = (
            self.mysql_handle.get_all_protected_feature(
                self.mongo_operate.get_web_text))
        self.counterfeit_title_dict = (
            self.mysql_handle.get_all_counterfeit_feature(
                self.mongo_operate.get_web_title))
        self.counterfeit_text_dict = (
            self.mysql_handle.get_all_counterfeit_feature(
                self.mongo_operate.get_web_text))

    def update_running_state(self, title_check_num, title_find_num):
        """Update the detection progress in MySQL while the task is running."""
        table_name = 'task_result'
        fields = {
            'title_check_num': [title_check_num, 'd'],
            'title_find_num': [title_find_num, 'd'],
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's'],
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')

    def update_finished_state(self):
        """Update the detection state in MySQL once the task has completed."""
        run_time = int(time.time()) - int(self.run_start_time)
        table_name = 'task_result'
        fields = {
            # Plain decimal 3: the former ``03`` octal spelling has the same
            # value but is a syntax error on Python 3.
            # presumably 3 is the "finished" state code -- TODO confirm
            'e_title_state': [3, 'd'],
            'title_run_time': [run_time, 's'],
            'title_check_num': [self.title_check_num, 'd'],
            'title_find_num': [self.title_find_num, 'd'],
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's'],
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')
        self.engine_over_handle()

    def engine_over_handle(self):
        """Tell the control engine this engine is done and drop the pid."""
        # message to control
        send_result = self.message_other_engine(6, ['00'], self.task_id)
        if send_result is False:
            # control engine no response, stop task
            self.mysql_handle.update_task_state(self.task_id,
                                                self.task_start_time, 0)
        sys.stdout.write('%s |*|engine win over|*|, task_id: %s\n' %
                         (time.ctime(), self.task_id))
        self.remove_process_pid(self.task_id)

    def _check_url(self, checker, gray_url, updates_pending):
        """Run one title check, update the counters, return the new backlog.

        ``updates_pending`` is the number of checks done since the last
        database flush; the returned value is reset to 0 after a flush.
        """
        self.title_find_num += checker.title_run(gray_url)
        self.title_check_num += 1
        updates_pending += 1
        if updates_pending >= self.once_update_num:
            self.update_running_state(self.title_check_num,
                                      self.title_find_num)
            return 0
        return updates_pending

    def run(self):
        """Process entry point: run both title passes, then report completion."""
        self.run_start_time = time.time()
        self.write_process_pid(self.task_id)
        sys.stdout.write('%s |*|title engine start|*|, task_id: %s\n' %
                         (time.ctime(), self.task_id))

        # Pass 1: compare every gray URL with the protected features and
        # remember the URLs for the second pass.
        title_main = TitleMain(self.task_id, self.task_start_time,
                               self.protected_title_dict,
                               self.protected_text_dict, self.mongo_operate,
                               self.mysql_handle)
        update_count = 0
        counterfeit_get_gray_iter = []
        while True:
            # Only the iterator advance is guarded, so a StopIteration
            # leaking out of the per-URL work cannot silently end the pass.
            try:
                gray_url = next(self.get_gray_iter)
            except StopIteration:
                break
            counterfeit_get_gray_iter.append(gray_url)
            update_count = self._check_url(title_main, gray_url, update_count)

        # Pass 2: compare the remembered URLs with the counterfeit features.
        # URLs are popped from the end, i.e. processed in reverse order of
        # pass 1, exactly as before.
        title_main2 = TitleMain(self.task_id, self.task_start_time,
                                self.counterfeit_title_dict,
                                self.counterfeit_text_dict,
                                self.mongo_operate, self.mysql_handle,
                                'counterfeit')
        while counterfeit_get_gray_iter:
            gray_url = counterfeit_get_gray_iter.pop()
            update_count = self._check_url(title_main2, gray_url,
                                           update_count)

        self.update_finished_state()
class View_Emd_start(multiprocessing.Process):
    """Worker process that runs the view/EMD check over a task's gray URLs.

    For every URL produced by the task's gray-URL iterator it calls
    ``ViewEmd.emdcalculate`` and keeps the ``task_result`` row of the task
    up to date with the number of URLs checked and found.

    NOTE(review): a class with this same name and the same logic also
    appears earlier in this file; confirm which definition is meant to be
    kept.
    """

    def __init__(self, task_id, current_path, mysql_host, mysql_db,
                 mysql_user, mysql_password, mongo_db, mongo_host, mongo_port,
                 mongo_user, mongo_password, message_other_engine,
                 write_process_pid, remove_process_pid):
        """Open the MySQL/Mongo handles and load the task information.

        ``message_other_engine``, ``write_process_pid`` and
        ``remove_process_pid`` are callables supplied by the caller.
        """
        super(View_Emd_start, self).__init__()
        self.task_id = task_id
        self.task_start_time = ''
        self.user_id = ''
        self.view_protected_objectid = ''
        self.view_gray_objectid = ''
        self.view_counterfeit_objectid = ''
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host, mysql_user,
                                         mysql_password)
        self.mongo_operate = Mongo_Operate(mongo_db, mongo_host, mongo_port,
                                           mongo_user, mongo_password)
        # NOTE(review): the ``current_path`` argument is ignored; the value
        # is always taken from sys.path[0]. Kept as in the original.
        self.current_path = sys.path[0]
        self.message_other_engine = message_other_engine
        # NOTE(review): write_process_pid is stored but never called by this
        # class, although remove_process_pid is called when the engine
        # finishes -- confirm the pid is registered by the caller.
        self.write_process_pid = write_process_pid
        self.remove_process_pid = remove_process_pid
        self.read_task_info()

    def read_task_info(self):
        """Read the task information (start time, URL iterators, titles)."""
        self.task_start_time = self.mysql_handle.get_task_last_time(
            self.task_id)
        saved_urls_iters = self.mysql_handle.read_saved_urls(
            self.task_id, self.mongo_operate)
        self.get_protected_iter = saved_urls_iters['get_protected_iter']
        self.get_gray_iter = saved_urls_iters['get_gray_iter']
        self.get_counterfeit_iter = saved_urls_iters['get_counterfeit_iter']
        self.get_monitor_iter = saved_urls_iters['get_monitor_iter']
        self.protected_title_dict = (
            self.mysql_handle.get_all_protected_feature(
                self.mongo_operate.get_web_title))
        self.counterfeit_title_dict = (
            self.mysql_handle.get_all_counterfeit_feature(
                self.mongo_operate.get_web_title))

    def update_running_state(self, finish_num, view_find_num):
        """Update the detection progress in MySQL while the task is running.

        Writes ``finish_num`` to ``view_check_num`` and ``view_find_num`` to
        ``view_find_num`` on the ``task_result`` row selected by task id and
        task start time.
        """
        table_name = 'task_result'
        fields = {
            'view_check_num': [finish_num, 'd'],
            'view_find_num': [view_find_num, 'd'],
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's'],
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')

    def engine_over_handle(self):
        """Tell the control engine this engine is done and drop the pid."""
        send_result = self.message_other_engine(6, ['00'], self.task_id)
        if send_result is False:
            # control engine no response, stop task
            self.mysql_handle.update_task_state(self.task_id,
                                                self.task_start_time, 0)
        self.remove_process_pid(self.task_id)
        sys.stdout.write('%s |*|engine win over|*|, task_id: %s\n' %
                         (time.ctime(), self.task_id))

    def update_finished_state(self, run_time, finish_num):
        """Update the detection state in MySQL once the task has completed.

        ``finish_num`` is accepted for interface compatibility but is not
        written by this method.
        """
        table_name = 'task_result'
        fields = {
            # Plain decimal 3: the former ``03`` octal spelling has the same
            # value but is a syntax error on Python 3.
            # presumably 3 is the "finished" state code -- TODO confirm
            'e_view_emd_state': [3, 'd'],
            'view_emd_run_time': [run_time, 's'],
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's'],
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')
        self.engine_over_handle()

    def run(self):
        """Process entry point: check every gray URL, then report completion."""
        finish_num = 0
        view_find_num = 0
        start_time = time.time()
        View_emd = ViewEmd(self.mysql_handle, self.mongo_operate,
                           self.task_id, self.task_start_time,
                           self.protected_title_dict,
                           self.counterfeit_title_dict)
        while True:
            # Only the iterator advance is guarded, so a StopIteration
            # leaking out of the per-URL work cannot silently end the run.
            try:
                gray_url = next(self.get_gray_iter)
            except StopIteration:
                break
            view_find_flags = View_emd.emdcalculate(gray_url)
            finish_num += 1
            view_find_num += view_find_flags
            self.update_running_state(finish_num, view_find_num)
        run_time = int(time.time()) - int(start_time)
        self.update_finished_state(run_time, finish_num)
class StructureStart(multiprocessing.Process):
    """Worker process that runs the structure check over a task's gray URLs.

    The web tree of every gray URL is compared, via
    ``StructureCompare.once_compare``, with the trees of the protected and
    the counterfeit URLs; matches and progress are written to MySQL.
    """

    def __init__(self, task_id, mysql_host, mysql_db, mysql_user,
                 mysql_password, mongo_db, mongo_host, mongo_port, mongo_user,
                 mongo_password, message_other_engine, write_process_pid,
                 remove_process_pid, structure_num_compare_k,
                 structure_num_compare_b, structure_area_compare_k,
                 structure_area_compare_b):
        """Open the MySQL/Mongo handles and load the task information.

        The four ``structure_*_compare_*`` values are passed unchanged to
        ``StructureCompare``. ``message_other_engine``,
        ``write_process_pid`` and ``remove_process_pid`` are callables
        supplied by the caller.
        """
        super(StructureStart, self).__init__()
        self.task_id = task_id
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host, mysql_user,
                                         mysql_password)
        self.message_other_engine = message_other_engine
        self.write_process_pid = write_process_pid
        self.remove_process_pid = remove_process_pid
        self.structure_num_compare_k = structure_num_compare_k
        self.structure_num_compare_b = structure_num_compare_b
        self.structure_area_compare_k = structure_area_compare_k
        self.structure_area_compare_b = structure_area_compare_b
        self.mongo_db = mongo_db
        self.mongo_host = mongo_host
        self.mongo_port = mongo_port
        self.mongo_user = mongo_user
        self.mongo_password = mongo_password
        # initialisation
        self.run_start_time = 0
        self.structure_check_num = 0  # number of checks performed
        self.structure_find_num = 0  # number of phishing URLs detected
        self.mongo_operate = Mongo_Operate(mongo_db, mongo_host, mongo_port,
                                           mongo_user, mongo_password)
        self.read_task_info()

    def read_task_info(self):
        """Read the task information (start time, URL iterators, web trees)."""
        self.task_start_time = self.mysql_handle.get_task_last_time(
            self.task_id)
        saved_urls_iters = self.mysql_handle.read_saved_urls(
            self.task_id, self.mongo_operate)
        self.get_gray_iter = saved_urls_iters['get_gray_iter']
        self.get_monitor_iter = saved_urls_iters['get_monitor_iter']
        self.protected_dict = self.mysql_handle.get_all_protected_feature(
            self.mongo_operate.get_web_tree)
        self.counterfeit_dict = self.mysql_handle.get_all_counterfeit_feature(
            self.mongo_operate.get_web_tree)

    def update_running_state(self):
        """Update the detection progress in MySQL while the task is running."""
        table_name = 'task_result'
        fields = {
            'structure_check_num': [self.structure_check_num, 'd'],
            'structure_find_num': [self.structure_find_num, 'd'],
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's'],
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')

    def update_finished_state(self):
        """Update the detection state in MySQL once the task has completed."""
        run_time = int(time.time()) - int(self.run_start_time)
        table_name = 'task_result'
        fields = {
            # Plain decimal 3: the former ``03`` octal spelling has the same
            # value but is a syntax error on Python 3.
            # presumably 3 is the "finished" state code -- TODO confirm
            'e_structure_state': [3, 'd'],
            'structure_run_time': [run_time, 's'],
            'structure_check_num': [self.structure_check_num, 'd'],
            'structure_find_num': [self.structure_find_num, 'd'],
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's'],
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')
        self.engine_over_handle()

    def engine_over_handle(self):
        """Tell the control engine this engine is done and drop the pid."""
        # message to control
        send_result = self.message_other_engine(6, ['00'], self.task_id)
        if send_result is False:
            # control engine no response, stop task
            self.mysql_handle.update_task_state(self.task_id,
                                                self.task_start_time, 0)
        sys.stdout.write('%s |*|engine win over|*|, task_id: %s\n' %
                         (time.ctime(), self.task_id))
        self.remove_process_pid(self.task_id)

    def _compare_against(self, structure_compare, reference_dict, gray_url,
                         gray_block_list, match_kwarg):
        """Compare one gray tree with a reference set; record the first match.

        ``match_kwarg`` is the keyword name under which the matching
        reference URL is passed to ``undate_gray_list_check_result``.
        """
        for reference_url, reference_block_list in reference_dict.items():
            if reference_block_list == []:
                continue
            check_result = structure_compare.once_compare(
                reference_block_list, gray_block_list)
            if check_result == 1:
                self.structure_find_num += 1
                self.mysql_handle.undate_gray_list_check_result(
                    gray_url, 'structure', **{match_kwarg: reference_url})
                self.mysql_handle.undate_task_result_check_result(
                    self.task_id, self.task_start_time, gray_url,
                    'structure')
                # Stop at the first matching reference URL.
                return

    def run_structure_compare(self):
        """Compare every gray URL's web tree with both reference sets."""
        structure_compare = StructureCompare(self.structure_num_compare_k,
                                             self.structure_num_compare_b,
                                             self.structure_area_compare_k,
                                             self.structure_area_compare_b)
        while True:
            # Only the iterator advance is guarded, so a StopIteration
            # leaking out of the per-URL work cannot silently end the run.
            try:
                gray_url = next(self.get_gray_iter)
            except StopIteration:
                break
            gray_block_list = self.mongo_operate.get_web_tree(gray_url,
                                                              'gray')
            # mongo not have tree of url: skipped URLs are not counted in
            # structure_check_num.
            if gray_block_list is False or gray_block_list == []:
                continue
            # NOTE(review): a gray URL matching both a protected and a
            # counterfeit URL increments structure_find_num twice -- confirm
            # this is intended.
            # check to protected
            self._compare_against(structure_compare, self.protected_dict,
                                  gray_url, gray_block_list, 'source_url')
            # check to counterfeit
            self._compare_against(structure_compare, self.counterfeit_dict,
                                  gray_url, gray_block_list,
                                  'counterfeit_url')
            self.structure_check_num += 1
            self.update_running_state()

    def run(self):
        """Process entry point: register pid, run the check, report completion."""
        # write child process pid to engine pids
        self.write_process_pid(self.task_id)
        self.run_start_time = time.time()
        self.mysql_handle.update_engine_state(self.task_id,
                                              self.task_start_time,
                                              'structure', 2)
        self.run_structure_compare()
        self.update_finished_state()