Ejemplo n.º 1
0
class WebSavestart(multiprocessing.Process):
    '''
    Worker process that runs the web_save engine for a single task.

    Constructing the object reads the task description from MySQL and
    resolves every URL list the engine has to download; run() then hands
    those lists to WebSave and starts the reactor.
    '''

    # NOTE(review): update_finished_state force-adds these URLs as 'gray'
    # entries to every finished result. They look like leftover test seeds --
    # confirm they are still wanted before removing them.
    _FORCED_GRAY_URLS = ('http://cpuzt.cc/',
                         'http://www.138.gg/',
                         'http://www.bjstkc.com/')

    def __init__(self, task_id, mysql_host, mysql_db,
                 mysql_user, mysql_password, mongo_db, mongo_host, mongo_port,
                 mongo_user, mongo_password, message_other_engine,
                 write_process_pid, remove_process_pid):
        '''
        Open the MySQL/Mongo handles and load the task information.

        task_id               -- id of the task row in the task_info table
        mysql_* / mongo_*     -- connection settings forwarded to
                                 MysqlOperate / Mongo_Operate
        message_other_engine  -- callable(code, payload, task_id) used to
                                 notify the control engine; a False return
                                 is treated as "no response"
        write_process_pid     -- callable(task_id), invoked at the start
                                 of run()
        remove_process_pid    -- callable(task_id), invoked when the task
                                 has finished
        '''
        super(WebSavestart, self).__init__()
        self.task_id = task_id
        # MysqlOperate takes the database name before the host.
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host,
                                         mysql_user, mysql_password)
        self.message_other_engine = message_other_engine
        self.write_process_pid = write_process_pid
        self.remove_process_pid = remove_process_pid

        # initial state, filled in by read_task_info()
        self.task_start_time = ''
        self.user_id = ''
        self.protected_urls = []
        self.counterfeit_urls = []
        self.gray_urls = []
        self.monitor_urls = []
        self.url_num = 0
        self.gary_objectid = ''
        self.file_context = ''
        self.mongo_operate = Mongo_Operate(mongo_db, mongo_host,
                                           mongo_port, mongo_user,
                                           mongo_password)
        self.read_task_info()

    def _load_urls(self, table_name, id_string):
        '''
        Return the utf-8 encoded urls for a '-'-separated string of row ids.

        Ids without a matching row in table_name are skipped. None or ''
        yields an empty list.
        '''
        urls = []
        if id_string is None or id_string == '':
            return urls
        for raw_id in id_string.split('-'):
            if not raw_id.strip():
                # A trailing or doubled '-' produces an empty segment;
                # int('') would raise, so ignore it.
                continue
            wheres = {'id': [int(raw_id), 'd']}
            row = self.mysql_handle.require_get(
                table_name, ['url'], wheres, 'select', 'one')
            if row is False:
                continue
            urls.append(row['url'].encode('utf-8'))
        return urls

    def read_task_info(self):
        '''
        Read the task row and resolve all URL lists for this task.

        Fills task_start_time, user_id, the four *_urls lists, get_gray_iter,
        gray_url_num and url_num. When the task row does not exist the
        current process is terminated through os._exit(0).
        '''
        task_info = self.mysql_handle.require_get(
            'task_info',
            ['last_time', 'user_id', 'protected_id', 'gray_id',
             'counterfeit_id', 'monitor_id'],
            {'task_id': [self.task_id, 'd']}, 'select', 'one')
        if task_info is False:
            sys.stderr.write(
                '%s  task no exist, task_id: %s\n' % (time.ctime(), self.task_id))
            os._exit(0)
        self.task_start_time = task_info['last_time']
        self.user_id = task_info['user_id']
        original_protected_list = task_info['protected_id']
        original_counterfeit_list = task_info['counterfeit_id']
        original_gray_list = task_info['gray_id']
        original_monitor_list = task_info['monitor_id']

        # Every list is stored as '-'-joined ids into its own table.
        self.protected_urls.extend(
            self._load_urls('protected_list', original_protected_list))
        self.counterfeit_urls.extend(
            self._load_urls('counterfeit_list', original_counterfeit_list))
        self.gray_urls.extend(
            self._load_urls('gray_list', original_gray_list))
        self.monitor_urls.extend(
            self._load_urls('monitor_list', original_monitor_list))

        # Suspected urls: task_result.filtrate_objectid names a gray list
        # stored in mongo.
        wheres = {'task_id': [self.task_id, 'd'],
                  'start_time': [self.task_start_time, 's']}
        select_result = self.mysql_handle.require_get(
            'task_result', ['filtrate_objectid'], wheres, 'select', 'one')
        # Default: nothing to iterate over.
        self.get_gray_iter = iter([])
        self.gray_url_num = 0
        if select_result is not False:
            self.gary_objectid = select_result['filtrate_objectid']
            if self.gary_objectid is not None:
                self.gray_url_num = self.mongo_operate.get_gray_num(
                    self.gary_objectid)
                self.gary_objectid = self.mongo_operate.expand_gray_list(
                    self.gary_objectid)
                self.get_gray_iter = self.mongo_operate.get_gray_list(
                    self.gary_objectid)
        self.url_num = (self.gray_url_num +
                        len(self.protected_urls) + len(self.gray_urls) +
                        len(self.counterfeit_urls) + len(self.monitor_urls))

    def update_running_state(self, saved_num, request_num):
        '''
        Write the progress counters of the running task to task_result.

        saved_num    -- stored in column web_save_num
        request_num  -- stored in column web_request_num
        '''
        fields = {'web_save_num': [saved_num, 'd'],
                  'web_request_num': [request_num, 'd']}
        wheres = {'task_id': [self.task_id, 'd'],
                  'start_time': [self.task_start_time, 's']}
        self.mysql_handle.require_post(
            'task_result', fields, wheres, 'update')

    def _store_saved_urls(self, urls, gray_name):
        '''
        Store urls as a new 'websave' gray list in mongo.

        Returns the id returned by create_gray, or '' when urls is empty
        (no list is created in that case).
        '''
        if not urls:
            return ''
        object_id = self.mongo_operate.create_gray(
            gray_name=gray_name, gray_type='websave', usr_id=self.user_id)
        self.mongo_operate.add_gray_list(urls, object_id)
        return object_id

    def add_saved_ulr_mongo(self, ulist):
        '''
        Store the saved urls in mongo, one gray list per category.

        ulist holds [url, category] pairs; category is one of 'gray',
        'protected', 'counterfeit' or 'monitor'. Entries with any other
        category are ignored. The resulting ids are kept in the
        save_*_objectID attributes ('' for an empty category).
        '''
        buckets = {'gray': [], 'protected': [],
                   'counterfeit': [], 'monitor': []}
        for url in ulist:
            bucket = buckets.get(url[1])
            if bucket is not None:
                bucket.append(url[0])
        # The gray list is named 'save_...' while the others use 'saved_...';
        # the names are kept exactly as they are stored today.
        self.save_gray_objectID = self._store_saved_urls(
            buckets['gray'], 'save_gray_urls')
        self.save_protected_objectID = self._store_saved_urls(
            buckets['protected'], 'saved_protected_urls')
        self.save_counterfeit_objectID = self._store_saved_urls(
            buckets['counterfeit'], 'saved_counterfeit_urls')
        self.save_monitor_objectID = self._store_saved_urls(
            buckets['monitor'], 'saved_monitor_urls')

    def update_finished_state(self, ulist, run_time, request_num):
        '''
        Persist the final result of the task and notify the control engine.

        ulist        -- list of [url, category] pairs that were saved; it is
                        extended in place with the forced gray urls
        run_time     -- stored in column web_save_run_time
        request_num  -- stored in column web_request_num
        '''
        for forced_url in self._FORCED_GRAY_URLS:
            entry = [forced_url, 'gray']
            if entry not in ulist:
                ulist.append(entry)
        self.add_saved_ulr_mongo(ulist)
        saved_num = len(ulist)
        # State value 3 was written as the octal literal 03, which is the
        # same number but a syntax error on Python 3.
        fields = {'e_web_save_state': [3, 'd'],
                  'web_save_num': [saved_num, 'd'],
                  'web_request_num': [request_num, 'd'],
                  'web_save_run_time': [run_time, 's'],
                  'save_protected_objectid': [self.save_protected_objectID, 's'],
                  'save_counterfeit_objectid': [self.save_counterfeit_objectID, 's'],
                  'save_monitor_objectid': [self.save_monitor_objectID, 's'],
                  'save_gray_objectid': [self.save_gray_objectID, 's']}
        wheres = {'task_id': [self.task_id, 'd'],
                  'start_time': [self.task_start_time, 's']}
        self.mysql_handle.require_post(
            'task_result', fields, wheres, 'update')

        # NOTE(review): because of the forced urls above ulist is never
        # empty here, so code 9 is currently never sent.
        if not ulist:
            send_result = self.message_other_engine(9, ['00'], self.task_id)
        else:
            send_result = self.message_other_engine(3, ['00'], self.task_id)
        if send_result is False:  # control engine no response, stop task
            self.mysql_handle.update_task_state(
                self.task_id, self.task_start_time, 0)
        self.remove_process_pid(self.task_id)
        sys.stdout.write(
            '%s |*|web_save engine win over|*|, task_id: %s\n' % (time.ctime(), self.task_id))

    def run(self):
        '''
        Process entry point: register the pid, mark the engine state as 2
        and start the download inside the reactor.
        '''
        # write child process pid to engine pids
        self.write_process_pid(self.task_id)
        self.mysql_handle.update_engine_state(
            self.task_id, self.task_start_time, 'web_save', 2)
        engine = WebSave(self.task_id, self.protected_urls, self.get_gray_iter, self.gray_urls,
                         self.counterfeit_urls, self.monitor_urls, self.url_num,
                         self.update_running_state, self.update_finished_state,
                         self.mongo_operate)
        engine.download()
        reactor.run(installSignalHandlers=0)
Ejemplo n.º 2
0
class WebSavestart(multiprocessing.Process):
    '''
    Worker process that runs the web_save engine for a single task.

    Constructing the object reads the task description from MySQL and
    resolves every URL list the engine has to download; run() then hands
    those lists to WebSave and starts the reactor.
    '''

    # NOTE(review): update_finished_state force-adds these URLs as 'gray'
    # entries to every finished result. They look like leftover test seeds --
    # confirm they are still wanted before removing them.
    _FORCED_GRAY_URLS = ('http://cpuzt.cc/',
                         'http://www.138.gg/',
                         'http://www.bjstkc.com/')

    def __init__(self, task_id, mysql_host, mysql_db, mysql_user,
                 mysql_password, mongo_db, mongo_host, mongo_port, mongo_user,
                 mongo_password, message_other_engine, write_process_pid,
                 remove_process_pid):
        '''
        Open the MySQL/Mongo handles and load the task information.

        task_id               -- id of the task row in the task_info table
        mysql_* / mongo_*     -- connection settings forwarded to
                                 MysqlOperate / Mongo_Operate
        message_other_engine  -- callable(code, payload, task_id) used to
                                 notify the control engine; a False return
                                 is treated as "no response"
        write_process_pid     -- callable(task_id), invoked at the start
                                 of run()
        remove_process_pid    -- callable(task_id), invoked when the task
                                 has finished
        '''
        super(WebSavestart, self).__init__()
        self.task_id = task_id
        # MysqlOperate takes the database name before the host.
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host, mysql_user,
                                         mysql_password)
        self.message_other_engine = message_other_engine
        self.write_process_pid = write_process_pid
        self.remove_process_pid = remove_process_pid

        # initial state, filled in by read_task_info()
        self.task_start_time = ''
        self.user_id = ''
        self.protected_urls = []
        self.counterfeit_urls = []
        self.gray_urls = []
        self.monitor_urls = []
        self.url_num = 0
        self.gary_objectid = ''
        self.file_context = ''
        self.mongo_operate = Mongo_Operate(mongo_db, mongo_host, mongo_port,
                                           mongo_user, mongo_password)
        self.read_task_info()

    def _load_urls(self, table_name, id_string):
        '''
        Return the utf-8 encoded urls for a '-'-separated string of row ids.

        Ids without a matching row in table_name are skipped. None or ''
        yields an empty list.
        '''
        urls = []
        if id_string is None or id_string == '':
            return urls
        for raw_id in id_string.split('-'):
            if not raw_id.strip():
                # A trailing or doubled '-' produces an empty segment;
                # int('') would raise, so ignore it.
                continue
            wheres = {'id': [int(raw_id), 'd']}
            row = self.mysql_handle.require_get(
                table_name, ['url'], wheres, 'select', 'one')
            if row is False:
                continue
            urls.append(row['url'].encode('utf-8'))
        return urls

    def read_task_info(self):
        '''
        Read the task row and resolve all URL lists for this task.

        Fills task_start_time, user_id, the four *_urls lists, get_gray_iter,
        gray_url_num and url_num. When the task row does not exist the
        current process is terminated through os._exit(0).
        '''
        fields = [
            'last_time', 'user_id', 'protected_id', 'gray_id',
            'counterfeit_id', 'monitor_id'
        ]
        wheres = {'task_id': [self.task_id, 'd']}
        task_info = self.mysql_handle.require_get('task_info', fields, wheres,
                                                  'select', 'one')
        if task_info is False:
            sys.stderr.write('%s  task no exist, task_id: %s\n' %
                             (time.ctime(), self.task_id))
            os._exit(0)
        self.task_start_time = task_info['last_time']
        self.user_id = task_info['user_id']
        original_protected_list = task_info['protected_id']
        original_counterfeit_list = task_info['counterfeit_id']
        original_gray_list = task_info['gray_id']
        original_monitor_list = task_info['monitor_id']

        # Every list is stored as '-'-joined ids into its own table.
        self.protected_urls.extend(
            self._load_urls('protected_list', original_protected_list))
        self.counterfeit_urls.extend(
            self._load_urls('counterfeit_list', original_counterfeit_list))
        self.gray_urls.extend(
            self._load_urls('gray_list', original_gray_list))
        self.monitor_urls.extend(
            self._load_urls('monitor_list', original_monitor_list))

        # Suspected urls: task_result.filtrate_objectid names a gray list
        # stored in mongo.
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's']
        }
        select_result = self.mysql_handle.require_get(
            'task_result', ['filtrate_objectid'], wheres, 'select', 'one')
        # Default: nothing to iterate over.
        self.get_gray_iter = iter([])
        self.gray_url_num = 0
        if select_result is not False:
            self.gary_objectid = select_result['filtrate_objectid']
            if self.gary_objectid is not None:
                self.gray_url_num = self.mongo_operate.get_gray_num(
                    self.gary_objectid)
                self.gary_objectid = self.mongo_operate.expand_gray_list(
                    self.gary_objectid)
                self.get_gray_iter = self.mongo_operate.get_gray_list(
                    self.gary_objectid)
        self.url_num = (self.gray_url_num +
                        len(self.protected_urls) + len(self.gray_urls) +
                        len(self.counterfeit_urls) + len(self.monitor_urls))

    def update_running_state(self, saved_num, request_num):
        '''
        Write the progress counters of the running task to task_result.

        saved_num    -- stored in column web_save_num
        request_num  -- stored in column web_request_num
        '''
        fields = {
            'web_save_num': [saved_num, 'd'],
            'web_request_num': [request_num, 'd']
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's']
        }
        self.mysql_handle.require_post('task_result', fields, wheres, 'update')

    def _store_saved_urls(self, urls, gray_name):
        '''
        Store urls as a new 'websave' gray list in mongo.

        Returns the id returned by create_gray, or '' when urls is empty
        (no list is created in that case).
        '''
        if not urls:
            return ''
        object_id = self.mongo_operate.create_gray(
            gray_name=gray_name,
            gray_type='websave',
            usr_id=self.user_id)
        self.mongo_operate.add_gray_list(urls, object_id)
        return object_id

    def add_saved_ulr_mongo(self, ulist):
        '''
        Store the saved urls in mongo, one gray list per category.

        ulist holds [url, category] pairs; category is one of 'gray',
        'protected', 'counterfeit' or 'monitor'. Entries with any other
        category are ignored. The resulting ids are kept in the
        save_*_objectID attributes ('' for an empty category).
        '''
        buckets = {
            'gray': [],
            'protected': [],
            'counterfeit': [],
            'monitor': []
        }
        for url in ulist:
            bucket = buckets.get(url[1])
            if bucket is not None:
                bucket.append(url[0])
        # The gray list is named 'save_...' while the others use 'saved_...';
        # the names are kept exactly as they are stored today.
        self.save_gray_objectID = self._store_saved_urls(
            buckets['gray'], 'save_gray_urls')
        self.save_protected_objectID = self._store_saved_urls(
            buckets['protected'], 'saved_protected_urls')
        self.save_counterfeit_objectID = self._store_saved_urls(
            buckets['counterfeit'], 'saved_counterfeit_urls')
        self.save_monitor_objectID = self._store_saved_urls(
            buckets['monitor'], 'saved_monitor_urls')

    def update_finished_state(self, ulist, run_time, request_num):
        '''
        Persist the final result of the task and notify the control engine.

        ulist        -- list of [url, category] pairs that were saved; it is
                        extended in place with the forced gray urls
        run_time     -- stored in column web_save_run_time
        request_num  -- stored in column web_request_num
        '''
        for forced_url in self._FORCED_GRAY_URLS:
            entry = [forced_url, 'gray']
            if entry not in ulist:
                ulist.append(entry)
        self.add_saved_ulr_mongo(ulist)
        saved_num = len(ulist)
        # State value 3 was written as the octal literal 03, which is the
        # same number but a syntax error on Python 3.
        fields = {
            'e_web_save_state': [3, 'd'],
            'web_save_num': [saved_num, 'd'],
            'web_request_num': [request_num, 'd'],
            'web_save_run_time': [run_time, 's'],
            'save_protected_objectid': [self.save_protected_objectID, 's'],
            'save_counterfeit_objectid': [self.save_counterfeit_objectID, 's'],
            'save_monitor_objectid': [self.save_monitor_objectID, 's'],
            'save_gray_objectid': [self.save_gray_objectID, 's']
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's']
        }
        self.mysql_handle.require_post('task_result', fields, wheres, 'update')

        # NOTE(review): because of the forced urls above ulist is never
        # empty here, so code 9 is currently never sent.
        if not ulist:
            send_result = self.message_other_engine(9, ['00'], self.task_id)
        else:
            send_result = self.message_other_engine(3, ['00'], self.task_id)
        if send_result is False:  # control engine no response, stop task
            self.mysql_handle.update_task_state(self.task_id,
                                                self.task_start_time, 0)
        self.remove_process_pid(self.task_id)
        sys.stdout.write('%s |*|web_save engine win over|*|, task_id: %s\n' %
                         (time.ctime(), self.task_id))

    def run(self):
        '''
        Process entry point: register the pid, mark the engine state as 2
        and start the download inside the reactor.
        '''
        # write child process pid to engine pids
        self.write_process_pid(self.task_id)
        self.mysql_handle.update_engine_state(self.task_id,
                                              self.task_start_time, 'web_save',
                                              2)
        engine = WebSave(self.task_id, self.protected_urls, self.get_gray_iter,
                         self.gray_urls, self.counterfeit_urls,
                         self.monitor_urls, self.url_num,
                         self.update_running_state, self.update_finished_state,
                         self.mongo_operate)
        engine.download()
        reactor.run(installSignalHandlers=0)
Ejemplo n.º 3
0
class FiltrateStart(multiprocessing.Process):
    '''
    Worker process that filters the gray urls of one task against the
    trusted_list and counterfeit_list tables and stores the three resulting
    groups (suspect, trusted, counterfeit) in mongo.
    '''

    def __init__(self, task_id, mysql_host, mysql_db,
                 mysql_user, mysql_password, mongo_db, mongo_host, mongo_port,
                 mongo_user, mongo_password, message_other_engine,
                 write_process_pid, remove_process_pid):
        '''
        Open the MySQL/Mongo handles and load the task information.

        task_id               -- id of the task row in the task_info table
        mysql_* / mongo_*     -- connection settings forwarded to
                                 MysqlOperate / Mongo_Operate
        message_other_engine  -- callable(code, payload, task_id) used to
                                 notify the control engine; a False return
                                 is treated as "no response"
        write_process_pid     -- callable(task_id), invoked at the start
                                 of run()
        remove_process_pid    -- callable(task_id), invoked when the task
                                 has finished
        '''
        super(FiltrateStart, self).__init__()
        self.task_id = task_id
        # MysqlOperate takes the database name before the host.
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host,
                                         mysql_user, mysql_password)
        self.message_other_engine = message_other_engine
        self.write_process_pid = write_process_pid
        self.remove_process_pid = remove_process_pid

        # initial state, filled in by read_task_info() / run()
        self.user_id = ''
        self.gray_urls = []
        self.task_start_time = ''
        self.run_start_time = 0
        self.mongo_operate = Mongo_Operate(mongo_db, mongo_host,
                                           mongo_port, mongo_user,
                                           mongo_password)
        self.read_task_info()

    def read_task_info(self):
        '''
        Read the task row, the task's gray urls and the detected-url list.

        Fills task_start_time, user_id, gray_urls and get_gray_iter. When
        the task row does not exist the current process is terminated
        through os._exit(0).
        '''
        task_info = self.mysql_handle.require_get(
            'task_info', ['last_time', 'user_id', 'gray_id'],
            {'task_id': [self.task_id, 'd']}, 'select', 'one')
        if task_info is False:
            sys.stderr.write(
                '%s  task no exist, task_id: %s\n' % (time.ctime(), self.task_id))
            os._exit(0)
        self.task_start_time = task_info['last_time']
        self.user_id = task_info['user_id']
        gray_id = task_info['gray_id']
        # read gray url: gray_id is a '-'-joined list of gray_list row ids
        if gray_id is not None and gray_id != '':
            for once_gray_id in gray_id.split('-'):
                if not once_gray_id.strip():
                    # A trailing or doubled '-' produces an empty segment;
                    # int('') would raise, so ignore it.
                    continue
                wheres = {'id': [int(once_gray_id), 'd']}
                select_result = self.mysql_handle.require_get(
                    'gray_list', ['url'], wheres, 'select', 'one')
                if select_result is False:
                    continue
                self.gray_urls.append(select_result['url'].encode('utf-8'))
        # read detected url
        wheres = {'task_id': [self.task_id, 'd'],
                  'start_time': [self.task_start_time, 's']}
        select_result = self.mysql_handle.require_get(
            'task_result', ['original_grayid'], wheres, 'select', 'one')
        # require_get reports a missing row as False; indexing that would
        # raise TypeError, so treat it like an empty detected list.
        gary_objectid = None
        if select_result is not False:
            gary_objectid = select_result['original_grayid']
        if gary_objectid is not None and gary_objectid != '':
            gary_objectid = self.mongo_operate.expand_gray_list(
                gary_objectid)
            self.get_gray_iter = self.mongo_operate.get_gray_list(
                gary_objectid)
        else:
            self.get_gray_iter = iter([])

    def update_finish_state(self, trusted_filtrate_num, counterfeit_filtrate_num, filtrate_objectid,
                            filtrate_trusted_objectid, filtrate_counterfeit_objectid):
        '''
        Task run over: write the result to task_result and notify control.

        trusted_filtrate_num / counterfeit_filtrate_num
            -- number of urls found in the trusted / counterfeit list
        filtrate_objectid / filtrate_trusted_objectid /
        filtrate_counterfeit_objectid
            -- mongo ids of the suspect / trusted / counterfeit gray lists
        '''
        run_time = int(time.time() - self.run_start_time)
        # State value 3 was written as the octal literal 03, which is the
        # same number but a syntax error on Python 3.
        fields = {'e_filtrate_state': [3, 'd'],
                  'filtrate_trusted_num': [trusted_filtrate_num, 'd'],
                  'filtrate_counterfeit_num': [counterfeit_filtrate_num, 'd'],
                  'filtrate_run_time': [run_time, 's'],
                  'filtrate_objectid': [filtrate_objectid, 's'],
                  'filtrate_trusted_objectid': [filtrate_trusted_objectid, 's'],
                  'filtrate_counterfeit_objectid': [filtrate_counterfeit_objectid, 's']}
        wheres = {'task_id': [self.task_id, 'd'],
                  'start_time': [self.task_start_time, 's']}
        self.mysql_handle.require_post(
            'task_result', fields, wheres, 'update')
        # message to control
        send_result = self.message_other_engine(2, ['00'], self.task_id)
        if send_result is False:  # control engine no response, stop task
            self.mysql_handle.update_task_state(
                self.task_id, self.task_start_time, 0)
        self.remove_process_pid(self.task_id)
        sys.stdout.write(
            '%s |*|engine win over|*|, task_id: %s\n' % (time.ctime(), self.task_id))

    def _select_by_url(self, table_name, gray_url):
        '''
        Return the row of table_name whose url column equals gray_url, or
        whatever require_get reports for "no row" (False).
        '''
        wheres = {'url': [gray_url, 's']}
        # NOTE(review): the trailing 0 is an extra require_get argument whose
        # meaning is not visible here -- kept exactly as before.
        return self.mysql_handle.require_get(
            table_name, ['*'], wheres, 'select', 'one', 0)

    def trusted_select(self, gray_url):
        '''
        Look gray_url up in the trusted list.
        '''
        return self._select_by_url('trusted_list', gray_url)

    def counterfeit_select(self, gray_url):
        '''
        Look gray_url up in the counterfeit list.
        '''
        return self._select_by_url('counterfeit_list', gray_url)

    def _iter_pending_urls(self):
        '''
        Yield every url to filter: first the items of get_gray_iter, then
        the entries of gray_urls, popped from the end of the list.
        '''
        while True:
            try:
                gray_url = next(self.get_gray_iter)
            except StopIteration:
                break
            yield gray_url
        while self.gray_urls:
            yield self.gray_urls.pop()

    def _store_grays(self, urls, gray_name):
        '''
        Create a 'filtrate' gray list in mongo (even when urls is empty),
        add urls to it and return its id.
        '''
        object_id = self.mongo_operate.create_gray(
            gray_name=gray_name, gray_type='filtrate',
            usr_id=self.user_id, task_id=self.task_id)
        self.mongo_operate.add_gray_list(urls, object_id)
        return object_id

    def run(self):
        '''
        Process entry point: classify every pending url and store the result.
        '''
        # write child process pid to engine pids
        self.write_process_pid(self.task_id)
        self.run_start_time = time.time()
        self.mysql_handle.update_engine_state(
            self.task_id, self.task_start_time, 'filtrate', 2)

        suspect_grays = []  # not filtrate url
        trusted_grays = []
        counterfeit_grays = []
        # Compare each url with the trusted list first, then with the
        # counterfeit list; urls found in neither stay suspect.
        for gray_url in self._iter_pending_urls():
            if self.trusted_select(gray_url) is not False:
                trusted_grays.append(gray_url)
            elif self.counterfeit_select(gray_url) is not False:
                counterfeit_grays.append(gray_url)
            else:
                suspect_grays.append(gray_url)

        # not filtrate url add gray_list in mongo
        filtrate_objectid = self._store_grays(
            suspect_grays, 'suspect_grays')
        filtrate_trusted_objectid = self._store_grays(
            trusted_grays, 'trusted_grays')
        filtrate_counterfeit_objectid = self._store_grays(
            counterfeit_grays, 'counterfeit_grays')
        self.update_finish_state(
            len(trusted_grays), len(counterfeit_grays), filtrate_objectid,
            filtrate_trusted_objectid, filtrate_counterfeit_objectid)
Ejemplo n.º 4
0
class MainControl(ServerBase):
    '''
    Control service: starts engines for a task, reacts to the "finished"
    messages the engines send back, and records the task state in mysql.

    Engine codes named in this class: 04 filtrate, 05 web_save,
    06 qt_crawler, 08 title, 09 structure, 10 view, 13 whois search.
    State values as used here: 2 running, 3 finished, 0 failed.
    Every engine is started with message_other_engine(0, [code], task_id);
    message type 1 is used to stop engines.
    '''

    # task_type -> the single engine that opens the task
    # (task types 1 and 3 are handled separately, see web_request_start)
    _FIRST_ENGINE = {2: '04', 4: '05', 5: '13'}

    # engine code -> task_result column that must be 3 before the task
    # is considered over (see check_engine_state)
    _CHECK_ENGINE_FIELDS = (('08', 'e_title_state'),
                            ('09', 'e_structure_state'),
                            ('10', 'e_view_emd_state'))

    def __init__(self):
        super(MainControl, self).__init__('control')
        # mysql_* attributes are not set here; they are expected to exist
        # after the base class initialiser has run.
        self.mysql_handle = MysqlOperate(self.mysql_db, self.mysql_host,
                                         self.mysql_user, self.mysql_password)

    def read_task_info(self, task_id):
        '''
        Read the task type and the engines to run from task_info.

        Returns (task_type, task_engines) where task_engines is the
        task_engine column split on '-'.
        NOTE: when the task does not exist the whole process is ended
        with os._exit(0).
        '''
        table_name = 'task_info'
        fields = ['task_type', 'task_engine']
        wheres = {'task_id': [task_id, 'd']}
        task_info = self.mysql_handle.require_get(
            table_name, fields, wheres, 'select', 'one')
        if task_info is False:
            sys.stderr.write(
                '%s  task no exist, task_id: %s\n' % (time.ctime(), task_id))
            os._exit(0)
        return task_info['task_type'], task_info['task_engine'].split('-')

    def read_running_engine(self, task_id):
        '''
        Return the numbers of the engines whose state column is 2 for the
        current run of task_id, or False when no task_result row exists.
        '''
        task_start_time = self.mysql_handle.get_task_last_time(task_id)
        table_name = 'task_result'
        fields = ['e_domain_state', 'e_search_state', 'e_filtrate_state',
                  'e_web_save_state', 'e_qt_crawler_state', 'e_feature_save_state',
                  'e_whois_search_state', 'e_title_state', 'e_structure_state',
                  'e_view_collect_state', 'e_view_emd_state']
        wheres = {'task_id': [task_id, 'd'],
                  'start_time': [task_start_time, 's']}
        select_result = self.mysql_handle.require_get(
            table_name, fields, wheres, 'select', 'one')
        if select_result is False:
            return False
        # column 'e_<name>_state' -> '<name>', which is the key of the
        # module level engine_list mapping
        running_engine_list = [engine_list[engine[2:-6]]
                               for engine in select_result
                               if select_result[engine] == 2]
        # single-argument print: same output on python 2 and python 3
        print('running_engine_list %s' % (running_engine_list,))
        return running_engine_list

    def update_start_state(self, task_id):
        '''
        Set the task state of the current run of task_id to 2.
        '''
        task_start_time = self.mysql_handle.get_task_last_time(task_id)
        self.mysql_handle.update_task_state(task_id, task_start_time, 2)

    def update_finished_state(self, task_id, task_state=3):
        '''
        Write task_state, task_run_time and task_stop_time of the current
        run of task_id to task_result and return the result of the update.

        The run time is "now" minus the task's last_time, which is parsed
        as '%Y-%m-%d %H:%M:%S'.
        '''
        task_start_time = self.mysql_handle.get_task_last_time(task_id)
        task_start_time_stamp = time.mktime(
            time.strptime(str(task_start_time), "%Y-%m-%d %H:%M:%S"))
        task_stop_time_stamp = time.time()
        task_run_time = task_stop_time_stamp - task_start_time_stamp
        task_stop_time = time.strftime(
            '%Y-%m-%d %H:%M:%S', time.localtime(task_stop_time_stamp))
        table_name = 'task_result'
        fields = {'task_state': [task_state, 'd'],
                  'task_run_time': [task_run_time, 'd'],
                  'task_stop_time': [task_stop_time, 's']}
        wheres = {'task_id': [task_id, 'd'],
                  'start_time': [task_start_time, 's']}
        result = self.mysql_handle.require_post(
            table_name, fields, wheres, post_type='update')
        sys.stdout.write(
            '%s |*|task win over|*|, task_id: %s, task_state: %s\n' % (time.ctime(), task_id, task_state))
        return result

    def check_engine_state(self, task_id, task_type, engines):
        '''
        Determine whether all the detection engines have run over.

        An engine is relevant when its code is in engines or task_type
        is 5.  Returns True only when every relevant engine is in state 3;
        returns False when the task_result row cannot be read.
        '''
        task_start_time = self.mysql_handle.get_task_last_time(task_id)
        table_name = 'task_result'
        fields = ['e_title_state', 'e_structure_state', 'e_view_emd_state']
        wheres = {'task_id': [task_id, 'd'],
                  'start_time': [task_start_time, 's']}
        task_result = self.mysql_handle.require_get(
            table_name, fields, wheres, 'select', 'one')
        if task_result is False:
            # no row for this run: nothing can be reported as finished
            return False
        for engine_code, state_field in self._CHECK_ENGINE_FIELDS:
            relevant = engine_code in engines or task_type == 5
            if relevant and task_result[state_field] != 3:
                return False
        return True

    # ------------------------------------------------------------------
    # Overridden response handlers
    # ------------------------------------------------------------------

    def web_request_start(self, task_id):
        '''
        Override of the daemon base class hook: respond to a start request.

        Starts the first engine(s) of the task according to its type and
        marks the task as started after each message.  Returns the result
        of the last message sent, or False when nothing was sent.
        '''
        sys.stdout.write('%s  control receive task_id start request: %s\n' %
                         (time.ctime(), task_id))
        message_result = False
        task_type, task_engines = self.read_task_info(task_id)
        if task_type in (1, 3):
            # only the detect engines the task asked for, in fixed order
            first_engines = [engine for engine in ('01', '02', '13')
                             if engine in task_engines]
        elif task_type in self._FIRST_ENGINE:
            first_engines = [self._FIRST_ENGINE[task_type]]
        else:
            # %s (not %d) so that an unexpected, non-integer task_type is
            # reported instead of raising while formatting the message
            sys.stderr.write(
                '%s  task_type error, task_id: %s, task_type: %s\n' % (time.ctime(), task_id, task_type))
            return message_result
        for engine in first_engines:
            message_result = self.message_other_engine(0, [engine], task_id)
            self.update_start_state(task_id)
        return message_result

    def web_request_stop(self, task_id):
        '''
        Control service responds to a front-end client's request to end
        a task.

        Sends message type 1 to every running engine; only when that
        returns True is the task marked finished.  Returns the update
        result, or False when the engines could not be messaged.
        '''
        sys.stdout.write('%s  control receive task_id stop request: %s\n' %
                         (time.ctime(), task_id))
        # NOTE(review): read_running_engine returns False when the run has
        # no task_result row; that value is passed on unchanged - confirm
        # message_other_engine copes with it.
        running_engine_list = self.read_running_engine(task_id)
        message_result = self.message_other_engine(
            1, running_engine_list, task_id)
        if message_result is True:
            return self.update_finished_state(task_id)
        return False

    def filtrate_to_control(self, task_id):
        '''
        message 2: filtrate engine finished message control
        '''
        sys.stdout.write('%s  control receive from filtrate engine task_id: %s\n' %
                         (time.ctime(), task_id))
        # 05: web_save engine; filtrate ended, start web_save
        self.message_other_engine(0, ['05'], task_id)

    def web_save_to_control(self, task_id):
        '''
        message 3: web_save engine finished message control
        '''
        sys.stdout.write('%s  control receive from web_save engine task_id: %s\n' %
                         (time.ctime(), task_id))
        # 06: qt_crawler engine, 08: title engine
        # web_save ended: always start qt_crawler, title only when wanted
        task_type, task_engines = self.read_task_info(task_id)
        self.message_other_engine(0, ['06'], task_id)
        if '08' in task_engines or task_type == 5:
            self.message_other_engine(0, ['08'], task_id)

    def qt_crawler_to_control(self, task_id):
        '''
        message 4: qt_crawler engine finished message control
        '''
        sys.stdout.write('%s  control receive from qt_crawler engine task_id: %s\n' %
                         (time.ctime(), task_id))
        # qt_crawler ended: engines 12 and 07 are always started,
        # 09 (structure engine) only when wanted
        task_type, task_engines = self.read_task_info(task_id)
        self.message_other_engine(0, ['12'], task_id)
        self.message_other_engine(0, ['07'], task_id)
        if task_type == 5 or '09' in task_engines:
            self.message_other_engine(0, ['09'], task_id)

    def detect_to_control(self, task_id):
        '''
        message 5: detect(domain or search or whois) engine finished message control
        '''
        sys.stdout.write('%s  control receive from detect engine task_id: %s\n' %
                         (time.ctime(), task_id))
        task_type, task_engines = self.read_task_info(task_id)
        if task_type in (3, 5):
            self.message_other_engine(0, ['04'], task_id)
        elif task_type == 1:  # task over
            self.update_finished_state(task_id)

    def check_to_control(self, task_id):
        '''
        message 6: check(title or structure or view) engine finished message control,
        over task
        '''
        task_type, task_engines = self.read_task_info(task_id)
        all_over = self.check_engine_state(task_id, task_type, task_engines)
        if all_over is True:  # all check engines over, task over
            self.update_finished_state(task_id)

    def feature_save_to_control(self, task_id):
        '''
        message 7: feature_save is task_type 4 last engine, other task_type no last
        over task
        '''
        task_type, task_engines = self.read_task_info(task_id)
        if task_type == 4:
            self.update_finished_state(task_id)

    def engine_failure_to_control(self, task_id):
        '''
        message 8: engine_failure,
        over task is error
        '''
        self.update_finished_state(task_id, 0)

    def engine_win_over_to_control(self, task_id):
        '''
        message 9: engine over, After engine need not start
        over task
        '''
        self.update_finished_state(task_id)

    def view_collect_to_control(self, task_id):
        '''
        message 10: view_collect engine over, After start view_emd
        '''
        task_type, task_engines = self.read_task_info(task_id)
        if task_type == 5 or '10' in task_engines:
            self.message_other_engine(0, ['10'], task_id)
        elif task_type == 4:
            self.update_finished_state(task_id)
Ejemplo n.º 5
0
class FiltrateStart(multiprocessing.Process):
    '''
    Filtrate engine process: compares the gray urls of a task with the
    trusted and the counterfeit list and stores the three resulting
    groups (suspect / trusted / counterfeit) through mongo_operate.
    '''

    def __init__(self, task_id, mysql_host, mysql_db, mysql_user,
                 mysql_password, mongo_db, mongo_host, mongo_port, mongo_user,
                 mongo_password, message_other_engine, write_process_pid,
                 remove_process_pid):
        super(FiltrateStart, self).__init__()
        self.task_id = task_id
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host, mysql_user,
                                         mysql_password)
        self.message_other_engine = message_other_engine
        self.write_process_pid = write_process_pid
        self.remove_process_pid = remove_process_pid

        # initial state, filled in by read_task_info / run
        self.user_id = ''
        self.gray_urls = []
        self.task_start_time = ''
        self.run_start_time = 0
        self.mongo_operate = Mongo_Operate(mongo_db, mongo_host, mongo_port,
                                           mongo_user, mongo_password)
        self.read_task_info()

    def read_task_info(self):
        '''
        Read the task information.

        Fills task_start_time, user_id and gray_urls (urls of the
        gray_list rows named in the task's '-' separated gray_id) and
        sets get_gray_iter to an iterator over the already detected gray
        urls, or to an empty iterator when there are none.
        NOTE: when the task does not exist the process is ended with
        os._exit(0).
        '''
        table_name = 'task_info'
        fields = ['last_time', 'user_id', 'gray_id']
        wheres = {'task_id': [self.task_id, 'd']}
        task_info = self.mysql_handle.require_get(table_name, fields, wheres,
                                                  'select', 'one')
        if task_info is False:
            sys.stderr.write('%s  task no exist, task_id: %s\n' %
                             (time.ctime(), self.task_id))
            os._exit(0)
        self.task_start_time = task_info['last_time']
        self.user_id = task_info['user_id']
        gray_id = task_info['gray_id']
        # read gray url
        if gray_id is not None and gray_id != '':
            table_name = 'gray_list'
            fields = ['url']
            for once_gray_id in gray_id.split('-'):
                if not once_gray_id:
                    # blank piece (e.g. a trailing '-'): int('') would raise
                    continue
                wheres = {'id': [int(once_gray_id), 'd']}
                select_result = self.mysql_handle.require_get(
                    table_name, fields, wheres, 'select', 'one')
                if select_result is False:
                    continue
                self.gray_urls.append(select_result['url'].encode('utf-8'))
        # read detected url
        table_name = 'task_result'
        fields = ['original_grayid']
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's']
        }
        select_result = self.mysql_handle.require_get(table_name, fields,
                                                      wheres, 'select', 'one')
        # a missing task_result row is reported as False, like every other
        # require_get call; treat it as "no detected urls"
        gary_objectid = None
        if select_result is not False:
            gary_objectid = select_result['original_grayid']
        if gary_objectid is not None and gary_objectid != '':
            gary_objectid = self.mongo_operate.expand_gray_list(gary_objectid)
            self.get_gray_iter = self.mongo_operate.get_gray_list(
                gary_objectid)
        else:
            self.get_gray_iter = iter([])

    def update_finish_state(self, trusted_filtrate_num,
                            counterfeit_filtrate_num, filtrate_objectid,
                            filtrate_trusted_objectid,
                            filtrate_counterfeit_objectid):
        '''
        task run over, update information in mysql

        Writes engine state 3, the two counters, the run time and the
        three mongo object ids to task_result, then sends message 2 to
        the control engine ('00').  When control does not answer, the
        task state is set to 0.  Finally the process pid is removed.
        '''
        run_time = int(time.time() - self.run_start_time)
        table_name = 'task_result'
        fields = {
            # plain 3: the former literal 03 is octal in python 2 and a
            # syntax error in python 3
            'e_filtrate_state': [3, 'd'],
            'filtrate_trusted_num': [trusted_filtrate_num, 'd'],
            'filtrate_counterfeit_num': [counterfeit_filtrate_num, 'd'],
            'filtrate_run_time': [run_time, 's'],
            'filtrate_objectid': [filtrate_objectid, 's'],
            'filtrate_trusted_objectid': [filtrate_trusted_objectid, 's'],
            'filtrate_counterfeit_objectid':
            [filtrate_counterfeit_objectid, 's']
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's']
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')
        # message to control
        send_result = self.message_other_engine(2, ['00'], self.task_id)
        if send_result is False:  # control engine no response, stop task
            self.mysql_handle.update_task_state(self.task_id,
                                                self.task_start_time, 0)
        self.remove_process_pid(self.task_id)
        sys.stdout.write('%s |*|engine win over|*|, task_id: %s\n' %
                         (time.ctime(), self.task_id))

    def _select_by_url(self, table_name, gray_url):
        '''Return the row of table_name whose url is gray_url (False if none).'''
        fields = ['*']
        wheres = {'url': [gray_url, 's']}
        return self.mysql_handle.require_get(table_name, fields, wheres,
                                             'select', 'one', 0)

    def trusted_select(self, gray_url):
        '''
        Look gray_url up in the trusted list.
        '''
        return self._select_by_url('trusted_list', gray_url)

    def counterfeit_select(self, gray_url):
        '''
        Look gray_url up in the counterfeit list.
        '''
        return self._select_by_url('counterfeit_list', gray_url)

    def _iter_gray_urls(self):
        '''
        Yield every url of get_gray_iter, then drain gray_urls from its
        end (gray_urls is empty afterwards).
        '''
        while True:
            try:
                # builtin next() works on python 2 and python 3 iterators
                gray_url = next(self.get_gray_iter)
            except StopIteration:
                break
            yield gray_url
        while self.gray_urls:
            yield self.gray_urls.pop()

    def _store_gray_list(self, gray_name, urls):
        '''Create a 'filtrate' gray list named gray_name, fill it, return its id.'''
        objectid = self.mongo_operate.create_gray(
            gray_name=gray_name,
            gray_type='filtrate',
            usr_id=self.user_id,
            task_id=self.task_id)
        self.mongo_operate.add_gray_list(urls, objectid)
        return objectid

    def run(self):
        '''
        Classify every gray url and store the result.

        A url found in the trusted list counts as trusted; otherwise a
        url found in the counterfeit list counts as counterfeit; all
        remaining urls are kept as suspects.
        '''
        # write child process pid to engine pids
        self.write_process_pid(self.task_id)
        self.run_start_time = time.time()
        self.mysql_handle.update_engine_state(self.task_id,
                                              self.task_start_time, 'filtrate',
                                              2)

        suspect_grays = []  # not filtrate url
        trusted_grays = []
        counterfeit_grays = []
        for gray_url in self._iter_gray_urls():
            # compare gray_url with the white and the black list; the
            # white list is asked first, the black list only on a miss
            if self.trusted_select(gray_url) is not False:
                trusted_grays.append(gray_url)
            elif self.counterfeit_select(gray_url) is not False:
                counterfeit_grays.append(gray_url)
            else:
                suspect_grays.append(gray_url)
        # not filtrate url add gray_list in mongo
        filtrate_objectid = self._store_gray_list(
            'suspect_grays', suspect_grays)
        filtrate_trusted_objectid = self._store_gray_list(
            'trusted_grays', trusted_grays)
        filtrate_counterfeit_objectid = self._store_gray_list(
            'counterfeit_grays', counterfeit_grays)
        self.update_finish_state(len(trusted_grays),
                                 len(counterfeit_grays), filtrate_objectid,
                                 filtrate_trusted_objectid,
                                 filtrate_counterfeit_objectid)
Ejemplo n.º 6
0
class WhoisReverse():
    '''
    Reverse whois lookup: reads the contacts stored for a domain and
    collects the domains that the chinaz reverse-whois page lists for
    those contacts, writing them to the whois_reverse table.
    '''

    # Domain suffixes get_top_host knows about.
    _TOP_HOST_POSTFIXES = (
        '.com', '.la', '.io', '.co', '.info', '.net', '.org', '.me',
        '.mobi', '.us', '.biz', '.xxx', '.ca', '.co.jp', '.com.cn',
        '.net.cn', '.org.cn', '.mx', '.tv', '.ws', '.ag', '.com.ag',
        '.net.ag', '.org.ag', '.am', '.asia', '.at', '.be', '.com.br',
        '.net.br', '.bz', '.com.bz', '.net.bz', '.cc', '.com.co',
        '.net.co', '.nom.co', '.de', '.es', '.com.es', '.nom.es',
        '.org.es', '.eu', '.fm', '.fr', '.gs', '.in', '.co.in', '.firm.in',
        '.gen.in', '.ind.in', '.net.in', '.org.in', '.it', '.jobs',
        '.jp', '.ms', '.com.mx', '.nl', '.nu', '.co.nz', '.net.nz',
        '.org.nz', '.se', '.tc', '.tk', '.tw', '.com.tw', '.idv.tw',
        '.org.tw', '.hk', '.co.uk', '.me.uk', '.org.uk', '.vg', '.br')
    # "<label><known suffix>" anchored at the end of the host
    _TOP_HOST_PATTERN = re.compile(
        r'[^\.]+(' + '|'.join(
            h.replace('.', r'\.') for h in _TOP_HOST_POSTFIXES) + ')$',
        re.IGNORECASE)
    # matches a url that already starts with "<scheme>://"
    _SCHEME_PATTERN = re.compile(r'[a-zA-Z][a-zA-Z0-9+.\-]*://')

    def __init__(self, mysql_host, mysql_db, mysql_user, mysql_password):
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host, mysql_user,
                                         mysql_password)
        # slots: 0 admin, 1 tech, 2 registrant (see get_source)
        self.email = ['', '', '']
        self.name = ['', '', '']
        self.domain = []

    def get_top_host(self, url):
        '''
        Return the last label of the url's host plus a known suffix
        (e.g. 'example.com.cn'); when no known suffix matches, the host
        itself is returned.
        '''
        # Only a real "<scheme>://" prefix counts as a scheme; a plain
        # startswith('http') test would misread hosts such as
        # 'httpbin.org' and leave urlparse with an empty netloc.
        if not self._SCHEME_PATTERN.match(url):
            url = 'http://' + url
        host = urlparse.urlparse(url).netloc
        m = self._TOP_HOST_PATTERN.search(host)
        return m.group() if m else host

    def get_reverse_whois(self, url):
        '''
        Run the reverse lookup for url.

        Returns False for an empty url, [] when the domain is not in
        whois_domain, otherwise the result of get_source().
        '''
        if not url:  # None or empty string
            return False
        self.original_domain = self.get_top_host(url)
        table_name = 'whois_domain'
        fields = ['admin', 'tech', 'registrant']  # wait to select fields
        wheres = {'name': [self.original_domain, 's']}
        self.contactid = self.mysql_handle.require_get(table_name,
                                                       fields,
                                                       wheres,
                                                       get_type='select',
                                                       fetch_type='one')
        if self.contactid is False:
            return []
        return self.get_source()

    def _read_contact(self, contacts_id):
        '''Return (name, email) of a whois_contacts row, ('', '') if missing.'''
        result = self.mysql_handle.require_get(
            'whois_contacts', ['name', 'email'],
            {'contacts_id': [contacts_id, 'd']},
            get_type='select', fetch_type='one')
        if result is False:
            return '', ''
        return result['name'], result['email']

    def get_source(self):
        '''
        Read name and email of the admin, tech and registrant contacts in
        self.contactid, drop repeated values, then search and store the
        domains for every remaining email and name.

        Returns self.domain, i.e. the domains found by the last search
        that was run ([] when no search was run).
        '''
        # start clean so that nothing from an earlier lookup on the same
        # instance leaks into this one
        self.email = ['', '', '']
        self.name = ['', '', '']
        self.domain = []

        admin = self.contactid['admin']
        tech = self.contactid['tech']
        registrant = self.contactid['registrant']
        self.name[0], self.email[0] = self._read_contact(admin)
        if tech != admin:
            self.name[1], self.email[1] = self._read_contact(tech)
        if registrant != admin and registrant != tech:
            self.name[2], self.email[2] = self._read_contact(registrant)

        # keep only the first occurrence of a repeated value
        for values in (self.email, self.name):
            if values[2] == values[1] or values[2] == values[0]:
                values[2] = ''
            if values[1] == values[0]:
                values[1] = ''

        for email in self.email:
            if email:  # skips '' and a NULL column alike
                self.search(email, 1)
                self.write_todb()
        for name in self.name:
            if name:
                self.search(name, 2)
                self.write_todb()
        return self.domain

    def search(self, source, search_mod):
        '''
        Query the chinaz reverse-whois page and put the domains it lists
        into self.domain.

        search_mod 1 sends source as it is (get_source uses it for
        emails); search_mod 2 replaces blanks by '+' first (used for
        names).  Any other value raises ValueError.
        '''
        self.source = source
        self.search_mod = search_mod
        if search_mod == 1:
            mode = '1'
        elif search_mod == 2:
            self.source = '+'.join(self.source.split(' '))
            mode = '2'
        else:
            raise ValueError('unsupported search_mod: %r' % (search_mod,))
        target_url = ('http://whois.chinaz.com/reverse?host=' +
                      self.source + '&ddlSearchMode=' + mode)

        response = urllib2.urlopen(target_url)
        try:
            find = response.read()
        finally:
            response.close()  # do not leak the connection
        if chardet.detect(find)['encoding'] == 'GB2312':
            find = find.decode('gb2312').encode('utf-8')
        page = etree.HTML(find)
        if page is None:  # nothing parseable came back
            self.domain = []
            return
        hrefs = page.xpath('//*[@id="detail"]/table/tbody/tr/td[1]/a/@href')
        # the domain is the link path without its leading '/'
        self.domain = [urlparse.urlparse(href).path[1:] for href in hrefs]

    def write_todb(self):
        '''
        Insert every (domain, original_domain) pair of self.domain into
        whois_reverse unless the pair is already stored.
        '''
        table_name = 'whois_reverse'
        # single-argument print: same output on python 2 and python 3
        print('whois_reverse domain %s' % (self.domain,))
        for domain in self.domain:
            wheres = {
                'domain': [domain, 's'],
                'original_domain': [self.original_domain, 's']
            }
            flag = self.mysql_handle.require_get(table_name,
                                                 ['id'],
                                                 wheres,
                                                 get_type='select',
                                                 fetch_type='one')
            if flag is False:
                fields = {
                    'domain': [domain, 's'],
                    'original_domain': [self.original_domain, 's']
                }
                self.mysql_handle.require_post(table_name,
                                               fields,
                                               post_type='insert')
Ejemplo n.º 7
0
class WhoisReverse():
    '''
    Reverse whois lookup: reads the contacts stored for a domain and
    collects the domains that the chinaz reverse-whois page lists for
    those contacts, writing them to the whois_reverse table.
    '''

    # Domain suffixes get_top_host knows about.
    _TOP_HOST_POSTFIXES = (
        '.com', '.la', '.io', '.co', '.info', '.net', '.org', '.me',
        '.mobi', '.us', '.biz', '.xxx', '.ca', '.co.jp', '.com.cn',
        '.net.cn', '.org.cn', '.mx', '.tv', '.ws', '.ag', '.com.ag',
        '.net.ag', '.org.ag', '.am', '.asia', '.at', '.be', '.com.br',
        '.net.br', '.bz', '.com.bz', '.net.bz', '.cc', '.com.co',
        '.net.co', '.nom.co', '.de', '.es', '.com.es', '.nom.es',
        '.org.es', '.eu', '.fm', '.fr', '.gs', '.in', '.co.in', '.firm.in',
        '.gen.in', '.ind.in', '.net.in', '.org.in', '.it', '.jobs',
        '.jp', '.ms', '.com.mx', '.nl', '.nu', '.co.nz', '.net.nz',
        '.org.nz', '.se', '.tc', '.tk', '.tw', '.com.tw', '.idv.tw',
        '.org.tw', '.hk', '.co.uk', '.me.uk', '.org.uk', '.vg', '.br')
    # "<label><known suffix>" anchored at the end of the host
    _TOP_HOST_PATTERN = re.compile(
        r'[^\.]+(' + '|'.join(
            h.replace('.', r'\.') for h in _TOP_HOST_POSTFIXES) + ')$',
        re.IGNORECASE)
    # matches a url that already starts with "<scheme>://"
    _SCHEME_PATTERN = re.compile(r'[a-zA-Z][a-zA-Z0-9+.\-]*://')

    def __init__(self, mysql_host, mysql_db, mysql_user, mysql_password):

        self.mysql_handle = MysqlOperate(mysql_db, mysql_host,
                                         mysql_user, mysql_password)
        # slots: 0 admin, 1 tech, 2 registrant (see get_source)
        self.email = ['', '', '']
        self.name = ['', '', '']
        self.domain = []

    def get_top_host(self, url):
        '''
        Return the last label of the url's host plus a known suffix
        (e.g. 'example.com.cn'); when no known suffix matches, the host
        itself is returned.
        '''
        # Only a real "<scheme>://" prefix counts as a scheme; a plain
        # startswith('http') test would misread hosts such as
        # 'httpbin.org' and leave urlparse with an empty netloc.
        if not self._SCHEME_PATTERN.match(url):
            url = 'http://' + url
        host = urlparse.urlparse(url).netloc
        m = self._TOP_HOST_PATTERN.search(host)
        return m.group() if m else host

    def get_reverse_whois(self, url):
        '''
        Run the reverse lookup for url.

        Returns False for an empty url, [] when the domain is not in
        whois_domain, otherwise the result of get_source().
        '''
        if not url:  # None or empty string
            return False
        self.original_domain = self.get_top_host(url)
        table_name = 'whois_domain'
        fields = ['admin', 'tech', 'registrant']  # wait to select fields
        wheres = {'name': [self.original_domain, 's']}
        self.contactid = self.mysql_handle.require_get(
            table_name, fields, wheres, get_type='select', fetch_type='one')
        if self.contactid is False:
            return []
        return self.get_source()

    def _read_contact(self, contacts_id):
        '''Return (name, email) of a whois_contacts row, ('', '') if missing.'''
        result = self.mysql_handle.require_get(
            'whois_contacts', ['name', 'email'],
            {'contacts_id': [contacts_id, 'd']},
            get_type='select', fetch_type='one')
        if result is False:
            return '', ''
        return result['name'], result['email']

    def get_source(self):
        '''
        Read name and email of the admin, tech and registrant contacts in
        self.contactid, drop repeated values, then search and store the
        domains for every remaining email and name.

        Returns self.domain, i.e. the domains found by the last search
        that was run ([] when no search was run).
        '''
        # start clean so that nothing from an earlier lookup on the same
        # instance leaks into this one
        self.email = ['', '', '']
        self.name = ['', '', '']
        self.domain = []

        admin = self.contactid['admin']
        tech = self.contactid['tech']
        registrant = self.contactid['registrant']
        self.name[0], self.email[0] = self._read_contact(admin)
        if tech != admin:
            self.name[1], self.email[1] = self._read_contact(tech)
        if registrant != admin and registrant != tech:
            self.name[2], self.email[2] = self._read_contact(registrant)

        # keep only the first occurrence of a repeated value
        for values in (self.email, self.name):
            if values[2] == values[1] or values[2] == values[0]:
                values[2] = ''
            if values[1] == values[0]:
                values[1] = ''

        for email in self.email:
            if email:  # skips '' and a NULL column alike
                self.search(email, 1)
                self.write_todb()
        for name in self.name:
            if name:
                self.search(name, 2)
                self.write_todb()
        return self.domain

    def search(self, source, search_mod):
        '''
        Query the chinaz reverse-whois page and put the domains it lists
        into self.domain.

        search_mod 1 sends source as it is (get_source uses it for
        emails); search_mod 2 replaces blanks by '+' first (used for
        names).  Any other value raises ValueError.
        '''
        self.source = source
        self.search_mod = search_mod
        if search_mod == 1:
            mode = '1'
        elif search_mod == 2:
            self.source = '+'.join(self.source.split(' '))
            mode = '2'
        else:
            raise ValueError('unsupported search_mod: %r' % (search_mod,))
        target_url = ('http://whois.chinaz.com/reverse?host=' +
                      self.source + '&ddlSearchMode=' + mode)

        response = urllib2.urlopen(target_url)
        try:
            find = response.read()
        finally:
            response.close()  # do not leak the connection
        if chardet.detect(find)['encoding'] == 'GB2312':
            find = find.decode('gb2312').encode('utf-8')
        page = etree.HTML(find)
        if page is None:  # nothing parseable came back
            self.domain = []
            return
        hrefs = page.xpath('//*[@id="detail"]/table/tbody/tr/td[1]/a/@href')
        # the domain is the link path without its leading '/'
        self.domain = [urlparse.urlparse(href).path[1:] for href in hrefs]

    def write_todb(self):
        '''
        Insert every (domain, original_domain) pair of self.domain into
        whois_reverse unless the pair is already stored.
        '''
        table_name = 'whois_reverse'
        # single-argument print: same output on python 2 and python 3
        print('whois_reverse domain %s' % (self.domain,))
        for domain in self.domain:
            wheres = {'domain': [domain, 's'],
                      'original_domain': [self.original_domain, 's']}
            flag = self.mysql_handle.require_get(
                table_name, ['id'], wheres, get_type='select', fetch_type='one')
            if flag is False:
                fields = {'domain': [domain, 's'],
                          'original_domain': [self.original_domain, 's']}
                self.mysql_handle.require_post(
                    table_name, fields, post_type='insert')
Ejemplo n.º 8
0
class _ServerSession(object):
    '''
    Bookkeeping for one daemon (service) process.

    Maintains the daemon's row in the MySQL server_live table (register,
    periodic time update, unregister) and opens its listening TCP socket.
    '''

    def __init__(self, server_ip='127.0.0.1', server_port='1234', server_type='default',
                 server_num=5, mysql_host='127.0.0.1', mysql_user='******',
                 mysql_password='', mysql_db='test', message_len=''):
        self.server_ip = server_ip  # IP address of the daemon
        self.server_port = server_port  # port to listen on
        self.server_type = server_type  # service type
        # number of connections the service accepts (socket listen backlog)
        self.server_num = server_num
        # Inter-service message protocol definition: a short int and an int,
        # standing for the message type and the task id.
        self.message_len = message_len
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host,
                                         mysql_user, mysql_password)
        self.try_send_message_num = 3

    def _endpoint_wheres(self):
        '''Return a fresh where-clause dict selecting this daemon's ip/port.'''
        return {
            'ip': [self.server_ip, 's'],
            'port': [self.server_port, 's']}

    def register_sever(self):
        '''
        Register the daemon (service) in the server_live table.

        If a row for the same ip/port is found it is deleted first, then a
        new row with status 1 and the current time is inserted. A line is
        written to stdout when the insert returns True.
        '''
        current_time = time.strftime(
            '%Y-%m-%d %H:%M', time.localtime(time.time()))
        table_name = 'server_live'
        existing = self.mysql_handle.require_get(
            table_name, ['*'], self._endpoint_wheres(), 'select', 'one', 0)
        if existing is not None:
            # drop the previous registration before inserting the new one
            self.mysql_handle.require_post(
                table_name, {}, self._endpoint_wheres(), 'delete')
        fields = {'ip': [self.server_ip, 's'],
                  'port': [self.server_port, 's'],
                  'type': [self.server_type, 's'],
                  'status': [1, 'd'],
                  'time': [current_time, 's']}
        result = self.mysql_handle.require_post(
            table_name, fields, {}, 'insert')
        if result is True:
            sys.stdout.write('%s: server register\n' % (time.ctime(),))

    def update_sever(self):
        '''
        Thread body: every 60 seconds write the current time into this
        daemon's server_live row to record that the service is alive.
        Never returns.
        '''
        table_name = 'server_live'
        wheres = self._endpoint_wheres()
        while True:
            time.sleep(60)
            current_time = time.strftime(
                '%Y-%m-%d %H:%M', time.localtime(time.time()))
            fields = {'time': [current_time, 's']}
            self.mysql_handle.require_post(
                table_name, fields, wheres, 'update')

    def start_update_state(self):
        '''
        Start the thread that runs update_sever.
        '''
        # NOTE(review): the thread is not marked as a daemon thread and
        # update_sever never returns, so it presumably keeps the process
        # alive until it is killed - confirm this is intended.
        t1 = threading.Thread(target=self.update_sever)
        t1.start()

    def over_sever(self):
        '''
        Delete the row previously registered for this daemon (service) in
        the server_live table. A line is written to stdout when the delete
        returns True.
        '''
        result = self.mysql_handle.require_post(
            'server_live', {}, self._endpoint_wheres(), 'delete')
        if result is True:
            sys.stdout.write('%s: server logout\n' % (time.ctime(),))

    def run_server(self):
        '''
        Start the service: create the TCP socket, bind it and listen.

        On a socket error a message is written to stderr and the process
        exits through sys.exit().
        '''
        # NOTE(review): the listening socket is neither stored nor returned
        # in the code visible here.
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        except socket.error as msg:
            sys.stderr.write(
                '%s\n' % SocketError(msg, 'run_server socket create'))
            sys.exit()
        # allow the port to be re-used right after a restart
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        try:
            # bind() needs an integer port, while the constructor default
            # (and the value stored in server_live) is a string.
            sock.bind((self.server_ip, int(self.server_port)))
            sock.listen(self.server_num)
        except socket.error as msg:
            sock.close()
            sys.stderr.write('%s' % SocketError(msg, 'run_server bind listen'))
            sys.stderr.write('  ip: %s port: %s \n' %
                             (self.server_ip, self.server_port))
            sys.exit()
Ejemplo n.º 9
0
class DomainStart(multiprocessing.Process):
    '''
    Domain-change engine for one task, run as a child process.

    Reads the task's protected/counterfeit URLs and transformation rules
    from MySQL, pulls candidate URLs from a URLGenerator, requests each of
    them with getPage under the reactor and stores the URLs that answered
    as a gray list.
    '''

    def __init__(self, task_id, mysql_host, mysql_db, mysql_user,
                 mysql_password, mongo_db, mongo_host, mongo_port, mongo_user,
                 mongo_password, message_other_engine, write_process_pid,
                 remove_process_pid):
        super(DomainStart, self).__init__()
        self.task_id = task_id
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host, mysql_user,
                                         mysql_password)
        self.message_other_engine = message_other_engine
        self.write_process_pid = write_process_pid
        self.remove_process_pid = remove_process_pid

        # initial state
        self.user_id = ''
        # URLs waiting to be transformed: both the known counterfeit sites
        # and the protected sites. Their domains are transformed the same
        # way, so they are handled together.
        self.wait_change_url_list = []
        self.original_host_rules = []
        self.original_top_rules = []
        self.original_path_rules = []
        self.exist_list = []  # URLs that were found to exist
        self.task_start_time = ''
        self.run_start_time = 0
        self.url_create_list = []
        self.protect_url = ''
        self.deferreds = []
        self.read_task_info()
        self.mongo_operate = Mongo_Operate(mongo_db, mongo_host, mongo_port,
                                           mongo_user, mongo_password)
        # build the URL generator object ...
        self.url_gen = URLGenerator(self.task_id, self.mongo_operate,
                                    self.update_running_state,
                                    self.wait_change_url_list,
                                    self.original_host_rules,
                                    self.original_top_rules,
                                    self.original_path_rules)
        # ... and the generator that yields the batches of URLs
        self.domain_change_url = self.url_gen.URL_Generator()

        self.domain_save_path = '/tmp/' + \
            str(task_id) + '_domain_request_urls.txt'
        self.domain_live_path = '/tmp/' + \
            str(task_id) + '_domain_live.txt'
        self.file_request_urls = open(self.domain_save_path, 'w')
        self.file_live_url = open(self.domain_live_path, 'w')

    def read_task_info(self):
        '''
        Read the task row from task_info and load the rule configuration.
        '''
        table_name = 'task_info'
        fields = [
            'last_time', 'user_id', 'protected_id', 'counterfeit_id',
            'host_rule_id', 'top_rule_id', 'path_rule_id'
        ]
        wheres = {'task_id': [self.task_id, 'd']}
        task_info = self.mysql_handle.require_get(table_name, fields, wheres,
                                                  'select', 'one')
        if task_info is False:
            # NOTE(review): this is reached from __init__, so it terminates
            # whichever process is constructing the object.
            os._exit(0)
        self.task_start_time = task_info['last_time']
        self.user_id = task_info['user_id']
        self.read_rule_config(task_info['protected_id'],
                              task_info['counterfeit_id'],
                              task_info['host_rule_id'],
                              task_info['top_rule_id'],
                              task_info['path_rule_id'])

    def _select_by_ids(self, table_name, column, joined_ids):
        '''
        Return `column` of every row of `table_name` whose id appears in the
        '-'-joined string `joined_ids`; ids whose lookup returns False are
        skipped. None or '' yields an empty list.
        '''
        values = []
        if joined_ids is None or joined_ids == '':
            return values
        for row_id in joined_ids.split('-'):
            row = self.mysql_handle.require_get(
                table_name, [column], {'id': [int(row_id), 'd']},
                'select', 'one')
            if row is False:
                continue
            values.append(row[column])
        return values

    def read_rule_config(self, protected_list_id, counterfeit_list_id,
                         host_rule_id, top_rule_id, path_rule_id):
        '''
        Read the transformation rules and the protected/counterfeit lists
        from MySQL.

        Every argument is a '-'-joined string of row ids (or None / '').
        Protected and counterfeit URLs are appended to
        wait_change_url_list; host rules are additionally split on '|'.
        '''
        self.wait_change_url_list.extend(
            self._select_by_ids('protected_list', 'url', protected_list_id))
        self.wait_change_url_list.extend(
            self._select_by_ids('counterfeit_list', 'url',
                                counterfeit_list_id))
        for rule in self._select_by_ids('host_change_rule', 'change_rule',
                                        host_rule_id):
            self.original_host_rules.extend(
                str(part) for part in rule.split('|'))
        self.original_top_rules.extend(
            str(rule) for rule in self._select_by_ids(
                'top_change_rule', 'change_rule', top_rule_id))
        self.original_path_rules.extend(
            str(rule) for rule in self._select_by_ids(
                'path_change_rule', 'change_rule', path_rule_id))

    def update_running_state(self,
                             all_change_num=0,
                             all_exist_change_num=0,
                             changed_num=0,
                             gray_exist_num=0,
                             update_type=0):
        '''
        Update the detection progress of this task in task_result.

        update_type=0: domain change update (writes all_change_num,
            all_exist_change_num and changed_num)
        update_type=1: url exist check update (writes gray_exist_num)

        All counters default to 0 so that a caller can pass only the ones
        relevant to its update_type by keyword, as pageCallback does.
        '''
        if update_type == 0:
            fields = {
                'domain_changed_all_num': [all_change_num, 'd'],
                'domain_changed_exist_num': [all_exist_change_num, 'd'],
                'domain_detected_num': [changed_num, 'd']
            }
        elif update_type == 1:
            fields = {'domain_gray_url_num': [gray_exist_num, 'd']}
        else:
            return
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's']
        }
        self.mysql_handle.require_post('task_result', fields, wheres,
                                       'update')

    def create_gray_mongo(self, exist_list):
        '''
        Create a gray list for this task in mongo, add exist_list to it and
        return the id of the created list.
        '''
        gray_name = 'NO.' + str(self.task_id) + ' task domian'
        detect_objectID = self.mongo_operate.create_gray(
            gray_name=gray_name,
            gray_type='domain_change',
            usr_id=self.user_id,
            task_id=self.task_id)
        self.mongo_operate.add_gray_list(exist_list, detect_objectID)
        return detect_objectID

    def update_finish_state(self, exist_list, run_time):
        '''
        task run over, update information in mysql

        Stores the result in task_result, sends message 9 to the control
        engine when nothing was found and message 5 (after saving the gray
        list in mysql) otherwise, then removes this process from the engine
        pids.
        '''
        detect_objectID = self.create_gray_mongo(exist_list)
        exist_url_num = len(exist_list)
        # save domain engine result in mysql task_result
        table_name = 'task_result'
        fields = {
            'e_domain_state': [3, 'd'],
            'domain_gray_url_num': [exist_url_num, 'd'],
            'original_grayid': [detect_objectID, 's'],
            'domain_run_time': [run_time, 's']
        }
        wheres = {
            'task_id': [self.task_id, 'd'],
            'start_time': [self.task_start_time, 's']
        }
        self.mysql_handle.require_post(table_name, fields, wheres, 'update')
        if exist_list == []:
            send_result = self.message_other_engine(9, ['00'], self.task_id)
        else:
            # save gray_list info in mysql
            self.mysql_handle.insert_suspect_list(detect_objectID,
                                                  self.user_id,
                                                  self.task_id,
                                                  'domain_change',
                                                  exist_url_num,
                                                  suspect_type=2)
            self.mysql_handle.insert_gray_list(exist_list,
                                               source='domain_change')
            # message to control
            send_result = self.message_other_engine(5, ['00'], self.task_id)
        if send_result is False:  # control engine no response, stop task
            self.mysql_handle.update_task_state(self.task_id,
                                                self.task_start_time, 0)
        self.remove_process_pid(self.task_id)
        sys.stdout.write('%s |*|engine win over|*|, task_id: %s\n' %
                         (time.ctime(), self.task_id))

    def _record_probe_done(self, url):
        '''
        Overwrite the live file with the URL just handled, give back one
        download slot (_NUM) and continue downloading.
        '''
        global _NUM
        self.file_live_url.seek(0)
        self.file_live_url.truncate(0)
        self.file_live_url.write(url + ' ' + str(self.engine_pid))
        self.file_live_url.flush()
        _NUM += 1
        self.download()

    def pageCallback(self, result, url, protect):
        '''
        Callback used when getPage fetched the page, i.e. the page exists.

        The URL is added to exist_list unless the page title contains
        "Redirect".
        '''
        match = re.search(r"<title>(.*?)</title>", result)
        title = match.group(1) if match is not None else 'None'
        if "Redirect" not in title:
            self.exist_list.append(url)
            self.update_running_state(gray_exist_num=len(self.exist_list),
                                      update_type=1)

        self.file_request_urls.write(url + '\n')
        self.file_request_urls.flush()
        self._record_probe_done(url)

    def finish(self, ign):
        '''
        Called once all deferreds have been processed: stop the reactor
        loop and delete the two /tmp bookkeeping files.
        '''
        # download() builds a new DeferredList every time it is called after
        # the generator is exhausted, so finish can run more than once; the
        # later stop()/remove() failures are deliberately ignored.
        try:
            reactor.stop()
        except Exception:
            pass
        for path in (self.domain_save_path, self.domain_live_path):
            try:
                os.remove(path)
            except OSError:
                pass

    def fetch_error(self, error, url, protect):
        '''
        Errback used when getPage failed, i.e. the page does not exist.

        A user-timeout failure is retried with a new request; any other
        failure just counts the URL as handled.
        '''
        if error.getErrorMessage().find(
                'User timeout caused connection failure') != -1:
            # NOTE(review): the retry deferred is not added to
            # self.deferreds, so finish() does not wait for it.
            d = getPage(url)
            d.addCallback(self.pageCallback, url, protect)
            d.addErrback(self.fetch_error, url, protect)
        else:
            self._record_probe_done(url)

    def _load_next_batch(self):
        '''
        Pull the next batch from the URL generator: its first item becomes
        protect_url, the rest url_create_list. Returns False when the
        generator is exhausted, True otherwise.
        '''
        try:
            batch = next(self.domain_change_url)
        except StopIteration:
            return False
        self.protect_url = batch[0]
        self.url_create_list = batch[1:]
        return True

    def download(self):
        '''
        Start requests while download slots (_NUM) are available.

        When the URL generator is exhausted, finish is scheduled to run
        after all deferreds collected so far have fired.
        '''
        global _NUM
        while _NUM > 0:
            if not self.url_create_list:
                if self._load_next_batch():
                    continue
                dl = defer.DeferredList(self.deferreds)
                dl.addCallback(self.finish)
                break
            url = self.url_create_list.pop(0).encode('utf-8')
            d = getPage(url)
            d.addCallback(self.pageCallback, url, self.protect_url)
            d.addErrback(self.fetch_error, url, self.protect_url)
            _NUM -= 1
            self.deferreds.append(d)

    def run(self):
        '''
        Process entry point.
        '''
        # write child process pid to engine pids
        self.write_process_pid(self.task_id)
        self.engine_pid = os.getpid()
        self.run_start_time = time.time()
        self.mysql_handle.update_engine_state(self.task_id,
                                              self.task_start_time, 'domain',
                                              2)
        self._load_next_batch()
        # Start downloading only once the reactor is running: if the
        # generator yields nothing, finish() is triggered immediately and
        # its reactor.stop() must find a running reactor, otherwise
        # reactor.run() below would never return.
        reactor.callWhenRunning(self.download)
        reactor.run()
        # finished
        run_time = int(time.time()) - int(self.run_start_time)
        self.update_finish_state(self.exist_list, run_time)
Ejemplo n.º 10
0
class WhoisSearchStart(multiprocessing.Process):
    '''
    Whois engine for one task, run as a child process.

    Runs the whois search and the reverse-whois lookup for the task's
    counterfeit URLs and stores the reverse-whois hits as a gray list.
    '''

    def __init__(self, task_id, mysql_host, mysql_db,
                 mysql_user, mysql_password, mongo_db, mongo_host, mongo_port,
                 mongo_user, mongo_password, message_other_engine,
                 write_process_pid, remove_process_pid):
        super(WhoisSearchStart, self).__init__()
        self.task_id = task_id
        self.mysql_host = mysql_host
        self.mysql_db = mysql_db
        self.mysql_user = mysql_user
        self.mysql_password = mysql_password
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host,
                                         mysql_user, mysql_password)
        self.message_other_engine = message_other_engine
        self.write_process_pid = write_process_pid
        self.remove_process_pid = remove_process_pid

        self.mongo_operate = Mongo_Operate(mongo_db, mongo_host,
                                           mongo_port, mongo_user,
                                           mongo_password)

        # initial state
        self.task_start_time = ''
        self.user_id = ''
        self.whois_search_url = ''
        self.whois_reverse_url = ''
        self.counterfeit_urls = []
        self.task_state = 0
        self.run_start_time = 0  # overwritten with time.time() in run()

        self.read_task_info()

    def read_task_info(self):
        '''
        Read the task row from task_info and collect the counterfeit URLs
        it refers to.
        '''
        table_name = 'task_info'
        fields = ['last_time', 'user_id', 'counterfeit_id',
                  'whois_search_url', 'whois_reverse_url']
        wheres = {'task_id': [self.task_id, 'd']}
        task_info = self.mysql_handle.require_get(
            table_name, fields, wheres, 'select', 'one')
        if task_info is False:
            sys.stderr.write(
                '%s  task no exist, task_id: %s\n' % (time.ctime(), self.task_id))
            os._exit(0)
        self.task_start_time = task_info['last_time']
        self.user_id = task_info['user_id']
        self.whois_search_url = task_info['whois_search_url']
        self.whois_reverse_url = task_info['whois_reverse_url']
        original_counterfeit_list = task_info['counterfeit_id']
        if original_counterfeit_list is None or original_counterfeit_list == '':
            return
        # get counterfeit url in mysql counterfeit_list; the ids are stored
        # as one '-'-joined string
        table_name = 'counterfeit_list'
        fields = ['url']
        for counterfeit_id in original_counterfeit_list.split('-'):
            wheres = {'id': [int(counterfeit_id), 'd']}
            select_result = self.mysql_handle.require_get(
                table_name, fields, wheres, 'select', 'one')
            if select_result is False:
                continue
            self.counterfeit_urls.append(select_result['url'].encode('utf-8'))

    def add_gray_list(self, url_list):
        '''
        Create a gray list holding url_list in mongo and record it in
        mysql (task_result, suspect list, gray list).

        Returns False without doing anything when url_list is empty,
        otherwise None.
        '''
        if url_list == []:
            return False
        gray_objectid = self.mongo_operate.create_gray(
            gray_name='whois_reverse_gray', gray_type='whois_reverse',
            usr_id=self.user_id, task_id=self.task_id)
        self.mongo_operate.add_gray_list(
            url_list, gray_objectid)
        table_name = 'task_result'
        fields = {'original_grayid': [gray_objectid, 's']}
        wheres = {'task_id': [self.task_id, 'd'],
                  'start_time': [self.task_start_time, 's']}
        self.mysql_handle.require_post(
            table_name, fields, wheres, 'update')
        # save gray_list info in mysql suspect_list
        url_num = len(url_list)
        self.mysql_handle.insert_suspect_list(gray_objectid, self.user_id, self.task_id,
                                              'whois_reverse', url_num, suspect_type=2)
        self.mysql_handle.insert_gray_list(url_list, source='whois_reverse')

    def update_finish_state(self, new_gray_lsit):
        '''
        Record the end of the run in task_result and notify the control
        engine: message 9 when new_gray_lsit is empty, otherwise the gray
        list is stored and message 5 is sent. Finally this process is
        removed from the engine pids.
        '''
        run_time = int(time.time() - self.run_start_time)
        table_name = 'task_result'
        fields = {'e_whois_search_state': [3, 'd'],
                  'whois_search_run_time': [run_time, 's']}
        wheres = {'task_id': [self.task_id, 'd'],
                  'start_time': [self.task_start_time, 's']}
        self.mysql_handle.require_post(
            table_name, fields, wheres, 'update')
        # message to control
        if new_gray_lsit == []:
            send_result = self.message_other_engine(9, ['00'], self.task_id)
        else:
            self.add_gray_list(new_gray_lsit)
            send_result = self.message_other_engine(5, ['00'], self.task_id)
        if send_result is False:  # control engine no response, stop task
            self.mysql_handle.update_task_state(
                self.task_id, self.task_start_time, 0)
        self.remove_process_pid(self.task_id)
        sys.stdout.write(
            '%s |*|engine win over|*|, task_id: %s\n' % (time.ctime(), self.task_id))

    def run_whois_reverse(self, url):
        '''
        Run the reverse-whois lookup for url and return the found domains
        as 'http://<domain>/' URLs. Any error during the lookup is printed
        and an empty list is returned.
        '''
        whois_reverse = WhoisReverse(self.mysql_host, self.mysql_db,
                                     self.mysql_user, self.mysql_password)
        try:
            return ['http://' + domain + '/'
                    for domain in whois_reverse.get_reverse_whois(url)]
        except Exception:
            traceback.print_exc()
            return []

    def run_whois_search(self, url):
        '''
        Run the whois search module for url; an error raised by it is
        printed and otherwise ignored, so that it does not affect the
        caller.
        '''
        url_analysis = Urlanalysis(1, self.mysql_host, self.mysql_user,
                                   self.mysql_password, self.mysql_db)
        try:
            url_analysis.getUrllist_list([url])
        except Exception:
            traceback.print_exc()

    def web_save_transfer(self, url):
        '''
        Move the saved page of url from the gray store to the counterfeit
        store, both in mongo and on disk.
        '''
        self.mongo_operate.transfer_web_save(
            url, source_type='gray', goal_type='counterfeit')
        h = WebSavePath()
        source_file_path, target_file_path = h.get_transfer_path(
            url, 'gray', 'counterfeit')
        web_info_transfer(source_file_path, target_file_path)

    def whois_operation(self):
        '''
        Process the task: handle the optional single search/reverse URLs,
        then every counterfeit URL, and finish the task with the collected
        reverse-whois URLs.
        '''
        if self.whois_search_url != '' and self.whois_search_url is not None:
            self.run_whois_search(self.whois_search_url)
        if self.whois_reverse_url != '' and self.whois_reverse_url is not None:
            # NOTE(review): the URL list returned here is not added to the
            # gray list - confirm that is intended.
            self.run_whois_reverse(self.whois_reverse_url)
        new_gray_list = []
        # Test the list instead of catching IndexError around the whole
        # body, so an IndexError raised by a callee is no longer mistaken
        # for "no URLs left".
        while self.counterfeit_urls:
            url = self.counterfeit_urls.pop()
            #self.web_save_transfer(url)
            self.mysql_handle.update_counterfeit_list_statistic(url)
            self.run_whois_search(url)
            new_gray_list.extend(self.run_whois_reverse(url))
        self.update_finish_state(new_gray_list)

    def run(self):
        '''
        Process entry point.
        '''
        # write child process pid to engine pids
        self.write_process_pid(self.task_id)
        self.run_start_time = time.time()
        self.mysql_handle.update_engine_state(
            self.task_id, self.task_start_time, 'whois_search', 2)
        self.whois_operation()
Ejemplo n.º 11
0
class _ServerSession(object):
    def __init__(self,
                 server_ip='127.0.0.1',
                 server_port='1234',
                 server_type='default',
                 server_num=5,
                 mysql_host='127.0.0.1',
                 mysql_user='******',
                 mysql_password='',
                 mysql_db='test',
                 message_len=''):
        '''
        Store the daemon's endpoint settings and open the MySQL handle.

        server_ip / server_port: address the daemon listens on
        server_type: service type
        server_num: number of connections accepted (socket listen backlog)
        message_len: inter-service message protocol definition, made of a
            short int and an int standing for message type and task id
        mysql_*: arguments handed to MysqlOperate
        '''
        # endpoint description
        self.server_ip, self.server_port = server_ip, server_port
        self.server_type = server_type
        self.server_num = server_num
        # messaging settings
        self.message_len = message_len
        self.try_send_message_num = 3
        # database access
        self.mysql_handle = MysqlOperate(mysql_db, mysql_host, mysql_user,
                                         mysql_password)

    def register_sever(self):
        '''
        Register the daemon (service) in the server_live table.

        When a row for the same ip/port is found it is deleted first; a new
        row with status 1 and the current time is then inserted, and a line
        is written to stdout if that insert returns True.
        '''
        now = time.strftime('%Y-%m-%d %H:%M', time.localtime(time.time()))
        table = 'server_live'
        # select condition  wheres={field:[value,field_type]}
        previous = self.mysql_handle.require_get(
            table, ['*'],
            {'ip': [self.server_ip, 's'], 'port': [self.server_port, 's']},
            'select', 'one', 0)
        if previous is not None:
            # remove the old registration before writing the new one
            self.mysql_handle.require_post(
                table, {},
                {'ip': [self.server_ip, 's'],
                 'port': [self.server_port, 's']},
                'delete')
        row = {
            'ip': [self.server_ip, 's'],
            'port': [self.server_port, 's'],
            'type': [self.server_type, 's'],
            'status': [1, 'd'],
            'time': [now, 's'],
        }
        inserted = self.mysql_handle.require_post(table, row, {}, 'insert')
        if inserted is True:
            sys.stdout.write('%s: server register\n' % (time.ctime(), ))

    def update_sever(self):
        '''
        Thread body: every 60 seconds write the current time into this
        daemon's server_live row, recording that the service is alive.
        Never returns.
        '''
        endpoint = {'ip': [self.server_ip, 's'],
                    'port': [self.server_port, 's']}
        while True:
            time.sleep(60)
            stamp = time.strftime('%Y-%m-%d %H:%M', time.localtime())
            self.mysql_handle.require_post(
                'server_live', {'time': [stamp, 's']}, endpoint, 'update')

    def start_update_state(self):
        '''
        Start the thread that runs update_sever, i.e. the periodic
        "service is alive" update.
        '''
        threading.Thread(target=self.update_sever).start()

    def over_sever(self):
        '''
        Delete the row this daemon (service) registered earlier in the
        server_live table; a line is written to stdout when the delete
        returns True.
        '''
        endpoint = {'ip': [self.server_ip, 's'],
                    'port': [self.server_port, 's']}
        removed = self.mysql_handle.require_post(
            'server_live', {}, endpoint, 'delete')
        if removed is not True:
            return
        sys.stdout.write('%s: server logout\n' % (time.ctime(), ))

    def run_server(self):
        '''
        运行相应服务,建立socket连接,监听端口
        '''
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        except socket.error, msg:
            sys.stderr.write('%s\n' %
                             SocketError(msg, 'run_server socket create'))
            sys.exit()
        # port re run
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        try:
            sock.bind((self.server_ip, self.server_port))  # 绑定于端口和ip
            sock.listen(self.server_num)
        except socket.error, msg:
            sys.stderr.write('%s' % SocketError(msg, 'run_server bind listen'))
            sys.stderr.write('  ip: %s port: %s \n' %
                             (self.server_ip, self.server_port))
            sys.exit()