Esempio n. 1
0
 def task_finish(self):
     """
     Wait until no job is pending or running, then return.

     Polls ``t_job`` every 30 seconds for rows whose status is 0 or 1 and
     logs a heartbeat with the number of rows in each state (the counters
     treat status 0 as waiting and status 1 as running).

     :return: None, once the query returns no rows
     """
     sql = 'select status, id, topic, job_params from t_job where status in (0,1);'
     while True:
         jobs = self.db.query(sql)
         # TODO: per-job timeout / forced termination mechanism
         if len(jobs) <= 0:
             Logging.info('所有任务执行完成!')
             # TODO: unified scheduling persistence
             # TODO: monitoring / alerting
             # Leave the loop here: previously the method kept polling
             # forever after completion, so it could never hand control
             # back to its caller.
             return
         task_waiting = sum(1 for x in jobs if x[0] == 0)
         task_running = sum(1 for x in jobs if x[0] == 1)
         Logging.info('heartbeat 待执行任务数:', task_waiting, '执行中任务数:',
                      task_running)
         sleep(30)
Esempio n. 2
0
 def operation_page(self):
     """
     Open the hard-coded report URL, click one page element, then wait for
     the resulting download and log what was loaded.

     The three-second pauses are fixed sleeps, not explicit waits.
     """
     report_url = 'https://branding.taobao.com/#!/report/index?productid=101005202&effect=15&startdate=2019-06-05&enddate=2019-06-19'
     # Presumably the export/download link of the report page - the method
     # waits for a download right after clicking it.
     link_xpath = '//*[@id="brix_12290"]/div[4]/a'

     self.driver.get(report_url)
     Time.sleep(3)

     link = self.driver.find_element_by_xpath(link_xpath)
     link.click()
     Time.sleep(3)

     self.wait_download_finish()
     Logging.info(self.source_data_list)
     Logging.info('end')
Esempio n. 3
0
 def get_task(self):
     """
     Fetch the next waiting job (status = 0) and decode its parameters.

     ``job_params`` is parsed as ``"<store_id>|<id>,<id>,..."``.  Blank
     entries in the id list (for example from a trailing comma) are
     skipped, and the ids are returned in random order.

     :return: ``(job_id, store_id, page_data_ids)``, or
              ``(None, None, None)`` when no job is waiting
     """
     # TODO: wrap the fetch in a database transaction
     sql = 'select id, job_params from t_job where status = 0 order by job_sort,RAND();'
     jobs = self.db.query(sql)
     if len(jobs) <= 0:
         return None, None, None

     job = jobs[0]
     Logging.info('总任务数:', len(jobs), ' 获取任务:', job)
     job_id = int(job[0])
     fields = job[1].split('|')
     store_id = int(fields[0])
     # Filter blanks instead of list.remove(''): remove() raises ValueError
     # when there is no empty entry and drops only the first one otherwise.
     page_data_ids = [int(s) for s in fields[1].split(',') if s.strip()]
     shuffle(page_data_ids)
     return job_id, store_id, page_data_ids
Esempio n. 4
0
    def init_web_driver(self):
        """
        Attach a Chrome driver to the browser listening on ``self.port``.

        The driver is created with the ``debuggerAddress`` experimental
        option pointing at ``127.0.0.1:<port>`` and stored in
        ``self.driver``.

        :return: True when the driver was created
        :raises Exception: when anything in the attach sequence fails;
            ``self.error`` is set to ``ErrorEnum.ERROR_1003`` first and the
            original exception is kept as the cause
        """
        try:
            chrome_options = Options()
            chrome_options.add_experimental_option(
                "debuggerAddress", "127.0.0.1:{}".format(self.port))
            # NOTE(review): newer Selenium releases expect `options=` instead
            # of `chrome_options=` - confirm against the pinned version.
            self.driver = webdriver.Chrome(chrome_options=chrome_options)

            Logging.info('{} - Chrome[{}]连接成功。'.format(self.store.name,
                                                       self.port))
        except Exception as e:
            message = 'port:{} 无法接管浏览器'.format(self.port)
            # Use the project logger rather than print() so the cause ends
            # up in the same place as every other error.
            Logging.error(e)
            Logging.error(message)
            self.error = ErrorEnum.ERROR_1003
            # Same exception type as before, but with a message and the
            # underlying cause attached.
            raise Exception(message) from e
        return True
Esempio n. 5
0
 def __init__(self, name, param=None):
     """
     Instantiate the object/task identified by ``name``.

     :param name: dotted object identifier (path from the package down to
                  the class, e.g. ``handle.xxx.Obj``)
     :param param: instantiation parameters; the page-scraping branches
                   read the keys ``store_id``, ``page_data_id`` and
                   ``port``.  Defaults to a new empty dict.

     On an unknown ``name`` ``self.error`` becomes ``ERROR_9001``.  Any
     exception raised while instantiating is logged and turned into an
     error code on ``self.error`` instead of propagating.
     """
     self.error = None
     self.obj = None
     self.obj_name = name
     # A `{}` default in the signature would be one dict shared by every
     # instance created without an explicit param.
     self.obj_param = {} if param is None else param
     try:
         Logging.info(self.obj_name, self.obj_param, ' 实例化 start!')
         if self.obj_name == 'handle.task_creator.TaskCreator':
             self.obj = TaskCreator()
         elif self.obj_name == 'handle.login.tb_login.TaoLogin':
             try:
                 self.obj = tb_login()
             except Exception as e:
                 Logging.error(e)
                 self.error = ErrorEnum.ERROR_2000
         # ================ page-scraping instance config START ================
         elif self.obj_name == 'handle.website.subway.report.SubReportDay':
             self.obj = SpreadReportDay(self.obj_param['store_id'],
                                        self.obj_param['page_data_id'],
                                        self.obj_param['port'])
         elif self.obj_name == 'handle.website.subway.direct_report.SpreadReportDay':
             self.obj = SpreadReportDay1(self.obj_param['store_id'],
                                         self.obj_param['page_data_id'],
                                         self.obj_param['port'])
         # ================ page-scraping instance config END ================
         else:
             self.error = ErrorEnum.ERROR_9001
             # str(): obj_param is normally a dict, and `str + dict` raised
             # TypeError here, so the message was never stored.
             self.error.value.set_msg(('未匹配到任务实例 name:' + self.obj_name +
                                       ',param:' + str(self.obj_param)))
         if self.is_success():
             Logging.info(self.obj_name, self.obj_param, ' 实例化成功 end!')
         else:
             Logging.info(self.obj_name, self.obj_param, ' 实例化失败 error:',
                          self.error, ' end!')
     except Exception as e:
         Logging.error(e)
         # Prefer the error recorded by the created object; otherwise fall
         # back to ERROR_9999.  An error already set above is kept.
         if self.is_success() and self.obj and self.obj.error:
             self.error = self.obj.error
         elif self.is_success():
             self.error = ErrorEnum.ERROR_9999
Esempio n. 6
0
 def insert_many(self, sql, data_list):
     """
     Log the statement with its rows and pass both to the cursor's
     ``executemany``.  No commit is issued here.

     :param sql: statement text
     :param data_list: parameter rows handed to ``executemany``
     """
     Logging.info('db.insert_many sql:', sql, data_list)
     cursor = self.db_cur
     cursor.executemany(sql, data_list)
Esempio n. 7
0
 def delete(self, sql):
     """
     Log the statement and execute it on the cursor.  No commit is issued
     here and nothing is returned.

     :param sql: complete statement text
     """
     Logging.info('db.delete sql:', sql)
     cursor = self.db_cur
     cursor.execute(sql)
Esempio n. 8
0
 def insert(self, sql, tuple_data):
     """
     Execute a parameterised statement and return the value reported by
     ``select last_insert_id()`` afterwards.

     :param sql: statement text with placeholders
     :param tuple_data: values bound to the placeholders
     :return: first column of the first row of the ``last_insert_id`` query
     """
     Logging.info('db.insert sql:', sql, tuple_data)
     self.db_cur.execute(sql, tuple_data)
     id_rows = self.query('select last_insert_id() as id')
     first_row = id_rows[0]
     return first_row[0]
Esempio n. 9
0
 def execute(self, sql):
     """
     Log the statement, execute it on the cursor and commit.

     :param sql: complete statement text
     :return: whatever the cursor's ``execute`` returned
     """
     Logging.info('db.execute sql:', sql)
     cursor = self.db_cur
     outcome = cursor.execute(sql)
     self.commit()
     return outcome
Esempio n. 10
0
 def query(self, sql):
     """
     Log the statement, execute it and return every row.

     :param sql: complete statement text
     :return: result of the cursor's ``fetchall()``
     """
     Logging.info('db.query sql:', sql)
     cursor = self.db_cur
     cursor.execute(sql)
     return cursor.fetchall()
Esempio n. 11
0
 def wait_download_finish(self, file_type=None):
     """
     Wait for a downloaded file, move it to the processing directory and
     load it into ``self.source_data_list``.

     ``self.FILE_DOWNLOAD_PATH`` is polled about once per second for up to
     roughly three minutes.  Entries whose name contains ``.crdownload`` or
     ``.tmp`` are treated as still downloading and ignored.  A regular file
     is a candidate when ``page_data.rule_read_file_prefix`` is None or the
     file name starts with that prefix.

     :param file_type: ``'excel'`` or ``'csv'``; when None it is derived
         from the file name ending (``csv`` -> csv, ``xls``/``xlsx`` ->
         excel, case-insensitive)
     :return: True once exactly one candidate was moved and loaded,
         False when the timeout expires without a candidate
     :raises Exception: when more than one candidate is present, or the
         file type cannot be determined
     """
     # Download timeout: about 3 minutes, spent in one-second polls.
     timeout_num = 180
     prefix = self.page_data.rule_read_file_prefix
     while timeout_num >= 0:
         # Collect every finished file that matches in this poll.  The
         # previous code reset its counter per file and used the loop
         # variable afterwards, so it could pick up the wrong entry.
         candidates = []
         for name in os.listdir(self.FILE_DOWNLOAD_PATH):
             # Download still in progress (temporary file suffix).
             if '.crdownload' in name or '.tmp' in name:
                 continue
             path = os.path.join(self.FILE_DOWNLOAD_PATH, name)
             if not os.path.isfile(path):
                 continue
             if prefix is None or name.startswith(prefix):
                 candidates.append((name, path))

         if not candidates:
             Time.sleep(1)
             timeout_num = timeout_num - 1
             continue
         if len(candidates) > 1:
             raise Exception('文件下载失败')

         file, file_path = candidates[0]
         self.file_names.append(file)
         # Work out the processing directory for the file.
         if self.page_data.rule_save_path_suffix is None:
             file_process_path = self.FILE_PROCESS_PATH
         else:
             path_suffix = self.page_data.rule_save_path_suffix
             for key, value in self.data_dimension_dict.items():
                 path_suffix = path_suffix.replace(key, value)
             file_process_path = self.FILE_PROCESS_PATH + '/' + path_suffix
             if not os.path.exists(file_process_path):
                 os.makedirs(file_process_path)
         remote_path = os.path.join(file_process_path, file)
         # TODO: rename instead of overwrite when the target exists
         #       (timestamp.original_name)
         if os.path.exists(remote_path):
             os.remove(remote_path)
         shutil.move(file_path, remote_path)
         Logging.info("move %s -> %s" % (file_path, remote_path))
         # Read the file.
         # TODO: archive extraction, multi-file and multi-sheet handling
         # TODO: configurable file types, support for common formats
         if file_type is None:
             lowered = file.lower()
             if lowered.endswith('csv'):
                 file_type = 'csv'
             elif lowered.endswith(('xls', 'xlsx')):
                 file_type = 'excel'
         if file_type == 'excel':
             df = pd.read_excel(remote_path)
         elif file_type == 'csv':
             df = pd.read_csv(remote_path)
         else:
             Logging.error('解析文件类型,未找到!')
             raise Exception('解析文件类型,未找到!')
         self.source_data_list.append(df)
         return True
     return False
Esempio n. 12
0
def worker_task_run():
    """
    Worker loop: claim waiting jobs one at a time and process their pages.

    For every claimed job each ``page_data_id`` is processed in turn:
    the page task is created, a login is performed when the task object
    reports it is not logged in, and the page steps are run in order.  A
    failing page step is logged and the next page is attempted.  Once all
    pages have been attempted the job is closed with result ``'success'``;
    an exception outside the per-page steps (task creation, login) closes
    it with ``'fail'``.  The loop ends when ``get_task`` returns no job id.
    """
    # (step name passed to TaskController.run, message used on failure)
    page_steps = (
        ('operation_page', '取数-页面操作失败!'),
        ('operation_page_download', '取数-页面文件下载及读取失败!'),
        ('operation_data_process', '取数-数据处理失败!'),
        ('operation_data_input', '取数-数据入库失败!'),
        ('operation_data_backup', '取数-数据备份失败!'),
    )
    tc = TaskController('handle.task_creator.TaskCreator')
    job_id, store_id, page_data_ids = tc.run('get_task')
    while job_id:
        flag = tc.run('task_set_start', {'job_id': job_id})
        if not flag:
            # Claiming the job did not succeed; try the next one.
            Logging.info('job:', job_id, store_id, page_data_ids,
                         ' 任务领取慢了一拍,继续获取其他任务!')
            job_id, store_id, page_data_ids = tc.run('get_task')
            continue
        try:
            port = None
            for page_data_id in page_data_ids:
                # step1: initialise the page task
                param = {
                    'store_id': store_id,
                    'page_data_id': page_data_id,
                    'port': port,
                    'job_id': job_id
                }
                task = TaskController(
                    'handle.website.subway.report.SubReportDay', param)
                if not task.obj.login_flag:
                    # step2: log in, then rebuild the task with the port
                    login_tc = TaskController('handle.login.tb_login.TaoLogin',
                                              task.store)
                    login_tc.run('run')
                    if login_tc.is_success():
                        port = login_tc.port
                        param['port'] = port
                        task = TaskController(
                            'handle.website.subway.report.SubReportDay', param)
                    else:
                        Logging.error('param:', param, '登录失败!')
                        raise Exception('param:', param, '登录失败!')
                if not task.is_success():
                    Logging.error('param:', param, '任务初始化失败!')
                    raise Exception('param:', param, '任务初始化失败!')
                try:
                    # step3..step7: page operation, download, processing,
                    # storage and backup, stopping at the first failure
                    for step, failure in page_steps:
                        task.run(step)
                        if not task.is_success():
                            Logging.error('param:', param, failure)
                            raise Exception('param:', param, failure)
                except Exception as e:
                    Logging.error(e)
                    Logging.error('param:', param, ' 页面取数过程失败!')
            # Close the job once, after every page was attempted.  This used
            # to run inside the page loop, which marked the job finished
            # after the first page and never closed a job without pages.
            # NOTE(review): a job whose pages failed in the steps above is
            # still reported as 'success', as before - confirm this is wanted.
            tc.run('task_set_end', {'job_id': job_id, 'result': 'success'})
        except Exception as e:
            Logging.error(e)
            Logging.error('job_id:', job_id, ' 任务执行失败!')
            tc.run('task_set_end', {'job_id': job_id, 'result': 'fail'})
        # Fetch the next job.
        job_id, store_id, page_data_ids = tc.run('get_task')
Esempio n. 13
0
 def run(self, func, param=None):
     """
     Dispatch one named step to the wrapped object.

     :param func: step name.  For ``handle.task_creator.TaskCreator``:
         ``task_init``, ``task_added``, ``get_task``, ``task_finish``
         (called without arguments) and ``task_set_start``,
         ``task_set_end`` (called with ``param``).  For
         ``handle.login.tb_login.TaoLogin``: ``run`` (called with
         ``param``).  For names starting with ``handle.website``:
         ``operation_page``, ``operation_data_process``,
         ``operation_data_input``, ``operation_data_backup``.
     :param param: argument forwarded to the steps that take one;
         defaults to a new empty dict
     :return: the step's return value, or None when a website step failed
         or the step name was not recognised
     :raises Exception: when a non-website step (or the dispatch itself)
         raises; ``self.error`` is set first and the cause is chained

     A failing website step does not raise: the exception is logged and
     ``self.error`` is set to the step's error code.  An unrecognised
     combination sets ``ERROR_9002``.
     """
     # A `{}` default in the signature is shared between calls and is
     # handed to the callee, which could mutate it.
     if param is None:
         param = {}
     results = None
     try:
         Logging.info(self.obj_name, func, param, ' 步骤执行 start!')
         creator_plain = ('task_init', 'task_added', 'get_task',
                          'task_finish')
         creator_with_param = ('task_set_start', 'task_set_end')
         # NOTE(review): 'operation_page_download' is requested by a caller
         # in this file but has no entry here, so it ends in ERROR_9002 -
         # confirm whether a branch is missing.
         website_errors = {
             'operation_page': ErrorEnum.ERROR_3000,
             'operation_data_process': ErrorEnum.ERROR_4000,
             'operation_data_input': ErrorEnum.ERROR_5000,
             'operation_data_backup': ErrorEnum.ERROR_6000,
         }
         is_creator = self.obj_name == 'handle.task_creator.TaskCreator'
         if is_creator and func in creator_plain:
             results = getattr(self.obj, func)()
         elif is_creator and func in creator_with_param:
             results = getattr(self.obj, func)(param)
         elif self.obj_name == 'handle.login.tb_login.TaoLogin' and func == 'run':
             results = self.obj.run(param)
         elif self.obj_name.startswith(
                 'handle.website') and func in website_errors:
             try:
                 results = getattr(self.obj, func)()
             except Exception as e:
                 Logging.error(e)
                 self.error = website_errors[func]
         else:
             self.error = ErrorEnum.ERROR_9002
             self.error.value.set_msg(
                 ('未匹配到任务func name:' + self.obj_name + ',func:' + func))
         if self.is_success():
             Logging.info(self.obj_name, func, param, ' 步骤执行成功 end!')
         else:
             Logging.info(self.obj_name, func, param, ' 步骤执行失败 error:',
                          self.error, ' end!')
     except Exception as e:
         Logging.error(e)
         # Prefer the error recorded by the wrapped object; otherwise fall
         # back to ERROR_9999.  An error already set above is kept.
         if self.is_success() and self.obj and self.obj.error:
             self.error = self.obj.error
         elif self.is_success():
             self.error = ErrorEnum.ERROR_9999
         # Same exception type as before, now carrying the message and the
         # original exception as its cause.
         raise Exception(str(e)) from e
     return results
Esempio n. 14
0
 def operation_data_process(self):
     """
     Log ``self.data_list`` followed by the step name.  Nothing else is
     done and nothing is returned.
     """
     for message in (self.data_list, 'operation_data_process'):
         Logging.info(message)