def task_finish(self):
    """Block until no job is pending or running, logging a heartbeat meanwhile.

    Every 30 seconds the ``t_job`` table is queried for rows whose status is
    0 or 1. Rows with status 0 are counted as waiting and rows with status 1
    as running, and both counts are written to the log. As soon as the query
    returns no rows the completion message is logged and the method returns,
    so that post-run steps (see the TODOs) can follow.

    :return: None
    """
    sql = 'select status, id, topic, job_params from t_job where status in (0,1);'
    while True:
        jobs = self.db.query(sql)
        # TODO: per-job timeout / forced-termination mechanism.
        if not jobs:
            Logging.info('所有任务执行完成!')
            # TODO: unified scheduling persistence step.
            # TODO: monitoring / alerting step.
            # Leave the polling loop: previously this branch fell through and
            # the method kept polling forever even though nothing was left.
            break
        task_waiting = sum(1 for job in jobs if job[0] == 0)
        task_running = sum(1 for job in jobs if job[0] == 1)
        Logging.info('heartbeat 待执行任务数:', task_waiting, '执行中任务数:',
                     task_running)
        sleep(30)
def operation_page(self):
    """Open the hard-coded branding report page and wait for its download.

    Loads the report URL in ``self.driver``, pauses three seconds, clicks the
    element located by a fixed XPath (presumably the export link - confirm
    against the page), pauses again, then delegates to
    ``self.wait_download_finish()``. Finally logs ``self.source_data_list``.
    """
    report_url = ('https://branding.taobao.com/#!/report/index?'
                  'productid=101005202&effect=15'
                  '&startdate=2019-06-05&enddate=2019-06-19')
    trigger_xpath = '//*[@id="brix_12290"]/div[4]/a'

    self.driver.get(report_url)
    Time.sleep(3)

    trigger = self.driver.find_element_by_xpath(trigger_xpath)
    trigger.click()
    Time.sleep(3)

    self.wait_download_finish()
    Logging.info(self.source_data_list)
    Logging.info('end')
def get_task(self):
    """Fetch one pending job (status = 0) and decode its parameters.

    ``job_params`` is parsed as ``"<store_id>|<id>,<id>,..."``. Empty entries
    in the id list (for example the one produced by a trailing comma) are
    ignored, and a value without a ``|`` section yields an empty id list.
    The ids are returned in random order.

    :return: ``(job_id, store_id, page_data_ids)`` for the first row returned
        by the query, or ``(None, None, None)`` when there is no pending job.
    :raises ValueError: if the job id, store id or a page-data id is not an
        integer.
    """
    # TODO: claim the job inside a database transaction.
    sql = 'select id, job_params from t_job where status = 0 order by job_sort,RAND();'
    jobs = self.db.query(sql)
    if not jobs:
        return None, None, None

    job = jobs[0]
    Logging.info('总任务数:', len(jobs), ' 获取任务:', job)
    job_id = int(job[0])

    fields = job[1].split('|')
    store_id = int(fields[0])
    raw_ids = fields[1].split(',') if len(fields) > 1 else []
    # Filter instead of list.remove(''): remove() raised ValueError when no
    # empty entry was present and only dropped the first one otherwise.
    page_data_ids = [int(raw_id) for raw_id in raw_ids if raw_id.strip()]
    shuffle(page_data_ids)
    return job_id, store_id, page_data_ids
def init_web_driver(self):
    """Attach a Chrome driver to the browser listening on ``self.port``.

    The driver is created with the ``debuggerAddress`` experimental option
    pointing at ``127.0.0.1:<self.port>`` and stored in ``self.driver``.

    :return: True when the driver was created.
    :raises Exception: when the driver cannot be created; ``self.error`` is
        set to ``ErrorEnum.ERROR_1003`` first and the original error is
        attached as the cause.
    """
    try:
        chrome_options = Options()
        chrome_options.add_experimental_option(
            "debuggerAddress", "127.0.0.1:{}".format(self.port))
        # NOTE(review): the ``chrome_options=`` keyword is kept as in the
        # original; newer Selenium releases expect ``options=`` - confirm the
        # pinned Selenium version before changing it.
        self.driver = webdriver.Chrome(chrome_options=chrome_options)
        Logging.info('{} - Chrome[{}]连接成功。'.format(self.store.name,
                                                    self.port))
    except Exception as e:
        failure_msg = 'port:{} 无法接管浏览器'.format(self.port)
        # Route the root cause through the logger rather than stdout.
        Logging.error(e)
        Logging.error(failure_msg)
        self.error = ErrorEnum.ERROR_1003
        # Same exception type as before, now with a message and the cause.
        raise Exception(failure_msg) from e
    return True
def __init__(self, name, param=None):
    """Instantiate the handler object identified by ``name``.

    On return ``self.obj`` holds the created handler (or None) and
    ``self.error`` holds None or an ``ErrorEnum`` member describing why the
    instantiation failed. Exceptions are logged and converted into an error
    state; they are not propagated to the caller.

    :param name: dotted handler identifier, e.g.
        ``handle.task_creator.TaskCreator``.
    :param param: handler arguments. The page handlers read the keys
        ``store_id``, ``page_data_id`` and ``port``. Defaults to a new empty
        dict.
    """
    self.error = None
    self.obj = None
    self.obj_name = name
    # Fresh dict per instance: a ``{}`` default is created once and would be
    # shared (and mutated) across every call that omits ``param``.
    self.obj_param = {} if param is None else param
    try:
        Logging.info(self.obj_name, self.obj_param, ' 实例化 start!')
        if self.obj_name == 'handle.task_creator.TaskCreator':
            self.obj = TaskCreator()
        elif self.obj_name == 'handle.login.tb_login.TaoLogin':
            try:
                self.obj = tb_login()
            except Exception as e:
                Logging.error(e)
                self.error = ErrorEnum.ERROR_2000
        # ================= page-scraping handlers START =================
        elif self.obj_name == 'handle.website.subway.report.SubReportDay':
            self.obj = SpreadReportDay(self.obj_param['store_id'],
                                       self.obj_param['page_data_id'],
                                       self.obj_param['port'])
        elif self.obj_name == 'handle.website.subway.direct_report.SpreadReportDay':
            self.obj = SpreadReportDay1(self.obj_param['store_id'],
                                        self.obj_param['page_data_id'],
                                        self.obj_param['port'])
        # ================= page-scraping handlers END ===================
        else:
            self.error = ErrorEnum.ERROR_9001
            # format() instead of ``+``: concatenating the param dict onto a
            # str raised TypeError, so the message was never recorded.
            self.error.value.set_msg('未匹配到任务实例 name:{},param:{}'.format(
                self.obj_name, self.obj_param))

        if self.is_success():
            Logging.info(self.obj_name, self.obj_param, ' 实例化成功 end!')
        else:
            Logging.info(self.obj_name, self.obj_param, ' 实例化失败 error:',
                         self.error, ' end!')
    except Exception as e:
        Logging.error(e)
        # Prefer the error recorded by the handler itself; otherwise fall
        # back to ERROR_9999 unless an error was already set above.
        if self.is_success() and self.obj and self.obj.error:
            self.error = self.obj.error
        elif self.is_success():
            self.error = ErrorEnum.ERROR_9999
def insert_many(self, sql, data_list):
    """Log the statement, then run it once per entry of ``data_list``.

    Uses the cursor's ``executemany``; this method does not call commit.

    :param sql: parameterized SQL statement.
    :param data_list: sequence of parameter sets passed to ``executemany``.
    """
    Logging.info('db.insert_many sql:', sql, data_list)
    cursor = self.db_cur
    cursor.executemany(sql, data_list)
def delete(self, sql):
    """Log ``sql`` and execute it on the cursor; commit is not called here.

    :param sql: complete SQL statement to execute.
    """
    Logging.info('db.delete sql:', sql)
    cursor = self.db_cur
    cursor.execute(sql)
def insert(self, sql, tuple_data):
    """Execute a parameterized insert and return the generated id.

    After the statement runs, ``select last_insert_id() as id`` is issued
    through ``self.query`` and the first column of its first row is returned.
    This method does not call commit.

    :param sql: parameterized SQL statement.
    :param tuple_data: parameters bound to ``sql``.
    :return: value reported by ``last_insert_id()``.
    """
    Logging.info('db.insert sql:', sql, tuple_data)
    self.db_cur.execute(sql, tuple_data)
    id_rows = self.query('select last_insert_id() as id')
    return id_rows[0][0]
def execute(self, sql):
    """Log ``sql``, execute it, commit, and return the cursor's result.

    :param sql: complete SQL statement to execute.
    :return: whatever the cursor's ``execute`` call returned.
    """
    Logging.info('db.execute sql:', sql)
    outcome = self.db_cur.execute(sql)
    self.commit()
    return outcome
def query(self, sql):
    """Log ``sql``, execute it, and return every row of the result.

    :param sql: complete SQL statement to execute.
    :return: the rows produced by the cursor's ``fetchall``.
    """
    Logging.info('db.query sql:', sql)
    cursor = self.db_cur
    cursor.execute(sql)
    return cursor.fetchall()
def wait_download_finish(self, file_type=None):
    """Wait for a downloaded file, move it to the processing area and load it.

    The download directory is scanned about once per second. A file qualifies
    when its name contains neither ``.crdownload`` nor ``.tmp`` (download
    still in progress), it is a regular file, and - if
    ``page_data.rule_read_file_prefix`` is set - its name starts with that
    prefix. The first qualifying file is recorded in ``self.file_names``,
    moved below ``FILE_PROCESS_PATH`` (replacing a same-named file there),
    read with pandas, and the frame is appended to ``self.source_data_list``.

    :param file_type: ``'csv'`` or ``'excel'``; when None it is inferred from
        the file name ending (``csv`` / ``xls`` / ``xlsx``).
    :return: True once a file was processed, False when nothing qualified
        before the timeout (181 scans, one second apart).
    :raises Exception: when the file type is neither given nor inferable.
    """
    # Download timeout of roughly 3 minutes: one unit is spent per scan, not
    # per directory entry, so an empty directory can no longer spin forever
    # and unrelated files no longer multiply the delay.
    timeout_num = 180
    prefix = self.page_data.rule_read_file_prefix
    while timeout_num >= 0:
        for file in os.listdir(self.FILE_DOWNLOAD_PATH):
            # Name fragments of a download that is still in progress.
            if '.crdownload' in file or '.tmp' in file:
                continue
            file_path = os.path.join(self.FILE_DOWNLOAD_PATH, file)
            if not os.path.isfile(file_path):
                continue
            if prefix is not None and not file.startswith(prefix):
                continue

            self.file_names.append(file)

            # Resolve the processing directory, expanding dimension
            # placeholders in the configured path suffix.
            if self.page_data.rule_save_path_suffix is None:
                file_process_path = self.FILE_PROCESS_PATH
            else:
                path_suffix = self.page_data.rule_save_path_suffix
                for key, value in self.data_dimension_dict.items():
                    path_suffix = path_suffix.replace(key, value)
                file_process_path = self.FILE_PROCESS_PATH + '/' + path_suffix
            os.makedirs(file_process_path, exist_ok=True)

            remote_path = os.path.join(file_process_path, file)
            # TODO: rename instead of overwrite (timestamp.original_name).
            if os.path.exists(remote_path):
                os.remove(remote_path)
            shutil.move(file_path, remote_path)
            Logging.info("move %s -> %s" % (file_path, remote_path))

            # TODO: archive extraction, multiple files, multiple sheets.
            # TODO: make the supported file types configurable.
            if file_type is None:
                if file.endswith('csv'):
                    file_type = 'csv'
                elif file.endswith(('xls', 'xlsx')):
                    file_type = 'excel'

            if file_type == 'excel':
                df = pd.read_excel(remote_path)
            elif file_type == 'csv':
                df = pd.read_csv(remote_path)
            else:
                Logging.error('解析文件类型,未找到!')
                raise Exception('解析文件类型,未找到!')
            self.source_data_list.append(df)
            return True

        # Nothing usable in this scan: wait a second and try again.
        Time.sleep(1)
        timeout_num -= 1
    return False
def worker_task_run():
    """Worker loop: claim pending jobs one by one and scrape their pages.

    For each job obtained through the task creator the worker marks it as
    started, then for every page-data id builds a report task (logging in
    first when the handler reports it is not logged in) and runs the page
    steps in order. The loop ends when ``get_task`` returns no job id.

    NOTE(review): a failure inside the page steps is only logged, so the job
    is still closed with result ``success``; only login or task-initialisation
    failures close it with ``fail``. Confirm this is intended.
    NOTE(review): the ``TaskController.run`` shown in this file has no branch
    for ``operation_page_download`` - verify that step can succeed.
    """
    report_handler = 'handle.website.subway.report.SubReportDay'
    # (step passed to TaskController.run, message used when it fails),
    # listed in execution order (steps 3-7 of the scraping flow).
    page_steps = (
        ('operation_page', '取数-页面操作失败!'),
        ('operation_page_download', '取数-页面文件下载及读取失败!'),
        ('operation_data_process', '取数-数据处理失败!'),
        ('operation_data_input', '取数-数据入库失败!'),
        ('operation_data_backup', '取数-数据备份失败!'),
    )

    tc = TaskController('handle.task_creator.TaskCreator')
    job_id, store_id, page_data_ids = tc.run('get_task')
    while job_id:
        claimed = tc.run('task_set_start', {'job_id': job_id})
        if not claimed:
            # Another worker took the job first; fetch a different one.
            Logging.info('job:', job_id, store_id, page_data_ids,
                         ' 任务领取慢了一拍,继续获取其他任务!')
            job_id, store_id, page_data_ids = tc.run('get_task')
            continue

        try:
            port = None
            for page_data_id in page_data_ids:
                # Step 1: initialise the scraping task.
                param = {
                    'store_id': store_id,
                    'page_data_id': page_data_id,
                    'port': port,
                    'job_id': job_id
                }
                task = TaskController(report_handler, param)

                if not task.obj.login_flag:
                    # Step 2: the store is not logged in - log in, then
                    # rebuild the task on the port the login reports.
                    login_tc = TaskController('handle.login.tb_login.TaoLogin',
                                              task.store)
                    login_tc.run('run')
                    if not login_tc.is_success():
                        Logging.error('param:', param, '登录失败!')
                        raise Exception('param:', param, '登录失败!')
                    port = login_tc.port
                    param['port'] = port
                    task = TaskController(report_handler, param)

                if not task.is_success():
                    Logging.error('param:', param, '任务初始化失败!')
                    raise Exception('param:', param, '任务初始化失败!')

                try:
                    for step, failure_msg in page_steps:
                        task.run(step)
                        if not task.is_success():
                            Logging.error('param:', param, failure_msg)
                            raise Exception('param:', param, failure_msg)
                except Exception as e:
                    Logging.error(e)
                    Logging.error('param:', param, ' 页面取数过程失败!')

            tc.run('task_set_end', {'job_id': job_id, 'result': 'success'})
        except Exception as e:
            Logging.error(e)
            Logging.error('job_id:', job_id, ' 任务执行失败!')
            tc.run('task_set_end', {'job_id': job_id, 'result': 'fail'})

        # Move on to the next pending job.
        job_id, store_id, page_data_ids = tc.run('get_task')
def run(self, func, param=None):
    """Dispatch one named step to the wrapped handler object.

    Which steps are accepted depends on ``self.obj_name``:

    * ``handle.task_creator.TaskCreator``: ``task_init``, ``task_added``,
      ``get_task``, ``task_finish`` (called without arguments) and
      ``task_set_start``, ``task_set_end`` (called with ``param``).
    * ``handle.login.tb_login.TaoLogin``: ``run`` (called with ``param``).
    * names starting with ``handle.website``: ``operation_page``,
      ``operation_data_process``, ``operation_data_input``,
      ``operation_data_backup``. An exception raised by one of these is
      logged and recorded in ``self.error`` instead of being propagated.

    Any other combination sets ``self.error`` to ``ErrorEnum.ERROR_9002``.

    :param func: name of the step to run.
    :param param: arguments forwarded to steps that take them; defaults to a
        new empty dict.
    :return: the handler's return value, or None when nothing was returned.
    :raises Exception: when an error escapes the dispatch; ``self.error`` is
        updated first and the original error is attached as the cause.
    """
    # Fresh dict per call: a ``{}`` default is shared between calls and is
    # handed to handler methods that may mutate it.
    if param is None:
        param = {}
    results = None
    try:
        Logging.info(self.obj_name, func, param, ' 步骤执行 start!')
        # Error recorded when the corresponding page step raises.
        website_step_errors = {
            'operation_page': ErrorEnum.ERROR_3000,
            'operation_data_process': ErrorEnum.ERROR_4000,
            'operation_data_input': ErrorEnum.ERROR_5000,
            'operation_data_backup': ErrorEnum.ERROR_6000,
        }
        is_creator = self.obj_name == 'handle.task_creator.TaskCreator'
        is_login = self.obj_name == 'handle.login.tb_login.TaoLogin'
        is_website = self.obj_name.startswith('handle.website')

        if is_creator and func in ('task_init', 'task_added', 'get_task',
                                   'task_finish'):
            results = getattr(self.obj, func)()
        elif is_creator and func in ('task_set_start', 'task_set_end'):
            results = getattr(self.obj, func)(param)
        elif is_login and func == 'run':
            results = self.obj.run(param)
        elif is_website and func in website_step_errors:
            try:
                results = getattr(self.obj, func)()
            except Exception as e:
                Logging.error(e)
                self.error = website_step_errors[func]
        else:
            self.error = ErrorEnum.ERROR_9002
            self.error.value.set_msg(
                ('未匹配到任务func name:' + self.obj_name + ',func:' + func))

        if self.is_success():
            Logging.info(self.obj_name, func, param, ' 步骤执行成功 end!')
        else:
            Logging.info(self.obj_name, func, param, ' 步骤执行失败 error:',
                         self.error, ' end!')
    except Exception as e:
        Logging.error(e)
        # Prefer the error recorded by the handler itself; otherwise fall
        # back to ERROR_9999 unless an error was already set above.
        if self.is_success() and self.obj and self.obj.error:
            self.error = self.obj.error
        elif self.is_success():
            self.error = ErrorEnum.ERROR_9999
        # Same exception type as before, now carrying message and cause.
        raise Exception(str(e)) from e
    return results
def operation_data_process(self):
    """Data-processing step; currently it only logs.

    Writes ``self.data_list`` and then the step name to the log. No data is
    transformed here.
    """
    current_data = self.data_list
    Logging.info(current_data)
    Logging.info('operation_data_process')