def run_task(self, module, task, response):
    """
    Run one task through the handler and bundle everything it produced.

    A dict `response` is first converted with `rebuild_response`. While the
    task runs, `sys.stdout` is redirected into `module.log_buffer` unless
    `enable_stdout_capture` is disabled in `self.__env__`. Any `Exception`
    raised by the callback or by `on_result` is logged on the module logger
    and captured instead of being propagated.

    Returns a `ProcessorResult` built from the result, follows, messages,
    buffered logs, captured exception (or None), extinfo and save.
    """
    logger = module.logger
    result = None
    exception = None
    original_stdout = sys.stdout

    # Expose the per-task context on the instance for the duration of the run.
    self.task = task
    if isinstance(response, dict):
        response = rebuild_response(response)
    self.response = response
    track = task.get('track') or {}
    self.save = track.get('save', {})

    try:
        if self.__env__.get('enable_stdout_capture', True):
            sys.stdout = ListO(module.log_buffer)
        self._reset()
        result = self._run_task(task, response)
        # A generator yields several results; anything else is a single one.
        pending = result if inspect.isgenerator(result) else (result,)
        for item in pending:
            self._run_func(self.on_result, item, response, task)
    except Exception as e:
        logger.exception(e)
        exception = e
    finally:
        # Snapshot the outputs before the per-task state is cleared.
        follows = self._follows
        messages = self._messages
        logs = list(module.log_buffer)
        extinfo = self._extinfo
        save = self.save

        sys.stdout = original_stdout
        self.task = None
        self.response = None
        self.save = None

    # Empty the shared buffer in place so the module keeps the same list object.
    module.log_buffer[:] = []
    return ProcessorResult(result, follows, messages, logs, exception, extinfo, save)
def run(project):
    """
    Debug-run one task of `project` and return the outcome as a JSON response.

    The task is read from the `task` form field and the script from the
    `script` form field (or from the project database when `webdav_mode` is
    'true'). Every path returns a `(body, 200, headers)` tuple whose body is
    a JSON object with the keys `fetch_result`, `logs`, `follows`,
    `messages`, `result` and `time`; failures are reported through `logs`.
    """
    start_time = time.time()

    def reply(payload):
        # binary data can't be encoded to JSON, so the payload is converted
        # to a unicode obj before it is sent to the frontend
        return json.dumps(utils.unicode_obj(payload)), \
            200, {'Content-Type': 'application/json'}

    def failure(logs, fetched=""):
        # Payload shape shared by every error path.
        return {
            'fetch_result': fetched,
            'logs': logs,
            'follows': [],
            'messages': [],
            'result': None,
            'time': time.time() - start_time,
        }

    def current_traceback():
        # Format the exception being handled after passing its traceback
        # through utils.hide_me with this module's globals.
        exc_type, exc_value, exc_tb = sys.exc_info()
        exc_tb = utils.hide_me(exc_tb, globals())
        return ''.join(traceback.format_exception(exc_type, exc_value, exc_tb))

    try:
        task = utils.decode_unicode_obj(json.loads(request.form['task']))
    except Exception:
        return reply(failure(u'task json error'))

    project_info = {
        'name': project,
        'status': 'DEBUG',
        'script': request.form['script'],
    }

    if request.form.get('webdav_mode') == 'true':
        # In webdav mode the stored script replaces the submitted one.
        projectdb = app.config['projectdb']
        info = projectdb.get(project, fields=['name', 'script'])
        if not info:
            return reply(failure(u' in wevdav mode, cannot load script'))
        project_info['script'] = info['script']

    fetch_result = {}
    try:
        fetch_result = app.config['fetch'](task)
        response = rebuild_response(fetch_result)
        module = ProjectManager.build_module(project_info, {
            'debugger': True
        })
        ret = module['instance'].run_task(module['module'], task, response)
    except Exception:
        result = failure(current_traceback(), fetch_result)
    else:
        result = {
            'fetch_result': fetch_result,
            'logs': ret.logstr(),
            'follows': ret.follows,
            'messages': ret.messages,
            'result': ret.result,
            'time': time.time() - start_time,
        }
        # result['fetch_result'] is this same dict, so these land in the payload.
        fetch_result['content'] = response.text
        content_type = response.headers.get('content-type', '')
        if content_type.startswith('image'):
            fetch_result['dataurl'] = dataurl.encode(response.content, content_type)

    try:
        return reply(result)
    except Exception:
        # Serialising the payload failed; report that failure instead.
        return reply(failure(current_traceback()))
def on_task(self, task, response):
    """Process one fetched task and publish its outcome.

    `response` is rebuilt with `rebuild_response`, the project is looked up
    through `self.project_manager` and the task is run with the project's
    `run_task`. Any `Exception` raised during lookup or processing is
    captured into a `ProcessorResult` together with its traceback.

    Afterwards, unless the result's extinfo sets `not_send_status`, a status
    pack is put on `self.status_queue`; follow-up tasks are put on
    `self.newtask_queue`; and each message is delivered by calling `on_task`
    again with an `_on_message` callback. A summary line is logged at error
    level when processing failed, otherwise at info level.

    Always returns True.

    NOTE: `task["taskid"]` and `task["project"]` are also read outside the
    try block (status pack and log line), so a task missing either key still
    raises KeyError from this method.
    """
    start_time = time.time()
    response = rebuild_response(response)

    try:
        # Explicit raises rather than `assert`, so the checks are not stripped
        # under `python -O`. AssertionError is kept so the exception type
        # recorded in the result is the same as before.
        if "taskid" not in task:
            raise AssertionError("need taskid in task")
        project = task["project"]
        updatetime = task.get("project_updatetime", None)
        md5sum = task.get("project_md5sum", None)
        project_data = self.project_manager.get(project, updatetime, md5sum)
        if not project_data:
            raise AssertionError("no such project!")
        if project_data.get("exception"):
            # The project itself failed to load: report that failure.
            ret = ProcessorResult(
                logs=(project_data.get("exception_log"),),
                exception=project_data["exception"],
            )
        else:
            ret = project_data["instance"].run_task(project_data["module"], task, response)
    except Exception as e:
        logstr = traceback.format_exc()
        ret = ProcessorResult(logs=(logstr,), exception=e)
    process_time = time.time() - start_time

    if not ret.extinfo.get("not_send_status", False):
        if ret.exception:
            # On failure all response headers are tracked.
            track_headers = dict(response.headers)
        else:
            # On success only these two headers are tracked, when present.
            track_headers = {
                name: response.headers[name]
                for name in ("etag", "last-modified")
                if name in response.headers
            }

        status_pack = {
            "taskid": task["taskid"],
            "project": task["project"],
            "url": task.get("url"),
            "track": {
                "fetch": {
                    "ok": response.isok(),
                    "redirect_url": response.url if response.url != response.orig_url else None,
                    "time": response.time,
                    "error": response.error,
                    "status_code": response.status_code,
                    "encoding": response.encoding,
                    "headers": track_headers,
                    # The start of the body is kept only for failed tasks.
                    "content": response.text[:500] if ret.exception else None,
                },
                "process": {
                    "ok": not ret.exception,
                    "time": process_time,
                    "follows": len(ret.follows),
                    "result": (
                        None
                        if ret.result is None
                        else utils.text(ret.result)[: self.RESULT_RESULT_LIMIT]
                    ),
                    "logs": ret.logstr()[-self.RESULT_LOGS_LIMIT:],
                    "exception": ret.exception,
                },
                "save": ret.save,
            },
        }
        if "schedule" in task:
            status_pack["schedule"] = task["schedule"]

        # FIXME: unicode_obj should be used in the scheduler before storing to
        # the database; it is used here for performance.
        self.status_queue.put(utils.unicode_obj(status_pack))

    # FIXME: unicode_obj should be used in the scheduler before storing to
    # the database; it is used here for performance.
    if ret.follows:
        self.newtask_queue.put([utils.unicode_obj(newtask) for newtask in ret.follows])

    # Deliver each message as a synthetic task to the target project's
    # `_on_message` callback; one failed delivery does not stop the rest.
    for target_project, msg, url in ret.messages:
        try:
            self.on_task(
                {
                    "taskid": utils.md5string(url),
                    "project": target_project,
                    "url": url,
                    "process": {"callback": "_on_message"},
                },
                {"status_code": 200, "url": url, "save": (task["project"], msg)},
            )
        except Exception:
            logger.exception("Sending message error.")

    logger_func = logger.error if ret.exception else logger.info
    logger_func(
        "process %s:%s %s -> [%d] len:%d -> result:%.10r fol:%d msg:%d err:%r"
        % (
            task["project"],
            task["taskid"],
            task.get("url"),
            response.status_code,
            len(response.content),
            ret.result,
            len(ret.follows),
            len(ret.messages),
            ret.exception,
        )
    )
    return True