def do_doom(self, t, code, queue_at, deadline):
    """Generator that either re-queues task `t` or marks it as failed.

    Each `yield` hands a `db_execute` / `db_query` request to the caller
    driving this generator.

    The task is retried when `queue_at` is set and `deadline` is unset, zero,
    or later than `queue_at`. Otherwise CHECK_TASK is queried and, when it
    reports `rc <= 0`, the task is failed, an "unrecognized" result is sent,
    the hbase record is refreshed and QUERY_FAILED is counted.
    """
    cleaner_log = logging.getLogger('mwtm_cleaner')

    no_deadline = deadline == None or deadline == 0
    retryable = queue_at != None and (no_deadline or deadline > queue_at)

    if retryable:
        cleaner_log.debug('to retry task %s, queue at %s', t.uuid, queue_at)
        yield db_execute(RETRY_TASK, queue_at, code, t.id)
        retry_msg = ("task_uuid:%s, site_asset_id:%s, deadline:%s, "
                     "external_id:%s "
                     % (t.uuid, t.site_asset_id, deadline, t.external_id))
        g_logger.info(trans2json(message=retry_msg, action="retry task"))
        return

    cleaner_log.debug('to fail task %s', t.uuid)
    fail_msg = ("task_uuid:%s, site_asset_id:%s, external_id:%s"
                % (t.uuid, t.site_asset_id, t.external_id))
    g_logger.info(trans2json(message=fail_msg, action="to fail task"))

    rc, _ = yield db_query(CHECK_TASK, t.id)
    if rc <= 0:
        yield db_execute(FAIL_TASK, code, t.id)
        self.send_matches(t, unrecognized=True)
        loader = partial(self.load_task_status, t.uuid)
        self.update_hbase_task(db_txn(self.pool, loader))
        stats.incr(QUERY_FAILED, 1)
def checkParams(self, res):
    """Validate an incoming matches payload and copy its fields onto `self`.

    `res` is a dict; when it has a "params" key, the nested dict is used
    instead. Optional fields fall back to a default when absent:
    parent_info -> [], match_type -> None, matches -> [],
    extra_info -> None, notification (stored as `self.crr`) -> None,
    extra_info_url (stored as `self.url`) -> None.

    Returns True and sets `self.site_asset_id` only when 'site_asset_id' is
    present and is a non-empty list; otherwise logs the reason and returns
    False.
    """
    if "params" in res:
        res = res['params']

    self.parent_info = res.get('parent_info', [])
    self.match_type = res.get('match_type')
    self.matches = res.get('matches', [])
    self.extra_info = res.get('extra_info')
    self.crr = res.get('notification')
    self.url = res.get('extra_info_url')

    # Any explicit match_type other than 'match' discards supplied matches.
    if 'match_type' in res and res['match_type'] != 'match':
        self.matches = []

    if 'site_asset_id' not in res:
        g_logger.info(trans2json(
            message='params is error , params has no key site_asset_id',
            action='checkParams'))
        return False

    site_asset_id = res['site_asset_id']
    # Rejects "" and [] as well as every other non-list value.
    if not isinstance(site_asset_id, list) or not site_asset_id:
        g_logger.info(trans2json(
            message="params is error , site_asset_id='' or []",
            action='checkParams'))
        return False

    self.site_asset_id = site_asset_id
    return True
def send_matches(self, task, matches=None, crr="", unrecognized=False):
    """POST the match result of `task` to `self.matches_server`.

    The reported match_type is "unrecognized" when `unrecognized` is true,
    'match' when `matches` is non-empty, and "no_match" otherwise.

    :param task: object exposing `uuid`, `site_asset_id`, `external_id`
    :param matches: list of matches to send; None means no matches
    :param crr: value sent as the `notification` field
    :param unrecognized: force the "unrecognized" match type
    :raises SendMatchesError: on a request failure or a non-200 response
    """
    # None replaces the former shared mutable default `[]`.
    if matches is None:
        matches = []

    if unrecognized:
        match_type = "unrecognized"
    elif len(matches):
        match_type = 'match'
    else:
        match_type = "no_match"

    # NOTE(review): eval() executes whatever is stored in
    # task.site_asset_id. If that value can be influenced from outside,
    # this is a code-injection risk; ast.literal_eval is the usual safe
    # replacement -- confirm the stored format before switching.
    site_asset_id = eval(task.site_asset_id)

    data = dict(id="null", jsonrpc="2.0", method="matches",
                params=dict(matches=matches,
                            site_asset_id=site_asset_id,
                            notification=crr,
                            match_type=match_type))
    params = dict(source="auto_match")
    try:
        req = requests.post(self.matches_server, params=params,
                            data=json.dumps(data))
        if req.status_code != 200:
            self.logger.error("send matches failed, code:%s", req.status_code)
            raise SendMatchesError("send matches faild, task_id:%s" % task.uuid)
    except RequestException:
        self.logger.error("send matches failed, %s", task.uuid, exc_info=True)
        raise SendMatchesError("send matches faild")

    self.logger.info("send matches success, task_uuid:%s, site_asset_id:%s,"
                     "external_id:%s", task.uuid, task.site_asset_id,
                     task.external_id)
    g_logger.info(trans2json(
        message="task_uuid:%s, site_asset_id:%s, external_id:%s "
                % (task.uuid, task.site_asset_id, task.external_id),
        action="send matches success"))
def checkParams(self, res):
    """Validate an incoming matches payload and copy its fields onto `self`.

    `res` is a dict; when it has a "params" key, the nested dict is used
    instead. Optional fields fall back to a default when absent:
    parent_info -> [], match_type -> None, matches -> [],
    extra_info -> None, notification (stored as `self.crr`) -> None,
    extra_info_url (stored as `self.url`) -> None.

    Returns True and sets `self.site_asset_id` only when 'site_asset_id' is
    present and is a non-empty list; otherwise logs the reason and returns
    False.
    """
    if "params" in res:
        res = res['params']

    self.parent_info = res.get('parent_info', [])
    self.match_type = res.get('match_type')
    self.matches = res.get('matches', [])
    self.extra_info = res.get('extra_info')
    self.crr = res.get('notification')
    self.url = res.get('extra_info_url')

    # Any explicit match_type other than 'match' discards supplied matches.
    if 'match_type' in res and res['match_type'] != 'match':
        self.matches = []

    if 'site_asset_id' not in res:
        g_logger.info(trans2json(
            message='params is error , params has no key site_asset_id',
            action='checkParams'))
        return False

    site_asset_id = res['site_asset_id']
    # Rejects "" and [] as well as every other non-list value.
    if not isinstance(site_asset_id, list) or not site_asset_id:
        g_logger.info(trans2json(
            message="params is error , site_asset_id='' or []",
            action='checkParams'))
        return False

    self.site_asset_id = site_asset_id
    return True
def process_task(self, body, message):
    """Handle one broker message: validate it, process it, acknowledge it.

    :param body: message payload, either an already-decoded dict or a JSON
        string
    :param message: broker message object; `ack()` is called on every path
        that reaches the validation step

    A valid payload is passed to `self.process`; success counts
    FINISHED_TASKS, while a `sotreError` or an invalid payload counts
    DROP_TASKS.
    """
    self.logger.info('get task from broker :%s, type:%s', body, type(body))
    g_logger.info(trans2json(message="get task from broker :%s" % str(body),
                             action="fetchTask"))
    stats.incr(FETCH_TASKS, 1)

    # checkParams() and process() both need a dict. The previous code
    # serialised a dict body into a string here, which broke both of them;
    # only non-dict payloads need decoding.
    if not isinstance(body, dict):
        body = json.loads(body)

    if not self.checkParams(body):
        self.logger.error("params is error: %s", body)
        g_logger.error(
            trans2json(message='message from mq , params is error'))
        stats.incr(DROP_TASKS, 1)
        message.ack()
        return

    try:
        self.process(config, body)
        message.ack()
        stats.incr(FINISHED_TASKS, 1)
    except sotreError:
        # Acknowledge anyway so the failed message is not redelivered.
        message.ack()
        stats.incr(DROP_TASKS, 1)
        g_logger.error(trans2json(
            message='some unexpected thing happen, maybe db error ',
            action='store task to db'))
        self.logger.error('some unexpected thing happen ,Error:',
                          exc_info=True)
def load(self):
    '''
    Return the match data for `self.site_asset_id`.

    A cached value (anything other than `None`) is returned as-is.
    On a cache miss the task ids are looked up: an empty list yields the
    NO_TASK_ERROR wrapper, anything else is formatted through
    `format_matches`; the result is saved to the cache and returned.
    '''
    cached = self.load_from_cache()
    if cached is not None:
        self.logger.debug("hit cache, site_asset_id:[%s], cache: %s" %
                          (self.site_asset_id, cached))
        return cached

    if self.has_parent():
        self.prefix_search = True
    miss_msg = ("site_asset_id:%s, prefix_search:%s"
                % (self.site_asset_id, self.prefix_search))
    g_logger.info(trans2json(message=miss_msg, action="no hit cache"))

    task_ids = self.get_tids()
    self.logger.debug("task_ids: %s" % task_ids)
    if task_ids == []:
        result = wrap_error(NO_TASK_ERROR['code'],
                            NO_TASK_ERROR['message'], [])
    else:
        result = self.format_matches(task_ids)
    self.save_cache(result)
    return result
def task_finished(self, celery_id, query_res):
    """Forward the outcome of a finished celery query to the cleaner.

    :param celery_id: key into `self.taskm` identifying the celery task
    :param query_res: the query result; anything that is not a `TaskRes`
        is reported as BAD_OUTPUT

    Any exception raised while handling the result is logged and the task
    is reported as BAD_OUTPUT. If the task cannot be looked up at all,
    the failure is only logged because there is nothing to report.
    """
    t = None
    try:
        t = self.taskm[celery_id]['task']
        self.logger.info("finished query, task_id:%s, "
                         "site_asset_id: %s, celery_id:%s, "
                         "ret: %s, err: %s " %
                         (t.uuid, t.site_asset_id, celery_id,
                          query_res.ret, query_res.err))
        self.logger.debug("task_id:%s, out: %s", t.uuid, query_res.out)
        self.logger.debug("finished task info: %s" % str(t))
        g_logger.info(trans2json(
            message="site_asset_id:%s, task_uuid:%s, external_id:%s "
                    % (t.site_asset_id, t.uuid, t.external_id),
            action="task finished query from celery"))
        if not isinstance(query_res, TaskRes):
            # A non-TaskRes result means the query itself raised.
            self.cleaner.request((t, BAD_OUTPUT, None))
        else:
            _, state, res = self.parse_query_res(query_res)
            self.cleaner.request((t, state, res))
    except Exception:
        if t is None:
            # The lookup failed, so `t` was never bound; touching it here
            # used to raise a second error from inside the handler.
            self.logger.error("task finished for unknown celery task, "
                              "celery_id:%s", celery_id, exc_info=True)
            return
        self.cleaner.request((t, BAD_OUTPUT, None))
        self.logger.error("task finished catch unhandle exception, "
                          "task_uuid:%s" % t.uuid, exc_info=True)
def process_task(self, body, message):
    """Handle one broker message: validate it, process it, acknowledge it.

    :param body: message payload, either an already-decoded dict or a JSON
        string
    :param message: broker message object; `ack()` is called on every path
        that reaches the validation step

    A valid payload is passed to `self.process`; success counts
    FINISHED_TASKS, while a `sotreError` or an invalid payload counts
    DROP_TASKS.
    """
    self.logger.info('get task from broker :%s, type:%s', body, type(body))
    g_logger.info(trans2json(message="get task from broker :%s" % str(body),
                             action="fetchTask"))
    stats.incr(FETCH_TASKS, 1)

    # checkParams() and process() both need a dict. The previous code
    # serialised a dict body into a string here, which broke both of them;
    # only non-dict payloads need decoding.
    if not isinstance(body, dict):
        body = json.loads(body)

    if not self.checkParams(body):
        self.logger.error("params is error: %s", body)
        g_logger.error(
            trans2json(message='message from mq , params is error'))
        stats.incr(DROP_TASKS, 1)
        message.ack()
        return

    try:
        self.process(config, body)
        message.ack()
        stats.incr(FINISHED_TASKS, 1)
    except sotreError:
        # Acknowledge anyway so the failed message is not redelivered.
        message.ack()
        stats.incr(DROP_TASKS, 1)
        g_logger.error(trans2json(
            message='some unexpected thing happen, maybe db error ',
            action='store task to db'))
        self.logger.error('some unexpected thing happen ,Error:',
                          exc_info=True)
def load(self):
    '''
    Return the match data for `self.site_asset_id`.

    A cached value (anything other than `None`) is returned as-is.
    On a cache miss the task ids are looked up: an empty list yields the
    NO_TASK_ERROR wrapper, anything else is formatted through
    `format_matches`; the result is saved to the cache and returned.
    '''
    cached = self.load_from_cache()
    if cached is not None:
        self.logger.debug("hit cache, site_asset_id:[%s], cache: %s" %
                          (self.site_asset_id, cached))
        return cached

    if self.has_parent():
        self.prefix_search = True
    miss_msg = ("site_asset_id:%s, prefix_search:%s"
                % (self.site_asset_id, self.prefix_search))
    g_logger.info(trans2json(message=miss_msg, action="no hit cache"))

    task_ids = self.get_tids()
    self.logger.debug("task_ids: %s" % task_ids)
    if task_ids == []:
        result = wrap_error(NO_TASK_ERROR['code'],
                            NO_TASK_ERROR['message'], [])
    else:
        result = self.format_matches(task_ids)
    self.save_cache(result)
    return result
def run(queue):
    """Endless worker loop: take task ids from `queue` and push their results.

    For every task id the match result is fetched and pushed. On success
    the task is marked 'success' and removed from the unpush set. A
    `resultError` or `AssertionError` marks it 'failed' and removes it as
    well. Any other error resets its status to 'new'.

    :param queue: object whose `get(block=True)` yields task ids
    """
    pro = producer()
    push = pusher(pro)
    while True:
        t = queue.get(block=True)
        logger.info('get a task to push, task_id: %s', t)
        g_logger.info(trans2json(
            message='get a task to push, task_id: %s' % t,
            action='get task'))
        task = getTask(t)
        logger.info('------task:%s', task)
        try:
            # Named `result` so the stdlib module name `re` is not shadowed.
            result = push.getMatch(t)
            push.pushResult(result)
            updateFinished(t, 'success')
            dropUnpush(t)
            logger.info('succeed to push the match result , task_id: %s,'
                        'result: %s', t, result)
            g_logger.info(trans2json(
                message='succeed to push the match result ,'
                        'external_id :%s, task_id: %s'
                        % (task['i:external_id'], t),
                action='push result'))
        except resultError:
            logger.error("failed to get the result , task_id: %s", t)
            g_logger.error(trans2json(
                message='failed to push result,'
                        'external_id: %s, task_id: %s'
                        % (task['i:external_id'], t),
                action='push result'))
            dropUnpush(t)
            updateFinished(t, 'failed')
        except AssertionError:
            logger.error("failed to get matches, task_id: %s", t)
            g_logger.error(trans2json(
                message='failed to push result,'
                        'external_id: %s, task_id: %s'
                        % (task['i:external_id'], t),
                action='push result'))
            dropUnpush(t)
            updateFinished(t, 'failed')
        except Exception:
            # Not a bare `except:` -- KeyboardInterrupt / SystemExit must be
            # able to stop this otherwise endless loop.
            logger.error('failed to push result, rest status to new,'
                         'task_id: %s, Error:', t, exc_info=True)
            g_logger.error(trans2json(
                message='failed to push result, res status'
                        'to new, external_id: %s, task_id: %s '
                        % (task['i:external_id'], t),
                action='retry to push'))
            changeStatus(t, 'new')
            logger.error("reset status to new , task_id: %s", t)
def start(self):
    """Block until `self.local_time()` reaches `self.start_date_time_ms`.

    Polls every `self.sleep_interval` seconds and logs 'run' once the
    start time has been reached.
    """
    g_logger.info('wait for time:{}'.format(self.start_date_time))
    while not (self.local_time() >= self.start_date_time_ms):
        time.sleep(self.sleep_interval)
    g_logger.info('run')
def process(self, config, body):
    """Register `body` with the matches server and store the task.

    The payload is POSTed to `config['matches_server']` with
    `source=init`. If the reply's source is 'auto_match', the existing
    task is updated through `updateStatus`; otherwise a new task is
    generated and stored through `storeTaskMysql`.

    :param config: mapping providing 'matches_server'
    :param body: decoded task payload (JSON-serialisable)
    :raises sotreError: when any step fails
    """
    self.logger.info('start to process message')
    task_id = None
    task = defaultdict(list)
    try:
        resp = requests.post(config['matches_server'] + '?source=init',
                             data=json.dumps(body))
        self.logger.info('get request :%s, type:%s', resp.content,
                         type(resp.content))
        res = json.loads(resp.content)
        task_id = res['result']['task_id']
        parseParams(body, task)
        if res['result']['source'] == 'auto_match':
            db_txn(pool, partial(updateStatus), task_id,
                   task['site_asset_id'], task['external_id'])
            self.logger.info('this task has already been in hbase reset'
                             ' status to new , site_asset_ids : %s',
                             task['site_asset_id'])
            g_logger.info(trans2json(
                message='task has already been in'
                        ' hbase, external_id:%s, site_asset_id:%s, task_id:%s'
                        % (task['external_id'], task['site_asset_id'],
                           task_id),
                action="reset status to new"))
        else:
            genTask(task)
            task['task_uuid'] = task_id
            db_txn(pool, partial(storeTaskMysql), task)
            g_logger.info(trans2json(
                message="succeed to store task external_id:%s,"
                        " site_asset_id:%s, task_id:%s"
                        % (task['external_id'], task['site_asset_id'],
                           task['task_uuid']),
                action='store task to db'))
    except Exception:
        # Not a bare `except:` -- interpreter-exit exceptions must not be
        # converted into sotreError. This is a failure, so log it as one.
        self.logger.error('failed to store task, start to retry, task_uuid: %s'
                          ' site_asset_id: %s', task_id,
                          task['site_asset_id'], exc_info=True)
        g_logger.error(trans2json(
            message='failed to store task, start to'
                    ' retry ,external_id:%s, site_asset_id: %s, task_id: %s'
                    % (task['external_id'], task['site_asset_id'], task_id),
            action='store task to db'))
        raise sotreError
def pick(self, t):
    """Record task `t` as picked and hand it to the manager.

    The task id is added to `self.picked` under its account, and also to
    `self.fetching` when that account is present there. The task is then
    passed to `self.manager.request` and the PICKED counter is bumped.
    """
    account = t.account
    self.picked[account].add(t.id)
    in_flight = self.fetching
    if account in in_flight:
        in_flight[account].add(t.id)

    self.manager.request(t)

    self.logger.info('picked task %s for account %s, site_asset_id %s',
                     t.uuid, account, t.site_asset_id)
    summary = ("task_uuid: %s, site_asset_id: %s, external_id: %s"
               % (t.uuid, t.site_asset_id, t.external_id))
    g_logger.info(trans2json(action="picked task to query", message=summary))
    stats.incr(PICKED, 1)
def store_result(self):
    """Store or update the match result for `self.site_asset_id`.

    When `get_sid_info()` finds an existing record, the result is updated
    only if the incoming `self.source` may overwrite the stored source:
    'manual', 'auto_match' and 'init' always may; 'manual_tmp' may
    overwrite 'manual_tmp', or 'auto_match' while `check_finished` is
    false. Without a record, a new task id is generated and the result is
    stored as a new task. Parent info entries are processed and the statsd
    record is written in both cases.

    :return: `(task_id, source)`; `source` is the stored source of an
        existing record, or None for a new task
    """
    task_id = None
    source = None
    res = self.get_sid_info()
    if res:
        source = res['t:source']
        self.logger.info('start update res:%s', res)
        task_id = res['t:task_uuid']
        self.logger.info('task_id:%s', task_id)
        may_overwrite = (
            self.source in ('manual', 'auto_match', 'init')
            or (self.source == 'manual_tmp'
                and (source == 'manual_tmp'
                     or (source == 'auto_match'
                         and not self.check_finished(task_id)))))
        if may_overwrite:
            self.logger.info('source:%s, type:%s, check_finished:%s',
                             self.source, type(self.source),
                             self.check_finished(task_id))
            self.update_result(self.source, task_id, self.match_type,
                               self.matches, self.extra_info, self.url,
                               self.site_asset_id)
            if not self.has_parent(self.site_asset_id):
                self.save_redis(task_id, self.source, self.match_type)
            # Arguments follow the placeholder order (match_type before
            # source); they used to be swapped.
            g_logger.info(trans2json(
                message='succeed to update result,'
                        'task_id:%s, match_type:%s, source:%s'
                        % (task_id, self.match_type, self.source),
                action='update result'))
    else:
        self.logger.info('start to store new task')
        task_id = str(uuid.uuid1())
        self.store_sid_tid(self.site_asset_id, task_id, self.source)
        self.store_tid_sid(task_id, self.site_asset_id)
        self.save_matches(self.match_type, task_id, self.matches)
        self.store_task_info(task_id, self.extra_info, self.url)
        self.logger.info('succeed to store task: %s, task_id %s',
                         self.site_asset_id, task_id)
        self.save_redis(task_id, self.source, self.match_type)

    if self.parent_info:
        for parent in self.parent_info:
            for k, v in parent.items():
                # Remove the redis entry keyed "<k>-s" before storing.
                redis_conn.delete(k + "-s")
                self.store_parent_task(k, v)
    self.record_result_statsd()
    return task_id, source
def buf_tasks(self, reqs):
    """Submit each picked task in `reqs` to celery and record the execution.

    A task that fails `task_check`, or whose submission raises, is handed
    back through `self.reply`. A submitted task is tracked in `self.taskm`
    and `self.tasks`, then recorded through `self.record` inside a db
    transaction; a failure to record is only logged.
    """
    accounts = self.accounts()
    backends = self.backends()
    for t in reqs:
        try:
            self.logger.info("receivce task from picker, task_uuid: %s, "
                             "site_asset_id: %s" % (t.uuid, t.site_asset_id))
            self.logger.debug("receive task info:%s" % t._asdict())
            received = ("site_asset_id:%s, task_uuid:%s, external_id:%s"
                        % (t.site_asset_id, t.uuid, t.external_id))
            g_logger.info(trans2json(message=received,
                                     action="receive picked task"))
            if not self.task_check(t, accounts, backends):
                self.reply(t)
                continue

            account = accounts[t.account]._asdict()
            account["backends"] = [b._asdict() for b in account["backends"]]
            backend_map = dict((key, be._asdict())
                               for key, be in backends.iteritems())
            self.logger.debug("add task's account: %s, backends: %s" %
                              (account, backend_map))

            celery_task = Task().query.delay(t._asdict(), account,
                                             backend_map)
            self.taskm[celery_task.task_id]['celery_task'] = celery_task
            self.taskm[celery_task.task_id]['task'] = t
            self.tasks.add(celery_task)
            self.logger.info("add task to celery, task_uuid: %s, "
                             "site_asset_id: %s, celery_uuid: %s " %
                             (t.uuid, t.site_asset_id, celery_task.task_id))
            added = ("site_asset_id:%s, task_uuid:%s, external_id:%s"
                     % (t.site_asset_id, t.uuid, t.external_id))
            g_logger.info(trans2json(message=added,
                                     action="add task to celery"))
        except Exception:
            self.reply(t)
            self.logger.error("catch exception from buf tasks, "
                              "task_uuid: %s , site_asset_id: %s" %
                              (t.uuid, t.site_asset_id), exc_info=True)
        else:
            # Reached only when the task was submitted; the `continue`
            # above skips this clause.
            try:
                db_txn(self.pool, partial(self.record, t))
            except Exception:
                self.logger.error("failed to record execution for task %s" %
                                  t.uuid)
def store_result(self):
    """Store or update the match result for `self.site_asset_id`.

    When `get_sid_info()` finds an existing record, the result is updated
    only if the incoming `self.source` may overwrite the stored source:
    'manual', 'auto_match' and 'init' always may; 'manual_tmp' may
    overwrite 'manual_tmp', or 'auto_match' while `check_finished` is
    false. Without a record, a new task id is generated and the result is
    stored as a new task. Parent info entries are processed and the statsd
    record is written in both cases.

    :return: `(task_id, source)`; `source` is the stored source of an
        existing record, or None for a new task
    """
    task_id = None
    source = None
    res = self.get_sid_info()
    if res:
        source = res['t:source']
        self.logger.info('start update res:%s', res)
        task_id = res['t:task_uuid']
        self.logger.info('task_id:%s', task_id)
        may_overwrite = (
            self.source in ('manual', 'auto_match', 'init')
            or (self.source == 'manual_tmp'
                and (source == 'manual_tmp'
                     or (source == 'auto_match'
                         and not self.check_finished(task_id)))))
        if may_overwrite:
            self.logger.info('source:%s, type:%s, check_finished:%s',
                             self.source, type(self.source),
                             self.check_finished(task_id))
            self.update_result(self.source, task_id, self.match_type,
                               self.matches, self.extra_info, self.url,
                               self.site_asset_id)
            if not self.has_parent(self.site_asset_id):
                self.save_redis(task_id, self.source, self.match_type)
            # Arguments follow the placeholder order (match_type before
            # source); they used to be swapped.
            g_logger.info(trans2json(
                message='succeed to update result,'
                        'task_id:%s, match_type:%s, source:%s'
                        % (task_id, self.match_type, self.source),
                action='update result'))
    else:
        self.logger.info('start to store new task')
        task_id = str(uuid.uuid1())
        self.store_sid_tid(self.site_asset_id, task_id, self.source)
        self.store_tid_sid(task_id, self.site_asset_id)
        self.save_matches(self.match_type, task_id, self.matches)
        self.store_task_info(task_id, self.extra_info, self.url)
        self.logger.info('succeed to store task: %s, task_id %s',
                         self.site_asset_id, task_id)
        self.save_redis(task_id, self.source, self.match_type)

    if self.parent_info:
        for parent in self.parent_info:
            for k, v in parent.items():
                # Remove the redis entry keyed "<k>-s" before storing.
                redis_conn.delete(k + "-s")
                self.store_parent_task(k, v)
    self.record_result_statsd()
    return task_id, source
def finish(self, t, p, res):
    """Finalise a queried task: send its matches and persist the outcome.

    :param t: the task (exposes `uuid`, `site_asset_id`, `account`,
        `external_id`)
    :param p: passed through unchanged to `self.doom` on failure
    :param res: query result exposing `matches` (must not be None) and `crr`

    When the result carries no matches, `check_matches` is consulted and
    may upgrade the outcome to WITH_MATCH. If sending or persisting fails,
    the task is doomed with INTERNAL and QUERY_SUCCESS is not counted.
    """
    self.logger.info('to finish task, task_uuid:%s, site_asset_id:%s',
                     t.uuid, t.site_asset_id)
    self.logger.debug("res:%s " % str(res))
    assert res.matches is not None

    code = WITHOUT_MATCH if len(res.matches) == 0 else WITH_MATCH
    if code == WITHOUT_MATCH:
        try:
            if db_txn(self.pool, partial(self.check_matches, t)):
                code = WITH_MATCH
        except Exception:
            # Best effort: a failed lookup keeps the no-match outcome, but
            # the error is now logged instead of silently discarded.
            self.logger.warning('failed to check stored matches for task %s',
                                t.uuid, exc_info=True)
    tr = 'match' if code == WITH_MATCH else 'no_match'

    self.logger.debug('record finished task %s, site_asset_id: %s',
                      t.uuid, t.site_asset_id)
    try:
        ms = self.filter_matches(res.matches)
        for m in ms:
            g_logger.info(trans2json(
                message="company_id:%s, "
                        "meta_uuid:%s, instance_uuid:%s, vddb_company_id:%s"
                        % (t.account, m['meta_uuid'], m['instance_id'],
                           m['company_id']),
                action='matches info'))
        mc = len(ms)
        self.send_matches(t, ms, res.crr)
        task_status = db_txn(self.pool,
                             partial(self.load_task_status, t.uuid))
        self.update_hbase_task(task_status)
        db_txn(self.pool, partial(self.update_task, t, code, mc, tr))
    except Exception:
        self.logger.error('failed to finish task: %s, site_asset_id: %s' %
                          (t.uuid, t.site_asset_id), exc_info=True)
        # dooming may succeed, as it touches fewer tables
        self.doom(t, INTERNAL, p, res)
        return

    g_logger.info(trans2json(
        message="site_asset_id:%s, task_uuid:%s, external_id:%s"
                % (t.site_asset_id, t.uuid, t.external_id),
        action="task query complete"))
    stats.incr(QUERY_SUCCESS, 1)
def POST(self):
    """Accept a match result and store it.

    The request body is JSON and the `source` query parameter selects the
    origin of the result. Returns `init_result(tid, src)` when source is
    'init', otherwise `insert_result()`.

    :raises web.BadRequest: when `checkParams` rejects the payload
    :raises web.internalerror: on any other failure
    """
    try:
        stats.incr(RESULT_INSERT, 1)
        web.header("Content-Type", "application/json")
        res = web.data()
        req = web.input()
        self.logger.info('input:%s', req)
        self.source = req.get("source", "")
        res = json.loads(res)
        self.logger.debug('get input :%s', req)
        self.logger.debug('get message :%s', res)
        g_logger.info(trans2json(
            message='get message ,input: %s, msg: %s' % (req, res),
            action='get resquest post'))
        if not self.checkParams(res):
            self.error_code = PARAMS_ERROR["code"]
            self.error_msg = PARAMS_ERROR["message"]
            self.error_data.append("site_asset_id")
            raise web.BadRequest(wrap_error(self.error_code, self.error_msg,
                                            self.error_data))
        ins = insert(self.site_asset_id, self.source, self.match_type,
                     self.matches, self.extra_info, self.crr, self.url,
                     self.parent_info)
        tid, src = ins.store_result()
        g_logger.info(trans2json(
            message='reply to caller, task_id:%s,source:%s' % (tid, src),
            action='reply to caller'))
        stats.incr(RESULT_INSERT_SUCCESS, 1)
        if self.source == 'init':
            return init_result(tid, src)
        return insert_result()
    except web.BadRequest:
        # Must come before the generic handler, which would otherwise
        # turn this client error into an internal error.
        stats.incr(RESULT_INSERT_FAILED, 1)
        self.logger.error("params is error, bad request")
        raise
    except Exception:
        stats.incr(RESULT_INSERT_FAILED, 1)
        self.logger.error("va-interface catch unhandle exception",
                          exc_info=True)
        self.error_code = INTERNAL_ERROR["code"]
        self.error_msg = INTERNAL_ERROR["message"]
        raise web.internalerror(message=wrap_error(
            self.error_code, self.error_msg, self.error_data))
def run(queue):
    """Endless worker loop: take task ids from `queue` and push their results.

    For every task id the match result is fetched and pushed. On success
    the task is marked 'success' and removed from the unpush set. A
    `resultError` or `AssertionError` marks it 'failed' and removes it as
    well. Any other error resets its status to 'new'.

    :param queue: object whose `get(block=True)` yields task ids
    """
    pro = producer()
    push = pusher(pro)
    while True:
        t = queue.get(block=True)
        logger.info('get a task to push, task_id: %s', t)
        g_logger.info(trans2json(
            message='get a task to push, task_id: %s' % t,
            action='get task'))
        task = getTask(t)
        logger.info('------task:%s', task)
        try:
            # Named `result` so the stdlib module name `re` is not shadowed.
            result = push.getMatch(t)
            push.pushResult(result)
            updateFinished(t, 'success')
            dropUnpush(t)
            logger.info('succeed to push the match result , task_id: %s,'
                        'result: %s', t, result)
            g_logger.info(trans2json(
                message='succeed to push the match result ,'
                        'external_id :%s, task_id: %s'
                        % (task['i:external_id'], t),
                action='push result'))
        except resultError:
            logger.error("failed to get the result , task_id: %s", t)
            g_logger.error(trans2json(
                message='failed to push result,'
                        'external_id: %s, task_id: %s'
                        % (task['i:external_id'], t),
                action='push result'))
            dropUnpush(t)
            updateFinished(t, 'failed')
        except AssertionError:
            logger.error("failed to get matches, task_id: %s", t)
            g_logger.error(trans2json(
                message='failed to push result,'
                        'external_id: %s, task_id: %s'
                        % (task['i:external_id'], t),
                action='push result'))
            dropUnpush(t)
            updateFinished(t, 'failed')
        except Exception:
            # Not a bare `except:` -- KeyboardInterrupt / SystemExit must be
            # able to stop this otherwise endless loop.
            logger.error('failed to push result, rest status to new,'
                         'task_id: %s, Error:', t, exc_info=True)
            g_logger.error(trans2json(
                message='failed to push result, res status'
                        'to new, external_id: %s, task_id: %s '
                        % (task['i:external_id'], t),
                action='retry to push'))
            changeStatus(t, 'new')
            logger.error("reset status to new , task_id: %s", t)
def buf_tasks(self, reqs):
    """Submit each picked task in `reqs` to celery and record the execution.

    A task that fails `task_check`, or whose submission raises, is handed
    back through `self.reply`. A submitted task is tracked in `self.taskm`
    and `self.tasks`, then recorded through `self.record` inside a db
    transaction; a failure to record is only logged.
    """
    accounts = self.accounts()
    backends = self.backends()
    for t in reqs:
        try:
            self.logger.info("receivce task from picker, task_uuid: %s, "
                             "site_asset_id: %s" % (t.uuid, t.site_asset_id))
            self.logger.debug("receive task info:%s" % t._asdict())
            received = ("site_asset_id:%s, task_uuid:%s, external_id:%s"
                        % (t.site_asset_id, t.uuid, t.external_id))
            g_logger.info(trans2json(message=received,
                                     action="receive picked task"))
            if not self.task_check(t, accounts, backends):
                self.reply(t)
                continue

            account = accounts[t.account]._asdict()
            account["backends"] = [b._asdict() for b in account["backends"]]
            backend_map = dict((key, be._asdict())
                               for key, be in backends.iteritems())
            self.logger.debug("add task's account: %s, backends: %s" %
                              (account, backend_map))

            celery_task = Task().query.delay(t._asdict(), account,
                                             backend_map)
            self.taskm[celery_task.task_id]['celery_task'] = celery_task
            self.taskm[celery_task.task_id]['task'] = t
            self.tasks.add(celery_task)
            self.logger.info("add task to celery, task_uuid: %s, "
                             "site_asset_id: %s, celery_uuid: %s " %
                             (t.uuid, t.site_asset_id, celery_task.task_id))
            added = ("site_asset_id:%s, task_uuid:%s, external_id:%s"
                     % (t.site_asset_id, t.uuid, t.external_id))
            g_logger.info(trans2json(message=added,
                                     action="add task to celery"))
        except Exception:
            self.reply(t)
            self.logger.error("catch exception from buf tasks, "
                              "task_uuid: %s , site_asset_id: %s" %
                              (t.uuid, t.site_asset_id), exc_info=True)
        else:
            # Reached only when the task was submitted; the `continue`
            # above skips this clause.
            try:
                db_txn(self.pool, partial(self.record, t))
            except Exception:
                self.logger.error("failed to record execution for task %s" %
                                  t.uuid)
def GET(self):
    """Return the history matches for the requested `site_asset_id`.

    Query parameters: `site_asset_id` (required) and `all_matches`
    (the string 'false', case-insensitive, sets `self.all_matches` to
    False).

    :return: whatever `match_result(...).load()` returns
    :raises web.BadRequest: when `site_asset_id` is missing or empty
    """
    try:
        stats.incr(RECEIVE_REQUEST, 1)
        web.header("Content-Type", "application/json")
        req = web.input()
        self.site_asset_id = str(req.get('site_asset_id', ""))
        if req.get('all_matches', 'true').lower() == 'false':
            self.all_matches = False
        if self.site_asset_id == "":
            self.error_code = PARAMS_ERROR["code"]
            self.error_msg = PARAMS_ERROR["message"]
            self.error_data.append("site_asset_id")
            raise web.BadRequest(wrap_error(self.error_code, self.error_msg,
                                            self.error_data))
        self.logger.info("get history matches, site_asset_id: %s, "
                         "all_matches: %s", self.site_asset_id,
                         self.all_matches)
        g_logger.info(trans2json(
            message="site_asset_id: %s, all_matches: %s"
                    % (self.site_asset_id, self.all_matches),
            action="get history matches"))
        mr = match_result(self.site_asset_id, self.all_matches)
        res = mr.load()
        self.logger.debug("site_asset_id: %s,"
                          "all_matches: %s, history matches: %s",
                          self.site_asset_id, self.all_matches, res)
        if res is None:
            g_logger.info(trans2json(
                message="site_asset_id:%s, all_matches: %s"
                        % (self.site_asset_id, self.all_matches),
                action="no such task"))
        return res
    except web.BadRequest:
        stats.incr(REQUEST_ERROR, 1)
        self.logger.error("site_asset_id is null")
        # Bare `raise` re-raises the same error with its traceback intact.
        raise
def finish(self, t, p, res):
    """Finalise a queried task: send its matches and persist the outcome.

    :param t: the task (exposes `uuid`, `site_asset_id`, `account`,
        `external_id`)
    :param p: passed through unchanged to `self.doom` on failure
    :param res: query result exposing `matches` (must not be None) and `crr`

    When the result carries no matches, `check_matches` is consulted and
    may upgrade the outcome to WITH_MATCH. If sending or persisting fails,
    the task is doomed with INTERNAL and QUERY_SUCCESS is not counted.
    """
    self.logger.info('to finish task, task_uuid:%s, site_asset_id:%s',
                     t.uuid, t.site_asset_id)
    self.logger.debug("res:%s " % str(res))
    assert res.matches is not None

    code = WITHOUT_MATCH if len(res.matches) == 0 else WITH_MATCH
    if code == WITHOUT_MATCH:
        try:
            if db_txn(self.pool, partial(self.check_matches, t)):
                code = WITH_MATCH
        except Exception:
            # Best effort: a failed lookup keeps the no-match outcome, but
            # the error is now logged instead of silently discarded.
            self.logger.warning('failed to check stored matches for task %s',
                                t.uuid, exc_info=True)
    tr = 'match' if code == WITH_MATCH else 'no_match'

    self.logger.debug('record finished task %s, site_asset_id: %s',
                      t.uuid, t.site_asset_id)
    try:
        ms = self.filter_matches(res.matches)
        for m in ms:
            g_logger.info(trans2json(
                message="company_id:%s, "
                        "meta_uuid:%s, instance_uuid:%s, vddb_company_id:%s"
                        % (t.account, m['meta_uuid'], m['instance_id'],
                           m['company_id']),
                action='matches info'))
        mc = len(ms)
        self.send_matches(t, ms, res.crr)
        task_status = db_txn(self.pool,
                             partial(self.load_task_status, t.uuid))
        self.update_hbase_task(task_status)
        db_txn(self.pool, partial(self.update_task, t, code, mc, tr))
    except Exception:
        self.logger.error('failed to finish task: %s, site_asset_id: %s' %
                          (t.uuid, t.site_asset_id), exc_info=True)
        # dooming may succeed, as it touches fewer tables
        self.doom(t, INTERNAL, p, res)
        return

    g_logger.info(trans2json(
        message="site_asset_id:%s, task_uuid:%s, external_id:%s"
                % (t.site_asset_id, t.uuid, t.external_id),
        action="task query complete"))
    stats.incr(QUERY_SUCCESS, 1)
def process(self, config, body):
    """Register `body` with the matches server and store the task.

    The payload is POSTed to `config['matches_server']` with
    `source=init`. If the reply's source is 'auto_match', the existing
    task is updated through `updateStatus`; otherwise a new task is
    generated and stored through `storeTaskMysql`.

    :param config: mapping providing 'matches_server'
    :param body: decoded task payload (JSON-serialisable)
    :raises sotreError: when any step fails
    """
    self.logger.info('start to process message')
    task_id = None
    task = defaultdict(list)
    try:
        resp = requests.post(config['matches_server'] + '?source=init',
                             data=json.dumps(body))
        self.logger.info('get request :%s, type:%s', resp.content,
                         type(resp.content))
        res = json.loads(resp.content)
        task_id = res['result']['task_id']
        parseParams(body, task)
        if res['result']['source'] == 'auto_match':
            db_txn(pool, partial(updateStatus), task_id,
                   task['site_asset_id'], task['external_id'])
            self.logger.info('this task has already been in hbase reset'
                             ' status to new , site_asset_ids : %s',
                             task['site_asset_id'])
            g_logger.info(trans2json(
                message='task has already been in'
                        ' hbase, external_id:%s, site_asset_id:%s, task_id:%s'
                        % (task['external_id'], task['site_asset_id'],
                           task_id),
                action="reset status to new"))
        else:
            genTask(task)
            task['task_uuid'] = task_id
            db_txn(pool, partial(storeTaskMysql), task)
            g_logger.info(trans2json(
                message="succeed to store task external_id:%s,"
                        " site_asset_id:%s, task_id:%s"
                        % (task['external_id'], task['site_asset_id'],
                           task['task_uuid']),
                action='store task to db'))
    except Exception:
        # Not a bare `except:` -- interpreter-exit exceptions must not be
        # converted into sotreError. This is a failure, so log it as one.
        self.logger.error('failed to store task, start to retry, task_uuid: %s'
                          ' site_asset_id: %s', task_id,
                          task['site_asset_id'], exc_info=True)
        g_logger.error(trans2json(
            message='failed to store task, start to'
                    ' retry ,external_id:%s, site_asset_id: %s, task_id: %s'
                    % (task['external_id'], task['site_asset_id'], task_id),
            action='store task to db'))
        raise sotreError
def task_finished(self, celery_id, query_res):
    """Forward the outcome of a finished celery query to the cleaner.

    :param celery_id: key into `self.taskm` identifying the celery task
    :param query_res: the query result; anything that is not a `TaskRes`
        is reported as BAD_OUTPUT

    Any exception raised while handling the result is logged and the task
    is reported as BAD_OUTPUT. If the task cannot be looked up at all,
    the failure is only logged because there is nothing to report.
    """
    t = None
    try:
        t = self.taskm[celery_id]['task']
        self.logger.info("finished query, task_id:%s, "
                         "site_asset_id: %s, celery_id:%s, "
                         "ret: %s, err: %s " %
                         (t.uuid, t.site_asset_id, celery_id,
                          query_res.ret, query_res.err))
        self.logger.debug("task_id:%s, out: %s", t.uuid, query_res.out)
        self.logger.debug("finished task info: %s" % str(t))
        g_logger.info(trans2json(
            message="site_asset_id:%s, task_uuid:%s, external_id:%s "
                    % (t.site_asset_id, t.uuid, t.external_id),
            action="task finished query from celery"))
        if not isinstance(query_res, TaskRes):
            # A non-TaskRes result means the query itself raised.
            self.cleaner.request((t, BAD_OUTPUT, None))
        else:
            _, state, res = self.parse_query_res(query_res)
            self.cleaner.request((t, state, res))
    except Exception:
        if t is None:
            # The lookup failed, so `t` was never bound; touching it here
            # used to raise a second error from inside the handler.
            self.logger.error("task finished for unknown celery task, "
                              "celery_id:%s", celery_id, exc_info=True)
            return
        self.cleaner.request((t, BAD_OUTPUT, None))
        self.logger.error("task finished catch unhandle exception, "
                          "task_uuid:%s" % t.uuid, exc_info=True)
def do_doom(self, t, code, queue_at, deadline):
    """Generator that either re-queues task `t` or marks it as failed.

    Each `yield` hands a `db_execute` / `db_query` request to the caller
    driving this generator.

    The task is retried when `queue_at` is set and `deadline` is unset, zero,
    or later than `queue_at`. Otherwise CHECK_TASK is queried and, when it
    reports `rc <= 0`, the task is failed, an "unrecognized" result is sent,
    the hbase record is refreshed and QUERY_FAILED is counted.
    """
    cleaner_log = logging.getLogger('mwtm_cleaner')

    no_deadline = deadline == None or deadline == 0
    retryable = queue_at != None and (no_deadline or deadline > queue_at)

    if retryable:
        cleaner_log.debug('to retry task %s, queue at %s', t.uuid, queue_at)
        yield db_execute(RETRY_TASK, queue_at, code, t.id)
        retry_msg = ("task_uuid:%s, site_asset_id:%s, deadline:%s, "
                     "external_id:%s "
                     % (t.uuid, t.site_asset_id, deadline, t.external_id))
        g_logger.info(trans2json(message=retry_msg, action="retry task"))
        return

    cleaner_log.debug('to fail task %s', t.uuid)
    fail_msg = ("task_uuid:%s, site_asset_id:%s, external_id:%s"
                % (t.uuid, t.site_asset_id, t.external_id))
    g_logger.info(trans2json(message=fail_msg, action="to fail task"))

    rc, _ = yield db_query(CHECK_TASK, t.id)
    if rc <= 0:
        yield db_execute(FAIL_TASK, code, t.id)
        self.send_matches(t, unrecognized=True)
        loader = partial(self.load_task_status, t.uuid)
        self.update_hbase_task(db_txn(self.pool, loader))
        stats.incr(QUERY_FAILED, 1)
def POST(self):
    """Accept a match result and store it.

    The request body is JSON and the `source` query parameter selects the
    origin of the result. Returns `init_result(tid, src)` when source is
    'init', otherwise `insert_result()`.

    :raises web.BadRequest: when `checkParams` rejects the payload
    :raises web.internalerror: on any other failure
    """
    try:
        stats.incr(RESULT_INSERT, 1)
        web.header("Content-Type", "application/json")
        res = web.data()
        req = web.input()
        self.logger.info('input:%s', req)
        self.source = req.get("source", "")
        res = json.loads(res)
        self.logger.debug('get input :%s', req)
        self.logger.debug('get message :%s', res)
        g_logger.info(trans2json(
            message='get message ,input: %s, msg: %s' % (req, res),
            action='get resquest post'))
        if not self.checkParams(res):
            self.error_code = PARAMS_ERROR["code"]
            self.error_msg = PARAMS_ERROR["message"]
            self.error_data.append("site_asset_id")
            raise web.BadRequest(wrap_error(self.error_code, self.error_msg,
                                            self.error_data))
        ins = insert(self.site_asset_id, self.source, self.match_type,
                     self.matches, self.extra_info, self.crr, self.url,
                     self.parent_info)
        tid, src = ins.store_result()
        g_logger.info(trans2json(
            message='reply to caller, task_id:%s,source:%s' % (tid, src),
            action='reply to caller'))
        stats.incr(RESULT_INSERT_SUCCESS, 1)
        if self.source == 'init':
            return init_result(tid, src)
        return insert_result()
    except web.BadRequest:
        # Must come before the generic handler, which would otherwise
        # turn this client error into an internal error.
        stats.incr(RESULT_INSERT_FAILED, 1)
        self.logger.error("params is error, bad request")
        raise
    except Exception:
        stats.incr(RESULT_INSERT_FAILED, 1)
        self.logger.error("va-interface catch unhandle exception",
                          exc_info=True)
        self.error_code = INTERNAL_ERROR["code"]
        self.error_msg = INTERNAL_ERROR["message"]
        raise web.internalerror(message=wrap_error(
            self.error_code, self.error_msg, self.error_data))
def GET(self):
    """Return the history matches for the requested `site_asset_id`.

    Query parameters: `site_asset_id` (required) and `all_matches`
    (the string 'false', case-insensitive, sets `self.all_matches` to
    False).

    :return: whatever `match_result(...).load()` returns
    :raises web.BadRequest: when `site_asset_id` is missing or empty
    """
    try:
        stats.incr(RECEIVE_REQUEST, 1)
        web.header("Content-Type", "application/json")
        req = web.input()
        self.site_asset_id = str(req.get('site_asset_id', ""))
        if req.get('all_matches', 'true').lower() == 'false':
            self.all_matches = False
        if self.site_asset_id == "":
            self.error_code = PARAMS_ERROR["code"]
            self.error_msg = PARAMS_ERROR["message"]
            self.error_data.append("site_asset_id")
            raise web.BadRequest(wrap_error(self.error_code, self.error_msg,
                                            self.error_data))
        self.logger.info("get history matches, site_asset_id: %s, "
                         "all_matches: %s", self.site_asset_id,
                         self.all_matches)
        g_logger.info(trans2json(
            message="site_asset_id: %s, all_matches: %s"
                    % (self.site_asset_id, self.all_matches),
            action="get history matches"))
        mr = match_result(self.site_asset_id, self.all_matches)
        res = mr.load()
        self.logger.debug("site_asset_id: %s,"
                          "all_matches: %s, history matches: %s",
                          self.site_asset_id, self.all_matches, res)
        if res is None:
            g_logger.info(trans2json(
                message="site_asset_id:%s, all_matches: %s"
                        % (self.site_asset_id, self.all_matches),
                action="no such task"))
        return res
    except web.BadRequest:
        stats.incr(REQUEST_ERROR, 1)
        self.logger.error("site_asset_id is null")
        # Bare `raise` re-raises the same error with its traceback intact.
        raise
def send_matches(self, task, matches=None, crr="", unrecognized=False):
    """POST the match result of `task` to `self.matches_server`.

    The reported match_type is "unrecognized" when `unrecognized` is true,
    'match' when `matches` is non-empty, and "no_match" otherwise.

    :param task: object exposing `uuid`, `site_asset_id`, `external_id`
    :param matches: list of matches to send; None means no matches
    :param crr: value sent as the `notification` field
    :param unrecognized: force the "unrecognized" match type
    :raises SendMatchesError: on a request failure or a non-200 response
    """
    # None replaces the former shared mutable default `[]`.
    if matches is None:
        matches = []

    if unrecognized:
        match_type = "unrecognized"
    elif len(matches):
        match_type = 'match'
    else:
        match_type = "no_match"

    # NOTE(review): eval() executes whatever is stored in
    # task.site_asset_id. If that value can be influenced from outside,
    # this is a code-injection risk; ast.literal_eval is the usual safe
    # replacement -- confirm the stored format before switching.
    site_asset_id = eval(task.site_asset_id)

    data = dict(id="null", jsonrpc="2.0", method="matches",
                params=dict(matches=matches,
                            site_asset_id=site_asset_id,
                            notification=crr,
                            match_type=match_type))
    params = dict(source="auto_match")
    try:
        req = requests.post(self.matches_server, params=params,
                            data=json.dumps(data))
        if req.status_code != 200:
            self.logger.error("send matches failed, code:%s", req.status_code)
            raise SendMatchesError("send matches faild, task_id:%s" % task.uuid)
    except RequestException:
        self.logger.error("send matches failed, %s", task.uuid, exc_info=True)
        raise SendMatchesError("send matches faild")

    self.logger.info("send matches success, task_uuid:%s, site_asset_id:%s,"
                     "external_id:%s", task.uuid, task.site_asset_id,
                     task.external_id)
    g_logger.info(trans2json(
        message="task_uuid:%s, site_asset_id:%s, external_id:%s "
                % (task.uuid, task.site_asset_id, task.external_id),
        action="send matches success"))
def query(t, account, backends):
    """Build the task_executor.py command line for a task and run it.

    Args:
        t: task mapping; the keys read here are 'id', 'uuid',
            'site_asset_id', 'external_id', 'created_at', 'format',
            'dna_url' and 'scope'.
        account: account mapping, shaped like
            {..., backends: [{'extra':, 'account':, 'backend':}]}.
            Keys read here: 'backends' (each entry provides 'backend',
            'level', 'mode', 'extra'), 'hot', 'hot_user', 'hot_pass',
            'backend_user', 'backend_pass', 'id', 'domain', 'slicing',
            'slice_duration', 'allow_partial' and 'rules'.
        backends: mapping keyed by backend id, shaped like
            {id: {'capacity':, 'id':, 'vddb_address':}}.

    Returns:
        ``TaskRes(t, ret, out, err)`` with the child's exit status and
        captured stdout.

    Raises:
        TaskException: when the command line cannot be generated or when
            spawning / waiting for the child process fails.
    """
    logger = logging.getLogger("mw_celery_task")
    try:
        logger.info("receive task to query: task_uuid: %s, "
                    "site_asset_id: %s", t['uuid'], t['site_asset_id'])
        g_logger.info(trans2json(
            message="site_asset_id:%s, "
            "task_uuid:%s, external_id:%s" %
            (t['site_asset_id'], t['uuid'], t['external_id']),
            action="get task from celery"))

        # One vdna:// URL per backend configured for the account.
        urls = []
        for b in account['backends']:
            be = backends[b['backend']]
            opts = dict(level=b['level'], mode=b['mode'], extra=b['extra'])
            if account['hot'] == 'true':
                opts['hot_user'] = account['hot_user']
                opts['hot_pass'] = account['hot_pass']
            qs = urlencode(opts.items())
            urls.append('vdna://%s:%s@%s/?%s' %
                        (account['backend_user'], account['backend_pass'],
                         be['vddb_address'], qs))

        args = ['/'.join([os.getenv('MW_HOME'), 'lib', 'task_executor.py']),
                '--task', str(t['id']),
                '--task-uuid', t['uuid'],
                '--timestamp', str(t['created_at']),
                '--account', str(account['id']),
                '--site-domain', account['domain'],
                '--site-asset-id', t['site_asset_id'],
                '--clip-format', t['format'],
                '--dna-url', t['dna_url']]
        if account['slicing'] == 'true':
            args.extend(['--slice-duration', str(account['slice_duration'])])
        if account['allow_partial'] == 'false':
            args.append('--fail-partial')
        # reverse query ingestion triggering is done by reverse_trigger.py
        for u in urls:
            args.extend(['--backend', u])
        if t['scope'] is not None:
            for s in t['scope']:
                args.extend(['--scope', s])
        for r in account['rules']:
            args.append('--' + r)
    except Exception:
        logger.error("generate command line failed, "
                     "uuid: %s, site_asset_id: %s",
                     t['uuid'], t['site_asset_id'], exc_info=True)
        # NOTE(review): the account mapping carries backend/hot passwords,
        # so this debug line writes credentials to the log.
        logger.debug("task: %s, account: %s, backends: %s",
                     t, account, backends)
        raise TaskException(err="query failed, generate execute cmd failed")
    else:
        # bufsize=-1 usually means fully buffer the output, usually, ugh
        # please contact [email protected] if stdout is blocked
        try:
            start_time = time.time()
            proc = Popen(args, close_fds=True, stdout=PIPE, bufsize=-1)
            logger.info("spawn a process to query, task_uuid: %s, "
                        "site_asset_id: %s, just wait til finished",
                        t['uuid'], t['site_asset_id'])
            g_logger.info(trans2json(
                message="site_asset_id:%s, "
                "task_uuid:%s, external_id:%s" %
                (t['site_asset_id'], t['uuid'], t['external_id']),
                action="start query vddb"))
            # stderr is not piped, so ``err`` is always None here.
            out, err = proc.communicate()
            ret = proc.wait()
            logger.info("query finished, return to manager, "
                        "task_uuid: %s, site_asset_id: %s, "
                        "ret: %s, out: %s, err: %s",
                        t['uuid'], t['site_asset_id'], ret, out, err)
            g_logger.info(trans2json(
                message="site_asset_id:%s, "
                "task_uuid:%s, external_id:%s" %
                (t['site_asset_id'], t['uuid'], t['external_id']),
                action="query vddb finished"))
            end_time = time.time()
            # Scale to milliseconds before truncating; truncating first
            # reported every sub-second query as 0 ms.
            stats.timing(QUERY_VDDB, int((end_time - start_time) * 1000))
            return TaskRes(t, ret, out, err)
        except Exception:
            # Exception (not a bare except) so SystemExit/KeyboardInterrupt
            # still propagate instead of being turned into a task failure.
            logger.error("spawn process catch exception, uuid: %s, "
                         "site_asset_id: %s, ",
                         t['uuid'], t['site_asset_id'], exc_info=True)
            logger.debug("task: %s, account: %s", t, account)
            raise TaskException(err="query failed, spawn process failed")
def query(t, account, backends):
    """Build the task_executor.py command line for a task and run it.

    Args:
        t: task mapping; the keys read here are 'id', 'uuid',
            'site_asset_id', 'external_id', 'created_at', 'format',
            'dna_url' and 'scope'.
        account: account mapping, shaped like
            {..., backends: [{'extra':, 'account':, 'backend':}]}.
            Keys read here: 'backends' (each entry provides 'backend',
            'level', 'mode', 'extra'), 'hot', 'hot_user', 'hot_pass',
            'backend_user', 'backend_pass', 'id', 'domain', 'slicing',
            'slice_duration', 'allow_partial' and 'rules'.
        backends: mapping keyed by backend id, shaped like
            {id: {'capacity':, 'id':, 'vddb_address':}}.

    Returns:
        ``TaskRes(t, ret, out, err)`` with the child's exit status and
        captured stdout.

    Raises:
        TaskException: when the command line cannot be generated or when
            spawning / waiting for the child process fails.
    """
    logger = logging.getLogger("mw_celery_task")
    try:
        logger.info("receive task to query: task_uuid: %s, "
                    "site_asset_id: %s", t['uuid'], t['site_asset_id'])
        g_logger.info(trans2json(
            message="site_asset_id:%s, "
            "task_uuid:%s, external_id:%s" %
            (t['site_asset_id'], t['uuid'], t['external_id']),
            action="get task from celery"))

        # One vdna:// URL per backend configured for the account.
        urls = []
        for b in account['backends']:
            be = backends[b['backend']]
            opts = dict(level=b['level'], mode=b['mode'], extra=b['extra'])
            if account['hot'] == 'true':
                opts['hot_user'] = account['hot_user']
                opts['hot_pass'] = account['hot_pass']
            qs = urlencode(opts.items())
            urls.append('vdna://%s:%s@%s/?%s' %
                        (account['backend_user'], account['backend_pass'],
                         be['vddb_address'], qs))

        args = ['/'.join([os.getenv('MW_HOME'), 'lib', 'task_executor.py']),
                '--task', str(t['id']),
                '--task-uuid', t['uuid'],
                '--timestamp', str(t['created_at']),
                '--account', str(account['id']),
                '--site-domain', account['domain'],
                '--site-asset-id', t['site_asset_id'],
                '--clip-format', t['format'],
                '--dna-url', t['dna_url']]
        if account['slicing'] == 'true':
            args.extend(['--slice-duration', str(account['slice_duration'])])
        if account['allow_partial'] == 'false':
            args.append('--fail-partial')
        # reverse query ingestion triggering is done by reverse_trigger.py
        for u in urls:
            args.extend(['--backend', u])
        if t['scope'] is not None:
            for s in t['scope']:
                args.extend(['--scope', s])
        for r in account['rules']:
            args.append('--' + r)
    except Exception:
        logger.error("generate command line failed, "
                     "uuid: %s, site_asset_id: %s",
                     t['uuid'], t['site_asset_id'], exc_info=True)
        # NOTE(review): the account mapping carries backend/hot passwords,
        # so this debug line writes credentials to the log.
        logger.debug("task: %s, account: %s, backends: %s",
                     t, account, backends)
        raise TaskException(err="query failed, generate execute cmd failed")
    else:
        # bufsize=-1 usually means fully buffer the output, usually, ugh
        # please contact [email protected] if stdout is blocked
        try:
            start_time = time.time()
            proc = Popen(args, close_fds=True, stdout=PIPE, bufsize=-1)
            logger.info("spawn a process to query, task_uuid: %s, "
                        "site_asset_id: %s, just wait til finished",
                        t['uuid'], t['site_asset_id'])
            g_logger.info(trans2json(
                message="site_asset_id:%s, "
                "task_uuid:%s, external_id:%s" %
                (t['site_asset_id'], t['uuid'], t['external_id']),
                action="start query vddb"))
            # stderr is not piped, so ``err`` is always None here.
            out, err = proc.communicate()
            ret = proc.wait()
            logger.info("query finished, return to manager, "
                        "task_uuid: %s, site_asset_id: %s, "
                        "ret: %s, out: %s, err: %s",
                        t['uuid'], t['site_asset_id'], ret, out, err)
            g_logger.info(trans2json(
                message="site_asset_id:%s, "
                "task_uuid:%s, external_id:%s" %
                (t['site_asset_id'], t['uuid'], t['external_id']),
                action="query vddb finished"))
            end_time = time.time()
            # Scale to milliseconds before truncating; truncating first
            # reported every sub-second query as 0 ms.
            stats.timing(QUERY_VDDB, int((end_time - start_time) * 1000))
            return TaskRes(t, ret, out, err)
        except Exception:
            # Exception (not a bare except) so SystemExit/KeyboardInterrupt
            # still propagate instead of being turned into a task failure.
            logger.error("spawn process catch exception, uuid: %s, "
                         "site_asset_id: %s, ",
                         t['uuid'], t['site_asset_id'], exc_info=True)
            logger.debug("task: %s, account: %s", t, account)
            raise TaskException(err="query failed, spawn process failed")