Example No. 1
 def do_doom(self, t, code, queue_at, deadline):
     logger = logging.getLogger('mwtm_cleaner')
     if queue_at != None and (deadline == None or deadline == 0 or \
                              deadline > queue_at):
         logger.debug('to retry task %s, queue at %s', t.uuid, queue_at)
         yield db_execute(RETRY_TASK, queue_at, code, t.id)
         #yield db_execute(RENEW_EVENT, t.uuid, 'retry')
         g_logger.info(
             trans2json(message="task_uuid:%s, "
                        "site_asset_id:%s, deadline:%s, external_id:%s " %
                        (t.uuid, t.site_asset_id, deadline, t.external_id),
                        action="retry task"))
     else:
         logger.debug('to fail task %s', t.uuid)
         g_logger.info(
             trans2json(message="task_uuid:%s, "
                        "site_asset_id:%s, external_id:%s" %
                        (t.uuid, t.site_asset_id, t.external_id),
                        action="to fail task"))
         rc, _ = yield db_query(CHECK_TASK, t.id)
         if rc <= 0:
             yield db_execute(FAIL_TASK, code, t.id)
             self.send_matches(t, unrecognized=True)
             task_status = db_txn(self.pool,
                                  partial(self.load_task_status, t.uuid))
             self.update_hbase_task(task_status)
         stats.incr(QUERY_FAILED, 1)
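
Every example in this listing funnels its structured log line through trans2json, whose definition never appears in the snippets. As a point of reference, here is a minimal sketch consistent with the call sites (keyword arguments message and action, or the same two values passed positionally); the field names and behavior are assumptions, not the project's confirmed implementation:

import json

def trans2json(message='', action='', **extra):
    # Hypothetical sketch: fold the log fields into one JSON string so
    # that downstream log collectors can parse every record uniformly.
    record = {'message': message, 'action': action}
    record.update(extra)
    return json.dumps(record)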
Example No. 2
 def checkParams(self, res):
     if res.has_key("params"):
         res = res['params']
         self.parent_info = res['parent_info'] if \
         res.has_key('parent_info') else []
         self.match_type = res['match_type'] if\
         res.has_key('match_type') else None
         self.matches = res['matches'] if\
         res.has_key('matches') else []
         self.extra_info = res['extra_info'] if\
         res.has_key('extra_info') else None
         self.crr = res['notification'] if\
         res.has_key('notification') else None
         self.url = res['extra_info_url'] if\
         res.has_key('extra_info_url') else None
         if res.has_key('match_type') and res['match_type'] != 'match':
             self.matches = []
         if not res.has_key('site_asset_id'):
             g_logger.info(trans2json(message='params is error , params has'
                  ' no key site_asset_id', action='checkParams'))
             return False
         else:
             if res['site_asset_id'] == "" or res['site_asset_id'] == [] or \
                    not isinstance(res['site_asset_id'], list):
                 g_logger.info(trans2json(message="params is error , site_asset_id='' or []",
                     action='checkParams'))
                 return False
             else:
                 self.site_asset_id = res['site_asset_id']
     return True
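
For reference, a request body that would pass this validation could look as follows; the concrete values are illustrative assumptions based only on the keys the method reads:

payload = {
    'params': {
        'site_asset_id': ['12345-abcdef'],  # must be a non-empty list
        'match_type': 'match',              # any other value clears matches
        'matches': [],
        'parent_info': [],
        'extra_info': None,
        'notification': None,
        'extra_info_url': None,
    }
}
# checkParams(payload) on the handler instance would return True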
Example No. 3
 def process_task(self, body, message):
     self.logger.info('get task from broker :%s, type:%s', body,
                      type(body))
     g_logger.info(trans2json(message="get task from broker :"
                   "%s" % str(body), action="fetchTask"))
     stats.incr(FETCH_TASKS, 1)
     if isinstance(body, dict):
         body = json.dumps(body)
     else:
         body = json.loads(body)
     if self.checkParams(body):
         try:
             self.process(config, body)
             message.ack()
             stats.incr(FINISHED_TASKS, 1)
         except sotreError:
             message.ack()
             stats.incr(DROP_TASKS, 1)
             g_logger.error(trans2json(message='some unexpected thing'
                 ' happen, maybe db error ', action='store task to db'))
             self.logger.error('some unexpected thing happen ,'
                      'Error:', exc_info=True)
     else:
         self.logger.error("params is error: %s", body)
         g_logger.error(trans2json(message='message from mq , params is error'))
         stats.incr(DROP_TASKS, 1)
         message.ack()
Example No. 4
 def process_task(self, body, message):
     self.logger.info('get task from broker :%s, type:%s', body, type(body))
     g_logger.info(
         trans2json(message="get task from broker :"
                    "%s" % str(body),
                    action="fetchTask"))
     stats.incr(FETCH_TASKS, 1)
     if isinstance(body, dict):
         body = json.dumps(body)
     else:
         body = json.loads(body)
     if self.checkParams(body):
         try:
             self.process(config, body)
             message.ack()
             stats.incr(FINISHED_TASKS, 1)
         except sotreError:
             message.ack()
             stats.incr(DROP_TASKS, 1)
             g_logger.error(
                 trans2json(message='some unexpected thing'
                            ' happen, maybe db error ',
                            action='store task to db'))
             self.logger.error('some unexpected thing happen ,'
                               'Error:',
                               exc_info=True)
     else:
         self.logger.error("params is error: %s", body)
         g_logger.error(
             trans2json(message='message from mq , params is error'))
         stats.incr(DROP_TASKS, 1)
         message.ack()
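
checkParams indexes into a dict, yet the branch above serializes dict bodies to JSON text (json.dumps) and only parses string bodies (json.loads), so the dict case looks inverted. A sketch that always hands checkParams a dict, assuming a dict is what it expects, would be:

if not isinstance(body, dict):
    body = json.loads(body)  # decode JSON text; dicts pass through untouched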
Example No. 5
 def process_task(self, body, message):
     try:
         data = body
         #data = json.loads(body)
         utils.digest = data['params']['digest']
         g_logger_info.info(
             trans2json("receive gateway task message %s" % (body),
                        'qb_push_receive_gateway'))
         #g_logger_info.info(trans2json("task_uuid:%s"%data['params']['external_id']))
         gv.statsd_conn.incr(
             "thunder.querybroker.qbpush.receive_gateway_message", 1)
         result = self.check_input_params(data)
         if result[0] != 0:
             error_message = self.trans_error_json(result, data)
             g_logger.error(trans2json("response info %s" % error_message))
         else:
             ret_code, result = query_hash(data)
             if ret_code is None:
                 self.send_task_priority_escalator(data)
         message.ack()
         return
     except Exception:
         g_logger.error(
             trans2json("process_task errors happend %s" %
                        str(traceback.format_exc())))
         message.ack()
Example No. 6
def main():
    try:
        args = docopt.docopt(__doc__, version=gv.version)
        cfg_file = get_conf_abspath(args)
        check_conf_validation(cfg_file)
        cfg = parse_conf_file(cfg_file)
        init_logger(cfg)
        get_global_vars(cfg)
        init_mysql_keyword(cfg)
        init_statsd()
        init_redis()
    except:
        g_logger.error(traceback.format_exc())
        sys.exit(1)

    gv.dp.start()
    while True:
        with Connection(gv.taskpriorit_url) as conn:
            try:
                worker = Worker(conn, gv.taskpriorit_exchange, gv.taskpriorit_queue, gv.taskpriorit_routing_key)
                g_logger.info(trans2json('task priority escalator start'))
                worker.run()
            except Exception:
                g_logger.error(trans2json("task priority escalator %s happened!" % str(traceback.format_exc())))
    gv.dp.join()
Example No. 7
 def checkParams(self, res):
     if res.has_key("params"):
         res = res['params']
         self.parent_info = res['parent_info'] if \
         res.has_key('parent_info') else []
         self.match_type = res['match_type'] if\
         res.has_key('match_type') else None
         self.matches = res['matches'] if\
         res.has_key('matches') else []
         self.extra_info = res['extra_info'] if\
         res.has_key('extra_info') else None
         self.crr = res['notification'] if\
         res.has_key('notification') else None
         self.url = res['extra_info_url'] if\
         res.has_key('extra_info_url') else None
         if res.has_key('match_type') and res['match_type'] != 'match':
             self.matches = []
         if not res.has_key('site_asset_id'):
             g_logger.info(
                 trans2json(message='params is error , params has'
                            ' no key site_asset_id',
                             action='checkParams'))
             return False
         else:
             if res['site_asset_id'] == "" or res['site_asset_id'] == [] or \
                    not isinstance(res['site_asset_id'], list):
                 g_logger.info(
                     trans2json(
                         message="params is error , site_asset_id='' or []",
                          action='checkParams'))
                 return False
             else:
                 self.site_asset_id = res['site_asset_id']
     return True
Example No. 8
def run(queue):
    pro = producer()
    push = pusher(pro)
    while True:
        t = queue.get(block=True)
        logger.info('get a task to push, task_id: %s', t)
        g_logger.info(
            trans2json(message='get a task to push, task_id: %s' % t,
                        action='get task'))
        task = getTask(t)
        logger.info('------task:%s', task)
        try:
            re = push.getMatch(t)
            #logger.info('type:%s, %s', type(re), re)
            push.pushResult(re)
            updateFinished(t, 'success')
            dropUnpush(t)
            logger.info(
                'succeed to push the match result , task_id: %s,'
                ' result: %s', t, re)
            g_logger.info(
                trans2json(message='succeed to push the match result ,'
                           ' external_id :%s, task_id: %s' % (
                               task['i:external_id'],
                               t,
                           ),
                           action='push result'))
        except resultError:
            logger.error("failed to get the result , task_id: %s", t)
            g_logger.error(
                trans2json(message='failed to push result,'
                           ' external_id: %s, task_id: %s' %
                           (task['i:external_id'], t),
                           action='push result'))
            dropUnpush(t)
            updateFinished(t, 'failed')
        except AssertionError:
            logger.error("failed to get matches, task_id: %s", t)
            g_logger.error(
                trans2json(message='failed to push result,'
                           ' external_id: %s, task_id: %s' %
                           (task['i:external_id'], t),
                           action='push result'))
            dropUnpush(t)
            updateFinished(t, 'failed')
        except:
            logger.error(
                'failed to push result, reset status to new,'
                ' task_id: %s,  Error:',
                t,
                exc_info=True)
            g_logger.error(
                trans2json(message='failed to push result, reset status'
                           ' to new, external_id: %s, task_id: %s ' %
                           (task['i:external_id'], t),
                           action='retry to push'))
            changeStatus(t, 'new')
            logger.error("reset status to new , task_id: %s", t)
Example No. 9
 def process(self, config, body):
     #db_txn(pool, partial(store), data, cre)
     self.logger.info('start to process message')
     task_id = None
     task = defaultdict(list)
     try:
         #self.process(config, task)
         req = requests.post(config['matches_server'] + '?source=init',
                             data=json.dumps(body))
         self.logger.info('get request :%s, type:%s', req.content,
                          type(req.content))
         res = json.loads(req.content)
         task_id = res['result']['task_id']
         parseParams(body, task)
         if res['result']['source'] == 'auto_match':
             db_txn(pool, partial(updateStatus), task_id,
                    task['site_asset_id'], task['external_id'])
             self.logger.info(
                 'this task has already been in hbase reset'
                 ' status to new , site_asset_ids : %s',
                 task['site_asset_id'])
             g_logger.info(
                 trans2json(
                     message='task has already been in'
                     ' hbase, external_id:%s, site_asset_id:%s, task_id:%s'
                     % (
                         task['external_id'],
                         task['site_asset_id'],
                         task_id,
                     ),
                     action="reset status to new"))
         else:
             genTask(task)
             task['task_uuid'] = task_id
             db_txn(pool, partial(storeTaskMysql), task)
             g_logger.info(
                 trans2json(message="succeed to store task external_id:%s,"
                            " site_asset_id:%s, task_id:%s" %
                            (task['external_id'], task['site_asset_id'],
                             task['task_uuid']),
                            action='store task to db'))
     except:
         self.logger.info(
             'failed to store task, start to retry, task_uuid: %s'
             ' site_asset_id: %s',
             task_id,
             task['site_asset_id'],
             exc_info=True)
         g_logger.error(
             trans2json(
                 message='failed to store task, start to'
                 ' retry ,external_id:%s, site_asset_id: %s, task_id: %s' %
                 (task['external_id'], task['site_asset_id'], task_id),
                 action='store task to db'))
         raise sotreError
Example No. 10
def connect_rabbitmq(mq_url, queue_name):
    try:
        exchange = Exchange(queue_name, type='fanout')
        queue = kombu.Queue(queue_name, exchange, routing_key=queue_name)
        #connection = Connection('amqp://*****:*****@localhost:5672//')
        g_logger.debug(
            trans2json('connect to %s, queue is %s' % (mq_url, queue_name)))
        connection = Connection(mq_url)
        return connection
    except Exception, msg:
        #cas_system_log('error', 'connect rabbitmq failed [%s]' %(msg))
        g_logger.error(trans2json('connect rabbitmq failed [%s]' % (msg)))
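
A hedged usage sketch, with placeholder URL and queue name: the returned kombu Connection is lazy, so the caller can open and release it explicitly. Note that the Exchange and Queue built inside the function are not returned, so callers presumably redeclare them when publishing or consuming.

conn = connect_rabbitmq('amqp://guest:guest@localhost:5672//', 'qb_queue')
if conn is not None:
    conn.connect()   # kombu connections are lazy; force the AMQP handshake
    conn.release()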
Example No. 11
 def send_bttask_to_cas(self, rds_conn, data, url_hot):
     try:
         message = json.dumps(data)
         rds_conn.set_hot(message, url_hot)
         g_logger_info.info(
             trans2json("send to cas bt download task %s" % (data),
                        "qb_pull_send_cas"))
         gv.statsd_conn.incr(
             "thunder.querybroker.qbpull.send_to_cas_download", 1)
     except Exception:
         g_logger.error(
             trans2json("send cas redis bt task errors happend %s" %
                        str(traceback.format_exc())))
Example No. 12
def upload_file(upload_path, file_path):
    ret = 0
    err = ''
    try:
        args = "%s/swift upload '%s' '%s' " % (
            gv.swith_path, upload_path, file_path)
        ret, _, err = popen(args)
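        # note: the file is removed even when the upload fails; the
        # variants in Examples 21 and 28 remove it only when ret == 0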
        os.remove(file_path)
        g_logger.info(trans2json('upload file success'))
    except OSError:
        g_logger.error(trans2json(
            "delete or upload bt file %s  error %s" % (file_path, traceback.format_exc())))
    finally:
        return False if ret or err else True
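
The popen helper used by these swift wrappers is never shown; it is assumed to run a shell command and return (returncode, stdout, stderr). A minimal sketch under that assumption:

import subprocess

def popen(args):
    # Hypothetical helper: run a shell command, capture its output, and
    # hand back (returncode, stdout, stderr) as the call sites expect.
    p = subprocess.Popen(args, shell=True,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    return p.returncode, out, err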
Example No. 13
    def buf_tasks(self, reqs):
        accs = self.accounts()
        backends = self.backends()
        for t in reqs:
            try:
                self.logger.info("receivce task from picker, task_uuid: %s, "
                                 "site_asset_id: %s" %
                                 (t.uuid, t.site_asset_id))
                self.logger.debug("receive task info:%s" % t._asdict())
                g_logger.info(
                    trans2json(message="site_asset_id:%s, "
                               "task_uuid:%s, external_id:%s" %
                               (t.site_asset_id, t.uuid, t.external_id),
                               action="receive picked task"))
                if not self.task_check(t, accs, backends):
                    self.reply(t)
                    continue
                acc = accs[t.account]._asdict()
                acc["backends"] = [v._asdict() for v in acc["backends"]]
                backs = {}
                for k, v in backends.iteritems():
                    backs[k] = v._asdict()
                self.logger.debug("add task's account: %s, backends: %s" %
                                  (acc, backs))

                ct = Task().query.delay(t._asdict(), acc, backs)
                self.taskm[ct.task_id]['celery_task'] = ct
                self.taskm[ct.task_id]['task'] = t
                self.tasks.add(ct)
                self.logger.info("add task to celery, task_uuid: %s, "
                                 "site_asset_id: %s, celery_uuid: %s " % \
                                 (t.uuid, t.site_asset_id, ct.task_id))
                g_logger.info(
                    trans2json(message="site_asset_id:%s, "
                               "task_uuid:%s, external_id:%s" %
                               (t.site_asset_id, t.uuid, t.external_id),
                               action="add task to celery"))

            except Exception, ex:
                self.reply(t)
                self.logger.error("catch exception from buf tasks, "
                                  "task_uuid: %s , site_asset_id: %s" %
                                  (t.uuid, t.site_asset_id),
                                  exc_info=True)
                continue
            try:
                db_txn(self.pool, partial(self.record, t))
            except Exception:
                self.logger.error("failed to record execution for task %s" %
                                  t.uuid)
Example No. 14
def query_vddb_async(req_hash, data):

    g_logger.debug(trans2json("query vddb async by hash %s" % str(req_hash)))

    mysystem = mysystem(gv.mysystem_user, gv.mysystem_passwd,
                          gv.mysystem_url, False, MEDIA_REQ_TIMEOUT, g_logger)
    uuid = data['params']['external_id']
    ret, status_listing = mysystem.query(req_hash, uuid)

    working_cnt = 0
    copyrighted_cnt = 0
    uncopyrighted_cnt = 0
    status_cnt = len(status_listing)
    for status in status_listing:
        if status['status'] == STATUS_COPYRIGHTED:
            copyrighted_cnt += 1
        if status['status'] == STATUS_UNCOPYRIGHTED:
            uncopyrighted_cnt += 1
        if status['status'] == STATUS_WORKING:
            working_cnt += 1
    # all can not check
    if ret == STATUS_UNDETECTED:
        ret_code = 2
        return ret_code, status_listing
    if status_cnt > 0:
        if copyrighted_cnt == status_cnt or working_cnt == status_cnt or uncopyrighted_cnt == status_cnt:
            ret_code = 1
            return ret_code, status_listing
    return 4, None
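
The bare numeric return codes are not named anywhere in the snippet. Reading the branches, a plausible mapping (an assumption, not confirmed by the project) is:

RET_ALL_UNDETECTED = 2   # vddb could not check any item
RET_UNIFORM_STATUS = 1   # every item copyrighted, uncopyrighted, or working
RET_MIXED_OR_EMPTY = 4   # mixed statuses, or nothing usable to report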
Example No. 15
def query_vddb_async(req_hash, data):

    g_logger.debug(trans2json("query vddb async by hash %s" % str(req_hash)))

    mysystem = mysystem(gv.mysystem_user, gv.mysystem_passwd, gv.mysystem_url,
                        False, MEDIA_REQ_TIMEOUT, g_logger)
    uuid = data['params']['external_id']
    ret, status_listing = mysystem.query(req_hash, uuid)

    working_cnt = 0
    copyrighted_cnt = 0
    uncopyrighted_cnt = 0
    status_cnt = len(status_listing)
    for status in status_listing:
        if status['status'] == STATUS_COPYRIGHTED:
            copyrighted_cnt += 1
        if status['status'] == STATUS_UNCOPYRIGHTED:
            uncopyrighted_cnt += 1
        if status['status'] == STATUS_WORKING:
            working_cnt += 1
    # all can not check
    if ret == STATUS_UNDETECTED:
        ret_code = 2
        return ret_code, status_listing
    if status_cnt > 0:
        if copyrighted_cnt == status_cnt or working_cnt == status_cnt or uncopyrighted_cnt == status_cnt:
            ret_code = 1
            return ret_code, status_listing
    return 4, None
Example No. 16
 def task_finished(self, celery_id, query_res):
     try:
         t = self.taskm[celery_id]['task']
         self.logger.info("finished query, task_id:%s, "
                          "site_asset_id: %s, celery_id:%s, "
                          "ret: %s, err: %s " %
                          (t.uuid, t.site_asset_id, celery_id,
                           query_res.ret, query_res.err))
         self.logger.debug("task_id:%s, out: %s", t.uuid, query_res.out)
         self.logger.debug("finished task info: %s" % str(t))
         #parse query result
         g_logger.info(
             trans2json(message="site_asset_id:%s, "
                        "task_uuid:%s, external_id:%s " %
                        (t.site_asset_id, t.uuid, t.external_id),
                        action="task finished query from celery"))
         if not isinstance(query_res, TaskRes):  #means catch some exception
             self.cleaner.request((t, BAD_OUTPUT, None))
         else:
             _, state, res = self.parse_query_res(query_res)
             self.cleaner.request((t, state, res))
     except Exception, ex:
         self.cleaner.request((t, BAD_OUTPUT, None))
         self.logger.error("task finished catch unhandle exception, "
                           "task_uuid:%s" % t.uuid,
                           exc_info=True)
Example No. 17
 def send_matches(self, task, matches=[], crr="", unrecognized=False):
     match_type = "no_match"
     if unrecognized:
         match_type = "unrecognized"
     elif len(matches):
         match_type = 'match'
     data = dict(id="null", jsonrpc="2.0",
                 method="matches",
                 params=dict(matches=matches,
                 site_asset_id=eval(task.site_asset_id), notification=crr,
                 match_type=match_type))
     params = dict(source="auto_match")
     req = None
     try:
         req = requests.post(self.matches_server, params=params,
                             data=json.dumps(data))
          if req.status_code != 200:
              self.logger.error("send matches failed, code:%s", req.status_code)
              raise SendMatchesError("send matches failed, task_id:%s" % task.uuid)
      except RequestException:
          self.logger.error("send matches failed, %s", task.uuid, exc_info=True)
          raise SendMatchesError("send matches failed")
     self.logger.info("send matches success, task_uuid:%s, site_asset_id:%s,"
                      "external_id:%s", task.uuid, task.site_asset_id,
                      task.external_id)
     g_logger.info(trans2json(message="task_uuid:%s, "
                   "site_asset_id:%s, external_id:%s " % \
                   (task.uuid, task.site_asset_id,
                    task.external_id), action="send matches success"))
Example No. 18
 def load(self):
     '''
         :return code `None`, `dict`
             `None`: no such task
             `{}`: in progress
             `{"keys1":"value1"...}`:normal results
     '''
     cache = self.load_from_cache()
     if cache is None:
         if self.has_parent():
             self.prefix_search = True
         g_logger.info(
             trans2json(message="site_asset_id:%s, "
                        "prefix_search:%s" %
                        (self.site_asset_id, self.prefix_search),
                        action="no hit cache"))
         tids = self.get_tids()
         self.logger.debug("task_ids: %s" % tids)
         format_data = None
         if tids == []:
             format_data = wrap_error(NO_TASK_ERROR['code'],
                                      NO_TASK_ERROR['message'], [])
         else:
             format_data = self.format_matches(tids)
         self.save_cache(format_data)
         return format_data
     else:
         self.logger.debug("hit cache, site_asset_id:[%s], cache: %s" %
                           (self.site_asset_id, cache))
         return cache
Example No. 19
 def load(self):
     '''
         :return code `None`, `dict`
             `None`: no such task
             `{}`: in progress
             `{"keys1":"value1"...}`:normal results
     '''
     cache = self.load_from_cache()
     if cache is None:
         if self.has_parent():
             self.prefix_search = True
         g_logger.info(trans2json(message="site_asset_id:%s, "
                                  "prefix_search:%s" % (self.site_asset_id, self.prefix_search),
                                  action="no hit cache"))
         tids = self.get_tids()
         self.logger.debug("task_ids: %s" % tids)
         format_data = None
         if tids == []:
             format_data = wrap_error(NO_TASK_ERROR['code'],
                                      NO_TASK_ERROR['message'], [])
         else:
             format_data = self.format_matches(tids)
         self.save_cache(format_data)
         return format_data
     else:
         self.logger.debug("hit cache, site_asset_id:[%s], cache: %s" %
                          (self.site_asset_id, cache))
         return cache
Example No. 20
    def finish(self, t, p, res):
        self.logger.info('to finish task, task_uuid:%s, site_asset_id:%s',
                         t.uuid, t.site_asset_id)
        self.logger.debug("res:%s " % str(res))
        assert res.matches != None
        code = WITHOUT_MATCH if len(res.matches) == 0 else WITH_MATCH
        if code == WITHOUT_MATCH:
            try:
                if db_txn(self.pool, partial(self.check_matches, t)):
                    code = WITH_MATCH
            except:
                pass

        tr = 'match' if code == WITH_MATCH else 'no_match'
        self.logger.debug('record finished task %s, site_asset_id: %s', t.uuid,
                          t.site_asset_id)
        try:
            ms = self.filter_matches(res.matches)
            for m in ms:
                g_logger.info(
                    trans2json(
                        message="company_id:%s, "
                        "meta_uuid:%s, instance_uuid:%s, vddb_company_id:%s" %
                        (t.account, m['meta_uuid'], m['instance_id'],
                         m['company_id']),
                        action='matches info'))
            mc = len(ms)
            #m = match_saver(self.hbase_pool, self.redis_conn, task_status, ms, res.crr)
            #m.save()
            self.send_matches(t, ms, res.crr)
            task_status = db_txn(self.pool,
                                 partial(self.load_task_status, t.uuid))
            self.update_hbase_task(task_status)
            db_txn(self.pool, partial(self.update_task, t, code, mc, tr))
        except:
            self.logger.error('failed to finish task: %s, site_asset_id: %s' %
                              (t.uuid, t.site_asset_id),
                              exc_info=True)
            # dooming may succeed, as it touches fewer tables
            self.doom(t, INTERNAL, p, res)
            return
        g_logger.info(
            trans2json(message="site_asset_id:%s, "
                       "task_uuid:%s, external_id:%s" %
                       (t.site_asset_id, t.uuid, t.external_id),
                       action="task query complete"))
        stats.incr(QUERY_SUCCESS, 1)
Example No. 21
def upload_file(upload_path, file_path):
    ret = -1
    err = ''
    try:
        args = "%s/swift upload '%s' '%s' " % (
            gv.swith_path, upload_path, file_path)
        ret, _, err = popen(args)
        if ret == 0:
            g_logger.info(trans2json('upload file success'))
            os.remove(file_path)
        else:
            g_logger.error(trans2json('upload file failed, cmd is %s, reason is %s' % (args, err)))
    except:
        g_logger.error(trans2json(
            "delete or upload bt file %s error %s" % (file_path, traceback.format_exc())))
    finally:
        return False if ret else True
Example No. 22
def query_vddb_async(req_hash, data):
    g_logger.debug(trans2json("query vddb async by hash %s" % str(req_hash)))
    
    mysystem = mysystem(gv.mysystem_user, gv.mysystem_passwd,
                        gv.mysystem_url, False, 3, g_logger)
    uuid = data['params']['external_id']
    ret, status_listing = mysystem.query(req_hash, uuid)
    return ret, status_listing
Example No. 23
def download_file(swift_path, download_path):
    ret = 0
    err = ''
    download_far_name = ''  # ensure the name exists for the finally clause
    try:
        swift_path_list = swift_path.split('/')
        container = swift_path_list[0]
        far_name = swift_path_list[-1]
        swift_name = swift_path[len(container) + 1:len(swift_path)]
        download_far_name = os.path.join(download_path, far_name)
        args = "%s/swift download %s  %s -o %s " % (
            gv.swith_path, container, swift_name, download_far_name)
        ret, _, err = popen(args)
        g_logger.info(trans2json('download file success: %s' % args))
    except Exception:
        g_logger.error(trans2json("download file error%s") %
                       traceback.format_exc())
    finally:
        return False if ret or err else True, download_far_name
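
The path handling assumes a container/dir/file layout in swift storage; for illustration, with invented values:

# swift_path = 'media/2013/episode.mp4'
#   container  -> 'media'
#   swift_name -> '2013/episode.mp4'
#   far_name   -> 'episode.mp4'
ok, local_path = download_file('media/2013/episode.mp4', '/tmp')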
Example No. 24
def run(queue):
    pro = producer()
    push = pusher(pro)
    while True:
        t = queue.get(block=True)
        logger.info('get a task to push, task_id: %s', t)
        g_logger.info(trans2json(message ='get a task to push, task_id: %s'%t,
            action='get task'))
        task = getTask(t)
        logger.info('------task:%s', task)
        try:
            re = push.getMatch(t)
            #logger.info('type:%s, %s', type(re), re)
            push.pushResult(re)
            updateFinished(t, 'success')
            dropUnpush(t)
            logger.info('succeed to push the match result , task_id: %s,'
                    ' result: %s', t, re)
            g_logger.info(trans2json(message='succeed to push the match result ,'
                ' external_id :%s, task_id: %s'
                %(task['i:external_id'], t, ), action='push result'))
        except resultError:
            logger.error("failed to get the result , task_id: %s", t)
            g_logger.error(trans2json(message='failed to push result,'
                ' external_id: %s, task_id: %s'
                %(task['i:external_id'], t), action='push result'))
            dropUnpush(t)
            updateFinished(t, 'failed')
        except AssertionError:
            logger.error("failed to get matches, task_id: %s", t)
            g_logger.error(trans2json(message='failed to push result,'
                ' external_id: %s, task_id: %s'
                %(task['i:external_id'], t), action='push result'))
            dropUnpush(t)
            updateFinished(t, 'failed')
        except:
            logger.error('failed to push result, reset status to new,'
                    ' task_id: %s,  Error:', t,  exc_info=True)
            g_logger.error(trans2json(message='failed to push result, reset status'
                ' to new, external_id: %s, task_id: %s '
                %(task['i:external_id'], t), action = 'retry to push'))
            changeStatus(t, 'new')
            logger.error("reset status to new , task_id: %s", t)
Example No. 25
 def POST(self):
     try:
         stats.incr(RESULT_INSERT, 1)
         web.header("Content-Type", "application/json")
         res = web.data()
         req = web.input()
         self.logger.info('input:%s', req)
         self.source = req.get("source", "")
         res = json.loads(res)
         self.logger.debug('get input :%s', req)
         self.logger.debug('get message :%s', res)
         g_logger.info(
             trans2json(message='get message ,input: %s, '
                        'msg: %s' % (req, res),
                        action='get request post'))
         if not self.checkParams(res):
             self.error_code = PARAMS_ERROR["code"]
             self.error_msg = PARAMS_ERROR["message"]
             self.error_data.append("site_asset_id")
             raise web.BadRequest(
                 wrap_error(self.error_code, self.error_msg,
                            self.error_data))
         ins = insert(self.site_asset_id, self.source, self.match_type,
                      self.matches, self.extra_info, self.crr, self.url,
                      self.parent_info)
         tid, src = ins.store_result()
         g_logger.info(
             trans2json(message='reply to caller, task_id:%s,'
                        ' source:%s' % (tid, src),
                        action='reply to caller'))
         stats.incr(RESULT_INSERT_SUCCESS, 1)
         if self.source == 'init':
             return init_result(tid, src)
         else:
             return insert_result()
     except Exception:
         stats.incr(RESULT_INSERT_FAILED, 1)
         self.logger.error("va-interface catch unhandle exception",
                           exc_info=True)
         self.error_code = INTERNAL_ERROR["code"]
         self.error_msg = INTERNAL_ERROR["message"]
         raise web.internalerror(message=wrap_error(
             self.error_code, self.error_msg, self.error_data))
Example No. 26
    def buf_tasks(self, reqs):
        accs = self.accounts()
        backends = self.backends()
        for t in reqs:
            try:
                self.logger.info("receivce task from picker, task_uuid: %s, "
                                 "site_asset_id: %s" % (t.uuid, t.site_asset_id))
                self.logger.debug("receive task info:%s" % t._asdict())
                g_logger.info(trans2json(message="site_asset_id:%s, "
                    "task_uuid:%s, external_id:%s" % (t.site_asset_id, t.uuid, t.external_id),
                    action="receive picked task"))
                if not self.task_check(t, accs, backends):
                    self.reply(t)
                    continue
                acc = accs[t.account]._asdict()
                acc["backends"] = [v._asdict() for v in acc["backends"]]
                backs = {}
                for k, v in backends.iteritems():
                    backs[k] = v._asdict()
                self.logger.debug("add task's account: %s, backends: %s" % (acc, backs))

                ct = Task().query.delay(t._asdict(), acc, backs)
                self.taskm[ct.task_id]['celery_task'] = ct
                self.taskm[ct.task_id]['task'] = t
                self.tasks.add(ct)
                self.logger.info("add task to celery, task_uuid: %s, "
                                 "site_asset_id: %s, celery_uuid: %s " % \
                                 (t.uuid, t.site_asset_id, ct.task_id))
                g_logger.info(trans2json(message="site_asset_id:%s, "
                              "task_uuid:%s, external_id:%s" % (t.site_asset_id, t.uuid,
                              t.external_id), action="add task to celery"))

            except Exception, ex:
                self.reply(t)
                self.logger.error("catch exception from buf tasks, "
                                  "task_uuid: %s , site_asset_id: %s" % (t.uuid,
                                  t.site_asset_id), exc_info=True)
                continue
            try:
                db_txn(self.pool, partial(self.record, t))
            except Exception:
                self.logger.error("failed to record execution for task %s" % t.uuid)
Example No. 27
 def process_task(self, body, message):
     try:
         data = json.loads(body)
         utils.digest = data['params']['digest']
         g_logger_info.info(
             trans2json("receive CAS finsh  message %s" % (data),
                        "qb_pull_receive_cas"))
         gv.statsd_conn.incr(
             "thunder.querybroker.qbpull.receive_cas_finish_message", 1)
         result = self.check_input_params(data)
         if result[0] != 0:
             error_message = self.trans_error_json(result, data)
             message.ack()
             g_logger.error(trans2json("response info %s" % error_message))
             return
         else:
             # check error code
             error_code = int(data['params']['error_code'])
             # download correct
             if error_code == gv.DOWNLOAD_SUCCESS:
                 # write to redis url/bt hash and dna  hash
                 #writeHashToRedis(data)
                 # get query hash result
                 if data['params'].has_key('files'):
                     self.parse_and_send_vddbmessage(data)
                     message.ack()
                     return
                 else:
                     self.parse_and_send_casmessage(data)
                     message.ack()
                     return
             else:
                 # download error
                 if error_code in gv.UNRECOGNIZED_ERROR_LIST:
                     post_to_vddbdnaerror(data, error_code, '')
                 message.ack()
                 return
     except Exception:
         message.ack()
         g_logger.error(
             trans2json("worker_query errors happend %s" %
                        str(traceback.format_exc())))
Example No. 28
def upload_file(upload_path, file_path):
    ret = -1
    err = ''
    try:
        args = "%s/swift upload '%s' '%s' " % (gv.swith_path, upload_path,
                                               file_path)
        ret, _, err = popen(args)
        if ret == 0:
            g_logger.info(trans2json('upload file success'))
            os.remove(file_path)
        else:
            g_logger.error(
                trans2json('upload file failed, cmd is %s, reason is %s' %
                           (args, err)))
    except:
        g_logger.error(
            trans2json("delete or upload bt file %s error %s" %
                       (file_path, traceback.format_exc())))
    finally:
        return False if ret else True
Example No. 29
 def pick(self, t):
     self.picked[t.account].add(t.id)
     if t.account in self.fetching:
         self.fetching[t.account].add(t.id)
     self.manager.request(t)
     self.logger.info('picked task %s for account %s, site_asset_id %s',
                      t.uuid, t.account, t.site_asset_id)
     g_logger.info(trans2json(action="picked task to query",
                              message="task_uuid: %s, site_asset_id: %s, external_id: %s" % \
                              (t.uuid, t.site_asset_id, t.external_id)))
     stats.incr(PICKED, 1)
Example No. 30
 def send_task_pushresult(self, data):
     message = json.dumps(data)
     g_logger_info.info(trans2json("send to push result message %s" % (data),"qb_push_push_result"))
     gv.statsd_conn.incr("thunder.querybroker.qbpush.send_qbresultpush", 1)
     with producers[self.pushresult_connection].acquire(block=True) as producer:
         producer.publish(message,
                          serializer='json',
                          compression='bzip2',
                          exchange=self.pushresult_exchange,
                          declare=[self.pushresult_exchange],
                          routing_key=gv.pushresult_routing_key)
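
This method leans on kombu's global producers pool plus a connection and exchange prepared elsewhere in the worker. A hedged sketch of that setup, with assumed names and broker URL:

from kombu import Connection, Exchange
from kombu.pools import producers  # the global pool indexed above

class Worker(object):
    def __init__(self):
        # assumed broker URL and exchange label, for illustration only
        self.pushresult_connection = Connection('amqp://localhost//')
        self.pushresult_exchange = Exchange('pushresult', type='direct')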
Example No. 31
def post_to_vddbdnaerror(data, code, dna_hash):
    files_size_len = 0
    if data['params'].has_key('files'):
        files_size_len = len(data['params']['files'])
    message = {}
    message['jsonrpc'] = '2.0'
    message['method'] = 'insert'
    message['id'] = 'null'
    message['params'] = {}
    message['params']['site_asset_id'] = []
    if code in gv.UNRECOGNIZED_ERROR_LIST:
        message['params']['match_type'] = 'unrecognized'
    elif code in gv.NOMATCH_ERROR_LIST:
        message['params']['match_type'] = 'no_match'
    set_parent_info(data, message)
    if (data['params'].has_key('seed_file')
            and files_size_len > 1) or files_size_len > 1:
        if data['params'].has_key('seed_file'):
            if data['params']['seed_file']['hash'] != None and data['params'][
                    'seed_file']['hash'] != '':
                message['params']['site_asset_id'].append(
                    data['params']['seed_file']['hash'] + '-' + dna_hash)
        if data['params'].has_key('url'):
            if data['params']['url']['hash'] != None and data['params']['url'][
                    'hash'] != '':
                message['params']['site_asset_id'].append(
                    data['params']['url']['hash'] + '-' + dna_hash)
        if data['params']['thunder_hash'] != None and data['params'][
                'thunder_hash'] != '':
            message['params']['site_asset_id'].append(
                data['params']['thunder_hash'] + '-' + dna_hash)
    else:
        if data['params'].has_key('seed_file'):
            if data['params']['seed_file']['hash'] != None and data['params'][
                    'seed_file']['hash'] != '':
                message['params']['site_asset_id'].append(
                    data['params']['seed_file']['hash'])
        if data['params'].has_key('url'):
            if data['params']['url']['hash'] != None and data['params']['url'][
                    'hash'] != '':
                message['params']['site_asset_id'].append(
                    data['params']['url']['hash'])
        if data['params']['thunder_hash'] != None and data['params'][
                'thunder_hash'] != '':
            message['params']['site_asset_id'].append(
                data['params']['thunder_hash'])
    if dna_hash != '':
        message['params']['site_asset_id'].append(dna_hash)
    header = {"Content-Type": "application/json"}
    conn = httplib.HTTPConnection(gv.mysystem_host, int(gv.mysystem_port))
    g_logger_info.info(trans2json("post %s  to  vddb-async matches" % message))
    conn.request('POST', "/vddb-async/matches?source=auto_match",
                 json.dumps(message), header)
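
Assembled, the JSON-RPC body posted to /vddb-async/matches has roughly this shape, with invented hash values; note the snippet sends the request without reading the response:

message = {
    'jsonrpc': '2.0',
    'method': 'insert',
    'id': 'null',
    'params': {
        'match_type': 'unrecognized',
        'site_asset_id': ['<thunder_hash>-<dna_hash>', '<dna_hash>'],
    },
}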
Example No. 32
def main():
    args = docopt.docopt(__doc__, version=gv.version)
    cfg_file = get_conf_abspath(args)
    check_conf_validation(cfg_file)
    cfg = parse_conf_file(cfg_file)
    init_logger(cfg)
    get_global_vars(cfg)
    gv.dp.start()
    init_statsd()
    while True:
        with Connection(gv.qb_url) as conn:
            try:
                worker = Worker(conn, gv.qb_exchange, gv.qb_queue,
                                gv.qb_routing_key)
                g_logger.info(trans2json('query_broker qb_push service start'))
                worker.run()
            except Exception:
                g_logger.error(
                    trans2json("qb_push %s happend!" %
                               str(traceback.format_exc())))
    gv.dp.join()
Example No. 33
    def GET(self):
        try:
            stats.incr(RECEIVE_REQUEST, 1)
            web.header("Content-Type", "application/json")
            req = web.input()
            self.site_asset_id = str(req.get('site_asset_id', ""))
            if req.get('all_matches', 'true').lower() == 'false':
                self.all_matches = False
            if self.site_asset_id == "":
                self.error_code = PARAMS_ERROR["code"]
                self.error_msg = PARAMS_ERROR["message"]
                self.error_data.append("site_asset_id")
                raise web.BadRequest(
                    wrap_error(self.error_code, self.error_msg,
                               self.error_data))

            self.logger.info(
                "get history matches, site_asset_id: %s, "
                "all_matches: %s", self.site_asset_id, self.all_matches)
            g_logger.info(trans2json(message="site_asset_id: %s, "
                                     "all_matches: %s" % \
                                    (self.site_asset_id, self.all_matches),
                                     action="get history matches"))
            mr = match_result(self.site_asset_id, self.all_matches)
            res = mr.load()
            self.logger.debug(
                "site_asset_id: %s,"
                "all_matches: %s, history matches: %s", self.site_asset_id,
                self.all_matches, res)
            if res == None:
                g_logger.info(
                    trans2json(message="site_asset_id:%s, "
                               "all_matches: %s" %
                               (self.site_asset_id, self.all_matches),
                               action="no such task"))
            return res
        except web.BadRequest, e:
            stats.incr(REQUEST_ERROR, 1)
            self.logger.error("site_asset_id is null")
            raise e
Example No. 34
 def check_input_params(self, data):
     method = "finish_task"
     result = [0, 0]
     if not data.has_key('jsonrpc'):
         result[0] = JSONRPC_ERROR
         result[1] = "There is no key named jsonrpc"
         g_logger.error(
             trans2json("input params check failed: %s" % result[1]))
     elif not data.has_key('method') or data['method'] != method:
         result[0] = METHOD_ERROR
         result[1] = "There is no key named method or method is not " + method
         g_logger.error(
             trans2json("input params check failed: %s" % result[1]))
     elif not data.has_key('params'):
         result[0] = PARAMS_ERROR
         result[1] = "There is no key named params"
         g_logger.error(
             trans2json("input params check failed: %s" % result[1]))
      elif (not data['params']['additional_info'].has_key('client_id')
            or data['params']['additional_info']['client_id'] == ''):
         result[0] = CLIENT_ID_ERROR
         result[1] = ("There is no key named client_id in params or "
                      "client_id is null ")
         g_logger.error(
             trans2json("input params check failed: %s" % result[1]))
     elif not data['params'].has_key('error_code'):
         result[0] = ERROR_CODE_ERROR
         result[1] = "There is no key named error_code"
         g_logger.error(
             trans2json("input params check failed: %s" % result[1]))
     else:
         g_logger.info(trans2json("----Params check done.----"))
     return result
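
A message that clears all five checks would minimally carry the following; the values are illustrative:

msg = {
    'jsonrpc': '2.0',
    'method': 'finish_task',
    'params': {
        'error_code': 0,
        'additional_info': {'client_id': 'client-001'},
    },
}
# check_input_params(msg) -> [0, 0]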
Example No. 35
    def finish(self, t, p, res):
        self.logger.info('to finish task, task_uuid:%s, site_asset_id:%s',
                         t.uuid, t.site_asset_id)
        self.logger.debug("res:%s " % str(res))
        assert res.matches != None
        code = WITHOUT_MATCH if len(res.matches) == 0 else WITH_MATCH
        if code == WITHOUT_MATCH:
            try:
                if db_txn(self.pool, partial(self.check_matches, t)):
                    code = WITH_MATCH
            except:
                pass

        tr = 'match' if code == WITH_MATCH else 'no_match'
        self.logger.debug('record finished task %s, site_asset_id: %s',
                          t.uuid, t.site_asset_id)
        try:
            ms = self.filter_matches(res.matches)
            for m in ms:
                g_logger.info(trans2json(message="company_id:%s, "
                "meta_uuid:%s, instance_uuid:%s, vddb_company_id:%s" % (t.account, m['meta_uuid'],
                              m['instance_id'], m['company_id']),
                              action='matches info'))
            mc = len(ms)
            #m = match_saver(self.hbase_pool, self.redis_conn, task_status, ms, res.crr)
            #m.save()
            self.send_matches(t, ms, res.crr)
            task_status = db_txn(self.pool, partial(self.load_task_status, t.uuid))
            self.update_hbase_task(task_status)
            db_txn(self.pool, partial(self.update_task, t, code, mc, tr))
        except:
            self.logger.error('failed to finish task: %s, site_asset_id: %s' %
                              (t.uuid, t.site_asset_id), exc_info=True)
            # dooming may succeed, as it touches fewer tables
            self.doom(t, INTERNAL, p, res)
            return
        g_logger.info(trans2json(message="site_asset_id:%s, "
                      "task_uuid:%s, external_id:%s" % (t.site_asset_id, t.uuid,
                      t.external_id), action="task query complete"))
        stats.incr(QUERY_SUCCESS, 1)
Example No. 36
    def store_result(self):
        task_id = None
        source = None
        res = self.get_sid_info()
        if res:
            source = res['t:source']
            self.logger.info('start update res:%s', res)
            task_id = res['t:task_uuid']
            self.logger.info('task_id:%s', task_id)
            if self.source == 'manual' \
                    or self.source == 'auto_match' \
                    or self.source == 'init' \
                    or (res['t:source'] == 'manual_tmp' and
                        self.source == 'manual_tmp') \
                    or (self.source == 'manual_tmp' and
                        res['t:source'] == 'auto_match' and
                        not self.check_finished(task_id)):
                self.logger.info('source:%s, type:%s, check_finished:%s',
                                 self.source, type(self.source),
                                 self.check_finished(task_id))
                self.update_result(self.source, task_id, self.match_type,
                                   self.matches, self.extra_info, self.url,
                                   self.site_asset_id)
                if not self.has_parent(self.site_asset_id):
                    self.save_redis(task_id, self.source, self.match_type)
            g_logger.info(
                trans2json(message='succeed to update result,'
                           ' task_id:%s, match_type:%s, source:%s' %
                           (task_id, self.match_type, self.source),
                           action='update result'))
        else:
            self.logger.info('start to store new task')
            task_id = str(uuid.uuid1())
            self.store_sid_tid(self.site_asset_id, task_id, self.source)
            self.store_tid_sid(task_id, self.site_asset_id)
            self.save_matches(self.match_type, task_id, self.matches)
            self.store_task_info(task_id, self.extra_info, self.url)
            self.logger.info('succeed to store task: %s, task_id %s',
                             self.site_asset_id, task_id)

            #storeHbasempl('unpush', str(task_d), {'u:match_type':'new'})
            #storeFinished('finished', )
            #self.store_unpush(task_id, 'new')
            self.save_redis(task_id, self.source, self.match_type)
        #if self.source !='init' and  self.source != 'manual_tmp':
        #    self.store_unpush(task_id, 'new')
        if self.parent_info:
            for i in self.parent_info:
                for k, v in i.items():
                    redis_conn.delete(k + "-s")
                    self.store_parent_task(k, v)
        self.record_result_statsd()
        return task_id, source
Example No. 37
 def send_task_priority_escalator(self, data):
     data['params']['downloader_time'] = 0
     data['params']['downloader_retry'] = 0
     message = json.dumps(data)
     g_logger_info.info(trans2json("send to send task priority escalator message %s" % (data),"qb_push_send_priority"))
     gv.statsd_conn.incr("thunder.querybroker.qbpush.send_qbpriority", 1)
     with producers[self.taskpriority_connection].acquire(block=True) as producer:
         producer.publish(message,
                          serializer='json',
                          compression='bzip2',
                          exchange=self.taskpriority_exchange,
                          declare=[self.taskpriority_exchange],
                          routing_key=gv.taskpriorit_routing_key)
Example No. 38
 def process_task(self, body, message):
     try:
         data = body
         #data = json.loads(body)
         utils.digest = data['params']['digest']
         g_logger_info.info(trans2json("receive gateway task message %s" % (body),'qb_push_receive_gateway'))
         #g_logger_info.info(trans2json("task_uuid:%s"%data['params']['external_id']))
         gv.statsd_conn.incr("thunder.querybroker.qbpush.receive_gateway_message", 1)
         result = self.check_input_params(data)
         if result[0] != 0:
             error_message = self.trans_error_json(result, data)
             g_logger.error(trans2json("response info %s" % error_message))
         else:
             ret_code, result = query_hash(data)
             if ret_code is None:
                 self.send_task_priority_escalator(data)
         message.ack()
         return
     except Exception:
         g_logger.error(
             trans2json("process_task errors happend %s" % str(traceback.format_exc())))
         message.ack()
Example No. 39
 def process(self, config, body):
     #db_txn(pool, partial(store), data, cre)
     self.logger.info('start to process message')
     task_id = None
     task = defaultdict(list)
     try:
         #self.process(config, task)
         req = requests.post(config['matches_server'] + '?source=init',
                 data=json.dumps(body))
         self.logger.info('get request :%s, type:%s', req.content,
                 type(req.content))
         res = json.loads(req.content)
         task_id = res['result']['task_id']
         parseParams(body, task)
         if res['result']['source'] == 'auto_match':
             db_txn(pool, partial(updateStatus), task_id,
                     task['site_asset_id'], task['external_id'])
             self.logger.info('this task has already been in hbase reset'
                 ' status to new , site_asset_ids : %s',task['site_asset_id'])
             g_logger.info(trans2json(message='task has already been in'
                 ' hbase, external_id:%s, site_asset_id:%s, task_id:%s'%
                 (task['external_id'], task['site_asset_id'], task_id, ),
                 action = "reset status to new"))
         else:
             genTask(task)
             task['task_uuid'] = task_id
             db_txn(pool, partial(storeTaskMysql), task)
             g_logger.info(trans2json(message="succeed to store task external_id:%s,"
              " site_asset_id:%s, task_id:%s"%(task['external_id'], task['site_asset_id'],
             task['task_uuid']), action = 'store task to db'))
     except:
         self.logger.info('failed to store task, start to retry, task_uuid: %s'
                 ' site_asset_id: %s',
                 task_id, task['site_asset_id'], exc_info=True )
         g_logger.error(trans2json(message='failed to store task, start to'
             ' retry ,external_id:%s, site_asset_id: %s, task_id: %s'
                 %(task['external_id'], task['site_asset_id'], task_id),
                 action='store task to db'))
         raise sotreError
Example No. 40
 def send_task_pushresult(self, data):
     message = json.dumps(data)
     g_logger_info.info(
         trans2json("send to push result message %s" % (data),
                    "qb_push_push_result"))
     gv.statsd_conn.incr("thunder.querybroker.qbpush.send_qbresultpush", 1)
     with producers[self.pushresult_connection].acquire(
             block=True) as producer:
         producer.publish(message,
                          serializer='json',
                          compression='bzip2',
                          exchange=self.pushresult_exchange,
                          declare=[self.pushresult_exchange],
                          routing_key=gv.pushresult_routing_key)
Example No. 41
 def do_doom(self, t, code, queue_at, deadline):
     logger = logging.getLogger('mwtm_cleaner')
     if queue_at != None and (deadline == None or deadline == 0 or \
                              deadline > queue_at):
         logger.debug('to retry task %s, queue at %s', t.uuid, queue_at)
         yield db_execute(RETRY_TASK, queue_at, code, t.id)
         #yield db_execute(RENEW_EVENT, t.uuid, 'retry')
         g_logger.info(trans2json(message="task_uuid:%s, "
                       "site_asset_id:%s, deadline:%s, external_id:%s " % (t.uuid,
                       t.site_asset_id, deadline, t.external_id),
                       action="retry task"))
     else:
         logger.debug('to fail task %s', t.uuid)
         g_logger.info(trans2json(message="task_uuid:%s, "
                       "site_asset_id:%s, external_id:%s" % (t.uuid,
                       t.site_asset_id, t.external_id), action="to fail task"))
         rc, _ = yield db_query(CHECK_TASK, t.id)
         if rc <= 0:
             yield db_execute(FAIL_TASK, code, t.id)
             self.send_matches(t, unrecognized=True)
             task_status = db_txn(self.pool, partial(self.load_task_status, t.uuid))
             self.update_hbase_task(task_status)
         stats.incr(QUERY_FAILED, 1)
Example No. 42
def main():
    args = docopt.docopt(__doc__, version=gv.version)
    cfg_file = get_conf_abspath(args)
    check_conf_validation(cfg_file)
    cfg = parse_conf_file(cfg_file)
    init_logger(cfg)
    get_global_vars(cfg)
    init_statsd()
    gv.dp.start()
    thread_tasker = fetch_query_result()
    thread_tasker.start()
    while True:
        with Connection(gv.finsh_url) as conn:
            try:
                worker = Worker_query(conn)
                g_logger.info(
                    trans2json("start exceptionhandle and query service"))
                worker.run()
            except Exception:
                g_logger.error(
                    trans2json("error happend! %s" % str(traceback.format_exc())))
    thread_tasker.join()
    gv.dp.join()
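Note that the unconditional reconnect loop above never breaks, so the trailing join() calls are unreachable in practice. Worker_query is this project's consumer; a minimal sketch of a kombu consumer of that shape, where the queue name and callback body are assumptions:

from kombu import Connection, Queue
from kombu.mixins import ConsumerMixin

class WorkerQuerySketch(ConsumerMixin):
    task_queue = Queue('qb.query_result')  # hypothetical queue name

    def __init__(self, connection):
        self.connection = connection  # ConsumerMixin expects this attribute

    def get_consumers(self, Consumer, channel):
        return [Consumer(queues=[self.task_queue],
                         callbacks=[self.on_message],
                         accept=['json'])]

    def on_message(self, body, message):
        # process the query result here, then acknowledge
        message.ack()

with Connection('amqp://guest:guest@localhost//') as conn:  # placeholder URL
    WorkerQuerySketch(conn).run()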
Example No. 43
 def send_task_to_vddb(self, data):
     message = json.dumps(data)
     #message = data
     g_logger_info.info(
         trans2json("send to vddb async query %s" % (data),
                    "qb_pull_send_vddb"))
     gv.statsd_conn.incr("thunder.querybroker.qbpull.send_to_vddb_query", 1)
     with producers[self.vddb_connection].acquire(block=True) as producer:
         producer.publish(message,
                          serializer='json',
                          compression='bzip2',
                          exchange=self.vddb_exchange,
                          declare=[self.vddb_exchange],
                          routing_key=gv.vddb_queryrouting_key)
Example No. 44
 def POST(self):
     try:
         stats.incr(RESULT_INSERT, 1)
         web.header("Content-Type", "application/json")
         res = web.data()
         req = web.input()
         self.logger.info('input:%s', req)
         self.source = req.get("source","")
         res = json.loads(res)
         self.logger.debug('get input :%s', req)
         self.logger.debug('get message :%s', res)
         g_logger.info(trans2json(message='get message, input: %s, '
             'msg: %s' % (req, res), action='get request post'))
         if not self.checkParams(res):
             self.error_code = PARAMS_ERROR["code"]
             self.error_msg = PARAMS_ERROR["message"]
             self.error_data.append("site_asset_id")
             raise web.BadRequest(wrap_error(self.error_code, self.error_msg,
                               self.error_data))
         ins = insert(self.site_asset_id, self.source, self.match_type,
                 self.matches, self.extra_info, self.crr, self.url, self.parent_info)
         tid, src = ins.store_result()
         g_logger.info(trans2json(message='reply to caller, task_id:%s, '
             'source:%s' % (tid, src), action='reply to caller'))
         stats.incr(RESULT_INSERT_SUCCESS, 1)
         if self.source == 'init':
             return init_result(tid, src)
         else:
             return insert_result()
     except Exception:
         stats.incr(RESULT_INSERT_FAILED, 1)
         self.logger.error("va-interface catch unhandle exception", exc_info=True)
         self.error_code = INTERNAL_ERROR["code"]
         self.error_msg = INTERNAL_ERROR["message"]
         raise web.internalerror(message=wrap_error(self.error_code,
                                                 self.error_msg,
                                                 self.error_data))
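This POST handler is a web.py class; how such a class is mounted is not shown in the listing. A runnable sketch of the mounting, with the URL pattern and response body as assumptions:

import json
import web

urls = ('/vddb-async/matches', 'matches')  # hypothetical mount point

class matches(object):
    def POST(self):
        web.header("Content-Type", "application/json")
        body = json.loads(web.data())  # raw request body, as above
        params = web.input()           # query-string parameters
        source = params.get('source', '')
        # ... validate and store, as checkParams()/store_result() do ...
        return json.dumps({'source': source, 'keys': sorted(body.keys())})

if __name__ == '__main__':
    web.application(urls, globals()).run()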
Example No. 45
    def GET(self):
        try:
            stats.incr(RECEIVE_REQUEST, 1)
            web.header("Content-Type", "application/json")
            req = web.input()
            self.site_asset_id = str(req.get('site_asset_id', ""))
            if req.get('all_matches', 'true').lower() == 'false':
                self.all_matches = False
            if self.site_asset_id == "":
                self.error_code = PARAMS_ERROR["code"]
                self.error_msg = PARAMS_ERROR["message"]
                self.error_data.append("site_asset_id")
                raise web.BadRequest(wrap_error(self.error_code, self.error_msg,
                                  self.error_data))

            self.logger.info("get history matches, site_asset_id: %s, "
                             "all_matches: %s",
                             self.site_asset_id, self.all_matches)
            g_logger.info(trans2json(message="site_asset_id: %s, "
                                     "all_matches: %s" % \
                                    (self.site_asset_id, self.all_matches),
                                     action="get history matches"))
            mr = match_result(self.site_asset_id, self.all_matches)
            res = mr.load()
            self.logger.debug("site_asset_id: %s,"
                              "all_matches: %s, history matches: %s",
                              self.site_asset_id, self.all_matches, res)
            if res is None:
                g_logger.info(trans2json(message="site_asset_id:%s, "
                                         "all_matches: %s" % (self.site_asset_id,
                                         self.all_matches), action="no such task"))
            return res
        except web.BadRequest, e:
            stats.incr(REQUEST_ERROR, 1)
            self.logger.error("site_asset_id is null")
            raise e
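web.py can drive a handler like this GET in-process, which is handy for exercising the parameter handling without a listening server. A self-contained sketch with a stand-in handler; the mount path is an assumption:

import web

class match_history(object):  # minimal stand-in for the handler above
    def GET(self):
        web.header("Content-Type", "application/json")
        req = web.input()
        return '{"site_asset_id": "%s"}' % req.get('site_asset_id', '')

urls = ('/history/matches', 'match_history')
app = web.application(urls, globals())

if __name__ == '__main__':
    # app.request() runs the handler directly and returns status, headers, data
    resp = app.request('/history/matches?site_asset_id=abc123&all_matches=false')
    print resp.status
    print resp.data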
Example No. 47
    def store_result(self):
        task_id = None
        source = None
        res = self.get_sid_info()
        if res:
            source = res['t:source']
            self.logger.info('start update res:%s', res)
            task_id = res['t:task_uuid']
            self.logger.info('task_id:%s', task_id)
            if self.source == 'manual' \
                    or self.source == 'auto_match' \
                    or self.source == 'init' \
                    or (res['t:source'] == 'manual_tmp' and self.source
                            == 'manual_tmp') \
                    or (self.source == 'manual_tmp' and res['t:source'] ==
                            'auto_match' and not self.check_finished(task_id)):
                self.logger.info('source:%s, type:%s, check_finished:%s', self.source,
                            type(self.source), self.check_finished(task_id))
                self.update_result(self.source, task_id, self.match_type, self.matches,
                                self.extra_info, self.url, self.site_asset_id)
                if not self.has_parent(self.site_asset_id):
                    self.save_redis(task_id, self.source, self.match_type)
            g_logger.info(trans2json(message='succeeded to update result, '
                'task_id:%s, match_type:%s, source:%s' % (task_id,
                    self.source, self.match_type), action='update result'))
        else:
            self.logger.info('start to store new task')
            task_id = str(uuid.uuid1())
            self.store_sid_tid(self.site_asset_id, task_id, self.source)
            self.store_tid_sid(task_id, self.site_asset_id)
            self.save_matches(self.match_type, task_id, self.matches)
            self.store_task_info(task_id, self.extra_info, self.url)
            self.logger.info('succeed to store task: %s, task_id %s',
                    self.site_asset_id, task_id)

            #storeHbasempl('unpush', str(task_d), {'u:match_type':'new'})
            #storeFinished('finished', )
            #self.store_unpush(task_id, 'new')
            self.save_redis(task_id, self.source, self.match_type)
        #if self.source != 'init' and self.source != 'manual_tmp':
        #    self.store_unpush(task_id, 'new')
        if self.parent_info:
            for i in self.parent_info:
                for k, v in i.items():
                    redis_conn.delete(k+"-s")
                    self.store_parent_task(k, v)
        self.record_result_statsd()
        return task_id, source
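store_result keeps two indexes, site_asset_id -> task_uuid and task_uuid -> site_asset_id list, via store_sid_tid/store_tid_sid. A hypothetical sketch of that dual write with redis-py; the key prefixes and the connection are placeholders, not this project's schema:

import json
import uuid

import redis

r = redis.StrictRedis()  # placeholder connection

def store_new_task(site_asset_ids, source):
    task_id = str(uuid.uuid1())
    for sid in site_asset_ids:
        r.set('sid:' + sid, task_id)  # site_asset_id -> task_uuid
    r.set('tid:' + task_id, json.dumps({'site_asset_id': site_asset_ids,
                                        'source': source}))
    return task_id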
Example No. 48
 def send_task_priority_escalator(self, data):
     data['params']['downloader_time'] = 0
     data['params']['downloader_retry'] = 0
     message = json.dumps(data)
     g_logger_info.info(
         trans2json(
             "send task to priority escalator, message %s" % (data),
             "qb_push_send_priority"))
     gv.statsd_conn.incr("thunder.querybroker.qbpush.send_qbpriority", 1)
     with producers[self.taskpriority_connection].acquire(
             block=True) as producer:
         producer.publish(message,
                          serializer='json',
                          compression='bzip2',
                          exchange=self.taskpriority_exchange,
                          declare=[self.taskpriority_exchange],
                          routing_key=gv.taskpriorit_routing_key)
Example No. 49
 def check_input_params(self, data):
     method = "submit_task"
     result = [0, 0]
     if not data.has_key('jsonrpc'):
         result[0] = 121201
         result[1] = "There is no key named jsonrpc"
         g_logger.error(
             trans2json("input params check failed: %s" % result[1]))
     elif not data.has_key('method') or data['method'] != method:
         result[0] = 121202
         result[1] = "There is no key named method or method is not " + method
         g_logger.error(
             trans2json("input params check failed: %s" % result[1]))
     elif not data.has_key('params'):
         result[0] = 121203
         result[1] = "There is no key named params"
         g_logger.error(
             trans2json("input params check failed: %s" % result[1]))
     elif not data['params'].has_key('additional_info'):
         result[0] = 121204
         result[1] = "There is no key named additional_info in params"
         g_logger.error(
             trans2json("input params check failed: %s" % result[1]))
     elif not data['params'].has_key(
             'external_id') or data['params']['external_id'] == '':
         result[0] = 121210
         result[1] = "There is no key named external_id or external_id is null"
         g_logger.error(
             trans2json("input params check failed: %s" % result[1]))
     elif not data['params']['thunder_hash']:
         # note: the missing-hash error is only raised when url and seed_file
         # are both present and both hashless; absent keys slip through
         if data['params'].has_key('url'):
             if not data['params']['url']['hash']:
                 if data['params'].has_key('seed_file'):
                     if not data['params']['seed_file']['hash']:
                         result[0] = 121211
                         result[1] = "There is no hash in params"
                         g_logger.error(
                             trans2json("input params check failed: %s" %
                                        result[1]))
     else:
         g_logger.debug(trans2json("----Params check done.----"))
     return result
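The checks above are a chain of elif branches, each carrying one code/message pair. The same validation can be written as a rule table so codes and messages stay in one place; a sketch assuming the codes above are the complete set (written with `in` instead of has_key, but testing the same keys):

def check_input_params_sketch(data, method='submit_task'):
    rules = [
        (lambda d: 'jsonrpc' not in d,
         121201, "There is no key named jsonrpc"),
        (lambda d: d.get('method') != method,
         121202, "There is no key named method or method is not " + method),
        (lambda d: 'params' not in d,
         121203, "There is no key named params"),
        (lambda d: 'additional_info' not in d.get('params', {}),
         121204, "There is no key named additional_info in params"),
        (lambda d: not d.get('params', {}).get('external_id'),
         121210, "There is no key named external_id or external_id is null"),
    ]
    for failed, code, message in rules:
        if failed(data):
            return [code, message]
    return [0, 0]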
Example No. 50
def post_to_vddbdnaerror(data, code, dna_hash):
    files_size_len = 0
    if data['params'].has_key('files'):
        files_size_len = len(data['params']['files'])
    message = {}
    message['jsonrpc'] = '2.0'
    message['method'] = 'insert'
    message['id'] = 'null'
    message['params'] = {}
    message['params']['site_asset_id'] = []
    if code in gv.UNRECOGNIZED_ERROR_LIST:
        message['params']['match_type'] = 'unrecognized'
    elif code in gv.NOMATCH_ERROR_LIST:
        message['params']['match_type'] = 'no_match'
    set_parent_info(data, message)
    # the original test `(has_key('seed_file') and files_size_len > 1) or
    # files_size_len > 1` reduces to the second clause alone
    if files_size_len > 1:
        if data['params'].has_key('seed_file'):
            if data['params']['seed_file']['hash'] is not None and data['params']['seed_file']['hash'] != '':
                message['params']['site_asset_id'].append(data['params']['seed_file']['hash'] + '-' + dna_hash)
        if data['params'].has_key('url'):
            if data['params']['url']['hash'] is not None and data['params']['url']['hash'] != '':
                message['params']['site_asset_id'].append(data['params']['url']['hash'] + '-' + dna_hash)
        if data['params']['thunder_hash'] is not None and data['params']['thunder_hash'] != '':
            message['params']['site_asset_id'].append(data['params']['thunder_hash'] + '-' + dna_hash)
    else:
        if data['params'].has_key('seed_file'):
            if data['params']['seed_file']['hash'] is not None and data['params']['seed_file']['hash'] != '':
                message['params']['site_asset_id'].append(data['params']['seed_file']['hash'])
        if data['params'].has_key('url'):
            if data['params']['url']['hash'] is not None and data['params']['url']['hash'] != '':
                message['params']['site_asset_id'].append(data['params']['url']['hash'])
        if data['params']['thunder_hash'] is not None and data['params']['thunder_hash'] != '':
            message['params']['site_asset_id'].append(data['params']['thunder_hash'])
    if dna_hash != '':
        message['params']['site_asset_id'].append(dna_hash)
    header = {"Content-Type": "application/json"}
    conn = httplib.HTTPConnection(gv.mysystem_host, int(gv.mysystem_port))
    g_logger_info.info(trans2json("post %s  to  vddb-async matches" % message))
    conn.request('POST', "/vddb-async/matches?source=auto_match", json.dumps(message), header)
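The request above sends the POST but never reads the response, so HTTP errors pass silently. A minimal sketch of the same call with a status check; host, port, and path match the call above, while the error handling is an assumption:

import json
import httplib  # Python 2; http.client in Python 3

def post_json(host, port, path, payload):
    conn = httplib.HTTPConnection(host, int(port))
    try:
        conn.request('POST', path, json.dumps(payload),
                     {"Content-Type": "application/json"})
        resp = conn.getresponse()
        if resp.status != 200:
            # surface the failure instead of dropping it on the floor
            raise IOError("POST %s failed: %d %s" % (path, resp.status, resp.reason))
        return resp.read()
    finally:
        conn.close()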
Example No. 52
 def task_finished(self, celery_id, query_res):
     try:
         t = self.taskm[celery_id]['task']
         self.logger.info("finished query, task_id:%s, "
                          "site_asset_id: %s, celery_id:%s, "
                          "ret: %s, err: %s " %
                          (t.uuid, t.site_asset_id, celery_id,
                           query_res.ret, query_res.err))
         self.logger.debug("task_id:%s, out: %s", t.uuid, query_res.out)
         self.logger.debug("finished task info: %s" % str(t))
         #parse query result
         g_logger.info(trans2json(message="site_asset_id:%s, "
                       "task_uuid:%s, external_id:%s " % (t.site_asset_id, t.uuid,
                       t.external_id), action="task finished query from celery"))
         if not isinstance(query_res, TaskRes):  # anything other than TaskRes means the query raised
             self.cleaner.request((t, BAD_OUTPUT, None))
         else:
             _, state, res = self.parse_query_res(query_res)
             self.cleaner.request((t, state, res))
     except Exception, ex:
         self.cleaner.request((t, BAD_OUTPUT, None))
         self.logger.error("task finished catch unhandle exception, "
                           "task_uuid:%s" % t.uuid, exc_info=True)
Example No. 53
def writeHashToRedis(data):
    key_hash = ''
    if data['params'].has_key('files'):
        if len(data['params']['files']) > 1:
            if data['params'].has_key('url'):
                key_hash = data['params']['url']['hash']
            else:
                ret_code, bt_file_name = download_file(
                    data['params']['seed_file']['path'], gv.file_tmpdir)
                if ret_code:
                    with open(bt_file_name, 'rb') as fp:
                        seed_file_content = fp.read()
                    seed_file_hash = Hash(
                        filename=bt_file_name, content=seed_file_content).value
                    data['params']['seed_file']['hash'] = seed_file_hash
                    key_hash = seed_file_hash
                    try:
                        os.remove(bt_file_name)
                    except OSError:
                        g_logger.error(trans2json(
                            "delete bt file %s  error %s" % (bt_file_name, traceback.format_exc())))
            for i in data['params']['files']:
                key = "%s#%s" % (
                    data['params']['additional_info']['client_id'], key_hash)
                dna_hash = {}
                code = i['code']
                if code == GENERATE_SUCESS:
                    dna_hash[i['hash']] = None
                    dna_hash['file_path'] = i['file_path']
                elif code in (NOT_COPYWRITE, FILTERING):
                    dna_hash[i['hash']] = 0
                    dna_hash['file_path'] = i['file_path']
                elif code == GENERATE_FAILED:
                    dna_hash[i['hash']] = 3
                    dna_hash['file_path'] = i['file_path']
                writesetredis(gv.rds_conn, key, dna_hash)
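writesetredis is this project's helper and its implementation is not shown here. A hypothetical equivalent with redis-py, storing each dna_hash status map as a redis hash under the client_id#key_hash key built above; the None-to-empty-string convention is an assumption, since redis fields cannot hold null:

import redis

def writesetredis_sketch(conn, key, mapping):
    # conn is a redis.StrictRedis instance; one hash per client_id#key_hash
    safe = dict((k, '' if v is None else v) for k, v in mapping.items())
    conn.hmset(key, safe)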
Example No. 54
def query_hash(data):
    result_hash_list = []
    start_time = time.time()
    if data['params'].has_key('url'):
        if data['params']['url']['hash'] is not None and data['params']['url']['hash'] != '':
            ret_code, result = query_vddb_async(
                data['params']['url']['hash'], data)
            if ret_code == 1:
                end_time = time.time()
                #gv.statsd_conn.timing("thunder.querybroker_qbpull", (end_time-start_time)*1000)
                return ret_code, result
            result_hash_list.append((ret_code, result))
    if data['params']['thunder_hash'] is not None and data['params']['thunder_hash'] != '':
        ret_code, result = query_vddb_async(
            data['params']['thunder_hash'], data)
        if ret_code == 1:
            end_time = time.time()
            #gv.statsd_conn.timing("thunder.querybroker_qbpull", (end_time-start_time)*1000)
            return ret_code, result
        result_hash_list.append((ret_code, result))
    if data['params'].has_key('seed_file'):
        seed_file_hash = ''
        if data['params']['seed_file']['hash'] != '':
            seed_file_hash = data['params']['seed_file']['hash']
        else:
            ret_code, bt_file_name = download_file(
                data['params']['seed_file']['path'], gv.file_tmpdir)
            if ret_code:
                client_id = data['params']['additional_info']['client_id']
                with open(bt_file_name, 'rb') as fp:
                    seed_file_content = fp.read()
                seed_file_hash = Hash(
                    filename=bt_file_name, content=seed_file_content).value
                data['params']['seed_file']['hash'] = seed_file_hash
                try:
                    os.remove(bt_file_name)
                except OSError:
                    g_logger.error(trans2json(
                        "delete bt file %s  error %s" % (bt_file_name, traceback.format_exc())))
        ret_code, result = query_vddb_async(seed_file_hash, data)
        if ret_code == 1:
            end_time = time.time()
            #gv.statsd_conn.timing("thunder.querybroker_qbpull", (end_time-start_time)*1000)
            return ret_code, result
        result_hash_list.append((ret_code, result))
    if data['params'].has_key('files'):
        hash_list = []
        data_list = []
        for i in data['params']['files']:
            dna_hash = i['hash']
            hash_list.append(dna_hash)
            data_list.append(data)
        result_list = map(query_vddb_async, hash_list, data_list)
        for i in range(len(result_list)):
            if result_list[i][0] == 1:
                end_time = time.time()
                #gv.statsd_conn.timing("thunder.querybroker_qbpull", (end_time-start_time)*1000)
                return result_list[i][0], result_list[i][1]
    end_time = time.time()
    #gv.statsd_conn.timing("thunder.querybroker_qbpull", (end_time-start_time)*1000)
    return 3, None
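query_hash repeats the same query-then-return-on-hit step for the url hash, thunder_hash, seed_file hash, and the per-file dna hashes. The same priority order reads more directly as a loop over candidates; a sketch that keeps query_vddb_async's (ret_code, result) contract with ret_code == 1 meaning a hit, and omits the download fallback for hashless seed files:

def query_hash_sketch(data, query=query_vddb_async):
    params = data['params']
    candidates = []
    if 'url' in params and params['url'].get('hash'):
        candidates.append(params['url']['hash'])
    if params.get('thunder_hash'):
        candidates.append(params['thunder_hash'])
    if 'seed_file' in params and params['seed_file'].get('hash'):
        candidates.append(params['seed_file']['hash'])
    candidates.extend(f['hash'] for f in params.get('files', []))
    for h in candidates:
        ret_code, result = query(h, data)
        if ret_code == 1:  # first recognized hash wins
            return ret_code, result
    return 3, None  # same "no result" code as the function above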
Example No. 55
 def run(self):
     g_logger.info(trans2json("start fetch query result service"))
     self.th.run()
Example No. 56
    def query(t, account, backends):
        #account {..., backends:[{'extra':, 'account':, 'backend':}]}
        #backends {id:{'capacity':, 'id':, 'vddb_address':}}
        logger = logging.getLogger("mw_celery_task")
        try:
            logger.info("receive task to query: task_uuid: %s, "
                        "site_asset_id: %s" % (t['uuid'], t['site_asset_id']))

            g_logger.info(trans2json(message="site_asset_id:%s, "
                          "task_uuid:%s, external_id:%s" % (t['site_asset_id'], t['uuid'],
                          t['external_id']), action="get task from celery"))

            urls = []
            for b in account['backends']:
                be = backends[b['backend']]#{'capacity':, 'id':, 'vddb_address':}
                opts = dict(level=b['level'], mode=b['mode'], extra=b['extra'])
                if account['hot'] == 'true':
                    opts['hot_user'] = account['hot_user']
                    opts['hot_pass'] = account['hot_pass']
                # if account['slicing'] == 'true':
                #     opts['slice'] = p['slice_duration']
                qs = urlencode(opts.items())
                urls.append('vdna://%s:%s@%s/?%s' % (account['backend_user'],
                                                     account['backend_pass'],
                                                     be['vddb_address'], qs))
            args = ['/'.join([os.getenv('MW_HOME'), 'lib', 'task_executor.py']),
                    '--task', str(t['id']), '--task-uuid', t['uuid'],
                    '--timestamp', str(t['created_at']),
                    '--account', str(account['id']),
                    '--site-domain', account['domain'],
                    '--site-asset-id', t['site_asset_id'],
                    '--clip-format', t['format'],
                    '--dna-url', t['dna_url']]
            if account['slicing'] == 'true':
                args.extend(['--slice-duration', str(account['slice_duration'])])
            if account['allow_partial'] == 'false':
                args.append('--fail-partial')
            # reverse query ingestion triggering is done by reverse_trigger.py
            # if account['do_reverse'] == 'true':
            #     args.extend('--reverse-query')
            for u in urls:
                args.extend(['--backend', u])
            if t['scope'] is not None:
                for s in t['scope']:
                    args.extend(['--scope', s])
            for r in account['rules']:
                args.append('--' + r)
        except Exception:
            logger.error("generate command line failed, "
                         "uuid: %s, site_asset_id: %s" % \
                         (t['uuid'], t['site_asset_id']), exc_info=True)
            logger.debug("task: %s, account: %s, backends: %s" % \
                        (t, account, backends))
            raise TaskException(err="query failed, generate execute cmd failed")
        else:
            # bufsize=-1 usually means fully buffer the output, usually, ugh
            # please contact [email protected] if stdout is blocked
            proc = None
            try:
                start_time = time.time()
                proc = Popen(args, close_fds=True, stdout=PIPE, bufsize=-1)
                #row = db_txn(pool, partial(self.record, t, proc.pid)) 
                logger.info("spawn a process to query, task_uuid: %s, "
                            "site_asset_id: %s, just wait til finished" %
                            (t['uuid'], t['site_asset_id']))
                g_logger.info(trans2json(message="site_asset_id:%s, "
                              "task_uuid:%s, external_id:%s" % (t['site_asset_id'],
                              t['uuid'], t['external_id']), action="start query vddb"))
                out, err = proc.communicate()
                ret = proc.wait()
                logger.info("query finished, return to manager, "
                            "task_uuid: %s, site_asset_id: %s, "
                            "ret: %s, out: %s, err: %s"
                             % (t['uuid'], t['site_asset_id'], ret, out, err))
                g_logger.info(trans2json(message="site_asset_id:%s, "
                             "task_uuid:%s, external_id:%s" % (t['site_asset_id'],
                              t['uuid'], t['external_id']), action="query vddb finished"))
                end_time = time.time()
                stats.timing(QUERY_VDDB, int((end_time - start_time) * 1000))
                return TaskRes(t, ret, out, err)
            except Exception:
                logger.error("spawn process catch exception, uuid: %s, "
                             "site_asset_id: %s, " % \
                             (t['uuid'], t['site_asset_id']),  exc_info=True)
                logger.debug("task: %s, account: %s" % (t, account))
                raise TaskException(err="query failed, spawn process failed")