Exemple #1
0
    def _work(self):
        """
        Run ``self.work()`` and render its outcome, turning exceptions into
        error responses.

        On success, ``self.errno``, ``self.errmsg`` and ``self.response_data``
        are passed to ``self._render_result``. An ``error.BaseError`` is
        rendered with its own ``errno``/``errmsg`` and logged as a warning.
        Any other exception is rendered with ``error.ERRNO_UNKNOWN`` and
        logged as fatal. In both failure cases the diagnostic dict is also
        pretty-printed to stderr.
        """

        try:
            self.work()
            self._render_result(self.errno, self.errmsg, self.response_data)
        except error.BaseError as e:
            # Expected, application-level failure: report the error's own code.
            self._render_result(e.errno, e.errmsg, {})
            warning = {
                "uri": self.request.uri,
                "logid": self.logid,
                "errno": e.errno,
                "errmsg": e.errmsg,
                "args": str(e.args),
                "trace": traceback.format_exc(),
                "ex_type": type(e)
            }
            log.warning(warning)
            sys.stderr.write(pprint.pformat(warning))
        except Exception as e:
            # Unexpected failure: report a generic "unknown" error code.
            # (`as e` replaces the Python-2-only `except Exception, e` form,
            # matching the handler above.)
            errno = error.ERRNO_UNKNOWN
            self._render_result(errno, str(e), "")
            warning = {
                "uri": self.request.uri,
                "logid": self.logid,
                "errno": errno,
                "errmsg": str(e),
                "args": str(e.args),
                "trace": traceback.format_exc(),
                "ex_type": type(e)
            }
            log.fatal("internal_error", warning)
            sys.stderr.write(pprint.pformat(warning))
Exemple #2
0
 def checkParamAsString(self, key):
     """
     Return the request parameter ``key`` as a non-empty value.

     The value is read through ``self.getParamAsString(key, None)``. When
     that yields None or something of length zero, a warning is logged and
     ``error.BaseError`` is raised with ``error.ERRNO_PARAM``.
     """
     value = self.getParamAsString(key, None)
     if value is not None and len(value) != 0:
         return value
     message = "error_param:'{}'".format(key)
     log.warning(message)
     raise error.BaseError(errno=error.ERRNO_PARAM, errmsg=message)
Exemple #3
0
def raise_runtime_error(msg):
    """
    Print the current traceback, log ``msg`` as a warning, then raise
    ``RuntimeError(msg)``.
    """
    #TODO: get context
    traceback.print_exc()
    log.warning(msg)
    failure = RuntimeError(msg)
    raise failure
Exemple #4
0
def tryMcpackLoad(receive_buf, charset):
    """
    Decode ``receive_buf`` with ``mcpack.loads`` using the given charset.

    Returns the decoded value, or False (after logging a warning) if
    decoding raises any exception.
    """
    try:
        return mcpack.loads(receive_buf, use_unicode=True, charset=charset)
    except Exception as err:
        log.warning("tryMcpackLoad", err)
    return False
Exemple #5
0
def safe_json_decode(s, default=None):
    """
    Parse ``s`` as JSON without raising on malformed input.

    Returns the decoded value on success. If decoding fails, the traceback
    is printed, a warning containing the first 100 characters of ``s`` is
    logged, and ``default`` is returned. A ``None`` input returns ``None``
    (not ``default``).
    """
    if s is None:
        return None
    try:
        return json.loads(s)
    except Exception:
        # Deliberately broad (this is a "safe" decoder), but no longer a bare
        # `except:` so KeyboardInterrupt / SystemExit are not swallowed.
        traceback.print_exc()
        log.warning("safe_json_decode", str(s)[:100])
        return default
Exemple #6
0
def addMp4Job(itemKey, priority=10000):
    """
    Submit an "add" job for ``itemKey`` to a submit host over HTTP.

    Makes up to three attempts, sleeping one second after each failure.
    Returns the JSON-decoded response body on success, or False (after a
    fatal log entry) when every attempt failed.
    """
    key = itemKey.strip()
    attempts_left = 3
    while attempts_left > 0:
        attempts_left -= 1
        try:
            query = {"cmd": "add", "_key_": key, "priority": priority}
            host = conftool.randomChoice(SUBMIT_HOST, SUBMIT_PORT)
            url = "http://{}/job".format(host)
            resp = requests.get(url, params=query)
            return json.loads(resp.text)
        except Exception as err:
            log.warning("addMp4Job", err)
            time.sleep(1)
    log.fatal("submiter.addMp4Job fail")
    return False
def isVideoCrawled(videoId):
    """
    Report whether the video identified by ``videoId`` has been crawled.

    Returns 0 when the lookup has no data rows or the first row's ``crawl``
    field equals 0, and 1 otherwise. A failed lookup (None) or an error
    while reading the first row also yields 1.
    """
    lookup = getByVideoId(videoId)
    if lookup is None:
        # Lookup failed: default to "already crawled".
        return 1
    if len(lookup['data']) == 0:
        return 0
    try:
        rows = lookup['data']
        return 0 if rows[0]['crawl'] == 0 else 1
    except Exception as err:
        log.warning("getVideoCrawled", err)
        return 1
Exemple #8
0
def friends_get(user_id):
    """
    Fetch the friends of ``user_id`` from the VK ``friends.get`` API method.

    Returns the ``response`` member of the decoded JSON reply. If the reply
    has no ``response`` member, the reply's ``error.error_msg`` is logged as
    a warning and the process exits.
    """
    response = requests.get('https://api.vk.com/method/friends.get',
                            params={
                                'access_token': key,
                                'v': 5.92,
                                'user_id': user_id,
                                'order': 'random',
                                'count': '10000',
                                'fields': 'city, bdate, sex'
                            }).json()

    try:
        return response['response']

    except (KeyError, TypeError):
        # Only a missing/unsubscriptable 'response' is expected here; a bare
        # `except:` would also swallow KeyboardInterrupt / SystemExit.
        log.warning(response['error']['error_msg'])
        # NOTE(review): exits with status 0 even though this is an error
        # path; kept as-is because callers may depend on it.
        exit(0)
Exemple #9
0
 def _execute_action(self, path_actions, root, urlPack):
     """
     Run every (pattern, callback) entry of ``path_actions`` against ``root``.

     An entry whose pattern is None has its callback invoked once as
     ``callback(root, None)``. Otherwise the callback is invoked for each
     match of ``pattern.find(root)`` as ``callback(root, match.value,
     urlPack)``. A failing callback is logged as fatal and does not stop the
     remaining matches. A pattern with no matches is logged as a warning.
     """
     for action in path_actions:
         matcher, callback = action[0], action[1]
         if matcher is None:
             callback(root, None)
             continue
         matches = matcher.find(root)
         for found in matches:
             try:
                 callback(root, found.value, urlPack)
             except Exception as err:
                 traceback.print_exc()
                 log.fatal(
                     "_execute_action_error:{}, match.value:{}".format(
                         callback, found.value), err)
         if len(matches) == 0:
             log.warning("pattern {} match empty!".format(matcher))
Exemple #10
0
import requests
from libs import log, pictorem as pct

# Load the API access token from key.txt, dropping newlines and spaces.
# The `with` block closes the file on exit, so no explicit close() is needed;
# the handle is not called `file` to avoid shadowing the Python 2 builtin.
with open('key.txt', 'r') as key_file:
    key = key_file.read().replace('\n', '').replace(' ', '')

# A token shorter than 5 characters is treated as "not provided".
if len(key) < 5:
    log.warning('You did not put your access token into key.txt')
    exit(0)


def work_on_bdate(bdate):
    """
    Return the third dot-separated component of ``bdate``.

    Returns False when ``bdate`` does not split into exactly three parts on
    '.'. Presumably the input is a "D.M.YYYY" birth date and the result is
    the year - confirm against callers.
    """
    parts = bdate.split('.')
    return parts[2] if len(parts) == 3 else False


def user_get(user_id):
    response = requests.get(
        'https://api.vk.com/method/users.get',
        params={
            'access_token': key,
            'v': 5.92,
            'user_ids': user_id,
            'fields': 'city, bdate, domain'
        }
Exemple #11
0
 def work(self):
     """
     main worker: dispatch on the ``cmd`` request parameter.

     * ``get``: pop one item from the job priority queue, record it in the
       backup queue, and set ``self.response_data`` to the item's data (or
       to a short status dict when the queue is empty, the video is already
       crawled, or an error occurred).
     * ``add``: enqueue the ``_key_`` parameter with the ``priority``
       parameter (default 10000) and return the queue's response.
     * ``callback``: mark the ``_key_`` item as crawled in the database and
       update statistics.

     Raises ValueError for any other ``cmd``.
     """
     log.notice("in JobHandler handler")
     cmd = self.getParamAsString('cmd')
     if cmd == "get":
         # Pop one item from the queue.
         # Bind itemKey up front so the except handler below can always
         # reference it, even when the failure happens before/at dequeue.
         itemKey = None
         try:
             q = queue.JobPriorityQueue()
             itemKey, priority = q.deQueue(True)
             if itemKey is False:
                 self.response_data = {"notice": "queue empty"}
                 return
             self.response_data = {"_key_": itemKey}
             queueBack = queue.JobBackupQueue()
             queueBack.enQueue(itemKey, time.time())
             _, provider, thirdId, _ = dbtools.get_key_info(itemKey)
             isCrawled = spider_ucptool.isVideoCrawled("{}_{}".format(provider, thirdId))
             db = mongo.DB()
             if isCrawled:
                 # Already crawled: just record the status and report it.
                 insertVal = {}
                 insertVal["_crawl_"] = const.CRAWL_STATUS_OK
                 insertVal["_utime_"] = int(time.time())
                 db.updateByKey(const.getTable(itemKey), itemKey, insertVal)
                 self.response_data = {"_key_": itemKey, "_crawl_": const.CRAWL_STATUS_OK}
                 return
             data = db.getOne(const.getTable(itemKey), itemKey, '_key_')
             uid = adaptertool.getUid(data)
             authorKey = "AUTHOR-{}-{}-1".format(provider, uid)
             data['_authorInfo_'] = db.getOne(const.getTable(const.DATA_TYPE_AUTHOR), authorKey, '_key_')
             data['_callback_'] = "http://" + conftool.randomChoice(CALLBACK_HOST, CALLBACK_PORT) + "/job?cmd=callback&_key_=" + itemKey
             data['_priority_'] = priority
             if len(data.get('_topic3rdId_', '')) > 0:
                 # Topic enrichment is best-effort: a failure is logged and
                 # the item is still returned without 'microVideoTopic'.
                 try:
                     topicKey = "TOPIC-{}-{}-1".format(provider, data['_topic3rdId_'])
                     topicInfo = db.getOne(const.getTable('TOPIC'), topicKey, '_key_')
                     data['microVideoTopic'] = adaptertool.transform(topicKey, topicInfo)['microVideoTopic']
                 except Exception as e:
                     log.warning("error_get_microVideoTopic", e)

             self.response_data = data
             log.notice("pop one not crawled:{}".format(itemKey))
         except Exception as e:
             log.fatal("error_get_job_fromqueue={}, _key_={}".format(e, itemKey))
             self.response_data = {"_key_": itemKey, "error": str(e)}
         return
     if cmd == "add":
         itemKey = self.checkParamAsString('_key_')
         priority = self.getParamAsInt('priority', 10000)
         q = queue.JobPriorityQueue()
         resp = q.enQueue(itemKey, priority)
         self.response_data = resp
         return
     if cmd == "callback":
         itemKey = self.checkParamAsString('_key_')
         log.notice("got a callback:{}".format(itemKey))
         db = mongo.DB()
         stat = statistics.Statistics()
         value = {}
         value["_crawl_"] = 1
         value["_utime_"] = int(time.time())
         if self.getParamAsString('from') == 'mimod':
             value['_cspubResult_'] = self.getParamAsString('result', '')
             stat.incrCspubResult(value['_cspubResult_'])
         resp = db.updateByKey(const.getTable(itemKey), itemKey, value)
         self.response_data = {"_key_": itemKey, "_crawl_": 1, 'resp': resp}
         stat.incrSenderCallback()
         return
     # The format string previously had no placeholder, so the offending
     # cmd never appeared in the message.
     raise ValueError("invalid cmd: {}".format(cmd))