Exemple #1
0
def mq_correct():
    """Runtime error correction for task executions (currently disabled).

    NOTE(review): the unconditional ``return True`` directly below makes the
    rest of the body unreachable. It looks like a deliberate kill switch, so
    it is kept as-is -- confirm with the owner before removing it.

    What the disabled logic does once re-enabled:

    1. Loads the previous per-execute check state from the Redis hash
       ``mq_correct_running`` (field ``checkdb``).
    2. For every ``task_execute`` row with status 0, 1 or 2 that is not
       already marked ``'ok'``, checks that a matching ``execute`` document
       exists, that at least one ``spiderurl`` document exists, and that at
       least one message exists across the ``mq_spider_<stage>`` collections.
    3. Writes the refreshed state back to Redis, dropping ids that the query
       no longer returns.
    4. Calls ``bTask.execute_init`` for every execute id that had errors.

    Returns:
        True, because of the early return. (The disabled path itself falls
        off the end of the function without an explicit return value.)
    """
    return True
    raw_json = redis.hget('mq_correct_running', 'checkdb')
    previous = json.loads(raw_json.decode()) if raw_json else {}
    current = {}
    rows = db.fetchall(
        'select id,status from task_execute where status in(0,1,2) order by id asc;'
    )
    stages = ['undo', 'ready', 'doing', 'done']
    pre = 'mq_spider_'
    batch_errors = {}
    for row in rows:
        problems = []
        eid = str(row['id'])
        current[eid] = 'ok'
        logger.debug("mq execute check ::::%s", eid)

        # Executes that passed on a previous run are not re-checked.
        if previous.get(eid) == 'ok':
            continue

        logger.debug("mq_correct_running checkdb::::%s", eid)
        if not mongoSpider['execute'].find_one({'id': row['id']}, {'_id': 0}):
            problems.append('noexecute')

        url_count = mongoSpider['spiderurl'].find(
            {'execute_id': row['id']}, {'_id': 0}).count()
        if not url_count:
            problems.append('nourl')

        # Total number of queue messages for this execute over all stages.
        total = sum(
            mongoMq[pre + stage].find({'mq_batch': row['id']}).count()
            for stage in stages
        )
        if not total:
            problems.append('nomq')

        if problems:
            batch_errors[eid] = problems
            current[eid] = 'uncheck'

    # Forget ids that are no longer in the active set, then merge new state.
    for stale in set(previous) - set(current):
        del previous[stale]
    previous.update(current)
    redis.hset('mq_correct_running', 'checkdb',
               json.dumps(previous, ensure_ascii=False))

    # Log every failing execute. The original dumped the loop-local error
    # list, which is unbound when there are no rows and otherwise only holds
    # the last row's errors.
    logger.debug("mq execute error ::::%s",
                 json.dumps(batch_errors, ensure_ascii=False))

    # URL data was not written: re-initialise each failing execute.
    for batch in batch_errors:
        bTask.execute_init(batch)
Exemple #2
0
def _checkUrlExists(executeid, url, method, invisible):
    """Check whether a URL has already been recorded for an execute.

    The method, URL and ``invisible`` flag are combined into one string; its
    ``md5`` digest is used as the field name inside the Redis hash
    ``exists_<executeid>``.

    Returns:
        True if the field was already present. Otherwise the field is
        stored, the hash's expiry is set to 86400 seconds, and False is
        returned.
    """
    bucket = 'exists_%s' % executeid
    signature = ''.join((method, '-', url, str(invisible)))
    fingerprint = md5(signature)

    if not redis.hexists(bucket, fingerprint):
        # First sighting: remember it and (re)arm the expiry on the hash.
        redis.hset(bucket, fingerprint, signature)
        redis.expire(bucket, 86400)
        return False
    return True
Exemple #3
0
def _verify_proxy_useful(proxy):
    """Re-verify a proxy that is listed in the useful hash.

    If the proxy is still usable it is kept; otherwise it is removed from
    ``REDIS_KEY_USEFUL`` and put into ``REDIS_KEY_RAW``. Proxies that are not
    in ``REDIS_KEY_USEFUL`` are ignored.

    :param proxy: field name of the proxy in the Redis hashes
    :return: None
    """
    if not redis.hexists(REDIS_KEY_USEFUL, proxy):
        return

    # Only an exact ``True`` from check() counts as still usable.
    if check(proxy) is True:
        return

    redis.hdel(REDIS_KEY_USEFUL, proxy)
    redis.hset(REDIS_KEY_RAW, proxy, '1')
Exemple #4
0
def _save_to_db(ip_ports):
    """Store every proxy that passes ``check`` in the useful hash.

    Falsy entries are skipped without being checked; each remaining entry
    that ``check`` accepts is written to ``REDIS_KEY_USEFUL`` with the
    value ``'1'``.

    :param ip_ports: iterable of proxy entries
    :return: None
    """
    for candidate in ip_ports:
        if not candidate:
            continue
        if not check(candidate):
            continue
        redis.hset(REDIS_KEY_USEFUL, candidate, '1')