def mq_correct():
    """Runtime error correction for running executions (currently disabled).

    The first statement returns ``True`` unconditionally, so the consistency
    check that follows never runs. The early return is kept so that callers
    see no behaviour change; remove it to re-enable the check.

    When enabled, the body:

    * loads the previous check state from the ``checkdb`` field of the
      ``mq_correct_running`` redis hash;
    * for every ``task_execute`` row with status 0, 1 or 2 that was not
      already marked ``'ok'``, verifies that an ``execute`` document, at
      least one ``spiderurl`` document and at least one ``mq_spider_*``
      queue entry exist;
    * stores the refreshed state back to redis and calls
      ``bTask.execute_init`` for every execution that failed a check.

    :return: always ``True`` while the early return is in place; ``None``
        if the early return is removed.
    """
    # NOTE(review): the check is switched off here; why is not visible in
    # this file. Everything below is unreachable until this line is removed.
    return True

    rawJson = redis.hget('mq_correct_running', 'checkdb')
    dataOld = json.loads(rawJson.decode()) if rawJson else {}
    dataNew = {}
    rows = db.fetchall(
        'select id,status from task_execute where status in(0,1,2) order by id asc;'
    )
    stages = ['undo', 'ready', 'doing', 'done']
    pre = 'mq_spider_'
    batchErrors = {}
    for row in rows:
        errors = []
        eid = str(row['id'])
        dataNew[eid] = 'ok'
        logger.debug("mq execute check ::::%s" % eid)
        # Executions that passed on a previous run are not re-checked.
        if dataOld.get(eid) == 'ok':
            continue
        logger.debug("mq_correct_running checkdb::::%s" % eid)

        execute = mongoSpider['execute'].find_one({'id': row['id']}, {'_id': 0})
        if not execute:
            errors.append('noexecute')

        urlCount = mongoSpider['spiderurl'].find(
            {'execute_id': row['id']}, {'_id': 0}
        ).count()
        if not urlCount:
            errors.append('nourl')

        # Count queue entries for this execution across every stage.
        stats = {
            stage: mongoMq[pre + stage].find({'mq_batch': row['id']}).count()
            for stage in stages
        }
        if not sum(stats.values()):
            errors.append('nomq')

        if errors:
            batchErrors[eid] = errors
            dataNew[eid] = 'uncheck'

    # Forget executions that are no longer in a running status.
    for batch in set(dataOld) - set(dataNew):
        del dataOld[batch]
    dataOld.update(dataNew)
    redis.hset('mq_correct_running', 'checkdb',
               json.dumps(dataOld, ensure_ascii=False))
    # Log every failing execution. The previous code logged ``errors``, i.e.
    # only the last row's list, and raised NameError when no rows matched.
    logger.debug("mq execute error ::::%s" %
                 json.dumps(batchErrors, ensure_ascii=False))

    # URL data was not written: hand each failing execution to
    # bTask.execute_init (presumably re-initialises it; confirm in bTask).
    for batch in batchErrors:
        bTask.execute_init(batch)
def _checkUrlExists(executeid, url, method, invisible):
    """Check whether a URL was already recorded for an execution.

    The request is identified by ``method + '-' + url + str(invisible)``;
    the md5 of that string is used as a field in the redis hash
    ``exists_<executeid>``. A request seen for the first time is recorded
    and the hash's expiry is set to 86400 seconds.

    :param executeid: execution identifier used to build the hash name.
    :param url: request URL.
    :param method: request method; concatenated as a string.
    :param invisible: extra discriminator appended via ``str()``.
    :return: ``True`` if the request was already recorded, else ``False``.
    """
    bucket = 'exists_%s' % executeid
    fingerprint = method + '-' + url + str(invisible)
    field = md5(fingerprint)

    already_seen = redis.hexists(bucket, field)
    if not already_seen:
        # First sighting: remember it and set the 86400-second expiry.
        redis.hset(bucket, field, fingerprint)
        redis.expire(bucket, 86400)
        return False
    return True
def _verify_proxy_useful(proxy):
    """Re-validate a proxy that is currently listed as useful.

    If the proxy is not in the useful hash, nothing happens. Otherwise it is
    checked again: a proxy for which ``check`` returns exactly ``True`` is
    kept; any other result moves it from the useful hash to the raw hash.

    :param proxy: proxy identifier used as the hash field.
    :return: None
    """
    if redis.hexists(REDIS_KEY_USEFUL, proxy):
        # Identity comparison on purpose: only a literal True counts.
        still_usable = check(proxy) is True
        if not still_usable:
            redis.hdel(REDIS_KEY_USEFUL, proxy)
            redis.hset(REDIS_KEY_RAW, proxy, '1')
def _save_to_db(ip_ports):
    """Store every non-empty proxy that passes ``check`` in the useful hash.

    :param ip_ports: iterable of proxy identifiers; falsy entries are skipped.
    :return: None
    """
    for candidate in ip_ports:
        if not candidate:
            continue
        if not check(candidate):
            continue
        redis.hset(REDIS_KEY_USEFUL, candidate, '1')