def get_json_dict(url, proxy=None, times=1):
    """Fetch *url* and return the decoded JSON dict, retrying on timeout.

    Args:
        url: Target URL to GET.
        proxy: Optional HTTP proxy address; None disables the proxy.
        times: Current attempt number (internal retry bookkeeping).

    Returns:
        The parsed JSON dict, or None once RETRY_TIMES is exceeded.
    """
    if times > RETRY_TIMES:
        log.error(
            'Timeout for {} beyond the maximum({}) retry times. SKIP!'.format(
                url, RETRY_TIMES))
        return None
    # Random 1-2 second sleep to avoid hammering the server.
    timer.sleep_awhile()
    try:
        if proxy is not None:
            log.info("使用代理{}".format(proxy))
            return requests.get(url, headers=headers, cookies=cookies,
                                timeout=5, proxies={'http': proxy}).json()
        else:
            log.info("无代理")
            return requests.get(url, headers=headers, cookies=cookies,
                                timeout=5).json()
    except Timeout:
        log.warn("timeout for {}. Try again.".format(url))
        # BUG FIX: the original retried with get_json_dict(url, times + 1),
        # which bound the retry counter to the *proxy* parameter and let
        # `times` reset to 1 — dropping the proxy and making the retry cap
        # unreachable. Pass both arguments through explicitly.
        return get_json_dict(url, proxy, times + 1)
def get_json_dict_raw(url, cookies, proxy=False, times=1):
    """Return the raw response text for *url*, serving a cached copy first.

    Retries (after a pause) on timeouts or unexpected errors until
    RETRY_TIMES is exhausted, then gives up and returns None.
    """
    # Serve from the local cache when a stored copy already exists.
    if exist(url):
        return fetch(url)
    if times > RETRY_TIMES:
        log.error(
            'Timeout for {} beyond the maximum({}) retry times. SKIP!'.format(
                url, RETRY_TIMES))
        return None
    timer.sleep_awhile()
    use_proxy = proxy and proxies != {}
    try:
        if use_proxy:
            response = requests.get(url, headers=headers, cookies=cookies,
                                    timeout=5, proxies=proxies)
        else:
            response = requests.get(url, headers=headers, cookies=cookies,
                                    timeout=5)
        return response.text
    except Timeout:
        log.warn("timeout for {}. Try again.".format(url))
    except Exception as e:
        log.error("unknown error for {}. Try again. Error string: {}".format(
            url, e))
        log.error(traceback.format_exc())
    # Reached only on failure: recurse with an incremented attempt count.
    return get_json_dict_raw(url, cookies, proxy, times + 1)
def get_json_dict(url):
    """GET *url* and parse the body as JSON; returns None on timeout (no retry)."""
    timer.sleep_awhile()
    try:
        response = requests.get(url, headers=headers, cookies=cookies, timeout=5)
    except Timeout:
        log.error("timeout for {}. SKIP.".format(url))
        return None
    return response.json()
def get_json_dict(url, cookies, proxy=False, times=1):
    """Fetch *url* as JSON, retrying on timeout up to RETRY_TIMES attempts.

    Returns the parsed JSON dict, or None once the retry budget is spent.
    """
    if times > RETRY_TIMES:
        log.error('Timeout for {} beyond the maximum({}) retry times. SKIP!'.format(url, RETRY_TIMES))
        return None
    timer.sleep_awhile()
    # Build the request keyword set once; attach proxies only when asked.
    request_kwargs = {'headers': headers, 'cookies': cookies, 'timeout': 5}
    if proxy:
        request_kwargs['proxies'] = proxies
    try:
        return requests.get(url, **request_kwargs).json()
    except Timeout:
        log.warn("timeout for {}. Try again.".format(url))
        return get_json_dict(url, cookies, proxy, times + 1)
async def async_get_json_dict_raw(url, cookies, session: ClientSession, proxy=False, times=1):
    """Asynchronously fetch *url* through *session* and return the body text.

    On any failure, sleeps and retries; gives up with None once
    config.RETRY_TIMES is exceeded.
    """
    if times > config.RETRY_TIMES:
        log.error('Timeout for {} beyond the maximum({}) retry times. SKIP!'.format(url, config.RETRY_TIMES))
        return None
    try:
        async with session.get(url) as resp:
            return await resp.text()
    except Timeout:
        log.warn("Timeout for {}. Try again.".format(url))
    except Exception as e:
        log.error("Unknown error for {}. Try again. Error string: {}".format(url, e))
        log.error(traceback.format_exc())
    # First failure: sleep only this coroutine. On the second failure, also
    # block the event loop with a synchronous wait so every task throttles.
    await timer.async_sleep_awhile()
    if times == 2:
        log.error('aio http error happens 2 times. use sync wait')
        timer.sleep_awhile()
    return await async_get_json_dict_raw(url, cookies, session, proxy, times + 1)