Example #1
def get_json_dict(url, proxy=None, times=1):
    # GET url and parse the JSON body, retrying up to RETRY_TIMES times on timeout
    if times > RETRY_TIMES:
        log.error(
            'Timeout for {} beyond the maximum({}) retry times. SKIP!'.format(
                url, RETRY_TIMES))
        return None

    timer.sleep_awhile()  # sleep randomly for 1 to 2 seconds
    try:
        if proxy is not None:
            log.info("使用代理{}".format(proxy))
            return requests.get(url,
                                headers=headers,
                                cookies=cookies,
                                timeout=5,
                                proxies={
                                    'http': proxy
                                }).json()
        else:
            log.info("无代理".format(proxy))
            return requests.get(url,
                                headers=headers,
                                cookies=cookies,
                                timeout=5).json()
        # fetch and parse the JSON response
    except Timeout:
        # the request timed out while fetching the JSON
        log.warn("timeout for {}. Try again.".format(url))
        return get_json_dict(url, proxy, times + 1)
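All of these snippets rely on module-level names that the listing does not show: headers, cookies, proxies, RETRY_TIMES, log, and a timer helper with sleep_awhile(). Below is a minimal sketch of what that surrounding setup might look like; every concrete value is an assumption, not the project's real configuration.

# Hypothetical module-level setup assumed by the snippets on this page.
import logging

import requests
from requests.exceptions import Timeout

import timer  # assumed project helper that provides sleep_awhile()

RETRY_TIMES = 3                          # assumed retry cap
headers = {'User-Agent': 'Mozilla/5.0'}  # assumed request headers
cookies = {}                             # e.g. a logged-in session's cookie jar
proxies = {}                             # e.g. {'http': 'http://127.0.0.1:1080'}
log = logging.getLogger(__name__)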
Example #2
def get_json_dict_raw(url, cookies, proxy=False, times=1):
    # return the raw response text, preferring the local cache when the URL is already stored
    if exist(url):
        return fetch(url)

    if times > RETRY_TIMES:
        log.error(
            'Timeout for {} beyond the maximum({}) retry times. SKIP!'.format(
                url, RETRY_TIMES))
        return None

    timer.sleep_awhile()
    try:
        if proxy and proxies != {}:
            return requests.get(url,
                                headers=headers,
                                cookies=cookies,
                                timeout=5,
                                proxies=proxies).text
        return requests.get(url, headers=headers, cookies=cookies,
                            timeout=5).text
    except Timeout:
        log.warn("timeout for {}. Try again.".format(url))
    except Exception as e:
        log.error("unknown error for {}. Try again. Error string: {}".format(
            url, e))
        log.error(traceback.format_exc())

    # the request failed above (timeout or unexpected error); retry recursively
    data = get_json_dict_raw(url, cookies, proxy, times + 1)
    return data
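exist() and fetch() are not shown on this page; from the call sites they look like a simple response cache keyed by URL. A minimal file-based sketch of such helpers, with the names kept and the implementation assumed:

# Hypothetical cache helpers matching the exist()/fetch() calls above.
import hashlib
import os

CACHE_DIR = 'cache'  # assumed cache directory


def _cache_path(url):
    # one file per URL, named after the URL's MD5 digest
    return os.path.join(CACHE_DIR, hashlib.md5(url.encode('utf-8')).hexdigest())


def exist(url):
    return os.path.isfile(_cache_path(url))


def fetch(url):
    with open(_cache_path(url), encoding='utf-8') as f:
        return f.read()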
Example #3
def get_json_dict(url):
    timer.sleep_awhile()
    try:
        return requests.get(url, headers=headers, cookies=cookies,
                            timeout=5).json()
    except Timeout:
        log.error("timeout for {}. SKIP.".format(url))
        return None
Example #4
def get_json_dict(url, cookies, proxy=False, times=1):
    if times > RETRY_TIMES:
        log.error('Timeout for {} beyond the maximum({}) retry times. SKIP!'.format(url, RETRY_TIMES))
        return None

    timer.sleep_awhile()
    try:
        if proxy:
            return requests.get(url, headers=headers, cookies=cookies, timeout=5, proxies=proxies).json()
        else:
            return requests.get(url, headers=headers, cookies=cookies, timeout=5).json()
    except Timeout:
        log.warn("timeout for {}. Try again.".format(url))
        return get_json_dict(url, cookies, proxy, times + 1)
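A possible call site for this variant, assuming the module-level headers/cookies/proxies sketched after Example #1; the endpoint and key name below are placeholders:

# Hypothetical usage; the URL and the 'items' key are placeholders.
data = get_json_dict('https://example.com/api/items', cookies, proxy=False)
if data is not None:
    log.info('got {} entries'.format(len(data.get('items', []))))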
Example #5
async def async_get_json_dict_raw(url, cookies, session: ClientSession, proxy=False, times=1):
    # fetch the raw response text through the shared aiohttp session, retrying up to config.RETRY_TIMES times
    if times > config.RETRY_TIMES:
        log.error('Timeout for {} beyond the maximum({}) retry times. SKIP!'.format(url, config.RETRY_TIMES))
        return None

    try:
        async with session.get(url) as resp:
            return await resp.text()
        # return requests.get(url, headers=get_headers(), cookies=cookies, timeout=5).text
    except asyncio.TimeoutError:
        log.warn("Timeout for {}. Try again.".format(url))
    except Exception as e:
        log.error("Unknown error for {}. Try again. Error string: {}".format(url, e))
        log.error(traceback.format_exc())

    # Sleep asynchronously after the first failure; if it fails twice, fall back to a blocking sleep for all tasks.
    await timer.async_sleep_awhile()
    if times == 2:
        log.error('aiohttp error happened 2 times. Falling back to a synchronous wait.')
        timer.sleep_awhile()

    data = await async_get_json_dict_raw(url, cookies, session, proxy, times + 1)
    return data
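A sketch of how this async variant might be driven; it assumes the surrounding module already defines config, log, timer, and traceback as in the other examples, and the URL below is a placeholder:

# Hypothetical driver for async_get_json_dict_raw.
import asyncio

from aiohttp import ClientSession


async def main():
    async with ClientSession() as session:
        text = await async_get_json_dict_raw(
            'https://example.com/api/items', cookies={}, session=session)
        if text is not None:
            print(text[:200])  # preview the first part of the response body


asyncio.run(main())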