예제 #1
0
def get_html(intervel):
    """Crawl proxy listing pages in descending order, sleeping *intervel*
    seconds between requests.

    NOTE: the parameter name keeps the original (misspelled) spelling so
    keyword callers elsewhere in the project are not broken.

    Args:
        intervel: delay in seconds between successive page fetches.
    """
    # range(2055, 1, -1) walks pages 2055 .. 2; page 1 is never fetched —
    # presumably intentional, TODO confirm against the target site.
    for i in range(2055, 1, -1):
        try:
            new_url = Proxy_url + str(i) + ".html"
            lm.log_info("正在获取" + new_url + "的信息")
            html = get_page_content(new_url, None)
            parse_html(html)
            time.sleep(intervel)
        except Exception as e:
            # str(e) is safe even when e.args is empty; e.args[0] would
            # raise IndexError inside the handler and abort the whole loop.
            lm.log_error(str(e))
예제 #2
0
def get_html(interval):
    """Crawl every listing page reported by parse_page_num, sleeping
    *interval* seconds between requests.

    Args:
        interval: delay in seconds between successive page fetches.
    """
    page_num = parse_page_num(Proxy_url)
    # Pages are 1-based on the site, hence str(i + 1).
    for i in range(page_num):
        try:
            new_url = Proxy_url + str(i + 1) + ".html"
            lm.log_info("正在获取" + new_url + "的信息")
            html = get_page_content(new_url, None)
            parse_html(html)
            time.sleep(interval)
        except Exception as e:
            # str(e) is safe even when e.args is empty; e.args[0] would
            # raise IndexError inside the handler and abort the whole loop.
            lm.log_error(str(e))
예제 #3
0
def get_html(interval):
    """Crawl up to 2000 pages under each category suffix, sleeping
    *interval* seconds between requests.

    Args:
        interval: delay in seconds between successive page fetches.
    """
    # `suffix` is a module-level iterable of category path segments —
    # presumably defined near Proxy_url; TODO confirm.
    for suf in suffix:
        # Hoist the category prefix out of the inner loop.
        url_joint = Proxy_url + "/" + suf
        for i in range(2000):
            try:
                new_url = url_joint + "/" + str(i + 1)
                lm.log_info("正在获取" + new_url + "的信息")
                html = get_page_content(new_url, None)
                parse_html(html)
                time.sleep(interval)
            except Exception as e:
                # str(e) is safe even when e.args is empty; e.args[0] would
                # raise IndexError inside the handler and abort the loop.
                lm.log_error(str(e))
예제 #4
0
def get_page_content(url, proxy, retries=3):
    """Fetch *url* and return the page text, retrying through fresh proxies.

    Args:
        url: page URL to fetch.
        proxy: requests-style proxies dict, or None/falsy for a direct request.
        retries: remaining retry attempts (new, backward-compatible; the
            original recursed without bound on a persistently failing URL).

    Returns:
        The response body (str) on HTTP 200, or None when all attempts fail.
        The original version also returned None on every retry path because
        the recursive calls' results were discarded — fixed here by
        returning them.
    """
    try:
        if proxy:
            r = requests.get(url, headers=headers, timeout=10, proxies=proxy)
        else:
            r = requests.get(url, headers=headers, timeout=10)
        if r.status_code == 200:
            lm.log_info("获取" + url + "的页面数据成功")
            return r.text
        lm.log_warning("获取" + url + "的页面数据失败 正在换代理获取重新获取。。。")
        if retries <= 0:
            return None
        # Non-200: swap in a freshly fetched proxy and try again.
        proxy_list = {
            "http": gip.GetProxyIP(),
        }
        return get_page_content(url, proxy_list, retries - 1)
    except requests.RequestException:
        # Narrowed from a bare `except:` so Ctrl-C and programming errors
        # are no longer silently swallowed; network errors retry direct.
        lm.log_error(url + "链接错误 取消此次链接")
        if retries <= 0:
            return None
        return get_page_content(url, None, retries - 1)