Exemple #1
0
def get_detail_html(url, timeout=10):
    """Download ``url`` through a proxy and return the response body as text.

    The request is attempted up to five times, calling ``get_proxy()`` again
    for every attempt. An attempt is treated as failed when it raises or when
    the response body is empty.

    Args:
        url: Address of the page to download.
        timeout: Value forwarded to ``requests.get`` as ``timeout``. Pass
            ``None`` to wait indefinitely, which is how the function behaved
            before this parameter existed.

    Returns:
        The non-empty response text, or ``None`` when all five attempts
        failed.
    """
    for _ in range(5):
        try:
            # Without a timeout a single stalled proxy would block forever and
            # the retry loop would never get a chance to try another one.
            response = requests.get(
                url, headers=headers, proxies=get_proxy(), timeout=timeout
            )
            res = response.text
            print("当前运行url为{}".format(url))
            if res:
                return res
        except Exception as e:
            # Best-effort scraping: report the failure and fall through to
            # the next attempt instead of aborting the crawl.
            print("当前运行url{}出错{}".format(url, repr(e)))
    # Every attempt failed or produced an empty body.
    return None
Exemple #2
0
def get_all_brand(timeout=10):
    """Return the de-duplicated brand links found on the service index page.

    The index page is requested up to three times, calling ``get_proxy()``
    again for every attempt, and the first successful response is used.

    Args:
        timeout: Value forwarded to ``requests.get`` as ``timeout``. Pass
            ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        A list of unique ``href`` values, in no particular order. The list is
        empty when the page could not be downloaded or contained no markup.
    """
    # Overall crawl plan: collect every brand, request each brand URL to get
    # its region URLs, request each region URL to get the list page, read the
    # item count, work out how many pages there are, then paginate.
    url = "https://www.chinatruck.org/service/b3382_area_0"
    res = None
    for _ in range(3):
        try:
            res = requests.get(
                url, headers=headers, proxies=get_proxy(), timeout=timeout
            ).text
            # Stop at the first success; without this the page was fetched
            # three times on every call.
            break
        except Exception as e:
            print("当前运行url{}出错{}=1==".format(url, repr(e)))
    if not res:
        # All attempts failed (or the body was empty): nothing to parse.
        return []
    html = etree.HTML(res)
    if html is None:
        # NOTE(review): some lxml versions appear to return None instead of
        # raising for input with no parsable markup - confirm.
        return []
    brand_url_list = list(
        set(html.xpath("//li/div[@class='brand-jxs-tab']/span/a/@href")))
    return brand_url_list
Exemple #3
0
def get_all_brand(timeout=10):
    """Return the de-duplicated brand links found on the dealer index page.

    The index page is requested up to three times, calling ``get_proxy()``
    again for every attempt, and the first successful response is used.

    Args:
        timeout: Value forwarded to ``requests.get`` as ``timeout``. Pass
            ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        A list of unique ``href`` values, in no particular order. The list is
        empty when the page could not be downloaded or contained no markup.
    """
    # Overall crawl plan: collect every brand, request each brand URL to get
    # its region URLs, request each region URL to get the list page, read the
    # item count, work out how many pages there are, then paginate.
    url = "https://www.chinatruck.org/dealer/b490_area_0"
    res = None
    for _ in range(3):
        try:
            res = requests.get(
                url, headers=headers, proxies=get_proxy(), timeout=timeout
            ).text
            # Stop at the first success; without this the page was fetched
            # three times on every call.
            break
        except Exception as e:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C and
            # interpreter shutdown are not swallowed by the retry loop.
            print("当前运行url{}出错{}".format(url, repr(e)))
    if not res:
        # All attempts failed (or the body was empty): nothing to parse.
        return []
    html = etree.HTML(res)
    if html is None:
        # NOTE(review): some lxml versions appear to return None instead of
        # raising for input with no parsable markup - confirm.
        return []
    brand_url_list = list(
        set(html.xpath("//li/div[@class='brand-jxs-tab']/span/a/@href")))
    return brand_url_list
Exemple #4
0
def get_add_url_by_brand(brand_url, timeout=10):
    """Map each region link text on a brand page to that link's ``href``.

    The brand page is requested up to five times, calling ``get_proxy()``
    again for every attempt, and the first successful response is parsed.

    Args:
        brand_url: Address of the brand page to download.
        timeout: Value forwarded to ``requests.get`` as ``timeout``. Pass
            ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        A dict keyed by the first text node of each ``<dd>``'s anchor, with
        the anchor's ``href`` as value. The dict is empty when the page could
        not be downloaded or held no matching entries.
    """
    add_url_dict = {}
    res = None
    for _ in range(5):
        try:
            res = requests.get(
                brand_url, headers=headers, proxies=get_proxy(), timeout=timeout
            ).text
            break
        except Exception as e:
            print("当前运行url{}出错{}===".format(brand_url, repr(e)))
    if not res:
        # Every attempt failed (or the body was empty). Previously this path
        # crashed with UnboundLocalError on ``res``.
        return add_url_dict
    html = etree.HTML(res)
    if html is None:
        # NOTE(review): some lxml versions appear to return None instead of
        # raising for input with no parsable markup - confirm.
        return add_url_dict
    for addr_url in html.xpath("//li/dl[@class='tab-fw']/dd"):
        names = addr_url.xpath("./a/text()")
        links = addr_url.xpath("./a/@href")
        if not names or not links:
            # A <dd> without a usable anchor is skipped rather than letting
            # an IndexError discard the entries already collected.
            continue
        add_url_dict[names[0]] = links[0]
    return add_url_dict
Exemple #5
0
def get_add_url_by_brand(brand_url, timeout=10):
    """Map each region link text on a brand page to that link's ``href``.

    The brand page is requested up to five times, calling ``get_proxy()``
    again for every attempt, and the first successful response is parsed.
    Parsing is best-effort: an ordinary error is reported and whatever was
    collected so far is returned.

    Args:
        brand_url: Address of the brand page to download.
        timeout: Value forwarded to ``requests.get`` as ``timeout``. Pass
            ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        A dict keyed by the first text node of each ``<dd>``'s anchor, with
        the anchor's ``href`` as value. The dict may be empty or partial.
    """
    add_url_dict = {}
    res = None
    for _ in range(5):
        try:
            res = requests.get(
                brand_url, headers=headers, proxies=get_proxy(), timeout=timeout
            ).text
            break
        except Exception as e:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C and
            # interpreter shutdown are not swallowed by the retry loop.
            print("当前运行url{}出错{}".format(brand_url, repr(e)))
    if not res:
        # Every attempt failed (or the body was empty): nothing to parse.
        return add_url_dict
    # The original used ``return`` inside ``finally``, which silently threw
    # away every in-flight exception, KeyboardInterrupt included. An explicit
    # handler keeps the best-effort contract for ordinary errors only.
    try:
        html = etree.HTML(res)
        if html is not None:
            for addr_url in html.xpath("//li/dl[@class='tab-jxs']/dd"):
                names = addr_url.xpath("./a/text()")
                links = addr_url.xpath("./a/@href")
                if not names or not links:
                    # Skip a <dd> without a usable anchor and keep going.
                    continue
                add_url_dict[names[0]] = links[0]
    except Exception as e:
        print("当前运行url{}出错{}".format(brand_url, repr(e)))
    return add_url_dict