Example #1
import Creep_Tools  # external helper module providing _Deduplication


def parser_for_one_url(soup):
    """Collect product detail-page URLs from a parsed JD list page."""
    url_list = []
    # Each JD list page keeps its products inside <ul class="gl-warp clearfix">.
    for item in soup.find_all("ul", {"class": "gl-warp clearfix"}):
        # The product title block <div class="p-name"> wraps the detail link.
        for name in item.find_all("div", {"class": "p-name"}):
            link = name.find("a")
            url = link.get("href") if link is not None else None
            if url is not None:
                url_list.append(url)
            else:
                print("soup is empty")  # no usable link in this name block
    return url_list

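For context, a minimal sketch of how this parser might be fed, assuming a plain requests + BeautifulSoup pipeline in place of the _Analyze_Soup helper referenced below (the fetch_soup name, the header value, and the timeout are illustrative assumptions, not part of the original):

import requests
from bs4 import BeautifulSoup


def fetch_soup(url):
    # A browser-like User-Agent helps get past basic bot filtering
    # (the header value here is an illustrative assumption).
    headers = {"User-Agent": "Mozilla/5.0"}
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "html.parser")

# Usage, reusing the list-page URL from the test comment below:
# soup = fetch_soup("http://list.jd.hk/list.html?cat=1319,1525,7057&go=0&gjz=0")
# print(parser_for_one_url(soup))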

if __name__ == "__main__":
    # _catch_Index_Url is defined elsewhere in the original project; it is
    # presumably expected to write the collected list-page URLs into `file`.
    with open("JD_commodity_urls.txt", mode="w", encoding="utf-8") as file:
        _catch_Index_Url()
    Creep_Tools._Deduplication("JD_commodity_urls.txt")
    # Test URL:
    # soup = _Analyze_Soup("http://list.jd.hk/list.html?cat=1319,1525,7057&go=0&gjz=0")
    # parser_for_one_url(soup)
    print("Run finished")