def parser_for_one_url(soup):
    """Collect commodity URLs from a parsed JD list page."""
    url_list = []
    if soup is None:
        print("soup is empty")
        return url_list
    try:
        # Each product grid on a JD list page sits in <ul class="gl-warp clearfix">.
        lists = soup.find_all("ul", {"class": "gl-warp clearfix"})
        for item in lists:
            # The commodity link lives in <div class="p-name"><a href="...">.
            for name in item.find_all("div", {"class": "p-name"}):
                link = name.a
                if link is not None and link.get("href"):
                    url_list.append(link["href"])
    except Exception:
        pass
    return url_list


if __name__ == "__main__":
    with open("JD_commodity_urls.txt", mode="w", encoding="utf-8") as file:
        _catch_Index_Url()
    Creep_Tools._Deduplication("JD_commodity_urls.log")
    # Test URL
    # soup = _Analyze_Soup("http://list.jd.hk/list.html?cat=1319,1525,7057&go=0&gjz=0")
    # parser_for_one_url(soup)
    print("Run finished")
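# For a quick standalone check of parser_for_one_url, the commented-out test URL above
# can be fetched directly with requests and BeautifulSoup instead of the project's
# _Analyze_Soup helper. This is only a minimal sketch: the User-Agent header, the
# timeout, and the "html.parser" choice are illustrative assumptions, not part of
# the original script.
#
#   import requests
#   from bs4 import BeautifulSoup
#
#   test_url = "http://list.jd.hk/list.html?cat=1319,1525,7057&go=0&gjz=0"
#   resp = requests.get(test_url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
#   resp.encoding = resp.apparent_encoding  # JD list pages are not always UTF-8
#   soup = BeautifulSoup(resp.text, "html.parser")
#   print(parser_for_one_url(soup))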