def main():
    """Entry point: prompt for a page range, prepare the output folder,
    and fan the scraped image URLs out to the worker pool.

    Relies on file-level names defined elsewhere in this script:
    ``pl`` (a Pool factory), ``geturl`` (yields per-page URL lists) and
    ``download`` (fetches one image URL).
    """
    range_ = input('请输入下载范围(xxx-xxx):')
    # Original did `os.chdir(os.getcwd())` (a no-op) and wrapped os.mkdir in a
    # bare `except: pass`, which hides *every* OS error, not just "directory
    # already exists". makedirs(exist_ok=True) expresses the intent exactly.
    os.makedirs('画师通', exist_ok=True)
    os.chdir('画师通')
    pool = pl()
    for page_urls in geturl(range_):
        # Entries may still carry an `src="` prefix from the raw HTML
        # attribute match; keep only the URL part after it.
        page_urls = [u.split('src="')[-1] for u in page_urls]
        pool.map(download, page_urls)
    pool.close()
    pool.join()
    print('下载完成')
# NOTE(review): collapsed fragment — the original newlines were stripped, so
# this line is not runnable as-is and cannot be safely reformatted here.
# It contains three pieces:
#   1) The tail of a download-style function: parses the fetched page with
#      etree.HTML, takes the first '//*[@class="content-pic"]/a/img/@src' hit,
#      derives the file name from the URL's last path segment, downloads the
#      image with the shared `headers`, and writes the bytes. It ends in a bare
#      `except: pass` whose matching `try:` (and the `def`) start before this
#      view — that handler silently swallows every error, including network
#      failures and an IndexError on an empty xpath result.
#   2) A main() that builds listing-page URLs for i in 2..9 via
#      str.replace('2', str(i)) on 'https://www.mm131.net/xinggan/list_6_2.html'.
#      .replace would also hit any other '2' in the template — it works here
#      only because the page number is the sole '2'; .format would be safer.
#   3) The entry point: maps geturl over the url list, then maps download over
#      a global `url_all` — presumably populated by geturl as a side effect;
#      TODO confirm, `url_all` is not defined in this view.
# The trailing `#download(...)` is commented-out sample invocation code.
s3 = etree.HTML(r1.text) pic1 = s3.xpath('//*[@class="content-pic"]/a/img/@src') file_name = pic1[0].split('/')[-1] r2 = requests.get(pic1[0], headers=headers) with open(file_name, 'wb') as f: f.write(r2.content) print('downloading......') except: pass def main(): url_list = [] for i in range(2, 10): url = 'https://www.mm131.net/xinggan/list_6_2.html'.replace( '2', str(i)) url_list.append(url) return url_list if __name__ == '__main__': url_list = main() pool = pl() pool.map(geturl, url_list) pool.map(download, url_all) pool.close() pool.join() print('下载完成') #download('https://www.mm131.net/xinggan/2260.html')
# NOTE(review): collapsed fragment — newlines were stripped; once on one line,
# everything after the first '#' (the "获取每张图片的url" / "get each image's
# url" comment) parses as comment text, so this line is not runnable as-is and
# cannot be safely reformatted without the missing structure.
# Pieces:
#   1) The tail of a spider(url)-style function. Its `headers = {` opener is
#      before this view; only the closing 'User-Agent' entry and '}' are
#      visible. It fetches one listing page, walks li[1]..li[30] image srcs
#      under '//*[@id="houselist-mod-new"]', saves each image as
#      ./安居客/<count>.jpg (comments translate to: "note the image names must
#      not repeat" and "write the image content"), sleeping 2 s per image and
#      2 s per page ("rest after crawling"). `count` is incremented but its
#      initialisation is outside this view — confirm it exists before use;
#      `url_iamge` looks like a typo for url_image, but it is consistently
#      used so behavior is unaffected.
#   2) The entry point: pl(4) pool ("initialise the pool, thread count
#      defaults to cpu cores"), builds page URLs with a list comprehension —
#      the inline comment says 10 pages, but range(1, 5) actually yields only
#      4 — then maps spider over them and joins.
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36' } response = requests.get(url, headers=headers) selector = etree.HTML(response.text) for j in range(1, 31): # 获取每张图片的url count += 1 url_iamge = selector.xpath( '//*[@id="houselist-mod-new"]/li[{}]/div[1]/img/@src'.format(j))[0] res = requests.get(url_iamge, headers=headers) # 这里要注意图片的名字不能重复 with open("./安居客/{}.jpg".format(str(count)), 'wb') as f: f.write(res.content) # 把图片内容写入 time.sleep(2) # 爬完休息一会 time.sleep(2) if __name__ == "__main__": pool = pl(4) # 初始化线程池 preurl = 'https://xa.anjuke.com/sale/p{}/?pi=baidu-cpc-xa-tyongxa1&kwid=89460384111#filtersort' house_url = [preurl.format(i) for i in range(1, 5)] # 用列表推导式搞出10页的url # 将url映射给spider pool.map(spider, house_url) pool.close() pool.join()
# NOTE(review): collapsed fragment — newlines were stripped, and it is cut at
# BOTH edges, so it cannot be safely reformatted here. Pieces:
#   1) The tail of a results-collector: appends [Key, num_o, num_h] to the
#      shared result_num list; the enclosing function starts before this view.
#   2) artical_write(result_num): writes KeyWord2.csv — first the header row
#      ['KeyWrods','num_o','num_h'] ("write the column names first"), then one
#      row per result ("then write the data"). Both 'artical' and 'KeyWrods'
#      look like typos (article / KeyWords), but renaming would break callers
#      and any downstream consumer of the CSV header — leave as-is.
#   3) The entry point: pl(4) pool ("thread count, defaults to cpu cores"),
#      reads the keywords from KeyWord.csv ("read the keywords from the
#      document"), then for each keyword builds a pair of Amazon search URLs
#      ("collect the url list") — one plain, one as a quoted exact phrase —
#      joining the words with '+'. The `global result_num` at module level is
#      a no-op: `global` only has an effect inside a function body. The block
#      is cut off mid-stream right after all_url.append(...); the pool.map /
#      close / join that presumably follows is not visible — do not assume
#      its shape.
result_num.append([Key,num_o,num_h]) def artical_write(result_num): with open('KeyWord2.csv', 'wt',newline='') as csvfile: writer = csv.writer(csvfile) # 先写入columns_name writer.writerow(['KeyWrods','num_o','num_h']) # 再写入数据 for res in result_num: writer.writerow([res[0],res[1],res[2]]) if __name__ == '__main__': # 线程数, 默认为cpu核心数 pool = pl(4) global result_num result_num = [] #读取文档中的关键字 with open('KeyWord.csv', 'r') as KW: KeyWrods = KW.readlines() # url列表收集 all_url = [] url = 'https://www.amazon.com/s?k=' strings = '&ref=nb_sb_noss_1' for i in KeyWrods: i_string = '"' +'+'.join(i.split(" ")).strip() + '"' all_url.append([url + '+'.join(i.split(" ")).strip() +strings,url + i_string + strings])