import csv

import requests
from bs4 import BeautifulSoup

# Helpers such as CheckDir, check_html, CheckPhotoHtml, ValidIp and DownloadFile,
# plus the cookie objects MYCOOKIE / cookies, are defined elsewhere in the project.
# The output directory out_dir is set further down in this file.


def VisitPage(photo_hash, download_folder, proxy_ip):
    folder_name = out_dir + "/" + download_folder
    CheckDir(folder_name)
    s = requests.Session()
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': '***',
    }
    try:
        # Fetch the profile page through the given proxy and parse it.
        rs = s.get(photo_hash, proxies=proxy_ip, headers=headers,
                   cookies=MYCOOKIE, verify=False)
        rs.encoding = 'utf-8'
        data = BeautifulSoup(rs.text, "lxml")
        check_html(photo_hash, data, folder_name)
    except Exception as e:
        # On failure, log the error, then fetch a fresh proxy and retry.
        print(e)
        proxies = ValidIp(True, 'http://www.jiayuan.com')
        VisitPage(photo_hash, download_folder, proxies[0])
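# Usage sketch (an assumption, not part of the original script): requests expects
# `proxies` to be a scheme -> proxy-URL dict, so ValidIp presumably returns a list
# of such dicts. The proxy address, URL and folder name below are hypothetical.
#
#     proxy_ip = {
#         'http': 'http://123.45.67.89:8080',
#         'https': 'http://123.45.67.89:8080',
#     }
#     VisitPage('http://www.jiayuan.com/123456789', '123456789', proxy_ip)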
def VisitPhotoPage(photo_hash, download_folder):
    folder_name = out_dir + "/" + download_folder
    CheckDir(folder_name)
    s = requests.Session()
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
    }
    try:
        # Fetch the photo page directly (no proxy) and parse it.
        rs = s.get(photo_hash, headers=headers, cookies=cookies, verify=False)
        rs.encoding = 'utf-8'
        data = BeautifulSoup(rs.text, "lxml")
        CheckPhotoHtml(photo_hash, data, folder_name)
    except Exception as e:
        print(e)
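# Usage sketch (hypothetical URL and folder name, not from the original script):
#
#     VisitPhotoPage('http://www.jiayuan.com/photo/123456789', '123456789_photos')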
# Output folder
out_dir = './photo_new'


def download_file(url, folder_name, id):
    # Strip the "_thumbnail" suffix to get the full-size image URL, then save it.
    new_url = url.split("_thumbnail")[0] + ".png"
    file_name = str(id) + ".png"
    DownloadFile(new_url, folder_name, file_name)


csv_path = "/home/chenwei/文档/1031-徐汇滨江.txt"
print(csv_path)
csv_file = csv.reader(open(csv_path, 'r'))
i = 0
for line in csv_file:
    # Each row holds two thumbnail URLs; save them as 1.png and 2.png
    # under a per-row folder.
    folder_name = out_dir + "/" + str(i) + "/"
    CheckDir(folder_name)
    download_file(line[0], folder_name, "1")
    download_file(line[1], folder_name, "2")
    print(i)
    i = i + 1
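# Illustration of the URL rewrite done by download_file (hypothetical URL):
#
#     url = 'http://example.com/albums/abc123_thumbnail.jpg'
#     url.split("_thumbnail")[0] + ".png"   # -> 'http://example.com/albums/abc123.png'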