def csv_download(self):
    """Return the local path of the Schottky barrier diode CSV file.

    The target path is ``self.path`` followed by
    ``schottkyBarrierDiodes.csv``. When nothing exists at that path yet,
    the ``HtmlAnalyse`` object built from ``self.url`` is asked to
    download to it; otherwise the existing file is reused untouched.

    Returns:
        The CSV path, whether or not a download was performed.
    """
    # The downloader is built before the existence check on purpose: its
    # constructor is opaque from here, so construction order is kept.
    fetcher = HtmlAnalyse(self.url)
    csv_path = "".join((self.path, 'schottkyBarrierDiodes.csv'))
    if not os.path.exists(csv_path):
        fetcher.download(csv_path)
    return csv_path
def download(self, pdf_url):
    """Download a TYCOMPAS data sheet through the current proxy.

    ``pdf_url`` must start with a ``downloadLinkClick(...);return false``
    handler. Its comma-separated arguments supply the file name and three
    flags, which are sent as a form via ``post_download`` before the file
    itself is fetched into a randomly named ``.pdf`` under ``self.path``.

    Each failed fetch removes ``self.proxy_ip`` from ``self.proxy_pool``,
    takes a replacement proxy from the pool and tries again. There is no
    retry limit: the loop ends on success or when something outside the
    guarded fetch raises (for example the proxy pool).

    Args:
        pdf_url: Text of the ``downloadLinkClick(...)`` handler.

    Returns:
        Path of the file written by the attempt that succeeded.

    Raises:
        AttributeError: If ``pdf_url`` does not match the handler pattern.
        IndexError: If the handler has fewer than four arguments.
    """
    # NOTE(review): the original matched against an undefined name ``a``,
    # which raised NameError on every call. ``pdf_url`` is the only input
    # available here -- confirm callers pass the onclick text.
    handler_args = re.match(
        r'downloadLinkClick\((.*?)\);return false', pdf_url
    ).group(1).split(",")
    remote_name = handler_args[0].replace("'", "")
    url = ("http://ds.yuden.co.jp/TYCOMPAS/cs/detail.do?mode=download&fileName="
           + remote_name)
    is_series_data = handler_args[1]
    is_products_data = handler_args[2]
    is_products_data_graph = handler_args[3]

    # A loop replaces the original self-recursion, which threw away the
    # recursive call's result and returned the name of the failed attempt.
    while True:
        # The form is rebuilt and re-posted on every attempt, exactly as
        # the recursive version did.
        download_form = {
            "action": "detail.do",
            "classificationID": "AE",
            "fileName": remote_name,
            "isSeriesData": is_series_data,
            "isProductsData": is_products_data,
            "isProductsDataGraph": is_products_data_graph,
        }
        html_analyse = HtmlAnalyse(url)
        # Raw string: same runtime value as before, without the invalid
        # "\P", "\S" and "\D" escape sequences.
        html_analyse.post_download(
            data=download_form,
            path=r"I:\PythonPrj\StandardSpider\DataAnalyse\NewRules\a.pdf")

        filename = self.path + str(random.random()) + '.pdf'
        try:
            html_analyse = HtmlAnalyse(url, proxy=self.proxy_ip)
            html_analyse.download(filename)
            print("下载完成。。。")
        except Exception as e:
            print(e)
            # Drop the proxy that just failed and rotate to another one.
            self.proxy_pool.remove(self.proxy_ip)
            self.proxy_ip = self.proxy_pool.get()
        else:
            return filename
def file_download(self, url, file_type, file_name=None):
    """Download ``url`` into the relative ``download_files`` directory.

    The directory is created when missing. If a file built from
    ``file_name`` and ``file_type`` already exists there, nothing is
    downloaded. Otherwise the download is attempted repeatedly, each time
    to a freshly generated random name with the ``file_type`` suffix.

    After a failure the current proxy is removed from ``self.proxy_pool``
    and replaced. The method gives up after the third failure when
    ``url`` contains ``"https"``, and after the sixth failure otherwise.

    Args:
        url: Address to download.
        file_type: Suffix appended to the file name (e.g. ``".pdf"``).
        file_name: Name used only for the "already downloaded" check.
            Defaults to a new random name on every call.

    Returns:
        Path of the downloaded file, or ``None`` when the named file
        already exists or every attempt failed.
    """
    if file_name is None:
        # A ``str(random.random())`` default in the signature is evaluated
        # once, at definition time, so every call would share one name.
        file_name = str(random.random())

    download_dir_path = "..\\download_files\\"
    if not os.path.exists(download_dir_path):
        # exist_ok avoids a crash if the directory appears between the
        # check above and its creation here.
        os.makedirs(download_dir_path, exist_ok=True)

    if os.path.exists(download_dir_path + file_name + file_type):
        return None

    # NOTE(review): the file is always saved under a new random name, never
    # under ``file_name`` -- confirm that this is intended.
    try_count = 0
    while True:
        try:
            download_file_path = (
                download_dir_path + str(random.random()) + file_type)
            html_analyse = HtmlAnalyse(url)
            html_analyse.download(download_file_path)
            print("File Download Success !!")
            return download_file_path
        except Exception as e:
            print(sys._getframe().f_code.co_name, url, e)
            try_count += 1
            if try_count > 2 and "https" in url:
                return None
            if try_count > 5:
                return None
            # The proxy is rotated although the request above is built
            # without one; kept as in the original.
            self.proxy_pool.remove(self.proxy_ip)
            self.proxy_ip = self.proxy_pool.get()
def download(self, pdf_url):
    """Download ``pdf_url`` through the current proxy, rotating on failure.

    Every attempt targets a new randomly named ``.pdf`` under
    ``self.path``. When an attempt raises, the error is printed,
    ``self.proxy_ip`` is removed from ``self.proxy_pool``, a replacement
    is taken from the pool and the download is tried again. There is no
    retry limit: the loop ends on success or when the proxy pool raises.

    Args:
        pdf_url: Address passed to ``HtmlAnalyse``.

    Returns:
        Path of the file written by the attempt that succeeded.
    """
    # A loop replaces the original self-recursion, which threw away the
    # recursive call's result and returned the name of the failed attempt.
    while True:
        filename = self.path + str(random.random()) + '.pdf'
        try:
            html_analyse = HtmlAnalyse(pdf_url, proxy=self.proxy_ip)
            html_analyse.download(filename)
            print("下载完成。。。")
        except Exception as e:
            print(e)
            # Drop the proxy that just failed and rotate to another one.
            self.proxy_pool.remove(self.proxy_ip)
            self.proxy_ip = self.proxy_pool.get()
        else:
            return filename
def download(self, pdf_url):
    """Download ``pdf_url`` to a randomly named ``.pdf`` under ``self.path``.

    The request is made by an ``HtmlAnalyse`` object created with
    ``is_proxy=True``. Any exception it raises propagates to the caller.

    Args:
        pdf_url: Address passed to ``HtmlAnalyse``.

    Returns:
        The generated file path.
    """
    # The random name is generated before the downloader is created, in
    # the same order as the statements it replaces.
    target_path = "".join((self.path, str(random.random()), '.pdf'))
    HtmlAnalyse(pdf_url, is_proxy=True).download(target_path)
    print("下载完成。。。")
    return target_path
def download(self, img_url):
    """Download ``img_url`` to a randomly named ``.jpg`` under ``self.path``.

    The request is made by an ``HtmlAnalyse`` object built from
    ``img_url`` alone. Any exception it raises propagates to the caller.

    Args:
        img_url: Address passed to ``HtmlAnalyse``.

    Returns:
        The generated file path.
    """
    random_stem = str(random.random())
    image_path = "".join((self.path, random_stem, '.jpg'))
    image_fetcher = HtmlAnalyse(img_url)
    image_fetcher.download(image_path)
    print("下载完成。。。")
    return image_path