Beispiel #1
0
 def csv_download(self):
     """Download the Schottky-barrier-diodes CSV into ``self.path``.

     Returns the local file path. If the file is already cached on disk
     the download is skipped entirely.
     """
     filename = self.path + 'schottkyBarrierDiodes.csv'
     # Short-circuit on the cached copy *before* constructing HtmlAnalyse —
     # NOTE(review): assumed HtmlAnalyse(url) may eagerly touch the network,
     # so building it only to throw it away was wasted work. TODO confirm.
     if os.path.exists(filename):
         return filename
     html_analyse = HtmlAnalyse(self.url)
     html_analyse.download(filename)
     return filename
Beispiel #2
0
    def download(self, pdf_url):
        """Resolve a ``downloadLinkClick(...)`` onclick link and fetch the PDF.

        ``pdf_url`` is expected to contain a JavaScript string of the form
        ``downloadLinkClick(<fileName>,<isSeriesData>,...);return false``.
        Returns the local path the PDF was saved to. On a download failure the
        current proxy is dropped, a replacement is taken from the pool, and
        the download is retried recursively.
        """
        # BUG FIX: the original matched against an undefined name ``a``;
        # the onclick string presumably arrives in ``pdf_url`` — TODO confirm
        # against the caller.
        content_list = re.match(r'downloadLinkClick\((.*?)\);return false',
                                pdf_url).group(1).split(",")
        filename = content_list[0].replace("'", "")

        url = "http://ds.yuden.co.jp/TYCOMPAS/cs/detail.do?mode=download&fileName=" + filename

        is_series_data = content_list[1]
        is_products_data = content_list[2]
        is_products_data_graph = content_list[3]
        download_form = {
            "action": "detail.do",
            "classificationID": "AE",
            "fileName": filename,
            "isSeriesData": is_series_data,
            "isProductsData": is_products_data,
            "isProductsDataGraph": is_products_data_graph
        }
        html_analyse = HtmlAnalyse(url)
        # Raw string literal: the original relied on invalid "\P"/"\S"/"\D"
        # escapes (SyntaxWarning today, SyntaxError in future Python); the
        # resulting path value is byte-identical.
        html_analyse.post_download(
            data=download_form,
            path=r"I:\PythonPrj\StandardSpider\DataAnalyse\NewRules\a.pdf")

        filename = self.path + str(random.random()) + '.pdf'
        try:
            html_analyse = HtmlAnalyse(url, proxy=self.proxy_ip)
            html_analyse.download(filename)
            print("下载完成。。。")
        except Exception as e:
            print(e)
            # Rotate to a fresh proxy and retry. BUG FIX: the original fell
            # through and returned the path that had just *failed*; propagate
            # the retry's result instead.
            self.proxy_pool.remove(self.proxy_ip)
            self.proxy_ip = self.proxy_pool.get()
            return self.download(pdf_url)

        return filename
Beispiel #3
0
    def file_download(self, url, file_type, file_name=None):
        """Download *url* into the shared ``..\\download_files\\`` directory.

        Parameters
        ----------
        url : str
            Resource to fetch.
        file_type : str
            File extension including the dot, e.g. ``".pdf"``.
        file_name : str, optional
            Basename without extension; defaults to a fresh random name
            generated per call.

        Returns the saved path on success, or ``None`` when the target file
        already exists or the retry budget is exhausted.
        """
        # BUG FIX: the original default ``file_name=str(random.random())`` was
        # evaluated once at import time, so every default call shared the same
        # name and the exists() check below suppressed all but the first
        # download. A None sentinel restores one random name per call.
        if file_name is None:
            file_name = str(random.random())
        download_dir_path = "..\\download_files\\"
        if not os.path.exists(download_dir_path):
            os.mkdir(download_dir_path)
        download_file_path = download_dir_path + file_name + file_type
        if os.path.exists(download_file_path):
            return
        try_count = 0
        while True:
            try:
                # NOTE(review): each attempt writes to a *new* random name and
                # ignores ``file_name`` — preserved as-is, but looks suspect;
                # verify against callers before changing.
                download_file_path = download_dir_path + str(
                    random.random()) + file_type
                html_analyse = HtmlAnalyse(url)
                html_analyse.download(download_file_path)
                print("File Download Success !!")
                break
            except Exception as e:
                print(sys._getframe().f_code.co_name, url, e)
                try_count += 1
                # Give up sooner on https targets, later on plain http.
                if try_count > 2 and "https" in url:
                    return
                if try_count > 5:
                    return
                self.proxy_pool.remove(self.proxy_ip)
                self.proxy_ip = self.proxy_pool.get()

        return download_file_path
Beispiel #4
0
    def download(self, pdf_url):
        """Fetch *pdf_url* through the current proxy into ``self.path``.

        The PDF is saved under a random filename; the full local path is
        returned. On failure the failing proxy is removed from the pool, a
        new one is taken, and the download is retried recursively.
        """
        filename = self.path + str(random.random()) + '.pdf'
        try:
            html_analyse = HtmlAnalyse(pdf_url, proxy=self.proxy_ip)
            html_analyse.download(filename)
            print("下载完成。。。")
        except Exception as e:
            print(e)
            self.proxy_pool.remove(self.proxy_ip)
            self.proxy_ip = self.proxy_pool.get()
            # BUG FIX: the original recursed here, discarded the result, and
            # then fell through to return the path that had just failed.
            # Return the retry's (existing) file instead.
            return self.download(pdf_url)

        return filename
Beispiel #5
0
 def download(self, pdf_url):
     """Fetch *pdf_url* (through a proxy) and save it as a random PDF.

     The file is written under ``self.path`` with a random basename; the
     full local path is returned.
     """
     target = '{}{}{}'.format(self.path, random.random(), '.pdf')
     HtmlAnalyse(pdf_url, is_proxy=True).download(target)
     print("下载完成。。。")
     return target
Beispiel #6
0
 def download(self, img_url):
     """Fetch *img_url* and save it as a randomly named JPEG.

     The image is written under ``self.path``; the full local path is
     returned.
     """
     target = '%s%s%s' % (self.path, random.random(), '.jpg')
     HtmlAnalyse(img_url).download(target)
     print("下载完成。。。")
     return target