Example #1
0
 def __init__(self, product_url):
     """Remember the product page URL and load its content.

     Args:
         product_url: Address of the product page. It is stored on
             ``self.url`` and handed to ``HtmlAnalyse``.
     """
     self.url = product_url
     # get_bs_contents() is called once, at construction time; its result
     # (presumably a parsed BeautifulSoup tree -- TODO confirm) is kept on
     # self.bs_content.
     self.bs_content = HtmlAnalyse(self.url).get_bs_contents()
Example #2
0
    def get_product_list(self):
        """Collect the result rows of every LeadedR feature search.

        One search form per feature is posted to ``self.url`` through
        ``HtmlAnalyse.post_contents``. Each response is parsed with
        BeautifulSoup's ``html.parser`` and every ``<tr>`` except the first
        one is kept (the first row is presumably a table header -- TODO
        confirm).

        Returns:
            list: The kept ``<tr>`` tags of all searches, in feature order.
        """
        # Features are queried in exactly this order.
        feature_names = (
            "LeadedR_CFilm",
            "LeadedR_Fwirewound",
            "LeadedR_LowOhmWire",
            "LeadedR_MetalFilm",
            "LeadedR_MetalGlazedFilm",
            "LeadedR_OFilm",
            "LeadedR_PulseLoad",
            "LeadedR_Wirewound",
            "LeadedR_ZeroOhm",
            "LeadedR_Cement",
            "LeadedR_AlumiHouse",
        )
        # Filter fields that are sent empty with every search.
        empty_filters = (
            "INDUCTANCE",
            "TOLERANCE",
            "IMPEDANCE",
            "SIZE",
            "POWER",
            "RESISTANCE",
            "TCR",
            "CAPACITANCE",
            "TC",
            "VOLTAGE",
            "FREQUENCY",
            "INSERTIONLOSS",
            "LIFETIME",
            "ANTENNA",
        )

        poster = HtmlAnalyse(self.url)
        collected_rows = []
        for feature_name in feature_names:
            # Field order matches the hand-written forms: the four
            # category/feature fields, the empty filters, then ISSEARCH.
            form = {
                "Category_Radio": "LeadedR",
                "Feature_Radio": feature_name,
                "CATEGORY": "LeadedR",
                "FEATURE": feature_name,
            }
            for filter_key in empty_filters:
                form[filter_key] = ""
            form["ISSEARCH"] = "OK"

            page_text = poster.post_contents(data=form).encode().decode()
            soup = BeautifulSoup(page_text, "html.parser")
            collected_rows.extend(soup.find_all(name="tr")[1:])
        return collected_rows
Example #3
0
 def __init__(self, img_url):
     """Split the image/URL pair and load the content behind the URL.

     Args:
         img_url: An iterable of exactly two items. The first is stored as
             ``self.img`` and the second as ``self.url``; any other length
             makes the unpacking fail.
     """
     self.img, self.url = img_url
     # Only the second item is passed to HtmlAnalyse; what get_bs_contents()
     # returns is presumably a parsed BeautifulSoup tree -- TODO confirm.
     self.bs_content = HtmlAnalyse(self.url).get_bs_contents()
Example #4
0
 def get_attach(attach_url):
     """Build the attachment (PDF) URL advertised by a page.

     The content loaded from ``attach_url`` is searched for a ``<meta>``
     element whose ``content`` attribute starts with ``/docs/`` and ends
     with ``.pdf``; that value is appended to ``Vishay_Pre_Url``.

     Args:
         attach_url: Address of the page to inspect.

     Returns:
         ``Vishay_Pre_Url`` concatenated with the matched ``content`` value.

     Note:
         No fallback exists for a page without such a ``<meta>`` element;
         presumably ``find`` yields ``None`` then and the ``.get`` call
         fails -- TODO confirm.
     """
     page = HtmlAnalyse(attach_url).get_bs_contents()
     pdf_path_pattern = re.compile(r'^/docs/.*?\.pdf$')
     meta_tag = page.find(name="meta", attrs={"content": pdf_path_pattern})
     return Vishay_Pre_Url + meta_tag.get("content")
Example #5
0
"""
    @description:   
    @author:        RoyalClown
    @date:          2017/3/7
"""
import re

import requests

from Lib.NetCrawl.HtmlAnalyse import HtmlAnalyse

# Load a GitHub API commit resource through HtmlAnalyse and print whatever
# get_bs_contents() returns for it.
# NOTE(review): this endpoint presumably answers with JSON rather than HTML --
# confirm that get_bs_contents() is meant to be used on it.
html_analyse = HtmlAnalyse('https://api.github.com/repos/kennethreitz/requests/git/commits/a050faf084662f3a352dd1a941f2c7c9f886d4ad')
bs_content = html_analyse.get_bs_contents()
print(bs_content)
Example #6
0
 def get_product_list(self):
     """Return the product rows found in the content behind ``self.url``.

     Returns:
         Every ``<tr>`` element whose ``class`` attribute starts with
         ``doc-``, as returned by ``find_all``.
     """
     row_class_pattern = re.compile(r'^doc-')
     page = HtmlAnalyse(self.url).get_bs_contents()
     return page.find_all(name="tr", attrs={"class": row_class_pattern})
Example #7
0
 def download(self, img_url):
     """Download an image into a randomly named ``.jpg`` file.

     The target name is ``self.path`` followed by the string form of a
     random float and the ``.jpg`` suffix; ``self.path`` is concatenated
     as-is, so it presumably already ends with a path separator -- TODO
     confirm.

     Args:
         img_url: Address of the image, passed to ``HtmlAnalyse``.

     Returns:
         The file name that was handed to ``HtmlAnalyse.download``.
     """
     random_stem = str(random.random())
     target_file = self.path + random_stem + '.jpg'
     HtmlAnalyse(img_url).download(target_file)
     # Console notice; the literal means "download complete...".
     print("下载完成。。。")
     return target_file
Example #8
0
 def __init__(self, url, code):
     """Store the page URL and code, then load the page content.

     Args:
         url: Address of the page; stored as ``self.url`` and handed to
             ``HtmlAnalyse``.
         code: Stored unchanged as ``self.code``; not used further here.
     """
     self.url, self.code = url, code
     # What get_bs_contents() returns is presumably a parsed BeautifulSoup
     # tree -- TODO confirm.
     self.bs_content = HtmlAnalyse(self.url).get_bs_contents()
Example #9
0
        threading_pool = ThreadingPool()
        threading_pool.multi_thread(thread, pdf_urls)


if __name__ == "__main__":
    # pdfdownload = PdfDownload(task_code="CCT2016120900000001")
    #
    # pdfdownload.go()

    # Manual check: take a sample onclick handler, rebuild the download
    # request it describes and post it through HtmlAnalyse.
    a = "downloadLinkClick('E-HTQ_e.pdf',true,false,false);return false"
    # Capture the argument list of downloadLinkClick(...) and split it on
    # commas: [quoted file name, three flag strings].
    content_list = re.match(r'downloadLinkClick\((.*?)\);return false',
                            a).group(1).split(",")
    # The file name argument is still wrapped in single quotes.
    filename = content_list[0].replace("'", "")

    url = "http://ds.yuden.co.jp/TYCOMPAS/cs/detail.do?mode=download&fileName=" + filename

    # The flags are forwarded as the literal strings found in the handler.
    isSeriesData = content_list[1]
    isProductsData = content_list[2]
    isProductsDataGraph = content_list[3]
    DownloadForm = {
        "action": "detail.do",
        "classificationID": "AE",
        "fileName": filename,
        "isSeriesData": isSeriesData,
        "isProductsData": isProductsData,
        "isProductsDataGraph": isProductsDataGraph
    }
    html_analyse = HtmlAnalyse(url)
    html_analyse.post_download(
        data=DownloadForm,
        # Raw string: the same path as before, without depending on the
        # invalid escape sequences "\P", "\S" and "\D" being passed through
        # (a SyntaxWarning on Python 3.12+).
        path=r"I:\PythonPrj\StandardSpider\DataAnalyse\NewRules\a.pdf")
Example #10
0
    def get_product_list(self):
        """Collect the result rows of every Rchip feature search.

        One search form per feature is posted to ``self.url`` through
        ``HtmlAnalyse.post_contents``. Each response is parsed with
        BeautifulSoup's ``html.parser`` and every ``<tr>`` except the first
        one is kept (the first row is presumably a table header -- TODO
        confirm).

        Returns:
            list: The kept ``<tr>`` tags of all searches, in feature order.
        """
        # Features are queried in exactly this order.
        features = (
            "Rchip_Automotive",
            "Rchip_GeneralPurpose",
            "Rchip_LeadFree",
            "Rchip_GPHighPrecision",
            "Rchip_ThinFilm",
            "Rchip_Termination",
            "Rchip_Surge",
            "Rchip_Anti-Sulfurated",
            "Rchip_MLV",
        )
        # Filter fields that are sent empty with every search.
        blank_filters = (
            "INDUCTANCE",
            "TOLERANCE",
            "IMPEDANCE",
            "SIZE",
            "POWER",
            "RESISTANCE",
            "TCR",
            "CAPACITANCE",
            "TC",
            "VOLTAGE",
            "FREQUENCY",
            "INSERTIONLOSS",
            "LIFETIME",
            "ANTENNA",
        )

        html_analyse = HtmlAnalyse(self.url)
        all_product_tags = []
        for feature in features:
            # Every form is built from one template so that "Feature_Radio"
            # and "FEATURE" always name the same feature. Hand-writing the
            # forms is how the LeadFree search ended up without a
            # "Feature_Radio" field (key and value had been mixed up).
            data = {
                "Category_Radio": "Rchip",
                "Feature_Radio": feature,
                "CATEGORY": "Rchip",
                "FEATURE": feature,
            }
            data.update(dict.fromkeys(blank_filters, ""))
            data["ISSEARCH"] = "OK"

            # The encode/decode round trip is kept unchanged from the
            # existing request handling.
            contents = html_analyse.post_contents(data=data).encode().decode()
            bs_contents = BeautifulSoup(contents, "html.parser")
            all_product_tags.extend(bs_contents.find_all(name="tr")[1:])
        return all_product_tags