예제 #1
0
class FaceSpider:
    """Crawler that saves every image URL known to its URL manager."""

    def __init__(self):
        # Collaborators: the manager supplies image URLs, the
        # downloader fetches and persists them.
        self.urlmanager = UrlManager()
        self.downloader = HtmlDownloader()

    def crawl(self, path):
        """Download the manager's image URLs into *path*."""
        self.downloader.save_fig(self.urlmanager.img_urls, path)
예제 #2
0
    def parse(self, url):
        """Fetch *url*, scrape (ip, port, protocol) triples from the
        page's ``<td>`` text nodes, and bulk-insert them into the
        ``iprecord`` table.

        Each record is assumed to occupy 7 consecutive text nodes,
        with the IP at offset 0, the port at offset 1 and the protocol
        at offset 3.  # assumes this page layout — TODO confirm
        """
        response = HtmlDownloader.download(url)

        tree = etree.HTML(response)
        nodes = tree.xpath("//td/text()")

        rows = []
        # Stride by 7 (one record per stride).  The original scanned
        # every index with `count % 7 == 0` and then read `count + 3`,
        # which raises IndexError on a trailing partial record; the
        # upper bound here guarantees every indexed field exists.
        for start in range(0, len(nodes) - 3, 7):
            rows.append(
                "('" + nodes[start] + "','" + nodes[start + 1]
                + "','" + nodes[start + 3] + "'),"
            )
        if not rows:
            # Nothing scraped: the original would have executed a
            # syntactically invalid INSERT with no VALUES tuple.
            return
        txt = "".join(rows)[:-1]
        # NOTE(review): scraped text is interpolated straight into the
        # SQL string, so a hostile page could inject SQL.  Prefer a
        # parameterized executemany() if self.db supports it (PEP 249).
        sql = "insert into iprecord(ip,port,protocol) values" + txt
        print(sql)
        self.db.execute(sql)
예제 #3
0
class MenetSpider:
    """Crawler that fetches one page per index and parses it into a
    DataFrame of drug-registration records."""

    def __init__(self):
        # Pipeline collaborators: URL source, page fetcher, HTML
        # parser, and result sink.
        self.downloader = HtmlDownloader()
        self.urlmanager = UrlManager()
        self.parser = HtmlParser()
        self.output = DataOutput()

    def crawl(self, i):
        """Crawl page *i* and return the parsed data.

        On any failure, log the index and return a one-row all-zero
        placeholder DataFrame with the expected column labels.
        """
        try:
            print(f"Process {i} is running")
            url = self.urlmanager.get_new_url(i)
            html = self.downloader.download(url)
            data = self.parser.parser(html)
            return data
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit still propagate.
            print(f"crawl failed at {i}")
            # BUG FIX: the original passed a flat list of 9 scalars,
            # which pandas treats as 9 rows x 1 column and raises
            # ValueError against the 9 column labels — the fallback
            # path itself crashed.  Wrap in an outer list to build a
            # single row of 9 zeros.
            return pd.DataFrame([[0] * 9],
                                columns=[
                                    '编码', "药品名称", "生产企业", "批文文号", "商品名", "剂型",
                                    "规格", "进口国产", "批准日期"
                                ])
예제 #4
0
 def __init__(self):
     """Wire up the crawler's collaborators."""
     # Fetches pages/images from the web.
     self.downloader = HtmlDownloader()
     # Tracks/supplies the URLs to crawl.
     self.urlmanager = UrlManager()
예제 #5
0
 def __init__(self):
     """Wire up the full crawl pipeline's collaborators."""
     # Fetches raw HTML from the web.
     self.downloader = HtmlDownloader()
     # Tracks/supplies the URLs to crawl.
     self.urlmanager = UrlManager()
     # Extracts structured data from fetched HTML.
     self.parser = HtmlParser()
     # Persists/exports the parsed results.
     self.output = DataOutput()