예제 #1
0
 def parse(self, response):
     """Yield one collection item for every table row on the page.

     Each ``<tr>`` of the response becomes a ``collection75Item`` holding
     the museum id 125, the collection name, its introduction text and an
     absolute image URL.  A field whose node is absent from the row is
     stored as ``None``.

     :param response: listing-page response to extract rows from.
     """
     image_base = "http://tour.dha.ac.cn"
     for row in response.xpath("//tr"):
         item = collection75Item()
         item["museumID"] = 125
         # Search from the row itself: starting at the parent ("..//")
         # returns the first match of the surrounding table, i.e. the
         # same name for every row that shares that parent.
         item["collectionName"] = row.xpath(
             ".//div[2]/div[1]/a/text()").extract_first()
         item["collectionIntroduction"] = row.xpath(
             ".//div//div[2]/text()").extract_first()
         image_src = row.xpath(".//a/img/@src").extract_first()
         # Only build a URL when the row really has an image; gluing the
         # host onto a missing value would store a bogus "...None" link.
         item["collectionImage"] = (
             image_base + image_src if image_src else None)
         yield item
예제 #2
0
 def parse(self, response):
     """Yield one collection item per ``<li>`` entry of the listing.

     The name comes from the entry's ``<p>`` text, the introduction from
     the ``name2`` attribute of its image and the image URL from the
     image's ``src`` attribute.  Every item is tagged with museum id 105.

     :param response: listing-page response to extract entries from.
     """
     entries = response.xpath(
         "/html//div[3]/div/div[2]/div/div/div[1]/ul/li")
     for entry in entries:
         name = entry.xpath("./p/text()").extract_first()
         introduction = entry.xpath("./div/img/@name2").extract_first()
         image = entry.xpath("./div/img/@src").extract_first()

         item = collection75Item()
         item["museumID"] = 105
         item["collectionName"] = name
         item["collectionIntroduction"] = introduction
         item["collectionImage"] = image
         yield item
예제 #3
0
 def parse(self, response):
     """Schedule a detail-page request for every listed collection.

     For each matched ``<li>`` a ``collection75Item`` is pre-filled with
     museum id 109, the collection name and the absolute image URL, and
     is handed to ``self.parse_detail`` through the request ``meta``.
     Entries that carry no detail link are skipped.

     :param response: listing-page response to extract entries from.
     """
     site = "http://www.ynnmuseum.com"
     for entry in response.xpath("//div[2]//div[2]/div/div/div[2]//ul/li"):
         detail_href = entry.xpath("./div[1]/div/a/@href").extract_first()
         if not detail_href:
             # Without a link there is nothing to follow; concatenating
             # the host with a missing value would request "...None".
             continue

         item = collection75Item()
         item["museumID"] = 109
         item["collectionName"] = entry.xpath(
             "./div[2]/ul/li/h1/strong/a/text()").extract_first()
         image_src = entry.xpath("./div[1]/div/a/img/@src").extract_first()
         item["collectionImage"] = site + image_src if image_src else None
         yield scrapy.Request(
             site + detail_href,
             callback=self.parse_detail,
             meta={"item": item}  # pass the pre-filled item to the callback
         )
예제 #4
0
 def parse(self, response):
     """Schedule a detail-page request for every listed collection.

     For each matched ``<li>`` a ``collection75Item`` is pre-filled with
     museum id 117, the collection name and the absolute image URL, and
     is handed to ``self.parse_detail`` through the request ``meta``.
     Entries that carry no detail link are skipped.

     :param response: listing-page response to extract entries from.
     """
     for entry in response.xpath("/html//div[3]/div[2]/ul/li"):
         detail_href = entry.xpath("./span/a/@href").extract_first()
         if not detail_href:
             # A request cannot be built from a missing URL.
             continue

         item = collection75Item()
         item["museumID"] = 117
         item["collectionName"] = entry.xpath(
             "./span[2]/a/text()").extract_first()
         image_src = entry.xpath("./span[1]/a/img/@src").extract_first()
         # A missing image must not abort the whole page with a TypeError.
         item["collectionImage"] = (
             "http://www.hylae.com/" + image_src if image_src else None)
         yield scrapy.Request(
             # urljoin leaves absolute links untouched and resolves
             # relative ones against the page URL.
             response.urljoin(detail_href),
             callback=self.parse_detail,
             meta={"item": item}  # pass the pre-filled item to the callback
         )
예제 #5
0
 def parse(self, response):
     """Schedule a detail-page request for every matched ``<ul>``.

     For each matched list the first ``li/a`` link supplies the
     collection name and the detail URL.  A ``collection75Item`` tagged
     with museum id 118 is handed to ``self.parse_detail`` through the
     request ``meta``.  Lists without a link are skipped.

     :param response: listing-page response to extract entries from.
     """
     groups = response.xpath(
         "//table[4]//table[2]//td[3]/table[3]//tr[1]//ul")
     for group in groups:
         detail_href = group.xpath("./li/a/@href").extract_first()
         if not detail_href:
             # A request cannot be built from a missing URL.
             continue

         item = collection75Item()
         item["museumID"] = 118
         item["collectionName"] = group.xpath(
             "./li/a/text()").extract_first()
         yield scrapy.Request(
             # urljoin leaves absolute links untouched and resolves
             # relative ones against the page URL.
             response.urljoin(detail_href),
             callback=self.parse_detail,
             meta={"item": item}  # pass the pre-filled item to the callback
         )
예제 #6
0
 def parse(self, response):
     """Schedule a detail-page request for every listed collection.

     For each matched ``<li>`` a ``collection75Item`` is pre-filled with
     museum id 126, the collection name and the image ``src`` value, and
     is handed to ``self.parse_detail`` through the request ``meta``.
     Entries that carry no detail link are skipped.

     :param response: listing-page response to extract entries from.
     """
     for entry in response.xpath("//div[2]/div/div[2]/div[2]/div[2]/ul/li"):
         detail_href = entry.xpath("./div/a/@href").extract_first()
         if not detail_href:
             # Concatenating the host with a missing link raises
             # TypeError and would abort the rest of the page.
             continue

         item = collection75Item()
         item["museumID"] = 126
         item["collectionName"] = entry.xpath("./p/a/text()").extract_first()
         item["collectionImage"] = entry.xpath(
             ".//a/img/@src").extract_first()
         yield scrapy.Request(
             'http://www.nxgybwg.com' + detail_href,
             callback=self.parse_detail,
             meta={"item": item}  # pass the pre-filled item to the callback
         )
예제 #7
0
 def parse(self, response):
     """Schedule a detail-page request for every grid item.

     For each ``div`` with class ``item pb-item grid__item`` a
     ``collection75Item`` is pre-filled with museum id 127, the
     collection name and the image ``src`` value, and is handed to
     ``self.parse_detail`` through the request ``meta``.  Grid items
     that carry no detail link are skipped.

     :param response: listing-page response to extract entries from.
     """
     for card in response.xpath("//div[@class='item pb-item grid__item']"):
         detail_href = card.xpath("./a/@href").extract_first()
         if not detail_href:
             # Concatenating the host with a missing link raises
             # TypeError and would abort the rest of the page.
             continue

         item = collection75Item()
         item["museumID"] = 127
         item["collectionName"] = card.xpath("./h3/a/text()").extract_first()
         item["collectionImage"] = card.xpath(".//img/@src").extract_first()
         yield scrapy.Request(
             'https://www.nxbwg.com' + detail_href,
             callback=self.parse_detail,
             meta={"item": item}  # pass the pre-filled item to the callback
         )
예제 #8
0
 def parse(self, response):
     """Schedule a detail-page request for every listed collection.

     For each matched ``<li>`` a ``collection75Item`` is pre-filled with
     museum id 112, the collection name and the absolute image URL, and
     is handed to ``self.parse_detail`` through the request ``meta``.
     Entries that carry no detail link are skipped.

     :param response: listing-page response to extract entries from.
     """
     site = "https://www.cmnh.org.cn"
     for entry in response.xpath("//div[2]/div[3]//div[2]/div[1]/ul/li"):
         detail_href = entry.xpath("./p/a/@href").extract_first()
         if not detail_href:
             # Concatenating the host with a missing link raises
             # TypeError and would abort the rest of the page.
             continue

         item = collection75Item()
         item["museumID"] = 112
         # Select the heading's text node; selecting the element itself
         # stores the serialized "<h6>...</h6>" markup as the name.
         item["collectionName"] = entry.xpath(
             "./div/div[2]/a/h6/text()").extract_first()
         image_src = entry.xpath("./p/a/img/@src").extract_first()
         item["collectionImage"] = site + image_src if image_src else None
         yield scrapy.Request(
             site + detail_href,
             callback=self.parse_detail,
             meta={"item": item}  # pass the pre-filled item to the callback
         )
예제 #9
0
    def parse(self, response):
        """Yield the items of one listing page and request further pages.

        Every ``div`` with class ``con12`` becomes a ``collection75Item``
        with museum id 119, the collection name, an absolute image URL
        (``None`` when the entry has no image) and a single-space
        introduction.

        Afterwards ``self.offset`` drives pagination: while it is below 21
        it is incremented and ``self.base_url + str(self.offset)`` is
        requested.  When it equals 21 it jumps to 40 and that page is
        requested too; beyond that no further page is scheduled.

        :param response: listing-page response to extract entries from.
        """
        for entry in response.xpath("//div[@class='con12']"):
            item = collection75Item()
            item["museumID"] = 119
            item["collectionName"] = entry.xpath(
                ".//h3/a/text()").extract_first()
            image_src = entry.xpath(".//img/@src").extract_first()
            # A missing image must not abort the whole page with a TypeError.
            item["collectionImage"] = (
                "http://www.zgshm.cn/" + image_src if image_src else None)
            item["collectionIntroduction"] = ' '
            yield item

        # The follow-up requests carry no meta: this callback never reads
        # it, and referring to the loop variable here fails with
        # UnboundLocalError whenever a page contains no entries.
        if self.offset < 21:
            self.offset += 1
            next_url = self.base_url + str(self.offset)
            yield scrapy.Request(next_url, callback=self.parse)
        if self.offset == 21:
            self.offset = 40
            next_url = self.base_url + str(self.offset)
            yield scrapy.Request(next_url, callback=self.parse)
예제 #10
0
    def parse(self, response):
        """Schedule a detail-page request for every listed collection.

        Every ``<li>`` below a ``div`` with class ``infolist01`` yields a
        ``collection75Item`` pre-filled with museum id 120, the image
        ``src`` value and the link text as collection name.  The item is
        handed to ``self.parse_detail`` through the request ``meta``.
        Entries that carry no link are skipped.

        :param response: listing-page response to extract entries from.
        """
        for block in response.xpath("//div[@class='infolist01']"):
            for entry in block.xpath(".//li"):
                detail_href = entry.xpath(".//a/@href").extract_first()
                if not detail_href:
                    # A request cannot be built from a missing URL.
                    continue

                item = collection75Item()
                item["museumID"] = 120
                item["collectionImage"] = entry.xpath(
                    ".//img/@src").extract_first()
                item["collectionName"] = entry.xpath(
                    ".//a/text()").extract_first()

                # Follow the detail page.
                yield scrapy.Request(
                    # urljoin leaves absolute links untouched and resolves
                    # relative ones against the page URL.
                    response.urljoin(detail_href),
                    callback=self.parse_detail,
                    meta={"item": item}  # pass the pre-filled item on
                )