def parse(self, response):
    """Yield one ``collection75Item`` for museum 125 per ``<tr>`` in the response.

    Args:
        response: Scrapy response whose document is queried with XPath.

    Yields:
        collection75Item: item with ``museumID``, ``collectionName``,
        ``collectionIntroduction`` and ``collectionImage`` set. A field is
        ``None`` when its XPath matches nothing.
    """
    from urllib.parse import urljoin

    site_root = "http://tour.dha.ac.cn"
    for row in response.xpath("//tr"):
        item = collection75Item()
        item["museumID"] = 125
        # NOTE(review): "..//" starts the search at the row's parent, so every
        # row may resolve to the same first match; ".//" may have been
        # intended -- confirm against the live page before changing it.
        item["collectionName"] = row.xpath(
            "..//div[2]/div[1]/a/text()").extract_first()
        item["collectionIntroduction"] = row.xpath(
            ".//div//div[2]/text()").extract_first()
        image_src = row.xpath(".//a/img/@src").extract_first()
        # Resolve against the bare site root (no surrounding parentheses) and
        # keep None for rows without an image instead of storing "...None".
        item["collectionImage"] = (
            urljoin(site_root, image_src) if image_src else None)
        yield item
def parse(self, response):
    """Emit one ``collection75Item`` for museum 105 per matched ``<li>``.

    For each list entry the name comes from the text of its ``<p>`` child,
    and the introduction and image URL come from the ``name2`` and ``src``
    attributes of the ``<img>`` inside its ``<div>`` child. A field is
    ``None`` when its XPath matches nothing.

    Args:
        response: Scrapy response whose document is queried with XPath.

    Yields:
        collection75Item: the populated item for each entry.
    """
    entries = response.xpath(
        "/html//div[3]/div/div[2]/div/div/div[1]/ul/li")
    for entry in entries:
        name = entry.xpath("./p/text()").extract_first()
        introduction = entry.xpath("./div/img/@name2").extract_first()
        image_url = entry.xpath("./div/img/@src").extract_first()

        record = collection75Item()
        record["museumID"] = 105
        record["collectionName"] = name
        record["collectionIntroduction"] = introduction
        record["collectionImage"] = image_url
        yield record
def parse(self, response):
    """Request the detail page of each collection entry for museum 109.

    Args:
        response: Scrapy response whose document is queried with XPath.

    Yields:
        scrapy.Request: one request per entry that has a link, handled by
        ``self.parse_detail`` and carrying the partially filled item in
        ``meta["item"]`` (``museumID``, ``collectionName``,
        ``collectionImage``).
    """
    from urllib.parse import urljoin

    site_root = "http://www.ynnmuseum.com"
    for entry in response.xpath("//div[2]//div[2]/div/div/div[2]//ul/li"):
        detail_href = entry.xpath("./div[1]/div/a/@href").extract_first()
        if not detail_href:
            # The outer XPath also matches <li> elements that carry no link
            # (e.g. nested ones); requesting "<root>None" for them is useless.
            continue

        item = collection75Item()
        item["museumID"] = 109
        item["collectionName"] = entry.xpath(
            "./div[2]/ul/li/h1/strong/a/text()").extract_first()
        image_src = entry.xpath("./div[1]/div/a/img/@src").extract_first()
        # Keep None for a missing image rather than storing "...None".
        item["collectionImage"] = (
            urljoin(site_root, image_src) if image_src else None)

        yield scrapy.Request(
            urljoin(site_root, detail_href),
            callback=self.parse_detail,
            meta={"item": item},  # hand the partial item to the detail callback
        )
def parse(self, response):
    """Request the detail page of each collection entry for museum 117.

    Args:
        response: Scrapy response whose document is queried with XPath.

    Yields:
        scrapy.Request: one request per entry that has a link, handled by
        ``self.parse_detail`` and carrying the partially filled item in
        ``meta["item"]`` (``museumID``, ``collectionName``,
        ``collectionImage``).
    """
    from urllib.parse import urljoin

    site_root = "http://www.hylae.com/"
    for entry in response.xpath("/html//div[3]/div[2]/ul/li"):
        detail_href = entry.xpath("./span/a/@href").extract_first()
        if not detail_href:
            # scrapy.Request rejects a missing URL; skip entries without one.
            continue

        item = collection75Item()
        item["museumID"] = 117
        item["collectionName"] = entry.xpath(
            "./span[2]/a/text()").extract_first()
        image_src = entry.xpath("./span[1]/a/img/@src").extract_first()
        # A missing src used to raise TypeError on concatenation and abort
        # the remaining entries of the page; store None instead.
        item["collectionImage"] = (
            urljoin(site_root, image_src) if image_src else None)

        yield scrapy.Request(
            # Absolute hrefs pass through unchanged; relative ones are
            # resolved against the current page so Request accepts them.
            response.urljoin(detail_href),
            callback=self.parse_detail,
            meta={"item": item},  # hand the partial item to the detail callback
        )
def parse(self, response):
    """Request the detail page of each collection entry for museum 118.

    Args:
        response: Scrapy response whose document is queried with XPath.

    Yields:
        scrapy.Request: one request per matched ``<ul>`` that has a link,
        handled by ``self.parse_detail`` and carrying the partially filled
        item in ``meta["item"]`` (``museumID``, ``collectionName``).
    """
    groups = response.xpath(
        "//table[4]//table[2]//td[3]/table[3]//tr[1]//ul")
    for group in groups:
        detail_href = group.xpath("./li/a/@href").extract_first()
        if not detail_href:
            # scrapy.Request rejects a missing URL; skip entries without one.
            continue

        item = collection75Item()
        item["museumID"] = 118
        item["collectionName"] = group.xpath("./li/a/text()").extract_first()

        yield scrapy.Request(
            # Absolute hrefs pass through unchanged; relative ones are
            # resolved against the current page so Request accepts them.
            response.urljoin(detail_href),
            callback=self.parse_detail,
            meta={"item": item},  # hand the partial item to the detail callback
        )
def parse(self, response):
    """Request the detail page of each collection entry for museum 126.

    Args:
        response: Scrapy response whose document is queried with XPath.

    Yields:
        scrapy.Request: one request per entry that has a link, handled by
        ``self.parse_detail`` and carrying the partially filled item in
        ``meta["item"]`` (``museumID``, ``collectionName``,
        ``collectionImage``).
    """
    from urllib.parse import urljoin

    site_root = "http://www.nxgybwg.com"
    for entry in response.xpath("//div[2]/div/div[2]/div[2]/div[2]/ul/li"):
        detail_href = entry.xpath("./div/a/@href").extract_first()
        if not detail_href:
            # A missing href used to raise TypeError on concatenation and
            # abort the remaining entries of the page; skip the entry instead.
            continue

        item = collection75Item()
        item["museumID"] = 126
        item["collectionName"] = entry.xpath("./p/a/text()").extract_first()
        # The image src is stored exactly as it appears in the page.
        item["collectionImage"] = entry.xpath(".//a/img/@src").extract_first()

        yield scrapy.Request(
            urljoin(site_root, detail_href),
            callback=self.parse_detail,
            meta={"item": item},  # hand the partial item to the detail callback
        )
def parse(self, response):
    """Request the detail page of each collection entry for museum 127.

    Args:
        response: Scrapy response whose document is queried with XPath.

    Yields:
        scrapy.Request: one request per entry that has a link, handled by
        ``self.parse_detail`` and carrying the partially filled item in
        ``meta["item"]`` (``museumID``, ``collectionName``,
        ``collectionImage``).
    """
    from urllib.parse import urljoin

    site_root = "https://www.nxbwg.com"
    for card in response.xpath("//div[@class='item pb-item grid__item']"):
        detail_href = card.xpath("./a/@href").extract_first()
        if not detail_href:
            # A missing href used to raise TypeError on concatenation and
            # abort the remaining entries of the page; skip the entry instead.
            continue

        item = collection75Item()
        item["museumID"] = 127
        item["collectionName"] = card.xpath("./h3/a/text()").extract_first()
        # The image src is stored exactly as it appears in the page.
        item["collectionImage"] = card.xpath(".//img/@src").extract_first()

        yield scrapy.Request(
            urljoin(site_root, detail_href),
            callback=self.parse_detail,
            meta={"item": item},  # hand the partial item to the detail callback
        )
def parse(self, response):
    """Request the detail page of each collection entry for museum 112.

    Args:
        response: Scrapy response whose document is queried with XPath.

    Yields:
        scrapy.Request: one request per entry that has a link, handled by
        ``self.parse_detail`` and carrying the partially filled item in
        ``meta["item"]`` (``museumID``, ``collectionName``,
        ``collectionImage``).
    """
    from urllib.parse import urljoin

    site_root = "https://www.cmnh.org.cn"
    for entry in response.xpath("//div[2]/div[3]//div[2]/div[1]/ul/li"):
        detail_href = entry.xpath("./p/a/@href").extract_first()
        if not detail_href:
            # A missing href used to raise TypeError on concatenation and
            # abort the remaining entries of the page; skip the entry instead.
            continue

        item = collection75Item()
        item["museumID"] = 112
        # Select the heading's text node; selecting the <h6> element itself
        # stored its serialized markup as the collection name.
        item["collectionName"] = entry.xpath(
            "./div/div[2]/a/h6/text()").extract_first()
        image_src = entry.xpath("./p/a/img/@src").extract_first()
        # Keep None for a missing image instead of raising on concatenation.
        item["collectionImage"] = (
            urljoin(site_root, image_src) if image_src else None)

        yield scrapy.Request(
            urljoin(site_root, detail_href),
            callback=self.parse_detail,
            meta={"item": item},  # hand the partial item to the detail callback
        )
def parse(self, response):
    """Yield the items of one page for museum 119, then request the next page.

    Reads and updates ``self.offset`` and builds follow-up URLs as
    ``self.base_url + str(self.offset)``.

    Args:
        response: Scrapy response whose document is queried with XPath.

    Yields:
        collection75Item: one item per ``div.con12`` block, with
        ``collectionIntroduction`` set to a single space.
        scrapy.Request: follow-up page requests handled by ``self.parse``.
    """
    from urllib.parse import urljoin

    site_root = "http://www.zgshm.cn/"
    for block in response.xpath("//div[@class='con12']"):
        item = collection75Item()
        item["museumID"] = 119
        item["collectionName"] = block.xpath(".//h3/a/text()").extract_first()
        image_src = block.xpath(".//img/@src").extract_first()
        # A missing src used to raise TypeError on concatenation and abort
        # the remaining blocks of the page; store None instead.
        item["collectionImage"] = (
            urljoin(site_root, image_src) if image_src else None)
        item["collectionIntroduction"] = ' '
        yield item

    # NOTE(review): the source formatting was lost; pagination is assumed to
    # run once per page, after the item loop -- confirm.
    # The follow-up requests carry no meta: this callback never reads
    # response.meta, and referencing the loop's last item here failed with
    # UnboundLocalError on a page without any matching block.
    if self.offset < 21:
        self.offset += 1
        yield scrapy.Request(
            self.base_url + str(self.offset), callback=self.parse)
    # Once the offset reaches 21 it jumps straight to 40, so after this point
    # neither branch fires again.
    if self.offset == 21:
        self.offset = 40
        yield scrapy.Request(
            self.base_url + str(self.offset), callback=self.parse)
def parse(self, response):
    """Request the detail page of each collection entry for museum 120.

    Every ``<li>`` below each ``div.infolist01`` container is treated as one
    entry.

    Args:
        response: Scrapy response whose document is queried with XPath.

    Yields:
        scrapy.Request: one request per entry that has a link, handled by
        ``self.parse_detail`` and carrying the partially filled item in
        ``meta["item"]`` (``museumID``, ``collectionImage``,
        ``collectionName``).
    """
    for container in response.xpath("//div[@class='infolist01']"):
        for entry in container.xpath(".//li"):
            detail_href = entry.xpath(".//a/@href").extract_first()
            if not detail_href:
                # scrapy.Request rejects a missing URL; skip such entries.
                continue

            item = collection75Item()
            item["museumID"] = 120
            # The image src is stored exactly as it appears in the page.
            item["collectionImage"] = entry.xpath(
                ".//img/@src").extract_first()
            item["collectionName"] = entry.xpath(
                ".//a/text()").extract_first()

            # Handle the detail page.
            yield scrapy.Request(
                # Absolute hrefs pass through unchanged; relative ones are
                # resolved against the current page so Request accepts them.
                response.urljoin(detail_href),
                callback=self.parse_detail,
                meta={"item": item},  # pass the partial item along
            )