def parse_item(self, response):
    """Populate a ``MeizituItem`` from *response* through an ``ItemLoader``.

    Three fields are fed to the loader:

    * ``title`` -- text of the ``//h2/a`` nodes,
    * ``image_urls`` -- ``src`` of every ``<img>`` below ``div#picture``,
    * ``url`` -- the URL of the response itself.

    Args:
        response: Response handed to the loader; its ``url`` attribute is
            also stored on the item.

    Returns:
        Whatever ``ItemLoader.load_item()`` produces for the collected data.
    """
    loader = ItemLoader(item=MeizituItem(), response=response)
    loader.add_xpath('title', '//h2/a/text()')
    loader.add_xpath('image_urls', '//div[@id="picture"]//img/@src')
    loader.add_value('url', response.url)
    return loader.load_item()
def parse_item(self, response):
    """Populate a ``MeizituItem`` with URL, page key, tags and image URLs.

    Args:
        response: Response handed to the ``ItemLoader``; its ``url``
            attribute is printed, stored, and used to derive ``page``.

    Returns:
        Whatever ``ItemLoader.load_item()`` produces for the collected data.
    """
    page_url = response.url
    print('parse=========', page_url)

    loader = ItemLoader(item=MeizituItem(), response=response)
    loader.add_value('url', page_url)

    # Drop the first len(self.host) characters and flatten the remaining
    # slashes into underscores.
    # NOTE(review): presumably every URL starts with self.host -- confirm.
    remainder = page_url[len(self.host):]
    loader.add_value('page', remainder.replace('/', '_'))

    loader.add_xpath(
        'tags',
        "/html/body/div[2]/ul[@id='article']/li/h2/a/text()",
    )
    loader.add_xpath('image_urls', "//div[@class='pic']/a/img/@src")
    return loader.load_item()
def parse_detail(self, response):
    """Yield a single ``MeizituItem`` describing the page in *response*.

    The item carries:

    * ``title`` -- first text match of ``//h2/a`` (as returned by ``.get()``),
    * ``image_urls`` -- list of ``src`` values of every ``<img>`` below
      ``div#picture``,
    * ``url`` -- the URL of the response.

    Args:
        response: Response exposing ``xpath()`` and ``url``.

    Yields:
        The populated ``MeizituItem``.
    """
    detail = MeizituItem()
    heading = response.xpath('//h2/a/text()').get()
    picture_sources = response.xpath('//div[@id="picture"]//img/@src').extract()

    detail['title'] = heading
    detail['image_urls'] = picture_sources
    detail['url'] = response.url
    yield detail
def parse(self, response):
    """Schedule a detail-page request for every entry of the ``pins`` list.

    Each ``<li>`` under ``ul#pins`` contributes one ``MeizituItem`` with
    ``img_name`` (the ``span/a`` text) and ``img_link`` (the ``span/a`` href).
    The item travels to ``self.detail_page`` through the request's ``meta``
    under the ``"item"`` key.

    Entries without an href are skipped: ``scrapy.Request`` cannot be built
    without a URL, and raising here would abort the remaining entries too.
    The href is resolved against the response URL so that relative links
    produce valid requests; absolute links are unaffected.

    Args:
        response: Response exposing ``xpath()`` and ``urljoin()``.

    Yields:
        One ``scrapy.Request`` per list entry that has a link.
    """
    for node in response.xpath("//ul[@id='pins']/li"):
        link = node.xpath('./span/a/@href').extract_first()
        if not link:
            # extract_first() gives None when the <li> has no span/a/@href.
            continue

        item = MeizituItem()
        item['img_name'] = node.xpath('./span/a/text()').extract_first()
        item['img_link'] = link
        yield scrapy.Request(
            url=response.urljoin(link),
            callback=self.detail_page,
            meta={"item": item},
        )
def parse(self, response):
    """Yield one ``MeizituItem`` whose ``image`` field lists image sources.

    The sources are the ``src`` attributes matched by the fixed
    ``div.wrap2`` XPath below, collected into a single list.

    Args:
        response: Response wrapped in a ``scrapy.selector.Selector``.

    Yields:
        A single ``MeizituItem`` with the ``image`` field set.
    """
    selector = scrapy.selector.Selector(response)
    image_sources = selector.xpath(
        '//div[@class="wrap2"]/div/div/dl/dt/a/img/@src'
    ).extract()

    result = MeizituItem()
    result['image'] = image_sources
    yield result
def parse_detail(self, response):
    """Yield one ``MeizituItem`` per ``<img>`` found on the page.

    Every item carries the image ``src`` (``image``), the page title taken
    from the first ``<h5>`` text node (``title``), and the response URL
    (``url``).

    The title is read with ``extract_first()`` rather than ``extract()[0]``,
    so a page without any ``<h5>`` text yields items with ``title`` set to
    ``None`` instead of raising ``IndexError`` and losing all of its images.

    Args:
        response: Response exposing ``xpath()`` and ``url``.

    Yields:
        A ``MeizituItem`` for each image source on the page.
    """
    # A page holds many images: //img/@src matches every <img> on it.
    images = response.xpath("//img/@src").extract()
    title = response.xpath("//h5/text()").extract_first()
    page_url = response.url

    for image in images:
        item = MeizituItem()
        item["image"] = image
        item["title"] = title
        item["url"] = page_url
        yield item