예제 #1
0
 def parse_item(self, response):
     """Build a ``MeizituItem`` from ``response`` through an ``ItemLoader``.

     The loader collects the ``title`` and ``image_urls`` fields via XPath
     and stores the response URL in the ``url`` field.

     Returns whatever ``load_item()`` produces for the populated loader.
     """
     loader = ItemLoader(item=MeizituItem(), response=response)
     # Text of the link inside the <h2> heading(s).
     loader.add_xpath('title', '//h2/a/text()')
     # src of every <img> nested anywhere below the element with id "picture".
     loader.add_xpath('image_urls', '//div[@id="picture"]//img/@src')
     loader.add_value('url', response.url)
     return loader.load_item()
예제 #2
0
파일: mzitu.py 프로젝트: limmi/spider
 def parse_item(self, response):
     """Populate a ``MeizituItem`` with URL, page key, tags and image URLs.

     Fields collected through the ``ItemLoader``:

     * ``url``        - the response URL as-is.
     * ``page``       - the response URL with its first ``len(self.host)``
       characters removed and every ``/`` replaced by ``_``.
     * ``tags``       - link texts of the ``<h2>`` headings in the
       ``ul#article`` list (absolute XPath from the document root).
     * ``image_urls`` - ``src`` of images inside ``div.pic`` links.

     Returns the item produced by ``load_item()``.
     """
     page_url = response.url
     print('parse=========', page_url)

     loader = ItemLoader(item=MeizituItem(), response=response)
     loader.add_value('url', page_url)

     # Strip the leading host-length prefix, then flatten the path separators.
     page_key = page_url[len(self.host):].replace('/', '_')
     loader.add_value('page', page_key)

     loader.add_xpath(
         'tags',
         "/html/body/div[2]/ul[@id='article']/li/h2/a/text()",
     )
     loader.add_xpath('image_urls', "//div[@class='pic']/a/img/@src")
     return loader.load_item()
예제 #3
0
 def parse_detail(self, response):
     """Yield a single ``MeizituItem`` describing the page in ``response``.

     Fields set on the item:

     * ``title``      - first text match of ``//h2/a``.
     * ``image_urls`` - list of every ``src`` of images below the element
       with id ``picture``.
     * ``url``        - the response URL.
     """
     record = MeizituItem()
     record['title'] = response.xpath('//h2/a/text()').get()
     # getall() is the list-returning counterpart of the get() used above.
     record['image_urls'] = response.xpath(
         '//div[@id="picture"]//img/@src'
     ).getall()
     record['url'] = response.url
     yield record
예제 #4
0
 def parse(self, response):
     """Yield one detail-page request per entry of the ``pins`` list.

     For every ``<li>`` under ``<ul id="pins">`` a ``MeizituItem`` is filled
     with the link text (``img_name``) and the link target (``img_link``).
     The item travels to ``self.detail_page`` in the request's ``meta`` dict
     under the ``"item"`` key.

     Entries without a link target are skipped: ``extract_first()`` yields
     ``None`` for them, and constructing a request from that would raise and
     abort processing of the remaining entries on the page.
     """
     for node in response.xpath("//ul[@id='pins']/li"):
         link = node.xpath('./span/a/@href').extract_first()
         if not link:
             # No usable href on this entry, so there is nothing to follow.
             continue

         item = MeizituItem()
         item['img_name'] = node.xpath('./span/a/text()').extract_first()
         item['img_link'] = link

         # urljoin leaves absolute URLs untouched and resolves relative ones
         # against the current page URL; Request itself rejects relative URLs.
         yield scrapy.Request(url=response.urljoin(link),
                              callback=self.detail_page,
                              meta={"item": item})
예제 #5
0
    def parse(self, response):
        """Yield one ``MeizituItem`` holding the image sources of the page.

        The ``image`` field receives the list of ``src`` attributes matched
        by the XPath below, evaluated through a ``Selector`` built from
        ``response``.
        """
        image_src_xpath = '//div[@class="wrap2"]/div/div/dl/dt/a/img/@src'

        selector = scrapy.selector.Selector(response)
        result = MeizituItem()
        result['image'] = selector.xpath(image_src_xpath).extract()
        yield result
예제 #6
0
    def parse_detail(self, response):
        """Yield one ``MeizituItem`` per image source found on the page.

        Every yielded item carries the same ``title`` (first ``<h5>`` text
        node) and ``url`` (the response URL); only ``image`` differs.

        Raises ``IndexError`` on first iteration when the page contains no
        ``<h5>`` text, because the title is taken with ``[0]``.
        """
        # Lots of images.
        image_sources = response.xpath("//img/@src").extract()
        heading = response.xpath("//h5/text()").extract()[0]
        page_url = response.url

        for src in image_sources:
            entry = MeizituItem()
            entry["image"] = src
            entry["title"] = heading
            entry["url"] = page_url
            yield entry