Ejemplo n.º 1
0
    def parse(self, response):
        """Yield one item per movie entry, then requests for the paginator links.

        Every node matched by the ``grid_view`` XPath is turned into a
        ``top100Item`` whose rank, name and URL fields are filled from that
        node.  After the items, each link found in the paginator (except the
        ``?start=0&filter=`` one) is yielded as a ``Request`` that is handled
        by this same callback.

        Args:
            response: the page handed to ``Selector``.

        Yields:
            ``top100Item`` objects first, then ``Request`` objects.
        """
        sel = Selector(response)
        sites = sel.xpath('//ol[@class="grid_view"]/li/div/div[@class="pic"]')

        for site in sites:
            item = top100Item()
            # Each field stores the raw result of .extract() for its XPath.
            item["movie_rank"] = site.xpath("em/text()").extract()
            item["movie_name"] = site.xpath("a/img/@alt").extract()
            item["movie_url"] = site.xpath("a/@href").extract()
            yield item

        # Get the URLs of the other top250 pages.
        urls = sel.xpath('//div[@class="paginator"]/a/@href').extract()

        # Every paginator link is followed; they are requested from the last
        # link to the first.
        for url in reversed(urls):
            # Presumably the link back to the first page -- skip it.
            if url == "?start=0&filter=":
                continue
            # The hrefs are appended as-is to the listing URL.
            yield Request(
                "http://movie.douban.com/top250" + url, callback=self.parse
            )

        """i=0
Ejemplo n.º 2
0
    def parse(self, response):
        """Return a list with one ``top100Item`` per movie entry on the page.

        Each node matched by the ``grid_view`` XPath yields one item whose
        ``movie_rank``, ``movie_name`` and ``movie_url`` fields hold the
        result of ``.extract()`` for the corresponding relative XPath.

        Args:
            response: the page handed to ``Selector``.

        Returns:
            A list of populated ``top100Item`` objects, in document order of
            the matched nodes.
        """
        # Item field -> XPath relative to each matched entry node.
        field_paths = (
            ('movie_rank', 'em/text()'),
            ('movie_name', 'a/img/@alt'),
            ('movie_url', 'a/@href'),
        )

        entries = Selector(response).xpath(
            '//ol[@class="grid_view"]/li/div/div[@class="pic"]'
        )

        collected = []
        for entry in entries:
            record = top100Item()
            for field, path in field_paths:
                record[field] = entry.xpath(path).extract()
            collected.append(record)
        return collected