Example #1
0
    def parse_book(self, response):
        """Extract one ``doubanBookItem`` from ``response`` and yield it.

        Each field is set to the list returned by ``.extract()`` for its
        XPath, so a field holds an empty list when the XPath matches nothing.

        The number of parsed items is tracked on the spider instance
        (``self._parsed_book_count``) so the 30-item cap holds across
        callback invocations. A list local to this method would never hold
        more than one element, which would make the cap unreachable.

        Args:
            response: The response that is wrapped in a ``Selector`` and
                queried.

        Yields:
            The populated ``doubanBookItem``.

        Raises:
            CloseSpider: With reason ``'enough'`` once more than 30 items
                have been parsed by this spider instance.
        """
        sel = Selector(response)

        item = doubanBookItem()
        item['title'] = sel.xpath(
            '//*[@id="wrapper"]/h1/span/text()').extract()
        item['author'] = sel.xpath(
            '//*[@id="info"]/span[1]/a/text()').extract()
        item['intro_content'] = sel.xpath(
            '//*[@id="link-report"]/div[1]/div/p[1]/text()').extract()
        item['intro_author'] = sel.xpath(
            '//*[@id="content"]/div/div[1]/div[3]/div[3]/div/div/p[1]/text()'
        ).extract()
        item['image_urls'] = sel.xpath(
            '//*[@id="mainpic"]/a/img/@src').extract()

        # The counter lives on the instance because this callback runs once
        # per response; only shared state can accumulate across calls.
        parsed_count = getattr(self, '_parsed_book_count', 0) + 1
        self._parsed_book_count = parsed_count
        print('success add one pagecontent')

        # Checked before yielding so that the item exceeding the cap is not
        # emitted.
        if parsed_count > 30:
            raise CloseSpider('enough')

        yield item
Example #2
0
	def parse_book(self, response):
		"""Extract one ``doubanBookItem`` from ``response`` and return it in a list.

		Every field XPath is absolute (starts with ``//``), so it is evaluated
		against the whole document regardless of the context node. The method
		therefore builds a single item per response instead of looping over
		sub-selectors.

		Each field is set to the list returned by ``.extract()`` for its
		XPath. ``author`` and ``intro_author`` select elements rather than
		text nodes, so they receive the serialized markup of the matches.

		The number of parsed items is tracked on the spider instance
		(``self._parsed_book_count``) so the 30-item cap holds across
		callback invocations.

		Args:
			response: The response that is wrapped in a ``Selector`` and
				queried.

		Returns:
			A one-element list containing the populated ``doubanBookItem``.

		Raises:
			CloseSpider: With reason ``'enough'`` once more than 30 items
				have been parsed by this spider instance.
		"""
		sel = Selector(response)

		item = doubanBookItem()
		item['title'] = sel.xpath('//*[@id="wrapper"]/h1/span/text()').extract()
		item['author'] = sel.xpath('//*[@id="info"]/span[1]/a').extract()
		item['intro_content'] = sel.xpath('//*[@id="link-report"]/div[1]/div/p[1]/text()').extract()
		item['intro_author'] = sel.xpath('//*[@id="content"]/div/div[1]/div[3]/div[3]/div/div/p[1]').extract()

		# The counter lives on the instance because this callback runs once
		# per response; only shared state can accumulate across calls.
		parsed_count = getattr(self, '_parsed_book_count', 0) + 1
		self._parsed_book_count = parsed_count
		print('success add one pagecontent')

		# Checked before returning so that the item exceeding the cap is not
		# emitted.
		if parsed_count > 30:
			raise CloseSpider('enough')

		return [item]
Example #3
0
    def parse_book(self, response):
        """Extract one ``doubanBookItem`` from ``response``; return it in a list.

        Every field XPath is absolute (starts with ``//``), so it is evaluated
        against the whole document regardless of the context node. The method
        therefore builds a single item per response instead of looping over
        sub-selectors.

        Each field is set to the list returned by ``.extract()`` for its
        XPath. ``author`` and ``intro_author`` select elements rather than
        text nodes, so they receive the serialized markup of the matches.

        The number of parsed items is tracked on the spider instance
        (``self._parsed_book_count``) so the 30-item cap holds across
        callback invocations.

        Args:
            response: The response that is wrapped in a ``Selector`` and
                queried.

        Returns:
            A one-element list containing the populated ``doubanBookItem``.

        Raises:
            CloseSpider: With reason ``'enough'`` once more than 30 items
                have been parsed by this spider instance.
        """
        sel = Selector(response)

        item = doubanBookItem()
        item['title'] = sel.xpath(
            '//*[@id="wrapper"]/h1/span/text()').extract()
        item['author'] = sel.xpath('//*[@id="info"]/span[1]/a').extract()
        item['intro_content'] = sel.xpath(
            '//*[@id="link-report"]/div[1]/div/p[1]/text()').extract()
        item['intro_author'] = sel.xpath(
            '//*[@id="content"]/div/div[1]/div[3]/div[3]/div/div/p[1]'
        ).extract()

        # The counter lives on the instance because this callback runs once
        # per response; only shared state can accumulate across calls.
        parsed_count = getattr(self, '_parsed_book_count', 0) + 1
        self._parsed_book_count = parsed_count
        print('success add one pagecontent')

        # Checked before returning so that the item exceeding the cap is not
        # emitted.
        if parsed_count > 30:
            raise CloseSpider('enough')

        return [item]