def parse_book(self, response):
    """Extract one ``doubanBookItem`` from ``response`` and yield it.

    Each field is set to the list returned by ``.extract()`` for its XPath,
    so a field whose XPath matches nothing ends up as an empty list.

    Args:
        response: The response handed to this callback.

    Yields:
        The populated ``doubanBookItem``.

    Raises:
        CloseSpider: With reason ``'enough'`` once this callback has run
            more than 30 times on this spider instance.
    """
    sel = Selector(response)

    item = doubanBookItem()
    item['title'] = sel.xpath(
        '//*[@id="wrapper"]/h1/span/text()').extract()
    item['author'] = sel.xpath(
        '//*[@id="info"]/span[1]/a/text()').extract()
    item['intro_content'] = sel.xpath(
        '//*[@id="link-report"]/div[1]/div/p[1]/text()').extract()
    item['intro_author'] = sel.xpath(
        '//*[@id="content"]/div/div[1]/div[3]/div[3]/div/div/p[1]/text()'
    ).extract()
    item['image_urls'] = sel.xpath(
        '//*[@id="mainpic"]/a/img/@src').extract()

    # Keep the running total on the spider rather than in a per-call list:
    # a list created inside this function never holds more than one item,
    # so a limit checked against it could never fire.
    self._parsed_book_count = getattr(self, '_parsed_book_count', 0) + 1
    print('success add one pagecontent')

    if self._parsed_book_count > 30:
        raise CloseSpider('enough')

    yield item
def parse_book(self, response):
    """Extract one ``doubanBookItem`` from ``response``.

    Every field XPath is absolute (it starts at the document root), so a
    response produces exactly one item; no per-node loop is needed.

    Note that the ``author`` and ``intro_author`` XPaths select elements
    rather than ``text()`` nodes, so ``.extract()`` returns the serialized
    markup of those elements, not just their text.

    Args:
        response: The response handed to this callback.

    Returns:
        A one-element list containing the populated ``doubanBookItem``.

    Raises:
        CloseSpider: With reason ``'enough'`` once this callback has run
            more than 30 times on this spider instance.
    """
    sel = Selector(response)

    item = doubanBookItem()
    item['title'] = sel.xpath('//*[@id="wrapper"]/h1/span/text()').extract()
    item['author'] = sel.xpath('//*[@id="info"]/span[1]/a').extract()
    item['intro_content'] = sel.xpath(
        '//*[@id="link-report"]/div[1]/div/p[1]/text()').extract()
    item['intro_author'] = sel.xpath(
        '//*[@id="content"]/div/div[1]/div[3]/div[3]/div/div/p[1]'
    ).extract()

    # The result list is local to this call, so the "enough items" limit is
    # tracked on the spider instance where it persists across responses.
    self._parsed_book_count = getattr(self, '_parsed_book_count', 0) + 1
    print('success add one pagecontent')

    if self._parsed_book_count > 30:
        raise CloseSpider('enough')

    return [item]
def parse_book(self, response):
    """Build a single ``doubanBookItem`` from ``response`` and return it.

    The field XPaths all start at the document root, so one response maps
    to one item. ``author`` and ``intro_author`` select whole elements (no
    trailing ``text()``), so their extracted values are the elements'
    serialized markup.

    Args:
        response: The response handed to this callback.

    Returns:
        A list holding the one populated ``doubanBookItem``.

    Raises:
        CloseSpider: With reason ``'enough'`` once this callback has run
            more than 30 times on this spider instance.
    """
    page = Selector(response)

    # (item field, absolute XPath) pairs; each value is stored as the list
    # produced by ``.extract()``.
    field_xpaths = (
        ('title', '//*[@id="wrapper"]/h1/span/text()'),
        ('author', '//*[@id="info"]/span[1]/a'),
        ('intro_content', '//*[@id="link-report"]/div[1]/div/p[1]/text()'),
        ('intro_author',
         '//*[@id="content"]/div/div[1]/div[3]/div[3]/div/div/p[1]'),
    )

    item = doubanBookItem()
    for field, xpath in field_xpaths:
        item[field] = page.xpath(xpath).extract()

    # Count handled pages on the spider itself so the limit survives across
    # calls; a list created inside this function would reset every time.
    self._parsed_book_count = getattr(self, '_parsed_book_count', 0) + 1
    print('success add one pagecontent')

    if self._parsed_book_count > 30:
        raise CloseSpider('enough')

    return [item]