def parse(self, response):
    """Build a ``biquItem`` from a chapter page.

    Fields populated:
        bookname: text of the third ``<a>`` under the element whose class
            is ``bottem1`` (first match only).
        chaptername: text of the ``<h1>`` under the element whose class is
            ``bookname`` (first match only).
        content: list of every text node directly under the element whose
            id is ``content``.

    Raises:
        IndexError: if the book-name or chapter-name XPath matches nothing.
    """
    def texts(query):
        # Run the XPath against this response and return the extracted strings.
        return response.xpath(query).extract()

    item = biquItem()

    book_titles = texts('//*[@class="bottem1"]/a[3]/text()')
    item['bookname'] = book_titles[0]

    chapter_titles = texts('//*[@class="bookname"]/h1/text()')
    item['chaptername'] = chapter_titles[0]

    # Content is deliberately kept as the full list of text fragments.
    item['content'] = texts('//*[@id="content"]/text()')
    return item
def parse(self, response):
    """Build a ``biquItem`` from a chapter page using its ``<meta>`` tags.

    The ``content`` attribute of ``<meta name="keywords">`` is split on
    commas: the first field becomes ``bookname`` and the second becomes
    ``chaptername``. ``content`` is the first text node directly under
    ``<div id="nr1">``.

    Raises:
        IndexError: if the keywords meta tag is missing, if it has fewer
            than two comma-separated fields, or if the content div has no
            text node.
    """
    item = biquItem()

    # Both name fields come from the same meta tag, so look it up once.
    keyword_values = response.xpath('//meta[@name="keywords"]/@content').extract()
    keyword_fields = keyword_values[0].split(',')
    item['bookname'] = keyword_fields[0]
    item['chaptername'] = keyword_fields[1]

    # NOTE(review): only the first text node is kept; confirm that later
    # text nodes of the div are not needed.
    body_fragments = response.xpath('//div[@id="nr1"]/text()').extract()
    item['content'] = body_fragments[0]
    return item
def parse(self, response):
    """Build a ``biquItem`` from a chapter page using its ``<meta>`` tags.

    Fields populated:
        bookname: first comma-separated field of the ``content`` attribute
            of ``<meta name="keywords">``.
        chaptername: second comma-separated field of the same attribute.
        booktype: substring of ``<meta name="description">`` obtained by
            taking the text after the first marker, then the text after the
            second marker, then everything before the third marker (the
            three literal markers are the split arguments below).
        content: first text node of the first matching ``<p>`` under
            ``<div id="novelcontent">``.

    Raises:
        IndexError: if a required node is missing, the keywords tag has
            fewer than two fields, or the description lacks one of the
            first two markers.
    """
    item = biquItem()

    # Both name fields come from the same meta tag, so look it up once.
    keyword_fields = response.xpath(
        '//meta[@name="keywords"]/@content').extract()[0].split(',')
    item['bookname'] = keyword_fields[0]
    item['chaptername'] = keyword_fields[1]

    description = response.xpath(
        '//meta[@name="description"]/@content').extract()[0]
    item['booktype'] = description.split(
        '提供了')[1].split('创作的')[1].split('小说')[0]

    # NOTE(review): only the first text node is kept; confirm that later
    # paragraphs are not needed.
    item['content'] = response.xpath(
        '//div[@id="novelcontent"]/p/text()').extract()[0]

    # Log through the spider's logger rather than print() so the output
    # follows the configured log level and handlers.
    self.logger.debug('booktype: %s', item['booktype'])
    return item