def parse(self, response):
    """Build a ``biquItem`` from a chapter page.

    Fields populated:
        bookname: first text node of the third ``<a>`` under any element
            whose class is ``bottem1``.
        chaptername: first text node of the ``<h1>`` under any element
            whose class is ``bookname``.
        content: list of every text node directly under the element with
            id ``content`` (kept as a list, not joined).

    Args:
        response: object exposing ``xpath(query).extract()``
            (presumably a Scrapy response -- confirm against the spider).

    Returns:
        The populated ``biquItem``.

    Raises:
        IndexError: if the book-name or chapter-name query matches
            nothing (assuming ``extract()`` returns a plain list).
    """
    def texts(query):
        # Every string matched by ``query`` on this page.
        return response.xpath(query).extract()

    item = biquItem()

    book_names = texts('//*[@class="bottem1"]/a[3]/text()')
    item['bookname'] = book_names[0]

    chapter_names = texts('//*[@class="bookname"]/h1/text()')
    item['chaptername'] = chapter_names[0]

    item['content'] = texts('//*[@id="content"]/text()')
    return item
Beispiel #2
0
    def parse(self, response):
        """Build a ``biquItem`` from a chapter page via its ``keywords`` meta tag.

        The content of the first ``<meta name="keywords">`` is split on
        commas: field 0 becomes ``bookname`` and field 1 becomes
        ``chaptername``. ``content`` is the first text node directly under
        ``<div id="nr1">``.

        Args:
            response: object exposing ``xpath(query).extract()``
                (presumably a Scrapy response -- confirm against the spider).

        Returns:
            The populated ``biquItem``.

        Raises:
            IndexError: if the keywords meta tag is missing, has fewer than
                two comma-separated fields, or ``div#nr1`` has no text node.
        """
        item = biquItem()

        # Both names come from the same attribute, so query and split it once
        # instead of evaluating the identical XPath twice.
        keywords = response.xpath(
            '//meta[@name="keywords"]/@content').extract()[0].split(',')
        item['bookname'] = keywords[0]
        item['chaptername'] = keywords[1]

        # NOTE(review): only the FIRST text node of the div is stored; if the
        # chapter body is split across several text nodes the rest is dropped.
        # Left as-is because downstream code may rely on a single string.
        item['content'] = response.xpath(
            '//div[@id="nr1"]/text()').extract()[0]

        return item
Beispiel #3
0
 def parse(self, response):
     """Build a ``biquItem`` from a chapter page using its meta tags.

     Fields populated:
         bookname / chaptername: fields 0 and 1 of the comma-separated
             content of the first ``<meta name="keywords">``.
         booktype: substring of the first ``<meta name="description">``
             content, cut out by the literal markers below.
         content: first text node of the first matching ``<p>`` under
             ``<div id="novelcontent">``.

     Also prints ``booktype`` to stdout.

     Args:
         response: object exposing ``xpath(query).extract()``
             (presumably a Scrapy response -- confirm against the spider).

     Returns:
         The populated ``biquItem``.

     Raises:
         IndexError: if a queried node is missing, the keywords have fewer
             than two fields, or the description lacks the expected
             markers.
     """
     item = biquItem()

     # Both names come from the same attribute, so query and split it once
     # instead of evaluating the identical XPath twice.
     keywords = response.xpath(
         '//meta[@name="keywords"]/@content').extract()[0].split(',')
     item['bookname'] = keywords[0]
     item['chaptername'] = keywords[1]

     description = response.xpath(
         '//meta[@name="description"]/@content').extract()[0]
     # Keep what follows '提供了', then what follows '创作的' inside that,
     # and finally cut at the first '小说'. Each [1] raises IndexError when
     # its marker is absent.
     item['booktype'] = description.split(
         '提供了')[1].split('创作的')[1].split('小说')[0]

     # NOTE(review): only the first matched text node is stored; any
     # further paragraphs are dropped. Confirm this is intended.
     item['content'] = response.xpath(
         '//div[@id="novelcontent"]/p/text()').extract()[0]

     # NOTE(review): debug output kept to preserve behavior; consider
     # routing it through the spider's logger instead of stdout.
     print(item['booktype'])
     return item