Example #1
    def parse(self, response):
        # Chapter link text under every <article> node
        baseList = response.xpath('//article/a/text()')
        item = DaomuItem()
        i = 0
        for base in baseList:
            # e.g. ['七星鲁王', '第一章', '血尸']
            info = base.extract().split()
            item['title'] = info[0]
            item['chapter'] = info[1]
            item['chapterName'] = info[2]
            # Pick the href that matches the current chapter by index
            item['link'] = response.xpath('//article/a/@href').extract()[i]
            i += 1
            yield item
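Each of these parse methods fills in a DaomuItem, the item class defined in the project's items.py (not shown on this page). A minimal sketch of what that class could look like, assuming the field names used in Example #1; the other examples assign different field names, so the real class would declare whichever fields its spider uses:

    # items.py (sketch; field names assumed from Example #1)
    import scrapy

    class DaomuItem(scrapy.Item):
        title = scrapy.Field()
        chapter = scrapy.Field()
        chapterName = scrapy.Field()
        link = scrapy.Field()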
Example #2
    def parse(self, response):
        item = DaomuItem()
        item["bookName"] = response.xpath(
            '//h1[@class="focusbox-title"]/text()').extract()[0]
        articles = response.xpath('//article[@class="excerpt excerpt-c3"]')

        for article in articles:
            info = article.xpath('./a/text()').extract()[0].split(' ')
            item['bookTitle'] = info[0]
            item['zhName'] = info[2]
            item['zhNum'] = info[1]
            item['zhLink'] = article.xpath('./a/@href').extract()[0]

            yield item
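Note that Examples #1 through #4 create a single DaomuItem outside the loop and mutate it on each pass, while Examples #5 and #6 instantiate a fresh item per chapter; the per-iteration pattern keeps every yielded item an independent object.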
Example #3
    def parse(self, response):
        # Create the item object (the class defined in items.py)
        item = DaomuItem()
        item["BookName"] = response.xpath(
            '//h1[@class="focusbox-title"]/text()').extract()[0]
        # Match every chapter node
        articles = response.xpath('//article[@class="excerpt excerpt-c3"]')
        for article in articles:
            info = article.xpath('./a/text()').extract()[0].split(' ')
            # e.g. ['七星鲁王', '第一章', '血尸']
            item["BookTitle"] = info[0]
            item["zhNum"] = info[1]
            item["zhName"] = info[2]
            item["zhLink"] = article.xpath('./a/@href').extract()[0]
            yield item
Example #4
    def parse(self, response):
        # Create the item object (the class defined in items.py)
        item = DaomuItem()
        # Match the book title (matched separately)
        book = response.xpath(
            '//h1[@class="focusbox-title"]/text()').extract()[0]
        # Base XPath: match all chapter nodes
        articles = response.xpath('//article[@class="excerpt excerpt-c3"]')
        for article in articles:
            info = article.xpath('./a/text()').extract()[0].split(' ')
            # ['七星鲁王', '第一章', '血尸']
            item["book"] = book
            item["title"] = info[0]
            item["chapter"] = info[2]
            item["chapter_num"] = info[1]
            item["chapter_url"] = article.xpath('./a/@href').extract()[0]

            yield item
Example #5
    def parse_two_link(self, response):
        # Base XPath: list of all chapter nodes
        article_list = response.xpath('/html/body/section/div[2]/div/article')
        for article in article_list:
            # Create the item object
            item = DaomuItem()

            info = article.xpath('./a/text()').extract()[0].split()
            # info: ['七星鲁王', '第一章', '血尸']
            item['va_name'] = info[0]
            item['ch_number'] = info[1]
            item['ch_name'] = info[2]
            item['ch_link'] = article.xpath('./a/@href').extract()[0]
            # Hand the chapter link to the scheduler;
            # the item must travel to the next callback via the meta parameter
            yield scrapy.Request(item['ch_link'],
                                 meta={'item': item},
                                 callback=self.parse_three_link)
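Examples #5 and #6 hand each chapter link back to the scheduler and rely on a parse_three_link callback that is not shown here. A minimal sketch of how that callback could recover the item from response.meta, assuming a hypothetical content field and a generic XPath for the chapter body:

    def parse_three_link(self, response):
        # Retrieve the item that was attached to the request via meta
        item = response.meta['item']
        # Assumed field name and XPath; adjust to the target page's structure
        item['content'] = '\n'.join(
            response.xpath('//article//p/text()').extract())
        yield item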
Example #6
    def parse_two_link(self, response):
        # Base XPath: list of all chapter nodes
        article_list = response.xpath('//article[@class="excerpt excerpt-c3"]')
        for article in article_list:
            # Create the item object
            item = DaomuItem()
            info = article.xpath('./a/text()').extract()[0].split()
            print(info)
            # info: ['七星鲁王', '第一章', '血尸']
            item['va_name'] = info[0]
            item['ch_number'] = info[1]
            item['ch_name'] = info[2] if len(info) > 2 else ""
            item['ch_link'] = article.xpath('./a/@href').extract()[0]
            # Hand the chapter link to the scheduler;
            # the item must travel to the next callback via the meta parameter
            yield scrapy.Request(item['ch_link'],
                                 meta={'item': item},
                                 callback=self.parse_three_link)
            time.sleep(2)
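Example #6 throttles requests with time.sleep, which requires an "import time" at module level and blocks Scrapy's entire event loop while it waits. The usual way to space out requests is the DOWNLOAD_DELAY setting instead, for example in the project's settings.py:

    # settings.py: wait roughly 2 seconds between requests to the same site
    DOWNLOAD_DELAY = 2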