Example #1
0
 def parse_news(self, response):
     """Parse a news article page and schedule its first comment page.

     Populates a ``NewsItem`` through ``NewsLoader`` with the page URL,
     the headline, a fixed ``'sina'`` channel value and the stripped text
     of the ``.time-source`` element. The item and a follow-up comment
     request are only yielded when the page's ``<meta name="comment">``
     tag holds a value of the form ``"<channel>:<newsid>"``; otherwise
     nothing is yielded.

     :param response: the downloaded article page.
     :returns: generator yielding the loaded item followed by a
         ``scrapy.Request`` handled by ``self.parse_comment``.
     """
     ld = NewsLoader(NewsItem(), response)
     ld.add_value('url', response.url)
     ld.add_css('title', '.page-header h1::text')
     ld.add_value('channel', 'sina')
     # Local renamed from ``datetime`` so it does not shadow the stdlib
     # module name.
     published = response.css('.time-source').xpath('text()').extract()
     ld.add_value('datetime', published, MapCompose(unicode.strip))

     comment_meta = ld.get_xpath(
         '//meta[@name="comment"]/@content', TakeFirst())

     # Both names are bound up front: previously ``channel`` was only
     # assigned when the split produced exactly two parts, so a meta value
     # without exactly one ':' raised UnboundLocalError further down.
     channel = comment_id = None
     if comment_meta:
         parts = comment_meta.split(':')
         if len(parts) == 2:
             channel, comment_id = parts

     if not (comment_id and channel):
         # No usable comment reference: nothing is emitted for this page.
         return

     yield ld.load_item()

     # Always start from the first comment page.
     page = 1
     page_size = 20
     cmurl = '&channel=%s&newsid=%s&page=%s&page_size=%s' % (
         channel, comment_id, page, page_size)
     # ``self.cmturl`` is presumably the comment endpoint prefix defined on
     # the spider -- verify against the class definition.
     yield scrapy.Request(self.cmturl + cmurl, self.parse_comment)