def parse_news(self, response):
    """Parse a news article page and schedule its first comment page.

    Populates a ``NewsItem`` through ``NewsLoader`` with the page URL, the
    title (``.page-header h1`` text), the fixed channel ``'sina'`` and the
    stripped text nodes of ``.time-source`` as ``datetime``.

    The ``<meta name="comment">`` tag's ``content`` is expected to have the
    form ``<channel>:<newsid>`` (exactly one colon, both parts non-empty).
    When it does, the loaded item is yielded, followed by a request for the
    first comment page handled by ``self.parse_comment``.

    NOTE(review): the original nesting was lost in the source; this keeps
    the reading in which pages without usable comment metadata yield
    nothing at all -- confirm that dropping those items is intended.

    Args:
        response: the downloaded article page response.

    Yields:
        The loaded news item, then a ``scrapy.Request`` for its comments.
    """
    loader = NewsLoader(NewsItem(), response)
    loader.add_value('url', response.url)
    loader.add_css('title', '.page-header h1::text')
    loader.add_value('channel', 'sina')

    # Named ``published`` rather than ``datetime`` so the stdlib module name
    # is not shadowed inside this method.
    published = response.css('.time-source').xpath('text()').extract()
    # A method-call lambda strips each extracted text node without relying
    # on the Python 2-only ``unicode`` builtin.
    loader.add_value('datetime', published,
                     MapCompose(lambda text: text.strip()))

    comment_meta = loader.get_xpath('//meta[@name="comment"]/@content',
                                    TakeFirst())
    if not comment_meta:
        return

    parts = comment_meta.split(':')
    if len(parts) != 2:
        # Not of the form "<channel>:<newsid>"; bail out before ``channel``
        # could ever be referenced while unbound.
        return

    channel, comment_id = parts
    if not (comment_id and channel):
        return

    yield loader.load_item()

    # Comment listing always starts at the first page with 20 entries.
    page = 1
    page_size = 20
    query = '&channel=%s&newsid=%s&page=%s&page_size=%s' % (
        channel, comment_id, page, page_size)
    # presumably self.cmturl is the comment API base URL that already ends
    # in a query string -- verify against the spider's class attributes.
    yield scrapy.Request(self.cmturl + query, self.parse_comment)