def parse(self, response):
    """Schedule a follow-up request for each of the first ten listed links.

    Links are read from the third cell of every table row after the first
    one (``tr[position()>1]/td[3]/a/@href``), resolved against
    ``http://bbs.nju.edu.cn/`` and suffixed with ``&start=-1``.

    Args:
        response: The downloaded listing page handed over by Scrapy.

    Yields:
        One ``Request`` per link, handled by ``self.parse2``. Its ``meta``
        carries the matching ``item`` and the complete ``items`` list.
    """
    hxs = HtmlXPathSelector(response)
    hrefs = hxs.select(
        '/html/body/center/table/tr[position()>1]/td[3]/a/@href'
    ).extract()

    # Slicing keeps the ten-link cap but tolerates pages with fewer rows,
    # where indexing 0..9 unconditionally raised IndexError.
    items = []
    for href in hrefs[:10]:
        item = bbsItem()
        item['link'] = urljoin_rfc('http://bbs.nju.edu.cn/', href) + '&start=-1'
        items.append(item)

    # Requests are only created once the list is complete, because every
    # request shares the full ``items`` list through its meta.
    for item in items:
        yield Request(
            item['link'],
            meta={'item': item, 'items': items},
            dont_filter=True,
            callback=self.parse2,
        )
def parse(self, response):
    """Schedule a follow-up request for each of the first ten listed entries.

    Titles and links are read from the anchor in the third cell of every
    table row after the first one (``tr[position()>1]/td[3]/a``). Each link
    is resolved against ``http://bbs.nju.edu.cn/``.

    Args:
        response: The downloaded listing page handed over by Scrapy.

    Yields:
        One ``Request`` per entry, handled by ``self.parse2``. Its ``meta``
        carries the partially filled ``item`` (``link`` and ``title``).
    """
    hxs = HtmlXPathSelector(response)
    titles = hxs.select(
        '/html/body/center/table/tr[position()>1]/td[3]/a/text()'
    ).extract()
    hrefs = hxs.select(
        '/html/body/center/table/tr[position()>1]/td[3]/a/@href'
    ).extract()

    # zip over the capped slices keeps the ten-entry limit but tolerates
    # pages with fewer rows, where indexing 0..9 raised IndexError.
    # If the two lists differ in length, the surplus entries are skipped.
    for href, text in zip(hrefs[:10], titles[:10]):
        item = bbsItem()
        item['link'] = urljoin_rfc('http://bbs.nju.edu.cn/', href)
        # Drops the final character of the anchor text.
        # NOTE(review): presumably a trailing newline/whitespace - confirm.
        item['title'] = text[:-1]

        # NOTE(review): the former ``if request: ... else: yield item``
        # fallback was removed on the assumption that a constructed Request
        # is always truthy, which made the else branch unreachable.
        yield Request(
            item['link'],
            meta={'item': item},
            dont_filter=True,
            callback=self.parse2,
        )