Example #1
0
    def parse(self, response):
        """Yield one follow-up request per thread link found in the page table.

        Links are read from the anchor in the third cell of every table row
        after the first. At most the first ten links are followed. Each link
        is resolved against the board's base URL and suffixed with
        ``&start=-1`` before being requested.

        Every request carries, in ``meta``, both its own item (``'item'``)
        and the shared list of all items built from this page (``'items'``),
        and is handled by ``self.parse2``.
        """
        hxs = HtmlXPathSelector(response)
        hrefs = hxs.select('/html/body/center/table/tr[position()>1]/td[3]/a/@href').extract()

        # Slice instead of indexing a fixed range(10): a page with fewer than
        # ten rows then yields fewer requests rather than raising IndexError.
        items = []
        for href in hrefs[:10]:
            item = bbsItem()
            item['link'] = urljoin_rfc('http://bbs.nju.edu.cn/', href) + '&start=-1'
            items.append(item)

        # The list is completed before any request is created because every
        # request's meta references the full list.
        for item in items:
            yield Request(
                item['link'],
                meta={'item': item, 'items': items},
                dont_filter=True,
                callback=self.parse2,
            )
Example #2
0
    def parse(self, response):
        """Yield one follow-up request per thread row found in the page table.

        Titles and links are read from the anchor in the third cell of every
        table row after the first. At most the first ten rows are used. For
        each row an item is filled with the absolute link and the title
        (minus its final character), then a request for that link is yielded
        with the item attached as ``meta['item']`` and ``self.parse2`` as the
        callback.
        """
        hxs = HtmlXPathSelector(response)
        titles = hxs.select('/html/body/center/table/tr[position()>1]/td[3]/a/text()').extract()
        hrefs = hxs.select('/html/body/center/table/tr[position()>1]/td[3]/a/@href').extract()

        # Pair the sliced lists instead of indexing a fixed range(10): a page
        # with fewer than ten rows (or an anchor without text) no longer
        # raises IndexError; only complete (link, title) pairs are processed.
        for href, title in zip(hrefs[:10], titles[:10]):
            item = bbsItem()
            item['link'] = urljoin_rfc('http://bbs.nju.edu.cn/', href)
            # Drops the last character of the extracted text -- presumably a
            # trailing whitespace/newline character; confirm against the page.
            item['title'] = title[:-1]

            # A constructed Request is always truthy, so the request is
            # yielded unconditionally; the former "else: yield item" branch
            # could never run.
            yield Request(
                item['link'],
                meta={'item': item},
                dont_filter=True,
                callback=self.parse2,
            )