def parse(self, response):
    """Entry-point callback for the spider.

    Scrapes any tables found on the landing page, then queues a request
    for every outgoing link, handing them off to ``follow_links``.
    """
    # self.store_html(response)  # raw-HTML persistence currently disabled
    yield from self.parse_tables(response)
    hrefs = Selector(response).xpath("//a/@href").extract()
    for href in hrefs:
        normalized = Utility.normalize(response.url, href)
        yield Request(normalized, callback=self.follow_links)
def follow_links(self, response):
    """Recursive crawl callback.

    Advances ``self.table_counter`` once per invocation and closes the
    spider when the count exceeds ``self.table_limit`` (presumably a cap
    on how much gets scraped — confirm the counter's exact semantics).
    Otherwise scrapes tables on this page and keeps following links.
    """
    if next(self.table_counter) > self.table_limit:
        raise CloseSpider(reason="Enough tables")
    # self.store_html(response)  # raw-HTML persistence currently disabled
    yield from self.parse_tables(response)
    for href in Selector(response).xpath("//a/@href").extract():
        normalized = Utility.normalize(response.url, href)
        yield Request(normalized, callback=self.follow_links)