def get_pagination_urls(self, response):
    """Yield the next listing-page URL built from the pagination template.

    The URL is produced by formatting ``self.pagination_template`` with the
    current ``self.page_no``; the counter is then advanced by one and its
    new value is stored under ``'page_no'`` in the yielded meta dict.

    :param response: the page response (not consulted by this variant).
    :return: generator yielding at most one ``(url, meta)`` tuple; nothing
        is yielded when ``utils.validate_url`` rejects the URL.
    """
    next_url = self.pagination_template % self.page_no
    self.page_no += 1
    # NOTE(review): the meta value is the counter *after* the increment,
    # i.e. one more than the number formatted into next_url -- confirm
    # that consumers expect this.
    request_meta = {'page_no': self.page_no}
    if not utils.validate_url(next_url):
        return
    yield next_url, request_meta
def get_pagination_urls(self, response):
    """Yield the absolute URL of the page's "pager-next" link, if present.

    :param response: page response offering ``xpath()`` and ``urljoin()``.
    :return: generator yielding at most one ``(url, meta)`` tuple, where
        ``meta`` is an empty dict. Nothing is yielded when the page has no
        next link or when ``utils.validate_url`` rejects the joined URL.
    """
    meta = {}
    rel_url = response.xpath(
        './/li[contains(@class,"pager-next")]//a/@href').extract_first()
    if not rel_url:
        # No next link (e.g. the last page). Joining a missing href would
        # not give a real next-page URL -- with urllib-style joining it
        # resolves to the current page -- so stop paginating here instead.
        return
    url = response.urljoin(rel_url)
    if utils.validate_url(url):
        yield url, meta
def get_pagination_urls(self, response):
    """Yield the next listing-page URL built from the pagination template.

    Formats ``self.pagination_template`` with the current ``self.page_no``
    and then advances the counter by one.

    :param response: the page response (not consulted by this variant).
    :return: generator yielding at most one ``(url, meta)`` tuple with an
        empty meta dict; nothing is yielded when ``utils.validate_url``
        rejects the URL.
    """
    page_url = self.pagination_template % self.page_no
    self.page_no += 1
    # TODO: every url yielded should be validated & quoted
    if not utils.validate_url(page_url):
        return
    yield page_url, {}
def get_pagination_urls(self, response):
    """Yield the next-page URL taken from the "news-next" pager link.

    The link's href is stripped of surrounding whitespace and appended to
    ``self.root_url``.

    :param response: page response offering ``xpath()``.
    :return: generator yielding at most one ``(url, meta)`` tuple, where
        ``meta`` is an empty dict. Nothing is yielded when the page has no
        next link (or a blank one) or when ``utils.validate_url`` rejects
        the resulting URL.
    """
    meta = {}
    href = response.xpath(
        './/div[contains(@class,"pagenav")]//li[contains(@class,"news-next")]/a/@href'
    ).extract_first()
    # extract_first() gives None when nothing matches (e.g. the last page);
    # calling .strip() on it directly would raise AttributeError.
    href = (href or '').strip()
    if not href:
        return
    url = self.root_url + href
    if utils.validate_url(url):
        yield url, meta
def get_pagination_urls(self, response):
    """Yield the absolute URL of the page's "news-next" link, if present.

    :param response: page response offering ``xpath()`` and ``urljoin()``.
    :return: generator yielding at most one ``(url, meta)`` tuple, where
        ``meta`` is an empty dict. Nothing is yielded when the page has no
        next link (or a blank one) or when ``utils.validate_url`` rejects
        the joined URL.
    """
    meta = {}
    # The href string has to be extracted from the xpath result before it
    # can be stripped; the result of xpath() itself is not a string.
    rel_url = response.xpath(
        '//li[contains(@class, "news-next")]/a/@href').extract_first()
    rel_url = (rel_url or '').strip()
    if not rel_url:
        # No next link (e.g. the last page): nothing to paginate to.
        return
    url = response.urljoin(rel_url)
    if utils.validate_url(url):
        yield url, meta
def get_pagination_urls(self, response):
    """Placeholder pagination hook that only ever offers an empty URL.

    :param response: the page response (not consulted by this variant).
    :return: generator yielding ``('', {})`` only if
        ``utils.validate_url('')`` is truthy; otherwise nothing is yielded.
    """
    candidate, meta = '', {}
    if not utils.validate_url(candidate):
        return
    yield candidate, meta