def get_pagination_urls(self, response):
    """Yield the URL of the next listing page built from the page counter.

    The URL is produced by %-formatting ``self.pagination_template`` with
    the current ``self.page_no``; the counter is then advanced by one.
    ``response`` is not consulted by this implementation.

    Yields:
        A ``(url, meta)`` tuple, only when ``utils.validate_url`` accepts
        the URL. ``meta['page_no']`` carries the counter value *after* the
        increment (i.e. one more than the number used to build the URL).
    """
    next_url = self.pagination_template % self.page_no
    self.page_no += 1
    page_meta = {'page_no': self.page_no}
    if not utils.validate_url(next_url):
        # Counter has already been advanced; nothing is emitted for a bad URL.
        return
    yield next_url, page_meta
Ejemplo n.º 2
0
 def get_pagination_urls(self, response):
     """Yield the next page's URL taken from the "pager-next" link.

     Reads the ``href`` of the first anchor inside an ``<li>`` whose class
     contains ``pager-next``, resolves it with ``response.urljoin`` and
     yields it when ``utils.validate_url`` accepts the result.

     Yields:
         A ``(url, meta)`` tuple with an empty ``meta`` dict, or nothing at
         all when the page has no next link or the URL fails validation.
     """
     meta = dict()
     rel_url = response.xpath(
         './/li[contains(@class,"pager-next")]//a/@href').extract_first()
     if not rel_url:
         # No next link (extract_first() gave None) or an empty href:
         # urljoin would hand back the current page's own URL, which must
         # not be yielded as the "next" page.
         return
     url = response.urljoin(rel_url)
     if utils.validate_url(url):
         yield url, meta
 def get_pagination_urls(self, response):
     """Yield the next listing page URL derived from the page counter.

     ``self.pagination_template`` is %-formatted with ``self.page_no`` and
     the counter is then incremented. ``response`` is not used here.

     Yields:
         A ``(url, meta)`` tuple with an empty ``meta`` dict, only when
         ``utils.validate_url`` accepts the URL.
     """
     page_url = self.pagination_template % self.page_no
     self.page_no += 1
     # TODO: every url yielded should be validated & quoted
     if not utils.validate_url(page_url):
         return
     yield page_url, {}
Ejemplo n.º 4
0
 def get_pagination_urls(self, response):
     """Yield the next page's URL from the "news-next" pager entry.

     Reads the ``href`` of the anchor under the ``news-next`` ``<li>`` inside
     the ``pagenav`` ``<div>``, strips surrounding whitespace and appends it
     to ``self.root_url``.

     Yields:
         A ``(url, meta)`` tuple with an empty ``meta`` dict when
         ``utils.validate_url`` accepts the URL. Nothing is yielded when the
         page has no next link (e.g. the last page) or the href is blank.
     """
     meta = dict()
     raw_href = response.xpath(
         './/div[contains(@class,"pagenav")]//li[contains(@class,"news-next")]/a/@href'
     ).extract_first()
     # extract_first() returns None when nothing matches; calling .strip()
     # on it directly would raise AttributeError on the last page.
     href = (raw_href or '').strip()
     if not href:
         return
     url = self.root_url + href
     if utils.validate_url(url):
         yield url, meta
 def get_pagination_urls(self, response):
     """Yield the next page's URL from the "news-next" pager entry.

     Reads the ``href`` of the anchor directly under an ``<li>`` whose class
     contains ``news-next``, strips surrounding whitespace and resolves it
     with ``response.urljoin``.

     Yields:
         A ``(url, meta)`` tuple with an empty ``meta`` dict when
         ``utils.validate_url`` accepts the URL. Nothing is yielded when the
         page has no next link or the href is blank.
     """
     meta = dict()
     # The XPath call returns a selector list, not a string: the value has
     # to be extracted before any string method can be applied to it.
     raw_href = response.xpath(
         '//li[contains(@class, "news-next")]/a/@href').extract_first()
     rel_url = (raw_href or '').strip()
     if not rel_url:
         # Without a real href, urljoin would just give back the current
         # page's URL; stop paginating instead.
         return
     url = response.urljoin(rel_url)
     if utils.validate_url(url):
         yield url, meta
 def get_pagination_urls(self, response):
     """Placeholder pagination hook that proposes an empty URL.

     The candidate URL is always the empty string and ``response`` is not
     inspected, so a ``(url, meta)`` tuple (with an empty ``meta`` dict) is
     yielded only if ``utils.validate_url`` accepts ``''``.
     """
     candidate = ''
     if not utils.validate_url(candidate):
         return
     yield candidate, dict()