def start_requests(self):
    """Yield one initial request per entry in ``self.start_urls``.

    The time at which iteration begins is stored on ``self.run_time``.
    Every request is routed to ``self.parse`` with priority 600 and carries
    two entries in ``meta``:

    * ``rules`` -- a ``Rules.default()`` object updated with the
      ``DEFAULT_RULES`` setting, with ``follow`` set to True and
      ``priority`` set to 600 (one object per request);
    * ``context`` -- ``run_time`` (the current ``self.run_time``) and an
      ``offset`` of 0.
    """
    self.run_time = time()
    for start_url in self.start_urls:
        request_rules = Rules.default()
        overrides = self.settings.get("DEFAULT_RULES", {})
        request_rules.update(**overrides)
        request_rules["follow"] = True
        request_rules["priority"] = 600
        # self.run_time is re-read on each iteration on purpose: this
        # generator is consumed lazily, so the attribute may have been
        # reassigned between two yields.
        request_context = {"run_time": self.run_time, "offset": 0}
        yield Request(
            start_url,
            callback=self.parse,
            priority=600,
            meta={"rules": request_rules, "context": request_context},
        )
def get_pages(self, response, **kwargs):
    """Yield pagination requests for ``response``, then its listing requests.

    First, ``int(floor(self.MAXPAGES))`` requests are produced. Page ``k``
    (1-based) targets ``response.url + "?s=<k * 100>"``, is routed to
    ``self.parse`` with priority 600 and stores ``offset = k * 100`` together
    with ``run_time`` in ``meta["context"]``.

    Afterwards every request produced by
    ``self.listinglinks.process_response`` for this response is yielded.

    Side effect: ``self.run_time`` is reset to the current time.

    Args:
        response: The response whose URL is used as the pagination base and
            whose content is handed to ``self.listinglinks``.
        **kwargs: Accepted for interface compatibility; not used here.
    """
    self.run_time = time()
    page_count = int(floor(self.MAXPAGES))
    for page in range(1, page_count + 1):
        offset = page * 100
        # Build a fresh rules object for every request (as start_requests
        # does) so that a later per-request change to the rules stored in
        # ``meta`` cannot leak into the sibling pagination requests.
        # NOTE(review): unlike start_requests, the DEFAULT_RULES setting is
        # not applied here -- confirm whether that is intentional.
        rules = Rules.default()
        rules["follow"] = True
        rules["priority"] = 600
        # NOTE(review): plain concatenation assumes response.url carries no
        # query string yet -- confirm against the URLs this callback sees.
        page_url = response.url + "?s=%d" % offset
        yield Request(
            page_url,
            callback=self.parse,
            priority=600,
            meta={
                "rules": rules,
                "context": {"run_time": self.run_time, "offset": offset},
            },
        )
    # NOTE(review): the original formatting of this method was collapsed;
    # link extraction is assumed to run once, after the pagination loop.
    yield from self.listinglinks.process_response(
        response.css("html"),
        self,
        response,
        context=response.request.meta.get("context", {}),
        ignoreWhen=True,
    )