def parse_page(self, response):
    """Parse a page and tag the resulting item with its origin URL.

    Delegates to ``parse_response``. When that returns a truthy item, the
    item's ``'url'`` entry is overwritten with
    ``response.meta['origin_url']`` on a best-effort basis. A falsy result
    is handed to ``parse_error`` instead and nothing is returned.

    Args:
        response: The response to parse. It is expected to expose a
            ``meta`` mapping that may carry an ``'origin_url'`` entry.

    Returns:
        The parsed item, or ``None`` when ``parse_response`` produced
        nothing usable.
    """
    item = parse_response(response)
    if not item:
        # The return value of parse_error is intentionally discarded, so
        # the callback yields nothing for a page that failed to parse.
        parse_error(response)
        return None

    try:
        item['url'] = response.meta['origin_url']
    except (AttributeError, KeyError, TypeError):
        # Tagging is best effort; keep the item untouched when:
        #   AttributeError - the response has no usable ``meta``
        #   KeyError       - 'origin_url' is absent, or the item rejects
        #                    the 'url' key
        #   TypeError      - the item does not support string-key assignment
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
        # propagate.
        pass
    return item
def parse(self, response):
    """Return whatever ``parse_response`` produces for ``response``.

    Args:
        response: The response object passed straight through to
            ``parse_response``.

    Returns:
        The unmodified result of ``parse_response(response)``.
    """
    parsed = parse_response(response)
    return parsed
def parse_page(self, response):
    """Count the page against the crawl budget, then parse it.

    Increments ``self.max_count`` on every call. Once the counter is
    greater than 5000, ``CloseSpider('url_exceeded')`` is raised instead of
    parsing; otherwise the result of ``parse_response`` is returned.

    Args:
        response: The response object forwarded to ``parse_response``.

    Returns:
        The result of ``parse_response(response)``.

    Raises:
        CloseSpider: With reason ``'url_exceeded'`` when more than 5000
            pages have been counted.
    """
    # Every call counts toward the limit, including the one that trips it.
    # NOTE(review): the original comment says the cap is "per date";
    # nothing visible here resets the counter - confirm where that happens.
    self.max_count += 1

    budget_exhausted = self.max_count > 5000
    if not budget_exhausted:
        return parse_response(response)

    raise CloseSpider('url_exceeded')