def retry(self, response):
    """Re-schedule the original request stashed in ``response.meta``.

    Returns a copy of that request, flagged ``dont_filter`` so the
    dupe-filter does not drop it, with a one-step priority boost so it is
    handled ahead of freshly scheduled requests.
    """
    original = response.meta['request']
    # NOTE(review): the explicit cookiejar hand-off was disabled here;
    # presumably cookies now travel on the copied request itself — confirm
    # before re-enabling cookiejar-based tracking.
    rescheduled = original.copy()
    logger.debug("Request: %s", rescheduled.cookies)
    rescheduled.dont_filter = True
    rescheduled.priority = original.priority + 1
    return rescheduled
def _retry(self, request, reason, spider):
    """Re-issue *request* after a fixed cool-down, up to ``self.max_retry_times``.

    Returns a copy of the request with its ``retry_times`` meta counter
    bumped and its priority adjusted, or ``None`` once the retry budget is
    exhausted.
    """
    # NOTE(review): this synchronous sleep blocks the whole crawler for a
    # minute on every retry — consider a non-blocking delay if throughput
    # matters.
    sleep(60)
    attempt = request.meta.get('retry_times', 0) + 1
    if attempt > self.max_retry_times:
        logger.debug("Gave up retrying %(request)s (failed %(retries)d times): %(reason)s",
                     {'request': request, 'retries': attempt, 'reason': reason},
                     extra={'spider': spider})
        return None
    logger.debug("Retrying %(request)s (failed %(retries)d times): %(reason)s",
                 {'request': request, 'retries': attempt, 'reason': reason},
                 extra={'spider': spider})
    replacement = request.copy()
    replacement.meta['retry_times'] = attempt
    replacement.dont_filter = True
    replacement.priority = request.priority + self.priority_adjust
    return replacement
def _retry(self, request, reason, spider):
    """Retry *request* unless its combined retry budget is exhausted.

    The attempt count is the sum of two meta counters maintained by the
    downloader middlewares: ``response_retry`` (bad responses) and
    ``exception_retry`` (download exceptions).  A per-request
    ``max_retry_times`` meta key, when present, overrides the middleware
    default.

    Returns the copied request to be re-scheduled; or, once retries are
    exhausted and most failures were exceptions, a sentinel
    ``HtmlResponse`` signalling upstream middleware that the IP/proxy is
    likely dead; otherwise ``None``.
    """
    response_retries = request.meta.get('response_retry', 0)
    exception_retries = request.meta.get('exception_retry', 0)
    # Lazy logger formatting instead of leftover debug print() statements.
    logger.debug("response_retries is %s", response_retries)
    logger.debug("exception_retries is %s", exception_retries)
    retries = response_retries + exception_retries
    # Per-request override of the retry budget, if the spider set one.
    retry_times = request.meta.get('max_retry_times', self.max_retry_times)
    stats = spider.crawler.stats
    if retries <= retry_times:
        logger.debug("Retrying %(request)s (failed %(retries)d times): %(reason)s",
                     {'request': request, 'retries': retries, 'reason': reason},
                     extra={'spider': spider})
        retryreq = request.copy()
        retryreq.meta['retry_times'] = retries
        retryreq.dont_filter = True
        retryreq.priority = request.priority + self.priority_adjust
        if isinstance(reason, Exception):
            # Record the exception class name, not the instance, in stats.
            reason = global_object_name(reason.__class__)
        stats.inc_value('retry/count')
        stats.inc_value('retry/reason_count/%s' % reason)
        return retryreq
    stats.inc_value('retry/max_reached')
    logger.debug("Gave up retrying %(request)s (failed %(retries)d times): %(reason)s",
                 {'request': request, 'retries': retries, 'reason': reason},
                 extra={'spider': spider})
    # If failures were mostly exceptions, the IP/proxy is probably dead:
    # hand back a sentinel response so the middleware closer to the engine
    # (MiddlewareIpagentDownloaderMiddleware) can react to it.
    if exception_retries > response_retries:
        return HtmlResponse(url='retry_over_exception')
def update(self, request, spider):
    """Kick off a cookie refresh.

    Fetches the site root with cookie merging enabled, carrying the
    original request along in ``meta`` so ``self.retry`` can re-schedule
    it once the refresh response arrives.
    """
    logger.debug("Updating cookies %(request)s",
                 {'request': request}, extra={'spider': spider})
    refresh_meta = {'dont_merge_cookies': False, 'request': request}
    return Request("http://xueqiu.com",
                   meta=refresh_meta,
                   dont_filter=True,
                   callback=self.retry)