Example #1
0
 def retry(self, response):
     """Build and return a retry copy of the request carried in *response.meta*.

     The request to be re-issued travels in ``response.meta['request']``.
     A copy of it is returned with duplicate filtering disabled and its
     priority bumped by one so the scheduler re-queues it ahead of peers.
     """
     original = response.meta['request']
     new_request = original.copy()
     logger.debug("Request: %s", new_request.cookies)
     new_request.dont_filter = True
     new_request.priority = original.priority + 1
     return new_request
Example #2
0
    def _retry(self, request, reason, spider):
        """Return a retry copy of *request*, or ``None`` once retries run out.

        Tracks the attempt count in ``request.meta['retry_times']``; a
        per-request ``max_retry_times`` meta key overrides the middleware
        default (``self.max_retry_times``), matching upstream Scrapy.

        :param request: the request that failed.
        :param reason: failure description used only for logging.
        :param spider: spider instance, attached to the log records.
        :returns: a higher-priority, dont_filter copy of the request while
            retries remain, otherwise ``None`` (the failure is dropped).
        """
        # NOTE(review): the original body began with time.sleep(60).  A
        # blocking sleep inside downloader middleware stalls the whole
        # single-threaded Twisted reactor — every concurrent request in the
        # crawl — for a minute per retry, so it was removed.  If a back-off
        # is genuinely needed it must be scheduled asynchronously
        # (e.g. via reactor.callLater / a deferred), never with sleep().
        retries = request.meta.get('retry_times', 0) + 1

        # Allow a per-request override, consistent with Scrapy's own
        # RetryMiddleware and with the other _retry implementation in
        # this file.
        max_retry_times = request.meta.get('max_retry_times', self.max_retry_times)

        if retries <= max_retry_times:
            logger.debug("Retrying %(request)s (failed %(retries)d times): %(reason)s",
                         {'request': request, 'retries': retries, 'reason': reason},
                         extra={'spider': spider})
            retryreq = request.copy()
            retryreq.meta['retry_times'] = retries
            retryreq.dont_filter = True
            retryreq.priority = request.priority + self.priority_adjust
            return retryreq
        else:
            logger.debug("Gave up retrying %(request)s (failed %(retries)d times): %(reason)s",
                         {'request': request, 'retries': retries, 'reason': reason},
                         extra={'spider': spider})
Example #3
0
	def _retry(self, request, reason, spider):
		"""Retry *request*, or give up and possibly signal a dead proxy.

		Combines two separate failure counters kept in request.meta
		('response_retry' and 'exception_retry' -- presumably incremented
		elsewhere by process_response / process_exception; TODO confirm
		against the callers, which are not visible here).  While the
		combined count is within the limit, returns a higher-priority
		retry copy of the request; otherwise logs, bumps the
		'retry/max_reached' stat, and -- when exceptions dominated the
		failures -- returns a sentinel HtmlResponse so an upstream
		middleware can react (e.g. rotate the proxy IP).
		"""
		response_retries = request.meta.get('response_retry', 0)
		exception_retries = request.meta.get('exception_retry', 0)
		print("response_retries is %s" % response_retries)
		print("exception_retries is %s" % exception_retries)
		retries = response_retries + exception_retries
		retry_times = self.max_retry_times
		
		# A per-request limit overrides the middleware-wide default.
		if 'max_retry_times' in request.meta:
			retry_times = request.meta['max_retry_times']
		
		stats = spider.crawler.stats
		if retries <= retry_times:
			logger.debug("Retrying %(request)s (failed %(retries)d times): %(reason)s",
			             {'request': request, 'retries': retries, 'reason': reason},
			             extra={'spider': spider})
			retryreq = request.copy()
			# NOTE(review): writes 'retry_times' but the counters read above
			# are 'response_retry'/'exception_retry' -- this key appears
			# informational only here; verify nothing relies on it to stop
			# the retry loop.
			retryreq.meta['retry_times'] = retries
			retryreq.dont_filter = True
			retryreq.priority = request.priority + self.priority_adjust
			
			# Normalize an Exception reason to its dotted class name so the
			# stats key below is stable.
			if isinstance(reason, Exception):
				reason = global_object_name(reason.__class__)
			
			stats.inc_value('retry/count')
			stats.inc_value('retry/reason_count/%s' % reason)
			return retryreq
		else:
			stats.inc_value('retry/max_reached')
			logger.debug("Gave up retrying %(request)s (failed %(retries)d times): %(reason)s",
			             {'request': request, 'retries': retries, 'reason': reason},
			             extra={'spider': spider})
			# If the failures were mostly exceptions, the proxy IP address
			# has most likely gone bad.
			if exception_retries > response_retries:
				# Wrap an arbitrary response to hand back to the middleware
				# closer to the engine (the MiddlewareIpagentDownloaderMiddleware
				# defined above), which treats it as a "retries exhausted via
				# exceptions" signal.
				response = HtmlResponse(url='retry_over_exception')
				return response
Example #4
0
 def update(self, request, spider):
     """Issue a cookie-refresh request whose callback re-runs *request* via self.retry."""
     logger.debug("Updating cookies %(request)s", {'request': request}, extra={'spider': spider})
     meta = {'dont_merge_cookies': False, 'request': request}
     return Request("http://xueqiu.com", meta=meta,
                    dont_filter=True, callback=self.retry)