class ProxySelectorMiddleware(object):
    """Downloader middleware that routes a fraction of requests through
    a proxy chosen by ``ProxyEvaluator``.

    Also carries retry configuration read from the ``RETRY_*`` settings
    and the tuple of network exceptions that warrant a retry.
    NOTE(review): despite the name, the retry settings mirror Scrapy's
    RetryMiddleware -- confirm whether the retry attributes are actually
    consumed elsewhere or are copy-paste leftovers.
    """

    # IOError is raised by the HttpCompression middleware when trying to
    # decompress an empty response
    EXCEPTIONS_TO_RETRY = (ServerTimeoutError, UserTimeoutError,
                           DNSLookupError, ConnectionRefusedError,
                           ConnectionDone, ConnectError, ConnectionLost,
                           TCPTimedOutError, IOError)

    def __init__(self, settings):
        """Read retry/proxy settings.

        Raises NotConfigured when RETRY_ENABLED is off, which makes
        Scrapy drop the middleware entirely.
        """
        if not settings.getbool('RETRY_ENABLED'):
            raise NotConfigured
        self.max_retry_times = settings.getint('RETRY_TIMES')
        self.retry_http_codes = set(
            int(x) for x in settings.getlist('RETRY_HTTP_CODES'))
        self.priority_adjust = settings.getint('RETRY_PRIORITY_ADJUST')
        self.proxy_ev = ProxyEvaluator()
        self.proxy_chance = settings.getint('PROXY_CHANCE')
        self.min_level = settings.getint('MIN_LEVEL_FOR_PROXY')

    def process_request(self, request, spider):
        """Optionally attach a proxy to the outgoing request.

        Redirects are always disabled via ``dont_redirect``.  When the
        evaluator is enabled and ``self.use_proxy(request)`` elects this
        request, a proxy from ProxyEvaluator is written into
        ``request.meta['proxy']``; otherwise any stale ``'proxy'`` key is
        removed so the request goes out direct.
        """
        request.meta['dont_redirect'] = True
        if not self.proxy_ev.is_disabled() and self.use_proxy(request):
            p = self.proxy_ev.valid_proxy()
            log.msg("Using proxy = %s on %s" % (p, request.url))
            try:
                request.meta['proxy'] = "http://%s" % p
            # Fixed: "except Exception, e" is Python-2-only syntax and a
            # SyntaxError on Python 3; the "as" form works on 2.6+ and 3.x.
            except Exception as e:
                # NOTE(review): Scrapy's log.msg signature uses "level",
                # not "_level" -- confirm this kwarg actually reaches the
                # logger and is not silently swallowed.
                log.msg("Exception %s" % e, _level=log.CRITICAL)
        elif 'proxy' in request.meta:
            # Just in case we have received a request with that flag set.
            # (Removed the original's redundant inner "if 'proxy' in
            # request.meta" -- the elif guard already guarantees it.)
            del request.meta['proxy']
def __init__(self, settings):
    """Load retry and proxy configuration from *settings*.

    Raises NotConfigured when RETRY_ENABLED is false so the middleware
    is skipped altogether.
    """
    if not settings.getbool('RETRY_ENABLED'):
        raise NotConfigured
    self.max_retry_times = settings.getint('RETRY_TIMES')
    codes = settings.getlist('RETRY_HTTP_CODES')
    self.retry_http_codes = set(map(int, codes))
    self.priority_adjust = settings.getint('RETRY_PRIORITY_ADJUST')
    self.proxy_ev = ProxyEvaluator()
    self.proxy_chance = settings.getint('PROXY_CHANCE')
    self.min_level = settings.getint('MIN_LEVEL_FOR_PROXY')