def _retry(self, request, reason, spider):
    """Penalize the proxy of a failed request, swap in another one, then retry.

    If the request carried a proxy, its stored priority is increased by
    ``PRIORITY_RATE`` (presumably a larger value means a worse proxy --
    confirm against the proxy store) and a replacement from ``get_proxy()``
    is written to ``request.meta['proxy']`` when one is available.

    :param request: the request that is about to be retried.
    :param reason: failure reason, forwarded unchanged to the parent class.
    :param spider: the running spider; used for logging.
    :returns: whatever the parent ``_retry`` returns. In Scrapy's
        ``RetryMiddleware`` this is the retry request (or ``None`` once
        retries are exhausted), so it must be propagated to the caller.
    """
    used_proxy = request.meta.get('proxy')
    if used_proxy:
        priority = get_priority(used_proxy)
        # A falsy priority (None or 0) is left alone, as in the original code.
        if priority:
            add_or_update(used_proxy, priority + PRIORITY_RATE)

        new_proxy = get_proxy()
        if new_proxy:
            request.meta['proxy'] = new_proxy
            # Log the proxy that failed, captured before it was overwritten.
            spider.log('RetryMiddleware used proxy: %s, new proxy: %s' % (used_proxy, new_proxy), logging.DEBUG)

    # Return the parent's result; dropping it would discard the retry request.
    return super(ProxyRetryMiddleware, self)._retry(request, reason, spider)
def _response_downloaded(self, response, request, spider): key, slot = self._get_slot(request, spider) latency = request.meta.get('download_latency') * 1000 proxy = request.meta.get('proxy') if latency is None or slot is None or proxy is None: return if self.debug: size = len(response.body) conc = len(slot.transferring) msg = "slot: %s | conc: %2d | latency:%5d ms | size: %6d bytes | proxy:%s" \ % (key, conc, latency, size, proxy) spider.log(msg, level=logging.DEBUG) add_or_update(proxy, latency)
def process_item(self, item, spider):
    """Store the item's proxy with its priority and pass the item along.

    :param item: mapping-like item providing ``"proxy"`` and ``"priority"``.
    :param spider: the spider that produced the item (unused here).
    :returns: the same ``item``, unchanged.
    """
    proxy_address = item["proxy"]
    proxy_priority = item["priority"]
    add_or_update(proxy_address, proxy_priority)
    return item