def _get_slot(self, request):
    """Return a ``(key, slot)`` pair for *request*.

    The key is the hostname of the request URL (``''`` when the URL has
    none).  When the downloader's ``ip_concurrency`` is truthy, the key
    is replaced by its ``dnscache`` entry, if one exists.

    The slot is looked up in the downloader's ``slots`` first and then in
    its ``inactive_slots``; it is ``None`` when neither mapping has it.
    """
    dl = self.crawler.engine.downloader
    hostname = urlparse_cached(request).hostname
    slot_key = hostname if hostname else ''
    if dl.ip_concurrency:
        # Fall back to the hostname itself when nothing is cached for it.
        slot_key = dnscache.get(slot_key, slot_key)

    slot = dl.slots.get(slot_key)
    if not slot:
        slot = dl.inactive_slots.get(slot_key)
    return slot_key, slot
def _get_slot_key(self, request, spider):
    """Return the download slot key for *request*.

    An explicit ``"download_slot"`` entry in ``request.meta`` always wins.
    Otherwise the key is the hostname of the request URL (``""`` when the
    URL has none), replaced by its ``dnscache`` entry when
    ``self.ip_concurrency`` is truthy and an entry exists.

    *spider* is accepted but not used here.
    """
    meta = request.meta
    if "download_slot" in meta:
        return meta["download_slot"]

    hostname = urlparse_cached(request).hostname
    slot_key = hostname if hostname else ""
    if self.ip_concurrency:
        # Fall back to the hostname itself when nothing is cached for it.
        slot_key = dnscache.get(slot_key, slot_key)
    return slot_key
def _get_slot_key(self, request, spider):
    """Compute the key of the download slot that *request* belongs to.

    Resolution order:

    1. ``request.meta['download_slot']`` when that entry is present.
    2. The ``dnscache`` entry for the request hostname, when
       ``self.ip_concurrency`` is truthy and such an entry exists.
    3. The request hostname, or ``''`` if the URL has none.

    *spider* is accepted but not used here.
    """
    if 'download_slot' not in request.meta:
        host_key = urlparse_cached(request).hostname or ''
        if not self.ip_concurrency:
            return host_key
        # Unknown hosts map to themselves.
        return dnscache.get(host_key, host_key)
    return request.meta['download_slot']
def _get_slot_key(self, request, spider):
    """Return the download slot key for *request*.

    If ``request.meta`` contains the key named by ``self.DOWNLOAD_SLOT``,
    its value is returned unchanged.  Otherwise the key is the hostname of
    the request URL (``''`` when the URL has none), replaced by its
    ``dnscache`` entry when ``self.ip_concurrency`` is truthy and an entry
    exists.

    *spider* is accepted but not used here.
    """
    meta_key = self.DOWNLOAD_SLOT
    if meta_key in request.meta:
        return request.meta[meta_key]

    hostname = urlparse_cached(request).hostname
    slot_key = hostname or ''
    if self.ip_concurrency:
        # Fall back to the hostname itself when nothing is cached for it.
        slot_key = dnscache.get(slot_key, slot_key)
    return slot_key
def _get_slot_key(self, request, spider):
    """Return the download slot key for *request*.

    *spider* is accepted but not used here.
    """
    # If request.meta already carries the slot entry, use it as-is.
    if self.DOWNLOAD_SLOT in request.meta:
        return request.meta[self.DOWNLOAD_SLOT]

    # Otherwise the key is the request hostname, or '' when there is none.
    host_key = urlparse_cached(request).hostname or ''
    if not self.ip_concurrency:
        return host_key

    # Take the key for this host from the DNS cache; hosts without a
    # cache entry map to themselves.
    return dnscache.get(host_key, host_key)
def _get_slot(self, request, spider):
    """Return ``(key, slot)`` for *request*, creating the slot on demand.

    The key is the hostname of the request URL (``''`` when the URL has
    none), replaced by its ``dnscache`` entry when ``self.ip_concurrency``
    is truthy and an entry exists.

    When ``self.slots`` has no entry for the key, a new ``Slot`` is built
    from the concurrency and delay returned by ``_get_concurrency_delay``
    and stored under that key before being returned.
    """
    per_ip = self.ip_concurrency
    slot_key = urlparse_cached(request).hostname or ''
    if per_ip:
        # Fall back to the hostname itself when nothing is cached for it.
        slot_key = dnscache.get(slot_key, slot_key)

    if slot_key not in self.slots:
        # The per-IP limit takes precedence over the per-domain limit.
        base_concurrency = per_ip if per_ip else self.domain_concurrency
        limit, delay = _get_concurrency_delay(
            base_concurrency, spider, self.settings
        )
        self.slots[slot_key] = Slot(limit, delay, self.settings)

    return slot_key, self.slots[slot_key]
def _get_slot(self, request, spider):
    """Look up, or lazily create, the download slot for *request*.

    Returns a ``(key, slot)`` tuple.  The key is the request hostname
    (``''`` when the URL has none); with a truthy ``self.ip_concurrency``
    it is swapped for the matching ``dnscache`` entry when one exists.

    A missing slot is created with the concurrency and delay computed by
    ``_get_concurrency_delay`` and registered in ``self.slots``.
    """
    hostname = urlparse_cached(request).hostname
    key = hostname if hostname else ''
    if self.ip_concurrency:
        # Unknown hosts map to themselves.
        key = dnscache.get(key, key)

    if key in self.slots:
        return key, self.slots[key]

    # The per-IP limit, when set, overrides the per-domain limit.
    starting_limit = self.ip_concurrency or self.domain_concurrency
    concurrency, delay = _get_concurrency_delay(
        starting_limit, spider, self.settings)
    self.slots[key] = Slot(concurrency, delay, self.settings)
    return key, self.slots[key]
def resolveHostName(
    self,
    resolutionReceiver,
    hostName,
    portNumber=0,
    addressTypes=None,
    transportSemantics="TCP",
):
    """Resolve *hostName*, serving the result from ``dnscache`` if possible.

    Cache hit (a truthy ``dnscache`` entry for *hostName*): the cached
    addresses are replayed to *resolutionReceiver* as a
    ``resolutionBegan`` / ``addressResolved``... / ``resolutionComplete``
    sequence, and *resolutionReceiver* itself is returned.

    Cache miss: the call is delegated to
    ``self.original_resolver.resolveHostName`` with a wrapping receiver
    that forwards every callback to *resolutionReceiver* and, once
    resolution completes with at least one address, stores the collected
    addresses in ``dnscache`` as a tuple.  The delegate's return value is
    returned.
    """
    known_addresses = dnscache.get(hostName)
    if known_addresses:
        resolution = HostResolution(hostName)
        resolutionReceiver.resolutionBegan(resolution)
        for known in known_addresses:
            resolutionReceiver.addressResolved(known)
        resolutionReceiver.resolutionComplete()
        return resolutionReceiver

    @provider(IResolutionReceiver)
    class CachingResolutionReceiver:
        # Pass-through receiver that remembers each resolved address.

        def __init__(self):
            self.addresses = []

        def resolutionBegan(self, resolution):
            resolutionReceiver.resolutionBegan(resolution)

        def addressResolved(self, address):
            # Forward first; the address is recorded only if that succeeds.
            resolutionReceiver.addressResolved(address)
            self.addresses.append(address)

        def resolutionComplete(self):
            resolutionReceiver.resolutionComplete()
            if not self.addresses:
                # Nothing resolved: leave the cache untouched.
                return
            dnscache[hostName] = tuple(self.addresses)

    delegate = self.original_resolver.resolveHostName
    caching_receiver = CachingResolutionReceiver()
    return delegate(
        caching_receiver,
        hostName,
        portNumber,
        addressTypes,
        transportSemantics,
    )
def _get_slot(self, request):
    """Return the ``(key, slot)`` pair the downloader uses for *request*.

    The key is the request hostname (``''`` when the URL has none); if
    the downloader's ``ip_concurrency`` is truthy, it is swapped for the
    matching ``dnscache`` entry when one exists.

    The slot comes from the downloader's ``slots`` mapping, falling back
    to ``inactive_slots``; it is ``None`` when neither mapping has it.
    """
    downloader = self.crawler.engine.downloader
    key = urlparse_cached(request).hostname or ''
    if downloader.ip_concurrency:
        # Unknown hosts map to themselves.
        key = dnscache.get(key, key)

    active_slot = downloader.slots.get(key)
    if active_slot:
        return key, active_slot
    return key, downloader.inactive_slots.get(key)
def _get_key(self, request, type):
    """Return the grouping key for *request*.

    The key is the hostname of the request URL (``''`` when the URL has
    none).  When *type* equals ``'ip'``, the hostname is replaced by its
    ``dnscache`` entry, if one exists.
    """
    hostname = urlparse_cached(request).hostname or ''
    # Hosts without a cache entry map to themselves.
    return dnscache.get(hostname, hostname) if type == 'ip' else hostname