def resetQueue(self, host=None):
    """
    Throw away queued requests by installing fresh, empty queues.

    Each queue is a C{DeferredQueue} constructed with C{self.maxQueued} and
    C{self.maxBacklog}.

    @type host: C{str}
    @param host: Optional host whose queue should be replaced. When it is
        omitted (or otherwise falsy) the whole per-host queue mapping is
        rebuilt instead.
    """
    def makeQueue():
        return DeferredQueue(self.maxQueued, self.maxBacklog)

    if host:
        # Only the named host loses its queued requests.
        self.queue[host] = makeQueue()
        return

    # No host given: start over with a mapping that lazily creates a queue
    # for every host on first access.
    self.queue = WWWInsensitiveDefaultDict(makeQueue)
class HTTPClientChannelManager(EmptyHTTPClientManager):
    """
    Schedules HTTP client requests over a bounded number of channels.

    A request handed to L{submitRequest} is either connected immediately or
    parked in a per-host C{DeferredQueue} (see L{shouldQueue}). Queued
    requests are started again by L{rotateQueue}, which runs whenever a
    channel goes away. Responses with a C{MOVED_PERMANENTLY} or C{FOUND}
    status are re-submitted automatically, up to C{maxRedirects} times.
    """

    # Per-host mapping of request queues; built by resetQueue().
    queue = None
    # Class-level defaults kept for backward compatibility only. __init__
    # gives every instance its own sets so managers never share channels.
    openChannels = set()
    pendingChannels = set()
    # Queue length (per host) at which a request is forced to be
    # non-persistent and run immediately instead of being queued. 0 disables
    # that escape hatch.
    persistQueueThreshold = 5
    # Value passed to channel.setTimeout() when a channel becomes idle
    # (presumably seconds -- confirm against the channel implementation).
    clientIdleTimeout = 25
    # Owner object; this class calls its handleError(), connManagerBusy() and
    # connManagerIdle(). It is not assigned anywhere in this class.
    agent = None
    # Maximum number of redirects followed per request; 0 turns redirect
    # handling off entirely.
    maxRedirects = 5
    # Set by loseEverything(); suppresses queue rotation during shutdown.
    stopping = False
    # Protocol class handed to ClientCreator for new connections.
    clientChannel = HTTPClientProtocol
    clientChannelRequest = HTTPClientChannelRequest

    def __init__(self, maxQueued=None, maxBacklog=None, maxConcurrent=15):
        """
        @param maxQueued: First argument given to every C{DeferredQueue}
            created by L{resetQueue}.
        @param maxBacklog: Second argument given to every C{DeferredQueue}
            created by L{resetQueue}.
        @type maxConcurrent: C{int}
        @param maxConcurrent: Upper bound on open plus pending channels
            before new requests are queued.
        """
        self.maxQueued = maxQueued
        self.maxBacklog = maxBacklog
        self.maxConcurrent = maxConcurrent
        # Per-instance sets: the class-level ones would otherwise be mutated
        # in place and shared by every manager.
        self.openChannels = set()
        self.pendingChannels = set()
        self.resetQueue()

    def runCount(self):
        """
        Gets the number of 'running' requests. Channels that are still
        connecting (pending) count as running as well.

        @return: Number of open plus pending channels as an C{int}
        """
        return len(self.openChannels.union(self.pendingChannels))

    def queueCount(self, host=None):
        """
        Get the number of queued requests for a host, or for all hosts.

        @type host: C{str}
        @param host: An HTTP Host, i.e. I{example.com}. When omitted the
            counts of all host queues are added up.
        @return: Number of queued requests as an C{int}
        """
        if host:
            return len(self.queue[host].pending)
        return sum(len(q.pending) for q in self.queue.values())

    def shouldQueue(self, request):
        """
        Determines if a request should be queued or made immediately.

        A request is queued when:
            1. The number of running channels has reached C{maxConcurrent}.
            2. The request is persistent, at least two persistent channels
               already exist for its host, and the host's queue is still
               shorter than C{persistQueueThreshold} (or the threshold is 0).

        In case 2, once the host's queue has reached the threshold the
        request is switched to non-persistent (C{closeAfter} is set to
        C{True}) and is run immediately instead.

        @type request: L{ClientRequest}
        @param request: The request to inspect; its C{closeAfter} attribute
            may be modified as described above.
        @return: C{True} if the request should be queued, else C{False}
        """
        allChannels = self.openChannels.union(self.pendingChannels)
        if len(allChannels) >= self.maxConcurrent:
            return True

        # Non-persistent requests are never queued below the global limit.
        if request.closeAfter:
            return False

        host = request.uri.getHost()
        persistentCount = sum(
            1 for chan in allChannels
            if chan.host == host and chan.readPersistent)

        # Fewer than two persistent connections for this host: go ahead.
        if persistentCount < 2:
            return False

        # The per-host persistent limit is met. Queue the request, unless the
        # queue is already long enough that it is better to run it on a
        # throw-away connection.
        if (self.persistQueueThreshold != 0 and
                self.queueCount(host) >= self.persistQueueThreshold):
            request.closeAfter = True
            return False
        return True

    def resetQueue(self, host=None):
        """
        Resets a queue of requests.

        @type host: C{str}
        @param host: An optional host queue to reset. If no host is provided
            the entire queue mapping is reset.
        """
        def dq():
            return DeferredQueue(self.maxQueued, self.maxBacklog)

        if not host:
            self.queue = WWWInsensitiveDefaultDict(dq)
        else:
            self.queue[host] = dq()

    def rotateQueue(self, host=None):
        """
        Queued requests can be 'forgotten' if requests for a host do not
        finish properly. When this happens the queues need to be rotated
        such that requests are forced into 'open' mode.

        At most C{maxConcurrent - runCount()} requests are started.

        @type host: C{str}
        @param host: An optional host queue to rotate. If no host is provided
            all queues are rotated.
        @return: C{True} if at least one queued request was started, else
            C{False}
        """
        def rotateThese(hosts):
            maxRunnable = self.maxConcurrent - self.runCount()
            if maxRunnable <= 0:
                return False
            rotated = False
            for name in hosts:
                if maxRunnable <= 0:
                    # No capacity left; later hosts cannot start anything.
                    break
                # NOTE(review): this strips every 'www.' occurrence, not just
                # a leading one -- kept as-is; confirm it matches how
                # WWWInsensitiveDefaultDict normalises its keys.
                key = name.replace('www.', '').strip()
                hostQueue = self.queue[key]
                # Take a snapshot of the queue length up front, exactly one
                # get() per request that was pending when rotation began.
                for _ in range(min(maxRunnable, len(hostQueue.pending))):
                    d = hostQueue.get()
                    d.addCallback(self.createClientChannel)
                    # No request is available here, so the handler is
                    # registered without extra arguments.
                    d.addErrback(self.__handleConnErrback)
                    maxRunnable -= 1
                    rotated = True
            return rotated

        if host:
            return rotateThese([host])
        # Snapshot the keys: looking a host up may insert into the mapping.
        return rotateThese(list(self.queue.keys()))

    def submitRequest(self, request, _deferFromRedirect=None, now=False):
        """
        Submits a request which may be run immediately or queued.

        @param request: The request
        @type request: L{ClientRequest}

        @param _deferFromRedirect: If this is supplied it means that a
            redirect occurred and that deferred should be used for the
            subsequent re-request.
        @type _deferFromRedirect: C{Deferred}

        @param now: Force the request to run NOW
        @type now: C{bool}

        @return: The C{Deferred} that receives the final response. With
            redirects disabled (C{maxRedirects == 0}) this is the request
            protocol's own deferred. When C{_deferFromRedirect} is supplied
            (and redirects are enabled) C{None} is returned, because the
            caller already holds the deferred.
        """
        d = None
        if self.maxRedirects == 0:
            d = request.protocol.deferred
        else:
            if not _deferFromRedirect:
                d = Deferred()
                target = d
            else:
                target = _deferFromRedirect
            # Route the protocol's result through the redirect handler, which
            # fires ``target`` once a non-redirect response arrives.
            request.protocol.deferred.addCallback(
                self.__handlePossibleRedirect, target)
            request.protocol.deferred.addErrback(self.__handleErrback, target)

        if not now and self.shouldQueue(request):
            self.queue[request.uri.getHost()].put(request)
        else:
            self.createClientChannel(request)
        return d

    def createClientChannel(self, request):
        """
        Creates the actual channel for the request.

        A L{PendingChannel} placeholder is registered in C{pendingChannels}
        until the connection attempt succeeds or fails, so the attempt counts
        towards L{runCount}.

        @param request: The request
        @type request: L{ClientRequest}
        """
        c = ClientCreator(reactor, self.clientChannel, self)
        if request.uri.scheme == 'http':
            d = c.connectTCP(host=request.uri.netloc, port=request.uri.port)
        else:
            d = c.connectSSL(host=request.uri.netloc, port=request.uri.port,
                             contextFactory=ssl.ClientContextFactory())

        pending = PendingChannel()
        pending.host = request.uri.getHost()
        if request.closeAfter:
            pending.readPersistent = False
        self.pendingChannels.add(pending)

        d.addCallback(self.__request, request, pending)
        d.addErrback(self.__handleConnErrback, request, pending)

    def __handleConnErrback(self, e, request=None, pending=None):
        """
        Handles any timeout or unexpected error during a request.

        @param e: The failure passed to the errback.
        @param request: The request that failed, if known. It is optional
            because L{rotateQueue} registers this errback without one.
        @param pending: The L{PendingChannel} placeholder to drop, if any.
        """
        if pending:
            self.pendingChannels.discard(pending)
        if request is None:
            # Nothing to retry; report the bare failure.
            return self.agent.handleError(e)
        return self.agent.handleError(e, request, retry=True)

    def __handleErrback(self, e, d):
        """
        Handles all other errbacks: forwards the failure to C{d}, or to the
        agent when no deferred is given.
        """
        if d:
            d.errback(e)
        else:
            return self.agent.handleError(e)

    def __handlePossibleRedirect(self, response, d):
        """
        Follows a redirect response or delivers the response to C{d}.

        A response is treated as a redirect only if its code is
        C{MOVED_PERMANENTLY} or C{FOUND}, the request method is GET or POST
        and a C{Location} header is present. Once C{maxRedirects} is reached
        C{d} fails with L{RedirectLimitExceededError}.

        @param response: The response; its C{request} attribute is the
            request that produced it.
        @param d: The C{Deferred} that receives the final outcome.
        @type d: C{Deferred}
        """
        request = response.request
        # Only GET and POST are redirected automatically.
        # NOTE(review): RFC 2616 section 10.3 names GET and HEAD as the
        # methods that may be redirected without user confirmation.
        followable = (response.code in (MOVED_PERMANENTLY, FOUND) and
                      request.method in ('GET', 'POST') and
                      response.headers.hasHeader('Location'))
        if not followable:
            d.callback(response)
            return

        if request.numRedirects >= self.maxRedirects:
            d.errback(RedirectLimitExceededError(request.redirects[-1]))
            return

        loc = response.headers.getRawHeaders('Location')[0]
        request.redirect(loc)
        # The old protocol deferred has already fired; the re-request needs
        # a fresh one before submitRequest attaches callbacks to it.
        request.protocol.deferred = Deferred()
        self.submitRequest(request, d)

    def __request(self, channel, request, pending=None):
        """
        Sends C{request} over C{channel}.

        When C{pending} is given the channel has just connected: it is
        tagged with its host, moved into C{openChannels} and its placeholder
        is dropped from C{pendingChannels}.
        """
        if pending:
            # NOTE(review): pending channels are tagged with uri.getHost()
            # while open channels get uri.netloc -- confirm both compare
            # equal where shouldQueue() matches on chan.host.
            channel.host = request.uri.netloc
            self.openChannels.add(channel)
            self.pendingChannels.discard(pending)
        channel.submitRequest(request, request.closeAfter)

    def __requestFromQueue(self, request, channel):
        """
        Callback adapter with swapped arguments for sending a dequeued
        request over an already-open channel.
        """
        self.__request(channel, request)

    def clientBusy(self, channel):
        """Tells the agent that the connection manager is busy."""
        self.agent.connManagerBusy()

    def clientIdle(self, channel):
        """Arms the idle timeout on a channel that has no more work."""
        channel.setTimeout(self.clientIdleTimeout)
        # Feeding the idle channel from its host queue is disabled (original
        # note: "something messed below here..."); queued requests are
        # started by rotateQueue() from clientGone() instead.
        #d = self.queue[channel.host].get()
        #d.addCallback(self.__requestFromQueue, channel)
        #d.addErrback(self.__handleErrback, None)

    def clientPipelining(self, channel):
        """Pipelining hook; intentionally a no-op."""
        # Not implemented due to bug in twisted.web2 (I think!)
        pass

    def clientGone(self, channel):
        """
        Forgets a closed channel and, unless shutting down, starts queued
        requests for its host. The agent is told the manager is idle when
        nothing is running afterwards.
        """
        self.openChannels.discard(channel)
        if self.stopping:
            return
        # There may not be another channel waiting for requests from this
        # host, so rotate the queue to possibly initialize a new one.
        self.rotateQueue(channel.host)
        if self.runCount() <= 0:
            self.agent.connManagerIdle()

    def loseClient(self, channel, reason):
        """Drops a single channel by reporting C{reason} as its loss."""
        channel.connectionLost(reason)

    def loseEverything(self, reason):
        """
        Shuts the manager down: clears all queues, reports C{reason} as the
        connection loss to every open channel and forgets all channels.
        Subsequent calls are no-ops.
        """
        if self.stopping:
            return
        self.stopping = True
        self.resetQueue()
        # Iterate over a copy: connectionLost() may call back into
        # clientGone(), which changes the size of self.openChannels.
        for chan in self.openChannels.copy():
            chan.connectionLost(reason)
        self.pendingChannels = set()
        self.openChannels = set()