def __init__(self, fileName=None, parent=None):
    PriorityQueue.__init__(self)

    if parent is not None:
        self.parent = parent
    else:
        self.parent = self

    # Segments currently on disk
    self.onDiskSegments = {}

    # Maintain a collection of the known nzbFiles belonging to the segments in this
    # queue. Set is much faster for _put & __contains__
    self.nzbFiles = set()
    self.postponedNzbFiles = set()
    self.nzbFilesLock = Lock()

    self.nzbs = []
    self.nzbsLock = Lock()

    self.totalQueuedBytes = 0

    self.fillServerPriority = 0

    self.retryQueueEnabled = False
    self.rQueue = RetryQueue()

    if fileName is not None:
        self.parseNZB(fileName)
def clear(self):
    """ Clear the queue of all its contents """
    # retryQueueEnabled is a boolean (it starts out False), so test its truth
    # value -- the original 'is not None' check was always true
    if self.retryQueueEnabled:
        self.rQueue.clear()
    PriorityQueue.clear(self)

    self.nzbs = []

    self.parent.onDiskSegments.clear()
def _put(self, item):
    """ Add a segment to the queue """
    priority, item = item

    # Support adding NZBFiles to the queue. Just adds all the NZBFile's NZBSegments
    if isinstance(item, NZBFile):
        offset = 0
        for nzbSegment in item.nzbSegments:
            PriorityQueue._put(self, (priority + offset, nzbSegment))
            offset += 1
    else:
        # Assume segment, add to list
        if item.nzbFile not in self.nzbFiles:
            self.nzbFiles.add(item.nzbFile)
        PriorityQueue._put(self, (priority, item))
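# A minimal standalone sketch (not part of the original module) of the priority
# fan-out _put performs when handed an NZBFile: each successive segment gets
# priority + offset, so segments dequeue in their original article order.
# FakeSegment is a hypothetical stand-in for NZBSegment.
def _demoPutFanOut():
    class FakeSegment:
        def __init__(self, number):
            self.number = number

    priority = 25  # e.g. NZB_CONTENT_P
    queued = []
    offset = 0
    for segment in [FakeSegment(n) for n in range(1, 4)]:
        queued.append((priority + offset, segment))
        offset += 1

    # Lowest priority value dequeues first, i.e. segments 1, 2, 3 in order
    for prio, segment in sorted(queued):
        print 'priority %d -> segment %d' % (prio, segment.number)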
def getSmart(self, serverFactory):
    """ Get the next available segment in the queue. The 'smart'ness first checks for
    segments in the RetryQueue, otherwise it falls back to the main queue """
    # Don't bother w/ retryQueue nonsense unless it's enabled (meaning there are
    # multiple serverPools)
    if self.retryQueueEnabled:
        try:
            priority, segment = self.rQueue.get(serverFactory.serverPoolName)
            segment.fromQueue = self
            return priority, segment
        except Empty:
            # All retry queues for this serverPool are empty. fall through
            pass

        if not len(self) and len(self.rQueue):
            # Catch the special case where both the main NZBSegmentQueue is empty, all
            # the retry queues for the serverPool are empty, but there is still more
            # left to download in the retry queue (scheduled for retry by other
            # serverPools)
            raise EmptyForThisPool()

    priority, segment = PriorityQueue.get_nowait(self)
    segment.fromQueue = self
    return priority, segment
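# A hypothetical caller sketch (an assumption, not original code) showing why the
# two empty conditions differ: Empty means the whole queue is drained, while
# EmptyForThisPool means only this serverPool has nothing left -- other pools
# still hold retries, so this downloader should idle rather than shut down.
# serverFactory is assumed to be any object with a serverPoolName attribute.
def _demoFetchNext(queue, serverFactory):
    try:
        priority, segment = queue.getSmart(serverFactory)
    except EmptyForThisPool:
        return 'idle'      # nothing for this pool yet; check back later
    except Empty:
        return 'finished'  # the entire download is done
    return segment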
def _recurseCreateQueues(self, currentList, currentIndex, totalCount):
    """ Recurse through, creating the matrix of 'not1not2not3not4not5' etc and all its
    variants. Avoid creating duplicates """
    # Build the original notName
    notName = ''
    for i in currentList:
        notName += 'not' + str(i + 1)

    if len(currentList) >= totalCount - 1:
        # We've reached the end
        return

    for x in range(totalCount):
        if x == currentIndex or x in currentList:
            # We've already not'd x, skip it
            continue

        newList = currentList[:]
        newList.append(x)
        newList.sort()

        if newList in self.allNotNames:
            # this notName == an already generated notName, skip it
            continue

        self.allNotNames.append(newList)

        newNotName = notName + 'not' + str(x + 1)
        self.poolQueues[newNotName] = PriorityQueue()
        self._recurseCreateQueues(newList, x, totalCount)
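# Together with the 'not1'..'notN' seeding loop in createQueues below, this
# recursion enumerates every non-empty proper subset of serverPool indices. A
# standalone sketch (illustration only, not original code) of the same
# enumeration via itertools:
from itertools import combinations

def _demoAllNotNames(totalCount):
    names = []
    for size in range(1, totalCount):
        for subset in combinations(range(totalCount), size):
            names.append(''.join(['not' + str(i + 1) for i in subset]))
    return names

# _demoAllNotNames(3) ->
# ['not1', 'not2', 'not3', 'not1not2', 'not1not3', 'not2not3']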
def dtestBenchmark(self):
    """ Benchmark putting garbage into a normal priority queue """
    smallItemCount = 1000
    largeItemCount = 40000

    pq = PriorityQueue()
    info('Small:')
    self.doPut(pq, smallItemCount)
    info('Large:')
    self.doPut(pq, largeItemCount)
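# doPut is not shown in this excerpt; a minimal sketch (an assumption, not the
# original helper) of what it might look like: time count puts of throwaway
# (priority, value) pairs into the supplied queue
import random, time

def _demoDoPut(pq, count):
    start = time.time()
    for i in range(count):
        pq.put((random.randint(0, 100), i))
    info('put %d items in %.3fs' % (count, time.time() - start))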
while 1:
    try:
        print nzbq.get()
    except:
        print 'doh'
        break

#if __name__ == '__main__':
if __name__ == '__main__2':
    import sys
    #(newsgroups, posts) = ParseNZB(sys.argv[1], [1, 2])
    (newsgroups, posts) = ParseNZB(sys.argv[1])
    for n in newsgroups:
        print 'n: ' + n
    print 'l: ' + str(len(posts))

    total = 0
    from Hellanzb.Util import PriorityQueue
    pq = PriorityQueue()
    NZB_CONTENT_P = 25

    from time import time
    # time is imported as a bare function here -- calling time.time() would
    # raise an AttributeError
    start = time()
    for p in posts:
        print 'p: ' + p
        print 'contents: ' + posts[p].__repr__()
        total += posts[p].numparts
        for part in posts[p].parts:
            pq.put((NZB_CONTENT_P, posts[p].parts[part]))

    elapsed = time() - start
    print 'elapsed: ' + str(elapsed)
    print 'total: ' + str(total)
    #while 1:
def createQueues(self):
    """ Create the retry PriorityQueues for all known serverPools

    This is a hairy way to do this. It's not likely to scale for more than probably
    4-5 serverPools. However it is functionally ideal for a reasonable number of
    serverPools

    The idea is you want your downloaders to always be busy. Without the RetryQueue,
    they would simply always pull the next available segment out of the main
    NZBSegmentQueue. Once the NZBSegmentQueue was empty, all downloaders knew they
    were done

    Now that we desire the ability to requeue a segment that failed on a particular
    serverPool, the downloaders need to exclude the segments they've previously
    failed to download when pulling segments out of the NZBSegmentQueue

    If we continued keeping all queued (and now requeued) segments in the same
    queue, the potentially many downloaders could easily end up going through the
    entire queue seeking a segment they haven't already tried. This is unacceptable
    when our queues commonly hold over 60K items

    The best way I can currently see to support the downloaders being able to
    quickly look up the 'actual' next segment they want to download is to have
    multiple queues, indexed by what serverPool(s) have previously failed on those
    segments

    If we have 3 serverPools (1, 2, and 3) we end up with a dict looking like:

    not1     -> q
    not2     -> q
    not3     -> q
    not1not2 -> q
    not1not3 -> q
    not2not3 -> q

    The number of queues for n serverPools works out to 2**n - 2 (one queue per
    non-empty proper subset of serverPools), so it grows exponentially

    Every serverPool avoids certain queues. In the previous example, serverPool 1
    only needs to look at all the queues that are not tagged as having already
    failed on 1 (not2, not3, and not2not3) -- only half of the queues

    The numbers:

    serverPools  totalQueues  onlyQueues
    2            2            1
    3            6            3
    4            14           7
    5            30           15
    6            62           31
    7            126          63

    The RetryQueue.get() algorithm simply checks all queues for emptiness until it
    finds one with items in it. Anything over 5 serverPools is worrisome: for 6
    serverPools, the worst case scenario (which could be very common in normal use)
    is 31 array len() calls per get. With a segment size of 340KB, downloading at
    1360KB/s (and multiple connections), we could be doing those 31 len() calls on
    average 4 times a second. And with multiple connections, this could easily
    spurt to near your max connection count, per second (4, 10, even 30
    connections?)

    Luckily len() calls are as quick as can be and who the hell uses 6 different
    usenet providers anyway? =] """
    # Go through all the serverPools and create the initial 'not1' 'not2' queues
    # FIXME: could probably let the recursive function take care of this
    for i in range(len(self.serverPoolNames)):
        notName = 'not' + str(i + 1)
        self.poolQueues[notName] = PriorityQueue()
        self._recurseCreateQueues([i], i, len(self.serverPoolNames))

    # Finished creating all the pools. Now index every pool's list of valid retry
    # queues they need to check. (using the above docstring, serverPool 1 would
    # have a list of 'not2', 'not3', and 'not2not3' in its nameIndex)
    i = 0
    for name in self.serverPoolNames:
        i += 1

        valids = []
        for notName in self.poolQueues.keys():
            if notName.find('not' + str(i)) > -1:
                continue
            valids.append(notName)
        self.nameIndex[name] = valids
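# A quick standalone check (illustration only) of the table in the docstring
# above: every non-empty proper subset of n serverPools gets a queue, so
# totalQueues = 2**n - 2, and each pool only consults the half lacking its own
# 'notN' tag, onlyQueues = 2**(n - 1) - 1
def _demoQueueCounts():
    for n in range(2, 8):
        print '%d serverPools -> %d total queues, %d per pool' % (
            n, 2 ** n - 2, 2 ** (n - 1) - 1)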