Example #1
0
    def __init__(self, fileName=None, parent=None):
        PriorityQueue.__init__(self)

        if parent is not None:
            self.parent = parent
        else:
            self.parent = self

            # Segments curently on disk
            self.onDiskSegments = {}

        # Maintain a collection of the known nzbFiles belonging to the segments in this
        # queue. Set is much faster for _put & __contains__
        self.nzbFiles = set()
        self.postponedNzbFiles = set()
        self.nzbFilesLock = Lock()

        self.nzbs = []
        self.nzbsLock = Lock()

        self.totalQueuedBytes = 0

        self.fillServerPriority = 0

        self.retryQueueEnabled = False
        self.rQueue = RetryQueue()

        if fileName is not None:
            self.parseNZB(fileName)
Example #2
0
    def __init__(self, fileName = None, parent = None):
        PriorityQueue.__init__(self)

        if parent is not None:
            self.parent = parent
        else:
            self.parent = self

            # Segments curently on disk
            self.onDiskSegments = {}

        # Maintain a collection of the known nzbFiles belonging to the segments in this
        # queue. Set is much faster for _put & __contains__
        self.nzbFiles = set()
        self.postponedNzbFiles = set()
        self.nzbFilesLock = Lock()

        self.nzbs = []
        self.nzbsLock = Lock()

        self.totalQueuedBytes = 0

        self.fillServerPriority = 0

        self.retryQueueEnabled = False
        self.rQueue = RetryQueue()

        if fileName is not None:
            self.parseNZB(fileName)
Example #3
0
    def clear(self):
        """ Clear the queue of all its contents"""
        if self.retryQueueEnabled is not None:
            self.rQueue.clear()
        PriorityQueue.clear(self)

        self.nzbs = []
        
        self.parent.onDiskSegments.clear()
Example #4
0
    def clear(self):
        """ Clear the queue of all its contents"""
        if self.retryQueueEnabled is not None:
            self.rQueue.clear()
        PriorityQueue.clear(self)

        self.nzbs = []

        self.parent.onDiskSegments.clear()
Example #5
0
    def _put(self, item):
        """ Add a segment to the queue """
        priority, item = item

        # Support adding NZBFiles to the queue. Just adds all the NZBFile's NZBSegments
        if isinstance(item, NZBFile):
            offset = 0
            for nzbSegment in item.nzbSegments:
                PriorityQueue._put(self, (priority + offset, nzbSegment))
                offset += 1
        else:
            # Assume segment, add to list
            if item.nzbFile not in self.nzbFiles:
                self.nzbFiles.add(item.nzbFile)
            PriorityQueue._put(self, (priority, item))
Example #6
0
    def _put(self, item):
        """ Add a segment to the queue """
        priority, item = item

        # Support adding NZBFiles to the queue. Just adds all the NZBFile's NZBSegments
        if isinstance(item, NZBFile):
            offset = 0
            for nzbSegment in item.nzbSegments:
                PriorityQueue._put(self, (priority + offset, nzbSegment))
                offset += 1
        else:
            # Assume segment, add to list
            if item.nzbFile not in self.nzbFiles:
                self.nzbFiles.add(item.nzbFile)
            PriorityQueue._put(self, (priority, item))
Example #7
0
    def getSmart(self, serverFactory):
        """ Get the next available segment in the queue. The 'smart'ness first checks for segments
        in the RetryQueue, otherwise it falls back to the main queue """
        # Don't bother w/ retryQueue nonsense unless it's enabled (meaning there are
        # multiple serverPools)
        if self.retryQueueEnabled:
            try:
                priority, segment = self.rQueue.get(
                    serverFactory.serverPoolName)
                segment.fromQueue = self
                return priority, segment
            except Empty:
                # All retry queues for this serverPool are empty. fall through
                pass

            if not len(self) and len(self.rQueue):
                # Catch the special case where both the main NZBSegmentQueue is empty, all
                # the retry queues for the serverPool are empty, but there is still more
                # left to download in the retry queue (scheduled for retry by other
                # serverPools)
                raise EmptyForThisPool()

        priority, segment = PriorityQueue.get_nowait(self)
        segment.fromQueue = self
        return priority, segment
Example #8
0
    def _recurseCreateQueues(self, currentList, currentIndex, totalCount):
        """ Recurse through, creating the matrix of 'not1not2not3not4not5' etc and all its
        variants. Avoid creating duplicates """
        # Build the original notName
        notName = ''
        for i in currentList:
            notName += 'not' + str(i + 1)

        if len(currentList) >= totalCount - 1:
            # We've reached the end
            return

        for x in range(totalCount):
            if x == currentIndex or x in currentList:
                # We've already not'd x, skip it
                continue

            newList = currentList[:]
            newList.append(x)
            newList.sort()

            if newList in self.allNotNames:
                # this notName == an already generated notName, skip it
                continue

            self.allNotNames.append(newList)

            newNotName = notName + 'not' + str(x + 1)
            self.poolQueues[newNotName] = PriorityQueue()
            self._recurseCreateQueues(newList, x, totalCount)
Example #9
0
    def dtestBenchmark(self):
        """ Benchmark putting garbage into a normal priority queue """

        smallItemCount = 1000
        largeItemCount = 40000

        pq = PriorityQueue()

        info('Small:')
        self.doPut(pq, smallItemCount)
        info('Large:')
        self.doPut(pq, largeItemCount)
Example #10
0
    def getSmart(self, serverFactory):
        """ Get the next available segment in the queue. The 'smart'ness first checks for segments
        in the RetryQueue, otherwise it falls back to the main queue """
        # Don't bother w/ retryQueue nonsense unless it's enabled (meaning there are
        # multiple serverPools)
        if self.retryQueueEnabled:
            try:
                priority, segment = self.rQueue.get(serverFactory.serverPoolName)
                segment.fromQueue = self
                return priority, segment
            except Empty:
                # All retry queues for this serverPool are empty. fall through
                pass

            if not len(self) and len(self.rQueue):
                # Catch the special case where both the main NZBSegmentQueue is empty, all
                # the retry queues for the serverPool are empty, but there is still more
                # left to download in the retry queue (scheduled for retry by other
                # serverPools)
                raise EmptyForThisPool()
            
        priority, segment = PriorityQueue.get_nowait(self)
        segment.fromQueue = self
        return priority, segment
Example #11
0
            print nzbq.get()
        except:
            print 'doh'
            break
            
#if __name__ == '__main__':
if __name__ == '__main__2':
        import sys
        #(newsgroups, posts) = ParseNZB(sys.argv[1], [1, 2])
        (newsgroups, posts) = ParseNZB(sys.argv[1])
        for n in newsgroups:
                print 'n: ' + n
        print 'l: ' + str(len(posts))
        total = 0
        from Hellanzb.Util import PriorityQueue
        pq = PriorityQueue()
        NZB_CONTENT_P = 25
        from time import time
        start = time.time()
        for p in posts:
                print 'p: ' + p
                print 'contents: ' + posts[p].__repr__()
                total += posts[p].numparts
                for part in posts[p].parts:
                        pq.put((NZB_CONTENT_P, posts[p].parts[part]))

        elapsed = time.time() - start
        print 'elapsed: ' + str(elapsed)
        print 'total: ' + str(total)

        #while 1:
Example #12
0
            print nzbq.get()
        except:
            print 'doh'
            break

#if __name__ == '__main__':
if __name__ == '__main__2':
    import sys
    #(newsgroups, posts) = ParseNZB(sys.argv[1], [1, 2])
    (newsgroups, posts) = ParseNZB(sys.argv[1])
    for n in newsgroups:
        print 'n: ' + n
    print 'l: ' + str(len(posts))
    total = 0
    from Hellanzb.Util import PriorityQueue
    pq = PriorityQueue()
    NZB_CONTENT_P = 25
    from time import time
    start = time.time()
    for p in posts:
        print 'p: ' + p
        print 'contents: ' + posts[p].__repr__()
        total += posts[p].numparts
        for part in posts[p].parts:
            pq.put((NZB_CONTENT_P, posts[p].parts[part]))

    elapsed = time.time() - start
    print 'elapsed: ' + str(elapsed)
    print 'total: ' + str(total)

    #while 1:
Example #13
0
    def createQueues(self):
        """ Create the retry PriorityQueues for all known serverPools

        This is a hairy way to do this. It's not likely to scale for more than probably
        4-5 serverPools. However it is functionally ideal for a reasonable number of
        serverPools

        The idea is you want your downloaders to always be busy. Without the RetryQueue,
        they would simply always pull the next available segment out of the main
        NZBSegmentQueue. Once the NZBSegmentQueue was empty, all downloaders knew they
        were done

        Now that we desire the ability to requeue a segment that failed on a particular
        serverPool, the downloaders need to exclude the segments they've previously failed
        to download, when pulling segments out of the NZBSegmentQueue

        If we continue keeping all queued (and now requeued) segments in the same queue,
        the potentially many downloaders could easily end up going through the entire
        queue seeking a segment they haven't already tried. This is unacceptable when our
        queues commonly hold over 60K items

        The best way I can currently see to support the downloaders being able to quickly
        lookup the 'actual' next segment they want to download is to have multiple queues,
        indexed by what serverPool(s) have previously failed on those segments

        If we have 3 serverPools (1, 2, and 3) we end up with a dict looking like:

        not1     -> q
        not2     -> q
        not3     -> q
        not1not2 -> q
        not1not3 -> q
        not2not3 -> q

        I didn't quite figure out the exact equation to gather the number of Queues in
        regard to the number of serverPools, but (if my math is right) it seems to grow
        pretty quickly (is quadratic)

        Every serverPool avoids certain queues. In the previous example, serverPool 1 only
        needs to look at all the Queues that are not tagged as having already failed on 1
        (not2, not3, and not2not3) -- only half of the queues

        The numbers:

        serverPools    totalQueues    onlyQueues

        2              2              1
        3              6              3
        4              14             7
        5              30             15
        6              62             31
        7              126            63

        The RetryQueue.get() algorithim simply checks all queues for emptyness until it
        finds one with items in it. The > 5 is worrysome. That means for 6 serverPools,
        the worst case scenario (which could be very common in normal use) would be to
        make 31 array len() calls. With a segment size of 340KB, downloading at 1360KB/s,
        (and multiple connections) we could be doing those 31 len() calls on average of 4
        times a second. And with multiple connections, this could easily spurt to near
        your max connection count, per second (4, 10, even 30 connections?)

        Luckily len() calls are as quick as can be and who the hell uses 6 different
        usenet providers anyway? =]
        """
        # Go through all the serverPools and create the initial 'not1' 'not2'
        # queues
        # FIXME: could probably let the recursive function take care of this
        for i in range(len(self.serverPoolNames)):
            notName = 'not' + str(i + 1)
            self.poolQueues[notName] = PriorityQueue()

            self._recurseCreateQueues([i], i, len(self.serverPoolNames))

        # Finished creating all the pools. Now index every pool's list of valid retry
        # queues they need to check.  (using the above docstring, serverPool 1 would have
        # a list of 'not2', 'not3', and 'not2not3' in its nameIndex
        i = 0
        for name in self.serverPoolNames:
            i += 1

            valids = []
            for notName in self.poolQueues.keys():
                if notName.find('not' + str(i)) > -1:
                    continue
                valids.append(notName)
            self.nameIndex[name] = valids