def logSkippedPars(nzb):
    """ Print a message describing the summary of all skipped par files.

    Logs one 'Skipped pars:' header line (total file count and sizes), then one
    indented line per par2 group showing that group's files, size and recovery
    block count. Nothing is logged unless some par data was actually dequeued.
    """
    # Tally the total mb skipped
    skippedParMB = 0
    actualSkippedParMB = 0
    parFilenames = []
    skippedParsDict = {}
    skippedPars = nzb.skippedParFiles[:]
    # Sort by filename (key= sort; same ordering as the old cmp()-based sort)
    skippedPars.sort(key=lambda nzbFile: nzbFile.filename)
    for nzbFile in skippedPars:
        parFilenames.append(nzbFile.filename)
        skippedParsDict[nzbFile.filename] = nzbFile
        skippedParMB += nzbFile.totalBytes
        # Only segments that were actually dequeued count as truly skipped
        for nzbSegment in nzbFile.dequeuedSegments:
            actualSkippedParMB += nzbSegment.bytes

    # Identify the par groups
    parGroups, parGroupOrder = findPar2Groups(parFilenames)

    if actualSkippedParMB > 0:
        info('Skipped pars: %i files, %s (actual skipped: %s)' % \
             (len(nzb.skippedParFiles), prettySize(skippedParMB),
              prettySize(actualSkippedParMB)))

        # Further summarize the par group statistics
        for key in parGroupOrder:
            parFilenames = parGroups[key]
            skippedGroupMB = 0
            skippedGroupBlocks = 0
            for parFilename in parFilenames:
                skippedGroupBlocks += getParSize(parFilename)
                nzbFile = skippedParsDict[parFilename]
                skippedGroupMB += nzbFile.totalBytes

            # Don't duplicate the extra stats when there's only one par group
            groupStats = ''
            if len(parGroups) > 1:
                groupStats = '%i files, %s, ' % (len(parFilenames),
                                                 prettySize(skippedGroupMB))

            # Defensive default: groups returned by findPar2Groups should never
            # be empty, but avoid an UnboundLocalError below if one ever is
            label = ''
            if len(parFilenames) == 1:
                label = parFilenames[0]
            elif len(parFilenames) == 2:
                label = '%s,\n %s' % (parFilenames[0], parFilenames[-1])
            elif len(parFilenames) > 2:
                label = '%s ->\n %s' % (parFilenames[0], parFilenames[-1])

            # NOTE: nzbFile is the last member seen in the group loop above --
            # assumes all files in a par group share the same parType (TODO confirm)
            info(' %s (%s%s %s)' % (label, groupStats, skippedGroupBlocks,
                                    getParRecoveryName(nzbFile.parType)))
def endDownload():
    """ Finished downloading """
    # Tally bytes read this session across all server factories, and shut each
    # factory's download session down.
    # NOTE(review): sessionStartTime is assigned (from the priority-0, i.e.
    # non-fill-server, factory) but never read in this function -- presumably
    # leftover from older elapsed-time accounting; confirm before relying on it
    sessionStartTime = None
    sessionReadBytes = 0
    for nsf in Hellanzb.nsfs:
        sessionReadBytes += nsf.sessionReadBytes
        if nsf.fillServerPriority == 0:
            sessionStartTime = nsf.sessionStartTime
        nsf.endDownload()

    # Reset global download state and the scrolling UI
    Hellanzb.downloading = False
    Hellanzb.totalSpeed = 0
    Hellanzb.scroller.currentLog = None
    scrollEnd()
    Hellanzb.downloadScannerID.cancel()
    Hellanzb.totalArchivesDownloaded += 1
    # Persist queue/download state to disk
    writeStateXML()

    if not len(Hellanzb.queue.currentNZBs()):
        # END
        return

    # Log transfer statistics for the NZB that just finished
    currentNZB = Hellanzb.queue.currentNZBs()[0]
    downloadTime = time.time() - currentNZB.downloadStartTime
    speed = sessionReadBytes / 1024.0 / downloadTime
    info(
        "Transferred %s in %s at %.1fKB/s (%s)"
        % (prettySize(sessionReadBytes), prettyElapsed(downloadTime), speed, currentNZB.archiveName)
    )

    # Par recovery sessions accumulate on top of the original download time
    if not currentNZB.isParRecovery:
        currentNZB.downloadTime = downloadTime
    else:
        currentNZB.downloadTime += downloadTime
def endDownload():
    """ Finished downloading: reset throttle/download state, persist state to
    disk and log per-archive transfer statistics.

    NOTE: this duplicates the earlier endDownload() definition in this file
    (it additionally resets Hellanzb.ht.rate) and shadows it at import time.
    """
    # Reset the bandwidth throttle rate now that nothing is downloading
    Hellanzb.ht.rate = 0

    # Tally bytes read this session across all server factories, and shut each
    # factory's download session down. (The original also captured the non-fill
    # server's sessionStartTime into a local that was never read -- removed.)
    sessionReadBytes = 0
    for nsf in Hellanzb.nsfs:
        sessionReadBytes += nsf.sessionReadBytes
        nsf.endDownload()

    # Reset global download state and the scrolling UI
    Hellanzb.downloading = False
    Hellanzb.totalSpeed = 0
    Hellanzb.scroller.currentLog = None
    scrollEnd()
    Hellanzb.downloadScannerID.cancel()
    Hellanzb.totalArchivesDownloaded += 1
    # Persist queue/download state to disk
    writeStateXML()

    if not len(Hellanzb.queue.currentNZBs()):
        # END
        return

    # Log transfer statistics for the NZB that just finished
    currentNZB = Hellanzb.queue.currentNZBs()[0]
    downloadTime = time.time() - currentNZB.downloadStartTime
    # Guard against ZeroDivisionError for a sub-clock-resolution download
    if downloadTime > 0:
        speed = sessionReadBytes / 1024.0 / downloadTime
    else:
        speed = 0.0
    info('Transferred %s in %s at %.1fKB/s (%s)' % \
         (prettySize(sessionReadBytes), prettyElapsed(downloadTime), speed,
          currentNZB.archiveName))

    # Par recovery sessions accumulate on top of the original download time
    if not currentNZB.isParRecovery:
        currentNZB.downloadTime = downloadTime
    else:
        currentNZB.downloadTime += downloadTime
class NZBSegmentQueue(PriorityQueue): """ priority fifo queue of segments to download. lower numbered segments are downloaded before higher ones """ NZB_CONTENT_P = 100000 # normal nzb downloads # FIXME: EXTRA_PAR2_P isn't actually used EXTRA_PAR2_P = 0 # par2 after-the-fact downloads are more important def __init__(self, fileName=None, parent=None): PriorityQueue.__init__(self) if parent is not None: self.parent = parent else: self.parent = self # Segments curently on disk self.onDiskSegments = {} # Maintain a collection of the known nzbFiles belonging to the segments in this # queue. Set is much faster for _put & __contains__ self.nzbFiles = set() self.postponedNzbFiles = set() self.nzbFilesLock = Lock() self.nzbs = [] self.nzbsLock = Lock() self.totalQueuedBytes = 0 self.fillServerPriority = 0 self.retryQueueEnabled = False self.rQueue = RetryQueue() if fileName is not None: self.parseNZB(fileName) def cancel(self): self.postpone(cancel=True) def clear(self): """ Clear the queue of all its contents""" if self.retryQueueEnabled is not None: self.rQueue.clear() PriorityQueue.clear(self) self.nzbs = [] self.parent.onDiskSegments.clear() def postpone(self, cancel=False): """ Postpone the current download """ self.clear() self.nzbsLock.acquire() self.nzbFilesLock.acquire() if not cancel: self.postponedNzbFiles.update(self.nzbFiles) self.nzbFiles.clear() self.nzbFilesLock.release() self.nzbsLock.release() self.totalQueuedBytes = 0 def unpostpone(self, nzb): """ Recall a postponed NZB """ self.nzbFilesLock.acquire() arName = archiveName(nzb.nzbFileName) found = [] for nzbFile in self.postponedNzbFiles: # FIXME: # Why is this not nzbFile.nzb == nzb? if nzbFile.nzb.archiveName == arName: found.append(nzbFile) for nzbFile in found: self.postponedNzbFiles.remove(nzbFile) self.nzbFilesLock.release() def _put(self, item): """ Add a segment to the queue """ priority, item = item # Support adding NZBFiles to the queue. 
Just adds all the NZBFile's NZBSegments if isinstance(item, NZBFile): offset = 0 for nzbSegment in item.nzbSegments: PriorityQueue._put(self, (priority + offset, nzbSegment)) offset += 1 else: # Assume segment, add to list if item.nzbFile not in self.nzbFiles: self.nzbFiles.add(item.nzbFile) PriorityQueue._put(self, (priority, item)) def calculateTotalQueuedBytes(self): """ Calculate how many bytes are queued to be downloaded in this queue """ # NOTE: we don't maintain this calculation all the time, too much CPU work for # _put self.totalQueuedBytes = 0 self.nzbFilesLock.acquire() files = self.nzbFiles.copy() self.nzbFilesLock.release() # Total all the nzbFiles, then subtract their segments that don't need to be # downloaded for nzbFile in files: self.totalQueuedBytes += nzbFile.totalBytes if len(nzbFile.todoNzbSegments) != len(nzbFile.nzbSegments): for nzbSegment in nzbFile.nzbSegments: if nzbSegment not in nzbFile.todoNzbSegments: self.totalQueuedBytes -= nzbSegment.bytes def dequeueSegments(self, nzbSegments): """ Explicitly dequeue the specified nzb segments """ # ATOMIC: dequeued = self.dequeueItems([(nzbSegment.priority, nzbSegment) for nzbSegment in \ nzbSegments]) dequeuedSegments = [segment for priority, segment in dequeued] if self.retryQueueEnabled: dequeuedSegments.extend(self.rQueue.dequeueSegments(nzbSegments)) for nzbSegment in dequeuedSegments: self.segmentDone(nzbSegment, dequeue=True) return dequeuedSegments def addQueuedBytes(self, bytes): """ Add to the totalQueuedBytes count """ self.totalQueuedBytes += bytes def currentNZBs(self): """ Return a copy of the list of nzbs currently being downloaded """ self.nzbsLock.acquire() nzbs = self.nzbs[:] self.nzbsLock.release() return nzbs def nzbAdd(self, nzb): """ Denote this nzb as currently being downloaded """ self.nzbsLock.acquire() self.nzbs.append(nzb) self.nzbsLock.release() def nzbDone(self, nzb): """ NZB finished """ self.nzbsLock.acquire() try: self.nzbs.remove(nzb) except ValueError: # NZB 
might have been canceled pass self.nzbsLock.release() def isNZBDone(self, nzb, postponed=None): """ Determine whether or not all of the specified NZB as been thoroughly downloaded """ if postponed is None: if nzb not in Hellanzb.queue.currentNZBs(): postponed = True else: postponed = False self.nzbFilesLock.acquire() if not postponed: queueFilesCopy = self.nzbFiles.copy() else: queueFilesCopy = self.postponedNzbFiles.copy() self.nzbFilesLock.release() for nzbFile in queueFilesCopy: if nzbFile not in nzb.nzbFiles: continue debug('isNZBDone: NOT DONE: ' + nzbFile.getDestination()) return False return True def serverAdd(self, serverFactory): """ Add the specified server pool, for use by the RetryQueue """ self.rQueue.addServerPool(serverFactory.serverPoolName) def initRetryQueue(self): """ Initialize and enable use of the RetryQueue """ self.retryQueueEnabled = self.rQueue.needRetryQueue() if self.retryQueueEnabled: self.rQueue.createQueues() def serverRemove(self, serverFactory): """ Remove the specified server pool """ self.rQueue.removeServerPool(serverFactory.serverPoolName) def getSmart(self, serverFactory): """ Get the next available segment in the queue. The 'smart'ness first checks for segments in the RetryQueue, otherwise it falls back to the main queue """ # Don't bother w/ retryQueue nonsense unless it's enabled (meaning there are # multiple serverPools) if self.retryQueueEnabled: try: priority, segment = self.rQueue.get( serverFactory.serverPoolName) segment.fromQueue = self return priority, segment except Empty: # All retry queues for this serverPool are empty. 
fall through pass if not len(self) and len(self.rQueue): # Catch the special case where both the main NZBSegmentQueue is empty, all # the retry queues for the serverPool are empty, but there is still more # left to download in the retry queue (scheduled for retry by other # serverPools) raise EmptyForThisPool() priority, segment = PriorityQueue.get_nowait(self) segment.fromQueue = self return priority, segment def requeue(self, serverFactory, segment): """ Requeue the segment for download. This differs from requeueMissing as it's for downloads that failed for reasons other than the file or group missing from the server (such as a connection timeout) """ # This segment only needs to go back into the retry queue if the retry queue is # enabled AND the segment was previously requeueMissing()'d if self.retryQueueEnabled and len(segment.failedServerPools): self.rQueue.requeue(serverFactory.serverPoolName, segment) else: self.put((segment.priority, segment)) # There's a funny case where other NZBLeechers in the calling NZBLeecher's factory # received Empty from the queue, then afterwards the connection is lost (say the # connection timed out), causing the requeue. Find and reactivate them because # they now have work to do self.nudgeIdleNZBLeechers(segment) def requeueMissing(self, serverFactory, segment): """ Requeue a missing segment. This segment will be added to the RetryQueue (if enabled), where other serverPools will find it and reattempt the download """ # This serverPool has just failed the download assert (serverFactory.serverPoolName not in segment.failedServerPools) segment.failedServerPools.append(serverFactory.serverPoolName) if self.retryQueueEnabled: self.rQueue.requeue(serverFactory.serverPoolName, segment) # We might have just requeued a segment onto an idle server pool. 
Reactivate # any idle connections pertaining to this segment self.nudgeIdleNZBLeechers(segment) else: raise PoolsExhausted() def nudgeIdleNZBLeechers(self, requeuedSegment): """ Activate any idle NZBLeechers that might need to download the specified requeued segment """ reactor.callLater(0, self._nudgeIdleNZBLeechers, requeuedSegment) def _nudgeIdleNZBLeechers(self, requeuedSegment): """ Activate any idle NZBLeechers that might need to download the specified requeued segment """ if not Hellanzb.downloadPaused and not requeuedSegment.nzbFile.nzb.canceled: for nsf in Hellanzb.nsfs: if nsf.fillServerPriority != self.fillServerPriority: continue if nsf.serverPoolName not in requeuedSegment.failedServerPools: nsf.activated = True nsf.fetchNextNZBSegment() def fileDone(self, nzbFile): """ Notify the queue a file is done. This is called after assembling a file into its final contents. Segments are really stored independantly of individual Files in the queue, hence this function """ self.nzbFilesLock.acquire() if nzbFile in self.nzbFiles: self.nzbFiles.remove(nzbFile) else: self.nzbFilesLock.release() return self.nzbFilesLock.release() if nzbFile.isAllSegmentsDecoded(): for nzbSegment in nzbFile.nzbSegments: if self.parent.onDiskSegments.has_key( nzbSegment.getDestination()): self.parent.onDiskSegments.pop(nzbSegment.getDestination()) if nzbFile.isExtraPar and nzbFile.nzb.queuedBlocks > 0: fileBlocks = getParSize(nzbFile.filename) nzbFile.nzb.queuedBlocks -= fileBlocks nzbFile.nzb.neededBlocks -= fileBlocks if nzbFile.isSkippedPar: # If a skipped par file was actually assembled, it wasn't actually skipped nzbFile.isSkippedPar = False if nzbFile in nzbFile.nzb.skippedParFiles: nzbFile.nzb.skippedParFiles.remove(nzbFile) if nzbFile.nzb.isSkippedParSubject(nzbFile.subject): nzbFile.nzb.skippedParSubjects.remove(nzbFile.subject) def segmentDone(self, nzbSegment, dequeue=False): """ Simply decrement the queued byte count and register this nzbSegment as finished downloading, 
unless the segment is part of a postponed download """ # NOTE: old code locked here: but this block should only contend with itself (only # called from the ArticleDecoder) ArticleDecoder thread (only segmentDone() and # isAllSegmentsDecoded() touches todoNzbSegments, dequeuedSegments, # totalQueuedBytes? self.nzbsLock.acquire() if nzbSegment in nzbSegment.nzbFile.todoNzbSegments: nzbSegment.nzbFile.todoNzbSegments.remove(nzbSegment) if dequeue: nzbSegment.nzbFile.dequeuedSegments.add(nzbSegment) debug('segmentDone: dequeued: %s %i' % (nzbSegment.nzbFile.subject, nzbSegment.number)) elif nzbSegment in nzbSegment.nzbFile.dequeuedSegments: # NOTE: this should really never occur # need this elif? debug( '*** segmentDone called on dequeued nzbSegment -- removing from ' 'nzbFile.dequeuedSegments!') nzbSegment.nzbFile.dequeuedSegments.remove(nzbSegment) if nzbSegment.nzbFile.nzb in Hellanzb.queue.nzbs: self.totalQueuedBytes -= nzbSegment.bytes self.nzbsLock.release() if not dequeue: # NOTE: currently don't have to lock -- only the ArticleDecoder thread (via # ->handleDupeNZBSegment->isBeingDownloaded) reads onDiskSegments self.parent.onDiskSegments[ nzbSegment.getDestination()] = nzbSegment if nzbSegment.isFirstSegment(): nzbSegment.nzbFile.nzb.firstSegmentsDownloaded += 1 def isBeingDownloadedFile(self, segmentFilename): """ Whether or not the file on disk is currently in the middle of being downloaded/assembled. 
Return the NZBSegment representing the segment specified by the filename """ # see segmentDone segmentFilename = segmentFilename if self.parent.onDiskSegments.has_key(segmentFilename): return self.parent.onDiskSegments[segmentFilename] def parseNZB(self, nzb, verbose=True): """ Initialize the queue from the specified nzb file """ # Create a parser parser = make_parser() # No XML namespaces here parser.setFeature(feature_namespaces, 0) parser.setFeature(feature_external_ges, 0) # Create the handler fileName = nzb.nzbFileName self.nzbAdd(nzb) needWorkFiles = [] needWorkSegments = [] nzbp = NZBParser(nzb, needWorkFiles, needWorkSegments) # Tell the parser to use it parser.setContentHandler(nzbp) nzb.calculatingBytes = True # Parse the input try: parser.parse(fileName) except SAXParseException, saxpe: nzb.calculatingBytes = False self.nzbDone(nzb) msg = 'Unable to parse invalid NZB file: %s: %s' % \ (os.path.basename(fileName), saxpe.getException()) raise FatalError(msg) nzb.calculatingBytes = False # We trust the NZB XML's <segment number="111"> attribute, but if the sequence of # segments does not begin at "1", the parser wouldn't have found the # nzbFile.firstSegment for needWorkFile in nzbp.needWorkFiles: if needWorkFile.firstSegment is None and len( needWorkFile.nzbSegments): # Set the firstSegment to the smallest segment number sortedSegments = [(nzbSegment.number, nzbSegment) for nzbSegment in \ needWorkFile.nzbSegments] sortedSegments.sort() needWorkFile.firstSegment = sortedSegments[0][1] needWorkFile.firstSegment.priority = NZBSegmentQueue.NZB_CONTENT_P s = time.time() # The parser will add all the segments of all the NZBFiles that have not already # been downloaded. After the parsing, we'll check if each of those segments have # already been downloaded. 
it's faster to check all segments at one time needDlFiles, needDlSegments, onDiskSegments = segmentsNeedDownload(needWorkSegments, overwriteZeroByteSegments = \ nzb.overwriteZeroByteFiles) e = time.time() - s # firstSegmentsDownloaded needs to be tweaked if isSkippedPar and no segments were # found on disk by segmentsNeedDownload. i.e. first segments have ALWAYS already # been downloaded in isParRecovery mode fauxFirstSegmentsDownloaded = 0 if Hellanzb.SMART_PAR and nzb.isParRecovery: for nzbFile in nzb.nzbFiles: if nzbFile.isSkippedPar and nzbFile.firstSegment not in onDiskSegments: nzb.firstSegmentsDownloaded += 1 fauxFirstSegmentsDownloaded += 1 # Calculate and print parsed/skipped/queued statistics skippedPars = 0 queuedParBlocks = 0 for nzbFile in needDlFiles: if nzbFile.isSkippedPar: skippedPars += 1 elif nzb.isParRecovery and nzbFile.isExtraPar and \ not nzbFile.isSkippedPar and len(nzbFile.todoNzbSegments) and \ nzbFile.filename is not None and not isHellaTemp(nzbFile.filename): queuedParBlocks += getParSize(nzbFile.filename) onDiskBytes = 0 for nzbSegment in onDiskSegments: onDiskBytes += nzbSegment.bytes for nzbFile in nzb.nzbFiles: if nzbFile not in needDlFiles: onDiskBytes += nzbFile.totalBytes onDiskFilesCount = nzbp.fileCount - len(needWorkFiles) onDiskSegmentsCount = len(onDiskSegments) info('Parsed: %i files (%i posts), %s' % (nzbp.fileCount, nzbp.segmentCount, prettySize(nzb.totalBytes))) if onDiskFilesCount or onDiskSegmentsCount: filesMsg = segmentsMsg = separator = '' if onDiskFilesCount: filesMsg = '%i files' % onDiskFilesCount if onDiskSegmentsCount: segmentsMsg = '%i segments' % onDiskSegmentsCount if onDiskFilesCount and onDiskSegmentsCount: separator = ' and ' info('Skipped (on disk): %s%s%s, %s' % (filesMsg, separator, segmentsMsg, prettySize(onDiskBytes))) # Tally what was skipped for correct percentages in the UI for nzbSegment in onDiskSegments: nzbSegment.nzbFile.totalSkippedBytes += nzbSegment.bytes 
nzbSegment.nzbFile.nzb.totalSkippedBytes += nzbSegment.bytes # The needWorkFiles will tell us what nzbFiles are missing from the # FS. segmentsNeedDownload will further tell us what files need to be # downloaded. files missing from the FS (needWorkFiles) but not needing to be # downloaded (in needDlFiles) simply need to be assembled for nzbFile in needWorkFiles: if nzbFile not in needDlFiles: # Don't automatically 'finish' the NZB, we'll take care of that in this # function if necessary if verbose: info(nzbFile.getFilename() + ': Assembling -- all segments were on disk') # NOTE: this function is destructive to the passed in nzbFile! And is only # called on occasion (might bite you in the ass one day) try: assembleNZBFile(nzbFile, autoFinish=False) except OutOfDiskSpace: self.nzbDone(nzb) # FIXME: Shouldn't exit here error('Cannot assemble ' + nzbFile.getFilename() + ': No space left on device! Exiting..') Hellanzb.Core.shutdown(True) for nzbSegment in needDlSegments: # smartDequeue called from segmentsNeedDownload would have set # isSkippedParFile for us if not nzbSegment.nzbFile.isSkippedPar: self.put((nzbSegment.priority, nzbSegment)) else: # This would need to be downloaded if we didn't skip the segment, they are # officially dequeued, and can be requeued later nzbSegment.nzbFile.dequeuedSegments.add(nzbSegment) # Requeue files in certain situations if nzb.firstSegmentsDownloaded == len(nzb.nzbFiles): # NOTE: This block of code does not commonly happen with newzbin.com NZBs: due # to how the DupeHandler handles .NFO files. newzbin.com seems to always # duplicate the .NFO file in their NZBs smartRequeue(nzb) logSkippedPars(nzb) if nzb.isParRecovery and nzb.skippedParSubjects and len(nzb.skippedParSubjects) and \ not len(self): # FIXME: This recovering ALL pars should be a mode (with a flag on the NZB # object). No par skipping would occur in this mode -- for the incredibly rare # case that first segments are lost prior to this mode taking place. 
What will # happen doesn't make sense: hellanzb will say 'recovering ALL pars', then # SmartPar will later skip pars msg = 'Par recovery download: No pars with prefix: %s -- recovering ALL pars' % \ nzb.parPrefix if skippedPars: msg = '%s (%i par files)' % (msg, skippedPars) if verbose: warn(msg) for nzbSegment in needDlSegments: if nzbSegment.nzbFile.isSkippedPar: self.put((nzbSegment.priority, nzbSegment)) nzbSegment.nzbFile.todoNzbSegments.add(nzbSegment) # Only reset the isSkippedPar flag after queueing for nzbSegment in needDlSegments: if nzbSegment.nzbFile.isSkippedPar: nzbSegment.nzbFile.isSkippedPar = False # We might have faked the value of this: reset it nzb.firstSegmentsDownloaded -= fauxFirstSegmentsDownloaded if not len(self): self.nzbDone(nzb) if verbose: info(nzb.archiveName + ': Assembled archive!') reactor.callLater(0, Hellanzb.Daemon.handleNZBDone, nzb) # True == the archive is complete return True # Finally tally the size of the queue self.calculateTotalQueuedBytes() dlMsg = 'Queued: %s' % prettySize(self.totalQueuedBytes) if nzb.isParRecovery and queuedParBlocks: dlMsg += ' (recovering %i %s)' % (queuedParBlocks, getParRecoveryName(nzb.parType)) info(dlMsg) # Archive not complete return False