def __init__(self, nzb, needWorkFiles, needWorkSegments):
    """ Store the nzb to parse and the needWorkFiles/needWorkSegments containers, reset
    the parsing state, then build the listing of usable files currently in the
    WORKING_DIR (stray duplicate and failed-alt-server segment files are deleted while
    scanning) """
    # nzb file to parse
    self.nzb = nzb

    # to be populated with the files that either need to be downloaded or simply
    # assembled, and their segments
    self.needWorkFiles = needWorkFiles
    self.needWorkSegments = needWorkSegments

    # parsing variables
    self.file = self.bytes = self.number = self.chars = None
    self.fileNeedsDownload = None
    self.fileCount = self.segmentCount = 0
    self.fileSegmentNumber = 1

    # All encountered segment numbers for the current NZBFile
    self.segmentNumbers = set()

    # Current listing of existing files in the WORKING_DIR
    self.workingDirListing = []

    # Map of duplicate filenames -- @see DupeHandler.handleDupeOnDisk
    self.workingDirDupeMap = {}

    # heapq priority
    from Hellanzb.NZBLeecher.NZBSegmentQueue import NZBSegmentQueue
    self.nzbContentPriority = NZBSegmentQueue.NZB_CONTENT_P

    for name in sorted(os.listdir(Hellanzb.WORKING_DIR)):
        path = os.path.join(Hellanzb.WORKING_DIR, name)

        # Anonymous duplicate file segments lying around are too painful to keep track
        # of. As are segments that previously failed on different servers
        if DUPE_SEGMENT_RE.match(name) or FAILED_ALT_SERVER_SEGMENT_RE.match(name):
            os.remove(path)
            continue

        # A file that looks like a duplicate is recorded in self.workingDirDupeMap by
        # handleDupeOnDisk and kept out of self.workingDirListing (dupes are handled
        # specially so we don't care for them there)
        if handleDupeOnDisk(name, self.workingDirDupeMap):
            continue

        if validWorkingFile(path, self.nzb.overwriteZeroByteFiles):
            self.workingDirListing.append(name)
def handleDupeNZBSegment(nzbSegment):
    """ Handle a duplicate NZBSegment file on disk (prior to writing a new one), if one
    exists. Does nothing when the segment's destination is not a valid working file """
    dest = nzbSegment.getDestination()
    overwriteZeroByte = nzbSegment.nzbFile.nzb.overwriteZeroByteFiles
    if not validWorkingFile(dest, overwriteZeroByteFiles=overwriteZeroByte):
        return

    # We have lazily found a duplicate segment (a .segmentXXXX already on disk that we
    # were about to write to). Work out the new, duplicate filename that either the on
    # disk file or the segment ABOUT to be written to disk will be renamed to. It must
    # not clash with a filename already on disk (nextDupeName checks the disk for us)
    # NOR with an already reserved filename that may not be on disk yet (eschewNames)
    parentFilename = dest[:-12]  # remove .segmentXXXX
    segmentNumStr = dest[-12:]  # just .segmentXXXX
    dupeNZBFileName = nextDupeName(parentFilename, eschewNames=knownRealNZBFilenames())

    otherSegment = Hellanzb.queue.isBeingDownloadedFile(dest)

    dupeBase = os.path.basename(dupeNZBFileName)
    info('Duplicate segment (%s), renaming parent file: %s to %s' %
         (segmentNumStr, os.path.basename(parentFilename), dupeBase))

    if otherSegment is None:
        # NOTE: Probably nothing should trigger this, except maybe .par .segment0001
        # files (when smartpar is added). CAUTION: Other cases that might trigger this
        # block should no longer happen!
        debug(('handleDupeNZBSegment: handling dupe (not '
               'beingDownloadedNZBSegment!?): %s renaming to: %s') %
              (os.path.basename(dest), dupeBase))
        os.rename(dest, dupeNZBFileName + segmentNumStr)
        return

    debug('handleDupeNZBSegment: handling dupe: %s renaming to: %s' %
          (os.path.basename(dest), dupeBase))

    # FIXME: should probably assert otherSegment.nzbFile.number !=
    # nzbSegment.nzbFile.number here

    # Maintain the correct order when renaming -- the earliest (as they appear in the
    # NZB) clashing NZBFile gets renamed
    renameFile = nzbSegment.nzbFile
    if otherSegment.nzbFile.number < renameFile.number:
        renameFile = otherSegment.nzbFile

    ArticleDecoder.setRealFileName(renameFile, dupeBase, forceChange=True)
def handleDupeNZBSegment(nzbSegment):
    """ Handle a duplicate NZBSegment file on disk (prior to writing a new one), if one
    exists. Nothing happens unless the segment's destination is a valid working file """
    dest = nzbSegment.getDestination()
    nzb = nzbSegment.nzbFile.nzb
    if validWorkingFile(dest, overwriteZeroByteFiles=nzb.overwriteZeroByteFiles):
        # A duplicate segment was lazily found (a .segmentXXXX already on disk that we
        # were about to write to). Pick the new, duplicate filename that either the on
        # disk file or the segment ABOUT to be written will be renamed to. The name
        # must not already be on disk (nextDupeName checks that) and must not be an
        # already reserved filename that may not be on disk yet (eschewNames)
        parentFilename, segmentNumStr = dest[:-12], dest[-12:]  # split at .segmentXXXX
        reservedNames = knownRealNZBFilenames()
        dupeNZBFileName = nextDupeName(parentFilename, eschewNames=reservedNames)

        clashingSegment = Hellanzb.queue.isBeingDownloadedFile(dest)

        newBaseName = os.path.basename(dupeNZBFileName)
        info('Duplicate segment (%s), renaming parent file: %s to %s' %
             (segmentNumStr, os.path.basename(parentFilename), newBaseName))

        renameArgs = (os.path.basename(dest), newBaseName)
        if clashingSegment is not None:
            debug('handleDupeNZBSegment: handling dupe: %s renaming to: %s' % renameArgs)

            # FIXME: should probably assert clashingSegment.nzbFile.number !=
            # nzbSegment.nzbFile.number here

            # Maintain the correct order when renaming -- the earliest (as they appear
            # in the NZB) clashing NZBFile gets renamed
            if clashingSegment.nzbFile.number < nzbSegment.nzbFile.number:
                renameFile = clashingSegment.nzbFile
            else:
                renameFile = nzbSegment.nzbFile

            ArticleDecoder.setRealFileName(renameFile, newBaseName, forceChange=True)
        else:
            # NOTE: Probably nothing should trigger this, except maybe .par
            # .segment0001 files (when smartpar is added). CAUTION: Other cases that
            # might trigger this block should no longer happen!
            debug(('handleDupeNZBSegment: handling dupe (not '
                   'beingDownloadedNZBSegment!?): %s renaming to: %s') % renameArgs)
            os.rename(dest, dupeNZBFileName + segmentNumStr)
def handleDupeNZBFile(nzbFile):
    """ Handle a duplicate NZBFile file on disk (prior to writing a new one), if one
    exists. The on disk file is renamed to a dupe name, unless it is an .nfo """
    dest = nzbFile.getDestination()

    if not validWorkingFile(dest,
                            overwriteZeroByteFiles=nzbFile.nzb.overwriteZeroByteFiles):
        return

    # Ignore .nfo files -- newzbin.com dumps the .nfo file to the end of every nzb (if
    # one exists) -- so it's commonly a dupe. If it's already been downloaded (is an
    # actual fully assembled NZBFile on disk, not an NZBSegment), just overwrite it
    if getFileExtension(dest) == 'nfo':
        return

    # Set a new dupeName -- avoid setting a dupeName that is on disk or in the
    # eschewNames (as handleDupeNZBSegment does)
    dupeNZBFileName = dupeName(dest, eschewNames=knownRealNZBFilenames())

    oldAndNew = (os.path.basename(dest), os.path.basename(dupeNZBFileName))
    info('Duplicate file, renaming: %s to %s' % oldAndNew)
    debug('handleDupeNZBFile: renaming: %s to %s' % oldAndNew)

    os.rename(dest, dupeNZBFileName)
def handleDupeNZBFile(nzbFile):
    """ Handle a duplicate NZBFile file on disk (prior to writing a new one), if one
    exists, by renaming the on disk file to a dupe name (.nfo files are left alone) """
    dest = nzbFile.getDestination()
    overwriteZeroByte = nzbFile.nzb.overwriteZeroByteFiles

    # Ignore .nfo files -- newzbin.com dumps the .nfo file to the end of every nzb (if
    # one exists) -- so it's commonly a dupe. If it's already been downloaded (is an
    # actual fully assembled NZBFile on disk, not an NZBSegment), just overwrite it
    isDupe = validWorkingFile(dest, overwriteZeroByteFiles=overwriteZeroByte) and \
        getFileExtension(dest) != 'nfo'
    if isDupe:
        # Set a new dupeName -- avoid setting a dupeName that is on disk or in the
        # eschewNames (as handleDupeNZBSegment does)
        dupeNZBFileName = dupeName(dest, eschewNames=knownRealNZBFilenames())

        fromName = os.path.basename(dest)
        toName = os.path.basename(dupeNZBFileName)
        info('Duplicate file, renaming: %s to %s' % (fromName, toName))
        debug('handleDupeNZBFile: renaming: %s to %s' % (fromName, toName))

        os.rename(dest, dupeNZBFileName)
def segmentsNeedDownload(segmentList, overwriteZeroByteSegments=False):
    """ Faster version of needsDownload for multiple segments that do not have their
    real file name (for use by the Queue).

    When an NZB is loaded and parsed, NZB<file>s not found on disk at the time of
    parsing are marked as needing to be downloaded. (An easy first pass of figuring out
    exactly what needs to be downloaded).

    This function is the second pass. It takes all of those NZBFiles that need to be
    downloaded's child NZBSegments and scans the disk, detecting which segments are
    already on disk and can be skipped.

    segmentList is the iterable of segments to check. overwriteZeroByteSegments is
    passed through to validWorkingFile for every file found in the WORKING_DIR.

    Returns the tuple (needDlFiles, needDlSegments, onDiskSegments): the set of
    nzbFiles owning at least one segment that must be downloaded, the list of segments
    that must be downloaded, and the list of segments matched to a file on disk.

    Side effects: every segment matched on disk is passed to
    Hellanzb.queue.segmentDone. A matched first segment may also have its nzbFile's
    real filename set (setRealFileName) and, when Hellanzb.SMART_PAR is on, be
    smartDequeue'd """
    # Arrange all WORKING_DIR segment's filenames in a list. Key this list by segment
    # number in a map. Loop through the specified segmentList, doing a subject search
    # for each segment filename with a matching segment number
    onDiskSegmentsByNumber = {}

    needDlFiles = set()  # for speed while iterating
    needDlSegments = []
    onDiskSegments = []

    # Cache all WORKING_DIR segment filenames in a map of lists
    for filename in os.listdir(Hellanzb.WORKING_DIR):
        if not validWorkingFile(os.path.join(Hellanzb.WORKING_DIR, filename),
                                overwriteZeroByteSegments):
            continue

        ext = getFileExtension(filename)
        if ext is None or not segmentEndRe.match(ext):
            continue

        segmentNumber = int(ext[-4:])
        # filename[:-12] cuts off .segmentXXXX
        onDiskSegmentsByNumber.setdefault(segmentNumber, []).append(filename[:-12])

    # Determine if each segment needs to be downloaded
    for segment in segmentList:
        if segment.number not in onDiskSegmentsByNumber:
            # No matching segment numbers, obviously needs to be downloaded
            needDlSegments.append(segment)
            needDlFiles.add(segment.nzbFile)
            continue

        segmentFileNames = onDiskSegmentsByNumber[segment.number]

        foundFileName = None
        for segmentFileName in segmentFileNames:
            # We've matched to our on disk segment if we:
            # a) find that on disk segment's file name in our potential segment's
            #    subject
            # b) match that on disk segment's file name to our potential segment's temp
            #    file name (w/ .segmentXXXX cutoff)
            if segmentFileName in segment.nzbFile.subject or \
                    segment.getTempFileName()[:-12] == segmentFileName:
                foundFileName = segmentFileName
                break

        if not foundFileName:
            needDlSegments.append(segment)
            needDlFiles.add(segment.nzbFile)
            continue

        if segment.isFirstSegment() and not isHellaTemp(foundFileName) and \
                segment.nzbFile.filename is None:
            # HACK: filename is None. so we only have the temporary name in memory.
            # since we didnt see the temporary name on the filesystem, but we found a
            # subject match, that means we have the real name on the filesystem. In the
            # case where this happens we've figured out the real filename (hopefully!).
            # Set it if it hasn't already been set
            setRealFileName(segment.nzbFile, foundFileName,
                            settingSegmentNumber=segment.number)

            if Hellanzb.SMART_PAR:
                # smartDequeue won't actually 'dequeue' any of this segment's nzbFile's
                # segments (because there are no segments in the queue at this
                # point). It will identifyPar the segment AND more importantly it will
                # mark nzbFiles as isSkippedPar (taken into account later during
                # parseNZB) and print a 'Skipping par' message for those isSkippedPar
                # nzbFiles
                segment.smartDequeue(readOnlyQueue=True)

        onDiskSegments.append(segment)

        # Originally the main reason to call segmentDone here is to update the queue's
        # onDiskSegments (so isBeingDownloaded can safely detect things on disk during
        # Dupe renaming). However it's correct to call this here, it's as if hellanzb
        # just finished downloading and decoding the segment. The only incorrect part
        # about the call is the queue's totalQueuedBytes is decremented. That total is
        # reset to zero just before it is recalculated at the end of parseNZB, however
        Hellanzb.queue.segmentDone(segment)

        # This segment was matched. Remove it from the list to avoid matching it again
        # later (dupes)
        segmentFileNames.remove(foundFileName)

    return needDlFiles, needDlSegments, onDiskSegments
def segmentsNeedDownload(segmentList, overwriteZeroByteSegments=False):
    """ Faster version of needsDownload for multiple segments that do not have their
    real file name (for use by the Queue).

    When an NZB is loaded and parsed, NZB<file>s not found on disk at the time of
    parsing are marked as needing to be downloaded. (An easy first pass of figuring out
    exactly what needs to be downloaded).

    This function is the second pass. It takes all of those NZBFiles that need to be
    downloaded's child NZBSegments and scans the disk, detecting which segments are
    already on disk and can be skipped.

    segmentList is the iterable of segments to check. overwriteZeroByteSegments is
    passed through to validWorkingFile for every file found in the WORKING_DIR.

    Returns the tuple (needDlFiles, needDlSegments, onDiskSegments): the set of
    nzbFiles owning at least one segment that must be downloaded, the list of segments
    that must be downloaded, and the list of segments matched to a file on disk.

    Side effects: every segment matched on disk is passed to
    Hellanzb.queue.segmentDone. A matched first segment may also have its nzbFile's
    real filename set (setRealFileName) and, when Hellanzb.SMART_PAR is on, be
    smartDequeue'd """
    # Arrange all WORKING_DIR segment's filenames in a list. Key this list by segment
    # number in a map. Loop through the specified segmentList, doing a subject search
    # for each segment filename with a matching segment number
    onDiskSegmentsByNumber = {}

    needDlFiles = set()  # for speed while iterating
    needDlSegments = []
    onDiskSegments = []

    # Cache all WORKING_DIR segment filenames in a map of lists
    for filename in os.listdir(Hellanzb.WORKING_DIR):
        if not validWorkingFile(os.path.join(Hellanzb.WORKING_DIR, filename),
                                overwriteZeroByteSegments):
            continue

        ext = getFileExtension(filename)
        if ext is None or not segmentEndRe.match(ext):
            continue

        segmentNumber = int(ext[-4:])
        # filename[:-12] cuts off .segmentXXXX
        onDiskSegmentsByNumber.setdefault(segmentNumber, []).append(filename[:-12])

    # Determine if each segment needs to be downloaded
    for segment in segmentList:
        if segment.number not in onDiskSegmentsByNumber:
            # No matching segment numbers, obviously needs to be downloaded
            needDlSegments.append(segment)
            needDlFiles.add(segment.nzbFile)
            continue

        segmentFileNames = onDiskSegmentsByNumber[segment.number]

        foundFileName = None
        for segmentFileName in segmentFileNames:
            # We've matched to our on disk segment if we:
            # a) find that on disk segment's file name in our potential segment's
            #    subject
            # b) match that on disk segment's file name to our potential segment's temp
            #    file name (w/ .segmentXXXX cutoff)
            if segmentFileName in segment.nzbFile.subject or \
                    segment.getTempFileName()[:-12] == segmentFileName:
                foundFileName = segmentFileName
                break

        if not foundFileName:
            needDlSegments.append(segment)
            needDlFiles.add(segment.nzbFile)
            continue

        if segment.isFirstSegment() and not isHellaTemp(foundFileName) and \
                segment.nzbFile.filename is None:
            # HACK: filename is None. so we only have the temporary name in memory.
            # since we didnt see the temporary name on the filesystem, but we found a
            # subject match, that means we have the real name on the filesystem. In the
            # case where this happens we've figured out the real filename (hopefully!).
            # Set it if it hasn't already been set
            setRealFileName(segment.nzbFile, foundFileName,
                            settingSegmentNumber=segment.number)

            if Hellanzb.SMART_PAR:
                # smartDequeue won't actually 'dequeue' any of this segment's nzbFile's
                # segments (because there are no segments in the queue at this
                # point). It will identifyPar the segment AND more importantly it will
                # mark nzbFiles as isSkippedPar (taken into account later during
                # parseNZB) and print a 'Skipping par' message for those isSkippedPar
                # nzbFiles
                segment.smartDequeue(readOnlyQueue=True)

        onDiskSegments.append(segment)

        # Originally the main reason to call segmentDone here is to update the queue's
        # onDiskSegments (so isBeingDownloaded can safely detect things on disk during
        # Dupe renaming). However it's correct to call this here, it's as if hellanzb
        # just finished downloading and decoding the segment. The only incorrect part
        # about the call is the queue's totalQueuedBytes is decremented. That total is
        # reset to zero just before it is recalculated at the end of parseNZB, however
        Hellanzb.queue.segmentDone(segment)

        # This segment was matched. Remove it from the list to avoid matching it again
        # later (dupes)
        segmentFileNames.remove(foundFileName)

    return needDlFiles, needDlSegments, onDiskSegments