예제 #1
0
    def __init__(self, nzb, needWorkFiles, needWorkSegments):
        """ Initialize the parsing state for the specified nzb and take a listing of the
        usable files currently in Hellanzb.WORKING_DIR

        nzb -- the nzb to parse
        needWorkFiles -- to be populated with the files that either need to be downloaded
        or simply assembled
        needWorkSegments -- to be populated with the segments of those files
        """
        self.nzb = nzb
        self.needWorkFiles = needWorkFiles
        self.needWorkSegments = needWorkSegments

        # Scratch variables used while parsing
        self.file = self.bytes = self.number = self.chars = None
        self.fileNeedsDownload = None

        # Counters
        self.fileCount = 0
        self.segmentCount = 0
        self.fileSegmentNumber = 1

        # Every segment number seen so far for the current NZBFile
        self.segmentNumbers = set()

        # Existing files in the WORKING_DIR (filled in by the loop below)
        self.workingDirListing = []

        # Duplicate filenames found on disk -- @see DupeHandler.handleDupeOnDisk
        self.workingDirDupeMap = {}

        # heapq priority
        from Hellanzb.NZBLeecher.NZBSegmentQueue import NZBSegmentQueue
        self.nzbContentPriority = NZBSegmentQueue.NZB_CONTENT_P

        workingDir = Hellanzb.WORKING_DIR
        for name in sorted(os.listdir(workingDir)):
            path = os.path.join(workingDir, name)

            # Anonymous duplicate segments, and segments that previously failed on
            # different servers, are too painful to keep track of: delete them outright
            if DUPE_SEGMENT_RE.match(name) or FAILED_ALT_SERVER_SEGMENT_RE.match(name):
                os.remove(path)
                continue

            # Files that look like duplicates are recorded in self.workingDirDupeMap and
            # kept out of self.workingDirListing (dupes are handled specially)
            if handleDupeOnDisk(name, self.workingDirDupeMap):
                continue

            if validWorkingFile(path, self.nzb.overwriteZeroByteFiles):
                self.workingDirListing.append(name)
예제 #2
0
    def __init__(self, nzb, needWorkFiles, needWorkSegments):
        """ Initialize the parsing state for the specified nzb and take a listing of the
        usable files currently in Hellanzb.WORKING_DIR

        nzb -- the nzb to parse
        needWorkFiles -- to be populated with the files that either need to be downloaded
        or simply assembled
        needWorkSegments -- to be populated with the segments of those files
        """
        self.nzb = nzb
        self.needWorkFiles = needWorkFiles
        self.needWorkSegments = needWorkSegments

        # Scratch variables used while parsing
        self.file = self.bytes = self.number = self.chars = None
        self.fileNeedsDownload = None

        # Counters
        self.fileCount = 0
        self.segmentCount = 0
        self.fileSegmentNumber = 1

        # Every segment number seen so far for the current NZBFile
        self.segmentNumbers = set()

        # Existing files in the WORKING_DIR (filled in by the loop below)
        self.workingDirListing = []

        # Duplicate filenames found on disk -- @see DupeHandler.handleDupeOnDisk
        self.workingDirDupeMap = {}

        # heapq priority
        from Hellanzb.NZBLeecher.NZBSegmentQueue import NZBSegmentQueue
        self.nzbContentPriority = NZBSegmentQueue.NZB_CONTENT_P

        workingDir = Hellanzb.WORKING_DIR
        for name in sorted(os.listdir(workingDir)):
            path = os.path.join(workingDir, name)

            # Anonymous duplicate segments, and segments that previously failed on
            # different servers, are too painful to keep track of: delete them outright
            if DUPE_SEGMENT_RE.match(name) or FAILED_ALT_SERVER_SEGMENT_RE.match(name):
                os.remove(path)
                continue

            # Files that look like duplicates are recorded in self.workingDirDupeMap and
            # kept out of self.workingDirListing (dupes are handled specially)
            if handleDupeOnDisk(name, self.workingDirDupeMap):
                continue

            if validWorkingFile(path, self.nzb.overwriteZeroByteFiles):
                self.workingDirListing.append(name)
예제 #3
0
def handleDupeNZBSegment(nzbSegment):
    """ Resolve the clash when a valid segment file already sits at the destination this
    NZBSegment is about to be written to. Does nothing if no such file is on disk
    """
    dest = nzbSegment.getDestination()
    overwriteZeroByte = nzbSegment.nzbFile.nzb.overwriteZeroByteFiles
    if not validWorkingFile(dest, overwriteZeroByteFiles=overwriteZeroByte):
        return

    # A duplicate segment was lazily found (a .segmentXXXX already on disk where we were
    # about to write). Pick the duplicate filename that either the on disk file or the
    # segment about to be written will be renamed to. It must not clash with a filename
    # on disk (nextDupeName checks the disk for us) nor with an already reserved filename
    # that may not be on disk yet (the eschewNames)
    suffixLen = len('.segmentXXXX')
    parentFilename = dest[:-suffixLen]
    segmentNumStr = dest[-suffixLen:]
    reservedNames = knownRealNZBFilenames()
    dupeNZBFileName = nextDupeName(parentFilename, eschewNames=reservedNames)

    beingDownloadedNZBSegment = Hellanzb.queue.isBeingDownloadedFile(dest)

    destBase = os.path.basename(dest)
    dupeBase = os.path.basename(dupeNZBFileName)

    info('Duplicate segment (%s), renaming parent file: %s to %s' %
         (segmentNumStr, os.path.basename(parentFilename), dupeBase))

    if beingDownloadedNZBSegment is None:
        # NOTE: Probably nothing should trigger this, except maybe .par .segment0001
        # files (when smartpar is added). CAUTION: Other cases that might trigger this
        # block should no longer happen!
        debug(('handleDupeNZBSegment: handling dupe (not '
               'beingDownloadedNZBSegment!?): %s renaming to: %s') %
              (destBase, dupeBase))
        os.rename(dest, dupeNZBFileName + segmentNumStr)
        return

    debug('handleDupeNZBSegment: handling dupe: %s renaming to: %s' %
          (destBase, dupeBase))

    # FIXME: should probably assert beingDownloadedNZBSegment.nzbFile.number !=
    # nzbSegment.nzbFile.number here

    # Keep the NZB's ordering when renaming: of the two clashing NZBFiles, the one that
    # appears earliest in the NZB is the one renamed
    renameFile = nzbSegment.nzbFile
    if beingDownloadedNZBSegment.nzbFile.number < renameFile.number:
        renameFile = beingDownloadedNZBSegment.nzbFile

    ArticleDecoder.setRealFileName(renameFile, dupeBase, forceChange=True)
예제 #4
0
def handleDupeNZBSegment(nzbSegment):
    """ Resolve the clash when a valid segment file already sits at the destination this
    NZBSegment is about to be written to. Does nothing if no such file is on disk
    """
    dest = nzbSegment.getDestination()
    overwriteZeroByte = nzbSegment.nzbFile.nzb.overwriteZeroByteFiles
    if not validWorkingFile(dest, overwriteZeroByteFiles=overwriteZeroByte):
        return

    # A duplicate segment was lazily found (a .segmentXXXX already on disk where we were
    # about to write). Pick the duplicate filename that either the on disk file or the
    # segment about to be written will be renamed to. It must not clash with a filename
    # on disk (nextDupeName checks the disk for us) nor with an already reserved filename
    # that may not be on disk yet (the eschewNames)
    suffixLen = len('.segmentXXXX')
    parentFilename = dest[:-suffixLen]
    segmentNumStr = dest[-suffixLen:]
    reservedNames = knownRealNZBFilenames()
    dupeNZBFileName = nextDupeName(parentFilename, eschewNames=reservedNames)

    beingDownloadedNZBSegment = Hellanzb.queue.isBeingDownloadedFile(dest)

    destBase = os.path.basename(dest)
    dupeBase = os.path.basename(dupeNZBFileName)

    info('Duplicate segment (%s), renaming parent file: %s to %s' %
         (segmentNumStr, os.path.basename(parentFilename), dupeBase))

    if beingDownloadedNZBSegment is None:
        # NOTE: Probably nothing should trigger this, except maybe .par .segment0001
        # files (when smartpar is added). CAUTION: Other cases that might trigger this
        # block should no longer happen!
        debug(('handleDupeNZBSegment: handling dupe (not '
               'beingDownloadedNZBSegment!?): %s renaming to: %s') %
              (destBase, dupeBase))
        os.rename(dest, dupeNZBFileName + segmentNumStr)
        return

    debug('handleDupeNZBSegment: handling dupe: %s renaming to: %s' %
          (destBase, dupeBase))

    # FIXME: should probably assert beingDownloadedNZBSegment.nzbFile.number !=
    # nzbSegment.nzbFile.number here

    # Keep the NZB's ordering when renaming: of the two clashing NZBFiles, the one that
    # appears earliest in the NZB is the one renamed
    renameFile = nzbSegment.nzbFile
    if beingDownloadedNZBSegment.nzbFile.number < renameFile.number:
        renameFile = beingDownloadedNZBSegment.nzbFile

    ArticleDecoder.setRealFileName(renameFile, dupeBase, forceChange=True)
예제 #5
0
def handleDupeNZBFile(nzbFile):
    """ Move aside a valid file already on disk at this NZBFile's destination (prior to
    writing the new one) by renaming it to a duplicate filename. Does nothing when no
    such file exists, or when the destination is a .nfo file
    """
    dest = nzbFile.getDestination()
    overwriteZeroByte = nzbFile.nzb.overwriteZeroByteFiles
    if not validWorkingFile(dest, overwriteZeroByteFiles=overwriteZeroByte):
        return

    # .nfo files are ignored -- newzbin.com dumps the .nfo file to the end of every nzb
    # (if one exists), so it is commonly a dupe. When it has already been downloaded (a
    # fully assembled NZBFile on disk, not an NZBSegment) it simply gets overwritten
    if getFileExtension(dest) == 'nfo':
        return

    # Choose a dupe name that is neither on disk nor in the eschewNames (the same
    # precaution handleDupeNZBSegment takes)
    dupeNZBFileName = dupeName(dest, eschewNames=knownRealNZBFilenames())

    names = (os.path.basename(dest), os.path.basename(dupeNZBFileName))
    info('Duplicate file, renaming: %s to %s' % names)
    debug('handleDupeNZBFile: renaming: %s to %s' % names)

    os.rename(dest, dupeNZBFileName)
예제 #6
0
def handleDupeNZBFile(nzbFile):
    """ Move aside a valid file already on disk at this NZBFile's destination (prior to
    writing the new one) by renaming it to a duplicate filename. Does nothing when no
    such file exists, or when the destination is a .nfo file
    """
    dest = nzbFile.getDestination()
    overwriteZeroByte = nzbFile.nzb.overwriteZeroByteFiles
    if not validWorkingFile(dest, overwriteZeroByteFiles=overwriteZeroByte):
        return

    # .nfo files are ignored -- newzbin.com dumps the .nfo file to the end of every nzb
    # (if one exists), so it is commonly a dupe. When it has already been downloaded (a
    # fully assembled NZBFile on disk, not an NZBSegment) it simply gets overwritten
    if getFileExtension(dest) == 'nfo':
        return

    # Choose a dupe name that is neither on disk nor in the eschewNames (the same
    # precaution handleDupeNZBSegment takes)
    dupeNZBFileName = dupeName(dest, eschewNames=knownRealNZBFilenames())

    names = (os.path.basename(dest), os.path.basename(dupeNZBFileName))
    info('Duplicate file, renaming: %s to %s' % names)
    debug('handleDupeNZBFile: renaming: %s to %s' % names)

    os.rename(dest, dupeNZBFileName)
예제 #7
0
파일: NZBModel.py 프로젝트: r0mdau/hellanzb
def segmentsNeedDownload(segmentList, overwriteZeroByteSegments=False):
    """ Faster version of needsDownload for multiple segments that do not have their real file
    name (for use by the Queue).

    When an NZB is loaded and parsed, NZB<file>s not found on disk at the time of parsing
    are marked as needing to be downloaded. (An easy first pass of figuring out exactly
    what needs to be downloaded).

    This function is the second pass. It takes all of those NZBFiles that need to be
    downloaded's child NZBSegments and scans the disk, detecting which segments are
    already on disk and can be skipped

    segmentList -- the NZBSegments to check against the contents of Hellanzb.WORKING_DIR
    overwriteZeroByteSegments -- handed to validWorkingFile when deciding whether a file
    found in the WORKING_DIR counts

    Returns the tuple (needDlFiles, needDlSegments, onDiskSegments): the set of NZBFiles
    owning at least one segment that needs downloading, the list of segments that need
    downloading, and the list of segments that were matched to a file on disk
    """
    # Map of segment number -> list of on disk segment filenames (with the .segmentXXXX
    # cut off) carrying that number
    onDiskSegmentsByNumber = {}

    needDlFiles = set()  # for speed while iterating
    needDlSegments = []
    onDiskSegments = []

    # Cache all WORKING_DIR segment filenames in the map of lists
    for fileName in os.listdir(Hellanzb.WORKING_DIR):
        if not validWorkingFile(os.path.join(Hellanzb.WORKING_DIR, fileName),
                                overwriteZeroByteSegments):
            continue

        ext = getFileExtension(fileName)
        if ext is None or not segmentEndRe.match(ext):
            continue

        # The last four characters of the extension are the segment number; the last
        # twelve characters of the filename are the whole .segmentXXXX suffix
        segmentNumber = int(ext[-4:])
        onDiskSegmentsByNumber.setdefault(segmentNumber, []).append(fileName[:-12])

    # Determine if each segment needs to be downloaded
    for segment in segmentList:
        # Either no file on disk ever carried this segment number, or every one that did
        # has already been claimed by an earlier segment
        segmentFileNames = onDiskSegmentsByNumber.get(segment.number)

        foundFileName = None
        for segmentFileName in segmentFileNames or ():
            # We've matched to our on disk segment if we:
            # a) find that on disk segment's file name in our potential segment's subject
            # b) match that on disk segment's file name to our potential segment's temp
            # file name (w/ .segmentXXXX cutoff)
            if segmentFileName in segment.nzbFile.subject or \
                    segment.getTempFileName()[:-12] == segmentFileName:
                foundFileName = segmentFileName
                break

        if not foundFileName:
            needDlSegments.append(segment)
            needDlFiles.add(segment.nzbFile)
            continue

        if segment.isFirstSegment() and not isHellaTemp(foundFileName) and \
                segment.nzbFile.filename is None:
            # HACK: filename is None. so we only have the temporary name in
            # memory. since we didnt see the temporary name on the filesystem, but we
            # found a subject match, that means we have the real name on the
            # filesystem. In the case where this happens we've figured out the real
            # filename (hopefully!). Set it if it hasn't already been set
            setRealFileName(segment.nzbFile, foundFileName,
                            settingSegmentNumber=segment.number)

            if Hellanzb.SMART_PAR:
                # smartDequeue won't actually 'dequeue' any of this segment's
                # nzbFile's segments (because there are no segments in the queue at
                # this point). It will identifyPar the segment AND more importantly it
                # will mark nzbFiles as isSkippedPar (taken into account later during
                # parseNZB) and print a 'Skipping par' message for those isSkippedPar
                # nzbFiles
                segment.smartDequeue(readOnlyQueue=True)

        onDiskSegments.append(segment)

        # Originally the main reason to call segmentDone here is to update the queue's
        # onDiskSegments (so isBeingDownloaded can safely detect things on disk during
        # Dupe renaming). However it's correct to call this here, it's as if hellanzb
        # just finished downloading and decoding the segment. The only incorrect part
        # about the call is the queue's totalQueuedBytes is decremented. That total is
        # reset to zero just before it is recalculated at the end of parseNZB, however
        Hellanzb.queue.segmentDone(segment)

        # This segment was matched. Remove it from the list to avoid matching it again
        # later (dupes)
        segmentFileNames.remove(foundFileName)

    return needDlFiles, needDlSegments, onDiskSegments
예제 #8
0
def segmentsNeedDownload(segmentList, overwriteZeroByteSegments = False):
    """ Faster version of needsDownload for multiple segments that do not have their real file
    name (for use by the Queue).

    When an NZB is loaded and parsed, NZB<file>s not found on disk at the time of parsing
    are marked as needing to be downloaded. (An easy first pass of figuring out exactly
    what needs to be downloaded).

    This function is the second pass. It takes all of those NZBFiles that need to be
    downloaded's child NZBSegments and scans the disk, detecting which segments are
    already on disk and can be skipped

    segmentList -- the NZBSegments to check against the contents of Hellanzb.WORKING_DIR
    overwriteZeroByteSegments -- handed to validWorkingFile when deciding whether a file
    found in the WORKING_DIR counts

    Returns the tuple (needDlFiles, needDlSegments, onDiskSegments): the set of NZBFiles
    owning at least one segment that needs downloading, the list of segments that need
    downloading, and the list of segments that were matched to a file on disk
    """
    # Map of segment number -> list of on disk segment filenames (with the .segmentXXXX
    # cut off) carrying that number
    onDiskSegmentsByNumber = {}

    needDlFiles = set() # for speed while iterating
    needDlSegments = []
    onDiskSegments = []

    # Cache all WORKING_DIR segment filenames in the map of lists
    for fileName in os.listdir(Hellanzb.WORKING_DIR):
        if not validWorkingFile(os.path.join(Hellanzb.WORKING_DIR, fileName),
                                overwriteZeroByteSegments):
            continue

        ext = getFileExtension(fileName)
        if ext is None or not segmentEndRe.match(ext):
            continue

        # The last four characters of the extension are the segment number; the last
        # twelve characters of the filename are the whole .segmentXXXX suffix
        segmentNumber = int(ext[-4:])
        onDiskSegmentsByNumber.setdefault(segmentNumber, []).append(fileName[:-12])

    # Determine if each segment needs to be downloaded
    for segment in segmentList:
        # Either no file on disk ever carried this segment number, or every one that did
        # has already been claimed by an earlier segment
        segmentFileNames = onDiskSegmentsByNumber.get(segment.number)

        foundFileName = None
        for segmentFileName in segmentFileNames or ():
            # We've matched to our on disk segment if we:
            # a) find that on disk segment's file name in our potential segment's subject
            # b) match that on disk segment's file name to our potential segment's temp
            # file name (w/ .segmentXXXX cutoff)
            if segmentFileName in segment.nzbFile.subject or \
                    segment.getTempFileName()[:-12] == segmentFileName:
                foundFileName = segmentFileName
                break

        if not foundFileName:
            needDlSegments.append(segment)
            needDlFiles.add(segment.nzbFile)
            continue

        if segment.isFirstSegment() and not isHellaTemp(foundFileName) and \
                segment.nzbFile.filename is None:
            # HACK: filename is None. so we only have the temporary name in
            # memory. since we didnt see the temporary name on the filesystem, but we
            # found a subject match, that means we have the real name on the
            # filesystem. In the case where this happens we've figured out the real
            # filename (hopefully!). Set it if it hasn't already been set
            setRealFileName(segment.nzbFile, foundFileName,
                            settingSegmentNumber = segment.number)

            if Hellanzb.SMART_PAR:
                # smartDequeue won't actually 'dequeue' any of this segment's
                # nzbFile's segments (because there are no segments in the queue at
                # this point). It will identifyPar the segment AND more importantly it
                # will mark nzbFiles as isSkippedPar (taken into account later during
                # parseNZB) and print a 'Skipping par' message for those isSkippedPar
                # nzbFiles
                segment.smartDequeue(readOnlyQueue = True)

        onDiskSegments.append(segment)

        # Originally the main reason to call segmentDone here is to update the queue's
        # onDiskSegments (so isBeingDownloaded can safely detect things on disk during
        # Dupe renaming). However it's correct to call this here, it's as if hellanzb
        # just finished downloading and decoding the segment. The only incorrect part
        # about the call is the queue's totalQueuedBytes is decremented. That total is
        # reset to zero just before it is recalculated at the end of parseNZB, however
        Hellanzb.queue.segmentDone(segment)

        # This segment was matched. Remove it from the list to avoid matching it again
        # later (dupes)
        segmentFileNames.remove(foundFileName)

    return needDlFiles, needDlSegments, onDiskSegments