Example #1
    def startElement(self, name, attrs):
        """ Handle an opening tag. 'file' builds a new NZBFile and determines whether it
        needs to be downloaded, 'group' resets the character buffer, and 'segment' reads
        the bytes/number attributes and resets the character buffer """
        if name == 'file':
            subject = self.parseUnicode(attrs.get('subject'))
            poster = self.parseUnicode(attrs.get('poster'))

            self.file = NZBFile(subject, attrs.get('date'), poster, self.nzb)

            self.fileNeedsDownload = \
                self.file.needsDownload(workingDirListing = self.workingDirListing,
                                        workingDirDupeMap = self.workingDirDupeMap)

            # Special handling for par recovery downloads
            extraMsg = ''
            if Hellanzb.SMART_PAR and self.fileNeedsDownload and self.nzb.isParRecovery:
                if not self.nzb.isSkippedParSubject(subject):
                    # Only download previously marked pars
                    self.fileNeedsDownload = False
                    extraMsg = ' (not on disk but wasn\'t previously marked as an skippedParFile)'
                    self.file.nzb.firstSegmentsDownloaded += 1
                elif toUnicode(self.nzb.parPrefix) not in toUnicode(subject):
                    # Previously marked par -- only download it if it pertains to the
                    # particular par. We keep it set to needsDownload here so it gets to
                    # parseNZB -- parseNZB won't actually queue it
                    self.file.isSkippedPar = True
            # NOTE(review): the debug() call below is cut off in this excerpt (its
            # remaining arguments and closing parens are not visible)
            if not self.fileNeedsDownload:
                debug('SKIPPING FILE%s: %s subject: %s' % (extraMsg, self.file.getTempFileName(),

            self.fileCount += 1
            self.file.number = self.fileCount
            self.fileSegmentNumber = 1
        elif name == 'group':
            self.chars = []
        elif name == 'segment':
            # NOTE(review): the 'try:' lines pairing with the two except clauses below
            # appear to be missing from this excerpt
                self.bytes = int(attrs.get('bytes'))
            except ValueError:
                self.bytes = 0
                self.number = int(attrs.get('number'))
            except ValueError:
                self.number = self.fileSegmentNumber
            self.fileSegmentNumber += 1
            self.chars = []
Example #2
    def assertRecoveredSkippedPar(self, subject):
        """ Flag an NZBFile with the given subject as a skipped par on a par recovery
        NZB, then load a fresh NZB via NZB.fromStateXML and assert that it reports the
        subject as a skipped par subject """
        nzb = NZB('test.nzb')
        nzb.isParRecovery = True
        nzb.parPrefix = 'test'
        nzb.parType = PAR2
        # presumably flagging the file records the subject in the state that
        # fromStateXML reads back below -- the mechanism isn't visible here
        skippedPar = NZBFile(subject, 'today', '*****@*****.**', nzb)
        skippedPar.isSkippedPar = True
        # Drop our own reference to the original NZB before rebuilding it from state
        del nzb

        recovered = NZB.fromStateXML('downloading', 'test')
        #print str(Hellanzb.recoveredState)
        #print str(recovered.skippedParSubjects)
        # assertEqual rather than the deprecated assertEquals alias
        self.assertEqual(True, recovered.isSkippedParSubject(subject))
Example #3
    def assertRecoveredSkippedPar(self, subject):
        """ Flag an NZBFile with the given subject as a skipped par on a par recovery
        NZB, then load a fresh NZB via NZB.fromStateXML and assert that it reports the
        subject as a skipped par subject """
        nzb = NZB('test.nzb')
        nzb.isParRecovery = True
        nzb.parPrefix = 'test'
        nzb.parType = PAR2

        # presumably flagging the file records the subject in the state that
        # fromStateXML reads back below -- the mechanism isn't visible here
        skippedPar = NZBFile(subject, 'today', '*****@*****.**', nzb)
        skippedPar.isSkippedPar = True

        # Drop our own reference to the original NZB before rebuilding it from state
        del nzb

        recovered = NZB.fromStateXML('downloading', 'test')
        #print str(Hellanzb.recoveredState)
        #print str(recovered.skippedParSubjects)
        # assertEqual rather than the deprecated assertEquals alias
        self.assertEqual(True, recovered.isSkippedParSubject(subject))
Example #4
    def startElement(self, name, attrs):
        """ Handle an opening tag. 'file' builds a new NZBFile and determines whether it
        needs to be downloaded, 'group' resets the character buffer, and 'segment' reads
        the bytes/number attributes and resets the character buffer """
        if name == 'file':
            subject = self.parseUnicode(attrs.get('subject'))
            poster = self.parseUnicode(attrs.get('poster'))

            self.file = NZBFile(subject, attrs.get('date'), poster, self.nzb)

            self.fileNeedsDownload = \
                self.file.needsDownload(workingDirListing = self.workingDirListing,
                                        workingDirDupeMap = self.workingDirDupeMap)

            # Special handling for par recovery downloads
            extraMsg = ''
            if Hellanzb.SMART_PAR and self.fileNeedsDownload and self.nzb.isParRecovery:
                if not self.nzb.isSkippedParSubject(subject):
                    # Only download previously marked pars
                    self.fileNeedsDownload = False
                    extraMsg = ' (not on disk but wasn\'t previously marked as an skippedParFile)'
                    self.file.nzb.firstSegmentsDownloaded += 1
                elif toUnicode(self.nzb.parPrefix) not in toUnicode(subject):
                    # Previously marked par -- only download it if it pertains to the
                    # particular par. We keep it set to needsDownload here so it gets to
                    # parseNZB -- parseNZB won't actually queue it
                    self.file.isSkippedPar = True
            # NOTE(review): the debug() call below is cut off in this excerpt (its
            # remaining arguments and closing parens are not visible)
            if not self.fileNeedsDownload:
                debug('SKIPPING FILE%s: %s subject: %s' % (extraMsg, self.file.getTempFileName(),

            self.fileCount += 1
            self.file.number = self.fileCount
            self.fileSegmentNumber = 1
        elif name == 'group':
            self.chars = []
        elif name == 'segment':
            # NOTE(review): the 'try:' lines pairing with the two except clauses below
            # appear to be missing from this excerpt
                self.bytes = int(attrs.get('bytes'))
            except ValueError:
                self.bytes = 0
                self.number = int(attrs.get('number'))
            except ValueError:
                self.number = self.fileSegmentNumber
            self.fileSegmentNumber += 1
            self.chars = []
Example #5
class NZBParser(ContentHandler):
    """ Parse an NZB 1.0 file into an NZBSegmentQueue
    http://www.newzbin.com/DTD/nzb/nzb-1.0.dtd """
    def __init__(self, nzb, needWorkFiles, needWorkSegments):
        """ Store the NZB and the collections to populate, reset the parsing state and
        scan Hellanzb.WORKING_DIR for leftover files """
        # nzb file to parse
        self.nzb = nzb

        # to be populated with the files that either need to be downloaded or simply
        # assembled, and their segments
        self.needWorkFiles = needWorkFiles
        self.needWorkSegments = needWorkSegments

        # parsing variables
        self.file = None
        self.bytes = None
        self.number = None
        self.chars = None
        self.fileNeedsDownload = None
        self.fileCount = 0
        self.segmentCount = 0
        self.fileSegmentNumber = 1

        # All encountered segment numbers for the current NZBFile
        self.segmentNumbers = set()
        # Current listing of existing files in the WORKING_DIR
        self.workingDirListing = []
        # Map of duplicate filenames -- @see DupeHandler.handleDupeOnDisk
        self.workingDirDupeMap = {}

        # heapq priority
        # (imported at function scope -- presumably to avoid a circular import; confirm)
        from Hellanzb.NZBLeecher.NZBSegmentQueue import NZBSegmentQueue
        self.nzbContentPriority = NZBSegmentQueue.NZB_CONTENT_P
        files = os.listdir(Hellanzb.WORKING_DIR)
        for file in files:

            # Anonymous duplicate file segments lying around are too painful to keep track
            # of. As are segments that previously failed on different servers
            if DUPE_SEGMENT_RE.match(file) or FAILED_ALT_SERVER_SEGMENT_RE.match(file):
                os.remove(os.path.join(Hellanzb.WORKING_DIR, file))

            # Add an entry to the self.workingDirDupeMap if this file looks like a
            # duplicate, and also skip adding it to self.workingDirListing (dupes are
            # handled specially so we don't care for them there)
            # NOTE(review): the bodies of the two conditionals below (and the end of the
            # validWorkingFile() call) are cut off in this excerpt
            if handleDupeOnDisk(file, self.workingDirDupeMap):
            if not validWorkingFile(os.path.join(Hellanzb.WORKING_DIR, file),

    def startElement(self, name, attrs):
        """ Handle an opening tag. 'file' builds a new NZBFile and determines whether it
        needs to be downloaded, 'group' resets the character buffer, and 'segment' reads
        the bytes/number attributes and resets the character buffer """
        if name == 'file':
            subject = self.parseUnicode(attrs.get('subject'))
            poster = self.parseUnicode(attrs.get('poster'))

            self.file = NZBFile(subject, attrs.get('date'), poster, self.nzb)

            self.fileNeedsDownload = \
                self.file.needsDownload(workingDirListing = self.workingDirListing,
                                        workingDirDupeMap = self.workingDirDupeMap)

            # Special handling for par recovery downloads
            extraMsg = ''
            if Hellanzb.SMART_PAR and self.fileNeedsDownload and self.nzb.isParRecovery:
                if not self.nzb.isSkippedParSubject(subject):
                    # Only download previously marked pars
                    self.fileNeedsDownload = False
                    extraMsg = ' (not on disk but wasn\'t previously marked as an skippedParFile)'
                    self.file.nzb.firstSegmentsDownloaded += 1
                elif toUnicode(self.nzb.parPrefix) not in toUnicode(subject):
                    # Previously marked par -- only download it if it pertains to the
                    # particular par. We keep it set to needsDownload here so it gets to
                    # parseNZB -- parseNZB won't actually queue it
                    self.file.isSkippedPar = True
            # NOTE(review): the debug() call below is cut off in this excerpt (its
            # remaining arguments and closing parens are not visible)
            if not self.fileNeedsDownload:
                debug('SKIPPING FILE%s: %s subject: %s' % (extraMsg, self.file.getTempFileName(),

            self.fileCount += 1
            self.file.number = self.fileCount
            self.fileSegmentNumber = 1
        elif name == 'group':
            self.chars = []
        elif name == 'segment':
            # NOTE(review): the 'try:' lines pairing with the two except clauses below
            # appear to be missing from this excerpt
                self.bytes = int(attrs.get('bytes'))
            except ValueError:
                self.bytes = 0
                self.number = int(attrs.get('number'))
            except ValueError:
                self.number = self.fileSegmentNumber
            self.fileSegmentNumber += 1
            self.chars = []
    def characters(self, content):
        """ Receive character data; only acts when self.chars is not None (startElement
        sets it to a list for 'group' and 'segment' tags) """
        # NOTE(review): the body of this conditional is missing from this excerpt
        if self.chars is not None:
    def endElement(self, name):
        """ Handle a closing tag. 'file' clears the current file state, 'group' joins the
        buffered characters into the newsgroup name, and 'segment' builds an NZBSegment
        from the buffered message id and the attributes read in startElement """
        if name == 'file':
            # NOTE(review): the comment below describes files that do NOT need
            # downloading, yet the visible condition tests fileNeedsDownload -- lines
            # (likely an else branch) appear to be missing from this excerpt
            if self.fileNeedsDownload:
                # done adding all child segments to this NZBFile. make note that none of
                # them need to be downloaded
                self.file.nzb.totalSkippedBytes += self.file.totalBytes
            self.file = None
            self.fileNeedsDownload = None
        elif name == 'group':
            newsgroup = self.parseUnicode(''.join(self.chars))
            self.chars = None
        elif name == 'segment':
            # NOTE(review): the body of the duplicate segment number check below is
            # missing from this excerpt
            if self.number in self.segmentNumbers:
                # This segment number was already registered

            self.segmentCount += 1

            messageId = self.parseUnicode(''.join(self.chars))
            nzbs = NZBSegment(self.bytes, self.number, messageId, self.file)
            if self.number == 1:
                self.file.firstSegment = nzbs

            if self.fileNeedsDownload:
                # HACK: Maintain the order in which we encountered the segments by adding
                # segmentCount to the priority. lame afterthought -- after realizing
                # heapqs aren't ordered. nzbContentPriority must now be large enough so
                # that it won't ever clash with EXTRA_PAR2_P + i
                nzbs.priority = self.nzbContentPriority
                if nzbs.number != 1:
                    nzbs.priority += self.segmentCount

            # Reset the per-segment parsing state
            self.chars = None
            self.number = None
            self.bytes = None    

    def parseUnicode(self, unicodeOrStr):
        """ Return unicodeOrStr encoded as latin-1 when it is a unicode object; any other
        value is handed back untouched """
        if not isinstance(unicodeOrStr, unicode):
            return unicodeOrStr
        return unicodeOrStr.encode('latin-1')
Example #6
class NZBParser(ContentHandler):
    """ Parse an NZB 1.0 file into an NZBSegmentQueue
    http://www.newzbin.com/DTD/nzb/nzb-1.0.dtd """
    def __init__(self, nzb, needWorkFiles, needWorkSegments):
        """ Store the NZB and the collections to populate, reset the parsing state and
        scan Hellanzb.WORKING_DIR for leftover files """
        # nzb file to parse
        self.nzb = nzb

        # to be populated with the files that either need to be downloaded or simply
        # assembled, and their segments
        self.needWorkFiles = needWorkFiles
        self.needWorkSegments = needWorkSegments

        # parsing variables
        self.file = None
        self.bytes = None
        self.number = None
        self.chars = None
        self.fileNeedsDownload = None
        self.fileCount = 0
        self.segmentCount = 0
        self.fileSegmentNumber = 1

        # All encountered segment numbers for the current NZBFile
        self.segmentNumbers = set()
        # Current listing of existing files in the WORKING_DIR
        self.workingDirListing = []
        # Map of duplicate filenames -- @see DupeHandler.handleDupeOnDisk
        self.workingDirDupeMap = {}

        # heapq priority
        # (imported at function scope -- presumably to avoid a circular import; confirm)
        from Hellanzb.NZBLeecher.NZBSegmentQueue import NZBSegmentQueue
        self.nzbContentPriority = NZBSegmentQueue.NZB_CONTENT_P
        files = os.listdir(Hellanzb.WORKING_DIR)
        for file in files:

            # Anonymous duplicate file segments lying around are too painful to keep track
            # of. As are segments that previously failed on different servers
            if DUPE_SEGMENT_RE.match(file) or FAILED_ALT_SERVER_SEGMENT_RE.match(file):
                os.remove(os.path.join(Hellanzb.WORKING_DIR, file))

            # Add an entry to the self.workingDirDupeMap if this file looks like a
            # duplicate, and also skip adding it to self.workingDirListing (dupes are
            # handled specially so we don't care for them there)
            # NOTE(review): the bodies of the two conditionals below (and the end of the
            # validWorkingFile() call) are cut off in this excerpt
            if handleDupeOnDisk(file, self.workingDirDupeMap):
            if not validWorkingFile(os.path.join(Hellanzb.WORKING_DIR, file),

    def startElement(self, name, attrs):
        """ Handle an opening tag. 'file' builds a new NZBFile and determines whether it
        needs to be downloaded, 'group' resets the character buffer, and 'segment' reads
        the bytes/number attributes and resets the character buffer """
        if name == 'file':
            subject = self.parseUnicode(attrs.get('subject'))
            poster = self.parseUnicode(attrs.get('poster'))

            self.file = NZBFile(subject, attrs.get('date'), poster, self.nzb)

            self.fileNeedsDownload = \
                self.file.needsDownload(workingDirListing = self.workingDirListing,
                                        workingDirDupeMap = self.workingDirDupeMap)

            # Special handling for par recovery downloads
            extraMsg = ''
            if Hellanzb.SMART_PAR and self.fileNeedsDownload and self.nzb.isParRecovery:
                if not self.nzb.isSkippedParSubject(subject):
                    # Only download previously marked pars
                    self.fileNeedsDownload = False
                    extraMsg = ' (not on disk but wasn\'t previously marked as an skippedParFile)'
                    self.file.nzb.firstSegmentsDownloaded += 1
                elif toUnicode(self.nzb.parPrefix) not in toUnicode(subject):
                    # Previously marked par -- only download it if it pertains to the
                    # particular par. We keep it set to needsDownload here so it gets to
                    # parseNZB -- parseNZB won't actually queue it
                    self.file.isSkippedPar = True
            # NOTE(review): the debug() call below is cut off in this excerpt (its
            # remaining arguments and closing parens are not visible)
            if not self.fileNeedsDownload:
                debug('SKIPPING FILE%s: %s subject: %s' % (extraMsg, self.file.getTempFileName(),

            self.fileCount += 1
            self.file.number = self.fileCount
            self.fileSegmentNumber = 1
        elif name == 'group':
            self.chars = []
        elif name == 'segment':
            # NOTE(review): the 'try:' lines pairing with the two except clauses below
            # appear to be missing from this excerpt
                self.bytes = int(attrs.get('bytes'))
            except ValueError:
                self.bytes = 0
                self.number = int(attrs.get('number'))
            except ValueError:
                self.number = self.fileSegmentNumber
            self.fileSegmentNumber += 1
            self.chars = []
    def characters(self, content):
        """ Receive character data; only acts when self.chars is not None (startElement
        sets it to a list for 'group' and 'segment' tags) """
        # NOTE(review): the body of this conditional is missing from this excerpt
        if self.chars is not None:
    def endElement(self, name):
        """ Handle a closing tag. 'file' clears the current file state, 'group' joins the
        buffered characters into the newsgroup name, and 'segment' builds an NZBSegment
        from the buffered message id and the attributes read in startElement """
        if name == 'file':
            # NOTE(review): the comment below describes files that do NOT need
            # downloading, yet the visible condition tests fileNeedsDownload -- lines
            # (likely an else branch) appear to be missing from this excerpt
            if self.fileNeedsDownload:
                # done adding all child segments to this NZBFile. make note that none of
                # them need to be downloaded
                self.file.nzb.totalSkippedBytes += self.file.totalBytes
            self.file = None
            self.fileNeedsDownload = None
        elif name == 'group':
            newsgroup = self.parseUnicode(''.join(self.chars))
            self.chars = None
        elif name == 'segment':
            # NOTE(review): the body of the duplicate segment number check below is
            # missing from this excerpt
            if self.number in self.segmentNumbers:
                # This segment number was already registered

            self.segmentCount += 1

            messageId = self.parseUnicode(''.join(self.chars))
            nzbs = NZBSegment(self.bytes, self.number, messageId, self.file)
            if self.number == 1:
                self.file.firstSegment = nzbs

            if self.fileNeedsDownload:
                # HACK: Maintain the order in which we encountered the segments by adding
                # segmentCount to the priority. lame afterthought -- after realizing
                # heapqs aren't ordered. nzbContentPriority must now be large enough so
                # that it won't ever clash with EXTRA_PAR2_P + i
                nzbs.priority = self.nzbContentPriority
                if nzbs.number != 1:
                    nzbs.priority += self.segmentCount

            # Reset the per-segment parsing state
            self.chars = None
            self.number = None
            self.bytes = None    

    def parseUnicode(self, unicodeOrStr):
        """ Return unicodeOrStr as a whitespace-stripped string, encoding it as latin-1
        first when it is a unicode object. None is returned as is """
        if unicodeOrStr is None:
            # attrs.get() yields None for an absent attribute; hand it back instead of
            # failing with an AttributeError on None.strip()
            return None
        if isinstance(unicodeOrStr, unicode):
            unicodeOrStr = unicodeOrStr.encode('latin-1')
        return unicodeOrStr.strip()