def test_NNTPArticle_UU_encode_02(self):
        """
        Test UU encoding/decoding round-trip of freshly generated data.

        A 1MB random file is encoded with CodecUU and decoded back; the
        decoded output must be byte-identical (crc32/md5) to the source.
        """

        # Temporary working file for this test
        tmp_file = join(
            self.tmp_dir,
            'test_NNTPArticle_UU_encode_02.tmp',
        )

        # Create a larger file (1MB of random data)
        assert(self.touch(tmp_file, size='1M', random=True))

        # Create an NNTPContent Object pointing to our new data
        content = NNTPBinaryContent(tmp_file)

        # Create a UU Codec instance (this test exercises CodecUU, not yEnc)
        encoder = CodecUU(work_dir=self.test_dir)

        # This should produce our UU-encoded object now
        encoded = encoder.encode(content)
        assert isinstance(encoded, NNTPAsciiContent) is True

        # Now we want to decode the content we just encoded
        decoded = encoder.decode(encoded)

        # We should get a Binary Object in return
        assert isinstance(decoded, NNTPBinaryContent) is True

        # Round-trip must be lossless: checksums of the decoded copy
        # match the original content
        assert(decoded.crc32() == content.crc32())
        assert(decoded.md5() == content.md5())
# Example #2
    def test_yenc_v1_3_NNTPContent_encode(self):
        """
        Encode NNTPContent with the yEnc (v1.3) codec, both by codec
        instance and by codec class, and verify the two paths agree;
        this is necessary prior to a post.
        """
        # Fail fast (via ImportError) if the yEnc library is unavailable;
        # everything below depends on it
        import yenc

        # Work from a known binary fixture
        source_file = join(self.var_dir, 'joystick.jpg')
        assert isfile(source_file)

        # Codec instance used for the by-object encode path
        codec = CodecYenc(work_dir=self.test_dir)

        # Wrap the fixture in an NNTPContent object
        payload = NNTPBinaryContent(source_file, work_dir=self.test_dir)

        # Encode via the codec instance; result must be non-empty ASCII
        from_instance = payload.encode(codec)
        assert isinstance(from_instance, NNTPAsciiContent) is True
        assert len(from_instance) > 0

        # Encode via the codec class itself; same expectations
        from_class = payload.encode(CodecYenc)
        assert isinstance(from_class, NNTPAsciiContent) is True
        assert len(from_class) > 0

        # Both encode paths must produce identical output
        assert from_instance.md5() == from_class.md5()

        # Chained encodings (class reference followed by an instance)
        # must also succeed and yield non-empty ASCII content
        chained = payload.encode(
            [CodecYenc, CodecYenc(work_dir=self.test_dir)],
        )
        assert isinstance(chained, NNTPAsciiContent) is True
        assert len(chained) > 0
# Example #3
    def test_yenc_v1_3_NNTPContent_encode(self):
        """
        Test the yEnc (v1.3) encoding of data (via NNTPContent)

        this is necessary prior to a post

        NOTE(review): this method has the same name as an earlier test in
        this file and therefore shadows it; only this copy runs.
        """

        # A simple test for ensuring that the yEnc
        # library exists; otherwise we want this test
        # to fail; the below line will handle this for
        # us; we'll let the test fail on an import error
        import yenc

        # First we take a binary file
        binary_filepath = join(self.var_dir, 'joystick.jpg')
        assert isfile(binary_filepath)

        # Initialize Codec
        encoder = CodecYenc(work_dir=self.test_dir)

        # Create an NNTPContent Object
        content = NNTPBinaryContent(binary_filepath, work_dir=self.test_dir)

        # Encode our content by object
        new_content_a = content.encode(encoder)

        # We should have gotten an ASCII Content Object
        assert isinstance(new_content_a, NNTPAsciiContent) is True

        # We should actually have content associated with our data
        assert len(new_content_a) > 0

        # Encode our content by type
        new_content_b = content.encode(CodecYenc)

        # We should have gotten an ASCII Content Object
        assert isinstance(new_content_b, NNTPAsciiContent) is True

        # We should actually have content associated with our data
        assert len(new_content_b) > 0

        # Our content should be the same when it was generated by both
        # methods
        assert new_content_a.md5() == new_content_b.md5()

        # Chain our encodings
        new_content = content.encode(
            [CodecYenc, CodecYenc(work_dir=self.test_dir)], )

        # We should have gotten an ASCII Content Object
        assert isinstance(new_content, NNTPAsciiContent) is True

        # We should actually have content associated with our data
        assert len(new_content) > 0
    def test_article_copy(self):
        """
        The copy() function built into the article allows you
        to create a duplicate copy of the original article without
        obstructing the content from within.

        A copy must share the original's length and headers at the time
        of copying, yet later header changes to the original must not
        leak into the copy.
        """

        tmp_dir = join(self.tmp_dir, 'NNTPArticle_Test.test_article_copy')
        # First we create two 512K files
        tmp_file_01 = join(tmp_dir, 'file01.tmp')
        tmp_file_02 = join(tmp_dir, 'file02.tmp')

        # Allow our files to exist
        assert(self.touch(tmp_file_01, size='512K', random=True) is True)
        assert(self.touch(tmp_file_02, size='512K', random=True) is True)

        # Duplicate groups are removed automatically
        article = NNTPArticle(
            subject='woo-hoo',
            poster='<*****@*****.**>',
            id='random-id',
            groups='alt.binaries.l2g',
            work_dir=self.tmp_dir,
        )

        # Store some content (two parts)
        content = NNTPBinaryContent(
            filepath=tmp_file_01, part=1, work_dir=self.tmp_dir)
        assert(article.add(content) is True)
        content = NNTPBinaryContent(
            filepath=tmp_file_02, part=2, work_dir=self.tmp_dir)
        assert(article.add(content) is True)

        # Detect our 2 attachments
        assert(len(article) == 2)

        # Set a few header entries
        article.header['Test'] = 'test'
        article.header['Another-Entry'] = 'test2'

        # Create a copy of our object
        article_copy = article.copy()

        assert(len(article_copy) == len(article))
        assert(len(article_copy.header) == len(article.header))

        # Mutating the original's header must not affect the copy; the
        # copy holds its own header rather than a shared reference, so
        # after adding one entry the originals differ by exactly one
        article.header['Yet-Another-Entry'] = 'test3'
        assert(len(article_copy.header)+1 == len(article.header))
# Example #5
    def test_mime(self):
        """
        Verify mime-type detection across empty, image, and renamed /
        extension-typed content.
        """
        ascii_content = NNTPAsciiContent()
        binary_content = NNTPBinaryContent()

        # Freshly created (empty) content has no detectable mime type
        assert(ascii_content.mime().type() == 'application/x-empty')
        assert(binary_content.mime().type() == 'application/x-empty')

        # A real jpeg is recognized from its bytes
        binary_content = NNTPBinaryContent(join(self.var_dir, 'joystick.jpg'))
        assert(binary_content.mime().type() == 'image/jpeg')

        # Saving a copy under an unrelated name does not fool detection;
        # it is still recognized as an image
        assert(binary_content.save(
            join(self.tmp_dir, 'weird.name'), copy=True) is True)
        assert(binary_content.mime().type() == 'image/jpeg')

        # Random data carrying a .rar suffix is typed from its file name
        tmp_file = join(self.tmp_dir, 'test.rar')
        assert(self.touch(tmp_file, size='2KB', random=True) is True)
        binary_content = NNTPBinaryContent(tmp_file)
        assert(binary_content.mime().type() == 'application/x-rar-compressed')
    def test_posting_content(self):
        """
        Split a 512K payload attached to an article into 128K chunks and
        verify the expected number of resulting articles.
        """
        # Build an article (duplicate groups are removed automatically)
        post = NNTPArticle(
            subject='woo-hoo',
            poster='<*****@*****.**>',
            id='random-id',
            groups='alt.binaries.l2g',
            work_dir=self.tmp_dir,
        )

        # Path for a 512K random payload; it must not pre-exist
        payload_path = join(
            self.tmp_dir, 'NNTPArticle_Test.posting', 'file.tmp')
        assert(isfile(payload_path) is False)

        # Create the random file and confirm it now exists
        assert(self.touch(payload_path, size='512K', random=True) is True)
        assert(isfile(payload_path) is True)

        # Attach the payload to the article as binary content
        attachment = NNTPBinaryContent(
            filepath=payload_path, work_dir=self.tmp_dir)
        assert(post.add(attachment) is True)

        # Splitting 512K into 128K chunks must yield a sortedset of
        # exactly 4 articles
        segments = post.split('128K')
        assert(isinstance(segments, sortedset) is True)
        assert(len(segments) == 4)
    def test_nzbfile_generation(self):
        """
        Build an NZB-File from a segmented post and verify it is written
        to disk, tracking the _segments_loaded flag along the way.
        """
        target_nzb = join(self.tmp_dir, 'test.nzbfile.nzb')
        source_payload = join(self.var_dir, 'uudecoded.tax.jpg')

        # The output file must not exist before we save
        assert isfile(target_nzb) is False

        # Our NZB object, a segmented post named after the payload, the
        # payload content itself, and a fake article to carry it
        nzb = NNTPnzb()
        post = NNTPSegmentedPost(basename(source_payload))
        attachment = NNTPBinaryContent(source_payload)
        art = NNTPArticle('testfile', groups='newsreap.is.awesome')

        # The segment tracker starts unset; it only toggles once segments
        # are added manually or an NZB-File is parsed
        assert(nzb._segments_loaded is None)

        # Wire everything together: content -> article -> post -> nzb
        art.add(attachment)
        post.add(art)
        nzb.add(post)

        # Since .add() was called, the tracker is now True
        assert(nzb._segments_loaded is True)

        # Persist and confirm the file now exists
        assert nzb.save(target_nzb) is True
        assert isfile(target_nzb) is True
    def test_NNTPContent_encode(self):
        """
        Encode NNTPContent with the UU codec, both by codec instance and
        by codec class, and verify the two paths agree; this is necessary
        prior to a post.
        """
        # Work from a known binary fixture
        source_file = join(self.var_dir, 'joystick.jpg')
        assert isfile(source_file)

        # Codec instance for the by-object encode path
        codec = CodecUU(work_dir=self.test_dir)

        # Wrap the fixture in an NNTPContent object
        payload = NNTPBinaryContent(source_file)

        # Encode via the codec instance; result must be non-empty ASCII
        from_instance = payload.encode(codec)
        assert isinstance(from_instance, NNTPAsciiContent) is True
        assert len(from_instance) > 0

        # Encode via the codec class itself; same expectations
        from_class = payload.encode(CodecUU)
        assert isinstance(from_class, NNTPAsciiContent) is True
        assert len(from_class) > 0

        # Both encode paths must produce identical output
        assert from_instance.md5() == from_class.md5()

        # Chained encodings (class reference followed by an instance)
        # must also succeed and yield non-empty ASCII content
        chained = payload.encode(
            [CodecUU, CodecUU(work_dir=self.test_dir)],
        )
        assert isinstance(chained, NNTPAsciiContent) is True
        assert len(chained) > 0
    def test_article_splitting(self):
        """
        Tests that articles can be split into multiple smaller articles.

        Also verifies that invalid split() arguments (bad sizes and
        memory buffer limits) are rejected by returning None.
        """
        # Build an empty article (duplicate groups are removed
        # automatically)
        article = NNTPArticle(
            work_dir=self.tmp_dir,
            subject='split-test',
            poster='<*****@*****.**>',
            groups='alt.binaries.l2g',
        )

        # Nothing to split gives an error
        assert(article.split() is None)

        tmp_file = join(self.tmp_dir, 'NNTPArticle_Test.chunk', '1MB.rar')
        # The file doesn't exist at first
        assert(isfile(tmp_file) is False)
        # Create it
        assert(self.touch(tmp_file, size='1MB', random=True) is True)
        # Now it does
        assert(isfile(tmp_file) is True)

        # Now we want to load it into a NNTPContent object
        content = NNTPBinaryContent(filepath=tmp_file, work_dir=self.tmp_dir)

        # Add our object to our article
        assert(article.add(content) is True)

        # No size to split on gives an error
        assert(article.split(size=0) is None)
        assert(article.split(size=-1) is None)
        assert(article.split(size=None) is None)
        assert(article.split(size='bad_string') is None)

        # Invalid Memory Limit
        assert(article.split(mem_buf=0) is None)
        assert(article.split(mem_buf=-1) is None)
        assert(article.split(mem_buf=None) is None)
        assert(article.split(mem_buf='bad_string') is None)

        # We'll split it in 2
        results = article.split(strsize_to_bytes('512K'))

        # Tests that our results are expected
        assert(isinstance(results, sortedset) is True)
        assert(len(results) == 2)

        # Test that the parts were assigned correctly
        # NOTE: the loop variable rebinds `article`; the original article
        # object is no longer referenced past this point
        for i, article in enumerate(results):
            # We should only have one content object
            assert(isinstance(article, NNTPArticle) is True)
            assert(len(article) == 1)
            # Our content object should correctly have the part and
            # total part contents populated correctly
            assert(article[0].part == (i+1))
            assert(article[0].total_parts == len(results))
# Example #10
    def test_yenc_v1_3_NNTPArticle_encode_02(self):
        """
        Test yEnc (v1.3) encoding/decoding round-trip of freshly
        generated data; the decoded output must be byte-identical
        (crc32/md5) to the source.
        """

        # A simple test for ensuring that the yEnc
        # library exists; otherwise we want this test
        # to fail; the below line will handle this for
        # us; we'll let the test fail on an import error
        import yenc

        # Temporary working file for this test
        tmp_file = join(
            self.tmp_dir,
            'test_yenc_v1_3_NNTPArticle_encode_02.tmp',
        )

        # Create a larger file (1MB of random data)
        assert(self.touch(tmp_file, size='1M', random=True))

        # Create an NNTPContent Object pointing to our new data
        content = NNTPBinaryContent(tmp_file)

        # Create a Yenc Codec instance
        encoder = CodecYenc(work_dir=self.test_dir)

        # This should produce our yEnc object now
        encoded = encoder.encode(content)
        assert isinstance(encoded, NNTPAsciiContent) is True

        # Now we want to decode the content we just encoded
        decoded = encoder.decode(encoded)

        # We should get a Binary Object in return
        assert isinstance(decoded, NNTPBinaryContent) is True

        # Round-trip must be lossless: checksums of the decoded copy
        # match the original content
        assert(decoded.crc32() == content.crc32())
        assert(decoded.md5() == content.md5())
# Example #11
    def test_yenc_v1_3_NNTPArticle_encode_02(self):
        """
        Round-trip a fresh 1MB random payload through the yEnc (v1.3)
        codec and verify the decoded output matches the original
        byte-for-byte.
        """
        # Let the test fail with an ImportError when the yEnc library
        # is unavailable; everything below depends on it
        import yenc

        # Location of the temporary payload
        payload_path = join(
            self.tmp_dir,
            'test_yenc_v1_3_NNTPArticle_encode_02.tmp',
        )

        # Generate 1MB of random data
        assert(self.touch(payload_path, size='1M', random=True))

        # Wrap the payload and prepare a yEnc codec
        source = NNTPBinaryContent(payload_path)
        codec = CodecYenc(work_dir=self.test_dir)

        # Encoding must yield an ASCII content object
        encoded = codec.encode(source)
        assert isinstance(encoded, NNTPAsciiContent) is True

        # Decoding must yield a binary content object again
        restored = codec.decode(encoded)
        assert isinstance(restored, NNTPBinaryContent) is True

        # The round-trip must be lossless: checksums agree
        assert(restored.crc32() == source.crc32())
        assert(restored.md5() == source.md5())
# Example #12
    def test_binary_article_iterations(self):
        """
        Binary Content can be loaded straight from file and can be processed
        in a for loop.

        Verifies chunked iteration (BLOCK_SIZE bytes per read), length
        reporting, and that loaded files survive object deletion since
        loaded content is never attached.
        """

        # Create a BytesIO Object
        bobj = BytesIO()

        # Fill our BytesIO object with random junk at least
        # 4x our expected block size
        for _ in range(4):
            bobj.write(urandom(BLOCK_SIZE))

        # Write just '1' more bytes so we ``overflow`` and require
        # a 5th query later
        # NOTE(review): writing a str literal into BytesIO is Python 2
        # specific; Python 3 would require b'0' here — confirm target
        # interpreter before porting.
        bobj.write('0')

        # Content
        ba = NNTPBinaryContent()

        # No items means not valid
        assert (ba.is_valid() is False)

        # Loading a non-existent file fails
        assert (ba.load('unknown_file') is False)

        # a failed load means not valid
        assert (ba.is_valid() is False)

        temp_file = join(self.tmp_dir, 'NNTPContent_Test-test_iterations.tmp')

        # Persist our buffer to disk so it can be loaded
        with open(temp_file, 'wb') as fd:
            fd.write(bobj.getvalue())

        assert (isfile(temp_file) is True)

        assert (ba.load(temp_file) is True)

        # Binary Content read by chunk size: 4 full blocks followed by
        # the single overflow byte
        chunk = 4
        for line in ba:
            if chunk > 0:
                assert (len(line) == BLOCK_SIZE)
            else:
                # 5th query
                assert (len(line) == 1)
            chunk -= 1

        # We should have performed 5 chunk requests and
        # -1 more since we decrement the chunk one last time
        # before we're done
        assert (chunk == -1)

        # Confirm our size is reading correctly too
        assert (len(ba) == (BLOCK_SIZE * 4) + 1)

        # Remove article
        del ba

        # Files are not attached by default so our temp file
        # should still exist
        assert (isfile(temp_file) is True)

        # We'll create another object
        ba = NNTPAsciiContent()
        assert (ba.load(temp_file) is True)
        # Successfully loaded files are never attached
        assert (ba.is_attached() is False)
        # our file still exists of course
        assert (isfile(temp_file) is True)
        # we'll detach it (a no-op here since it was never attached)
        ba.detach()
        # Still all is good
        assert (isfile(temp_file) is True)
        # Check that we're no longer attached
        assert (ba.is_attached() is False)
        # Deleting the object does NOT remove the file: the content was
        # detached, so the file on disk is left alone
        del ba
        # the file persists after deletion
        assert (isfile(temp_file) is True)
# Example #13
    def test_general_features(self):
        """
        Detaching makes a file no longer managed by this NNTPContent.

        Exercises the full lifecycle: temporary files are destroyed with
        their owning object, save() relocates content to its final path,
        and saved files outlive the object.
        """
        # No parameters should create a file
        aa = NNTPAsciiContent()
        ba = NNTPBinaryContent()

        # open a temporary file
        aa.open()
        ba.open()

        # Capture the temporary file paths for later checks
        aa_filepath = aa.filepath
        ba_filepath = ba.filepath
        assert (isfile(aa_filepath) is True)
        assert (isfile(ba_filepath) is True)

        # Test Length (no content written yet)
        assert (len(aa) == 0)
        assert (len(ba) == 0)

        # Test that temporary files are destroyed when the object is
        # deleted
        del aa
        del ba

        # Files are destroyed
        assert (isfile(aa_filepath) is False)
        assert (isfile(ba_filepath) is False)

        # Test some parameters out during initialization
        aa = NNTPAsciiContent(
            filepath="ascii.file",
            part=2,
            work_dir=self.tmp_dir,
        )

        ba = NNTPBinaryContent(
            filepath="binary.file",
            part="10",
            work_dir=self.tmp_dir,
        )

        # Check our parts
        assert (aa.part == 2)

        # Strings are converted okay
        assert (ba.part == 10)

        # open a temporary file
        aa.open()
        ba.open()

        # The final destination files don't exist yet; content lives in
        # temporary files until save() is called
        assert (isfile(join(self.tmp_dir, "binary.file")) is False)
        assert (isfile(join(self.tmp_dir, "ascii.file")) is False)

        # Grab a copy of these file paths so we can check them later
        aa_filepath = aa.filepath
        ba_filepath = ba.filepath

        # Save our content
        aa.save()
        ba.save()

        # check that it was created okay
        assert (isfile(join(self.tmp_dir, "binary.file")) is True)
        assert (isfile(join(self.tmp_dir, "ascii.file")) is True)

        # Temporary files are gone (moved from the save() command above)
        assert (isfile(aa_filepath) is False)
        assert (isfile(ba_filepath) is False)

        # They were never the same after the save()
        assert (aa_filepath != aa.filepath)
        assert (ba_filepath != ba.filepath)

        # However after save is called; the filepath is updated to reflect
        # the proper path; so this is still true
        assert (isfile(aa.filepath) is True)
        assert (isfile(ba.filepath) is True)

        # Even after the objects are gone
        del aa
        del ba

        # Saved files still exist even after the objects are deleted
        assert (isfile(join(self.tmp_dir, "binary.file")) is True)
        assert (isfile(join(self.tmp_dir, "ascii.file")) is True)

        # Cleanup
        unlink(join(self.tmp_dir, "ascii.file"))
        unlink(join(self.tmp_dir, "binary.file"))
# Example #14
    def decode(self, content=None, name=None, password=None, *args, **kwargs):
        """
        content must be pointing to a directory containing 7-Zip files that can
        be easily sorted on. Alternatively, path can be of type NNTPContent()
        or a set/list of.

        If no password is specified, then the password configuration loaded
        into the class is used instead.

        Returns an NNTPBinaryContent() object containing the contents of the
        package within a sortedset() object.  All decoded() functions have to
        return a resultset() to be consistent with one another.

        Returns None on failure (nothing to decode, missing 7-Zip binary,
        or an extraction error).
        """
        if content is not None:
            self.add(content)

        # Some simple error checking to save from doing to much here
        if len(self) == 0:
            return None

        # The 7-Zip binary must be present and executable
        if not self.can_exe(self._bin):
            return None

        # Fall back to the password configured on the class
        if not password:
            password = self.password

        # Initialize our command
        execute = [
            # Our Executable 7-Zip Application
            self._bin,
            # Use eXtract (with full paths) Flag
            'x',
            # Assume Yes
            '-y',
        ]

        # Password Protection
        if password is not None:
            execute.append('-p%s' % password)
        else:
            # Do not prompt for password
            execute.append('-p-')

        if self.overwrite:
            # Overwrite files
            execute.append('-aoa')

        else:
            # Don't overwrite files (skip existing)
            execute.append('-aos')

        # Stop Switch Parsing
        execute.append('--')

        # Derive a name for the temporary extraction directory
        if not name:
            name = self.name
            if not name:
                name = random_str()

        for _path in self:
            # Temporary Path (a fresh one is created per archive)
            tmp_path, _ = self.mkstemp(content=name)

            with pushd(tmp_path):
                # Create our SubProcess Instance
                sp = SubProcess(list(execute) + [_path])

                # Start our execution now
                sp.start()

                # Poll for newly extracted files while 7-Zip runs
                found_set = None
                while not sp.is_complete(timeout=1.5):

                    found_set = self.watch_dir(
                        tmp_path,
                        ignore=found_set,
                    )

                # Handle remaining content (final non-blocking sweep)
                found_set = self.watch_dir(
                    tmp_path,
                    ignore=found_set,
                    seconds=-1,
                )

                # Let the caller know our status
                if not sp.successful():
                    # Cleanup Temporary Path
                    rm(tmp_path)
                    return None

                if not len(found_set):
                    logger.warning(
                        '7Z archive (%s) contained no content.' %
                        basename(_path), )

        # Clean our are list of objects to archive
        self.clear()

        # Return path containing extracted content
        # NOTE(review): tmp_path here is the path from the LAST loop
        # iteration only; when multiple archives are queued, earlier
        # extraction directories are not included in the result — confirm
        # whether multi-archive input is expected here.
        results = NNTPBinaryContent(tmp_path)

        # We intentionally attach it's content
        results.attach()

        # Create a sortedset to return
        _resultset = sortedset(key=lambda x: x.key())
        _resultset.add(results)

        # Return our content
        return _resultset
    def test_loading_response(self):
        """
        Tests the load() function of the article.

        Covers loading from an NNTPResponse (with and without a header),
        string/repr formatting, sort ordering, and loading one article's
        metadata on top of another.
        """

        # Prepare a Response
        response = NNTPResponse(200, 'Great Data')
        response.decoded.add(NNTPBinaryContent(work_dir=self.tmp_dir))

        # Prepare Article
        article = NNTPArticle(id='random-id', work_dir=self.tmp_dir)

        # There is no data so our article can't be valid
        assert(article.is_valid() is False)

        # Load and Check
        assert(article.load(response) is True)
        assert(article.header is None)
        assert(len(article.decoded) == 1)
        assert(len(article.decoded) == len(article.files()))
        assert(str(article) == 'random-id')
        # NOTE: `unicode` is Python 2 only
        assert(unicode(article) == u'random-id')
        assert(article.size() == 0)

        # Now there is data, but it's an empty Object so it can't be valid
        assert(article.is_valid() is False)

        # repr() must expose the Message-ID and attachment count
        result = re.search(' Message-ID=\"(?P<id>[^\"]+)\"', repr(article))
        assert(result is not None)
        assert(result.group('id') == str(article))

        result = re.search(' attachments=\"(?P<no>[^\"]+)\"', repr(article))
        assert(result is not None)
        assert(int(result.group('no')) == len(article))

        # Prepare two articles to verify sort ordering
        article_a = NNTPArticle(id='a', work_dir=self.tmp_dir)
        article_b = NNTPArticle(id='b', work_dir=self.tmp_dir)
        assert((article_a < article_b) is True)

        # playing with the sort order however alters things
        article_a.no += 1
        assert((article_a < article_b) is False)

        # Prepare a Response (with a Header)
        response = NNTPResponse(200, 'Great Data')
        response.decoded.add(NNTPHeader(work_dir=self.tmp_dir))
        response.decoded.add(NNTPBinaryContent(work_dir=self.tmp_dir))

        # Prepare Article
        article = NNTPArticle(id='random-id', work_dir=self.tmp_dir)

        # Load and Check: the header is split out of decoded content
        assert(article.load(response) is True)
        assert(isinstance(article.header, NNTPHeader))
        assert(len(article.decoded) == 1)

        for no, decoded in enumerate(article.decoded):
            # Test equality (index access matches iteration order)
            assert(article[no] == decoded)

        # We can also load another article ontop of another
        # This used when associating downloaded articles with ones
        # found in NZB-Files
        new_article = NNTPArticle(
            msgid='brand-new-id',
            no=article.no+1,
            groups='a.b.c,d.e.f',
            work_dir=self.tmp_dir,
        )
        new_article.subject = 'test-subject-l2g'
        new_article.poster = 'test-poster-l2g'
        new_article.header = 'test-header-l2g'

        # Loading copies every attribute of the source article across
        assert(article.load(new_article) is True)
        assert(article.id == new_article.id)
        assert(article.no == new_article.no)
        assert(article.groups == new_article.groups)
        assert(article.poster == new_article.poster)
        assert(article.subject == new_article.subject)
        assert(article.header == new_article.header)
        assert(article.body == new_article.body)
        assert(article.decoded == new_article.decoded)
        assert(article.groups == new_article.groups)
# Example #16
    def encode(self, content=None, *args, **kwargs):
        """
        Takes a specified path (and or file) and creates par2 files based on
        it. If this function is successful, it returns a set of
        NNTPBinaryContent() objects identifying the PAR2 files generated
        based on the passed in content.

        The function returns None if it fails in any way.

        """

        if content is not None:
            self.add(content)

        # Some simple error checking to save from doing to much here
        if len(self) == 0:
            return None

        # The par2 binary must be present and executable
        if not self.can_exe(self._par):
            return None

        # Accumulate results across every target.  Previously this set was
        # re-created inside the loop below, so with multiple targets only
        # the last target's PAR2 files were returned (and an empty archive
        # raised a NameError on the final return).
        results = sortedset(key=lambda x: x.key())

        for target in self.archive:
            # Base entry on first file in the list
            name = basename(target)
            target_dir = dirname(target)

            # Initialize our command
            execute = [
                # Our Executable PAR Application
                self._par,
                # Use Create Flag
                'create',
            ]

            # Handle PAR Block Size
            if self.block_size:
                execute.append('-s%s' % self.block_size)

            # Redundancy level (percentage of recovery data)
            if self.recovery_percent:
                execute.append('-r%d' % self.recovery_percent)

            if self.cpu_cores is not None and self.cpu_cores > 1:
                # to repair concurrently - uses multiple threads
                execute.append('-t+')

            # Stop Switch Parsing
            execute.append('--')

            # Now add our target (we can only do one at a time which is why
            # we loop) and run our setups
            execute.append(target)

            found_set = sortedset()
            with pushd(target_dir):
                # Create our SubProcess Instance
                sp = SubProcess(execute)

                # Start our execution now
                sp.start()

                # Poll for newly generated PAR2 parts while par2 runs
                while not sp.is_complete(timeout=1.5):

                    found_set = self.watch_dir(
                        target_dir,
                        prefix=name,
                        regex=PAR_PART_RE,
                        ignore=found_set,
                    )

            # Handle remaining content (final non-blocking sweep)
            found_set = self.watch_dir(
                target_dir,
                prefix=name,
                regex=PAR_PART_RE,
                ignore=found_set,
                seconds=-1,
            )

            # Let the caller know our status
            if not sp.successful():
                # We're done; we failed
                return None

            if not len(found_set):
                # No PAR2 parts were produced; we failed
                return None

            # iterate through our found_set and create NNTPBinaryContent()
            # objects from them.  Part numbering restarts per target.
            part = 0
            for path in found_set:
                # Iterate over our found files and determine their part
                # information
                part += 1
                content = NNTPBinaryContent(
                    path,
                    part=part,
                    total_parts=len(found_set),
                )

                # Loaded data is by default detached; we want to attach it
                content.attach()

                # Add our attached content to our results
                results.add(content)

        # Clean our list of objects to archive
        self.clear()

        # Return our generated PAR2 content set
        return results
    def test_encrytion(self):
        """
        Round-trip NNTPCryptography encryption/decryption.

        Verifies key generation, a short-string round trip, that
        mismatched alg/mgf1 hashes fail to decrypt, and a chunked
        round trip of a larger (uuencoded) file for every supported
        hash combination.
        """

        # Fresh Cryptography object; saving is impossible until keys exist
        crypto = NNTPCryptography()
        assert (crypto.save() is False)

        # Generate a key pair and confirm it is retained on the object
        (private_key, public_key) = crypto.genkeys()
        assert (private_key, public_key) == crypto.keys()

        # A small payload first; encrypt then decrypt and confirm the
        # round trip returns what we started with
        payload = 'newsreap'
        assert (str(crypto.decrypt(crypto.encrypt(payload))) == str(payload))

        # Encryption is bound to the hash algorithms used; decrypting
        # with different alg/mgf1 values must fail
        sealed = crypto.encrypt(
            payload,
            alg=HashType.SHA512,
            mgf1=HashType.SHA512,
        )

        # Every mismatched alg/mgf1 combination yields None
        for bad_alg, bad_mgf1 in (
                (HashType.SHA256, HashType.SHA512),
                (HashType.SHA512, HashType.SHA256),
                (HashType.SHA384, HashType.SHA1)):
            assert (crypto.decrypt(sealed, alg=bad_alg, mgf1=bad_mgf1) is None)

        # With the matching hashes, decryption succeeds again
        opened = crypto.decrypt(
            sealed,
            alg=HashType.SHA512,
            mgf1=HashType.SHA512,
        )
        assert (str(payload) == str(opened))

        # Build a larger temporary data file; big enough that it must be
        # processed in chunks
        data_file = join(self.tmp_dir, 'NNTPCryptography.test_encrytion.tmp')
        assert (self.touch(data_file, size='128KB', random=True))

        # We can't deal with raw binary directly; wrap the file in an
        # NNTPContent object so it can be uuencoded below
        source = NNTPBinaryContent(data_file)

        # Exercise every supported alg/mgf1 pairing so that the chunk
        # sizing is validated across the board (O(n^2) over the hash map)
        for hash_alg in CRYPTOGRAPHY_HASH_MAP.keys():
            for hash_mgf1 in CRYPTOGRAPHY_HASH_MAP.keys():

                # A fresh Cryptography object per combination
                crypto = NNTPCryptography(alg=hash_alg, mgf1=hash_mgf1)

                # No keys yet; save must fail
                assert (crypto.save() is False)

                # Generate the keys for this combination
                (private_key, public_key) = crypto.genkeys()

                # uuencode the binary content so we work with ascii data
                uu_codec = CodecUU(work_dir=self.test_dir)
                encoded = uu_codec.encode(source)

                # We should have gotten a non-empty ASCII Content Object
                assert (len(encoded) > 0)

                with open(encoded.filepath, 'rb') as fp:
                    # chunk_size() bounds each read; any chunk size higher
                    # than 190 doesn't seem to work with this padding
                    for block in iter(lambda: fp.read(crypto.chunk_size()),
                                      b''):
                        # Each chunk must survive an encrypt/decrypt cycle
                        sealed = crypto.encrypt(block)
                        assert (sealed is not None)
                        assert (str(block) == str(crypto.decrypt(sealed)))
Example #18
0
    def test_general_features(self):
        """
        Exercise general NNTPContent file management: temporary file
        creation, attachment (cleanup on delete), part number handling,
        and save() relocating the underlying file.
        """
        # No parameters should create a file
        aa = NNTPAsciiContent()
        ba = NNTPBinaryContent()

        # open a temporary file
        aa.open()
        ba.open()

        # Test Files
        aa_filepath = aa.filepath
        ba_filepath = ba.filepath
        assert(isfile(aa_filepath) is True)
        assert(isfile(ba_filepath) is True)

        # Test Length
        assert(len(aa) == 0)
        assert(len(ba) == 0)

        # Test that the temporary files are destroyed once their objects
        # are deleted (freshly created content is attached)
        del aa
        del ba

        # Files are destroyed
        assert(isfile(aa_filepath) is False)
        assert(isfile(ba_filepath) is False)

        # Test some parameters out during initialization
        aa = NNTPAsciiContent(
            filepath="ascii.file",
            part=2,
            work_dir=self.tmp_dir,
        )

        ba = NNTPBinaryContent(
            filepath="binary.file",
            part="10",
            work_dir=self.tmp_dir,
        )

        # Check our parts
        assert(aa.part == 2)

        # Strings are converted okay
        assert(ba.part == 10)

        # open a temporary file
        aa.open()
        ba.open()

        # The destination (save target) files don't exist yet
        assert(isfile(join(self.tmp_dir, "binary.file")) is False)
        assert(isfile(join(self.tmp_dir, "ascii.file")) is False)

        # Grab a copy of these file paths so we can check them later
        aa_filepath = aa.filepath
        ba_filepath = ba.filepath

        # Save our content
        aa.save()
        ba.save()

        # check that it was created okay
        assert(isfile(join(self.tmp_dir, "binary.file")) is True)
        assert(isfile(join(self.tmp_dir, "ascii.file")) is True)

        # Temporary files are gone (moved by the save() command above)
        assert(isfile(aa_filepath) is False)
        assert(isfile(ba_filepath) is False)

        # They were never the same after the save()
        assert(aa_filepath != aa.filepath)
        assert(ba_filepath != ba.filepath)

        # However after save is called; the filepath is updated to reflect
        # the proper path; so this is still true
        assert(isfile(aa.filepath) is True)
        assert(isfile(ba.filepath) is True)

        # Saved files must persist even after the objects are gone
        del aa
        del ba

        # Files still exist even after the objects are destroyed
        assert(isfile(join(self.tmp_dir, "binary.file")) is True)
        assert(isfile(join(self.tmp_dir, "ascii.file")) is True)

        # Cleanup
        unlink(join(self.tmp_dir, "ascii.file"))
        unlink(join(self.tmp_dir, "binary.file"))
Example #19
0
    def decode(self, content=None, *args, **kwargs):
        """
        content must be pointing to a directory containing par files that can
        be easily retrieved. Alternatively, path can be of type NNTPContent()
        or a set/list of.

        A sortedset of NNTPBinaryContent() objects is returned containing
        any new content that was generated as a result of the par2 call.

        If an error occurs then None is returned.

        """
        # Queue any content handed to us alongside whatever was previously
        # add()-ed to this object
        if content is not None:
            self.add(content)

        # Some simple error checking to save us from doing too much here
        if len(self) == 0:
            return None

        # We can't do anything without the par2 executable
        if not self.can_exe(self._par):
            return None

        # Only the par2 index files drive the repair (par2 itself locates
        # the recovery volumes)
        indexes = self.__filter_pars(self.archive, indexes=True, volumes=False)

        # Initialize our command
        execute = [
            # Our Executable PAR Application
            self._par,
            # Use Repair
            'repair',
        ]

        if self.cpu_cores is not None and self.cpu_cores > 1:
            # to repair concurrently - uses multiple threads
            execute.append('-t+')

        # Stop Switch Parsing
        execute.append('--')

        # Results are kept sorted by each content object's key()
        results = sortedset(key=lambda x: x.key())
        for _path in indexes:

            # Get the directory the par file resides in
            par_path = dirname(_path)

            with pushd(par_path):
                # create a before snapshot so we can later tell which
                # files the repair produced
                before_snapshot = self.watch_dir(
                    par_path,
                    seconds=-1,
                )

                # Create our SubProcess Instance (one par2 run per index)
                sp = SubProcess(list(execute) + [basename(_path)])

                # Start our execution now
                sp.start()

                # Track files created while the repair runs
                after_snapshot = sortedset()
                while not sp.is_complete(timeout=1.5):

                    after_snapshot = self.watch_dir(
                        par_path,
                        ignore=after_snapshot,
                    )

                # Handle remaining content (final non-blocking sweep)
                after_snapshot = self.watch_dir(
                    par_path,
                    ignore=after_snapshot,
                    seconds=-1,
                )

                # Add any new files detected to our result set otherwise we
                # just return an empty set
                total_parts = after_snapshot - before_snapshot
                for no, path in enumerate(total_parts):
                    content = NNTPBinaryContent(
                        path,
                        part=no+1,
                        total_parts=len(total_parts),
                    )
                    # Loaded data is by default detached; we want to attach it
                    content.attach()

                    # Add our attached content to our results
                    results.add(content)

                # Let the caller know our status; a failed repair aborts
                # the whole decode
                if not sp.successful():
                    return None

        # Clear our list of objects queued for processing
        self.clear()

        return results
Example #20
0
    def test_decoding_yenc_multi_part(self):
        """
        Test decoding of a yEnc multi-part

        This test was generated after visiting http://www.yenc.org and finding
        the examples they provide on their site.

            Downloaded the following zip file:
                http://www.yenc.org/yenc2.zip

            Then extracting it revealed 3 files:
                - 00000020.ntx
                    This is the yEnc file as it would have been seen after
                    being downloaded from the NNTP server (part 1 of 2)

                - 00000021.ntx
                    This is the yEnc file as it would have been seen after
                    being downloaded from the NNTP server (part 2 of 2)

                - joystick.jpg
                    This is what the contents of the file should look like
                    after being decoded (and assembled). This is what we use
                    to test the file against.
        """

        # A simple test for ensuring that the yEnc
        # library exists; otherwise we want this test
        # to fail; the below line will handle this for
        # us; we'll let the test fail on an import error
        import yenc

        # Input Files (the 2 encoded parts)
        encoded_filepath_1 = join(self.var_dir, '00000020.ntx')
        encoded_filepath_2 = join(self.var_dir, '00000021.ntx')

        assert isfile(encoded_filepath_1)
        assert isfile(encoded_filepath_2)

        # Compare File (fully decoded and assembled expected output)
        decoded_filepath = join(self.var_dir, 'joystick.jpg')
        assert isfile(decoded_filepath)

        # NOTE(review): nothing ever writes to these 4 buffers; the tell()
        # comparisons below only confirm both pairs are still empty. They
        # appear to be leftovers from an earlier revision of this test.
        # Python Solution
        fd1_py = BytesIO()
        fd2_py = BytesIO()

        # C Solution
        fd1_c = BytesIO()
        fd2_c = BytesIO()

        # Initialize Codec
        decoder = CodecYenc(work_dir=self.test_dir)

        contents_py = []
        contents_c = []

        # Force to operate in python (manual/slow) mode
        CodecYenc.FAST_YENC_SUPPORT = False
        with open(encoded_filepath_1, 'r') as fd_in:
            contents_py.append(decoder.decode(fd_in))
        with open(encoded_filepath_2, 'r') as fd_in:
            contents_py.append(decoder.decode(fd_in))

        for x in contents_py:
            # Verify our data is good
            assert x.is_valid() is True

        # Force to operate with the C extension yEnc
        # This requires the extensions to be installed
        # on the system
        CodecYenc.FAST_YENC_SUPPORT = True
        with open(encoded_filepath_1, 'r') as fd_in:
            contents_c.append(decoder.decode(fd_in))
        with open(encoded_filepath_2, 'r') as fd_in:
            contents_c.append(decoder.decode(fd_in))

        for x in contents_c:
            # Verify our data is good
            assert x.is_valid() is True

        # Confirm that our output from our python implementation
        # matches that of our yEnc C version.
        assert fd1_py.tell() == fd1_c.tell()
        assert fd2_py.tell() == fd2_c.tell()

        with open(decoded_filepath, 'r') as fd_in:
            decoded = fd_in.read()

        # Sort the parts into their proper order before assembly
        contents_py.sort()
        contents_c.sort()

        content_py = NNTPBinaryContent(
            filepath=contents_py[0].filename,
            save_dir=self.out_dir,
        )
        content_c = NNTPBinaryContent(
            filepath=contents_c[0].filename,
            save_dir=self.out_dir,
        )

        # append() takes a list or another NNTPContent
        # and appends its content to the end of the content
        content_py.append(contents_py)
        # Bug fix: assemble the C-decoded parts here. This previously
        # appended contents_py, which masked any python/C mismatch since
        # both assemblies were built from the same (python) parts.
        content_c.append(contents_c)

        assert len(content_py) == len(decoded)
        assert len(content_c) == len(decoded)

        # Compare our processed content with the expected results
        assert content_py.getvalue() == decoded
        assert content_c.getvalue() == decoded
Example #21
0
    def test_binary_article_iterations(self):
        """
        Binary Content can be loaded straight from file and can be processed
        in a for loop (chunked reads of BLOCK_SIZE bytes per iteration).
        """

        # Create a BytesIO Object
        bobj = BytesIO()

        # Fill our BytesIO object with random junk at least
        # 4x our expected block size
        for _ in range(4):
            bobj.write(urandom(BLOCK_SIZE))

        # Write just one more byte so we ``overflow`` and require
        # a 5th read later
        bobj.write('0')

        # Content
        ba = NNTPBinaryContent()

        # No items means not valid
        assert(ba.is_valid() is False)

        # Loading a missing file must fail
        assert(ba.load('unknown_file') is False)

        # a failed load means not valid
        assert(ba.is_valid() is False)

        temp_file = join(self.tmp_dir, 'NNTPContent_Test-test_iterations.tmp')

        with open(temp_file, 'wb') as fd:
            fd.write(bobj.getvalue())

        assert(isfile(temp_file) is True)

        assert(ba.load(temp_file) is True)

        # Binary Content read by chunk size
        chunk = 4
        for line in ba:
            if chunk > 0:
                assert(len(line) == BLOCK_SIZE)
            else:
                # 5th read only carries our single overflow byte
                assert(len(line) == 1)
            chunk -= 1

        # We should have performed 5 chunk requests and
        # -1 more since we decrement the chunk one last time
        # before we're done
        assert(chunk == -1)

        # Confirm our size is reading correctly too
        assert(len(ba) == (BLOCK_SIZE*4)+1)

        # Remove article
        del ba

        # Loaded files are not attached by default so our temp file
        # should still exist
        assert(isfile(temp_file) is True)

        # We'll create another object
        ba = NNTPAsciiContent()
        assert(ba.load(temp_file) is True)
        # Successfully loaded files are never attached
        assert(ba.is_attached() is False)
        # our file still exists of course
        assert(isfile(temp_file) is True)
        # detach() is a no-op here; loaded content is already detached
        ba.detach()
        # Still all is good
        assert(isfile(temp_file) is True)
        # Check that we're no longer attached
        assert(ba.is_attached() is False)
        # Deleting a detached object leaves its file on disk
        del ba
        # the file is still there
        assert(isfile(temp_file) is True)
Example #22
0
class CodecYenc(CodecBase):
    """
    yEnc (v1.3) encoder/decoder.

    encode() wraps an NNTPContent object (or a file path) into an
    NNTPAsciiContent article framed with =ybegin/=ypart/=yend lines;
    decode() rebuilds an NNTPBinaryContent object from a stream of
    yEnc encoded lines.

    When fast yEnc support is available (FAST_YENC_SUPPORT), the byte
    translation is delegated to encode_string()/decode_string();
    otherwise a pure-python fallback path is used.
    """
    def __init__(self,
                 descriptor=None,
                 work_dir=None,
                 linelen=128,
                 *args,
                 **kwargs):
        """
        Initialize our codec; linelen caps the number of encoded
        characters written per output line when encoding.
        """
        super(CodecYenc, self).__init__(descriptor=descriptor,
                                        work_dir=work_dir,
                                        *args,
                                        **kwargs)

        # Used for internal meta tracking when using the decode()
        self._meta = {}

        # Our Binary Object we can reference while we decode
        # content
        self.decoded = None

        # Used for encoding; this defines the maximum number of (encoded)
        # characters to display per line.
        self.linelen = linelen

    def parse_article(self, subject, *args, **kwargs):
        """
        Takes an article header (subject) and returns its parsed content
        as a dictionary if it's successful. Otherwise it returns None.
        """

        matched = NZB_SUBJECT_PARSE.match(subject)
        if matched is None:
            # subject is not parsable
            return None

        results = {}

        # Trim results (strip trailing whitespace/dashes from the
        # description; surrounding whitespace from the filename)
        if matched.group('desc') is not None:
            results['desc'] = re.sub('[\s-]+$', '', matched.group('desc'))
        if matched.group('fname') is not None:
            results['fname'] = matched.group('fname').strip()

        # Support conversion of integers
        for _attr in ['index', 'count', 'yindex', 'ycount', 'size']:
            if matched.group(_attr) is not None:
                results[_attr] = int(matched.group(_attr))

        return results

    def encode(self, content, mem_buf=DEFAULT_BUFFER_SIZE):
        """
        Encodes an NNTPContent object (or a file path) into a yEnc
        article and returns it as an NNTPAsciiContent object.

        mem_buf controls how many bytes are read from the source per
        iteration. Returns None if the source could not be opened or
        the fast encoder raised an error.
        """

        if isinstance(content, NNTPContent):
            # Create our ascii instance
            _encoded = NNTPAsciiContent(
                filepath=content.filename,
                part=content.part,
                total_parts=content.total_parts,
                sort_no=content.sort_no,
                work_dir=self.work_dir,
                # We want to ensure we're working with a unique attached file
                unique=True,
            )

        else:
            # If we reach here, we presume our content is a filename

            # Create our ascii instance
            _encoded = NNTPAsciiContent(
                filepath=content,
                work_dir=self.work_dir,
                # We want to ensure we're working with a unique attached file
                unique=True,
            )

            # Convert our content object into an NNTPContent object
            content = NNTPContent(
                filepath=content,
                work_dir=self.work_dir,
            )

        # yEnc (v1.3) begin
        fmt_ybegin = '=ybegin part=%d total=%d line=%d size=%d name=%s' % (
            content.part,
            content.total_parts,
            self.linelen,
            len(content),
            content.filename,
        )

        # yEnc part
        fmt_ypart = '=ypart begin=%d end=%d' % (
            content.begin() + 1,
            content.end(),
        )

        if isinstance(content._parent, NNTPContent):
            # yEnc end; we can include the full-file crc32 since we
            # have access to the parent content
            fmt_yend = '=yend size=%d part=%d pcrc32=%s crc32=%s' % (
                len(content),
                content.part,
                content.crc32(),
                content._parent.crc32(),
            )

        else:
            # yEnc end (per-part crc32 only)
            fmt_yend = '=yend size=%d part=%d pcrc32=%s' % (
                len(content),
                content.part,
                content.crc32(),
            )

        # Write =ybegin line
        _encoded.write(fmt_ybegin + EOL)
        # Write =ypart line
        _encoded.write(fmt_ypart + EOL)

        if not content.open():
            return None

        # Prepare our result set
        results = ""

        # Column is used for decoding
        column = 0
        crc = BIN_MASK

        # We need to parse the content until we either reach
        # the end of the file or get to an 'end' tag
        while True:
            # Read in our data
            data = content.stream.read(mem_buf)
            if not data:
                # We're done
                break

            if FAST_YENC_SUPPORT:
                try:
                    _results, crc, column = encode_string(data, crc, column)
                    # Append our parsed content onto our ongoing buffer
                    results += _results

                except YencError as e:
                    logger.error("Failed to encode Yenc for %s." % content)
                    logger.debug('Yenc exception: %s' % (str(e)))
                    return None

            else:
                # The slow and painful way; the below looks complicated
                # but it really isn't at the end of the day; yEnc is
                # pretty basic;
                #  - first we translate all of the characters by adding
                #    42 to their value with the exception of a few special
                #    characters that are explicitly reserved for the yEnc
                #    language (and conflict with the NNTP Server language).
                #
                #  - next, we need to apply our ENCODE_SPECIAL_MAP to be
                #    sure to handle the characters that are reserved as
                #    special keywords used by both NNTP Servers and the yEnc
                #    protocol itself.
                #
                #  - finally we want to prevent our string from going on for
                #    too many characters (horizontally).  So we need to split
                #    our content up
                #

                idx = 0
                while idx < len(data):
                    _byte = (ord(data[idx]) + 42) & 0xff
                    if _byte in YENC_ENCODE_ESCAPED_CHARACTERS:
                        _byte = (_byte + 64) & 0xff
                        # Escape sequence
                        results += '='

                    # Store our character
                    results += chr(_byte)

                    # Increment Index
                    idx += 1

            # Our offset
            offset = 0

            while offset < (len(results) - self.linelen + 1):
                eol = offset + self.linelen
                if results[offset:eol][-1] == '=':
                    # Lines can't end with the escape sequence (=). If we get
                    # here then this one did. We just adjust our end-of-line
                    # by 1 and keep moving
                    eol -= 1

                _encoded.write(results[offset:eol] + EOL)
                offset = eol

            if offset < len(results):
                # Carry the unwritten tail over into the next iteration
                results = results[-(len(results) - offset):]

            else:
                # reset string
                results = ''

        # We're done reading our data
        content.close()

        if len(results):
            # We still have content left in our buffer
            _encoded.write(results + EOL)

        # Write footer
        _encoded.write(fmt_yend + EOL)

        if _encoded:
            # close article when complete
            _encoded.close()

        # Return our encoded object
        return _encoded

    def detect(self, line, relative=True):
        """
        A Simple function that can be used to determine if there is
        yEnc content on the line being checked.

        If relative is set to true, we additionally check the line
        content against content relative to the decoding process (`What are
        we expecting to have right now?`). For example, the `end` token would
        be ignored if we haven't received a `begin` first.

        It returns None if there is no yEnc key line, otherwise
        it returns a dictionary of the keys and their mapped values.

        """
        yenc_re = YENC_RE.match(line)
        if not yenc_re:
            return None

        # Merge Results (drop empty captures; remap to canonical keys)
        f_map = dict((YENC_KEY_MAP[k], v)
                     for k, v in yenc_re.groupdict().iteritems() if v)

        # Tidy filename (whitespace)
        if 'name' in f_map:
            f_map['name'] = basename(f_map['name']).strip()

        if relative:
            # detect() relative to what has been decoded
            if f_map['key'] in self._meta:
                # We already processed this key
                return None

            if f_map['key'] == 'end' and 'begin' not in self._meta:
                # We can't handle this key
                return None

            if f_map['key'] == 'part' and 'begin' not in self._meta:
                # We can't handle this key
                return None

        # Integer types
        for kw in ['line', 'size', 'total', 'begin', 'end', 'part']:
            if kw in f_map:
                try:
                    f_map[kw] = int(f_map[kw])

                except (TypeError, ValueError):
                    # Eliminate bad kw
                    del f_map[kw]

        return f_map

    def decode(self, stream):
        """
        Decode yEnc data from the given stream into self.decoded (an
        NNTPBinaryContent object).

        Returns True if the stream was exhausted before the article was
        complete (more data is expected); otherwise returns self.decoded
        with whatever was read.
        """

        # We need to parse the content until we either reach
        # the end of the file or get to an 'end' tag
        while self.decode_loop():
            # fall_back ptr so we can rewind if we over-read
            ptr = stream.tell()

            # Read in our data
            data = stream.readline()
            if not data:
                # We're done for now
                return True

            # Total Line Tracking
            self._total_lines += 1

            # Detect a yEnc line
            _meta = self.detect(data, relative=False)
            if _meta is not None:
                #
                # We just read a yEnc keyword token such as
                # begin, part, or end
                #
                if _meta['key'] in self._meta:
                    # We already processed this key; uh oh
                    # Fix our stream
                    stream.seek(ptr, SEEK_SET)

                    # Fix our line count
                    self._total_lines -= 1

                    # We're done
                    break

                if _meta['key'] == 'end' and \
                   len(set(('begin', 'part')) - set(self._meta)) == 2:
                    # Why did we get an end before a begin or part?
                    # Just ignore it and keep going
                    continue

                # store our key
                self._meta[_meta['key']] = _meta

                if 'end' in self._meta:
                    # Mark the binary as being valid
                    self.decoded._is_valid = True

                    # We're done!
                    break

                elif _meta['key'] == 'begin':
                    # Depending on the version of yEnc we're using binary
                    # content starts now; therefore we create our binary
                    # instance now

                    if 'name' not in _meta:
                        # A begin token without a name?
                        # Just ignore it and keep going
                        continue

                    # Save part no globally if present (for sorting)
                    self._part = _meta.get('part', 1)

                    # Create our binary instance
                    self.decoded = NNTPBinaryContent(
                        filepath=_meta['name'],
                        part=self._part,
                        work_dir=self.work_dir,
                    )

                elif _meta['key'] == 'part':

                    if 'begin' not in self._meta:
                        # we must have a begin if we have a part
                        # This is a messed up message; treat this
                        # as junk and keep going
                        continue

                    # Save part no globally if present (for sorting)
                    self._part = _meta.get('part', self._part)

                    # Update our Binary File if necessary
                    self.decoded.part = self._part

                continue

            if len(set(('begin', 'part')) - set(self._meta)) == 2:
                # We haven't found the start yet which means we should just
                # keep going until we find it
                continue

            if FAST_YENC_SUPPORT:
                try:
                    decoded, self._crc, self._escape = \
                        decode_string(data, self._crc, self._escape)

                except YencError:
                    logger.warning(
                        "Yenc corruption detected on line %d." % self._lines, )

                    # Line Tracking
                    self._lines += 1

                    # keep storing our data
                    continue

            else:
                # The slow and painful way; the below looks complicated
                # but it really isn't at the end of the day; yEnc is
                # pretty basic;
                #  - first we need to translate the special keyword tokens
                #    that are used by the yEnc language. We also want to
                #    ignore any trailing white space or new lines. This
                #    occurs by applying our DECODE_SPECIAL_MAP to the line
                #    being processed.
                #
                #  - finally we translate the remaining characters by taking
                #    away 42 from their value.
                #
                decoded = YENC_DECODE_SPECIAL_RE.sub(
                    lambda x: YENC_DECODE_SPECIAL_MAP[x.group()],
                    data,
                ).translate(YENC42)

                # CRC Calculations (the fast path updates the crc inside
                # decode_string() instead)
                self._calc_crc(decoded)

            # Line Tracking
            self._lines += 1

            # Track the number of bytes decoded
            self._decoded += len(decoded)

            # Write data to out stream
            self.decoded.write(decoded)

            if self._max_bytes > 0 and self._decoded >= self._max_bytes:
                # If we specified a limit and hit it then we're done at
                # this point. Before we do so; advance to the end of our
                # stream
                stream.seek(0, SEEK_END)

                # We're done
                break

        # Reset our meta tracking
        self._meta = {}

        # Reset part information
        self._part = 1

        if self.decoded:
            # close article when complete
            self.decoded.close()

        # Return what we do have
        return self.decoded

    def reset(self):
        """
        Reset our decoded content
        """
        super(CodecYenc, self).reset()

        # Tracks part no; defaults to 1 and shifts if it's determined
        # that we're another part
        self._part = 1

        # Used for internal meta tracking when using the decode()
        self._meta = {}

        # Our Binary Object we can reference while we decode
        # content
        self.decoded = None

    def __lt__(self, other):
        """
        Sorts by part number
        """
        return self._part < other._part

    def __str__(self):
        """
        Return a printable version of the file being read
        """

        # Build a string using the data we know
        if self.decoded:
            return str(self.decoded)

        if 'begin' in self._meta:
            # NOTE(review): decode() stores per-token dicts under keys like
            # 'begin'/'part'/'end', so a top-level 'name' entry is unlikely
            # to exist and this usually falls back to the default — confirm
            # whether self._meta['begin'].get('name') was intended
            fname = self._meta.get('name', 'Unknown.File')
        else:
            fname = 'Undetermined.File'

        return '%s' % (fname)

    def __repr__(self):
        """
        Return a printable object
        """
        return '<CodecYenc lines_processed=%d />' % (self._lines, )
Example #23
0
    def decode(self, stream):
        """
        Decode yEnc content read from `stream` into an NNTPBinaryContent
        object stored on self.decoded.

        Returns True if the stream ran dry before an '=yend' token was
        seen (meaning more data is expected on a later call); otherwise
        returns self.decoded (None if no yEnc content was found).
        """

        # We need to parse the content until we either reach
        # the end of the file or get to an 'end' tag
        while self.decode_loop():
            # fall_back ptr so we can rewind if we read a token that
            # belongs to the next message
            ptr = stream.tell()

            # Read in our data
            data = stream.readline()
            if not data:
                # We're done for now
                return True

            # Total Line Tracking
            self._total_lines += 1

            # Detect a yEnc line
            _meta = self.detect(data, relative=False)
            if _meta is not None:
                #
                # We just read a yEnc keyword token such as
                # begin, part, or end
                #
                if _meta['key'] in self._meta:
                    # We already processed this key; this token must
                    # belong to the next article, so rewind our stream
                    stream.seek(ptr, SEEK_SET)

                    # Fix our line count
                    self._total_lines -= 1

                    # We're done
                    break

                # A set difference of 2 means neither 'begin' nor 'part'
                # has been recorded yet
                if _meta['key'] == 'end' and \
                   len(set(('begin', 'part')) - set(self._meta)) == 2:
                    # Why did we get an end before a begin or part?
                    # Just ignore it and keep going
                    continue

                # store our key
                self._meta[_meta['key']] = _meta

                if 'end' in self._meta:
                    # Mark the binary as being valid
                    self.decoded._is_valid = True

                    # We're done!
                    break

                elif _meta['key'] == 'begin':
                    # Depending on the version of yEnc we're using, binary
                    # content starts now; therefore we create our binary
                    # instance now

                    if 'name' not in _meta:
                        # A begin token without a filename is unusable;
                        # just ignore it and keep going
                        continue

                    # Save part no globally if present (for sorting)
                    self._part = _meta.get('part', 1)

                    # Create our binary instance
                    self.decoded = NNTPBinaryContent(
                        filepath=_meta['name'],
                        part=self._part,
                        work_dir=self.work_dir,
                    )

                elif _meta['key'] == 'part':

                    if 'begin' not in self._meta:
                        # we must have a begin if we have a part
                        # This is a messed up message; treat this
                        # as junk and keep going
                        continue

                    # Save part no globally if present (for sorting)
                    self._part = _meta.get('part', self._part)

                    # Update our Binary File if necessary
                    self.decoded.part = self._part

                continue

            # Neither 'begin' nor 'part' seen yet (difference of 2)
            if len(set(('begin', 'part')) - set(self._meta)) == 2:
                # We haven't found the start yet which means we should just
                # keep going until we find it
                continue

            if FAST_YENC_SUPPORT:
                # C-accelerated decode; carries crc and escape state
                # across lines
                try:
                    decoded, self._crc, self._escape = \
                        decode_string(data, self._crc, self._escape)

                except YencError:
                    logger.warning(
                        "Yenc corruption detected on line %d." %
                        self._lines,
                    )

                    # Line Tracking
                    self._lines += 1

                    # keep storing our data
                    continue

            else:
                # The slow and painful way; the below looks complicated
                # but it really isn't at the end of the day; yEnc is
                # pretty basic:
                #  - first we need to translate the special keyword tokens
                #    that are used by the yEnc language. We also want to
                #    ignore any trailing white space or new lines. This
                #    occurs by applying our DECODE_SPECIAL_MAP to the line
                #    being processed.
                #
                #  - finally we translate the remaining characters by taking
                #    away 42 from their value.
                #
                decoded = YENC_DECODE_SPECIAL_RE.sub(
                    lambda x: YENC_DECODE_SPECIAL_MAP[x.group()], data,
                ).translate(YENC42)

                # CRC Calculations
                self._calc_crc(decoded)

            # Line Tracking
            self._lines += 1

            # Track the number of bytes decoded
            self._decoded += len(decoded)

            # Write data to out stream
            self.decoded.write(decoded)

            if self._max_bytes > 0 and self._decoded >= self._max_bytes:
                # If we specified a limit and hit it then we're done at
                # this point. Before we do so; advance to the end of our
                # stream
                stream.seek(0, SEEK_END)

                # We're done
                break

        # Reset our meta tracking
        self._meta = {}

        # Reset part information
        self._part = 1

        if self.decoded:
            # close article when complete
            self.decoded.close()

        # Return what we do have
        return self.decoded
Exemple #24
0
class CodecYenc(CodecBase):
    """
    An encoder/decoder for the yEnc (v1.3) encoding commonly used when
    posting binary content to NNTP (Usenet) servers.

    Uses the C-accelerated `yenc` module when FAST_YENC_SUPPORT is set,
    falling back to a pure-Python translate-based implementation
    otherwise.  NOTE: this code is Python 2 (it relies on
    dict.iteritems() and byte-string semantics).
    """

    def __init__(self, descriptor=None, work_dir=None,
                 linelen=128, *args, **kwargs):
        """
        Initialize the codec; `linelen` caps the number of encoded
        characters written per output line.
        """
        super(CodecYenc, self).__init__(
                descriptor=descriptor, work_dir=work_dir, *args, **kwargs)

        # Used for internal meta tracking when using the decode()
        self._meta = {}

        # Our Binary Object we can reference while we decode
        # content
        self.decoded = None

        # Used for encoding; This defines the maximum number of (encoded)
        # characters to display per line.
        self.linelen = linelen

    def parse_article(self, subject, *args, **kwargs):
        """
        Takes an article subject header and returns its parsed content
        (a dictionary) if successful. Otherwise it returns None.
        """

        matched = NZB_SUBJECT_PARSE.match(subject)
        if matched is None:
            # subject is not parsable
            return None

        results = {}

        # Trim results
        if matched.group('desc') is not None:
            results['desc'] = re.sub('[\s-]+$', '', matched.group('desc'))
        if matched.group('fname') is not None:
            results['fname'] = matched.group('fname').strip()

        # Support conversion of integers
        for _attr in ['index', 'count', 'yindex', 'ycount', 'size']:
            if matched.group(_attr) is not None:
                results[_attr] = int(matched.group(_attr))

        return results

    def encode(self, content, mem_buf=DEFAULT_BUFFER_SIZE):
        """
        Encodes an NNTPContent object (or a filename) passed in and
        returns a new NNTPAsciiContent object holding the yEnc-encoded
        article body, or None on failure.
        """

        if isinstance(content, NNTPContent):
            # Create our ascii instance
            _encoded = NNTPAsciiContent(
                filepath=content.filename,
                part=content.part,
                total_parts=content.total_parts,
                sort_no=content.sort_no,
                work_dir=self.work_dir,
                # We want to ensure we're working with a unique attached file
                unique=True,
            )

        else:
            # If we reach here, we presume our content is a filename

            # Create our ascii instance
            _encoded = NNTPAsciiContent(
                filepath=content,
                work_dir=self.work_dir,
                # We want to ensure we're working with a unique attached file
                unique=True,
            )

            # Convert our content object into an NNTPContent object
            content = NNTPContent(
                filepath=content,
                work_dir=self.work_dir,
            )

        # yEnc (v1.3) begin
        fmt_ybegin = '=ybegin part=%d total=%d line=%d size=%d name=%s' % (
            content.part, content.total_parts, self.linelen,
            len(content), content.filename,
        )

        # yEnc part
        fmt_ypart = '=ypart begin=%d end=%d' % (
            content.begin() + 1,
            content.end(),
        )

        if isinstance(content._parent, NNTPContent):
            # yEnc end; include both the part crc32 and the full-file
            # crc32 taken from the parent
            fmt_yend = '=yend size=%d part=%d pcrc32=%s crc32=%s' % (
                len(content), content.part,
                content.crc32(), content._parent.crc32(),
            )

        else:
            # yEnc end
            fmt_yend = '=yend size=%d part=%d pcrc32=%s' % (
                len(content), content.part, content.crc32(),
            )

        # Write =ybegin line
        _encoded.write(fmt_ybegin + EOL)
        # Write =ypart line
        _encoded.write(fmt_ypart + EOL)

        if not content.open():
            return None

        # Prepare our result set
        results = ""

        # Column is used for decoding
        column = 0
        crc = BIN_MASK

        # We need to parse the content until we either reach
        # the end of the file or get to an 'end' tag
        while True:
            # Read in our data
            data = content.stream.read(mem_buf)
            if not data:
                # We're done
                break

            if FAST_YENC_SUPPORT:
                try:
                    _results, crc, column = encode_string(data, crc, column)
                    # Append our parsed content onto our ongoing buffer
                    results += _results

                except YencError as e:
                    logger.error("Failed to encode Yenc for %s." % content)
                    logger.debug('Yenc exception: %s' % (str(e)))
                    return None

            else:
                # The slow and painful way, the below looks complicated
                # but it really isn't at the the end of the day; yEnc is
                # pretty basic;
                #  - first we translate the all of the characters by adding
                #    42 to their value with the exception of a few special
                #    characters that are explicitly reserved for the yEnc
                #    language (and conflict with the NNTP Server language).
                #
                #  - next, we need to apply our ENCODE_SPECIAL_MAP to be
                #    sure to handle the characters that are reserved as
                #    special keywords used by both NNTP Servers and the yEnc
                #    protocol itself.
                #
                #  - finally we want to prevent our string from going on for
                #    to many characters (horizontally).  So we need to split
                #    our content up
                #

                idx = 0
                while idx < len(data):
                    _byte = (ord(data[idx]) + 42) & 0xff
                    if _byte in YENC_ENCODE_ESCAPED_CHARACTERS:
                        _byte = (_byte + 64) & 0xff
                        # Escape sequence
                        results += '='

                    # Store our character
                    results += chr(_byte)

                    # Increment Index
                    idx += 1

            # Our offset
            offset = 0

            # Flush full lines out of the accumulated buffer
            while offset < (len(results)-self.linelen+1):
                eol = offset+self.linelen
                if results[offset:eol][-1] == '=':
                    # Lines can't end with the escape sequence (=). If we get
                    # here then this one did. We just adjust our end-of-line
                    # by 1 and keep moving
                    eol -= 1

                _encoded.write(results[offset:eol] + EOL)
                offset = eol

            if offset < len(results):
                # Carry the partial line over to the next read
                results = results[-(len(results) - offset):]

            else:
                # reset string
                results = ''

        # We're done reading our data
        content.close()

        if len(results):
            # We still have content left in our buffer
            _encoded.write(results + EOL)

        # Write footer
        _encoded.write(fmt_yend + EOL)

        if _encoded:
            # close article when complete
            _encoded.close()

        # Return our encoded object
        return _encoded

    def detect(self, line, relative=True):
        """
        A Simple function that can be used to determine if there is
        yEnc content on the line being checked.

        If relative is set to true, we additionally check the line
        content against content relative to the decoding process (`What are
        we expecting to have right now?`). For example, the `end` token would
        be ignored if we haven't received a `begin` first.

        It returns None if there is no yEnc key line, otherwise
        it returns a dictionary of the keys and their mapped values.

        """
        yenc_re = YENC_RE.match(line)
        if not yenc_re:
            return None

        # Merge Results (iteritems() is Python 2 only)
        f_map = dict((YENC_KEY_MAP[k], v) for k, v
                     in yenc_re.groupdict().iteritems() if v)

        # Tidy filename (whitespace)
        if 'name' in f_map:
            f_map['name'] = basename(f_map['name']).strip()

        if relative:
            # detect() relative to what has been decoded
            if f_map['key'] in self._meta:
                # We already processed this key
                return None

            if f_map['key'] == 'end' and 'begin' not in self._meta:
                # We can't handle this key
                return None

            if f_map['key'] == 'part' and 'begin' not in self._meta:
                # We can't handle this key
                return None

        # Integer types
        for kw in ['line', 'size', 'total', 'begin', 'end', 'part']:
            if kw in f_map:
                try:
                    f_map[kw] = int(f_map[kw])

                except (TypeError, ValueError):
                    # Eliminate bad kw
                    del f_map[kw]

        return f_map

    def decode(self, stream):
        """
        Decode yEnc content read from `stream` into an NNTPBinaryContent
        object stored on self.decoded.

        Returns True if the stream ran dry before an '=yend' token was
        seen (meaning more data is expected on a later call); otherwise
        returns self.decoded (None if no yEnc content was found).
        """

        # We need to parse the content until we either reach
        # the end of the file or get to an 'end' tag
        while self.decode_loop():
            # fall_back ptr so we can rewind if we read a token that
            # belongs to the next message
            ptr = stream.tell()

            # Read in our data
            data = stream.readline()
            if not data:
                # We're done for now
                return True

            # Total Line Tracking
            self._total_lines += 1

            # Detect a yEnc line
            _meta = self.detect(data, relative=False)
            if _meta is not None:
                #
                # We just read a yEnc keyword token such as
                # begin, part, or end
                #
                if _meta['key'] in self._meta:
                    # We already processed this key; this token must
                    # belong to the next article, so rewind our stream
                    stream.seek(ptr, SEEK_SET)

                    # Fix our line count
                    self._total_lines -= 1

                    # We're done
                    break

                # A set difference of 2 means neither 'begin' nor 'part'
                # has been recorded yet
                if _meta['key'] == 'end' and \
                   len(set(('begin', 'part')) - set(self._meta)) == 2:
                    # Why did we get an end before a begin or part?
                    # Just ignore it and keep going
                    continue

                # store our key
                self._meta[_meta['key']] = _meta

                if 'end' in self._meta:
                    # Mark the binary as being valid
                    self.decoded._is_valid = True

                    # We're done!
                    break

                elif _meta['key'] == 'begin':
                    # Depending on the version of yEnc we're using, binary
                    # content starts now; therefore we create our binary
                    # instance now

                    if 'name' not in _meta:
                        # A begin token without a filename is unusable;
                        # just ignore it and keep going
                        continue

                    # Save part no globally if present (for sorting)
                    self._part = _meta.get('part', 1)

                    # Create our binary instance
                    self.decoded = NNTPBinaryContent(
                        filepath=_meta['name'],
                        part=self._part,
                        work_dir=self.work_dir,
                    )

                elif _meta['key'] == 'part':

                    if 'begin' not in self._meta:
                        # we must have a begin if we have a part
                        # This is a messed up message; treat this
                        # as junk and keep going
                        continue

                    # Save part no globally if present (for sorting)
                    self._part = _meta.get('part', self._part)

                    # Update our Binary File if necessary
                    self.decoded.part = self._part

                continue

            # Neither 'begin' nor 'part' seen yet (difference of 2)
            if len(set(('begin', 'part')) - set(self._meta)) == 2:
                # We haven't found the start yet which means we should just
                # keep going until we find it
                continue

            if FAST_YENC_SUPPORT:
                # C-accelerated decode; carries crc and escape state
                # across lines
                try:
                    decoded, self._crc, self._escape = \
                        decode_string(data, self._crc, self._escape)

                except YencError:
                    logger.warning(
                        "Yenc corruption detected on line %d." %
                        self._lines,
                    )

                    # Line Tracking
                    self._lines += 1

                    # keep storing our data
                    continue

            else:
                # The slow and painful way; the below looks complicated
                # but it really isn't at the end of the day; yEnc is
                # pretty basic:
                #  - first we need to translate the special keyword tokens
                #    that are used by the yEnc language. We also want to
                #    ignore any trailing white space or new lines. This
                #    occurs by applying our DECODE_SPECIAL_MAP to the line
                #    being processed.
                #
                #  - finally we translate the remaining characters by taking
                #    away 42 from their value.
                #
                decoded = YENC_DECODE_SPECIAL_RE.sub(
                    lambda x: YENC_DECODE_SPECIAL_MAP[x.group()], data,
                ).translate(YENC42)

                # CRC Calculations
                self._calc_crc(decoded)

            # Line Tracking
            self._lines += 1

            # Track the number of bytes decoded
            self._decoded += len(decoded)

            # Write data to out stream
            self.decoded.write(decoded)

            if self._max_bytes > 0 and self._decoded >= self._max_bytes:
                # If we specified a limit and hit it then we're done at
                # this point. Before we do so; advance to the end of our
                # stream
                stream.seek(0, SEEK_END)

                # We're done
                break

        # Reset our meta tracking
        self._meta = {}

        # Reset part information
        self._part = 1

        if self.decoded:
            # close article when complete
            self.decoded.close()

        # Return what we do have
        return self.decoded

    def reset(self):
        """
        Reset our decoded content
        """
        super(CodecYenc, self).reset()

        # Tracks part no; defaults to 1 and shifts if it's determined
        # that we're another part
        self._part = 1

        # Used for internal meta tracking when using the decode()
        self._meta = {}

        # Our Binary Object we can reference while we decode
        # content
        self.decoded = None

    def __lt__(self, other):
        """
        Sorts by part number
        """
        return self._part < other._part

    def __str__(self):
        """
        Return a printable version of the file being read
        """

        # Build a string using the data we know
        if self.decoded:
            return str(self.decoded)

        if 'begin' in self._meta:
            # NOTE(review): self._meta maps token keys ('begin', 'part',
            # 'end') to their parsed dictionaries; the filename actually
            # lives at self._meta['begin']['name'], so this top-level
            # lookup appears to always fall back to 'Unknown.File' --
            # confirm and fix separately.
            fname = self._meta.get('name', 'Unknown.File')
        else:
            fname = 'Undetermined.File'

        return '%s' % (
            fname
        )

    def __repr__(self):
        """
        Return a printable object
        """
        return '<CodecYenc lines_processed=%d />' % (
            self._lines,
        )
Exemple #25
0
    def test_mime(self):
        """
        Verify mime type detection across several kinds of content.
        """
        ascii_content = NNTPAsciiContent()
        binary_content = NNTPBinaryContent()

        # Freshly created (empty) files carry no detectable mime type
        assert (ascii_content.mime().type() == 'application/x-empty')
        assert (binary_content.mime().type() == 'application/x-empty')

        # A real jpeg image is identified from its contents
        binary_content = NNTPBinaryContent(join(self.var_dir, 'joystick.jpg'))
        assert (binary_content.mime().type() == 'image/jpeg')

        # Even when saved under an unrelated filename, detection still
        # works from the file contents
        assert (binary_content.save(
            join(self.tmp_dir, 'weird.name'), copy=True) is True)
        assert (binary_content.mime().type() == 'image/jpeg')

        # Random data stored under a .rar extension is recognized from
        # the file's name/type
        rar_file = join(self.tmp_dir, 'test.rar')
        assert (self.touch(rar_file, size='2KB', random=True) is True)
        binary_content = NNTPBinaryContent(rar_file)
        assert (binary_content.mime().type() == 'application/x-rar-compressed')
Exemple #26
0
    def test_yenc_multi_message(self):
        """
        Tests the handling of a yenc multi-message: two article parts
        are fetched out of order, sorted, assembled into one file and
        compared against the known-good source image.
        """

        # Create a non-secure connection
        sock = NNTPConnection(
            host=self.nttp_ipaddr,
            port=self.nntp_portno,
            username='******',
            password='******',
            secure=False,
            join_group=True,
        )

        assert sock.connect() is True
        assert sock._iostream == NNTPIOStream.RFC3977_GZIP

        # Sorted set ensures parts assemble in key() order regardless of
        # retrieval order
        articles = sortedset(key=lambda x: x.key())

        # We intentionally fetch the content out of order;
        # ideally we'd want 20 followed by 21
        articles.add(
            sock.get(id='21', work_dir=self.tmp_dir, group=self.common_group))
        assert sock.group_name == self.common_group
        articles.add(sock.get(id='20', work_dir=self.tmp_dir))
        assert sock.group_name == self.common_group

        newfile = NNTPBinaryContent(
            # This looks rough;
            # we're basically looking at the first article stored (since our
            # set is sorted, and then we're looking at the first content entry

            # TODO: update the article function so it's much easier to get
            # an iterator to decoded list
            filepath=iter(iter(articles).next().decoded).next().filename,
            work_dir=self.tmp_dir,
        )

        for article in articles:
            assert isinstance(article, NNTPArticle) is True
            assert len(article.decoded) == 1
            assert isinstance(iter(article.decoded).next(), NNTPBinaryContent)
            assert iter(article.decoded).next().is_valid() is True

            # Build on new file
            newfile.append(iter(article.decoded).next())
            # keep open file count low
            iter(article.decoded).next().close()

        # Compare File against the reference copy shipped with the tests
        decoded_filepath = join(self.var_dir, 'joystick.jpg')
        assert isfile(decoded_filepath)
        with open(decoded_filepath, 'r') as fd_in:
            decoded = fd_in.read()

        # save() detaches the file and relocates it to a new path
        assert isfile(newfile.filepath) is True
        old_filepath = newfile.filepath
        newfile.save()
        new_filepath = newfile.filepath
        assert old_filepath != new_filepath
        assert isfile(old_filepath) is False
        assert isfile(new_filepath) is True

        assert decoded == newfile.getvalue()

        # Close up our socket
        sock.close()

        while len(articles):
            article = articles.pop()
            # length hasn't changed
            assert len(article.decoded) == 1
            old_filepath = iter(article.decoded).next().filepath
            assert isfile(old_filepath) is True

            # If we remove the article, we automatically destroy
            # all associated decoded with it (that aren't detached)
            del article

            # Since there is only 1 attachment per article in this test
            # we can see that the file is now gone
            assert isfile(old_filepath) is False

        # Remove the file
        del newfile

        # We called save() so the file has been detached and will still exist!
        assert isfile(new_filepath) is True

        # cleanup our file
        unlink(new_filepath)
Exemple #27
0
    def encode(self, content=None, name=None, *args, **kwargs):
        """
        Takes a specified path (and or file) and compresses it. If this
        function is successful, it returns a set of NNTPBinaryContent()
        objects that are 'not' detached.

        The function returns None if it fails in any way
        """

        if content is not None:
            self.add(content)

        # Some simple error checking to save from doing too much here
        if len(self) == 0:
            return None

        if not self.can_exe(self._bin):
            return None

        # Resolve the archive name up front; it names the temporary file
        # and is later used as the prefix matched against the volumes
        # 7-Zip produces.
        if not name:
            name = self.name
            if not name:
                name = random_str()

        tmp_path, tmp_file = self.mkstemp(content=name, suffix='.7z')

        # Initialize our command
        execute = [
            # Our Executable 7-Zip Application
            self._bin,
            # Use Add Flag
            'a',
            # Default mode is 7-Zip
            '-t7z',
        ]

        # Password Protection
        if self.password is not None:
            execute.append('-p%s' % self.password)

        # Handle Compression Level
        if self.level is CompressionLevel.Maximum:
            execute.append('-mx9')

        elif self.level is CompressionLevel.Average:
            execute.append('-mx5')

        elif self.level is CompressionLevel.Minimum:
            execute.append('-mx1')

        # Don't prompt for anything
        execute.append('-y')

        # NOTE: the original code re-checked `if not name` at this point
        # and fell back to the temp file's basename; `name` is always
        # resolved before mkstemp() above, so that branch was unreachable
        # and has been removed.

        # Handle 7Z Volume Splitting
        if self.volume_size:
            execute.append('-v%sb' % self.volume_size)

        if self.cpu_cores is not None and self.cpu_cores > 1:
            # create archive using multiple threads
            execute.append('-mmt%d' % self.cpu_cores)

        # Stop Switch Parsing
        execute.append('--')

        # Specify the Destination Path
        execute.append(tmp_file)

        # Add all of our paths now
        for _path in self:
            execute.append(_path)

        # Create our SubProcess Instance
        sp = SubProcess(execute)

        # Start our execution now
        sp.start()

        # Poll the working directory while 7-Zip runs so volumes are
        # picked up as they appear
        found_set = None
        while not sp.is_complete(timeout=1.5):

            found_set = self.watch_dir(
                tmp_path,
                prefix=name,
                ignore=found_set,
            )

        # Handle remaining content
        found_set = self.watch_dir(
            tmp_path,
            prefix=name,
            ignore=found_set,
            seconds=-1,
        )

        # Let the caller know our status
        if not sp.successful():
            # Cleanup Temporary Path
            rm(tmp_path)
            return None

        if not len(found_set):
            return None

        # Create a resultset
        results = sortedset(key=lambda x: x.key())

        # iterate through our found_set and create NNTPBinaryContent()
        # objects from them.
        part = 0
        for path in found_set:
            # Iterate over our found files and determine their part
            # information
            _re_results = SEVEN_ZIP_PART_RE.match(path)
            if _re_results:
                if _re_results.group('part') is not None:
                    part = int(_re_results.group('part'))

                elif _re_results.group('part0') is not None:
                    part = int(_re_results.group('part0'))

                else:
                    part += 1

            else:
                part += 1

            content = NNTPBinaryContent(
                path,
                part=part,
                total_parts=len(found_set),
            )

            # Loaded data is by default detached; we want to attach it
            content.attach()

            # Add our attached content to our results
            results.add(content)

        # Clear our list of objects to archive
        self.clear()

        # Return our set of NNTPBinaryContent() objects
        return results
Exemple #28
0
    def decode(self, content=None, name=None, password=None, *args, **kwargs):
        """
        content must be pointing to a directory containing rar files that can
        be easily sorted on. Alternatively, path can be of type NNTPContent()
        or a set/list of.

        If no password is specified, then the password configuration loaded
        into the class is used instead.

        An NNTPBinaryContent() object containing the contents of the package
        within a sortedset() object.  All decoded() functions have to return
        a resultset() to be consistent with one another.

        """
        if content is not None:
            self.add(content)

        # Some simple error checking to save from doing to much here
        if len(self) == 0:
            return None

        if not self.can_exe(self._unrar):
            return None

        if not password:
            password = self.password

        # Initialize our command
        execute = [
            # Our Executable RAR Application
            self._unrar,
            # Use Add Flag
            'x',
            # Assume Yes
            '-y',
        ]

        # Password Protection
        if password is not None:
            execute.append('-p%s' % password)
        else:
            # Do not prompt for password
            execute.append('-p-')

        if self.keep_broken:
            # Keep Broken Flag
            execute.append('-kb')

        if self.overwrite:
            # Overwrite files
            execute.append('-o+')

        else:
            # Don't overwrite files
            execute.append('-o-')

        if self.freshen:
            # Freshen files
            execute.append('-f')

        # Stop Switch Parsing
        execute.append('--')

        if not name:
            name = self.name
            if not name:
                name = random_str()

        # NOTE(review): a fresh tmp_path is created for every archive in
        # the loop below, but only the final loop iteration's tmp_path is
        # wrapped into the returned NNTPBinaryContent afterwards -- when
        # more than one archive is queued, earlier extractions appear to
        # be orphaned.  Confirm whether multi-archive input is intended.
        for _path in self:
            # Temporary Path
            tmp_path, _ = self.mkstemp(content=name)

            with pushd(tmp_path):
                # Create our SubProcess Instance
                sp = SubProcess(list(execute) + [_path])

                # Start our execution now
                sp.start()

                # Poll the extraction directory while unrar runs
                found_set = None
                while not sp.is_complete(timeout=1.5):

                    found_set = self.watch_dir(
                        tmp_path,
                        ignore=found_set,
                    )

                # Handle remaining content
                found_set = self.watch_dir(
                    tmp_path,
                    ignore=found_set,
                    seconds=-1,
                )

                # Let the caller know our status
                if not sp.successful():
                    # Cleanup Temporary Path
                    rm(tmp_path)
                    return None

                if not len(found_set):
                    logger.warning(
                        'RAR archive (%s) contained no content.' %
                        basename(_path),
                    )

        # Clean our are list of objects to archive
        self.clear()

        # Return path containing unrar'ed content
        results = NNTPBinaryContent(tmp_path)

        # We intentionally attach it's content
        results.attach()

        # Create a sortedset to return
        _resultset = sortedset(key=lambda x: x.key())
        _resultset.add(results)

        # Return our content
        return _resultset
Exemple #29
0
    def test_decoding_yenc_multi_part(self):
        """
        Test decoding of a yEnc multi-part

        This test was generated after visiting http://www.yenc.org and finding
        the examples they provide on their site.

            Downloaded the following zip file:
                http://www.yenc.org/yenc2.zip

            Then extracting it revealed 3 files:
                - 00000020.ntx
                    This is the yEnc file as it would have been seen after
                    being downloaded from the NNTP server (part 1 of 2)

                - 00000021.ntx
                    This is the yEnc file as it would have been seen after
                    being downloaded from the NNTP server (part 2 of 2)

                - joystick.jpg
                    This is what the contents of the file should look like
                    after being decoded (and assembled). This is what we use
                    to test the file against.
        """

        # A simple test for ensuring that the yEnc
        # library exists; otherwise we want this test
        # to fail; the below line will handle this for
        # us; we'll let the test fail on an import error
        import yenc

        # Input Files (the two encoded parts)
        encoded_filepath_1 = join(self.var_dir, '00000020.ntx')
        encoded_filepath_2 = join(self.var_dir, '00000021.ntx')

        assert isfile(encoded_filepath_1)
        assert isfile(encoded_filepath_2)

        # Compare File (the expected, fully decoded result)
        decoded_filepath = join(self.var_dir, 'joystick.jpg')
        assert isfile(decoded_filepath)

        # NOTE(review): nothing below ever writes into these BytesIO
        # objects, so the tell() comparisons further down only verify
        # 0 == 0 — confirm whether they were meant to receive output.
        # Python Solution
        fd1_py = BytesIO()
        fd2_py = BytesIO()

        # C Solution
        fd1_c = BytesIO()
        fd2_c = BytesIO()

        # Initialize Codec
        decoder = CodecYenc(work_dir=self.test_dir)

        contents_py = []
        contents_c = []

        # Force to operate in python (manual/slow) mode
        CodecYenc.FAST_YENC_SUPPORT = False
        with open(encoded_filepath_1, 'r') as fd_in:
            contents_py.append(decoder.decode(fd_in))
        with open(encoded_filepath_2, 'r') as fd_in:
            contents_py.append(decoder.decode(fd_in))

        for x in contents_py:
            # Verify our data is good
            assert x.is_valid() is True

        # Force to operate with the C extension yEnc
        # This require the extensions to be installed
        # on the system
        CodecYenc.FAST_YENC_SUPPORT = True
        with open(encoded_filepath_1, 'r') as fd_in:
            contents_c.append(decoder.decode(fd_in))
        with open(encoded_filepath_2, 'r') as fd_in:
            contents_c.append(decoder.decode(fd_in))

        for x in contents_c:
            # Verify our data is good
            assert x.is_valid() is True

        # Confirm that our output from our python implimentation
        # matches that of our yEnc C version.
        assert fd1_py.tell() == fd1_c.tell()
        assert fd2_py.tell() == fd2_c.tell()

        with open(decoded_filepath, 'r') as fd_in:
            decoded = fd_in.read()

        # Sort our parts back into order before assembling them
        contents_py.sort()
        contents_c.sort()

        content_py = NNTPBinaryContent(
            filepath=contents_py[0].filename,
            save_dir=self.out_dir,
        )
        content_c = NNTPBinaryContent(
            filepath=contents_c[0].filename,
            save_dir=self.out_dir,
        )

        # append() takes a list or another NNTPContent
        # and appends it's content to the end of the content
        content_py.append(contents_py)
        # Bugfix: this previously appended contents_py, which meant the
        # C-decoder output was never actually assembled or verified below
        content_c.append(contents_c)

        assert len(content_py) == len(decoded)
        assert len(content_c) == len(decoded)

        # Compare our processed content with the expected results
        assert content_py.getvalue() == decoded
        assert content_c.getvalue() == decoded
Exemple #30
0
    def encode(self, content=None, name=None, *args, **kwargs):
        """
        Takes a specified path (and or file) and compresses it. If this
        function is successful, it returns a set of NNTPBinaryContent()
        objects that are 'not' detached.

        The function returns None if it fails in any way (no content to
        archive, rar binary not executable, the rar process failed, or
        no archive volumes were produced).
        """

        if content is not None:
            self.add(content)

        # Some simple error checking to save from doing to much here
        if len(self) == 0:
            return None

        # We can't do anything without an executable rar binary
        if not self.can_exe(self._rar):
            return None

        # Guarantee a non-empty archive name; fall back to our stored
        # name, then to a random string
        if not name:
            name = self.name
            if not name:
                name = random_str()

        # Temporary working directory and the target .rar file within it
        tmp_path, tmp_file = self.mkstemp(content=name, suffix='.rar')

        # Initialize our command
        execute = [
            # Our Executable RAR Application
            self._rar,
            # Use Add Flag
            'a',
        ]

        # Password Protection
        if self.password is not None:
            execute.append('-p%s' % self.password)

        # Handle Compression Level
        if self.level is CompressionLevel.Maximum:
            execute.append('-m5')

        elif self.level is CompressionLevel.Average:
            execute.append('-m3')

        elif self.level is CompressionLevel.Minimum:
            execute.append('-m0')

        # Exclude base directory from archive
        execute.append('-ep1')

        # Now place content within directory identifed by it's name.
        # (name is guaranteed non-empty above; the old dead fallback to
        # the temp filename here has been removed)
        execute.append('-ap%s' % name)

        # Handle RAR Volume Splitting
        if self.volume_size:
            execute.append('-v%sb' % self.volume_size)

        # Handle Recovery Record
        if self.recovery_record is not None:
            execute.append('-rr%s' % self.recovery_record)

        if self.cpu_cores is not None and self.cpu_cores > 1:
            # create archive using multiple threads
            execute.append('-mt%d' % self.cpu_cores)

        # Stop Switch Parsing
        execute.append('--')

        # Specify the Destination Path
        execute.append(tmp_file)

        # Add all of our paths now
        for _path in self:
            execute.append(_path)

        # Create our SubProcess Instance
        sp = SubProcess(execute)

        # Start our execution now
        sp.start()

        # Poll the working directory while rar runs so we pick up the
        # volumes as they appear
        found_set = None
        while not sp.is_complete(timeout=1.5):

            found_set = self.watch_dir(
                tmp_path,
                prefix=name,
                ignore=found_set,
            )

        # Handle remaining content with one final, non-blocking scan
        found_set = self.watch_dir(
            tmp_path,
            prefix=name,
            ignore=found_set,
            seconds=-1,
        )

        # Let the caller know our status
        if not sp.successful():
            # Cleanup Temporary Path
            rm(tmp_path)
            return None

        if not len(found_set):
            return None

        # Create a resultset
        results = sortedset(key=lambda x: x.key())

        # iterate through our found_set and create NNTPBinaryContent()
        # objects from them.
        part = 0
        for path in found_set:
            # Iterate over our found files and determine their part
            # information; if the filename doesn't encode a part number
            # we just count sequentially
            _re_results = RAR_PART_RE.match(path)
            if _re_results:
                if _re_results.group('part') is not None:
                    part = int(_re_results.group('part'))

                else:
                    part += 1

            else:
                part += 1

            content = NNTPBinaryContent(
                path,
                part=part,
                total_parts=len(found_set),
            )

            # Loaded data is by default detached; we want to attach it
            content.attach()

            # Add our attached content to our results
            results.add(content)

        # Clean our are list of objects to archive
        self.clear()

        # Return our attached results
        return results
Exemple #31
0
    def decode(self, stream):
        """
        Decode yEnc content read from `stream` into self.decoded.

        Keyword lines (begin / part / end) drive the parse state stored
        in self._meta; everything between them is treated as encoded
        payload and written to self.decoded.

        Decoding stops when the stream is exhausted (returns True), when
        an 'end' keyword is seen, when a duplicate keyword suggests the
        next message has started (the stream is rewound to that line),
        or once self._max_bytes (when > 0) bytes have been produced.
        Otherwise returns the self.decoded object built so far.
        """

        # We need to parse the content until we either reach
        # the end of the file or get to an 'end' tag
        while self.decode_loop():
            # fall_back ptr: remember where this line started so we can
            # rewind if we over-read into the next message
            ptr = stream.tell()

            # Read in our data
            data = stream.readline()
            if not data:
                # We're done for now
                return True

            # Total Line Tracking
            self._total_lines += 1

            # Detect a yEnc line
            _meta = self.detect(data, relative=False)
            if _meta is not None:
                #
                # We just read a yEnc keyword token such as
                # begin, part, or end
                #
                if _meta['key'] in self._meta:
                    # We already processed this key; uh oh
                    # Fix our stream
                    stream.seek(ptr, SEEK_SET)

                    # Fix our line count
                    self._total_lines -= 1

                    # We're done
                    break

                # The set-difference == 2 test means: neither 'begin'
                # nor 'part' has been seen yet
                if _meta['key'] == 'end' and \
                   len(set(('begin', 'part')) - set(self._meta)) == 2:
                    # Why did we get an end before a begin or part?
                    # Just ignore it and keep going
                    continue

                # store our key
                self._meta[_meta['key']] = _meta

                if 'end' in self._meta:
                    # Mark the binary as being valid
                    self.decoded._is_valid = True

                    # We're done!
                    break

                elif _meta['key'] == 'begin':
                    # Depending on the version of yEnc we're using binary
                    # content starts now; thefore we create our binary
                    # instance now

                    if 'name' not in _meta:
                        # Why did we get a begin before a part
                        # Just ignore it and keep going
                        continue

                    # Save part no globally if present (for sorting)
                    self._part = _meta.get('part', 1)

                    # Create our binary instance
                    self.decoded = NNTPBinaryContent(
                        filepath=_meta['name'],
                        part=self._part,
                        work_dir=self.work_dir,
                    )

                elif _meta['key'] == 'part':

                    if 'begin' not in self._meta:
                        # we must have a begin if we have a part
                        # This is a messed up message; treat this
                        # as junk and keep going
                        continue

                    # Save part no globally if present (for sorting)
                    self._part = _meta.get('part', self._part)

                    # Update our Binary File if nessisary
                    self.decoded.part = self._part

                continue

            # Neither 'begin' nor 'part' seen yet (same set test as above)
            if len(set(('begin', 'part')) - set(self._meta)) == 2:
                # We haven't found the start yet which means we should just
                # keep going until we find it
                continue

            if FAST_YENC_SUPPORT:
                # Fast path: the C extension decodes the line and keeps
                # the running crc/escape state for us
                try:
                    decoded, self._crc, self._escape = \
                        decode_string(data, self._crc, self._escape)

                except YencError:
                    logger.warning(
                        "Yenc corruption detected on line %d." % self._lines, )

                    # Line Tracking
                    self._lines += 1

                    # keep storing our data
                    continue

            else:
                # The slow and painful way, the below looks complicated
                # but it really isn't at the the end of the day; yEnc is
                # pretty basic;
                #  - first we need to translate the special keyword tokens
                #    that are used by the yEnc language. We also want to
                #    ignore any trailing white space or new lines. This
                #    occurs by applying our DECODE_SPECIAL_MAP to the line
                #    being processed.
                #
                #  - finally we translate the remaining characters by taking
                #    away 42 from their value.
                #
                decoded = YENC_DECODE_SPECIAL_RE.sub(
                    lambda x: YENC_DECODE_SPECIAL_MAP[x.group()],
                    data,
                ).translate(YENC42)

                # CRC Calculations
                self._calc_crc(decoded)

            # Line Tracking
            self._lines += 1

            # Track the number of bytes decoded
            self._decoded += len(decoded)

            # Write data to out stream
            self.decoded.write(decoded)

            if self._max_bytes > 0 and self._decoded >= self._max_bytes:
                # If we specified a limit and hit it then we're done at
                # this point. Before we do so; advance to the end of our
                # stream
                stream.seek(0, SEEK_END)

                # We're done
                break

        # Reset our meta tracking
        self._meta = {}

        # Reset part information
        self._part = 1

        if self.decoded:
            # close article when complete
            self.decoded.close()

        # Return what we do have
        return self.decoded
Exemple #32
0
    def test_yenc_multi_message(self):
        """
        Tests the handling of a yenc multi-message
        """

        # Establish a plain (non-secure) connection to our test server
        sock = NNTPConnection(
            host=self.nttp_ipaddr,
            port=self.nntp_portno,
            username='******',
            password='******',
            secure=False,
            join_group=True,
        )

        assert sock.connect() is True
        assert sock._iostream == NNTPIOStream.RFC3977_GZIP

        articles = sortedset(key=lambda x: x.key())

        # Deliberately fetch the parts out of order; the sorted set is
        # responsible for putting 20 back ahead of 21
        articles.add(sock.get(id='21', work_dir=self.tmp_dir, group=self.common_group))
        assert sock.group_name == self.common_group
        articles.add(sock.get(id='20', work_dir=self.tmp_dir))
        assert sock.group_name == self.common_group

        # Seed a fresh binary object from the first decoded attachment
        # of the lowest-keyed article in the set
        # TODO: update the article function so it's much easier to get
        # an iterator to decoded list
        first_article = next(iter(articles))
        first_payload = next(iter(first_article.decoded))
        newfile = NNTPBinaryContent(
            filepath=first_payload.filename,
            work_dir=self.tmp_dir,
        )

        for article in articles:
            assert isinstance(article, NNTPArticle) is True
            assert len(article.decoded) == 1

            payload = next(iter(article.decoded))
            assert isinstance(payload, NNTPBinaryContent)
            assert payload.is_valid() is True

            # Stitch this part onto the end of our growing file
            newfile.append(payload)
            # keep open file count low
            payload.close()

        # Compare File
        decoded_filepath = join(self.var_dir, 'joystick.jpg')
        assert isfile(decoded_filepath)
        with open(decoded_filepath, 'r') as fd_in:
            decoded = fd_in.read()

        # save() relocates the assembled file; the old path must vanish
        # and the new one must exist
        assert isfile(newfile.filepath) is True
        path_before_save = newfile.filepath
        newfile.save()
        path_after_save = newfile.filepath
        assert path_before_save != path_after_save
        assert isfile(path_before_save) is False
        assert isfile(path_after_save) is True

        assert decoded == newfile.getvalue()

        # Close up our socket
        sock.close()

        while len(articles):
            article = articles.pop()
            # length hasn't changed
            assert len(article.decoded) == 1
            stale_path = next(iter(article.decoded)).filepath
            assert isfile(stale_path) is True

            # If we remove the article, we automatically destroy
            # all associated decoded with it (that aren't detached)
            del article

            # Since there is only 1 attachment per article in this test
            # we can see that the file is now gone
            assert isfile(stale_path) is False

        # Remove the file
        del newfile

        # We called save() so the file has been detached and will still exist!
        assert isfile(path_after_save) is True

        # cleanup our file
        unlink(path_after_save)