def test_NNTPArticle_UU_encode_02(self):
    """
    Test the encoding of fresh new data
    """
    # Temporary file location
    tmp_file = join(
        self.tmp_dir,
        'test_NNTPArticle_UU_encode_02.tmp',
    )

    # Create a larger file
    assert(self.touch(tmp_file, size='1M', random=True))

    # Create an NNTPContent Object pointing to our new data
    content = NNTPBinaryContent(tmp_file)

    # Create a UU Codec instance
    encoder = CodecUU(work_dir=self.test_dir)

    # This should produce our uuencoded object now
    encoded = encoder.encode(content)
    assert isinstance(encoded, NNTPAsciiContent) is True

    # Now we want to decode the content we just encoded
    decoded = encoder.decode(encoded)

    # We should get a Binary Object in return
    assert isinstance(decoded, NNTPBinaryContent) is True

    # Our original content should be the same as our decoded content
    assert(decoded.crc32() == content.crc32())
    assert(decoded.md5() == content.md5())
def test_yenc_v1_3_NNTPContent_encode(self):
    """
    Test the yEnc (v1.3) encoding of data (via NNTPContent); this is
    necessary prior to a post.
    """
    # A simple test for ensuring that the yEnc library exists; otherwise
    # we want this test to fail; the below line will handle this for us;
    # we'll let the test fail on an import error
    import yenc

    # First we take a binary file
    binary_filepath = join(self.var_dir, 'joystick.jpg')
    assert isfile(binary_filepath)

    # Initialize Codec
    encoder = CodecYenc(work_dir=self.test_dir)

    # Create an NNTPContent Object
    content = NNTPBinaryContent(binary_filepath, work_dir=self.test_dir)

    # Encode our content by object
    new_content_a = content.encode(encoder)

    # We should have gotten an ASCII Content Object
    assert isinstance(new_content_a, NNTPAsciiContent) is True

    # We should actually have content associated with our data
    assert len(new_content_a) > 0

    # Encode our content by type
    new_content_b = content.encode(CodecYenc)

    # We should have gotten an ASCII Content Object
    assert isinstance(new_content_b, NNTPAsciiContent) is True

    # We should actually have content associated with our data
    assert len(new_content_b) > 0

    # Our content should be the same when it was generated by both methods
    assert new_content_a.md5() == new_content_b.md5()

    # Chain our encodings
    new_content = content.encode(
        [CodecYenc, CodecYenc(work_dir=self.test_dir)],
    )

    # We should have gotten an ASCII Content Object
    assert isinstance(new_content, NNTPAsciiContent) is True

    # We should actually have content associated with our data
    assert len(new_content) > 0
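# NNTPContent.encode() accepts a codec instance, a codec class, or a list
# mixing the two (applied as a chain), as the test above demonstrates. A
# condensed sketch of the three call styles, assuming an existing `content`
# object and a `work_dir` variable:
#
#   a = content.encode(CodecYenc(work_dir=work_dir))               # instance
#   b = content.encode(CodecYenc)                                  # class
#   c = content.encode([CodecYenc, CodecYenc(work_dir=work_dir)])  # chained
#   assert a.md5() == b.md5()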
def test_article_copy(self):
    """
    The copy() function built into the article allows you to create a
    duplicate of the original article without disturbing the content
    within it.
    """
    tmp_dir = join(self.tmp_dir, 'NNTPArticle_Test.test_article_copy')

    # First we create two 512K files
    tmp_file_01 = join(tmp_dir, 'file01.tmp')
    tmp_file_02 = join(tmp_dir, 'file02.tmp')

    # Allow our files to exist
    assert(self.touch(tmp_file_01, size='512K', random=True) is True)
    assert(self.touch(tmp_file_02, size='512K', random=True) is True)

    # Duplicate groups are removed automatically
    article = NNTPArticle(
        subject='woo-hoo',
        poster='<*****@*****.**>',
        id='random-id',
        groups='alt.binaries.l2g',
        work_dir=self.tmp_dir,
    )

    # Store some content
    content = NNTPBinaryContent(
        filepath=tmp_file_01, part=1, work_dir=self.tmp_dir)
    assert(article.add(content) is True)
    content = NNTPBinaryContent(
        filepath=tmp_file_02, part=2, work_dir=self.tmp_dir)
    assert(article.add(content) is True)

    # Our article should now contain both content objects
    assert(len(article) == 2)

    # Set a few header entries
    article.header['Test'] = 'test'
    article.header['Another-Entry'] = 'test2'

    # Create a copy of our object
    article_copy = article.copy()

    assert(len(article_copy) == len(article))
    assert(len(article_copy.header) == len(article.header))

    # Make sure that if we modify one object it doesn't affect the other
    # (i.e. the copy must not be a pointer to the same location in memory)
    article.header['Yet-Another-Entry'] = 'test3'
    assert(len(article_copy.header)+1 == len(article.header))
def test_mime(self):
    """
    Tests mime types on different types of content
    """
    ac = NNTPAsciiContent()
    bc = NNTPBinaryContent()

    # Mime Types aren't detectable with new files
    assert(ac.mime().type() == 'application/x-empty')
    assert(bc.mime().type() == 'application/x-empty')

    # Open up a jpeg
    bc = NNTPBinaryContent(join(self.var_dir, 'joystick.jpg'))
    assert(bc.mime().type() == 'image/jpeg')

    # Make a copy of our image as a different name
    assert(bc.save(join(self.tmp_dir, 'weird.name'), copy=True) is True)

    # We still know it's an image
    assert(bc.mime().type() == 'image/jpeg')

    # Create ourselves a new file
    tmp_file = join(self.tmp_dir, 'test.rar')
    assert(self.touch(tmp_file, size='2KB', random=True) is True)
    bc = NNTPBinaryContent(tmp_file)

    # With random (undetectable) content, the type is guessed from the
    # file extension instead
    assert(bc.mime().type() == 'application/x-rar-compressed')
def test_posting_content(self):
    """
    Tests preparing posting content by splitting it into articles.
    """
    # Duplicate groups are removed automatically
    article = NNTPArticle(
        subject='woo-hoo',
        poster='<*****@*****.**>',
        id='random-id',
        groups='alt.binaries.l2g',
        work_dir=self.tmp_dir,
    )

    # First we create a 512K file
    tmp_file = join(
        self.tmp_dir, 'NNTPArticle_Test.posting', 'file.tmp')

    # File should not already exist
    assert(isfile(tmp_file) is False)

    # Create a random file
    assert(self.touch(tmp_file, size='512K', random=True) is True)

    # File should exist now
    assert(isfile(tmp_file) is True)

    # Now we want to load it into a NNTPContent object
    content = NNTPBinaryContent(filepath=tmp_file, work_dir=self.tmp_dir)
    assert(article.add(content) is True)

    # Now we want to split the file up
    results = article.split('128K')

    # Tests that our results are expected; a 512K file split on a
    # 128K boundary yields 4 parts
    assert(isinstance(results, sortedset) is True)
    assert(len(results) == 4)
def test_nzbfile_generation(self):
    """
    Tests the creation of NZB-Files
    """
    nzbfile = join(self.tmp_dir, 'test.nzbfile.nzb')
    payload = join(self.var_dir, 'uudecoded.tax.jpg')
    assert isfile(nzbfile) is False

    # Create our NZB Object
    nzbobj = NNTPnzb()

    # Create a fake article
    segpost = NNTPSegmentedPost(basename(payload))
    content = NNTPBinaryContent(payload)

    article = NNTPArticle('testfile', groups='newsreap.is.awesome')

    # Note that our nzb object's segment tracker is not marked as being
    # complete. This flag gets toggled when we add segments manually to
    # our nzb object or if we parse an NZB-File
    assert(nzbobj._segments_loaded is None)

    # Add our content to the article
    article.add(content)

    # Now add our article to the segmented post
    segpost.add(article)

    # Now add our segmented post to the NZB-File
    nzbobj.add(segpost)

    # Since .add() was called, this will be set to True now
    assert(nzbobj._segments_loaded is True)

    # Store our file
    assert nzbobj.save(nzbfile) is True
    assert isfile(nzbfile) is True
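# A hedged sketch of the object hierarchy the test above assembles; it uses
# only calls that appear in the test itself, and the payload path, group
# name, and subject are placeholder assumptions:
#
#   NNTPnzb                     <- the NZB-File container
#     +- NNTPSegmentedPost      <- one post (file), named after the payload
#          +- NNTPArticle       <- one segment of that post
#               +- NNTPBinaryContent  <- the raw data attached to the segment
#
#   def build_nzb(payload, nzb_path):
#       nzbobj = NNTPnzb()
#       segpost = NNTPSegmentedPost(basename(payload))
#       article = NNTPArticle('subject', groups='alt.binaries.example')
#       article.add(NNTPBinaryContent(payload))
#       segpost.add(article)
#       nzbobj.add(segpost)
#       return nzbobj.save(nzb_path)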
def test_NNTPContent_encode(self):
    """
    Test the encoding of data; this is necessary prior to a post
    """
    # First we take a binary file
    binary_filepath = join(self.var_dir, 'joystick.jpg')
    assert isfile(binary_filepath)

    # Initialize Codec
    encoder = CodecUU(work_dir=self.test_dir)

    # Create an NNTPContent Object
    content = NNTPBinaryContent(binary_filepath)

    # Encode our content by object
    new_content_a = content.encode(encoder)

    # We should have gotten an ASCII Content Object
    assert isinstance(new_content_a, NNTPAsciiContent) is True

    # We should actually have content associated with our data
    assert len(new_content_a) > 0

    # Encode our content by type
    new_content_b = content.encode(CodecUU)

    # We should have gotten an ASCII Content Object
    assert isinstance(new_content_b, NNTPAsciiContent) is True

    # We should actually have content associated with our data
    assert len(new_content_b) > 0

    # Our content should be the same when it was generated by both methods
    assert new_content_a.md5() == new_content_b.md5()

    # Chain our encodings
    new_content = content.encode(
        [CodecUU, CodecUU(work_dir=self.test_dir)],
    )

    # We should have gotten an ASCII Content Object
    assert isinstance(new_content, NNTPAsciiContent) is True

    # We should actually have content associated with our data
    assert len(new_content) > 0
def test_article_splitting(self):
    """
    Tests that articles can be split
    """
    # Duplicate groups are removed automatically
    article = NNTPArticle(
        work_dir=self.tmp_dir,
        subject='split-test',
        poster='<*****@*****.**>',
        groups='alt.binaries.l2g',
    )

    # Nothing to split gives an error
    assert(article.split() is None)

    tmp_file = join(self.tmp_dir, 'NNTPArticle_Test.chunk', '1MB.rar')

    # The file doesn't exist at first
    assert(isfile(tmp_file) is False)
    # Create it
    assert(self.touch(tmp_file, size='1MB', random=True) is True)
    # Now it does
    assert(isfile(tmp_file) is True)

    # Now we want to load it into a NNTPContent object
    content = NNTPBinaryContent(filepath=tmp_file, work_dir=self.tmp_dir)

    # Add our object to our article
    assert(article.add(content) is True)

    # No size to split on gives an error
    assert(article.split(size=0) is None)
    assert(article.split(size=-1) is None)
    assert(article.split(size=None) is None)
    assert(article.split(size='bad_string') is None)

    # Invalid Memory Limit
    assert(article.split(mem_buf=0) is None)
    assert(article.split(mem_buf=-1) is None)
    assert(article.split(mem_buf=None) is None)
    assert(article.split(mem_buf='bad_string') is None)

    # We'll split it in 2; a 1MB file on a 512K boundary
    results = article.split(strsize_to_bytes('512K'))

    # Tests that our results are expected
    assert(isinstance(results, sortedset) is True)
    assert(len(results) == 2)

    # Test that the parts were assigned correctly
    for i, part_article in enumerate(results):
        # We should only have one content object per part
        assert(isinstance(part_article, NNTPArticle) is True)
        assert(len(part_article) == 1)

        # Our content object should have its part and total_parts
        # values populated correctly
        assert(part_article[0].part == (i+1))
        assert(part_article[0].total_parts == len(results))
def test_yenc_v1_3_NNTPArticle_encode_02(self):
    """
    Test the encoding of fresh new data
    """
    # A simple test for ensuring that the yEnc library exists; otherwise
    # we want this test to fail; the below line will handle this for us;
    # we'll let the test fail on an import error
    import yenc

    # Temporary file location
    tmp_file = join(
        self.tmp_dir,
        'test_yenc_v1_3_NNTPArticle_encode_02.tmp',
    )

    # Create a larger file
    assert(self.touch(tmp_file, size='1M', random=True))

    # Create an NNTPContent Object pointing to our new data
    content = NNTPBinaryContent(tmp_file)

    # Create a Yenc Codec instance
    encoder = CodecYenc(work_dir=self.test_dir)

    # This should produce our yEnc object now
    encoded = encoder.encode(content)
    assert isinstance(encoded, NNTPAsciiContent) is True

    # Now we want to decode the content we just encoded
    decoded = encoder.decode(encoded)

    # We should get a Binary Object in return
    assert isinstance(decoded, NNTPBinaryContent) is True

    # Our original content should be the same as our decoded content
    assert(decoded.crc32() == content.crc32())
    assert(decoded.md5() == content.md5())
def test_binary_article_iterations(self):
    """
    Binary Content can be loaded straight from file and can be
    processed in a for loop.
    """
    # Create a BytesIO Object
    bobj = BytesIO()

    # Fill our BytesIO object with random junk at least
    # 4x our expected block size
    for _ in range(4):
        bobj.write(urandom(BLOCK_SIZE))

    # Write just one more byte so we ``overflow`` and require a
    # 5th query later
    bobj.write('0')

    # Content
    ba = NNTPBinaryContent()

    # No items means not valid
    assert(ba.is_valid() is False)

    assert(ba.load('unknown_file') is False)

    # A failed load means not valid
    assert(ba.is_valid() is False)

    temp_file = join(self.tmp_dir, 'NNTPContent_Test-test_iterations.tmp')

    with open(temp_file, 'wb') as fd:
        fd.write(bobj.getvalue())

    assert(isfile(temp_file) is True)
    assert(ba.load(temp_file) is True)

    # Binary Content read by chunk size
    chunk = 4
    for line in ba:
        if chunk > 0:
            assert(len(line) == BLOCK_SIZE)
        else:
            # 5th query
            assert(len(line) == 1)
        chunk -= 1

    # We should have performed 5 chunk requests; we land at -1 since we
    # decrement the counter one last time before we're done
    assert(chunk == -1)

    # Confirm our size is reading correctly too
    assert(len(ba) == (BLOCK_SIZE * 4) + 1)

    # Remove article
    del ba

    # Files are not attached by default so our temp file
    # should still exist
    assert(isfile(temp_file) is True)

    # We'll create another object
    ba = NNTPAsciiContent()
    assert(ba.load(temp_file) is True)

    # Successfully loaded files are never attached
    assert(ba.is_attached() is False)

    # Our file still exists of course
    assert(isfile(temp_file) is True)

    # We'll detach it (it is already detached, so this is a no-op)
    ba.detach()

    # Still all is good
    assert(isfile(temp_file) is True)

    # Check that we're (still) not attached
    assert(ba.is_attached() is False)

    # Since the file is detached, deleting our object leaves the
    # file on disk untouched
    del ba

    # It's still there
    assert(isfile(temp_file) is True)
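# A short summary sketch of the attachment semantics exercised above and in
# test_general_features() below (path is a placeholder):
#
#   content = NNTPBinaryContent()
#   content.load(path)      # successfully loaded files start detached ...
#   content.is_attached()   # ... so this returns False
#   del content             # and the file on disk survives
#
# By contrast, content the object creates itself (e.g. via open()/write())
# is attached, and is removed from disk when the object is destroyed.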
def test_general_features(self):
    """
    Detaching means a file is no longer managed by its NNTPContent
    object. Test that this works.
    """
    # No parameters should create a file
    aa = NNTPAsciiContent()
    ba = NNTPBinaryContent()

    # Open a temporary file
    aa.open()
    ba.open()

    # Test Files
    aa_filepath = aa.filepath
    ba_filepath = ba.filepath
    assert(isfile(aa_filepath) is True)
    assert(isfile(ba_filepath) is True)

    # Test Length
    assert(len(aa) == 0)
    assert(len(ba) == 0)

    # Test that files are destroyed when the object is deleted
    del aa
    del ba

    # Files are destroyed
    assert(isfile(aa_filepath) is False)
    assert(isfile(ba_filepath) is False)

    # Test some parameters out during initialization
    aa = NNTPAsciiContent(
        filepath="ascii.file",
        part=2,
        work_dir=self.tmp_dir,
    )

    ba = NNTPBinaryContent(
        filepath="binary.file",
        part="10",
        work_dir=self.tmp_dir,
    )

    # Check our parts
    assert(aa.part == 2)

    # Strings are converted okay
    assert(ba.part == 10)

    # Open a temporary file
    aa.open()
    ba.open()

    # Files don't exist yet
    assert(isfile(join(self.tmp_dir, "binary.file")) is False)
    assert(isfile(join(self.tmp_dir, "ascii.file")) is False)

    # Grab a copy of these file paths so we can check them later
    aa_filepath = aa.filepath
    ba_filepath = ba.filepath

    # Save our content
    aa.save()
    ba.save()

    # Check that it was created okay
    assert(isfile(join(self.tmp_dir, "binary.file")) is True)
    assert(isfile(join(self.tmp_dir, "ascii.file")) is True)

    # Temporary files are gone (moved by the save() command above)
    assert(isfile(aa_filepath) is False)
    assert(isfile(ba_filepath) is False)

    # They were never the same after the save()
    assert(aa_filepath != aa.filepath)
    assert(ba_filepath != ba.filepath)

    # However after save() is called, the filepath is updated to reflect
    # the proper path; so this is still true
    assert(isfile(aa.filepath) is True)
    assert(isfile(ba.filepath) is True)

    # Even after the objects are gone
    del aa
    del ba

    # Files still exist even after the objects are destroyed
    assert(isfile(join(self.tmp_dir, "binary.file")) is True)
    assert(isfile(join(self.tmp_dir, "ascii.file")) is True)

    # Cleanup
    unlink(join(self.tmp_dir, "ascii.file"))
    unlink(join(self.tmp_dir, "binary.file"))
def decode(self, content=None, name=None, password=None, *args, **kwargs):
    """
    content must be pointing to a directory containing 7-Zip files
    that can be easily sorted on. Alternatively, path can be of type
    NNTPContent() or a set/list of them.

    If no password is specified, then the password configuration
    loaded into the class is used instead.

    Returns a sortedset() containing an NNTPBinaryContent() object
    that holds the extracted contents of the package. All decode()
    functions return a result set to be consistent with one another.
    """
    if content is not None:
        self.add(content)

    # Some simple error checking to save us from doing too much here
    if len(self) == 0:
        return None

    if not self.can_exe(self._bin):
        return None

    if not password:
        password = self.password

    # Initialize our command
    execute = [
        # Our Executable 7-Zip Application
        self._bin,
        # Use the eXtract flag
        'x',
        # Assume Yes
        '-y',
    ]

    # Password Protection
    if password is not None:
        execute.append('-p%s' % password)
    else:
        # Do not prompt for password
        execute.append('-p-')

    if self.overwrite:
        # Overwrite files
        execute.append('-aoa')
    else:
        # Don't overwrite files
        execute.append('-aos')

    # Stop Switch Parsing
    execute.append('--')

    if not name:
        name = self.name

    if not name:
        name = random_str()

    for _path in self:
        # Temporary Path
        tmp_path, _ = self.mkstemp(content=name)

        with pushd(tmp_path):
            # Create our SubProcess Instance
            sp = SubProcess(list(execute) + [_path])

            # Start our execution now
            sp.start()

            found_set = None
            while not sp.is_complete(timeout=1.5):

                found_set = self.watch_dir(
                    tmp_path,
                    ignore=found_set,
                )

            # Handle remaining content
            found_set = self.watch_dir(
                tmp_path,
                ignore=found_set,
                seconds=-1,
            )

            # Let the caller know our status
            if not sp.successful():
                # Cleanup Temporary Path
                rm(tmp_path)
                return None

            if not len(found_set):
                logger.warning(
                    '7Z archive (%s) contained no content.' %
                    basename(_path),
                )

    # Clear our list of objects to decode
    self.clear()

    # Return the path containing the extracted content
    # (tmp_path persists from the final loop iteration above)
    results = NNTPBinaryContent(tmp_path)

    # We intentionally attach its content
    results.attach()

    # Create a sortedset to return
    _resultset = sortedset(key=lambda x: x.key())
    _resultset.add(results)

    # Return our content
    return _resultset
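# For reference, the execute list above expands to roughly the following
# command per archive (a sketch; the archive path is hypothetical):
#
#   7z x -y -p- -aoa -- /path/to/archive.7z
#
# where 'x' extracts, '-y' assumes yes on prompts, '-p<password>' or '-p-'
# supplies or suppresses the password, '-aoa'/'-aos' chooses between
# overwriting and skipping existing files, and '--' stops switch parsing.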
def test_loading_response(self):
    """
    Tests the load() function of the article
    """
    # Prepare a Response
    response = NNTPResponse(200, 'Great Data')
    response.decoded.add(NNTPBinaryContent(work_dir=self.tmp_dir))

    # Prepare Article
    article = NNTPArticle(id='random-id', work_dir=self.tmp_dir)

    # There is no data so our article can't be valid
    assert(article.is_valid() is False)

    # Load and Check
    assert(article.load(response) is True)
    assert(article.header is None)
    assert(len(article.decoded) == 1)
    assert(len(article.decoded) == len(article.files()))
    assert(str(article) == 'random-id')
    assert(unicode(article) == u'random-id')
    assert(article.size() == 0)

    # Now there is data, but it's an empty Object so it can't be valid
    assert(article.is_valid() is False)

    result = re.search(' Message-ID=\"(?P<id>[^\"]+)\"', repr(article))
    assert(result is not None)
    assert(result.group('id') == str(article))

    result = re.search(' attachments=\"(?P<no>[^\"]+)\"', repr(article))
    assert(result is not None)
    assert(int(result.group('no')) == len(article))

    # Prepare two Articles
    article_a = NNTPArticle(id='a', work_dir=self.tmp_dir)
    article_b = NNTPArticle(id='b', work_dir=self.tmp_dir)
    assert((article_a < article_b) is True)

    # Playing with the sort order however alters things
    article_a.no += 1
    assert((article_a < article_b) is False)

    # Prepare a Response (with a Header)
    response = NNTPResponse(200, 'Great Data')
    response.decoded.add(NNTPHeader(work_dir=self.tmp_dir))
    response.decoded.add(NNTPBinaryContent(work_dir=self.tmp_dir))

    # Prepare Article
    article = NNTPArticle(id='random-id', work_dir=self.tmp_dir)

    # Load and Check
    assert(article.load(response) is True)
    assert(isinstance(article.header, NNTPHeader))
    assert(len(article.decoded) == 1)

    for no, decoded in enumerate(article.decoded):
        # Test equality
        assert(article[no] == decoded)

    # We can also load another article on top of an existing one. This is
    # used when associating downloaded articles with ones found in
    # NZB-Files
    new_article = NNTPArticle(
        msgid='brand-new-id',
        no=article.no+1,
        groups='a.b.c,d.e.f',
        work_dir=self.tmp_dir,
    )
    new_article.subject = 'test-subject-l2g'
    new_article.poster = 'test-poster-l2g'
    new_article.header = 'test-header-l2g'

    assert(article.load(new_article) is True)
    assert(article.id == new_article.id)
    assert(article.no == new_article.no)
    assert(article.groups == new_article.groups)
    assert(article.poster == new_article.poster)
    assert(article.subject == new_article.subject)
    assert(article.header == new_article.header)
    assert(article.body == new_article.body)
    assert(article.decoded == new_article.decoded)
    assert(article.groups == new_article.groups)
def encode(self, content=None, *args, **kwargs):
    """
    Takes a specified path (and/or file) and creates par2 files based
    on it.

    If this function is successful, it returns a set of
    NNTPBinaryContent() objects identifying the PAR2 files generated
    based on the passed in content.

    The function returns None if it fails in any way.
    """
    if content is not None:
        self.add(content)

    # Some simple error checking to save us from doing too much here
    if len(self) == 0:
        return None

    if not self.can_exe(self._par):
        return None

    for target in self.archive:
        # Base the name on the first file in the list
        name = basename(target)
        target_dir = dirname(target)

        # tmp_path, tmp_file = self.mkstemp(content=name, suffix='.par2')

        # Initialize our command
        execute = [
            # Our Executable PAR Application
            self._par,
            # Use the Create Flag
            'create',
        ]

        # Handle PAR Block Size
        if self.block_size:
            execute.append('-s%s' % self.block_size)

        if self.recovery_percent:
            execute.append('-r%d' % self.recovery_percent)

        if self.cpu_cores is not None and self.cpu_cores > 1:
            # Run concurrently - uses multiple threads
            execute.append('-t+')

        # Stop Switch Parsing
        execute.append('--')

        # Now add our target (we can only do one at a time, which is why
        # we loop) and run our setups
        execute.append(target)

        found_set = sortedset()
        with pushd(target_dir):
            # Create our SubProcess Instance
            sp = SubProcess(execute)

            # Start our execution now
            sp.start()

            while not sp.is_complete(timeout=1.5):

                found_set = self.watch_dir(
                    target_dir,
                    prefix=name,
                    regex=PAR_PART_RE,
                    ignore=found_set,
                )

            # Handle remaining content
            found_set = self.watch_dir(
                target_dir,
                prefix=name,
                regex=PAR_PART_RE,
                ignore=found_set,
                seconds=-1,
            )

            # Let the caller know our status
            if not sp.successful():
                # We're done; we failed
                return None

            if not len(found_set):
                # We're done; we failed
                return None

    # Create a resultset
    results = sortedset(key=lambda x: x.key())

    part = 0
    # Iterate through our found_set and create NNTPBinaryContent()
    # objects from them.
    for path in found_set:
        # Iterate over our found files and determine their part
        # information
        part += 1
        content = NNTPBinaryContent(
            path,
            part=part,
            total_parts=len(found_set),
        )

        # Loaded data is by default detached; we want to attach it
        content.attach()

        # Add our attached content to our results
        results.add(content)

    # Clear our list of objects to archive
    self.clear()

    # Return our results
    return results
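# For reference, the execute list above expands to roughly the following
# command per target (a sketch; the numeric values are hypothetical):
#
#   par2 create -s768000 -r15 -t+ -- /path/to/file
#
# '-s' sets the PAR block size in bytes, '-r' the recovery percentage, and
# '-t+' enables multi-threaded operation; '--' stops switch parsing so the
# target path can't be misread as an option.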
def test_encryption(self):
    """
    Test the encryption and decryption of data
    """
    # Create our Cryptography Object
    obj = NNTPCryptography()

    # We can't save if we haven't created keys yet
    assert(obj.save() is False)

    # Generate our keys
    (prv, pub) = obj.genkeys()

    # Check that they're stored
    assert (prv, pub) == obj.keys()

    # Test small content first
    content = 'newsreap'

    # Let's encrypt our content
    encrypted = obj.encrypt(content)

    # Decrypt it now:
    decrypted = obj.decrypt(encrypted)

    # Test it out
    assert(str(content) == str(decrypted))

    # Note that the Hash value is important as encryption and decryption
    # will fail otherwise
    encrypted = obj.encrypt(
        content,
        alg=HashType.SHA512,
        mgf1=HashType.SHA512,
    )

    # Returns None in all cases below because either the alg or the mgf1
    # hash doesn't match what was used to encrypt
    assert(obj.decrypt(
        encrypted, alg=HashType.SHA256, mgf1=HashType.SHA512) is None)
    assert(obj.decrypt(
        encrypted, alg=HashType.SHA512, mgf1=HashType.SHA256) is None)
    assert(obj.decrypt(
        encrypted, alg=HashType.SHA384, mgf1=HashType.SHA1) is None)

    # However if we use the right hashes
    decrypted = obj.decrypt(
        encrypted,
        alg=HashType.SHA512,
        mgf1=HashType.SHA512,
    )

    # It will succeed again
    assert(str(content) == str(decrypted))

    # Our temporary file location
    tmp_file = join(self.tmp_dir, 'NNTPCryptography.test_encryption.tmp')

    # Let's create a slightly larger file; one we'll need to process
    # in chunks
    assert(self.touch(tmp_file, size='128KB', random=True))

    # We'll uuencode the file since we can't deal with raw binary

    # Create an NNTPContent Object
    content = NNTPBinaryContent(tmp_file)

    # We need to iterate over all of our possible hash types so that we
    # can test that the chunk sizes are valid in all cases. This big
    # O(n^2) loop will test all of our supported combinations
    for alg in CRYPTOGRAPHY_HASH_MAP.keys():
        for mgf1 in CRYPTOGRAPHY_HASH_MAP.keys():
            # Create our Cryptography Object
            obj = NNTPCryptography(alg=alg, mgf1=mgf1)

            # We can't save if we haven't created keys yet
            assert(obj.save() is False)

            # Generate our keys
            (prv, pub) = obj.genkeys()

            encoder = CodecUU(work_dir=self.test_dir)
            response = encoder.encode(content)

            # We should have gotten an ASCII Content Object
            assert(len(response) > 0)

            with open(response.filepath, 'rb') as f:
                # Any chunk size higher than 190 doesn't seem to work
                for chunk in iter(lambda: f.read(obj.chunk_size()), b''):
                    # Let's encrypt our content
                    encrypted = obj.encrypt(chunk)
                    assert(encrypted is not None)

                    # Decrypt it now:
                    decrypted = obj.decrypt(encrypted)

                    # Test it out
                    assert(str(chunk) == str(decrypted))
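# A minimal usage sketch of the round trip exercised above, using only the
# calls shown in this test (the plaintext value is a placeholder):
#
#   obj = NNTPCryptography()
#   prv, pub = obj.genkeys()
#   token = obj.encrypt('secret', alg=HashType.SHA512, mgf1=HashType.SHA512)
#   assert obj.decrypt(token, alg=HashType.SHA512,
#                      mgf1=HashType.SHA512) == 'secret'
#
# Both alg and mgf1 must match between encrypt() and decrypt(); a mismatch
# returns None rather than raising. Larger payloads must be processed in
# chunk_size() pieces, as the loop above demonstrates.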
def decode(self, content=None, *args, **kwargs):
    """
    content must be pointing to a directory containing par files that
    can be easily retrieved. Alternatively, path can be of type
    NNTPContent() or a set/list of them.

    A sortedset of NNTPBinaryContent() objects is returned containing
    any new content that was generated as a result of the par2 call.

    If an error occurs then None is returned.
    """
    if content is not None:
        self.add(content)

    # Some simple error checking to save us from doing too much here
    if len(self) == 0:
        return None

    if not self.can_exe(self._par):
        return None

    # Filter our results by indexes
    indexes = self.__filter_pars(self.archive, indexes=True, volumes=False)

    # Initialize our command
    execute = [
        # Our Executable PAR Application
        self._par,
        # Use Repair
        'repair',
    ]

    if self.cpu_cores is not None and self.cpu_cores > 1:
        # Repair concurrently - uses multiple threads
        execute.append('-t+')

    # Stop Switch Parsing
    execute.append('--')

    results = sortedset(key=lambda x: x.key())
    for _path in indexes:
        # Get the directory the par file resides in
        par_path = dirname(_path)

        with pushd(par_path):
            # Create a before snapshot
            before_snapshot = self.watch_dir(
                par_path,
                seconds=-1,
            )

            # Create our SubProcess Instance
            sp = SubProcess(list(execute) + [basename(_path)])

            # Start our execution now
            sp.start()

            # Track files after
            after_snapshot = sortedset()
            while not sp.is_complete(timeout=1.5):

                after_snapshot = self.watch_dir(
                    par_path,
                    ignore=after_snapshot,
                )

            # Handle remaining content
            after_snapshot = self.watch_dir(
                par_path,
                ignore=after_snapshot,
                seconds=-1,
            )

            # Add any new files detected to our result set; otherwise we
            # just return an empty set
            total_parts = after_snapshot - before_snapshot
            for no, path in enumerate(total_parts):
                content = NNTPBinaryContent(
                    path,
                    part=no+1,
                    total_parts=len(total_parts),
                )

                # Loaded data is by default detached; we want to attach it
                content.attach()

                # Add our attached content to our results
                results.add(content)

            # Let the caller know our status
            if not sp.successful():
                return None

    # Clear our list of objects to repair
    self.clear()

    return results
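# For reference, the execute list above expands to roughly the following
# command per index file (a sketch; the filename is hypothetical):
#
#   par2 repair -t+ -- backup.par2
#
# It is run from inside the directory holding the par2 set (hence the pushd
# above); any files the repair rebuilds show up in the difference between
# the before and after watch_dir() snapshots.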
def test_decoding_yenc_multi_part(self):
    """
    Test decoding of a yEnc multi-part

    This test was generated after visiting http://www.yenc.org and
    finding the examples they provide on their site.

        Downloaded the following zip file:
            http://www.yenc.org/yenc2.zip

        Then extracting it revealed 3 files:
            - 00000020.ntx
                This is the yEnc file as it would have been seen after
                being downloaded from the NNTP server (part 1 of 2)

            - 00000021.ntx
                This is the yEnc file as it would have been seen after
                being downloaded from the NNTP server (part 2 of 2)

            - joystick.jpg
                This is what the contents of the file should look like
                after being decoded (and assembled). This is what we
                use to test the file against.
    """
    # A simple test for ensuring that the yEnc library exists; otherwise
    # we want this test to fail; the below line will handle this for us;
    # we'll let the test fail on an import error
    import yenc

    # Input Files
    encoded_filepath_1 = join(self.var_dir, '00000020.ntx')
    encoded_filepath_2 = join(self.var_dir, '00000021.ntx')

    assert isfile(encoded_filepath_1)
    assert isfile(encoded_filepath_2)

    # Compare File
    decoded_filepath = join(self.var_dir, 'joystick.jpg')
    assert isfile(decoded_filepath)

    # Python Solution
    fd1_py = BytesIO()
    fd2_py = BytesIO()

    # C Solution
    fd1_c = BytesIO()
    fd2_c = BytesIO()

    # Initialize Codec
    decoder = CodecYenc(work_dir=self.test_dir)

    contents_py = []
    contents_c = []

    # Force to operate in python (manual/slow) mode
    CodecYenc.FAST_YENC_SUPPORT = False
    with open(encoded_filepath_1, 'r') as fd_in:
        contents_py.append(decoder.decode(fd_in))
    with open(encoded_filepath_2, 'r') as fd_in:
        contents_py.append(decoder.decode(fd_in))

    for x in contents_py:
        # Verify our data is good
        assert x.is_valid() is True

    # Force to operate with the C extension yEnc.
    # This requires the extension to be installed on the system
    CodecYenc.FAST_YENC_SUPPORT = True
    with open(encoded_filepath_1, 'r') as fd_in:
        contents_c.append(decoder.decode(fd_in))
    with open(encoded_filepath_2, 'r') as fd_in:
        contents_c.append(decoder.decode(fd_in))

    for x in contents_c:
        # Verify our data is good
        assert x.is_valid() is True

    # Confirm that our output from our python implementation
    # matches that of our yEnc C version.
    assert fd1_py.tell() == fd1_c.tell()
    assert fd2_py.tell() == fd2_c.tell()

    with open(decoded_filepath, 'r') as fd_in:
        decoded = fd_in.read()

    # Assemble (TODO)
    contents_py.sort()
    contents_c.sort()

    content_py = NNTPBinaryContent(
        filepath=contents_py[0].filename,
        save_dir=self.out_dir,
    )
    content_c = NNTPBinaryContent(
        filepath=contents_c[0].filename,
        save_dir=self.out_dir,
    )

    # append() takes a list or another NNTPContent object and appends
    # its content to the end of this one
    content_py.append(contents_py)
    content_c.append(contents_c)

    assert len(content_py) == len(decoded)
    assert len(content_c) == len(decoded)

    # Compare our processed content with the expected results
    assert content_py.getvalue() == decoded
    assert content_c.getvalue() == decoded
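# The assembly pattern used above, summarized: the decoded parts are sorted
# (CodecYenc sorts by part number via __lt__), an NNTPBinaryContent is
# seeded from the first part's filename, and append() then splices the
# ordered list of parts back together into the original payload.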
class CodecYenc(CodecBase):

    def __init__(self, descriptor=None, work_dir=None,
                 linelen=128, *args, **kwargs):
        super(CodecYenc, self).__init__(
            descriptor=descriptor, work_dir=work_dir, *args, **kwargs)

        # Used for internal meta tracking when using the decode()
        self._meta = {}

        # Our Binary Object we can reference while we decode content
        self.decoded = None

        # Used for encoding; this defines the maximum number of (encoded)
        # characters to display per line.
        self.linelen = linelen

    def parse_article(self, subject, *args, **kwargs):
        """
        Takes an article header and returns its parsed content if
        successful. Otherwise it returns None.
        """

        matched = NZB_SUBJECT_PARSE.match(subject)
        if matched is None:
            # subject is not parsable
            return None

        results = {}

        # Trim results
        if matched.group('desc') is not None:
            results['desc'] = re.sub(r'[\s-]+$', '', matched.group('desc'))
        if matched.group('fname') is not None:
            results['fname'] = matched.group('fname').strip()

        # Support conversion of integers
        for _attr in ['index', 'count', 'yindex', 'ycount', 'size']:
            if matched.group(_attr) is not None:
                results[_attr] = int(matched.group(_attr))

        return results

    def encode(self, content, mem_buf=DEFAULT_BUFFER_SIZE):
        """
        Encodes an NNTPContent object passed in
        """

        if isinstance(content, NNTPContent):
            # Create our ascii instance
            _encoded = NNTPAsciiContent(
                filepath=content.filename,
                part=content.part,
                total_parts=content.total_parts,
                sort_no=content.sort_no,
                work_dir=self.work_dir,
                # We want to ensure we're working with a unique attached file
                unique=True,
            )

        else:
            # If we reach here, we presume our content is a filename

            # Create our ascii instance
            _encoded = NNTPAsciiContent(
                filepath=content,
                work_dir=self.work_dir,
                # We want to ensure we're working with a unique attached file
                unique=True,
            )

            # Convert our content object into an NNTPContent object
            content = NNTPContent(
                filepath=content,
                work_dir=self.work_dir,
            )

        # yEnc (v1.3) begin
        fmt_ybegin = '=ybegin part=%d total=%d line=%d size=%d name=%s' % (
            content.part, content.total_parts,
            self.linelen, len(content), content.filename,
        )

        # yEnc part
        fmt_ypart = '=ypart begin=%d end=%d' % (
            content.begin() + 1,
            content.end(),
        )

        if isinstance(content._parent, NNTPContent):
            # yEnc end
            fmt_yend = '=yend size=%d part=%d pcrc32=%s crc32=%s' % (
                len(content), content.part,
                content.crc32(), content._parent.crc32(),
            )

        else:
            # yEnc end
            fmt_yend = '=yend size=%d part=%d pcrc32=%s' % (
                len(content), content.part, content.crc32(),
            )

        # Write =ybegin line
        _encoded.write(fmt_ybegin + EOL)
        # Write =ypart line
        _encoded.write(fmt_ypart + EOL)

        if not content.open():
            return None

        # Prepare our result set
        results = ""

        # Column is used for tracking our line width while encoding
        column = 0
        crc = BIN_MASK

        # We need to parse the content until we either reach
        # the end of the file or get to an 'end' tag
        while True:
            # Read in our data
            data = content.stream.read(mem_buf)
            if not data:
                # We're done
                break

            if FAST_YENC_SUPPORT:
                try:
                    _results, crc, column = encode_string(data, crc, column)

                    # Append our parsed content onto our ongoing buffer
                    results += _results

                except YencError as e:
                    logger.error("Failed to encode Yenc for %s." % content)
                    logger.debug('Yenc exception: %s' % (str(e)))
                    return None

            else:
                # The slow and painful way; the below looks complicated
                # but it really isn't at the end of the day; yEnc is
                # pretty basic:
                #  - first we translate all of the characters by adding
                #    42 to their value with the exception of a few special
                #    characters that are explicitly reserved for the yEnc
                #    language (and conflict with the NNTP Server language).
                #
                #  - next, we need to apply our ENCODE_SPECIAL_MAP to be
                #    sure to handle the characters that are reserved as
                #    special keywords used by both NNTP Servers and the
                #    yEnc protocol itself.
                #
                #  - finally we want to prevent our string from going on
                #    for too many characters (horizontally), so we need to
                #    split our content up.
                #
                idx = 0
                while idx < len(data):
                    _byte = (ord(data[idx]) + 42) & 0xff
                    if _byte in YENC_ENCODE_ESCAPED_CHARACTERS:
                        _byte = (_byte + 64) & 0xff
                        # Escape Sequence
                        results += '='

                    # Store our character
                    results += chr(_byte)

                    # Increment Index
                    idx += 1

            # Our offset
            offset = 0
            while offset < (len(results) - self.linelen + 1):
                eol = offset + self.linelen
                if results[offset:eol][-1] == '=':
                    # Lines can't end with the escape sequence (=). If we
                    # get here then this one did. We just adjust our
                    # end-of-line by 1 and keep moving
                    eol -= 1

                _encoded.write(results[offset:eol] + EOL)
                offset = eol

            if offset < len(results):
                results = results[-(len(results) - offset):]

            else:
                # Reset string
                results = ''

        # We're done reading our data
        content.close()

        if len(results):
            # We still have content left in our buffer
            _encoded.write(results + EOL)

        # Write footer
        _encoded.write(fmt_yend + EOL)

        if _encoded:
            # Close article when complete
            _encoded.close()

        # Return our encoded object
        return _encoded

    def detect(self, line, relative=True):
        """
        A simple function that can be used to determine if there is yEnc
        content on the line being checked.

        If relative is set to true, we additionally check the line
        content against content relative to the decoding process (`What
        are we expecting to have right now?`). For example, the `end`
        token would be ignored if we haven't received a `begin` first.

        It returns None if there is no yEnc key line, otherwise it
        returns a dictionary of the keys and their mapped values.
        """
        yenc_re = YENC_RE.match(line)
        if not yenc_re:
            return None

        # Merge Results
        f_map = dict((YENC_KEY_MAP[k], v)
                     for k, v in yenc_re.groupdict().iteritems() if v)

        # Tidy filename (whitespace)
        if 'name' in f_map:
            f_map['name'] = basename(f_map['name']).strip()

        if relative:
            # detect() relative to what has been decoded
            if f_map['key'] in self._meta:
                # We already processed this key
                return None

            if f_map['key'] == 'end' and 'begin' not in self._meta:
                # We can't handle this key
                return None

            if f_map['key'] == 'part' and 'begin' not in self._meta:
                # We can't handle this key
                return None

        # Integer types
        for kw in ['line', 'size', 'total', 'begin', 'end', 'part']:
            if kw in f_map:
                try:
                    f_map[kw] = int(f_map[kw])

                except (TypeError, ValueError):
                    # Eliminate bad kw
                    del f_map[kw]

        return f_map

    def decode(self, stream):
        """
        Decodes the data read from the given stream and writes the
        result to our decoded content object.
        """
        # We need to parse the content until we either reach
        # the end of the file or get to an 'end' tag
        while self.decode_loop():
            # Fall-back ptr
            ptr = stream.tell()

            # Read in our data
            data = stream.readline()
            if not data:
                # We're done for now
                return True

            # Total Line Tracking
            self._total_lines += 1

            # Detect a yEnc line
            _meta = self.detect(data, relative=False)
            if _meta is not None:
                #
                # We just read a yEnc keyword token such as
                # begin, part, or end
                #
                if _meta['key'] in self._meta:
                    # We already processed this key; uh oh

                    # Fix our stream
                    stream.seek(ptr, SEEK_SET)

                    # Fix our line count
                    self._total_lines -= 1

                    # We're done
                    break

                if _meta['key'] == 'end' and \
                        len(set(('begin', 'part')) - set(self._meta)) == 2:
                    # Why did we get an end before a begin or part?
                    # Just ignore it and keep going
                    continue

                # Store our key
                self._meta[_meta['key']] = _meta

                if 'end' in self._meta:
                    # Mark the binary as being valid
                    self.decoded._is_valid = True

                    # We're done!
                    break

                elif _meta['key'] == 'begin':
                    # Depending on the version of yEnc we're using, binary
                    # content starts now; therefore we create our binary
                    # instance now

                    if 'name' not in _meta:
                        # Why did we get a begin without a name?
                        # Just ignore it and keep going
                        continue

                    # Save part no globally if present (for sorting)
                    self._part = _meta.get('part', 1)

                    # Create our binary instance
                    self.decoded = NNTPBinaryContent(
                        filepath=_meta['name'],
                        part=self._part,
                        work_dir=self.work_dir,
                    )

                elif _meta['key'] == 'part':
                    if 'begin' not in self._meta:
                        # We must have a begin if we have a part.
                        # This is a messed up message; treat this
                        # as junk and keep going
                        continue

                    # Save part no globally if present (for sorting)
                    self._part = _meta.get('part', self._part)

                    # Update our Binary File if necessary
                    self.decoded.part = self._part

                continue

            if len(set(('begin', 'part')) - set(self._meta)) == 2:
                # We haven't found the start yet, which means we should
                # just keep going until we find it
                continue

            if FAST_YENC_SUPPORT:
                try:
                    decoded, self._crc, self._escape = \
                        decode_string(data, self._crc, self._escape)

                except YencError:
                    logger.warning(
                        "Yenc corruption detected on line %d." %
                        self._lines,
                    )

                    # Line Tracking
                    self._lines += 1

                    # Keep storing our data
                    continue

            else:
                # The slow and painful way; the below looks complicated
                # but it really isn't at the end of the day; yEnc is
                # pretty basic:
                #  - first we need to translate the special keyword tokens
                #    that are used by the yEnc language. We also want to
                #    ignore any trailing white space or new lines. This
                #    occurs by applying our DECODE_SPECIAL_MAP to the line
                #    being processed.
                #
                #  - finally we translate the remaining characters by
                #    taking away 42 from their value.
                #
                decoded = YENC_DECODE_SPECIAL_RE.sub(
                    lambda x: YENC_DECODE_SPECIAL_MAP[x.group()], data,
                ).translate(YENC42)

                # CRC Calculations
                self._calc_crc(decoded)

            # Line Tracking
            self._lines += 1

            # Track the number of bytes decoded
            self._decoded += len(decoded)

            # Write data to our out stream
            self.decoded.write(decoded)

            if self._max_bytes > 0 and self._decoded >= self._max_bytes:
                # If we specified a limit and hit it then we're done at
                # this point. Before we do so, advance to the end of our
                # stream
                stream.seek(0, SEEK_END)

                # We're done
                break

        # Reset our meta tracking
        self._meta = {}

        # Reset part information
        self._part = 1

        if self.decoded:
            # Close article when complete
            self.decoded.close()

        # Return what we do have
        return self.decoded

    def reset(self):
        """
        Reset our decoded content
        """
        super(CodecYenc, self).reset()

        # Tracks part no; defaults to 1 and shifts if it's determined
        # that we're another part
        self._part = 1

        # Used for internal meta tracking when using the decode()
        self._meta = {}

        # Our Binary Object we can reference while we decode content
        self.decoded = None

    def __lt__(self, other):
        """
        Sorts by part number
        """
        return self._part < other._part

    def __str__(self):
        """
        Return a printable version of the file being read
        """
        # Build a string using the data we know
        if self.decoded:
            return str(self.decoded)

        if 'begin' in self._meta:
            fname = self._meta.get('name', 'Unknown.File')
        else:
            fname = 'Undetermined.File'

        return '%s' % (fname)

    def __repr__(self):
        """
        Return a printable object
        """
        return '<CodecYenc lines_processed=%d />' % (self._lines, )
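# A hedged usage sketch of the codec above; the file path and work_dir are
# assumptions, and decode() is fed an open stream just as the test suite
# does elsewhere:
#
#   codec = CodecYenc(work_dir='/tmp')
#   encoded = codec.encode('/path/to/file.bin')       # filename form
#   with open(encoded.filepath, 'r') as fd:
#       decoded = codec.decode(fd)    # an NNTPBinaryContent object
#   assert decoded.is_valid()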
class CodecYenc(CodecBase): def __init__(self, descriptor=None, work_dir=None, linelen=128, *args, **kwargs): super(CodecYenc, self).__init__( descriptor=descriptor, work_dir=work_dir, *args, **kwargs) # Used for internal meta tracking when using the decode() self._meta = {} # Our Binary Object we can reference while we decode # content self.decoded = None # Used for encoding; This defines the maximum number of (encoded) # characters to display per line. self.linelen = linelen def parse_article(self, subject, *args, **kwargs): """ Takes a an article header and returns it's parsed content if it's successful. Otherwise it returns None. """ matched = NZB_SUBJECT_PARSE.match(subject) if matched is None: # subject is not parsable return None results = {} # Trim results if matched.group('desc') is not None: results['desc'] = re.sub('[\s-]+$', '', matched.group('desc')) if matched.group('fname') is not None: results['fname'] = matched.group('fname').strip() # Support conversion of integers for _attr in ['index', 'count', 'yindex', 'ycount', 'size']: if matched.group(_attr) is not None: results[_attr] = int(matched.group(_attr)) return results def encode(self, content, mem_buf=DEFAULT_BUFFER_SIZE): """ Encodes an NNTPContent object passed in """ if isinstance(content, NNTPContent): # Create our ascii instance _encoded = NNTPAsciiContent( filepath=content.filename, part=content.part, total_parts=content.total_parts, sort_no=content.sort_no, work_dir=self.work_dir, # We want to ensure we're working with a unique attached file unique=True, ) else: # If we reach here, we presume our content is a filename # Create our ascii instance _encoded = NNTPAsciiContent( filepath=content, work_dir=self.work_dir, # We want to ensure we're working with a unique attached file unique=True, ) # Convert our content object into an NNTPContent object content = NNTPContent( filepath=content, work_dir=self.work_dir, ) # yEnc (v1.3) begin fmt_ybegin = '=ybegin part=%d total=%d line=%d size=%d name=%s' % ( content.part, content.total_parts, self.linelen, len(content), content.filename, ) # yEnc part fmt_ypart = '=ypart begin=%d end=%d' % ( content.begin() + 1, content.end(), ) if isinstance(content._parent, NNTPContent): # yEnc end fmt_yend = '=yend size=%d part=%d pcrc32=%s crc32=%s' % ( len(content), content.part, content.crc32(), content._parent.crc32(), ) else: # yEnc end fmt_yend = '=yend size=%d part=%d pcrc32=%s' % ( len(content), content.part, content.crc32(), ) # Write =ybegin line _encoded.write(fmt_ybegin + EOL) # Write =ypart line _encoded.write(fmt_ypart + EOL) if not content.open(): return None # Prepare our result set results = "" # Column is used for decoding column = 0 crc = BIN_MASK # We need to parse the content until we either reach # the end of the file or get to an 'end' tag while True: # Read in our data data = content.stream.read(mem_buf) if not data: # We're done break if FAST_YENC_SUPPORT: try: _results, crc, column = encode_string(data, crc, column) # Append our parsed content onto our ongoing buffer results += _results except YencError as e: logger.error("Failed to encode Yenc for %s." 
% content) logger.debug('Yenc exception: %s' % (str(e))) return None else: # The slow and painful way, the below looks complicated # but it really isn't at the the end of the day; yEnc is # pretty basic; # - first we translate the all of the characters by adding # 42 to their value with the exception of a few special # characters that are explicitly reserved for the yEnc # language (and conflict with the NNTP Server language). # # - next, we need to apply our ENCODE_SPECIAL_MAP to be # sure to handle the characters that are reserved as # special keywords used by both NNTP Servers and the yEnc # protocol itself. # # - finally we want to prevent our string from going on for # to many characters (horizontally). So we need to split # our content up # idx = 0 while idx < len(data): _byte = (ord(data[idx]) + 42) & 0xff if _byte in YENC_ENCODE_ESCAPED_CHARACTERS: _byte = (_byte + 64) & 0xff # Esape Sequence results += '=' # Store our character results += chr(_byte) # Increment Index idx += 1 # Our offset offset = 0 while offset < (len(results)-self.linelen+1): eol = offset+self.linelen if results[offset:eol][-1] == '=': # Lines can't end with the escape sequence (=). If we get # here then this one did. We just adjust our end-of-line # by 1 and keep moving eol -= 1 _encoded.write(results[offset:eol] + EOL) offset = eol if offset < len(results): results = results[-(len(results) - offset):] else: # reset string results = '' # We're done reading our data content.close() if len(results): # We still have content left in our buffer _encoded.write(results + EOL) # Write footer _encoded.write(fmt_yend + EOL) if _encoded: # close article when complete _encoded.close() # Return our encoded object return _encoded def detect(self, line, relative=True): """ A Simple function that can be used to determine if there is yEnc content on the line being checked. If relative is set to true, we additionally check the line content against content relative to the decoding process (`What are we expecting to have right now?`). For example, the `end` token would be ignored if we haven't received a `begin` first. It returns None if there is no yEnc key line, otherwise it returns a dictionary of the keys and their mapped values. 
""" yenc_re = YENC_RE.match(line) if not yenc_re: return None # Merge Results f_map = dict((YENC_KEY_MAP[k], v) for k, v in yenc_re.groupdict().iteritems() if v) # Tidy filename (whitespace) if 'name' in f_map: f_map['name'] = basename(f_map['name']).strip() if relative: # detect() relative to what has been decoded if f_map['key'] in self._meta: # We already processed this key return None if f_map['key'] == 'end' and 'begin' not in self._meta: # We can't handle this key return None if f_map['key'] == 'part' and 'begin' not in self._meta: # We can't handle this key return None # Integer types for kw in ['line', 'size', 'total', 'begin', 'end', 'part']: if kw in f_map: try: f_map[kw] = int(f_map[kw]) except (TypeError, ValueError): # Eliminate bad kw del f_map[kw] return f_map def decode(self, stream): """ Decode some data and decode the data to descriptor identified (by the stream) """ # We need to parse the content until we either reach # the end of the file or get to an 'end' tag while self.decode_loop(): # fall_back ptr ptr = stream.tell() # Read in our data data = stream.readline() if not data: # We're done for now return True # Total Line Tracking self._total_lines += 1 # Detect a yEnc line _meta = self.detect(data, relative=False) if _meta is not None: # # We just read a yEnc keyword token such as # begin, part, or end # if _meta['key'] in self._meta: # We already processed this key; uh oh # Fix our stream stream.seek(ptr, SEEK_SET) # Fix our line count self._total_lines -= 1 # We're done break if _meta['key'] == 'end' and \ len(set(('begin', 'part')) - set(self._meta)) == 2: # Why did we get an end before a begin or part? # Just ignore it and keep going continue # store our key self._meta[_meta['key']] = _meta if 'end' in self._meta: # Mark the binary as being valid self.decoded._is_valid = True # We're done! break elif _meta['key'] == 'begin': # Depending on the version of yEnc we're using binary # content starts now; thefore we create our binary # instance now if 'name' not in _meta: # Why did we get a begin before a part # Just ignore it and keep going continue # Save part no globally if present (for sorting) self._part = _meta.get('part', 1) # Create our binary instance self.decoded = NNTPBinaryContent( filepath=_meta['name'], part=self._part, work_dir=self.work_dir, ) elif _meta['key'] == 'part': if 'begin' not in self._meta: # we must have a begin if we have a part # This is a messed up message; treat this # as junk and keep going continue # Save part no globally if present (for sorting) self._part = _meta.get('part', self._part) # Update our Binary File if nessisary self.decoded.part = self._part continue if len(set(('begin', 'part')) - set(self._meta)) == 2: # We haven't found the start yet which means we should just # keep going until we find it continue if FAST_YENC_SUPPORT: try: decoded, self._crc, self._escape = \ decode_string(data, self._crc, self._escape) except YencError: logger.warning( "Yenc corruption detected on line %d." % self._lines, ) # Line Tracking self._lines += 1 # keep storing our data continue else: # The slow and painful way, the below looks complicated # but it really isn't at the the end of the day; yEnc is # pretty basic; # - first we need to translate the special keyword tokens # that are used by the yEnc language. We also want to # ignore any trailing white space or new lines. This # occurs by applying our DECODE_SPECIAL_MAP to the line # being processed. # # - finally we translate the remaining characters by taking # away 42 from their value. 
                #
                decoded = YENC_DECODE_SPECIAL_RE.sub(
                    lambda x: YENC_DECODE_SPECIAL_MAP[x.group()],
                    data,
                ).translate(YENC42)

                # CRC Calculations
                self._calc_crc(decoded)

            # Line Tracking
            self._lines += 1

            # Track the number of bytes decoded
            self._decoded += len(decoded)

            # Write data to out stream
            self.decoded.write(decoded)

            if self._max_bytes > 0 and self._decoded >= self._max_bytes:
                # If we specified a limit and hit it, then we're done at
                # this point. Before we finish, advance to the end of our
                # stream
                stream.seek(0, SEEK_END)

                # We're done
                break

        # Reset our meta tracking
        self._meta = {}

        # Reset part information
        self._part = 1

        if self.decoded:
            # close article when complete
            self.decoded.close()

        # Return what we do have
        return self.decoded

    def reset(self):
        """
        Reset our decoded content
        """
        super(CodecYenc, self).reset()

        # Tracks part no; defaults to 1 and shifts if it's determined
        # that we're another part
        self._part = 1

        # Used for internal meta tracking when using the decode()
        self._meta = {}

        # Our Binary Object we can reference while we decode
        # content
        self.decoded = None

    def __lt__(self, other):
        """
        Sorts by part number
        """
        return self._part < other._part

    def __str__(self):
        """
        Return a printable version of the file being read
        """
        # Build a string using the data we know
        if self.decoded:
            return str(self.decoded)

        if 'begin' in self._meta:
            fname = self._meta.get('name', 'Unknown.File')
        else:
            fname = 'Undetermined.File'

        return '%s' % fname

    def __repr__(self):
        """
        Return a printable object
        """
        return '<CodecYenc lines_processed=%d />' % self._lines
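# -----------------------------------------------------------------------------
# A minimal, self-contained sketch of the core yEnc byte transform that the
# slow-path comments above describe. This is illustrative only: the function
# names and the escape set below are assumptions, not this codec's API, and
# the real encoder additionally handles line wrapping and the =ybegin/=yend
# framing. yEnc adds 42 (mod 256) to every byte; the handful of results that
# collide with NNTP line semantics (NUL, LF, CR and '=') are written as '='
# followed by the value plus a further 64 (mod 256). Decoding reverses both
# steps.
# -----------------------------------------------------------------------------
YENC_CRITICAL = frozenset((0x00, 0x0a, 0x0d, ord('=')))


def yenc_encode_bytes(data):
    """Return the yEnc-encoded form of `data` (a raw byte string)."""
    out = []
    for ch in data:
        _byte = (ord(ch) + 42) & 0xff
        if _byte in YENC_CRITICAL:
            # Escape sequence: '=' followed by (value + 64) mod 256
            out.append('=')
            _byte = (_byte + 64) & 0xff
        out.append(chr(_byte))
    return ''.join(out)


def yenc_decode_bytes(encoded):
    """Reverse yenc_encode_bytes(); line wrapping is ignored for brevity."""
    out = []
    escaped = False
    for ch in encoded:
        if not escaped and ch == '=':
            escaped = True
            continue
        _byte = ord(ch)
        if escaped:
            _byte = (_byte - 64) & 0xff
            escaped = False
        out.append(chr((_byte - 42) & 0xff))
    return ''.join(out)


# Round-trip sanity check, including bytes that require escaping
assert yenc_decode_bytes(yenc_encode_bytes('\x00\xd6=hi')) == '\x00\xd6=hi'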
    def test_mime(self):
        """
        Tests mime types on different types of content
        """
        ac = NNTPAsciiContent()
        bc = NNTPBinaryContent()

        # Mime types aren't detectable on new (empty) files
        assert (ac.mime().type() == 'application/x-empty')
        assert (bc.mime().type() == 'application/x-empty')

        # Open up a jpeg
        bc = NNTPBinaryContent(join(self.var_dir, 'joystick.jpg'))
        assert (bc.mime().type() == 'image/jpeg')

        # Make a copy of our image under a different name
        assert (bc.save(join(self.tmp_dir, 'weird.name'), copy=True) is True)

        # We still know it's an image
        assert (bc.mime().type() == 'image/jpeg')

        # Create ourselves a new file
        tmp_file = join(self.tmp_dir, 'test.rar')
        assert (self.touch(tmp_file, size='2KB', random=True) is True)
        bc = NNTPBinaryContent(tmp_file)

        # The content is random, so the mime type is guessed from the
        # file extension instead
        assert (bc.mime().type() == 'application/x-rar-compressed')
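# -----------------------------------------------------------------------------
# A rough illustration of the signature ("magic number") sniffing that the
# mime() assertions above generally rely on. This is a simplified stand-in,
# not the library's implementation: only two signatures are modelled here,
# and the final test.rar assertion above falls back to the file extension,
# which this sketch does not attempt.
# -----------------------------------------------------------------------------
MAGIC_SIGNATURES = (
    (b'\xff\xd8\xff', 'image/jpeg'),
    (b'Rar!\x1a\x07', 'application/x-rar-compressed'),
)


def sniff_mime_type(filepath):
    """Return a best-guess mime type for the file found at `filepath`."""
    with open(filepath, 'rb') as fd:
        header = fd.read(8)

    if not header:
        # Mirrors the 'application/x-empty' behaviour asserted above
        return 'application/x-empty'

    for signature, mime_type in MAGIC_SIGNATURES:
        if header.startswith(signature):
            return mime_type

    return 'application/octet-stream'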
    def test_yenc_multi_message(self):
        """
        Tests the handling of a yEnc multi-part message
        """
        # Create a non-secure connection
        sock = NNTPConnection(
            host=self.nttp_ipaddr,
            port=self.nntp_portno,
            username='******',
            password='******',
            secure=False,
            join_group=True,
        )
        assert sock.connect() is True
        assert sock._iostream == NNTPIOStream.RFC3977_GZIP

        articles = sortedset(key=lambda x: x.key())

        # We intentionally fetch the content out of order;
        # ideally we'd want 20 followed by 21
        articles.add(
            sock.get(id='21', work_dir=self.tmp_dir, group=self.common_group))
        assert sock.group_name == self.common_group

        articles.add(sock.get(id='20', work_dir=self.tmp_dir))
        assert sock.group_name == self.common_group

        newfile = NNTPBinaryContent(
            # This looks rough: we're looking at the first article stored
            # (since our set is sorted), and then at its first decoded
            # content entry.
            # TODO: update the article function so it's much easier to get
            # an iterator to the decoded list
            filepath=iter(iter(articles).next().decoded).next().filename,
            work_dir=self.tmp_dir,
        )

        for article in articles:
            assert isinstance(article, NNTPArticle) is True
            assert len(article.decoded) == 1
            assert isinstance(iter(article.decoded).next(), NNTPBinaryContent)
            assert iter(article.decoded).next().is_valid() is True

            # Build on new file
            newfile.append(iter(article.decoded).next())

            # keep open file count low
            iter(article.decoded).next().close()

        # Compare File
        decoded_filepath = join(self.var_dir, 'joystick.jpg')
        assert isfile(decoded_filepath)
        with open(decoded_filepath, 'rb') as fd_in:
            decoded = fd_in.read()

        assert isfile(newfile.filepath) is True
        old_filepath = newfile.filepath
        newfile.save()
        new_filepath = newfile.filepath
        assert old_filepath != new_filepath
        assert isfile(old_filepath) is False
        assert isfile(new_filepath) is True

        assert decoded == newfile.getvalue()

        # Close up our socket
        sock.close()

        while len(articles):
            article = articles.pop()

            # length hasn't changed
            assert len(article.decoded) == 1

            old_filepath = iter(article.decoded).next().filepath
            assert isfile(old_filepath) is True

            # If we remove the article, we automatically destroy all
            # decoded content associated with it (that isn't detached)
            del article

            # Since there is only 1 attachment per article in this test,
            # we can see that the file is now gone
            assert isfile(old_filepath) is False

        # Remove the file
        del newfile

        # We called save(), so the file has been detached and will
        # still exist!
        assert isfile(new_filepath) is True

        # cleanup our file
        unlink(new_filepath)
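# -----------------------------------------------------------------------------
# The out-of-order fetch in the test above works because reassembly depends
# only on the sortedset's sort key, never on arrival order. A tiny
# standalone illustration of the same idea (the part/payload tuples here
# are purely hypothetical):
# -----------------------------------------------------------------------------
parts = [(2, 'world'), (1, 'hello '), (3, '!')]
reassembled = ''.join(payload for _, payload in sorted(parts))
assert reassembled == 'hello world!'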
    def encode(self, content=None, name=None, *args, **kwargs):
        """
        Takes a specified path (and/or file) and compresses it. If this
        function is successful, it returns a set of NNTPBinaryContent()
        objects that are 'not' detached. The function returns None if it
        fails in any way.
        """
        if content is not None:
            self.add(content)

        # Some simple error checking to save us from doing too much here
        if len(self) == 0:
            return None

        if not self.can_exe(self._bin):
            return None

        if not name:
            name = self.name

        if not name:
            name = random_str()

        tmp_path, tmp_file = self.mkstemp(content=name, suffix='.7z')

        # Initialize our command
        execute = [
            # Our Executable 7-Zip Application
            self._bin,
            # Use Add Flag
            'a',
            # Default mode is 7-Zip
            '-t7z',
        ]

        # Password Protection
        if self.password is not None:
            execute.append('-p%s' % self.password)

        # Handle Compression Level
        if self.level is CompressionLevel.Maximum:
            execute.append('-mx9')

        elif self.level is CompressionLevel.Average:
            execute.append('-mx5')

        elif self.level is CompressionLevel.Minimum:
            execute.append('-mx1')

        # Don't prompt for anything
        execute.append('-y')

        if not name:
            name = splitext(basename(tmp_file))[0]

        # Handle 7z Volume Splitting
        if self.volume_size:
            execute.append('-v%sb' % self.volume_size)

        if self.cpu_cores is not None and self.cpu_cores > 1:
            # create archive using multiple threads
            execute.append('-mmt%d' % self.cpu_cores)

        # Stop Switch Parsing
        execute.append('--')

        # Specify the Destination Path
        execute.append(tmp_file)

        # Add all of our paths now
        for _path in self:
            execute.append(_path)

        # Create our SubProcess Instance
        sp = SubProcess(execute)

        # Start our execution now
        sp.start()

        found_set = None
        while not sp.is_complete(timeout=1.5):
            found_set = self.watch_dir(
                tmp_path,
                prefix=name,
                ignore=found_set,
            )

        # Handle remaining content
        found_set = self.watch_dir(
            tmp_path,
            prefix=name,
            ignore=found_set,
            seconds=-1,
        )

        # Let the caller know our status
        if not sp.successful():
            # Cleanup Temporary Path
            rm(tmp_path)
            return None

        if not len(found_set):
            return None

        # Create a resultset
        results = sortedset(key=lambda x: x.key())

        # Iterate over our found files and create NNTPBinaryContent()
        # objects from them, determining their part information as we go.
        part = 0
        for path in found_set:
            _re_results = SEVEN_ZIP_PART_RE.match(path)
            if _re_results:
                if _re_results.group('part') is not None:
                    part = int(_re_results.group('part'))

                elif _re_results.group('part0') is not None:
                    part = int(_re_results.group('part0'))

                else:
                    part += 1

            else:
                part += 1

            content = NNTPBinaryContent(
                path,
                part=part,
                total_parts=len(found_set),
            )

            # Loaded data is by default detached; we want to attach it
            content.attach()

            # Add our attached content to our results
            results.add(content)

        # Clear our list of objects to archive
        self.clear()

        # Return our results
        return results
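# -----------------------------------------------------------------------------
# For reference, with a password, maximum compression, 4 CPU cores and a
# 50MB volume size configured, the flag logic above assembles an argument
# vector along these lines (the paths and values shown are illustrative
# only):
#
#   ['7z', 'a', '-t7z', '-psecret', '-mx9', '-y', '-v52428800b', '-mmt4',
#    '--', '/tmp/work/archive.7z', '/data/to/compress']
# -----------------------------------------------------------------------------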
    def decode(self, content=None, name=None, password=None, *args, **kwargs):
        """
        content must point to a directory containing rar files that can be
        easily sorted. Alternatively, content can be an NNTPContent()
        object, or a set/list of them.

        If no password is specified, then the password configuration loaded
        into the class is used instead.

        Returns an NNTPBinaryContent() object containing the contents of
        the package, wrapped in a sortedset(). All decode() functions
        return a result set to be consistent with one another.
        """
        if content is not None:
            self.add(content)

        # Some simple error checking to save us from doing too much here
        if len(self) == 0:
            return None

        if not self.can_exe(self._unrar):
            return None

        if not password:
            password = self.password

        # Initialize our command
        execute = [
            # Our Executable RAR Application
            self._unrar,
            # Use Extract Flag
            'x',
            # Assume Yes
            '-y',
        ]

        # Password Protection
        if password is not None:
            execute.append('-p%s' % password)
        else:
            # Do not prompt for password
            execute.append('-p-')

        if self.keep_broken:
            # Keep Broken Flag
            execute.append('-kb')

        if self.overwrite:
            # Overwrite files
            execute.append('-o+')

        else:
            # Don't overwrite files
            execute.append('-o-')

        if self.freshen:
            # Freshen files
            execute.append('-f')

        # Stop Switch Parsing
        execute.append('--')

        if not name:
            name = self.name

        if not name:
            name = random_str()

        for _path in self:
            # Temporary Path
            tmp_path, _ = self.mkstemp(content=name)

            with pushd(tmp_path):
                # Create our SubProcess Instance
                sp = SubProcess(list(execute) + [_path])

                # Start our execution now
                sp.start()

                found_set = None
                while not sp.is_complete(timeout=1.5):
                    found_set = self.watch_dir(
                        tmp_path,
                        ignore=found_set,
                    )

                # Handle remaining content
                found_set = self.watch_dir(
                    tmp_path,
                    ignore=found_set,
                    seconds=-1,
                )

                # Let the caller know our status
                if not sp.successful():
                    # Cleanup Temporary Path
                    rm(tmp_path)
                    return None

                if not len(found_set):
                    logger.warning(
                        'RAR archive (%s) contained no content.' %
                        basename(_path),
                    )

        # Clear our list of objects to decode
        self.clear()

        # Return the path containing the unrar'ed content
        results = NNTPBinaryContent(tmp_path)

        # We intentionally attach its content
        results.attach()

        # Create a sortedset to return
        _resultset = sortedset(key=lambda x: x.key())
        _resultset.add(results)

        # Return our content
        return _resultset
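# -----------------------------------------------------------------------------
# Similarly, with a password supplied and keep_broken/overwrite enabled, the
# extract logic above invokes unrar with an argument vector along these
# lines (the path shown is illustrative only):
#
#   ['unrar', 'x', '-y', '-psecret', '-kb', '-o+', '--',
#    '/tmp/work/archive.rar']
# -----------------------------------------------------------------------------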
    def encode(self, content=None, name=None, *args, **kwargs):
        """
        Takes a specified path (and/or file) and compresses it. If this
        function is successful, it returns a set of NNTPBinaryContent()
        objects that are 'not' detached. The function returns None if it
        fails in any way.
        """
        if content is not None:
            self.add(content)

        # Some simple error checking to save us from doing too much here
        if len(self) == 0:
            return None

        if not self.can_exe(self._rar):
            return None

        if not name:
            name = self.name

        if not name:
            name = random_str()

        tmp_path, tmp_file = self.mkstemp(content=name, suffix='.rar')

        # Initialize our command
        execute = [
            # Our Executable RAR Application
            self._rar,
            # Use Add Flag
            'a',
        ]

        # Password Protection
        if self.password is not None:
            execute.append('-p%s' % self.password)

        # Handle Compression Level
        if self.level is CompressionLevel.Maximum:
            execute.append('-m5')

        elif self.level is CompressionLevel.Average:
            execute.append('-m3')

        elif self.level is CompressionLevel.Minimum:
            execute.append('-m0')

        # Exclude base directory from archive
        execute.append('-ep1')

        if not name:
            name = splitext(basename(tmp_file))[0]

        # Now place content within a directory identified by its name
        execute.append('-ap%s' % name)

        # Handle RAR Volume Splitting
        if self.volume_size:
            execute.append('-v%sb' % self.volume_size)

        # Handle Recovery Record
        if self.recovery_record is not None:
            execute.append('-rr%s' % self.recovery_record)

        if self.cpu_cores is not None and self.cpu_cores > 1:
            # create archive using multiple threads
            execute.append('-mt%d' % self.cpu_cores)

        # Stop Switch Parsing
        execute.append('--')

        # Specify the Destination Path
        execute.append(tmp_file)

        # Add all of our paths now
        for _path in self:
            execute.append(_path)

        # Create our SubProcess Instance
        sp = SubProcess(execute)

        # Start our execution now
        sp.start()

        found_set = None
        while not sp.is_complete(timeout=1.5):
            found_set = self.watch_dir(
                tmp_path,
                prefix=name,
                ignore=found_set,
            )

        # Handle remaining content
        found_set = self.watch_dir(
            tmp_path,
            prefix=name,
            ignore=found_set,
            seconds=-1,
        )

        # Let the caller know our status
        if not sp.successful():
            # Cleanup Temporary Path
            rm(tmp_path)
            return None

        if not len(found_set):
            return None

        # Create a resultset
        results = sortedset(key=lambda x: x.key())

        # Iterate over our found files and create NNTPBinaryContent()
        # objects from them, determining their part information as we go.
        part = 0
        for path in found_set:
            _re_results = RAR_PART_RE.match(path)
            if _re_results:
                if _re_results.group('part') is not None:
                    part = int(_re_results.group('part'))

                else:
                    part += 1

            else:
                part += 1

            content = NNTPBinaryContent(
                path,
                part=part,
                total_parts=len(found_set),
            )

            # Loaded data is by default detached; we want to attach it
            content.attach()

            # Add our attached content to our results
            results.add(content)

        # Clear our list of objects to archive
        self.clear()

        # Return our results
        return results
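# -----------------------------------------------------------------------------
# RAR_PART_RE itself is not defined in this excerpt; a pattern along these
# lines would satisfy the group('part') lookup performed above. This is an
# illustrative sketch, not the module's actual expression:
# -----------------------------------------------------------------------------
import re

RAR_PART_RE_SKETCH = re.compile(
    r'.*\.part(?P<part>\d+)\.rar$', re.IGNORECASE)

assert int(RAR_PART_RE_SKETCH.match(
    '/tmp/backup.part03.rar').group('part')) == 3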