def test_mime(self):
    """
    Check the mime type reported by content objects that are empty,
    loaded from an image, and loaded from a generated file.
    """
    empty_type = 'application/x-empty'
    jpeg_type = 'image/jpeg'

    text_content = NNTPAsciiContent()
    bc = NNTPBinaryContent()

    # Content that holds no data yet reports the 'empty' type
    assert text_content.mime().type() == empty_type
    assert bc.mime().type() == empty_type

    # Load an actual jpeg image
    bc = NNTPBinaryContent(join(self.var_dir, 'joystick.jpg'))
    assert bc.mime().type() == jpeg_type

    # Store a copy of the image under a name without a useful extension
    renamed_copy = join(self.tmp_dir, 'weird.name')
    assert bc.save(renamed_copy, copy=True) is True

    # The reported type is unchanged after the copy
    assert bc.mime().type() == jpeg_type

    # Generate a fresh 2KB file of random data named like a rar archive
    rar_path = join(self.tmp_dir, 'test.rar')
    assert self.touch(rar_path, size='2KB', random=True) is True
    bc = NNTPBinaryContent(rar_path)

    # The rar type is expected for this file
    assert bc.mime().type() == 'application/x-rar-compressed'
def test_mime(self):
    """
    Check the mime type reported by content objects that are empty,
    loaded from an image, and loaded from a generated file.
    """
    empty_type = 'application/x-empty'
    jpeg_type = 'image/jpeg'

    text_content = NNTPAsciiContent()
    bc = NNTPBinaryContent()

    # Content that holds no data yet reports the 'empty' type
    assert text_content.mime().type() == empty_type
    assert bc.mime().type() == empty_type

    # Load an actual jpeg image
    bc = NNTPBinaryContent(join(self.var_dir, 'joystick.jpg'))
    assert bc.mime().type() == jpeg_type

    # Store a copy of the image under a name without a useful extension
    renamed_copy = join(self.tmp_dir, 'weird.name')
    assert bc.save(renamed_copy, copy=True) is True

    # The reported type is unchanged after the copy
    assert bc.mime().type() == jpeg_type

    # Generate a fresh 2KB file of random data named like a rar archive
    rar_path = join(self.tmp_dir, 'test.rar')
    assert self.touch(rar_path, size='2KB', random=True) is True
    bc = NNTPBinaryContent(rar_path)

    # The rar type is expected for this file
    assert bc.mime().type() == 'application/x-rar-compressed'
def reset(self):
    """
    Reset the codec and start over with a fresh decoded content object.
    """
    super(CodecAscii, self).reset()

    # Replace whatever was collected so far with a new, empty object
    fresh_content = NNTPAsciiContent(
        filepath='.message',
        work_dir=self.work_dir,
    )
    self.decoded = fresh_content
def __init__(self, descriptor=None, work_dir=None, *args, **kwargs):
    """
    Initialize the codec and create the object that collects the decoded
    text content.
    """
    super(CodecAscii, self).__init__(
        descriptor=descriptor, work_dir=work_dir, *args, **kwargs)

    # The ascii object text is written to as it gets decoded
    message = NNTPAsciiContent(
        filepath='.message',
        work_dir=self.work_dir,
    )
    self.decoded = message
def test_ascii_article_iterations(self):
    """
    Ascii content loaded from a file can be iterated line by line, and
    the loaded file is left in place when the object goes away.
    """
    aa = NNTPAsciiContent()

    # Loading a file that does not exist fails
    assert aa.load('unknown_file') is False

    temp_file = join(self.tmp_dir, 'NNTPContent_Test-test_iterations.tmp')
    with open(temp_file, 'wb') as fd:
        fd.write('Line 1\n')
        fd.write('Line 2\n')

    assert isfile(temp_file) is True
    assert aa.load(temp_file) is True

    # A successful load leaves the content valid
    assert aa.is_valid() is True

    # Iterating yields one line at a time, in order
    for lineno, line in enumerate(aa, 1):
        assert line == 'Line %d\n' % (lineno)

    # Drop the article; a loaded file is not attached, so it must
    # survive the object
    del aa
    assert isfile(temp_file) is True

    # Load the same file into a second object
    aa = NNTPAsciiContent()
    assert aa.load(temp_file) is True

    # Loading never attaches the file
    assert aa.is_attached() is False
    assert isfile(temp_file) is True

    del aa
    assert isfile(temp_file) is True
def test_ascii_article_iterations(self):
    """
    Ascii content loaded from a file can be iterated line by line, and
    the loaded file is left in place when the object goes away.
    """
    aa = NNTPAsciiContent()

    # Loading a file that does not exist fails
    assert aa.load('unknown_file') is False

    temp_file = join(self.tmp_dir, 'NNTPContent_Test-test_iterations.tmp')
    with open(temp_file, 'wb') as fd:
        fd.write('Line 1\n')
        fd.write('Line 2\n')

    assert isfile(temp_file) is True
    assert aa.load(temp_file) is True

    # A successful load leaves the content valid
    assert aa.is_valid() is True

    # Iterating yields one line at a time, in order
    for lineno, line in enumerate(aa, 1):
        assert line == 'Line %d\n' % (lineno)

    # Drop the article; a loaded file is not attached, so it must
    # survive the object
    del aa
    assert isfile(temp_file) is True

    # Load the same file into a second object
    aa = NNTPAsciiContent()
    assert aa.load(temp_file) is True

    # Loading never attaches the file
    assert aa.is_attached() is False
    assert isfile(temp_file) is True

    del aa
    assert isfile(temp_file) is True
class CodecAscii(CodecBase):
    """
    This is the codec used to store general content parsed that is not
    encoded on an NNTP Server.

    It doesn't do much but store/track ascii data
    """

    # Byte values treated as text by detect(): the usual control
    # characters (BEL, BS, TAB, LF, FF, CR, ESC) plus everything from
    # 0x20 upwards except DEL (0x7f).
    _TEXT_CHARS = bytes(bytearray(
        ({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100))) - {0x7f}
    ))

    def __init__(self, descriptor=None, work_dir=None, *args, **kwargs):
        """
        Initialize the codec and create the object that collects the
        decoded text content.
        """
        super(CodecAscii, self).__init__(descriptor=descriptor,
                                         work_dir=work_dir, *args, **kwargs)

        # Our Ascii Object we can reference while we store our
        # text content
        self.decoded = NNTPAsciiContent(
            filepath='.message',
            work_dir=self.work_dir,
        )

    def detect(self, line, relative=True):
        """
        A Simple function that can be used to determine if there is
        ascii on the line.

        It returns None if the line contains binary (non-text) bytes,
        otherwise it returns an empty dictionary since there are no
        meta-keys to extract from a common line.

        The `relative` argument is accepted but not used by this codec.
        """
        if isinstance(line, type(u'')):
            # translate() with a delete-set only works on byte strings,
            # so unicode text is encoded before it is inspected
            line = line.encode('utf-8')

        # Strip every text byte; whatever survives is binary data.
        # (The previous code tested a lambda object here, which is
        # always truthy, so None was returned for every line.)
        if line.translate(None, self._TEXT_CHARS):
            # We're dealing with binary data
            return None

        # We always match this type, but we also always return
        # an empty dictionary
        return {}

    def decode(self, stream):
        """
        Decode a single line read from the stream; decoding always stops
        at the end of the line.

        Returns the total number of bytes decoded so far.
        """

        # Read in our data
        data = stream.readline()
        if not data:
            # We're Done; returns the number of bytes decoded
            return self._decoded

        # Strip trailing whitespace and terminate the line with EOL
        decoded = data.rstrip() + EOL

        # Line Tracking
        self._lines += 1

        # Track the number of bytes decoded
        self._decoded += len(decoded)

        # Write data to out stream
        self.decoded.write(decoded)

        # Returns the number of bytes decoded
        return self._decoded

    def reset(self):
        """
        Reset our decoded content
        """
        super(CodecAscii, self).reset()

        # Start over with a new, empty content object
        self.decoded = NNTPAsciiContent(
            filepath='.message',
            work_dir=self.work_dir,
        )

    def __str__(self):
        """
        Return a printable version of the codec
        """
        return repr(self)

    def __repr__(self):
        """
        Return a printable object
        """
        return '<CodecAscii lines_processed=%d />' % (self._lines, )
def test_general_features(self):
    """
    Walk content objects through their basic life cycle: temporary
    files disappear with their object, while saved files outlive it.
    """
    # Objects created without any arguments
    ascii_obj = NNTPAsciiContent()
    binary_obj = NNTPBinaryContent()

    # Opening gives each object a file on disk
    ascii_obj.open()
    binary_obj.open()

    ascii_tmp = ascii_obj.filepath
    binary_tmp = binary_obj.filepath
    assert isfile(ascii_tmp) is True
    assert isfile(binary_tmp) is True

    # Nothing has been written yet
    assert len(ascii_obj) == 0
    assert len(binary_obj) == 0

    # Deleting the objects removes their files
    del ascii_obj
    del binary_obj
    assert isfile(ascii_tmp) is False
    assert isfile(binary_tmp) is False

    # Where the named files are expected to land once saved
    ascii_target = join(self.tmp_dir, "ascii.file")
    binary_target = join(self.tmp_dir, "binary.file")

    # Create objects again, this time with explicit parameters
    ascii_obj = NNTPAsciiContent(
        filepath="ascii.file",
        part=2,
        work_dir=self.tmp_dir,
    )
    binary_obj = NNTPBinaryContent(
        filepath="binary.file",
        part="10",
        work_dir=self.tmp_dir,
    )

    # The part is stored as a number, even when passed as a string
    assert ascii_obj.part == 2
    assert binary_obj.part == 10

    ascii_obj.open()
    binary_obj.open()

    # Opening alone does not create the named files
    assert isfile(binary_target) is False
    assert isfile(ascii_target) is False

    # Remember the paths in use before saving
    ascii_tmp = ascii_obj.filepath
    binary_tmp = binary_obj.filepath

    ascii_obj.save()
    binary_obj.save()

    # Saving creates the named files ...
    assert isfile(binary_target) is True
    assert isfile(ascii_target) is True

    # ... and the files used before the save are gone
    assert isfile(ascii_tmp) is False
    assert isfile(binary_tmp) is False

    # The objects now report a different path than before the save
    assert ascii_tmp != ascii_obj.filepath
    assert binary_tmp != binary_obj.filepath

    # The updated path points at an existing file
    assert isfile(ascii_obj.filepath) is True
    assert isfile(binary_obj.filepath) is True

    # Saved files remain once the objects are deleted
    del ascii_obj
    del binary_obj
    assert isfile(binary_target) is True
    assert isfile(ascii_target) is True

    # Cleanup
    unlink(ascii_target)
    unlink(binary_target)
def test_binary_article_iterations(self):
    """
    Binary content loaded from a file can be iterated block by block,
    and the loaded file is left in place when the object goes away.
    """
    # Build a payload of exactly four full blocks of random data ...
    payload = BytesIO()
    for _ in range(4):
        payload.write(urandom(BLOCK_SIZE))

    # ... plus a single extra byte, so a fifth (short) read is needed
    payload.write('0')

    ba = NNTPBinaryContent()

    # Empty content is not valid
    assert ba.is_valid() is False

    # A failed load does not make it valid either
    assert ba.load('unknown_file') is False
    assert ba.is_valid() is False

    temp_file = join(self.tmp_dir, 'NNTPContent_Test-test_iterations.tmp')
    with open(temp_file, 'wb') as fd:
        fd.write(payload.getvalue())

    assert isfile(temp_file) is True
    assert ba.load(temp_file) is True

    # Iterate the content: four full blocks, then the one leftover byte
    remaining = 4
    for block in ba:
        if remaining > 0:
            assert len(block) == BLOCK_SIZE
        else:
            # the fifth read
            assert len(block) == 1
        remaining -= 1

    # Five reads in total leave the counter one below zero
    assert remaining == -1

    # The reported length matches what was written
    assert len(ba) == (BLOCK_SIZE * 4) + 1

    # Drop the article; a loaded file is not attached, so it must
    # survive the object
    del ba
    assert isfile(temp_file) is True

    # Load the same file into another object
    ba = NNTPAsciiContent()
    assert ba.load(temp_file) is True

    # Loading never attaches the file
    assert ba.is_attached() is False
    assert isfile(temp_file) is True

    # Detaching changes nothing on disk
    ba.detach()
    assert isfile(temp_file) is True

    # The object still reports that it is not attached
    assert ba.is_attached() is False

    # The file is still present after the object is deleted
    del ba
    assert isfile(temp_file) is True
def test_general_features(self):
    """
    Walk content objects through their basic life cycle: temporary
    files disappear with their object, while saved files outlive it.
    """
    # Objects created without any arguments
    ascii_obj = NNTPAsciiContent()
    binary_obj = NNTPBinaryContent()

    # Opening gives each object a file on disk
    ascii_obj.open()
    binary_obj.open()

    ascii_tmp = ascii_obj.filepath
    binary_tmp = binary_obj.filepath
    assert isfile(ascii_tmp) is True
    assert isfile(binary_tmp) is True

    # Nothing has been written yet
    assert len(ascii_obj) == 0
    assert len(binary_obj) == 0

    # Deleting the objects removes their files
    del ascii_obj
    del binary_obj
    assert isfile(ascii_tmp) is False
    assert isfile(binary_tmp) is False

    # Where the named files are expected to land once saved
    ascii_target = join(self.tmp_dir, "ascii.file")
    binary_target = join(self.tmp_dir, "binary.file")

    # Create objects again, this time with explicit parameters
    ascii_obj = NNTPAsciiContent(
        filepath="ascii.file",
        part=2,
        work_dir=self.tmp_dir,
    )
    binary_obj = NNTPBinaryContent(
        filepath="binary.file",
        part="10",
        work_dir=self.tmp_dir,
    )

    # The part is stored as a number, even when passed as a string
    assert ascii_obj.part == 2
    assert binary_obj.part == 10

    ascii_obj.open()
    binary_obj.open()

    # Opening alone does not create the named files
    assert isfile(binary_target) is False
    assert isfile(ascii_target) is False

    # Remember the paths in use before saving
    ascii_tmp = ascii_obj.filepath
    binary_tmp = binary_obj.filepath

    ascii_obj.save()
    binary_obj.save()

    # Saving creates the named files ...
    assert isfile(binary_target) is True
    assert isfile(ascii_target) is True

    # ... and the files used before the save are gone
    assert isfile(ascii_tmp) is False
    assert isfile(binary_tmp) is False

    # The objects now report a different path than before the save
    assert ascii_tmp != ascii_obj.filepath
    assert binary_tmp != binary_obj.filepath

    # The updated path points at an existing file
    assert isfile(ascii_obj.filepath) is True
    assert isfile(binary_obj.filepath) is True

    # Saved files remain once the objects are deleted
    del ascii_obj
    del binary_obj
    assert isfile(binary_target) is True
    assert isfile(ascii_target) is True

    # Cleanup
    unlink(ascii_target)
    unlink(binary_target)
def encode(self, content, mem_buf=DEFAULT_BUFFER_SIZE):
    """
    Encodes an NNTPContent object passed in and returns the result as a
    new NNTPAsciiContent object.

    Args:
        content: an NNTPContent object; anything else is presumed to be
            a file path and is wrapped in an NNTPContent object first.
        mem_buf: the number of bytes requested from the content's
            stream on each read.

    Returns:
        The NNTPAsciiContent object holding the =ybegin and =ypart
        header lines, the encoded body and the =yend footer; or None if
        the content could not be opened or the fast encoder raised a
        YencError.
    """

    if isinstance(content, NNTPContent):
        # Create our ascii instance
        _encoded = NNTPAsciiContent(
            filepath=content.filename,
            part=content.part,
            total_parts=content.total_parts,
            sort_no=content.sort_no,
            work_dir=self.work_dir,
            # We want to ensure we're working with a unique attached file
            unique=True,
        )

    else:
        # If we reach here, we presume our content is a filename

        # Create our ascii instance
        _encoded = NNTPAsciiContent(
            filepath=content,
            work_dir=self.work_dir,
            # We want to ensure we're working with a unique attached file
            unique=True,
        )

        # Convert our content object into an NNTPContent object
        content = NNTPContent(
            filepath=content,
            work_dir=self.work_dir,
        )

    # yEnc (v1.3) begin
    fmt_ybegin = '=ybegin part=%d total=%d line=%d size=%d name=%s' % (
        content.part, content.total_parts, self.linelen,
        len(content), content.filename,
    )

    # yEnc part
    fmt_ypart = '=ypart begin=%d end=%d' % (
        content.begin() + 1,
        content.end(),
    )

    if isinstance(content._parent, NNTPContent):
        # yEnc end; the crc32 of the parent is included as well
        fmt_yend = '=yend size=%d part=%d pcrc32=%s crc32=%s' % (
            len(content), content.part,
            content.crc32(), content._parent.crc32(),
        )

    else:
        # yEnc end
        fmt_yend = '=yend size=%d part=%d pcrc32=%s' % (
            len(content), content.part, content.crc32(),
        )

    # Write =ybegin line
    _encoded.write(fmt_ybegin + EOL)

    # Write =ypart line
    _encoded.write(fmt_ypart + EOL)

    if not content.open():
        return None

    # Encoded data that has not been written out as a full line yet
    results = ""

    # Column and crc are handed to (and returned by) the fast encoder
    column = 0
    crc = BIN_MASK

    try:
        # Process the content until we reach the end of its stream
        while True:
            # Read in our data
            data = content.stream.read(mem_buf)
            if not data:
                # We're done
                break

            if FAST_YENC_SUPPORT:
                try:
                    _results, crc, column = encode_string(data, crc, column)

                    # Append our parsed content onto our ongoing buffer
                    results += _results

                except YencError as e:
                    logger.error("Failed to encode Yenc for %s." % content)
                    logger.debug('Yenc exception: %s' % (str(e)))
                    return None

            else:
                # The slow way: every byte has 42 added to it (modulo
                # 256). A result listed in
                # YENC_ENCODE_ESCAPED_CHARACTERS gets a further 64
                # added (modulo 256) and is prefixed with the escape
                # character (=).
                chunk = []
                for char in data:
                    _byte = (ord(char) + 42) & 0xff
                    if _byte in YENC_ENCODE_ESCAPED_CHARACTERS:
                        _byte = (_byte + 64) & 0xff
                        # Escape Sequence
                        chunk.append('=')

                    # Store our character
                    chunk.append(chr(_byte))

                # Joined once per read instead of growing the buffer one
                # character at a time
                results += ''.join(chunk)

            # Write out every complete line currently in our buffer
            offset = 0
            while offset < (len(results) - self.linelen + 1):
                eol = offset + self.linelen
                if results[offset:eol][-1] == '=':
                    # Lines can't end with the escape sequence (=). If
                    # we get here then this one did. We just adjust our
                    # end-of-line by 1 and keep moving
                    eol -= 1

                _encoded.write(results[offset:eol] + EOL)
                offset = eol

            # Keep the incomplete remainder (if any) for the next pass
            results = results[offset:]

    finally:
        # We're done reading our data; this also runs when we bail out
        # early, so the content is never left open
        content.close()

    if len(results):
        # We still have content left in our buffer
        _encoded.write(results + EOL)

    # Write footer
    _encoded.write(fmt_yend + EOL)

    if _encoded:
        # close article when complete
        _encoded.close()

    # Return our encoded object
    return _encoded
def encode(self, content, mem_buf=DEFAULT_BUFFER_SIZE):
    """
    Encodes an NNTPContent object passed in and returns the result as a
    new NNTPAsciiContent object.

    Args:
        content: an NNTPContent object; anything else is presumed to be
            a file path and is wrapped in an NNTPContent object first.
        mem_buf: the number of bytes requested from the content's
            stream on each read.

    Returns:
        The NNTPAsciiContent object holding the =ybegin and =ypart
        header lines, the encoded body and the =yend footer; or None if
        the content could not be opened or the fast encoder raised a
        YencError.
    """

    if isinstance(content, NNTPContent):
        # Create our ascii instance
        _encoded = NNTPAsciiContent(
            filepath=content.filename,
            part=content.part,
            total_parts=content.total_parts,
            sort_no=content.sort_no,
            work_dir=self.work_dir,
            # We want to ensure we're working with a unique attached file
            unique=True,
        )

    else:
        # If we reach here, we presume our content is a filename

        # Create our ascii instance
        _encoded = NNTPAsciiContent(
            filepath=content,
            work_dir=self.work_dir,
            # We want to ensure we're working with a unique attached file
            unique=True,
        )

        # Convert our content object into an NNTPContent object
        content = NNTPContent(
            filepath=content,
            work_dir=self.work_dir,
        )

    # yEnc (v1.3) begin
    fmt_ybegin = '=ybegin part=%d total=%d line=%d size=%d name=%s' % (
        content.part, content.total_parts, self.linelen,
        len(content), content.filename,
    )

    # yEnc part
    fmt_ypart = '=ypart begin=%d end=%d' % (
        content.begin() + 1,
        content.end(),
    )

    if isinstance(content._parent, NNTPContent):
        # yEnc end; the crc32 of the parent is included as well
        fmt_yend = '=yend size=%d part=%d pcrc32=%s crc32=%s' % (
            len(content), content.part,
            content.crc32(), content._parent.crc32(),
        )

    else:
        # yEnc end
        fmt_yend = '=yend size=%d part=%d pcrc32=%s' % (
            len(content), content.part, content.crc32(),
        )

    # Write =ybegin line
    _encoded.write(fmt_ybegin + EOL)

    # Write =ypart line
    _encoded.write(fmt_ypart + EOL)

    if not content.open():
        return None

    # Encoded data that has not been written out as a full line yet
    results = ""

    # Column and crc are handed to (and returned by) the fast encoder
    column = 0
    crc = BIN_MASK

    try:
        # Process the content until we reach the end of its stream
        while True:
            # Read in our data
            data = content.stream.read(mem_buf)
            if not data:
                # We're done
                break

            if FAST_YENC_SUPPORT:
                try:
                    _results, crc, column = encode_string(data, crc, column)

                    # Append our parsed content onto our ongoing buffer
                    results += _results

                except YencError as e:
                    logger.error("Failed to encode Yenc for %s." % content)
                    logger.debug('Yenc exception: %s' % (str(e)))
                    return None

            else:
                # The slow way: every byte has 42 added to it (modulo
                # 256). A result listed in
                # YENC_ENCODE_ESCAPED_CHARACTERS gets a further 64
                # added (modulo 256) and is prefixed with the escape
                # character (=).
                chunk = []
                for char in data:
                    _byte = (ord(char) + 42) & 0xff
                    if _byte in YENC_ENCODE_ESCAPED_CHARACTERS:
                        _byte = (_byte + 64) & 0xff
                        # Escape Sequence
                        chunk.append('=')

                    # Store our character
                    chunk.append(chr(_byte))

                # Joined once per read instead of growing the buffer one
                # character at a time
                results += ''.join(chunk)

            # Write out every complete line currently in our buffer
            offset = 0
            while offset < (len(results) - self.linelen + 1):
                eol = offset + self.linelen
                if results[offset:eol][-1] == '=':
                    # Lines can't end with the escape sequence (=). If
                    # we get here then this one did. We just adjust our
                    # end-of-line by 1 and keep moving
                    eol -= 1

                _encoded.write(results[offset:eol] + EOL)
                offset = eol

            # Keep the incomplete remainder (if any) for the next pass
            results = results[offset:]

    finally:
        # We're done reading our data; this also runs when we bail out
        # early, so the content is never left open
        content.close()

    if len(results):
        # We still have content left in our buffer
        _encoded.write(results + EOL)

    # Write footer
    _encoded.write(fmt_yend + EOL)

    if _encoded:
        # close article when complete
        _encoded.close()

    # Return our encoded object
    return _encoded
class CodecAscii(CodecBase):
    """
    This is the codec used to store general content parsed that is not
    encoded on an NNTP Server.

    It doesn't do much but store/track ascii data
    """

    # Byte values treated as text by detect(): the usual control
    # characters (BEL, BS, TAB, LF, FF, CR, ESC) plus everything from
    # 0x20 upwards except DEL (0x7f).
    _TEXT_CHARS = bytes(bytearray(
        ({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100))) - {0x7f}
    ))

    def __init__(self, descriptor=None, work_dir=None, *args, **kwargs):
        """
        Initialize the codec and create the object that collects the
        decoded text content.
        """
        super(CodecAscii, self).__init__(descriptor=descriptor,
                                         work_dir=work_dir, *args, **kwargs)

        # Our Ascii Object we can reference while we store our
        # text content
        self.decoded = NNTPAsciiContent(
            filepath='.message',
            work_dir=self.work_dir,
        )

    def detect(self, line, relative=True):
        """
        A Simple function that can be used to determine if there is
        ascii on the line.

        It returns None if the line contains binary (non-text) bytes,
        otherwise it returns an empty dictionary since there are no
        meta-keys to extract from a common line.

        The `relative` argument is accepted but not used by this codec.
        """
        if isinstance(line, type(u'')):
            # translate() with a delete-set only works on byte strings,
            # so unicode text is encoded before it is inspected
            line = line.encode('utf-8')

        # Strip every text byte; whatever survives is binary data.
        # (The previous code tested a lambda object here, which is
        # always truthy, so None was returned for every line.)
        if line.translate(None, self._TEXT_CHARS):
            # We're dealing with binary data
            return None

        # We always match this type, but we also always return
        # an empty dictionary
        return {}

    def decode(self, stream):
        """
        Decode a single line read from the stream; decoding always stops
        at the end of the line.

        Returns the total number of bytes decoded so far.
        """

        # Read in our data
        data = stream.readline()
        if not data:
            # We're Done; returns the number of bytes decoded
            return self._decoded

        # Strip trailing whitespace and terminate the line with EOL
        decoded = data.rstrip() + EOL

        # Line Tracking
        self._lines += 1

        # Track the number of bytes decoded
        self._decoded += len(decoded)

        # Write data to out stream
        self.decoded.write(decoded)

        # Returns the number of bytes decoded
        return self._decoded

    def reset(self):
        """
        Reset our decoded content
        """
        super(CodecAscii, self).reset()

        # Start over with a new, empty content object
        self.decoded = NNTPAsciiContent(
            filepath='.message',
            work_dir=self.work_dir,
        )

    def __str__(self):
        """
        Return a printable version of the codec
        """
        return repr(self)

    def __repr__(self):
        """
        Return a printable object
        """
        return '<CodecAscii lines_processed=%d />' % (self._lines, )