def __init__(self, filename, mode="r", buffering=None, compresslevel=9): """Open a bzip2-compressed file. If filename is a str or bytes object, is gives the name of the file to be opened. Otherwise, it should be a file object, which will be used to read or write the compressed data. mode can be 'r' for reading (default), 'w' for (over)writing, or 'a' for appending. These can equivalently be given as 'rb', 'wb', and 'ab'. buffering is ignored. Its use is deprecated. If mode is 'w' or 'a', compresslevel can be a number between 1 and 9 specifying the level of compression: 1 produces the least compression, and 9 (default) produces the most compression. If mode is 'r', the input file may be the concatenation of multiple compressed streams. """ # This lock must be recursive, so that BufferedIOBase's # readline(), readlines() and writelines() don't deadlock. self._lock = RLock() self._fp = None self._closefp = False self._mode = _MODE_CLOSED self._pos = 0 self._size = -1 if buffering is not None: warnings.warn("Use of 'buffering' argument is deprecated", DeprecationWarning) if not (1 <= compresslevel <= 9): raise ValueError("compresslevel must be between 1 and 9") if mode in ("", "r", "rb"): mode = "rb" mode_code = _MODE_READ self._decompressor = BZ2Decompressor() self._buffer = None elif mode in ("w", "wb"): mode = "wb" mode_code = _MODE_WRITE self._compressor = BZ2Compressor(compresslevel) elif mode in ("a", "ab"): mode = "ab" mode_code = _MODE_WRITE self._compressor = BZ2Compressor(compresslevel) else: raise ValueError("Invalid mode: {!r}".format(mode)) if isinstance(filename, (str, bytes)): self._fp = builtins.open(filename, mode) self._closefp = True self._mode = mode_code elif hasattr(filename, "read") or hasattr(filename, "write"): self._fp = filename self._mode = mode_code else: raise TypeError("filename must be a str or bytes object, or a file")
def test_EOF_error(self):
    """Any input after the end-of-stream marker raises EOFError."""
    from _bz2 import BZ2Decompressor
    decomp = BZ2Decompressor()
    decomp.decompress(self.DATA)
    # Once the stream has finished, further input is rejected --
    # even an empty byte string.
    raises(EOFError, decomp.decompress, b"foo")
    raises(EOFError, decomp.decompress, b"")
def test_decompress(self):
    """One-shot decompression of a complete stream."""
    from _bz2 import BZ2Decompressor
    decomp = BZ2Decompressor()
    # The data argument is mandatory.
    raises(TypeError, decomp.decompress)
    assert decomp.decompress(self.DATA) == self.TEXT
def _rewind(self):
    """Seek back to the start of the file and reset decompression state."""
    self._fp.seek(0, 0)
    self._mode = _MODE_READ
    self._pos = 0
    # Discard any buffered output and start a fresh decompressor.
    self._buffer = b""
    self._buffer_offset = 0
    self._decompressor = BZ2Decompressor()
def _fill_buffer(self):
    """Refill self._buffer with decompressed data.

    Returns True if the buffer now contains unread data, or False at
    end-of-file. Raises EOFError if the compressed data ends before
    the end-of-stream marker.
    """
    if self._mode == _MODE_READ_EOF:
        return False
    # Depending on the input data, our call to the decompressor may not
    # return any data. In this case, try again after reading another block.
    while self._buffer_offset == len(self._buffer):
        # Prefer leftover bytes from the previous stream over a fresh read.
        rawblock = (self._decompressor.unused_data or
                    self._fp.read(_BUFFER_SIZE))

        if not rawblock:
            if self._decompressor.eof:
                # End-of-stream marker and end of file. We're good.
                self._mode = _MODE_READ_EOF
                self._size = self._pos
                return False
            else:
                # Problem - we were expecting more compressed data.
                raise EOFError("Compressed file ended before the "
                               "end-of-stream marker was reached")

        if self._decompressor.eof:
            # Continue to next stream.
            self._decompressor = BZ2Decompressor()
            try:
                self._buffer = self._decompressor.decompress(rawblock)
            except OSError:
                # Trailing data isn't a valid bzip2 stream. We're done here.
                self._mode = _MODE_READ_EOF
                self._size = self._pos
                return False
        else:
            self._buffer = self._decompressor.decompress(rawblock)
        self._buffer_offset = 0
    return True
def decompress(data):
    """Decompress a block of data.

    For incremental decompression, use a BZ2Decompressor object instead.

    The input may be the concatenation of several bzip2 streams; each
    stream is decompressed in turn. Invalid trailing data after at least
    one complete stream is silently ignored. Returns the decompressed
    data as bytes (b"" for empty input).
    """
    results = []
    # Loop over concatenated streams; stop cleanly when input is exhausted
    # (the original "while 1: if data:" shape never terminated / returned
    # prematurely for empty input).
    while data:
        decomp = BZ2Decompressor()
        try:
            res = decomp.decompress(data)
        except OSError:
            if results:
                break  # leftover data is not a valid bzip2 stream; ignore it
            else:
                raise  # error on the very first stream; propagate
        results.append(res)
        if not decomp.eof:
            raise ValueError(
                'Compressed data ended before the end-of-stream marker was reached'
            )
        data = decomp.unused_data
    # BUG FIX: results are bytes objects, so they must be joined with a
    # bytes separator -- ''.join() raised TypeError here.
    return b''.join(results)
def _fill_buffer(self):
    """Refill self._buffer; True if data is available, False at EOF."""
    # A single raw block may yield no output at all, so keep feeding the
    # decompressor until the buffer is non-empty (or we hit end-of-file).
    while not self._buffer:
        # Leftover bytes from a finished stream take priority over a read.
        rawblock = (self._decompressor.unused_data or
                    self._fp.read(_BUFFER_SIZE))
        if not rawblock:
            if not self._decompressor.eof:
                raise EOFError("Compressed file ended before the "
                               "end-of-stream marker was reached")
            self._mode = _MODE_READ_EOF
            self._size = self._pos
            return False
        # Continue to next stream.
        if self._decompressor.eof:
            self._decompressor = BZ2Decompressor()
        self._buffer = self._decompressor.decompress(rawblock)
    return True
def test_decompressor_pickle_error(self):
    """BZ2Decompressor instances refuse to be pickled."""
    import pickle
    from _bz2 import BZ2Decompressor
    exc = raises(TypeError, pickle.dumps, BZ2Decompressor())
    expected = "cannot serialize '_bz2.BZ2Decompressor' object"
    assert exc.value.args[0] == expected
def test_decompress_unused_data(self): # test with unused data. (data after EOF) from _bz2 import BZ2Decompressor bz2d = BZ2Decompressor() unused_data = b"this is unused data" decompressed_data = bz2d.decompress(self.DATA + unused_data) assert decompressed_data == self.TEXT assert bz2d.unused_data == unused_data
def test_decompress_chunks_10(self): from _bz2 import BZ2Decompressor bz2d = BZ2Decompressor() decompressed_data = b"" n = 0 while True: temp = self.DATA[n * 10:(n + 1) * 10] if not temp: break decompressed_data += bz2d.decompress(temp) n += 1 assert decompressed_data == self.TEXT
def test_decompress_max_length(self): from _bz2 import BZ2Decompressor bz2d = BZ2Decompressor() decomp = [] length = len(self.DATA) decomp.append(bz2d.decompress(self.DATA, max_length=100)) assert len(decomp[-1]) == 100 while not bz2d.eof: decomp.append(bz2d.decompress(b"", max_length=200)) assert len(decomp[-1]) <= 200 assert b''.join(decomp) == self.TEXT
def decompress(data):
    """Decompress a block of data.

    For incremental decompression, use a BZ2Decompressor object instead.
    """
    if len(data) == 0:
        return b""

    chunks = []
    while True:
        decomp = BZ2Decompressor()
        chunks.append(decomp.decompress(data))
        if not decomp.eof:
            raise ValueError("Compressed data ended before the "
                             "end-of-stream marker was reached")
        data = decomp.unused_data
        if not data:
            # Input exhausted; stitch the streams' output together.
            return b"".join(chunks)
        # More bytes follow this stream: decode the next one.
def decompress(data):
    """Decompress a block of data.

    For incremental decompression, use a BZ2Decompressor object instead.
    """
    results = []
    while data:
        decomp = BZ2Decompressor()
        try:
            chunk = decomp.decompress(data)
        except OSError:
            if not results:
                raise   # Error on the first iteration; bail out.
            break       # Leftover data is not a valid bzip2 stream; ignore it.
        results.append(chunk)
        if not decomp.eof:
            raise ValueError("Compressed data ended before the "
                             "end-of-stream marker was reached")
        data = decomp.unused_data
    return b"".join(results)
def _fill_buffer(self):
    """Refill self._buffer; True if data is available, False at EOF.

    Raises EOFError if the compressed data ends before the
    end-of-stream marker.
    """
    # Depending on the input data, a single call to the decompressor may
    # not return any data. Keep reading blocks until we get some output
    # (or reach end-of-file); otherwise a caller could mistake a
    # temporarily-empty buffer for EOF. This matches the sibling
    # _fill_buffer implementations in this file.
    while not self._buffer:
        if self._decompressor.unused_data:
            rawblock = self._decompressor.unused_data
        else:
            rawblock = self._fp.read(_BUFFER_SIZE)

        if not rawblock:
            if self._decompressor.eof:
                self._mode = _MODE_READ_EOF
                self._size = self._pos
                return False
            else:
                raise EOFError("Compressed file ended before the "
                               "end-of-stream marker was reached")

        # Continue to next stream.
        if self._decompressor.eof:
            self._decompressor = BZ2Decompressor()

        self._buffer = self._decompressor.decompress(rawblock)
    return True
def test_creation(self):
    """The constructor accepts no arguments."""
    from _bz2 import BZ2Decompressor
    # A positional argument is rejected...
    raises(TypeError, BZ2Decompressor, "foo")
    # ...while a bare call succeeds.
    BZ2Decompressor()
def test_buffer(self): from _bz2 import BZ2Decompressor bz2d = BZ2Decompressor() decompressed_data = bz2d.decompress(memoryview(self.DATA)) assert decompressed_data == self.TEXT
def test_attribute(self): from _bz2 import BZ2Decompressor bz2d = BZ2Decompressor() assert bz2d.unused_data == b""
def test_subsequent_read(self):
    """Corrupt input yields b'' first, then errors on a repeat feed."""
    from _bz2 import BZ2Decompressor
    decomp = BZ2Decompressor()
    assert decomp.decompress(self.BUGGY_DATA) == b''
    # Feeding the same bad data again must now fail.
    raises(IOError, decomp.decompress, self.BUGGY_DATA)