def __flush(self, force=False):
    """Compress the buffered frame into the output file, append its metadata
    records, and reset the in-memory buffer.

    No-op until at least 256 KiB of text has accumulated, unless *force* is
    true (presumably used on close to flush the tail — confirm with caller).
    """
    # 262144 = 256 KiB minimum frame size before flushing is worthwhile.
    if not force and self.__frlen < 262144:
        return
    # Invariant: __frlen equals the total length of all buffered blobs.
    assert sum(len(_[0]) for _ in self.__frame) == self.__frlen
    # Remember where this frame starts; __file_off is presumably advanced by
    # __file_write, so the delta below gives the compressed frame size —
    # TODO(review): confirm __file_write updates __file_off.
    file_off = self.__file_off
    ctx = lz4frame.create_compression_context()
    self.__file_write(lz4frame.compress_begin(
        ctx,
        block_size=lz4frame.BLOCKSIZE_MAX4MB,  # makes no harm for larger blobs
        block_mode=lz4frame.BLOCKMODE_LINKED,
        compression_level=5,
        content_checksum=lz4frame.CONTENTCHECKSUM_ENABLED,  # sorry, no per-block checksums yet
        auto_flush=False,
        source_size=self.__frlen))
    # Feed every buffered blob into the single LZ4 frame.
    for blob, meta in self.__frame:
        self.__file_write(lz4frame.compress_update(ctx, blob))
    self.__file_write(lz4frame.compress_end(ctx))
    # Metadata stream: one JSON line describing the frame, then one line per
    # blob that carries metadata, terminated by a '/frame' marker line.
    json.dump({'type': 'frame',
               'file_off': file_off,
               'file_size': self.__file_off - file_off,
               'text_off': self.__text_off - self.__frlen,
               'text_size': self.__frlen},
              self.__meta, sort_keys=True)
    self.__meta.write('\n')
    for blob, meta in self.__frame:
        if meta is not None:
            json.dump(meta, self.__meta, sort_keys=True)
            self.__meta.write('\n')
    json.dump({'type': '/frame'}, self.__meta)
    self.__meta.write('\n')
    # Reset the buffer for the next frame.
    self.__frame = []
    self.__frlen = 0
def test_compress_huge_without_size(self):
    """Round-trip a large payload fed in two updates, without source_size."""
    ctx = lz4frame.create_compression_context()
    payload = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 4096
    split = int((len(payload) / 2) + 1)
    pieces = [lz4frame.compress_begin(ctx)]
    pieces.append(lz4frame.compress_update(ctx, payload[:split]))
    pieces.append(lz4frame.compress_update(ctx, payload[split:]))
    pieces.append(lz4frame.compress_end(ctx))
    restored = lz4frame.decompress(b"".join(pieces))
    self.assertEqual(payload, restored)
def test_compress_begin_update_end_no_auto_flush(self):
    """Round-trip via begin/update/end with auto_flush disabled."""
    ctx = lz4frame.create_compression_context()
    self.assertNotEqual(ctx, None)
    payload = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
    split = int((len(payload) / 2) + 1)
    pieces = [lz4frame.compress_begin(ctx, auto_flush=0)]
    pieces.append(lz4frame.compress_update(ctx, payload[:split]))
    pieces.append(lz4frame.compress_update(ctx, payload[split:]))
    pieces.append(lz4frame.compress_end(ctx))
    restored = lz4frame.decompress(b"".join(pieces))
    self.assertEqual(payload, restored)
def test_roundtrip_multiframe_2(data):
    """Concatenate several identical frames; decompress() is called once per
    frame (it reads a frame from the start of the buffer each time, which
    works here because all frames hold the same data)."""
    nframes = 4
    ctx = lz4frame.create_compression_context()
    pieces = []
    for _ in range(nframes):
        pieces.append(lz4frame.compress_begin(ctx))
        pieces.append(lz4frame.compress_chunk(ctx, data))
        pieces.append(lz4frame.compress_flush(ctx))
    compressed = b''.join(pieces)
    decompressed = b''
    for _ in range(nframes):
        decompressed += lz4frame.decompress(compressed)
    assert len(decompressed) == nframes * len(data)
    assert data * nframes == decompressed
def test_decompress_truncated(self):
    """Every proper prefix of a compressed frame must raise RuntimeError.

    Exercised across both content-checksum settings and with/without a
    declared source size, since each changes the frame header layout.
    """
    input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
    for chksum in (lz4frame.CONTENTCHECKSUM_DISABLED, lz4frame.CONTENTCHECKSUM_ENABLED):
        for conlen in (0, len(input_data)):
            context = lz4frame.create_compression_context()
            compressed = lz4frame.compress_begin(context, content_checksum=chksum, source_size=conlen)
            compressed += lz4frame.compress_update(context, input_data)
            compressed += lz4frame.compress_end(context)
            for i in range(len(compressed)):
                # FIX: assertRaisesRegexp is a deprecated alias (since
                # Python 3.2) that was removed in Python 3.12; use
                # assertRaisesRegex instead. The pattern is unchanged.
                with self.assertRaisesRegex(
                    RuntimeError,
                    r'^(LZ4F_getFrameInfo failed with code: ERROR_frameHeader_incomplete|LZ4F_freeDecompressionContext reported unclean decompressor state \(truncated frame\?\): \d+)$'
                ):
                    lz4frame.decompress(compressed[:i])
def test_compress_begin_update_end_no_auto_flush_2(self):
    """Round-trip 4 * 128 KiB of random data in 32 KiB updates, no auto-flush."""
    payload = os.urandom(4 * 128 * 1024)
    ctx = lz4frame.create_compression_context()
    self.assertNotEqual(ctx, None)
    out = lz4frame.compress_begin(ctx, auto_flush=0)
    step = 32 * 1024  # 32 KiB, half of the default block size
    # NOTE: the <= bound deliberately mirrors the original loop — when the
    # payload length is an exact multiple of step, a final empty slice is
    # still passed to compress_update.
    offset = 0
    while offset <= len(payload):
        out += lz4frame.compress_update(ctx, payload[offset:offset + step])
        offset += step
    out += lz4frame.compress_end(ctx)
    self.assertEqual(payload, lz4frame.decompress(out))
def __flush(self, force=False):
    """Compress the buffered frame into the output file, append its metadata
    records, and reset the in-memory buffer.

    No-op until at least 256 KiB of text has accumulated, unless *force* is
    true (presumably used on close to flush the tail — confirm with caller).
    """
    # 262144 = 256 KiB minimum frame size before flushing is worthwhile.
    if not force and self.__frlen < 262144:
        return
    # Invariant: __frlen equals the total length of all buffered blobs.
    assert sum(len(_[0]) for _ in self.__frame) == self.__frlen
    # Remember where this frame starts; __file_off is presumably advanced by
    # __file_write, so the delta below gives the compressed frame size —
    # TODO(review): confirm __file_write updates __file_off.
    file_off = self.__file_off
    ctx = lz4frame.create_compression_context()
    self.__file_write(
        lz4frame.compress_begin(
            ctx,
            block_size=lz4frame.BLOCKSIZE_MAX4MB,  # makes no harm for larger blobs
            block_mode=lz4frame.BLOCKMODE_LINKED,
            compression_level=5,
            content_checksum=lz4frame.CONTENTCHECKSUM_ENABLED,  # sorry, no per-block checksums yet
            auto_flush=False,
            source_size=self.__frlen,
        ))
    # Feed every buffered blob into the single LZ4 frame.
    for blob, meta in self.__frame:
        self.__file_write(lz4frame.compress_update(ctx, blob))
    self.__file_write(lz4frame.compress_end(ctx))
    # Metadata stream: one JSON line describing the frame, then one line per
    # blob that carries metadata, terminated by a "/frame" marker line.
    json.dump(
        {
            "type": "frame",
            "file_off": file_off,
            "file_size": self.__file_off - file_off,
            "text_off": self.__text_off - self.__frlen,
            "text_size": self.__frlen,
        },
        self.__meta,
        sort_keys=True,
    )
    self.__meta.write("\n")
    for blob, meta in self.__frame:
        if meta is not None:
            json.dump(meta, self.__meta, sort_keys=True)
            self.__meta.write("\n")
    json.dump({"type": "/frame"}, self.__meta)
    self.__meta.write("\n")
    # Reset the buffer for the next frame.
    self.__frame = []
    self.__frlen = 0
def test_roundtrip_2(data, block_size, block_linked, content_checksum,
                     block_checksum, compression_level, auto_flush,
                     store_size):
    """One-shot chunk round-trip across the full parameter matrix."""
    ctx = lz4frame.create_compression_context()
    options = {
        'compression_level': compression_level,
        'block_size': block_size,
        'block_linked': block_linked,
        'content_checksum': content_checksum,
        'block_checksum': block_checksum,
        'auto_flush': auto_flush,
    }
    if store_size is True:
        options['source_size'] = len(data)
    compressed = lz4frame.compress_begin(ctx, **options)
    compressed += lz4frame.compress_chunk(ctx, data)
    compressed += lz4frame.compress_flush(ctx)
    # Frame header must reflect the requested parameters.
    get_frame_info_check(
        compressed,
        len(data),
        store_size,
        block_size,
        block_linked,
        content_checksum,
        block_checksum,
    )
    decompressed, bytes_read = lz4frame.decompress(
        compressed, return_bytes_read=True)
    assert bytes_read == len(compressed)
    assert decompressed == data
def test_roundtrip_multiframe_3(data):
    """Multi-frame round-trip using the chunked decompression context."""
    nframes = 4
    cctx = lz4frame.create_compression_context()
    pieces = []
    for _ in range(nframes):
        pieces.append(lz4frame.compress_begin(cctx))
        pieces.append(lz4frame.compress_chunk(cctx, data))
        pieces.append(lz4frame.compress_flush(cctx))
    compressed = b''.join(pieces)
    dctx = lz4frame.create_decompression_context()
    decompressed = b''
    for _ in range(nframes):
        chunk, bytes_read, eof = lz4frame.decompress_chunk(dctx, compressed)
        decompressed += chunk
        # Each call consumes exactly one of the identical frames.
        assert eof is True
        assert bytes_read == len(compressed) // nframes
    assert len(decompressed) == nframes * len(data)
    assert data * nframes == decompressed
def roundtrip(x):
    """Compress x in 128 KiB updates with non-default frame settings,
    then return the decompressed result (uses self from the enclosing
    test method)."""
    ctx = lz4frame.create_compression_context()
    self.assertNotEqual(ctx, None)
    pieces = [lz4frame.compress_begin(
        ctx,
        block_size=lz4frame.BLOCKSIZE_MAX256KB,
        block_mode=lz4frame.BLOCKMODE_LINKED,
        compression_level=lz4frame.COMPRESSIONLEVEL_MAX,
        auto_flush=0)]
    step = 128 * 1024  # 128 KiB, half of the 256 KiB block size
    # <= bound kept from the original: an exact-multiple length feeds one
    # final empty slice to compress_update.
    pos = 0
    while pos <= len(x):
        pieces.append(lz4frame.compress_update(ctx, x[pos:pos + step]))
        pos += step
    pieces.append(lz4frame.compress_end(ctx))
    return lz4frame.decompress(b''.join(pieces))
def test_compress_begin_update_end_no_auto_flush_not_defaults(self):
    """Round-trip 10 * 128 KiB of random data with non-default frame
    settings and auto_flush disabled."""
    payload = os.urandom(10 * 128 * 1024)
    ctx = lz4frame.create_compression_context()
    self.assertNotEqual(ctx, None)
    out = lz4frame.compress_begin(
        ctx,
        block_size=lz4frame.BLOCKSIZE_MAX256KB,
        block_mode=lz4frame.BLOCKMODE_LINKED,
        compression_level=lz4frame.COMPRESSIONLEVEL_MAX,
        auto_flush=0)
    step = 128 * 1024  # 128 KiB, half of the 256 KiB block size
    # <= bound kept from the original: an exact-multiple length feeds one
    # final empty slice to compress_update.
    offset = 0
    while offset <= len(payload):
        out += lz4frame.compress_update(ctx, payload[offset:offset + step])
        offset += step
    out += lz4frame.compress_end(ctx)
    self.assertEqual(payload, lz4frame.decompress(out))
def test_create_and_free_compression_context(self):
    """A freshly created compression context must not be None."""
    ctx = lz4frame.create_compression_context()
    self.assertNotEqual(ctx, None)
import lz4.frame as lz4frame

# Compress a large payload in two updates and write the frame to t.lz4.
context = lz4frame.create_compression_context()
input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 4096 * 1000
chunk_size = int((len(input_data) / 2) + 1)
compressed = lz4frame.compress_begin(context, source_size=len(input_data))
compressed += lz4frame.compress_update(context, input_data[:chunk_size])
compressed += lz4frame.compress_update(context, input_data[chunk_size:])
compressed += lz4frame.compress_end(context)
# FIX: compressed is bytes — text mode ('w') raises
# "TypeError: write() argument must be str" on the write; open in binary.
with open('t.lz4', 'wb') as file:
    file.write(compressed)
def test_create_compression_context():
    """create_compression_context must hand back a non-None context."""
    ctx = lz4frame.create_compression_context()
    assert ctx is not None
def test_roundtrip_chunked(data, block_size, block_linked, content_checksum,
                           block_checksum, compression_level, auto_flush,
                           store_size):
    """Chunked compress/decompress round-trip, checking frame info,
    bytes-read accounting, and the end-of-frame flags."""
    data, c_chunks, d_chunks = data
    c_context = lz4frame.create_compression_context()
    options = {
        'compression_level': compression_level,
        'block_size': block_size,
        'block_linked': block_linked,
        'content_checksum': content_checksum,
        'block_checksum': block_checksum,
        'auto_flush': auto_flush,
    }
    if store_size is True:
        options['source_size'] = len(data)
    compressed = lz4frame.compress_begin(c_context, **options)
    data_in = get_chunked(data, c_chunks)
    try:
        for piece in data_in:
            compressed += lz4frame.compress_chunk(c_context, piece)
    finally:
        del data_in
    compressed += lz4frame.compress_flush(c_context)
    get_frame_info_check(
        compressed,
        len(data),
        store_size,
        block_size,
        block_linked,
        content_checksum,
        block_checksum,
    )
    d_context = lz4frame.create_decompression_context()
    compressed_in = get_chunked(compressed, d_chunks)
    decompressed = b''
    bytes_read = 0
    eofs = []
    try:
        for piece in compressed_in:
            d, b, e = lz4frame.decompress_chunk(d_context, piece)
            decompressed += d
            bytes_read += b
            eofs.append(e)
    finally:
        del compressed_in
    assert bytes_read == len(compressed)
    assert decompressed == data
    assert eofs[-1] is True
    # NOTE(review): checks eofs[:-2], so the second-to-last flag is never
    # examined — kept as in the original; confirm whether [:-1] was intended.
    assert (True in eofs[:-2]) is False
def test_roundtrip_chunked(data, block_size, block_linked, content_checksum,
                           block_checksum, compression_level, auto_flush,
                           store_size):
    """Round-trip through compress_chunk/decompress_chunk with both streams
    split into chunks, verifying frame info and EOF bookkeeping."""
    data, c_chunks, d_chunks = data
    cctx = lz4frame.create_compression_context()
    kwargs = {
        'compression_level': compression_level,
        'block_size': block_size,
        'block_linked': block_linked,
        'content_checksum': content_checksum,
        'block_checksum': block_checksum,
        'auto_flush': auto_flush,
    }
    if store_size is True:
        kwargs['source_size'] = len(data)
    parts = [lz4frame.compress_begin(cctx, **kwargs)]
    feeder = get_chunked(data, c_chunks)
    try:
        while True:
            parts.append(lz4frame.compress_chunk(cctx, next(feeder)))
    except StopIteration:
        pass
    finally:
        del feeder
    parts.append(lz4frame.compress_flush(cctx))
    compressed = b''.join(parts)
    get_frame_info_check(
        compressed,
        len(data),
        store_size,
        block_size,
        block_linked,
        content_checksum,
        block_checksum,
    )
    dctx = lz4frame.create_decompression_context()
    source = get_chunked(compressed, d_chunks)
    out = []
    total_read = 0
    eof_flags = []
    try:
        while True:
            piece, nread, at_eof = lz4frame.decompress_chunk(dctx, next(source))
            out.append(piece)
            total_read += nread
            eof_flags.append(at_eof)
    except StopIteration:
        pass
    finally:
        del source
    decompressed = b''.join(out)
    assert total_read == len(compressed)
    assert decompressed == data
    assert eof_flags[-1] is True
    # NOTE(review): [:-2] leaves the second-to-last flag unchecked — kept
    # as in the original; confirm whether [:-1] was intended.
    assert (True in eof_flags[:-2]) is False