def __flush(self, force=False):
    if not force and self.__frlen < 262144:
        return
    assert sum(len(_[0]) for _ in self.__frame) == self.__frlen
    file_off = self.__file_off
    ctx = lz4frame.create_compression_context()
    self.__file_write(lz4frame.compress_begin(
        ctx,
        block_size=lz4frame.BLOCKSIZE_MAX4MB,  # does no harm for larger blobs
        block_mode=lz4frame.BLOCKMODE_LINKED,
        compression_level=5,
        content_checksum=lz4frame.CONTENTCHECKSUM_ENABLED,  # sorry, no per-block checksums yet
        auto_flush=False,
        source_size=self.__frlen))
    for blob, meta in self.__frame:
        self.__file_write(lz4frame.compress_update(ctx, blob))
    self.__file_write(lz4frame.compress_end(ctx))
    json.dump({'type': 'frame', 'file_off': file_off,
               'file_size': self.__file_off - file_off,
               'text_off': self.__text_off - self.__frlen,
               'text_size': self.__frlen}, self.__meta, sort_keys=True)
    self.__meta.write('\n')
    for blob, meta in self.__frame:
        if meta is not None:
            json.dump(meta, self.__meta, sort_keys=True)
            self.__meta.write('\n')
    json.dump({'type': '/frame'}, self.__meta)
    self.__meta.write('\n')
    self.__frame = []
    self.__frlen = 0
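# The index written above is line-delimited JSON: a {'type': 'frame', ...}
# record carrying file/text offsets, optional per-blob meta records, then a
# {'type': '/frame'} terminator. A hypothetical reader sketch built on that
# format follows; read_frame_at(), data_path and meta_path are illustrative
# names, not part of the original code.
import json
import lz4.frame as lz4frame

def read_frame_at(data_path, meta_path, text_off):
    """Decompress the frame whose uncompressed span covers text_off."""
    with open(meta_path) as meta:
        for line in meta:
            rec = json.loads(line)
            if not isinstance(rec, dict) or rec.get('type') != 'frame':
                continue  # skip per-blob meta records and '/frame' markers
            if rec['text_off'] <= text_off < rec['text_off'] + rec['text_size']:
                with open(data_path, 'rb') as data:
                    data.seek(rec['file_off'])
                    return lz4frame.decompress(data.read(rec['file_size']))
    raise KeyError('no frame covers offset %d' % text_off)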
def test_compress_huge_without_size(self):
    context = lz4frame.create_compression_context()
    input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 4096
    chunk_size = int((len(input_data) / 2) + 1)
    compressed = lz4frame.compress_begin(context)
    compressed += lz4frame.compress_update(context, input_data[:chunk_size])
    compressed += lz4frame.compress_update(context, input_data[chunk_size:])
    compressed += lz4frame.compress_end(context)
    decompressed = lz4frame.decompress(compressed)
    self.assertEqual(input_data, decompressed)
def test_compress_begin_update_end_no_auto_flush(self):
    context = lz4frame.create_compression_context()
    self.assertNotEqual(context, None)
    input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
    chunk_size = int((len(input_data) / 2) + 1)
    compressed = lz4frame.compress_begin(context, auto_flush=0)
    compressed += lz4frame.compress_update(context, input_data[:chunk_size])
    compressed += lz4frame.compress_update(context, input_data[chunk_size:])
    compressed += lz4frame.compress_end(context)
    decompressed = lz4frame.decompress(compressed)
    self.assertEqual(input_data, decompressed)
def test_decompress_truncated(self):
    input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
    for chksum in (lz4frame.CONTENTCHECKSUM_DISABLED, lz4frame.CONTENTCHECKSUM_ENABLED):
        for conlen in (0, len(input_data)):
            context = lz4frame.create_compression_context()
            compressed = lz4frame.compress_begin(context, content_checksum=chksum, source_size=conlen)
            compressed += lz4frame.compress_update(context, input_data)
            compressed += lz4frame.compress_end(context)
            for i in range(len(compressed)):
                with self.assertRaisesRegexp(
                        RuntimeError,
                        r'^(LZ4F_getFrameInfo failed with code: ERROR_frameHeader_incomplete|LZ4F_freeDecompressionContext reported unclean decompressor state \(truncated frame\?\): \d+)$'):
                    lz4frame.decompress(compressed[:i])
def test_compress_begin_update_end_no_auto_flush_2(self):
    input_data = os.urandom(4 * 128 * 1024)  # 4 * 128 KB of random data
    context = lz4frame.create_compression_context()
    self.assertNotEqual(context, None)
    compressed = lz4frame.compress_begin(context, auto_flush=0)
    chunk_size = 32 * 1024  # 32 KB, half of the default block size
    start = 0
    end = start + chunk_size
    while start <= len(input_data):
        compressed += lz4frame.compress_update(context, input_data[start:end])
        start = end
        end = start + chunk_size
    compressed += lz4frame.compress_end(context)
    decompressed = lz4frame.decompress(compressed)
    self.assertEqual(input_data, decompressed)
def roundtrip(x):
    context = lz4frame.create_compression_context()
    self.assertNotEqual(context, None)
    compressed = lz4frame.compress_begin(
        context,
        block_size=lz4frame.BLOCKSIZE_MAX256KB,
        block_mode=lz4frame.BLOCKMODE_LINKED,
        compression_level=lz4frame.COMPRESSIONLEVEL_MAX,
        auto_flush=0)
    chunk_size = 128 * 1024  # 128 KB, half of the block size
    start = 0
    end = start + chunk_size
    while start <= len(x):
        compressed += lz4frame.compress_update(context, x[start:end])
        start = end
        end = start + chunk_size
    compressed += lz4frame.compress_end(context)
    decompressed = lz4frame.decompress(compressed)
    return decompressed
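# A minimal driver for the helper above; it assumes roundtrip() is nested
# inside a unittest.TestCase method (it uses self from the enclosing scope).
# The data size is illustrative.
data = os.urandom(3 * 256 * 1024)  # three full 256 KB blocks
self.assertEqual(data, roundtrip(data))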
def test_compress_begin_update_end_no_auto_flush_not_defaults(self):
    input_data = os.urandom(10 * 128 * 1024)  # 10 * 128 KB of random data
    context = lz4frame.create_compression_context()
    self.assertNotEqual(context, None)
    compressed = lz4frame.compress_begin(
        context,
        block_size=lz4frame.BLOCKSIZE_MAX256KB,
        block_mode=lz4frame.BLOCKMODE_LINKED,
        compression_level=lz4frame.COMPRESSIONLEVEL_MAX,
        auto_flush=0)
    chunk_size = 128 * 1024  # 128 KB, half of the block size
    start = 0
    end = start + chunk_size
    while start <= len(input_data):
        compressed += lz4frame.compress_update(context, input_data[start:end])
        start = end
        end = start + chunk_size
    compressed += lz4frame.compress_end(context)
    decompressed = lz4frame.decompress(compressed)
    self.assertEqual(input_data, decompressed)
import lz4.frame as lz4frame

context = lz4frame.create_compression_context()
input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123" * 4096 * 1000
chunk_size = int((len(input_data) / 2) + 1)
compressed = lz4frame.compress_begin(context, source_size=len(input_data))
compressed += lz4frame.compress_update(context, input_data[:chunk_size])
compressed += lz4frame.compress_update(context, input_data[chunk_size:])
compressed += lz4frame.compress_end(context)
with open('t.lz4', 'wb') as file:  # binary mode: compressed is bytes
    file.write(compressed)
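# For completeness, a read-back check using the same one-shot
# lz4frame.decompress helper the tests above rely on.
with open('t.lz4', 'rb') as file:
    restored = lz4frame.decompress(file.read())
assert restored == input_data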