def test_write_content_size(self):
    """Check when ``write_to`` emits the content size in the frame header.

    With ``write_content_size=True`` but no declared size, the frame is the
    same length as one written without the option. Once ``size`` is passed
    to ``write_to``, the frame is one byte longer and reports the size.
    """
    payload = b'foobar' * 256

    plain = io.BytesIO()
    plain_cctx = zstd.ZstdCompressor(level=1)
    with plain_cctx.write_to(plain) as writer:
        self.assertEqual(writer.write(payload), 0)

    sized = io.BytesIO()
    sizing_cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
    with sizing_cctx.write_to(sized) as writer:
        self.assertEqual(writer.write(payload), 0)

    # The source size is unknown in streaming mode, so no header field is
    # written and both frames have the same length.
    self.assertEqual(len(sized.getvalue()), len(plain.getvalue()))

    # Declaring the size up front makes the header carry it.
    sized = io.BytesIO()
    with sizing_cctx.write_to(sized, size=len(payload)) as writer:
        self.assertEqual(writer.write(payload), 0)

    plain_params = zstd.get_frame_parameters(plain.getvalue())
    sized_params = zstd.get_frame_parameters(sized.getvalue())
    self.assertEqual(plain_params.content_size, 0)
    self.assertEqual(sized_params.content_size, 1536)
    self.assertEqual(plain_params.dict_id, 0)
    self.assertEqual(sized_params.dict_id, 0)
    self.assertFalse(plain_params.has_checksum)
    self.assertFalse(sized_params.has_checksum)

    self.assertEqual(len(sized.getvalue()), len(plain.getvalue()) + 1)
def test_multithreaded(self):
    """Exercise ``copy_stream`` on a compressor created with ``threads=2``.

    First pass checks the byte counts returned and the default frame
    parameters; second pass checks that content size and checksum are
    recorded when requested and the source size is declared.
    """
    run_length = 1048576
    source = io.BytesIO(
        b'a' * run_length + b'b' * run_length + b'c' * run_length)

    dest = io.BytesIO()
    threaded = zstd.ZstdCompressor(threads=2)
    bytes_read, bytes_written = threaded.copy_stream(source, dest)
    self.assertEqual(bytes_read, 3145728)
    self.assertEqual(bytes_written, 295)

    frame_params = zstd.get_frame_parameters(dest.getvalue())
    self.assertEqual(frame_params.content_size, 0)
    self.assertEqual(frame_params.dict_id, 0)
    self.assertFalse(frame_params.has_checksum)

    # Writing content size and checksum works.
    threaded = zstd.ZstdCompressor(
        threads=2, write_content_size=True, write_checksum=True)
    dest = io.BytesIO()
    source.seek(0)
    threaded.copy_stream(source, dest, size=len(source.getvalue()))

    frame_params = zstd.get_frame_parameters(dest.getvalue())
    self.assertEqual(frame_params.content_size, 3145728)
    self.assertEqual(frame_params.dict_id, 0)
    self.assertTrue(frame_params.has_checksum)
def test_no_dict_id(self):
    """Check that ``write_dict_id=False`` omits the dictionary ID via ``write_to``.

    The frame written with the ID is asserted to be four bytes longer and to
    report the trained dictionary's ID; the other frame reports ID 0.
    """
    # Same 384 samples, in the same order, as appending the triple 128 times.
    samples = [b'foo' * 64, b'bar' * 64, b'foobar' * 64] * 128
    trained = zstd.train_dictionary(1024, samples)
    payload = b'foobarfoobar'

    tagged = io.BytesIO()
    cctx = zstd.ZstdCompressor(level=1, dict_data=trained)
    with cctx.write_to(tagged) as writer:
        self.assertEqual(writer.write(payload), 0)

    cctx = zstd.ZstdCompressor(level=1, dict_data=trained,
                               write_dict_id=False)
    untagged = io.BytesIO()
    with cctx.write_to(untagged) as writer:
        self.assertEqual(writer.write(payload), 0)

    untagged_params = zstd.get_frame_parameters(untagged.getvalue())
    tagged_params = zstd.get_frame_parameters(tagged.getvalue())
    self.assertEqual(untagged_params.content_size, 0)
    self.assertEqual(tagged_params.content_size, 0)
    self.assertEqual(untagged_params.dict_id, 0)
    self.assertEqual(tagged_params.dict_id, trained.dict_id())
    self.assertFalse(untagged_params.has_checksum)
    self.assertFalse(tagged_params.has_checksum)

    self.assertEqual(len(tagged.getvalue()), len(untagged.getvalue()) + 4)
def test_write_checksum(self): cctx = zstd.ZstdCompressor(level=1) no_checksum = cctx.compress(b'foobar') cctx = zstd.ZstdCompressor(level=1, write_checksum=True) with_checksum = cctx.compress(b'foobar') self.assertEqual(len(with_checksum), len(no_checksum) + 4)
def test_write_content_size(self): cctx = zstd.ZstdCompressor(level=1) no_size = cctx.compress(b'foobar' * 256) cctx = zstd.ZstdCompressor(level=1, write_content_size=True) with_size = cctx.compress(b'foobar' * 256) self.assertEqual(len(with_size), len(no_size) + 1)
def test_write_content_size(self):
    """Check when ``copy_stream`` emits the content size in the frame header.

    Without a declared ``size`` the frame matches the baseline length even
    though ``write_content_size=True``; with ``size`` it is one byte longer
    and the frame parameters report the source length.
    """
    source = io.BytesIO(b'foobar' * 256)

    baseline = io.BytesIO()
    zstd.ZstdCompressor(level=1).copy_stream(source, baseline)

    source.seek(0)
    sized = io.BytesIO()
    sizing_cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
    sizing_cctx.copy_stream(source, sized)

    # Source content size is unknown, so no content size written.
    self.assertEqual(len(sized.getvalue()), len(baseline.getvalue()))

    source.seek(0)
    sized = io.BytesIO()
    sizing_cctx.copy_stream(source, sized, size=len(source.getvalue()))

    # We specified source size, so content size header is present.
    self.assertEqual(len(sized.getvalue()), len(baseline.getvalue()) + 1)

    baseline_params = zstd.get_frame_parameters(baseline.getvalue())
    sized_params = zstd.get_frame_parameters(sized.getvalue())
    self.assertEqual(baseline_params.content_size, 0)
    self.assertEqual(sized_params.content_size, 1536)
    self.assertEqual(baseline_params.dict_id, 0)
    self.assertEqual(sized_params.dict_id, 0)
    self.assertFalse(baseline_params.has_checksum)
    self.assertFalse(sized_params.has_checksum)
def test_write_checksum(self):
    """Check that ``write_checksum=True`` adds 4 bytes to ``compressobj`` output."""
    payload = b'foobar'

    plain_obj = zstd.ZstdCompressor(level=1).compressobj()
    bare_frame = plain_obj.compress(payload) + plain_obj.flush()

    summing_obj = zstd.ZstdCompressor(
        level=1, write_checksum=True).compressobj()
    summed_frame = summing_obj.compress(payload) + summing_obj.flush()

    self.assertEqual(len(summed_frame), len(bare_frame) + 4)
def test_write_content_size(self):
    """Check that ``write_content_size=True`` adds 1 byte to ``compressobj`` output.

    Both compression objects are given the source size; only the compressor
    option differs between the two frames.
    """
    payload = b'foobar' * 256
    declared = len(payload)

    plain_obj = zstd.ZstdCompressor(level=1).compressobj(size=declared)
    unsized_frame = plain_obj.compress(payload) + plain_obj.flush()

    sizing_obj = zstd.ZstdCompressor(
        level=1, write_content_size=True).compressobj(size=declared)
    sized_frame = sizing_obj.compress(payload) + sizing_obj.flush()

    self.assertEqual(len(sized_frame), len(unsized_frame) + 1)
def compress(input_data, compressiondict=None):
    """Compress ``input_data`` with zstd at level 22.

    Args:
        input_data: Data to compress. A ``str`` is encoded as UTF-8 first;
            anything else is handed to the compressor unchanged.
        compressiondict: Optional object passed as ``dict_data`` to
            ``zstd.ZstdCompressor``; omitted from the call when ``None``.

    Returns:
        Whatever ``ZstdCompressor.compress`` returns for the data.
    """
    payload = (input_data.encode('utf-8')
               if isinstance(input_data, str) else input_data)

    # Highest (best) compression level is 22.
    options = {'level': 22}
    if compressiondict is not None:
        options['dict_data'] = compressiondict

    return zstd.ZstdCompressor(**options).compress(payload)
def test_write_size_variance(self, original, level, write_size):
    """Check that ``write_to`` output equals one-shot output for any ``write_size``."""
    expected_frame = zstd.ZstdCompressor(level=level).compress(original)

    sink = io.BytesIO()
    streaming = zstd.ZstdCompressor(level=level)
    with streaming.write_to(sink, size=len(original),
                            write_size=write_size) as writer:
        writer.write(original)

    self.assertEqual(sink.getvalue(), expected_frame)
def test_write_content_size(self):
    """Check the length and reported content size of one-shot frames.

    The frame compressed with ``write_content_size=True`` is one byte longer
    and reports a content size of 1536; the other reports 0.
    """
    payload = b'foobar' * 256

    unsized_frame = zstd.ZstdCompressor(level=1).compress(payload)
    sized_frame = zstd.ZstdCompressor(
        level=1, write_content_size=True).compress(payload)

    self.assertEqual(len(sized_frame), len(unsized_frame) + 1)

    unsized_params = zstd.get_frame_parameters(unsized_frame)
    sized_params = zstd.get_frame_parameters(sized_frame)
    self.assertEqual(unsized_params.content_size, 0)
    self.assertEqual(sized_params.content_size, 1536)
def test_write_checksum(self):
    """Check that ``write_checksum=True`` adds 4 bytes to ``write_to`` output."""
    payload = b'foobar'

    bare = io.BytesIO()
    with zstd.ZstdCompressor(level=1).write_to(bare) as writer:
        writer.write(payload)

    summed = io.BytesIO()
    summing_cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
    with summing_cctx.write_to(summed) as writer:
        writer.write(payload)

    self.assertEqual(len(summed.getvalue()), len(bare.getvalue()) + 4)
def test_write_checksum(self):
    """Check the length and checksum flag of one-shot frames.

    The frame compressed with ``write_checksum=True`` is four bytes longer
    and its frame parameters report ``has_checksum``; the other does not.
    """
    payload = b'foobar'

    bare_frame = zstd.ZstdCompressor(level=1).compress(payload)
    summed_frame = zstd.ZstdCompressor(
        level=1, write_checksum=True).compress(payload)

    self.assertEqual(len(summed_frame), len(bare_frame) + 4)

    bare_params = zstd.get_frame_parameters(bare_frame)
    summed_params = zstd.get_frame_parameters(summed_frame)
    self.assertFalse(bare_params.has_checksum)
    self.assertTrue(summed_params.has_checksum)
def test_write_checksum(self):
    """Check that ``write_checksum=True`` adds 4 bytes to ``copy_stream`` output."""
    source = io.BytesIO(b'foobar')

    bare = io.BytesIO()
    zstd.ZstdCompressor(level=1).copy_stream(source, bare)

    # Rewind so the second compressor reads the same input again.
    source.seek(0)
    summed = io.BytesIO()
    zstd.ZstdCompressor(level=1, write_checksum=True).copy_stream(
        source, summed)

    self.assertEqual(len(summed.getvalue()), len(bare.getvalue()) + 4)
def test_read_write_size_variance(self, original, level, read_size, write_size):
    """Check that ``read_from`` chunks join to the one-shot frame for any sizes."""
    expected_frame = zstd.ZstdCompressor(level=level).compress(original)

    stream = io.BytesIO(original)
    streaming = zstd.ZstdCompressor(level=level)
    pieces = list(streaming.read_from(stream,
                                      size=len(original),
                                      read_size=read_size,
                                      write_size=write_size))

    self.assertEqual(b''.join(pieces), expected_frame)
def test_data_equivalence(self, original, threads, use_dict):
    """Round-trip a batch of inputs through the multi-buffer APIs.

    Args:
        original: Sequence of bytes inputs; ``original[0]`` doubles as the
            dictionary content when ``use_dict`` is true.
        threads: Thread count forwarded to ``multi_decompress_to_buffer``.
            It was previously accepted but ignored, so the fuzzed value
            never reached the code under test.
        use_dict: Whether compressor and decompressor share a dictionary
            built from ``original[0]``.
    """
    kwargs = {}
    if use_dict:
        kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])

    cctx = zstd.ZstdCompressor(level=1,
                               write_content_size=True,
                               write_checksum=True,
                               **kwargs)

    frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1)

    dctx = zstd.ZstdDecompressor(**kwargs)

    # Decompress straight from the compressor's output object.
    result = dctx.multi_decompress_to_buffer(frames_buffer, threads=threads)

    self.assertEqual(len(result), len(original))
    for i, frame in enumerate(result):
        self.assertEqual(frame.tobytes(), original[i])

    # And again from a plain list of bytes frames.
    frames_list = [f.tobytes() for f in frames_buffer]
    result = dctx.multi_decompress_to_buffer(frames_list, threads=threads)

    self.assertEqual(len(result), len(original))
    for i, frame in enumerate(result):
        self.assertEqual(frame.tobytes(), original[i])
def test_read_write_size_variance(self, original, level, read_size, write_size):
    """Check that ``copy_stream`` output equals the one-shot frame for any sizes."""
    expected_frame = zstd.ZstdCompressor(level=level).compress(original)

    streaming = zstd.ZstdCompressor(level=level)
    stream_in = io.BytesIO(original)
    stream_out = io.BytesIO()
    streaming.copy_stream(stream_in, stream_out,
                          size=len(original),
                          read_size=read_size,
                          write_size=write_size)

    self.assertEqual(stream_out.getvalue(), expected_frame)
def test_dictionary(self):
    """Compress via ``write_to`` with a trained dictionary and pin the output.

    Checks the per-call ``write`` return values, the frame parameters, the
    first 32 bytes of the frame, and the SHA-1 of the whole frame.
    """
    # Same 384 samples, in the same order, as appending the triple 128 times.
    samples = [b'foo' * 64, b'bar' * 64, b'foobar' * 64] * 128
    trained = zstd.train_dictionary(8192, samples)

    sink = io.BytesIO()
    cctx = zstd.ZstdCompressor(level=9, dict_data=trained)
    with cctx.write_to(sink) as writer:
        self.assertEqual(writer.write(b'foo'), 0)
        self.assertEqual(writer.write(b'bar'), 0)
        self.assertEqual(writer.write(b'foo' * 16384), 634)

    frame = sink.getvalue()

    frame_params = zstd.get_frame_parameters(frame)
    self.assertEqual(frame_params.content_size, 0)
    self.assertEqual(frame_params.window_size, 1024)
    self.assertEqual(frame_params.dict_id, trained.dict_id())
    self.assertFalse(frame_params.has_checksum)

    expected_prefix = (
        b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00'
        b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54'
        b'\x00\x00\x18\x6f\x6f\x66\x01\x00')
    self.assertEqual(frame[0:32], expected_prefix)

    digest = hashlib.sha1(frame).hexdigest()
    self.assertEqual(digest, '1c5bcd25181bcd8c1a73ea8773323e0056129f92')
def test_no_dict_id(self):
    """Check that ``write_dict_id=False`` makes a one-shot frame 4 bytes shorter."""
    # Same 384 samples, in the same order, as appending the triple 128 times.
    samples = [b'foo' * 64, b'bar' * 64, b'foobar' * 64] * 128
    trained = zstd.train_dictionary(1024, samples)
    payload = b'foobarfoobar'

    tagged_frame = zstd.ZstdCompressor(
        level=1, dict_data=trained).compress(payload)
    untagged_frame = zstd.ZstdCompressor(
        level=1, dict_data=trained, write_dict_id=False).compress(payload)

    self.assertEqual(len(tagged_frame), len(untagged_frame) + 4)
def test_large_output(self):
    """Check ``read_from`` yields exactly two chunks for an oversized payload.

    The payload is one byte longer than
    ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. The same expectation is
    checked for a file-like source and for a bytes (buffer protocol) source.
    """
    expected = b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE + b'o'

    frame_stream = io.BytesIO(zstd.ZstdCompressor(level=1).compress(expected))
    frame_stream.seek(0)

    dctx = zstd.ZstdDecompressor()

    # First the stream itself, then again with the buffer protocol.
    for reader_input in (frame_stream, frame_stream.getvalue()):
        chunk_iter = dctx.read_from(reader_input)

        pieces = [next(chunk_iter), next(chunk_iter)]

        with self.assertRaises(StopIteration):
            next(chunk_iter)

        self.assertEqual(b''.join(pieces), expected)
def test_content_size_present(self):
    """Check that a frame written with its content size decompresses in one shot."""
    payload = b'foobar'
    frame = zstd.ZstdCompressor(write_content_size=True).compress(payload)

    restored = zstd.ZstdDecompressor().decompress(frame)

    self.assertEqual(restored, b'foobar')
def test_read_write_size(self):
    """Check ``read_from`` honors ``read_size=1`` and ``write_size=1``.

    Every emitted chunk must be one byte long, and the source's read counter
    must equal the input length plus one.
    """
    counting_source = OpCountingBytesIO(b'foobarfoobar')
    cctx = zstd.ZstdCompressor(level=3)

    chunk_iter = cctx.read_from(counting_source, read_size=1, write_size=1)
    for piece in chunk_iter:
        self.assertEqual(len(piece), 1)

    expected_reads = len(counting_source.getvalue()) + 1
    self.assertEqual(counting_source._read_count, expected_reads)
def test_memory_size(self):
    """Check that a ``write_to`` compressor reports a memory size above 100000."""
    sink = io.BytesIO()
    with zstd.ZstdCompressor(level=3).write_to(sink) as writer:
        reported = writer.memory_size()

    self.assertGreater(reported, 100000)
def test_no_write(self):
    """Check ``copy_stream`` raises ``ValueError`` for a plain ``object()`` destination."""
    empty_source = io.BytesIO()
    bad_dest = object()

    cctx = zstd.ZstdCompressor()
    self.assertRaises(ValueError, cctx.copy_stream, empty_source, bad_dest)
def test_skip_bytes(self):
    """Check ``read_from`` with ``skip_bytes`` ignores a 3-byte prefix before the frame."""
    frame = zstd.ZstdCompressor(write_content_size=False).compress(b'foobar')
    prefixed = b'hdr' + frame

    dctx = zstd.ZstdDecompressor()
    pieces = dctx.read_from(prefixed, skip_bytes=3)

    self.assertEqual(b''.join(pieces), b'foobar')
def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
                    searchlength, targetlength, strategy):
    """Check ``CompressionParameters`` round-trips its values and builds a compressor.

    The instance must unpack to the tuple it was built from. A compressor is
    then created from the parameters, after nudging ``searchlength`` off two
    boundary combinations.
    """
    supplied = (windowlog, chainlog, hashlog, searchlog,
                searchlength, targetlength, strategy)
    p = zstd.CompressionParameters(*supplied)
    self.assertEqual(tuple(p), supplied)

    # Verify we can instantiate a compressor with the supplied values.
    # ZSTD_checkCParams moves the goal posts on us from what's advertised
    # in the constants. So move along with them.
    at_min = searchlength == zstd.SEARCHLENGTH_MIN
    at_max = searchlength == zstd.SEARCHLENGTH_MAX
    if at_min and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
        adjustment = 1
    elif at_max and strategy != zstd.STRATEGY_FAST:
        adjustment = -1
    else:
        adjustment = 0

    # Rebuild the parameters only when an adjustment was needed.
    if adjustment:
        searchlength += adjustment
        p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
                                       searchlog, searchlength,
                                       targetlength, strategy)

    cctx = zstd.ZstdCompressor(compression_params=p)
    with cctx.write_to(io.BytesIO()):
        pass
def test_buffer_with_segments_collection_input(self):
    """Check ``multi_compress_to_buffer`` on a ``BufferWithSegmentsCollection``.

    Five inputs are split across two ``BufferWithSegments`` (two in the
    first, three in the second). Each result must equal the frame produced
    by compressing that input on its own.
    """
    cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)

    original = [
        b'foo1',
        b'foo2' * 2,
        b'foo3' * 3,
        b'foo4' * 4,
        b'foo5' * 5,
    ]
    sizes = [len(item) for item in original]

    expected_frames = [cctx.compress(item) for item in original]

    # Segment tables are (offset, length) pairs of native 64-bit unsigned ints.
    first_data = original[0] + original[1]
    first_segments = struct.pack('=QQQQ',
                                 0, sizes[0],
                                 sizes[0], sizes[1])
    first_buffer = zstd.BufferWithSegments(first_data, first_segments)

    second_data = original[2] + original[3] + original[4]
    second_segments = struct.pack('=QQQQQQ',
                                  0, sizes[2],
                                  sizes[2], sizes[3],
                                  sizes[2] + sizes[3], sizes[4])
    second_buffer = zstd.BufferWithSegments(second_data, second_segments)

    collection = zstd.BufferWithSegmentsCollection(first_buffer, second_buffer)

    result = cctx.multi_compress_to_buffer(collection)

    self.assertEqual(len(result), len(expected_frames))
    for index, expected in enumerate(expected_frames):
        self.assertEqual(result[index].tobytes(), expected)
def write_lns(filename, files):
    """Serialize ``files`` into an "LZC" container and write it to ``filename``.

    Args:
        filename: Destination passed to ``BinaryWriter.to_file``.
        files: Mapping of entry name to entry data. Entries are emitted in
            the mapping's iteration order.

    Layout written: a header, a table with one record per entry, then all
    entry data concatenated and zstd-compressed as a single frame.
    """
    table = BinaryWriter()
    payload = BinaryWriter()

    for entry_name, entry_data in files.items():
        table.write_uint32(len(entry_name))
        table.write_string(entry_name)
        table.write_uint32(0)
        table.write_uint32(len(entry_data))
        # Offset of this entry: payload size before its data is appended.
        table.write_uint32(payload.size)

        payload.write_bytes(entry_data)

    compressed = zstd.ZstdCompressor().compress(payload.get_bytes())

    table_size = table.size
    raw_size = payload.size
    packed_size = len(compressed)

    out = BinaryWriter()
    out.write_bytes(b"LZC\0")
    out.write_uint32(1)
    out.write_uint32(len(files))
    out.write_uint32(0x48 + table_size)  # header size
    out.write_uint32(1)
    out.write_uint32(1)
    out.write_uint32(raw_size)
    out.write_uint32(packed_size)
    out.write_bytes(bytes(32))  # 32 zero bytes

    out.write_uint32(2)
    out.write_uint32(table_size)
    out.write_bytes(table.get_bytes())

    out.write_uint32(1)
    out.write_uint32(packed_size)
    out.write_bytes(compressed)

    out.to_file(filename)
def test_no_content_size_in_frame(self):
    """Check one-shot ``decompress`` rejects a frame without a content size.

    The frame is compressed with ``write_content_size=False``, and
    ``decompress`` is expected to raise ``zstd.ZstdError`` matching
    'input data invalid'.
    """
    cctx = zstd.ZstdCompressor(write_content_size=False)
    compressed = cctx.compress(b'foobar')

    dctx = zstd.ZstdDecompressor()

    # assertRaisesRegexp is a deprecated alias that Python 3.12 removed.
    # Prefer the current name and fall back only where it does not exist
    # (Python 2.7's unittest).
    assert_raises_regex = getattr(self, 'assertRaisesRegex', None)
    if assert_raises_regex is None:
        assert_raises_regex = self.assertRaisesRegexp

    with assert_raises_regex(zstd.ZstdError, 'input data invalid'):
        dctx.decompress(compressed)
def zstd_compress(data, **kwargs):
    """Compress ``data`` with zstd, never writing the content size.

    Args:
        data: Data handed to ``ZstdCompressor.compress``.
        **kwargs: Options for ``zstd.ZstdCompressor``. Any
            ``write_content_size`` value is overridden with ``False``.

    Returns:
        The result of ``ZstdCompressor.compress``.
    """
    options = dict(kwargs, write_content_size=False)
    compressor = zstd.ZstdCompressor(**options)
    try:
        result = compressor.compress(data, allow_empty=True)
    except TypeError:
        # zstandard-0.9 removed allow_empty and made it the default.
        result = compressor.compress(data)
    return result