def test_strategy(self):
    """Check the ``strategy`` / ``compression_strategy`` keyword pair.

    Supplying both keywords must raise ``ValueError``; supplying only
    ``strategy`` must be readable back through ``compression_strategy``.
    """
    conflicting = {"strategy": 0, "compression_strategy": 0}
    with self.assertRaisesRegex(
        ValueError, "cannot specify both compression_strategy"
    ):
        zstd.ZstdCompressionParameters(**conflicting)

    for value in (2, 3):
        built = zstd.ZstdCompressionParameters(strategy=value)
        self.assertEqual(built.compression_strategy, value)
def test_ldm_hash_rate_log(self):
    """Check the ``ldm_hash_rate_log`` / ``ldm_hash_every_log`` keyword pair.

    Supplying both keywords must raise ``ValueError``; supplying either one
    alone must be readable back through ``ldm_hash_every_log``.
    """
    with self.assertRaisesRegex(
        ValueError, "cannot specify both ldm_hash_rate_log"
    ):
        zstd.ZstdCompressionParameters(
            ldm_hash_rate_log=8, ldm_hash_every_log=4
        )

    # Each keyword on its own is checked against the same attribute.
    single_keyword_cases = (
        ("ldm_hash_rate_log", 8),
        ("ldm_hash_every_log", 16),
    )
    for keyword, value in single_keyword_cases:
        built = zstd.ZstdCompressionParameters(**{keyword: value})
        self.assertEqual(built.ldm_hash_every_log, value)
def test_overlap_log(self):
    """Check the ``overlap_log`` / ``overlap_size_log`` keyword pair.

    Supplying both keywords must raise ``ValueError``; supplying either one
    alone must be visible through both attributes.
    """
    with self.assertRaisesRegex(ValueError, "cannot specify both overlap_log"):
        zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9)

    single_keyword_cases = (
        ("overlap_log", 2),
        ("overlap_size_log", 4),
    )
    for keyword, value in single_keyword_cases:
        built = zstd.ZstdCompressionParameters(**{keyword: value})
        self.assertEqual(built.overlap_log, value)
        self.assertEqual(built.overlap_size_log, value)
def test_compression_params(self):
    """Stream-compress with explicit parameters and inspect the frame.

    Writes three chunks through ``stream_writer`` (asserting each ``write``
    returns the chunk length), then checks the frame parameters and the
    SHA-1 digest of the compressed output.
    """
    cparams = zstd.ZstdCompressionParameters(
        window_log=20,
        chain_log=6,
        hash_log=12,
        min_match=5,
        search_log=4,
        target_length=10,
        strategy=zstd.STRATEGY_FAST,
    )

    sink = io.BytesIO()
    compressor_ctx = zstd.ZstdCompressor(compression_params=cparams)
    with compressor_ctx.stream_writer(sink, closefd=False) as writer:
        for chunk in (b"foo", b"bar", b"foobar" * 16384):
            self.assertEqual(writer.write(chunk), len(chunk))

    # Inspect the output once the writer context has exited.
    frame = sink.getvalue()
    frame_params = zstd.get_frame_parameters(frame)

    self.assertEqual(frame_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
    # 1048576 == 2 ** 20 (window_log above is 20).
    self.assertEqual(frame_params.window_size, 1048576)
    self.assertEqual(frame_params.dict_id, 0)
    self.assertFalse(frame_params.has_checksum)

    digest = hashlib.sha1(frame).hexdigest()
    self.assertEqual(digest, "dd4bb7d37c1a0235b38a2f6b462814376843ef0b")
def test_compression_params(self):
    """Stream-compress with explicit parameters and inspect the frame.

    Writes three chunks through ``stream_writer`` (asserting each ``write``
    returns ``0``), then checks the frame parameters and the SHA-1 digest of
    the compressed output.
    """
    cparams = zstd.ZstdCompressionParameters(
        window_log=20,
        chain_log=6,
        hash_log=12,
        min_match=5,
        search_log=4,
        target_length=10,
        compression_strategy=zstd.STRATEGY_FAST,
    )

    sink = io.BytesIO()
    compressor_ctx = zstd.ZstdCompressor(compression_params=cparams)
    with compressor_ctx.stream_writer(sink) as writer:
        for chunk in (b'foo', b'bar', b'foobar' * 16384):
            self.assertEqual(writer.write(chunk), 0)

    # Inspect the output once the writer context has exited.
    frame = sink.getvalue()
    frame_params = zstd.get_frame_parameters(frame)

    self.assertEqual(frame_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
    # 1048576 == 2 ** 20 (window_log above is 20).
    self.assertEqual(frame_params.window_size, 1048576)
    self.assertEqual(frame_params.dict_id, 0)
    self.assertFalse(frame_params.has_checksum)

    digest = hashlib.sha1(frame).hexdigest()
    self.assertEqual(digest, '2a8111d72eb5004cdcecbdac37da9f26720d30ef')
def test_bounds(self):
    """Construct parameters from the ``*_MIN`` and ``*_MAX`` constants.

    The test passes as long as both constructor calls complete; nothing is
    asserted about the resulting objects.
    """
    lower_limits = {
        "window_log": zstd.WINDOWLOG_MIN,
        "chain_log": zstd.CHAINLOG_MIN,
        "hash_log": zstd.HASHLOG_MIN,
        "search_log": zstd.SEARCHLOG_MIN,
        # min_match is taken one above the exported minimum.
        "min_match": zstd.MINMATCH_MIN + 1,
        "target_length": zstd.TARGETLENGTH_MIN,
        "strategy": zstd.STRATEGY_FAST,
    }
    zstd.ZstdCompressionParameters(**lower_limits)

    upper_limits = {
        "window_log": zstd.WINDOWLOG_MAX,
        "chain_log": zstd.CHAINLOG_MAX,
        "hash_log": zstd.HASHLOG_MAX,
        "search_log": zstd.SEARCHLOG_MAX,
        # min_match is taken one below the exported maximum.
        "min_match": zstd.MINMATCH_MAX - 1,
        "target_length": zstd.TARGETLENGTH_MAX,
        "strategy": zstd.STRATEGY_BTULTRA2,
    }
    zstd.ZstdCompressionParameters(**upper_limits)
def test_estimated_compression_context_size(
    self,
    windowlog,
    chainlog,
    hashlog,
    searchlog,
    minmatch,
    targetlength,
    strategy,
):
    """Call ``estimated_compression_context_size`` for the given inputs.

    ``minmatch`` is nudged by one when it sits on ``MINMATCH_MIN`` with the
    FAST/GREEDY strategies, or on ``MINMATCH_MAX`` with any strategy other
    than FAST. The returned size is not asserted; the test only requires
    that construction and the call complete.
    """
    if minmatch == zstd.MINMATCH_MIN and strategy in (
        zstd.STRATEGY_FAST,
        zstd.STRATEGY_GREEDY,
    ):
        step = 1
    elif minmatch == zstd.MINMATCH_MAX and strategy != zstd.STRATEGY_FAST:
        step = -1
    else:
        step = 0

    if step:
        minmatch += step

    keywords = {
        "window_log": windowlog,
        "chain_log": chainlog,
        "hash_log": hashlog,
        "search_log": searchlog,
        "min_match": minmatch,
        "target_length": targetlength,
        "strategy": strategy,
    }
    built = zstd.ZstdCompressionParameters(**keywords)
    estimate = built.estimated_compression_context_size()
def test_valid_init(
    self,
    windowlog,
    chainlog,
    hashlog,
    searchlog,
    minmatch,
    targetlength,
    strategy,
):
    """Construct ``ZstdCompressionParameters`` from the supplied values.

    Nothing is asserted; the test passes when the constructor completes.
    """
    keywords = {
        "window_log": windowlog,
        "chain_log": chainlog,
        "hash_log": hashlog,
        "search_log": searchlog,
        "min_match": minmatch,
        "target_length": targetlength,
        "strategy": strategy,
    }
    zstd.ZstdCompressionParameters(**keywords)
def test_estimated_compression_context_size(self):
    """Check the estimated context size for one fixed parameter set.

    The estimate must be within 250 of 1294144.
    """
    built = zstd.ZstdCompressionParameters(
        window_log=20,
        chain_log=16,
        hash_log=17,
        search_log=1,
        min_match=5,
        target_length=16,
        strategy=zstd.STRATEGY_DFAST,
    )
    estimate = built.estimated_compression_context_size()

    # 32-bit has slightly different values from 64-bit, hence the tolerance.
    self.assertAlmostEqual(estimate, 1294144, delta=250)
def test_bad_precompute_compress(self):
    """Check argument validation of ``precompute_compress``.

    Calling it with no arguments, or with both ``level`` and
    ``compression_params``, must raise ``ValueError``.
    """
    samples = generate_samples()
    dict_size = get_optimal_dict_size_heuristically(samples)
    trained = zstd.train_dictionary(dict_size, samples, k=64, d=8)

    # Neither argument supplied.
    with self.assertRaisesRegex(ValueError, "must specify one of level or "):
        trained.precompute_compress()

    # Both arguments supplied.
    with self.assertRaisesRegex(
        ValueError, "must only specify one of level or "
    ):
        trained.precompute_compress(
            level=3,
            compression_params=zstd.ZstdCompressionParameters(),
        )
def test_estimated_compression_context_size(
    self,
    windowlog,
    chainlog,
    hashlog,
    searchlog,
    searchlength,
    targetlength,
    strategy,
):
    """Call ``estimated_compression_context_size`` for the given inputs.

    ``searchlength`` is nudged by one when it sits on ``SEARCHLENGTH_MIN``
    with the FAST/GREEDY strategies, or on ``SEARCHLENGTH_MAX`` with any
    strategy other than FAST. It is then passed as ``min_match``. The
    returned size is not asserted.
    """
    if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (
        zstd.STRATEGY_FAST,
        zstd.STRATEGY_GREEDY,
    ):
        step = 1
    elif (
        searchlength == zstd.SEARCHLENGTH_MAX
        and strategy != zstd.STRATEGY_FAST
    ):
        step = -1
    else:
        step = 0

    if step:
        searchlength += step

    keywords = {
        "window_log": windowlog,
        "chain_log": chainlog,
        "hash_log": hashlog,
        "search_log": searchlog,
        "min_match": searchlength,
        "target_length": targetlength,
        "compression_strategy": strategy,
    }
    built = zstd.ZstdCompressionParameters(**keywords)
    estimate = built.estimated_compression_context_size()
def test_members(self):
    """Check that constructor keywords are exposed as attributes.

    Each case builds a ``ZstdCompressionParameters`` from some keywords and
    asserts the listed attributes, in order, against the expected values.
    """

    def check(keywords, expected=None):
        # Build from ``keywords`` and compare each expected attribute;
        # when ``expected`` is omitted the keywords themselves are expected.
        built = zstd.ZstdCompressionParameters(**keywords)
        wanted = keywords if expected is None else expected
        for attribute, value in wanted.items():
            self.assertEqual(getattr(built, attribute), value)

    check(
        {
            "window_log": 10,
            "chain_log": 6,
            "hash_log": 7,
            "search_log": 4,
            "min_match": 5,
            "target_length": 8,
            "strategy": 1,
        }
    )

    check({"compression_level": 2})
    check({"threads": 4})
    check({"threads": 2, "job_size": 1048576, "overlap_log": 6})

    check({"compression_level": -1})
    check({"compression_level": -2})

    # Boolean inputs are compared against the integer 1.
    check({"force_max_window": True}, {"force_max_window": 1})
    check({"enable_ldm": True}, {"enable_ldm": 1})

    check({"ldm_hash_log": 7})
    check({"ldm_min_match": 6})
    check({"ldm_bucket_size_log": 7})
    check({"ldm_hash_rate_log": 8})
def test_overlap_log(self):
    """Check that ``overlap_log`` is readable back from the parameters."""
    expected = 2
    built = zstd.ZstdCompressionParameters(overlap_log=expected)
    self.assertEqual(built.overlap_log, expected)
def test_ldm_hash_rate_log(self):
    """Check that ``ldm_hash_rate_log`` is readable back from the parameters."""
    expected = 8
    built = zstd.ZstdCompressionParameters(ldm_hash_rate_log=expected)
    self.assertEqual(built.ldm_hash_rate_log, expected)
def test_strategy(self):
    """Check that ``strategy`` is readable back from the parameters."""
    for value in (2, 3):
        built = zstd.ZstdCompressionParameters(strategy=value)
        self.assertEqual(built.strategy, value)
dctx = zstd.ZstdDecompressor() uncompressed_file_data = dctx.decompress( compressed_file_data) # uncompressed_file_data is now a bytes string. # Printing the type will show "<class 'bytes'>" print("Object type is {}".format(type(uncompressed_file_data))) # To convert to a UTF-8 string, use the decode method uncompressed_file_data = uncompressed_file_data.decode() # Printing the type will now show "<class 'str'>" print("Object type is {}".format(type(uncompressed_file_data))) print("\nTesting Zstandard Compression Levels 0-22\n") for x in range(0, 23): params = zstd.ZstdCompressionParameters( compression_level=x) # Set compression parameters here cctx = zstd.ZstdCompressor( compression_params=params ) # Set compression_params to the parameters defined previously time_start = time.time() # Start time before compression begins compressed = cctx.compress( uncompressed_file_data.encode() ) # When compressing a string, the string needs to be a bytes object. encode() converts to bytes. time_end = time.time() - time_start # Total time taken to compress uncompressed_length = len( uncompressed_file_data) # Calculate uncompressed length compressed_length = len(compressed) # Calculate the compressed length print( "Compression Level: {} - Uncompressed: {} - Compressed: {} - Ratio: {}% - Compression Time: {}ms" .format(