def test_set_dict_id(self):
    """A dict_id passed to the trainer must be reflected by the result."""
    # 128 alternating pairs of repetitive samples gives the trainer
    # enough redundant input to build a dictionary from.
    samples = [b'foo' * 64, b'foobar' * 64] * 128
    d = zstd.train_cover_dictionary(8192, samples, k=64, d=16, dict_id=42)
    self.assertEqual(d.dict_id(), 42)
def test_optimize(self):
    """optimize=True should search for and report cover parameters."""
    samples = [b'foo' * 64, b'foobar' * 64] * 128
    # steps=1 keeps the parameter search short; threads=-1 uses all cores.
    d = zstd.train_cover_dictionary(
        8192, samples, optimize=True, threads=-1, steps=1, d=16)
    # With this fixed input the optimizer settles on k=50; d was pinned.
    self.assertEqual(d.k, 50)
    self.assertEqual(d.d, 16)
def test_basic(self):
    """Training with explicit k/d yields a valid zstd dictionary."""
    samples = [b'foo' * 64, b'foobar' * 64] * 128
    d = zstd.train_cover_dictionary(8192, samples, k=64, d=16)

    self.assertIsInstance(d.dict_id(), int_type)

    # A zstd dictionary always begins with the magic number 0xEC30A437
    # (little-endian on the wire).
    raw = d.as_bytes()
    self.assertEqual(raw[0:4], b'\x37\xa4\x30\xec')

    # The requested cover parameters must round-trip onto the result.
    self.assertEqual(d.k, 64)
    self.assertEqual(d.d, 16)
else:
    # No explicit training subset was selected; train on every chunk.
    training_chunks = chunks

# Arguments forwarded to zstd.train_cover_dictionary(). k/d come from
# the CLI; 'optimize' is flipped on below when neither was supplied.
cover_args = {
    'k': args.cover_k,
    'd': args.cover_d,
    'optimize': False,
    # Always use all available threads in optimize mode.
    'threads': -1,
    'level': opts['level'],
}

# When the user pinned neither k nor d, let the trainer search for them.
if not args.cover_k and not args.cover_d:
    cover_args['optimize'] = True

cover_dict_data = zstd.train_cover_dictionary(
    args.dict_size, training_chunks, **cover_args)
print('trained cover dictionary of size %d (wanted %d); k=%d; d=%d' % (
    len(cover_dict_data), args.dict_size, cover_dict_data.k,
    cover_dict_data.d))

if args.zlib and args.discrete:
    # Baseline: compress each chunk independently with zlib and record
    # both the compressed payloads and the per-chunk ratios.
    compressed_discrete_zlib = []
    ratios = []
    for chunk in chunks:
        c = zlib.compress(chunk, args.zlib_level)
        compressed_discrete_zlib.append(c)
        ratios.append(float(len(c)) / float(len(chunk)))

    # Aggregate ratio expressed as a percentage of the original size.
    compressed_size = sum(map(len, compressed_discrete_zlib))
    ratio = float(compressed_size) / float(orig_size) * 100.0
def test_bad_args(self):
    """Non-bytes sample inputs are rejected with the proper exception."""
    # A bare text string is not a valid samples collection at all.
    with self.assertRaises(TypeError):
        zstd.train_cover_dictionary(8192, u'foo')
    # A collection is accepted, but its members must be bytes.
    with self.assertRaises(ValueError):
        zstd.train_cover_dictionary(8192, [u'foo'])
def test_no_args(self):
    """Calling the trainer with no arguments must raise TypeError."""
    self.assertRaises(TypeError, zstd.train_cover_dictionary)