Beispiel #1
0
    def test_write_content_size(self):
        # The stream writer can only record the content size in the frame
        # header when the source size is declared up front.
        payload = b'foobar' * 256

        # Baseline: compressor that is not asked to write the content size.
        plain_dest = io.BytesIO()
        plain_cctx = zstd.ZstdCompressor(level=1)
        with plain_cctx.write_to(plain_dest) as writer:
            self.assertEqual(writer.write(payload), 0)

        # write_content_size=True, but the source size is not known in
        # streaming mode, so the header is not written.
        sized_cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
        undeclared_dest = io.BytesIO()
        with sized_cctx.write_to(undeclared_dest) as writer:
            self.assertEqual(writer.write(payload), 0)

        self.assertEqual(len(undeclared_dest.getvalue()),
                         len(plain_dest.getvalue()))

        # Declaring the size will write the header.
        sized_dest = io.BytesIO()
        with sized_cctx.write_to(sized_dest, size=len(payload)) as writer:
            self.assertEqual(writer.write(payload), 0)

        plain_frame = plain_dest.getvalue()
        sized_frame = sized_dest.getvalue()

        plain_params = zstd.get_frame_parameters(plain_frame)
        sized_params = zstd.get_frame_parameters(sized_frame)
        self.assertEqual(plain_params.content_size, 0)
        self.assertEqual(sized_params.content_size, 1536)
        self.assertEqual(plain_params.dict_id, 0)
        self.assertEqual(sized_params.dict_id, 0)
        self.assertFalse(plain_params.has_checksum)
        self.assertFalse(sized_params.has_checksum)

        # The frame carrying the content size is expected to be one byte
        # longer.
        self.assertEqual(len(sized_frame), len(plain_frame) + 1)
Beispiel #2
0
    def test_multithreaded(self):
        # copy_stream() with a two-thread compressor over three 1 MiB runs
        # of distinct bytes.
        run_length = 1048576
        source = io.BytesIO(
            b''.join(fill * run_length for fill in (b'a', b'b', b'c')))

        dest = io.BytesIO()
        cctx = zstd.ZstdCompressor(threads=2)
        bytes_read, bytes_written = cctx.copy_stream(source, dest)
        self.assertEqual(bytes_read, 3145728)
        self.assertEqual(bytes_written, 295)

        frame_params = zstd.get_frame_parameters(dest.getvalue())
        self.assertEqual(frame_params.content_size, 0)
        self.assertEqual(frame_params.dict_id, 0)
        self.assertFalse(frame_params.has_checksum)

        # Writing content size and checksum works.
        source.seek(0)
        dest = io.BytesIO()
        cctx = zstd.ZstdCompressor(threads=2,
                                   write_content_size=True,
                                   write_checksum=True)
        cctx.copy_stream(source, dest, size=len(source.getvalue()))

        frame_params = zstd.get_frame_parameters(dest.getvalue())
        self.assertEqual(frame_params.content_size, 3145728)
        self.assertEqual(frame_params.dict_id, 0)
        self.assertTrue(frame_params.has_checksum)
Beispiel #3
0
    def test_no_dict_id(self):
        # Stream the same input through two dictionary compressors, one of
        # which has write_dict_id=False, and compare the resulting frames.
        samples = [b'foo' * 64, b'bar' * 64, b'foobar' * 64] * 128
        dictionary = zstd.train_dictionary(1024, samples)
        payload = b'foobarfoobar'

        tagged_dest = io.BytesIO()
        tagged_cctx = zstd.ZstdCompressor(level=1, dict_data=dictionary)
        with tagged_cctx.write_to(tagged_dest) as writer:
            self.assertEqual(writer.write(payload), 0)

        untagged_cctx = zstd.ZstdCompressor(level=1, dict_data=dictionary,
                                            write_dict_id=False)
        untagged_dest = io.BytesIO()
        with untagged_cctx.write_to(untagged_dest) as writer:
            self.assertEqual(writer.write(payload), 0)

        tagged_frame = tagged_dest.getvalue()
        untagged_frame = untagged_dest.getvalue()

        untagged_params = zstd.get_frame_parameters(untagged_frame)
        tagged_params = zstd.get_frame_parameters(tagged_frame)
        self.assertEqual(untagged_params.content_size, 0)
        self.assertEqual(tagged_params.content_size, 0)
        self.assertEqual(untagged_params.dict_id, 0)
        self.assertEqual(tagged_params.dict_id, dictionary.dict_id())
        self.assertFalse(untagged_params.has_checksum)
        self.assertFalse(tagged_params.has_checksum)

        # The frame that records the dictionary ID is expected to be four
        # bytes longer.
        self.assertEqual(len(tagged_frame), len(untagged_frame) + 4)
Beispiel #4
0
    def test_write_checksum(self):
        # The checksummed one-shot frame is expected to be four bytes longer
        # than the frame without a checksum.
        payload = b'foobar'
        plain_frame = zstd.ZstdCompressor(level=1).compress(payload)
        checked_frame = zstd.ZstdCompressor(
            level=1, write_checksum=True).compress(payload)

        self.assertEqual(len(checked_frame), len(plain_frame) + 4)
Beispiel #5
0
    def test_write_content_size(self):
        # The one-shot frame that records the content size is expected to be
        # one byte longer than the frame that does not.
        payload = b'foobar' * 256
        plain_frame = zstd.ZstdCompressor(level=1).compress(payload)
        sized_frame = zstd.ZstdCompressor(
            level=1, write_content_size=True).compress(payload)

        self.assertEqual(len(sized_frame), len(plain_frame) + 1)
Beispiel #6
0
    def test_write_content_size(self):
        # copy_stream() only records the content size when the caller
        # passes the source size explicitly.
        payload = b'foobar' * 256
        source = io.BytesIO(payload)

        plain_dest = io.BytesIO()
        zstd.ZstdCompressor(level=1).copy_stream(source, plain_dest)

        sized_cctx = zstd.ZstdCompressor(level=1, write_content_size=True)

        # Source content size is unknown, so no content size is written.
        source.seek(0)
        undeclared_dest = io.BytesIO()
        sized_cctx.copy_stream(source, undeclared_dest)
        self.assertEqual(len(undeclared_dest.getvalue()),
                         len(plain_dest.getvalue()))

        # The source size is specified, so the content size header is
        # present.
        source.seek(0)
        sized_dest = io.BytesIO()
        sized_cctx.copy_stream(source, sized_dest, size=len(payload))
        self.assertEqual(len(sized_dest.getvalue()),
                         len(plain_dest.getvalue()) + 1)

        plain_params = zstd.get_frame_parameters(plain_dest.getvalue())
        sized_params = zstd.get_frame_parameters(sized_dest.getvalue())
        self.assertEqual(plain_params.content_size, 0)
        self.assertEqual(sized_params.content_size, 1536)
        self.assertEqual(plain_params.dict_id, 0)
        self.assertEqual(sized_params.dict_id, 0)
        self.assertFalse(plain_params.has_checksum)
        self.assertFalse(sized_params.has_checksum)
Beispiel #7
0
    def test_write_checksum(self):
        # Same checksum size check as the one-shot API, but driven through
        # compressobj(): compress() output followed by flush() output.
        payload = b'foobar'

        def build_frame(cctx):
            cobj = cctx.compressobj()
            body = cobj.compress(payload)
            tail = cobj.flush()
            return body + tail

        plain_frame = build_frame(zstd.ZstdCompressor(level=1))
        checked_frame = build_frame(
            zstd.ZstdCompressor(level=1, write_checksum=True))

        self.assertEqual(len(checked_frame), len(plain_frame) + 4)
Beispiel #8
0
    def test_write_content_size(self):
        # Both compression objects are told the input size; only the
        # compressor created with write_content_size=True is expected to
        # spend the extra header byte.
        payload = b'foobar' * 256

        def build_frame(cctx):
            cobj = cctx.compressobj(size=len(payload))
            body = cobj.compress(payload)
            tail = cobj.flush()
            return body + tail

        plain_frame = build_frame(zstd.ZstdCompressor(level=1))
        sized_frame = build_frame(
            zstd.ZstdCompressor(level=1, write_content_size=True))

        self.assertEqual(len(sized_frame), len(plain_frame) + 1)
def compress(input_data, compressiondict=None):
    """Compress ``input_data`` with zstd at compression level 22.

    ``str`` input is encoded as UTF-8 before compression; any other input
    is handed to the compressor unchanged. When ``compressiondict`` is not
    ``None`` it is passed to ``zstd.ZstdCompressor`` as ``dict_data``.

    Returns the result of ``ZstdCompressor.compress``.
    """
    payload = input_data
    if isinstance(payload, str):
        payload = payload.encode('utf-8')

    # Highest (best) compression level is 22.
    compressor_options = {'level': 22}
    if compressiondict is not None:
        compressor_options['dict_data'] = compressiondict

    return zstd.ZstdCompressor(**compressor_options).compress(payload)
Beispiel #10
0
    def test_write_size_variance(self, original, level, write_size):
        # Streaming through write_to() with the supplied write_size must
        # produce the same bytes as one-shot compression at the same level.
        expected_frame = zstd.ZstdCompressor(level=level).compress(original)

        sink = io.BytesIO()
        streaming_cctx = zstd.ZstdCompressor(level=level)
        writer_cm = streaming_cctx.write_to(sink, size=len(original),
                                            write_size=write_size)
        with writer_cm as writer:
            writer.write(original)

        self.assertEqual(sink.getvalue(), expected_frame)
Beispiel #11
0
    def test_write_content_size(self):
        # One-shot compression with and without the content size recorded:
        # check both the frame length and the parsed frame parameters.
        payload = b'foobar' * 256
        plain_frame = zstd.ZstdCompressor(level=1).compress(payload)
        sized_frame = zstd.ZstdCompressor(
            level=1, write_content_size=True).compress(payload)

        self.assertEqual(len(sized_frame), len(plain_frame) + 1)

        plain_params = zstd.get_frame_parameters(plain_frame)
        sized_params = zstd.get_frame_parameters(sized_frame)
        self.assertEqual(plain_params.content_size, 0)
        self.assertEqual(sized_params.content_size, 1536)
Beispiel #12
0
    def test_write_checksum(self):
        # Stream the same input through write_to() with and without
        # write_checksum and compare the output lengths.
        payload = b'foobar'

        plain_sink = io.BytesIO()
        plain_cctx = zstd.ZstdCompressor(level=1)
        with plain_cctx.write_to(plain_sink) as writer:
            writer.write(payload)

        checked_sink = io.BytesIO()
        checked_cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
        with checked_cctx.write_to(checked_sink) as writer:
            writer.write(payload)

        plain_frame = plain_sink.getvalue()
        checked_frame = checked_sink.getvalue()
        self.assertEqual(len(checked_frame), len(plain_frame) + 4)
Beispiel #13
0
    def test_write_checksum(self):
        # One-shot compression with and without a checksum: check the frame
        # length and the has_checksum flag reported by the frame parameters.
        payload = b'foobar'
        plain_frame = zstd.ZstdCompressor(level=1).compress(payload)
        checked_frame = zstd.ZstdCompressor(
            level=1, write_checksum=True).compress(payload)

        self.assertEqual(len(checked_frame), len(plain_frame) + 4)

        plain_params = zstd.get_frame_parameters(plain_frame)
        checked_params = zstd.get_frame_parameters(checked_frame)

        self.assertFalse(plain_params.has_checksum)
        self.assertTrue(checked_params.has_checksum)
Beispiel #14
0
    def test_write_checksum(self):
        # copy_stream() the same source twice, once with write_checksum, and
        # compare the output lengths.
        source = io.BytesIO(b'foobar')

        plain_dest = io.BytesIO()
        zstd.ZstdCompressor(level=1).copy_stream(source, plain_dest)

        # Rewind so the second pass sees the same input.
        source.seek(0)
        checked_dest = io.BytesIO()
        zstd.ZstdCompressor(level=1, write_checksum=True).copy_stream(
            source, checked_dest)

        plain_frame = plain_dest.getvalue()
        checked_frame = checked_dest.getvalue()
        self.assertEqual(len(checked_frame), len(plain_frame) + 4)
Beispiel #15
0
    def test_read_write_size_variance(self, original, level, read_size,
                                      write_size):
        # Chunks yielded by read_from() with the supplied read/write sizes
        # must concatenate to the one-shot frame for the same level.
        expected_frame = zstd.ZstdCompressor(level=level).compress(original)

        streaming_cctx = zstd.ZstdCompressor(level=level)
        chunk_iter = streaming_cctx.read_from(io.BytesIO(original),
                                              size=len(original),
                                              read_size=read_size,
                                              write_size=write_size)
        produced = b''.join(chunk_iter)

        self.assertEqual(produced, expected_frame)
Beispiel #16
0
    def test_data_equivalence(self, original, threads, use_dict):
        # Round-trip every item of ``original`` through
        # multi_compress_to_buffer() / multi_decompress_to_buffer().
        # NOTE(review): the ``threads`` argument is not referenced in this
        # body; compression always passes threads=-1.
        shared_kwargs = {}
        if use_dict:
            shared_kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])

        cctx = zstd.ZstdCompressor(level=1,
                                   write_content_size=True,
                                   write_checksum=True,
                                   **shared_kwargs)
        frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1)

        dctx = zstd.ZstdDecompressor(**shared_kwargs)

        def assert_roundtrip(frames):
            decompressed = dctx.multi_decompress_to_buffer(frames)
            self.assertEqual(len(decompressed), len(original))
            for index, segment in enumerate(decompressed):
                self.assertEqual(segment.tobytes(), original[index])

        # First feed the frames as the buffer object returned by the
        # compressor ...
        assert_roundtrip(frames_buffer)

        # ... then as a plain list of bytes objects.
        assert_roundtrip([segment.tobytes() for segment in frames_buffer])
Beispiel #17
0
    def test_read_write_size_variance(self, original, level, read_size,
                                      write_size):
        # copy_stream() with the supplied read/write sizes must produce the
        # same bytes as one-shot compression at the same level.
        expected_frame = zstd.ZstdCompressor(level=level).compress(original)

        dest = io.BytesIO()
        stream_options = {
            'size': len(original),
            'read_size': read_size,
            'write_size': write_size,
        }
        zstd.ZstdCompressor(level=level).copy_stream(
            io.BytesIO(original), dest, **stream_options)

        self.assertEqual(dest.getvalue(), expected_frame)
Beispiel #18
0
    def test_dictionary(self):
        # Stream data through a compressor that uses a trained dictionary
        # and pin the exact output: frame parameters, leading bytes and the
        # SHA-1 of the whole frame.
        samples = [b'foo' * 64, b'bar' * 64, b'foobar' * 64] * 128
        dictionary = zstd.train_dictionary(8192, samples)

        sink = io.BytesIO()
        cctx = zstd.ZstdCompressor(level=9, dict_data=dictionary)
        writes = (
            (b'foo', 0),
            (b'bar', 0),
            (b'foo' * 16384, 634),
        )
        with cctx.write_to(sink) as writer:
            for data, expected_return in writes:
                self.assertEqual(writer.write(data), expected_return)

        frame = sink.getvalue()

        frame_params = zstd.get_frame_parameters(frame)
        self.assertEqual(frame_params.content_size, 0)
        self.assertEqual(frame_params.window_size, 1024)
        self.assertEqual(frame_params.dict_id, dictionary.dict_id())
        self.assertFalse(frame_params.has_checksum)

        expected_prefix = (
            b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00'
            b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54'
            b'\x00\x00\x18\x6f\x6f\x66\x01\x00')
        self.assertEqual(frame[:32], expected_prefix)

        digest = hashlib.sha1(frame).hexdigest()
        self.assertEqual(digest, '1c5bcd25181bcd8c1a73ea8773323e0056129f92')
Beispiel #19
0
    def test_no_dict_id(self):
        # With write_dict_id=False the one-shot frame is expected to be four
        # bytes shorter than the frame that records the dictionary ID.
        samples = [b'foo' * 64, b'bar' * 64, b'foobar' * 64] * 128
        dictionary = zstd.train_dictionary(1024, samples)
        payload = b'foobarfoobar'

        tagged_frame = zstd.ZstdCompressor(
            level=1, dict_data=dictionary).compress(payload)
        untagged_frame = zstd.ZstdCompressor(
            level=1, dict_data=dictionary,
            write_dict_id=False).compress(payload)

        self.assertEqual(len(tagged_frame), len(untagged_frame) + 4)
Beispiel #20
0
    def test_large_output(self):
        # The input is one byte longer than the recommended decompression
        # output size; read_from() is expected to yield exactly two chunks.
        original = b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE + b'o'

        frame = zstd.ZstdCompressor(level=1).compress(original)
        dctx = zstd.ZstdDecompressor()

        # First pass reads from a file-like object, the second one from a
        # bytes object via the buffer protocol.
        for reader_input in (io.BytesIO(frame), frame):
            chunk_iter = dctx.read_from(reader_input)
            chunks = [next(chunk_iter), next(chunk_iter)]

            with self.assertRaises(StopIteration):
                next(chunk_iter)

            self.assertEqual(b''.join(chunks), original)
Beispiel #21
0
    def test_content_size_present(self):
        # A frame written with write_content_size=True decompresses with a
        # plain decompress() call.
        payload = b'foobar'
        frame = zstd.ZstdCompressor(write_content_size=True).compress(payload)

        restored = zstd.ZstdDecompressor().decompress(frame)
        self.assertEqual(restored, payload)
Beispiel #22
0
    def test_read_write_size(self):
        # With read_size=1 and write_size=1 every yielded chunk is a single
        # byte, and the source records one read more than it has bytes.
        source = OpCountingBytesIO(b'foobarfoobar')
        cctx = zstd.ZstdCompressor(level=3)

        chunk_iter = cctx.read_from(source, read_size=1, write_size=1)
        for piece in chunk_iter:
            self.assertEqual(len(piece), 1)

        expected_reads = len(source.getvalue()) + 1
        self.assertEqual(source._read_count, expected_reads)
Beispiel #23
0
    def test_memory_size(self):
        # memory_size() on an open stream writer reports more than 100000.
        sink = io.BytesIO()
        cctx = zstd.ZstdCompressor(level=3)
        with cctx.write_to(sink) as writer:
            reported = writer.memory_size()

        self.assertGreater(reported, 100000)
Beispiel #24
0
    def test_no_write(self):
        # copy_stream() must reject a plain object() destination with
        # ValueError.
        readable = io.BytesIO()
        not_writable = object()

        compressor = zstd.ZstdCompressor()
        with self.assertRaises(ValueError):
            compressor.copy_stream(readable, not_writable)
Beispiel #25
0
    def test_skip_bytes(self):
        # read_from(..., skip_bytes=3) must ignore the three-byte prefix
        # placed in front of the frame.
        frame = zstd.ZstdCompressor(write_content_size=False).compress(
            b'foobar')
        prefixed = b'hdr' + frame

        dctx = zstd.ZstdDecompressor()
        chunk_iter = dctx.read_from(prefixed, skip_bytes=3)
        self.assertEqual(b''.join(chunk_iter), b'foobar')
Beispiel #26
0
        def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
                            searchlength, targetlength, strategy):
            # CompressionParameters must round-trip its constructor
            # arguments, and a compressor must be constructible from them.
            def make_params(length):
                return zstd.CompressionParameters(windowlog, chainlog,
                                                  hashlog, searchlog, length,
                                                  targetlength, strategy)

            params = make_params(searchlength)
            self.assertEqual(tuple(params),
                             (windowlog, chainlog, hashlog, searchlog,
                              searchlength, targetlength, strategy))

            # Verify we can instantiate a compressor with the supplied
            # values. ZSTD_checkCParams moves the goal posts on us from
            # what's advertised in the constants. So move along with them.
            at_min = searchlength == zstd.SEARCHLENGTH_MIN
            at_max = searchlength == zstd.SEARCHLENGTH_MAX
            if at_min and strategy in (zstd.STRATEGY_FAST,
                                       zstd.STRATEGY_GREEDY):
                params = make_params(searchlength + 1)
            elif at_max and strategy != zstd.STRATEGY_FAST:
                params = make_params(searchlength - 1)

            compressor = zstd.ZstdCompressor(compression_params=params)
            with compressor.write_to(io.BytesIO()):
                pass
Beispiel #27
0
    def test_buffer_with_segments_collection_input(self):
        # multi_compress_to_buffer() over a BufferWithSegmentsCollection
        # must yield the same frames as compressing each item on its own.
        cctx = zstd.ZstdCompressor(write_content_size=True,
                                   write_checksum=True)

        original = [
            b'foo1',
            b'foo2' * 2,
            b'foo3' * 3,
            b'foo4' * 4,
            b'foo5' * 5,
        ]
        expected_frames = [cctx.compress(item) for item in original]

        sizes = [len(item) for item in original]

        # Each segment is described by an (offset, length) pair of
        # unsigned 64-bit integers, offsets relative to the joined buffer.
        first_segments = struct.pack('=QQQQ',
                                     0, sizes[0],
                                     sizes[0], sizes[1])
        first = zstd.BufferWithSegments(b''.join(original[:2]),
                                        first_segments)

        second_segments = struct.pack('=QQQQQQ',
                                      0, sizes[2],
                                      sizes[2], sizes[3],
                                      sizes[2] + sizes[3], sizes[4])
        second = zstd.BufferWithSegments(b''.join(original[2:]),
                                         second_segments)

        collection = zstd.BufferWithSegmentsCollection(first, second)

        result = cctx.multi_compress_to_buffer(collection)

        self.assertEqual(len(result), len(expected_frames))

        for index, expected in enumerate(expected_frames):
            self.assertEqual(result[index].tobytes(), expected)
Beispiel #28
0
def write_lns(filename, files):
    """Serialize ``files`` into an "LZC" container and write it to ``filename``.

    ``files`` is a mapping of entry name to entry data. The data of all
    entries is concatenated and compressed as a single zstd frame. For each
    entry the name table records the name length, the name, a zero field,
    the data length and the entry's offset in the uncompressed data.
    """
    name_table = BinaryWriter()
    payload = BinaryWriter()
    for entry_name, entry_data in files.items():
        name_table.write_uint32(len(entry_name))
        name_table.write_string(entry_name)
        name_table.write_uint32(0)
        name_table.write_uint32(len(entry_data))
        name_table.write_uint32(payload.size)  # offset
        payload.write_bytes(entry_data)

    packed = zstd.ZstdCompressor().compress(payload.get_bytes())

    archive = BinaryWriter()
    archive.write_bytes(b"LZC\0")

    # Fixed-layout header fields. The meaning of the constant 1s is not
    # evident from this function alone.
    header_fields = (
        1,
        len(files),
        0x48 + name_table.size,  # header size
        1,
        1,
        payload.size,
        len(packed),
    )
    for field in header_fields:
        archive.write_uint32(field)
    archive.write_bytes(bytes(32))

    # Name table section.
    archive.write_uint32(2)
    archive.write_uint32(name_table.size)
    archive.write_bytes(name_table.get_bytes())

    # Compressed data section.
    archive.write_uint32(1)
    archive.write_uint32(len(packed))
    archive.write_bytes(packed)

    archive.to_file(filename)
Beispiel #29
0
    def test_no_content_size_in_frame(self):
        # Decompressing a frame written with write_content_size=False via a
        # plain decompress() call must fail with ZstdError.
        cctx = zstd.ZstdCompressor(write_content_size=False)
        compressed = cctx.compress(b'foobar')

        dctx = zstd.ZstdDecompressor()

        # assertRaisesRegexp is a deprecated alias that was removed in
        # Python 3.12. Prefer the current name and fall back to the old one
        # only where the current name does not exist (Python 2).
        assert_raises_regex = (getattr(self, 'assertRaisesRegex', None)
                               or self.assertRaisesRegexp)
        with assert_raises_regex(zstd.ZstdError, 'input data invalid'):
            dctx.decompress(compressed)
Beispiel #30
0
 def zstd_compress(data, **kwargs):
     """Compress ``data`` with zstd, never writing the content size.

     ``kwargs`` are forwarded to ``zstd.ZstdCompressor``, with
     ``write_content_size`` forced to ``False``.
     """
     options = dict(kwargs, write_content_size=False)
     compressor = zstd.ZstdCompressor(**options)
     try:
         return compressor.compress(data, allow_empty=True)
     except TypeError:
         # zstandard-0.9 removed allow_empty and made it the default.
         return compressor.compress(data)