def test_ppmd7_encode_decode(tmp_path, mem_size):
    """Round-trip a CSV fixture through the PPMd7 encoder and decoder.

    The fixture is compressed block by block into ``target.ppmd`` under
    ``tmp_path`` and then decompressed into ``target.csv``. The SHA-256
    digest of the restored bytes must equal the digest of the source bytes.

    Args:
        tmp_path: Directory object providing ``joinpath``; the intermediate
            files are created beneath it.
        mem_size: Memory size handed to both the encoder and the decoder.
    """
    length = 0
    m = hashlib.sha256()
    with testdata_path.joinpath("10000SalesRecords.csv").open("rb") as f:
        with tmp_path.joinpath("target.ppmd").open("wb") as target:
            enc = pyppmd.Ppmd7Encoder(6, mem_size)
            data = f.read(READ_BLOCKSIZE)
            while data:
                m.update(data)
                length += len(data)
                target.write(enc.encode(data))
                data = f.read(READ_BLOCKSIZE)
            target.write(enc.flush())
    shash = m.digest()
    m2 = hashlib.sha256()
    # Number of bytes read from the fixture; guards against a changed file.
    assert length == 1237262
    remaining = length
    with tmp_path.joinpath("target.ppmd").open("rb") as target:
        with tmp_path.joinpath("target.csv").open("wb") as out:
            dec = pyppmd.Ppmd7Decoder(6, mem_size)
            while remaining > 0:
                data = target.read(READ_BLOCKSIZE)
                res = dec.decode(data, min(remaining, READ_BLOCKSIZE))
                if not res:
                    # This block produced no output: retry once, feeding a
                    # single null byte when the decoder reports it needs
                    # input, otherwise an empty buffer.
                    res = dec.decode(b"\0" if dec.needs_input else b"", remaining)
                    if not res:
                        # Still nothing; stop and let the assertion below
                        # report the shortfall instead of looping forever.
                        break
                # Output of the retry is accounted for like any other block,
                # so the trailing bytes are hashed and written too.
                remaining -= len(res)
                m2.update(res)
                out.write(res)
            assert remaining == 0
    thash = m2.digest()
    assert thash == shash
def test_ppmd7_encoder2():
    """Encode the sample in two pieces and compare with the reference bytes.

    The input is split at offset 33, each part is fed to the same encoder,
    and the stream is flushed with ``endmark=False``. The concatenated
    output must be 41 bytes long and equal to ``encoded``.
    """
    enc = pyppmd.Ppmd7Encoder(6, 16 << 20)
    head, tail = data[:33], data[33:]
    # The list literal is evaluated left to right, so the encoder sees the
    # two halves in order before it is flushed.
    pieces = [enc.encode(head), enc.encode(tail), enc.flush(endmark=False)]
    result = b"".join(pieces)
    assert len(result) == 41
    assert result == encoded
def encode(var, max_order, mem_size):
    """Compress the ``testdata`` file into an in-memory buffer.

    Args:
        var: PPMd variant selector; ``7`` picks ``Ppmd7Encoder``, any other
            value picks ``Ppmd8Encoder``.
        max_order: Forwarded to the encoder as ``max_order``.
        mem_size: Forwarded to the encoder as ``mem_size``.

    The compressed bytes are written to a ``BytesIO`` that is closed on
    exit; nothing is returned.
    """
    encoder_cls = pyppmd.Ppmd7Encoder if var == 7 else pyppmd.Ppmd8Encoder
    encoder = encoder_cls(max_order=max_order, mem_size=mem_size)
    with io.BytesIO() as target:
        with testdata.open("rb") as src:
            # Pull fixed-size blocks until read() returns an empty bytes object.
            for block in iter(lambda: src.read(READ_BLOCKSIZE), b""):
                target.write(encoder.encode(block))
            target.write(encoder.flush())
def test_ppmd7_fuzzer(txt, max_order, mem_size):
    """Check that arbitrary text survives a PPMd7 encode/decode round trip.

    Args:
        txt: Text to compress; it is encoded as UTF-8 first.
        max_order: Model order used for both encoder and decoder.
        mem_size: Memory size used for both encoder and decoder.

    After decoding, the decoder must report ``eof`` and must not be asking
    for more input.
    """
    payload = txt.encode("UTF-8")
    expected_len = len(payload)

    encoder = pyppmd.Ppmd7Encoder(max_order=max_order, mem_size=mem_size)
    compressed = encoder.encode(payload) + encoder.flush()

    dec = pyppmd.Ppmd7Decoder(max_order=max_order, mem_size=mem_size)
    result = dec.decode(compressed, expected_len)
    missing = expected_len - len(result)
    if missing > 0:
        # The decoder may want one extra null byte before it hands over the
        # tail of the data; otherwise an empty buffer is enough.
        filler = b"\0" if dec.needs_input else b""
        result += dec.decode(filler, missing)

    assert result == payload
    assert dec.eof
    assert not dec.needs_input
def test_benchmark_text_decompress(tmp_path, benchmark, name, var, max_order, mem_size):
    """Benchmark PPMd decompression of the ``testdata`` file.

    The file is first compressed into ``target.ppmd`` under ``tmp_path``.
    The nested ``decode`` helper is then handed to ``benchmark``.

    Args:
        tmp_path: Directory object providing ``joinpath``.
        benchmark: Callable benchmark object; its ``extra_info`` mapping
            receives the uncompressed size under ``"data_size"``.
        name: Not used in the body.
        var: ``7`` selects the Ppmd7 classes, any other value the Ppmd8 ones.
        max_order: Model order for encoder and decoder.
        mem_size: Memory size for encoder and decoder.
    """

    def decode(var, max_order, mem_size):
        """Decompress ``target.ppmd`` into memory; expect ``src_size`` bytes."""
        decoder_cls = pyppmd.Ppmd7Decoder if var == 7 else pyppmd.Ppmd8Decoder
        decoder = decoder_cls(max_order=max_order, mem_size=mem_size)
        with tmp_path.joinpath("target.ppmd").open("rb") as src, io.BytesIO() as target:
            remaining = src_size
            chunk = src.read(READ_BLOCKSIZE)
            while remaining > 0:
                decoded = decoder.decode(chunk, remaining)
                if not decoded:
                    # No progress on this block: give up and let the
                    # assertion below flag the shortfall.
                    break
                target.write(decoded)
                remaining -= len(decoded)
                chunk = src.read(READ_BLOCKSIZE)
        assert remaining == 0

    # prepare compressed data
    encoder_cls = pyppmd.Ppmd7Encoder if var == 7 else pyppmd.Ppmd8Encoder
    encoder = encoder_cls(max_order=max_order, mem_size=mem_size)
    with tmp_path.joinpath("target.ppmd").open("wb") as target, testdata.open("rb") as src:
        for block in iter(lambda: src.read(READ_BLOCKSIZE), b""):
            target.write(encoder.encode(block))
        target.write(encoder.flush())

    benchmark.extra_info["data_size"] = src_size
    benchmark(decode, var, max_order, mem_size)
def __init__(self, properties: bytes):
    """Create the wrapped PPMd7 encoder from a packed properties blob.

    Args:
        properties: Raw bytes passed to ``self._decode_property``, which
            yields the two values handed to ``pyppmd.Ppmd7Encoder``.
    """
    # Presumably (model order, memory size) - confirm against _decode_property.
    max_order, mem_size = self._decode_property(properties)
    self.encoder = pyppmd.Ppmd7Encoder(max_order, mem_size)
def test_ppmd7_encoder():
    """Encode the sample in one call and compare with the reference bytes.

    The encoder output followed by the flush output must be 41 bytes long
    and equal to ``encoded``.
    """
    enc = pyppmd.Ppmd7Encoder(6, 16 << 20)
    body = enc.encode(data)
    trailer = enc.flush()
    result = body + trailer
    assert len(result) == 41
    assert result == encoded