def test_ppmd8_decoder2():
    """Decode the ``encoded`` fixture in two chunks and compare with ``source``.

    The first 20 bytes and the remainder of ``encoded`` are fed to a single
    ``Ppmd8Decoder`` in separate calls, followed by one call with empty input
    and a length argument of ``-1``.  The concatenated output must equal
    ``source``.
    """
    decoder = pyppmd.Ppmd8Decoder(6, 8 << 20, pyppmd.PPMD8_RESTORE_METHOD_RESTART)
    head, tail = encoded[:20], encoded[20:]
    pieces = [
        decoder.decode(head),
        decoder.decode(tail),
        # Final call supplies no new input; it only collects remaining output.
        decoder.decode(b"", -1),
    ]
    assert b"".join(pieces) == source
def test_ppmd8_encode_decode_shortage():
    """Round-trip a short UTF-8 payload when the first decode may come up short.

    The payload is encoded and flushed with ``Ppmd8Encoder(3, 2048)`` and then
    decoded with a matching ``Ppmd8Decoder``.  If the first ``decode`` call
    returns fewer bytes than the original length, a single null byte is fed
    to the decoder to obtain the rest.
    """
    txt = "\U0001127f\U00069f6a\U00069f6a"
    payload = txt.encode("UTF-8")
    expected_size = len(payload)

    encoder = pyppmd.Ppmd8Encoder(3, 2048)
    compressed = encoder.encode(payload)
    compressed += encoder.flush()

    decoder = pyppmd.Ppmd8Decoder(3, 2048)
    restored = decoder.decode(compressed, expected_size)
    shortfall = expected_size - len(restored)
    if shortfall > 0:
        # Feed one extra null byte so the decoder can emit the missing bytes.
        restored += decoder.decode(b"\0", shortfall)
    assert payload == restored
def test_ppmd8_fuzzer(txt, max_order, mem_size):
    """Round-trip ``txt`` through PPMd8 using the cut-off restore method.

    Args:
        txt: Text to encode as UTF-8 and compress.
        max_order: Passed as ``max_order`` to both encoder and decoder.
        mem_size: Passed as ``mem_size`` to both encoder and decoder.
    """
    payload = txt.encode("UTF-8")
    expected_size = len(payload)

    encoder = pyppmd.Ppmd8Encoder(
        max_order=max_order,
        mem_size=mem_size,
        restore_method=pyppmd.PPMD8_RESTORE_METHOD_CUT_OFF,
    )
    compressed = encoder.encode(payload)
    compressed += encoder.flush()

    decoder = pyppmd.Ppmd8Decoder(
        max_order=max_order,
        mem_size=mem_size,
        restore_method=pyppmd.PPMD8_RESTORE_METHOD_CUT_OFF,
    )
    result = decoder.decode(compressed, expected_size)

    missing = expected_size - len(result)
    if missing > 0:
        # PPMd needs an extra null byte when the decoder still asks for input;
        # otherwise an empty input is enough to collect the remaining output.
        padding = b"\0" if decoder.needs_input else b""
        result += decoder.decode(padding, missing)
    assert result == payload
def decode(var, max_order, mem_size):
    """Decode ``target.ppmd`` into an in-memory buffer and check its size.

    Reads ``tmp_path`` and ``src_size`` from the surrounding scope.  The
    compressed file is read in ``READ_BLOCKSIZE`` chunks and decoded until
    ``src_size`` bytes have been produced or the decoder returns nothing.

    Args:
        var: PPMd variant selector; ``7`` uses ``Ppmd7Decoder``, any other
            value uses ``Ppmd8Decoder``.
        max_order: Passed as ``max_order`` to the decoder.
        mem_size: Passed as ``mem_size`` to the decoder.
    """
    decoder_cls = pyppmd.Ppmd7Decoder if var == 7 else pyppmd.Ppmd8Decoder
    decoder = decoder_cls(max_order=max_order, mem_size=mem_size)

    with tmp_path.joinpath("target.ppmd").open("rb") as src, io.BytesIO() as target:
        remaining = src_size
        chunk = src.read(READ_BLOCKSIZE)
        while remaining > 0:
            out = decoder.decode(chunk, remaining)
            if not out:
                # No progress: stop and let the assertion below report it.
                break
            target.write(out)
            remaining -= len(out)
            chunk = src.read(READ_BLOCKSIZE)
    assert remaining == 0
def test_ppmd8_encode_decode(tmp_path, mem_size, restore_method):
    """Compress and decompress ``10000SalesRecords.csv`` and compare digests.

    The source file is read in ``READ_BLOCKSIZE`` chunks, encoded with
    ``Ppmd8Encoder`` and flushed with an end mark into ``target.ppmd``.  That
    file is then decoded into ``target.csv`` until the decoder reports
    ``eof`` or the input is exhausted.  Both directions must process
    1237262 bytes and the SHA-256 digests must match.

    Args:
        tmp_path: Directory in which ``target.ppmd`` and ``target.csv`` are
            created.
        mem_size: Memory size passed to the encoder and decoder.
        restore_method: Restore method passed to the encoder and decoder.
    """
    source_path = testdata_path.joinpath("10000SalesRecords.csv")
    packed_path = tmp_path.joinpath("target.ppmd")
    unpacked_path = tmp_path.joinpath("target.csv")

    # --- compress -----------------------------------------------------
    source_digest = hashlib.sha256()
    source_size = 0
    with source_path.open("rb") as f, packed_path.open("wb") as target:
        enc = pyppmd.Ppmd8Encoder(6, mem_size, restore_method=restore_method)
        # Read until the file yields an empty chunk.
        for block in iter(lambda: f.read(READ_BLOCKSIZE), b""):
            source_digest.update(block)
            source_size += len(block)
            target.write(enc.encode(block))
        target.write(enc.flush(endmark=True))
    shash = source_digest.digest()
    assert source_size == 1237262

    # --- decompress ---------------------------------------------------
    decoded_digest = hashlib.sha256()
    decoded_size = 0
    with packed_path.open("rb") as target, unpacked_path.open("wb") as out:
        dec = pyppmd.Ppmd8Decoder(6, mem_size, restore_method=restore_method)
        data = target.read(READ_BLOCKSIZE)
        while not dec.eof:
            res = dec.decode(data)
            decoded_digest.update(res)
            out.write(res)
            decoded_size += len(res)
            if not data:
                # Input exhausted; the empty chunk was already passed to the
                # decoder once, so stop even if eof was not reported.
                break
            data = target.read(READ_BLOCKSIZE)
    assert decoded_size == 1237262
    thash = decoded_digest.digest()
    assert thash == shash