def test_ppmd7_encode_decode(tmp_path, mem_size):
    """Round-trip ``10000SalesRecords.csv`` through the PPMd7 encoder and decoder.

    The fixture is compressed block by block into ``tmp_path/target.ppmd`` and
    then decompressed into ``tmp_path/target.csv``.  The SHA-256 digest of the
    decoded bytes must equal the digest of the bytes fed to the encoder.

    Args:
        tmp_path: directory in which the intermediate files are created.
        mem_size: memory size passed to both ``Ppmd7Encoder`` and
            ``Ppmd7Decoder``.
    """
    length = 0
    m = hashlib.sha256()
    with testdata_path.joinpath("10000SalesRecords.csv").open("rb") as f:
        with tmp_path.joinpath("target.ppmd").open("wb") as target:
            enc = pyppmd.Ppmd7Encoder(6, mem_size)
            # iter(callable, sentinel) stops at the first empty read (EOF).
            for data in iter(lambda: f.read(READ_BLOCKSIZE), b""):
                m.update(data)
                length += len(data)
                target.write(enc.encode(data))
            target.write(enc.flush())
    shash = m.digest()
    m2 = hashlib.sha256()
    assert length == 1237262
    remaining = length
    with tmp_path.joinpath("target.ppmd").open("rb") as target:
        with tmp_path.joinpath("target.csv").open("wb") as out:
            dec = pyppmd.Ppmd7Decoder(6, mem_size)
            while remaining > 0:
                data = target.read(READ_BLOCKSIZE)
                res = dec.decode(data, min(remaining, READ_BLOCKSIZE))
                if not res:
                    # Nothing came out: retry once with a single NUL pad byte
                    # if the decoder still asks for input, otherwise with
                    # empty input.
                    filler = b"\0" if dec.needs_input else b""
                    res = dec.decode(filler, remaining)
                    if not res:
                        # No progress is possible; let the assertion below
                        # report the shortfall instead of looping forever.
                        break
                # Output of the retry is accounted for like any other output,
                # so it is hashed, written and counted rather than dropped.
                remaining -= len(res)
                m2.update(res)
                out.write(res)
            assert remaining == 0
    thash = m2.digest()
    assert thash == shash
def __init__(self, properties: bytes, blocksize: Optional[int] = None):
    """Create the PPMd7 decoder described by a coder-properties blob.

    Args:
        properties: ``bytes`` of length 5 or 7.  The first byte is used as
            the model order and the following four bytes (little-endian
            unsigned) as the memory size; the two extra bytes of the 7-byte
            form are ignored.
        blocksize: accepted but not used by this method.

    Raises:
        UnsupportedCompressionMethodError: if ``properties`` is not ``bytes``
            or its length is neither 5 nor 7.
    """
    if not isinstance(properties, bytes):
        raise UnsupportedCompressionMethodError(
            properties, "Unknown type of properties is passed")
    if len(properties) not in (5, 7):
        raise UnsupportedCompressionMethodError(
            properties, "Unknown size of properties is passed")
    # Both accepted layouts start with the same 5-byte "<BL" prefix, so a
    # single prefix read covers the 5-byte and the 7-byte form.
    order, mem = struct.unpack_from("<BL", properties)
    self.decoder = pyppmd.Ppmd7Decoder(order, mem)
def test_ppmd7_decoder2():
    """Decode ``encoded`` in two slices and finish the stream explicitly.

    The first 33 input bytes are decoded with a 33-byte output limit and the
    rest with a 28-byte limit.  The decoder must not report EOF at that point.
    The remaining output (up to 66 bytes in total) is then pulled from the
    decoder, after which the result must equal ``data`` and the decoder must
    report EOF without needing more input.
    """
    total = 66
    ppmd = pyppmd.Ppmd7Decoder(6, 16 << 20)
    result = ppmd.decode(encoded[:33], 33)
    result += ppmd.decode(encoded[33:], 28)
    assert not ppmd.eof
    while len(result) < total:
        missing = total - len(result)
        if not ppmd.needs_input:
            # Decoder still holds input: drain it without supplying more.
            result += ppmd.decode(b"", missing)
            continue
        # Decoder asks for input: feed one NUL pad byte and stop.
        result += ppmd.decode(b"\0", missing)
        break
    assert result == data
    assert not ppmd.needs_input
    assert ppmd.eof
def test_ppmd7_fuzzer(txt, max_order, mem_size):
    """Round-trip an arbitrary text through PPMd7 with the given parameters.

    Args:
        txt: text to compress; it is encoded as UTF-8 first.
        max_order: ``max_order`` passed to both encoder and decoder.
        mem_size: ``mem_size`` passed to both encoder and decoder.
    """
    obj = txt.encode("UTF-8")
    length = len(obj)

    packer = pyppmd.Ppmd7Encoder(max_order=max_order, mem_size=mem_size)
    compressed = packer.encode(obj) + packer.flush()

    dec = pyppmd.Ppmd7Decoder(max_order=max_order, mem_size=mem_size)
    result = dec.decode(compressed, length)
    shortfall = length - len(result)
    if shortfall > 0:
        # ppmd needs an extra null byte when the decoder still asks for input
        filler = b"\0" if dec.needs_input else b""
        result += dec.decode(filler, shortfall)

    assert result == obj
    assert dec.eof
    assert not dec.needs_input
def test_ppmd7_decode_chunks():
    """Decode ``testdata2.ppmd`` as 30 consecutive chunks of known sizes.

    One decoder instance is reused for the whole file.  For each of the first
    30 entries of ``chunk_sizes`` the test requests exactly that many output
    bytes and asserts that many were produced.
    """
    with testdata_path.joinpath("testdata2.ppmd").open("rb") as f:
        dec = pyppmd.Ppmd7Decoder(6, 16 << 20)
        for i in range(30):
            expected = chunk_sizes[i]
            remaining = expected
            result = bytearray()
            while remaining > 0:
                data = f.read(READ_BLOCKSIZE)
                out = dec.decode(data, remaining)
                if not out:
                    # Nothing came out: retry once with a single NUL pad byte
                    # if the decoder still asks for input, otherwise with
                    # empty input.
                    filler = b"\0" if dec.needs_input else b""
                    out = dec.decode(filler, remaining)
                    if not out:
                        # No progress is possible; the assertion below
                        # reports the shortfall.
                        break
                # Output of the retry is kept and counted like any other
                # output instead of being dropped.
                remaining -= len(out)
                result += out
            assert len(result) == expected
def decode(var, max_order, mem_size):
    """Decode ``tmp_path/target.ppmd`` into memory with the chosen variant.

    Args:
        var: ``7`` selects ``Ppmd7Decoder``; any other value selects
            ``Ppmd8Decoder``.
        max_order: ``max_order`` passed to the decoder.
        mem_size: ``mem_size`` passed to the decoder.

    ``tmp_path`` and ``src_size`` come from the enclosing scope.  The function
    asserts that exactly ``src_size`` bytes were decoded.
    """
    decoder_cls = pyppmd.Ppmd7Decoder if var == 7 else pyppmd.Ppmd8Decoder
    decoder = decoder_cls(max_order=max_order, mem_size=mem_size)
    source_path = tmp_path.joinpath("target.ppmd")
    with source_path.open("rb") as src, io.BytesIO() as sink:
        remaining = src_size
        block = src.read(READ_BLOCKSIZE)
        while remaining > 0:
            out = decoder.decode(block, remaining)
            if not out:
                # Decoder produced nothing for this block: give up.
                break
            sink.write(out)
            remaining -= len(out)
            block = src.read(READ_BLOCKSIZE)
        assert remaining == 0
def test_ppmd7_decoder():
    """Decode ``encoded`` in a single call and check the final decoder state.

    The call requests 66 output bytes.  The output must equal ``data``, and
    the decoder must report EOF without asking for more input.
    """
    ppmd = pyppmd.Ppmd7Decoder(6, 16 << 20)
    decoded = ppmd.decode(encoded, 66)
    assert decoded == data
    assert ppmd.eof
    assert not ppmd.needs_input