예제 #1
0
def test_ppmd7_encode_decode(tmp_path, mem_size):
    """Round-trip the sales-records fixture through PPMd7 and compare digests.

    The fixture is compressed block by block into ``target.ppmd`` while a
    SHA-256 of the plain input is accumulated.  The compressed file is then
    decoded block by block into ``target.csv`` and the SHA-256 of the decoded
    output must equal the digest of the input.

    Args:
        tmp_path: directory in which the intermediate files are created.
        mem_size: memory size handed to both the encoder and the decoder.
    """
    length = 0
    m = hashlib.sha256()
    with testdata_path.joinpath("10000SalesRecords.csv").open("rb") as f:
        with tmp_path.joinpath("target.ppmd").open("wb") as target:
            enc = pyppmd.Ppmd7Encoder(6, mem_size)
            data = f.read(READ_BLOCKSIZE)
            while len(data) > 0:
                m.update(data)
                length += len(data)
                target.write(enc.encode(data))
                data = f.read(READ_BLOCKSIZE)
            target.write(enc.flush())
    shash = m.digest()
    m2 = hashlib.sha256()
    assert length == 1237262
    remaining = length
    with tmp_path.joinpath("target.ppmd").open("rb") as target:
        with tmp_path.joinpath("target.csv").open("wb") as out:
            dec = pyppmd.Ppmd7Decoder(6, mem_size)
            while remaining > 0:
                data = target.read(READ_BLOCKSIZE)
                res = dec.decode(data, min(remaining, READ_BLOCKSIZE))
                if len(res) == 0:
                    # The decoder made no progress on this block.  Retry
                    # once, feeding an extra null byte when it still asks
                    # for input and an empty buffer otherwise.
                    if dec.needs_input:
                        res = dec.decode(b"\0", remaining)
                    else:
                        res = dec.decode(b"", remaining)
                    if len(res) == 0:
                        # Still stalled: stop and let the assertion below
                        # report the shortfall.
                        break
                # Account for every decoded byte, including bytes produced by
                # the retry above; previously those were dropped by an
                # unconditional ``break``.
                remaining -= len(res)
                m2.update(res)
                out.write(res)
            assert remaining == 0
        thash = m2.digest()
    assert thash == shash
예제 #2
0
def test_ppmd7_encoder2():
    """Encode the sample in two pieces and flush without an end mark.

    The concatenated output must be 41 bytes long and equal to the
    reference ``encoded`` value.
    """
    enc = pyppmd.Ppmd7Encoder(6, 16 << 20)
    # Split the input at offset 33 and feed both halves in order.
    head = enc.encode(data[:33])
    tail = enc.encode(data[33:])
    trailer = enc.flush(endmark=False)
    result = head + tail + trailer
    assert len(result) == 41
    assert result == encoded
예제 #3
0
 def encode(var, max_order, mem_size):
     """Compress the test data into an in-memory buffer.

     ``var == 7`` selects ``Ppmd7Encoder``; any other value selects
     ``Ppmd8Encoder``.  The compressed bytes are written to a ``BytesIO``
     that is discarded when the function returns.
     """
     encoder_cls = pyppmd.Ppmd7Encoder if var == 7 else pyppmd.Ppmd8Encoder
     encoder = encoder_cls(max_order=max_order, mem_size=mem_size)
     with io.BytesIO() as target:
         with testdata.open("rb") as src:
             # Read fixed-size blocks until an empty read signals end of file.
             for chunk in iter(lambda: src.read(READ_BLOCKSIZE), b""):
                 target.write(encoder.encode(chunk))
             target.write(encoder.flush())
예제 #4
0
def test_ppmd7_fuzzer(txt, max_order, mem_size):
    """Check that UTF-8 encoded ``txt`` survives a PPMd7 encode/decode cycle.

    After decoding, the decoder must report end of stream and must not be
    waiting for more input.
    """
    obj = txt.encode("UTF-8")
    expected_len = len(obj)

    encoder = pyppmd.Ppmd7Encoder(max_order=max_order, mem_size=mem_size)
    packed = encoder.encode(obj)
    packed += encoder.flush()

    decoder = pyppmd.Ppmd7Decoder(max_order=max_order, mem_size=mem_size)
    result = decoder.decode(packed, expected_len)
    if len(result) < expected_len:
        # ppmd may need one extra null byte to emit the last symbols;
        # otherwise drain with an empty buffer.
        filler = b"\0" if decoder.needs_input else b""
        result += decoder.decode(filler, expected_len - len(result))

    assert result == obj
    assert decoder.eof
    assert not decoder.needs_input
예제 #5
0
def test_benchmark_text_decompress(tmp_path, benchmark, name, var, max_order,
                                   mem_size):
    """Benchmark PPMd decompression of the text fixture.

    The fixture is first compressed into ``target.ppmd`` under ``tmp_path``;
    ``benchmark`` is then invoked with the nested ``decode`` helper.
    ``var == 7`` selects the Ppmd7 classes, any other value the Ppmd8 ones.
    """

    def decode(var, max_order, mem_size):
        """Decode ``target.ppmd`` into memory and assert nothing is missing."""
        if var == 7:
            decoder_cls = pyppmd.Ppmd7Decoder
        else:
            decoder_cls = pyppmd.Ppmd8Decoder
        decoder = decoder_cls(max_order=max_order, mem_size=mem_size)
        with tmp_path.joinpath("target.ppmd").open("rb") as src:
            with io.BytesIO() as target:
                remaining = src_size
                chunk = src.read(READ_BLOCKSIZE)
                while remaining > 0:
                    out = decoder.decode(chunk, remaining)
                    if not out:
                        # No output produced: stop and let the assertion
                        # below report the shortfall.
                        break
                    target.write(out)
                    remaining -= len(out)
                    chunk = src.read(READ_BLOCKSIZE)
            assert remaining == 0

    # Prepare the compressed input that the benchmarked helper reads.
    encoder_cls = pyppmd.Ppmd7Encoder if var == 7 else pyppmd.Ppmd8Encoder
    encoder = encoder_cls(max_order=max_order, mem_size=mem_size)
    with tmp_path.joinpath("target.ppmd").open("wb") as target:
        with testdata.open("rb") as src:
            # Read fixed-size blocks until an empty read signals end of file.
            for block in iter(lambda: src.read(READ_BLOCKSIZE), b""):
                target.write(encoder.encode(block))
            target.write(encoder.flush())

    benchmark.extra_info["data_size"] = src_size
    benchmark(decode, var, max_order, mem_size)
예제 #6
0
파일: compressor.py 프로젝트: miurahr/py7zr
 def __init__(self, properties: bytes):
     """Create the PPMd7 encoder described by the ``properties`` blob.

     ``properties`` is unpacked by ``self._decode_property`` into two
     values, which are passed positionally to ``pyppmd.Ppmd7Encoder``.
     """
     encoder_args = self._decode_property(properties)
     max_order, mem_size = encoder_args
     self.encoder = pyppmd.Ppmd7Encoder(max_order, mem_size)
예제 #7
0
def test_ppmd7_encoder():
    """Encode the sample in one call and compare with the reference bytes.

    The encoded body plus the flushed trailer must be 41 bytes long and
    equal to ``encoded``.
    """
    enc = pyppmd.Ppmd7Encoder(6, 16 << 20)
    body = enc.encode(data)
    trailer = enc.flush()
    result = body + trailer
    assert len(result) == 41
    assert result == encoded