Example #1
0
def test_pickle_frame_dyncodec(tmp_path, rng: np.random.Generator):
    """Round-trip a data frame using a codec picked per buffer by a callable.

    The callable handed to ``BinPickler.compressed`` returns a chain of
    ``AsType('f4', 'f8')`` followed by Blosc for float64 NumPy arrays, and
    plain Blosc for every other buffer.
    """
    n_rows = 5000
    path = tmp_path / 'data.bpk'

    # Build the columns in the same order as the frame so the RNG stream is
    # consumed deterministically: integers first, then normals.
    keys = np.arange(0, n_rows, dtype='i4')
    counts = rng.integers(0, 1000, n_rows)
    scores = rng.normal(10, 2, n_rows)
    frame = pd.DataFrame({'key': keys, 'count': counts, 'score': scores})

    def choose_codec(buf):
        # Look at the object that owns the buffer to decide how to encode it.
        backing = memoryview(buf).obj
        is_double_array = isinstance(backing, np.ndarray) and backing.dtype == np.float64
        if not is_double_array:
            return codecs.Blosc('zstd', 9)

        print('compacting double array')
        return codecs.Chain([numcodecs.AsType('f4', 'f8'), codecs.Blosc('zstd', 9)])

    with BinPickler.compressed(path, choose_codec) as pickler:
        pickler.dump(frame)

    with BinPickleFile(path) as reader:
        assert not reader.find_errors()
        loaded = reader.load()
        print(loaded)
        assert all(loaded.columns == frame.columns)
        # These columns are compared element for element.
        for exact in ('key', 'count'):
            assert all(loaded[exact] == frame[exact])
        # Scores are compared after casting both sides to 'f4' -- presumably
        # because the AsType stage stores them at reduced precision.
        assert all(loaded['score'].astype('f4') == frame['score'].astype('f4'))
        # Drop the loaded frame before the file is closed.
        del loaded
Example #2
0
def test_empty(tmp_path):
    """Write a file with nothing in it and check it reads back with no entries."""
    bpk_path = tmp_path / 'data.bpk'

    # Finish the file immediately, without dumping or writing any buffer.
    with BinPickler(bpk_path) as pickler:
        pickler._finish_file()

    # NOTE(review): 33 is presumably the fixed overhead of an empty file
    # (header/trailer/index) -- confirm against the format definition.
    expected_size = 33
    actual_size = bpk_path.stat().st_size
    assert actual_size == expected_size

    with BinPickleFile(bpk_path) as reader:
        assert len(reader.entries) == 0
Example #3
0
def test_pickle_array(tmp_path, rng: np.random.Generator):
    """Pickle a NumPy array and verify that it loads back element for element."""
    bpk_path = tmp_path / 'data.bpk'
    original = rng.integers(0, 5000, 1024, dtype='i4')

    with BinPickler(bpk_path) as pickler:
        pickler.dump(original)

    with BinPickleFile(bpk_path) as reader:
        # Dumping one array is expected to leave two entries in the file.
        n_entries = len(reader.entries)
        assert n_entries == 2

        restored = reader.load()
        assert len(restored) == len(original)
        assert all(restored == original)
Example #4
0
def test_map_many_arrays(a):
    """Pickle random NumPy arrays with the mappable writer and load them directly."""
    # Skip inputs containing NaN; the elementwise equality check below could
    # never succeed for them because NaN compares unequal to itself.
    contains_nan = any(np.isnan(a))
    assume(not contains_nan)

    with TemporaryDirectory('.test', 'binpickle') as tmp_dir:
        bpk_path = Path(tmp_dir) / 'data.bpk'

        with BinPickler.mappable(bpk_path) as pickler:
            pickler.dump(a)

        with BinPickleFile(bpk_path, direct=True) as reader:
            assert not reader.find_errors()
            n_entries = len(reader.entries)
            assert n_entries in (1, 2)

            restored = reader.load()
            assert len(restored) == len(a)
            assert all(restored == a)
            # Drop the loaded array before the file is closed.
            del restored
Example #5
0
def test_pickle_frame(tmp_path, rng: np.random.Generator, writer, direct):
    """Pickle a Pandas data frame and check every column survives the round trip.

    ``writer`` is called with the output path to obtain the pickler, and
    ``direct`` is forwarded to ``BinPickleFile`` when reading back.
    """
    bpk_path = tmp_path / 'data.bpk'

    # Dict values are evaluated top to bottom, so the RNG draws happen in the
    # order integers -> normals.
    column_data = {
        'key': np.arange(0, 5000),
        'count': rng.integers(0, 1000, 5000),
        'score': rng.normal(10, 2, 5000),
    }
    original = pd.DataFrame(column_data)

    with writer(bpk_path) as pickler:
        pickler.dump(original)

    with BinPickleFile(bpk_path, direct=direct) as reader:
        assert not reader.find_errors()
        restored = reader.load()
        print(restored)
        assert all(restored.columns == original.columns)
        for column in restored.columns:
            assert all(restored[column] == original[column])
        # Drop the loaded frame before the file is closed.
        del restored
Example #6
0
def test_write_buf(tmp_path, rng: np.random.Generator):
    """Write a file holding a single raw array buffer and read it back."""
    bpk_path = tmp_path / 'data.bpk'
    values = rng.integers(0, 5000, 1024, dtype='i4')

    # Use the low-level buffer API directly instead of dump().
    with BinPickler(bpk_path) as pickler:
        pickler._write_buffer(values)
        pickler._finish_file()

    with BinPickleFile(bpk_path, direct=True) as reader:
        assert len(reader.entries) == 1
        entry = reader.entries[0]

        # Both the decoded and the encoded length must equal the raw byte size.
        raw_size = values.nbytes
        assert entry.dec_length == raw_size
        assert entry.enc_length == raw_size

        raw = reader._read_buffer(entry)
        assert raw.nbytes == entry.dec_length

        # Reinterpret the raw bytes with the dtype the array was written in.
        decoded = np.frombuffer(raw, dtype='i4')
        assert len(decoded) == len(values)
        assert all(decoded == values)

        # Drop the array first, then the buffer it was built from, before the
        # file is closed.
        del decoded, raw
Example #7
0
def test_write_encoded_arrays(arrays, codec):
    """Write several buffers with a codec and verify each one reads back intact.

    Args:
        arrays: Buffers to write, one file entry per buffer.
        codec: Codec passed to ``BinPickler.compressed``; when it is not
            ``None`` every entry is expected to record a codec.
    """
    with TemporaryDirectory('.test', 'binpickle-') as path:
        file = Path(path) / 'data.bpk'

        with BinPickler.compressed(file, codec) as w:
            for a in arrays:
                w._write_buffer(a)
            w._finish_file()

        with BinPickleFile(file) as bpf:
            assert not bpf.find_errors()
            assert len(bpf.entries) == len(arrays)
            for e, a in zip(bpf.entries, arrays):
                # Bind `dat` before entering the try block: if an assertion
                # fails before the buffer is read, `del dat` in the finally
                # clause would otherwise raise UnboundLocalError and hide the
                # real assertion failure.
                dat = None
                try:
                    if codec is not None:
                        assert e.codec
                    assert e.dec_length == len(a)
                    dat = bpf._read_buffer(e)
                    assert dat == a
                finally:  # delete things to make failures clearer
                    del dat
                    del e
                    gc.collect()