def test_frame(shape, chunkshape, blockshape, itemsize, enforceframe, filename, copy_sframe):
    """Round-trip a zero-filled array through serialized frames.

    An array is built from an all-zero buffer, serialized with ``to_sframe``,
    rebuilt with ``cat.from_sframe`` and serialized again.  The test checks
    that the first frame is shorter than the plain buffer, that the frame
    length stays the same after the first round-trip, and that every rebuilt
    array yields the same buffer as the original one.  A file found at
    ``filename`` is removed both before and after the checks.
    """

    def remove_backing_file():
        # Nothing to remove when no filename was given or the file is absent.
        if filename is None:
            return
        if os.path.exists(filename):
            os.remove(filename)

    remove_backing_file()

    nitems = int(np.prod(shape))
    zeros = bytes(nitems * itemsize)
    original = cat.from_buffer(
        zeros,
        shape,
        chunkshape=chunkshape,
        blockshape=blockshape,
        itemsize=itemsize,
        enforceframe=enforceframe,
        filename=filename,
    )
    first_frame = original.to_sframe()
    expected = original.to_buffer()
    # For the parametrized cases, the serialized frame is expected to be
    # shorter than the plain buffer.
    assert len(first_frame) < len(expected)

    restored = cat.from_sframe(first_frame, copy=copy_sframe)
    second_frame = restored.to_sframe()
    # The lengths of the first and second frames are deliberately NOT
    # compared: they can differ when copies are made.  What matters is that
    # the length no longer changes in round-trips after the first one.

    latest = restored
    latest_frame = second_frame
    for _ in range(1):
        latest = cat.from_sframe(second_frame, copy=copy_sframe)
        latest_frame = latest.to_sframe()
        if not copy_sframe:
            # Without a copy the frame must be materialized as an
            # independent bytes object before a further iteration.
            latest_frame = bytes(latest_frame)
    assert len(latest_frame) == len(second_frame)

    restored_buffer = restored.to_buffer()
    assert restored_buffer == expected
    latest_buffer = latest.to_buffer()
    assert latest_buffer == expected

    remove_backing_file()
# Report the timing of the arrow no-copy serialization; t0 and t1 for this
# measurement are set before this point in the script.
print("Time for serializing array in-memory (arrow, no-copy): %.3fs" % (t1 - t0))

# Time arrow serialization including the conversion of the result to a
# Python bytes object (reported as the "copy" variant).
# NOTE(review): pa.serialize / pa.deserialize_components appear to be
# deprecated in recent pyarrow releases -- confirm against the pinned version.
t0 = time()
pyarrow_copy = pa.serialize(arr).to_buffer().to_pybytes()
t1 = time()
print("Time for serializing array in-memory (arrow, copy): %.3fs" % (t1 - t0))

# Time pickling of the same array with pickle protocol 4.
t0 = time()
frame_pickle = pickle.dumps(arr, protocol=4)
t1 = time()
print("Time for serializing array in-memory (pickle4, copy): %.3fs" % (t1 - t0))

# Time rebuilding a caterva array from the serialized frame with copy=False.
# sframe_nocopy is produced earlier in the script.
t0 = time()
carr2 = cat.from_sframe(sframe_nocopy, copy=False)
t1 = time()
print("Time for de-serializing array in-memory (caterva, no-copy): %.3fs" % (t1 - t0))

# Optional sanity check, kept outside the timed sections: the rebuilt array
# must match the source array within assert_allclose's default tolerances.
if check_roundtrip:
    print("The roundtrip is... ", end="", flush=True)
    np.testing.assert_allclose(carr2, arr)
    print("ok!")

# Time arrow de-serialization from pyarrow_nocopy, which is created earlier
# in the script (presumably the components of the no-copy serialization --
# TODO confirm).
t0 = time()
arr2 = pa.deserialize_components(pyarrow_nocopy)
t1 = time()
print("Time for de-serializing array in-memory (arrow, no-copy): %.3fs" % (t1 - t0))