def test_serialize_to_components_invalid_cases():
    """Check that malformed component dicts are rejected on deserialization.

    Two component dicts are built by hand and passed to
    ``pa.deserialize_components``; each call is expected to raise
    ``pa.ArrowInvalid``.
    """
    payload = pa.py_buffer(b'hello')

    # (num_ndarrays, num_buffers, data) for each malformed component set.
    invalid_cases = (
        (0, 1, [payload]),
        (1, 0, [payload, payload]),
    )

    for ndarray_count, buffer_count, buffers in invalid_cases:
        components = {
            'num_tensors': 0,
            'num_sparse_tensors': {
                'coo': 0,
                'csr': 0,
                'csc': 0,
                'csf': 0,
                'ndim_csf': 0
            },
            'num_ndarrays': ndarray_count,
            'num_buffers': buffer_count,
            'data': buffers
        }
        with pytest.raises(pa.ArrowInvalid):
            pa.deserialize_components(components)
def test_serialize_to_components_invalid_cases():
    """Check that malformed component dicts are rejected on deserialization.

    Each hand-built component dict is passed to
    ``pa.deserialize_components`` and is expected to raise
    ``pa.ArrowException``.
    """
    payload = pa.frombuffer(b'hello')

    # (num_tensors, num_buffers, data) for each malformed component set.
    invalid_cases = (
        (0, 1, [payload]),
        (1, 0, [payload, payload]),
    )

    for tensor_count, buffer_count, buffers in invalid_cases:
        components = {
            'num_tensors': tensor_count,
            'num_buffers': buffer_count,
            'data': buffers,
        }
        with pytest.raises(pa.ArrowException):
            pa.deserialize_components(components)
def loads(buf):
    """Deserialize an object from a buffer that starts with a file header.

    The header (parsed by ``read_file_header``) selects the compression
    codec and the serialization type. The payload after ``HEADER_LENGTH``
    bytes is decompressed if needed, then decoded either with pyarrow
    (``SerialType.ARROW``) or with ``pickle``.

    When ``pyarrow.deserialize`` rejects an ARROW payload with
    ``ArrowInvalid``, the payload is re-read as a component layout:
    a 4-byte int32 metadata size, the pickled metadata dict, and then the
    raw buffers whose sizes are listed under ``'buffer_sizes'``.
    """
    mv = memoryview(buf)
    header = read_file_header(mv)
    codec = header.compress

    if codec == CompressType.NONE:
        payload = buf[HEADER_LENGTH:]
    else:
        payload = decompressors[codec](mv[HEADER_LENGTH:])

    if header.type != SerialType.ARROW:
        return pickle.loads(payload)

    try:
        return pyarrow.deserialize(memoryview(payload), mars_serialize_context())
    except pyarrow.lib.ArrowInvalid:  # pragma: no cover
        # reconstruct value from buffers of arrow components
        view = memoryview(payload)
        meta_block_size = np.frombuffer(view[0:4], dtype='int32').item()
        meta_end = 4 + meta_block_size
        meta = pickle.loads(view[4:meta_end])  # nosec

        buffer_sizes = meta.pop('buffer_sizes')
        # Cumulative offsets: bounds[i]..bounds[i + 1] delimits buffer i.
        bounds = np.cumsum([meta_end] + buffer_sizes)
        meta['data'] = [
            pyarrow.py_buffer(view[start:stop])
            for start, stop in zip(bounds[:-1], bounds[1:])
        ]
        return pyarrow.deserialize_components(meta, mars_serialize_context())
def test_serialize_to_components_invalid_cases():
    """Check that malformed component dicts are rejected on deserialization.

    Each hand-built component dict is passed to
    ``pa.deserialize_components`` and is expected to raise
    ``pa.ArrowInvalid``.
    """
    payload = pa.py_buffer(b'hello')

    # (num_tensors, num_buffers, data) for each malformed component set.
    invalid_cases = (
        (0, 1, [payload]),
        (1, 0, [payload, payload]),
    )

    for tensor_count, buffer_count, buffers in invalid_cases:
        components = {
            'num_tensors': tensor_count,
            'num_buffers': buffer_count,
            'data': buffers
        }
        with pytest.raises(pa.ArrowInvalid):
            pa.deserialize_components(components)
def get(self, key, fill_cache_func):
    """Return the table stored under ``key``, filling the cache on a miss.

    On a hit, the cached components are deserialized with pyarrow and
    converted back to a ``pa.Table`` (without preserving the index).
    On a miss, ``fill_cache_func()`` is called, its result is converted to
    pandas, serialized to components and stored under ``key``; the object
    returned by ``fill_cache_func`` is then returned unchanged.
    """
    cached = self._cache.get(key, default=None)
    if cached is not None:
        frame = pa.deserialize_components(cached)
        return pa.Table.from_pandas(frame, preserve_index=False)

    table = fill_cache_func()
    components = pa.serialize(table.to_pandas()).to_components()
    self._cache.set(key, components)
    return table
def time_deserialize_from_components(self):
    """Run ``pa.deserialize_components`` on ``self.as_components``."""
    components = self.as_components
    pa.deserialize_components(components)
def __call__(self, data):
    """Rebuild an object from a pickled header followed by raw buffers.

    ``data[0]`` is unpickled into the components dict; every remaining
    item is wrapped with ``pa.py_buffer`` and stored under the ``'data'``
    key before the dict is handed to ``pa.deserialize_components`` together
    with ``self.context``.
    """
    components = pickle.loads(data[0])
    components['data'] = [pa.py_buffer(chunk) for chunk in data[1:]]
    return pa.deserialize_components(components, context=self.context)
# Report the timing of the preceding step (t0/t1 are set before this fragment).
print("Time for serializing array in-memory (pickle4, copy): %.3fs" % (t1 - t0))

# Time rebuilding the array from `sframe_nocopy` with copy=False.
t0 = time()
carr2 = cat.from_sframe(sframe_nocopy, copy=False)
t1 = time()
print("Time for de-serializing array in-memory (caterva, no-copy): %.3fs" % (t1 - t0))

# Optionally verify the result against the reference array `arr`.
if check_roundtrip:
    print("The roundtrip is... ", end="", flush=True)
    np.testing.assert_allclose(carr2, arr)
    print("ok!")

# Time pyarrow deserialization from components.
t0 = time()
arr2 = pa.deserialize_components(pyarrow_nocopy)
t1 = time()
print("Time for de-serializing array in-memory (arrow, no-copy): %.3fs" % (t1 - t0))

# Optionally verify the pyarrow result against the reference array `arr`.
if check_roundtrip:
    print("The roundtrip is... ", end="", flush=True)
    np.testing.assert_allclose(arr2, arr)
    print("ok!")

# Time pyarrow deserialization of `pyarrow_copy` via pa.deserialize.
t0 = time()
arr2 = pa.deserialize(pyarrow_copy)
t1 = time()
print("Time for de-serializing array in-memory (arrow, copy): %.3fs" % (t1 - t0))
def deserialize(data):
    """Build a ``Hishty`` from pyarrow components.

    The components are deserialized with ``pa.deserialize_components``;
    the minimum and maximum of the result become ``start`` and ``end``,
    and the result itself is passed as ``symbol_infos``.
    """
    symbol_infos = pa.deserialize_components(data)
    first = min(symbol_infos)
    last = max(symbol_infos)
    return Hishty(start=first, end=last, symbol_infos=symbol_infos)