def test_serialize_to_components_invalid_cases():
    """Check that ``pa.deserialize_components`` rejects bad component dicts.

    Two component dicts are built around the same small buffer; for each
    one the call is expected to raise ``pa.ArrowInvalid``.
    """
    payload = pa.py_buffer(b'hello')

    def make_components(num_ndarrays, num_buffers, data):
        # A fresh dict (and nested dict) per case, so the cases share nothing
        # but the payload buffer.
        return {
            'num_tensors': 0,
            'num_sparse_tensors': dict.fromkeys(
                ('coo', 'csr', 'csc', 'csf', 'ndim_csf'), 0),
            'num_ndarrays': num_ndarrays,
            'num_buffers': num_buffers,
            'data': data,
        }

    invalid_cases = (
        # One buffer declared, a single entry in 'data'.
        make_components(num_ndarrays=0, num_buffers=1, data=[payload]),
        # One ndarray declared, no buffers, two entries in 'data'.
        make_components(num_ndarrays=1, num_buffers=0,
                        data=[payload, payload]),
    )

    for components in invalid_cases:
        with pytest.raises(pa.ArrowInvalid):
            pa.deserialize_components(components)
Esempio n. 2
0
def test_serialize_to_components_invalid_cases():
    """Check that ``pa.deserialize_components`` rejects bad component dicts.

    Each component dict below is expected to make the call raise
    ``pa.ArrowException``.
    """
    payload = pa.frombuffer(b'hello')

    invalid_cases = (
        # One buffer declared, a single entry in 'data'.
        {'num_tensors': 0, 'num_buffers': 1, 'data': [payload]},
        # One tensor declared, no buffers, two entries in 'data'.
        {'num_tensors': 1, 'num_buffers': 0, 'data': [payload, payload]},
    )

    for components in invalid_cases:
        with pytest.raises(pa.ArrowException):
            pa.deserialize_components(components)
Esempio n. 3
0
def loads(buf):
    """Deserialize an object from a header-prefixed buffer.

    The header returned by ``read_file_header`` supplies the compression
    type and the serialization type. The bytes after ``HEADER_LENGTH`` are
    decompressed when a compression type other than ``CompressType.NONE``
    is set, then decoded either with pyarrow (``SerialType.ARROW``) or with
    pickle (any other type).

    If ``pyarrow.deserialize`` raises ``ArrowInvalid``, the payload is read
    as a component layout instead: a 4-byte int32 metadata size, a pickled
    metadata dict carrying ``'buffer_sizes'``, then the buffers back to back.

    Args:
        buf: Object supporting the buffer protocol and slicing, holding the
            header followed by the payload.

    Returns:
        The deserialized object.
    """
    view = memoryview(buf)
    header = read_file_header(view)
    codec = header.compress

    if codec == CompressType.NONE:
        # Uncompressed payload is sliced from the input object itself.
        payload = buf[HEADER_LENGTH:]
    else:
        payload = decompressors[codec](view[HEADER_LENGTH:])

    if header.type != SerialType.ARROW:
        return pickle.loads(payload)

    try:
        return pyarrow.deserialize(memoryview(payload),
                                   mars_serialize_context())
    except pyarrow.lib.ArrowInvalid:  # pragma: no cover
        # Rebuild the value from the buffers of its arrow components.
        payload_view = memoryview(payload)
        meta_size = np.frombuffer(payload_view[0:4], dtype='int32').item()
        buffers_start = 4 + meta_size
        meta = pickle.loads(payload_view[4:buffers_start])  # nosec
        sizes = meta.pop('buffer_sizes')
        # Cumulative offsets: consecutive pairs delimit one buffer each.
        offsets = np.cumsum([buffers_start] + sizes)
        meta['data'] = [
            pyarrow.py_buffer(payload_view[begin:end])
            for begin, end in zip(offsets[:-1], offsets[1:])
        ]
        return pyarrow.deserialize_components(meta,
                                              mars_serialize_context())
Esempio n. 4
0
def test_serialize_to_components_invalid_cases():
    """Check that ``pa.deserialize_components`` rejects bad component dicts.

    Both component dicts below are expected to make the call raise
    ``pa.ArrowInvalid``.
    """
    payload = pa.py_buffer(b'hello')

    def assert_rejected(**components):
        # The keyword arguments form the component dict passed to pyarrow.
        with pytest.raises(pa.ArrowInvalid):
            pa.deserialize_components(components)

    # One buffer declared, a single entry in 'data'.
    assert_rejected(num_tensors=0, num_buffers=1, data=[payload])
    # One tensor declared, no buffers, two entries in 'data'.
    assert_rejected(num_tensors=1, num_buffers=0, data=[payload, payload])
Esempio n. 5
0
    def get(self, key, fill_cache_func):
        """Return the value for ``key``, filling the cache on a miss.

        On a hit, the cached components are deserialized with pyarrow and
        converted to a ``pa.Table`` (``preserve_index=False``). On a miss,
        ``fill_cache_func()`` is called, its result is converted to pandas,
        serialized to components and stored under ``key``; the object
        returned by ``fill_cache_func`` is handed back unchanged.

        :param key: cache key passed to ``self._cache``.
        :param fill_cache_func: zero-argument callable producing the value
            on a miss; its result must provide ``to_pandas()``.
        :return: a ``pa.Table`` on a hit, the ``fill_cache_func`` result on
            a miss.
        """
        cached = self._cache.get(key, default=None)
        if cached is not None:
            frame = pa.deserialize_components(cached)
            return pa.Table.from_pandas(frame, preserve_index=False)

        fresh = fill_cache_func()
        components = pa.serialize(fresh.to_pandas()).to_components()
        self._cache.set(key, components)
        return fresh
Esempio n. 6
0
 def time_deserialize_from_components(self):
     """Deserialize ``self.as_components`` with pyarrow; the result is discarded."""
     components = self.as_components
     pa.deserialize_components(components)
Esempio n. 7
0
 def __call__(self, data):
     """Rebuild an object from a pickled header plus raw buffers.

     ``data[0]`` is unpickled into the component dict; every remaining
     element is wrapped with ``pa.py_buffer`` and stored under ``'data'``
     before the dict is passed to ``pa.deserialize_components`` together
     with ``self.context``.
     """
     # NOTE(review): pickle.loads is only safe on trusted input - confirm
     # where `data` originates.
     components = pickle.loads(data[0])
     components['data'] = [pa.py_buffer(chunk) for chunk in data[1:]]
     return pa.deserialize_components(components, context=self.context)
Esempio n. 8
0
# Report the elapsed time of the preceding step; t0 and t1 were set before
# this point in the script.
print("Time for serializing array in-memory (pickle4, copy): %.3fs" %
      (t1 - t0))

# Time rebuilding the caterva array from `sframe_nocopy` with copy=False.
t0 = time()
carr2 = cat.from_sframe(sframe_nocopy, copy=False)
t1 = time()
print("Time for de-serializing array in-memory (caterva, no-copy): %.3fs" %
      (t1 - t0))

# Optionally verify that the result matches the reference `arr`.
if check_roundtrip:
    print("The roundtrip is... ", end="", flush=True)
    np.testing.assert_allclose(carr2, arr)
    print("ok!")

# Time pyarrow deserialization from the components in `pyarrow_nocopy`.
t0 = time()
arr2 = pa.deserialize_components(pyarrow_nocopy)
t1 = time()
print("Time for de-serializing array in-memory (arrow, no-copy): %.3fs" %
      (t1 - t0))

# Optionally verify that the result matches the reference `arr`.
if check_roundtrip:
    print("The roundtrip is... ", end="", flush=True)
    np.testing.assert_allclose(arr2, arr)
    print("ok!")

# Time pyarrow deserialization from `pyarrow_copy`; this rebinds `arr2`, and
# no roundtrip check follows within this fragment.
t0 = time()
arr2 = pa.deserialize(pyarrow_copy)
t1 = time()
print("Time for de-serializing array in-memory (arrow, copy): %.3fs" %
      (t1 - t0))
Esempio n. 9
0
 def deserialize(data):
     """Build a ``Hishty`` from pyarrow-serialized components.

     ``data`` is passed to ``pa.deserialize_components``; the minimum and
     maximum of the resulting collection become ``start`` and ``end``.
     ``min``/``max`` raise ``ValueError`` if that collection is empty.
     """
     symbol_infos = pa.deserialize_components(data)
     first, last = min(symbol_infos), max(symbol_infos)
     return Hishty(start=first, end=last, symbol_infos=symbol_infos)