Esempio n. 1
0
    def write_mutable_tensor(self, session_id, name, payload_type, body):
        """Decode a write request from ``body`` and apply it to a mutable tensor.

        ``body`` is sliced as: an 8-byte int64 giving the length of the index
        JSON, the ASCII-encoded index JSON itself, then the payload, whose
        encoding is selected by ``payload_type``:

        * ``None`` -- handed to ``dataserializer.loads``.
        * ``'tensor'`` -- read with ``pyarrow.read_tensor``.
        * ``'record_batch'`` -- an 8-byte int64 schema length, the Arrow
          schema, then a record batch that is converted to a NumPy record
          array via pandas (``index=False``).

        Returns whatever the session actor's ``write_mutable_tensor`` returns.

        Raises ``ValueError`` for any other ``payload_type``.
        """
        import pyarrow

        from ..serialize import dataserializer
        from ..tensor.core import Indexes

        session_ref = self.get_actor_ref(SessionActor.gen_uid(session_id))

        size_field = 8  # width in bytes of each int64 length prefix
        index_len = np.frombuffer(body[:size_field], dtype=np.int64).item()
        payload_start = size_field + index_len
        index_text = body[size_field:payload_start].decode('ascii')
        index = Indexes.from_json(json.loads(index_text)).indexes

        if payload_type is None:
            value = dataserializer.loads(body[payload_start:])
        elif payload_type == 'tensor':
            with pyarrow.BufferReader(body[payload_start:]) as source:
                value = pyarrow.read_tensor(source).to_numpy()
        elif payload_type == 'record_batch':
            # The record-batch payload carries its own length-prefixed schema.
            schema_start = payload_start + size_field
            schema_len = np.frombuffer(body[payload_start:schema_start],
                                       dtype=np.int64).item()
            batch_start = schema_start + schema_len
            with pyarrow.BufferReader(body[schema_start:batch_start]) as source:
                schema = pyarrow.read_schema(source)
            with pyarrow.BufferReader(body[batch_start:]) as source:
                batch = pyarrow.read_record_batch(source, schema)
                frame = batch.to_pandas()
                value = frame.to_records(index=False)
        else:
            raise ValueError(f'Not supported payload type: {payload_type}')
        return session_ref.write_mutable_tensor(name, index, value)
Esempio n. 2
0
 def __iter__(self):
     """Yield every tensor stored in ``self.path`` as a NumPy array.

     Tensors are read one after another from a single input stream opened
     on ``self.path``. Iteration stops at the first read that fails; the
     stream is closed when the generator finishes, is closed early, or is
     interrupted.
     """
     input_stream = pa.input_stream(self.path)
     try:
         while True:
             try:
                 array = pa.read_tensor(input_stream).to_numpy()
             except Exception:
                 # A failed read is treated as "no more tensors", as before.
                 # Only ``Exception`` is caught so KeyboardInterrupt,
                 # SystemExit and GeneratorExit are no longer swallowed.
                 break
             # Yield outside the ``try`` so errors raised by the consumer
             # at this point are not mistaken for end of stream.
             yield array
     finally:
         input_stream.close()
Esempio n. 3
0
def test_read_tensor(tmpdir):
    """Write a tensor through a memory map and read it back as a NumPy array."""
    # Create and write the tensor
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)
    data_size = pa.get_tensor_size(tensor)
    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-read-tensor')
    # Close each map when done so no open file handle outlives the test.
    with pa.create_memory_map(path, data_size) as write_mmap:
        pa.write_tensor(tensor, write_mmap)
    # Try to read the tensor
    with pa.memory_map(path, mode='r') as read_mmap:
        array = pa.read_tensor(read_mmap).to_numpy()
        # Compare while the map is still open: the array may be a view of
        # the mapped memory rather than a copy.
        np.testing.assert_equal(data, array)
Esempio n. 4
0
def test_read_tensor(tmpdir):
    """Write a tensor through a memory map and read it back as a NumPy array."""
    # Create and write the tensor
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)
    data_size = pa.get_tensor_size(tensor)
    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-read-tensor')
    # Close each map when done so no open file handle outlives the test.
    with pa.create_memory_map(path, data_size) as write_mmap:
        pa.write_tensor(tensor, write_mmap)
    # Try to read the tensor
    with pa.memory_map(path, mode='r') as read_mmap:
        array = pa.read_tensor(read_mmap).to_numpy()
        # Compare while the map is still open: the array may be a view of
        # the mapped memory rather than a copy.
        np.testing.assert_equal(data, array)
Esempio n. 5
0
def test_tensor_ipc_roundtrip(tmpdir):
    """Write a tensor to a memory map, rewind, and read back an equal tensor."""
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)

    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-roundtrip')

    # The context manager closes the map even if an assertion fails.
    with pa.create_memory_map(path, 1024) as mmap:
        pa.write_tensor(tensor, mmap)

        # Rewind so the read starts at the beginning of what was written.
        mmap.seek(0)
        result = pa.read_tensor(mmap)

        assert result.equals(tensor)
Esempio n. 6
0
def test_tensor_ipc_roundtrip(tmpdir):
    """Write a tensor to a memory map, rewind, and read back an equal tensor."""
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)

    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-roundtrip')

    # The context manager closes the map even if an assertion fails.
    with pa.create_memory_map(path, 1024) as mmap:
        pa.write_tensor(tensor, mmap)

        # Rewind so the read starts at the beginning of what was written.
        mmap.seek(0)
        result = pa.read_tensor(mmap)

        assert result.equals(tensor)
Esempio n. 7
0
def test_tensor_ipc_read_from_compressed(tempdir):
    """Write a tensor to a gzip-compressed stream and read back an equal tensor."""
    # ARROW-5910
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)

    path = tempdir / 'tensor-compressed-file'

    # Leaving the block closes the stream even when the write raises.
    with pa.output_stream(path, compression='gzip') as out_stream:
        pa.write_tensor(tensor, out_stream)

    # The input stream is closed as well instead of being left open.
    with pa.input_stream(path, compression='gzip') as in_stream:
        result = pa.read_tensor(in_stream)
        assert result.equals(tensor)
 def test_store_arrow_objects(self):
     """Store a tensor through ``self.plasma_client`` and read it back unchanged."""
     expected = np.random.randn(10, 4)
     # Serialize the tensor into a newly created object buffer.
     oid = random_object_id()
     source_tensor = pa.Tensor.from_numpy(expected)
     nbytes = pa.get_tensor_size(source_tensor)
     object_buf = self.plasma_client.create(oid, nbytes)
     writer = pa.FixedSizeBufferWriter(object_buf)
     pa.write_tensor(source_tensor, writer)
     self.plasma_client.seal(oid)
     # Fetch the sealed buffer and decode the tensor from it.
     [fetched_buf] = self.plasma_client.get_buffers([oid])
     buf_reader = pa.BufferReader(fetched_buf)
     actual = pa.read_tensor(buf_reader).to_numpy()
     # The decoded array must match what was written.
     np.testing.assert_equal(expected, actual)
Esempio n. 9
0
 def test_store_arrow_objects(self):
     """Store a tensor through ``self.plasma_client`` and read it back unchanged."""
     expected = np.random.randn(10, 4)
     # Serialize the tensor into a newly created object buffer.
     oid = random_object_id()
     source_tensor = pa.Tensor.from_numpy(expected)
     nbytes = pa.get_tensor_size(source_tensor)
     object_buf = self.plasma_client.create(oid, nbytes)
     writer = pa.FixedSizeBufferWriter(object_buf)
     pa.write_tensor(source_tensor, writer)
     self.plasma_client.seal(oid)
     # Fetch the sealed buffer and decode the tensor from it.
     [fetched_buf] = self.plasma_client.get_buffers([oid])
     buf_reader = pa.BufferReader(fetched_buf)
     actual = pa.read_tensor(buf_reader).to_numpy()
     # The decoded array must match what was written.
     np.testing.assert_equal(expected, actual)
Esempio n. 10
0
def test_tensor_ipc_roundtrip():
    """Write a tensor to a memory map, rewind, and read back an equal tensor.

    The backing file is created in the current working directory and is
    passed to ``_try_delete`` afterwards, whether or not the test passes.
    """
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)

    path = 'pyarrow-tensor-ipc-roundtrip'
    try:
        # Close the map before the cleanup below runs, so the file is not
        # still held open by this handle when deletion is attempted.
        with pa.create_memory_map(path, 1024) as mmap:
            pa.write_tensor(tensor, mmap)

            mmap.seek(0)
            result = pa.read_tensor(mmap)

            assert result.equals(tensor)
    finally:
        _try_delete(path)
Esempio n. 11
0
def test_tensor_ipc_strided(tmpdir):
    """Round-trip tensors built from strided NumPy slices through a memory map."""
    data1 = np.random.randn(10, 4)
    tensor1 = pa.Tensor.from_numpy(data1[::2])

    data2 = np.random.randn(10, 6, 4)
    tensor2 = pa.Tensor.from_numpy(data2[::, ::2, ::])

    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-strided')

    # One map is reused for both tensors and closed when the block exits.
    with pa.create_memory_map(path, 2048) as mmap:
        for tensor in [tensor1, tensor2]:
            # Each tensor is written at offset 0, overwriting the previous one.
            mmap.seek(0)
            pa.write_tensor(tensor, mmap)

            mmap.seek(0)
            result = pa.read_tensor(mmap)

            assert result.equals(tensor)
Esempio n. 12
0
def test_tensor_ipc_strided(tmpdir):
    """Round-trip tensors built from strided NumPy slices through a memory map."""
    data1 = np.random.randn(10, 4)
    tensor1 = pa.Tensor.from_numpy(data1[::2])

    data2 = np.random.randn(10, 6, 4)
    tensor2 = pa.Tensor.from_numpy(data2[::, ::2, ::])

    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-strided')

    # One map is reused for both tensors and closed when the block exits.
    with pa.create_memory_map(path, 2048) as mmap:
        for tensor in [tensor1, tensor2]:
            # Each tensor is written at offset 0, overwriting the previous one.
            mmap.seek(0)
            pa.write_tensor(tensor, mmap)

            mmap.seek(0)
            result = pa.read_tensor(mmap)

            assert result.equals(tensor)
Esempio n. 13
0
def get_np(oid):
    """Return the tensor stored under ``oid`` as a NumPy array.

    The object's buffer is fetched through the ``client`` object from the
    enclosing scope and decoded with ``pa.read_tensor``.
    """
    fetched = client.get_buffers([oid])
    source = pa.BufferReader(fetched[0])
    tensor = pa.read_tensor(source)
    array = tensor.to_numpy()
    return array