Beispiel #1
0
def _deserialize_output_memory(obj_id: plasma.ObjectID,
                               client: plasma.PlasmaClient) -> Any:
    """Gets data from memory and deserializes it.

    The metadata stored alongside the object is decoded as UTF-8 and
    split on ``Config.__METADATA_SEPARATOR__``. It must consist of
    exactly four fields, of which the third names the serialization
    that selects how the buffer is read.

    Args:
        obj_id: The ID of the object to retrieve from the plasma store.
        client: A PlasmaClient to interface with the in-memory object
            store.

    Returns:
        The unserialized data from the store corresponding to the
        `obj_id`: everything in the stream read as a table for
        ``ARROW_TABLE``, the first record batch for ``ARROW_BATCH`` or
        the unpickled object for ``PICKLE``.

    Raises:
        ObjectNotFoundError: If the specified `obj_id` is not in the
            store.
        ValueError: If the metadata does not consist of exactly four
            fields, if the serialization type in the metadata is not
            valid, or if an object marked as an Arrow batch contains
            no batch.
    """
    # TODO: the get_buffers allows for batch, which we want to use in
    #       the future.
    buffers = client.get_buffers([obj_id], with_meta=True, timeout_ms=1000)

    # Since we currently know that we are only retrieving one buffer,
    # we can instantly get its metadata and buffer.
    metadata, buffer = buffers[0]

    # Getting the buffer timed out. We conclude that the object has not
    # yet been written to the store and maybe never will.
    if metadata is None and buffer is None:
        raise ObjectNotFoundError(
            f'Object with ObjectID "{obj_id}" does not exist in store.')

    # Validate the layout explicitly so that malformed metadata yields
    # a descriptive error rather than a bare tuple-unpacking failure.
    fields = metadata.decode("utf-8").split(Config.__METADATA_SEPARATOR__)
    if len(fields) != 4:
        raise ValueError(
            f"Expected 4 metadata fields but found {len(fields)}.")
    serialization = fields[2]

    if serialization == Serialization.ARROW_TABLE.name:
        # Read all batches as a table.
        return pa.ipc.open_stream(buffer).read_all()

    if serialization == Serialization.ARROW_BATCH.name:
        # Only the first batch is needed (it is expected to be the only
        # one), so read just that one instead of materializing the
        # whole stream into a list.
        reader = pa.ipc.open_stream(buffer)
        try:
            return reader.read_next_batch()
        except StopIteration:
            raise ValueError(
                "Object was serialized as an Arrow batch but the stream "
                "contains no batch.") from None

    if serialization == Serialization.PICKLE.name:
        # Can load the buffer directly because its a bytes-like-object:
        # https://docs.python.org/3/library/pickle.html#pickle.loads
        # NOTE: unpickling can execute arbitrary code, so this is only
        # safe as long as every writer to the store is trusted.
        return pickle.loads(buffer)

    raise ValueError(
        "Object was serialized with an unsupported serialization")
Beispiel #2
0
def _get_output_memory(obj_id: plasma.ObjectID,
                       client: plasma.PlasmaClient) -> Any:
    """Fetches an object from the in-memory store and deserializes it.

    The buffer is copied to bytes and passed through `pa.deserialize`.
    If the object's metadata equals the "arrowpickle" marker, the
    deserialized result is additionally unpickled.

    Args:
        obj_id: The ID of the object to retrieve from the plasma store.
        client: A PlasmaClient to interface with the in-memory object
            store.

    Returns:
        The unserialized data from the store corresponding to the
        `obj_id`.

    Raises:
        ObjectNotFoundError: If the specified `obj_id` is not in the
            store.
    """
    # TODO: the get_buffers allows for batch, which we want to use in
    #       the future.
    results = client.get_buffers([obj_id], with_meta=True, timeout_ms=1000)

    # Exactly one ID was requested, so the first entry is the one we
    # are after.
    meta, data = results[0]

    # Nothing came back before the timeout: treat the object as not
    # (yet) written to the store.
    if meta is None and data is None:
        raise ObjectNotFoundError(
            f'Object with ObjectID "{obj_id}" does not exist in store.')

    payload = pa.deserialize(data.to_pybytes())

    # Metadata equal to this marker means the deserialized payload is
    # itself a pickle and needs one more decoding step.
    pickled_marker = f"{Config.IDENTIFIER_SERIALIZATION};arrowpickle".encode(
        "utf-8")
    if meta == pickled_marker:
        return pickle.loads(payload)

    return payload