Beispiel #1
0
def _resolve_memory(
    step_uuid: str, consumer: Optional[str] = None
) -> Dict[str, Any]:
    """Returns information of the most recent write to memory.

    Reads the metadata that is stored alongside the step's output
    object in the plasma store and parses the timestamp, serialization
    and name out of it. It also sets the arguments to call the
    :func:`_get_output_memory` method with.

    Args:
        step_uuid: The UUID of the step to resolve its most recent write
            to memory.
        consumer: The consumer of the output data. It is not used here,
            only forwarded as the ``consumer`` keyword argument of the
            method to call.

    Returns:
        Dictionary with the keys:

        * ``"method_to_call"``: the function to call to get the most
          recent data from the step.
        * ``"method_args"`` / ``"method_kwargs"``: fill-in arguments
          for that function.
        * ``"metadata"``: dictionary with the ``"timestamp"``,
          ``"serialization"`` and ``"name"`` fields as they were parsed
          from the object's metadata (all strings).

    Raises:
        MemoryOutputNotFoundError: If output from `step_uuid` cannot be found.
        OrchestNetworkError: Could not connect to the
            ``Config.STORE_SOCKET_NAME``, because it does not exist. Which
            might be because the specified value was wrong or the store
            died.
        ValueError: If the stored metadata does not consist of exactly
            four separator-delimited fields.
    """
    client = _PlasmaConnector().client

    obj_id = _convert_uuid_to_object_id(step_uuid)

    # Get the metadata of the object if it exists. One entry is returned
    # per requested id; with ``timeout_ms=0`` the call is not supposed
    # to wait for the object, a missing object shows up as ``None``.
    metadata_buffer = client.get_metadata([obj_id], timeout_ms=0)[0]
    if metadata_buffer is None:
        raise MemoryOutputNotFoundError(
            f'Output from incoming step "{step_uuid}" cannot be found. '
            "Try rerunning it.")

    # This is a pyarrow.Buffer, it has to be turned into Python bytes
    # to decode it. Not much overhead given that this is just metadata.
    fields = metadata_buffer.to_pybytes().decode("utf-8")
    fields = fields.split(Config.__METADATA_SEPARATOR__)
    # The first element is an internal flag, see output_to_memory.
    _, timestamp, serialization, name = fields

    return {
        "method_to_call": _get_output_memory,
        "method_args": (step_uuid, ),
        "method_kwargs": {
            "consumer": consumer
        },
        "metadata": {
            "timestamp": timestamp,
            "serialization": serialization,
            "name": name,
        },
    }
Beispiel #2
0
def resolve_memory(
    step_uuid: str, consumer: Optional[str] = None
) -> Dict[str, Any]:
    """Returns information of the most recent write to memory.

    Resolves the timestamp via the `create_time` attribute from the info
    of the plasma store. It also sets the arguments to call the
    :func:`get_output_memory` method with.

    Args:
        step_uuid: The UUID of the step to resolve its most recent write
            to memory.
        consumer: The consumer of the output data. It is not used here,
            only forwarded as the ``consumer`` keyword argument of the
            method to call.

    Returns:
        Dictionary containing the information of the function to be
        called to get the most recent data from the step
        (``'method_to_call'``), the fill-in arguments for the function
        (``'method_args'`` and ``'method_kwargs'``) and the
        ``'timestamp'`` of the write as a naive ISO 8601 string in UTC.

    Raises:
        MemoryOutputNotFoundError: If output from `step_uuid` cannot be found.
        OrchestNetworkError: Could not connect to the
            ``Config.STORE_SOCKET_NAME``, because it does not exist. Which
            might be because the specified value was wrong or the store
            died.
    """
    try:
        client = plasma.connect(Config.STORE_SOCKET_NAME, num_retries=20)
    except OSError as err:
        # Chain the low-level error so the root cause stays visible in
        # the traceback.
        raise OrchestNetworkError(
            'Failed to connect to in-memory object store.') from err

    obj_id = _convert_uuid_to_object_id(step_uuid)
    try:
        # Dictionary from ObjectIDs to an "info" dictionary describing
        # the object.
        info = client.list()[obj_id]
    except KeyError as err:
        raise MemoryOutputNotFoundError(
            f'Output from incoming step "{step_uuid}" cannot be found. '
            'Try rerunning it.') from err

    # NOTE: `datetime.utcfromtimestamp` is deprecated as of Python 3.12.
    # It is kept on purpose: a timezone-aware replacement would append
    # an offset to the ISO string and thereby change the value that is
    # handed to callers.
    timestamp = datetime.utcfromtimestamp(info['create_time']).isoformat()

    return {
        'timestamp': timestamp,
        'method_to_call': get_output_memory,
        'method_args': (step_uuid, ),
        'method_kwargs': {
            'consumer': consumer
        }
    }
Beispiel #3
0
def get_output_memory(step_uuid: str, consumer: Optional[str] = None) -> Any:
    """Gets data from memory.

    Args:
        step_uuid: The UUID of the step to get output data from.
        consumer: The consumer of the output data. This is put inside
            the metadata of an empty object to trigger a notification in
            the plasma store, which is then used to manage eviction of
            objects.

    Returns:
        Data from step identified by `step_uuid`.

    Raises:
        MemoryOutputNotFoundError: If output from `step_uuid` cannot be found.
        OrchestNetworkError: Could not connect to the
            ``Config.STORE_SOCKET_NAME``, because it does not exist. Which
            might be because the specified value was wrong or the store
            died.
        MemoryError: Might be raised, although very unlikely, because
            receiving data also outputs a (small, empty) object to the
            store when eviction is enabled.
    """
    # TODO: could be good idea to put connecting to plasma in a class
    #       such that does not get called when no store is instantiated
    #       or allocated. Additionally, we don't want to be connecting
    #       to the store multiple times.
    try:
        client = plasma.connect(Config.STORE_SOCKET_NAME)
    except OSError as err:
        # Chain the low-level error so the root cause stays visible in
        # the traceback.
        raise OrchestNetworkError(
            'Failed to connect to in-memory object store.') from err

    obj_id = _convert_uuid_to_object_id(step_uuid)
    try:
        obj = _get_output_memory(obj_id, client)
    except ObjectNotFoundError as err:
        raise MemoryOutputNotFoundError(
            f'Output from incoming step "{step_uuid}" cannot be found. '
            'Try rerunning it.') from err

    # Only reached if the object was retrieved successfully.
    # TODO: this ENV variable is set in the orchest-api. Now we
    #       always know when we are running inside a jupyter kernel
    #       interactively. And in that case we never want to do
    #       eviction.
    if os.getenv('EVICTION_OPTIONALITY') is not None:
        # Write an empty object whose metadata identifies the step and
        # its consumer.
        empty_obj, _ = serialize('')
        msg = f'{Config.IDENTIFIER_EVICTION};{step_uuid},{consumer}'
        metadata = bytes(msg, 'utf-8')
        _output_to_memory(empty_obj, client, metadata=metadata)

    return obj
Beispiel #4
0
def get_output_memory(step_uuid: str, consumer: Optional[str] = None) -> Any:
    """Gets data from memory.

    Args:
        step_uuid: The UUID of the step to get output data from.
        consumer: The consumer of the output data. This is put inside
            the metadata of an empty object to trigger a notification in
            the plasma store, which is then used to manage eviction of
            objects.

    Returns:
        Data from step identified by `step_uuid`.

    Raises:
        MemoryOutputNotFoundError: If output from `step_uuid` cannot be found.
        OrchestNetworkError: Could not connect to the
            ``Config.STORE_SOCKET_NAME``, because it does not exist. Which
            might be because the specified value was wrong or the store
            died.
        MemoryError: Might be raised, although very unlikely, because
            receiving data also outputs a (small, empty) object to the
            store when eviction is enabled.
    """
    client = _PlasmaConnector().client

    obj_id = _convert_uuid_to_object_id(step_uuid)
    try:
        obj = _get_output_memory(obj_id, client)
    except ObjectNotFoundError as err:
        # Chain the store-level error so the root cause stays visible in
        # the traceback.
        raise MemoryOutputNotFoundError(
            f'Output from incoming step "{step_uuid}" cannot be found. '
            "Try rerunning it.") from err

    # Only reached if the object was retrieved successfully.
    # NOTE: the "ORCHEST_MEMORY_EVICTION" ENV variable is set in the
    # orchest-api. Now we always know when we are running inside a
    # jupyter kernel interactively. And in that case we never want
    # to do eviction.
    if os.getenv("ORCHEST_MEMORY_EVICTION") is not None:
        # Write an empty object whose metadata identifies the step and
        # its consumer.
        empty_obj, _ = serialize("")
        msg = f"{Config.IDENTIFIER_EVICTION};{step_uuid},{consumer}"
        metadata = bytes(msg, "utf-8")
        _output_to_memory(empty_obj, client, metadata=metadata)

    return obj