Exemple #1
0
def resolve(step_uuid: str, consumer: str = None) -> Tuple[Any]:
    """Resolves the most recently used tranfer method of the given step.

    Additionally, resolves all the ``*args`` and ``**kwargs`` the
    receiving transfer method has to be called with.

    Args:
        step_uuid: UUID of the step to resolve its most recent write.
        consumer: The consumer of the output data. This is put inside
            the metadata of an empty object to trigger a notification in
            the plasma store, which is then used to manage eviction of
            objects.

    Returns:
        Tuple containing the information of the function to be called
        to get the most recent data from the step. Additionally, returns
        fill-in arguments for the function.

    Raises:
        OutputNotFoundError: If no output can be found of the given
            `step_uuid`. Either no output was generated or the in-memory
            object store died (and therefore lost all its data).
    """
    # TODO: not completely sure whether this global approach is prefered
    #       over difining that same list inside this function. Arguably
    #       defining it outside the function allows for easier
    #       extendability.
    global _resolve_methods

    method_infos = []
    for method in _resolve_methods:
        try:
            if method.__name__ == "resolve_memory":
                method_info = method(step_uuid, consumer=consumer)
            else:
                method_info = method(step_uuid)

        except OutputNotFoundError:
            # We know now that the user did not use this method to output
            # thus we can just skip it and continue.
            pass
        except OrchestNetworkError:
            # If no in-memory store is running, then getting the data
            # from memory obviously will not work.
            pass
        else:
            method_infos.append(method_info)

    # If no info could be collected, then the previous step has not yet
    # been executed.
    if not method_infos:
        raise OutputNotFoundError(
            f'Output from incoming step "{step_uuid}" cannot be found. '
            "Try rerunning it.")

    # Get the method that was most recently used based on its logged
    # timestamp.
    # NOTE: if multiple methods have the same timestamp then the method
    # that is highest in the `_resolve_methods` list will be returned.
    # Since `max` returns the first occurrence of the maximum value.
    most_recent = max(method_infos, key=lambda x: x["timestamp"])
    return (
        most_recent["method_to_call"],
        most_recent["method_args"],
        most_recent["method_kwargs"],
    )
Exemple #2
0
def get_inputs(ignore_failure: bool = False,
               verbose: bool = False) -> List[Any]:
    """Gets all data sent from incoming steps.

    Args:
        ignore_failure: If True then the returned result can have
            ``None`` values if the data of a step could not be
            retrieved. If False, then this function will fail if any of
            the incoming steps's data could not be retrieved. Example:
            ``[None, 'Hello World!']`` vs :exc:`OutputNotFoundError`
        verbose: If True print all the steps from which the current step
            has retrieved data.

    Returns:
        List of all the data in the specified order from the front-end.

        Example:

    Raises:
        StepUUIDResolveError: The step's UUID cannot be resolved and
            thus it cannot determine what inputs to get.

    Example:
        >>> # It does not matter how the data was output in steps 1 and 2.
        >>> # It is resolved automatically by the get_inputs method.
        >>> data_step_1, data_step_2 = get_inputs()

    Warning:
        Only call :meth:`get_inputs` once! When auto eviction is
        configured data might no longer be available. Either cache the
        data or maintain a copy yourself.

    """
    with open(Config.PIPELINE_DESCRIPTION_PATH, "r") as f:
        pipeline_description = json.load(f)

    pipeline = Pipeline.from_json(pipeline_description)
    try:
        step_uuid = get_step_uuid(pipeline)
    except StepUUIDResolveError:
        raise StepUUIDResolveError(
            "Failed to determine from where to get data.")

    # TODO: maybe instead of for loop we could first get the receive
    #       method and then do batch receive. For example memory allows
    #       to do get_buffers which operates in batch.
    # NOTE: the order in which the `parents` list is traversed is
    # indirectly set in the UI. The order is important since it
    # determines the order in which the inputs are received in the next
    # step.
    data = []
    for parent in pipeline.get_step_by_uuid(step_uuid).parents:
        parent_uuid = parent.properties["uuid"]
        get_output_method, args, kwargs = resolve(parent_uuid,
                                                  consumer=step_uuid)

        # Either raise an error on failure of getting output or
        # continue with other steps.
        try:
            incoming_step_data = get_output_method(*args, **kwargs)
        except OutputNotFoundError as e:
            if not ignore_failure:
                raise OutputNotFoundError(e)

            incoming_step_data = None

        if verbose:
            parent_title = parent.properties["title"]
            if incoming_step_data is None:
                print(f'Failed to retrieve input from step: "{parent_title}"')
            else:
                print(f'Retrieved input from step: "{parent_title}"')

        data.append(incoming_step_data)

    return data
Exemple #3
0
def get_inputs(ignore_failure: bool = False,
               verbose: bool = False) -> Dict[str, Any]:
    """Gets all data sent from incoming steps.

    Args:
        ignore_failure: If True then the returned result can have
            ``None`` values if the data of a step could not be
            retrieved. If False, then this function will fail if any of
            the incoming steps's data could not be retrieved. Example:
            ``[None, "Hello World!"]`` vs :exc:`OutputNotFoundError`
        verbose: If True print all the steps from which the current step
            has retrieved data.

    Returns:
        Dictionary with input data for this step. We differentiate
        between two cases:

        * Named data, which is data that was outputted with a `name` by
          any parent step. Named data can be retrieved through the
          dictionary by its name, e.g. ``data = get_inputs()["my_name"]``.
          Name collisions will raise an :exc:`InputNameCollisionError`.
        * Unnamed data, which is an ordered list containing all the
          data that was outputted without a name by the parent steps.
          Unnamed data can be retrieved by accessing the reserved
          ``"unnamed"`` key. The order of this list depends on the order
          of the parent steps of the node, which is visible through the
          GUI.

        Example::

            # It does not matter how the data was output by parent steps.
            # It is resolved automatically by the get_inputs method.
            {
                "unnamed" : ["Hello World!", (3, 4)],
                "named_1" : "mystring",
                "named_2" : [1, 2, 3]
            }

    Raises:
        InputNameCollisionError: Multiple steps have outputted data with
            the same name.
        OutputNotFoundError: If no output can be found of the given
            `step_uuid`. Either no output was generated or the in-memory
            object store died (and therefore lost all its data).
        StepUUIDResolveError: The step's UUID cannot be resolved and
            thus it cannot determine what inputs to get.

    Warning:
        Only call :meth:`get_inputs` once! When auto eviction is
        configured data might no longer be available. Either cache the
        data or maintain a copy yourself.

    """
    with open(Config.PIPELINE_DEFINITION_PATH, "r") as f:
        pipeline_definition = json.load(f)

    pipeline = Pipeline.from_json(pipeline_definition)
    try:
        step_uuid = get_step_uuid(pipeline)
    except StepUUIDResolveError:
        raise StepUUIDResolveError(
            "Failed to determine from where to get data.")

    collisions_dict = defaultdict(list)
    get_output_methods = []

    # Check for collisions before retrieving any data.
    for parent in pipeline.get_step_by_uuid(step_uuid).parents:

        # For each parent get what function to use to retrieve its
        # output data and metadata related to said data.
        parent_uuid = parent.properties["uuid"]
        get_output_method, args, kwargs, metadata = _resolve(
            parent_uuid, consumer=step_uuid)

        # Maintain the output methods in order, but wait with calling
        # them so that we can first check for collisions.
        get_output_methods.append(
            (parent, get_output_method, args, kwargs, metadata))

        if metadata["name"] != Config._RESERVED_UNNAMED_OUTPUTS_STR:
            collisions_dict[metadata["name"]].append(
                parent.properties["title"])

    # If there are collisions raise an error.
    collisions_dict = {k: v for k, v in collisions_dict.items() if len(v) > 1}
    if collisions_dict:
        msg = [
            f"\n{name}: {sorted(step_names)}"
            for name, step_names in collisions_dict.items()
        ]
        msg = "".join(msg)
        raise InputNameCollisionError(
            f"Name collisions between input data coming from different steps: {msg}"
        )

    # TODO: maybe instead of for loop we could first get the receive
    #       method and then do batch receive. For example memory allows
    #       to do get_buffers which operates in batch.
    # NOTE: the order in which the `parents` list is traversed is
    # indirectly set in the UI. The order is important since it
    # determines the order in which unnamed inputs are received in
    # the next step.
    data = {Config._RESERVED_UNNAMED_OUTPUTS_STR: []}  # type: Dict[str, Any]
    for parent, get_output_method, args, kwargs, metadata in get_output_methods:

        # Either raise an error on failure of getting output or
        # continue with other steps.
        try:
            incoming_step_data = get_output_method(*args, **kwargs)
        except OutputNotFoundError as e:
            if not ignore_failure:
                raise OutputNotFoundError(e)

            incoming_step_data = None

        if verbose:
            parent_title = parent.properties["title"]
            if incoming_step_data is None:
                print(f'Failed to retrieve input from step: "{parent_title}"')
            else:
                print(f'Retrieved input from step: "{parent_title}"')

        # Populate the return dictionary, where nameless data gets
        # appended to a list and named data becomes a (name, data) pair.
        name = metadata["name"]
        if name == Config._RESERVED_UNNAMED_OUTPUTS_STR:
            data[Config._RESERVED_UNNAMED_OUTPUTS_STR].append(
                incoming_step_data)
        else:
            data[name] = incoming_step_data

    return data