def resolve(step_uuid: str, consumer: str = None) -> Tuple[Any, Any, Any]:
    """Resolves the most recently used transfer method of the given step.

    Additionally, resolves all the ``*args`` and ``**kwargs`` the
    receiving transfer method has to be called with.

    Args:
        step_uuid: UUID of the step to resolve its most recent write.
        consumer: The consumer of the output data. This is put inside
            the metadata of an empty object to trigger a notification in
            the plasma store, which is then used to manage eviction of
            objects. Only forwarded to the ``resolve_memory`` method.

    Returns:
        Tuple ``(method_to_call, method_args, method_kwargs)``
        containing the information of the function to be called to get
        the most recent data from the step, together with the fill-in
        arguments for that function.

    Raises:
        OutputNotFoundError: If no output can be found of the given
            `step_uuid`. Either no output was generated or the in-memory
            object store died (and therefore lost all its data).

    """
    # TODO: not completely sure whether this global approach is preferred
    #       over defining that same list inside this function. Arguably
    #       defining it outside the function allows for easier
    #       extendability.
    # NOTE: `_resolve_methods` is only read here, so no `global`
    # declaration is needed.
    method_infos = []
    for method in _resolve_methods:
        try:
            if method.__name__ == "resolve_memory":
                # Only the in-memory resolver accepts a consumer.
                method_info = method(step_uuid, consumer=consumer)
            else:
                method_info = method(step_uuid)
        except (OutputNotFoundError, OrchestNetworkError):
            # OutputNotFoundError: the user did not use this method to
            # output, thus we can just skip it and continue.
            # OrchestNetworkError: no in-memory store is running, so
            # getting the data from memory obviously will not work.
            continue

        method_infos.append(method_info)

    # If no info could be collected, then the previous step has not yet
    # been executed.
    if not method_infos:
        raise OutputNotFoundError(
            f'Output from incoming step "{step_uuid}" cannot be found. '
            "Try rerunning it."
        )

    # Get the method that was most recently used based on its logged
    # timestamp.
    # NOTE: if multiple methods have the same timestamp then the method
    # that is highest in the `_resolve_methods` list will be returned.
    # Since `max` returns the first occurrence of the maximum value.
    most_recent = max(method_infos, key=lambda x: x["timestamp"])
    return (
        most_recent["method_to_call"],
        most_recent["method_args"],
        most_recent["method_kwargs"],
    )
def get_inputs(ignore_failure: bool = False, verbose: bool = False) -> List[Any]:
    """Gets all data sent from incoming steps.

    Args:
        ignore_failure: If True then the returned result can have
            ``None`` values if the data of a step could not be
            retrieved. If False, then this function will fail if any of
            the incoming steps's data could not be retrieved. Example:
            ``[None, 'Hello World!']`` vs :exc:`OutputNotFoundError`
        verbose: If True print all the steps from which the current step
            has retrieved data.

    Returns:
        List of all the data in the specified order from the front-end.

    Raises:
        OutputNotFoundError: The output of an incoming step could not be
            found and `ignore_failure` is False.
        StepUUIDResolveError: The step's UUID cannot be resolved and
            thus it cannot determine what inputs to get.

    Example:
        >>> # It does not matter how the data was output in steps 1 and 2.
        >>> # It is resolved automatically by the get_inputs method.
        >>> data_step_1, data_step_2 = get_inputs()

    Warning:
        Only call :meth:`get_inputs` once! When auto eviction is
        configured data might no longer be available. Either cache the
        data or maintain a copy yourself.

    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(Config.PIPELINE_DESCRIPTION_PATH, "r", encoding="utf-8") as f:
        pipeline_description = json.load(f)

    pipeline = Pipeline.from_json(pipeline_description)
    try:
        step_uuid = get_step_uuid(pipeline)
    except StepUUIDResolveError as e:
        raise StepUUIDResolveError(
            "Failed to determine from where to get data."
        ) from e

    # TODO: maybe instead of for loop we could first get the receive
    #       method and then do batch receive. For example memory allows
    #       to do get_buffers which operates in batch.
    # NOTE: the order in which the `parents` list is traversed is
    # indirectly set in the UI. The order is important since it
    # determines the order in which the inputs are received in the next
    # step.
    data = []
    for parent in pipeline.get_step_by_uuid(step_uuid).parents:
        parent_uuid = parent.properties["uuid"]

        # Either raise an error on failure of getting output or
        # continue with other steps. Resolving happens inside the `try`
        # as well, because `resolve` raises OutputNotFoundError when the
        # parent has no output at all; `ignore_failure` has to cover
        # that case too.
        try:
            get_output_method, args, kwargs = resolve(
                parent_uuid, consumer=step_uuid
            )
            incoming_step_data = get_output_method(*args, **kwargs)
        except OutputNotFoundError as e:
            if not ignore_failure:
                raise OutputNotFoundError(e) from e

            incoming_step_data = None
            retrieved = False
        else:
            retrieved = True

        if verbose:
            parent_title = parent.properties["title"]
            # Use the explicit flag rather than `is None`: a step may
            # legitimately output ``None``.
            if retrieved:
                print(f'Retrieved input from step: "{parent_title}"')
            else:
                print(f'Failed to retrieve input from step: "{parent_title}"')

        data.append(incoming_step_data)

    return data
def get_inputs(ignore_failure: bool = False, verbose: bool = False) -> Dict[str, Any]:
    """Gets all data sent from incoming steps.

    Args:
        ignore_failure: If True then the returned result can have
            ``None`` values if the data of a step could not be
            retrieved. If False, then this function will fail if any of
            the incoming steps's data could not be retrieved. Example:
            ``{"unnamed": [None, "Hello World!"]}`` vs
            :exc:`OutputNotFoundError`
        verbose: If True print all the steps from which the current step
            has retrieved data.

    Returns:
        Dictionary with input data for this step. We differentiate
        between two cases:

        * Named data, which is data that was outputted with a `name` by
          any parent step. Named data can be retrieved through the
          dictionary by its name, e.g.
          ``data = get_inputs()["my_name"]``. Name collisions will
          raise an :exc:`InputNameCollisionError`.
        * Unnamed data, which is an ordered list containing all the
          data that was outputted without a name by the parent steps.
          Unnamed data can be retrieved by accessing the reserved
          ``"unnamed"`` key. The order of this list depends on the order
          of the parent steps of the node, which is visible through the
          GUI.

        Example::

            # It does not matter how the data was output by parent steps.
            # It is resolved automatically by the get_inputs method.
            {
                "unnamed" : ["Hello World!", (3, 4)],
                "named_1" : "mystring",
                "named_2" : [1, 2, 3]
            }

    Raises:
        InputNameCollisionError: Multiple steps have outputted data with
            the same name.
        OutputNotFoundError: If no output can be found of the given
            `step_uuid`. Either no output was generated or the in-memory
            object store died (and therefore lost all its data).
        StepUUIDResolveError: The step's UUID cannot be resolved and
            thus it cannot determine what inputs to get.

    Warning:
        Only call :meth:`get_inputs` once! When auto eviction is
        configured data might no longer be available. Either cache the
        data or maintain a copy yourself.

    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(Config.PIPELINE_DEFINITION_PATH, "r", encoding="utf-8") as f:
        pipeline_definition = json.load(f)

    pipeline = Pipeline.from_json(pipeline_definition)
    try:
        step_uuid = get_step_uuid(pipeline)
    except StepUUIDResolveError as e:
        raise StepUUIDResolveError(
            "Failed to determine from where to get data."
        ) from e

    collisions_dict = defaultdict(list)
    get_output_methods = []

    # Check for collisions before retrieving any data.
    # NOTE(review): `_resolve` is called outside of the `try` further
    # down, so if it raises OutputNotFoundError itself then
    # `ignore_failure` does not apply to that error -- confirm whether
    # that is intended.
    for parent in pipeline.get_step_by_uuid(step_uuid).parents:
        # For each parent get what function to use to retrieve its
        # output data and metadata related to said data.
        parent_uuid = parent.properties["uuid"]
        get_output_method, args, kwargs, metadata = _resolve(
            parent_uuid, consumer=step_uuid
        )

        # Maintain the output methods in order, but wait with calling
        # them so that we can first check for collisions.
        get_output_methods.append(
            (parent, get_output_method, args, kwargs, metadata)
        )

        if metadata["name"] != Config._RESERVED_UNNAMED_OUTPUTS_STR:
            collisions_dict[metadata["name"]].append(parent.properties["title"])

    # If there are collisions raise an error. A name collides when more
    # than one parent step used it.
    collisions_dict = {k: v for k, v in collisions_dict.items() if len(v) > 1}
    if collisions_dict:
        msg = "".join(
            f"\n{name}: {sorted(step_names)}"
            for name, step_names in collisions_dict.items()
        )
        raise InputNameCollisionError(
            f"Name collisions between input data coming from different steps: {msg}"
        )

    # TODO: maybe instead of for loop we could first get the receive
    #       method and then do batch receive. For example memory allows
    #       to do get_buffers which operates in batch.
    # NOTE: the order in which the `parents` list is traversed is
    # indirectly set in the UI. The order is important since it
    # determines the order in which unnamed inputs are received in
    # the next step.
    data: Dict[str, Any] = {Config._RESERVED_UNNAMED_OUTPUTS_STR: []}
    for parent, get_output_method, args, kwargs, metadata in get_output_methods:
        # Either raise an error on failure of getting output or
        # continue with other steps.
        try:
            incoming_step_data = get_output_method(*args, **kwargs)
        except OutputNotFoundError as e:
            if not ignore_failure:
                raise OutputNotFoundError(e) from e

            incoming_step_data = None
            retrieved = False
        else:
            retrieved = True

        if verbose:
            parent_title = parent.properties["title"]
            # Use the explicit flag rather than `is None`: a step may
            # legitimately output ``None``.
            if retrieved:
                print(f'Retrieved input from step: "{parent_title}"')
            else:
                print(f'Failed to retrieve input from step: "{parent_title}"')

        # Populate the return dictionary, where nameless data gets
        # appended to a list and named data becomes a (name, data) pair.
        name = metadata["name"]
        if name == Config._RESERVED_UNNAMED_OUTPUTS_STR:
            data[Config._RESERVED_UNNAMED_OUTPUTS_STR].append(incoming_step_data)
        else:
            data[name] = incoming_step_data

    return data