Beispiel #1
0
    def wait(
        self,
        num_returns: int = 1,
        timeout: Optional[float] = None
    ) -> Tuple[List[Workflow], List[Workflow]]:
        """Split the workflow outputs into ready and not-yet-ready workflows.

        The split itself is delegated to `ray.wait()` on the object refs of
        `self.workflow_outputs`. See `api.wait()` for details.

        Args:
            num_returns (int): The number of workflows that should be returned.
            timeout (float): The maximum amount of time in seconds to wait
                before returning.

        Returns:
            A pair `(ready, remaining)`: the workflows that are ready and the
            workflows that are not.

        Raises:
            ValueError: If `self.workflow_refs` is non-empty; waiting on
                dynamic workflow refs is not supported.
        """
        if self.workflow_refs:
            message = ("Currently, we do not support wait operations "
                       "on dynamic workflow refs. They are typically "
                       "generated by virtual actors.")
            raise ValueError(message)

        # Index every workflow output by its object ref so the refs handed
        # back by `ray.wait` can be mapped to their outputs again.
        output_by_ref = {}
        for output in self.workflow_outputs:
            output_by_ref[output.ref] = output

        done_refs, pending_refs = ray.wait(
            list(output_by_ref),
            num_returns=num_returns,
            timeout=timeout,
        )

        def _as_workflows(object_refs):
            # Wrap the output behind each object ref into a Workflow.
            return [
                Workflow.from_ref(output_by_ref[object_ref])
                for object_ref in object_refs
            ]

        return _as_workflows(done_refs), _as_workflows(pending_refs)
Beispiel #2
0
def _reconstruct_wait_step(
    reader: workflow_storage.WorkflowStorage,
    result: workflow_storage.StepInspectResult,
    input_map: Dict[StepID, Any],
):
    """Rebuild a wait step from its inspection result.

    Each step listed in `result.workflows` is taken from `input_map` when it
    is already there; otherwise it is constructed with
    `_construct_resume_workflow_from_step` and stored in `input_map`.
    Entries that are not `Workflow` objects are expected to be step IDs;
    their stored output is loaded and wrapped so it can be waited on.

    Args:
        reader: Storage accessor used to load step outputs.
        result: Inspection result of the wait step being reconstructed.
        input_map: Steps that have already been loaded, keyed by step ID.
            Updated in place with newly constructed entries.

    Returns:
        The result of calling `workflow.wait` on the reconstructed inputs
        with the "wait_options" found in the step's ray options.
    """
    wait_options = result.step_options.ray_options.get("wait_options", {})

    waitables = []
    for dep_id in result.workflows:
        # Only construct a step that has not been loaded yet, to avoid
        # duplication.
        if dep_id not in input_map:
            input_map[dep_id] = _construct_resume_workflow_from_step(
                reader, dep_id, input_map)
        recovered = input_map[dep_id]

        if not isinstance(recovered, Workflow):
            assert isinstance(recovered, StepID)
            # TODO (Alex): We should consider caching these outputs too.
            checkpoint = reader.load_step_output(recovered)
            # Simulate a workflow with a workflow reference so it could be
            # used directly by 'workflow.wait'.
            wrapped_ref = WorkflowStaticRef(
                step_id=recovered, ref=ray.put(checkpoint))
            recovered = Workflow.from_ref(wrapped_ref)
        waitables.append(recovered)

    from ray import workflow

    return workflow.wait(waitables, **wait_options)
Beispiel #3
0
def _construct_resume_workflow_from_step(
        workflow_id: str, step_id: StepID) -> Union[Workflow, Any]:
    """Build a workflow that recovers the given step of a stored workflow.

    The step and everything it depends on are inspected, ordered
    topologically, and then rebuilt one by one. Steps with a valid output
    checkpoint are not rebuilt; their stored output is loaded instead.

    Args:
        workflow_id: The ID of the workflow.
        step_id: The ID of the step we want to recover.

    Returns:
        A workflow that recovers the step, or the output of the step
            if it has been checkpointed.

    Raises:
        WorkflowStepNotRecoverableError: If an inspected step reports that
            it is not recoverable.
    """
    storage = workflow_storage.WorkflowStorage(workflow_id)

    # Phase 1: breadth-first walk over the dependencies of the target step.
    inspected = {}  # step id -> inspection result
    dependents = defaultdict(list)  # step id -> steps that consume it
    pending_inputs = {}  # step id -> number of unresolved dependencies

    frontier = deque([step_id])
    while frontier:
        current = frontier.popleft()
        if current in inspected:
            continue
        info = storage.inspect_step(current)
        inspected[current] = info
        if not info.is_recoverable():
            raise WorkflowStepNotRecoverableError(current)

        if info.output_object_valid:
            # A checkpointed output needs nothing from upstream.
            upstream = []
        elif isinstance(info.output_step_id, str):
            # The output is produced by another step; depend on that step.
            upstream = [info.output_step_id]
        else:
            upstream = info.workflows

        for dep in upstream:
            dependents[dep].append(current)
            frontier.append(dep)
        pending_inputs[current] = len(upstream)

    # Phase 2: Kahn's algorithm gives an order in which every step comes
    # after all of its dependencies.
    ordered: List[StepID] = []
    runnable = deque(
        sid for sid, count in pending_inputs.items() if count == 0)
    while runnable:
        finished = runnable.popleft()
        ordered.append(finished)
        for consumer in dependents[finished]:
            remaining = pending_inputs[consumer] - 1
            pending_inputs[consumer] = remaining
            assert remaining >= 0, (consumer, finished)
            if remaining == 0:
                runnable.append(consumer)

    # Phase 3: rebuild the steps following the computed order.
    with serialization.objectref_cache():
        # "recovered" is the context of everything loaded or rebuilt so far.
        # It matters for deduplicating step inputs.
        recovered: Dict[StepID, Any] = {}

        for sid in ordered:
            info = inspected[sid]
            if info.output_object_valid:
                recovered[sid] = storage.load_step_output(sid)
                continue
            if isinstance(info.output_step_id, str):
                recovered[sid] = recovered[info.output_step_id]
                continue

            options = info.step_options
            if options.step_type == StepType.WAIT:
                # The wait step is handled as a special case.
                waitables = []
                for upstream_id in info.workflows:
                    item = recovered[upstream_id]
                    if not isinstance(item, Workflow):
                        # Simulate a workflow with a workflow reference so
                        # it could be used directly by 'workflow.wait'.
                        item = Workflow.from_ref(
                            WorkflowStaticRef(step_id=upstream_id,
                                              ref=ray.put(item)))
                    waitables.append(item)
                wait_kwargs = options.ray_options.get("wait_options", {})
                rebuilt = ray.workflow.wait(waitables, **wait_kwargs)
            else:
                upstream_values = [recovered[u] for u in info.workflows]
                dynamic_refs = [WorkflowRef(ref) for ref in info.workflow_refs]
                args, kwargs = storage.load_step_args(
                    sid,
                    workflows=upstream_values,
                    workflow_refs=dynamic_refs,
                )
                body: Callable = storage.load_step_func_body(sid)
                # TODO(suquark): Use an alternative function when
                # "workflow.step" is fully deprecated.
                rebuilt = ray.workflow.step(body).step(*args, **kwargs)

            # Make the rebuilt workflow carry the original step ID and
            # step options.
            rebuilt._step_id = sid
            rebuilt.data.step_options = options

            recovered[sid] = rebuilt

    # Phase 4: hand back whatever was produced for the requested step.
    return recovered[step_id]