예제 #1
0
def execute_workflow(workflow: "Workflow") -> "WorkflowExecutionResult":
    """Run a workflow step and return its execution result.

    A workflow that is already marked as executed is not run again; its
    stored result is returned unchanged. Otherwise the step is submitted
    through ``_workflow_step_executor`` (configured with the workflow's Ray
    options) and the outcome is stored on the workflow before returning.

    Args:
        workflow: The workflow to be executed.

    Returns:
        A ``WorkflowExecutionResult`` wrapping the persisted and volatile
        outputs handed back by the step executor.

    Raises:
        TypeError: If the persisted output is not a ``WorkflowOutputType``.
    """
    if workflow.executed:
        return workflow.result

    data = workflow.data
    step_inputs = _BakedWorkflowInputs.from_workflow_inputs(data.inputs)

    # Apply the per-step Ray options, then submit the step.
    submit = _workflow_step_executor.options(**data.ray_options).remote
    outputs = submit(
        data.step_type,
        data.func_body,
        workflow_context.get_workflow_step_context(),
        workflow.step_id,
        step_inputs,
        data.catch_exceptions,
        data.max_retries,
    )
    persisted_output, volatile_output = outputs

    if not isinstance(persisted_output, WorkflowOutputType):
        raise TypeError("Unexpected return type of the workflow.")

    # No status is recorded for read-only actor methods.
    if data.step_type != StepType.READONLY_ACTOR_METHOD:
        _record_step_status(
            workflow.step_id,
            WorkflowStatus.RUNNING,
            [volatile_output],
        )

    # Store the result on the workflow so a repeated call takes the early
    # return above.
    execution_result = WorkflowExecutionResult(persisted_output,
                                               volatile_output)
    workflow._result = execution_result
    workflow._executed = True
    return execution_result
예제 #2
0
파일: recovery.py 프로젝트: tchordia/ray
def resume_workflow_step(
    job_id: str,
    workflow_id: str,
    step_id: "StepID",
    current_output: Optional[ray.ObjectRef],
) -> WorkflowExecutionResult:
    """Resume a single step of a workflow.

    Args:
        job_id: The ID of the job that submits the workflow execution. The
            ID is used to identify the submitter of the workflow.
        workflow_id: The ID of the workflow job. The ID is used to identify
            the workflow.
        step_id: The step to resume in the workflow.
        current_output: An optional object ref. It is forwarded to the
            resume executor wrapped in a one-element list, or as an empty
            list when it is None.

    Raises:
        WorkflowNotResumableException: fail to resume the workflow.

    Returns:
        The execution result of the workflow. Both outputs of the resume
        executor are converted with ``WorkflowStaticRef.from_output``.
    """
    # The executor always receives a list: empty when there is no output.
    output_refs = [] if current_output is None else [current_output]

    outputs = _resume_workflow_step_executor.remote(
        job_id,
        workflow_id,
        step_id,
        output_refs,
    )
    persisted_ref, volatile_ref = outputs

    return WorkflowExecutionResult(
        WorkflowStaticRef.from_output(step_id, persisted_ref),
        WorkflowStaticRef.from_output(step_id, volatile_ref),
    )
예제 #3
0
def resume_workflow_step(
        workflow_id: str, step_id: "StepID", store_url: str,
        current_output: Optional[ray.ObjectRef]) -> WorkflowExecutionResult:
    """Resume a single step of a workflow.

    Args:
        workflow_id: The ID of the workflow job. The ID is used to identify
            the workflow.
        step_id: The step to resume in the workflow.
        store_url: The url of the storage to access the workflow.
        current_output: An optional object ref. It is forwarded to the
            resume executor wrapped in a one-element list, or as an empty
            list when it is None.

    Raises:
        WorkflowNotResumableException: fail to resume the workflow.

    Returns:
        The execution result of the workflow, built from the two outputs
        returned by the resume executor.
    """
    # The executor always receives a list: empty when there is no output.
    output_refs = [] if current_output is None else [current_output]

    outputs = _resume_workflow_step_executor.remote(
        workflow_id,
        step_id,
        store_url,
        output_refs,
    )
    persisted_output, volatile_output = outputs
    return WorkflowExecutionResult(persisted_output, volatile_output)
예제 #4
0
def execute_workflow(
        workflow: "Workflow",
        outer_most_step_id: Optional[str] = None,
        last_step_of_workflow: bool = False) -> "WorkflowExecutionResult":
    """Run a workflow step and return its execution result.

    Notation used below:

    * ``A - B`` denotes a dependency between steps, i.e.
      ``B.step(A.step())``.
    * ``A / B`` denotes nesting, i.e. ``def A(): return B.step()``.

    Within a chain/DAG of dependencies the "output step" is the one that
    comes last in topological order; in ``A - B - C`` that is ``C``.

    Within a chain of nested steps, the first output step is the "outer
    most step" of all the output steps nested below it. In
    ``A / B / C / D``, ``A`` is the outer most step for ``B``, ``C`` and
    ``D``. In the hybrid workflow
    ``((A - B) / C / D) - (E / (F - G) / H)``, ``B`` is the outer most step
    for ``C`` and ``D``, and ``E`` is the outer most step for ``G`` and
    ``H``.

    Args:
        workflow: The workflow to be executed.
        outer_most_step_id: The ID of the outer most step as defined above,
            or None if there is none.
        last_step_of_workflow: Whether this step generates the output of
            the workflow (including nested steps).

    Returns:
        A ``WorkflowExecutionResult`` wrapping the persisted and volatile
        outputs handed back by the step executor. If the workflow was
        already executed, its stored result is returned instead.

    Raises:
        TypeError: If the persisted output is not a ``WorkflowOutputType``.
    """
    if workflow.executed:
        return workflow.result

    data = workflow.data
    step_inputs = _BakedWorkflowInputs.from_workflow_inputs(data.inputs)

    # Apply the per-step Ray options, then submit the step.
    submit = _workflow_step_executor.options(**data.ray_options).remote
    outputs = submit(
        data.step_type,
        data.func_body,
        workflow_context.get_workflow_step_context(),
        workflow.step_id,
        step_inputs,
        outer_most_step_id,
        data.catch_exceptions,
        data.max_retries,
        last_step_of_workflow,
    )
    persisted_output, volatile_output = outputs

    if not isinstance(persisted_output, WorkflowOutputType):
        raise TypeError("Unexpected return type of the workflow.")

    # No status is recorded for read-only actor methods.
    if data.step_type != StepType.READONLY_ACTOR_METHOD:
        _record_step_status(
            workflow.step_id,
            WorkflowStatus.RUNNING,
            [volatile_output],
        )

    # Store the result on the workflow so a repeated call takes the early
    # return above.
    execution_result = WorkflowExecutionResult(persisted_output,
                                               volatile_output)
    workflow._result = execution_result
    workflow._executed = True
    return execution_result
예제 #5
0
파일: step_executor.py 프로젝트: ijrsvt/ray
def execute_workflow(workflow: "Workflow") -> "WorkflowExecutionResult":
    """Execute workflow.

    Input workflows that do not yet carry a reference are executed first
    (recursively). An executor is then selected from the step options
    (in-place vs. remote, wait vs. regular step) and invoked with the baked
    inputs.

    Args:
        workflow: The workflow to be executed.

    Returns:
        A ``WorkflowExecutionResult`` holding the persisted and the volatile
        output. An output that is not already a ``WorkflowOutputType`` is
        stored with ``ray.put`` first. If the workflow was already executed,
        its stored result is returned instead.
    """
    if workflow.executed:
        return workflow.result

    # Stage 1: prepare inputs
    workflow_data = workflow.data
    inputs = workflow_data.inputs
    workflow_outputs = []
    with workflow_context.fork_workflow_step_context(
            outer_most_step_id=None, last_step_of_workflow=False):
        for w in inputs.workflows:
            static_ref = w.ref
            if static_ref is None:
                # The input workflow is not a reference to an executed
                # workflow, so execute it now and reference its output.
                output = execute_workflow(w).persisted_output
                static_ref = WorkflowStaticRef(step_id=w.step_id, ref=output)
            workflow_outputs.append(static_ref)

    baked_inputs = _BakedWorkflowInputs(
        args=inputs.args,
        workflow_outputs=workflow_outputs,
        workflow_refs=inputs.workflow_refs,
    )

    # Stage 2: match executors
    step_options = workflow_data.step_options
    if step_options.allow_inplace:
        # TODO(suquark): For inplace execution, it is impossible
        # to get the ObjectRef of the output before execution.
        # Here we use a dummy ObjectRef, because _record_step_status does not
        # even use it (?!).
        _record_step_status(workflow.step_id, WorkflowStatus.RUNNING,
                            [ray.put(None)])
        # Note: we need to be careful about workflow context when
        # calling the executor directly.
        # TODO(suquark): We still have recursive Python calls.
        # This would cause stack overflow if we have a really
        # deep recursive call. We should fix it later.
        if step_options.step_type == StepType.WAIT:
            executor = _workflow_wait_executor
        else:
            executor = _workflow_step_executor
    else:
        if step_options.step_type == StepType.WAIT:
            # This is very important to set "num_cpus=0" to
            # ensure "workflow.wait" is not blocked by other
            # tasks.
            executor = _workflow_wait_executor_remote.options(
                num_cpus=0).remote
        else:
            executor = _workflow_step_executor_remote.options(
                **step_options.ray_options).remote

    # Stage 3: execution
    persisted_output, volatile_output = executor(
        workflow_data.func_body,
        workflow_context.get_workflow_step_context(),
        workflow.step_id,
        baked_inputs,
        step_options,
    )

    # Stage 4: post processing outputs
    # Each output is checked on its own. The earlier code tested
    # ``persisted_output`` twice, so the second check could never be true
    # and a raw volatile output was never stored with ray.put.
    if not isinstance(persisted_output, WorkflowOutputType):
        persisted_output = ray.put(persisted_output)
    if not isinstance(volatile_output, WorkflowOutputType):
        volatile_output = ray.put(volatile_output)

    if step_options.step_type != StepType.READONLY_ACTOR_METHOD:
        if not step_options.allow_inplace:
            # In-place steps already recorded RUNNING in stage 2.
            # TODO: [Possible flaky bug] Here the RUNNING state may
            # be recorded earlier than SUCCESSFUL. This caused some
            # confusion during development.
            _record_step_status(workflow.step_id, WorkflowStatus.RUNNING,
                                [volatile_output])

    # Store the result on the workflow so a repeated call takes the early
    # return at the top.
    result = WorkflowExecutionResult(persisted_output, volatile_output)
    workflow._result = result
    workflow._executed = True
    return result
예제 #6
0
def execute_workflow(workflow: "Workflow") -> "WorkflowExecutionResult":
    """Execute workflow.

    Input workflows that do not yet carry a reference are executed first
    (recursively) under a forked step context that carries an adjusted
    checkpoint context. An executor is then selected from the step options
    (in-place vs. remote, wait vs. regular step) and invoked with the baked
    inputs.

    Args:
        workflow: The workflow to be executed.

    Returns:
        A ``WorkflowExecutionResult`` holding the persisted and the volatile
        output. An output that is not already a ``WorkflowOutputType`` is
        stored with ``ray.put`` first. If the workflow was already executed,
        its stored result is returned instead.
    """
    if workflow.executed:
        return workflow.result

    # Stage 1: prepare inputs
    workflow_data = workflow.data
    inputs = workflow_data.inputs
    # Here A is the outer workflow step, B & C are the inner steps.
    # C is the output step for A, because C produces the output for A.
    #
    # @workflow.step
    # def A():
    #     b = B.step()
    #     return C.step(b)
    #
    # If the outer workflow step skips checkpointing, it would
    # update the checkpoint context of all inner steps except
    # the output step, marking them "detached" from the DAG.
    # Output step is not detached from the DAG because once
    # completed, it replaces the output of the outer step.
    step_context = workflow_context.get_workflow_step_context()
    checkpoint_context = step_context.checkpoint_context.copy()
    # "detached" could be defined recursively:
    # detached := already detached or the outer step skips checkpointing
    checkpoint_context.detached_from_dag = (
        checkpoint_context.detached_from_dag
        or not step_context.checkpoint_context.checkpoint)
    # Apply checkpoint context to input steps. Since input steps
    # further apply them to their inputs, this would eventually
    # apply to all steps except the output step. This avoids
    # detaching the output step.
    workflow_outputs = []
    with workflow_context.fork_workflow_step_context(
            outer_most_step_id=None,
            last_step_of_workflow=False,
            checkpoint_context=checkpoint_context,
    ):
        for w in inputs.workflows:
            static_ref = w.ref
            if static_ref is None:
                # The input workflow is not a reference to an executed
                # workflow, so execute it now and reference its output.
                output = execute_workflow(w).persisted_output
                static_ref = WorkflowStaticRef(step_id=w.step_id, ref=output)
            workflow_outputs.append(static_ref)

    baked_inputs = _BakedWorkflowInputs(
        args=inputs.args,
        workflow_outputs=workflow_outputs,
        workflow_refs=inputs.workflow_refs,
    )

    # Stage 2: match executors
    step_options = workflow_data.step_options
    if step_options.allow_inplace:
        # TODO(suquark): For inplace execution, it is impossible
        # to get the ObjectRef of the output before execution.
        # Here we use a dummy ObjectRef, because _record_step_status does not
        # even use it (?!).
        _record_step_status(workflow.step_id, WorkflowStatus.RUNNING,
                            [ray.put(None)])
        # Note: we need to be careful about workflow context when
        # calling the executor directly.
        # TODO(suquark): We still have recursive Python calls.
        # This would cause stack overflow if we have a really
        # deep recursive call. We should fix it later.
        if step_options.step_type == StepType.WAIT:
            executor = _workflow_wait_executor
        else:
            executor = _workflow_step_executor
    else:
        if step_options.step_type == StepType.WAIT:
            # This is very important to set "num_cpus=0" to
            # ensure "workflow.wait" is not blocked by other
            # tasks.
            executor = _workflow_wait_executor_remote.options(
                num_cpus=0).remote
        else:
            executor = _workflow_step_executor_remote.options(
                **step_options.ray_options).remote

    # Stage 3: execution
    # The executor receives the step context captured before the fork, not
    # the forked one used for the inputs.
    persisted_output, volatile_output = executor(
        workflow_data.func_body,
        step_context,
        workflow.step_id,
        baked_inputs,
        step_options,
    )

    # Stage 4: post processing outputs
    # Each output is checked on its own. The earlier code tested
    # ``persisted_output`` twice, so the second check could never be true
    # and a raw volatile output was never stored with ray.put.
    if not isinstance(persisted_output, WorkflowOutputType):
        persisted_output = ray.put(persisted_output)
    if not isinstance(volatile_output, WorkflowOutputType):
        volatile_output = ray.put(volatile_output)

    if step_options.step_type != StepType.READONLY_ACTOR_METHOD:
        if not step_options.allow_inplace:
            # In-place steps already recorded RUNNING in stage 2.
            # TODO: [Possible flaky bug] Here the RUNNING state may
            # be recorded earlier than SUCCESSFUL. This caused some
            # confusion during development.
            _record_step_status(workflow.step_id, WorkflowStatus.RUNNING,
                                [volatile_output])

    # Store the result on the workflow so a repeated call takes the early
    # return at the top.
    result = WorkflowExecutionResult(persisted_output, volatile_output)
    workflow._result = result
    workflow._executed = True
    return result
예제 #7
0
def _execute_workflow(job_id,
                      workflow: "Workflow") -> "WorkflowExecutionResult":
    """Run one workflow step after resolving its input workflows.

    Args:
        job_id: Passed through to nested ``execute_workflow`` calls, to the
            baked inputs and to the selected executor.
        workflow: The workflow to be executed.

    Returns:
        A ``WorkflowExecutionResult`` wrapping the executor's output. If
        the workflow was already executed, its stored result is returned
        instead.
    """
    if workflow.executed:
        return workflow.result

    # ---- Stage 1: prepare inputs ----
    data = workflow.data
    step_inputs = data.inputs

    # Consider an outer step A with inner steps B and C, where C is the
    # output step of A because it produces A's output:
    #
    # @workflow.step
    # def A():
    #     b = B.step()
    #     return C.step(b)
    #
    # When the outer step skips checkpointing, every inner step except the
    # output step gets its checkpoint context updated and is marked as
    # "detached" from the DAG. The output step stays attached because, once
    # completed, it replaces the output of the outer step.
    step_context = workflow_context.get_workflow_step_context()
    input_checkpoint_context = step_context.checkpoint_context.copy()
    # Recursive definition:
    # detached := already detached or the outer step skips checkpointing
    already_detached = input_checkpoint_context.detached_from_dag
    input_checkpoint_context.detached_from_dag = (
        already_detached or not step_context.checkpoint_context.checkpoint)

    # The checkpoint context is applied to the input steps only. They in
    # turn apply it to their own inputs, so it eventually reaches every
    # step except the output step, which therefore is never detached.
    resolved_outputs = []
    with workflow_context.fork_workflow_step_context(
            outer_most_step_id=None,
            last_step_of_workflow=False,
            checkpoint_context=input_checkpoint_context,
    ):
        for upstream in step_inputs.workflows:
            ref = upstream.ref
            if ref is None:
                # The input is not a reference to an already executed
                # workflow: execute it and carry over its resolve option.
                upstream_ray_options = upstream.data.step_options.ray_options
                ref = execute_workflow(job_id, upstream).output
                ref._resolve_like_object_ref_in_args = (
                    upstream_ray_options.get(
                        "_resolve_like_object_ref_in_args", False))
            resolved_outputs.append(ref)

    baked_inputs = _BakedWorkflowInputs(
        args=step_inputs.args,
        workflow_outputs=resolved_outputs,
        workflow_refs=step_inputs.workflow_refs,
        job_id=job_id,
    )

    # ---- Stage 2: match executors ----
    step_options = data.step_options
    if step_options.allow_inplace:
        # TODO(suquark): For inplace execution, it is impossible
        # to get the ObjectRef of the output before execution.
        # Here we use a dummy ObjectRef, because _record_step_status does not
        # even use it (?!).
        _record_step_status(
            workflow.step_id,
            WorkflowStatus.RUNNING,
            [ray.put(None)],
        )
        # Note: the executor is called directly here, so the workflow
        # context must be handled with care.
        # TODO(suquark): We still have recursive Python calls.
        # This would cause stack overflow if we have a really
        # deep recursive call. We should fix it later.
        if step_options.step_type == StepType.WAIT:
            executor = _workflow_wait_executor
        else:
            # Let the executor know it runs inplace; this enables the
            # tail-recursion optimization.
            executor = functools.partial(_workflow_step_executor, inplace=True)
    elif step_options.step_type == StepType.WAIT:
        # "num_cpus=0" is essential so that "workflow.wait" is not
        # blocked by other tasks.
        executor = _workflow_wait_executor_remote.options(num_cpus=0).remote
    else:
        remote_options = step_options.ray_options.copy()
        # "_resolve_like_object_ref_in_args" is not an option for Ray,
        # so drop it from the copy before handing the options over.
        remote_options.pop("_resolve_like_object_ref_in_args", None)
        executor = _workflow_step_executor_remote.options(
            **remote_options).remote

    # ---- Stage 3: execution ----
    output = executor(
        data.func_body,
        step_context,
        job_id,
        workflow.step_id,
        baked_inputs,
        data.step_options,
    )

    # ---- Stage 4: post processing outputs ----
    if not step_options.allow_inplace:
        # TODO: [Possible flaky bug] Here the RUNNING state may
        # be recorded earlier than SUCCESSFUL. This caused some
        # confusion during development.
        _record_step_status(workflow.step_id, WorkflowStatus.RUNNING, [None])

    # Store the result on the workflow so a repeated call takes the early
    # return at the top.
    execution_result = WorkflowExecutionResult(output)
    workflow._result = execution_result
    workflow._executed = True
    return execution_result