예제 #1
0
def _workflow_wait_executor(
    func: Callable,
    context: "WorkflowStepContext",
    job_id: str,
    step_id: "StepID",
    baked_inputs: "_BakedWorkflowInputs",
    runtime_options: "WorkflowStepRuntimeOptions",
) -> Tuple[WaitResult, None]:
    """Run a 'workflow.wait' step and commit its outcome.

    The step waits on its baked inputs, resolves every workflow that is
    already ready, commits the pair (resolved results, still-pending
    workflows) to storage and records the step as successful.

    Args:
        func: Not used by this executor.
        context: Step context that is installed, together with ``step_id``,
            as the current workflow step context before anything else runs.
        job_id: Not used by this executor.
        step_id: ID of the wait step being executed.
        baked_inputs: Inputs of the step; its ``wait`` method splits them
            into ready and remaining workflows.
        runtime_options: Step options. ``step_type`` must be
            ``StepType.WAIT``; ``ray_options["wait_options"]`` (when
            present) is forwarded to ``baked_inputs.wait`` as keyword
            arguments.

    Returns:
        ``(ready_objects, remaining_workflows)``.
        NOTE(review): the annotation advertises a ``(WaitResult, None)``
        pair, but the pair above is returned directly without a trailing
        ``None`` -- confirm which shape callers rely on.
    """
    # Step 1: install this step's context and read the active one back.
    workflow_context.update_workflow_step_context(context, step_id)
    step_context = workflow_context.get_workflow_step_context()
    assert runtime_options.step_type == StepType.WAIT
    wait_kwargs = runtime_options.ray_options.get("wait_options", {})

    # Step 2: wait, then resolve the workflows that are already done.
    ready, pending = baked_inputs.wait(**wait_kwargs)
    resolved = []
    for workflow in ready:
        resolved.append(_resolve_static_workflow_ref(workflow.ref))
    wait_result = (resolved, pending)

    # Step 3: persist the result.
    # TODO(suquark): Because the outputs are not generated by "workflow.wait",
    # we do not checkpoint the outputs here. Those steps that generate
    # outputs should checkpoint them.
    storage = workflow_storage.get_workflow_storage()
    commit_step(storage, step_id, wait_result, exception=None)
    if step_context.last_step_of_workflow:
        # This was the final step, so move the workflow's progress forward.
        storage.advance_progress(step_id)

    succeeded = WorkflowStatus.SUCCESSFUL
    _record_step_status(step_id, succeeded)
    logger.info(get_step_status_info(succeeded))
    return wait_result
예제 #2
0
파일: step_executor.py 프로젝트: ijrsvt/ray
def _workflow_wait_executor(
    func: Callable,
    context: "WorkflowStepContext",
    step_id: "StepID",
    baked_inputs: "_BakedWorkflowInputs",
    runtime_options: "WorkflowStepRuntimeOptions",
) -> Tuple[WaitResult, None]:
    """Run a 'workflow.wait' step and commit its outcome.

    The step waits on its baked inputs, resolves the object behind every
    workflow that is already ready, commits the pair (resolved objects,
    still-pending workflows) to storage and records the step as successful.

    Args:
        func: Not used by this executor.
        context: Step context that is installed, together with ``step_id``,
            as the current workflow step context before anything else runs.
        step_id: ID of the wait step being executed.
        baked_inputs: Inputs of the step; its ``wait`` method splits them
            into ready and remaining workflows.
        runtime_options: Step options. ``step_type`` must be
            ``StepType.WAIT``; ``ray_options["wait_options"]`` (when
            present) is forwarded to ``baked_inputs.wait`` as keyword
            arguments.

    Returns:
        ``((ready_objects, remaining_workflows), None)`` -- the committed
        wait result followed by ``None``.
    """
    # Step 1: install this step's context and read the active one back.
    workflow_context.update_workflow_step_context(context, step_id)
    step_context = workflow_context.get_workflow_step_context()
    assert runtime_options.step_type == StepType.WAIT
    wait_kwargs = runtime_options.ray_options.get("wait_options", {})

    # Step 2: wait, then resolve the workflows that are already done.
    ready, pending = baked_inputs.wait(**wait_kwargs)
    # Each resolution is a 2-item result; only the first item is kept.
    resolutions = (_resolve_object_ref(w.ref.ref) for w in ready)
    ready_values = [value for value, _ in resolutions]
    wait_result = (ready_values, pending)

    # Step 3: persist the result.
    storage = workflow_storage.get_workflow_storage()
    commit_step(storage, step_id, wait_result, exception=None)
    if step_context.last_step_of_workflow:
        # This was the final step, so move the workflow's progress forward.
        storage.advance_progress(step_id)

    succeeded = WorkflowStatus.SUCCESSFUL
    _record_step_status(step_id, succeeded)
    logger.info(get_step_status_info(succeeded))
    return wait_result, None
예제 #3
0
파일: step_executor.py 프로젝트: ijrsvt/ray
def _workflow_step_executor(
    func: Callable,
    context: "WorkflowStepContext",
    step_id: "StepID",
    baked_inputs: "_BakedWorkflowInputs",
    runtime_options: "WorkflowStepRuntimeOptions",
) -> Tuple[Any, Any]:
    """Execute one workflow step and persist what it produced.

    Args:
        func: The workflow step function.
        context: Workflow step context; installed together with ``step_id``
            as the current step context before the step runs.
        step_id: ID of the step.
        baked_inputs: The processed inputs for the step; ``resolve()``
            yields the positional and keyword arguments passed to ``func``.
        runtime_options: Parameters for workflow step execution
            (``step_type`` is read here, the rest is consumed by
            ``_wrap_run``).

    Returns:
        ``(persisted_output, volatile_output)``. When the step returned a
        nested workflow, both values come from executing that workflow.

    Raises:
        TypeError: If a readonly actor method returned a ``Workflow``.
        Exception: Any exception raised while running the step is committed
            to storage for ``step_id`` and then re-raised.
    """
    # Stage 1: install this step's context and read the active one back.
    workflow_context.update_workflow_step_context(context, step_id)
    step_context = workflow_context.get_workflow_step_context()
    step_type = runtime_options.step_type

    # Stage 2: turn the baked inputs into call arguments.
    args, kwargs = baked_inputs.resolve()

    # Stage 3: run the step, bracketing it with timing metadata.
    store = workflow_storage.get_workflow_storage()
    try:
        store.save_step_prerun_metadata(step_id, {"start_time": time.time()})
        persisted_output, volatile_output = _wrap_run(
            func, runtime_options, *args, **kwargs)
        store.save_step_postrun_metadata(step_id, {"end_time": time.time()})
    except Exception as e:
        # Record the failure for this step before propagating it.
        commit_step(store, step_id, None, exception=e)
        raise e

    # Stage 4: persist the outputs (readonly actor methods persist nothing).
    if step_type != StepType.READONLY_ACTOR_METHOD:
        store = workflow_storage.get_workflow_storage()
        commit_step(store, step_id, persisted_output, exception=None)
        if isinstance(persisted_output, Workflow):
            # Nested steps must all see the same outer most step. If this
            # function step has no outer step of its own, it becomes the
            # outer most step of the nested workflow it returned.
            nested_root_id = step_context.outer_most_step_id
            if step_type == StepType.FUNCTION and not nested_root_id:
                nested_root_id = workflow_context.get_current_step_id()
            assert volatile_output is None
            # Run the sub-workflow with "outer_most_step_id" passed down.
            with workflow_context.fork_workflow_step_context(
                    outer_most_step_id=nested_root_id):
                nested_result = execute_workflow(persisted_output)
            # When a virtual actor method returns a workflow, both outputs
            # are taken from the nested workflow's result.
            persisted_output = nested_result.persisted_output
            volatile_output = nested_result.volatile_output
        elif step_context.last_step_of_workflow:
            # This was the final step, so move the workflow's progress on.
            store.advance_progress(step_id)
        _record_step_status(step_id, WorkflowStatus.SUCCESSFUL)
    else:
        if isinstance(volatile_output, Workflow):
            raise TypeError(
                "Returning a Workflow from a readonly virtual actor "
                "is not allowed.")
        assert not isinstance(persisted_output, Workflow)

    logger.info(get_step_status_info(WorkflowStatus.SUCCESSFUL))
    if isinstance(volatile_output, Workflow):
        # A step method was called inside the virtual actor; it has to be
        # run to obtain the final result.
        assert step_type == StepType.ACTOR_METHOD
        volatile_output = volatile_output.run_async(
            workflow_context.get_current_workflow_id())
    return persisted_output, volatile_output
예제 #4
0
파일: step_executor.py 프로젝트: ijrsvt/ray
def _wrap_run(func: Callable, runtime_options: "WorkflowStepRuntimeOptions",
              *args, **kwargs) -> Tuple[Any, Any]:
    """Call the step function with retries and split its result.

    The result is split into a persisted output (p-out), which is stored
    and handed to the next step, and a volatile output (v-out), which is
    returned to the user without being persisted.

    With ``runtime_options.catch_exceptions`` unset, a failure on the last
    attempt is raised and successful results are split as:

    * function step: p-out = result, v-out = None
    * virtual actor step: (p-out, v-out) = result
    * readonly virtual actor step: p-out = None, v-out = result

    With ``catch_exceptions`` set, the last exception (or None) is paired
    with the result instead of being raised:

    * function step: p-out = (result, exception), v-out = None; a nested
      ``Workflow`` result is returned as p-out with ``catch_exceptions``
      switched on in its step options
    * virtual actor step: p-out = result[0], v-out = (result[1], exception)
    * readonly virtual actor step: p-out = None, v-out = (result, exception)

    Args:
        func: The function body.
        runtime_options: Step execution params (``max_retries``,
            ``catch_exceptions`` and ``step_type`` are read here).
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        ``(persisted_output, volatile_output)`` as described above.

    Raises:
        ValueError: If the step type is none of the three handled types.
        BaseException: The exception from the final attempt, when
            ``catch_exceptions`` is unset.
    """
    error = None
    outcome = None
    # These attempts cover application-level failures raised by ``func``.
    # NOTE(review): with ``max_retries == 0`` the loop body never runs, so
    # ``func`` is not called at all -- confirm callers never pass 0.
    for i in range(runtime_options.max_retries):
        logger.info(f"{get_step_status_info(WorkflowStatus.RUNNING)}"
                    f"\t[{i + 1}/{runtime_options.max_retries}]")
        try:
            outcome = func(*args, **kwargs)
        except BaseException as e:
            out_of_attempts = i + 1 == runtime_options.max_retries
            retry_msg = ("Maximum retry reached, stop retry."
                         if out_of_attempts else "The step will be retried.")
            logger.error(
                f"{workflow_context.get_name()} failed with error message"
                f" {e}. {retry_msg}")
            error = e
        else:
            # Success clears any failure left over from earlier attempts.
            error = None
            break

    step_type = runtime_options.step_type
    if not runtime_options.catch_exceptions:
        if error is not None:
            # Readonly actor methods do not get a FAILED status recorded.
            if step_type != StepType.READONLY_ACTOR_METHOD:
                status = WorkflowStatus.FAILED
                _record_step_status(workflow_context.get_current_step_id(),
                                    status)
                logger.info(get_step_status_info(status))
            raise error
        if step_type == StepType.FUNCTION:
            return outcome, None
        if step_type == StepType.ACTOR_METHOD:
            persisted_output, volatile_output = outcome
            return persisted_output, volatile_output
        if step_type == StepType.READONLY_ACTOR_METHOD:
            return None, outcome
        raise ValueError(f"Unknown StepType '{step_type}'")

    # From here on the exception is captured into the output, not raised.
    if step_type == StepType.FUNCTION:
        if not isinstance(outcome, Workflow):
            return (outcome, error), None
        # A nested workflow was returned: catch_exception has to be
        # passed on recursively.
        assert error is None
        outcome.data.step_options.catch_exceptions = True
        return outcome, None
    if step_type == StepType.ACTOR_METHOD:
        # Virtual actors do not persist the exception.
        # NOTE(review): if every attempt failed, ``outcome`` is still None
        # and indexing it raises TypeError -- confirm this is intended.
        return outcome[0], (outcome[1], error)
    if runtime_options.step_type == StepType.READONLY_ACTOR_METHOD:
        return None, (outcome, error)
    raise ValueError(f"Unknown StepType '{step_type}'")
예제 #5
0
def _workflow_step_executor(step_type: StepType, func: Callable,
                            context: workflow_context.WorkflowStepContext,
                            step_id: "StepID",
                            baked_inputs: "_BakedWorkflowInputs",
                            catch_exceptions: bool, max_retries: int) -> Any:
    """Execute one workflow step and persist what it produced.

    Args:
        step_type: The type of workflow step.
        func: The workflow step function.
        context: Workflow step context; installed together with ``step_id``
            as the current step context before the step runs.
        step_id: The ID of the step.
        baked_inputs: The processed inputs for the step.
        catch_exceptions: If set to be true, return
            (Optional[Result], Optional[Error]) instead of Result.
        max_retries: Max number of retries encounter of a failure.

    Returns:
        ``(persisted_output, volatile_output)``. When the step returned a
        nested workflow, both values come from executing that workflow.

    Raises:
        TypeError: If a readonly actor method returned a ``Workflow``.
        Exception: Any exception raised while running the step is committed
            to storage for ``step_id`` and then re-raised.
    """
    workflow_context.update_workflow_step_context(context, step_id)
    args, kwargs = _resolve_step_inputs(baked_inputs)
    store = workflow_storage.get_workflow_storage()
    try:
        outputs = _wrap_run(func, step_type, step_id, catch_exceptions,
                            max_retries, *args, **kwargs)
        persisted_output, volatile_output = outputs
    except Exception as e:
        # Record the failure for this step before propagating it.
        commit_step(store, step_id, None, e)
        raise e

    # Readonly actor methods persist nothing; every other step commits.
    if step_type != StepType.READONLY_ACTOR_METHOD:
        store = workflow_storage.get_workflow_storage()
        commit_step(store, step_id, persisted_output, None)
        nested_root_id = context.outer_most_step_id
        if isinstance(persisted_output, Workflow):
            # Nested steps must all see the same outer most step. If this
            # function step has no outer step of its own, it becomes the
            # outer most step of the nested workflow it returned.
            if step_type == StepType.FUNCTION and not nested_root_id:
                nested_root_id = workflow_context.get_current_step_id()
            assert volatile_output is None
            # Run the sub-workflow with "outer_most_step_id" passed down.
            with workflow_context.fork_workflow_step_context(
                    outer_most_step_id=nested_root_id):
                nested_result = execute_workflow(persisted_output)
            # When a virtual actor method returns a workflow, both outputs
            # are taken from the nested workflow's result.
            persisted_output = nested_result.persisted_output
            volatile_output = nested_result.volatile_output
        elif context.last_step_of_workflow:
            # This was the final step, so move the workflow's progress on.
            store.advance_progress(step_id)
        _record_step_status(step_id, WorkflowStatus.SUCCESSFUL)
    else:
        if isinstance(volatile_output, Workflow):
            raise TypeError(
                "Returning a Workflow from a readonly virtual actor "
                "is not allowed.")
        assert not isinstance(persisted_output, Workflow)

    logger.info(get_step_status_info(WorkflowStatus.SUCCESSFUL))
    if isinstance(volatile_output, Workflow):
        # A step method was called inside the virtual actor; it has to be
        # run to obtain the final result.
        assert step_type == StepType.ACTOR_METHOD
        volatile_output = volatile_output.run_async(
            workflow_context.get_current_workflow_id())
    return persisted_output, volatile_output
예제 #6
0
def _workflow_step_executor(
    func: Callable,
    context: "WorkflowStepContext",
    step_id: "StepID",
    baked_inputs: "_BakedWorkflowInputs",
    runtime_options: "WorkflowStepRuntimeOptions",
    inplace: bool = False,
) -> Tuple[Any, Any]:
    """Executor function for workflow step.

    Runs the step function, commits its output when the step's checkpoint
    mode is SYNC, and either executes or hands back any nested workflow the
    step returned.

    Args:
        step_id: ID of the step.
        func: The workflow step function.
        baked_inputs: The processed inputs for the step.
        context: Workflow step context. Used to access correct storage etc.
        runtime_options: Parameters for workflow step execution.
        inplace: Execute the workflow inplace. When true and the step
            returned a nested workflow, that workflow is not executed here
            but returned wrapped in ``InplaceReturnedWorkflow``.

    Returns:
        ``(persisted_output, volatile_output)``. In the inplace case
        described above the pair is ``(InplaceReturnedWorkflow(...), None)``.

    Raises:
        TypeError: If a readonly actor method returned a ``Workflow``.
        Exception: Any exception raised while running the step is committed
            to storage for ``step_id`` and then re-raised.
    """
    # Part 1: update the context for the step
    workflow_context.update_workflow_step_context(context, step_id)
    context = workflow_context.get_workflow_step_context()
    step_type = runtime_options.step_type
    # Copy this step's checkpoint option onto the active step context.
    context.checkpoint_context.checkpoint = runtime_options.checkpoint

    # Part 2: resolve inputs
    args, kwargs = baked_inputs.resolve()

    # Part 3: execute the step
    store = workflow_storage.get_workflow_storage()
    try:
        # Start/end timestamps are saved around the actual run.
        step_prerun_metadata = {"start_time": time.time()}
        store.save_step_prerun_metadata(step_id, step_prerun_metadata)
        with workflow_context.workflow_execution():
            persisted_output, volatile_output = _wrap_run(
                func, runtime_options, *args, **kwargs)
        step_postrun_metadata = {"end_time": time.time()}
        store.save_step_postrun_metadata(step_id, step_postrun_metadata)
    except Exception as e:
        # Always checkpoint the exception.
        commit_step(store, step_id, None, exception=e)
        raise e

    # Part 4: save outputs
    if step_type == StepType.READONLY_ACTOR_METHOD:
        # Readonly actor methods commit nothing; only validate the outputs.
        if isinstance(volatile_output, Workflow):
            raise TypeError(
                "Returning a Workflow from a readonly virtual actor is not allowed."
            )
        assert not isinstance(persisted_output, Workflow)
    else:
        # TODO(suquark): Validate checkpoint options before
        # commit the step.
        store = workflow_storage.get_workflow_storage()
        # The output is committed only when the checkpoint mode is SYNC.
        if CheckpointMode(runtime_options.checkpoint) == CheckpointMode.SYNC:
            commit_step(
                store,
                step_id,
                persisted_output,
                exception=None,
            )
        if isinstance(persisted_output, Workflow):
            sub_workflow = persisted_output
            outer_most_step_id = context.outer_most_step_id
            assert volatile_output is None
            if step_type == StepType.FUNCTION:
                # Passing down outer most step so inner nested steps would
                # access the same outer most step.
                if not context.outer_most_step_id:
                    # The current workflow step returns a nested workflow, and
                    # there is no outer step for the current step. So the
                    # current step is the outer most step for the inner nested
                    # workflow steps.
                    outer_most_step_id = workflow_context.get_current_step_id()
            if inplace:
                _step_options = sub_workflow.data.step_options
                # Warn when a non-wait sub-workflow asks for Ray options that
                # differ from the caller's.
                if (_step_options.step_type != StepType.WAIT
                        and runtime_options.ray_options !=
                        _step_options.ray_options):
                    logger.warning(
                        f"Workflow step '{sub_workflow.step_id}' uses "
                        f"a Ray option different to its caller step '{step_id}' "
                        f"and will be executed inplace. Ray assumes it still "
                        f"consumes the same resource as the caller. This may result "
                        f"in oversubscribing resources.")
                # Hand the nested workflow back instead of executing it here.
                # This returns before the SUCCESSFUL status below is recorded.
                return (
                    InplaceReturnedWorkflow(
                        sub_workflow,
                        {"outer_most_step_id": outer_most_step_id}),
                    None,
                )
            # Execute sub-workflow. Pass down "outer_most_step_id".
            with workflow_context.fork_workflow_step_context(
                    outer_most_step_id=outer_most_step_id):
                result = execute_workflow(sub_workflow)
            # When virtual actor returns a workflow in the method,
            # the volatile_output and persisted_output will be put together
            persisted_output = result.persisted_output
            volatile_output = result.volatile_output
        elif context.last_step_of_workflow:
            # advance the progress of the workflow
            store.advance_progress(step_id)
        _record_step_status(step_id, WorkflowStatus.SUCCESSFUL)
    logger.info(get_step_status_info(WorkflowStatus.SUCCESSFUL))
    if isinstance(volatile_output, Workflow):
        # This is the case where a step method is called in the virtual actor.
        # We need to run the method to get the final result.
        assert step_type == StepType.ACTOR_METHOD
        volatile_output = volatile_output.run_async(
            workflow_context.get_current_workflow_id())
    return persisted_output, volatile_output
예제 #7
0
def _wrap_run(func: Callable, runtime_options: "WorkflowStepRuntimeOptions",
              *args, **kwargs) -> Any:
    """Call the step function with retries and shape its output.

    ``func`` is called with ``*args`` / ``**kwargs``. Whenever it raises,
    the call is repeated until ``runtime_options.max_retries`` retries have
    been used up. A ``max_retries`` of ``-1`` never equals the retry
    counter, so the step is then retried until it succeeds.

    Args:
        func: The function body.
        runtime_options: Step execution params (``max_retries``,
            ``catch_exceptions`` and ``step_type`` are read here).
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        A single output value (not a pair):

        * with ``catch_exceptions`` unset: the result of ``func``;
        * with ``catch_exceptions`` set: ``(result, exception)``, or, when
          ``func`` returned a nested ``Workflow``, that workflow with
          ``catch_exceptions`` switched on in its step options.

    Raises:
        ValueError: If the step type is not ``StepType.FUNCTION``.
        BaseException: The exception from the final attempt, when
            ``catch_exceptions`` is unset.
    """
    exception = None
    result = None
    # ``max_retries`` bounds the retries of application-level failures
    # raised by ``func``; ``i`` counts the retries performed so far.
    i = 0
    while True:
        if i == 0:
            logger.info(f"{get_step_status_info(WorkflowStatus.RUNNING)}")
        else:
            total_retries = (runtime_options.max_retries
                             if runtime_options.max_retries != -1 else "inf")
            logger.info(f"{get_step_status_info(WorkflowStatus.RUNNING)}"
                        f"\tretries: [{i}/{total_retries}]")
        try:
            result = func(*args, **kwargs)
            break
        except BaseException as e:
            # NOTE(review): BaseException also covers KeyboardInterrupt and
            # SystemExit, so those are retried/captured too -- confirm this
            # is intended.
            exhausted = i == runtime_options.max_retries
            if exhausted:
                retry_msg = "Maximum retry reached, stop retry."
                exception = e
            else:
                retry_msg = "The step will be retried."
                i += 1
            logger.error(
                f"{workflow_context.get_name()} failed with error message"
                f" {e}. {retry_msg}")
            if exhausted:
                break

    step_type = runtime_options.step_type
    if runtime_options.catch_exceptions:
        if step_type != StepType.FUNCTION:
            raise ValueError(f"Unknown StepType '{step_type}'")
        if isinstance(result, Workflow):
            # When it returns a nested workflow, catch_exception
            # should be passed recursively.
            assert exception is None
            result.data.step_options.catch_exceptions = True
            return result
        return (result, exception)

    if exception is not None:
        # The failure is recorded before the step type is validated, so a
        # failed step is always marked FAILED and re-raised.
        status = WorkflowStatus.FAILED
        _record_step_status(workflow_context.get_current_step_id(), status)
        logger.info(get_step_status_info(status))
        raise exception
    if step_type != StepType.FUNCTION:
        raise ValueError(f"Unknown StepType '{step_type}'")
    return result