def _workflow_wait_executor(
    func: Callable,
    context: "WorkflowStepContext",
    job_id: str,
    step_id: "StepID",
    baked_inputs: "_BakedWorkflowInputs",
    runtime_options: "WorkflowStepRuntimeOptions",
) -> Tuple[WaitResult, None]:
    """Run a 'workflow.wait' step and commit its result.

    The baked inputs are waited on using the "wait_options" found in the
    step's Ray options. Workflows reported as ready are resolved through
    ``_resolve_static_workflow_ref``; the remaining ones are passed through
    untouched.

    Args:
        func: Not used by this executor.
        context: Workflow step context to install for this step.
        job_id: Not used by this executor.
        step_id: ID of the step being executed.
        baked_inputs: The processed inputs to wait on.
        runtime_options: Step execution parameters; the step type must be
            ``StepType.WAIT``.

    Returns:
        The pair ``(ready_objects, remaining_workflows)``.

    NOTE(review): the return annotation describes a ``(WaitResult, None)``
    tuple, but the body returns the wait result on its own - confirm which
    shape callers expect.
    """
    # Part 1: install the step context, then read back the active one.
    workflow_context.update_workflow_step_context(context, step_id)
    context = workflow_context.get_workflow_step_context()
    assert runtime_options.step_type == StepType.WAIT
    wait_kwargs = runtime_options.ray_options.get("wait_options", {})

    # Part 2: split the inputs into finished / pending and resolve the
    # finished ones.
    finished, pending = baked_inputs.wait(**wait_kwargs)
    resolved = [
        _resolve_static_workflow_ref(workflow.ref) for workflow in finished
    ]
    wait_result = (resolved, pending)

    # Part 3: commit the outputs.
    storage = workflow_storage.get_workflow_storage()
    # TODO(suquark): Because the outputs are not generated by "workflow.wait",
    # we do not checkpoint the outputs here. Those steps that generate
    # outputs should checkpoint them.
    commit_step(storage, step_id, wait_result, exception=None)
    if context.last_step_of_workflow:
        # This was the final step, so move the workflow's progress forward.
        storage.advance_progress(step_id)

    succeeded = WorkflowStatus.SUCCESSFUL
    _record_step_status(step_id, succeeded)
    logger.info(get_step_status_info(succeeded))
    return wait_result
def _workflow_wait_executor(
    func: Callable,
    context: "WorkflowStepContext",
    step_id: "StepID",
    baked_inputs: "_BakedWorkflowInputs",
    runtime_options: "WorkflowStepRuntimeOptions",
) -> Tuple[WaitResult, None]:
    """Run a 'workflow.wait' step and commit its result.

    The baked inputs are waited on using the "wait_options" found in the
    step's Ray options. For every workflow reported as ready, the first
    element of ``_resolve_object_ref(w.ref.ref)`` is collected; the remaining
    workflows are passed through untouched.

    Args:
        func: Not used by this executor.
        context: Workflow step context to install for this step.
        step_id: ID of the step being executed.
        baked_inputs: The processed inputs to wait on.
        runtime_options: Step execution parameters; the step type must be
            ``StepType.WAIT``.

    Returns:
        ``((ready_objects, remaining_workflows), None)`` - the wait result
        as the persisted output, and ``None`` in the second slot.
    """
    # Part 1: install the step context, then read back the active one.
    workflow_context.update_workflow_step_context(context, step_id)
    context = workflow_context.get_workflow_step_context()
    assert runtime_options.step_type == StepType.WAIT
    wait_kwargs = runtime_options.ray_options.get("wait_options", {})

    # Part 2: split the inputs into finished / pending and resolve the
    # finished ones. Each resolution yields a pair; only its first element
    # is kept.
    finished, pending = baked_inputs.wait(**wait_kwargs)
    resolved_pairs = (_resolve_object_ref(wf.ref.ref) for wf in finished)
    ready_values = [value for value, _ in resolved_pairs]
    wait_result = (ready_values, pending)

    # Part 3: commit the outputs.
    storage = workflow_storage.get_workflow_storage()
    commit_step(storage, step_id, wait_result, exception=None)
    if context.last_step_of_workflow:
        # This was the final step, so move the workflow's progress forward.
        storage.advance_progress(step_id)

    succeeded = WorkflowStatus.SUCCESSFUL
    _record_step_status(step_id, succeeded)
    logger.info(get_step_status_info(succeeded))
    return wait_result, None
def _workflow_step_executor(
    func: Callable,
    context: "WorkflowStepContext",
    step_id: "StepID",
    baked_inputs: "_BakedWorkflowInputs",
    runtime_options: "WorkflowStepRuntimeOptions",
) -> Tuple[Any, Any]:
    """Executor function for workflow step.

    Installs the step context, resolves the inputs, runs the step through
    ``_wrap_run`` and commits the persisted output. If the persisted output
    is itself a ``Workflow``, that sub-workflow is executed and its outputs
    replace the outputs of this step.

    Args:
        step_id: ID of the step.
        func: The workflow step function.
        baked_inputs: The processed inputs for the step.
        context: Workflow step context. Used to access correct storage etc.
        runtime_options: Parameters for workflow step execution.

    Returns:
        A ``(persisted_output, volatile_output)`` tuple.

    Raises:
        TypeError: If a readonly virtual actor method returns a Workflow as
            its volatile output.
        Exception: Any exception raised while saving step metadata or
            running the step is committed via ``commit_step`` and re-raised.
    """
    # Part 1: update the context for the step
    workflow_context.update_workflow_step_context(context, step_id)
    # From here on "context" is whatever the workflow context module reports
    # as current, not the object that was passed in.
    context = workflow_context.get_workflow_step_context()
    step_type = runtime_options.step_type

    # Part 2: resolve inputs
    args, kwargs = baked_inputs.resolve()

    # Part 3: execute the step
    store = workflow_storage.get_workflow_storage()
    try:
        step_prerun_metadata = {"start_time": time.time()}
        store.save_step_prerun_metadata(step_id, step_prerun_metadata)
        persisted_output, volatile_output = _wrap_run(func, runtime_options,
                                                      *args, **kwargs)
        step_postrun_metadata = {"end_time": time.time()}
        store.save_step_postrun_metadata(step_id, step_postrun_metadata)
    except Exception as e:
        # Commit the failure (no output, exception attached) before
        # propagating it.
        commit_step(store, step_id, None, exception=e)
        raise e

    # Part 4: save outputs
    if step_type == StepType.READONLY_ACTOR_METHOD:
        # Readonly actor methods commit nothing; they only must not hand
        # back a Workflow.
        if isinstance(volatile_output, Workflow):
            raise TypeError(
                "Returning a Workflow from a readonly virtual actor "
                "is not allowed.")
        assert not isinstance(persisted_output, Workflow)
    else:
        store = workflow_storage.get_workflow_storage()
        commit_step(store, step_id, persisted_output, exception=None)
        if isinstance(persisted_output, Workflow):
            outer_most_step_id = context.outer_most_step_id
            if step_type == StepType.FUNCTION:
                # Passing down outer most step so inner nested steps would
                # access the same outer most step.
                if not context.outer_most_step_id:
                    # The current workflow step returns a nested workflow, and
                    # there is no outer step for the current step. So the
                    # current step is the outer most step for the inner nested
                    # workflow steps.
                    outer_most_step_id = workflow_context.get_current_step_id()
            assert volatile_output is None
            # Execute sub-workflow. Pass down "outer_most_step_id".
            with workflow_context.fork_workflow_step_context(
                    outer_most_step_id=outer_most_step_id):
                result = execute_workflow(persisted_output)
            # When virtual actor returns a workflow in the method,
            # the volatile_output and persisted_output will be put together
            persisted_output = result.persisted_output
            volatile_output = result.volatile_output
        elif context.last_step_of_workflow:
            # advance the progress of the workflow
            store.advance_progress(step_id)
        # NOTE(review): nesting was reconstructed from whitespace-collapsed
        # text; confirm that the status is meant to be recorded only on this
        # non-readonly path.
        _record_step_status(step_id, WorkflowStatus.SUCCESSFUL)
    logger.info(get_step_status_info(WorkflowStatus.SUCCESSFUL))
    if isinstance(volatile_output, Workflow):
        # This is the case where a step method is called in the virtual actor.
        # We need to run the method to get the final result.
        assert step_type == StepType.ACTOR_METHOD
        volatile_output = volatile_output.run_async(
            workflow_context.get_current_workflow_id())
    return persisted_output, volatile_output
def _wrap_run(func: Callable, runtime_options: "WorkflowStepRuntimeOptions",
              *args, **kwargs) -> Tuple[Any, Any]:
    """Call the step function with retries and split its result.

    The result is split into persisted_output (p-out) and volatile_output
    (v-out). P-out is the part of the result to persist in storage and pass
    to the next step. V-out is the part of the result to return to the user
    that does not require persistence.

    This table describes their relationships

    +-----------------------------+-------+--------+----------------------+
    | Step Type                   | p-out | v-out  | catch exception into |
    +-----------------------------+-------+--------+----------------------+
    | Function Step               |   Y   |   N    |        p-out         |
    +-----------------------------+-------+--------+----------------------+
    | Virtual Actor Step          |   Y   |   Y    |        v-out         |
    +-----------------------------+-------+--------+----------------------+
    | Readonly Virtual Actor Step |   N   |   Y    |        v-out         |
    +-----------------------------+-------+--------+----------------------+

    Args:
        func: The function body.
        runtime_options: Step execution params (``max_retries``,
            ``catch_exceptions`` and ``step_type`` are read here).
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        A ``(persisted_output, volatile_output)`` tuple.

    Raises:
        ValueError: If the step type is none of the known kinds.
        BaseException: When exceptions are not being caught, the exception
            of the last failed attempt is re-raised.
    """
    failure = None
    outcome = None
    attempts = runtime_options.max_retries
    # Here max_retries bounds the number of attempts made for failures raised
    # by the function itself (application level failures).
    for index in range(attempts):
        attempt = index + 1
        logger.info(f"{get_step_status_info(WorkflowStatus.RUNNING)}"
                    f"\t[{attempt}/{attempts}]")
        try:
            outcome = func(*args, **kwargs)
        except BaseException as err:
            retry_msg = ("Maximum retry reached, stop retry."
                         if attempt == attempts else
                         "The step will be retried.")
            logger.error(
                f"{workflow_context.get_name()} failed with error message"
                f" {err}. {retry_msg}")
            failure = err
        else:
            # A successful attempt clears any failure left by earlier ones.
            failure = None
            break

    step_type = runtime_options.step_type
    if runtime_options.catch_exceptions:
        if step_type == StepType.FUNCTION:
            if not isinstance(outcome, Workflow):
                return (outcome, failure), None
            # When it returns a nested workflow, catch_exception
            # should be passed recursively.
            assert failure is None
            outcome.data.step_options.catch_exceptions = True
            return outcome, None
        if step_type == StepType.ACTOR_METHOD:
            # virtual actors do not persist exception
            # NOTE(review): if every attempt raised, "outcome" is still None
            # here and the indexing below would raise TypeError - confirm
            # whether this path is reachable.
            return outcome[0], (outcome[1], failure)
        if step_type == StepType.READONLY_ACTOR_METHOD:
            return None, (outcome, failure)
        raise ValueError(f"Unknown StepType '{step_type}'")

    if failure is not None:
        # Readonly actor methods fail without recording a step status.
        if step_type != StepType.READONLY_ACTOR_METHOD:
            status = WorkflowStatus.FAILED
            _record_step_status(workflow_context.get_current_step_id(),
                                status)
            logger.info(get_step_status_info(status))
        raise failure

    if step_type == StepType.FUNCTION:
        return outcome, None
    if step_type == StepType.ACTOR_METHOD:
        # The actor method's result must unpack into exactly two parts.
        persisted_output, volatile_output = outcome
        return persisted_output, volatile_output
    if step_type == StepType.READONLY_ACTOR_METHOD:
        return None, outcome
    raise ValueError(f"Unknown StepType '{step_type}'")
def _workflow_step_executor(step_type: StepType, func: Callable,
                            context: workflow_context.WorkflowStepContext,
                            step_id: "StepID",
                            baked_inputs: "_BakedWorkflowInputs",
                            catch_exceptions: bool, max_retries: int) -> Any:
    """Executor function for workflow step.

    Installs the step context, resolves the inputs, runs the step through
    ``_wrap_run`` and commits the persisted output. If the persisted output
    is itself a ``Workflow``, that sub-workflow is executed and its outputs
    replace the outputs of this step.

    Args:
        step_type: The type of workflow step.
        func: The workflow step function.
        context: Workflow step context. Used to access correct storage etc.
        step_id: The ID of the step.
        baked_inputs: The processed inputs for the step.
        catch_exceptions: If set to be true, return
            (Optional[Result], Optional[Error]) instead of Result.
        max_retries: Max number of retries encounter of a failure.

    Returns:
        Workflow step output, as a ``(persisted_output, volatile_output)``
        tuple.

    Raises:
        TypeError: If a readonly virtual actor method returns a Workflow as
            its volatile output.
        Exception: Any exception raised while running the step is committed
            via ``commit_step`` and re-raised.
    """
    workflow_context.update_workflow_step_context(context, step_id)
    args, kwargs = _resolve_step_inputs(baked_inputs)
    store = workflow_storage.get_workflow_storage()
    try:
        persisted_output, volatile_output = _wrap_run(
            func, step_type, step_id, catch_exceptions, max_retries, *args,
            **kwargs)
    except Exception as e:
        # Commit the failure (no output, exception attached) before
        # propagating it.
        commit_step(store, step_id, None, e)
        raise e
    if step_type == StepType.READONLY_ACTOR_METHOD:
        # Readonly actor methods commit nothing; they only must not hand
        # back a Workflow.
        if isinstance(volatile_output, Workflow):
            raise TypeError(
                "Returning a Workflow from a readonly virtual actor "
                "is not allowed.")
        assert not isinstance(persisted_output, Workflow)
    else:
        store = workflow_storage.get_workflow_storage()
        commit_step(store, step_id, persisted_output, None)
        outer_most_step_id = context.outer_most_step_id
        if isinstance(persisted_output, Workflow):
            if step_type == StepType.FUNCTION:
                # Passing down outer most step so inner nested steps would
                # access the same outer most step.
                if not context.outer_most_step_id:
                    # The current workflow step returns a nested workflow, and
                    # there is no outer step for the current step. So the
                    # current step is the outer most step for the inner nested
                    # workflow steps.
                    outer_most_step_id = workflow_context.get_current_step_id()
            assert volatile_output is None
            # Execute sub-workflow. Pass down "outer_most_step_id".
            with workflow_context.fork_workflow_step_context(
                    outer_most_step_id=outer_most_step_id):
                result = execute_workflow(persisted_output)
            # When virtual actor returns a workflow in the method,
            # the volatile_output and persisted_output will be put together
            persisted_output = result.persisted_output
            volatile_output = result.volatile_output
        elif context.last_step_of_workflow:
            # advance the progress of the workflow
            store.advance_progress(step_id)
        # NOTE(review): nesting was reconstructed from whitespace-collapsed
        # text; confirm that the status is meant to be recorded only on this
        # non-readonly path.
        _record_step_status(step_id, WorkflowStatus.SUCCESSFUL)
    logger.info(get_step_status_info(WorkflowStatus.SUCCESSFUL))
    if isinstance(volatile_output, Workflow):
        # This is the case where a step method is called in the virtual actor.
        # We need to run the method to get the final result.
        assert step_type == StepType.ACTOR_METHOD
        volatile_output = volatile_output.run_async(
            workflow_context.get_current_workflow_id())
    return persisted_output, volatile_output
def _workflow_step_executor(
    func: Callable,
    context: "WorkflowStepContext",
    step_id: "StepID",
    baked_inputs: "_BakedWorkflowInputs",
    runtime_options: "WorkflowStepRuntimeOptions",
    inplace: bool = False,
) -> Tuple[Any, Any]:
    """Executor function for workflow step.

    Installs the step context, resolves the inputs, runs the step through
    ``_wrap_run`` and, in SYNC checkpoint mode, commits the persisted output.
    If the persisted output is itself a ``Workflow``, it is either handed
    back wrapped in ``InplaceReturnedWorkflow`` (when ``inplace`` is true) or
    executed here, in which case its outputs replace the outputs of this
    step.

    Args:
        step_id: ID of the step.
        func: The workflow step function.
        baked_inputs: The processed inputs for the step.
        context: Workflow step context. Used to access correct storage etc.
        runtime_options: Parameters for workflow step execution.
        inplace: Execute the workflow inplace.

    Returns:
        A ``(persisted_output, volatile_output)`` tuple. On the inplace
        path the tuple is ``(InplaceReturnedWorkflow(...), None)``.

    Raises:
        TypeError: If a readonly virtual actor method returns a Workflow as
            its volatile output.
        Exception: Any exception raised while saving step metadata or
            running the step is committed via ``commit_step`` and re-raised.
    """
    # Part 1: update the context for the step
    workflow_context.update_workflow_step_context(context, step_id)
    # From here on "context" is whatever the workflow context module reports
    # as current, not the object that was passed in.
    context = workflow_context.get_workflow_step_context()
    step_type = runtime_options.step_type
    context.checkpoint_context.checkpoint = runtime_options.checkpoint

    # Part 2: resolve inputs
    args, kwargs = baked_inputs.resolve()

    # Part 3: execute the step
    store = workflow_storage.get_workflow_storage()
    try:
        step_prerun_metadata = {"start_time": time.time()}
        store.save_step_prerun_metadata(step_id, step_prerun_metadata)
        with workflow_context.workflow_execution():
            persisted_output, volatile_output = _wrap_run(
                func, runtime_options, *args, **kwargs)
        step_postrun_metadata = {"end_time": time.time()}
        store.save_step_postrun_metadata(step_id, step_postrun_metadata)
    except Exception as e:
        # Always checkpoint the exception.
        commit_step(store, step_id, None, exception=e)
        raise e

    # Part 4: save outputs
    if step_type == StepType.READONLY_ACTOR_METHOD:
        # Readonly actor methods commit nothing; they only must not hand
        # back a Workflow.
        if isinstance(volatile_output, Workflow):
            raise TypeError(
                "Returning a Workflow from a readonly virtual actor is not allowed."
            )
        assert not isinstance(persisted_output, Workflow)
    else:
        # TODO(suquark): Validate checkpoint options before
        # commit the step.
        store = workflow_storage.get_workflow_storage()
        # The successful output is only committed in SYNC checkpoint mode.
        if CheckpointMode(runtime_options.checkpoint) == CheckpointMode.SYNC:
            commit_step(
                store,
                step_id,
                persisted_output,
                exception=None,
            )
        if isinstance(persisted_output, Workflow):
            sub_workflow = persisted_output
            outer_most_step_id = context.outer_most_step_id
            assert volatile_output is None
            if step_type == StepType.FUNCTION:
                # Passing down outer most step so inner nested steps would
                # access the same outer most step.
                if not context.outer_most_step_id:
                    # The current workflow step returns a nested workflow, and
                    # there is no outer step for the current step. So the
                    # current step is the outer most step for the inner nested
                    # workflow steps.
                    outer_most_step_id = workflow_context.get_current_step_id()
            if inplace:
                _step_options = sub_workflow.data.step_options
                # Warn when a non-WAIT sub-workflow asks for Ray options that
                # differ from those of the step returning it.
                if (_step_options.step_type != StepType.WAIT
                        and runtime_options.ray_options !=
                        _step_options.ray_options):
                    logger.warning(
                        f"Workflow step '{sub_workflow.step_id}' uses "
                        f"a Ray option different to its caller step '{step_id}' "
                        f"and will be executed inplace. Ray assumes it still "
                        f"consumes the same resource as the caller. This may result "
                        f"in oversubscribing resources.")
                # Early return: the sub-workflow is not executed here, and the
                # status recording and logging further down are skipped.
                return (
                    InplaceReturnedWorkflow(
                        sub_workflow,
                        {"outer_most_step_id": outer_most_step_id}),
                    None,
                )
            # Execute sub-workflow. Pass down "outer_most_step_id".
            with workflow_context.fork_workflow_step_context(
                    outer_most_step_id=outer_most_step_id):
                result = execute_workflow(sub_workflow)
            # When virtual actor returns a workflow in the method,
            # the volatile_output and persisted_output will be put together
            persisted_output = result.persisted_output
            volatile_output = result.volatile_output
        elif context.last_step_of_workflow:
            # advance the progress of the workflow
            store.advance_progress(step_id)
        # NOTE(review): nesting was reconstructed from whitespace-collapsed
        # text; confirm that the status is meant to be recorded only on this
        # non-readonly path.
        _record_step_status(step_id, WorkflowStatus.SUCCESSFUL)
    logger.info(get_step_status_info(WorkflowStatus.SUCCESSFUL))
    if isinstance(volatile_output, Workflow):
        # This is the case where a step method is called in the virtual actor.
        # We need to run the method to get the final result.
        assert step_type == StepType.ACTOR_METHOD
        volatile_output = volatile_output.run_async(
            workflow_context.get_current_workflow_id())
    return persisted_output, volatile_output
def _wrap_run(func: Callable, runtime_options: "WorkflowStepRuntimeOptions",
              *args, **kwargs) -> Tuple[Any, Any]:
    """Call the step function, retrying on failure, and shape its output.

    The first call is not counted as a retry. After a failure the call is
    repeated until the retry counter equals ``runtime_options.max_retries``;
    a value of -1 is displayed as "inf" in the log and never matches the
    counter, so the loop keeps retrying.

    Args:
        func: The function body.
        runtime_options: Step execution params (``max_retries``,
            ``catch_exceptions`` and ``step_type`` are read here).
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        With ``catch_exceptions`` set: the nested ``Workflow`` when the
        function returned one, otherwise a ``(result, exception)`` pair.
        Without it: the function's result.

    Raises:
        ValueError: If the step type is not ``StepType.FUNCTION``.
        BaseException: When exceptions are not being caught, the exception
            of the final failed attempt is re-raised.

    NOTE(review): the return annotation describes a 2-tuple, but the body
    returns a single output value - confirm which shape callers expect.
    """
    failure = None
    outcome = None
    # Here max_retries bounds the retries made for failures raised by the
    # function itself (application level failures).
    retries_used = 0
    while True:
        if retries_used:
            limit = runtime_options.max_retries
            total_retries = "inf" if limit == -1 else limit
            logger.info(f"{get_step_status_info(WorkflowStatus.RUNNING)}"
                        f"\tretries: [{retries_used}/{total_retries}]")
        else:
            logger.info(f"{get_step_status_info(WorkflowStatus.RUNNING)}")

        try:
            outcome = func(*args, **kwargs)
        except BaseException as err:
            exhausted = retries_used == runtime_options.max_retries
            if exhausted:
                retry_msg = "Maximum retry reached, stop retry."
                failure = err
            else:
                retry_msg = "The step will be retried."
                retries_used += 1
            logger.error(
                f"{workflow_context.get_name()} failed with error message"
                f" {err}. {retry_msg}")
            if exhausted:
                break
        else:
            failure = None
            break

    step_type = runtime_options.step_type
    if runtime_options.catch_exceptions:
        if step_type == StepType.FUNCTION:
            if isinstance(outcome, Workflow):
                # When it returns a nested workflow, catch_exception
                # should be passed recursively.
                assert failure is None
                outcome.data.step_options.catch_exceptions = True
                return outcome
            return (outcome, failure)
        raise ValueError(f"Unknown StepType '{step_type}'")

    if failure is not None:
        # Record the failure before letting it propagate.
        status = WorkflowStatus.FAILED
        _record_step_status(workflow_context.get_current_step_id(), status)
        logger.info(get_step_status_info(status))
        raise failure

    if step_type == StepType.FUNCTION:
        return outcome
    raise ValueError(f"Unknown StepType '{step_type}'")