def run(entry_workflow: Workflow,
        workflow_id: Optional[str] = None,
        metadata: Optional[Dict] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    Args:
        entry_workflow: The workflow to execute.
        workflow_id: ID for the workflow job. When ``None`` an ID of the
            form ``{uuid4}.{unix time with 9 decimals}`` is generated.
        metadata: Optional user metadata saved with the workflow. It must
            be a dict whose values are JSON serializable.

    Returns:
        The flattened workflow output: the persisted output for
        ``StepType.FUNCTION`` entry workflows, the volatile output for
        every other step type.

    Raises:
        ValueError: If ``metadata`` is not a dict, or one of its values
            cannot be serialized by ``json.dumps``.
    """
    if metadata is not None:
        if not isinstance(metadata, dict):
            raise ValueError("metadata must be a dict.")
        for k, v in metadata.items():
            try:
                json.dumps(v)
            except TypeError as e:
                # Chain explicitly so the original TypeError is reported as
                # the direct cause of the validation error.
                raise ValueError(
                    "metadata values must be JSON serializable, "
                    "however '{}' has a value whose {}.".format(k, e)) from e
    metadata = metadata or {}

    store = get_global_storage()
    assert ray.is_initialized()
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{str(uuid.uuid4())}.{time.time():.9f}"

    logger.info(
        f"Workflow job created. [id=\"{workflow_id}\", storage_url="
        f"\"{store.storage_url}\"]. Type: {entry_workflow.data.step_type} ")

    with workflow_context.workflow_step_context(workflow_id,
                                                store.storage_url):
        # Checkpoint the workflow.
        ws = workflow_storage.get_workflow_storage(workflow_id)
        ws.save_workflow_user_metadata(metadata)

        # Any failure to read the entrypoint step ID is treated as
        # "this workflow has not been stored yet".
        wf_exists = True
        try:
            ws.get_entrypoint_step_id()
        except Exception:
            wf_exists = False

        # We only commit for
        #  - virtual actor tasks: they are dynamic tasks, so we always add
        #  - a new workflow
        # TODO (yic): follow up with force rerun
        if entry_workflow.data.step_type != StepType.FUNCTION or not wf_exists:
            commit_step(ws, "", entry_workflow, exception=None)
        workflow_manager = get_or_create_management_actor()
        ignore_existing = (entry_workflow.data.step_type != StepType.FUNCTION)
        # NOTE: It is important to 'ray.get' the returned output. This
        # ensures caller of 'run()' holds the reference to the workflow
        # result. Otherwise if the actor removes the reference of the
        # workflow output, the caller may fail to resolve the result.
        result: "WorkflowExecutionResult" = ray.get(
            workflow_manager.run_or_resume.remote(workflow_id,
                                                  ignore_existing))
    if entry_workflow.data.step_type == StepType.FUNCTION:
        return flatten_workflow_output(workflow_id, result.persisted_output)
    else:
        return flatten_workflow_output(workflow_id, result.volatile_output)
def run(
    entry_workflow: Workflow,
    workflow_id: Optional[str] = None,
    metadata: Optional[Dict] = None,
) -> ray.ObjectRef:
    """Start (or continue) a workflow and return its flattened output.

    The user metadata is validated and saved, the entry workflow is
    checkpointed when required, and execution is handed to the workflow
    management actor under the current Ray job ID.
    """
    validate_user_metadata(metadata)
    if not metadata:
        metadata = {}

    from ray.workflow.api import _ensure_workflow_initialized

    _ensure_workflow_initialized()

    if workflow_id is None:
        # Generated IDs look like "<uuid4>.<unix time with 9 decimals>".
        workflow_id = "{}.{:.9f}".format(str(uuid.uuid4()), time.time())

    entry_type = entry_workflow.data.step_options.step_type
    logger.info(
        f'Workflow job created. [id="{workflow_id}"]. Type: {entry_type}.')

    with workflow_context.workflow_step_context(workflow_id):
        storage = workflow_storage.get_workflow_storage(workflow_id)
        storage.save_workflow_user_metadata(metadata)

        # A failed entrypoint lookup means nothing has been stored yet.
        try:
            storage.get_entrypoint_step_id()
        except Exception:
            already_stored = False
        else:
            already_stored = True

        # A "growing" workflow can have steps added to its top level at
        # runtime; FUNCTION and WAIT entry steps do not grow.
        grows = entry_type not in (StepType.FUNCTION, StepType.WAIT)

        # Checkpoint the entry workflow when it is dynamic (always) or when
        # the workflow is new.
        # TODO (yic): follow up with force rerun
        if grows or not already_stored:
            commit_step(storage, "", entry_workflow, exception=None)

        manager = get_or_create_management_actor()
        current_job = ray.get_runtime_context().job_id.hex()
        # NOTE: 'ray.get' on the returned output is required so that the
        # caller of 'run()' holds the reference to the workflow result;
        # otherwise the caller may fail to resolve it once the actor drops
        # its own reference.
        outcome: "WorkflowExecutionResult" = ray.get(
            manager.run_or_resume.remote(current_job, workflow_id, grows))

    chosen = outcome.volatile_output if grows else outcome.persisted_output
    return flatten_workflow_output(workflow_id, chosen)
async def _resume_one(wid: str) -> Tuple[str, Optional[ray.ObjectRef]]:
    """Resume a single workflow on a best-effort basis.

    Args:
        wid: ID of the workflow to resume.

    Returns:
        ``(wid, output)`` where ``output`` is the flattened persisted output
        of the workflow, or ``(wid, None)`` when resuming failed.
    """
    try:
        result: "WorkflowExecutionResult" = (
            await workflow_manager.run_or_resume.remote(wid))
        obj = flatten_workflow_output(wid, result.persisted_output)
        return wid, obj
    except Exception:
        # Best effort: one failing workflow must not abort the others, but
        # keep the traceback in the log so the failure can be diagnosed.
        logger.exception(f"Failed to resume workflow {wid}")
        return (wid, None)
def run(entry_workflow: Workflow,
        workflow_id: Optional[str] = None) -> ray.ObjectRef:
    """Start (or continue) a workflow and return its flattened output.

    The entry workflow is checkpointed when required and execution is
    handed to the workflow management actor.
    """
    store = get_global_storage()
    assert ray.is_initialized()

    if workflow_id is None:
        # Generated IDs look like "<uuid4>.<unix time with 9 decimals>".
        workflow_id = "{}.{:.9f}".format(str(uuid.uuid4()), time.time())

    logger.info(f"Workflow job created. [id=\"{workflow_id}\", storage_url="
                f"\"{store.storage_url}\"].")

    with workflow_context.workflow_step_context(workflow_id,
                                                store.storage_url):
        storage = workflow_storage.get_workflow_storage(workflow_id)

        # A failed entrypoint lookup means nothing has been stored yet.
        try:
            storage.get_entrypoint_step_id()
        except Exception:
            already_stored = False
        else:
            already_stored = True

        # Anything other than a FUNCTION step is a dynamic (virtual actor)
        # task.
        dynamic = entry_workflow.data.step_type != StepType.FUNCTION

        # Checkpoint the entry workflow when it is dynamic (always) or when
        # the workflow is new.
        # TODO (yic): follow up with force rerun
        if dynamic or not already_stored:
            commit_step(storage, "", entry_workflow, None)

        manager = get_or_create_management_actor()
        # NOTE: 'ray.get' on the returned output is required so that the
        # caller of 'run()' holds the reference to the workflow result;
        # otherwise the caller may fail to resolve it once the actor drops
        # its own reference.
        outcome: "WorkflowExecutionResult" = ray.get(
            manager.run_or_resume.remote(workflow_id, dynamic))

    if entry_workflow.data.step_type == StepType.FUNCTION:
        chosen = outcome.persisted_output
    else:
        chosen = outcome.volatile_output
    return flatten_workflow_output(workflow_id, chosen)
async def _resume_one(wid: str) -> Tuple[str, Optional[ray.ObjectRef]]:
    """Resume a single workflow on a best-effort basis.

    The resume request is issued under the current Ray job ID.

    Args:
        wid: ID of the workflow to resume.

    Returns:
        ``(wid, output)`` where ``output`` is the flattened output of the
        workflow, or ``(wid, None)`` when resuming failed.
    """
    try:
        job_id = ray.get_runtime_context().job_id.hex()
        result: "WorkflowExecutionResult" = (
            await workflow_manager.run_or_resume.remote(job_id, wid)
        )
        obj = flatten_workflow_output(wid, result.output)
        return wid, obj
    except Exception:
        # Best effort: one failing workflow must not abort the others, but
        # keep the traceback in the log so the failure can be diagnosed.
        logger.exception(f"Failed to resume workflow {wid}")
        return (wid, None)
def test_workflow_output_resolving(workflow_start_regular_shared):
    """Flattening a deeply nested workflow output must resolve to 42.

    The resolver in ``workflow_access`` is temporarily replaced by this
    module's ``_resolve_workflow_output`` (intended not to involve the
    named actor) and is always restored afterwards.
    """
    # Deeply nested workflow output.
    deep_ref = deep_nested.remote(30)

    saved_resolver = workflow_access._resolve_workflow_output
    workflow_access._resolve_workflow_output = _resolve_workflow_output
    try:
        flattened = workflow_access.flatten_workflow_output(
            "fake_workflow_id", deep_ref)
    finally:
        # Put the real resolver back even if flattening raised.
        workflow_access._resolve_workflow_output = saved_resolver

    assert ray.get(flattened) == 42
def get_output(workflow_id: str, name: Optional[str]) -> ray.ObjectRef:
    """Fetch the output of a running workflow.

    The request goes through the existing workflow management actor; a
    ``ValueError`` from the actor lookup is re-raised with a hint to use
    ``workflow.resume()``. See "api.get_output()" for details.
    """
    assert ray.is_initialized()

    try:
        manager = get_management_actor()
    except ValueError as err:
        raise ValueError(
            "Failed to connect to the workflow management "
            "actor. The workflow could have already failed. You can use "
            "workflow.resume() to resume the workflow.") from err

    return flatten_workflow_output(
        workflow_id, ray.get(manager.get_output.remote(workflow_id, name)))
def resume(workflow_id: str) -> ray.ObjectRef:
    """Resume a workflow asynchronously and return its flattened output.

    See "api.resume()" for details.
    """
    logger.info(f'Resuming workflow [id="{workflow_id}"].')

    manager = get_or_create_management_actor()
    current_job = ray.get_runtime_context().job_id.hex()
    pending = manager.run_or_resume.remote(
        current_job, workflow_id, ignore_existing=False)
    # NOTE: 'ray.get' on the returned output is required so that the caller
    # holds the reference to the workflow result; otherwise the caller may
    # fail to resolve it once the actor drops its own reference.
    outcome: "WorkflowExecutionResult" = ray.get(pending)

    logger.info(f"Workflow job {workflow_id} resumed.")
    return flatten_workflow_output(workflow_id, outcome.persisted_output)