def _validate_metadata(metadata: Optional[Dict]) -> None:
    """Check that user metadata is a dict with JSON-serializable values.

    ``None`` is accepted and means that no metadata was supplied.

    Raises:
        ValueError: If ``metadata`` is not a dict, or if ``json.dumps``
            rejects one of its values with a ``TypeError``.
    """
    if metadata is None:
        return
    if not isinstance(metadata, dict):
        raise ValueError("metadata must be a dict.")
    for k, v in metadata.items():
        try:
            json.dumps(v)
        except TypeError as e:
            # Chain explicitly so the TypeError is reported as the direct
            # cause of the ValueError.
            raise ValueError("metadata values must be JSON serializable, "
                             "however '{}' has a value whose {}.".format(
                                 k, e)) from e


def run(entry_workflow: Workflow,
        workflow_id: Optional[str] = None,
        metadata: Optional[Dict] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    Args:
        entry_workflow: The workflow to execute.
        workflow_id: ID of the workflow. When ``None``, an ID of the form
            ``{uuid4}.{Unix time with 9 decimal places}`` is generated.
        metadata: Optional user metadata saved together with the workflow.
            Must be a dict whose values are JSON serializable. ``None`` is
            stored as an empty dict.

    Returns:
        The value produced by ``flatten_workflow_output``: built from the
        persisted output when the entry step type is ``StepType.FUNCTION``,
        and from the volatile output for every other step type.

    Raises:
        ValueError: If ``metadata`` is not a dict or contains a value that
            is not JSON serializable.
        AssertionError: If Ray has not been initialized.
    """
    _validate_metadata(metadata)
    metadata = metadata or {}

    store = get_global_storage()
    # Raised explicitly instead of using an `assert` statement so the check
    # is not stripped under `python -O`. The exception type is unchanged.
    if not ray.is_initialized():
        raise AssertionError(
            "Ray must be initialized before running a workflow.")

    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{str(uuid.uuid4())}.{time.time():.9f}"

    # Read once; the step type drives every branch below.
    step_type = entry_workflow.data.step_type

    logger.info(
        f"Workflow job created. [id=\"{workflow_id}\", storage_url="
        f"\"{store.storage_url}\"]. Type: {step_type} ")

    with workflow_context.workflow_step_context(workflow_id,
                                                store.storage_url):
        # checkpoint the workflow
        ws = workflow_storage.get_workflow_storage(workflow_id)
        ws.save_workflow_user_metadata(metadata)

        # Any failure to read the entrypoint step ID is treated as
        # "this workflow has not been checkpointed yet".
        wf_exists = True
        try:
            ws.get_entrypoint_step_id()
        except Exception:
            wf_exists = False

        is_dynamic = step_type != StepType.FUNCTION

        # We only commit for
        #  - virtual actor tasks: it's dynamic tasks, so we always add
        #  - it's a new workflow
        # TODO (yic): follow up with force rerun
        if is_dynamic or not wf_exists:
            commit_step(ws, "", entry_workflow, exception=None)
        workflow_manager = get_or_create_management_actor()
        ignore_existing = is_dynamic
        # NOTE: It is important to 'ray.get' the returned output. This
        # ensures caller of 'run()' holds the reference to the workflow
        # result. Otherwise if the actor removes the reference of the
        # workflow output, the caller may fail to resolve the result.
        result: "WorkflowExecutionResult" = ray.get(
            workflow_manager.run_or_resume.remote(workflow_id,
                                                  ignore_existing))
        if step_type == StepType.FUNCTION:
            return flatten_workflow_output(workflow_id,
                                           result.persisted_output)
        else:
            return flatten_workflow_output(workflow_id,
                                           result.volatile_output)
def run(
    entry_workflow: Workflow,
    workflow_id: Optional[str] = None,
    metadata: Optional[Dict] = None,
) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    Args:
        entry_workflow: The workflow to execute.
        workflow_id: ID of the workflow. When ``None``, an ID of the form
            ``{uuid4}.{Unix time with 9 decimal places}`` is generated.
        metadata: Optional user metadata, checked by
            ``validate_user_metadata`` and saved with the workflow. A falsy
            value is stored as an empty dict.

    Returns:
        The value produced by ``flatten_workflow_output``: built from the
        volatile output when the entry step can grow at runtime, and from
        the persisted output otherwise.
    """
    validate_user_metadata(metadata)
    if not metadata:
        metadata = {}

    from ray.workflow.api import _ensure_workflow_initialized

    _ensure_workflow_initialized()

    if workflow_id is None:
        # Generated ID layout: {entry workflow UUID}.{Unix time, 9 decimals}
        entry_uuid = str(uuid.uuid4())
        created_at = time.time()
        workflow_id = f"{entry_uuid}.{created_at:.9f}"

    step_type = entry_workflow.data.step_options.step_type
    logger.info(
        f'Workflow job created. [id="{workflow_id}"]. Type: {step_type}.')

    with workflow_context.workflow_step_context(workflow_id):
        # Persist the user metadata before anything is executed.
        wf_store = workflow_storage.get_workflow_storage(workflow_id)
        wf_store.save_workflow_user_metadata(metadata)

        # A failed lookup of the entrypoint step ID, for any reason, is
        # taken to mean the workflow has not been checkpointed before.
        try:
            wf_store.get_entrypoint_step_id()
        except Exception:
            is_new_workflow = True
        else:
            is_new_workflow = False

        # A "growing" workflow is one whose top-level step may have more
        # steps added to it dynamically at runtime.
        fixed_step_types = (StepType.FUNCTION, StepType.WAIT)
        grows_at_runtime = step_type not in fixed_step_types

        # The entry workflow is checkpointed when either
        #  - it is a growing workflow (e.g. virtual actor tasks), or
        #  - the workflow is new.
        # TODO (yic): follow up with force rerun
        if grows_at_runtime or is_new_workflow:
            commit_step(wf_store, "", entry_workflow, exception=None)

        manager = get_or_create_management_actor()

        # NOTE: The output must be fetched with 'ray.get' here so that the
        # caller of 'run()' holds a reference to the workflow result. If
        # only the actor held it and then dropped it, the caller could fail
        # to resolve the result.
        runtime_ctx = ray.get_runtime_context()
        job_id = runtime_ctx.job_id.hex()
        # Growing workflows ask the manager to ignore an existing run.
        pending = manager.run_or_resume.remote(
            job_id, workflow_id, grows_at_runtime)
        execution_result: "WorkflowExecutionResult" = ray.get(pending)

        output = (execution_result.volatile_output
                  if grows_at_runtime else execution_result.persisted_output)
        return flatten_workflow_output(workflow_id, output)
def run(entry_workflow: Workflow,
        workflow_id: Optional[str] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    Args:
        entry_workflow: The workflow to execute.
        workflow_id: ID of the workflow. When ``None``, an ID of the form
            ``{uuid4}.{Unix time with 9 decimal places}`` is generated.

    Returns:
        The value produced by ``flatten_workflow_output``: built from the
        persisted output when the entry step type is ``StepType.FUNCTION``,
        and from the volatile output for every other step type.

    Raises:
        AssertionError: If Ray has not been initialized.
    """
    store = get_global_storage()
    # Raised explicitly instead of using an `assert` statement so the check
    # is not stripped under `python -O`. The exception type is unchanged.
    if not ray.is_initialized():
        raise AssertionError(
            "Ray must be initialized before running a workflow.")

    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{str(uuid.uuid4())}.{time.time():.9f}"

    logger.info(f"Workflow job created. [id=\"{workflow_id}\", storage_url="
                f"\"{store.storage_url}\"].")

    with workflow_context.workflow_step_context(workflow_id,
                                                store.storage_url):
        # checkpoint the workflow
        ws = workflow_storage.get_workflow_storage(workflow_id)

        # Any failure to read the entrypoint step ID is treated as
        # "this workflow has not been checkpointed yet".
        wf_exists = True
        try:
            ws.get_entrypoint_step_id()
        except Exception:
            wf_exists = False

        # Read once; the step type drives every branch below.
        step_type = entry_workflow.data.step_type
        is_dynamic = step_type != StepType.FUNCTION

        # We only commit for
        #  - virtual actor tasks: it's dynamic tasks, so we always add
        #  - it's a new workflow
        # TODO (yic): follow up with force rerun
        if is_dynamic or not wf_exists:
            commit_step(ws, "", entry_workflow, None)
        workflow_manager = get_or_create_management_actor()
        ignore_existing = is_dynamic
        # NOTE: It is important to 'ray.get' the returned output. This
        # ensures caller of 'run()' holds the reference to the workflow
        # result. Otherwise if the actor removes the reference of the
        # workflow output, the caller may fail to resolve the result.
        result: "WorkflowExecutionResult" = ray.get(
            workflow_manager.run_or_resume.remote(workflow_id,
                                                  ignore_existing))
        if step_type == StepType.FUNCTION:
            return flatten_workflow_output(workflow_id,
                                           result.persisted_output)
        else:
            return flatten_workflow_output(workflow_id,
                                           result.volatile_output)