# NOTE: these snippets come from Ray's experimental workflow module and
# rely on its internal helpers (Workflow, Storage, create_storage,
# get_global_storage, set_global_storage, workflow_context, recovery,
# commit_step), which are assumed to be imported from that module.
# Standard-library imports they need:
import time
import logging
from typing import Optional, Union

import ray

logger = logging.getLogger(__name__)


def run(entry_workflow: Workflow,
        storage: Optional[Union[str, Storage]] = None,
        workflow_id: Optional[str] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously. See "api.run()" for details."""
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{entry_workflow.id}.{time.time():.9f}"
    if isinstance(storage, str):
        set_global_storage(create_storage(storage))
    elif isinstance(storage, Storage):
        set_global_storage(storage)
    elif storage is not None:
        raise TypeError("'storage' should be None, str, or Storage type.")
    storage_url = get_global_storage().storage_url
    logger.info(f"Workflow job created. [id=\"{workflow_id}\", storage_url="
                f"\"{storage_url}\"].")
    try:
        workflow_context.init_workflow_step_context(workflow_id, storage_url)
        commit_step(entry_workflow)
        # TODO(suquark): Move this to a detached named actor,
        # so the workflow shares fate with the actor.
        # The current plan is resuming the workflow on the detached named
        # actor. This is extremely simple to implement, but I am not sure
        # of its performance.
        output = recovery.resume_workflow_job(workflow_id,
                                              get_global_storage())
        logger.info(f"Workflow job {workflow_id} started.")
    finally:
        workflow_context.set_workflow_step_context(None)
    return output
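# Usage sketch (illustrative, not part of the module): run a workflow
# against filesystem storage and block on its result. `make_entry_workflow`
# is a hypothetical factory that builds a `Workflow` via the step API, and
# the storage path is an arbitrary example.
def _example_run():
    ray.init()
    entry = make_entry_workflow()  # hypothetical Workflow factory
    output_ref = run(entry, storage="/tmp/workflow_data")
    return ray.get(output_ref)  # block until the workflow finishes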
# Method of a workflow management class/actor: it caches output refs in
# self._workflow_outputs and reads checkpoints from self._store.
def run_or_resume(self, workflow_id: str) -> ray.ObjectRef:
    """Run or resume a workflow.

    Args:
        workflow_id: The ID of the workflow.

    Returns:
        An object reference that can be used to retrieve the workflow
        result.
    """
    if workflow_id in self._workflow_outputs:
        raise ValueError(f"The output of workflow[id={workflow_id}] "
                         "already exists.")
    output = recovery.resume_workflow_job(workflow_id, self._store)
    self._workflow_outputs[workflow_id] = output
    logger.info(f"Workflow job [id={workflow_id}] started.")
    return output
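# Sketch of the kind of owner class run_or_resume is assumed to live on
# (hypothetical; the real management actor is richer). All it needs is a
# storage handle and a dict deduplicating outputs by workflow ID.
class _ExampleWorkflowManager:
    def __init__(self, store: Storage):
        self._store = store
        self._workflow_outputs = {}  # workflow_id -> ray.ObjectRef

    run_or_resume = run_or_resume  # bind the function above as a method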
def resume(workflow_id: str,
           storage: Optional[Union[str, Storage]] = None) -> ray.ObjectRef:
    """Resume a workflow asynchronously. See "api.resume()" for details."""
    if isinstance(storage, str):
        store = create_storage(storage)
    elif isinstance(storage, Storage):
        store = storage
    elif storage is None:
        store = get_global_storage()
    else:
        raise TypeError("'storage' should be None, str, or Storage type.")
    logger.info(f"Resuming workflow [id=\"{workflow_id}\", storage_url="
                f"\"{store.storage_url}\"].")
    output = recovery.resume_workflow_job(workflow_id, store)
    logger.info(f"Workflow job {workflow_id} resumed.")
    return output
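# Usage sketch (illustrative): resume an interrupted workflow by ID from
# the same storage it was started with. The ID and path below are
# hypothetical placeholders.
def _example_resume():
    ray.init()
    output_ref = resume("my-workflow-id", storage="/tmp/workflow_data")
    return ray.get(output_ref)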
# Earlier variant of resume(): it takes a storage path (workflow_root_dir)
# instead of a Storage object, and re-runs the recovered workflow itself
# when recovery returns a workflow DAG rather than an object ref.
def resume(workflow_id: str, workflow_root_dir=None) -> ray.ObjectRef:
    """Resume a workflow asynchronously. The workflow may have failed
    previously.

    Args:
        workflow_id: The ID used to identify the workflow.
        workflow_root_dir: The path of an external storage used for
            checkpointing.

    Returns:
        The execution result of the workflow, represented by a Ray
        ObjectRef.
    """
    assert ray.is_initialized()
    if workflow_root_dir is not None:
        store = storage.create_storage(workflow_root_dir)
    else:
        store = storage.get_global_storage()
    r = recovery.resume_workflow_job(workflow_id, store)
    if isinstance(r, ray.ObjectRef):
        return r
    # Skip saving the DAG of a recovery workflow.
    r.skip_saving_workflow_dag = True
    return run(r, workflow_root_dir, workflow_id)
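# Usage sketch for the earlier API (placeholder names): resume a failed
# workflow from an external checkpoint directory and fetch its result.
def _example_resume_legacy():
    ray.init()
    output_ref = resume("workflow-1234", workflow_root_dir="/tmp/ckpts")
    return ray.get(output_ref)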