Beispiel #1
0
def resume_workflow_job(workflow_id: str,
                        store: storage.Storage) -> ray.ObjectRef:
    """Resume a workflow job from what is recorded in storage.

    The entrypoint step ID is read from the workflow storage and handed to
    ``_construct_resume_workflow_from_step``. If that yields a ``Workflow``,
    it is executed inside a workflow step context; otherwise the value is
    passed to ``load_step_output`` and the loaded output is put into the
    Ray object store.

    Args:
        workflow_id: The ID of the workflow job. The ID is used to identify
            the workflow.
        store: The storage to access the workflow.

    Raises:
        WorkflowNotResumableError: if reading the entrypoint step ID or
            constructing the resumed workflow fails. The original exception
            is chained as the cause.

    Returns:
        The execution result of the workflow, represented by Ray ObjectRef.
    """
    wf_storage = workflow_storage.WorkflowStorage(workflow_id, store)
    try:
        entry_id = wf_storage.get_entrypoint_step_id()
        resumed = _construct_resume_workflow_from_step(wf_storage, entry_id)
    except Exception as err:
        raise WorkflowNotResumableError(workflow_id) from err

    if not isinstance(resumed, Workflow):
        # Not a Workflow: load the recorded step output it refers to and
        # wrap it in an object reference.
        return ray.put(wf_storage.load_step_output(resumed))

    try:
        workflow_context.init_workflow_step_context(workflow_id,
                                                    store.storage_url)
        return execute_workflow(resumed)
    finally:
        # Always reset the step context, whether or not execution succeeded.
        workflow_context.set_workflow_step_context(None)
Beispiel #2
0
Datei: api.py Projekt: simjay/ray
def run(entry_workflow: Workflow,
        workflow_root_dir=None,
        workflow_id=None) -> ray.ObjectRef:
    """Run a workflow.

    A workflow step context is initialized for the duration of the call and
    is reset to ``None`` before returning, even if an error occurs.

    Args:
        entry_workflow: The workflow to run.
        workflow_root_dir: The path of an external storage used for
            checkpointing.
        workflow_id: The ID of the workflow. The ID is used to identify
            the workflow. When omitted, an ID is generated.

    Returns:
        The second element of what ``resolve_object_ref`` returns for the
        reference produced by ``entry_workflow.execute()`` (annotated as a
        Ray ObjectRef).
    """
    if workflow_id is None:
        # TODO(suquark): include the name of the workflow in the default ID,
        # this makes the ID more readable.
        # Generated ID format: {UUID}.{Unix time to nanoseconds}
        workflow_id = f"{uuid.uuid4().hex}.{time.time():.9f}"
    logger.info(f"Workflow job {workflow_id} created.")

    try:
        workflow_context.init_workflow_step_context(workflow_id,
                                                    workflow_root_dir)
        result_ref = entry_workflow.execute()
        logger.info(f"Workflow job {workflow_id} started.")
        # TODO(suquark): although we do not return the resolved object to
        # user, the object was resolved temporarily to the driver script.
        # We may need a helper step for storing the resolved object
        # instead later.
        return resolve_object_ref(result_ref)[1]
    finally:
        # Runs on both the success and the failure path.
        workflow_context.set_workflow_step_context(None)
Beispiel #3
0
def run(entry_workflow: Workflow,
        storage: Optional[Union[str, Storage]] = None,
        workflow_id: Optional[str] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously. See "api.run()" for details.

    Args:
        entry_workflow: The workflow to run. Its ``id`` is used to build a
            default workflow ID when none is given.
        storage: ``None`` to keep the current global storage, a string that
            is passed to ``create_storage``, or a ``Storage`` instance. A
            non-None value replaces the global storage.
        workflow_id: The ID of the workflow job; generated when ``None``.

    Raises:
        TypeError: if ``storage`` is not None, str, or Storage.

    Returns:
        The result of ``flatten_workflow_output`` applied to the output
        obtained from the management actor.
    """
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{entry_workflow.id}.{time.time():.9f}"

    if storage is not None:
        if isinstance(storage, str):
            set_global_storage(create_storage(storage))
        elif isinstance(storage, Storage):
            set_global_storage(storage)
        else:
            raise TypeError("'storage' should be None, str, or Storage type.")

    storage_url = get_global_storage().storage_url
    logger.info(f"Workflow job created. [id=\"{workflow_id}\", storage_url="
                f"\"{storage_url}\"].")

    try:
        workflow_context.init_workflow_step_context(workflow_id, storage_url)
        commit_step(entry_workflow)
        try:
            manager = ray.get_actor(MANAGEMENT_ACTOR_NAME)
        except ValueError:
            # No actor registered under that name yet, so create it as a
            # detached named actor.
            manager = WorkflowManagementActor.options(
                name=MANAGEMENT_ACTOR_NAME, lifetime="detached").remote()
        # NOTE: It is important to 'ray.get' the returned output. This
        # ensures caller of 'run()' holds the reference to the workflow
        # result. Otherwise if the actor removes the reference of the
        # workflow output, the caller may fail to resolve the result.
        actor_output = ray.get(
            manager.run_or_resume.remote(workflow_id, storage_url))
        return flatten_workflow_output(workflow_id, actor_output)
    finally:
        # Reset the step context on every exit path.
        workflow_context.set_workflow_step_context(None)
Beispiel #4
0
def run(entry_workflow: Workflow,
        storage: Optional[Union[str, Storage]] = None,
        workflow_id: Optional[str] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously. See "api.run()" for details.

    The entry workflow is committed via ``commit_step`` and then started
    through ``recovery.resume_workflow_job`` against the global storage.

    Args:
        entry_workflow: The workflow to run. Its ``id`` is used to build a
            default workflow ID when none is given.
        storage: ``None`` to keep the current global storage, a string that
            is passed to ``create_storage``, or a ``Storage`` instance. A
            non-None value replaces the global storage.
        workflow_id: The ID of the workflow job; generated when ``None``.

    Raises:
        TypeError: if ``storage`` is not None, str, or Storage.

    Returns:
        Whatever ``recovery.resume_workflow_job`` returns for this job.
    """
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{entry_workflow.id}.{time.time():.9f}"

    if storage is not None:
        if isinstance(storage, str):
            set_global_storage(create_storage(storage))
        elif isinstance(storage, Storage):
            set_global_storage(storage)
        else:
            raise TypeError("'storage' should be None, str, or Storage type.")

    storage_url = get_global_storage().storage_url
    logger.info(f"Workflow job created. [id=\"{workflow_id}\", storage_url="
                f"\"{storage_url}\"].")

    try:
        workflow_context.init_workflow_step_context(workflow_id, storage_url)
        commit_step(entry_workflow)
        # TODO(suquark): Move this to a detached named actor,
        # so the workflow shares fate with the actor.
        # The current plan is resuming the workflow on the detached named
        # actor. This is extremely simple to implement, but I am not sure
        # of its performance.
        job_output = recovery.resume_workflow_job(workflow_id,
                                                  get_global_storage())
        logger.info(f"Workflow job {workflow_id} started.")
        return job_output
    finally:
        # Reset the step context on every exit path.
        workflow_context.set_workflow_step_context(None)
Beispiel #5
0
        def _func(context, task_id, args, kwargs):
            """Run the wrapped ``func`` as a workflow step.

            Args:
                context: The workflow step context to install before running.
                task_id: Identifier appended to the current workflow scope.
                args: Positional step inputs, resolved before the call.
                kwargs: Keyword step inputs, resolved before the call.

            Returns:
                The value returned by ``func``; if that value is a
                ``Workflow``, the result of calling its ``execute()``.
            """
            # NOTE: the context is installed through the workflow_context
            # setter; per the original note this is what ensures the correct
            # global variable is accessed.
            workflow_context.set_workflow_step_context(context)
            workflow_context.get_scope().append(task_id)

            args, kwargs, resolved_refs = resolve_step_inputs(args, kwargs)
            # Drop the resolved references right away to potentially save
            # memory while the step body runs.
            del resolved_refs

            result = func(*args, **kwargs)
            return result.execute() if isinstance(result, Workflow) else result