Example #1
0
 def options(
     self,
     *,
     max_retries=0,
     catch_exceptions=False,
     name=None,
     metadata=None,
     **ray_options,
 ) -> "_VirtualActorMethodHelper":
     """Build a new method helper with some options overridden.

     Args:
         max_retries: Retry count for the new helper. The value stored on
             this helper is reused only when ``None`` is passed.
         catch_exceptions: Exception-catching flag for the new helper. The
             value stored on this helper is reused only when ``None`` is
             passed.
         name: Name of the new helper; ``None`` keeps the current name.
         metadata: User metadata merged on top of this helper's metadata.
             It is checked with ``validate_user_metadata`` first.
         **ray_options: Merged on top of this helper's ray options.

     Returns:
         A newly constructed ``_VirtualActorMethodHelper`` for the same
         class and method.
     """
     validate_user_metadata(metadata)
     current = self._options

     # NOTE(review): the declared defaults are 0 / False rather than None, so
     # these fallbacks only trigger when a caller passes None explicitly.
     # Confirm that omitting the argument is really meant to reset the value.
     if max_retries is None:
         max_retries = current.max_retries
     if catch_exceptions is None:
         catch_exceptions = current.catch_exceptions

     runtime_options = WorkflowStepRuntimeOptions.make(
         step_type=current.step_type,
         catch_exceptions=catch_exceptions,
         max_retries=max_retries,
         # '**ray_options' is always a dict, so it can be merged directly.
         ray_options={**current.ray_options, **ray_options},
     )
     helper = _VirtualActorMethodHelper(
         self._original_class,
         self._original_method,
         self._method_name,
         runtime_options=runtime_options,
     )
     helper._name = self._name if name is None else name
     extra_metadata = {} if metadata is None else metadata
     helper._user_metadata = {**self._user_metadata, **extra_metadata}
     return helper
Example #2
0
def run(
    entry_workflow: Workflow,
    workflow_id: Optional[str] = None,
    metadata: Optional[Dict] = None,
) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    Args:
        entry_workflow: The workflow to checkpoint and execute.
        workflow_id: Identifier of the workflow. When ``None``, one is
            generated from a random UUID and the current Unix time.
        metadata: User metadata saved with the workflow. It is checked with
            ``validate_user_metadata``; a falsy value is stored as ``{}``.

    Returns:
        The flattened workflow output: the volatile output for "growing"
        workflows, the persisted output otherwise.
    """
    validate_user_metadata(metadata)
    if not metadata:
        metadata = {}

    from ray.workflow.api import _ensure_workflow_initialized

    _ensure_workflow_initialized()

    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{uuid.uuid4()}.{time.time():.9f}"
    step_type = entry_workflow.data.step_options.step_type

    logger.info(
        f'Workflow job created. [id="{workflow_id}"]. Type: {step_type}.')

    with workflow_context.workflow_step_context(workflow_id):
        # Checkpoint the workflow.
        storage = workflow_storage.get_workflow_storage(workflow_id)
        storage.save_workflow_user_metadata(metadata)

        # Any failure to look up the entrypoint step is treated as
        # "this workflow does not exist yet".
        try:
            storage.get_entrypoint_step_id()
        except Exception:
            wf_exists = False
        else:
            wf_exists = True

        # "Is growing" means we could be adding steps to the (top-level)
        # workflow to grow the workflow dynamically at runtime.
        is_growing = step_type not in (StepType.FUNCTION, StepType.WAIT)

        # We only commit when
        #  - it is a virtual actor task: tasks are dynamic, so we always add
        #  - it is a new workflow
        # TODO (yic): follow up with force rerun
        if is_growing or not wf_exists:
            # We must checkpoint the entry workflow.
            commit_step(storage, "", entry_workflow, exception=None)

        manager = get_or_create_management_actor()
        # NOTE: It is important to 'ray.get' the returned output. This
        # ensures caller of 'run()' holds the reference to the workflow
        # result. Otherwise if the actor removes the reference of the
        # workflow output, the caller may fail to resolve the result.
        job_id = ray.get_runtime_context().job_id.hex()
        pending = manager.run_or_resume.remote(
            job_id, workflow_id, is_growing)  # third argument: ignore_existing
        result: "WorkflowExecutionResult" = ray.get(pending)

        output = (result.volatile_output
                  if is_growing else result.persisted_output)
        return flatten_workflow_output(workflow_id, output)
0
    def options(
        self,
        *,
        max_retries: int = None,
        catch_exceptions: bool = None,
        name: str = None,
        metadata: Dict[str, Any] = None,
        allow_inplace: bool = None,
        checkpoint: "Optional[CheckpointModeType]" = None,
        **ray_options,
    ) -> "WorkflowStepFunction":
        """Configure how the step function is going to be executed.

        Every argument left as ``None`` keeps the value currently stored on
        this step function (``checkpoint`` is resolved through
        ``_inherit_checkpoint_option`` instead).

        Args:
            max_retries: Number of retries of the step for an
                application-level error.
            catch_exceptions: Whether the user wants to take care of the
                failure manually.
                If it is true, (Optional[R], Optional[E]) will be returned.
                If it is false, the normal result will be returned.
            name: The name of this step, which will be used to
                generate the step_id of the step. The name will be used
                directly as the step id if possible, otherwise deduplicated by
                appending .N suffixes.
            metadata: Metadata to add to the step; merged on top of the
                metadata already attached to this step function.
            allow_inplace: Execute the workflow step inplace.
            checkpoint: The option for checkpointing.
            **ray_options: All parameters in this field will be passed
                to ray remote function options, merged on top of the
                ray options already stored.

        Returns:
            A new ``WorkflowStepFunction`` wrapping the same function with
            the updated options.
        """
        validate_user_metadata(metadata)
        current = self._step_options

        if name is None:
            name = self._name
        extra_metadata = {} if metadata is None else metadata
        merged_metadata = {**self._user_metadata, **extra_metadata}

        if catch_exceptions is None:
            catch_exceptions = current.catch_exceptions
        if max_retries is None:
            max_retries = current.max_retries
        if allow_inplace is None:
            allow_inplace = current.allow_inplace

        new_options = WorkflowStepRuntimeOptions.make(
            step_type=StepType.FUNCTION,
            catch_exceptions=catch_exceptions,
            max_retries=max_retries,
            allow_inplace=allow_inplace,
            checkpoint=_inherit_checkpoint_option(checkpoint),
            # '**ray_options' is always a dict, so it can be merged directly.
            ray_options={**current.ray_options, **ray_options},
        )
        return WorkflowStepFunction(
            self._func,
            step_options=new_options,
            name=name,
            metadata=merged_metadata,
        )
Example #4
0
    def __init__(
        self,
        func: Callable,
        *,
        step_options: "WorkflowStepRuntimeOptions" = None,
        name: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        """Wrap ``func`` and expose ``self.step`` to build workflows from it.

        Args:
            func: The function used as the body of the step.
            step_options: Runtime options of the step. When ``None``,
                FUNCTION-type options are created the first time ``step``
                is called.
            name: Name of the step; a falsy value is stored as ``""``.
            metadata: User metadata of the step, checked with
                ``validate_user_metadata``; a falsy value is stored as ``{}``.
        """
        validate_user_metadata(metadata)
        self._func = func
        self._step_options = step_options
        self._func_signature = signature.extract_signature(func)
        self._name = name if name else ""
        self._user_metadata = metadata if metadata else {}

        # Override signature and docstring with those of the wrapped function.
        @functools.wraps(func)
        def _build_workflow(*args, **kwargs) -> Workflow:
            nonlocal step_options

            bound_args = signature.flatten_args(
                self._func_signature, args, kwargs)

            def prepare_inputs():
                from ray.workflow.api import _ensure_workflow_initialized

                _ensure_workflow_initialized()
                return serialization_context.make_workflow_inputs(bound_args)

            # Create default options lazily. Only the closure variable is
            # rebound here; 'self._step_options' keeps the value assigned in
            # the constructor.
            if step_options is None:
                step_options = WorkflowStepRuntimeOptions.make(
                    step_type=StepType.FUNCTION)
            # We could have "checkpoint=None" when we use @workflow.step
            # with arguments. Avoid this by updating it here.
            step_options.checkpoint = _inherit_checkpoint_option(
                step_options.checkpoint)

            return Workflow(
                WorkflowData(
                    func_body=self._func,
                    inputs=None,
                    step_options=step_options,
                    name=self._name,
                    user_metadata=self._user_metadata,
                ),
                prepare_inputs,
            )

        self.step = _build_workflow
Example #5
0
def run(
    dag: DAGNode,
    dag_inputs: DAGInputData,
    workflow_id: Optional[str] = None,
    metadata: Optional[Dict] = None,
) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    Args:
        dag: The DAG converted into the workflow execution state.
        dag_inputs: Input data handed to the DAG conversion.
        workflow_id: Identifier of the workflow. When ``None``, one is
            generated from a random UUID and the current Unix time.
        metadata: User metadata saved with the workflow. It is checked with
            ``validate_user_metadata``; a falsy value is stored as ``{}``.

    Returns:
        The result of ``resume(workflow_id)`` when the workflow already has
        an entrypoint step, otherwise the reference returned by the
        management actor's ``execute_workflow``.

    Raises:
        RuntimeError: If the management actor reports that the workflow is
            already running.
    """
    validate_user_metadata(metadata)
    if not metadata:
        metadata = {}

    from ray.workflow.api import _ensure_workflow_initialized

    _ensure_workflow_initialized()

    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{uuid.uuid4()}.{time.time():.9f}"

    state = workflow_state_from_dag(dag, dag_inputs, workflow_id)
    logger.info(f'Workflow job created. [id="{workflow_id}"].')

    context = workflow_context.WorkflowStepContext(workflow_id=workflow_id)
    with workflow_context.workflow_step_context(context):
        # Checkpoint the workflow.
        storage = workflow_storage.get_workflow_storage(workflow_id)
        storage.save_workflow_user_metadata(metadata)

        job_id = ray.get_runtime_context().job_id.hex()

        try:
            storage.get_entrypoint_step_id()
        except Exception:
            # The workflow does not exist. We must checkpoint entry workflow.
            storage.save_workflow_execution_state("", state)
            wf_exists = False
        else:
            wf_exists = True

        manager = get_or_create_management_actor()
        already_running = ray.get(
            manager.is_workflow_running.remote(workflow_id))
        if already_running:
            raise RuntimeError(f"Workflow '{workflow_id}' is already running.")
        if wf_exists:
            return resume(workflow_id)

        ignore_existing = storage.load_workflow_status() == WorkflowStatus.NONE
        submission = manager.submit_workflow.remote(
            workflow_id, state, ignore_existing=ignore_existing)
        # Wait for the submission to finish before starting execution.
        ray.get(submission)
        return manager.execute_workflow.remote(job_id, context)
Example #6
0
    def __init__(self, **workflow_options: Dict[str, Any]):
        """Check the workflow option keywords and pack them into ``options``.

        The accepted options are stored under
        ``self.options["_metadata"][WORKFLOW_OPTIONS]``.

        Raises:
            ValueError: If a keyword outside the supported set is passed.
                ``validate_user_metadata`` is also applied to the
                ``metadata`` option and may reject it.
        """
        # TODO(suquark): More rigid arguments check like @ray.remote arguments. This is
        # fairly complex, but we should enable it later.
        supported = {
            "name",
            "metadata",
            "catch_exceptions",
            "allow_inplace",
            "checkpoint",
        }
        unknown = set(workflow_options).difference(supported)
        if unknown:
            raise ValueError(
                f"Invalid option keywords {unknown} for workflow steps. "
                f"Valid ones are {supported}.")

        from ray.workflow.common import WORKFLOW_OPTIONS

        validate_user_metadata(workflow_options.get("metadata"))

        packed = {WORKFLOW_OPTIONS: workflow_options}
        self.options = {"_metadata": packed}
Example #7
0
    def _node_visitor(node: Any) -> Any:
        """Convert one DAG node into its workflow representation.

        A ``FunctionNode`` is registered as a task in the enclosing ``state``
        and replaced by a ``WorkflowRef`` to that task. Input nodes are
        replaced by input data and non-DAG objects are returned unchanged.

        This is a closure: it reads ``context``, ``dag_node``, ``mgr``,
        ``workflow_id``, ``state`` and ``input_context`` from the enclosing
        scope.

        Raises:
            ValueError: If the node requests more than one return value or
                has an invalid ``max_retries`` option.
            TypeError: If the node is a DAG node of an unsupported kind.
        """
        if isinstance(node, FunctionNode):
            # Shallow copy: nested dicts are still shared with the node, so
            # they must only be read, never mutated, below.
            bound_options = node._bound_options.copy()
            num_returns = bound_options.get("num_returns", 1)
            if num_returns is None:  # ray could use `None` as default value
                num_returns = 1
            if num_returns > 1:
                raise ValueError("Workflow steps can only have one return.")

            # "_metadata" is removed from the copy so that it is not passed
            # on as a ray option.
            workflow_options = bound_options.pop("_metadata",
                                                 {}).get(WORKFLOW_OPTIONS, {})

            # If checkpoint option is not specified, inherit checkpoint
            # options from context (i.e. checkpoint options of the outer
            # step). If it is still not specified, it's True by default.
            checkpoint = workflow_options.get("checkpoint", None)
            if checkpoint is None:
                checkpoint = context.checkpoint if context is not None else True
            # When it returns a nested workflow, catch_exception
            # should be passed recursively.
            catch_exceptions = workflow_options.get("catch_exceptions", None)
            if catch_exceptions is None:
                # TODO(suquark): should we also handle exceptions from a "leaf node"
                #   in the continuation? For example, we have a workflow
                #   > @ray.remote
                #   > def A(): pass
                #   > @ray.remote
                #   > def B(x): return x
                #   > @ray.remote
                #   > def C(x): return workflow.continuation(B.bind(A.bind()))
                #   > dag = C.options(**workflow.options(catch_exceptions=True)).bind()
                #   Should C catches exceptions of A?
                if node.get_stable_uuid() == dag_node.get_stable_uuid():
                    # 'catch_exception' context should be passed down to
                    # its direct continuation task.
                    # In this case, the direct continuation is the output node.
                    catch_exceptions = (context.catch_exceptions
                                        if context is not None else False)
                else:
                    catch_exceptions = False

            max_retries = bound_options.get("max_retries", 3)
            if not isinstance(max_retries, int) or max_retries < -1:
                raise ValueError(
                    "'max_retries' only accepts 0, -1 or a positive integer.")

            step_options = WorkflowStepRuntimeOptions(
                step_type=StepType.FUNCTION,
                catch_exceptions=catch_exceptions,
                max_retries=max_retries,
                allow_inplace=False,
                checkpoint=checkpoint,
                ray_options=bound_options,
            )

            workflow_refs: List[WorkflowRef] = []
            with serialization_context.workflow_args_serialization_context(
                    workflow_refs):
                _func_signature = signature.extract_signature(node._body)
                flattened_args = signature.flatten_args(
                    _func_signature, node._bound_args, node._bound_kwargs)
                # NOTE: When calling 'ray.put', we trigger python object
                # serialization. Under our serialization context,
                # Workflows are separated from the arguments,
                # leaving a placeholder object with all other python objects.
                # Then we put the placeholder object to object store,
                # so it won't be mutated later. This guarantees correct
                # semantics. See "tests/test_variable_mutable.py" as
                # an example.
                input_placeholder: ray.ObjectRef = ray.put(flattened_args)

            name = workflow_options.get("name")
            if name is None:
                name = f"{get_module(node._body)}.{slugify(get_qualname(node._body))}"
            task_id = ray.get(mgr.gen_step_id.remote(workflow_id, name))
            state.add_dependencies(task_id, [s.task_id for s in workflow_refs])
            state.task_input_args[task_id] = input_placeholder

            # Read the metadata with 'get', not 'pop': 'workflow_options' is
            # the dict stored inside the node's bound options, and removing
            # the key would make the metadata disappear the next time the
            # same node is converted.
            user_metadata = workflow_options.get("metadata", {})
            validate_user_metadata(user_metadata)
            state.tasks[task_id] = Task(
                name=name,
                options=step_options,
                user_metadata=user_metadata,
                func_body=node._body,
            )
            return WorkflowRef(task_id)

        if isinstance(node, InputAttributeNode):
            return node._execute_impl()  # get data from input node
        if isinstance(node, InputNode):
            return input_context  # replace input node with input data
        if not isinstance(node, DAGNode):
            return node  # return normal objects
        raise TypeError(f"Unsupported DAG node: {node}")
Example #8
0
def run_async(
    dag: DAGNode,
    *args,
    workflow_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **kwargs,
) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    If the workflow with the given id already exists, it will be resumed.

    Args:
        dag: The DAG to run as a workflow.
        *args: Positional input data for the DAG.
        workflow_id: A unique identifier that can be used to resume the
            workflow. If not specified, a random id will be generated.
        metadata: The metadata to add to the workflow. It has to be able
            to serialize to json.
        **kwargs: Keyword input data for the DAG.

    Returns:
       The running result as ray.ObjectRef.

    Raises:
        TypeError: If ``dag`` is not a ``DAGNode``.
        RuntimeError: If the management actor reports the workflow as
            non-terminating (already running or pending).
    """
    _ensure_workflow_initialized()
    if not isinstance(dag, DAGNode):
        raise TypeError("Input should be a DAG.")
    input_data = DAGInputData(*args, **kwargs)
    validate_user_metadata(metadata)
    if not metadata:
        metadata = {}

    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{uuid.uuid4()}.{time.time():.9f}"

    state = workflow_state_from_dag(dag, input_data, workflow_id)
    logger.info(f'Workflow job created. [id="{workflow_id}"].')

    context = workflow_context.WorkflowStepContext(workflow_id=workflow_id)
    with workflow_context.workflow_step_context(context):
        # Checkpoint the workflow.
        storage = WorkflowStorage(workflow_id)
        storage.save_workflow_user_metadata(metadata)

        job_id = ray.get_runtime_context().job_id.hex()

        try:
            storage.get_entrypoint_step_id()
        except Exception:
            # The workflow does not exist. We must checkpoint entry workflow.
            storage.save_workflow_execution_state("", state)
            wf_exists = False
        else:
            wf_exists = True

        manager = workflow_access.get_management_actor()
        still_active = ray.get(
            manager.is_workflow_non_terminating.remote(workflow_id))
        if still_active:
            raise RuntimeError(
                f"Workflow '{workflow_id}' is already running or pending.")
        if wf_exists:
            return resume_async(workflow_id)

        ignore_existing = storage.load_workflow_status() == WorkflowStatus.NONE
        submission = manager.submit_workflow.remote(
            workflow_id, state, ignore_existing=ignore_existing)
        # Wait for the submission to finish before starting execution.
        ray.get(submission)
        return manager.execute_workflow.remote(job_id, context)