Example #1
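This helper checkpoints a step's inputs before execution: it resolves the argument ObjectRefs under a serialization context, then writes the input metadata, user metadata, function body, and resolved arguments to storage concurrently with asyncio.gather.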
async def _write_step_inputs(wf_storage: workflow_storage.WorkflowStorage,
                             step_id: StepID, inputs: WorkflowData) -> None:
    """Save workflow inputs."""
    metadata = inputs.to_metadata()
    with serialization_context.workflow_args_keeping_context():
        # TODO(suquark): in the future we should write to storage directly
        # with the plasma store object in memory.
        args_obj = ray.get(inputs.inputs.args)
    workflow_id = wf_storage._workflow_id
    storage = wf_storage._storage
    save_tasks = [
        # TODO (Alex): Handle the json case better?
        wf_storage._put(wf_storage._key_step_input_metadata(step_id), metadata,
                        True),
        wf_storage._put(wf_storage._key_step_user_metadata(step_id),
                        inputs.user_metadata, True),
        serialization.dump_to_storage(
            wf_storage._key_step_function_body(step_id),
            inputs.func_body,
            workflow_id,
            storage,
        ),
        serialization.dump_to_storage(wf_storage._key_step_args(step_id),
                                      args_obj, workflow_id, storage),
    ]
    await asyncio.gather(*save_tasks)
Example #2
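A round-trip test for objects with embedded ObjectRefs. After dumping the wrapper, it forcibly restarts the cluster so the later _get must fully re-deserialize the object and re-put the refs, rather than reusing the old copies in the object store.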
def test_embedded_objectrefs(workflow_start_regular):
    workflow_id = test_embedded_objectrefs.__name__

    class ObjectRefsWrapper:
        def __init__(self, refs):
            self.refs = refs

    from ray.internal.storage import _storage_uri

    wrapped = ObjectRefsWrapper([ray.put(1), ray.put(2)])

    store = workflow_storage.get_workflow_storage(workflow_id)
    serialization.dump_to_storage("key", wrapped, workflow_id, store)

    # Be extremely explicit about shutting down. We want to make sure the
    # `_get` call deserializes the full object and puts it in the object store.
    # Shutting down the cluster should guarantee we don't accidentally get the
    # old object and pass the test.
    ray.shutdown()
    subprocess.check_output("ray stop --force", shell=True)

    ray.init(storage=_storage_uri)
    workflow.init()
    storage2 = workflow_storage.get_workflow_storage(workflow_id)

    result = storage2._get("key")
    assert ray.get(result.refs) == [1, 2]
Example #3
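This variant of save_step_output handles three cases: a nested workflow (write output metadata pointing at the inner step), a normal return value (checkpoint it), or an exception (persist it). The dynamic output pointer is updated only after checkpointing finishes, so recovery can never follow a pointer to a missing checkpoint.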
    def save_step_output(self, step_id: StepID, ret: Union[Workflow, Any], *,
                         exception: Optional[Exception],
                         outer_most_step_id: StepID) -> None:
        """When a workflow step returns,
        1. If the returned object is a workflow, this means we are a nested
           workflow. We save the output metadata that points to the workflow.
        2. Otherwise, checkpoint the output.

        Args:
            step_id: The ID of the workflow step. If it is an empty string,
                it means we are in the workflow job driver process.
            ret: The returned object from a workflow step.
            exception: The exception raised by the step, if any.
            outer_most_step_id: See WorkflowStepContext.
        """
        tasks = []
        dynamic_output_id = None
        if isinstance(ret, Workflow):
            # This workflow step returns a nested workflow.
            assert step_id != ret.step_id
            assert exception is None
            tasks.append(
                self._put(
                    self._key_step_output_metadata(step_id),
                    {"output_step_id": ret.step_id}, True))
            dynamic_output_id = ret.step_id
        else:
            if exception is None:
                # This workflow step returns an object.
                ret = ray.get(ret) if isinstance(ret, ray.ObjectRef) else ret
                promise = serialization.dump_to_storage(
                    self._key_step_output(step_id), ret, self._workflow_id,
                    self._storage)
                tasks.append(promise)
                # tasks.append(self._put(self._key_step_output(step_id), ret))
                dynamic_output_id = step_id
                # TODO (yic): Delete exception file
            else:
                assert ret is None
                promise = serialization.dump_to_storage(
                    self._key_step_exception(step_id), exception,
                    self._workflow_id, self._storage)
                tasks.append(promise)
                # tasks.append(
                #     self._put(self._key_step_exception(step_id), exception))

        # Finish checkpointing.
        asyncio_run(asyncio.gather(*tasks))

        # NOTE: if we update the dynamic output before checkpointing
        # finishes, then during recovery the dynamic output could point
        # to a checkpoint that does not exist.
        if dynamic_output_id is not None:
            asyncio_run(
                self._update_dynamic_output(outer_most_step_id,
                                            dynamic_output_id))
Example #4
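Another variant of save_step_output. Here the dynamic output update is batched into the same asyncio.gather as the checkpoint writes, and an empty outer_most_step_id marks the root step, whose update is skipped to avoid rewriting outputs.json directly.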
    def save_step_output(self, step_id: StepID, ret: Union[Workflow, Any],
                         exception: Optional[Exception],
                         outer_most_step_id: Optional[StepID]) -> None:
        """When a workflow step returns,
        1. If the returned object is a workflow, this means we are a nested
           workflow. We save the output metadata that points to the workflow.
        2. Otherwise, checkpoint the output.

        Args:
            step_id: The ID of the workflow step. If it is an empty string,
                it means we are in the workflow job driver process.
            ret: The returned object from a workflow step.
            exception: The exception raised by the step, if any.
            outer_most_step_id: See
                "step_executor.execute_workflow" for explanation.
        """
        tasks = []
        if isinstance(ret, Workflow):
            # This workflow step returns a nested workflow.
            assert step_id != ret.step_id
            assert exception is None
            tasks.append(
                self._put(self._key_step_output_metadata(step_id),
                          {"output_step_id": ret.step_id}, True))
            dynamic_output_id = ret.step_id
        else:
            if exception is None:
                # This workflow step returns an object.
                ret = ray.get(ret) if isinstance(ret, ray.ObjectRef) else ret
                promise = serialization.dump_to_storage(
                    self._key_step_output(step_id), ret, self._workflow_id,
                    self._storage)
                tasks.append(promise)
                # tasks.append(self._put(self._key_step_output(step_id), ret))
                dynamic_output_id = step_id
                # TODO (yic): Delete exception file

                # outer_most_step_id == "" indicates the root step of a
                # workflow. This would directly update "outputs.json" in
                # the workflow dir, and we want to avoid it.
                if outer_most_step_id is not None and outer_most_step_id != "":
                    tasks.append(
                        self._update_dynamic_output(outer_most_step_id,
                                                    dynamic_output_id))
            else:
                assert ret is None
                promise = serialization.dump_to_storage(
                    self._key_step_exception(step_id), exception,
                    self._workflow_id, self._storage)
                tasks.append(promise)
                # tasks.append(
                #     self._put(self._key_step_exception(step_id), exception))

        asyncio_run(asyncio.gather(*tasks))
Example #5
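A task-based counterpart of Example #1: it persists an entire WorkflowExecutionState (typically translated from a Ray DAG), writing each task's input metadata, user metadata, function body, and resolved arguments, and finally records output metadata linking the creator task to the DAG's output task.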
    def save_workflow_execution_state(self, creator_task_id: TaskID,
                                      state: WorkflowExecutionState) -> None:
        """Save a workflow execution state.
        Typically, the state is translated from a Ray DAG.

        Args:
            creator_task_id: The ID of the task that creates the state.
            state: The state converted from the DAG.
        """
        assert creator_task_id != state.output_task_id

        for task_id, task in state.tasks.items():
            # TODO (Alex): Handle the json case better?
            metadata = {
                **task.to_dict(),
                "workflow_refs":
                state.upstream_dependencies[task_id],
            }
            self._put(self._key_step_input_metadata(task_id), metadata, True)
            # TODO(suquark): The task user metadata is duplicated here.
            self._put(
                self._key_step_user_metadata(task_id),
                task.user_metadata,
                True,
            )
            workflow_id = self._workflow_id
            serialization.dump_to_storage(
                self._key_step_function_body(task_id),
                task.func_body,
                workflow_id,
                self,
            )
            with serialization_context.workflow_args_keeping_context():
                # TODO(suquark): in the future we should write to storage directly
                # with the plasma store object in memory.
                args_obj = ray.get(state.task_input_args[task_id])
            serialization.dump_to_storage(
                self._key_step_args(task_id),
                args_obj,
                workflow_id,
                self,
            )

        # Finally, point to the output ID of the DAG. The DAG is a continuation
        # of the creator task.
        self._put(
            self._key_step_output_metadata(creator_task_id),
            {"output_step_id": state.output_task_id},
            True,
        )
Example #6
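An async-storage variant of the test in Example #2. Keys are path lists rather than plain strings, and dump_to_storage and _get return awaitables that must be driven with asyncio_run.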
def test_embedded_objectrefs(workflow_start_regular):
    workflow_id = test_embedded_objectrefs.__name__
    base_storage = storage.get_global_storage()

    class ObjectRefsWrapper:
        def __init__(self, refs):
            self.refs = refs

    url = base_storage.storage_url

    wrapped = ObjectRefsWrapper([ray.put(1), ray.put(2)])

    promise = serialization.dump_to_storage(["key"], wrapped, workflow_id,
                                            base_storage)
    workflow_storage.asyncio_run(promise)

    # Be extremely explicit about shutting down. We want to make sure the
    # `_get` call deserializes the full object and puts it in the object store.
    # Shutting down the cluster should guarantee we don't accidentally get the
    # old object and pass the test.
    ray.shutdown()
    subprocess.check_output("ray stop --force", shell=True)

    workflow.init(url)
    storage2 = workflow_storage.get_workflow_storage(workflow_id)

    result = workflow_storage.asyncio_run(storage2._get(["key"]))
    assert ray.get(result.refs) == [1, 2]
Example #7
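A thin wrapper that routes writes through dump_to_storage (pickle) or a direct JSON put, converting any failure into DataSaveError. Note the TODO: the file write is not atomic, so a partially written file could break workflow recovery.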
    def _put(self, key: str, data: Any, is_json: bool = False) -> str:
        """Serialize and put an object in the object store.

        Args:
            key: The key of the object.
            data: The data to be stored.
            is_json: If True, JSON-encode the data; otherwise pickle it.
        """
        # TODO(suquark): Currently put to file is not atomic -- you can get a partial
        # file. This could fail workflow recovery.
        try:
            if not is_json:
                serialization.dump_to_storage(
                    key, data, self._workflow_id, storage=self
                )
            else:
                serialized_data = json.dumps(data).encode()
                self._storage.put(key, serialized_data)
        except Exception as e:
            raise DataSaveError from e

        return key
Example #8
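A simpler variant of save_step_output: the nested-workflow branch and the dynamic output update are gone, leaving a synchronous dump of either the return value or the exception.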
    def save_step_output(
        self,
        task_id: TaskID,
        ret: Any,
        *,
        exception: Optional[Exception],
    ) -> None:
        """When a workflow step returns,
        1. If the returned object is a workflow, this means we are a nested
           workflow. We save the output metadata that points to the workflow.
        2. Otherwise, checkpoint the output.

        Args:
            task_id: The ID of the workflow step. If it is an empty string,
                it means we are in the workflow job driver process.
            ret: The returned object from a workflow step.
            exception: The exception raised by the step, if any.
        """
        if exception is None:
            # This workflow step returns an object.
            ret = ray.get(ret) if isinstance(ret, ray.ObjectRef) else ret
            serialization.dump_to_storage(
                self._key_step_output(task_id),
                ret,
                self._workflow_id,
                storage=self,
            )
            # tasks.append(self._put(self._key_step_output(task_id), ret))
            # TODO (yic): Delete exception file
        else:
            assert ret is None
            serialization.dump_to_storage(
                self._key_step_exception(task_id),
                exception,
                self._workflow_id,
                storage=self,
            )