Example #1
0
    def update_step_status(self, workflow_id: str, step_id: str,
                           status: common.WorkflowStatus):
        """Record a step's new status and finalize the workflow if needed.

        A SUCCESSFUL step is dropped from the tracked steps of the workflow;
        any other status is stored for the step. The workflow metadata is
        persisted as FAILED when the reported status is FAILED, or as
        SUCCESSFUL once no tracked steps remain. In both cases the workflow
        stops being tracked afterwards.

        Args:
            workflow_id: The ID of the workflow the step belongs to.
            step_id: The ID of the step whose status changed.
            status: The new status of the step.
        """
        # Note: For virtual actor, we could add more steps even if
        # the workflow finishes, so the entry is (re)created on demand.
        tracked_steps = self._step_status.setdefault(workflow_id, {})
        if status == common.WorkflowStatus.SUCCESSFUL:
            tracked_steps.pop(step_id, None)
        else:
            tracked_steps[step_id] = status

        # Drop the cached step output unless the step is still running.
        if status != common.WorkflowStatus.RUNNING:
            self._step_output_cache.pop((workflow_id, step_id), None)

        has_failed = status == common.WorkflowStatus.FAILED
        if not has_failed and tracked_steps:
            # Steps are still tracked and nothing failed: nothing to persist.
            return

        wf_store = workflow_storage.WorkflowStorage(workflow_id, self._store)
        if has_failed:
            # Cancel the tracked workflow output, if any, before persisting.
            if workflow_id in self._workflow_outputs:
                failed_output = self._workflow_outputs.pop(workflow_id)
                cancel_job(failed_output.output)
            final_status = common.WorkflowStatus.FAILED
        else:
            # No tracked steps remain.
            final_status = common.WorkflowStatus.SUCCESSFUL
        wf_store.save_workflow_meta(common.WorkflowMetaData(final_status))
        self._step_status.pop(workflow_id)
Example #2
0
    def get_output(self, workflow_id: str) -> "ray.ObjectRef":
        """Get the output of a running workflow.

        If an output is already tracked for the workflow it is returned
        directly. Otherwise the workflow is resumed from its entrypoint
        step, its output is tracked, and its stored status is set to RUNNING.

        Args:
            workflow_id: The ID of a workflow job.

        Returns:
            An object reference that can be used to retrieve the
            workflow result.

        Raises:
            ValueError: If no metadata is stored for the workflow, or if
                the stored status is FAILED.
        """
        if workflow_id in self._workflow_outputs:
            return self._workflow_outputs[workflow_id].output
        wf_store = workflow_storage.WorkflowStorage(workflow_id, self._store)
        meta = wf_store.load_workflow_meta()
        if meta is None:
            raise ValueError(f"No such workflow {workflow_id}")
        # Metadata is saved as WorkflowMetaData(status), so comparing the
        # metadata object itself to a status would never match. Compare the
        # wrapped status instead; fall back to the loaded value when it has
        # no "status" attribute.
        # NOTE(review): assumes WorkflowMetaData exposes the status as
        # `.status` - confirm against common.WorkflowMetaData.
        stored_status = getattr(meta, "status", meta)
        if stored_status == common.WorkflowStatus.FAILED:
            raise ValueError(
                f"Workflow {workflow_id} failed, please resume it")
        step_id = wf_store.get_entrypoint_step_id()
        result = recovery.resume_workflow_step(workflow_id, step_id,
                                               self._store.storage_url)
        latest_output = LatestWorkflowOutput(result.persisted_output,
                                             workflow_id, step_id)
        self._workflow_outputs[workflow_id] = latest_output
        # Reuse the storage handle created above rather than building an
        # identical one.
        wf_store.save_workflow_meta(
            common.WorkflowMetaData(common.WorkflowStatus.RUNNING))
        self._step_status.setdefault(workflow_id, {})
        # "persisted_output" is the return value of a step or the state of
        # a virtual actor.
        return result.persisted_output
Example #3
0
    def run_or_resume(self, workflow_id: str, ignore_existing: bool = False
                      ) -> "WorkflowExecutionResult":
        """Start a workflow, or resume it from its entrypoint step.

        Args:
            workflow_id: The ID of the workflow.
            ignore_existing: When false (the default), raise an exception
                if an output is already tracked for this workflow ID. When
                true, the tracked output is replaced.

        Returns:
            Workflow execution result that contains the state and output.

        Raises:
            RuntimeError: If an output is already tracked for the workflow
                and ``ignore_existing`` is false.
        """
        already_tracked = workflow_id in self._workflow_outputs
        if already_tracked and not ignore_existing:
            message = (f"The output of workflow[id={workflow_id}] "
                       "already exists.")
            raise RuntimeError(message)

        storage = workflow_storage.WorkflowStorage(workflow_id, self._store)
        entry_step_id = storage.get_entrypoint_step_id()
        result = recovery.resume_workflow_step(
            workflow_id, entry_step_id, self._store.storage_url)

        # Track the new output both per workflow and per entrypoint step.
        tracked_output = LatestWorkflowOutput(
            result.persisted_output, workflow_id, entry_step_id)
        self._workflow_outputs[workflow_id] = tracked_output
        self._step_output_cache[workflow_id, entry_step_id] = tracked_output

        running_meta = common.WorkflowMetaData(common.WorkflowStatus.RUNNING)
        storage.save_workflow_meta(running_meta)

        # Only a workflow without tracked step statuses is logged as started.
        if workflow_id in self._step_status:
            return result
        self._step_status[workflow_id] = {}
        logger.info(f"Workflow job [id={workflow_id}] started.")
        return result
Example #4
0
    def update_step_status(self, workflow_id: str, step_id: str,
                           status: common.WorkflowStatus):
        """Record a step's new status and finalize the workflow if needed.

        A FINISHED step is removed from the tracked steps of the workflow;
        any other status is stored for the step. The workflow metadata is
        persisted as RESUMABLE when the reported status is RESUMABLE, or as
        FINISHED once no tracked steps remain. In both cases the workflow
        stops being tracked afterwards.

        Args:
            workflow_id: The ID of the workflow the step belongs to.
            step_id: The ID of the step whose status changed.
            status: The new status of the step.

        Raises:
            KeyError: If ``status`` is FINISHED and no steps are tracked
                for ``workflow_id``.
        """
        if status == common.WorkflowStatus.FINISHED:
            # Direct indexing: an untracked workflow raises KeyError here.
            tracked_steps = self._step_status[workflow_id]
            tracked_steps.pop(step_id, None)
        else:
            tracked_steps = self._step_status.setdefault(workflow_id, {})
            tracked_steps[step_id] = status

        is_resumable = status == common.WorkflowStatus.RESUMABLE
        if not is_resumable and tracked_steps:
            # Steps are still tracked and nothing needs resuming yet.
            return

        wf_store = workflow_storage.WorkflowStorage(workflow_id, self._store)
        if is_resumable:
            # Cancel the tracked workflow output, if any, before persisting.
            if workflow_id in self._workflow_outputs:
                pending_output = self._workflow_outputs.pop(workflow_id)
                cancel_job(pending_output)
            final_status = common.WorkflowStatus.RESUMABLE
        else:
            # No tracked steps remain.
            final_status = common.WorkflowStatus.FINISHED
        wf_store.save_workflow_meta(common.WorkflowMetaData(final_status))
        self._step_status.pop(workflow_id)
Example #5
0
    def run_or_resume(self, workflow_id: str) -> ray.ObjectRef:
        """Launch (or resume) a workflow job and start tracking it.

        Args:
            workflow_id: The ID of the workflow.

        Returns:
            An object reference that can be used to retrieve the
            workflow result.

        Raises:
            ValueError: If an output is already tracked for this workflow.
        """
        if workflow_id in self._workflow_outputs:
            message = (f"The output of workflow[id={workflow_id}] "
                       "already exists.")
            raise ValueError(message)

        # Submit the resume job and remember its output reference.
        job_output = recovery.resume_workflow_job.remote(
            workflow_id, self._store.storage_url)
        self._workflow_outputs[workflow_id] = job_output

        # Persist the RUNNING status for the workflow.
        storage = workflow_storage.WorkflowStorage(workflow_id, self._store)
        running_meta = common.WorkflowMetaData(common.WorkflowStatus.RUNNING)
        storage.save_workflow_meta(running_meta)

        # Start with an empty set of tracked step statuses.
        self._step_status[workflow_id] = {}
        logger.info(f"Workflow job [id={workflow_id}] started.")
        return job_output
Example #6
0
 def cancel_workflow(self, workflow_id: str) -> None:
     """Cancel a tracked workflow and persist its status as CANCELED.

     Args:
         workflow_id: The ID of the workflow to cancel.

     Raises:
         KeyError: If the workflow has no tracked step statuses or no
             tracked output.
     """
     # Stop tracking the workflow; both pops raise KeyError when untracked.
     self._step_status.pop(workflow_id)
     tracked_output = self._workflow_outputs.pop(workflow_id)
     cancel_job(tracked_output.output)

     storage = workflow_storage.WorkflowStorage(workflow_id, self._store)
     canceled_meta = common.WorkflowMetaData(common.WorkflowStatus.CANCELED)
     storage.save_workflow_meta(canceled_meta)