예제 #1
0
    def stop_computation(self):
        """
        Stop monitoring in response to a pause request.

        Clears and commits the ``please_pause`` flag, stops the engine, stops
        the database container (when one is set) and closes the database.
        Every step is attempted even if an earlier one fails, so a failing
        commit or engine shutdown cannot leave the container or the database
        connection open. Failures are logged as critical, never raised.
        The log file is closed last, whatever happened before.

        :return: None
        """

        def clear_pause_request():
            # reset pause flag: resume() stops as soon as it sees the flag set,
            # so leaving it set would pause the next resume straight away
            self.database.workflowmetadata.please_pause = False
            self.database.commit()

        def stop_db_container():
            if self.dbcontainer:
                self.dbcontainer.stop()

        # Lambdas keep attribute lookups lazy, so a lookup failure is handled
        # by the per-step ``except`` below like any other error.
        shutdown_steps = (
            clear_pause_request,
            lambda: self.get_engine().stop_engine(),
            stop_db_container,
            lambda: self.database.close(),
        )

        try:
            for step in shutdown_steps:
                try:
                    step()
                except Exception as e:
                    # f-string (rather than ``+``) so an unset WRITE_LOCATION
                    # cannot raise a TypeError while reporting the real error
                    Logger.critical(
                        f"An error occurred while trying to pause Janis state: {e}"
                        f"\n\nSee the logfile for more information: {Logger.WRITE_LOCATION}"
                    )
        finally:
            Logger.close_file()
예제 #2
0
    def resume(self):
        """
        Resume takes an initialised DB, looks for the engine (if it's around),
        or starts it and monitors the status, updating the DB every so often.

        Steps, in order:

        1. Point the logger at ``janis-monitor.log`` in the logs directory and
           change into the execution directory.
        2. Start the engine if required, then honour a pending ``please_abort``
           or ``please_pause`` request found on the workflow metadata.
        3. Submit the workflow if required and register an engine callback
           that queues ``save_metadata(meta)`` onto ``main_queue``.
        4. If the engine already reports a final status, save it and finish.
        5. Otherwise run queued callbacks until one returns ``True``, checking
           the abort / pause flags after every callback and every time the
           queue stays empty for the poll interval.

        Any exception is logged, the workflow is marked ``TaskStatus.FAILED``
        with the traceback stored as its error, and the engine, database
        container and database are shut down.

        :return: ``self`` when monitoring ran to completion or failed;
            the result of ``abort()`` / ``stop_computation()`` /
            ``process_completed_task()`` on the corresponding early exits;
            ``None`` when an abort request is seen inside the monitor loop.
        """

        # Longest time (in seconds) to block waiting for an engine callback
        # before re-checking the abort / pause flags. Blocking with a timeout
        # replaces a non-blocking get() in a tight loop, which spun the CPU
        # and re-read the workflow metadata continuously.
        poll_interval = 1.0

        # get a logfile and start doing stuff
        try:
            logsdir = self.get_path_for_component(self.WorkflowManagerPath.logs)

            Logger.set_write_location(os.path.join(logsdir, "janis-monitor.log"))

            # in case anything relies on CD, we'll throw it into janis/execution
            os.chdir(self.get_path_for_component(self.WorkflowManagerPath.execution))

            self.start_engine_if_required()

            if self.database.workflowmetadata.please_abort:
                Logger.info("Detected please_abort request, aborting")
                return self.abort()

            if self.database.workflowmetadata.please_pause:
                Logger.info("Detecting please_pause request, exiting")
                # Clear the request before stopping (as the monitor loop below
                # does), otherwise it stays set and pauses the next resume too.
                self.database.workflowmetadata.please_pause = False
                return self.stop_computation()

            # check status and see if we can resume
            self.submit_workflow_if_required()

            self.database.commit()
            # The engine callback only enqueues work; save_metadata itself is
            # run by the monitor loop below, on the thread running resume().
            self.get_engine().add_callback(
                self.get_engine_wid(),
                lambda meta: self.main_queue.put(lambda: self.save_metadata(meta)),
            )

            # add extra check for engine on resume
            meta = self._engine.metadata(self.get_engine_wid())
            if meta and meta.status in TaskStatus.final_states():
                self.save_metadata(meta)
                return self.process_completed_task()

            while True:
                try:
                    cb = self.main_queue.get(timeout=poll_interval)
                except queue.Empty:
                    # nothing arrived within the poll interval
                    pass
                else:
                    # callback from add_callback() returns True if in TaskStatus.final_states()
                    if cb() is True:
                        break

                # Checked after every callback as well as on timeout, so a
                # steady stream of callbacks cannot starve these requests.
                if self.database.workflowmetadata.please_abort:
                    self.abort()
                    return
                if self.database.workflowmetadata.please_pause:
                    self.database.workflowmetadata.please_pause = False
                    return self.stop_computation()

            self.process_completed_task()

        except Exception as e:
            import traceback

            err = traceback.format_exc()
            Logger.critical(
                f"A fatal error occurred while monitoring workflow = '{self.wid}', exiting: {e}: {err}"
            )

            try:
                self.database.workflowmetadata.status = TaskStatus.FAILED
                self.database.workflowmetadata.error = err
                self.database.commit()

                self.get_engine().stop_engine()
                if self.dbcontainer:
                    self.dbcontainer.stop()

                self.database.close()
            except Exception as store_error:
                Logger.critical(
                    "An additional fatal error occurred while trying to store Janis state: "
                    + str(store_error)
                    + "\n\nSee the logfile for more information: "
                    + Logger.WRITE_LOCATION
                )

        Logger.close_file()

        return self