def stop_computation(self):
    """
    Persist pending state, then stop the engine and release the database.

    Steps, in order: commit the database, stop the engine, stop the database
    container (when one is set) and close the database. The first step that
    raises skips the remaining ones; the exception is logged as critical
    rather than propagated. The logfile is closed in every case.

    :return: None
    """
    try:
        # Persist pending changes first. When pausing from its monitoring
        # loop, resume() clears please_pause just before calling this method;
        # this commit is what writes that out.
        self.database.commit()

        self.get_engine().stop_engine()
        if self.dbcontainer:
            self.dbcontainer.stop()

        self.database.close()
    except Exception as e:
        # Formatted with an f-string (rather than "+" concatenation) so that
        # reporting the failure cannot itself raise a TypeError if
        # Logger.WRITE_LOCATION is not a string.
        Logger.critical(
            f"An error occurred while trying to pause Janis state: {e}"
            f"\n\nSee the logfile for more information: {Logger.WRITE_LOCATION}"
        )
    finally:
        # Always release the logfile, even if logging the failure raised.
        Logger.close_file()
def resume(self):
    """
    Resume monitoring of a workflow from an initialised database.

    Points the logger at ``janis-monitor.log`` in the logs directory, changes
    into the execution directory, starts the engine and submits the workflow
    if required, then runs the callbacks queued on ``self.main_queue`` until
    one of them returns ``True``. ``please_abort`` / ``please_pause`` requests
    in the workflow metadata are honoured before monitoring starts and
    whenever the queue is idle.

    Any exception raised along the way is logged as critical, the workflow is
    marked ``TaskStatus.FAILED`` with the traceback stored as its error, and
    the engine, database container (when set) and database are shut down.

    :return: ``self`` once monitoring has finished or a fatal error has been
        handled; the result of ``abort()``, ``stop_computation()`` or
        ``process_completed_task()`` on the early-exit paths; ``None`` when an
        abort is requested while monitoring.
    """
    # Seconds to block waiting for a queued callback before re-checking the
    # abort / pause flags. Blocking here (instead of a non-blocking get in a
    # tight loop) avoids spinning the CPU and re-reading the flags
    # continuously while the queue is empty.
    poll_seconds = 1

    # get a logfile and start doing stuff
    try:
        logsdir = self.get_path_for_component(self.WorkflowManagerPath.logs)
        Logger.set_write_location(os.path.join(logsdir, "janis-monitor.log"))

        # in case anything relies on CD, we'll throw it into janis/execution
        os.chdir(self.get_path_for_component(self.WorkflowManagerPath.execution))

        self.start_engine_if_required()

        if self.database.workflowmetadata.please_abort:
            Logger.info("Detected please_abort request, aborting")
            return self.abort()
        if self.database.workflowmetadata.please_pause:
            # NOTE(review): unlike the monitoring loop below, this path does
            # not clear please_pause before stopping - confirm that whatever
            # triggers a resume resets the flag.
            Logger.info("Detecting please_pause request, exiting")
            return self.stop_computation()

        # check status and see if we can resume
        self.submit_workflow_if_required()

        self.database.commit()
        # Engine callbacks only enqueue the work; save_metadata() itself is
        # executed by the loop below.
        self.get_engine().add_callback(
            self.get_engine_wid(),
            lambda meta: self.main_queue.put(lambda: self.save_metadata(meta)),
        )

        # add extra check for engine on resume: the workflow may already be
        # in a final state, in which case there is nothing left to monitor
        meta = self._engine.metadata(self.get_engine_wid())
        if meta and meta.status in TaskStatus.final_states():
            self.save_metadata(meta)
            return self.process_completed_task()

        while True:
            try:
                callback = self.main_queue.get(timeout=poll_seconds)
            except queue.Empty:
                # Nothing queued within the poll interval: use the idle time
                # to honour abort / pause requests.
                if self.database.workflowmetadata.please_abort:
                    self.abort()
                    return
                if self.database.workflowmetadata.please_pause:
                    self.database.workflowmetadata.please_pause = False
                    return self.stop_computation()
                continue

            # The queued callback is expected to return True once the
            # workflow has reached one of TaskStatus.final_states().
            if callback() is True:
                break

        self.process_completed_task()

    except Exception as e:
        import traceback

        err = traceback.format_exc()
        Logger.critical(
            f"A fatal error occurred while monitoring workflow = '{self.wid}', exiting: {e}: {err}"
        )

        try:
            self.database.workflowmetadata.status = TaskStatus.FAILED
            self.database.workflowmetadata.error = err
            self.database.commit()

            self.get_engine().stop_engine()
            if self.dbcontainer:
                self.dbcontainer.stop()

            self.database.close()
        except Exception as cleanup_error:
            # f-string formatting so that reporting this secondary failure
            # cannot raise a TypeError if Logger.WRITE_LOCATION is not a
            # string (for example when setting the log location is what
            # failed in the first place).
            Logger.critical(
                f"An additional fatal error occurred while trying to store Janis state: {cleanup_error}"
                f"\n\nSee the logfile for more information: {Logger.WRITE_LOCATION}"
            )

    Logger.close_file()

    return self