def _next_job(self, session):
    """Poll the database until a waiting job can be claimed.

    Repeatedly queries for the first ``McoreJob`` in ``runstates.WAITING``
    ordered by ``priority``; when one is found it is switched to
    ``runstates.RUNNING`` and committed. The query and the commit happen
    while holding ``self._lock`` so that the select-and-mark step is not
    interleaved with other holders of that lock.

    :param session: database session used for the query and the commit
        (SQLAlchemy-style API: ``query``/``commit``/``rollback``).
    :return: the claimed job, or ``None`` if ``self._running`` became false
        before a job could be claimed.
    """
    # NOTE(review): ProgressiveTimeout is defined elsewhere; presumably a
    # back-off sleeper (start, max, step) -- confirm against its definition.
    timeout = ProgressiveTimeout(0.5, 6.0, 0.5)
    job = None

    while job is None and self._running:
        try:
            with self._lock:
                job = (session.query(McoreJob)
                       .filter(McoreJob.state == runstates.WAITING)
                       .order_by(McoreJob.priority)
                       .first())
                if job is not None:
                    job.state = runstates.RUNNING
                    session.commit()
        except Exception as e:
            self._log.exception(e)
            job = None
            # A failed query/flush/commit leaves the session's transaction
            # in an invalid state; without a rollback every later poll would
            # fail the same way and this loop would never recover.
            try:
                session.rollback()
            except Exception as rollback_error:
                # Best effort: keep polling even if the rollback itself fails.
                self._log.exception(rollback_error)

        if job is None and self._running:
            # TODO Guess how to use a Condition (self._run_cvar) or an Event
            timeout.sleep()

    return job
def _run(self):
    """Worker-thread main loop: claim waiting jobs and execute them in turn.

    For every job returned by ``self._next_job`` the method:

    1. logs the script and its environment,
    2. opens ``job.output`` to receive the script's stdout/stderr,
    3. records ``hostname``/``started`` on the job and persists them,
    4. writes the script to a temporary ``.sh`` file and runs it through
       ``self._run_job_process``,
    5. logs the final state and persists the job again.

    The loop ends when ``self._running`` becomes false or ``_next_job``
    returns ``None``. Any exception escaping the loop is logged and ends
    the thread; the session is always closed on the way out.
    """
    self._log.debug("Run thread ready")

    session = self._create_session()
    try:
        while self._running:
            set_thread_title()

            job = self._next_job(session)
            if job is None:
                break

            set_thread_title("{}".format(job.name))
            self._log.debug("Running task [{}] {} ...".format(job.id, job.name))

            # Prepare execution
            script, env = job.script, job.env

            sb = ["Script:\n", script]
            # ``env`` may be empty (or None, which Popen accepts as
            # "inherit the parent environment"); nothing to list then.
            if env:
                sb.extend("\n{}={}".format(k, v) for k, v in env.items())
            self._log.debug("".join(sb))

            cwd = self._conf.get("working_directory")
            if cwd is not None:
                cwd = os.path.abspath(cwd)

            # The context manager guarantees the output file is closed even
            # if persisting the job or creating the script file fails.
            with open(job.output, "w") as output:
                job.hostname = self.hostname
                job.started = datetime.now()
                self._update_job(session, job)

                # Prepare the script file
                fd, script_path = tempfile.mkstemp(prefix=job.name + "-", suffix=".sh")
                try:
                    with os.fdopen(fd, "w") as f:
                        f.write(script)

                    # Run the script
                    self._run_job_process(session, job, script_path, output, cwd, env)
                finally:
                    # Never leave the temporary script behind.
                    if os.path.exists(script_path):
                        os.remove(script_path)

            if job.state == runstates.FINISHED:
                self._log.debug("Job finished [{}] {}".format(job.id, job.name))
            elif job.state == runstates.ABORTED:
                self._log.debug("Job aborted [{}] {}".format(job.id, job.name))
            else:
                self._log.debug("Job failed [{}] {}".format(job.id, job.name))

            self._update_job(session, job)

    except:
        # Thread boundary: log whatever escaped so the failure is visible.
        self._log.exception("Unexpected exception in thread {}".format(current_thread().name))

    finally:
        session.close()
        self._log.debug("Run thread finished")


def _run_job_process(self, session, job, script_path, output, cwd, env):
    """Run the job script under bash and record the outcome on ``job``.

    The child is started in its own session (``preexec_fn=os.setsid``) so
    that ``os.killpg(process.pid, ...)`` reaches the whole process group.
    While the child runs, ``job.state`` is refreshed from the database; the
    wait stops when the state is no longer ``RUNNING``, the child exits, or
    ``self._kill_threads`` is set.

    Resulting job fields:

    * child still alive after the wait -> SIGTERM the group, wait for it,
      state ``ABORTED`` with the child's return code;
    * child exited and state is ``ABORTING`` -> state ``ABORTED``
      (``exitcode`` is left untouched);
    * otherwise -> ``FINISHED`` on return code 0, else ``FAILED``, with the
      return code stored in ``exitcode``;
    * any exception -> ``FAILED`` with ``exit_codes.RUN_THREAD_EXCEPTION``.

    :param session: database session used to refresh ``job.state``.
    :param job: the job being executed; mutated in place, not persisted here.
    :param script_path: path of the shell script to execute.
    :param output: open file object receiving the child's stdout and stderr.
    :param cwd: working directory for the child, or ``None``.
    :param env: environment mapping for the child.
    """
    # Stays None if Popen itself fails, so the cleanup below knows there is
    # no child to reap instead of tripping over an unbound name.
    process = None
    try:
        process = subprocess.Popen(
            args=["/bin/bash", script_path],
            stdin=None,
            stdout=output,
            stderr=subprocess.STDOUT,
            cwd=cwd,
            env=env,
            preexec_fn=os.setsid)

        set_thread_title("{} PID={}".format(job.name, process.pid))

        session.refresh(job, ["state"])
        timeout = ProgressiveTimeout(0.5, 6.0, 0.5)
        while job.state == runstates.RUNNING and process.poll() is None and not self._kill_threads:
            timeout.sleep()
            session.refresh(job, ["state"])

        job.finished = datetime.now()

        if process.poll() is None:
            # The wait ended for a reason other than the child exiting
            # (state change or thread shutdown): terminate the group.
            self._log.info("Killing job [{}] {} ...".format(job.id, job.name))
            os.killpg(process.pid, signal.SIGTERM)
            timeout = ProgressiveTimeout(0.5, 6.0, 0.5)
            while process.poll() is None:
                timeout.sleep()

            job.state = runstates.ABORTED
            job.exitcode = process.returncode

        elif job.state == runstates.ABORTING:
            job.state = runstates.ABORTED

        else:
            job.state = runstates.FINISHED if process.returncode == 0 else runstates.FAILED
            job.exitcode = process.returncode

    except Exception as e:
        self._log.exception(e)
        job.state = runstates.FAILED
        job.finished = datetime.now()
        job.exitcode = exit_codes.RUN_THREAD_EXCEPTION

    finally:
        if process is not None:
            try:
                # Last resort: make sure no child outlives this call.
                if process.poll() is None:
                    self._log.info("Killing job [{}] {} ...".format(job.id, job.name))
                    os.killpg(process.pid, signal.SIGKILL)
                    process.wait()
            except Exception:
                self._log.exception("Exception while waiting for process {} to finish".format(process.pid))