def update_broken_commands(self):
    """Mark unfinished commands whose process no longer exists as finished.

    Every ``Command`` record whose ``exit_code`` is still unset is checked
    with ``self.pid_exists``. When the pid is not found, the record gets
    ``progress`` 1.0, ``exit_code`` 666 and the current time as
    ``finish_ts``. When the pid is still present, only a warning is logged
    and the record is left untouched. All changes are committed in a single
    transaction.

    Raises:
        Exception: any error raised while querying or committing is logged,
            the transaction is rolled back and the error is re-raised.
    """
    s = Session()
    s.begin()
    try:
        for c in s.query(Command).filter_by(exit_code=None):
            if not self.pid_exists(c.pid):
                c.progress = 1.0
                # 666 is the exit code this method stores for broken commands
                c.exit_code = 666
                c.finish_ts = int(time.time())
                s.add(c)
                logger.info(
                    'Command {}, pid {} is considered broken, will be marked as '
                    'finished'.format(
                        c.uid,
                        c.pid
                    )
                )
            else:
                # Logger.warn is a deprecated alias of Logger.warning
                logger.warning(
                    'Command {}, pid {} is considered broken, but process is running'.format(
                        c.uid,
                        c.pid
                    )
                )
        s.commit()
    except Exception:
        logger.exception('Failed to update broken commands')
        s.rollback()
        raise
def try_find_nonfailed_subprocess_and_status(self, job_id, task_id):
    """Find a non-failed command for ``task_id`` and return its uid and status.

    In-memory subprocesses are checked first: one matches when its status
    carries the same ``task_id`` and it either exited with code 0 or has
    not reached progress 1.0 yet. Otherwise the most recently started
    ``Command`` record for the task is loaded from the database and is
    returned only when it is complete (progress 1.0) with exit code 0.

    Args:
        job_id: used only as logging context; the lookup itself is done
            by ``task_id`` alone.
        task_id: id of the task to look up; a falsy value short-circuits
            to ``(None, None)``.

    Returns:
        A ``(uid, status)`` tuple, or ``(None, None)`` when no suitable
        command is found.
    """
    if not task_id:
        return None, None

    # it is enough to check only task_id and ignore job_id
    # items() instead of iteritems() keeps this loop working on Python 2 and 3
    for uid, proc in self.subprocesses.items():
        status = proc.status()
        if status.get('task_id') == task_id and (status['exit_code'] == 0 or status['progress'] < 1.0):
            return uid, status

    s = Session()
    # it is enough to check only task_id and ignore job_id
    query_start_ts = time.time()
    command = (
        s.query(Command)
        .filter(Command.task_id == task_id)
        .order_by(desc(Command.start_ts))
        .first()
    )
    logger.debug('find_nonfailed_subprocess query completed in {:.3f} seconds'.format(time.time() - query_start_ts))

    if not command:
        return None, None

    if command.progress != 1.0:
        # The record was never finalized, so its outcome cannot be trusted.
        cmd_logger.warning(
            "Command {} ({}) was interrupted by minion restart, but not marked as finished".format(
                command.uid,
                command.command,
            ),
            extra={
                'job_id': job_id,
                'task_id': task_id,
            },
        )
        return None, None

    if command.exit_code == 0:
        # Return the status payload itself (as the in-memory branch does),
        # not the bound ``status`` method.
        return command.uid, command.status()

    return None, None
def status(self, uid):
    """Return the status of the command identified by ``uid``.

    The in-memory subprocesses are consulted first; when ``uid`` is not
    among them the command record is loaded from the database.

    Raises:
        ValueError: if ``uid`` is known neither in memory nor in the database.
    """
    running = self.subprocesses
    if uid in running:
        return running[uid].status()

    stored = Session().query(Command).get(uid)
    if stored is not None:
        return stored.status()

    raise ValueError('Unknown command uid: {0}'.format(uid))
def run(self):
    """Start the subprocess, record it in the database and begin watching it.

    Sets ``self.start_ts``, ``self.process``, ``self.watcher`` and
    ``self.command``. The subprocess is created with stdout and stderr
    opened as streams.

    Raises:
        Exception: any error raised while creating the database record;
            the transaction is rolled back before the error propagates.
    """
    self.start_ts = int(time.time())
    self.process = Subprocess(self.cmd,
                              stdout=Subprocess.STREAM,
                              stderr=Subprocess.STREAM,
                              env=self.env,
                              io_loop=self.io_loop)

    # create db record
    s = Session()
    s.begin()
    try:
        command = minion.db.commands.Command(
            uid=self.uid,
            pid=self.process.pid,
            command=self.cmd_str,
            start_ts=int(time.time()),
            task_id=self.params.get('task_id'),
            job_id=self.params.get('job_id'),
        )

        # NOTE(review): this sets an attribute on the session object, not on
        # the command record -- confirm whether command.update_ts was meant.
        s.update_ts = int(time.time())
        s.add(command)
        s.commit()
    except Exception:
        # Do not leave the session inside a failed transaction.
        s.rollback()
        raise

    self.watcher = self.watch(command)

    self.command = command
def run(self):
    """Start the subprocess, record it in the database and begin watching it.

    Sets ``self.start_ts``, ``self.process``, ``self.watcher`` and
    ``self.command``. The subprocess is created with stdout and stderr
    opened as streams.

    Raises:
        Exception: any error raised while creating the database record;
            the transaction is rolled back before the error propagates.
    """
    self.start_ts = int(time.time())
    self.process = Subprocess(self.cmd,
                              stdout=Subprocess.STREAM,
                              stderr=Subprocess.STREAM,
                              env=self.env,
                              io_loop=self.io_loop)

    # create db record
    s = Session()
    s.begin()
    try:
        command = minion.db.commands.Command(
            uid=self.uid,
            pid=self.process.pid,
            command=self.cmd_str,
            start_ts=int(time.time()),
            task_id=self.params.get('task_id'),
        )

        # NOTE(review): this sets an attribute on the session object, not on
        # the command record -- confirm whether command.update_ts was meant.
        s.update_ts = int(time.time())
        s.add(command)
        s.commit()
    except Exception:
        # Do not leave the session inside a failed transaction.
        s.rollback()
        raise

    self.watcher = self.watch(command)

    self.command = command
def update_db_command(self):
    """Copy this object's progress and result fields to its db record.

    ``progress``, ``exit_code``, ``command_code`` and ``finish_ts`` are
    copied from ``self`` onto ``self.command`` and committed. Failures are
    logged and rolled back; they are not re-raised.
    """
    session = Session()
    session.begin()
    try:
        record = self.command
        # Same fields, same order as a sequence of plain assignments.
        for field in ('progress', 'exit_code', 'command_code', 'finish_ts'):
            setattr(record, field, getattr(self, field))
        session.add(record)
        session.commit()
    except Exception as err:
        logger.exception('Failed to update db command: {0}'.format(err))
        session.rollback()
def on_update_progress(self):
    """Store the watcher's current progress and output in the db record.

    Progress, exit code, stdout and stderr are taken from ``self.watcher``;
    ``command_code`` comes from ``self``. Once an exit code is present the
    artifacts (serialized as JSON) and ``finish_ts`` are stored as well.
    Failures are logged with ``self.log_extra`` and rolled back; they are
    not re-raised.
    """
    db = Session()
    db.begin()
    try:
        record = self.command

        record.progress = self.watcher.progress
        record.exit_code = self.watcher.exit_code
        record.command_code = self.command_code
        record.stdout = self.watcher.get_stdout()
        record.stderr = self.watcher.get_stderr()

        finished = record.exit_code is not None
        if finished:
            record.artifacts = json.dumps(self.artifacts)
            record.finish_ts = self.finish_ts

        db.add(record)
        db.commit()
    except Exception:
        cmd_logger.exception('Failed to update db command', extra=self.log_extra)
        db.rollback()
def unfinished_commands(self, finish_ts_gte=None):
    """Collect command statuses from the database and from memory.

    Args:
        finish_ts_gte: optional timestamp threshold. When truthy, only
            database commands with no ``finish_ts`` or with
            ``finish_ts >= finish_ts_gte`` are returned, and in-memory
            commands that finished before the threshold are skipped.
            When falsy, no filter is applied.

    Returns:
        A dict mapping command uid to the status returned by its
        ``status()`` method. In-memory statuses override database ones
        for the same uid.
    """
    res = {}

    # 1. Getting unfinished commands from local db
    s = Session()
    query = s.query(Command)
    if finish_ts_gte:
        # Build the OR clause only when there is something to filter on:
        # calling or_() with no arguments is deprecated in SQLAlchemy.
        query = query.filter(or_(
            # `== None` is intentional: SQLAlchemy turns it into IS NULL.
            Command.finish_ts == None,  # noqa: E711
            Command.finish_ts >= finish_ts_gte,
        ))

    for c in query:
        res[c.uid] = c.status()

    # 2. Updating with in-memory commands
    # items() instead of iteritems() keeps this loop working on Python 2 and 3
    for uid, sp in self.subprocesses.items():
        cmd_status = sp.status()
        if (finish_ts_gte and
                cmd_status['finish_ts'] and
                cmd_status['finish_ts'] < finish_ts_gte):
            continue
        res[uid] = cmd_status

    return res
def feed(self, s):
    """Append an output chunk and persist progress when it changed enough.

    ``self.progress`` is read before and after ``s`` is appended to
    ``self.output`` (presumably progress is derived from the collected
    output -- confirm against the class). When the two readings differ by
    more than 0.01, the new progress is written to ``self.command`` and
    committed.

    Database failures are logged and rolled back; they are never raised,
    so feeding output is best-effort with respect to persistence.

    Args:
        s: the output chunk to append.
    """
    prev_progress = self.progress
    self.output.append(s)

    if abs(self.progress - prev_progress) > 0.01:
        # Use a separate name so the session does not shadow the ``s`` chunk.
        session = None
        try:
            session = Session()
            session.begin()
            self.command.progress = self.progress
            session.add(self.command)
            session.commit()
        except Exception:
            logger.exception('pid {0}: failed to update db command'.format(
                self.subprocess.pid))
            # Do not leave the session inside a failed transaction.
            if session is not None:
                session.rollback()
def on_update_progress(self):
    """Write the watcher's latest state into the command's db record.

    The record receives progress, exit code, stdout and stderr from
    ``self.watcher`` and ``command_code`` from ``self``. When the exit
    code is set, JSON-encoded artifacts and ``finish_ts`` are saved too.
    Any failure is logged and the transaction rolled back without
    re-raising.
    """
    txn = Session()
    txn.begin()
    try:
        cmd = self.command

        # live values reported by the watcher
        cmd.progress = self.watcher.progress
        cmd.exit_code = self.watcher.exit_code
        cmd.command_code = self.command_code
        cmd.stdout = self.watcher.get_stdout()
        cmd.stderr = self.watcher.get_stderr()

        # final values, only once the command has an exit code
        if cmd.exit_code is not None:
            cmd.artifacts = json.dumps(self.artifacts)
            cmd.finish_ts = self.finish_ts

        txn.add(cmd)
        txn.commit()
    except Exception:
        logger.exception('Failed to update db command')
        txn.rollback()
def run(self):
    """Start the subprocess, record it in the database and begin watching it.

    Sets ``self.process`` and ``self.watcher``. The subprocess is created
    with stdout and stderr opened as streams.

    Raises:
        Exception: any error raised while creating the database record;
            the transaction is rolled back before the error propagates.
    """
    self.process = Subprocess(self.cmd,
                              stdout=Subprocess.STREAM,
                              stderr=Subprocess.STREAM,
                              env=self.env,
                              io_loop=self.io_loop)

    # create db record
    s = Session()
    s.begin()
    try:
        command = minion.db.commands.Command(
            uid=self.uid,
            pid=self.process.pid,
            command=self.cmd_str,
            start_ts=int(time.time()),
            task_id=self.params.get('task_id'),
        )
        # TODO: what about group_id, node, node_backend ?

        s.add(command)
        s.commit()
    except Exception:
        # Do not leave the session inside a failed transaction.
        s.rollback()
        raise

    self.watcher = self.watch(command)
def run(self):
    """Record the command, execute it and store the outcome.

    A ``Command`` record with no pid is created first. ``self.execute()``
    is then yielded; an exception from it is logged and kept in
    ``self.error`` instead of being propagated. Finally the record is
    updated with progress 1.0, exit and command codes (1 when
    ``self.error`` is truthy, 0 otherwise), ``finish_ts`` and the
    JSON-encoded artifacts.

    This is a generator function (it yields the result of
    ``self.execute()``).

    Raises:
        Exception: any error raised while creating the initial database
            record; the transaction is rolled back before it propagates.
            A failure of the final update is only logged and rolled back.
    """
    self.start_ts = int(time.time())

    # create db record
    s = Session()
    s.begin()
    try:
        command = minion.db.commands.Command(
            uid=self.uid,
            pid=None,
            command=self.COMMAND,
            start_ts=self.start_ts,
            task_id=self.params.get('task_id'),
            job_id=self.params.get('job_id'),
        )

        # NOTE(review): this sets an attribute on the session object, not on
        # the command record -- confirm whether command.update_ts was meant.
        s.update_ts = int(time.time())
        s.add(command)
        s.commit()
    except Exception:
        # Do not leave the session inside a failed transaction.
        s.rollback()
        raise

    try:
        yield self.execute()
    except Exception as e:
        cmd_logger.exception('Command execution failed', extra=self.log_extra)
        self.error = e

    self.finish_ts = int(time.time())

    s.begin()
    try:
        command.progress = 1.0
        command.exit_code = 1 if self.error else 0
        command.command_code = 1 if self.error else 0
        command.finish_ts = self.finish_ts
        command.artifacts = json.dumps(self.artifacts)
        s.add(command)
        s.commit()
    except Exception:
        cmd_logger.exception('Failed to update db command', extra=self.log_extra)
        s.rollback()
def run(self):
    """Record the command, execute it and store the outcome.

    A ``Command`` record with no pid is created first. ``self.execute()``
    is then called; an exception from it is logged and kept in
    ``self.error`` instead of being propagated. Finally the record is
    updated with progress 1.0, exit and command codes (1 when
    ``self.error`` is truthy, 0 otherwise) and ``finish_ts``.

    Raises:
        Exception: any error raised while creating the initial database
            record; the transaction is rolled back before it propagates.
            A failure of the final update is only logged and rolled back.
    """
    # create db record
    s = Session()
    s.begin()
    try:
        command = minion.db.commands.Command(
            uid=self.uid,
            pid=None,
            command=self.cmd_str,
            start_ts=int(time.time()),
            task_id=self.params.get('task_id'))
        # TODO: what about group_id, node, node_backend ?

        s.add(command)
        s.commit()
    except Exception:
        # Do not leave the session inside a failed transaction.
        s.rollback()
        raise

    try:
        self.execute()
    except Exception as e:
        # Keep the traceback in the log; only the exception object is stored.
        logger.exception('Command execution failed')
        self.error = e
        # TODO: raise?

    self.finish_ts = int(time.time())

    s.begin()
    try:
        command.progress = 1.0
        command.exit_code = 1 if self.error else 0
        command.command_code = 1 if self.error else 0
        command.finish_ts = self.finish_ts
        s.add(command)
        s.commit()
    except Exception:
        logger.exception('Failed to update db command')
        s.rollback()
def update_broken_commands(self):
    """Mark unfinished commands whose process no longer exists as finished.

    Every ``Command`` record whose ``exit_code`` is still unset is checked
    with ``self.pid_exists``. When the pid is not found, the record gets
    ``progress`` 1.0, ``exit_code`` 666 and the current time as
    ``finish_ts``. When the pid is still present, only a warning is logged
    and the record is left untouched. All changes are committed in a single
    transaction.

    Raises:
        Exception: any error raised while querying or committing is logged,
            the transaction is rolled back and the error is re-raised.
    """
    s = Session()
    s.begin()
    try:
        for c in s.query(Command).filter_by(exit_code=None):
            if not self.pid_exists(c.pid):
                c.progress = 1.0
                # 666 is the exit code this method stores for broken commands
                c.exit_code = 666
                c.finish_ts = int(time.time())
                s.add(c)
                logger.info('Command {0}, pid {1} is considered broken, '
                            'will be marked as finished'.format(
                                c.uid, c.pid))
            else:
                # Logger.warn is a deprecated alias of Logger.warning
                logger.warning('Command {0}, pid {1} is considered broken, '
                               'but process is running'.format(c.uid, c.pid))
        s.commit()
    except Exception:
        logger.exception('Failed to update broken commands')
        s.rollback()
        raise
def run(self):
    """Record the command, execute it and store the outcome.

    A ``Command`` record with no pid is created first. ``self.execute()``
    is then called; an exception from it is logged and kept in
    ``self.error`` instead of being propagated. Finally the record is
    updated with progress 1.0, exit and command codes (1 when
    ``self.error`` is truthy, 0 otherwise) and ``finish_ts``.

    Raises:
        Exception: any error raised while creating the initial database
            record; the transaction is rolled back before it propagates.
            A failure of the final update is only logged and rolled back.
    """
    self.start_ts = int(time.time())

    # create db record
    s = Session()
    s.begin()
    try:
        command = minion.db.commands.Command(
            uid=self.uid,
            pid=None,
            command=self.COMMAND,
            start_ts=self.start_ts,
            task_id=self.params.get('task_id'),
            job_id=self.params.get('job_id'),
        )

        # NOTE(review): this sets an attribute on the session object, not on
        # the command record -- confirm whether command.update_ts was meant.
        s.update_ts = int(time.time())
        s.add(command)
        s.commit()
    except Exception:
        # Do not leave the session inside a failed transaction.
        s.rollback()
        raise

    try:
        self.execute()
    except Exception as e:
        cmd_logger.exception('Command execution failed', extra=self.log_extra)
        self.error = e

    self.finish_ts = int(time.time())

    s.begin()
    try:
        command.progress = 1.0
        command.exit_code = 1 if self.error else 0
        command.command_code = 1 if self.error else 0
        command.finish_ts = self.finish_ts
        s.add(command)
        s.commit()
    except Exception:
        cmd_logger.exception('Failed to update db command', extra=self.log_extra)
        s.rollback()
def update_broken_commands(self):
    """Mark unfinished commands whose process no longer exists as finished.

    Every ``Command`` record whose ``exit_code`` is still unset is checked
    with ``self.pid_exists``. When the pid is not found, the record gets
    ``progress`` 1.0, ``exit_code`` 666 and the current time as
    ``finish_ts``. When the pid is still present, only a warning is logged
    and the record is left untouched. Per-command messages carry the
    command's ``task_id`` and ``job_id`` as logging extras. All changes are
    committed in a single transaction.

    Raises:
        Exception: any error raised while querying or committing is logged,
            the transaction is rolled back and the error is re-raised.
    """
    s = Session()
    s.begin()
    try:
        for c in s.query(Command).filter_by(exit_code=None):
            log_extra = {'task_id': c.task_id, 'job_id': c.job_id}
            if not self.pid_exists(c.pid):
                c.progress = 1.0
                # 666 is the exit code this method stores for broken commands
                c.exit_code = 666
                c.finish_ts = int(time.time())
                s.add(c)
                cmd_logger.info(
                    'Command {}, pid {} is considered broken, will be marked as '
                    'finished'.format(c.uid, c.pid),
                    extra=log_extra,
                )
            else:
                # warn() is a deprecated alias of warning()
                cmd_logger.warning(
                    'Command {}, pid {} is considered broken, but process is running'
                    .format(c.uid, c.pid),
                    extra=log_extra,
                )
        s.commit()
    except Exception:
        logger.exception('Failed to update broken commands')
        s.rollback()
        raise
def run(self):
    """Record the command, execute it and store the outcome.

    A ``Command`` record with no pid is created first. ``self.execute()``
    is then called; an exception from it is logged and kept in
    ``self.error`` instead of being propagated. Finally the record is
    updated with progress 1.0, exit and command codes (1 when
    ``self.error`` is truthy, 0 otherwise) and ``finish_ts``.

    Raises:
        Exception: any error raised while creating the initial database
            record; the transaction is rolled back before it propagates.
            A failure of the final update is only logged and rolled back.
    """
    # create db record
    s = Session()
    s.begin()
    try:
        command = minion.db.commands.Command(
            uid=self.uid,
            pid=None,
            command=self.cmd_str,
            start_ts=int(time.time()),
            task_id=self.params.get('task_id')
        )
        # TODO: what about group_id, node, node_backend ?

        s.add(command)
        s.commit()
    except Exception:
        # Do not leave the session inside a failed transaction.
        s.rollback()
        raise

    try:
        self.execute()
    except Exception as e:
        # Keep the traceback in the log; only the exception object is stored.
        logger.exception('Command execution failed')
        self.error = e
        # TODO: raise?

    self.finish_ts = int(time.time())

    s.begin()
    try:
        command.progress = 1.0
        command.exit_code = 1 if self.error else 0
        command.command_code = 1 if self.error else 0
        command.finish_ts = self.finish_ts
        s.add(command)
        s.commit()
    except Exception:
        logger.exception('Failed to update db command')
        s.rollback()