def startit():
    if not started:

        if process.get_state() in RUNNING_STATES:
            raise RPCError(Faults.ALREADY_STARTED, name)

        process.spawn()

        if process.spawnerr:
            raise RPCError(Faults.SPAWN_ERROR, name)

        # we use a list here to fake out lexical scoping;
        # using a direct assignment to 'started' in the
        # function appears to not work (symptom: 2nd or 3rd
        # call through, it forgets about 'started', claiming
        # it's undeclared).
        started.append(monotonic_time())

    if not wait or not startsecs:
        return True

    t = monotonic_time()
    runtime = (t - started[0])
    state = process.get_state()

    if state not in (ProcessStates.STARTING, ProcessStates.RUNNING):
        raise RPCError(Faults.ABNORMAL_TERMINATION, name)

    if runtime < startsecs:
        return NOT_DONE_YET

    if state == ProcessStates.RUNNING:
        return True

    raise RPCError(Faults.ABNORMAL_TERMINATION, name)
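# Note (illustrative, not part of the original source): startit follows the
# deferred-result convention used by supervisor's RPC layer -- the callable is
# polled repeatedly until it returns something other than NOT_DONE_YET.  A
# minimal sketch of such a polling driver, using a hypothetical `callbacks`
# list, might look like:
#
#     while callbacks:
#         callback = callbacks.pop(0)
#         value = callback()
#         if value is NOT_DONE_YET:
#             callbacks.append(callback)  # not finished; try again next pass
#         # otherwise the deferred call completed (returned True or raised)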
def transition(self):
    now = monotonic_time()
    state = self.state

    logger = self.config.options.logger

    if self.config.options.mood > SupervisorStates.RESTARTING:
        # don't start any processes if supervisor is shutting down
        if state == ProcessStates.EXITED:
            if self.config.autorestart:
                if self.config.autorestart is RestartUnconditionally:
                    # EXITED -> STARTING
                    self.spawn()
                else:  # autorestart is RestartWhenExitUnexpected
                    if self.exitstatus not in self.config.exitcodes:
                        # EXITED -> STARTING
                        self.spawn()

        elif state == ProcessStates.STOPPED and not self.laststart:
            if self.config.autostart:
                # STOPPED -> STARTING
                self.spawn()

        elif state == ProcessStates.BACKOFF:
            if self.backoff <= self.config.startretries:
                if now > self.delay:
                    # BACKOFF -> STARTING
                    self.spawn()

    if state == ProcessStates.STARTING:
        if now - self.laststart > self.config.startsecs:
            # STARTING -> RUNNING if the proc has started
            # successfully and it has stayed up for at least
            # proc.config.startsecs
            self.delay = 0
            self.backoff = 0
            self._assertInState(ProcessStates.STARTING)
            self.change_state(ProcessStates.RUNNING)
            msg = (
                'entered RUNNING state, process has stayed up for '
                '> than %s seconds (startsecs)' % self.config.startsecs)
            logger.info('success: %s %s' % (self.config.name, msg))

    if state == ProcessStates.BACKOFF:
        if self.backoff > self.config.startretries:
            # BACKOFF -> FATAL if the proc has exceeded its number
            # of retries
            self.give_up()
            msg = ('entered FATAL state, too many start retries too '
                   'quickly')
            logger.info('gave up: %s %s' % (self.config.name, msg))

    elif state == ProcessStates.STOPPING:
        time_left = self.delay - now
        if time_left <= 0:
            # kill processes which are taking too long to stop with a
            # final sigkill.  if this doesn't kill it, the process will
            # be stuck in the STOPPING state forever.
            self.config.options.logger.warn(
                'killing %r (%s) with SIGKILL' % (self.config.name,
                                                  self.pid))
            self.kill(signal.SIGKILL)
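# Note (summary for orientation, derived from the method above): transition()
# drives the per-process state machine once per supervisord loop iteration:
#
#     EXITED   -> STARTING   autorestart (unconditional or unexpected exit)
#     STOPPED  -> STARTING   autostart, only before the first start
#     BACKOFF  -> STARTING   retry, once `delay` has elapsed
#     STARTING -> RUNNING    process stayed up for at least startsecs
#     BACKOFF  -> FATAL      startretries exhausted
#     STOPPING               send SIGKILL once stopwaitsecs is overrun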
def _spawn_as_parent(self, pid):
    # Parent
    self.pid = pid
    options = self.config.options
    options.close_child_pipes(self.pipes)
    options.logger.info('spawned: %r with pid %s' % (self.config.name, pid))
    self.spawnerr = None
    self.delay = monotonic_time() + self.config.startsecs
    options.pidhistory[pid] = self
    return pid
def dispatch(self):
    while self.event_buffer:
        # dispatch the oldest event
        event = self.event_buffer.pop(0)
        ok = self._dispatchEvent(event)
        if not ok:
            # if we can't dispatch an event, rebuffer it and stop trying
            # to process any further events in the buffer
            self._acceptEvent(event, head=True)
            break
    self.last_dispatch = monotonic_time()
def transition(self):
    processes = self.processes.values()
    dispatch_capable = False
    for process in processes:
        process.transition()
        # this is redundant, we do it in _dispatchEvent too, but we
        # want to reduce function call overhead
        if process.state == ProcessStates.RUNNING:
            if process.listener_state == EventListenerStates.READY:
                dispatch_capable = True
    if dispatch_capable:
        if self.dispatch_throttle:
            now = monotonic_time()
            if now - self.last_dispatch < self.dispatch_throttle:
                return
        self.dispatch()
def tick(self, now=None):
    """ Send one or more 'tick' events when the timeslice related to
    the period for the event type rolls over """
    if now is None:
        # now won't be None in unit tests
        now = monotonic_time()
    for event in events.TICK_EVENTS:
        period = event.period

        last_tick = self.ticks.get(period)
        if last_tick is None:
            # we just started up
            last_tick = self.ticks[period] = timeslice(period, now)

        this_tick = timeslice(period, now)
        if this_tick != last_tick:
            self.ticks[period] = this_tick
            events.notify(event(this_tick, self))
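# Note (assumption, shown only for context): `timeslice` is expected to bucket
# a timestamp into fixed-width slices so each tick event fires exactly once
# per period.  A minimal sketch consistent with how tick() uses it:
#
#     def timeslice(period, when):
#         # e.g. period=60, when=125.7 -> 120; the value only changes when
#         # `when` crosses a period boundary, triggering a new tick event.
#         return int(when - (when % period))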
def change_state(self, new_state, expected=True):
    old_state = self.state
    if new_state is old_state:
        # exists for unit tests
        return False

    event_class = self.event_map.get(new_state)
    if event_class is not None:
        event = event_class(self, old_state, expected)
        events.notify(event)

    if new_state == ProcessStates.BACKOFF:
        now = monotonic_time()
        self.backoff = self.backoff + 1
        self.delay = now + self.backoff

    self.state = new_state
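# Worked example (illustrative, derived from the branch above): each entry
# into BACKOFF increments self.backoff and pushes the retry delay out
# linearly, in seconds:
#
#     1st failed start -> backoff=1, delay = now + 1
#     2nd failed start -> backoff=2, delay = now + 2
#     3rd failed start -> backoff=3, delay = now + 3
#
# transition() only respawns once monotonic_time() exceeds self.delay, and
# gives up (BACKOFF -> FATAL) once backoff exceeds config.startretries.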
def shutdown_report(self):
    unstopped = []

    for group in self.process_groups.values():
        unstopped.extend(group.get_unstopped_processes())

    if unstopped:
        # throttle 'waiting for x to die' reports
        now = monotonic_time()
        if now > (self.lastshutdownreport + 3):  # every 3 secs
            names = [p.config.name for p in unstopped]
            namestr = ', '.join(names)
            self.options.logger.info('waiting for %s to die' % namestr)
            self.lastshutdownreport = now
            for proc in unstopped:
                state = getProcessStateDescription(proc.get_state())
                self.options.logger.blather(
                    '%s state: %s' % (proc.config.name, state))
    return unstopped
def getProcessInfo(self, name):
    """ Get info about a process named name

    @param string name  The name of the process (or 'group:name')
    @return struct result  A structure containing data about the process
    """
    self._update('getProcessInfo')

    group, process = self._getGroupAndProcess(name)

    if process is None:
        raise RPCError(Faults.BAD_NAME, name)

    start = int(process.laststart)
    stop = int(process.laststop)
    now = int(monotonic_time())

    state = process.get_state()
    spawnerr = process.spawnerr or ''
    exitstatus = process.exitstatus or 0
    stdout_logfile = process.config.stdout_logfile or ''
    stderr_logfile = process.config.stderr_logfile or ''

    info = {
        'name': process.config.name,
        'group': group.config.name,
        'start': start,
        'stop': stop,
        'now': now,
        'state': state,
        'statename': getProcessStateDescription(state),
        'spawnerr': spawnerr,
        'exitstatus': exitstatus,
        'logfile': stdout_logfile,  # b/c alias
        'stdout_logfile': stdout_logfile,
        'stderr_logfile': stderr_logfile,
        'pid': process.pid,
        }

    description = self._interpretProcessInfo(info)
    info['description'] = description
    return info
def finish(self, pid, sts):
    """ The process was reaped and we need to report and manage its state
    """
    self.drain()

    es, msg = decode_wait_status(sts)

    now = monotonic_time()
    self.laststop = now
    processname = self.config.name

    tooquickly = now - self.laststart < self.config.startsecs
    exit_expected = es in self.config.exitcodes

    if self.killing:
        # likely the result of a stop request
        # implies STOPPING -> STOPPED
        self.killing = 0
        self.delay = 0
        self.exitstatus = es

        msg = "stopped: %s (%s)" % (processname, msg)
        self._assertInState(ProcessStates.STOPPING)
        self.change_state(ProcessStates.STOPPED)

    elif tooquickly:
        # the program did not stay up long enough to make it to RUNNING
        # implies STARTING -> BACKOFF
        self.exitstatus = None
        self.spawnerr = 'Exited too quickly (process log may have details)'
        msg = "exited: %s (%s)" % (processname, msg + "; not expected")
        self._assertInState(ProcessStates.STARTING)
        self.change_state(ProcessStates.BACKOFF)

    else:
        # this finish was not the result of a stop request, the
        # program was in the RUNNING state but exited
        # implies RUNNING -> EXITED
        self.delay = 0
        self.backoff = 0
        self.exitstatus = es

        if self.state == ProcessStates.STARTING:
            # XXX I don't know under which circumstances this
            # happens, but in the wild, there is a transition that
            # subverts the RUNNING state (directly from STARTING
            # to EXITED), so we perform the correct transition here.
            self.change_state(ProcessStates.RUNNING)

        self._assertInState(ProcessStates.RUNNING)

        if exit_expected:
            # expected exit code
            msg = "exited: %s (%s)" % (processname, msg + "; expected")
            self.change_state(ProcessStates.EXITED, expected=True)
        else:
            # unexpected exit code
            self.spawnerr = 'Bad exit code %s' % es
            msg = "exited: %s (%s)" % (processname, msg + "; not expected")
            self.change_state(ProcessStates.EXITED, expected=False)

    self.config.options.logger.info(msg)

    self.pid = 0
    self.config.options.close_parent_pipes(self.pipes)
    self.pipes = {}
    self.dispatchers = {}

    # if we died before we processed the current event (only happens
    # if we're an event listener), notify the event system that this
    # event was rejected so it can be processed again.
    if self.event is not None:
        # Note: this should only be true if we were in the BUSY
        # state when finish() was called.
        events.notify(events.EventRejectedEvent(self, self.event))
        self.event = None
def kill(self, sig):
    """Send a signal to the subprocess.  This may or may not kill it.

    Return None if the signal was sent, or an error message string
    if an error occurred or if the subprocess is not running.
    """
    now = monotonic_time()
    options = self.config.options

    # Properly stop processes in BACKOFF state.
    if self.state == ProcessStates.BACKOFF:
        msg = ("Attempted to kill %s, which is in BACKOFF state." %
               (self.config.name))
        options.logger.debug(msg)
        self.change_state(ProcessStates.STOPPED)
        return None

    if not self.pid:
        msg = ("attempted to kill %s with sig %s but it wasn't running" %
               (self.config.name, signame(sig)))
        options.logger.debug(msg)
        return msg

    # If we're in the stopping state, then we've already sent the stop
    # signal and this is the kill signal
    if self.state == ProcessStates.STOPPING:
        killasgroup = self.config.killasgroup
    else:
        killasgroup = self.config.stopasgroup

    as_group = ""
    if killasgroup:
        as_group = "process group "

    options.logger.debug('killing %s (pid %s) %swith signal %s'
                         % (self.config.name,
                            self.pid,
                            as_group,
                            signame(sig)))

    # RUNNING/STARTING/STOPPING -> STOPPING
    self.killing = 1
    self.delay = now + self.config.stopwaitsecs
    # we will already be in the STOPPING state if we're doing a
    # SIGKILL as a result of overrunning stopwaitsecs
    self._assertInState(ProcessStates.RUNNING, ProcessStates.STARTING,
                        ProcessStates.STOPPING)
    self.change_state(ProcessStates.STOPPING)

    pid = self.pid
    if killasgroup:
        # send to the whole process group instead
        pid = -self.pid

    try:
        options.kill(pid, sig)
    except:
        io = StringIO()
        traceback.print_exc(file=io)
        tb = io.getvalue()
        msg = 'unknown problem killing %s (%s):%s' % (self.config.name,
                                                      self.pid, tb)
        options.logger.critical(msg)
        self.change_state(ProcessStates.UNKNOWN)
        self.pid = 0
        self.killing = 0
        self.delay = 0
        return msg

    return None
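# Note (context): per kill(2) semantics, passing a negative pid sends the
# signal to every process in the process group whose id is -pid, which is how
# `pid = -self.pid` above implements stopasgroup/killasgroup.  This assumes
# the child was made a process group leader at spawn time (e.g. via setpgrp()).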
def spawn(self):
    """Start the subprocess.  It must not be running already.

    Return the process id.  If the fork() call fails, return None.
    """
    options = self.config.options

    if self.pid:
        msg = 'process %r already running' % self.config.name
        options.logger.warn(msg)
        return

    self.killing = 0
    self.spawnerr = None
    self.exitstatus = None
    self.system_stop = 0
    self.administrative_stop = 0

    self.laststart = monotonic_time()

    self._assertInState(ProcessStates.EXITED, ProcessStates.FATAL,
                        ProcessStates.BACKOFF, ProcessStates.STOPPED)

    self.change_state(ProcessStates.STARTING)

    try:
        filename, argv = self.get_execv_args()
    except ProcessException as what:
        self.record_spawnerr(what.args[0])
        self._assertInState(ProcessStates.STARTING)
        self.change_state(ProcessStates.BACKOFF)
        return

    try:
        self.dispatchers, self.pipes = self.config.make_dispatchers(self)
    except OSError as why:
        code = why.args[0]
        if code == errno.EMFILE:
            # too many file descriptors open
            msg = 'too many open files to spawn %r' % self.config.name
        else:
            msg = 'unknown error: %s' % errno.errorcode.get(code, code)
        self.record_spawnerr(msg)
        self._assertInState(ProcessStates.STARTING)
        self.change_state(ProcessStates.BACKOFF)
        return

    try:
        pid = options.fork()
    except OSError as why:
        code = why.args[0]
        if code == errno.EAGAIN:
            # process table full
            msg = ('Too many processes in process table to spawn %r' %
                   self.config.name)
        else:
            msg = 'unknown error: %s' % errno.errorcode.get(code, code)
        self.record_spawnerr(msg)
        self._assertInState(ProcessStates.STARTING)
        self.change_state(ProcessStates.BACKOFF)
        options.close_parent_pipes(self.pipes)
        options.close_child_pipes(self.pipes)
        return

    if pid != 0:
        return self._spawn_as_parent(pid)
    else:
        return self._spawn_as_child(filename, argv)
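# Note (context, assuming options.fork() wraps os.fork()): fork() returns the
# child's pid in the parent and 0 in the child, which is why the final branch
# dispatches to _spawn_as_parent(pid) in one process and
# _spawn_as_child(filename, argv) in the other.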