Exemple #1
0
        def startit():
            if not started:

                if process.get_state() in RUNNING_STATES:
                    raise RPCError(Faults.ALREADY_STARTED, name)

                process.spawn()

                if process.spawnerr:
                    raise RPCError(Faults.SPAWN_ERROR, name)

                # we use a list here to fake out lexical scoping;
                # using a direct assignment to 'started' in the
                # function appears to not work (symptom: 2nd or 3rd
                # call through, it forgets about 'started', claiming
                # it's undeclared).
                started.append(monotonic_time())

            if not wait or not startsecs:
                return True

            t = monotonic_time()
            runtime = (t - started[0])
            state = process.get_state()

            if state not in (ProcessStates.STARTING, ProcessStates.RUNNING):
                raise RPCError(Faults.ABNORMAL_TERMINATION, name)

            if runtime < startsecs:
                return NOT_DONE_YET

            if state == ProcessStates.RUNNING:
                return True

            raise RPCError(Faults.ABNORMAL_TERMINATION, name)
Exemple #2
0
    def transition(self):
        now = monotonic_time()
        state = self.state

        logger = self.config.options.logger

        if self.config.options.mood > SupervisorStates.RESTARTING:
            # dont start any processes if supervisor is shutting down
            if state == ProcessStates.EXITED:
                if self.config.autorestart:
                    if self.config.autorestart is RestartUnconditionally:
                        # EXITED -> STARTING
                        self.spawn()
                    else: # autorestart is RestartWhenExitUnexpected
                        if self.exitstatus not in self.config.exitcodes:
                            # EXITED -> STARTING
                            self.spawn()
            elif state == ProcessStates.STOPPED and not self.laststart:
                if self.config.autostart:
                    # STOPPED -> STARTING
                    self.spawn()
            elif state == ProcessStates.BACKOFF:
                if self.backoff <= self.config.startretries:
                    if now > self.delay:
                        # BACKOFF -> STARTING
                        self.spawn()

        if state == ProcessStates.STARTING:
            if now - self.laststart > self.config.startsecs:
                # STARTING -> RUNNING if the proc has started
                # successfully and it has stayed up for at least
                # proc.config.startsecs,
                self.delay = 0
                self.backoff = 0
                self._assertInState(ProcessStates.STARTING)
                self.change_state(ProcessStates.RUNNING)
                msg = (
                    'entered RUNNING state, process has stayed up for '
                    '> than %s seconds (startsecs)' % self.config.startsecs)
                logger.info('success: %s %s' % (self.config.name, msg))

        if state == ProcessStates.BACKOFF:
            if self.backoff > self.config.startretries:
                # BACKOFF -> FATAL if the proc has exceeded its number
                # of retries
                self.give_up()
                msg = ('entered FATAL state, too many start retries too '
                       'quickly')
                logger.info('gave up: %s %s' % (self.config.name, msg))

        elif state == ProcessStates.STOPPING:
            time_left = self.delay - now
            if time_left <= 0:
                # kill processes which are taking too long to stop with a final
                # sigkill.  if this doesn't kill it, the process will be stuck
                # in the STOPPING state forever.
                self.config.options.logger.warn(
                    'killing %r (%s) with SIGKILL' % (self.config.name,
                                                      self.pid))
                self.kill(signal.SIGKILL)
Exemple #3
0
 def _spawn_as_parent(self, pid):
     # Parent
     self.pid = pid
     options = self.config.options
     options.close_child_pipes(self.pipes)
     options.logger.info('spawned: %r with pid %s' % (self.config.name, pid))
     self.spawnerr = None
     self.delay = monotonic_time() + self.config.startsecs
     options.pidhistory[pid] = self
     return pid
Exemple #4
0
 def dispatch(self):
     while self.event_buffer:
         # dispatch the oldest event
         event = self.event_buffer.pop(0)
         ok = self._dispatchEvent(event)
         if not ok:
             # if we can't dispatch an event, rebuffer it and stop trying
             # to process any further events in the buffer
             self._acceptEvent(event, head=True)
             break
     self.last_dispatch = monotonic_time()
Exemple #5
0
 def transition(self):
     processes = self.processes.values()
     dispatch_capable = False
     for process in processes:
         process.transition()
         # this is redundant, we do it in _dispatchEvent too, but we
         # want to reduce function call overhead
         if process.state == ProcessStates.RUNNING:
             if process.listener_state == EventListenerStates.READY:
                 dispatch_capable = True
     if dispatch_capable:
         if self.dispatch_throttle:
             now = monotonic_time()
             if now - self.last_dispatch < self.dispatch_throttle:
                 return
         self.dispatch()
Exemple #6
0
 def tick(self, now=None):
     """ Send one or more 'tick' events when the timeslice related to
     the period for the event type rolls over """
     if now is None:
         # now won't be None in unit tests
         now = monotonic_time()
     for event in events.TICK_EVENTS:
         period = event.period
         last_tick = self.ticks.get(period)
         if last_tick is None:
             # we just started up
             last_tick = self.ticks[period] = timeslice(period, now)
         this_tick = timeslice(period, now)
         if this_tick != last_tick:
             self.ticks[period] = this_tick
             events.notify(event(this_tick, self))
Exemple #7
0
    def change_state(self, new_state, expected=True):
        old_state = self.state
        if new_state is old_state:
            # exists for unit tests
            return False

        event_class = self.event_map.get(new_state)
        if event_class is not None:
            event = event_class(self, old_state, expected)
            events.notify(event)

        if new_state == ProcessStates.BACKOFF:
            now = monotonic_time()
            self.backoff = self.backoff + 1
            self.delay = now + self.backoff

        self.state = new_state
Exemple #8
0
    def shutdown_report(self):
        unstopped = []

        for group in self.process_groups.values():
            unstopped.extend(group.get_unstopped_processes())

        if unstopped:
            # throttle 'waiting for x to die' reports
            now = monotonic_time()
            if now > (self.lastshutdownreport + 3): # every 3 secs
                names = [ p.config.name for p in unstopped ]
                namestr = ', '.join(names)
                self.options.logger.info('waiting for %s to die' % namestr)
                self.lastshutdownreport = now
                for proc in unstopped:
                    state = getProcessStateDescription(proc.get_state())
                    self.options.logger.blather(
                        '%s state: %s' % (proc.config.name, state))
        return unstopped
Exemple #9
0
    def getProcessInfo(self, name):
        """ Get info about a process named name

        @param string name The name of the process (or 'group:name')
        @return struct result     A structure containing data about the process
        """
        self._update('getProcessInfo')

        group, process = self._getGroupAndProcess(name)

        if process is None:
            raise RPCError(Faults.BAD_NAME, name)

        start = int(process.laststart)
        stop = int(process.laststop)
        now = int(monotonic_time())

        state = process.get_state()
        spawnerr = process.spawnerr or ''
        exitstatus = process.exitstatus or 0
        stdout_logfile = process.config.stdout_logfile or ''
        stderr_logfile = process.config.stderr_logfile or ''

        info = {
            'name':process.config.name,
            'group':group.config.name,
            'start':start,
            'stop':stop,
            'now':now,
            'state':state,
            'statename':getProcessStateDescription(state),
            'spawnerr':spawnerr,
            'exitstatus':exitstatus,
            'logfile':stdout_logfile, # b/c alias
            'stdout_logfile':stdout_logfile,
            'stderr_logfile':stderr_logfile,
            'pid':process.pid,
            }

        description = self._interpretProcessInfo(info)
        info['description'] = description
        return info
Exemple #10
0
    def finish(self, pid, sts):
        """ The process was reaped and we need to report and manage its state
        """
        self.drain()

        es, msg = decode_wait_status(sts)

        now = monotonic_time()
        self.laststop = now
        processname = self.config.name

        tooquickly = now - self.laststart < self.config.startsecs
        exit_expected = es in self.config.exitcodes

        if self.killing:
            # likely the result of a stop request
            # implies STOPPING -> STOPPED
            self.killing = 0
            self.delay = 0
            self.exitstatus = es

            msg = "stopped: %s (%s)" % (processname, msg)
            self._assertInState(ProcessStates.STOPPING)
            self.change_state(ProcessStates.STOPPED)

        elif tooquickly:
            # the program did not stay up long enough to make it to RUNNING
            # implies STARTING -> BACKOFF
            self.exitstatus = None
            self.spawnerr = 'Exited too quickly (process log may have details)'
            msg = "exited: %s (%s)" % (processname, msg + "; not expected")
            self._assertInState(ProcessStates.STARTING)
            self.change_state(ProcessStates.BACKOFF)

        else:
            # this finish was not the result of a stop request, the
            # program was in the RUNNING state but exited implies
            # RUNNING -> EXITED
            self.delay = 0
            self.backoff = 0
            self.exitstatus = es

            if self.state == ProcessStates.STARTING:
                # XXX I don't know under which circumstances this
                # happens, but in the wild, there is a transition that
                # subverts the RUNNING state (directly from STARTING
                # to EXITED), so we perform the correct transition
                # here.
                self.change_state(ProcessStates.RUNNING)

            self._assertInState(ProcessStates.RUNNING)

            if exit_expected:
                # expected exit code
                msg = "exited: %s (%s)" % (processname, msg + "; expected")
                self.change_state(ProcessStates.EXITED, expected=True)
            else:
                # unexpected exit code
                self.spawnerr = 'Bad exit code %s' % es
                msg = "exited: %s (%s)" % (processname, msg + "; not expected")
                self.change_state(ProcessStates.EXITED, expected=False)

        self.config.options.logger.info(msg)

        self.pid = 0
        self.config.options.close_parent_pipes(self.pipes)
        self.pipes = {}
        self.dispatchers = {}

        # if we died before we processed the current event (only happens
        # if we're an event listener), notify the event system that this
        # event was rejected so it can be processed again.
        if self.event is not None:
            # Note: this should only be true if we were in the BUSY
            # state when finish() was called.
            events.notify(events.EventRejectedEvent(self, self.event))
            self.event = None
Exemple #11
0
    def kill(self, sig):
        """Send a signal to the subprocess.  This may or may not kill it.

        Return None if the signal was sent, or an error message string
        if an error occurred or if the subprocess is not running.
        """
        now = monotonic_time()
        options = self.config.options

        # Properly stop processes in BACKOFF state.
        if self.state == ProcessStates.BACKOFF:
            msg = ("Attempted to kill %s, which is in BACKOFF state." %
                   (self.config.name))
            options.logger.debug(msg)
            self.change_state(ProcessStates.STOPPED)
            return None

        if not self.pid:
            msg = ("attempted to kill %s with sig %s but it wasn't running" %
                   (self.config.name, signame(sig)))
            options.logger.debug(msg)
            return msg

        #If we're in the stopping state, then we've already sent the stop
        #signal and this is the kill signal
        if self.state == ProcessStates.STOPPING:
            killasgroup = self.config.killasgroup
        else:
            killasgroup = self.config.stopasgroup

        as_group = ""
        if killasgroup:
            as_group = "process group "

        options.logger.debug('killing %s (pid %s) %swith signal %s'
                             % (self.config.name,
                                self.pid,
                                as_group,
                                signame(sig))
                             )

        # RUNNING/STARTING/STOPPING -> STOPPING
        self.killing = 1
        self.delay = now + self.config.stopwaitsecs
        # we will already be in the STOPPING state if we're doing a
        # SIGKILL as a result of overrunning stopwaitsecs
        self._assertInState(ProcessStates.RUNNING,ProcessStates.STARTING,
                            ProcessStates.STOPPING)
        self.change_state(ProcessStates.STOPPING)

        pid = self.pid
        if killasgroup:
            # send to the whole process group instead
            pid = -self.pid

        try:
            options.kill(pid, sig)
        except:
            io = StringIO()
            traceback.print_exc(file=io)
            tb = io.getvalue()
            msg = 'unknown problem killing %s (%s):%s' % (self.config.name,
                                                          self.pid, tb)
            options.logger.critical(msg)
            self.change_state(ProcessStates.UNKNOWN)
            self.pid = 0
            self.killing = 0
            self.delay = 0
            return msg

        return None
Exemple #12
0
    def spawn(self):
        """Start the subprocess.  It must not be running already.

        Return the process id.  If the fork() call fails, return None.
        """
        options = self.config.options

        if self.pid:
            msg = 'process %r already running' % self.config.name
            options.logger.warn(msg)
            return

        self.killing = 0
        self.spawnerr = None
        self.exitstatus = None
        self.system_stop = 0
        self.administrative_stop = 0
        self.laststart = monotonic_time()

        self._assertInState(ProcessStates.EXITED, ProcessStates.FATAL,
                            ProcessStates.BACKOFF, ProcessStates.STOPPED)

        self.change_state(ProcessStates.STARTING)

        try:
            filename, argv = self.get_execv_args()
        except ProcessException as what:
            self.record_spawnerr(what.args[0])
            self._assertInState(ProcessStates.STARTING)
            self.change_state(ProcessStates.BACKOFF)
            return

        try:
            self.dispatchers, self.pipes = self.config.make_dispatchers(self)
        except OSError as why:
            code = why.args[0]
            if code == errno.EMFILE:
                # too many file descriptors open
                msg = 'too many open files to spawn %r' % self.config.name
            else:
                msg = 'unknown error: %s' % errno.errorcode.get(code, code)
            self.record_spawnerr(msg)
            self._assertInState(ProcessStates.STARTING)
            self.change_state(ProcessStates.BACKOFF)
            return

        try:
            pid = options.fork()
        except OSError as why:
            code = why.args[0]
            if code == errno.EAGAIN:
                # process table full
                msg  = ('Too many processes in process table to spawn %r' %
                        self.config.name)
            else:
                msg = 'unknown error: %s' % errno.errorcode.get(code, code)

            self.record_spawnerr(msg)
            self._assertInState(ProcessStates.STARTING)
            self.change_state(ProcessStates.BACKOFF)
            options.close_parent_pipes(self.pipes)
            options.close_child_pipes(self.pipes)
            return

        if pid != 0:
            return self._spawn_as_parent(pid)

        else:
            return self._spawn_as_child(filename, argv)