Exemple #1
0
 def kill_empty_zygote(self, zygote, sig=signal.SIGQUIT):
     """Send ``sig`` (SIGQUIT by default) to a zygote that has zero workers.

     ``zygote`` must expose ``worker_count`` and ``pid``. If the zygote
     still has workers, nothing is sent and nothing is logged.
     """
     # The only valid time to kill a zygote is when it has no workers left.
     if zygote.worker_count != 0:
         return
     # Hand the pid to the logger as an argument so the message is only
     # formatted when the record is actually emitted.
     self.logger.info("killing zygote with pid %d", zygote.pid)
     safe_kill(zygote.pid, sig)
Exemple #2
0
 def kill_empty_zygote(self, zygote, sig=signal.SIGQUIT):
     """Send ``sig`` (SIGQUIT by default) to a zygote that has zero workers.

     ``zygote`` must expose ``worker_count`` and ``pid``. If the zygote
     still has workers, nothing is sent and nothing is logged.
     """
     # The only valid time to kill a zygote is when it has no workers left.
     if zygote.worker_count != 0:
         return
     # Hand the pid to the logger as an argument so the message is only
     # formatted when the record is actually emitted.
     self.logger.info("killing zygote with pid %d", zygote.pid)
     safe_kill(zygote.pid, sig)
Exemple #3
0
 def kill_workers(self, num_workers_to_kill):
     """Kill a random selection of children and wait for them to exit.

     ``num_workers_to_kill`` pids are drawn at random from
     ``self.children``, each is passed to ``safe_kill``, and the call then
     waits on them via ``wait_for_pids``. If more workers are requested
     than currently exist, an error is logged and nothing is killed.
     """
     current_count = len(self.children)
     if num_workers_to_kill > current_count:
         self.logger.error(
             "Request to kill %d workers out of %d current workers", num_workers_to_kill, current_count
         )
         return
     # random.sample() only accepts sequences on Python 3.11+ (sets raise
     # TypeError), so sample from a list snapshot of the children. This is
     # a no-op change if self.children is already a list.
     worker_pids = random.sample(list(self.children), num_workers_to_kill)
     for pid in worker_pids:
         safe_kill(pid)
     wait_for_pids(worker_pids, self.WAIT_FOR_KILL_TIME, self.logger)
Exemple #4
0
 def kill_workers(self, num_workers_to_kill):
     """Kill a random selection of children and wait for them to exit.

     ``num_workers_to_kill`` pids are drawn at random from
     ``self.children``, each is passed to ``safe_kill``, and the call then
     waits on them via ``wait_for_pids``. If more workers are requested
     than currently exist, an error is logged and nothing is killed.
     """
     current_count = len(self.children)
     if num_workers_to_kill > current_count:
         self.logger.error(
             'Request to kill %d workers out of %d current workers',
             num_workers_to_kill,
             current_count
         )
         return
     # random.sample() only accepts sequences on Python 3.11+ (sets raise
     # TypeError), so sample from a list snapshot of the children. This is
     # a no-op change if self.children is already a list.
     worker_pids = random.sample(list(self.children), num_workers_to_kill)
     for pid in worker_pids:
         safe_kill(pid)
     wait_for_pids(worker_pids, self.WAIT_FOR_KILL_TIME, self.logger)
Exemple #5
0
    def kill_all_workers(self):
        """Send SIGQUIT to every child, wait for them, then exit.

        The wait is synchronous. This method does not return to its
        caller: it ends with ``sys.exit(0)``.
        """
        # Restore the default SIGCHLD disposition so the wait below is not
        # interrupted by SIGCHLDs.
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)

        self.logger.debug('zygote requesting kill on %d pids', len(self.children))
        # Only wait on the pids for which safe_kill reported a truthy result.
        signalled = {
            child_pid
            for child_pid in self.children
            if safe_kill(child_pid, signal.SIGQUIT)
        }
        wait_for_pids(signalled, self.WAIT_FOR_KILL_TIME, self.logger)
        self.logger.debug('zygote done killing children, terminating')
        sys.exit(0)
Exemple #6
0
    def transition_idle_workers(self):
        """Signal idle workers that belong to zygotes other than the current one.

        The first call of a transition records its start time. Workers are
        sent SIGQUIT until ``WAIT_FOR_KILL_TIME`` seconds have elapsed since
        that start, and SIGKILL afterwards. While any other zygote remains,
        the method reschedules itself on the IO loop after ``POLL_INTERVAL``
        seconds; once none remain, the recorded start time is cleared.
        """
        if not self.started_transition:
            self.started_transition = time.time()
        elapsed = time.time() - self.started_transition
        if elapsed > self.WAIT_FOR_KILL_TIME:
            self.logger.debug(
                "sending SIGKILL for transition because it was Too Damn Slow")
            sig = signal.SIGKILL
        else:
            sig = signal.SIGQUIT

        stale_zygotes = self.zygote_collection.other_zygotes(
            self.current_zygote)
        # While the current zygote is still a canary, the previous zygote
        # is excluded so its workers are not signalled.
        if (self.current_zygote.canary and self.prev_zygote
                and self.prev_zygote in stale_zygotes):
            stale_zygotes.remove(self.prev_zygote)

        stale_count = len(stale_zygotes)
        signalled = 0
        for old_zygote in stale_zygotes:
            for idle_worker in old_zygote.idle_workers():
                self.logger.debug("killing worker %d with signal %d",
                                  idle_worker.pid, sig)
                if safe_kill(idle_worker.pid, sig):
                    signalled += 1
        self.logger.info('Attempted to transition %d workers from %d zygotes',
                         signalled, stale_count)

        if not stale_count:
            # Nothing left to transition: forget the start time so the
            # next transition starts a fresh clock.
            self.started_transition = None
        else:
            # At least one other zygote remains, so poll again later. The
            # polling stops once other_zygotes() comes back empty.
            self.io_loop.add_timeout(time.time() + self.POLL_INTERVAL,
                                     self.transition_idle_workers)

        # Clean up zygotes that have no workers left before the next
        # iteration of the transition.
        for old_zygote in stale_zygotes:
            if old_zygote.worker_count == 0:
                self.kill_empty_zygote(old_zygote, sig)
Exemple #7
0
    def transition_idle_workers(self):
        """Signal idle workers that belong to zygotes other than the current one.

        The first call of a transition records its start time. Workers are
        sent SIGQUIT until ``WAIT_FOR_KILL_TIME`` seconds have elapsed since
        that start, and SIGKILL afterwards. While any other zygote remains,
        the method reschedules itself on the IO loop after ``POLL_INTERVAL``
        seconds; once none remain, the recorded start time is cleared.
        """
        if not self.started_transition:
            self.started_transition = time.time()
        elapsed = time.time() - self.started_transition
        if elapsed > self.WAIT_FOR_KILL_TIME:
            self.logger.debug("sending SIGKILL for transition because it was Too Damn Slow")
            sig = signal.SIGKILL
        else:
            sig = signal.SIGQUIT

        stale_zygotes = self.zygote_collection.other_zygotes(self.current_zygote)
        # While the current zygote is still a canary, the previous zygote
        # is excluded so its workers are not signalled.
        if self.current_zygote.canary and self.prev_zygote and self.prev_zygote in stale_zygotes:
            stale_zygotes.remove(self.prev_zygote)

        stale_count = len(stale_zygotes)
        signalled = 0
        for old_zygote in stale_zygotes:
            for idle_worker in old_zygote.idle_workers():
                self.logger.debug("killing worker %d with signal %d", idle_worker.pid, sig)
                if safe_kill(idle_worker.pid, sig):
                    signalled += 1
        self.logger.info('Attempted to transition %d workers from %d zygotes', signalled, stale_count)

        if not stale_count:
            # Nothing left to transition: forget the start time so the
            # next transition starts a fresh clock.
            self.started_transition = None
        else:
            # At least one other zygote remains, so poll again later. The
            # polling stops once other_zygotes() comes back empty.
            self.io_loop.add_timeout(time.time() + self.POLL_INTERVAL, self.transition_idle_workers)

        # Clean up zygotes that have no workers left before the next
        # iteration of the transition.
        for old_zygote in stale_zygotes:
            if old_zygote.worker_count == 0:
                self.kill_empty_zygote(old_zygote, sig)
Exemple #8
0
    def handle_protocol_msg(self, fd, events):
        """Read one message from the master socket and act on its type.

        ``fd`` must be the master socket's file descriptor. ``events`` is
        part of the callback signature but is not used in this method.
        Message types are checked in a fixed order; anything unrecognized
        is logged as a warning.
        """
        assert fd == self.master_socket.fileno()
        payload = self.master_socket.recv(self.RECV_SIZE)
        parsed = message.Message.parse(payload)
        kind = type(parsed)
        self.logger.debug('received message of type %s from pid %d',
                          kind.__name__, parsed.pid)

        if kind is message.MessageCanaryInit:
            self.logger.info(
                "Canary zygote initialized. Transitioning idle workers.")
            # The current zygote stops being the canary, and the handle on
            # the previous zygote is released.
            self.current_zygote.canary = False
            self.prev_zygote = None
            # Canary initialization succeeded, so workers can now be
            # transitioned on the next IO loop iteration.
            self.io_loop.add_callback(self.transition_idle_workers)
            return

        if kind is message.MessageWorkerStart:
            # A zygote spawned a new worker: record it under its parent
            # zygote together with its creation time.
            parent = self.zygote_collection[parsed.worker_ppid]
            if parent:
                parent.add_worker(parsed.pid, parsed.time_created)
            return

        if kind is message.MessageWorkerExitInitFail:
            # An init failure is only fatal when the current zygote is not
            # a canary.
            if not self.current_zygote.canary:
                self.logger.error("A worker initialization failed, giving up")
                self.stop()
            return

        if kind is message.MessageWorkerExit:
            # A worker exited; here parsed.pid identifies the zygote and
            # parsed.child_pid the worker that went away.
            parent = self.zygote_collection[parsed.pid]
            if not parent:
                return

            parent.remove_worker(parsed.child_pid)
            if parent.shutting_down:
                self.logger.debug(
                    'Removed a worker from shutting down zygote %d, %d left',
                    parsed.pid, len(parent.workers()))
                return
            self.logger.debug('Removed a worker from zygote %d, %d left',
                              parsed.pid, len(parent.workers()))

            if self.stopped:
                return
            if parent in (self.current_zygote, self.prev_zygote):
                # Only respawn while below quota; this is how the number
                # of workers is scaled down.
                if self.num_workers > parent.worker_count:
                    parent.request_spawn()
            else:
                # Not a zygote we still care about: ask it to shut down.
                parent.request_shut_down()
            return

        if kind is message.MessageHTTPBegin:
            # A worker started servicing an HTTP request.
            worker = self.zygote_collection.get_worker(parsed.pid)
            if worker:
                worker.start_request(parsed.remote_ip, parsed.http_line)
            return

        if kind is message.MessageHTTPEnd:
            # A worker finished servicing an HTTP request.
            worker = self.zygote_collection.get_worker(parsed.pid)
            if not worker:
                return
            worker.end_request()
            if self.max_requests is not None and worker.request_count >= self.max_requests:
                self.logger.info(
                    'Worker %d reached max_requests %d, killing it',
                    worker.pid, self.max_requests)
                safe_kill(worker.pid, signal.SIGQUIT)
            return

        self.logger.warning('master got unexpected message of type %s',
                            kind)
Exemple #9
0
    def handle_protocol_msg(self, fd, events):
        """Read one message from the master socket and act on its type.

        ``fd`` must be the master socket's file descriptor. ``events`` is
        part of the callback signature but is not used in this method.
        Message types are checked in a fixed order; anything unrecognized
        is logged as a warning.
        """
        assert fd == self.master_socket.fileno()
        payload = self.master_socket.recv(self.RECV_SIZE)
        parsed = message.Message.parse(payload)
        kind = type(parsed)
        self.logger.debug('received message of type %s from pid %d', kind.__name__, parsed.pid)

        if kind is message.MessageCanaryInit:
            self.logger.info("Canary zygote initialized. Transitioning idle workers.")
            # The current zygote stops being the canary, and the handle on
            # the previous zygote is released.
            self.current_zygote.canary = False
            self.prev_zygote = None
            # Canary initialization succeeded, so workers can now be
            # transitioned on the next IO loop iteration.
            self.io_loop.add_callback(self.transition_idle_workers)
            return

        if kind is message.MessageWorkerStart:
            # A zygote spawned a new worker: record it under its parent
            # zygote together with its creation time.
            parent = self.zygote_collection[parsed.worker_ppid]
            if parent:
                parent.add_worker(parsed.pid, parsed.time_created)
            return

        if kind is message.MessageWorkerExitInitFail:
            # An init failure is only fatal when the current zygote is not
            # a canary.
            if not self.current_zygote.canary:
                self.logger.error("A worker initialization failed, giving up")
                self.stop()
            return

        if kind is message.MessageWorkerExit:
            # A worker exited; here parsed.pid identifies the zygote and
            # parsed.child_pid the worker that went away.
            parent = self.zygote_collection[parsed.pid]
            if not parent:
                return

            parent.remove_worker(parsed.child_pid)
            if parent.shutting_down:
                self.logger.debug('Removed a worker from shutting down zygote %d, %d left', parsed.pid, len(parent.workers()))
                return
            self.logger.debug('Removed a worker from zygote %d, %d left', parsed.pid, len(parent.workers()))

            if self.stopped:
                return
            if parent in (self.current_zygote, self.prev_zygote):
                # Only respawn while below quota; this is how the number
                # of workers is scaled down.
                if self.num_workers > parent.worker_count:
                    parent.request_spawn()
            else:
                # Not a zygote we still care about: ask it to shut down.
                parent.request_shut_down()
            return

        if kind is message.MessageHTTPBegin:
            # A worker started servicing an HTTP request.
            worker = self.zygote_collection.get_worker(parsed.pid)
            if worker:
                worker.start_request(parsed.remote_ip, parsed.http_line)
            return

        if kind is message.MessageHTTPEnd:
            # A worker finished servicing an HTTP request.
            worker = self.zygote_collection.get_worker(parsed.pid)
            if not worker:
                return
            worker.end_request()
            if self.max_requests is not None and worker.request_count >= self.max_requests:
                self.logger.info('Worker %d reached max_requests %d, killing it', worker.pid, self.max_requests)
                safe_kill(worker.pid, signal.SIGQUIT)
            return

        self.logger.warning('master got unexpected message of type %s', kind)