예제 #1
0
파일: arbiter.py 프로젝트: Jud/circus
class Arbiter(object):
    """Class used to control a list of watchers.

    Options:

    - **watchers** -- a list of Watcher objects
    - **endpoint** -- the controller ZMQ endpoint
    - **pubsub_endpoint** -- the pubsub endpoint
    - **stats_endpoint** -- the stats endpoint. If not provided,
      the *circusd-stats* process will not be launched.
    - **check_delay** -- the delay between two controller points
      (default: 0.5 s when passed to the constructor; 1 s when the arbiter
      is built from a configuration file)
    - **prereload_fn** -- callable that will be executed on each reload
      (default: None)
    - **context** -- if provided, the zmq context to reuse.
      (default: None)
    - **loop**: if provided, a :class:`zmq.eventloop.ioloop.IOLoop` instance
       to reuse. (default: None)
    - **plugins** -- a list of plugins. Each item is a mapping with:

        - **use** -- Fully qualified name that points to the plugin class
        - every other value is passed to the plugin in the **config** option
    - **sockets** -- a mapping of sockets. Each key is the socket name,
      and each value a :class:`CircusSocket` class. (default: None)
    - **warmup_delay** -- a delay in seconds between two watchers startup.
      (default: 0)
    - **httpd** -- If True, a circushttpd process is run (default: False)
    - **httpd_host** -- the circushttpd host (default: localhost)
    - **httpd_port** -- the circushttpd port (default: 8080)
    - **debug** -- if True, adds a lot of debug info in the stdout (default:
      False)
    - **ssh_server** -- if provided, appended as ``--ssh`` to the
      *circusd-stats* and *circushttpd* command lines and handed to the
      plugin command builder (default: None)
    - **proc_name** -- the arbiter process name (default: circusd)
    """

    # Overwritten on the instance by stop() with its restart_after_stop
    # argument.
    restart_after_stop = False
    def __init__(self, watchers, endpoint, pubsub_endpoint, check_delay=.5,
                 prereload_fn=None, context=None, loop=None,
                 stats_endpoint=None, plugins=None, sockets=None,
                 warmup_delay=0, httpd=False, httpd_host='localhost',
                 httpd_port=8080, debug=False, ssh_server=None,
                 proc_name='circusd'):
        """Store the settings and register the built-in helper watchers.

        Besides the *watchers* passed in, extra watchers are appended to that
        same list for circusd-stats (when *stats_endpoint* is set),
        circushttpd (when *httpd* is true, together with a ``circushttpd``
        socket) and one per entry of *plugins*. The controller is created
        last, bound to *loop* when one is given.

        Note that the *watchers* list -- and the *sockets* list when *httpd*
        is enabled -- are modified in place.
        """
        self.watchers = watchers
        self.endpoint = endpoint
        self.check_delay = check_delay
        self.prereload_fn = prereload_fn
        self.pubsub_endpoint = pubsub_endpoint
        self.proc_name = proc_name

        # placeholders so the attributes exist even if setup fails below
        self.ctrl = self.loop = None
        self.socket_event = False

        # initialize zmq context
        self.context = context or zmq.Context.instance()
        self.pid = os.getpid()
        self._watchers_names = {}
        self.alive = True
        self._lock = RLock()
        self.debug = debug
        # in debug mode the helper processes echo their output to stdout
        if self.debug:
            stdout_stream = stderr_stream = {'class': 'StdoutStream'}
        else:
            stdout_stream = stderr_stream = None

        # initializing circusd-stats as a watcher when configured
        self.stats_endpoint = stats_endpoint
        if self.stats_endpoint is not None:
            cmd = "%s -c 'from circus import stats; stats.main()'" % \
                sys.executable
            cmd += ' --endpoint %s' % self.endpoint
            cmd += ' --pubsub %s' % self.pubsub_endpoint
            cmd += ' --statspoint %s' % self.stats_endpoint
            if ssh_server is not None:
                cmd += ' --ssh %s' % ssh_server
            if debug:
                cmd += ' --log-level DEBUG'
            stats_watcher = Watcher('circusd-stats', cmd, use_sockets=True,
                                    singleton=True,
                                    stdout_stream=stdout_stream,
                                    stderr_stream=stderr_stream,
                                    copy_env=True, copy_path=True)

            self.watchers.append(stats_watcher)

        # adding the httpd
        if httpd:
            cmd = ("%s -c 'from circusweb import circushttpd; "
                   "circushttpd.main()'") % sys.executable
            cmd += ' --endpoint %s' % self.endpoint
            cmd += ' --fd $(circus.sockets.circushttpd)'
            if ssh_server is not None:
                cmd += ' --ssh %s' % ssh_server
            httpd_watcher = Watcher('circushttpd', cmd, use_sockets=True,
                                    singleton=True,
                                    stdout_stream=stdout_stream,
                                    stderr_stream=stderr_stream,
                                    copy_env=True, copy_path=True)
            self.watchers.append(httpd_watcher)
            httpd_socket = CircusSocket(name='circushttpd', host=httpd_host,
                                        port=httpd_port)

            # adding the socket
            if sockets is None:
                sockets = [httpd_socket]
            else:
                sockets.append(httpd_socket)

        # adding each plugin as a watcher
        if plugins is not None:
            for plugin in plugins:
                fqnd = plugin['use']
                name = 'plugin:%s' % fqnd.replace('.', '-')
                cmd = get_plugin_cmd(plugin, self.endpoint,
                                     self.pubsub_endpoint, self.check_delay,
                                     ssh_server, debug=self.debug)
                plugin_watcher = Watcher(name, cmd, priority=1, singleton=True,
                                         stdout_stream=stdout_stream,
                                         stderr_stream=stderr_stream,
                                         copy_env=True, copy_path=True)
                self.watchers.append(plugin_watcher)

        self.sockets = CircusSockets(sockets)
        self.warmup_delay = warmup_delay
        # Honour the caller-supplied loop (it used to be silently ignored);
        # fall back to the global IOLoop instance otherwise.
        if loop is not None:
            self.loop = loop
        else:
            self.loop = ioloop.IOLoop.instance()
        self.ctrl = Controller(self.endpoint, self.context, self.loop, self,
                               self.check_delay)

    def get_socket(self, name):
        for i in self.sockets:
            if i.name == name:
                return i

        return None

    @classmethod
    def get_socket_config(cls, config, name):
        for i in config.get('sockets', []):
            if i['name'] == name:
                return i
        return None

    @classmethod
    def get_watcher_config(cls, config, name):
        for i in config.get('watchers', []):
            if i['name'] == name:
                return i
        return None

    @classmethod
    def cfg2dict(cls, cfg):
        return dict(
            endpoint=cfg['endpoint'],
            pubsub_endpoint=cfg['pubsub_endpoint'],
            check_delay=cfg.get('check_delay', 1.),
            prereload_fn=cfg.get('prereload_fn'),
            stats_endpoint=cfg.get('stats_endpoint'),
            plugins=cfg.get('plugins'),
            warmup_delay=cfg.get('warmup_delay', 0),
            httpd=cfg.get('httpd', False),
            httpd_host=cfg.get('httpd_host', 'localhost'),
            httpd_port=cfg.get('httpd_port', 8080),
            debug=cfg.get('debug', False),
            stream_backend=cfg.get('stream_backend', 'thread'),
            ssh_server=cfg.get('ssh_server', None),
    )

    def reload_from_config(self, config_file=None):
        """Re-read the configuration and apply socket/watcher changes in place.

        Options:

        - **config_file** -- path of the configuration to load; when empty,
          ``self.config_file`` (set by :meth:`load_from_config`) is used.

        Returns True, without applying anything, when the arbiter-level
        settings (see :meth:`cfg2dict`) differ from ``self.cfg``: everything
        has to be reloaded in that case. Otherwise sockets and watchers are
        added, removed or recreated as needed and False is returned.

        Raises ValueError when the command of an existing watcher references
        a socket that is being removed (see the review note in the body).
        """
        cfg = get_config(config_file if config_file else self.config_file)

        # if arbiter is changed, reload everything
        if self.cfg2dict(cfg) != self.cfg:
            return True

        current_socket_names = set([i.name for i in self.sockets])
        new_socket_names = set([i['name'] for i in cfg.get('sockets', [])])
        added_socket_names = new_socket_names - current_socket_names
        deleted_socket_names = current_socket_names - new_socket_names
        maybechanged_socket_names = current_socket_names - deleted_socket_names
        watcher_names_with_changed_socket = set()
        watcher_names_with_deleted_socket = set()

        # get changed sockets
        for n in maybechanged_socket_names:
            s = self.get_socket(n)
            if s.cfg2dict(self.get_socket_config(cfg, n)) != s.cfg:
                # just delete the socket and add it again
                deleted_socket_names.add(n)
                added_socket_names.add(n)

                # Get the watchers which use it, so they can be deleted and
                # added again as well. iter_watchers must be *called*:
                # iterating the bound method itself raises TypeError.
                for w in self.iter_watchers():
                    if 'circus.sockets.%s' % n.lower() in w.cmd:
                        watcher_names_with_changed_socket.add(w.name)

        # get deleted sockets
        for n in deleted_socket_names:
            s = self.get_socket(n)
            s.close()
            # Get the watchers which use these, they should not be active
            # anymore
            for w in self.iter_watchers():
                if 'circus.sockets.%s' % n.lower() in w.cmd:
                    watcher_names_with_deleted_socket.add(w.name)
            self.sockets.remove(s)

        # get added sockets
        for n in added_socket_names:
            s = CircusSocket.load_from_config(self.get_socket_config(cfg, n))
            s.bind_and_listen()
            self.sockets.append(s)

        if added_socket_names or deleted_socket_names:
            # make sure all existing watchers get the new sockets in their
            # attributes and get the old ones removed
            for watcher in self.iter_watchers():
                watcher.initialize(self.evpub_socket, self.sockets, self)

        current_watcher_names = set([i.name for i in self.iter_watchers()])
        new_watcher_names = set([i['name'] for i in cfg.get('watchers', [])])
        added_watcher_names = ((new_watcher_names - current_watcher_names) |
                               watcher_names_with_changed_socket)
        deleted_watcher_names = (current_watcher_names - new_watcher_names -
                                 watcher_names_with_changed_socket)
        maybechanged_watcher_names = (current_watcher_names -
                                      deleted_watcher_names)

        # NOTE(review): `<set> not in new_watcher_names` asks whether the
        # whole set is an *element* of new_watcher_names, which is never true
        # for string names, so this raises as soon as any watcher references
        # a removed socket -- changed sockets included, since they were added
        # to deleted_socket_names above. Behavior kept as-is; confirm the
        # intended condition.
        if (watcher_names_with_deleted_socket and
                watcher_names_with_deleted_socket not in new_watcher_names):
            raise ValueError('Watchers %s uses a socket which is deleted' % watcher_names_with_deleted_socket)

        # get changed watchers
        for n in maybechanged_watcher_names:
            # the registry is keyed by lower-cased watcher name
            w = self.get_watcher(n.lower())
            new_cfg = w.cfg2dict(self.get_watcher_config(cfg, n))
            # cfg2dict is used to make sure a copy is returned
            old_cfg = w.cfg2dict(w.cfg)
            if new_cfg != old_cfg:
                old_cfg['numprocesses'] = new_cfg['numprocesses']
                if new_cfg == old_cfg:
                    # if nothing but the number of processes is changed,
                    # just change this
                    w.set_numprocesses(int(new_cfg['numprocesses']))
                else:
                    # Other things changed. Just delete and add the watcher.
                    deleted_watcher_names.add(n)
                    added_watcher_names.add(n)

        # get deleted watchers
        for n in deleted_watcher_names:
            w = self.get_watcher(n.lower())
            w.stop()
            del self._watchers_names[w.name.lower()]
            self.watchers.remove(w)

        # get added watchers
        for n in added_watcher_names:
            w = Watcher.load_from_config(self.get_watcher_config(cfg, n))
            w.initialize(self.evpub_socket, self.sockets, self)
            w.start()
            self.watchers.append(w)
            self._watchers_names[w.name.lower()] = w

        return False

    @classmethod
    def load_from_config(cls, config_file):
        """Build an arbiter from the configuration found in *config_file*.

        Watchers and sockets are instantiated from the ``watchers`` and
        ``sockets`` entries. When ``httpd`` is enabled but *circusweb* cannot
        be imported, an error is logged and the process exits with status 1.
        The returned arbiter carries ``cfg`` (the output of :meth:`cfg2dict`)
        and ``config_file`` attributes.
        """
        cfg = get_config(config_file)

        watchers = [Watcher.load_from_config(entry)
                    for entry in cfg.get('watchers', [])]
        sockets = [CircusSocket.load_from_config(entry)
                   for entry in cfg.get('sockets', [])]

        httpd = cfg.get('httpd', False)
        if httpd:
            # controlling that we have what it takes to run the web UI
            # if something is missing this will tell the user
            try:
                import circusweb     # NOQA
            except ImportError:
                logger.error('You need to install circus-web')
                sys.exit(1)

        # creating arbiter
        arbiter = cls(
            watchers,
            cfg['endpoint'],
            cfg['pubsub_endpoint'],
            check_delay=cfg.get('check_delay', 1.),
            prereload_fn=cfg.get('prereload_fn'),
            stats_endpoint=cfg.get('stats_endpoint'),
            plugins=cfg.get('plugins'),
            sockets=sockets,
            warmup_delay=cfg.get('warmup_delay', 0),
            httpd=httpd,
            httpd_host=cfg.get('httpd_host', 'localhost'),
            httpd_port=cfg.get('httpd_port', 8080),
            debug=cfg.get('debug', False),
            ssh_server=cfg.get('ssh_server', None),
        )

        # keep a snapshot of the settings in use, so a later reload can tell
        # whether the arbiter configuration itself has changed
        arbiter.cfg = arbiter.cfg2dict(cfg)
        arbiter.config_file = config_file
        return arbiter

    def iter_watchers(self, reverse=True):
        watchers = [(watcher.priority, watcher) for watcher in self.watchers]
        watchers.sort(reverse=reverse)
        for __, watcher in watchers:
            yield watcher

    @debuglog
    def initialize(self):
        """Prepare the arbiter before any watcher is started.

        Sets the process title, creates and binds the PUB socket used for
        events, binds the configured sockets and initializes every watcher
        (registering each one under its lower-cased name).
        """
        # set process title
        _setproctitle(self.proc_name)

        # event pub socket
        pub_socket = self.context.socket(zmq.PUB)
        self.evpub_socket = pub_socket
        pub_socket.bind(self.pubsub_endpoint)
        pub_socket.linger = 0

        # initialize sockets (nothing to do when none are configured)
        if len(self.sockets):
            self.sockets.bind_and_listen_all()
            logger.info("sockets started")

        # initialize watchers
        registry = self._watchers_names
        for watcher in self.iter_watchers():
            registry[watcher.name.lower()] = watcher
            watcher.initialize(self.evpub_socket, self.sockets, self)

    def start_watcher(self, watcher):
        """Aska a specific watcher to start and wait for the specified
        warmup delay."""
        if watcher.autostart:
            watcher.start()
            sleep(self.warmup_delay)

    @debuglog
    def start(self):
        """Run the arbiter until its event loop stops.

        Initializes the arbiter, starts the controller and the watchers,
        then runs the event loop, restarting it whenever it is interrupted
        by EINTR. The controller is stopped and the event socket closed on
        the way out, whether the loop ended normally or with an error.
        """
        logger.info("Starting master on pid %s", self.pid)
        self.initialize()

        # start controller
        self.ctrl.start()
        try:
            # initialize processes
            logger.debug('Initializing watchers')
            for watcher in self.iter_watchers():
                self.start_watcher(watcher)

            logger.info('Arbiter now waiting for commands')

            loop_finished = False
            while not loop_finished:
                try:
                    self.loop.start()
                except zmq.ZMQError as e:
                    # an interrupted system call just restarts the loop;
                    # anything else is a real error
                    if e.errno != errno.EINTR:
                        raise
                else:
                    loop_finished = True
        finally:
            self.ctrl.stop()
            self.evpub_socket.close()

    def stop(self, restart_after_stop=False):
        self.restart_after_stop = restart_after_stop

        if self.alive:
            self.stop_watchers(stop_alive=True)

        if self.loop.running():
            self.loop.stop()

        # close sockets
        self.sockets.close_all()

    def reap_processes(self):
        """Collect exited child processes and report them to their watcher.

        Children are reaped with a non-blocking ``os.waitpid`` until none is
        left to collect. A pid belonging to a process of a non-stopped
        watcher is passed to that watcher's ``reap_process``; other pids are
        reaped and ignored.
        """
        # map pids to the watcher owning them
        owner_by_pid = {}
        for watcher in self.iter_watchers():
            if watcher.stopped:
                continue
            for process in watcher.processes.values():
                owner_by_pid[process.pid] = watcher

        # detect dead children
        while True:
            try:
                # wait for our child (so it's not a zombie)
                pid, status = os.waitpid(-1, os.WNOHANG)
                if not pid:
                    # children exist but none has exited yet
                    break

                if pid in owner_by_pid:
                    owner_by_pid[pid].reap_process(pid, status)
            except OSError as e:
                if e.errno == errno.ECHILD:
                    # process already reaped
                    return
                if e.errno != errno.EAGAIN:
                    raise
                # EAGAIN: yield the CPU and try again
                sleep(0)

    def manage_watchers(self):
        if not self.alive:
            return

        with self._lock:
            need_on_demand = False
            # manage and reap processes
            self.reap_processes()
            for watcher in self.iter_watchers():
                if watcher.on_demand and watcher.stopped:
                    need_on_demand = True
                watcher.manage_processes()
            if need_on_demand:
                 (rlist, wlist, xlist) = select.select([x.fileno() for x in self.sockets.values()], [], [], 0)
                 if rlist:
                     self.socket_event = True
                     self.start_watchers()
                     self.socket_event = False


    @debuglog
    def reload(self, graceful=True):
        """Reloads everything.

        Run the :func:`prereload_fn` callable if any (it receives the
        arbiter), reopen the log files of the logger's file handlers, then
        reload all watchers one after the other, sleeping ``warmup_delay``
        seconds after each.

        Options:

        - **graceful** -- passed through to each watcher's ``reload``.
        """
        if self.prereload_fn is not None:
            self.prereload_fn(self)

        # reopen log files
        for handler in logger.handlers:
            if isinstance(handler, logging.FileHandler):
                handler.acquire()
                try:
                    # a handler that has not opened its file yet has no
                    # stream to close
                    if handler.stream is not None:
                        handler.stream.close()
                    handler.stream = open(handler.baseFilename, handler.mode)
                finally:
                    # always give the handler lock back, even when the file
                    # cannot be reopened
                    handler.release()

        # gracefully reload watchers
        for watcher in self.iter_watchers():
            watcher.reload(graceful=graceful)
            sleep(self.warmup_delay)

    def numprocesses(self):
        """Return the number of processes running across all watchers."""
        return sum([len(watcher) for watcher in self.watchers])

    def numwatchers(self):
        """Return the number of watchers."""
        return len(self.watchers)

    def get_watcher(self, name):
        """Return the watcher *name*."""
        return self._watchers_names[name]

    def statuses(self):
        return dict([(watcher.name, watcher.status())
                     for watcher in self.watchers])

    def add_watcher(self, name, cmd, **kw):
        """Adds a watcher.

        Options:

        - **name**: name of the watcher to add
        - **cmd**: command to run.
        - all other options defined in the Watcher constructor.
        """
        if name in self._watchers_names:
            raise AlreadyExist("%r already exist" % name)

        if not name:
            return ValueError("command name shouldn't be empty")

        watcher = Watcher(name, cmd, **kw)
        watcher.initialize(self.evpub_socket, self.sockets, self)
        self.watchers.append(watcher)
        self._watchers_names[watcher.name.lower()] = watcher
        return watcher

    def rm_watcher(self, name):
        """Deletes a watcher.

        Options:

        - **name**: name of the watcher to delete
        """
        logger.debug('Deleting %r watcher', name)

        # remove the watcher from the list
        watcher = self._watchers_names.pop(name)
        del self.watchers[self.watchers.index(watcher)]

        # stop the watcher
        watcher.stop()

    def start_watchers(self):
        for watcher in self.iter_watchers():
            watcher.start()
            sleep(self.warmup_delay)

    def stop_watchers(self, stop_alive=False):
        if not self.alive:
            return

        if stop_alive:
            logger.info('Arbiter exiting')
            self.alive = False

        for watcher in self.iter_watchers(reverse=False):
            watcher.stop()

    def restart(self):
        self.stop_watchers()
        self.start_watchers()