class Service(ChromaService):
    """Service that runs the power control RPC handler and monitor daemon.

    ``run`` starts one ``ServiceThread`` for each of the two components and
    then blocks; ``stop`` stops and joins both threads and releases ``run``.
    """

    def __init__(self):
        super(Service, self).__init__()
        self.threads = []
        # Set by run() once both child threads have been started.
        self._children_started = threading.Event()
        # Set by stop() once both child threads have been joined.
        self._complete = threading.Event()

    def run(self):
        """Start the child threads, then block until stop() has finished."""
        from chroma_core.services.power_control.manager import PowerControlManager
        from chroma_core.services.power_control.monitor_daemon import PowerMonitorDaemon
        from chroma_core.services.power_control.rpc import PowerControlRpc

        super(Service, self).run()

        # A single manager instance is handed to both the daemon and the
        # RPC handler.
        power_manager = PowerControlManager()
        daemon = PowerMonitorDaemon(power_manager)
        rpc_handler = PowerControlRpc(power_manager)

        self._rpc_thread = ServiceThread(rpc_handler)
        self._monitor_daemon_thread = ServiceThread(daemon)

        for child in (self._rpc_thread, self._monitor_daemon_thread):
            child.start()

        # Let stop() proceed, then park here until it signals completion.
        self._children_started.set()
        self._complete.wait()

    def stop(self):
        """Stop and join both child threads, then unblock run()."""
        super(Service, self).stop()

        # Guard against trying to stop after child threads are created,
        # but before they are started.
        self._children_started.wait()

        children = (self._rpc_thread, self._monitor_daemon_thread)

        self.log.info("Stopping...")
        for child in children:
            child.stop()

        self.log.info("Joining...")
        for child in children:
            child.join()

        self.log.info("Complete.")

        self._complete.set()
Example #2
0
class Service(ChromaService):
    """Service that runs the job scheduler and its helper threads.

    ``run`` cancels leftovers from a previous run, starts the queue, RPC,
    progress and mail alert threads, then blocks until ``stop`` has stopped
    and joined them. After that it cancels any jobs still outstanding.
    """

    def __init__(self):
        super(Service, self).__init__()

        # Set by run() once every child thread that stop() touches has been
        # created and started.
        self._children_started = threading.Event()
        # Set by stop() once the child threads have been joined.
        self._complete = threading.Event()

    def run(self):
        """Start all child threads and block until stop() completes."""
        from chroma_core.services.job_scheduler.job_scheduler import JobScheduler
        from chroma_core.services.job_scheduler.job_scheduler_client import JobSchedulerRpc
        from chroma_core.services.job_scheduler.agent_rpc import AgentRpc

        super(Service, self).run()

        # Cancel anything that's left behind from a previous run
        for command in Command.objects.filter(complete=False):
            command.completed(True, True)
        Job.objects.filter(~Q(state="complete")).update(state="complete", cancelled=True)

        self._job_scheduler = JobScheduler()
        self._queue_thread = ServiceThread(QueueHandler(self._job_scheduler))
        self._rpc_thread = ServiceThread(JobSchedulerRpc(self._job_scheduler))
        self._progress_thread = ServiceThread(self._job_scheduler.progress)
        AgentRpc.start()
        self._queue_thread.start()
        self._rpc_thread.start()
        self._progress_thread.start()

        self._mail_alerts_thread = MailAlerts(settings.EMAIL_SENDER, settings.EMAIL_SUBJECT_PREFIX, settings.EMAIL_HOST)
        self._mail_alerts_thread.start()

        # Signal only after *all* children, including the mail alerts thread,
        # exist and are started: stop() waits on this event and then stops
        # and joins every one of them, so signalling earlier would let it
        # race with the creation/start of the mail alerts thread.
        self._children_started.set()

        self._complete.wait()

        self.log.info("Cancelling outstanding jobs...")

        # Cancel whatever is still not complete, highest id first.
        for job in Job.objects.filter(~Q(state="complete")).order_by("-id"):
            self._job_scheduler.cancel_job(job.id)

    def stop(self):
        """Shut down AgentRpc, stop and join all child threads, unblock run()."""
        from chroma_core.services.job_scheduler.agent_rpc import AgentRpc

        super(Service, self).stop()

        # Guard against trying to stop after child threads are created, but before they are started
        self._children_started.wait()

        AgentRpc.shutdown()

        self.log.info("Stopping...")
        self._rpc_thread.stop()
        self._queue_thread.stop()
        self._progress_thread.stop()
        self._mail_alerts_thread.stop()

        self.log.info("Joining...")
        self._rpc_thread.join()
        self._queue_thread.join()
        self._job_scheduler.join_run_threads()
        self._progress_thread.join()
        self._mail_alerts_thread.join()

        self.log.info("Complete.")

        # Release run(), which then cancels any outstanding jobs.
        self._complete.set()
    def run(self):
        """Start the helper threads and serve agent HTTP requests.

        Blocks in the WSGI server's ``serve_forever``; once that returns,
        every helper thread is stopped and then joined.
        """
        super(Service, self).run()

        self.amqp_tx_forwarder = AmqpTxForwarder(self.queues)
        self.amqp_rx_forwarder = AmqpRxForwarder(self.queues)

        # TX: listens to an AMQP queue and appends incoming messages to
        # queues for retransmission to agents.
        tx_worker = ServiceThread(self.amqp_tx_forwarder)
        # RX: listens to local queues and appends received messages to an
        # AMQP queue.
        rx_worker = ServiceThread(self.amqp_rx_forwarder)
        for forwarder in (rx_worker, tx_worker):
            forwarder.start()

        # FIXME: this TERMINATE_ALL format could in principle
        # be passed back from the agent (but it should never
        # originate there), affecting sessions for other agents.

        # On restart, tell message-receiving services to discard any session
        # state left over from a previous instance of this service.
        for plugin_name in ("action_runner",):
            terminate_all = {
                "fqdn": None,
                "type": "SESSION_TERMINATE_ALL",
                "plugin": plugin_name,
                "session_id": None,
                "session_seq": None,
                "body": None,
            }
            self.queues.receive(terminate_all)

        # Services session management RPCs, so that other services can
        # explicitly request a session reset.
        rpc_worker = ServiceThread(HttpAgentRpc(self))
        rpc_worker.start()

        # Hook up the request handler
        MessageView.queues = self.queues
        MessageView.sessions = self.sessions
        MessageView.hosts = self.hosts
        ValidatedClientView.valid_certs = self.valid_certs

        # Generates HostOfflineAlerts
        poller = HostStatePoller(self.hosts, self.sessions)
        checker_worker = ServiceThread(poller)
        checker_worker.start()

        # The main thread serves incoming requests to exchange messages
        # with agents, until it is interrupted (gevent handles signals for us)
        self.server = wsgi.WSGIServer(("", HTTP_AGENT_PORT), WSGIHandler())
        self.server.serve_forever()

        # Ask every helper to stop first, then wait for each in turn.
        helpers = (rpc_worker, tx_worker, rx_worker, checker_worker)
        for helper in helpers:
            helper.stop()
        for helper in helpers:
            helper.join()
    def _execute_inner(self, *args, **options):
        """Load and run the named services until a termination signal arrives.

        Each positional argument is a service name; the module
        ``chroma_core.services.<name>`` is imported, its ``Service`` class is
        instantiated and run in its own ``ServiceThread``. The calling thread
        then waits until the SIGINT/SIGTERM handler has stopped and joined
        every service.

        Options read: ``verbose``, ``gevent``, ``trace``, ``lightweight_rpc``
        and ``name`` (used for the log file name).

        Exits the process via ``sys.exit(-1)`` if the service configuration
        check fails, via ``os._exit(0)`` if a signal arrives before setup has
        completed, and via ``os._exit(-1)`` if threads are still alive after
        shutdown (non-gevent mode only).
        """
        if options['verbose']:
            log_enable_stdout()

        if options['gevent']:
            from gevent.monkey import patch_all
            patch_all(thread=True)
            # Gevent's implementation of select removes 'poll'
            import subprocess
            subprocess._has_poll = False

            # Replace the connection store with a fresh thread-local; this is
            # done after patch_all(thread=True) has run.
            import django.db
            django.db.connections._connections = threading.local()

        if options['trace']:

            class Trace(object):
                """Trace function that writes every executed line to trace.log."""

                def __init__(self):
                    self.tracefile = open('trace.log', 'w', buffering=0)
                    self.tracefile.write("Started at %s: %s %s\n" %
                                         (IMLDateTime.utcnow(), args, options))

                def __call__(self, frame, event, arg):
                    if event == "line":
                        try:
                            # Map a compiled file name (.pyc/.pyo) back to its
                            # .py source. rstrip, not strip: only the trailing
                            # 'c'/'o' must go; strip() would also eat leading
                            # 'c'/'o' characters of a relative path such as
                            # "chroma_core/...", breaking the linecache lookup.
                            pyfile = frame.f_globals['__file__'].rstrip('co')
                            line = linecache.getline(pyfile, frame.f_lineno)
                        except KeyError:
                            # Frame globals without a __file__ entry: skip.
                            pass
                        else:
                            if line is not None:
                                self.tracefile.write(
                                    "%s:%s %s" %
                                    (pyfile, frame.f_lineno, line))

                    # Returning self keeps tracing active for nested scopes.
                    return self

            chroma_core.services.log.trace = Trace()
            sys.settrace(chroma_core.services.log.trace)

        from chroma_core.lib.service_config import ServiceConfig
        if not ServiceConfig().configured():
            # FIXME: return an error code which will prevent supervisord from restarting this service
            # (using the 'exitcodes' option for the programs in the supervisord conf)
            sys.stderr.write(
                "Chroma is not configured, please run chroma-config setup first\n"
            )
            sys.exit(-1)

        if not options['lightweight_rpc']:
            RpcClientFactory.initialize_threads()

        log_set_filename("%s.log" % options['name'])

        # Respond to Ctrl+C
        stopped = threading.Event()

        # Ensure that threads are .start()ed before we possibly try to .join() them
        setup_complete = threading.Event()

        def signal_handler(*args, **kwargs):
            """Params undefined because gevent vs. threading pass
            different things to handler

            """
            if not setup_complete.is_set():
                log.warning("Terminated during setup, exiting hard")
                os._exit(0)

            if not options['lightweight_rpc']:
                RpcClientFactory.shutdown_threads()

            # Ask every service to stop before joining any of them.
            for service_thread in service_mains:
                log.info("Stopping %s" % service_thread.service.name)
                service_thread.service.stop()

            for service_thread in service_mains:
                log.info("Joining %s" % service_thread.service.name)
                service_thread.join()

            stopped.set()

        if options['gevent']:
            import gevent
            gevent.signal(signal.SIGINT, signal_handler)
            gevent.signal(signal.SIGTERM, signal_handler)
        else:
            signal.signal(signal.SIGINT, signal_handler)
            signal.signal(signal.SIGTERM, signal_handler)

        # Read by signal_handler, which only iterates it once setup_complete
        # has been set below.
        service_mains = []
        for service_name in args:
            module_path = "chroma_core.services.%s" % service_name

            # Load the module
            # __import__ returns the top-level package, so walk down the
            # dotted path to reach the service module itself.
            mod = __import__(module_path)
            components = module_path.split('.')
            for comp in components[1:]:
                mod = getattr(mod, comp)

            service = getattr(mod, 'Service')()
            service.log = log_register(service.name)

            service_thread = ServiceThread(service)
            service_thread.start()
            service_mains.append(service_thread)

        setup_complete.set()

        while not stopped.is_set():
            # Using a timeout changes the behaviour of CPython's waiting so that it will
            # receive signals (like ctrl-c SIGINT) immediately -- logically we don't want
            # any timeout here, but a pure wait() breaks ctrl-c.
            stopped.wait(10)

        # Anything besides the current thread still alive at this point was
        # not cleaned up by the shutdown above.
        if len(threading.enumerate()) > 1 and not options['gevent']:
            log.error("Rogue thread still running, exiting hard")
            log.error([t.name for t in threading.enumerate()])
            os._exit(-1)