class Service(ChromaService):
    """Power control service.

    run() builds a PowerControlManager, starts one ServiceThread for the
    PowerControlRpc and one for the PowerMonitorDaemon, then blocks until
    stop() has stopped and joined both of them.
    """

    def __init__(self):
        super(Service, self).__init__()
        self.threads = []
        # Set by run() once both child threads have been start()ed.
        self._children_started = threading.Event()
        # Set by stop() once both child threads have been joined; this is
        # what releases run().
        self._complete = threading.Event()

    def run(self):
        """Start the child threads and block until stop() completes."""
        from chroma_core.services.power_control.manager import PowerControlManager
        from chroma_core.services.power_control.monitor_daemon import PowerMonitorDaemon
        from chroma_core.services.power_control.rpc import PowerControlRpc

        super(Service, self).run()

        # Both children share a single manager instance.
        manager = PowerControlManager()
        monitor_daemon = PowerMonitorDaemon(manager)

        self._rpc_thread = ServiceThread(PowerControlRpc(manager))
        self._monitor_daemon_thread = ServiceThread(monitor_daemon)

        for child in (self._rpc_thread, self._monitor_daemon_thread):
            child.start()

        self._children_started.set()
        self._complete.wait()

    def stop(self):
        """Stop and join the child threads, then release run()."""
        super(Service, self).stop()

        # A stop request may arrive after the child threads have been
        # created but before they have been started; wait that window out.
        self._children_started.wait()

        children = (self._rpc_thread, self._monitor_daemon_thread)

        self.log.info("Stopping...")
        for child in children:
            child.stop()

        self.log.info("Joining...")
        for child in children:
            child.join()

        self.log.info("Complete.")
        self._complete.set()
class Service(ChromaService):
    """Job scheduler service.

    run() marks commands and jobs left over from a previous run as finished,
    starts the queue, RPC, progress and mail-alert threads, and then blocks
    until stop() has stopped and joined all of them.  Once released it
    cancels any jobs that are still outstanding.
    """

    def __init__(self):
        super(Service, self).__init__()
        # Set by run() once *every* child thread (including the mail alerts
        # thread) has been created and start()ed.
        self._children_started = threading.Event()
        # Set by stop() after all children have been joined; releases run().
        self._complete = threading.Event()

    def run(self):
        """Start all child threads and block until stop() completes."""
        from chroma_core.services.job_scheduler.job_scheduler import JobScheduler
        from chroma_core.services.job_scheduler.job_scheduler_client import JobSchedulerRpc
        from chroma_core.services.job_scheduler.agent_rpc import AgentRpc

        super(Service, self).run()

        # Cancel anything that's left behind from a previous run
        for command in Command.objects.filter(complete=False):
            command.completed(True, True)
        Job.objects.filter(~Q(state="complete")).update(state="complete", cancelled=True)

        self._job_scheduler = JobScheduler()
        self._queue_thread = ServiceThread(QueueHandler(self._job_scheduler))
        self._rpc_thread = ServiceThread(JobSchedulerRpc(self._job_scheduler))
        self._progress_thread = ServiceThread(self._job_scheduler.progress)
        AgentRpc.start()
        self._queue_thread.start()
        self._rpc_thread.start()
        self._progress_thread.start()

        self._mail_alerts_thread = MailAlerts(settings.EMAIL_SENDER,
                                              settings.EMAIL_SUBJECT_PREFIX,
                                              settings.EMAIL_HOST)
        self._mail_alerts_thread.start()

        # Only signal readiness once the mail alerts thread exists and is
        # running as well: stop() waits on this event and then stops/joins
        # every child, so setting it any earlier lets stop() race with the
        # creation of self._mail_alerts_thread.
        self._children_started.set()

        self._complete.wait()

        self.log.info("Cancelling outstanding jobs...")

        # Highest job id first.
        for job in Job.objects.filter(~Q(state="complete")).order_by("-id"):
            self._job_scheduler.cancel_job(job.id)

    def stop(self):
        """Stop and join every child thread, then release run()."""
        from chroma_core.services.job_scheduler.agent_rpc import AgentRpc

        super(Service, self).stop()

        # Guard against trying to stop after child threads are created, but before they are started
        self._children_started.wait()

        AgentRpc.shutdown()

        self.log.info("Stopping...")
        self._rpc_thread.stop()
        self._queue_thread.stop()
        self._progress_thread.stop()
        self._mail_alerts_thread.stop()

        self.log.info("Joining...")
        self._rpc_thread.join()
        self._queue_thread.join()
        self._job_scheduler.join_run_threads()
        self._progress_thread.join()
        self._mail_alerts_thread.join()

        self.log.info("Complete.")
        self._complete.set()
def run(self):
    """Run the HTTP agent service until the WSGI server stops serving.

    Starts the AMQP forwarder threads, injects a SESSION_TERMINATE_ALL
    message per plugin, starts the session RPC and host state poller
    threads, wires the shared state into the request views, and then
    serves HTTP on the calling thread.  When serve_forever() returns, all
    helper threads are stopped and joined.
    """
    super(Service, self).run()

    self.amqp_tx_forwarder = AmqpTxForwarder(self.queues)
    self.amqp_rx_forwarder = AmqpRxForwarder(self.queues)

    # TX: listens to an AMQP queue and appends incoming messages to the
    # queues for retransmission to agents.
    tx_forwarder_thread = ServiceThread(self.amqp_tx_forwarder)

    # RX: listens to the local queues and appends received messages to an
    # AMQP queue.
    rx_forwarder_thread = ServiceThread(self.amqp_rx_forwarder)

    rx_forwarder_thread.start()
    tx_forwarder_thread.start()

    # FIXME: this TERMINATE_ALL format could in principle be passed back
    # from the agent (but it should never originate there), affecting
    # sessions for other agents.
    #
    # At restart, tell message-receiving services to clear out any
    # existing session state (from a previous instance of this service).
    for plugin_name in ["action_runner"]:
        terminate_all = {
            "fqdn": None,
            "type": "SESSION_TERMINATE_ALL",
            "plugin": plugin_name,
            "session_id": None,
            "session_seq": None,
            "body": None,
        }
        self.queues.receive(terminate_all)

    # Services session management RPCs, so that other services can
    # explicitly request a session reset.
    rpc_thread = ServiceThread(HttpAgentRpc(self))
    rpc_thread.start()

    # Hook up the request handlers to this service's shared state.
    MessageView.queues = self.queues
    MessageView.sessions = self.sessions
    MessageView.hosts = self.hosts
    ValidatedClientView.valid_certs = self.valid_certs

    # Generates HostOfflineAlerts.
    poller = HostStatePoller(self.hosts, self.sessions)
    offline_alert_thread = ServiceThread(poller)
    offline_alert_thread.start()

    # The main thread serves incoming requests to exchange messages with
    # agents, until it is interrupted (gevent handles signals for us).
    self.server = wsgi.WSGIServer(("", HTTP_AGENT_PORT), WSGIHandler())
    self.server.serve_forever()

    # Ask every helper to stop before waiting on any of them.
    shutdown_order = (
        rpc_thread,
        tx_forwarder_thread,
        rx_forwarder_thread,
        offline_alert_thread,
    )
    for worker in shutdown_order:
        worker.stop()
    for worker in shutdown_order:
        worker.join()
def _execute_inner(self, *args, **options):
    """Run the named services in this process until SIGINT/SIGTERM.

    :param args: service names; each one is resolved to the ``Service``
                 class in ``chroma_core.services.<name>`` and run in its
                 own ServiceThread.
    :param options: reads the keys ``verbose``, ``gevent``, ``trace``,
                    ``lightweight_rpc`` and ``name``.

    Exits the process with -1 if Chroma is not configured, or (when not
    using gevent) if other threads are still alive after shutdown.
    """
    if options['verbose']:
        log_enable_stdout()

    if options['gevent']:
        from gevent.monkey import patch_all
        patch_all(thread=True)
        # Gevent's implementation of select removes 'poll'
        import subprocess
        subprocess._has_poll = False

        import django.db
        django.db.connections._connections = threading.local()

    if options['trace']:
        class Trace(object):
            """Line tracer which appends every executed source line to trace.log."""

            def __init__(self):
                self.tracefile = open('trace.log', 'w', buffering=0)
                self.tracefile.write("Started at %s: %s %s\n" % (IMLDateTime.utcnow(), args, options))

            def __call__(self, frame, event, arg):
                if event == "line":
                    try:
                        pyfile = frame.f_globals['__file__']
                        # Map a compiled file (.pyc/.pyo) back to its .py
                        # source.  Only the trailing character is dropped:
                        # str.strip('co') would also eat leading 'c'/'o'
                        # characters (e.g. of a relative "chroma_core/..."
                        # path) and leave linecache unable to find the file.
                        if pyfile.endswith(('.pyc', '.pyo')):
                            pyfile = pyfile[:-1]
                        line = linecache.getline(pyfile, frame.f_lineno)
                    except KeyError:
                        # No __file__ in this frame's globals: nothing to log.
                        pass
                    else:
                        if line is not None:
                            self.tracefile.write("%s:%s %s" % (pyfile, frame.f_lineno, line))
                # Returning self keeps the tracer installed for nested scopes.
                return self

        chroma_core.services.log.trace = Trace()
        sys.settrace(chroma_core.services.log.trace)

    from chroma_core.lib.service_config import ServiceConfig
    if not ServiceConfig().configured():
        # FIXME: return an error code which will prevent supervisord from restarting this service
        # (using the 'exitcodes' option for the programs in the supervisord conf)
        sys.stderr.write("Chroma is not configured, please run chroma-config setup first\n")
        sys.exit(-1)

    if not options['lightweight_rpc']:
        RpcClientFactory.initialize_threads()

    log_set_filename("%s.log" % options['name'])

    # Respond to Ctrl+C
    stopped = threading.Event()

    # Ensure that threads are .start()ed before we possibly try to .join() them
    setup_complete = threading.Event()

    def signal_handler(*args, **kwargs):
        """Stop and join every service thread, then release the main loop.

        Params undefined because gevent vs. threading pass different
        things to handler.
        """
        if not setup_complete.is_set():
            log.warning("Terminated during setup, exiting hard")
            os._exit(0)

        if not options['lightweight_rpc']:
            RpcClientFactory.shutdown_threads()

        # Ask every service to stop before waiting on any of them.
        for service_thread in service_mains:
            log.info("Stopping %s" % service_thread.service.name)
            service_thread.service.stop()

        for service_thread in service_mains:
            log.info("Joining %s" % service_thread.service.name)
            service_thread.join()

        stopped.set()

    if options['gevent']:
        import gevent
        gevent.signal(signal.SIGINT, signal_handler)
        gevent.signal(signal.SIGTERM, signal_handler)
    else:
        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

    service_mains = []
    for service_name in args:
        module_path = "chroma_core.services.%s" % service_name

        # Load the module: __import__ returns the top-level package, so walk
        # down the remaining components to reach the service module itself.
        mod = __import__(module_path)
        components = module_path.split('.')
        for comp in components[1:]:
            mod = getattr(mod, comp)

        service = getattr(mod, 'Service')()
        service.log = log_register(service.name)

        service_thread = ServiceThread(service)
        service_thread.start()
        service_mains.append(service_thread)

    setup_complete.set()

    while not stopped.is_set():
        # Using a timeout changes the behaviour of CPython's waiting so that it will
        # receive signals (like ctrl-c SIGINT) immediately -- logically we don't want
        # any timeout here, but a pure wait() breaks ctrl-c.
        stopped.wait(10)

    if len(threading.enumerate()) > 1 and not options['gevent']:
        log.error("Rogue thread still running, exiting hard")
        log.error([t.name for t in threading.enumerate()])
        os._exit(-1)