class Controller(Thread):
    """Link between the transport layer and the resources layer.

    Basically, its job is to load resources modules and objects and to call
    their generic "process" method.
    """

    def __init__(self, tq=None, pq=None, scheduler=None):
        """Build the scheduler, resource locator and alerts controller.

        tq -- queue on which close() posts the "stop" message.
        pq -- queue handed to the resource locator and alerts controller.
        scheduler -- optional scheduler to reuse instead of creating a new
            SynSched. Dispatcher.start_amqp passes one by keyword, so the
            parameter has to exist; omitting it keeps the old behaviour.
        """
        self.logger.debug("Initializing the controller...")
        Thread.__init__(self, name="CONTROLLER")

        self.tq = tq
        self.pq = pq

        if scheduler is None:
            scheduler = SynSched()
        self.scheduler = scheduler
        self.locator = ResourceLocator(pq)
        self.alerter = AlertsController(self.locator, self.scheduler, pq)
        self.logger.debug("Controller successfully initialized.")

    def start_scheduler(self):
        """Start the scheduler and alerter, then register configured jobs."""
        # Start the scheduler thread
        self.scheduler.start()
        self.alerter.start()

        # Prepopulate tasks from config file
        if config.monitor['enable_monitoring']:
            self._enable_monitoring()
        if config.compliance['enable_compliance']:
            self._enable_compliance()

    @staticmethod
    def _resolve_interval(settings, resource):
        """Return the interval configured for `resource` in `settings`.

        Falls back to settings['default_interval'] (returned as stored, not
        converted) when the resource-specific value is not a valid integer.
        """
        default_interval = settings['default_interval']
        try:
            return int(settings.get(resource, default_interval))
        except ValueError:
            return default_interval

    def _get_monitor_interval(self, resource):
        """Return the monitoring interval for the named resource."""
        return self._resolve_interval(config.monitor, resource)

    def _get_compliance_interval(self, resource):
        """Return the compliance-check interval for the named resource."""
        return self._resolve_interval(config.compliance, resource)

    def _enable_monitoring(self):
        """Schedule monitor_states for every resource that has states."""
        resources = self.locator.get_instance()
        for resource in resources.values():
            if not resource.states:
                continue
            interval = self._get_monitor_interval(resource.__resource__)
            self.scheduler.add_job(resource.monitor_states, interval)

    def _enable_compliance(self):
        """Schedule check_compliance for every resource that has states."""
        resources = self.locator.get_instance()
        for resource in resources.values():
            if not resource.states:
                continue
            interval = self._get_compliance_interval(resource.__resource__)
            self.scheduler.add_job(resource.check_compliance, interval)

    def stop_scheduler(self):
        """Shut the scheduler down and wait for it, if it is running."""
        # Shutdown the scheduler/monitor
        self.logger.debug("Shutting down global scheduler...")
        if self.scheduler.isAlive():
            self.scheduler.shutdown()
            self.scheduler.join()
            self.logger.debug("Scheduler stopped.")

    def close(self):
        """Ask the thread to stop, close every resource, stop the scheduler."""
        # Stop this thread by putting a stop message in the blocking queue get
        self.tq.put("stop")

        # Close properly each resource
        try:
            resources = list(self.locator.get_instance().values())
        except ResourceException as err:
            self.logger.debug(str(err))
            resources = []

        for resource in resources:
            # Guard each close individually: one failing resource must not
            # leave the remaining ones open.
            try:
                resource.close()
            except ResourceException as err:
                self.logger.debug(str(err))

        self.stop_scheduler()
class Dispatcher(object):
    """This module dispatches commands incoming from the command line to
    specific transports. It is also responsible for starting threads and
    catching signals like SIGINT and SIGTERM.
    """

    def __init__(self, transport):
        """Remember the transport name, install handlers, create queues.

        transport -- key selecting the start method in dispatch()
            ('amqp', 'http' or 'file').
        """
        self.transport = transport
        self.force_close = False

        # Handle signals
        # SIGINT registration is disabled; Ctrl-C surfaces as
        # KeyboardInterrupt, which the start_* methods catch themselves.
        #signal.signal(signal.SIGINT, self.stop)
        signal.signal(signal.SIGTERM, self.stop)

        # Threads instances variables
        self.controller = None
        self.sched = None
        self.resourcefile = None

        # These queues will be shared between the controller and the
        # transport and are used for incoming tasks and responses
        self.pq = Queue()
        self.tq = Queue()

    def stop(self, signum, frame):
        """Signal handler (registered for SIGTERM): log and shut down."""
        self.logger.debug("Stopping due to signal #%d" % signum)
        self.stop_synapse()

    def stop_synapse(self):
        """Closes all threads and exits properly."""
        if self.resourcefile:
            self.resourcefile.done = True

        # Close the controller and wait for it to quit
        if self.controller:
            # Controller is a threading.Thread; is_alive() is the spelling
            # that exists on both Python 2.6+ and Python 3.
            if self.controller.is_alive():
                self.controller.close()
                self.controller.join()
                self.logger.debug("Controller thread stopped")

        # Shutdown the scheduler/monitor
        if self.sched:
            if self.sched.isAlive():
                self.sched.shutdown()
                self.sched.join()
                self.logger.debug("Scheduler stopped")

        # Lets the retry loop in start_amqp() terminate.
        self.force_close = True
        self.logger.info("Successfully stopped.")

    def dispatch(self):
        """This method actually dispatches to specific transport methods
        according to command line parameters.

        Logs an error, stops everything and exits when the transport is
        unknown or when the selected start method raises KeyError.
        """
        self.logger.info('Starting on %s transport' %
                         self.transport.capitalize())
        transports = {
            'amqp': self.start_amqp,
            'http': self.start_resourcefile,
            'file': self.start_resourcefile,
        }

        # Resolve the handler separately from calling it, so that a KeyError
        # raised inside the handler is not reported as an unknown transport.
        try:
            handler = transports[self.transport]
        except KeyError as err:
            self.logger.error("Transport unknown. [%s]" % err)
            self.stop_synapse()
            sys.exit()

        try:
            handler()
        except KeyError as err:
            self.logger.error("Missing key while running the %s transport. "
                              "[%s]" % (self.transport, err))
            self.stop_synapse()
            sys.exit()

    def start_amqp(self):
        """Starts all needed threads: scheduler, controller and AMQP transport
        IOLOOP.
        """
        retry_timeout = config.rabbitmq['retry_timeout']
        try:
            self.sched = SynSched()
            # NOTE(review): relies on Controller accepting a `scheduler`
            # keyword - confirm against its signature.
            self.controller = Controller(scheduler=self.sched,
                                         tq=self.tq, pq=self.pq)
            # Start the controller
            self.controller.start()

            # Start the scheduler
            self.sched.start()

            self.amqpsynapse = AmqpSynapse(config.rabbitmq,
                                           pq=self.pq, tq=self.tq)

            # Keep (re)running the transport until stop_synapse() is called.
            while not self.force_close:
                try:
                    self.amqpsynapse.run()
                except (AmqpError, IOError, AttributeError, TypeError) as err:
                    self.logger.error(err)
                    # Wait before the next attempt; Ctrl-C during the wait
                    # triggers a clean shutdown.
                    try:
                        self.logger.debug("Sleeping %d sec" % retry_timeout)
                        time.sleep(retry_timeout)
                    except KeyboardInterrupt:
                        self.stop_synapse()
                except KeyboardInterrupt:
                    self.stop_synapse()

        except SystemExit:
            pass

        except ResourceException as err:
            self.logger.error(str(err))

    def start_resourcefile(self):
        """This method handles the --uri file and --uri http commands."""
        from synapse.resourcefile import ResourceFile
        try:
            self.resourcefile = ResourceFile(self.transport)
            self.resourcefile.fetch()
        except KeyboardInterrupt:
            self.stop_synapse()
class NagiosPluginsController(ResourcesController):
    """Resource controller that runs plugin commands described in
    ``*.conf`` files under the ``nagios.d`` configuration directory and
    publishes an alert when a scheduled command returns non-zero.
    """

    __resource__ = "nagios"

    def __init__(self, module):
        """Load plugin definitions, start a scheduler and register jobs."""
        super(NagiosPluginsController, self).__init__(module)
        self.path = self._configure()
        # Maps a config section name to its options plus a 'scheduled' flag.
        self.plugins = {}
        self._load_configs()
        self.scheduler = SynSched()
        self.scheduler.start()
        self._load_jobs()
        # Periodically pick up plugin sections added after startup.
        self.scheduler.add_job(self._reload, 30)

    def read(self, res_id=None, attributes=None):
        """Run the command of each requested sensor and return the results.

        attributes -- mapping whose keys are sensor (plugin section) names;
            None is treated as "no sensors requested".
        Returns a dict of sensor name -> exec_cmd() result for the sensors
        that are known plugins; unknown names are ignored.
        """
        status = {}
        # The declared default is None, which must not crash.
        for sensor in (attributes or {}):
            if sensor in self.plugins:
                status[sensor] = exec_cmd(self.plugins[sensor]['command'])
        return status

    def _configure(self):
        """Return the nagios.d config directory, creating it if missing."""
        config_path = os.path.join(config.paths['config_path'], 'nagios.d')
        if not os.path.exists(config_path):
            os.makedirs(config_path)
        return config_path

    def _reload(self):
        """Re-read the config directory and schedule any new plugins."""
        self._load_configs()
        self._load_jobs()

    def _load_configs(self):
        """Register every not-yet-known section found in the *.conf files.

        Sections already present in self.plugins are left untouched.
        """
        for conf_file in os.listdir(self.path):
            if not conf_file.endswith('.conf'):
                continue
            full_path = os.path.join(self.path, conf_file)
            conf = RawConfigParser()
            conf.read(full_path)
            for section in conf.sections():
                if section not in self.plugins:
                    self.plugins[section] = dict(conf.items(section))
                    self.plugins[section]['scheduled'] = False

    def _load_jobs(self):
        """Schedule each unscheduled plugin whose executable exists.

        Problems with a single plugin are logged as warnings and do not
        stop the remaining plugins from being processed.
        """
        for key, value in self.plugins.items():
            if value['scheduled']:
                continue

            try:
                interval = int(value['interval'])
                command = value['command']
                # An empty or whitespace-only command has no executable
                # part; treat it like a missing executable instead of
                # raising IndexError.
                argv = command.split()
                if argv and os.path.exists(argv[0]):
                    self.scheduler.add_job(self._execute,
                                           interval,
                                           actionargs=(key, command))
                    value['scheduled'] = True
                else:
                    self.logger.warning("%s doesn't exist" % command)

            except ValueError:
                self.logger.warning("Interval value for %s must be an int" %
                                    key)
            except KeyError:
                self.logger.warning("Error when parsing %s (%s)" %
                                    (self.path, key))

    def _execute(self, name, cmd):
        """Run `cmd` and publish an alert when its return code is not 0."""
        result = exec_cmd(cmd)
        if result['returncode'] != 0:
            result['name'] = name
            msg = OutgoingMessage(collection=self.__resource__,
                                  status=result,
                                  msg_type='alert')
            self.publish(msg)

    def close(self):
        """Close the parent controller, then shut the scheduler down."""
        super(NagiosPluginsController, self).close()
        self.logger.debug("Shutting down nagios scheduler")
        self.scheduler.shutdown()