class Reactionner(Satellite):
    """Reactionner daemon: a satellite that launches actions, not checks.

    The class declares its role through class attributes (``do_checks`` is
    off, ``do_actions`` is on) and overrides the pid file, port and local
    log defaults found in a copy of ``Satellite.properties``.

    As described by the original authors, the actions are notifications and
    event handlers launched for the schedulers. While running, the
    Reactionner responds to pings from the Arbiter and listens for new
    configurations from it; a configuration consists of the list of
    schedulers for which actions have to be launched.
    """

    my_type = 'reactionner'
    do_actions = True
    do_checks = False  # I do not do checks

    # Start from a copy of the parent's properties, then override ours
    properties = Satellite.properties.copy()
    properties.update(
        pidfile=PathProp(default='reactionnerd.pid'),
        port=IntegerProp(default=7769),
        local_log=PathProp(default='reactionnerd.log'),
    )

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file, profile=''):
        """Forward the daemon parameters to the Satellite constructor

        All parameters except ``profile`` are passed through unchanged, after
        the fixed daemon name 'reactionner'.

        :param profile: accepted but not used by this constructor
        :type profile: str
        """
        parent = super(Reactionner, self)
        parent.__init__('reactionner', config_file, is_daemon, do_replace,
                        debug, debug_file)
class Poller(Satellite):
    """Poller daemon: a satellite that runs checks and launches no actions.

    Referenced as "app" in most Interface
    """

    my_type = 'poller'
    do_actions = False  # no actions...
    do_checks = True  # ...but I do checks

    # Start from a copy of the parent's properties, then override ours
    properties = Satellite.properties.copy()
    properties.update(
        daemon_type=StringProp(default='poller'),
        pidfile=PathProp(default='pollerd.pid'),
        port=IntegerProp(default=7771),
        local_log=PathProp(default='pollerd.log'),
    )

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file,
                 port=None, local_log=None, daemon_name=None):
        """Set the daemon name, then delegate to the Satellite constructor

        :param daemon_name: name to use for this daemon; 'poller' is used
            when the value is empty or None
        :type daemon_name: str | None
        """
        # Any falsy value (None, '') falls back on the default name
        self.daemon_name = daemon_name if daemon_name else 'poller'

        parent = super(Poller, self)
        parent.__init__(self.daemon_name, config_file, is_daemon, do_replace,
                        debug, debug_file, port, local_log)
class Receiver(Satellite):
    """Receiver class.

    Referenced as "app" in most Interface
    """
    my_type = 'receiver'

    properties = Satellite.properties.copy()
    properties.update({
        'pidfile': PathProp(default='receiverd.pid'),
        'port': IntegerProp(default=7773),
        'local_log': PathProp(default='receiverd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file):
        """Initialize the daemon and its receiver specific containers

        The parameters are forwarded unchanged to the Satellite constructor,
        after the fixed daemon name 'receiver'.
        """
        super(Receiver, self).__init__('receiver', config_file, is_daemon, do_replace,
                                       debug, debug_file)

        # Our arbiters
        self.arbiters = {}

        # Our pollers and reactionners
        self.pollers = {}
        self.reactionners = {}

        # Modules are load one time
        self.have_modules = False

        # Can have a queue of external_commands give by modules
        # will be taken by arbiter to process
        self.external_commands = []
        # and the unprocessed one, a buffer
        self.unprocessed_external_commands = []

        # host_name -> scheduler id mapping, filled by push_host_names
        self.host_assoc = {}
        self.direct_routing = False
        self.accept_passive_unknown_check_results = False

        self.http_interface = ReceiverInterface(self)

        # Now create the external commander. It's just here to dispatch
        # the commands to schedulers
        ecm = ExternalCommandManager(None, 'receiver')
        ecm.load_receiver(self)
        self.external_command = ecm

    def add(self, elt):
        """Add an object to the receiver one
        Handles brok and externalcommand; any other type is silently ignored

        :param elt: object to add
        :type elt: object
        :return: None
        """
        cls_type = elt.__class__.my_type
        if cls_type == 'brok':
            # For brok, we TAG brok with our instance_id
            elt.instance_id = 0
            self.broks[elt._id] = elt
            return
        elif cls_type == 'externalcommand':
            # Log the received command itself: the class namespace
            # (ExternalCommand.__dict__) says nothing about this command.
            logger.debug("Enqueuing an external command: %s", str(elt.__dict__))
            self.unprocessed_external_commands.append(elt)

    def push_host_names(self, sched_id, hnames):
        """Link hostnames to scheduler id.
        Called by alignak.satellite.IForArbiter.push_host_names

        :param sched_id: scheduler id to link to
        :type sched_id: int
        :param hnames: host names list
        :type hnames: list
        :return: None
        """
        for h_name in hnames:
            self.host_assoc[h_name] = sched_id

    def get_sched_from_hname(self, hname):
        """Get scheduler linked to the given host_name

        :param hname: host_name we want the scheduler from
        :type hname: str
        :return: scheduler with id corresponding to the mapping table, None when
            the host name is not mapped or the mapped id is not a known scheduler
        :rtype: dict | None
        """
        sched_id = self.host_assoc.get(hname)
        return self.schedulers.get(sched_id)

    def manage_brok(self, brok):
        """Send brok to modules. Modules have to implement their own manage_brok function.
        They usually do if they inherits from basemodule
        REF: doc/receiver-modules.png (4-5)

        A module that raises while managing the brok is logged and then removed
        through the modules manager.

        :param brok: brok to manage
        :type brok: alignak.brok.Brok
        :return: None
        """
        to_del = []
        # Call all modules if they catch the call
        for mod in self.modules_manager.get_internal_instances():
            try:
                mod.manage_brok(brok)
            # Broad on purpose: a faulty module must never take the daemon down
            except Exception as exp:  # pylint: disable=broad-except
                logger.warning("The mod %s raise an exception: %s, I kill it",
                               mod.get_name(), str(exp))
                logger.warning("Exception type: %s", type(exp))
                logger.warning("Back trace of this kill: %s", traceback.format_exc())
                to_del.append(mod)
        # Now remove mod that raise an exception
        self.modules_manager.clear_instances(to_del)
class Broker(BaseSatellite):
    """
    Class to manage a Broker daemon
    A Broker is used to get data from Scheduler and send them to modules. These modules in most
    cases export to other softwares, databases...
    """
    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile': PathProp(default='brokerd.pid'),
        'port': IntegerProp(default=7772),
        'local_log': PathProp(default='brokerd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file, profile=''):
        """Initialize the daemon and the broker specific containers

        All parameters except ``profile`` are forwarded unchanged to the
        BaseSatellite constructor, after the fixed daemon name 'broker'.

        :param profile: accepted but not used by this constructor
        :type profile: str
        """
        super(Broker, self).__init__('broker', config_file, is_daemon, do_replace, debug,
                                     debug_file)

        # Our arbiters
        self.arbiters = {}

        # Our pollers, reactionners and receivers
        self.pollers = {}
        self.reactionners = {}
        self.receivers = {}

        # Modules are load one time
        self.have_modules = False

        # Can have a queue of external_commands given by modules
        # will be processed by arbiter
        self.external_commands = []

        # All broks to manage
        self.broks = []  # broks to manage
        # broks raised this turn and that needs to be put in self.broks
        self.broks_internal_raised = []
        # broks raised by the arbiters, we need a lock so the push can be in parallel
        # to our current activities and won't lock the arbiter
        self.arbiter_broks = []
        self.arbiter_broks_lock = threading.RLock()

        self.timeout = 1.0

        self.http_interface = BrokerInterface(self)

    def add(self, elt):
        """Add elt to this broker

        Original comment : Schedulers have some queues. We can simplify the call by adding
          elements into the proper queue just by looking at their type  Brok -> self.broks
          TODO: better tag ID?
          External commands -> self.external_commands

        Messages ('message' type) are handled in place: a 'NeedData' message resets
        the connection and running id of the requested scheduler, an 'ICrash'
        message is logged as an error.

        :param elt: object to add
        :type elt: object
        :return: None
        """
        cls_type = elt.__class__.my_type
        if cls_type == 'brok':
            # For brok, we TAG brok with our instance_id
            elt.instance_id = 0
            self.broks_internal_raised.append(elt)
            return
        elif cls_type == 'externalcommand':
            # Log the received command itself: the class namespace
            # (ExternalCommand.__dict__) says nothing about this command.
            logger.debug("Enqueuing an external command '%s'", str(elt.__dict__))
            self.external_commands.append(elt)
        # Maybe we got a Message from the modules, it's way to ask something
        # like from now a full data from a scheduler for example.
        elif cls_type == 'message':
            # We got a message, great!
            logger.debug(str(elt.__dict__))
            if elt.get_type() == 'NeedData':
                data = elt.get_data()
                # Full instance id means: I got no data for this scheduler
                # so give me all dumbass!
                if 'full_instance_id' in data:
                    c_id = data['full_instance_id']
                    source = elt.source
                    logger.info('The module %s is asking me to get all initial data '
                                'from the scheduler %d',
                                source, c_id)
                    # so we just reset the connection and the running_id,
                    # it will just get all new things
                    try:
                        self.schedulers[c_id]['con'] = None
                        self.schedulers[c_id]['running_id'] = 0
                    except KeyError:  # maybe this instance was not known, forget it
                        logger.warning("the module %s ask me a full_instance_id "
                                       "for an unknown ID (%d)!", source, c_id)
            # Maybe a module tells me that it's dead, I must log it's last words...
            if elt.get_type() == 'ICrash':
                data = elt.get_data()
                logger.error('the module %s just crash! Please look at the traceback:',
                             data['name'])
                logger.error(data['trace'])

                # The module death will be looked for elsewhere and restarted.

    def get_links_from_type(self, d_type):
        """If d_type parameter is in list, return this object linked, else None

        :param d_type: name of object ('scheduler', 'arbiter', 'poller',
            'reactionner' or 'receiver')
        :type d_type: str
        :return: return the object linked
        :rtype: object
        """
        s_type = {
            'scheduler': self.schedulers,
            'arbiter': self.arbiters,
            'poller': self.pollers,
            'reactionner': self.reactionners,
            'receiver': self.receivers,
        }
        return s_type.get(d_type)

    def is_connection_try_too_close(self, elt):
        """Check if last_connection has been made very recently

        :param elt: link description holding a 'last_connection' timestamp
        :type elt: dict
        :return: True if last connection has been made less than 5 seconds
        :rtype: bool
        """
        return time.time() - elt['last_connection'] < 5

    def pynag_con_init(self, _id, i_type='scheduler'):
        """Wrapper function for the real function do_
        just for timing the connection

        :param _id: id
        :type _id: int
        :param i_type: type of item
        :type i_type: str
        :return: whatever do_pynag_con_init returns (it only ever returns None)
        :rtype: None
        """
        _t0 = time.time()
        res = self.do_pynag_con_init(_id, i_type)
        statsmgr.incr('con-init.%s' % i_type, time.time() - _t0)
        return res

    def do_pynag_con_init(self, s_id, i_type='scheduler'):
        """Initialize or re-initialize connection with scheduler or arbiter if type == arbiter

        On any connection problem the link 'con' entry is reset to None, so that
        a later call can try again.

        :param s_id: s_id
        :type s_id: int
        :param i_type: type of item
        :type i_type: str
        :return: None
        """
        # Get the good links tab for looping..
        links = self.get_links_from_type(i_type)

        if links is None:
            logger.debug('Type unknown for connection! %s', i_type)
            return

        link = links[s_id]

        # default timeout for daemons like pollers/reactionners/...
        timeout = 3
        data_timeout = 120

        if i_type == 'scheduler':
            # If sched is not active, I do not try to init
            # it is just useless
            if not link['active']:
                return
            # schedulers also got real timeout to respect
            timeout = link['timeout']
            data_timeout = link['data_timeout']

        # If we try to connect too much, we slow down our tests
        if self.is_connection_try_too_close(link):
            return

        # Ok, we can now update it
        link['last_connection'] = time.time()

        running_id = link['running_id']
        uri = link['uri']
        try:
            con = link['con'] = HTTPClient(uri=uri,
                                           strong_ssl=link['hard_ssl_name_check'],
                                           timeout=timeout, data_timeout=data_timeout)
        except HTTPEXCEPTIONS as exp:
            # But the multiprocessing module is not compatible with it!
            # so we must disable it immediately after
            logger.info("Connection problem to the %s %s: %s",
                        i_type, link['name'], str(exp))
            link['con'] = None
            return

        try:
            # initial ping must be quick
            con.get('ping')
            new_run_id = con.get('get_running_id')
            new_run_id = float(new_run_id)
            # data transfer can be longer

            # The schedulers have been restarted: it has a new run_id.
            # So we clear all verifs, they are obsolete now.
            if new_run_id != running_id:
                logger.debug("[%s] New running s_id for the %s %s: %s (was %s)",
                             self.name, i_type, link['name'], new_run_id, running_id)
                link['broks'].clear()
                # we must ask for a new full broks if
                # it's a scheduler
                if i_type == 'scheduler':
                    logger.debug("[%s] I ask for a broks generation to the scheduler %s",
                                 self.name, link['name'])
                    con.get('fill_initial_broks', {'bname': self.name}, wait='long')
            # Ok all is done, we can save this new running s_id
            link['running_id'] = new_run_id
        except HTTPEXCEPTIONS as exp:
            logger.info("Connection problem to the %s %s: %s",
                        i_type, link['name'], str(exp))
            link['con'] = None
            return
class Alignak(BaseSatellite):
    """Scheduler class.

    Referenced as "app" in most Interface
    """

    properties = BaseSatellite.properties.copy()
    properties.update({
        'daemon_type': StringProp(default='scheduler'),
        'pidfile': PathProp(default='schedulerd.pid'),
        'port': IntegerProp(default=7768),
        'local_log': PathProp(default='schedulerd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file,
                 port=None, local_log=None, daemon_name=None):
        """Initialize the daemon, its HTTP interface and its Scheduler object

        :param daemon_name: name to use for this daemon; 'scheduler' is used
            when the value is empty or None
        :type daemon_name: str | None
        """
        self.daemon_name = 'scheduler'
        if daemon_name:
            self.daemon_name = daemon_name

        BaseSatellite.__init__(self, self.daemon_name, config_file, is_daemon, do_replace,
                               debug, debug_file, port, local_log)

        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        self.must_run = True

        # Now the interface
        self.uri = None
        self.uri2 = None

        # stats part
        # --- copied from scheduler.py
        self.nb_pulled_checks = 0
        self.nb_pulled_actions = 0
        # self.nb_checks_send = 0

        self.nb_pushed_checks = 0
        self.nb_pushed_actions = 0

        self.nb_broks_send = 0
        self.nb_pulled_broks = 0
        # ---

        # And possible links for satellites
        # from now only pollers
        self.pollers = {}
        self.reactionners = {}
        self.brokers = {}

    def compensate_system_time_change(self, difference, timeperiods):  # pragma: no cover,
        # not with unit tests
        """Compensate a system time change of difference for all hosts/services/checks/notifs

        Only checks and actions whose status is 'scheduled' and that have a
        launch time (t_to_go) are shifted.

        :param difference: difference in seconds
        :type difference: int
        :param timeperiods: container indexed with the check_period of the checked
            items; used to find the next valid time of the shifted checks
        :type timeperiods: object
        :return: None
        """
        logger.warning(
            "A system time change of %d has been detected. Compensating...", difference)
        # We only need to change some value
        self.program_start = max(0, self.program_start + difference)

        if not hasattr(self.sched, "conf"):
            # Race condition where time change before getting conf
            return

        # Then we compensate all host/services
        for host in self.sched.hosts:
            host.compensate_system_time_change(difference)
        for serv in self.sched.services:
            serv.compensate_system_time_change(difference)

        # Now all checks and actions
        for chk in self.sched.checks.values():
            # Already launch checks should not be touch
            if chk.status == 'scheduled' and chk.t_to_go is not None:
                t_to_go = chk.t_to_go
                ref = self.sched.find_item_by_id(chk.ref)
                new_t = max(0, t_to_go + difference)
                timeperiod = timeperiods[ref.check_period]
                if timeperiod is not None:
                    # But it's no so simple, we must match the timeperiod
                    new_t = timeperiod.get_next_valid_time_from_t(new_t)
                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    # NOTE(review): 'state' is written here while the guard above
                    # reads 'status' - confirm which attribute is intended.
                    chk.state = 'waitconsume'
                    chk.exit_status = 2
                    chk.output = '(Error: there is no available check time after time change!)'
                    chk.check_time = time.time()
                    chk.execution_time = 0
                else:
                    chk.t_to_go = new_t
                    ref.next_chk = new_t

        # Now all checks and actions
        for act in self.sched.actions.values():
            # Already launch checks should not be touch.
            # Same guard as for the checks: without a launch time there is
            # nothing to shift (and None + difference would raise a TypeError).
            if act.status == 'scheduled' and act.t_to_go is not None:
                t_to_go = act.t_to_go

                # Event handler do not have ref
                ref_id = getattr(act, 'ref', None)
                new_t = max(0, t_to_go + difference)

                # Notification should be check with notification_period
                if act.is_a == 'notification':
                    ref = self.sched.find_item_by_id(ref_id)
                    if ref.notification_period:
                        # But it's no so simple, we must match the timeperiod
                        notification_period = self.sched.timeperiods[
                            ref.notification_period]
                        new_t = notification_period.get_next_valid_time_from_t(
                            new_t)
                    # And got a creation_time variable too
                    # NOTE(review): nesting reconstructed - assumed to apply to
                    # every notification, not only those with a period; confirm.
                    act.creation_time += difference

                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    act.state = 'waitconsume'
                    act.exit_status = 2
                    act.output = '(Error: there is no available check time after time change!)'
                    act.check_time = time.time()
                    act.execution_time = 0
                else:
                    act.t_to_go = new_t

    def manage_signal(self, sig, frame):
        """Manage signals caught by the daemon
        signal.SIGUSR1 : ask the scheduler for a memory dump (need_dump_memory)
        signal.SIGUSR2 : ask the scheduler for an objects dump (need_objects_dump)
        any other signal : stop the scheduler, then let Daemon.manage_signal handle it

        :param sig: signal caught by daemon, compared with the signal module constants
        :type sig: int
        :param frame: current stack frame
        :type frame:
        :return: None
        TODO: Refactor with Daemon one
        """
        logger.info("scheduler process %d received a signal: %s", os.getpid(), str(sig))
        # If we got USR1, just dump memory
        if sig == signal.SIGUSR1:
            self.sched.need_dump_memory = True
        elif sig == signal.SIGUSR2:  # usr2, dump objects
            self.sched.need_objects_dump = True
        else:  # if not, die :)
            # NOTE(review): nesting reconstructed - the Daemon.manage_signal call is
            # assumed to belong to this branch only; confirm.
            logger.info("scheduler process %d is dying...", os.getpid())
            self.sched.die()
            self.must_run = False
            Daemon.manage_signal(self, sig, frame)

    def do_loop_turn(self):
        """Scheduler loop turn
        Basically wait initial conf and run

        :return: None
        """
        # Ok, now the conf
        self.wait_for_initial_conf()
        if not self.new_conf:
            return
        logger.info("New configuration received")
        self.setup_new_conf()
        logger.info(
            "[%s] New configuration loaded, scheduling for Alignak: %s",
            self.name, self.sched.alignak_name)
        self.sched.run()

    def setup_new_conf(self):  # pylint: disable=too-many-statements
        """Setup new conf received for scheduler

        :return: None
        :raises AlignakClassLookupException: when the received configuration
            cannot be un-serialized; the pending configuration is dropped first
        """
        with self.conf_lock:
            self.clean_previous_run()
            new_conf = self.new_conf
            logger.info("[%s] Sending us a configuration", self.name)
            conf_raw = new_conf['conf']
            override_conf = new_conf['override_conf']
            modules = new_conf['modules']
            satellites = new_conf['satellites']
            instance_name = new_conf['instance_name']

            # Ok now we can save the retention data
            if hasattr(self.sched, 'conf'):
                self.sched.update_retention_file(forced=True)

            # horay, we got a name, we can set it in our stats objects
            statsmgr.register(instance_name, 'scheduler',
                              statsd_host=new_conf['statsd_host'],
                              statsd_port=new_conf['statsd_port'],
                              statsd_prefix=new_conf['statsd_prefix'],
                              statsd_enabled=new_conf['statsd_enabled'])

            t00 = time.time()
            try:
                conf = unserialize(conf_raw)
            except AlignakClassLookupException as exp:  # pragma: no cover, simple protection
                logger.error(
                    'Cannot un-serialize configuration received from arbiter: %s', exp)
                # Without a configuration object nothing below can work: going on
                # would only fail later with an unrelated UnboundLocalError on
                # 'conf'. Drop the unusable configuration, as the normal path
                # does, and let the real error propagate.
                self.new_conf = None
                raise
            logger.debug("Conf received at %d. Un-serialized in %d secs",
                         t00, time.time() - t00)
            self.new_conf = None

            if 'scheduler_name' in new_conf:
                name = new_conf['scheduler_name']
            else:
                name = instance_name
            self.name = name

            # Set my own process title
            self.set_proctitle(self.name)

            logger.info("[%s] Received a new configuration, containing: ", self.name)
            for key in new_conf:
                logger.info("[%s] - %s", self.name, key)
            logger.info("[%s] configuration identifiers: %s (%s)",
                        self.name, new_conf['conf_uuid'], new_conf['push_flavor'])

            # Tag the conf with our data
            self.conf = conf
            self.conf.push_flavor = new_conf['push_flavor']
            self.conf.alignak_name = new_conf['alignak_name']
            self.conf.instance_name = instance_name
            self.conf.skip_initial_broks = new_conf['skip_initial_broks']
            self.conf.accept_passive_unknown_check_results = \
                new_conf['accept_passive_unknown_check_results']

            self.cur_conf = conf
            self.override_conf = override_conf
            self.modules = unserialize(modules, True)
            self.satellites = satellites

            # Now We create our pollers, reactionners and brokers
            for sat_type in ['pollers', 'reactionners', 'brokers']:
                if sat_type not in satellites:
                    continue
                for sat_id in satellites[sat_type]:
                    # Must look if we already have it
                    sats = getattr(self, sat_type)
                    sat = satellites[sat_type][sat_id]

                    sats[sat_id] = sat

                    # The address/port override only applies to the uri: the
                    # stored link keeps the received values, a copy is updated
                    if sat['name'] in override_conf['satellitemap']:
                        sat = dict(sat)  # make a copy
                        sat.update(override_conf['satellitemap'][sat['name']])

                    proto = 'http'
                    if sat['use_ssl']:
                        proto = 'https'
                    uri = '%s://%s:%s/' % (proto, sat['address'], sat['port'])

                    sats[sat_id]['uri'] = uri
                    sats[sat_id]['con'] = None
                    sats[sat_id]['running_id'] = 0
                    sats[sat_id]['last_connection'] = 0
                    sats[sat_id]['connection_attempt'] = 0
                    sats[sat_id]['max_failed_connections'] = 3
                    setattr(self, sat_type, sats)
                logger.debug("We have our %s: %s ", sat_type, satellites[sat_type])
                logger.info("We have our %s:", sat_type)
                for daemon in satellites[sat_type].values():
                    logger.info(" - %s ", daemon['name'])

            # First mix conf and override_conf to have our definitive conf
            for prop in self.override_conf:
                val = self.override_conf[prop]
                setattr(self.conf, prop, val)

            if self.conf.use_timezone != '':
                logger.info("Setting our timezone to %s", str(self.conf.use_timezone))
                os.environ['TZ'] = self.conf.use_timezone
                time.tzset()

            self.do_load_modules(self.modules)

            logger.info("Loading configuration.")
            self.conf.explode_global_conf()  # pylint: disable=E1101

            # we give sched it's conf
            self.sched.reset()
            self.sched.load_conf(self.conf)
            self.sched.load_satellites(self.pollers, self.reactionners, self.brokers)

            # We must update our Config dict macro with good value
            # from the config parameters
            self.sched.conf.fill_resource_macros_names_macros()

            # Creating the Macroresolver Class & unique instance
            m_solver = MacroResolver()
            m_solver.init(self.conf)

            # self.conf.dump()
            # self.conf.quick_debug()

            # Now create the external commands manager
            # We are an applyer: our role is not to dispatch commands, but to apply them
            ecm = ExternalCommandManager(self.conf, 'applyer', self.sched)

            # Scheduler needs to know about this external command manager to use it if necessary
            self.sched.set_external_commands_manager(ecm)
            # Update External Commands Manager
            self.sched.external_commands_manager.accept_passive_unknown_check_results = \
                self.sched.conf.accept_passive_unknown_check_results

            # We clear our schedulers managed (it's us :) )
            # and set ourselves in it
            self.schedulers = {self.conf.uuid: self.sched}  # pylint: disable=E1101

            # Ok now we can load the retention data
            self.sched.retention_load()

            # Create brok new conf
            brok = Brok({'type': 'new_conf', 'data': {}})
            self.sched.add_brok(brok)

    def what_i_managed(self):
        # pylint: disable=no-member
        """Get my managed dict (instance id and push_flavor)

        :return: dict containing instance_id key and push flavor value,
            empty as long as no configuration is set
        :rtype: dict
        """
        if hasattr(self, 'conf'):
            return {self.conf.uuid: self.conf.push_flavor}  # pylint: disable=E1101
        return {}

    def clean_previous_run(self):
        """Clean variables from previous configuration

        :return: None
        """
        # Clean all lists
        self.pollers.clear()
        self.reactionners.clear()
        self.brokers.clear()

    def main(self):
        """Main function for Scheduler, launch after the init::

        * Init daemon
        * Load module manager
        * Launch main loop
        * Catch any Exception that occurs

        Any exception is reported with print_unrecoverable and then re-raised.

        :return: None
        """
        try:
            self.setup_alignak_logger()

            # Look if we are enabled or not. If ok, start the daemon mode
            self.look_for_early_exit()

            # todo:
            # This function returns False if some problem is detected during initialization
            # (eg. communication port not free)
            # Perharps we should stop the initialization process and exit?
            if not self.do_daemon_init_and_start():
                return

            self.load_modules_manager(self.name)

            self.uri = self.http_daemon.uri
            logger.info("[Scheduler] General interface is at: %s", self.uri)

            self.do_mainloop()
        except Exception:
            self.print_unrecoverable(traceback.format_exc())
            raise
class Broker(BaseSatellite): """ Class to manage a Broker daemon A Broker is used to get data from Scheduler and send them to modules. These modules in most cases export to other software, databases... """ properties = BaseSatellite.properties.copy() properties.update({ 'daemon_type': StringProp(default='broker'), 'pidfile': PathProp(default='brokerd.pid'), 'port': IntegerProp(default=7772), 'local_log': PathProp(default='brokerd.log'), }) def __init__(self, config_file, is_daemon, do_replace, debug, debug_file, port=None, local_log=None, daemon_name=None): self.daemon_name = 'broker' if daemon_name: self.daemon_name = daemon_name super(Broker, self).__init__(self.daemon_name, config_file, is_daemon, do_replace, debug, debug_file, port, local_log) # Our arbiters self.arbiters = {} # Our pollers, reactionners and receivers self.pollers = {} self.reactionners = {} self.receivers = {} # Modules are load one time self.have_modules = False # Can have a queue of external_commands given by modules # will be processed by arbiter self.external_commands = [] # All broks to manage self.broks = [] # broks to manage # broks raised this turn and that needs to be put in self.broks self.broks_internal_raised = [] # broks raised by the arbiters, we need a lock so the push can be in parallel # to our current activities and won't lock the arbiter self.arbiter_broks = [] self.arbiter_broks_lock = threading.RLock() self.timeout = 1.0 self.http_interface = BrokerInterface(self) def add(self, elt): # pragma: no cover, seems not to be used """Add elt to this broker Original comment : Schedulers have some queues. We can simplify the call by adding elements into the proper queue just by looking at their type Brok -> self.broks TODO: better tag ID? External commands -> self.external_commands TODO: is it useful? 
:param elt: object to add :type elt: object :return: None """ cls_type = elt.__class__.my_type if cls_type == 'brok': # We tag the broks with our instance_id elt.instance_id = self.instance_id self.broks_internal_raised.append(elt) return elif cls_type == 'externalcommand': self.external_commands.append(elt) # Maybe we got a Message from the modules, it's way to ask something # like from now a full data from a scheduler for example. elif cls_type == 'message': # We got a message, great! logger.debug(str(elt.__dict__)) if elt.get_type() == 'NeedData': data = elt.get_data() # Full instance id means: I got no data for this scheduler # so give me all dumb-ass! if 'full_instance_id' in data: c_id = data['full_instance_id'] source = elt.source logger.info('The module %s is asking me to get all initial data ' 'from the scheduler %d', source, c_id) # so we just reset the connection and the running_id, # it will just get all new things try: self.schedulers[c_id]['con'] = None self.schedulers[c_id]['running_id'] = 0 except KeyError: # maybe this instance was not known, forget it logger.warning("the module %s ask me a full_instance_id " "for an unknown ID (%d)!", source, c_id) # Maybe a module tells me that it's dead, I must log it's last words... if elt.get_type() == 'ICrash': data = elt.get_data() logger.error('the module %s just crash! Please look at the traceback:', data['name']) logger.error(data['trace']) # The module death will be looked for elsewhere and restarted. def manage_brok(self, brok): """Get a brok. 
We put brok data to the modules :param brok: object with data :type brok: object :return: None """ # Call all modules if they catch the call for mod in self.modules_manager.get_internal_instances(): try: _t0 = time.time() mod.manage_brok(brok) statsmgr.timer('core.manage-broks.%s' % mod.get_name(), time.time() - _t0) except Exception as exp: # pylint: disable=broad-except logger.warning("The mod %s raise an exception: %s, I'm tagging it to restart later", mod.get_name(), str(exp)) logger.exception(exp) self.modules_manager.set_to_restart(mod) def add_broks_to_queue(self, broks): """ Add broks to global queue :param broks: some items :type broks: object :return: None """ # Ok now put in queue broks to be managed by # internal modules self.broks.extend(broks) def interger_internal_broks(self): """Get all broks from self.broks_internal_raised and we put them in self.broks :return: None """ self.add_broks_to_queue(self.broks_internal_raised) self.broks_internal_raised = [] def interger_arbiter_broks(self): """We will get in the broks list the broks from the arbiters, but as the arbiter_broks list can be push by arbiter without Global lock, we must protect this with he list lock :return: None """ with self.arbiter_broks_lock: self.add_broks_to_queue(self.arbiter_broks) self.arbiter_broks = [] def get_new_broks(self, s_type='scheduler'): """Get new broks from daemon defined in type parameter :param s_type: type of object :type s_type: str :return: None """ # Get the good links tab for looping.. links = self.get_links_from_type(s_type) if links is None: logger.debug('Type unknown for connection! 
%s', s_type) return # We check for new check in each schedulers and put # the result in new_checks for s_id in links: logger.debug("Getting broks from %s", links[s_id]['name']) link = links[s_id] logger.debug("Link: %s", link) if not link['active']: logger.debug("The %s '%s' is not active, " "do not get broks from its connection!", s_type, link['name']) continue if link['con'] is None: if not self.daemon_connection_init(s_id, s_type=s_type): if link['connection_attempt'] <= link['max_failed_connections']: logger.warning("The connection for the %s '%s' cannot be established, " "it is not possible to get broks from this daemon.", s_type, link['name']) else: logger.error("The connection for the %s '%s' cannot be established, " "it is not possible to get broks from this daemon.", s_type, link['name']) continue try: _t0 = time.time() tmp_broks = link['con'].get('get_broks', {'bname': self.name}, wait='long') try: tmp_broks = unserialize(tmp_broks, True) except AlignakClassLookupException as exp: # pragma: no cover, # simple protection logger.error('Cannot un-serialize data received from "get_broks" call: %s', exp) continue if tmp_broks: logger.debug("Got %d Broks from %s in %s", len(tmp_broks), link['name'], time.time() - _t0) statsmgr.timer('con-broks-get.%s' % (link['name']), time.time() - _t0) statsmgr.gauge('con-broks-count.%s' % (link['name']), len(tmp_broks.values())) for brok in tmp_broks.values(): brok.instance_id = link['instance_id'] # Ok, we can add theses broks to our queues _t0 = time.time() self.add_broks_to_queue(tmp_broks.values()) statsmgr.timer('con-broks-add.%s' % s_type, time.time() - _t0) except HTTPClientConnectionException as exp: # pragma: no cover, simple protection logger.warning("[%s] %s", link['name'], str(exp)) link['con'] = None return except HTTPClientTimeoutException as exp: # pragma: no cover, simple protection logger.warning("Connection timeout with the %s '%s' when getting broks: %s", s_type, link['name'], str(exp)) link['con'] = None 
return except HTTPClientException as exp: # pragma: no cover, simple protection logger.error("Error with the %s '%s' when getting broks: %s", s_type, link['name'], str(exp)) link['con'] = None return # scheduler must not have checks # What the F**k? We do not know what happened, # so.. bye bye :) except Exception as exp: # pylint: disable=broad-except logger.exception(exp) sys.exit(1) def get_retention_data(self): # pragma: no cover, useful? """Get all broks TODO: using retention in the broker is dangerous and do not seem of any utility with Alignak :return: broks container :rtype: object """ return self.broks def restore_retention_data(self, data): # pragma: no cover, useful? """Add data to broks container TODO: using retention in the arbiter is dangerous and do not seem of any utility with Alignak :param data: broks to add :type data: list :return: None """ self.broks.extend(data) def do_stop(self): """Stop all children of this process :return: None """ act = active_children() for child in act: child.terminate() child.join(1) super(Broker, self).do_stop() def setup_new_conf(self): # pylint: disable=R0915,R0912 """Parse new configuration and initialize all required :return: None """ with self.conf_lock: self.clean_previous_run() conf = unserialize(self.new_conf, True) self.new_conf = None self.cur_conf = conf # Got our name from the globals g_conf = conf['global'] if 'broker_name' in g_conf: name = g_conf['broker_name'] else: name = 'Unnamed broker' self.name = name # Set my own process title self.set_proctitle(self.name) logger.info("[%s] Received a new configuration, containing:", self.name) for key in conf: logger.info("[%s] - %s", self.name, key) logger.debug("[%s] global configuration part: %s", self.name, conf['global']) # local statsd self.statsd_host = g_conf['statsd_host'] self.statsd_port = g_conf['statsd_port'] self.statsd_prefix = g_conf['statsd_prefix'] self.statsd_enabled = g_conf['statsd_enabled'] # We got a name so we can update the logger and the 
stats global objects statsmgr.register(name, 'broker', statsd_host=self.statsd_host, statsd_port=self.statsd_port, statsd_prefix=self.statsd_prefix, statsd_enabled=self.statsd_enabled) # Get our Schedulers for sched_id in conf['schedulers']: # Must look if we already have it to do not overdie our broks old_sched_id = self.get_previous_sched_id(conf['schedulers'][sched_id], sched_id) if old_sched_id: logger.info("[%s] We already got the conf %s (%s)", self.name, old_sched_id, name) broks = self.schedulers[old_sched_id]['broks'] running_id = self.schedulers[old_sched_id]['running_id'] del self.schedulers[old_sched_id] else: broks = {} running_id = 0 sched = conf['schedulers'][sched_id] self.schedulers[sched_id] = sched # replacing scheduler address and port by those defined in satellitemap if sched['name'] in g_conf['satellitemap']: sched = dict(sched) # make a copy sched.update(g_conf['satellitemap'][sched['name']]) # todo: why not using a SatteliteLink object? proto = 'http' if sched['use_ssl']: proto = 'https' uri = '%s://%s:%s/' % (proto, sched['address'], sched['port']) self.schedulers[sched_id]['uri'] = uri self.schedulers[sched_id]['broks'] = broks self.schedulers[sched_id]['instance_id'] = sched['instance_id'] self.schedulers[sched_id]['running_id'] = running_id self.schedulers[sched_id]['active'] = sched['active'] self.schedulers[sched_id]['last_connection'] = 0 self.schedulers[sched_id]['timeout'] = sched['timeout'] self.schedulers[sched_id]['data_timeout'] = sched['data_timeout'] self.schedulers[sched_id]['con'] = None self.schedulers[sched_id]['last_connection'] = 0 self.schedulers[sched_id]['connection_attempt'] = 0 self.schedulers[sched_id]['max_failed_connections'] = 3 logger.debug("We have our schedulers: %s", self.schedulers) logger.info("We have our schedulers:") for daemon in self.schedulers.values(): logger.info(" - %s ", daemon['name']) # Now get arbiters for arb_id in conf['arbiters']: # Must look if we already have it already_got = arb_id in 
self.arbiters if already_got: broks = self.arbiters[arb_id]['broks'] else: broks = {} arb = conf['arbiters'][arb_id] self.arbiters[arb_id] = arb # replacing arbiter address and port by those defined in satellitemap if arb['name'] in g_conf['satellitemap']: arb = dict(arb) # make a copy arb.update(g_conf['satellitemap'][arb['name']]) # todo: why not using a SatteliteLink object? proto = 'http' if arb['use_ssl']: proto = 'https' uri = '%s://%s:%s/' % (proto, arb['address'], arb['port']) self.arbiters[arb_id]['uri'] = uri self.arbiters[arb_id]['broks'] = broks self.arbiters[arb_id]['instance_id'] = 0 # No use so all to 0 self.arbiters[arb_id]['running_id'] = 0 self.arbiters[arb_id]['con'] = None self.arbiters[arb_id]['last_connection'] = 0 self.arbiters[arb_id]['connection_attempt'] = 0 self.arbiters[arb_id]['max_failed_connections'] = 3 # We do not connect to the arbiter. Connection hangs logger.debug("We have our arbiters: %s ", self.arbiters) logger.info("We have our arbiters:") for daemon in self.arbiters.values(): logger.info(" - %s ", daemon['name']) # Now for pollers # 658: temporary fix if 'pollers' in conf: for pol_id in conf['pollers']: # Must look if we already have it already_got = pol_id in self.pollers if already_got: broks = self.pollers[pol_id]['broks'] running_id = self.pollers[pol_id]['running_id'] else: broks = {} running_id = 0 poll = conf['pollers'][pol_id] self.pollers[pol_id] = poll # replacing poller address and port by those defined in satellitemap if poll['name'] in g_conf['satellitemap']: poll = dict(poll) # make a copy poll.update(g_conf['satellitemap'][poll['name']]) # todo: why not using a SatteliteLink object? 
proto = 'http' if poll['use_ssl']: proto = 'https' uri = '%s://%s:%s/' % (proto, poll['address'], poll['port']) self.pollers[pol_id]['uri'] = uri self.pollers[pol_id]['broks'] = broks self.pollers[pol_id]['instance_id'] = 0 # No use so all to 0 self.pollers[pol_id]['running_id'] = running_id self.pollers[pol_id]['con'] = None self.pollers[pol_id]['last_connection'] = 0 self.pollers[pol_id]['connection_attempt'] = 0 self.pollers[pol_id]['max_failed_connections'] = 3 else: logger.warning("[%s] no pollers in the received configuration", self.name) logger.debug("We have our pollers: %s", self.pollers) logger.info("We have our pollers:") for daemon in self.pollers.values(): logger.info(" - %s ", daemon['name']) # Now reactionners # 658: temporary fix if 'reactionners' in conf: for rea_id in conf['reactionners']: # Must look if we already have it already_got = rea_id in self.reactionners if already_got: broks = self.reactionners[rea_id]['broks'] running_id = self.reactionners[rea_id]['running_id'] else: broks = {} running_id = 0 reac = conf['reactionners'][rea_id] self.reactionners[rea_id] = reac # replacing reactionner address and port by those defined in satellitemap if reac['name'] in g_conf['satellitemap']: reac = dict(reac) # make a copy reac.update(g_conf['satellitemap'][reac['name']]) # todo: why not using a SatteliteLink object? 
proto = 'http' if reac['use_ssl']: proto = 'https' uri = '%s://%s:%s/' % (proto, reac['address'], reac['port']) self.reactionners[rea_id]['uri'] = uri self.reactionners[rea_id]['broks'] = broks self.reactionners[rea_id]['instance_id'] = 0 # No use so all to 0 self.reactionners[rea_id]['running_id'] = running_id self.reactionners[rea_id]['con'] = None self.reactionners[rea_id]['last_connection'] = 0 self.reactionners[rea_id]['connection_attempt'] = 0 self.reactionners[rea_id]['max_failed_connections'] = 3 else: logger.warning("[%s] no reactionners in the received configuration", self.name) logger.debug("We have our reactionners: %s", self.reactionners) logger.info("We have our reactionners:") for daemon in self.reactionners.values(): logger.info(" - %s ", daemon['name']) # Now receivers # 658: temporary fix if 'receivers' in conf: for rec_id in conf['receivers']: # Must look if we already have it already_got = rec_id in self.receivers if already_got: broks = self.receivers[rec_id]['broks'] running_id = self.receivers[rec_id]['running_id'] else: broks = {} running_id = 0 rec = conf['receivers'][rec_id] self.receivers[rec_id] = rec # replacing reactionner address and port by those defined in satellitemap if rec['name'] in g_conf['satellitemap']: rec = dict(rec) # make a copy rec.update(g_conf['satellitemap'][rec['name']]) # todo: why not using a SatteliteLink object? 
proto = 'http' if rec['use_ssl']: proto = 'https' uri = '%s://%s:%s/' % (proto, rec['address'], rec['port']) self.receivers[rec_id]['uri'] = uri self.receivers[rec_id]['broks'] = broks self.receivers[rec_id]['instance_id'] = rec['instance_id'] self.receivers[rec_id]['running_id'] = running_id self.receivers[rec_id]['con'] = None self.receivers[rec_id]['last_connection'] = 0 self.receivers[rec_id]['connection_attempt'] = 0 self.receivers[rec_id]['max_failed_connections'] = 3 else: logger.warning("[%s] no receivers in the received configuration", self.name) logger.debug("We have our receivers: %s", self.receivers) logger.info("We have our receivers:") for daemon in self.receivers.values(): logger.info(" - %s ", daemon['name']) if not self.have_modules: self.modules = conf['global']['modules'] self.have_modules = True # Ok now start, or restart them! # Set modules, init them and start external ones self.do_load_modules(self.modules) self.modules_manager.start_external_instances() # Set our giving timezone from arbiter use_timezone = conf['global']['use_timezone'] if use_timezone != 'NOTSET': logger.info("Setting our timezone to %s", use_timezone) os.environ['TZ'] = use_timezone time.tzset() # Initialize connection with Schedulers, Pollers and Reactionners for sched_id in self.schedulers: self.daemon_connection_init(sched_id, s_type='scheduler') for pol_id in self.pollers: self.daemon_connection_init(pol_id, s_type='poller') for rea_id in self.reactionners: self.daemon_connection_init(rea_id, s_type='reactionner') def clean_previous_run(self): """Clean all (when we received new conf) :return: None """ # Clean all lists self.schedulers.clear() self.pollers.clear() self.reactionners.clear() self.receivers.clear() self.broks = self.broks[:] self.arbiters.clear() self.broks_internal_raised = self.broks_internal_raised[:] with self.arbiter_broks_lock: self.arbiter_broks = self.arbiter_broks[:] self.external_commands = self.external_commands[:] # And now modules 
self.have_modules = False self.modules_manager.clear_instances() def get_stats_struct(self): """Get information of modules (internal and external) and add metrics of them :return: dictionary with state of all modules (internal and external) :rtype: dict :return: None """ now = int(time.time()) # call the daemon one res = super(Broker, self).get_stats_struct() res.update({'name': self.name, 'type': 'broker'}) metrics = res['metrics'] # metrics specific metrics.append('broker.%s.external-commands.queue %d %d' % ( self.name, len(self.external_commands), now)) metrics.append('broker.%s.broks.queue %d %d' % (self.name, len(self.broks), now)) return res def do_loop_turn(self): """Loop use to: * check if modules are alive, if not restart them * add broks to queue of each modules :return: None """ logger.debug("Begin Loop: managing old broks (%d)", len(self.broks)) # Dump modules Queues size insts = [inst for inst in self.modules_manager.instances if inst.is_external] for inst in insts: try: logger.debug("External Queue len (%s): %s", inst.get_name(), inst.to_q.qsize()) except Exception, exp: # pylint: disable=W0703 logger.debug("External Queue len (%s): Exception! %s", inst.get_name(), exp) # Begin to clean modules self.check_and_del_zombie_modules() # Now we check if we received a new configuration - no sleep time, we will sleep later... 
self.watch_for_new_conf() if self.new_conf: self.setup_new_conf() # Maybe the last loop we did raised some broks internally _t0 = time.time() # we should integrate them in broks self.interger_internal_broks() statsmgr.timer('get-new-broks.broker', time.time() - _t0) _t0 = time.time() # Also reap broks sent from the arbiters self.interger_arbiter_broks() statsmgr.timer('get-new-broks.arbiter', time.time() - _t0) # Main job, go get broks in our distant daemons types = ['scheduler', 'poller', 'reactionner', 'receiver'] for _type in types: _t0 = time.time() # And from schedulers self.get_new_broks(s_type=_type) statsmgr.timer('get-new-broks.%s' % _type, time.time() - _t0) # Sort the brok list by id self.broks.sort(sort_by_ids) # and for external queues # REF: doc/broker-modules.png (3) # We put to external queues broks that was not already send t00 = time.time() # We are sending broks as a big list, more efficient than one by one ext_modules = self.modules_manager.get_external_instances() to_send = [brok for brok in self.broks if getattr(brok, 'need_send_to_ext', True)] # Send our pack to all external modules to_q queue so they can get the whole packet # beware, the sub-process/queue can be die/close, so we put to restart the whole module # instead of killing ourselves :) for mod in ext_modules: try: t000 = time.time() mod.to_q.put(to_send) statsmgr.timer('core.put-to-external-queue.%s' % mod.get_name(), time.time() - t000) except Exception as exp: # pylint: disable=broad-except # first we must find the modules logger.warning("The mod %s queue raise an exception: %s, " "I'm tagging it to restart later", mod.get_name(), str(exp)) logger.exception(exp) self.modules_manager.set_to_restart(mod) # No more need to send them for brok in to_send: brok.need_send_to_ext = False statsmgr.timer('core.put-to-external-queue', time.time() - t00) logger.debug("Time to send %s broks (%d secs)", len(to_send), time.time() - t00) # We must add new broks at the end of the list, so we 
reverse the list self.broks.reverse() start = time.time() while self.broks: now = time.time() # Do not 'manage' more than 1s, we must get new broks # every 1s if now - start > 1: break brok = self.broks.pop() # Ok, we can get the brok, and doing something with it # REF: doc/broker-modules.png (4-5) # We un serialize the brok before consume it brok.prepare() _t0 = time.time() self.manage_brok(brok) statsmgr.timer('core.manage-broks', time.time() - _t0) nb_broks = len(self.broks) # Ok we manage brok, but we still want to listen to arbiter even for a very short time self.make_a_pause(0.01, check_time_change=False) # if we got new broks here from arbiter, we should break the loop # because such broks will not be managed by the # external modules before this loop (we pop them!) if len(self.broks) != nb_broks: break # Maybe external modules raised 'objects' # we should get them self.get_objects_from_from_queues() # Maybe we do not have something to do, so we wait a little # TODO: redone the diff management.... if not self.broks: while self.timeout > 0: begin = time.time() self.watch_for_new_conf(1.0) end = time.time() self.timeout = self.timeout - (end - begin) self.timeout = 1.0 # Say to modules it's a new tick :) self.hook_point('tick')
class Alignak(BaseSatellite):
    """Scheduler class. Referenced as "app" in most Interface

    The daemon waits for a configuration, loads it into its ``Scheduler``
    object (``self.sched``) and then runs that scheduler.
    """

    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile': PathProp(default='schedulerd.pid'),
        'port': IntegerProp(default=7768),
        'local_log': PathProp(default='schedulerd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file, profile=''):
        """Create the scheduler daemon, its HTTP interface and its Scheduler object

        The parameters are forwarded unchanged to ``BaseSatellite.__init__``,
        except ``profile`` which is accepted but not used here.
        """
        BaseSatellite.__init__(self, 'scheduler', config_file, is_daemon, do_replace, debug,
                               debug_file)

        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        self.must_run = True

        # Now the interface
        self.uri = None
        self.uri2 = None

        # And possible links for satellites
        # from now only pollers
        self.pollers = {}
        self.reactionners = {}
        self.brokers = {}

    def compensate_system_time_change(self, difference):
        """Compensate a system time change of difference for all hosts/services/checks/notifs

        :param difference: difference in seconds
        :type difference: int
        :return: None
        """
        logger.warning(
            "A system time change of %d has been detected. Compensating...", difference)
        # We only need to change some value
        self.program_start = max(0, self.program_start + difference)

        if not hasattr(self.sched, "conf"):
            # Race condition where time change before getting conf
            return

        # Then we compensate all host/services
        for host in self.sched.hosts:
            host.compensate_system_time_change(difference)
        for serv in self.sched.services:
            serv.compensate_system_time_change(difference)

        # Now all checks and actions
        for chk in self.sched.checks.values():
            # Already launch checks should not be touch
            if chk.status == 'scheduled' and chk.t_to_go is not None:
                t_to_go = chk.t_to_go
                ref = chk.ref
                new_t = max(0, t_to_go + difference)
                if ref.check_period is not None:
                    # But it's no so simple, we must match the timeperiod
                    new_t = ref.check_period.get_next_valid_time_from_t(new_t)
                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    # NOTE(review): 'state' is written while 'status' is tested above
                    # - confirm which attribute is meant.
                    chk.state = 'waitconsume'
                    chk.exit_status = 2
                    chk.output = '(Error: there is no available check time after time change!)'
                    chk.check_time = time.time()
                    chk.execution_time = 0
                else:
                    chk.t_to_go = new_t
                    ref.next_chk = new_t

        # Now all checks and actions
        for act in self.sched.actions.values():
            # Already launch checks should not be touch
            if act.status == 'scheduled':
                t_to_go = act.t_to_go

                # Event handler do not have ref
                ref = getattr(act, 'ref', None)
                new_t = max(0, t_to_go + difference)

                # Notification should be check with notification_period
                if act.is_a == 'notification':
                    if ref.notification_period:
                        # But it's no so simple, we must match the timeperiod
                        new_t = ref.notification_period.get_next_valid_time_from_t(
                            new_t)
                    # And got a creation_time variable too
                    act.creation_time += difference

                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    # NOTE(review): same 'state' vs 'status' question as for the checks
                    act.state = 'waitconsume'
                    act.exit_status = 2
                    act.output = '(Error: there is no available check time after time change!)'
                    act.check_time = time.time()
                    act.execution_time = 0
                else:
                    act.t_to_go = new_t

    def manage_signal(self, sig, frame):
        """Manage signals caught by the daemon
        signal.SIGUSR1 : dump_memory
        signal.SIGUSR2 : dump_object (nothing)
        signal.SIGTERM, signal.SIGINT : terminate process

        :param sig: signal caught by daemon
        :type sig: str
        :param frame: current stack frame
        :type frame:
        :return: None
        TODO: Refactor with Daemon one
        """
        logger.warning("%s > Received a SIGNAL %s", process.current_process(), sig)
        # If we got USR1, just dump memory
        if sig == signal.SIGUSR1:
            self.sched.need_dump_memory = True
        elif sig == signal.SIGUSR2:  # usr2, dump objects
            self.sched.need_objects_dump = True
        else:  # if not, die :)
            # Stop the scheduler first, then let the generic daemon handler run
            self.sched.die()
            self.must_run = False
            Daemon.manage_signal(self, sig, frame)

    def do_loop_turn(self):
        """Scheduler loop turn
        Basically wait initial conf and run

        :return: None
        """
        # Ok, now the conf
        self.wait_for_initial_conf()
        if not self.new_conf:
            return
        logger.info("New configuration received")
        self.setup_new_conf()
        logger.info("New configuration loaded")
        self.sched.run()

    @staticmethod
    def _link_satellites(links, store, satellitemap):
        """Register the received satellite links and compute their URI

        Each link is stored as received; the ``satellitemap`` overrides are applied
        to a copy that is only used to build the ``uri`` of the stored link.

        :param links: satellite links received in the configuration, indexed by id
        :type links: dict
        :param store: daemon dictionary to fill (pollers or reactionners)
        :type store: dict
        :param satellitemap: overriding values, indexed by satellite name
        :type satellitemap: dict
        :return: None
        """
        for sat_id in links:
            link = links[sat_id]
            store[sat_id] = link

            effective = link
            if link['name'] in satellitemap:
                effective = dict(link)  # make a copy
                effective.update(satellitemap[link['name']])

            # The protocol is chosen from the link itself
            proto = 'https' if effective['use_ssl'] else 'http'
            uri = '%s://%s:%s/' % (proto, effective['address'], effective['port'])
            store[sat_id]['uri'] = uri
            store[sat_id]['last_connection'] = 0

    def setup_new_conf(self):
        """Setup new conf received for scheduler

        Under ``conf_lock``: registers the statistics manager, unpickles the raw
        configuration, tags it with the received parameters, creates the poller and
        reactionner links, applies the ``override_conf`` properties, loads the
        modules and hands the configuration over to the scheduler object.

        :return: None
        """
        with self.conf_lock:
            new_c = self.new_conf
            conf_raw = new_c['conf']
            override_conf = new_c['override_conf']
            modules = new_c['modules']
            satellites = new_c['satellites']
            instance_name = new_c['instance_name']
            push_flavor = new_c['push_flavor']
            skip_initial_broks = new_c['skip_initial_broks']
            accept_passive_unknown_chk_res = new_c['accept_passive_unknown_check_results']
            api_key = new_c['api_key']
            secret = new_c['secret']
            http_proxy = new_c['http_proxy']
            statsd_host = new_c['statsd_host']
            statsd_port = new_c['statsd_port']
            statsd_prefix = new_c['statsd_prefix']
            statsd_enabled = new_c['statsd_enabled']

            # horay, we got a name, we can set it in our stats objects
            statsmgr.register(self.sched, instance_name, 'scheduler',
                              api_key=api_key, secret=secret, http_proxy=http_proxy,
                              statsd_host=statsd_host, statsd_port=statsd_port,
                              statsd_prefix=statsd_prefix, statsd_enabled=statsd_enabled)

            t00 = time.time()
            # NOTE(security): unpickling can execute arbitrary code; conf_raw must only
            # ever come from a trusted sender.
            conf = cPickle.loads(conf_raw)
            logger.debug("Conf received at %d. Unserialized in %d secs", t00, time.time() - t00)
            self.new_conf = None

            # Tag the conf with our data
            self.conf = conf
            self.conf.push_flavor = push_flavor
            self.conf.instance_name = instance_name
            self.conf.skip_initial_broks = skip_initial_broks
            self.conf.accept_passive_unknown_check_results = accept_passive_unknown_chk_res

            self.cur_conf = conf
            self.override_conf = override_conf
            self.modules = modules
            self.satellites = satellites

            if self.conf.human_timestamp_log:  # pylint: disable=E1101
                logger.set_human_format()

            # Now We create our pollers
            self._link_satellites(satellites['pollers'], self.pollers,
                                  override_conf['satellitemap'])

            # Now We create our reactionners
            self._link_satellites(satellites['reactionners'], self.reactionners,
                                  override_conf['satellitemap'])

            # First mix conf and override_conf to have our definitive conf
            for prop in self.override_conf:
                val = self.override_conf[prop]
                setattr(self.conf, prop, val)

            if self.conf.use_timezone != '':
                logger.debug("Setting our timezone to %s", str(self.conf.use_timezone))
                os.environ['TZ'] = self.conf.use_timezone
                time.tzset()

            if self.modules:
                logger.debug("I've got %s modules", str(self.modules))

            # TODO: if scheduler had previous modules instanciated it must clean them!
            self.do_load_modules(self.modules)

            logger.info("Loading configuration.")
            self.conf.explode_global_conf()

            # we give sched it's conf
            self.sched.reset()
            self.sched.load_conf(self.conf)
            self.sched.load_satellites(self.pollers, self.reactionners)

            # We must update our Config dict macro with good value
            # from the config parameters
            self.sched.conf.fill_resource_macros_names_macros()

            # Creating the Macroresolver Class & unique instance
            m_solver = MacroResolver()
            m_solver.init(self.conf)

            # Now create the external commander
            # it's a applyer: it role is not to dispatch commands,
            # but to apply them
            ecm = ExternalCommandManager(self.conf, 'applyer')

            # Scheduler need to know about external command to
            # activate it if necessary
            self.sched.load_external_command(ecm)

            # External command need the sched because he can raise checks
            ecm.load_scheduler(self.sched)

            # We clear our schedulers managed (it's us :) )
            # and set ourself in it
            self.schedulers = {self.conf.instance_id: self.sched}

    def what_i_managed(self):
        """Get my managed dict (instance id and push_flavor)

        :return: dict containing instance_id key and push flavor value,
                 empty when no configuration has been set up yet
        :rtype: dict
        """
        if hasattr(self, 'conf'):
            return {self.conf.instance_id: self.conf.push_flavor}
        return {}

    def main(self):
        """Main function for Scheduler, launch after the init::

        * Init daemon
        * Load module manager
        * Launch main loop
        * Catch any Exception that occurs

        :return: None
        """
        try:
            self.load_config_file()
            # Setting log level
            logger.setLevel(self.log_level)
            # Force the debug level if the daemon is said to start with such level
            if self.debug:
                logger.setLevel('DEBUG')

            self.look_for_early_exit()
            self.do_daemon_init_and_start()
            self.load_modules_manager()

            self.uri = self.http_daemon.uri
            logger.info("[scheduler] General interface is at: %s", self.uri)
            self.do_mainloop()
        except Exception:
            # Report the traceback, then let the exception propagate
            self.print_unrecoverable(traceback.format_exc())
            raise