def _any_brok_match(self, pattern, level, assert_not):
    """Search if any brok message in the Scheduler broks matches the requested
    pattern and requested level
    @verified

    :param pattern: regular expression searched in the brok messages
    :param level: expected log level of the matching brok (None to ignore the level)
    :param assert_not: True to assert that NO matching brok exists
    :return: None
    """
    regex = re.compile(pattern)

    monitoring_logs = []
    # Only 'monitoring_log' broks carry a (level, message) payload
    # Python 3 fix: dict.itervalues() no longer exists, use values()
    for brok in self._sched.brokers['broker-master']['broks'].values():
        if brok.type == 'monitoring_log':
            data = unserialize(brok.data)
            monitoring_logs.append((data['level'], data['message']))
            # Use the compiled pattern directly rather than re.search(regex, ...)
            if regex.search(data['message']) and (level is None or data['level'] == level):
                self.assertTrue(not assert_not,
                                "Found matching brok:\n"
                                "pattern = %r\nbrok message = %r"
                                % (pattern, data['message']))
                return

    self.assertTrue(assert_not,
                    "No matching brok found:\n"
                    "pattern = %r\n"
                    "brok message = %r" % (pattern, monitoring_logs))
def put_results(self):
    """Put results to scheduler, used by poller or reactionner when they are
    in active mode (passive = False)

    This function is not intended for external use. Let the poller and
    reactionner manage all this stuff by themselves ;)

    :param from: poller/reactionner identification
    :type from: str
    :param results: list of actions results
    :type results: list
    :return: True
    :rtype: bool
    """
    payload = cherrypy.request.json
    sender = payload['from']
    received = unserialize(payload['results'], no_load=True)

    if received:
        logger.debug("Got some results: %d results from %s", len(received), sender)
    else:
        logger.debug("-> no results")

    for item in received:
        logger.debug("-> result: %s", item)
        # Append to the scheduler result queue
        self.app.sched.waiting_results.put(item)

    return True
def check_monitoring_logs(self, expected_logs, dump=False):
    """Check that the 'monitoring_log' broks exactly match the expected logs.

    :param expected_logs: expected monitoring logs as (level, message) tuples
    :param dump: True to print out the monitoring logs
    :return: None
    """
    # Our scheduler
    self._sched = self.schedulers['scheduler-master'].sched
    # Our broker
    self._broker = self._sched.brokers['broker-master']

    # We got 'monitoring_log' broks for logging to the monitoring logs...
    monitoring_logs = []
    # Python 3 fix: dict.itervalues() no longer exists, use values();
    # sort by creation time to compare in emission order
    for brok in sorted(self._broker['broks'].values(),
                       key=lambda x: x.creation_time):
        if brok.type == 'monitoring_log':
            data = unserialize(brok.data)
            monitoring_logs.append((data['level'], data['message']))

    if dump:
        print("Monitoring logs: %s" % monitoring_logs)

    for log_level, log_message in expected_logs:
        assert (log_level, log_message) in monitoring_logs

    # No extra unexpected logs either
    assert len(expected_logs) == len(monitoring_logs), monitoring_logs
def test_flexible_downtime_service(self):
    """Test broks when downtime

    A flexible downtime (fixed flag = 0) must only raise its 'downtime_raise'
    brok when the service actually leaves the OK state.

    :return: None
    """
    self.setup_with_file('cfg/cfg_default.cfg')
    # Start from a clean brok list
    self._main_broker.broks = []

    host = self._scheduler.hosts.find_by_name("test_host_0")
    host.checks_in_progress = []
    host.act_depend_of = []  # ignore the router
    host.event_handler_enabled = False

    svc = self._scheduler.services.find_srv_by_name_and_hostname(
        "test_host_0", "test_ok_0")
    # To make tests quicker we make notifications send very quickly
    svc.checks_in_progress = []
    svc.act_depend_of = []  # no hostchecks on critical checkresults
    svc.event_handler_enabled = False

    self.scheduler_loop(1, [[host, 0, 'UP'], [svc, 0, 'OK']])
    time.sleep(0.1)

    # schedule a 5 seconds downtime
    duration = 5
    now = int(time.time())
    # downtime valid for 5 seconds from now
    # (flexible: fixed flag is 0, so it only triggers on a real problem)
    cmd = "[%lu] SCHEDULE_SVC_DOWNTIME;test_host_0;test_ok_0;%d;%d;0;0;%d;" \
          "downtime author;downtime comment" % (now, now, now + 3600, duration)
    self._scheduler.run_external_commands([cmd])
    self.scheduler_loop(2, [[host, 0, 'UP'], [svc, 0, 'OK']])

    brok_downtime_raise = []
    brok_downtime_expire = []
    for brok in self._main_broker.broks:
        if brok.type == 'downtime_raise':
            brok_downtime_raise.append(brok)
        elif brok.type == 'downtime_expire':
            brok_downtime_expire.append(brok)

    # Service still OK: the flexible downtime must not have been raised yet
    assert len(brok_downtime_raise) == 0
    assert len(brok_downtime_expire) == 0

    time.sleep(1)
    self._main_broker.broks = []

    # Service goes CRITICAL: the flexible downtime is now triggered
    self.scheduler_loop(3, [[host, 0, 'UP'], [svc, 2, 'CRITICAL']])

    for brok in self._main_broker.broks:
        if brok.type == 'downtime_raise':
            brok_downtime_raise.append(brok)
        elif brok.type == 'downtime_expire':
            brok_downtime_expire.append(brok)

    assert len(brok_downtime_raise) == 1
    assert len(brok_downtime_expire) == 0

    hdata = unserialize(brok_downtime_raise[0].data)
    assert hdata['host'] == 'test_host_0'
    assert hdata['service'] == 'test_ok_0'
def test_cancel_service(self):
    """Test broks when cancel downtime

    Schedules a service downtime, checks the 'downtime_raise' brok, then
    deletes all downtimes and checks the 'downtime_expire' brok.

    :return: None
    """
    self.setup_with_file('cfg/cfg_default.cfg')

    host = self.schedulers['scheduler-master'].sched.hosts.find_by_name(
        "test_host_0")
    host.checks_in_progress = []
    host.act_depend_of = []  # ignore the router
    host.event_handler_enabled = False

    self.scheduler_loop(1, [[host, 0, 'UP']])

    duration = 5
    now = int(time.time())
    # downtime valid for 5 seconds from now
    cmd = "[%lu] SCHEDULE_SVC_DOWNTIME;test_host_0;test_ok_0;%d;%d;1;0;%d;" \
          "downtime author;downtime comment" % (now, now, now + duration, duration)
    self.schedulers['scheduler-master'].sched.run_external_command(cmd)
    self.external_command_loop()

    brok_downtime_raise = []
    brok_downtime_expire = []
    # Python 3 fix: dict.itervalues() no longer exists, use values()
    for brok in self.schedulers['scheduler-master'].sched.brokers[
            'broker-master']['broks'].values():
        if brok.type == 'downtime_raise':
            brok_downtime_raise.append(brok)
        elif brok.type == 'downtime_expire':
            brok_downtime_expire.append(brok)

    assert len(brok_downtime_raise) == 1
    assert len(brok_downtime_expire) == 0

    # External command: delete all host downtime
    now = int(time.time())
    # Reset the broks to only collect the ones raised by the deletion
    self.schedulers['scheduler-master'].sched.brokers['broker-master'][
        'broks'] = {}
    cmd = '[%d] DEL_ALL_SVC_DOWNTIMES;test_host_0;test_ok_0' % now
    self.schedulers['scheduler-master'].sched.run_external_command(cmd)
    self.external_command_loop()

    brok_downtime_raise = []
    brok_downtime_expire = []
    # Python 3 fix: dict.itervalues() no longer exists, use values()
    for brok in self.schedulers['scheduler-master'].sched.brokers[
            'broker-master']['broks'].values():
        if brok.type == 'downtime_raise':
            brok_downtime_raise.append(brok)
        elif brok.type == 'downtime_expire':
            brok_downtime_expire.append(brok)

    assert len(brok_downtime_raise) == 0
    assert len(brok_downtime_expire) == 1

    hdata = unserialize(brok_downtime_expire[0].data)
    assert hdata['host'] == 'test_host_0'
    assert hdata['service'] == 'test_ok_0'
def setup_new_conf(self):  # pylint: disable=too-many-branches
    """Setup the new configuration received from Arbiter

    This function calls the base satellite treatment and manages the
    configuration needed for a simple satellite daemon that executes some
    actions (eg. poller or reactionner):
    - configure the passive mode
    - configure the workers
    - configure the tags
    - configure the modules

    :return: None
    """
    # Execute the base class treatment...
    super(Satellite, self).setup_new_conf()

    # ...then our own specific treatment!
    with self.conf_lock:
        logger.info("Received a new configuration")

        # self_conf is our own configuration from the alignak environment
        # self_conf = self.cur_conf['self_conf']

        # Now manage modules
        # Modules are only loaded once: further configuration pushes reuse them
        if not self.have_modules:
            try:
                self.modules = unserialize(self.cur_conf['modules'], no_load=True)
            except AlignakClassLookupException as exp:  # pragma: no cover, simple protection
                logger.error('Cannot un-serialize modules configuration '
                             'received from arbiter: %s', exp)
            if self.modules:
                logger.info("I received some modules configuration: %s", self.modules)
                self.have_modules = True

                # Prepare a worker-queues mapping entry for each module
                for module in self.modules:
                    if module.name not in self.q_by_mod:
                        self.q_by_mod[module.name] = {}

                self.do_load_modules(self.modules)
                # and start external modules too
                self.modules_manager.start_external_instances()
            else:
                logger.info("I do not have modules")

        # Initialize connection with all our satellites
        # s_type='' means: all satellite types
        logger.info("Initializing connection with my satellites:")
        my_satellites = self.get_links_of_type(s_type='')
        for satellite in list(my_satellites.values()):
            logger.info("- : %s/%s", satellite.type, satellite.name)
            if not self.daemon_connection_init(satellite):
                logger.error("Satellite connection failed: %s", satellite)

    # Now I have a configuration!
    self.have_conf = True
def test_cancel_service(self):
    """Test broks when cancel downtime

    :return: None
    """
    self.setup_with_file('cfg/cfg_default.cfg')
    self._main_broker.broks = []

    host = self._scheduler.hosts.find_by_name("test_host_0")
    host.checks_in_progress = []
    host.act_depend_of = []  # ignore the router
    host.event_handler_enabled = False

    self.scheduler_loop(1, [[host, 0, 'UP']])

    duration = 5
    now = int(time.time())
    # downtime valid for 5 seconds from now
    cmd = "[%lu] SCHEDULE_SVC_DOWNTIME;test_host_0;test_ok_0;%d;%d;1;0;%d;" \
          "downtime author;downtime comment" % (now, now, now + duration, duration)
    self._scheduler.run_external_commands([cmd])
    self.external_command_loop()

    # Collect the raised / expired downtime broks
    raised = [brok for brok in self._main_broker.broks
              if brok.type == 'downtime_raise']
    expired = [brok for brok in self._main_broker.broks
               if brok.type == 'downtime_expire']
    assert len(raised) == 1
    assert len(expired) == 0

    # External command: delete all host downtime
    now = int(time.time())
    self._main_broker.broks = []
    cmd = '[%d] DEL_ALL_SVC_DOWNTIMES;test_host_0;test_ok_0' % now
    self._scheduler.run_external_commands([cmd])
    self.external_command_loop()

    raised = [brok for brok in self._main_broker.broks
              if brok.type == 'downtime_raise']
    expired = [brok for brok in self._main_broker.broks
               if brok.type == 'downtime_expire']
    assert len(raised) == 0
    assert len(expired) == 1

    hdata = unserialize(expired[0].data)
    assert hdata['host'] == 'test_host_0'
    assert hdata['service'] == 'test_ok_0'
def get_external_commands(self):
    """Send a HTTP request to the satellite (GET /_external_commands) to get
    the external commands from the satellite.

    :return: External Command list on success, [] on failure
    :rtype: list
    """
    response = self.con.get('_external_commands', wait=False)
    logger.debug("Got %d external commands from %s: %s",
                 len(response), self.name, response)
    return unserialize(response, True)
def push_broks(self):
    """Push broks objects to the daemon (internal)
    Only used on a Broker daemon by the Arbiter

    :return: None
    """
    data = cherrypy.request.json
    # Un-serialize and append under the daemon's broks lock
    with self.app.arbiter_broks_lock:
        received = [unserialize(brok, True) for brok in data['broks'].values()]
        self.app.arbiter_broks.extend(received)
def test_brok_checks_results(self):
    """Test broks checks results

    :return: None
    """
    self.setup_with_file('cfg/cfg_default.cfg')
    self._main_broker.broks = []

    host = self._scheduler.hosts.find_by_name("test_host_0")
    host.checks_in_progress = []
    host.act_depend_of = []  # ignore the router
    host.event_handler_enabled = False

    svc = self._scheduler.services.find_srv_by_name_and_hostname("test_host_0",
                                                                 "test_ok_0")
    # To make tests quicker we make notifications send very quickly
    svc.notification_interval = 0.001
    svc.checks_in_progress = []
    svc.act_depend_of = []  # no hostchecks on critical checkresults
    svc.event_handler_enabled = False

    self.scheduler_loop(1, [[host, 2, 'DOWN'], [svc, 0, 'OK']])
    time.sleep(0.1)

    # Collect the check result broks, dumping each one as we go
    host_check_results = []
    service_check_results = []
    for brok in self._main_broker.broks:
        if brok.type not in ('host_check_result', 'service_check_result'):
            continue
        print(("Brok %s: %s" % (brok.type, brok)))
        if brok.type == 'host_check_result':
            host_check_results.append(brok)
        else:
            service_check_results.append(brok)

    assert len(host_check_results) == 1
    assert len(service_check_results) == 1

    hdata = unserialize(host_check_results[0].data)
    assert hdata['state'] == 'DOWN'
    assert hdata['state_type'] == 'SOFT'

    sdata = unserialize(service_check_results[0].data)
    assert sdata['state'] == 'OK'
    assert sdata['state_type'] == 'HARD'
def get_events(self):
    """Send a HTTP request to the satellite (GET /_events)

    Get monitoring events from the satellite.

    :return: Broks list on success, [] on failure
    :rtype: list
    """
    response = self.con.get('_events', wait=False)
    logger.debug("Got events from %s: %s", self.name, response)
    return unserialize(response, True)
def setup_new_conf(self):  # pylint: disable=too-many-branches
    """Setup the new configuration received from Arbiter

    This function calls the base satellite treatment and manages the
    configuration needed for a simple satellite daemon that executes some
    actions (eg. poller or reactionner):
    - configure the passive mode
    - configure the workers
    - configure the tags
    - configure the modules

    :return: None
    """
    # Execute the base class treatment...
    super(Satellite, self).setup_new_conf()

    # ...then our own specific treatment!
    with self.conf_lock:
        logger.info("Received a new configuration")

        # self_conf is our own configuration from the alignak environment
        # self_conf = self.cur_conf['self_conf']

        # Now manage modules
        # Modules are only loaded on the first received configuration
        if not self.have_modules:
            try:
                self.modules = unserialize(self.cur_conf['modules'], no_load=True)
            except AlignakClassLookupException as exp:  # pragma: no cover, simple protection
                logger.error('Cannot un-serialize modules configuration '
                             'received from arbiter: %s', exp)
            if self.modules:
                logger.info("I received some modules configuration: %s", self.modules)
                self.have_modules = True

                # One worker-queues dict per module name
                for module in self.modules:
                    if module.name not in self.q_by_mod:
                        self.q_by_mod[module.name] = {}

                self.do_load_modules(self.modules)
                # and start external modules too
                self.modules_manager.start_external_instances()
            else:
                logger.info("I do not have modules")

        # Initialize connection with all our satellites
        # s_type='' means: all satellite types
        logger.info("Initializing connection with my satellites:")
        my_satellites = self.get_links_of_type(s_type='')
        for satellite in list(my_satellites.values()):
            logger.info("- : %s/%s", satellite.type, satellite.name)
            if not self.daemon_connection_init(satellite):
                logger.error("Satellite connection failed: %s", satellite)

    # Now I have a configuration!
    self.have_conf = True
def prepare(self):
    """Un-serialize data from data attribute and add instance_id key if necessary

    :return: None
    """
    # Maybe the Brok is a old daemon one or was already prepared
    # if so, the data is already ok.
    # (missing 'prepared' attribute -> treat as already prepared)
    if not getattr(self, 'prepared', True):
        self.data = unserialize(self.data)
        if self.instance_id:
            self.data['instance_id'] = self.instance_id
    self.prepared = True
def _push_broks(self):
    """Push the provided broks objects to the broker daemon
    Only used on a Broker daemon by the Arbiter

    :param: broks
    :type: list
    :return: None
    """
    payload = cherrypy.request.json
    with self.app.arbiter_broks_lock:
        logger.debug("Pushing %d broks", len(payload['broks']))
        received = [unserialize(brok, True) for brok in payload['broks']]
        self.app.arbiter_broks.extend(received)
def get_broks(self, broker_name):
    """Send a HTTP request to the satellite (GET /_broks)

    Get broks from the satellite.
    Un-serialize data received.

    :param broker_name: the concerned broker link
    :type broker_name: BrokerLink
    :return: Broks list on success, [] on failure
    :rtype: list
    """
    response = self.con.get('_broks', {'broker_name': broker_name}, wait=False)
    logger.debug("Got broks from %s: %s", self.name, response)
    return unserialize(response, True)
def test_unserialize_check(self):
    """ Test unserialize checks

    :return: None
    """
    # A serialized Check payload as produced by the alignak serialization:
    # the '__sys_python_module__' key tells unserialize which class to rebuild
    var = '''
    {"content": {"check_type":0,"exit_status":3,"creation_time":1469152287.6731250286,
    "reactionner_tag":"None","s_time":0.0,
    "uuid":"5f1b16fa809c43379822c7acfe789660","check_time":0,"long_output":"",
    "state":0,"internal":false,"u_time":0.0,"env":{},"depend_on_me":[],
    "ref":"1fe5184ea05d439eb045399d26ed3337","from_trigger":false,
    "status":"scheduled","execution_time":0.0,"worker":"none","t_to_go":1469152290,
    "module_type":"echo","_in_timeout":false,"dependency_check":false,"type":"",
    "depend_on":[],"is_a":"check","poller_tag":"None","command":"_echo",
    "timeout":30,"output":"","perf_data":""},
    "__sys_python_module__":"alignak.check.Check"
    }
    '''
    # Must not raise: the payload rebuilds an alignak.check.Check instance
    unserialize(var)
    assert True
def get_actions(self, params):
    """Send a HTTP request to the satellite (GET /_checks)

    Get actions from the scheduler.
    Un-serialize data received.

    :param params: the request parameters
    :type params: str
    :return: Actions list on success, [] on failure
    :rtype: list
    """
    response = self.con.get('_checks', params, wait=True)
    logger.debug("Got checks to execute from %s: %s", self.name, response)
    return unserialize(response, True)
def _push_broks(self):
    """Push the provided broks objects to the broker daemon
    Only used on a Broker daemon by the Arbiter

    :param: broks
    :type: list
    :return: None
    """
    data = cherrypy.request.json
    broks = data['broks']
    with self.app.arbiter_broks_lock:
        logger.debug("Pushing %d broks", len(broks))
        for brok in broks:
            self.app.arbiter_broks.append(unserialize(brok, True))
def test_timeperiod_transition_log(self):
    """Check the monitoring log brok raised on each timeperiod activation change"""
    self.setup_with_file('cfg/cfg_default.cfg')

    tp = self._scheduler.timeperiods.find_by_name('24x7')
    self.assertIsNot(tp, None)

    # First transition: unknown (-1) -> active (1)
    data = unserialize(tp.check_and_log_activation_change().data)
    assert data['level'] == 'info'
    assert data['message'] == 'TIMEPERIOD TRANSITION: 24x7;-1;1'

    # Now make this tp unable to be active again by removing al it's daterange
    dr = tp.dateranges
    tp.dateranges = []
    # Transition: active (1) -> inactive (0)
    data = unserialize(tp.check_and_log_activation_change().data)
    assert data['level'] == 'info'
    assert data['message'] == 'TIMEPERIOD TRANSITION: 24x7;1;0'

    # Ok, let get back to work
    tp.dateranges = dr
    # Transition: inactive (0) -> active (1)
    data = unserialize(tp.check_and_log_activation_change().data)
    assert data['level'] == 'info'
    assert data['message'] == 'TIMEPERIOD TRANSITION: 24x7;0;1'
def prepare(self):
    """Un-serialize data from data attribute and add instance_id key if necessary

    :return: None
    """
    # Maybe the brok is a old daemon one or was already prepared
    # if so, the data is already ok
    if hasattr(self, 'prepared') and not self.prepared:
        # The former `except AlignakClassLookupException: raise` handler was a
        # no-op (catch and immediately re-raise) - let the exception propagate
        self.data = unserialize(self.data)
        if self.instance_id:
            self.data['instance_id'] = self.instance_id
    self.prepared = True
def add_actions(self, actions_list, scheduler_instance_id): """Add a list of actions to the satellite queues :param actions_list: Actions list to add :type actions_list: list :param scheduler_instance_id: sheduler link to assign the actions to :type scheduler_instance_id: SchedulerLink :return: None """ # We check for new check in each schedulers and put the result in new_checks scheduler_link = None for scheduler_id in self.schedulers: logger.debug("Trying to add an action, scheduler: %s", self.schedulers[scheduler_id]) if scheduler_instance_id == self.schedulers[ scheduler_id].instance_id: scheduler_link = self.schedulers[scheduler_id] break else: logger.error( "Trying to add actions from an unknwown scheduler: %s", scheduler_instance_id) return if not scheduler_link: logger.error( "Trying to add actions, but scheduler link is not found for: %s, " "actions: %s", scheduler_instance_id, actions_list) return logger.debug("Found scheduler link: %s", scheduler_link) for action in actions_list: # First we look if the action is identified uuid = getattr(action, 'uuid', None) if uuid is None: try: action = unserialize(action, no_load=True) uuid = action.uuid except AlignakClassLookupException: logger.error('Cannot un-serialize action: %s', action) continue # If we already have this action, we are already working for it! if uuid in scheduler_link.actions: continue # Action is attached to a scheduler action.my_scheduler = scheduler_link.uuid scheduler_link.actions[action.uuid] = action self.assign_to_a_queue(action)
def get_external_commands(self):
    """Send a HTTP request to the satellite (GET /ping)
    and THEN send a HTTP request to the satellite (GET /get_external_commands)
    Get external commands from satellite.
    Un-serialize data received.

    :return: External Command list on success, [] on failure
    :rtype: list
    """
    # Do not even try when the link is known to be unreachable
    if not self.reachable:
        logger.warning("Not reachable for get_external_commands: %s",
                       self.get_name())
        return []

    try:
        res = self.con.get('get_external_commands', wait='long')
        tab = unserialize(str(res))
        # Protect against bad return
        if not isinstance(tab, list):
            # Drop the connection: it returned garbage
            self.con = None
            return []
        return tab
    except HTTPClientConnectionException as exp:
        # Connection refused/reset: count the failure and mark the link dead
        logger.warning("[%s] Connection error when getting external commands: %s",
                       self.get_name(), str(exp))
        self.add_failed_check_attempt(reason=str(exp))
        self.set_dead()
    except HTTPClientTimeoutException as exp:  # pragma: no cover, simple protection
        # Timeout: count the failure but keep the link alive
        logger.warning("[%s] Connection timeout when getting external commands: %s",
                       self.get_name(), str(exp))
        self.add_failed_check_attempt(reason=str(exp))
    except HTTPClientException as exp:  # pragma: no cover, simple protection
        logger.error("[%s] Error when getting external commands: %s",
                     self.get_name(), str(exp))
        self.con = None
    except AttributeError as exp:  # pragma: no cover, simple protection
        # Connection is not created
        # NOTE(review): `exp` is intentionally unused here - only logged fact
        # is that the connection attribute does not exist
        logger.error("[%s] get_external_commands - Connection does not exist!",
                     self.get_name())
    except AlignakClassLookupException as exp:  # pragma: no cover, simple protection
        logger.error('Cannot un-serialize external commands received: %s', exp)

    # Any handled error falls through to an empty result
    return []
def put_results(self):
    """Put results to scheduler, used by poller and reactionners

    :return: True or ?? (if lock acquire fails)
    :rtype: bool
    """
    res = cherrypy.request.json
    who_sent = res['from']
    results = res['results']

    results = unserialize(results, no_load=True)
    if results:
        logger.debug("Got some results: %d results from %s",
                     len(results), who_sent)
    else:
        logger.debug("-> no results")
    self.app.sched.nb_checks_results += len(results)

    for result in results:
        logger.debug("-> result: %s", result)
        # resultobj = unserialize(result, True)
        result.set_type_active()

        # Update scheduler counters:
        # per action kind (result.is_a), per scope ("total" / "active"),
        # both an overall "total" and a per-status counter
        self.app.sched.counters[result.is_a]["total"]["results"]["total"] += 1
        # Lazily create the per-status counter on first sight of this status
        if result.status not in \
                self.app.sched.counters[result.is_a]["total"]["results"]:
            self.app.sched.counters[result.is_a]["total"]["results"][
                result.status] = 0
        self.app.sched.counters[result.is_a]["total"]["results"][
            result.status] += 1
        self.app.sched.counters[result.is_a]["active"]["results"]["total"] += 1
        if result.status not in \
                self.app.sched.counters[result.is_a]["active"]["results"]:
            self.app.sched.counters[result.is_a]["active"]["results"][
                result.status] = 0
        self.app.sched.counters[result.is_a]["active"]["results"][
            result.status] += 1

        # Append to the scheduler result queue
        self.app.sched.waiting_results.put(result)

    return True
def add_actions(self, actions_list, scheduler_instance_id): """Add a list of actions to the satellite queues :param actions_list: Actions list to add :type actions_list: list :param scheduler_instance_id: sheduler link to assign the actions to :type scheduler_instance_id: SchedulerLink :return: None """ # We check for new check in each schedulers and put the result in new_checks scheduler_link = None for scheduler_id in self.schedulers: logger.debug("Trying to add an action, scheduler: %s", self.schedulers[scheduler_id]) if scheduler_instance_id == self.schedulers[scheduler_id].instance_id: scheduler_link = self.schedulers[scheduler_id] break else: logger.error("Trying to add actions from an unknwown scheduler: %s", scheduler_instance_id) return if not scheduler_link: logger.error("Trying to add actions, but scheduler link is not found for: %s, " "actions: %s", scheduler_instance_id, actions_list) return logger.debug("Found scheduler link: %s", scheduler_link) for action in actions_list: # First we look if the action is identified uuid = getattr(action, 'uuid', None) if uuid is None: try: action = unserialize(action, no_load=True) uuid = action.uuid except AlignakClassLookupException: logger.error('Cannot un-serialize action: %s', action) continue # If we already have this action, we are already working for it! if uuid in scheduler_link.actions: continue # Action is attached to a scheduler action.my_scheduler = scheduler_link.uuid scheduler_link.actions[action.uuid] = action self.assign_to_a_queue(action)
def __init__(self, params=None, parsing=False):  # pylint: disable=unused-argument
    """Initialize the node, optionally restoring its state from serialized params."""
    self.operand = None
    self.sons = []
    # Of: values are a triple OK,WARN,CRIT
    self.of_values = ('0', '0', '0')
    self.is_of_mul = False
    self.configuration_errors = []
    self.not_value = False
    # Guard clause: nothing more to restore without params
    if params is None:
        return
    if 'operand' in params:
        self.operand = params['operand']
    if 'sons' in params:
        self.sons = [unserialize(elem) for elem in params['sons']]
    # Of: values are a triple OK,WARN,CRIT
    if 'of_values' in params:
        self.of_values = tuple(params['of_values'])
    if 'is_of_mul' in params:
        self.is_of_mul = params['is_of_mul']
    if 'not_value' in params:
        self.not_value = params['not_value']
def zlib_processor(entity):  # pragma: no cover, not used in the testing environment...
    """Read application/zlib data and put content into entity.params for later use.

    :param entity: cherrypy entity
    :type entity: cherrypy._cpreqbody.Entity
    :return: None
    """
    if not entity.headers.get(ntou("Content-Length"), ntou("")):
        raise cherrypy.HTTPError(411)

    body = entity.fp.read()
    try:
        body = zlib.decompress(body)
    except zlib.error:
        raise cherrypy.HTTPError(400, 'Invalid zlib data')
    try:
        raw_params = json.loads(body)
    except ValueError:
        raise cherrypy.HTTPError(400, 'Invalid JSON document in zlib data')

    try:
        params = {}
        for key, value in list(raw_params.items()):
            params[key] = unserialize(value.encode("utf8"))
    except TypeError:
        raise cherrypy.HTTPError(400, 'Invalid serialized data in JSON document')
    except AlignakClassLookupException as exp:
        # Bug fix: the HTTPError was previously created but never raised,
        # silently accepting un-serializable data
        raise cherrypy.HTTPError(400, 'Cannot un-serialize data received: %s' % exp)

    # Now that all values have been successfully parsed and decoded,
    # apply them to the entity.params dict.
    for key, value in list(params.items()):
        if key in entity.params:
            if not isinstance(entity.params[key], list):
                entity.params[key] = [entity.params[key]]
            entity.params[key].append(value)
        else:
            entity.params[key] = value
def test_external_commands(self):
    """ Test logs for external commands

    :return:
    """
    self.print_header()
    self.setup_with_file('cfg/cfg_monitoring_logs.cfg')
    assert self.conf_is_correct

    self._sched = self.schedulers['scheduler-master'].sched

    now = int(time.time())

    host = self._sched.hosts.find_by_name("test_host_0")

    # Receiver receives unknown host external command
    # Bug fix: build the command with the same 'now' used in expected_logs;
    # using time.time() here could yield a different timestamp and make the
    # assertion flaky
    excmd = '[%d] CHANGE_SVC_MODATTR;test_host_0;test_ok_0;1' % now
    self._sched.run_external_command(excmd)
    self.external_command_loop()

    excmd = '[%d] CHANGE_RETRY_HOST_CHECK_INTERVAL;test_host_0;42' % now
    self._sched.run_external_command(excmd)
    self.external_command_loop()

    monitoring_logs = []
    # Python 3 fix: dict.itervalues() no longer exists, use values()
    for brok in self._sched.brokers['broker-master']['broks'].values():
        if brok.type == 'monitoring_log':
            data = unserialize(brok.data)
            monitoring_logs.append((data['level'], data['message']))

    expected_logs = [
        (u'info',
         u'EXTERNAL COMMAND: [%s] CHANGE_RETRY_HOST_CHECK_INTERVAL;test_host_0;42' % now),
        (u'info',
         u'EXTERNAL COMMAND: [%s] CHANGE_SVC_MODATTR;test_host_0;test_ok_0;1' % now)
    ]
    for log_level, log_message in expected_logs:
        assert (log_level, log_message) in monitoring_logs
def zlib_processor(entity):  # pragma: no cover, not used in the testing environment...
    """Read application/zlib data and put content into entity.params for later use.

    :param entity: cherrypy entity
    :type entity: cherrypy._cpreqbody.Entity
    :return: None
    """
    if not entity.headers.get(ntou("Content-Length"), ntou("")):
        raise cherrypy.HTTPError(411)

    body = entity.fp.read()
    try:
        body = zlib.decompress(body)
    except zlib.error:
        raise cherrypy.HTTPError(400, 'Invalid zlib data')
    try:
        raw_params = json.loads(body)
    except ValueError:
        raise cherrypy.HTTPError(400, 'Invalid JSON document in zlib data')

    try:
        params = {}
        for key, value in list(raw_params.items()):
            params[key] = unserialize(value.encode("utf8"))
    except TypeError:
        raise cherrypy.HTTPError(400, 'Invalid serialized data in JSON document')
    except AlignakClassLookupException as exp:
        # Bug fix: the HTTPError was previously created but never raised,
        # silently accepting un-serializable data
        raise cherrypy.HTTPError(400, 'Cannot un-serialize data received: %s' % exp)

    # Now that all values have been successfully parsed and decoded,
    # apply them to the entity.params dict.
    for key, value in list(params.items()):
        if key in entity.params:
            if not isinstance(entity.params[key], list):
                entity.params[key] = [entity.params[key]]
            entity.params[key].append(value)
        else:
            entity.params[key] = value
def check(self, item, state_id, state, expected_logs):
    """Run one scheduler loop for the item and compare the monitoring logs

    :param item: concerned item
    :param state_id: state identifier
    :param state: state text
    :param expected_logs: expected monitoring logs
    :return: None
    """
    # Reset the broks so only the logs of this loop are collected
    self._sched.brokers['broker-master']['broks'] = {}

    self.scheduler_loop(1, [[item, state_id, state]])
    time.sleep(0.1)

    monitoring_logs = []
    # Python 3 fix: dict.itervalues() no longer exists, use values()
    for brok in self._sched.brokers['broker-master']['broks'].values():
        if brok.type == 'monitoring_log':
            data = unserialize(brok.data)
            monitoring_logs.append((data['level'], data['message']))

    for log_level, log_message in expected_logs:
        assert (log_level, log_message) in monitoring_logs

    # No extra unexpected logs either
    assert len(expected_logs) == len(monitoring_logs), monitoring_logs

    time.sleep(0.1)
def test_arbiter_spare(self):
    """ Test with arbiter spare

    :return: None
    """
    self.print_header()
    with requests_mock.mock() as mockreq:
        mockreq.get('http://localhost:8770/ping', json='pong')
        mockreq.get('http://localhost:8770/what_i_managed', json='{}')
        mockreq.post('http://localhost:8770/put_conf', json='true')
        self.setup_with_file('cfg/cfg_dispatcher_arbiter_spare.cfg')
        self.arbiter.dispatcher.check_alive()

        print("start")
        self.arbiter.dispatcher.check_dispatch()
        print("dispatched")

        # need time to have history filled
        time.sleep(2)
        history = mockreq.request_history
        # Find the last put_conf request in the mock history
        history_index = 0
        for index, hist in enumerate(history):
            if hist.url == 'http://localhost:8770/put_conf':
                history_index = index
        conf_received = history[history_index].json()
        # Bug fix: on Python 3 dict.keys() is a view, never equal to a list -
        # compare against list(...) instead
        assert ['conf'] == list(conf_received.keys())
        spare_conf = unserialize(conf_received['conf'])
        # Test a property to be sure conf loaded correctly
        assert 5 == spare_conf.perfdata_timeout
def setup_new_conf(self):  # pylint: disable=too-many-branches, too-many-locals
    """Broker custom setup_new_conf method

    This function calls the base satellite treatment and manages the
    configuration needed for a broker daemon:
    - get and configure its pollers, reactionners and receivers relation
    - configure the modules

    :return: None
    """
    # Execute the base class treatment...
    super(Broker, self).setup_new_conf()

    # ...then our own specific treatment!
    with self.conf_lock:
        # # self_conf is our own configuration from the alignak environment
        # self_conf = self.cur_conf['self_conf']
        self.got_initial_broks = False

        # Now we create our pollers, reactionners and receivers
        for link_type in ['pollers', 'reactionners', 'receivers']:
            if link_type not in self.cur_conf['satellites']:
                logger.error("No %s in the configuration!", link_type)
                continue

            my_satellites = getattr(self, link_type, {})
            received_satellites = self.cur_conf['satellites'][link_type]
            for link_uuid in received_satellites:
                rs_conf = received_satellites[link_uuid]
                logger.debug("- received %s - %s: %s", rs_conf['instance_id'],
                             rs_conf['type'], rs_conf['name'])

                # Must look if we already had a configuration and save our broks
                already_got = rs_conf['instance_id'] in my_satellites
                broks = []
                actions = {}
                wait_homerun = {}
                external_commands = {}
                running_id = 0
                if already_got:
                    logger.warning("I already got: %s", rs_conf['instance_id'])
                    # Save some information
                    # (preserve pending work so it is not lost on re-configuration)
                    running_id = my_satellites[link_uuid].running_id
                    (broks, actions,
                     wait_homerun, external_commands) = \
                        my_satellites[link_uuid].get_and_clear_context()
                    # Delete the former link
                    del my_satellites[link_uuid]

                # My new satellite link...
                # link_type[:-1] strips the plural 's' (eg. 'pollers' -> 'poller')
                new_link = SatelliteLink.get_a_satellite_link(link_type[:-1],
                                                              rs_conf)
                my_satellites[new_link.uuid] = new_link
                logger.info("I got a new %s satellite: %s", link_type[:-1], new_link)

                # Restore the context saved from the former link (if any)
                new_link.running_id = running_id
                new_link.external_commands = external_commands
                new_link.broks = broks
                new_link.wait_homerun = wait_homerun
                new_link.actions = actions

                # Replace satellite address and port by those defined in satellite_map
                # todo: check if it is really necessary! Add a unit test for this
                # Not sure about this because of the daemons/satellites configuration
                # if new_link.name in self_conf.get('satellite_map', {}):
                #     new_link = dict(new_link)  # make a copy
                #     new_link.update(self_conf.get('satellite_map', {})[new_link.name])

        # Modules are only loaded on the first received configuration
        if not self.have_modules:
            try:
                self.modules = unserialize(self.cur_conf['modules'], no_load=True)
            except AlignakClassLookupException as exp:  # pragma: no cover, simple protection
                logger.error('Cannot un-serialize modules configuration '
                             'received from arbiter: %s', exp)
            if self.modules:
                logger.info("I received some modules configuration: %s", self.modules)
                self.have_modules = True

                # Ok now start, or restart them!
                # Set modules, init them and start external ones
                self.do_load_modules(self.modules)
                # and start external modules too
                self.modules_manager.start_external_instances()
            else:
                logger.info("I do not have modules")

        # Initialize connection with my schedulers first
        logger.info("Initializing connection with my schedulers:")
        my_satellites = self.get_links_of_type(s_type='scheduler')
        for satellite in list(my_satellites.values()):
            logger.info("- %s/%s", satellite.type, satellite.name)
            if not self.daemon_connection_init(satellite):
                logger.error("Satellite connection failed: %s", satellite)

        # Initialize connection with all our satellites
        logger.info("Initializing connection with my satellites:")
        for sat_type in ['arbiter', 'reactionner', 'poller', 'receiver']:
            my_satellites = self.get_links_of_type(s_type=sat_type)
            for satellite in list(my_satellites.values()):
                logger.info("- %s/%s", satellite.type, satellite.name)
                if not self.daemon_connection_init(satellite):
                    logger.error("Satellite connection failed: %s", satellite)

    # Now I have a configuration!
    self.have_conf = True
def setup_new_conf(self):
    """Receiver custom setup_new_conf method

    This function calls the base satellite treatment and manages the configuration needed
    for a receiver daemon:
    - get and configure its satellites
    - configure the modules

    :return: None
    """
    # Execute the base class treatment...
    super(Receiver, self).setup_new_conf()

    # ...then our own specific treatment!
    with self.conf_lock:
        # self_conf is our own configuration from the alignak environment
        # self_conf = self.cur_conf['self_conf']
        logger.debug("Got config: %s", self.cur_conf)

        # Configure and start our modules (only on the first configuration dispatch;
        # have_modules stays True afterwards so a re-dispatch does not restart them)
        if not self.have_modules:
            try:
                self.modules = unserialize(self.cur_conf['modules'], no_load=True)
            except AlignakClassLookupException as exp:  # pragma: no cover, simple protection
                logger.error('Cannot un-serialize modules configuration '
                             'received from arbiter: %s', exp)
            if self.modules:
                logger.info("I received some modules configuration: %s", self.modules)
                self.have_modules = True

                self.do_load_modules(self.modules)
                # and start external modules too
                self.modules_manager.start_external_instances()
            else:
                logger.info("I do not have modules")

        # Now create the external commands manager
        # We are a receiver: our role is to get and dispatch commands to the schedulers
        global_conf = self.cur_conf.get('global_conf', None)
        if not global_conf:
            logger.error("Received a configuration without any global_conf! "
                         "This may hide a configuration problem with the "
                         "realms and the manage_sub_realms of the satellites!")
            # Fallback defaults used when the arbiter did not push a global_conf
            global_conf = {
                'accept_passive_unknown_check_results': False,
                'log_external_commands': True
            }
        # NOTE(review): the fallback dict sets 'log_external_commands' to True while the
        # .get() default below is False — the fallback value wins when global_conf was
        # missing; verify this asymmetry is intended.
        self.external_commands_manager = \
            ExternalCommandManager(None, 'receiver', self,
                                   global_conf.get(
                                       'accept_passive_unknown_check_results', False),
                                   global_conf.get(
                                       'log_external_commands', False))

        # Initialize connection with all our satellites
        # (empty s_type means: links of every type)
        logger.info("Initializing connection with my satellites:")
        my_satellites = self.get_links_of_type(s_type='')
        for satellite in list(my_satellites.values()):
            logger.info("- : %s/%s", satellite.type, satellite.name)
            if not self.daemon_connection_init(satellite):
                # Connection errors are logged but not fatal: the daemon keeps its conf
                logger.error("Satellite connection failed: %s", satellite)

    # Now I have a configuration!
    self.have_conf = True
def setup_new_conf(self):
    # pylint: disable=too-many-branches, too-many-locals
    """Broker custom setup_new_conf method

    This function calls the base satellite treatment and manages the configuration needed
    for a broker daemon:
    - get and configure its pollers, reactionners and receivers relation
    - configure the modules

    :return: None
    """
    # Execute the base class treatment...
    super(Broker, self).setup_new_conf()

    # ...then our own specific treatment!
    with self.conf_lock:
        # # self_conf is our own configuration from the alignak environment
        # self_conf = self.cur_conf['self_conf']

        self.got_initial_broks = False

        # Now we create our pollers, reactionners and receivers
        for link_type in ['pollers', 'reactionners', 'receivers']:
            if link_type not in self.cur_conf['satellites']:
                logger.error("No %s in the configuration!", link_type)
                continue

            my_satellites = getattr(self, link_type, {})
            received_satellites = self.cur_conf['satellites'][link_type]
            for link_uuid in received_satellites:
                rs_conf = received_satellites[link_uuid]
                logger.debug("- received %s - %s: %s", rs_conf['instance_id'],
                             rs_conf['type'], rs_conf['name'])

                # Must look if we already had a configuration and save our broks
                # NOTE(review): membership is tested with rs_conf['instance_id'] but the
                # saved link is accessed/deleted with link_uuid — confirm both keys are
                # always identical, otherwise the KeyError path is reachable.
                already_got = rs_conf['instance_id'] in my_satellites
                broks = []
                actions = {}
                wait_homerun = {}
                external_commands = {}
                running_id = 0
                if already_got:
                    logger.warning("I already got: %s", rs_conf['instance_id'])
                    # Save some information
                    running_id = my_satellites[link_uuid].running_id
                    (broks, actions,
                     wait_homerun, external_commands) = \
                        my_satellites[link_uuid].get_and_clear_context()
                    # Delete the former link
                    del my_satellites[link_uuid]

                # My new satellite link...
                new_link = SatelliteLink.get_a_satellite_link(link_type[:-1],
                                                              rs_conf)
                my_satellites[new_link.uuid] = new_link
                logger.info("I got a new %s satellite: %s", link_type[:-1], new_link)

                # Restore the context saved from the former link (if any)
                new_link.running_id = running_id
                new_link.external_commands = external_commands
                new_link.broks = broks
                new_link.wait_homerun = wait_homerun
                new_link.actions = actions

                # Replace satellite address and port by those defined in satellite_map
                # todo: check if it is really necessary! Add a unit test for this
                # Not sure about this because of the daemons/satellites configuration
                # if new_link.name in self_conf.get('satellite_map', {}):
                #     new_link = dict(new_link)  # make a copy
                #     new_link.update(self_conf.get('satellite_map', {})[new_link.name])

        # Configure and start our modules (only on the first configuration dispatch)
        if not self.have_modules:
            try:
                self.modules = unserialize(self.cur_conf['modules'], no_load=True)
            except AlignakClassLookupException as exp:  # pragma: no cover, simple protection
                logger.error('Cannot un-serialize modules configuration '
                             'received from arbiter: %s', exp)
            if self.modules:
                logger.info("I received some modules configuration: %s", self.modules)
                self.have_modules = True

                # Ok now start, or restart them!
                # Set modules, init them and start external ones
                self.do_load_modules(self.modules)
                # and start external modules too
                self.modules_manager.start_external_instances()
            else:
                logger.info("I do not have modules")

        # Initialize connection with my schedulers first
        logger.info("Initializing connection with my schedulers:")
        my_satellites = self.get_links_of_type(s_type='scheduler')
        for satellite in list(my_satellites.values()):
            logger.info("- %s/%s", satellite.type, satellite.name)
            if not self.daemon_connection_init(satellite):
                logger.error("Satellite connection failed: %s", satellite)

        # Initialize connection with all our satellites
        logger.info("Initializing connection with my satellites:")
        for sat_type in ['arbiter', 'reactionner', 'poller', 'receiver']:
            my_satellites = self.get_links_of_type(s_type=sat_type)
            for satellite in list(my_satellites.values()):
                logger.info("- %s/%s", satellite.type, satellite.name)
                if not self.daemon_connection_init(satellite):
                    # Connection errors are logged but not fatal
                    logger.error("Satellite connection failed: %s", satellite)

    # Now I have a configuration!
    self.have_conf = True
def test_acknowledge_host(self):
    """Test broks when acknowledge

    Checks that acknowledging a host problem raises one 'acknowledge_raise' brok for
    the host and one for its impacted service, and that the acknowledge expires
    (either by recovery or by REMOVE_HOST_ACKNOWLEDGEMENT) with matching
    'acknowledge_expire' broks.

    :return: None
    """
    self.setup_with_file('cfg/cfg_default.cfg')

    self._main_broker.broks = []

    host = self._scheduler.hosts.find_by_name("test_host_0")
    host.checks_in_progress = []
    host.act_depend_of = []  # ignore the router
    host.event_handler_enabled = False

    svc = self._scheduler.services.find_srv_by_name_and_hostname("test_host_0", "test_ok_0")
    svc.checks_in_progress = []
    svc.act_depend_of = []  # no hostchecks on critical checkresults
    svc.event_handler_enabled = False

    self.scheduler_loop(1, [[host, 2, 'DOWN'], [svc, 2, 'CRITICAL']])
    time.sleep(0.1)

    now = time.time()
    # Sticky (1), not notified (0), persistent (1), expires in 2 seconds
    cmd = "[{0}] ACKNOWLEDGE_HOST_PROBLEM_EXPIRE;{1};{2};{3};{4};{5};{6};{7}\n". \
        format(int(now), 'test_host_0', 1, 0, 1, (now + 2), 'darth vader', 'normal process')
    self._scheduler.run_external_commands([cmd])
    self.external_command_loop(2)
    # self.scheduler_loop(1, [[host, 2, 'DOWN'], [svc, 2, 'CRITICAL']])

    brok_ack = []
    print("Broker uuid: %s" % self._main_broker.uuid)
    print("Broker broks: %s" % self._main_broker.broks)
    for brok in self._main_broker.broks:
        print("Broker brok: %s" % brok)
        if brok.type == 'acknowledge_raise':
            print("Brok: %s" % brok)
            brok_ack.append(brok)

    print("***Scheduler: %s" % self._scheduler)
    print("***Scheduler daemon: %s" % self._scheduler.my_daemon)
    print("***Scheduler daemon brokers: %s" % self._scheduler.my_daemon.brokers)
    for broker_link_uuid in self._scheduler.my_daemon.brokers:
        print("*** %s - broks: %s" % (broker_link_uuid,
                                      self._scheduler.my_daemon.brokers[broker_link_uuid].broks))

    # Got one brok for the host ack and one brok for the service ack
    assert len(brok_ack) == 2

    # The two broks may arrive in any order: flag which one we saw
    host_brok = False
    service_brok = False
    hdata = unserialize(brok_ack[0].data)
    assert hdata['host'] == 'test_host_0'
    if 'service' in hdata:
        assert hdata['service'] == 'test_ok_0'
        service_brok = True
    else:
        host_brok = True

    hdata = unserialize(brok_ack[1].data)
    assert hdata['host'] == 'test_host_0'
    if 'service' in hdata:
        assert hdata['service'] == 'test_ok_0'
        service_brok = True
    else:
        host_brok = True

    assert host_brok and service_brok

    # return host in UP mode, so the acknowledge will be removed by the scheduler
    self._main_broker.broks = []
    self.scheduler_loop(2, [[host, 0, 'UP'], [svc, 0, 'OK']])
    brok_ack_raise = []
    brok_ack_expire = []
    for brok in self._main_broker.broks:
        if brok.type == 'acknowledge_raise':
            brok_ack_raise.append(brok)
        elif brok.type == 'acknowledge_expire':
            brok_ack_expire.append(brok)
    # Recovery must expire both acknowledges without raising new ones
    assert len(brok_ack_raise) == 0
    assert len(brok_ack_expire) == 2

    host_brok = False
    service_brok = False
    hdata = unserialize(brok_ack_expire[0].data)
    assert hdata['host'] == 'test_host_0'
    if 'service' in hdata:
        assert hdata['service'] == 'test_ok_0'
        service_brok = True
    else:
        host_brok = True

    hdata = unserialize(brok_ack_expire[1].data)
    assert hdata['host'] == 'test_host_0'
    if 'service' in hdata:
        assert hdata['service'] == 'test_ok_0'
        service_brok = True
    else:
        host_brok = True

    assert host_brok and service_brok

    # Do the same but remove acknowledge with external commands:
    self._main_broker.broks = []
    self.scheduler_loop(1, [[host, 2, 'DOWN'], [svc, 2, 'CRITICAL']])
    time.sleep(0.1)

    now = time.time()
    cmd = "[{0}] ACKNOWLEDGE_HOST_PROBLEM_EXPIRE;{1};{2};{3};{4};{5};{6};{7}\n". \
        format(int(now), 'test_host_0', 1, 0, 1, (now + 2), 'darth vader', 'normal process')
    self._scheduler.run_external_commands([cmd])
    self.scheduler_loop(1, [[host, 2, 'DOWN'], [svc, 2, 'CRITICAL']])

    cmd = "[{0}] REMOVE_HOST_ACKNOWLEDGEMENT;{1}\n". \
        format(int(now), 'test_host_0')
    self._scheduler.run_external_commands([cmd])
    self.scheduler_loop(3, [[host, 2, 'DOWN'], [svc, 2, 'CRITICAL']])

    brok_ack_raise = []
    brok_ack_expire = []
    for brok in self._main_broker.broks:
        print("Brok: %s" % brok)
        if brok.type == 'acknowledge_raise':
            brok_ack_raise.append(brok)
        elif brok.type == 'acknowledge_expire':
            brok_ack_expire.append(brok)
    # REMOVE_HOST_ACKNOWLEDGEMENT only expires the host acknowledge:
    # the service acknowledge stays raised
    assert len(brok_ack_raise) == 2
    assert len(brok_ack_expire) == 1

    hdata = unserialize(brok_ack_expire[0].data)
    assert hdata['host'] == 'test_host_0'
    assert 'service' not in hdata
def test_flapping(self):
    """Test host/service flapping detection

    Drives a service through alternating OK/CRITICAL results until the flapping
    detection triggers, then back to stable OK until it stops, checking the
    monitoring log broks emitted at each stage.

    :return:
    """
    # Get the hosts and services
    host = self._scheduler.hosts.find_by_name("test_host_0")
    host.act_depend_of = []
    assert host.flap_detection_enabled
    router = self._scheduler.hosts.find_by_name("test_router_0")
    router.act_depend_of = []
    assert router.flap_detection_enabled
    svc = self._scheduler.services.find_srv_by_name_and_hostname("test_host_0", "test_ok_0")
    svc.event_handler_enabled = False
    svc.act_depend_of = []
    # Force because the default configuration disables the flapping detection
    svc.flap_detection_enabled = True

    self.scheduler_loop(2, [
        [host, 0, 'UP | value1=1 value2=2'],
        [router, 0, 'UP | rtt=10'],
        [svc, 0, 'OK']
    ])
    assert 'UP' == host.state
    assert 'HARD' == host.state_type
    assert 'UP' == router.state
    assert 'HARD' == router.state_type
    assert 'OK' == svc.state
    assert 'HARD' == svc.state_type
    assert 25 == svc.low_flap_threshold

    # Set the service as a problem
    self.scheduler_loop(3, [
        [svc, 2, 'Crit']
    ])
    assert 'CRITICAL' == svc.state
    assert 'HARD' == svc.state_type
    # Ok, now go in flap!
    for i in range(1, 10):
        self.scheduler_loop(1, [[svc, 0, 'Ok']])
        self.scheduler_loop(1, [[svc, 2, 'Crit']])

    # Should be in flapping state now
    assert svc.is_flapping

    # We got 'monitoring_log' broks for logging to the monitoring logs...
    monitoring_logs = []
    # Sort by creation time: brok insertion order in the dict is not guaranteed
    for brok in sorted(iter(self._main_broker.broks.values()),
                       key=lambda x: x.creation_time):
        if brok.type == 'monitoring_log':
            data = unserialize(brok.data)
            monitoring_logs.append((data['level'], data['message']))

    expected_logs = [
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;HARD;2;Crit'),
        ('error', 'SERVICE NOTIFICATION: test_contact;test_host_0;test_ok_0;CRITICAL;'
                  'notify-service;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;HARD;2;Ok'),
        ('info', 'SERVICE NOTIFICATION: test_contact;test_host_0;test_ok_0;OK;'
                 'notify-service;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE FLAPPING ALERT: test_host_0;test_ok_0;STARTED; '
                 'Service appears to have started flapping (83.8% change >= 50.0% threshold)'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('info', 'SERVICE NOTIFICATION: test_contact;test_host_0;test_ok_0;'
                 'FLAPPINGSTART (OK);notify-service;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
    ]
    for log_level, log_message in expected_logs:
        assert (log_level, log_message) in monitoring_logs

    # Now we put it as back :)
    # 10 is not enough to get back as normal
    for i in range(1, 11):
        self.scheduler_loop(1, [[svc, 0, 'Ok']])
    assert svc.is_flapping

    # 10 others can be good (near 4.1 %)
    for i in range(1, 11):
        self.scheduler_loop(1, [[svc, 0, 'Ok']])
    assert not svc.is_flapping

    # We got 'monitoring_log' broks for logging to the monitoring logs...
    monitoring_logs = []
    for brok in sorted(iter(self._main_broker.broks.values()),
                       key=lambda x: x.creation_time):
        if brok.type == 'monitoring_log':
            data = unserialize(brok.data)
            monitoring_logs.append((data['level'], data['message']))

    print(("Logs: %s" % monitoring_logs))
    expected_logs = [
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;HARD;2;Crit'),
        ('error', 'SERVICE NOTIFICATION: test_contact;test_host_0;test_ok_0;CRITICAL;'
                  'notify-service;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;HARD;2;Ok'),
        ('info', 'SERVICE NOTIFICATION: test_contact;test_host_0;test_ok_0;OK;'
                 'notify-service;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE FLAPPING ALERT: test_host_0;test_ok_0;STARTED; '
                 'Service appears to have started flapping '
                 '(83.8% change >= 50.0% threshold)'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('info', 'SERVICE NOTIFICATION: test_contact;test_host_0;test_ok_0;'
                 'FLAPPINGSTART (OK);notify-service;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE FLAPPING ALERT: test_host_0;test_ok_0;STOPPED; '
                 'Service appears to have stopped flapping '
                 '(21.5% change < 25.0% threshold)'),
        ('info', 'SERVICE NOTIFICATION: test_contact;test_host_0;test_ok_0;'
                 'FLAPPINGSTOP (OK);notify-service;Ok')
    ]
    for log_level, log_message in expected_logs:
        assert (log_level, log_message) in monitoring_logs
def test_special_external_commands(self):
    """ Test logs for special external commands

    Runs RESTART_PROGRAM / RELOAD_CONFIG (which launch the configured scripts),
    plus an unknown and a malformed command, and checks the resulting monitoring
    log broks. Then re-runs with external command logging disabled and checks
    that no monitoring logs are produced.

    :return:
    """
    self.print_header()
    self.setup_with_file('cfg/cfg_monitoring_logs.cfg')
    assert self.conf_is_correct
    self._sched = self.schedulers['scheduler-master'].sched

    now = int(time.time())

    # RESTART_PROGRAM
    excmd = '[%d] RESTART_PROGRAM' % now
    self._sched.run_external_command(excmd)
    self.external_command_loop()
    self.assert_any_log_match('RESTART command : libexec/sleep_command.sh 3')

    # RELOAD_CONFIG
    excmd = '[%d] RELOAD_CONFIG' % now
    self._sched.run_external_command(excmd)
    self.external_command_loop()
    self.assert_any_log_match('RELOAD command : libexec/sleep_command.sh 2')

    # UNKNOWN COMMAND
    excmd = '[%d] UNKNOWN_COMMAND' % now
    self._sched.run_external_command(excmd)
    self.external_command_loop()
    # Malformed command
    excmd = '[%d] MALFORMED COMMAND' % now
    self._sched.run_external_command(excmd)
    self.external_command_loop()

    monitoring_logs = []
    # Fix: dict.itervalues() is Python 2 only - use values() (the file targets Python 3)
    for brok in self._sched.brokers['broker-master']['broks'].values():
        if brok.type == 'monitoring_log':
            data = unserialize(brok.data)
            monitoring_logs.append((data['level'], data['message']))

    # The messages are echoed by the launched scripts
    expected_logs = [
        (u'info', u'I awoke after sleeping 3 seconds | sleep=3\n'),
        (u'info', u'I awoke after sleeping 2 seconds | sleep=2\n'),
        (u'error', u"Malformed command: '[%s] MALFORMED COMMAND'" % now),
        (u'error', u"Command '[%s] UNKNOWN_COMMAND' is not recognized, sorry" % now)
    ]
    for log_level, log_message in expected_logs:
        assert (log_level, log_message) in monitoring_logs

    # Now with disabled log of external commands
    self.setup_with_file('cfg/cfg_monitoring_logs_disabled.cfg')
    assert self.conf_is_correct
    self._sched = self.schedulers['scheduler-master'].sched

    # RESTART_PROGRAM
    excmd = '[%d] RESTART_PROGRAM' % int(time.time())
    self._sched.run_external_command(excmd)
    self.external_command_loop()
    self.assert_any_log_match('RESTART command : libexec/sleep_command.sh 3')

    # RELOAD_CONFIG
    excmd = '[%d] RELOAD_CONFIG' % int(time.time())
    self._sched.run_external_command(excmd)
    self.external_command_loop()
    self.assert_any_log_match('RELOAD command : libexec/sleep_command.sh 2')

    monitoring_logs = []
    # Fix: same Python 3 values() iteration as above
    for brok in self._sched.brokers['broker-master']['broks'].values():
        if brok.type == 'monitoring_log':
            data = unserialize(brok.data)
            monitoring_logs.append((data['level'], data['message']))

    # No monitoring logs
    assert [] == monitoring_logs
def passive_checks_service(self, log_passive_checks):
    """ Test logs for external commands

    Sends PROCESS_SERVICE_CHECK_RESULT commands (including one with accented
    characters and a long output) and checks the monitoring log broks. The
    expected log format depends on the log_passive_checks flag.

    :param log_passive_checks: value forced into the scheduler configuration
    :return:
    """
    self.print_header()
    self.setup_with_file('cfg/cfg_monitoring_logs.cfg')
    assert self.conf_is_correct
    self._sched = self.schedulers['scheduler-master'].sched

    # Force the log passive checks configuration parameter
    self._sched.conf.log_passive_checks = log_passive_checks

    now = int(time.time())

    # -----------------------------
    # Service part
    # -----------------------------
    # Get host
    host = self._sched.hosts.find_by_name('test_host_0')
    host.checks_in_progress = []
    host.event_handler_enabled = False
    host.active_checks_enabled = True
    host.passive_checks_enabled = True
    assert host is not None

    # Get service
    svc = self._sched.services.find_srv_by_name_and_hostname("test_host_0", "test_ok_0")
    svc.checks_in_progress = []
    svc.event_handler_enabled = False
    svc.active_checks_enabled = True
    svc.passive_checks_enabled = True
    assert svc is not None

    # Passive checks for host and service
    # ---------------------------------------------
    # Receive passive host check Up
    excmd = '[%d] PROCESS_HOST_CHECK_RESULT;test_host_0;0;Host is UP' % time.time()
    self.schedulers['scheduler-master'].sched.run_external_command(excmd)
    self.external_command_loop()
    assert 'UP' == host.state
    assert 'Host is UP' == host.output

    # Service is going ok ...
    excmd = '[%d] PROCESS_SERVICE_CHECK_RESULT;test_host_0;test_ok_0;0;' \
            'Service is OK|rtt=9999;5;10;0;10000' % now
    self._sched.run_external_command(excmd)
    self.external_command_loop()
    assert 'OK' == svc.state
    assert 'Service is OK' == svc.output
    assert 'rtt=9999;5;10;0;10000' == svc.perf_data

    # Service is going ok ... with long output
    excmd = '[%d] PROCESS_SERVICE_CHECK_RESULT;test_host_0;test_ok_0;0;' \
            'Service is OK and have some special characters: àéèüäï' \
            '|rtt=9999;5;10;0;10000' \
            '\r\nLong output... also some specials: àéèüäï' % now
    self._sched.run_external_command(excmd)
    self.external_command_loop()
    assert 'OK' == svc.state
    assert u'Service is OK and have some special characters: àéèüäï' == svc.output
    assert 'rtt=9999;5;10;0;10000' == svc.perf_data
    assert u'Long output... also some specials: àéèüäï' == svc.long_output

    # Extract monitoring logs
    monitoring_logs = []
    # Fix: dict.itervalues() is Python 2 only - use values()
    for brok in self._sched.brokers['broker-master']['broks'].values():
        if brok.type == 'monitoring_log':
            data = unserialize(brok.data)
            monitoring_logs.append((data['level'], data['message']))
            # Fix: the unicode builtin was removed in Python 3; str is unicode text
            print("Log (unicode: %s): %s" % (isinstance(data['message'], str),
                                             data['message']))

    # Passive service check log contains:
    # - host name,
    # - host status,
    # - output,
    # - performance data and
    # - long output
    # All are separated with a semi-colon
    if log_passive_checks:
        expected_logs = [
            (u'info', u'PASSIVE SERVICE CHECK: test_host_0;test_ok_0;0;Service is OK;;'
                      u'rtt=9999;5;10;0;10000'),
            (u'info', u'PASSIVE SERVICE CHECK: test_host_0;test_ok_0;0;'
                      u'Service is OK and have some special characters: àéèüäï;'
                      u'Long output... also some specials: àéèüäï;'
                      u'rtt=9999;5;10;0;10000')
        ]
    else:
        expected_logs = [
            (u'info', u'EXTERNAL COMMAND: [%s] PROCESS_SERVICE_CHECK_RESULT;'
                      u'test_host_0;test_ok_0;0;'
                      u'Service is OK|rtt=9999;5;10;0;10000' % now),
            (u'info', u'EXTERNAL COMMAND: [%s] PROCESS_SERVICE_CHECK_RESULT;'
                      u'test_host_0;test_ok_0;0;'
                      u'Service is OK and have some special characters: àéèüäï'
                      u'|rtt=9999;5;10;0;10000'
                      u'\r\nLong output... also some specials: àéèüäï' % now),
        ]
    for log_level, log_message in expected_logs:
        print("Msg: %s" % log_message)
        assert (log_level, log_message) in monitoring_logs
def passive_checks_host(self, log_passive_checks):
    """ Test logs for external commands

    Sends three PROCESS_HOST_CHECK_RESULT 'Host is dead' commands (SOFT, SOFT,
    HARD) and checks the monitoring log broks. The expected log format depends
    on the log_passive_checks flag.

    :param log_passive_checks: value forced into the scheduler configuration
    :return:
    """
    self.print_header()
    self.setup_with_file('cfg/cfg_monitoring_logs.cfg')
    assert self.conf_is_correct
    self._sched = self.schedulers['scheduler-master'].sched

    # Force the log passive checks configuration parameter
    self._sched.conf.log_passive_checks = log_passive_checks

    # -----------------------------
    # Host part
    # -----------------------------
    # Get and configure host
    host = self._sched.hosts.find_by_name("test_host_0")
    host.checks_in_progress = []
    host.act_depend_of = []  # ignore the router which we depend of
    host.event_handler_enabled = False
    assert host is not None

    now = int(time.time())

    # Receive passive host check Down
    excmd = '[%d] PROCESS_HOST_CHECK_RESULT;test_host_0;2;Host is dead' % now
    self._sched.run_external_command(excmd)
    self.external_command_loop()
    assert 'DOWN' == host.state
    assert 'SOFT' == host.state_type
    assert 'Host is dead' == host.output
    excmd = '[%d] PROCESS_HOST_CHECK_RESULT;test_host_0;2;Host is dead' % now
    self._sched.run_external_command(excmd)
    self.external_command_loop()
    assert 'DOWN' == host.state
    assert 'SOFT' == host.state_type
    assert 'Host is dead' == host.output
    excmd = '[%d] PROCESS_HOST_CHECK_RESULT;test_host_0;2;Host is dead' % now
    self._sched.run_external_command(excmd)
    self.external_command_loop()
    assert 'DOWN' == host.state
    assert 'HARD' == host.state_type
    assert 'Host is dead' == host.output

    # Extract monitoring logs
    monitoring_logs = []
    # Fix: dict.itervalues() is Python 2 only - use values()
    for brok in self._sched.brokers['broker-master']['broks'].values():
        if brok.type == 'monitoring_log':
            data = unserialize(brok.data)
            monitoring_logs.append((data['level'], data['message']))
            # Fix: the unicode builtin was removed in Python 3; str is unicode text
            print("Log (unicode: %s): %s" % (isinstance(data['message'], str),
                                             data['message']))

    # Passive host check log contains:
    # - host name,
    # - host status,
    # - output,
    # - performance data and
    # - long output
    # All are separated with a semi-colon
    expected_logs = [
        (u'error', u'HOST ALERT: test_host_0;DOWN;SOFT;1;Host is dead'),
        (u'error', u'HOST ALERT: test_host_0;DOWN;SOFT;2;Host is dead'),
        (u'error', u'HOST ALERT: test_host_0;DOWN;HARD;3;Host is dead'),
        (u'error', u'HOST NOTIFICATION: test_contact;test_host_0;DOWN;notify-host;'
                   u'Host is dead')
    ]
    if log_passive_checks:
        expected_logs.extend([
            (u'warning', u'PASSIVE HOST CHECK: test_host_0;2;Host is dead;;'),
            (u'warning', u'PASSIVE HOST CHECK: test_host_0;2;Host is dead;;'),
            (u'warning', u'PASSIVE HOST CHECK: test_host_0;2;Host is dead;;'),
        ])
    else:
        expected_logs.extend([
            (u'info', u'EXTERNAL COMMAND: [%s] PROCESS_HOST_CHECK_RESULT;'
                      u'test_host_0;2;Host is dead' % now),
            (u'info', u'EXTERNAL COMMAND: [%s] PROCESS_HOST_CHECK_RESULT;'
                      u'test_host_0;2;Host is dead' % now),
            (u'info', u'EXTERNAL COMMAND: [%s] PROCESS_HOST_CHECK_RESULT;'
                      u'test_host_0;2;Host is dead' % now)
        ])
    for log_level, log_message in expected_logs:
        print("Msg: %s" % log_message)
        assert (log_level, log_message) in monitoring_logs
def test_unserialize_notif(self):
    """ Test unserialize notifications

    Feeds a hand-written serialized Notification (JSON with the
    __sys_python_module__ marker) to unserialize() and only checks that no
    exception is raised.

    :return: None
    """
    # Serialized notification fixture; the embedded command_call/command structures
    # mirror what the scheduler serializes for a notify-service command
    var = '''
    {"98a76354619746fa8e6d2637a5ef94cb": {
        "content": {
            "reason_type": 1, "exit_status": 3, "creation_time":1468522950.2828259468,
            "command_call": {
                "args": [], "call": "notify-service",
                "command": {
                    "command_line": "$USER1$\/notifier.pl --hostname $HOSTNAME$ --servicedesc $SERVICEDESC$ --notificationtype $NOTIFICATIONTYPE$ --servicestate $SERVICESTATE$ --serviceoutput $SERVICEOUTPUT$ --longdatetime $LONGDATETIME$ --serviceattempt $SERVICEATTEMPT$ --servicestatetype $SERVICESTATETYPE$",
                    "command_name": "notify-service",
                    "configuration_errors":[], "configuration_warnings":[],
                    "enable_environment_macros": false,
                    "id": "487aa432ddf646079ec6c07803333eac",
                    "imported_from": "cfg\/default\/commands.cfg:14",
                    "macros":{}, "module_type": "fork", "my_type":"command",
                    "ok_up":"", "poller_tag": "None",
                    "properties":{
                        "use":{
                            "brok_transformation": null, "class_inherit": [],
                            "conf_send_preparation": null, "default":[],
                            "fill_brok":[], "has_default":true, "help":"",
                            "keep_empty":false, "managed":true, "merging":"uniq",
                            "no_slots":false, "override":false, "required":false,
                            "retention":false, "retention_preparation":null,
                            "special":false, "split_on_coma":true, "to_send":false,
                            "unmanaged":false, "unused":false},
                        "name":{
                            "brok_transformation":null, "class_inherit":[],
                            "conf_send_preparation":null, "default":"",
                            "fill_brok":[], "has_default":true, "help":"",
                            "keep_empty":false, "managed":true, "merging":"uniq",
                            "no_slots":false, "override":false, "required":false,
                            "retention":false, "retention_preparation":null,
                            "special":false, "split_on_coma":true, "to_send":false,
                            "unmanaged":false, "unused":false},
                    },
                    "reactionner_tag":"None",
                    "running_properties":{
                        "configuration_errors":{
                            "brok_transformation":null, "class_inherit":[],
                            "conf_send_preparation":null, "default":[],"fill_brok":[],
                            "has_default":true,"help":"","keep_empty":false,
                            "managed":true,"merging":"uniq","no_slots":false,"override":false,
                            "required":false,"retention":false,"retention_preparation":null,
                            "special":false,"split_on_coma":true,"to_send":false,
                            "unmanaged":false,"unused":false},
                    },
                    "tags":[], "timeout":-1,
                    "uuid":"487aa432ddf646079ec6c07803333eac"},
                "enable_environment_macros":false, "late_relink_done":false,
                "macros":{}, "module_type":"fork", "my_type":"CommandCall",
                "poller_tag":"None", "properties":{}, "reactionner_tag":"None",
                "timeout":-1, "uuid":"cfcaf0fc232b4f59a7d8bb5bd1d83fef", "valid":true},
            "escalated":false, "reactionner_tag":"None", "s_time":0.0,
            "notification_type":0, "contact_name":"test_contact",
            "type":"PROBLEM", "uuid":"98a76354619746fa8e6d2637a5ef94cb",
            "check_time":0,"ack_data":"", "state":0,"u_time":0.0,
            "env":{
                "NAGIOS_SERVICEDOWNTIME":"0", "NAGIOS_TOTALSERVICESUNKNOWN":"",
                "NAGIOS_LONGHOSTOUTPUT":"", "NAGIOS_HOSTDURATIONSEC":"1468522950",
                "NAGIOS_HOSTDISPLAYNAME":"test_host_0",
            },
            "notif_nb":1,"_in_timeout":false,"enable_environment_macros":false,
            "host_name":"test_host_0", "status":"scheduled",
            "execution_time":0.0,"start_time":0,"worker":"none","t_to_go":1468522950,
            "module_type":"fork","service_description":"test_ok_0","sched_id":0,"ack_author":"",
            "ref":"272e89c1de854bad85987a7583e6c46b",
            "is_a":"notification",
            "contact":"4e7c4076c372457694684bdd5ba47e94",
            "command":"\/notifier.pl --hostname test_host_0 --servicedesc test_ok_0 --notificationtype PROBLEM --servicestate CRITICAL --serviceoutput CRITICAL --longdatetime Thu 14 Jul 21:02:30 CEST 2016 --serviceattempt 2 --servicestatetype HARD",
            "end_time":0,"timeout":30,"output":"",
            "already_start_escalations":[]},
        "__sys_python_module__":"alignak.notification.Notification"
        }
    }
    '''
    # Would raise (e.g. AlignakClassLookupException) on failure
    unserialize(var)
    assert True
def test_multibroker_onesched(self):
    """ Test with 2 brokers and 1 scheduler

    Checks that both broker links attached to the scheduler receive the same
    broks, and that the scheduler HTTP interface hands each broker its broks
    exactly once.

    :return: None
    """
    self.setup_with_file('cfg/multibroker/cfg_multi_broker_one_scheduler.cfg')

    my_scheduler = self._scheduler

    assert 2 == len(my_scheduler.my_daemon.brokers)

    # create broks
    host = my_scheduler.pushed_conf.hosts.find_by_name("test_host_0")
    host.checks_in_progress = []
    host.act_depend_of = []  # ignore the router

    svc = my_scheduler.pushed_conf.services.find_srv_by_name_and_hostname("test_host_0",
                                                                          "test_ok_0")
    svc.checks_in_progress = []
    svc.act_depend_of = []  # no raised host check on critical service check result
    self.scheduler_loop(1, [[host, 0, 'UP'], [svc, 0, 'OK']])

    # Count broks in each broker
    broker_broks_count = {}
    broker1_link_uuid = None
    broker2_link_uuid = None
    for broker_link_uuid in my_scheduler.my_daemon.brokers:
        if my_scheduler.my_daemon.brokers[broker_link_uuid].name == 'broker-master':
            broker1_link_uuid = broker_link_uuid
        else:
            broker2_link_uuid = broker_link_uuid
        broker_broks_count[my_scheduler.my_daemon.brokers[broker_link_uuid].name] = 0
        print(("Broker %s:" % (my_scheduler.my_daemon.brokers[broker_link_uuid])))
        for brok in my_scheduler.my_daemon.brokers[broker_link_uuid].broks:
            broker_broks_count[my_scheduler.my_daemon.brokers[broker_link_uuid].name] += 1
            print("- %s" % brok)

    # Same list of broks in the two brokers
    # Fix: assertItemsEqual is Python 2 only; it was renamed assertCountEqual in Python 3
    self.assertCountEqual(my_scheduler.my_daemon.brokers[broker1_link_uuid].broks,
                          my_scheduler.my_daemon.brokers[broker2_link_uuid].broks)

    # Scheduler HTTP interface
    sched_interface = SchedulerInterface(my_scheduler.my_daemon)

    # Test broker-master that gets its broks from the scheduler
    # Get the scheduler broks to be sent ...
    print("Broks to be sent:")
    to_send = [b for b in my_scheduler.my_daemon.brokers[broker1_link_uuid].broks
               if getattr(b, 'sent_to_externals', False)]
    for brok in to_send:
        print(("- %s" % (brok)))
    assert 6 == len(to_send)

    broks_list = sched_interface._broks('broker-master')
    broks_list = unserialize(broks_list, True)
    assert 6 == len(broks_list)
    assert broker_broks_count['broker-master'] == len(broks_list)

    # No more broks to get
    # Get the scheduler broks to be sent ...
    to_send = [b for b in my_scheduler.my_daemon.brokers[broker1_link_uuid].broks
               if not getattr(b, 'got', False)]
    assert 0 == len(to_send), "Still some broks to be sent!"

    # Test broker-master 2 that gets its broks from the scheduler
    # Get the scheduler broks to be sent ...
    to_send = [b for b in my_scheduler.my_daemon.brokers[broker2_link_uuid].broks
               if getattr(b, 'sent_to_externals', False)]
    print("Broks to be sent:")
    for brok in to_send:
        print(("- %s" % (brok)))
    assert 6 == len(to_send)

    broks_list = sched_interface._broks('broker-master2')
    broks_list = unserialize(broks_list, True)
    assert 6 == len(broks_list)
    assert broker_broks_count['broker-master2'] == len(broks_list)

    # No more broks to get
    # Get the scheduler broks to be sent ...
    to_send = [b for b in my_scheduler.my_daemon.brokers[broker2_link_uuid].broks
               if not getattr(b, 'got', False)]
    assert 0 == len(to_send), "Still some broks to be sent!"

    # Test unknown broker that gets its broks from the scheduler
    broks_list = sched_interface._broks('broker-unknown')
    broks_list = unserialize(broks_list, True)
    assert 0 == len(broks_list)

    # Re-get broks
    # Test broker-master that gets its broks from the scheduler
    broks_list = sched_interface._broks('broker-master')
    broks_list = unserialize(broks_list, True)
    # No broks !
    assert 0 == len(broks_list)

    # Test broker-master 2 that gets its broks from the scheduler
    broks_list = sched_interface._broks('broker-master2')
    broks_list = unserialize(broks_list, True)
    # No broks !
    assert 0 == len(broks_list)

    # Some new broks
    self.scheduler_loop(1, [[host, 0, 'UP'], [svc, 0, 'OK']])

    # Same list of broks in the two brokers
    self.assertCountEqual(my_scheduler.my_daemon.brokers[broker1_link_uuid].broks,
                          my_scheduler.my_daemon.brokers[broker2_link_uuid].broks)

    assert len(my_scheduler.my_daemon.brokers[broker1_link_uuid].broks) > 1
    assert len(my_scheduler.my_daemon.brokers[broker2_link_uuid].broks) > 1
def test_acknowledge_service(self):
    """Check the broks emitted when a service acknowledgement is raised and removed.

    Three phases:
    1/ acknowledge a CRITICAL service -> exactly one 'acknowledge_raise' brok,
    2/ the service recovers, the scheduler drops the ack -> one 'acknowledge_expire' brok,
    3/ acknowledge again, then REMOVE_SVC_ACKNOWLEDGEMENT -> one brok of each type.

    :return: None
    """
    self.setup_with_file('cfg/cfg_default.cfg')

    host = self._scheduler.hosts.find_by_name("test_host_0")
    host.checks_in_progress = []
    host.act_depend_of = []  # ignore the router
    host.event_handler_enabled = False

    svc = self._scheduler.services.find_srv_by_name_and_hostname("test_host_0",
                                                                 "test_ok_0")
    # To make tests quicker we make notifications send very quickly
    svc.checks_in_progress = []
    svc.act_depend_of = []  # no hostchecks on critical checkresults
    svc.event_handler_enabled = False

    # --- phase 1: raise an acknowledge on a CRITICAL service
    self._main_broker.broks = []
    self.scheduler_loop(1, [[host, 0, 'UP'], [svc, 2, 'CRITICAL']])
    time.sleep(0.1)
    now = time.time()
    cmd = "[{0}] ACKNOWLEDGE_SVC_PROBLEM;{1};{2};{3};{4};{5};{6};{7}\n". \
        format(int(now), 'test_host_0', 'test_ok_0', 2, 0, 1, 'darth vader',
               'normal process')
    self._scheduler.run_external_commands([cmd])
    self.scheduler_loop(3, [[host, 0, 'UP'], [svc, 2, 'CRITICAL']])

    for brok in self._main_broker.broks:
        print("Brok: %s" % brok)
    raised = [b for b in self._main_broker.broks if b.type == 'acknowledge_raise']
    expired = [b for b in self._main_broker.broks if b.type == 'acknowledge_expire']
    assert len(raised) == 1
    assert len(expired) == 0
    hdata = unserialize(raised[0].data)
    assert hdata['host'] == 'test_host_0'
    assert hdata['service'] == 'test_ok_0'
    assert hdata['comment'] == 'normal process'

    # --- phase 2: return the service to OK, so the scheduler removes the acknowledge
    self._main_broker.broks = []
    self.scheduler_loop(2, [[host, 0, 'UP'], [svc, 0, 'OK']])

    raised = [b for b in self._main_broker.broks if b.type == 'acknowledge_raise']
    expired = [b for b in self._main_broker.broks if b.type == 'acknowledge_expire']
    assert len(raised) == 0
    assert len(expired) == 1
    hdata = unserialize(expired[0].data)
    assert hdata['host'] == 'test_host_0'
    assert hdata['service'] == 'test_ok_0'

    # --- phase 3: raise again, then remove the acknowledge with an external command
    self._main_broker.broks = []
    self.scheduler_loop(1, [[host, 0, 'UP'], [svc, 2, 'CRITICAL']])
    time.sleep(0.1)
    now = time.time()
    cmd = "[{0}] ACKNOWLEDGE_SVC_PROBLEM;{1};{2};{3};{4};{5};{6};{7}\n". \
        format(int(now), 'test_host_0', 'test_ok_0', 2, 0, 1, 'darth vader',
               'normal process')
    self._scheduler.run_external_commands([cmd])
    self.scheduler_loop(1, [[host, 0, 'UP'], [svc, 2, 'CRITICAL']])
    cmd = "[{0}] REMOVE_SVC_ACKNOWLEDGEMENT;{1};{2}\n". \
        format(int(now), 'test_host_0', 'test_ok_0')
    self._scheduler.run_external_commands([cmd])
    self.scheduler_loop(1, [[host, 0, 'UP'], [svc, 2, 'CRITICAL']])

    for brok in self._main_broker.broks:
        print("Brok: %s" % brok)
    raised = [b for b in self._main_broker.broks if b.type == 'acknowledge_raise']
    expired = [b for b in self._main_broker.broks if b.type == 'acknowledge_expire']
    assert len(raised) == 1
    assert len(expired) == 1
    hdata = unserialize(expired[0].data)
    assert hdata['host'] == 'test_host_0'
    assert hdata['service'] == 'test_ok_0'
    assert hdata['comment'] == 'normal process'
def setup_new_conf(self):
    # pylint: disable=too-many-statements, too-many-branches, too-many-locals
    """Setup the new configuration received from the arbiter.

    Runs the base class treatment, then:
    - saves the retention data of a formerly pushed configuration,
    - un-serializes the received monitored objects configuration part,
    - (re)creates the pollers / reactionners / brokers satellite links,
      preserving broks/actions context for links we already knew,
    - applies the arbiter `override_conf` properties on the configuration,
    - loads and starts the scheduler modules (once),
    - gives the configuration to the inner scheduler and starts scheduling.

    :return: None
    """
    # Execute the base class treatment...
    super(Alignak, self).setup_new_conf()

    # ...then our own specific treatment!
    with self.conf_lock:
        # self_conf is our own configuration from the alignak environment
        # self_conf = self.cur_conf['self_conf']
        logger.debug("Got config: %s", self.cur_conf)
        if 'conf_part' not in self.cur_conf:
            self.cur_conf['conf_part'] = None
        conf_part = self.cur_conf['conf_part']

        # Ok now we can save the retention data
        if self.sched.pushed_conf is not None:
            self.sched.update_retention()

        # Get the monitored objects configuration
        t00 = time.time()
        received_conf_part = None
        try:
            received_conf_part = unserialize(conf_part)
            assert received_conf_part is not None
        except AssertionError:
            # This to indicate that no configuration is managed by this scheduler...
            logger.warning("No managed configuration received from arbiter")
        except AlignakClassLookupException as exp:  # pragma: no cover
            # This to indicate that the new configuration is not managed...
            self.new_conf = {
                "_status": "Cannot un-serialize configuration received from arbiter",
                "_error": str(exp)
            }
            logger.error(self.new_conf)
            logger.error("Back trace of the error:\n%s", traceback.format_exc())
            return
        except Exception as exp:  # pylint: disable=broad-except
            # This to indicate that the new configuration is not managed...
            self.new_conf = {
                "_status": "Cannot un-serialize configuration received from arbiter",
                "_error": str(exp)
            }
            logger.error(self.new_conf)
            self.exit_on_exception(exp, str(self.new_conf))

        # if not received_conf_part:
        #     return

        logger.info("Monitored configuration %s received at %d. Un-serialized in %d secs",
                    received_conf_part, t00, time.time() - t00)
        logger.info("Scheduler received configuration : %s", received_conf_part)

        # Now we create our pollers, reactionners and brokers
        for link_type in ['pollers', 'reactionners', 'brokers']:
            if link_type not in self.cur_conf['satellites']:
                logger.error("Missing %s in the configuration!", link_type)
                continue

            my_satellites = getattr(self, link_type, {})
            received_satellites = self.cur_conf['satellites'][link_type]
            for link_uuid in received_satellites:
                rs_conf = received_satellites[link_uuid]
                logger.debug("- received %s - %s: %s", rs_conf['instance_id'],
                             rs_conf['type'], rs_conf['name'])

                # Must look if we already had a configuration and save our broks
                already_got = rs_conf['instance_id'] in my_satellites
                broks = []
                actions = {}
                wait_homerun = {}
                external_commands = {}
                running_id = 0
                if already_got:
                    logger.warning("I already got: %s", rs_conf['instance_id'])
                    # Save some information from the former link
                    running_id = my_satellites[link_uuid].running_id
                    (broks, actions,
                     wait_homerun, external_commands) = \
                        my_satellites[link_uuid].get_and_clear_context()
                    # Delete the former link
                    del my_satellites[link_uuid]

                # My new satellite link...
                new_link = SatelliteLink.get_a_satellite_link(link_type[:-1], rs_conf)
                my_satellites[new_link.uuid] = new_link
                logger.info("I got a new %s satellite: %s", link_type[:-1], new_link)

                # Restore the saved context on the fresh link
                new_link.running_id = running_id
                new_link.external_commands = external_commands
                new_link.broks = broks
                new_link.wait_homerun = wait_homerun
                new_link.actions = actions

                # Replacing the satellite address and port by those defined in satellite_map
                # (currently only warns - no actual override is performed here)
                if new_link.name in self.cur_conf['override_conf'].get('satellite_map', {}):
                    override_conf = self.cur_conf['override_conf']
                    overriding = override_conf.get('satellite_map')[new_link.name]
                    logger.warning("Do not override the configuration for: %s, with: %s. "
                                   "Please check whether this is necessary!",
                                   new_link.name, overriding)

        # First mix conf and override_conf to have our definitive conf
        # Fix: self.cur_conf is a dict, so the former
        # getattr(self.cur_conf, 'override_conf', []) always returned the []
        # default and this loop never executed.
        for prop in self.cur_conf.get('override_conf', {}):
            logger.debug("Overriden: %s / %s ", prop,
                         getattr(received_conf_part, prop, None))
            logger.debug("Overriding: %s / %s ", prop, self.cur_conf['override_conf'])
            setattr(received_conf_part, prop,
                    self.cur_conf['override_conf'].get(prop, None))

        # Scheduler modules are loaded only once, on the first configuration dispatch
        if not self.have_modules:
            try:
                logger.debug("Modules configuration: %s", self.cur_conf['modules'])
                self.modules = unserialize(self.cur_conf['modules'], no_load=True)
            except AlignakClassLookupException as exp:  # pragma: no cover, simple protection
                logger.error('Cannot un-serialize modules configuration '
                             'received from arbiter: %s', exp)
            if self.modules:
                logger.debug("I received some modules configuration: %s", self.modules)
                self.have_modules = True

                self.do_load_modules(self.modules)
                # and start external modules too
                self.modules_manager.start_external_instances()
            else:
                logger.info("I do not have modules")

        if received_conf_part:
            logger.info("Loading configuration...")

            # Propagate the global parameters to the configuration items
            received_conf_part.explode_global_conf()

            # We give the configuration to our scheduler
            self.sched.reset()
            self.sched.load_conf(self.cur_conf['instance_id'],
                                 self.cur_conf['instance_name'],
                                 received_conf_part)

            # Once loaded, the scheduler has an inner pushed_conf object
            logger.info("Loaded: %s", self.sched.pushed_conf)

            # Update the scheduler ticks according to the daemon configuration
            self.sched.update_recurrent_works_tick(self)

            # We must update our pushed configuration macros with correct values
            # from the configuration parameters
            # self.sched.pushed_conf.fill_resource_macros_names_macros()

            # Creating the Macroresolver Class & unique instance
            m_solver = MacroResolver()
            m_solver.init(received_conf_part)

            # Now create the external commands manager
            # We are an applyer: our role is not to dispatch commands, but to apply them
            ecm = ExternalCommandManager(
                received_conf_part, 'applyer', self.sched,
                received_conf_part.accept_passive_unknown_check_results,
                received_conf_part.log_external_commands)

            # Scheduler needs to know about this external command manager to use it if necessary
            self.sched.external_commands_manager = ecm

            # Ok now we can load the retention data
            self.sched.retention_load()

            # Log hosts/services initial states
            self.sched.log_initial_states()

        # Create brok new conf
        brok = Brok({'type': 'new_conf', 'data': {}})
        self.sched.add_brok(brok)

        # Initialize connection with all our satellites
        logger.info("Initializing connection with my satellites:")
        my_satellites = self.get_links_of_type(s_type='')
        for satellite in list(my_satellites.values()):
            logger.info("- : %s/%s", satellite.type, satellite.name)
            if not self.daemon_connection_init(satellite):
                logger.error("Satellite connection failed: %s", satellite)

    if received_conf_part:
        # Enable the scheduling process
        logger.info("Loaded: %s", self.sched.pushed_conf)
        self.sched.start_scheduling()

    # Now I have a configuration!
    self.have_conf = True
def setup_new_conf(self):
    # pylint: disable=too-many-statements, too-many-branches, too-many-locals
    # NOTE(review): this method is a duplicate of an earlier, token-identical
    # setup_new_conf definition in this file. At class-creation time Python keeps
    # the last definition, so only this copy is effective - confirm and remove one.
    """Setup new conf received for scheduler

    Un-serializes the configuration part received from the arbiter, rebuilds the
    poller/reactionner/broker satellite links (keeping the broks/actions context
    of links already known), loads the scheduler modules once, then hands the
    configuration to the inner scheduler and starts the scheduling process.

    :return: None
    """
    # Execute the base class treatment...
    super(Alignak, self).setup_new_conf()

    # ...then our own specific treatment!
    with self.conf_lock:
        # self_conf is our own configuration from the alignak environment
        # self_conf = self.cur_conf['self_conf']
        logger.debug("Got config: %s", self.cur_conf)
        if 'conf_part' not in self.cur_conf:
            self.cur_conf['conf_part'] = None
        conf_part = self.cur_conf['conf_part']

        # Ok now we can save the retention data of the previously pushed
        # configuration, if any
        if self.sched.pushed_conf is not None:
            self.sched.update_retention()

        # Get the monitored objects configuration
        t00 = time.time()
        received_conf_part = None
        try:
            received_conf_part = unserialize(conf_part)
            assert received_conf_part is not None
        except AssertionError as exp:
            # This to indicate that no configuration is managed by this scheduler...
            logger.warning("No managed configuration received from arbiter")
        except AlignakClassLookupException as exp:  # pragma: no cover
            # This to indicate that the new configuration is not managed...
            self.new_conf = {
                "_status": "Cannot un-serialize configuration received from arbiter",
                "_error": str(exp)
            }
            logger.error(self.new_conf)
            logger.error("Back trace of the error:\n%s", traceback.format_exc())
            return
        except Exception as exp:  # pylint: disable=broad-except
            # This to indicate that the new configuration is not managed...
            self.new_conf = {
                "_status": "Cannot un-serialize configuration received from arbiter",
                "_error": str(exp)
            }
            logger.error(self.new_conf)
            # Does not return: exit_on_exception terminates the daemon
            self.exit_on_exception(exp, str(self.new_conf))

        # if not received_conf_part:
        #     return

        logger.info("Monitored configuration %s received at %d. Un-serialized in %d secs",
                    received_conf_part, t00, time.time() - t00)
        logger.info("Scheduler received configuration : %s", received_conf_part)

        # Now we create our pollers, reactionners and brokers
        for link_type in ['pollers', 'reactionners', 'brokers']:
            if link_type not in self.cur_conf['satellites']:
                logger.error("Missing %s in the configuration!", link_type)
                continue

            my_satellites = getattr(self, link_type, {})
            received_satellites = self.cur_conf['satellites'][link_type]
            for link_uuid in received_satellites:
                rs_conf = received_satellites[link_uuid]
                logger.debug("- received %s - %s: %s", rs_conf['instance_id'],
                             rs_conf['type'], rs_conf['name'])

                # Must look if we already had a configuration and save our broks
                already_got = rs_conf['instance_id'] in my_satellites
                broks = []
                actions = {}
                wait_homerun = {}
                external_commands = {}
                running_id = 0
                if already_got:
                    logger.warning("I already got: %s", rs_conf['instance_id'])
                    # Save some information from the former link before replacing it
                    running_id = my_satellites[link_uuid].running_id
                    (broks, actions,
                     wait_homerun, external_commands) = \
                        my_satellites[link_uuid].get_and_clear_context()
                    # Delete the former link
                    del my_satellites[link_uuid]

                # My new satellite link...
                new_link = SatelliteLink.get_a_satellite_link(link_type[:-1], rs_conf)
                my_satellites[new_link.uuid] = new_link
                logger.info("I got a new %s satellite: %s", link_type[:-1], new_link)

                # Restore the saved context on the fresh link
                new_link.running_id = running_id
                new_link.external_commands = external_commands
                new_link.broks = broks
                new_link.wait_homerun = wait_homerun
                new_link.actions = actions

                # Replacing the satellite address and port by those defined in satellite_map
                # NOTE(review): this branch only warns; no actual override is performed here.
                if new_link.name in self.cur_conf['override_conf'].get('satellite_map', {}):
                    override_conf = self.cur_conf['override_conf']
                    overriding = override_conf.get('satellite_map')[new_link.name]
                    logger.warning("Do not override the configuration for: %s, with: %s. "
                                   "Please check whether this is necessary!",
                                   new_link.name, overriding)

        # First mix conf and override_conf to have our definitive conf
        # NOTE(review): self.cur_conf is subscripted as a dict elsewhere in this
        # method (e.g. self.cur_conf['satellites']), so getattr() here likely always
        # returns the [] default and this loop never runs - verify intent.
        for prop in getattr(self.cur_conf, 'override_conf', []):
            logger.debug("Overriden: %s / %s ", prop,
                         getattr(received_conf_part, prop, None))
            logger.debug("Overriding: %s / %s ", prop, self.cur_conf['override_conf'])
            setattr(received_conf_part, prop,
                    self.cur_conf['override_conf'].get(prop, None))

        # Scheduler modules are loaded only once, on the first configuration dispatch
        if not self.have_modules:
            try:
                logger.debug("Modules configuration: %s", self.cur_conf['modules'])
                self.modules = unserialize(self.cur_conf['modules'], no_load=True)
            except AlignakClassLookupException as exp:  # pragma: no cover, simple protection
                logger.error('Cannot un-serialize modules configuration '
                             'received from arbiter: %s', exp)
            if self.modules:
                logger.debug("I received some modules configuration: %s", self.modules)
                self.have_modules = True

                self.do_load_modules(self.modules)
                # and start external modules too
                self.modules_manager.start_external_instances()
            else:
                logger.info("I do not have modules")

        if received_conf_part:
            logger.info("Loading configuration...")

            # Propagate the global parameters to the configuration items
            received_conf_part.explode_global_conf()

            # We give the configuration to our scheduler
            self.sched.reset()
            self.sched.load_conf(self.cur_conf['instance_id'],
                                 self.cur_conf['instance_name'],
                                 received_conf_part)

            # Once loaded, the scheduler has an inner pushed_conf object
            logger.info("Loaded: %s", self.sched.pushed_conf)

            # Update the scheduler ticks according to the daemon configuration
            self.sched.update_recurrent_works_tick(self)

            # We must update our pushed configuration macros with correct values
            # from the configuration parameters
            # self.sched.pushed_conf.fill_resource_macros_names_macros()

            # Creating the Macroresolver Class & unique instance
            m_solver = MacroResolver()
            m_solver.init(received_conf_part)

            # Now create the external commands manager
            # We are an applyer: our role is not to dispatch commands, but to apply them
            ecm = ExternalCommandManager(
                received_conf_part, 'applyer', self.sched,
                received_conf_part.accept_passive_unknown_check_results,
                received_conf_part.log_external_commands)

            # Scheduler needs to know about this external command manager to use it if necessary
            self.sched.external_commands_manager = ecm

            # Ok now we can load the retention data
            self.sched.retention_load()

            # Log hosts/services initial states
            self.sched.log_initial_states()

        # Create brok new conf
        brok = Brok({'type': 'new_conf', 'data': {}})
        self.sched.add_brok(brok)

        # Initialize connection with all our satellites
        logger.info("Initializing connection with my satellites:")
        my_satellites = self.get_links_of_type(s_type='')
        for satellite in list(my_satellites.values()):
            logger.info("- : %s/%s", satellite.type, satellite.name)
            if not self.daemon_connection_init(satellite):
                logger.error("Satellite connection failed: %s", satellite)

    if received_conf_part:
        # Enable the scheduling process
        logger.info("Loaded: %s", self.sched.pushed_conf)
        self.sched.start_scheduling()

    # Now I have a configuration!
    self.have_conf = True