def get_initial_status_brok(self):
    """Build the initial status brok for this item.

    The brok data holds the item uuid plus every property flagged as
    'full_status' in the item properties.

    :return: brok with wanted data
    :rtype: fusionsupervision.brok.Brok
    """
    brok_data = {'uuid': self.uuid}
    self.fill_data_brok_from(brok_data, 'full_status')
    return Brok({'type': 'notification_raise', 'data': brok_data})
def make_monitoring_log(level, message, timestamp=None, to_logger=False):
    """Function used to build the monitoring log.

    Emit a log message with the provided level to the monitoring log logger.
    Build a Brok typed as monitoring_log with the provided message.

    When to_logger is True, the information is sent to the python logger, else
    a monitoring_log Brok is returned. The Brok is managed by the daemons to
    build an Event that will be logged by the Arbiter when it collects all
    the events.

    TODO: replace with dedicated brok for each event to log - really useful?

    :param level: log level as defined in logging
    :type level: str
    :param message: message to send to the monitoring log logger
    :type message: str
    :param to_logger: when set, send to the logger, else raise a brok
    :type to_logger: bool
    :param timestamp: if set, force the log event timestamp
    :return: a monitoring_log Brok, True when sent to the logger,
             False for an unknown level
    :rtype: fusionsupervision.brok.Brok
    """
    level = level.lower()
    if level not in ['debug', 'info', 'warning', 'error', 'critical']:
        # Unknown log level: nothing is logged and no brok is raised
        return False

    if to_logger:
        logging.getLogger(ALIGNAK_LOGGER_NAME).debug("Monitoring log: %s / %s",
                                                     level, message)

        # Emit to our monitoring log logger - escape CR/LF so that one event
        # stays on one log line
        message = message.replace('\r', '\\r')
        message = message.replace('\n', '\\n')
        logger_ = logging.getLogger(MONITORING_LOGGER_NAME)
        logging_function = getattr(logger_, level)
        try:
            # Python 2 legacy: str.decode raises UnicodeDecodeError (the
            # former code caught UnicodeEncodeError, which decode never raises)
            message = message.decode('utf8', 'ignore')
        except UnicodeDecodeError:
            pass
        except AttributeError:
            # Python 3 str has no decode and raises an exception!
            pass

        if timestamp:
            # Force the log event date
            st = datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')
            logging_function(message, extra={'my_date': st})
        else:
            logging_function(message)

        return True

    # ... and returns a brok
    return Brok({'type': 'monitoring_log',
                 'data': {'level': level, 'message': message}})
def get_expire_brok(self, host_name, service_name=''):
    """Build an acknowledge expiration brok.

    :param host_name: host concerned by the acknowledge
    :param service_name: service concerned by the acknowledge, if any
    :return: brok with wanted data
    :rtype: fusionsupervision.brok.Brok
    """
    payload = self.serialize()
    payload['host'] = host_name
    if service_name != '':
        payload['service'] = service_name

    return Brok({'type': 'acknowledge_expire', 'data': payload})
def get_comment_brok(self, host_name, service_name=''):
    """Build a comment brok from this comment.

    :param host_name: host concerned by the comment
    :param service_name: service concerned by the comment, if any
    :return: brok with wanted data
    :rtype: fusionsupervision.brok.Brok
    """
    payload = self.serialize()
    payload['host'] = host_name
    if service_name:
        payload['service'] = service_name

    return Brok({'type': 'comment', 'data': payload})
def get_raise_brok(self, host_name, service_name=''):
    """Build a start-of-acknowledge brok.

    :param host_name: host concerned by the acknowledge
    :param service_name: service concerned by the acknowledge, if any
    :return: brok with wanted data
    :rtype: fusionsupervision.brok.Brok
    """
    payload = self.serialize()
    payload['host'] = host_name
    if service_name != '':
        payload['service'] = service_name

    return Brok({'type': 'acknowledge_raise', 'data': payload})
def get_expire_brok(self, host_name, service_name=''):
    """Build a downtime expiration brok.

    :param host_name: host concerned by the downtime
    :type host_name
    :param service_name: service concerned by the downtime, if any
    :type service_name
    :return: brok with wanted data
    :rtype: fusionsupervision.brok.Brok
    """
    payload = self.serialize()
    payload['host'] = host_name
    if service_name != '':
        payload['service'] = service_name

    return Brok({'type': 'downtime_expire', 'data': payload})
def test_inner_module_checks_results(self):
    """ Test that inner metrics module is pushing data to Graphite

    Feeds one host check result and one service check result brok to the
    broker daemon and verifies the inner metrics module handles them
    without emitting any log (no error, no warning).

    :return: None
    """
    self.setup_with_file('cfg/cfg_metrics.cfg')
    # self.clear_logs()

    # Module is an internal one (no external process) in the broker daemon modules manager
    my_module = self._broker_daemon.modules_manager.instances[0]
    assert my_module.is_external is False
    # Flush metrics on every received metric (no batching) for the test
    my_module.metrics_flush_count = 1

    # When the broker daemon receives a Brok, it is propagated to the module

    # Host check result
    self.clear_logs()
    hcr = {
        "host_name": "srv001",

        "last_time_unreachable": 0,
        "last_problem_id": 0,
        "passive_check": False,
        "retry_interval": 1,
        "last_event_id": 0,
        "problem_has_been_acknowledged": False,
        "command_name": "pm-check_linux_host_alive",
        "last_state": "UP",
        "latency": 0.2317881584,
        "last_state_type": "HARD",
        "last_hard_state_change": 1444427108,
        "last_time_up": 0,
        "percent_state_change": 0.0,
        "state": "DOWN",
        "last_chk": 1444427104,
        "last_state_id": 0,
        "end_time": 0,
        "timeout": 0,
        "current_event_id": 10,
        "execution_time": 3.1496069431000002,
        "start_time": 0,
        "return_code": 2,
        "state_type": "SOFT",
        "output": "CRITICAL - Plugin timed out after 10 seconds",
        "in_checking": True,
        "early_timeout": 0,
        "in_scheduled_downtime": False,
        "attempt": 0,
        "state_type_id": 1,
        "acknowledgement_type": 1,
        "last_state_change": 1444427108.040841,
        "last_time_down": 1444427108,
        "instance_id": 0,
        "long_output": "",
        "current_problem_id": 0,
        "check_interval": 5,
        "state_id": 2,
        "has_been_checked": 1,
        "perf_data": "uptime=1200;rta=0.049000ms;2.000000;3.000000;0.000000 pl=0%;50;80;0"
    }
    b = Brok({'data': hcr, 'type': 'host_check_result'}, False)
    self._broker_daemon.manage_brok(b)
    self.show_logs()
    # The module should not have logged anything
    self.assert_log_count(0)

    # Service check result
    self.clear_logs()
    scr = {
        "host_name": "srv001",
        "service_description": "ping",
        "command_name": "ping",

        "attempt": 1,
        "execution_time": 3.1496069431000002,
        "latency": 0.2317881584,
        "return_code": 2,
        "state": "OK",
        "state_type": "HARD",
        "state_id": 0,
        "state_type_id": 1,

        "output": "PING OK - Packet loss = 0%, RTA = 0.05 ms",
        "long_output": "Long output ...",
        "perf_data": "rta=0.049000ms;2.000000;3.000000;0.000000 pl=0%;50;80;0",

        "passive_check": False,

        "problem_has_been_acknowledged": False,
        "acknowledgement_type": 1,
        "in_scheduled_downtime": False,

        "last_chk": 1473597375,
        "last_state_change": 1444427108.147903,
        "last_state_id": 0,
        "last_state": "UNKNOWN",
        "last_state_type": "HARD",
        "last_hard_state_change": 0.0,
        "last_time_unknown": 0,
        "last_time_unreachable": 0,
        "last_time_critical": 1473597376,
        "last_time_warning": 0,
        "last_time_ok": 0,

        "retry_interval": 2,
        "percent_state_change": 4.1,
        "check_interval": 5,

        "in_checking": False,
        "early_timeout": 0,
        "instance_id": "3ac88dd0c1c04b37a5d181622e93b5bc",
        "current_event_id": 1,
        "last_event_id": 0,
        "current_problem_id": 1,
        "last_problem_id": 0,
        "timeout": 0,
        "has_been_checked": 1,
        "start_time": 0,
        "end_time": 0
    }
    b = Brok({'data': scr, 'type': 'service_check_result'}, False)
    self._broker_daemon.manage_brok(b)
    self.show_logs()
    # Still no log raised by the module
    self.assert_log_count(0)
    print(my_module.my_metrics)
def test_inner_module_configuration(self):
    """ Test that inner metrics module may be configured in Alignak configuration

    With this configuration, hosts/services cache is enabled and tested. Broks for
    unknown hosts/services are ignored.

    All InfluxDB HTTP endpoints are mocked so that no real database is needed.

    :return: None
    """
    with requests_mock.mock() as mr:
        # InfluxDB ping endpoint - used for the connection test
        mr.get("http://localhost:8086/ping",
               json={"results": [{"statement_id": 0, "version": "1.7.2"}]},
               status_code=204,
               headers={"x-influxdb-version": "1.7.2"})
        # First SHOW DATABASES answer: only the _internal database exists
        mr.get("http://localhost:8086/query?q=SHOW+DATABASES&db=fusionsupervision",
               json={"results": [{"statement_id": 0,
                                  "series": [{"name": "databases", "columns": ["name"],
                                              "values": [["_internal"]]}]}]})
        # Second SHOW DATABASES answer: empty result
        mr.get("http://localhost:8086/query?q=SHOW+DATABASES&db=fusionsupervision",
               json={"results": [{"statement_id": 0}]})
        # Database and retention policy creation
        mr.post("http://localhost:8086/query?q=CREATE+DATABASE+%22fusionsupervision%22&db=fusionsupervision",
                json={"results": [{"statement_id": 0}]})
        mr.post("http://localhost:8086/query?q=CREATE+RETENTION+POLICY+%22fusionsupervision%22+ON+%22fusionsupervision%22+DURATION+1y+REPLICATION+1+SHARD+DURATION+0s&db=fusionsupervision",
                json={"results": [{"statement_id": 0}]})
        # Metrics write endpoint
        mr.post("http://localhost:8086/write?db=fusionsupervision", status_code=204,
                json={"results": [{"statement_id": 0}]})

        self.setup_with_file('cfg/cfg_metrics.cfg',
                             'cfg/inner_metrics/fusionsupervision.ini')

        # Specific configuration enables the module
        assert self._scheduler.pushed_conf.process_performance_data is True
        assert self._scheduler.pushed_conf.host_perfdata_file == 'go-hosts'
        assert self._scheduler.pushed_conf.service_perfdata_file == 'go-services'
        assert 1 == len(self._broker_daemon.modules)

        self.show_logs()

        # The declared module instance
        my_module = self._broker_daemon.modules[0]
        print(my_module)
        # Generic stuff
        assert my_module.python_name == 'fusionsupervision.modules.inner_metrics'
        assert my_module.type == 'metrics'
        # assert my_module.alias == 'inner-metrics'
        assert my_module.enabled is True

        # Specific stuff - the content of the configuration parameters
        # When the module is configured in Alignak configuration, it does not exist!
        # assert my_module.host_perfdata_file == 'go-hosts'
        # assert my_module.service_perfdata_file == 'go-services'
        assert my_module.output_file == '/tmp/fusionsupervision-metrics.log'

        self.clear_logs()

        # Module is not yet initialized, let's do it in place of the daemon.
        # Create the modules manager for a daemon type
        self.modules_manager = ModulesManager(self._broker_daemon)

        # Load an initialize the modules:
        #  - load python module
        #  - get module properties and instances
        self.modules_manager.load_and_init([my_module])

        self.show_logs()

        # self.assert_log_match(
        #     "Targets configuration: graphite: True, influxdb: True, "
        #     "file: /tmp/fusionsupervision-metrics.log", 10)
        #
        # NOTE(review): the log message indices below are order-sensitive
        self.assert_log_match(
            "targets configuration: graphite: True, influxdb: True, "
            "file: /tmp/fusionsupervision-metrics.log", 11)

        self.assert_log_match(
            "Storing metrics in an output file is configured. Do not forget "
            "to regularly clean this file to avoid important disk usage!", 12)

        self.assert_log_match("Trying to initialize module: inner-metrics", 24)

        self.assert_log_match("testing storage to /tmp/fusionsupervision-metrics.log ...", 25)
        self.assert_log_match("Ok", 26)

        self.assert_log_match("testing connection to InfluxDB localhost:8086 ...", 27)
        self.assert_log_match("connected, InfluxDB version 1.7.2", 28)

        self.assert_log_match("testing connection to Graphite localhost:2004 ...", 29)
        self.assert_log_match("Ok", 30)

        self.assert_log_match("creating database fusionsupervision...", 31)
        # self.assert_log_match("creating database retention policy: fusionsupervision - 1y - 1...", 32)
        # self.assert_log_match("Ok", 33)

        self.assert_log_match("Module inner-metrics is initialized.", 32)

        # Module is an internal one (no external process) in the broker daemon modules manager
        my_module = self._broker_daemon.modules_manager.instances[0]
        assert my_module.is_external is False

        # Known hosts/services cache is empty
        assert my_module.hosts_cache == {}
        assert my_module.services_cache == {}

        # File output - we still got a metric for the connection test!
        assert os.path.exists('/tmp/fusionsupervision-metrics.log')
        with open('/tmp/fusionsupervision-metrics.log') as f:
            lines = f.readlines()
            first_line = False
            for line in lines:
                # Each stored line is "timestamp;counter;timestamp"
                assert 3 == len(line.split(';'))
                if not first_line:
                    line = line.strip()
                    metric = line.split(';')
                    assert metric[0] == metric[2]
                    assert metric[1] == 'connection-test'
                print(line)
            # Some metrics were stored
            assert 2 == len(lines)

        # When the broker daemon receives a Brok, it is propagated to the module

        # Host check result - the host is not yet known by the module cache
        self.clear_logs()
        hcr = {
            "host_name": "srv001",

            "last_time_unreachable": 0,
            "last_problem_id": 0,
            "passive_check": False,
            "retry_interval": 1,
            "last_event_id": 0,
            "problem_has_been_acknowledged": False,
            "command_name": "pm-check_linux_host_alive",
            "last_state": "UP",
            "latency": 0.2317881584,
            "last_state_type": "HARD",
            "last_hard_state_change": 1444427108,
            "last_time_up": 0,
            "percent_state_change": 0.0,
            "state": "DOWN",
            "last_chk": 1444427104,
            "last_state_id": 0,
            "end_time": 0,
            "timeout": 0,
            "current_event_id": 10,
            "execution_time": 3.1496069431000002,
            "start_time": 0,
            "return_code": 2,
            "state_type": "SOFT",
            "output": "CRITICAL - Plugin timed out after 10 seconds",
            "in_checking": True,
            "early_timeout": 0,
            "in_scheduled_downtime": False,
            "attempt": 0,
            "state_type_id": 1,
            "acknowledgement_type": 1,
            "last_state_change": 1444427108.040841,
            "last_time_down": 1444427108,
            "instance_id": 0,
            "long_output": "",
            "current_problem_id": 0,
            "check_interval": 5,
            "state_id": 2,
            "has_been_checked": 1,
            "perf_data": "uptime=1200;rta=0.049000ms;2.000000;3.000000;0.000000 pl=0%;50;80;0"
        }
        b = Brok({'data': hcr, 'type': 'host_check_result'}, False)
        self._broker_daemon.manage_brok(b)
        self.show_logs()
        # The brok is ignored because the host is unknown
        self.assert_log_count(2)
        self.assert_log_match("host check result: srv001", 0)
        self.assert_log_match("received host check result for an unknown host: srv001", 1)

        # Service check result - the service is not yet known by the module cache
        self.clear_logs()
        scr = {
            "host_name": "srv001",
            "service_description": "ping",
            "command_name": "ping",

            "attempt": 1,
            "execution_time": 3.1496069431000002,
            "latency": 0.2317881584,
            "return_code": 2,
            "state": "OK",
            "state_type": "HARD",
            "state_id": 0,
            "state_type_id": 1,

            "output": "PING OK - Packet loss = 0%, RTA = 0.05 ms",
            "long_output": "Long output ...",
            "perf_data": "rta=0.049000ms;2.000000;3.000000;0.000000 pl=0%;50;80;0",

            "passive_check": False,

            "problem_has_been_acknowledged": False,
            "acknowledgement_type": 1,
            "in_scheduled_downtime": False,

            "last_chk": 1473597375,
            "last_state_change": 1444427108.147903,
            "last_state_id": 0,
            "last_state": "UNKNOWN",
            "last_state_type": "HARD",
            "last_hard_state_change": 0.0,
            "last_time_unknown": 0,
            "last_time_unreachable": 0,
            "last_time_critical": 1473597376,
            "last_time_warning": 0,
            "last_time_ok": 0,

            "retry_interval": 2,
            "percent_state_change": 4.1,
            "check_interval": 5,

            "in_checking": False,
            "early_timeout": 0,
            "instance_id": "3ac88dd0c1c04b37a5d181622e93b5bc",
            "current_event_id": 1,
            "last_event_id": 0,
            "current_problem_id": 1,
            "last_problem_id": 0,
            "timeout": 0,
            "has_been_checked": 1,
            "start_time": 0,
            "end_time": 0
        }
        b = Brok({'data': scr, 'type': 'service_check_result'}, False)
        self._broker_daemon.manage_brok(b)
        self.show_logs()
        self.assert_log_count(2)
        self.assert_log_match("service check result: srv001/ping", 0)
        self.assert_log_match("received service check result for an unknown host", 1)

        # Initial host status - feeds the module hosts cache
        self.clear_logs()
        hcr = {
            "host_name": "srv001",
        }
        b = Brok({'data': hcr, 'type': 'initial_host_status'}, False)
        self._broker_daemon.manage_brok(b)
        self.show_logs()
        # The module inner cache stored the host
        assert 'srv001' in my_module.hosts_cache
        assert my_module.hosts_cache['srv001'] == {'realm_name': 'All'}
        assert my_module.services_cache == {}

        # Initial service status - feeds the module services cache
        self.clear_logs()
        hcr = {
            "host_name": "srv001",
            "service_description": "disks"
        }
        b = Brok({'data': hcr, 'type': 'initial_service_status'}, False)
        self._broker_daemon.manage_brok(b)
        self.show_logs()
        # The module inner cache stored the host
        assert 'srv001' in my_module.hosts_cache
        assert my_module.hosts_cache['srv001'] == {'realm_name': 'All'}
        assert 'srv001/disks' in my_module.services_cache
        assert my_module.services_cache['srv001/disks'] == {}

        # Now the host srv001 is known in the module, let's raise an host brok

        # Host check result
        self.clear_logs()
        hcr = {
            "host_name": "srv001",

            "last_time_unreachable": 0,
            "last_problem_id": 0,
            "passive_check": False,
            "retry_interval": 1,
            "last_event_id": 0,
            "problem_has_been_acknowledged": False,
            "command_name": "pm-check_linux_host_alive",
            "last_state": "UP",
            "latency": 0.2317881584,
            "last_state_type": "HARD",
            "last_hard_state_change": 1444427108,
            "last_time_up": 0,
            "percent_state_change": 0.0,
            "state": "DOWN",
            "last_chk": 1444427104,
            "last_state_id": 0,
            "end_time": 0,
            "timeout": 0,
            "current_event_id": 10,
            "execution_time": 3.1496069431000002,
            "start_time": 0,
            "return_code": 2,
            "state_type": "SOFT",
            "output": "CRITICAL - Plugin timed out after 10 seconds",
            "in_checking": True,
            "early_timeout": 0,
            "in_scheduled_downtime": False,
            "attempt": 0,
            "state_type_id": 1,
            "acknowledgement_type": 1,
            "last_state_change": 1444427108.040841,
            "last_time_down": 1444427108,
            "instance_id": 0,
            "long_output": "",
            "current_problem_id": 0,
            "check_interval": 5,
            "state_id": 2,
            "has_been_checked": 1,
            "perf_data": "uptime=1200 rta=0.049000ms;2.000000;3.000000;0.000000 pl=0%;50;80;0"
        }
        b = Brok({'data': hcr, 'type': 'host_check_result'}, False)
        self._broker_daemon.manage_brok(b)
        self.show_logs()
        # The metrics are extracted and flushed to all three targets
        self.assert_log_count(9)
        self.assert_log_match("host check result: srv001", 0)
        self.assert_log_match("service: host_check, metric: ", 1)
        self.assert_log_match("service: host_check, metric: ", 2)
        self.assert_log_match("service: host_check, metric: ", 3)
        self.assert_log_match("Metrics: host_check - ", 4)
        self.assert_log_match("Data: ", 5)
        self.assert_log_match("Flushing 1 metrics to Graphite/carbon", 6)
        self.assert_log_match("Flushing 1 metrics to InfluxDB", 7)
        self.assert_log_match("Storing 1 metrics to /tmp/fusionsupervision-metrics.log", 8)

        # Service check result
        self.clear_logs()
        scr = {
            "host_name": "srv001",
            "service_description": "disks",

            "last_time_unreachable": 0,
            "last_problem_id": 0,
            "passive_check": False,
            "retry_interval": 1,
            "last_event_id": 0,
            "problem_has_been_acknowledged": False,
            "command_name": "pm-check_linux_disks",
            "last_state": "UP",
            "latency": 0.2317881584,
            "last_state_type": "HARD",
            "last_hard_state_change": 1444427108,
            "last_time_up": 0,
            "percent_state_change": 0.0,
            "state": "OK",
            "last_chk": 1444427104,
            "last_state_id": 0,
            "end_time": 0,
            "timeout": 0,
            "current_event_id": 10,
            "execution_time": 3.1496069431000002,
            "start_time": 0,
            "return_code": 2,
            "state_type": "SOFT",
            "output": "DISK OK - free space: / 3326 MB (56%); / 15272 MB (77%);/boot 68 MB (69%);/home 69357 MB (27%);/var/log 819 MB (84%);",
            "in_checking": True,
            "early_timeout": 0,
            "in_scheduled_downtime": False,
            "attempt": 0,
            "state_type_id": 1,
            "acknowledgement_type": 1,
            "last_state_change": 1444427108.040841,
            "last_time_down": 1444427108,
            "instance_id": 0,
            "long_output": "",
            "current_problem_id": 0,
            "check_interval": 5,
            "state_id": 2,
            "has_been_checked": 1,
            "perf_data": "/=2643MB;5948;5958;0;5968 /boot=68MB;88;93;0;98 /home=69357MB;253404;253409;0;253414 /var/log=818MB;970;975;0;980"
        }
        b = Brok({'data': scr, 'type': 'service_check_result'}, False)
        self._broker_daemon.manage_brok(b)
        self.show_logs()
        self.assert_log_count(10)
        self.assert_log_match("service check result: srv001/disks", 0)
        self.assert_log_match(re.escape("service: disks, metric: "), 1)
        self.assert_log_match(re.escape("service: disks, metric: "), 2)
        self.assert_log_match(re.escape("service: disks, metric: "), 3)
        self.assert_log_match(re.escape("service: disks, metric: "), 4)
        self.assert_log_match(re.escape("Metrics: disks - "), 5)
        self.assert_log_match("Data: ", 6)
        self.assert_log_match("Flushing 1 metrics to Graphite/carbon", 7)
        self.assert_log_match("Flushing 1 metrics to InfluxDB", 8)
        self.assert_log_match("Storing 1 metrics to /tmp/fusionsupervision-metrics.log", 9)

        # Metrics count

        # File output
        assert os.path.exists('/tmp/fusionsupervision-metrics.log')
        with open('/tmp/fusionsupervision-metrics.log') as f:
            lines = f.readlines()
            first_line = False
            for line in lines:
                line = line.strip()
                assert 3 == len(line.split(';'))
                print(line)
                if not first_line:
                    first_line = True
                    metric = line.split(';')
                    assert metric[0] == metric[2]
                    assert metric[1] == 'connection-test'
            # Some metrics were stored!
            assert 33 == len(lines)
def gauge(self, key, value, timestamp=None):
    """Set a gauge value

    If the inner key does not exist is is created.

    Updates the local min/max/count/sum statistics for the key, then pushes
    the value to every enabled target: statsd socket, metrics file and
    Graphite/carbon. Finally builds a brok if broks are enabled.

    :param key: gauge to update
    :type key: str
    :param value: counter value
    :type value: float
    :param timestamp: forced metric timestamp; defaults to now when needed
    :return: An fusionsupervision_stat brok if broks are enabled else None
    """
    # Update the local statistics for this key: (min, max, count, sum)
    _min, _max, count, _sum = self.stats.get(key, (None, None, 0, 0))
    count += 1
    _sum += value
    if _min is None or value < _min:
        _min = value
    if _max is None or value > _max:
        _max = value
    self.stats[key] = (_min, _max, count, _sum)

    # Manage local statsd part
    if self.statsd_enabled and self.statsd_sock:
        # beware, we are sending ms here, timer is in seconds
        # NOTE(review): '%d' truncates a float gauge to its integer part —
        # confirm this precision loss is intended for the statsd packet
        packet = '%s.%s.%s:%d|g' % (self.statsd_prefix, self.name, key, value)
        packet = packet.encode('utf-8')
        try:
            self.statsd_sock.sendto(packet, self.statsd_addr)
        except (socket.error, socket.gaierror):
            pass
            # cannot send? ok not a huge problem here and we cannot
            # log because it will be far too verbose :p

    # Manage file part
    if self.statsd_enabled and self.file_d:
        if timestamp is None:
            timestamp = int(time.time())
        # line_fmt is a template with #date#, #counter#, #value#, #uom# markers
        packet = self.line_fmt
        if not self.date_fmt:
            date = "%s" % timestamp
        else:
            date = datetime.datetime.fromtimestamp(timestamp).strftime(self.date_fmt)
        packet = packet.replace("#date#", date)
        packet = packet.replace("#counter#", '%s.%s.%s' % (self.statsd_prefix,
                                                           self.name, key))
        # NOTE(review): same integer truncation as the statsd packet above
        packet = packet.replace("#value#", '%d' % value)
        packet = packet.replace("#uom#", 'g')
        # Do not log because it is spamming the log file, but leave this code in place
        # for it may be restored easily if more tests are necessary... ;)
        # logger.debug("Writing data: %s", packet)
        try:
            self.file_d.write(packet)
        except IOError:
            logger.warning("Could not write to the file: %s", packet)

    # Manage Graphite part
    if self.statsd_enabled and self.carbon:
        self.send_to_graphite(key, value, timestamp=timestamp)

    # Raise a brok carrying the metric when broks are enabled
    if self.broks_enabled:
        logger.debug("fusionsupervision stat brok: %s = %s", key, value)
        if timestamp is None:
            timestamp = int(time.time())

        return Brok({'type': 'fusionsupervision_stat',
                     'data': {
                         'ts': timestamp,
                         'type': 'gauge',
                         'metric': '%s.%s.%s' % (self.statsd_prefix, self.name, key),
                         'value': value,
                         'uom': 'g'
                     }})

    return None
def setup_new_conf(self):
    # pylint: disable=too-many-statements, too-many-branches, too-many-locals
    """Setup new conf received for scheduler

    Un-serializes the configuration part pushed by the arbiter, rebuilds the
    satellite links (pollers, reactionners, brokers), loads the scheduler
    modules, gives the configuration to the inner scheduler and initializes
    the connections with the satellites.

    :return: None
    """
    # Execute the base class treatment...
    super(Fusionsupervision, self).setup_new_conf()

    # ...then our own specific treatment!
    with self.conf_lock:
        # self_conf is our own configuration from the fusionsupervision environment
        # self_conf = self.cur_conf['self_conf']
        logger.debug("Got config: %s", self.cur_conf)
        if 'conf_part' not in self.cur_conf:
            self.cur_conf['conf_part'] = None
        conf_part = self.cur_conf['conf_part']

        # Ok now we can save the retention data
        if self.sched.pushed_conf is not None:
            self.sched.update_retention()

        # Get the monitored objects configuration
        t00 = time.time()
        received_conf_part = None
        try:
            received_conf_part = unserialize(conf_part)
            assert received_conf_part is not None
        except AssertionError as exp:
            # This to indicate that no configuration is managed by this scheduler...
            logger.warning("No managed configuration received from arbiter")
        except FusionsupervisionClassLookupException as exp:  # pragma: no cover
            # This to indicate that the new configuration is not managed...
            self.new_conf = {
                "_status": "Cannot un-serialize configuration received from arbiter",
                "_error": str(exp)}
            logger.error(self.new_conf)
            logger.error("Back trace of the error:\n%s", traceback.format_exc())
            return
        except Exception as exp:  # pylint: disable=broad-except
            # This to indicate that the new configuration is not managed...
            self.new_conf = {
                "_status": "Cannot un-serialize configuration received from arbiter",
                "_error": str(exp)}
            logger.error(self.new_conf)
            # exit_on_exception does not return
            self.exit_on_exception(exp, str(self.new_conf))

        # if not received_conf_part:
        #     return

        logger.info("Monitored configuration %s received at %d. Un-serialized in %d secs",
                    received_conf_part, t00, time.time() - t00)
        logger.info("Scheduler received configuration : %s", received_conf_part)

        # Now we create our pollers, reactionners and brokers
        for link_type in ['pollers', 'reactionners', 'brokers']:
            if link_type not in self.cur_conf['satellites']:
                logger.error("Missing %s in the configuration!", link_type)
                continue

            my_satellites = getattr(self, link_type, {})
            received_satellites = self.cur_conf['satellites'][link_type]
            for link_uuid in received_satellites:
                rs_conf = received_satellites[link_uuid]
                logger.debug("- received %s - %s: %s", rs_conf['instance_id'],
                             rs_conf['type'], rs_conf['name'])

                # Must look if we already had a configuration and save our broks
                already_got = rs_conf['instance_id'] in my_satellites
                broks = []
                actions = {}
                wait_homerun = {}
                external_commands = {}
                running_id = 0
                if already_got:
                    logger.warning("I already got: %s", rs_conf['instance_id'])
                    # Save some information
                    running_id = my_satellites[link_uuid].running_id
                    (broks, actions, wait_homerun, external_commands) = \
                        my_satellites[link_uuid].get_and_clear_context()
                    # Delete the former link
                    del my_satellites[link_uuid]

                # My new satellite link...
                new_link = SatelliteLink.get_a_satellite_link(link_type[:-1], rs_conf)
                my_satellites[new_link.uuid] = new_link
                logger.info("I got a new %s satellite: %s", link_type[:-1], new_link)

                # Restore the saved context on the new link
                new_link.running_id = running_id
                new_link.external_commands = external_commands
                new_link.broks = broks
                new_link.wait_homerun = wait_homerun
                new_link.actions = actions

                # Replacing the satellite address and port by those defined in satellite_map
                if new_link.name in self.cur_conf['override_conf'].get('satellite_map', {}):
                    override_conf = self.cur_conf['override_conf']
                    overriding = override_conf.get('satellite_map')[new_link.name]
                    logger.warning("Do not override the configuration for: %s, with: %s. "
                                   "Please check whether this is necessary!",
                                   new_link.name, overriding)

        # First mix conf and override_conf to have our definitive conf
        # NOTE(review): getattr on a dict always returns the default - confirm
        # whether self.cur_conf['override_conf'] iteration was intended here
        for prop in getattr(self.cur_conf, 'override_conf', []):
            logger.debug("Overriden: %s / %s ", prop, getattr(received_conf_part, prop, None))
            logger.debug("Overriding: %s / %s ", prop, self.cur_conf['override_conf'])
            setattr(received_conf_part, prop, self.cur_conf['override_conf'].get(prop, None))

        # Scheduler modules
        if not self.have_modules:
            try:
                logger.debug("Modules configuration: %s", self.cur_conf['modules'])
                self.modules = unserialize(self.cur_conf['modules'], no_load=True)
            except FusionsupervisionClassLookupException as exp:  # pragma: no cover, simple protection
                logger.error('Cannot un-serialize modules configuration '
                             'received from arbiter: %s', exp)
            if self.modules:
                logger.debug("I received some modules configuration: %s", self.modules)
                self.have_modules = True

                self.do_load_modules(self.modules)
                # and start external modules too
                self.modules_manager.start_external_instances()
            else:
                logger.info("I do not have modules")

        if received_conf_part:
            logger.info("Loading configuration...")

            # Propagate the global parameters to the configuration items
            received_conf_part.explode_global_conf()

            # We give the configuration to our scheduler
            self.sched.reset()
            self.sched.load_conf(self.cur_conf['instance_id'],
                                 self.cur_conf['instance_name'],
                                 received_conf_part)

            # Once loaded, the scheduler has an inner pushed_conf object
            logger.info("Loaded: %s", self.sched.pushed_conf)

            # Update the scheduler ticks according to the daemon configuration
            self.sched.update_recurrent_works_tick(self)

            # We must update our pushed configuration macros with correct values
            # from the configuration parameters
            # self.sched.pushed_conf.fill_resource_macros_names_macros()

            # Creating the Macroresolver Class & unique instance
            m_solver = MacroResolver()
            m_solver.init(received_conf_part)

            # Now create the external commands manager
            # We are an applyer: our role is not to dispatch commands, but to apply them
            ecm = ExternalCommandManager(
                received_conf_part, 'applyer', self.sched,
                received_conf_part.accept_passive_unknown_check_results,
                received_conf_part.log_external_commands)

            # Scheduler needs to know about this external command manager to use it if necessary
            self.sched.external_commands_manager = ecm

            # Ok now we can load the retention data
            self.sched.retention_load()

            # Log hosts/services initial states
            self.sched.log_initial_states()

        # Create brok new conf
        brok = Brok({'type': 'new_conf', 'data': {}})
        self.sched.add_brok(brok)

        # Initialize connection with all our satellites
        logger.info("Initializing connection with my satellites:")
        my_satellites = self.get_links_of_type(s_type='')
        for satellite in list(my_satellites.values()):
            logger.info("- : %s/%s", satellite.type, satellite.name)
            if not self.daemon_connection_init(satellite):
                logger.error("Satellite connection failed: %s", satellite)

    if received_conf_part:
        # Enable the scheduling process
        logger.info("Loaded: %s", self.sched.pushed_conf)
        self.sched.start_scheduling()

    # Now I have a configuration!
    self.have_conf = True