def adjust_worker_number_by_load(self):
    """Ensure each worker-module queue owns at least ``min_workers`` workers.

    Modules whose worker creation raises NotWorkerMod are dropped from
    ``q_by_mod`` afterwards.

    :return: None
    """
    not_worker_mods = []
    logger.debug(
        "[%s] Trying to adjust worker number."
        " Actual number : %d, min per module : %d, max per module : %d",
        self.name, len(self.workers), self.min_workers, self.max_workers)

    # Guarantee the per-module minimum; load-balancing extras are handled
    # elsewhere.
    for module_name in self.q_by_mod:
        missing = max(0, self.min_workers - len(self.q_by_mod[module_name]))
        for _ in range(missing):
            try:
                self.create_and_launch_worker(module_name=module_name)
            except NotWorkerMod:
                # Not a real worker module: remember it for removal
                not_worker_mods.append(module_name)
                break

    for module_name in not_worker_mods:
        logger.debug(
            "[%s] The module %s is not a worker one, "
            "I remove it from the worker list", self.name, module_name)
        del self.q_by_mod[module_name]
def adjust_worker_number_by_load(self):
    """Spawn workers until every module queue reaches the configured minimum.

    Queues whose module raises NotWorkerMod are removed from ``q_by_mod``.

    :return: None
    """
    rejected = []
    logger.debug("[%s] Trying to adjust worker number."
                 " Actual number : %d, min per module : %d, max per module : %d",
                 self.name, len(self.workers), self.min_workers, self.max_workers)

    # I want at least min_workers by module then if I can,
    # I add worker for load balancing
    for mod_name in list(self.q_by_mod):
        missing = self.min_workers - len(self.q_by_mod[mod_name])
        while missing > 0:
            missing -= 1
            try:
                self.create_and_launch_worker(module_name=mod_name)
            except NotWorkerMod:
                # This is not really a worker module: schedule its removal
                rejected.append(mod_name)
                break

    for mod_name in rejected:
        logger.debug("[%s] The module %s is not a worker one, "
                     "I remove it from the worker list", self.name, mod_name)
        del self.q_by_mod[mod_name]
def ping(self):
    """Send a HTTP request to the satellite (GET /ping)

    Add failed attempt if an error occurs
    Otherwise, set alive this satellite

    :return: None
    """
    logger.debug("Pinging %s", self.get_name())
    try:
        if self.con is None:
            self.create_connection()
        logger.debug(" (%s)", self.uri)

        # If the connection failed to initialize, bail out
        if self.con is None:
            self.add_failed_check_attempt()
            return

        res = self.con.get('ping')

        # Should return us pong string
        if res == 'pong':
            self.set_alive()
        else:
            self.add_failed_check_attempt()
    # FIX: "except E, exp" is deprecated Py2-only syntax; "as" works on 2.6+/3
    except HTTPEXCEPTIONS as exp:
        self.add_failed_check_attempt(reason=str(exp))
def ping(self):
    """Send a HTTP request to the satellite (GET /ping)

    Add failed attempt if an error occurs
    Otherwise, set alive this satellite

    :return: None
    """
    logger.debug("Pinging %s", self.get_name())
    try:
        if self.con is None:
            self.create_connection()
        logger.debug(" (%s)", self.uri)

        # If the connection failed to initialize, bail out
        if self.con is None:
            self.add_failed_check_attempt()
            return

        res = self.con.get("ping")

        # Should return us pong string
        if res == "pong":
            self.set_alive()
        else:
            self.add_failed_check_attempt()
    # FIX: "except E, exp" is deprecated Py2-only syntax; "as" works on 2.6+/3
    except HTTPEXCEPTIONS as exp:
        self.add_failed_check_attempt(reason=str(exp))
def main(self):
    """Main satellite function. Do init and then mainloop

    Startup sequence: config load, daemonization, modules manager,
    initial conf wait, module load/start, worker allocation, main loop.

    :return: None
    """
    try:
        for line in self.get_header():
            logger.info(line)

        self.load_config_file()

        # Setting log level
        logger.setLevel(self.log_level)
        # Force the debug level if the daemon is said to start with such level
        if self.debug:
            logger.setLevel('DEBUG')

        # Look if we are enabled or not. If ok, start the daemon mode
        self.look_for_early_exit()
        self.do_daemon_init_and_start()
        self.do_post_daemon_init()

        self.load_modules_manager()

        # We wait for initial conf
        self.wait_for_initial_conf()
        if not self.new_conf:  # we must have either big problem or was requested to shutdown
            return
        self.setup_new_conf()

        # We can load our modules now
        self.do_load_modules(self.new_modules_conf)
        # And even start external ones
        self.modules_manager.start_external_instances()

        # Allocate Mortal Threads
        # NOTE(review): xrange(1, min_workers) iterates min_workers - 1 times,
        # so one "round" fewer than min_workers — confirm this is intended
        for _ in xrange(1, self.min_workers):
            to_del = []
            for mod in self.q_by_mod:
                try:
                    self.create_and_launch_worker(module_name=mod)
                # Maybe this modules is not a true worker one.
                # if so, just delete if from q_by_mod
                except NotWorkerMod:
                    to_del.append(mod)

            for mod in to_del:
                logger.debug(
                    "The module %s is not a worker one, "
                    "I remove it from the worker list", mod)
                del self.q_by_mod[mod]

        # Now main loop
        self.do_mainloop()
    except Exception:
        self.print_unrecoverable(traceback.format_exc())
        raise
def register(self, app, name, _type, api_key='', secret='', http_proxy='',
             statsd_host='localhost', statsd_port=8125,
             statsd_prefix='alignak', statsd_enabled=False):
    """Init statsd instance with real values

    :param app: application (arbiter, scheduler..)
    :type app: alignak.daemon.Daemon
    :param name: daemon name
    :type name: str
    :param _type: daemon type
    :type _type:
    :param api_key: api_key to post data
    :type api_key: str
    :param secret: secret to post data
    :type secret: str
    :param http_proxy: proxy http if necessary
    :type http_proxy: str
    :param statsd_host: host to post data
    :type statsd_host: str
    :param statsd_port: port to post data
    :type statsd_port: int
    :param statsd_prefix: prefix to add to metric
    :type statsd_prefix: str
    :param statsd_enabled: bool to enable statsd
    :type statsd_enabled: bool
    :return: None
    """
    self.app = app
    self.name = name
    self.type = _type

    # kernel.io credentials
    self.api_key = api_key
    self.secret = secret
    self.http_proxy = http_proxy

    # local statsd endpoint
    self.statsd_host = statsd_host
    self.statsd_port = statsd_port
    self.statsd_prefix = statsd_prefix
    self.statsd_enabled = statsd_enabled

    if not self.statsd_enabled:
        return

    logger.debug("Loading statsd communication with %s:%s.%s",
                 self.statsd_host, self.statsd_port, self.statsd_prefix)
    self.load_statsd()
    # Also load the proxy if need
    self.con.set_proxy(self.http_proxy)
def main(self):
    """Main satellite function. Do init and then mainloop

    Startup sequence: config load, daemonization, modules manager,
    initial conf wait, module load/start, worker allocation, main loop.

    :return: None
    """
    try:
        for line in self.get_header():
            logger.info(line)

        self.load_config_file()

        # Setting log level
        logger.setLevel(self.log_level)
        # Force the debug level if the daemon is said to start with such level
        if self.debug:
            logger.setLevel('DEBUG')

        # Look if we are enabled or not. If ok, start the daemon mode
        self.look_for_early_exit()
        self.do_daemon_init_and_start()
        self.do_post_daemon_init()

        self.load_modules_manager()

        # We wait for initial conf
        self.wait_for_initial_conf()
        if not self.new_conf:  # we must have either big problem or was requested to shutdown
            return
        self.setup_new_conf()

        # We can load our modules now
        self.modules_manager.set_modules(self.modules_manager.modules)
        self.do_load_modules()
        # And even start external ones
        self.modules_manager.start_external_instances()

        # Allocate Mortal Threads
        # NOTE(review): xrange(1, min_workers) iterates min_workers - 1 times,
        # so one "round" fewer than min_workers — confirm this is intended
        for _ in xrange(1, self.min_workers):
            to_del = []
            for mod in self.q_by_mod:
                try:
                    self.create_and_launch_worker(module_name=mod)
                # Maybe this modules is not a true worker one.
                # if so, just delete if from q_by_mod
                except NotWorkerMod:
                    to_del.append(mod)

            for mod in to_del:
                logger.debug("The module %s is not a worker one, "
                             "I remove it from the worker list", mod)
                del self.q_by_mod[mod]

        # Now main loop
        self.do_mainloop()
    except Exception:
        self.print_unrecoverable(traceback.format_exc())
        raise
def manage_host_check_result_brok(self, b):
    """ Manage a host check result brok (we UPDATE data info with this)

    :param b: the brok carrying the host check result data
    :type b: object
    :return: None
    """
    host_name = b.data['host_name']
    logger.debug("[Graphite] host check result: %s", host_name)

    # If host initial status brok has not been received, ignore ...
    # FIX: message wrongly said "service check result" for a host brok
    if host_name not in self.hosts_cache:
        logger.warning("[Graphite] received host check result for an unknown host: %s",
                       host_name)
        return

    # Decode received metrics
    couples = self.get_metric_and_value('host_check', b.data['perf_data'])

    # If no values, we can exit now
    if len(couples) == 0:
        logger.debug("[Graphite] no metrics to send ...")
        return

    # Custom hosts variables
    hname = self.illegal_char_hostname.sub('_', host_name)
    if '_GRAPHITE_GROUP' in self.hosts_cache[host_name]:
        hname = ".".join((self.hosts_cache[host_name]['_GRAPHITE_GROUP'], hname))

    if '_GRAPHITE_PRE' in self.hosts_cache[host_name]:
        hname = ".".join((self.hosts_cache[host_name]['_GRAPHITE_PRE'], hname))

    if self.hostcheck:
        hname = '.'.join((hname, self.hostcheck))

    # Checks latency
    if self.ignore_latency_limit >= b.data['latency'] > 0:
        check_time = int(b.data['last_chk']) - int(b.data['latency'])
        # FIX: host broks carry no 'service_description' key — the old code
        # raised KeyError here; log the host name instead
        logger.info("[Graphite] Ignoring latency for host %s. Latency : %s",
                    host_name, b.data['latency'])
    else:
        check_time = int(b.data['last_chk'])

    # Graphite data source
    if self.graphite_data_source:
        path = '.'.join((hname, self.graphite_data_source))
    else:
        path = hname

    lines = []
    # Send a bulk of all metrics at once
    for (metric, value) in couples:
        lines.append("%s.%s %s %d" % (path, metric, value, check_time))
    lines.append("\n")
    packet = '\n'.join(lines)
    self.send_packet(packet)
def wait_new_conf(self):
    """Ask to scheduler to wait for new conf (HTTP GET from arbiter)

    :return: None
    """
    with self.app.conf_lock:
        logger.debug("Arbiter wants me to wait for a new configuration")
        # Stop the running scheduling loop first, then let the generic
        # interface implementation perform the actual conf drop.
        self.app.sched.die()
        super(SchedulerInterface, self).wait_new_conf()
def what_i_managed(self):
    """Arbiter ask me which scheduler id I manage

    :return: managed configuration ids
    :rtype: dict
    """
    # FIX: removed a leftover debug ``print`` that duplicated this log line
    logger.debug("The arbiter asked me what I manage. It's %s", self.app.what_i_managed())
    return self.app.what_i_managed()
def register(self, app, name, _type, api_key='', secret='', http_proxy='',
             statsd_host='localhost', statsd_port=8125, statsd_prefix='alignak',
             statsd_enabled=False):
    """Init statsd instance with real values

    :param app: application (arbiter, scheduler..)
    :type app: alignak.daemon.Daemon
    :param name: daemon name
    :type name: str
    :param _type: daemon type
    :type _type:
    :param api_key: api_key to post data
    :type api_key: str
    :param secret: secret to post data
    :type secret: str
    :param http_proxy: proxy http if necessary
    :type http_proxy: str
    :param statsd_host: host to post data
    :type statsd_host: str
    :param statsd_port: port to post data
    :type statsd_port: int
    :param statsd_prefix: prefix to add to metric
    :type statsd_prefix: str
    :param statsd_enabled: bool to enable statsd
    :type statsd_enabled: bool
    :return: None
    """
    # Store everything on the instance in one pass
    for attr, value in (('app', app), ('name', name), ('type', _type),
                        # kernel.io part
                        ('api_key', api_key), ('secret', secret),
                        ('http_proxy', http_proxy),
                        # local statsd part
                        ('statsd_host', statsd_host),
                        ('statsd_port', statsd_port),
                        ('statsd_prefix', statsd_prefix),
                        ('statsd_enabled', statsd_enabled)):
        setattr(self, attr, value)

    if self.statsd_enabled:
        logger.debug('Loading statsd communication with %s:%s.%s',
                     self.statsd_host, self.statsd_port, self.statsd_prefix)
        self.load_statsd()
        # Also load the proxy if need
        self.con.set_proxy(self.http_proxy)
def wait_new_conf(self):
    """Ask the daemon to drop its configuration and wait for a new one

    :return: None
    """
    app = self.app
    with app.conf_lock:
        logger.debug("Arbiter wants me to wait for a new configuration")
        # Clearing while a new conf is being set up can lead to errors,
        # hence the lock around the whole reset.
        app.schedulers.clear()
        app.cur_conf = None
def execute_query(self, query):
    """Just run the query

    :param query: the query
    :type query: str
    :return: None
    """
    logger.debug("[SqliteDB] Info: I run query '%s'", query)
    cursor = self.db_cursor
    cursor.execute(query)
    # Commit right away so every statement is persisted on its own
    self.db.commit()
def reaper(self):
    """Get data from daemon and send it to the statsd daemon

    Runs forever (meant to live in its own thread) and wakes up roughly
    once a minute.

    :return: None
    """
    try:
        from Crypto.Cipher import AES
    except ImportError:
        logger.error("Cannot find python lib crypto: stats export is not available")
        AES = None  # pylint: disable=C0103

    while True:
        now = int(time.time())
        # Swap out the accumulated stats atomically
        stats = self.stats
        self.stats = {}

        # FIX: a dead ", ".join(...) over the stats that was never used
        # has been removed here.

        # If we are not in an initializer daemon we skip, we cannot have a real name, it sucks
        # to find the data after this
        if not self.name or not self.api_key or not self.secret:
            time.sleep(60)
            continue

        metrics = []
        for (key, elem) in stats.iteritems():
            namekey = "%s.%s.%s" % (self.type, self.name, key)
            _min, _max, number, _sum = elem
            _avg = float(_sum) / number  # nb can't be 0 here and _min_max can't be None too
            metrics.append("%s.avg %f %d" % (namekey, _avg, now))
            metrics.append("%s.min %f %d" % (namekey, _min, now))
            metrics.append("%s.max %f %d" % (namekey, _max, now))
            metrics.append("%s.count %f %d" % (namekey, number, now))

        # logger.debug('REAPER metrics to send %s (%d)' % (metrics, len(str(metrics))) )
        # get the inner data for the daemon
        struct = self.app.get_stats_struct()
        struct["metrics"].extend(metrics)
        # logger.debug('REAPER whole struct %s' % struct)
        j = json.dumps(struct)

        if AES is not None and self.secret != "":
            logger.debug("Stats PUT to kernel.alignak.io/api/v1/put/ with %s %s",
                         self.api_key, self.secret)
            # assume a %16 length messagexs
            encrypted_text = self._encrypt(j)
            try:
                self.con.put("/api/v1/put/?api_key=%s" % (self.api_key), encrypted_text)
            # FIX: "except E, exp" is Py2-only syntax; use "as"
            except HTTPException as exp:
                logger.error("Stats REAPER cannot put to the metric server %s", exp)
        time.sleep(60)
def declared(function):
    """ Decorator to add function in trigger environment

    :param function: function to add to trigger environment
    :type function: types.FunctionType
    :return : the function itself, unchanged;
             only the TRIGGER_FUNCTIONS registry is updated
    """
    func_name = function.func_name
    TRIGGER_FUNCTIONS[func_name] = function
    logger.debug("Added %s to trigger functions list ", func_name)
    return function
def what_i_managed(self):
    """Arbiter ask me which scheduler id I manage

    :return: managed configuration ids
    :rtype: dict
    """
    # FIX: removed a leftover debug ``print`` that duplicated this log line
    logger.debug("The arbiter asked me what I manage. It's %s",
                 self.app.what_i_managed())
    return self.app.what_i_managed()
def allperfs(obj_ref):
    """ Get all perfdatas from a service or a host

    :param obj_ref:
    :type obj_ref: object
    :return: dictionary with perfdatas
    :rtype: dict
    """
    perfdata = PerfDatas(get_object(obj_ref).perf_data)
    logger.debug("[trigger] I get all perfdatas")
    # Map every known metric name to its perfdata entry
    return dict((metric.name, perfdata[metric.name]) for metric in perfdata)
def set_proxy(self, proxy):
    """Set HTTP proxy

    :param proxy: proxy url
    :type proxy: str
    :return: None
    """
    if not proxy:
        return
    logger.debug('PROXY SETTING PROXY %s', proxy)
    # The same proxy is used for both schemes
    self._requests_con.proxies = {
        'http': proxy,
        'https': proxy,
    }
def do_pynag_con_init(self, s_id, i_type='scheduler'):
    """Initialize or re-initialize connection with scheduler or arbiter if type == arbiter

    :param s_id: s_id
    :type s_id: int
    :param i_type: type of item
    :type i_type: str
    :return: None
    """
    # Get the good links tab for looping..
    links = self.get_links_from_type(i_type)
    if links is None:
        logger.debug('Type unknown for connection! %s', i_type)
        return

    # default timeout for daemons like pollers/reactionners/...
    timeout = 3
    data_timeout = 120

    if i_type == 'scheduler':
        # If sched is not active, I do not try to init
        # it is just useless
        is_active = links[s_id]['active']
        if not is_active:
            return
        # schedulers also got real timeout to respect
        timeout = links[s_id]['timeout']
        data_timeout = links[s_id]['data_timeout']

    # If we try to connect too much, we slow down our tests
    if self.is_connection_try_too_close(links[s_id]):
        return

    # Ok, we can now update it
    links[s_id]['last_connection'] = time.time()

    # DBG: print "Init connection with", links[s_id]['uri']
    running_id = links[s_id]['running_id']
    # DBG: print "Running id before connection", running_id
    uri = links[s_id]['uri']
    try:
        con = links[s_id]['con'] = HTTPClient(uri=uri,
                                              strong_ssl=links[s_id]['hard_ssl_name_check'],
                                              timeout=timeout,
                                              data_timeout=data_timeout)
    # FIX: "except E, exp" is deprecated Py2-only syntax; use "as"
    except HTTPEXCEPTIONS as exp:
        # But the multiprocessing module is not compatible with it!
        # so we must disable it immediately after
        logger.info("Connection problem to the %s %s: %s",
                    i_type, links[s_id]['name'], str(exp))
        links[s_id]['con'] = None
        return
def execute_query(self, query):
    """ Execute a query against an Oracle database.

    :param query: the query
    :type query: str
    :return: None
    """
    logger.debug("[DBOracle] Execute Oracle query %s\n", query)
    try:
        self.db_cursor.execute(query)
        self.db.commit()
    # FIX: "except E, exp" is deprecated Py2-only syntax; use "as"
    except IntegrityError_exp as exp:
        logger.warning("[DBOracle] Warning: a query raise an integrity error: %s, %s",
                       query, exp)
def get_nb_of_must_have_satellites(self, s_type):
    """Generic function to access one of the number satellite attribute
    ie : self.nb_pollers, self.nb_reactionners ...

    :param s_type: satellite type wanted
    :type s_type: str
    :return: self.nb_*type*s
    :rtype: int
    """
    attr_name = 'nb_%ss' % s_type
    if not hasattr(self, attr_name):
        logger.debug("[realm] do not have this kind of satellites: %s", s_type)
        return 0
    return getattr(self, attr_name)
def get_potential_satellites_by_type(self, s_type):
    """Generic function to access one of the potential satellite attribute
    ie : self.potential_pollers, self.potential_reactionners ...

    :param s_type: satellite type wanted
    :type s_type: str
    :return: self.potential_*type*s
    :rtype: list
    """
    attr_name = 'potential_%ss' % s_type
    if hasattr(self, attr_name):
        return getattr(self, attr_name)
    logger.debug("[realm] do not have this kind of satellites: %s", s_type)
    return []
def add(self, elt):
    """Add elt to this broker

    Original comment : Schedulers have some queues. We can simplify the call
    by adding elements into the proper queue just by looking at their type
    Brok -> self.broks
    TODO: better tag ID?
    External commands -> self.external_commands

    :param elt: object to add
    :type elt: object
    :return: None
    """
    cls_type = elt.__class__.my_type
    if cls_type == 'brok':
        # For brok, we TAG brok with our instance_id
        elt.instance_id = 0
        self.broks_internal_raised.append(elt)
        return
    elif cls_type == 'externalcommand':
        # FIX: the old code logged ExternalCommand.__dict__ (the class dict);
        # log the actual command's data, as the satellite add() does
        logger.debug("Enqueuing an external command '%s'", str(elt.__dict__))
        self.external_commands.append(elt)
    # Maybe we got a Message from the modules, it's way to ask something
    # like from now a full data from a scheduler for example.
    elif cls_type == 'message':
        # We got a message, great!
        logger.debug(str(elt.__dict__))
        if elt.get_type() == 'NeedData':
            data = elt.get_data()
            # Full instance id means: I got no data for this scheduler
            # so give me all dumbass!
            if 'full_instance_id' in data:
                c_id = data['full_instance_id']
                source = elt.source
                logger.info('The module %s is asking me to get all initial data '
                            'from the scheduler %d', source, c_id)
                # so we just reset the connection and the running_id,
                # it will just get all new things
                try:
                    self.schedulers[c_id]['con'] = None
                    self.schedulers[c_id]['running_id'] = 0
                except KeyError:  # maybe this instance was not known, forget it
                    logger.warning("the module %s ask me a full_instance_id "
                                   "for an unknown ID (%d)!", source, c_id)
        # Maybe a module tells me that it's dead, I must log it's last words...
        if elt.get_type() == 'ICrash':
            data = elt.get_data()
            logger.error('the module %s just crash! Please look at the traceback:',
                         data['name'])
            logger.error(data['trace'])
def linkify_hg_by_realms(self, realms): """ More than an explode function, but we need to already have members so... Will be really linkify just after And we explode realm in ours members, but do not override a host realm value if it's already set :param realms: object Realms :type realms: object :return: None """ # Now we explode the realm value if we've got one # The group realm must not override a host one (warning?) for hostgroup in self: if not hasattr(hostgroup, "realm"): continue # Maybe the value is void? if not hostgroup.realm.strip(): continue realm = realms.find_by_name(hostgroup.realm.strip()) if realm is not None: hostgroup.realm = realm logger.debug("[hostgroups] %s is in %s realm", hostgroup.get_name(), realm.get_name()) else: err = "the hostgroup %s got an unknown realm '%s'" % (hostgroup.get_name(), hostgroup.realm) hostgroup.configuration_errors.append(err) hostgroup.realm = None continue for host in hostgroup: if host is None: continue if host.realm is None or host.got_default_realm: # default not hasattr(h, 'realm'): logger.debug( "[hostgroups] apply a realm %s to host %s from a hostgroup " "rule (%s)", hostgroup.realm.get_name(), host.get_name(), hostgroup.get_name(), ) host.realm = hostgroup.realm else: if host.realm != hostgroup.realm: logger.warning( "[hostgroups] host %s it not in the same realm than it's " "hostgroup %s", host.get_name(), hostgroup.get_name(), )
def do_not_run(self):
    """Master tells to slave to not run (HTTP GET)
    Master will ignore this call

    :return: None
    """
    if self.app.is_master:
        # If I'm the master, ignore the command
        logger.debug("Received message to not run. "
                     "I am the Master, ignore and continue to run.")
        return
    # Else, I'm just a spare, so I listen to my master
    logger.debug("Received message to not run. I am the spare, stopping.")
    self.app.last_master_speack = time.time()
    self.app.must_run = False
def linkify_hg_by_realms(self, realms): """ More than an explode function, but we need to already have members so... Will be really linkify just after And we explode realm in ours members, but do not override a host realm value if it's already set :param realms: object Realms :type realms: object :return: None """ # Now we explode the realm value if we've got one # The group realm must not override a host one (warning?) for hostgroup in self: if not hasattr(hostgroup, 'realm'): continue # Maybe the value is void? if not hostgroup.realm.strip(): continue realm = realms.find_by_name(hostgroup.realm.strip()) if realm is not None: hostgroup.realm = realm logger.debug("[hostgroups] %s is in %s realm", hostgroup.get_name(), realm.get_name()) else: err = "the hostgroup %s got an unknown realm '%s'" % \ (hostgroup.get_name(), hostgroup.realm) hostgroup.configuration_errors.append(err) hostgroup.realm = None continue for host in hostgroup: if host is None: continue if host.realm is None or host.got_default_realm: # default not hasattr(h, 'realm'): logger.debug( "[hostgroups] apply a realm %s to host %s from a hostgroup " "rule (%s)", hostgroup.realm.get_name(), host.get_name(), hostgroup.get_name()) host.realm = hostgroup.realm else: if host.realm != hostgroup.realm: logger.warning( "[hostgroups] host %s it not in the same realm than it's " "hostgroup %s", host.get_name(), hostgroup.get_name())
def get_metric_and_value(self, service, perf_data):
    """ Get the metric and the value associated

    :param service: check name used to look up metric filters
    :type service: str
    :param perf_data: raw perfdata string to decode
    :type perf_data: str
    :return: list of (metric_name, value) couples
    :rtype: list
    """
    result = []
    for metric in PerfDatas(perf_data):
        logger.debug("[Graphite] service: %s, metric: %s", metric.name, metric.value)
        # Skip metrics filtered out for this service
        if service in self.filtered_metrics and metric.name in self.filtered_metrics[service]:
            logger.debug("[Graphite] Ignore metric '%s' for filtered service: %s",
                         metric.name, service)
            continue

        name = self.illegal_char_metric.sub('_', metric.name)
        name = self.multival.sub(r'.\1', name)

        # get metric value and its thresholds values if they exist
        name_value = {name: metric.value}
        # bailout if no value
        if name_value[name] == '':
            continue

        # Get or ignore extra values depending upon module configuration
        if metric.warning and self.send_warning:
            name_value[name + '_warn'] = metric.warning
        if metric.critical and self.send_critical:
            name_value[name + '_crit'] = metric.critical
        if metric.min and self.send_min:
            name_value[name + '_min'] = metric.min
        if metric.max and self.send_max:
            name_value[name + '_max'] = metric.max

        result.extend(name_value.items())
    return result
def do_not_run(self):
    """Master tells to slave to not run (HTTP GET)
    Master will ignore this call

    :return: None
    """
    app = self.app
    # If I'm the master, ignore the command
    if app.is_master:
        logger.debug("Received message to not run. "
                     "I am the Master, ignore and continue to run.")
    else:
        # I'm just a spare, so I obey my master and stop
        logger.debug("Received message to not run. I am the spare, stopping.")
        app.last_master_speack = time.time()
        app.must_run = False
def perf(obj_ref, metric_name):
    """ Get perf data from a service

    :param obj_ref:
    :type obj_ref: object
    :param metric_name:
    :type metric_name: str
    :return: the metric value, or None when the metric is unknown
    """
    perfdata = PerfDatas(get_object(obj_ref).perf_data)
    if metric_name not in perfdata:
        logger.debug("[trigger] I am in perf command")
        # Not found: same fallback as before
        return None
    logger.debug("[trigger] I found the perfdata")
    return perfdata[metric_name].value
def try_to_restart_deads(self):
    """Try to reinit and restart dead instances

    :return: None
    """
    # Work on a snapshot; failed instances are re-queued below
    dead_instances = self.to_restart[:]
    del self.to_restart[:]
    for instance in dead_instances:
        logger.debug("I should try to reinit %s", instance.get_name())
        if not self.try_instance_init(instance):
            # Init failed again: keep it queued for the next round
            self.to_restart.append(instance)
            continue
        logger.debug("Good, I try to restart %s", instance.get_name())
        # If it's an external, it will start it
        instance.start()
def add(self, elt):
    """Add an object to the receiver one
    Handles brok and externalcommand

    :param elt: object to add
    :type elt: object
    :return: None
    """
    cls_type = elt.__class__.my_type
    if cls_type == 'brok':
        # For brok, we TAG brok with our instance_id
        elt.instance_id = 0
        self.broks[elt._id] = elt
        return
    elif cls_type == 'externalcommand':
        # FIX: the old code logged ExternalCommand.__dict__ (the class dict);
        # log the actual command's data, as the satellite add() does
        logger.debug("Enqueuing an external command: %s", str(elt.__dict__))
        self.unprocessed_external_commands.append(elt)
def remove_instance(self, inst):
    """Request to cleanly remove the given instance.
    If instance is external also shutdown it cleanly

    :param inst: instance to remove
    :type inst: object
    :return: None
    """
    # External instances need to be close before (process + queues)
    if inst.is_external:
        logger.debug("Ask stop process for %s", inst.get_name())
        inst.stop_process()
        logger.debug("Stop process done")

    inst.clear_queues(self.manager)

    # Then do not listen anymore about it
    self.instances.remove(inst)
def manage_initial_host_status_brok(self, b):
    """ Initialize the cache for hosts

    :param b: initial host status brok
    :type b: object
    :return: None
    """
    host_name = b.data['host_name']
    logger.info("[Graphite] got initial host status: %s", host_name)

    # Keep only the graphite-related custom macros in the cache
    cache_entry = {}
    customs = b.data['customs']
    for custom_key in ('_GRAPHITE_PRE', '_GRAPHITE_GROUP'):
        if custom_key in customs:
            cache_entry[custom_key] = customs[custom_key]
    self.hosts_cache[host_name] = cache_entry

    logger.debug("[Graphite] initial host status received: %s", host_name)
def main(self):
    """
    Main function where send queue to manage_brok function

    :return: None
    """
    self.set_proctitle(self.name)
    self.set_exit_handler()
    # Consume broks until the daemon asks us to stop
    while not self.interrupted:
        logger.debug("[Alignak Backend Brok] queue length: %s", self.to_q.qsize())
        start = time.time()
        # Blocking get: wakes up when the daemon pushes a list of broks
        l = self.to_q.get()
        for b in l:
            # prepare() un-serializes the brok data before handling
            b.prepare()
            self.manage_brok(b)
        logger.debug("[Alignak Backend Brok] time to manage %s broks (%d secs)",
                     len(l), time.time() - start)
def add(self, elt):
    """Add an object to the satellite one
    Handles brok and externalcommand

    :param elt: object to add
    :type elt: object
    :return: None
    """
    kind = elt.__class__.my_type
    if kind == 'brok':
        # For brok, we TAG brok with our instance_id
        elt.instance_id = 0
        self.broks[elt._id] = elt
    elif kind == 'externalcommand':
        logger.debug("Enqueuing an external command '%s'", str(elt.__dict__))
        # The external commands list is shared, hence the lock
        with self.external_commands_lock:
            self.external_commands.append(elt)
def get_return_for_passive(self, sched_id):
    """Get returns of passive actions for a specific scheduler

    :param sched_id: scheduler id
    :type sched_id: int
    :return: Action list
    :rtype: list
    """
    sched = self.schedulers.get(sched_id)
    if sched is None:
        # I do not know this scheduler?
        logger.debug("I do not know this scheduler: %s", sched_id)
        return []

    # Swap the accumulated results out so new ones can accumulate
    ret = sched['wait_homerun']
    sched['wait_homerun'] = {}
    logger.debug("Preparing to return %s results", len(ret))
    return ret.values()
def manage_brok(self, brok):
    """Get a brok. We put brok data to the modules

    :param brok: object with data
    :type brok: object
    :return: None
    """
    # Call all modules if they catch the call
    for mod in self.modules_manager.get_internal_instances():
        try:
            mod.manage_brok(brok)
        # FIX: "except E, exp" is deprecated Py2-only syntax; use "as"
        except Exception as exp:
            logger.debug(str(exp.__dict__))
            logger.warning("The mod %s raise an exception: %s, I'm tagging it to restart later",
                           mod.get_name(), str(exp))
            logger.warning("Exception type: %s", type(exp))
            logger.warning("Back trace of this kill: %s", traceback.format_exc())
            self.modules_manager.set_to_restart(mod)
def get_outputs(self, out, max_plugins_output_length):
    """Get outputs from single output (split perfdata etc).
    Edit output, perf_data and long_output attributes.

    :param out: output data of a check
    :type out: str
    :param max_plugins_output_length: max plugin data length
    :type max_plugins_output_length: int
    :return: None
    """
    # Squeeze all output after max_plugins_output_length
    out = out[:max_plugins_output_length]

    # Protect escaped pipes so the perfdata separator split ignores them
    out = out.replace(r'\|', '___PROTECT_PIPE___')

    def _clean(fragment):
        """Strip a fragment and restore its literal pipes."""
        return fragment.strip().replace('___PROTECT_PIPE___', '|')

    lines = out.split('\n')

    # First line: text before the first pipe is the output, the second
    # pipe-separated field (if any) is perfdata
    first_parts = lines[0].split('|')
    self.output = _clean(first_parts[0])
    self.perf_data = ''
    if len(first_parts) > 1:
        self.perf_data = _clean(first_parts[1])

    # Remaining lines belong to long_output until a pipe shows up; after
    # that everything is perfdata, space-joined
    long_output = []
    in_perfdata = False
    for line in lines[1:]:
        if in_perfdata:
            self.perf_data += ' ' + _clean(line)
        else:
            pieces = line.split('|', 1)
            # The first part will always be long_output
            long_output.append(_clean(pieces[0]))
            if len(pieces) > 1:
                in_perfdata = True
                self.perf_data += ' ' + _clean(pieces[1])

    # long_output is all non output and perfline, join with \n
    self.long_output = '\n'.join(long_output)
    logger.debug("Command result for '%s': %s", self.command, self.output)
def get_all_states(self):
    """Return all the data of satellites

    :return: dict containing satellites data
    Output looks like this ::

    {'arbiter' : [{'schedproperty1':'value1' ..}, {'pollerproperty1', 'value11' ..}, ..],
    'scheduler': [..],
    'poller': [..],
    'reactionner': [..],
    'receiver': [..],
     'broker: [..]'
    }

    :rtype: dict
    """
    res = {}
    for s_type in ['arbiter', 'scheduler', 'poller', 'reactionner', 'receiver', 'broker']:
        lst = []
        res[s_type] = lst
        for daemon in getattr(self.app.conf, s_type + 's'):
            cls = daemon.__class__
            env = {}
            all_props = [cls.properties, cls.running_properties]
            for props in all_props:
                for prop in props:
                    if not hasattr(daemon, prop):
                        continue
                    val = getattr(daemon, prop)
                    # realm objects are exported by name
                    if prop == "realm" and hasattr(val, "realm_name"):
                        env[prop] = val.realm_name
                    # give a try to a json able object
                    try:
                        json.dumps(val)
                        env[prop] = val
                    # FIX: "except E, exp" is deprecated Py2-only syntax
                    except Exception as exp:
                        logger.debug('%s', exp)
            lst.append(env)
    # FIX: the computed structure was built but never returned
    return res
def get_objects_properties(self, table):
    """'Dump all objects of the type in
    [hosts, services, contacts, commands, hostgroups, servicegroups]

    :param table: table name
    :type table: str
    :return: list all properties of all objects
    :rtype: list
    """
    with self.app.conf_lock:
        logger.debug('ASK:: table= %s', str(table))
        objs = getattr(self.app.conf, table, None)
        logger.debug("OBJS:: %s", str(objs))
        if objs is None or len(objs) == 0:
            return []
        # One json-able dict per object
        return [jsonify_r(obj) for obj in objs]
def put_results(self, results):
    """Put results to scheduler, used by poller and reactionners

    :param results: results to handle
    :type results:
    :return: True or ?? (if lock acquire fails)
    :rtype: bool
    """
    nb_received = len(results)
    self.app.sched.nb_check_received += nb_received
    if nb_received:
        logger.debug("Received %d results", nb_received)
    # Tag everything as active results
    for result in results:
        result.set_type_active()
    # Hand them over to the scheduler under its lock
    with self.app.sched.waiting_results_lock:
        self.app.sched.waiting_results.extend(results)

    # for c in results:
    # self.sched.put_results(c)
    return True
def __setstate_deprecated__(self, state):
    """In 1.0 we move to a dict save.

    :param state: it's the state
    :type state: list
    :return: None
    """
    cls = self.__class__
    # Check if the len of this state is like the previous,
    # if not, we will do errors!
    # -1 because of the '_id' prop
    if len(cls.properties) != (len(state) - 1):
        logger.debug("Passing comment")
        return

    # NOTE(review): state.pop() consumes from the END of the list, so the
    # iteration order of cls.properties must mirror how the state was
    # originally serialized — confirm before touching this ordering.
    self._id = state.pop()
    for prop in cls.properties:
        val = state.pop()
        setattr(self, prop, val)
    # Keep the class-level id counter ahead of every restored instance id
    if self._id >= cls._id:
        cls._id = self._id + 1
def execute(self):
    """Start this action command. The command will be executed in a
    subprocess.

    :return: None or str 'toomanyopenfiles'
    :rtype: None | str
    """
    self.status = 'launched'
    self.check_time = time.time()
    self.wait_time = 0.0001
    self.last_poll = self.check_time

    # Get a local env variables with our additional values
    self.local_env = self.get_local_environnement()

    # Buffers for incremental reads (used when fcntl is available)
    self.stdoutdata = ''
    self.stderrdata = ''

    logger.debug("Launch command: %s", self.command)
    # Delegate to the OS specific implementation
    return self.execute__()