def main(self):
    """Main function for Scheduler, launch after the init::

    * Init daemon
    * Load module manager
    * Launch main loop
    * Catch any Exception that occurs

    :return: None
    """
    try:
        self.load_config_file()
        # Setting log level
        logger.setLevel(self.log_level)
        # Force the debug level if the daemon is said to start with such level
        if self.debug:
            logger.setLevel('DEBUG')
        self.look_for_early_exit()
        self.do_daemon_init_and_start()
        self.load_modules_manager()
        self.uri = self.http_daemon.uri
        logger.info("[scheduler] General interface is at: %s", self.uri)
        self.do_mainloop()
    except Exception:
        # Py2-only "except Exception, exp" replaced with the portable form;
        # the bound name was unused — the traceback helper captures it all.
        self.print_unrecoverable(traceback.format_exc())
        raise
def expect_file_dirs(root, path):
    """Ensure every intermediate directory of ``path`` exists under ``root``.

    We got a file like /tmp/toto/toto2/bob.png and we want to be sure the
    dir /tmp/toto/toto2/ really exists so we can copy into it; missing
    levels are created one by one (like ``mkdir -p``).

    :param root: root directory
    :type root: str
    :param path: path to verify
    :type path: str
    :return: True on success, False otherwise
    :rtype: bool
    """
    dirs = [d for d in os.path.normpath(path).split('/') if d != '']
    # We create every directory level down to the last one (mkdir -p)
    # TODO: and windows????
    tmp_dir = root
    for directory in dirs:
        path = os.path.join(tmp_dir, directory)
        logger.info('Verify the existence of file %s', path)
        if not os.path.exists(path):
            try:
                os.mkdir(path)
            except OSError:
                # Creation failed (permissions, missing root, race...):
                # report failure instead of raising. OSError is what
                # os.mkdir actually raises — no need to swallow everything.
                return False
        tmp_dir = path
    return True
def get_instance(mod_conf):
    """Return a module instance for the plugin manager.

    :param mod_conf: module configuration object
    :return: AlignakBackendSched instance
    :rtype: object
    """
    # Lazy %-style logger arguments instead of eager "%" string formatting:
    # the message is only built if the INFO level is enabled.
    logger.info("[Alignak Backend Sched] Get a Alignak config module for plugin %s",
                mod_conf.get_name())
    return AlignakBackendSched(mod_conf)
def hook_load_retention(self, daemon):
    """Dump the arbiter configuration to mongo when the retention hook fires.

    Anything that is not an Arbiter daemon is ignored.

    :param daemon: daemon triggering the hook
    :return: None
    """
    if not isinstance(daemon, Arbiter):
        return

    logger.info("Dumping config to mongo ..")
    start = time.time()
    self.do_insert(daemon)
    logger.info("Mongo insert took %s", time.time() - start)
def do_pynag_con_init(self, s_id):
    """Initialize a connection with scheduler having '_id'.

    Return the new connection to the scheduler if it succeeded,
    else: any error OR sched is inactive: return None.
    NB: if sched is inactive then None is directly returned.

    :param s_id: scheduler s_id to connect to
    :type s_id: int
    :return: scheduler connection object or None
    :rtype: alignak.http.client.HTTPClient
    """
    sched = self.schedulers[s_id]
    if not sched['active']:
        # Inactive scheduler: nothing to connect to
        return

    sname = sched['name']
    uri = sched['uri']
    timeout = sched['timeout']
    data_timeout = sched['data_timeout']
    logger.info("[%s] Init connection with %s at %s (%ss,%ss)",
                self.name, sname, uri, timeout, data_timeout)
    try:
        # Unused aliases (running_id, sch_con) from the original were dropped
        sched['con'] = HTTPClient(
            uri=uri, strong_ssl=sched['hard_ssl_name_check'],
            timeout=timeout, data_timeout=data_timeout)
    except HTTPEXCEPTIONS as exp:
        # Py2-only "except ..., exp" replaced with the portable "as" form
        logger.warning("[%s] Scheduler %s is not initialized or has network problem: %s",
                       self.name, sname, str(exp))
        sched['con'] = None
        return
def push_host_names(self, sched_id, hnames):
    """Send host names to receiver.

    :param sched_id: id of the scheduler
    :type sched_id: int
    :param hnames: list of host names
    :type hnames: list
    :return: None
    """
    try:
        if self.con is None:
            self.create_connection()
        logger.info(" (%s)", self.uri)

        # If the connection failed to initialize, bail out
        if self.con is None:
            self.add_failed_check_attempt()
            return

        # Check the link is alive before pushing the names
        self.con.get('ping')
        self.con.post('push_host_names', {'sched_id': sched_id, 'hnames': hnames},
                      wait='long')
    except HTTPEXCEPTIONS as exp:
        # Py2-only "except ..., exp" replaced with the portable "as" form
        self.add_failed_check_attempt(reason=str(exp))
def main(self):
    """Default "main" of a BaseModule: subclasses are expected to override it.

    Logs an informational line so a missing override is easy to spot.

    :return: None
    """
    cls = self.__class__
    logger.info("BaseModule.main() not defined in your %s", cls)
def start(self, http_daemon=None):
    """(Re)start the process backing this module, when it is external.

    Any previous process is stopped first, then a new Process targeting
    start_module is spawned.

    :param http_daemon: Not used here but can be used in other modules
    :type http_daemon: None | object
    :return: None
    """
    if not self.is_external:
        return

    self.stop_process()
    logger.info("Starting external process for instance %s", self.name)
    new_proc = Process(target=self.start_module, args=())

    # Under windows we should not call start() on an object that got
    # its process as object, so we remove it first and set it back
    # once the fork is done
    try:
        del self.properties['process']
    except KeyError:
        pass

    new_proc.start()

    # We save the process data AFTER the fork()
    self.process = new_proc
    self.properties['process'] = new_proc  # TODO: temporary
    logger.info("%s is now started ; pid=%d", self.name, new_proc.pid)
def try_load(cls, mod_name, mod_dir=None):
    """Try in three different ways to load a module.

    :param mod_name: module name to load
    :type mod_name: str
    :param mod_dir: module directory where module is
    :type mod_dir: str | None
    :return: module
    :rtype: object
    """
    # Very-new-style module first
    mod = cls.try_best_load(mod_name)
    if mod:
        logger.info("Correctly loaded %s as a very-new-style alignak module :)",
                    mod_name)
        return mod

    # Then old-new-style
    mod = cls.try_best_load('.module', mod_name)
    if mod:
        logger.info("Correctly loaded %s as an old-new-style alignak module :|",
                    mod_name)
        return mod

    # Finally very-old-style, only when a module directory is known
    if mod_dir:
        mod = cls.try_very_bad_load(mod_dir)
        if mod:
            logger.info("Correctly loaded %s as a very old-style alignak module :s",
                        mod_name)
    return mod
def expect_file_dirs(root, path):
    """Ensure every intermediate directory of ``path`` exists under ``root``.

    We got a file like /tmp/toto/toto2/bob.png and we want to be sure the
    dir /tmp/toto/toto2/ really exists so we can copy into it; missing
    levels are created one by one (like ``mkdir -p``).

    :param root: root directory
    :type root: str
    :param path: path to verify
    :type path: str
    :return: True on success, False otherwise
    :rtype: bool
    """
    dirs = [d for d in os.path.normpath(path).split("/") if d != ""]
    # We create every directory level down to the last one (mkdir -p)
    # TODO: and windows????
    tmp_dir = root
    for directory in dirs:
        path = os.path.join(tmp_dir, directory)
        logger.info("Verify the existence of file %s", path)
        if not os.path.exists(path):
            try:
                os.mkdir(path)
            except OSError:
                # Creation failed (permissions, missing root, race...):
                # report failure instead of raising. OSError is what
                # os.mkdir actually raises — no need to swallow everything.
                return False
        tmp_dir = path
    return True
def try_instance_init(self, inst, late_start=False):
    """Try to "init" the given module instance.

    :param inst: instance to init
    :type inst: object
    :param late_start: If late_start, don't look for last_init_try
    :type late_start: bool
    :return: True on successful init. False if instance init method raised any Exception.
    :rtype: bool
    """
    try:
        logger.info("Trying to init module: %s", inst.get_name())
        inst.init_try += 1
        # Maybe it's a retry
        if not late_start and inst.init_try > 1:
            # Do not try until 5 sec, or it's too loopy
            if inst.last_init_try > time.time() - 5:
                return False
        inst.last_init_try = time.time()

        # If it's an external, create/update Queues()
        if inst.is_external:
            inst.create_queues(self.manager)

        inst.init()
    except Exception as err:
        logger.error("The instance %s raised an exception %s, I remove it!",
                     inst.get_name(), str(err))
        # traceback.format_exc() replaces the manual cStringIO capture
        logger.error("Back trace of this remove: %s", traceback.format_exc())
        return False
    # BUG FIX: the docstring promises True on success, but the original
    # fell off the end and returned None (falsy) — callers checking the
    # result treated every successful init as a failure.
    return True
def push_host_names(self, sched_id, hnames):
    """Send host names to receiver.

    :param sched_id: id of the scheduler
    :type sched_id: int
    :param hnames: list of host names
    :type hnames: list
    :return: None
    """
    try:
        if self.con is None:
            self.create_connection()
        logger.info(" (%s)", self.uri)

        # If the connection failed to initialize, bail out
        if self.con is None:
            self.add_failed_check_attempt()
            return

        # Check the link is alive before pushing the names
        self.con.get('ping')
        self.con.post('push_host_names', {'sched_id': sched_id, 'hnames': hnames},
                      wait='long')
    except HTTPEXCEPTIONS as exp:
        # Py2-only "except ..., exp" replaced with the portable "as" form
        self.add_failed_check_attempt(reason=str(exp))
def try_instance_init(self, inst, late_start=False):
    """Try to "init" the given module instance.

    :param inst: instance to init
    :type inst: object
    :param late_start: If late_start, don't look for last_init_try
    :type late_start: bool
    :return: True on successful init. False if instance init method raised any Exception.
    :rtype: bool
    """
    try:
        logger.info("Trying to init module: %s", inst.get_name())
        inst.init_try += 1
        # Maybe it's a retry
        if not late_start and inst.init_try > 1:
            # Do not try until 5 sec, or it's too loopy
            if inst.last_init_try > time.time() - 5:
                return False
        inst.last_init_try = time.time()

        # If it's an external, create/update Queues()
        if inst.is_external:
            inst.create_queues(self.manager)

        inst.init()
    except Exception as err:
        logger.error("The instance %s raised an exception %s, I remove it!",
                     inst.get_name(), str(err))
        # traceback.format_exc() replaces the manual cStringIO capture
        logger.error("Back trace of this remove: %s", traceback.format_exc())
        return False
    # BUG FIX: the docstring promises True on success, but the original
    # fell off the end and returned None (falsy) — callers checking the
    # result treated every successful init as a failure.
    return True
def _prework(self, real_work, *args):
    """Drop every BrokHandler from the logger, then run real_work(*args)."""
    # Snapshot first: we must not mutate logger.handlers while iterating it
    brok_handlers = [h for h in logger.handlers if isinstance(h, BrokHandler)]
    for handler in brok_handlers:
        logger.info("Cleaning BrokHandler %r from logger.handlers..", handler)
        logger.removeHandler(handler)
    real_work(*args)
def main(self):
    """Main satellite function. Do init and then mainloop

    Sequence: banner, config load, log level, daemonize, modules manager,
    wait for the initial configuration, load/start modules, allocate the
    worker processes, then enter the main loop.

    :return: None
    """
    try:
        # Print the daemon banner first
        for line in self.get_header():
            logger.info(line)

        self.load_config_file()

        # Setting log level
        logger.setLevel(self.log_level)
        # Force the debug level if the daemon is said to start with such level
        if self.debug:
            logger.setLevel('DEBUG')

        # Look if we are enabled or not. If ok, start the daemon mode
        self.look_for_early_exit()
        self.do_daemon_init_and_start()
        self.do_post_daemon_init()
        self.load_modules_manager()

        # We wait for initial conf
        self.wait_for_initial_conf()
        if not self.new_conf:
            # we must have either big problem or was requested to shutdown
            return
        self.setup_new_conf()

        # We can load our modules now
        self.modules_manager.set_modules(self.modules_manager.modules)
        self.do_load_modules()
        # And even start external ones
        self.modules_manager.start_external_instances()

        # Allocate Mortal Threads
        # NOTE(review): xrange(1, min_workers) iterates min_workers - 1
        # times — confirm the off-by-one is intended.
        for _ in xrange(1, self.min_workers):
            to_del = []
            for mod in self.q_by_mod:
                try:
                    self.create_and_launch_worker(module_name=mod)
                # Maybe this modules is not a true worker one.
                # if so, just delete if from q_by_mod
                except NotWorkerMod:
                    to_del.append(mod)
            for mod in to_del:
                logger.debug("The module %s is not a worker one, "
                             "I remove it from the worker list", mod)
                del self.q_by_mod[mod]

        # Now main loop
        self.do_mainloop()
    except Exception:
        # Last-chance handler: log the traceback, then re-raise for the caller
        self.print_unrecoverable(traceback.format_exc())
        raise
def do_updates(self, db, objs_updated):
    """Flush updated objects to mongo, one collection per object class.

    Uses an unordered bulk operation when pymongo >= 2.7 and falls back
    to per-document upserts otherwise.

    :param db: mongo database handle
    :param objs_updated: mapping of class -> {object: set(attribute names)}
    :return: None
    :raises RuntimeError: when an insert/update or bulk execute fails
    """
    n_updated = 0
    tot_attr_updated = 0
    if __debug__:
        # Only tracked for the extra debug info in the final log line
        attributes_updated = set()
    t0 = time.time()
    for cls, objects in objs_updated.iteritems():
        infos = types_infos[cls]
        collection = db[infos.plural]
        # NOTE(review): this is a *string* comparison ("2.10" < "2.7");
        # confirm pymongo.version stays in a range where it behaves.
        if pymongo.version >= "2.7":
            bulkop = collection.initialize_unordered_bulk_op()
        for obj, attr_set in objects.iteritems():
            dest = {}
            dobj = {'$set': dest}
            key = get_object_unique_key(obj, infos)
            for attr in attr_set:
                try:
                    value = getattr(obj, attr)
                except AttributeError:
                    # Attribute vanished since it was flagged: skip it
                    continue
                dest[attr] = sanitize_value(cls, obj, attr, value)
                if __debug__:
                    attributes_updated.add(attr)
            tot_attr_updated += len(dest)
            try:
                if pymongo.version >= "2.7":
                    # Queue the upsert in the bulk operation
                    bulkop.find(key).upsert().update_one(dobj)
                else:
                    # Old pymongo: immediate per-document upsert
                    collection.update(key, dobj, upsert=True)
            except Exception as err:
                raise RuntimeError("Error on insert/update of %s : %s" %
                                   (obj.get_name(), err))
            n_updated += 1
        # end for obj, lst in objects.items()
        if objects and pymongo.version >= "2.7":
            # mongo requires at least one document for a bulkop.execute()
            try:
                bulkop.execute()
            except Exception as err:
                raise RuntimeError("Error on bulk execute for collection "
                                   "%s : %s" % (infos.plural, err))
    if n_updated:
        fmt = "updated %s objects with %s attributes in mongo in %s secs"
        args = [n_updated, tot_attr_updated, time.time() - t0]
        if __debug__:
            fmt += " attributes=%s"
            args.append(attributes_updated)
        logger.info(fmt, *args)
def main(self):
    """Main satellite function. Do init and then mainloop

    Sequence: banner, config load, log level, daemonize, modules manager,
    wait for the initial configuration, load/start modules, allocate the
    worker processes, then enter the main loop.

    :return: None
    """
    try:
        # Print the daemon banner first
        for line in self.get_header():
            logger.info(line)

        self.load_config_file()

        # Setting log level
        logger.setLevel(self.log_level)
        # Force the debug level if the daemon is said to start with such level
        if self.debug:
            logger.setLevel('DEBUG')

        # Look if we are enabled or not. If ok, start the daemon mode
        self.look_for_early_exit()
        self.do_daemon_init_and_start()
        self.do_post_daemon_init()
        self.load_modules_manager()

        # We wait for initial conf
        self.wait_for_initial_conf()
        if not self.new_conf:
            # we must have either big problem or was requested to shutdown
            return
        self.setup_new_conf()

        # We can load our modules now
        self.do_load_modules(self.new_modules_conf)
        # And even start external ones
        self.modules_manager.start_external_instances()

        # Allocate Mortal Threads
        # NOTE(review): xrange(1, min_workers) iterates min_workers - 1
        # times — confirm the off-by-one is intended.
        for _ in xrange(1, self.min_workers):
            to_del = []
            for mod in self.q_by_mod:
                try:
                    self.create_and_launch_worker(module_name=mod)
                # Maybe this modules is not a true worker one.
                # if so, just delete if from q_by_mod
                except NotWorkerMod:
                    to_del.append(mod)
            for mod in to_del:
                logger.debug("The module %s is not a worker one, "
                             "I remove it from the worker list", mod)
                del self.q_by_mod[mod]

        # Now main loop
        self.do_mainloop()
    except Exception:
        # Last-chance handler: log the traceback, then re-raise for the caller
        self.print_unrecoverable(traceback.format_exc())
        raise
def manage_host_check_result_brok(self, b):
    """Manage a host check result brok (we UPDATE data info with this).

    :param b: host check result brok
    :type b: object
    :return: None
    """
    host_name = b.data['host_name']
    logger.debug("[Graphite] host check result: %s", host_name)

    # If host initial status brok has not been received, ignore ...
    if host_name not in self.hosts_cache:
        logger.warning("[Graphite] received host check result for an unknown host: %s",
                       host_name)
        return

    # Decode received metrics
    couples = self.get_metric_and_value('host_check', b.data['perf_data'])

    # If no values, we can exit now
    if len(couples) == 0:
        logger.debug("[Graphite] no metrics to send ...")
        return

    # Custom hosts variables
    hname = self.illegal_char_hostname.sub('_', host_name)
    if '_GRAPHITE_GROUP' in self.hosts_cache[host_name]:
        hname = ".".join((self.hosts_cache[host_name]['_GRAPHITE_GROUP'], hname))
    if '_GRAPHITE_PRE' in self.hosts_cache[host_name]:
        hname = ".".join((self.hosts_cache[host_name]['_GRAPHITE_PRE'], hname))
    if self.hostcheck:
        hname = '.'.join((hname, self.hostcheck))

    # Checks latency
    if self.ignore_latency_limit >= b.data['latency'] > 0:
        check_time = int(b.data['last_chk']) - int(b.data['latency'])
        # BUG FIX: this is a *host* brok — the original logged
        # b.data['service_description'], which does not exist on a host
        # check result and raised KeyError whenever latency was ignored.
        logger.info("[Graphite] Ignoring latency for host %s. Latency : %s",
                    host_name, b.data['latency'])
    else:
        check_time = int(b.data['last_chk'])

    # Graphite data source
    if self.graphite_data_source:
        path = '.'.join((hname, self.graphite_data_source))
    else:
        path = hname

    # Send a bulk of all metrics at once
    lines = ["%s.%s %s %d" % (path, metric, value, check_time)
             for (metric, value) in couples]
    lines.append("\n")
    packet = '\n'.join(lines)
    self.send_packet(packet)
def __init__(self, host, port, http_interface, use_ssl, ca_cert,
             ssl_key, ssl_cert, daemon_thread_pool_size):
    """Build the CherryPy WSGI server wrapper for the daemon HTTP interface.

    :param host: address to bind
    :param port: TCP port to bind (0 disables the HTTP server entirely)
    :param http_interface: object exposing the HTTP endpoints
    :param use_ssl: serve https instead of http
    :param ca_cert: CA certificate path (used for the SSL context)
    :param ssl_key: server private key path
    :param ssl_cert: server certificate path
    :param daemon_thread_pool_size: number of worker threads for the server
    """
    self.port = port
    self.host = host
    self.srv = None
    # Port = 0 means "I don't want HTTP server"
    if self.port == 0:
        return

    self.use_ssl = use_ssl

    # NOTE(review): redundant — self.srv was already set to None above
    self.srv = None

    protocol = 'http'
    if use_ssl:
        protocol = 'https'
    self.uri = '%s://%s:%s' % (protocol, self.host, self.port)
    logger.info("Opening HTTP socket at %s", self.uri)

    # This config override default processors so we put them back in case we need them
    config = {
        '/': {
            'request.body.processors': {'application/x-www-form-urlencoded': process_urlencoded,
                                        'multipart/form-data': process_multipart_form_data,
                                        'multipart': process_multipart,
                                        'application/zlib': zlib_processor},
            'tools.gzip.on': True,
            'tools.gzip.mime_types': ['text/*', 'application/json']
        }
    }
    # disable console logging of cherrypy when not in DEBUG
    if getattr(logger, 'level') != logging.DEBUG:
        cherrypy.log.screen = False

    self.srv = CherryPyWSGIServer((host, port),
                                  cherrypy.Application(http_interface, "/", config),
                                  numthreads=daemon_thread_pool_size,
                                  shutdown_timeout=1)
    # pyOpenSSL adapter is only wired when the SSL stack is importable
    if SSL and pyOpenSSLAdapter and use_ssl:
        adapter = pyOpenSSLAdapter(ssl_cert, ssl_key, ca_cert)
        context = adapter.get_context()
        # SSLV2 is deprecated since 2011 by RFC 6176
        # SSLV3, TLSV1 and TLSV1.1 have POODLE weakness (harder to exploit on TLS)
        # So for now (until a new TLS version) we only have TLSv1.2 left
        # WE also remove compression because of BREACH weakness
        context.set_options(SSL.OP_NO_SSLv2 | SSL.OP_NO_SSLv3 |
                            SSL.OP_NO_TLSv1 | SSL.OP_NO_TLSv1_1 |
                            SSL.OP_NO_COMPRESSION)
        # All excluded algorithm beyond are known to be weak.
        context.set_cipher_list('DEFAULT:!DSS:!PSK:!SRP:!3DES:!RC4:!DES:!IDEA:!RC2:!NULL')
        adapter.context = context
        self.srv.ssl_adapter = adapter
    if use_ssl:
        self.srv.ssl_certificate = ssl_cert
        self.srv.ssl_private_key = ssl_key
def check_bad_dispatch(self):
    """Check if we have a bad dispatch.
    For example : a spare started but the master was still alive
    We need ask the spare to wait a new conf

    :return: None
    """
    for elt in self.elements:
        if hasattr(elt, 'conf'):
            # If element has a conf, I do not care, it's a good dispatch
            # If dead: I do not ask it something, it won't respond..
            if elt.conf is None and elt.reachable:
                if elt.have_conf():
                    logger.warning("The element %s have a conf and should "
                                   "not have one! I ask it to idle now",
                                   elt.get_name())
                    elt.active = False
                    elt.wait_new_conf()
                    # I do not care about order not send or not. If not,
                    # The next loop will resent it

    # I ask satellites which sched_id they manage. If I do not agree, I ask
    # them to remove it
    for satellite in self.satellites:
        kind = satellite.get_my_type()
        if not satellite.reachable:
            continue
        cfg_ids = satellite.managed_confs  # what_i_managed()
        # I do not care about satellites that do nothing, they already
        # do what I want :)
        if len(cfg_ids) == 0:
            continue

        id_to_delete = []
        for cfg_id in cfg_ids:
            # Ok, we search for realms that have the conf
            for realm in self.realms:
                if cfg_id in realm.confs:
                    # Ok we've got the realm, we check its to_satellites_managed_by
                    # to see if reactionner is in. If not, we remove he sched_id for it
                    if satellite not in realm.to_satellites_managed_by[kind][cfg_id]:
                        id_to_delete.append(cfg_id)

        # Maybe we removed all cfg_id of this reactionner
        # We can put it idle, no active and wait_new_conf
        if len(id_to_delete) == len(cfg_ids):
            satellite.active = False
            logger.info("I ask %s to wait a new conf", satellite.get_name())
            satellite.wait_new_conf()
        else:
            # It is not fully idle, just less cfg
            for r_id in id_to_delete:
                logger.info("I ask to remove configuration N%d from %s",
                            r_id, satellite.get_name())
                # BUG FIX: the original passed the *builtin* ``id`` here
                # instead of the loop variable ``r_id``
                satellite.remove_from_conf(r_id)
def get_instance(mod_conf):
    """Return a module instance for the plugin manager.

    :param mod_conf: Configuration object
    :type mod_conf: object
    :return: GraphiteBroker instance
    :rtype: object
    """
    logger.info("[Graphite] Get a graphite data module for plugin %s",
                mod_conf.get_name())
    return GraphiteBroker(mod_conf)
def get_instance(mod_conf):
    """Return a module instance for the plugin manager.

    :param mod_conf: Configuration object
    :type mod_conf: object
    :return: AlignakBackendBrok instance
    :rtype: object
    """
    # Lazy %-style logger arguments instead of eager "%" string formatting:
    # the message is only built if the INFO level is enabled.
    logger.info("[Alignak Backend Brok] Get a Alignak log & livestate module for plugin %s",
                mod_conf.get_name())
    return AlignakBackendBrok(mod_conf)
def prepare_for_satellites_conf(self):
    """Init the following attributes::

    * to_satellites (with *satellite type* keys)
    * to_satellites_need_dispatch (with *satellite type* keys)
    * to_satellites_managed_by (with *satellite type* keys)
    * nb_*satellite type*s
    * self.potential_*satellite type*s
    (satellite type are reactionner, poller, broker and receiver)

    :return: None
    """
    sat_types = ('reactionner', 'poller', 'broker', 'receiver')

    self.to_satellites = dict((s_type, {}) for s_type in sat_types)
    self.to_satellites_need_dispatch = dict((s_type, {}) for s_type in sat_types)
    self.to_satellites_managed_by = dict((s_type, {}) for s_type in sat_types)

    for s_type in sat_types:
        # e.g. count_pollers() then fill_potential_satellites_by_type('pollers')
        getattr(self, 'count_%ss' % s_type)()
        self.fill_potential_satellites_by_type('%ss' % s_type)

    line = "%s: (in/potential) (schedulers:%d) (pollers:%d/%d)" \
           " (reactionners:%d/%d) (brokers:%d/%d) (receivers:%d/%d)" % \
        (self.get_name(),
         len(self.schedulers),
         self.nb_pollers, len(self.potential_pollers),
         self.nb_reactionners, len(self.potential_reactionners),
         self.nb_brokers, len(self.potential_brokers),
         self.nb_receivers, len(self.potential_receivers))
    logger.info(line)
def _main(self): """module "main" method. Only used by external modules. :return: None """ self.set_proctitle(self.name) self.set_signal_handler() logger.info("[%s[%d]]: Now running..", self.name, os.getpid()) # Will block here! self.main() self.do_stop() logger.info("[%s]: exiting now..", self.name)
def do_pynag_con_init(self, s_id, i_type='scheduler'):
    """Initialize or re-initialize connection with scheduler or arbiter
    if type == arbiter.

    :param s_id: s_id
    :type s_id: int
    :param i_type: type of item
    :type i_type: str
    :return: None
    """
    # Get the good links tab for looping..
    links = self.get_links_from_type(i_type)
    if links is None:
        logger.debug('Type unknown for connection! %s', i_type)
        return

    # default timeout for daemons like pollers/reactionners/...
    timeout = 3
    data_timeout = 120

    if i_type == 'scheduler':
        # If sched is not active, I do not try to init
        # it is just useless
        if not links[s_id]['active']:
            return
        # schedulers also got real timeout to respect
        timeout = links[s_id]['timeout']
        data_timeout = links[s_id]['data_timeout']

    # If we try to connect too much, we slow down our tests
    if self.is_connection_try_too_close(links[s_id]):
        return

    # Ok, we can now update it
    links[s_id]['last_connection'] = time.time()

    uri = links[s_id]['uri']
    try:
        # Unused local aliases (running_id, con) from the original were dropped
        links[s_id]['con'] = HTTPClient(uri=uri,
                                        strong_ssl=links[s_id]['hard_ssl_name_check'],
                                        timeout=timeout, data_timeout=data_timeout)
    except HTTPEXCEPTIONS as exp:
        # Py2-only "except ..., exp" replaced with the portable "as" form
        logger.info("Connection problem to the %s %s: %s",
                    i_type, links[s_id]['name'], str(exp))
        links[s_id]['con'] = None
        return
def do_stop(self):
    """Stop all workers modules and sockets

    :return: None
    """
    logger.info("[%s] Stopping all workers", self.name)
    for worker in self.workers.values():
        try:
            worker.terminate()
            worker.join(timeout=1)
        except (AttributeError, AssertionError):
            # Worker already dead, or we are running inside a worker
            pass
    # Let the base class stop modules and close sockets
    super(Satellite, self).do_stop()
def do_loop_turn(self):
    """Scheduler loop turn: wait the initial conf, then run.

    :return: None
    """
    self.wait_for_initial_conf()
    if not self.new_conf:
        # Nothing received: probably asked to stop
        return

    logger.info("New configuration received")
    self.setup_new_conf()
    logger.info("New configuration loaded")
    self.sched.run()
def add(self, elt):
    """Add elt to this broker.

    Original comment : Schedulers have some queues. We can simplify the
    call by adding elements into the proper queue just by looking at
    their type:
    Brok -> self.broks
    TODO: better tag ID?
    External commands -> self.external_commands

    :param elt: object to add
    :type elt: object
    :return: None
    """
    cls_type = elt.__class__.my_type
    if cls_type == 'brok':
        # For brok, we TAG brok with our instance_id
        elt.instance_id = 0
        self.broks_internal_raised.append(elt)
        return
    elif cls_type == 'externalcommand':
        # NOTE(review): this logs the ExternalCommand *class* __dict__, not
        # the received command — looks unintended, confirm before changing.
        logger.debug("Enqueuing an external command '%s'", str(ExternalCommand.__dict__))
        self.external_commands.append(elt)
    # Maybe we got a Message from the modules, it's way to ask something
    # like from now a full data from a scheduler for example.
    elif cls_type == 'message':
        # We got a message, great!
        logger.debug(str(elt.__dict__))
        if elt.get_type() == 'NeedData':
            data = elt.get_data()
            # Full instance id means: I got no data for this scheduler
            # so give me all dumbass!
            if 'full_instance_id' in data:
                c_id = data['full_instance_id']
                source = elt.source
                logger.info('The module %s is asking me to get all initial data '
                            'from the scheduler %d', source, c_id)
                # so we just reset the connection and the running_id,
                # it will just get all new things
                try:
                    self.schedulers[c_id]['con'] = None
                    self.schedulers[c_id]['running_id'] = 0
                except KeyError:
                    # maybe this instance was not known, forget it
                    logger.warning("the module %s ask me a full_instance_id "
                                   "for an unknown ID (%d)!", source, c_id)
        # Maybe a module tells me that it's dead, I must log it's last words...
        if elt.get_type() == 'ICrash':
            data = elt.get_data()
            logger.error('the module %s just crash! Please look at the traceback:',
                         data['name'])
            logger.error(data['trace'])
def is_me(self, lookup_name):
    """Check whether the given name designates this arbiter.

    :param lookup_name: name of arbiter to check
    :type lookup_name: str
    :return: True if the parameter name matches this arbiter's name
    :rtype: bool
    """
    fqdn = socket.getfqdn()
    logger.info("And arbiter is launched with the hostname:%s "
                "from an arbiter point of view of addr:%s", self.host_name, fqdn)
    if lookup_name:
        return lookup_name == self.get_name()
    return self.host_name == fqdn or self.host_name == socket.gethostname()
class Action(ActionBase):
    """Action class for Windows systems
    """
    properties = ActionBase.properties.copy()

    def execute__(self):
        """Execute action in a subprocess.

        :return: None
        """
        # 2.7 and higher Python version need a list of args for cmd
        # 2.4->2.6 accept just the string command
        if sys.version_info < (2, 7):
            cmd = self.command
        else:
            try:
                cmd = shlex.split(self.command.encode('utf8', 'ignore'))
            except Exception as exp:
                # Py2-only "except ..., exp" replaced with the portable form.
                # The command line cannot be split: flag the action as done/invalid.
                self.output = 'Not a valid shell command: ' + exp.__str__()
                self.exit_status = 3
                self.status = 'done'
                self.execution_time = time.time() - self.check_time
                return

        try:
            self.process = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                env=self.local_env, shell=True)
        except WindowsError as exp:
            logger.info("We kill the process: %s %s", exp, self.command)
            self.status = 'timeout'
            self.execution_time = time.time() - self.check_time
def get_objects(self):
    """Return the objects this dummy arbiter module contributes (hosts only)."""
    logger.info("[Dummy Arbiter] Ask me for objects to return")

    template = {'name': 'dummy host from dummy arbiter module',
                'register': '0'}
    dummy = {'host_name': "dummyhost1",
             'use': 'linux-server',
             'address': 'localhost'}
    objects = {'hosts': [template, dummy]}

    logger.info("[Dummy Arbiter] Returning to Arbiter the hosts: %s", str(objects))
    return objects
def main(self):
    """Main receiver function.
    Init daemon and loop forever

    :return: None
    """
    try:
        self.load_config_file()

        # Setting log level
        logger.setLevel(self.log_level)
        # Force the debug level if the daemon is said to start with such level
        if self.debug:
            logger.setLevel('DEBUG')

        # Look if we are enabled or not. If ok, start the daemon mode
        self.look_for_early_exit()

        for line in self.get_header():
            logger.info(line)

        logger.info("[Receiver] Using working directory: %s",
                    os.path.abspath(self.workdir))

        self.do_daemon_init_and_start()
        self.load_modules_manager()

        # We wait for initial conf
        self.wait_for_initial_conf()
        if not self.new_conf:
            return
        self.setup_new_conf()

        # Do the modules part, we have our modules in self.modules
        # REF: doc/receiver-modules.png (1)
        self.do_load_modules(self.modules)
        # and start external modules too
        self.modules_manager.start_external_instances()

        # Now the main loop
        self.do_mainloop()
    except Exception:
        # Py2-only "except Exception, exp" replaced; the bound name was unused
        self.print_unrecoverable(traceback.format_exc())
        raise
def main(self):
    """Main receiver function.
    Init daemon and loop forever

    :return: None
    """
    try:
        self.load_config_file()

        # Setting log level
        logger.setLevel(self.log_level)
        # Force the debug level if the daemon is said to start with such level
        if self.debug:
            logger.setLevel('DEBUG')

        # Look if we are enabled or not. If ok, start the daemon mode
        self.look_for_early_exit()

        for line in self.get_header():
            logger.info(line)

        logger.info("[Receiver] Using working directory: %s",
                    os.path.abspath(self.workdir))

        self.do_daemon_init_and_start()
        self.load_modules_manager()

        # We wait for initial conf
        self.wait_for_initial_conf()
        if not self.new_conf:
            return
        self.setup_new_conf()

        # Do the modules part, we have our modules in self.modules
        # REF: doc/receiver-modules.png (1)
        self.modules_manager.set_modules(self.modules)
        self.do_load_modules()
        # and start external modules too
        self.modules_manager.start_external_instances()

        # Now the main loop
        self.do_mainloop()
    except Exception:
        # Py2-only "except Exception, exp" replaced; the bound name was unused
        self.print_unrecoverable(traceback.format_exc())
        raise
def check_activation(self):
    """Enter or exit downtime if necessary.

    :return: None
    """
    now = time.time()
    previously_in_effect = self.is_in_effect
    self.is_in_effect = (self.start_time <= now <= self.end_time)
    logger.info("CHECK ACTIVATION:%s", self.is_in_effect)

    # Raise a log entry when we get in the downtime
    if self.is_in_effect and not previously_in_effect:
        self.enter()
    # Same for exit purpose (both transitions are mutually exclusive)
    elif previously_in_effect and not self.is_in_effect:
        self.exit()
def fill_initial_broks(self, bname):
    """Get initial_broks type broks from scheduler, used by brokers.

    Do not send broks, only make scheduler internal processing

    :param bname: broker name, used to filter broks
    :type bname: str
    :return: None
    TODO: Maybe we should check_last time we did it to prevent DDoS
    """
    with self.app.conf_lock:
        # NOTE(review): membership is tested on self.app.brokers but the
        # entry is created in self.app.sched.brokers — confirm these are
        # the same mapping, otherwise an existing broker entry can be
        # reset (losing has_full_broks) on every call.
        if bname not in self.app.brokers:
            logger.info("A new broker just connected : %s", bname)
            self.app.sched.brokers[bname] = {'broks': {}, 'has_full_broks': False}
        env = self.app.sched.brokers[bname]
        if not env['has_full_broks']:
            # First full request from this broker: restart from a clean slate
            env['broks'].clear()
            self.app.sched.fill_initial_broks(bname, with_logs=True)
def init(self):
    """Initialize Graphite connection.

    :return: None or socket
    """
    logger.info("[Graphite] initializing connection to %s:%d ...",
                str(self.host), self.port)
    try:
        self.con = socket()
        self.con.connect((self.host, self.port))
    except IOError as e:
        # do not raise an exception - logging is enough ...
        logger.error("[Graphite] Graphite Carbon instance connexion failed"
                     " IOError: %s", str(e))
        self.con = None
    return self.con
def is_me(self, lookup_name):
    """Check whether the given name designates this arbiter.

    :param lookup_name: name of arbiter to check
    :type lookup_name: str
    :return: True if the parameter name matches this arbiter's name
    :rtype: bool
    """
    fqdn = socket.getfqdn()
    logger.info("And arbiter is launched with the hostname:%s "
                "from an arbiter point of view of addr:%s", self.host_name, fqdn)
    if lookup_name:
        return lookup_name == self.get_name()
    return self.host_name == fqdn or self.host_name == socket.gethostname()
def manage_initial_host_status_brok(self, b):
    """Initialize the cache entry for a host from its initial status brok.

    Only the Graphite-related custom variables are kept.

    :param b: initial host status brok
    :type b: object
    :return: None
    """
    host_name = b.data['host_name']
    logger.info("[Graphite] got initial host status: %s", host_name)

    cache_entry = {}
    customs = b.data['customs']
    for custom_key in ('_GRAPHITE_PRE', '_GRAPHITE_GROUP'):
        if custom_key in customs:
            cache_entry[custom_key] = customs[custom_key]
    self.hosts_cache[host_name] = cache_entry

    logger.debug("[Graphite] initial host status received: %s", host_name)
def start_external_instances(self, late_start=False):
    """Launch the external instances that loaded correctly.

    :param late_start: If late_start, don't look for last_init_try
    :type late_start: bool
    :return: None
    """
    external_instances = [i for i in self.instances if i.is_external]
    for inst in external_instances:
        # But maybe the init failed a bit, so bypass this ones from now
        if not self.try_instance_init(inst, late_start=late_start):
            logger.warning("The module '%s' failed to init, I will try to restart it later",
                           inst.get_name())
            self.to_restart.append(inst)
            continue
        # ok, init succeed
        logger.info("Starting external module %s", inst.get_name())
        inst.start()
def fill_initial_broks(self, bname):
    """Get initial_broks type broks from scheduler, used by brokers.

    Do not send broks, only make scheduler internal processing

    :param bname: broker name, used to filter broks
    :type bname: str
    :return: None
    TODO: Maybe we should check_last time we did it to prevent DDoS
    """
    with self.app.conf_lock:
        # NOTE(review): membership is tested on self.app.brokers but the
        # entry is created in self.app.sched.brokers — confirm these are
        # the same mapping, otherwise an existing broker entry can be
        # reset (losing has_full_broks) on every call.
        if bname not in self.app.brokers:
            logger.info("A new broker just connected : %s", bname)
            self.app.sched.brokers[bname] = {
                'broks': {},
                'has_full_broks': False
            }
        env = self.app.sched.brokers[bname]
        if not env['has_full_broks']:
            # First full request from this broker: restart from a clean slate
            env['broks'].clear()
            self.app.sched.fill_initial_broks(bname, with_logs=True)
def check_alive_instances(self):
    """Check alive instances. If one died, log an error and flag it to restart.

    :return: None
    """
    # Only for external
    for inst in self.instances:
        if inst in self.to_restart:
            # Already flagged: nothing more to do for it
            continue

        if inst.is_external and not inst.process.is_alive():
            logger.error("The external module %s goes down unexpectedly!",
                         inst.get_name())
            logger.info("Setting the module %s to restart", inst.get_name())
            # We clean its queues, they are no more useful
            inst.clear_queues(self.manager)
            self.to_restart.append(inst)
            # Ok, no need to look at queue size now
            continue

        # Now look at the queue size. If above value, the module should have a
        # huge problem and so bailout. It's not a perfect solution, more a watchdog
        # If max_queue_size is 0, don't check this
        if self.max_queue_size == 0:
            continue

        # Ok, go launch the dog!
        queue_size = 0
        try:
            queue_size = inst.to_q.qsize()
        except Exception:
            # Py2-only "except Exception, exp" replaced; qsize() may be
            # unavailable (platform) or the queue gone — keep the 0 default.
            pass
        if queue_size > self.max_queue_size:
            logger.error("The external module %s got a too high brok queue size (%s > %s)!",
                         inst.get_name(), queue_size, self.max_queue_size)
            logger.info("Setting the module %s to restart", inst.get_name())
            # We clean its queues, they are no more useful
            inst.clear_queues(self.manager)
            self.to_restart.append(inst)
def __setstate_deprecated__(self, state):
    """In 1.0 we move to a dict save.

    Restore an instance from the legacy pickle format (a list of values,
    consumed from the end with pop(), with '_id' popped first).

    :param state: it's the state (legacy format)
    :type state: list
    :return: None
    TODO: REMOVE THIS"""
    cls = self.__class__
    # Check if the len of this state is like the previous,
    # if not, we will do errors!
    # -1 because of the '_id' prop
    if len(cls.properties) != (len(state) - 1):
        logger.info("Passing downtime")
        return

    self._id = state.pop()
    for prop in cls.properties:
        # NOTE(review): pop() takes values from the END of the list, so
        # properties are consumed in reverse save order — confirm the
        # saver wrote them accordingly.
        val = state.pop()
        setattr(self, prop, val)
    if self._id >= cls._id:
        # Keep the class-level id counter ahead of any restored id
        cls._id = self._id + 1
def test_utf8log(self): sutf = 'h\351h\351' # Latin Small Letter E with acute in Latin-1 logger.info(sutf) sutf8 = u'I love myself $£¤' # dollar, pound, currency logger.info(sutf8) s = unichr(40960) + u'abcd' + unichr(1972) logger.info(s)
def stop_process(self):
    """Request the module process to stop and release it.

    :return: None
    """
    if not self.process:
        return

    logger.info("I'm stopping module %r (pid=%s)", self.get_name(), self.process.pid)
    # Ask politely first and give the process one second to comply
    self.process.terminate()
    self.process.join(timeout=1)

    if self.process.is_alive():
        logger.warning("%r is still alive normal kill, I help it to die",
                       self.get_name())
        self.kill()
        self.process.join(1)
        if self.process.is_alive():
            logger.error("%r still alive after brutal kill, I leave it.",
                         self.get_name())

    self.process = None
def do_pynag_con_init(self, s_id):
    """Initialize a connection with scheduler having '_id'.

    Return the new connection to the scheduler if it succeeded,
    else: any error OR sched is inactive: return None.
    NB: if sched is inactive then None is directly returned.

    :param s_id: scheduler s_id to connect to
    :type s_id: int
    :return: scheduler connection object or None
    :rtype: alignak.http.client.HTTPClient
    """
    sched = self.schedulers[s_id]
    if not sched['active']:
        # Inactive scheduler: nothing to connect to
        return

    sname = sched['name']
    uri = sched['uri']
    timeout = sched['timeout']
    data_timeout = sched['data_timeout']
    logger.info("[%s] Init connection with %s at %s (%ss,%ss)",
                self.name, sname, uri, timeout, data_timeout)
    try:
        # Unused aliases (running_id, sch_con) from the original were dropped
        sched['con'] = HTTPClient(
            uri=uri, strong_ssl=sched['hard_ssl_name_check'],
            timeout=timeout, data_timeout=data_timeout)
    except HTTPEXCEPTIONS as exp:
        # Py2-only "except ..., exp" replaced with the portable "as" form
        logger.warning("[%s] Scheduler %s is not initialized or has network problem: %s",
                       self.name, sname, str(exp))
        sched['con'] = None
        return