Example #1
0
 def add(self, elt):
     cls_type = elt.__class__.my_type
     if cls_type == 'brok':
         # For brok, we TAG brok with our instance_id
         elt.data['instance_id'] = 0
         self.broks_internal_raised.append(elt)
         return
     elif cls_type == 'externalcommand':
         print "Adding in queue an external command", ExternalCommand.__dict__
         self.external_commands.append(elt)
     # Maybe we got a Message from the modules, it's way to ask something
     #like from now a full data from a scheduler for example.
     elif cls_type == 'message':
         # We got a message, great!
         print elt.__dict__
         if elt.get_type() == 'NeedData':
             data = elt.get_data()
             # Full instance id mean : I got no data for this scheduler
             # so give me all dumbass!
             if 'full_instance_id' in data:
                 c_id = data['full_instance_id']
                 logger.log('A module is asking me to get all initial data from the scheduler %d' % c_id)
                 # so we just reset the connexion adn the running_id, it will just get all new things
                 self.schedulers[c_id]['con'] = None
                 self.schedulers[c_id]['running_id'] = 0
Example #2
0
    def load(self):
        now = int(time.time())
        """ Try to import the requested modules ; put the imported modules in self.imported_modules.
The previous imported modules, if any, are cleaned before. """ 
        # We get all modules file with .py
        modules_files = [ fname[:-3] for fname in os.listdir(self.modules_path) 
                         if fname.endswith(".py") ]

        # And directories
        modules_files.extend([ fname for fname in os.listdir(self.modules_path)
                               if os.path.isdir(os.path.join(self.modules_path, fname)) ])

        # Now we try to load thems
        # So first we add their dir into the sys.path
        if not self.modules_path in sys.path:
            sys.path.append(self.modules_path)

        # We try to import them, but we keep only the one of
        # our type
        del self.imported_modules[:]
        for fname in modules_files:
            #print "Try to load", fname
            try:
                m = __import__(fname)
                if not hasattr(m, 'properties'):
                    continue

                # We want to keep only the modules of our type
                if self.modules_type in m.properties['daemons']:
                    self.imported_modules.append(m)
            except Exception , exp:
                logger.log("Warning in importing module : %s" % exp)
Example #3
0
    def no_loop_in_parents(self):
        """Check that no host takes part in a circular parent/child chain.

        A Graph is built with every (non-None) host as a node and one edge
        per (non-None) parent relation; ``loop_check()`` then reports the
        hosts found in a loop, and one error is logged for each of them.

        Returns: True when no host is reported, False otherwise.
        """
        # None entries are skipped for BOTH the nodes and the edges, so a
        # None host can no longer blow up on ``h.parents``.
        hosts = [h for h in self if h is not None]

        # Create the parent graph, with all hosts as nodes
        parents = Graph()
        for h in hosts:
            parents.add_node(h)

        # And now fill the edges: parent -> child
        for h in hosts:
            for p in h.parents:
                if p is not None:
                    parents.add_edge(p, h)

        # Now get the list of all hosts in a loop
        host_in_loops = parents.loop_check()

        # and log an error for each of them
        r = True
        for h in host_in_loops:
            logger.log("Error: The host '%s' is part of a circular parent/child chain!" % h.get_name())
            r = False

        return r
Example #4
0
    def is_correct(self):
        state = True  #guilty or not? :)
        cls = self.__class__

        #All of the above are checks in the notificationways part
        for prop, entry in cls.properties.items():
            if prop not in _special_properties:
                if not hasattr(self, prop) and entry.required:
                    print self.get_name(), " : I do not have", prop
                    state = False  #Bad boy...

        #There is a case where there is no nw : when there is not special_prop defined
        #at all!!
        if self.notificationways == []:
            for p in _special_properties:
                print self.get_name(), " : I'm missing the property %s" % p
                state = False

        if hasattr(self, 'contact_name'):
            for c in cls.illegal_object_name_chars:
                if c in self.contact_name:
                    logger.log(
                        "%s : My contact_name got the caracter %s that is not allowed."
                        % (self.get_name(), c))
                    state = False
        else:
            if hasattr(self,
                       'alias'):  #take the alias if we miss the contact_name
                self.contact_name = self.alias
        return state
Example #5
0
    def is_correct(self):
        state = True #guilty or not? :)
        cls = self.__class__

        #All of the above are checks in the notificationways part
        for prop, entry in cls.properties.items():
            if prop not in _special_properties:
                if not hasattr(self, prop) and entry.required:
                    print self.get_name(), " : I do not have", prop
                    state = False #Bad boy...

        #There is a case where there is no nw : when there is not special_prop defined
        #at all!!
        if self.notificationways == []:
            for p in _special_properties:
                print self.get_name()," : I'm missing the property %s" % p
                state = False

        if hasattr(self, 'contact_name'):
            for c in cls.illegal_object_name_chars:
                if c in self.contact_name:
                    logger.log("%s : My contact_name got the caracter %s that is not allowed." % (self.get_name(), c))
                    state = False
        else:
            if hasattr(self, 'alias'): #take the alias if we miss the contact_name
                self.contact_name = self.alias

        return state
Example #6
0
def search(look_at):
    """Search shinken.io for the given keywords.

    look_at: iterable of keyword strings; they are comma-joined into the
    ``keywords`` query parameter of the ``searchcli`` URL.

    Returns the ``result`` entry of the JSON answer when its ``status`` is
    200; otherwise ``result`` is logged and an empty list is returned.
    Exits the process with code 2 when the HTTP code is not 200.
    """
    # The search needs no API key, only the optional proxy
    proxy = CONFIG['shinken.io']['proxy']

    # Plain (non POST) request with a 10s timeout
    c = pycurl.Curl()
    response = StringIO()
    try:
        c.setopt(c.POST, 0)
        c.setopt(c.CONNECTTIMEOUT, 10)
        c.setopt(c.TIMEOUT, 10)
        if proxy:
            c.setopt(c.PROXY, proxy)

        args = {'keywords': ','.join(look_at)}
        c.setopt(c.URL, str('shinken.io/searchcli?' + urllib.urlencode(args)))
        c.setopt(pycurl.WRITEFUNCTION, response.write)
        #c.setopt(c.VERBOSE, 1)
        c.perform()
        r = c.getinfo(pycurl.HTTP_CODE)
    finally:
        # Release the curl handle even when the transfer raises
        c.close()

    if r != 200:
        logger.error("There was a critical error : %s" % response.getvalue())
        sys.exit(2)

    ret = json.loads(response.getvalue().replace('\\/', '/'))
    status = ret.get('status')
    result = ret.get('result')
    if status != 200:
        logger.log(result)
        return []
    return result
Example #7
0
def publish_archive(archive):
    """Push an archive file to http://shinken.io/push.

    archive: path of the file to upload; it is sent as the ``data`` field
    of a multipart form (content type application/x-gzip) together with
    the configured ``api_key``.

    The ``text`` entry of the JSON answer is logged, as an error unless the
    answer ``status`` is 200. Exits the process with code 2 when the HTTP
    code is not 200. Returns None.
    """
    proxy = CONFIG['shinken.io']['proxy']
    api_key = CONFIG['shinken.io']['api_key']

    # Ok we will push the file with a 10s timeout
    c = pycurl.Curl()
    response = StringIO()
    try:
        c.setopt(c.POST, 1)
        c.setopt(c.CONNECTTIMEOUT, 10)
        c.setopt(c.TIMEOUT, 10)
        if proxy:
            c.setopt(c.PROXY, proxy)
        c.setopt(c.URL, "http://shinken.io/push")
        c.setopt(c.HTTPPOST, [("api_key", api_key),
                              ("data",
                               (c.FORM_FILE, str(archive),
                                c.FORM_CONTENTTYPE, "application/x-gzip"))
                              ])
        c.setopt(pycurl.WRITEFUNCTION, response.write)
        c.setopt(c.VERBOSE, 1)
        c.perform()
        r = c.getinfo(pycurl.HTTP_CODE)
    finally:
        # Release the curl handle even when the transfer raises
        c.close()

    if r != 200:
        logger.error("There was a critical error : %s" % response.getvalue())
        sys.exit(2)

    ret = json.loads(response.getvalue().replace('\\/', '/'))
    status = ret.get('status')
    text = ret.get('text')
    if status == 200:
        logger.log(text)
    else:
        logger.error(text)
Example #8
0
    def get_scheduler_ordered_list(self, r):
        """Build the list of schedulers usable for realm ``r``.

        The list holds every scheduler of ``r`` plus the spare schedulers
        of its higher realms. It is sorted with
        ``alive_then_spare_then_deads`` and then reversed, because callers
        pop() from the end. The order (before reversal) is logged.
        """
        # All the schedulers of the realm itself
        scheds = list(r.schedulers)

        # Then the spare ones of the higher realms: added after the realm
        # ones, so they are used after the spares of the realm
        for higher_r in r.higher_realms:
            for candidate in higher_r.schedulers:
                if not candidate.spare:
                    continue
                scheds.append(candidate)

        # Master first, then spare, then the dead ones (we do not care
        # about them) ... and flip it: pop() takes the last, we need the first
        scheds.sort(alive_then_spare_then_deads)
        scheds.reverse()

        # DBG: dump the names in the un-flipped order
        names = [s.get_name() for s in scheds]
        header = '[%s] Schedulers order : ' % r.get_name()
        logger.log(header + ''.join('%s ' % n for n in reversed(names)))
        # END DBG

        return scheds
Example #9
0
 def add(self, elt):
     cls_type = elt.__class__.my_type
     if cls_type == "brok":
         # For brok, we TAG brok with our instance_id
         elt.data["instance_id"] = 0
         self.broks_internal_raised.append(elt)
         return
     elif cls_type == "externalcommand":
         print "Adding in queue an external command", ExternalCommand.__dict__
         self.external_commands.append(elt)
     # Maybe we got a Message from the modules, it's way to ask something
     # like from now a full data from a scheduler for example.
     elif cls_type == "message":
         # We got a message, great!
         print elt.__dict__
         if elt.get_type() == "NeedData":
             data = elt.get_data()
             # Full instance id mean : I got no data for this scheduler
             # so give me all dumbass!
             if "full_instance_id" in data:
                 c_id = data["full_instance_id"]
                 logger.log("A module is asking me to get all initial data from the scheduler %d" % c_id)
                 # so we just reset the connection adn the running_id, it will just get all new things
                 try:
                     self.schedulers[c_id]["con"] = None
                     self.schedulers[c_id]["running_id"] = 0
                 except KeyError:  # maybe this instance was not known, forget it
                     print "WARNING: a module ask me a full_instance_id for an unknown ID!", c_id
    def init(self):
        """Connect to the NDO database and set up the module caches.

        Creates ``self.db`` (a DBMysql using the ``nagios_`` table prefix),
        connects, detects whether the ``long_output`` column exists (stored
        in ``self.centreon_version``) and initialises the empty caches and
        the brok todo list.
        """
        logger.log("I connect to NDO database")
        self.db = DBMysql(
            self.host, self.user, self.password, self.database,
            self.character_set, table_prefix='nagios_', port=self.port)
        self.connect_database()

        # Host and service caches. Layout: first the instance id, then
        # the host / (host, service desc) to reach the wanted data
        self.services_cache_sync = {}
        self.hosts_cache_sync = {}

        # Centreon specific fields (like long_output) must be looked for.
        # NOTE(review): the schema name 'ndo' is hard-coded here while the
        # connection uses self.database -- confirm this is intended.
        query = u"select TABLE_NAME from information_schema.columns where TABLE_SCHEMA='ndo' and TABLE_NAME='nagios_servicestatus' and COLUMN_NAME='long_output';"
        self.db.execute_query(query)
        row = self.db.fetchone()
        self.centreon_version = row is not None and len(row) >= 1
        if self.centreon_version:
            logger.log("[MySQL/NDO] Using the centreon version")

        # Database id cache, so the database is not queried every time
        self.database_id_cache = {}

        # Mapping between the service_id in Shinken and in the database,
        # because host_name is not always reachable from a service
        self.mapping_service_id = {}

        # Todo list to manage brok
        self.todo = []
Example #11
0
    def get_new_broks(self, type='scheduler'):
            # Get the good links tab for looping..
        links = self.get_links_from_type(type)
        if links is None:
            logger.log('DBG: Type unknown for connection! %s' % type)
            return

        # We check for new check in each schedulers and put
        # the result in new_checks
        for sched_id in links:
            try:
                con = links[sched_id]['con']
                if con is not None: # None = not initilized
                    tmp_broks = con.get_broks()
                    for b in tmp_broks.values():
                        b.instance_id = links[sched_id]['instance_id']

                    # Ok, we can add theses broks to our queues
                    self.add_broks_to_queue(tmp_broks.values())

                else: # no con? make the connection
                    self.pynag_con_init(sched_id, type=type)
            # Ok, con is not known, so we create it
            except KeyError , exp:
                print exp
                self.pynag_con_init(sched_id, type=type)
            except Pyro.errors.ProtocolError , exp:
                logger.log("[%s] Connection problem to the %s %s : %s" % (self.name, type, links[sched_id]['name'], str(exp)))
                links[sched_id]['con'] = None
Example #12
0
def search(look_at):
    """Ask shinken.io for the packages matching some keywords.

    look_at: iterable of keyword strings, sent comma-joined as the
    ``keywords`` query parameter of the ``searchcli`` URL.

    Returns the ``result`` entry of the JSON reply when the reply
    ``status`` is 200, else logs ``result`` and returns ``[]``. The process
    exits with code 2 when the HTTP code is not 200.
    """
    # Only the (optional) proxy is needed: a search uses no API key
    proxy = CONFIG['shinken.io']['proxy']

    response = StringIO()
    # Non-POST request, 10s timeouts
    c = pycurl.Curl()
    try:
        c.setopt(c.POST, 0)
        c.setopt(c.CONNECTTIMEOUT, 10)
        c.setopt(c.TIMEOUT, 10)
        if proxy:
            c.setopt(c.PROXY, proxy)

        args = {'keywords': ','.join(look_at)}
        c.setopt(c.URL, str('shinken.io/searchcli?' + urllib.urlencode(args)))
        c.setopt(pycurl.WRITEFUNCTION, response.write)
        #c.setopt(c.VERBOSE, 1)
        c.perform()
        r = c.getinfo(pycurl.HTTP_CODE)
    finally:
        # The handle must be closed whatever happened during the transfer
        c.close()

    if r != 200:
        logger.error("There was a critical error : %s" % response.getvalue())
        sys.exit(2)

    ret = json.loads(response.getvalue().replace('\\/', '/'))
    status = ret.get('status')
    result = ret.get('result')
    if status != 200:
        logger.log(result)
        return []
    return result
Example #13
0
    def hook_save_retention(self, daemon):
        log_mgr = logger
        logger.log(
            "[PickleRetentionGeneric] asking me to update the retention objects"
        )

        #Now the flat file method
        try:
            # Open a file near the path, with .tmp extension
            # so in cae or problem, we do not lost the old one
            f = open(self.path + '.tmp', 'wb')

            # We get interesting retention data from the daemon it self
            all_data = daemon.get_retention_data()

            # And we save it on file :)

            #s = cPickle.dumps(all_data)
            #s_compress = zlib.compress(s)
            cPickle.dump(all_data, f, protocol=cPickle.HIGHEST_PROTOCOL)
            #f.write(s_compress)
            f.close()

            # Now move the .tmp fiel to the real path
            shutil.move(self.path + '.tmp', self.path)
        except IOError, exp:
            log_mgr.log("Error: retention file creation failed, %s" % str(exp))
            return
Example #14
0
    def get_scheduler_ordered_list(self, r):
        """Return the schedulers to use for realm ``r``, ready to be pop()ed.

        Candidates are the schedulers of ``r`` followed by the spare
        schedulers of its higher realms. They are sorted with
        ``alive_then_spare_then_deads`` then reversed (pop() takes the last
        element). The sorted order is logged for debugging.
        """
        ordered = []
        # Schedulers of the realm first
        ordered.extend(r.schedulers)

        # Spares of the higher realms come after the ones of the realm,
        # so they are used after the spares of the realm
        for upper in r.higher_realms:
            ordered.extend([sched for sched in upper.schedulers if sched.spare])

        # Master, then spare, then the dead ones (not that we care
        # about those)
        ordered.sort(alive_then_spare_then_deads)
        # pop() takes the last one and we need the first
        ordered.reverse()

        # DBG: dump
        labels = [sched.get_name() for sched in ordered]
        labels.reverse()
        message = '[%s] Schedulers order : ' % r.get_name()
        message += ''.join(['%s ' % label for label in labels])
        logger.log(message)
        # END DBG

        return ordered
Example #15
0
    def try_instance_init(self, inst):
        """ Try to "init" the given module instance. 
Returns: True on successfull init. False if instance init method raised any Exception. """
        try:
            print "Trying to init module", inst.get_name()
            inst.init_try += 1
            # Maybe it's a retry
            if inst.init_try > 1:
                # Do not try until 5 sec, or it's too loopy
                if inst.last_init_try > time.time() - 5:
                    return False
            inst.last_init_try = time.time()

            # If it's an external, create/update Queues()
            if inst.is_external:
                inst.create_queues()

            inst.init()
        except Exception, e:
            logger.log(
                "Error : the instance %s raised an exception %s, I remove it!"
                % (inst.get_name(), str(e)))
            output = cStringIO.StringIO()
            traceback.print_exc(file=output)
            logger.log("Back trace of this remove : %s" % (output.getvalue()))
            output.close()
            return False
    def hook_save_retention(self, daemon):
        log_mgr = logger
        logger.log("[PickleRetentionGeneric] asking me to update the retention objects")

        #Now the flat file method
        try:
            # Open a file near the path, with .tmp extension
            # so in cae or problem, we do not lost the old one
            f = open(self.path+'.tmp', 'wb')
            
            # We get interesting retention data from the daemon it self
            all_data = daemon.get_retention_data()
            
            # And we save it on file :)

            #s = cPickle.dumps(all_data)
            #s_compress = zlib.compress(s)
            cPickle.dump(all_data, f, protocol=cPickle.HIGHEST_PROTOCOL)
            #f.write(s_compress)
            f.close()
            
            # Now move the .tmp fiel to the real path
            shutil.move(self.path+'.tmp', self.path)
        except IOError , exp:
            log_mgr.log("Error: retention file creation failed, %s" % str(exp))
            return
Example #17
0
    def try_instance_init(self, inst):
        """ Try to "init" the given module instance. 
Returns: True on successfull init. False if instance init method raised any Exception. """ 
        try:
            print "Trying to init module", inst.get_name()
            inst.init_try += 1
            # Maybe it's a retry
            if inst.init_try > 1:
                # Do not try until 5 sec, or it's too loopy
                if inst.last_init_try > time.time() - 5:
                    return False
            inst.last_init_try = time.time()

            # If it's an external, create/update Queues()
            if inst.is_external:
                inst.create_queues()

            inst.init()
        except Exception, e:
            logger.log("Error : the instance %s raised an exception %s, I remove it!" % (inst.get_name(), str(e)))
            output = cStringIO.StringIO()
            traceback.print_exc(file=output)
            logger.log("Back trace of this remove : %s" % (output.getvalue()))
            output.close()
            return False
Example #18
0
 def add(self, elt):
     cls_type = elt.__class__.my_type
     if cls_type == 'brok':
         # For brok, we TAG brok with our instance_id
         elt.data['instance_id'] = 0
         self.broks_internal_raised.append(elt)
         return
     elif cls_type == 'externalcommand':
         print "Adding in queue an external command", ExternalCommand.__dict__
         self.external_commands.append(elt)
     # Maybe we got a Message from the modules, it's way to ask something
     #like from now a full data from a scheduler for example.
     elif cls_type == 'message':
         # We got a message, great!
         print elt.__dict__
         if elt.get_type() == 'NeedData':
             data = elt.get_data()
             # Full instance id mean : I got no data for this scheduler
             # so give me all dumbass!
             if 'full_instance_id' in data:
                 c_id = data['full_instance_id']
                 logger.log(
                     'A module is asking me to get all initial data from the scheduler %d'
                     % c_id)
                 # so we just reset the connexion adn the running_id, it will just get all new things
                 self.schedulers[c_id]['con'] = None
                 self.schedulers[c_id]['running_id'] = 0
Example #19
0
    def compensate_system_time_change(self, difference):
        """Compensate a system time change of ``difference`` for all hosts/services/checks/notifs.

        ``self.program_start`` is shifted (never below 0), every host and
        service is asked to compensate, and each check/action whose status
        is 'scheduled' gets its ``t_to_go`` shifted. Checks and
        notifications are re-aligned on their check/notification period;
        when the period yields no time, the item is flagged as an error.
        """
        logger.log('Warning: A system time change of %d has been detected.  Compensating...' % difference)

        def flag_without_slot(item):
            # No valid time is left after the change: report it as an error,
            # with an error output.
            # NOTE(review): this writes ``state`` whereas the loops below
            # test ``status`` -- confirm which attribute is meant.
            item.state = 'waitconsume'
            item.exit_status = 2
            item.output = '(Error: there is no available check time after time change!)'
            item.check_time = time.time()
            item.execution_time = 0

        # Only a few values have to move
        self.program_start = max(0, self.program_start + difference)

        # Hosts and services compensate by themselves
        for h in self.sched.hosts:
            h.compensate_system_time_change(difference)
        for s in self.sched.services:
            s.compensate_system_time_change(difference)

        # Now the checks: the already launched ones must not be touched
        for chk in self.sched.checks.values():
            if chk.status != 'scheduled':
                continue
            shifted = max(0, chk.t_to_go + difference)
            owner = chk.ref
            # Not so simple: the new time must match the timeperiod
            shifted = owner.check_period.get_next_valid_time_from_t(shifted)
            if shifted is None:
                flag_without_slot(chk)
            else:
                chk.t_to_go = shifted
                owner.next_chk = shifted

        # And the actions, same rule for the already launched ones
        for act in self.sched.actions.values():
            if act.status != 'scheduled':
                continue
            shifted = max(0, act.t_to_go + difference)
            # Event handlers do not have a ref
            owner = getattr(act, 'ref', None)

            # A notification must be checked against its notification_period
            if act.is_a == 'notification':
                shifted = owner.notification_period.get_next_valid_time_from_t(shifted)
                # And it has a creation_time to shift too
                act.creation_time = act.creation_time + difference

            if shifted is None:
                flag_without_slot(act)
            else:
                act.t_to_go = shifted
Example #20
0
    def setup_new_conf(self):
        conf = self.new_conf
        self.new_conf = None
        self.cur_conf = conf
        # Got our name from the globals
        if 'receiver_name' in conf['global']:
            name = conf['global']['receiver_name']
        else:
            name = 'Unnamed receiver'
        self.name = name
        self.log.load_obj(self, name)

        print "[%s] Sending us configuration %s" % (self.name, conf)

        if not self.have_modules:
            self.modules = mods = conf['global']['modules']
            self.have_modules = True
            logger.log("[%s] We received modules %s " % (self.name,  mods))

        # Set our giving timezone from arbiter
        use_timezone = conf['global']['use_timezone']
        if use_timezone != 'NOTSET':
            logger.log("[%s] Setting our timezone to" % (self.name, use_timezone))
            os.environ['TZ'] = use_timezone
            time.tzset()
Example #21
0
    def get_new_broks(self, type='scheduler'):
        # Get teh good links tab for looping..
        links = self.get_links_from_type(type)
        if links is None:
            logger.log('DBG: Type unknown for connexion! %s' % type)
            return

        # We check for new check in each schedulers and put
        # the result in new_checks
        for sched_id in links:
            try:
                con = links[sched_id]['con']
                if con is not None:  # None = not initilized
                    tmp_broks = con.get_broks()
                    for b in tmp_broks.values():
                        b.instance_id = links[sched_id]['instance_id']

                    # Ok, we can add theses broks to our queues
                    self.add_broks_to_queue(tmp_broks.values())

                else:  # no con? make the connexion
                    self.pynag_con_init(sched_id, type=type)
            # Ok, con is not know, so we create it
            except KeyError, exp:
                print exp
                self.pynag_con_init(sched_id, type=type)
            except Pyro.errors.ProtocolError, exp:
                logger.log(
                    "[%s] Connexion problem to the %s %s : %s" %
                    (self.name, type, links[sched_id]['name'], str(exp)))
                links[sched_id]['con'] = None
Example #22
0
    def setup_new_conf(self):
        conf = self.new_conf
        self.new_conf = None
        self.cur_conf = conf
        # Got our name from the globals
        if 'receiver_name' in conf['global']:
            name = conf['global']['receiver_name']
        else:
            name = 'Unnamed receiver'
        self.name = name
        self.log.load_obj(self, name)

        print "[%s] Sending us configuration %s" % (self.name, conf)

        if not self.have_modules:
            self.modules = mods = conf['global']['modules']
            self.have_modules = True
            logger.log("[%s] We received modules %s " % (self.name, mods))

        # Set our giving timezone from arbiter
        use_timezone = conf['global']['use_timezone']
        if use_timezone != 'NOTSET':
            logger.log("[%s] Setting our timezone to" %
                       (self.name, use_timezone))
            os.environ['TZ'] = use_timezone
            time.tzset()
 def is_me(self):
     """Tell whether ``self.host_name`` designates the local machine.

     The configured name and the local FQDN are logged (print_it=False),
     then the name is compared with the FQDN and, failing that, with the
     plain hostname.
     """
     message = "And arbiter is launched with the hostname:%s from an arbiter point of view of addr :%s"
     logger.log(message % (self.host_name, socket.getfqdn()), print_it=False)
     if self.host_name == socket.getfqdn():
         return True
     return self.host_name == socket.gethostname()
Example #24
0
def publish_archive(archive):
    """Push a package archive to shinken.io with an HTTP form POST.

    The proxy and the API key are read from ``CONFIG['shinken.io']``. The
    file at path ``archive`` is sent as the ``data`` form field next to the
    ``api_key`` field.

    When the HTTP code is not 200 the response body is logged as an error
    and the process exits with status 2. Otherwise the body is decoded as
    JSON and its ``text`` is logged, as a normal message when ``status`` is
    200 and as an error otherwise.
    """
    # Now really publish it
    proxy = CONFIG['shinken.io']['proxy']
    api_key = CONFIG['shinken.io']['api_key']

    # Ok we will push the file with a 10s timeout
    c = pycurl.Curl()
    try:
        c.setopt(c.POST, 1)
        c.setopt(c.CONNECTTIMEOUT, 10)
        c.setopt(c.TIMEOUT, 10)
        if proxy:
            c.setopt(c.PROXY, proxy)
        c.setopt(c.URL, "http://shinken.io/push")
        c.setopt(c.HTTPPOST,
                 [("api_key", api_key),
                  ("data", (c.FORM_FILE, str(archive), c.FORM_CONTENTTYPE,
                            "application/x-gzip"))])
        response = StringIO()
        c.setopt(pycurl.WRITEFUNCTION, response.write)
        c.setopt(c.VERBOSE, 1)
        c.perform()
        r = c.getinfo(pycurl.HTTP_CODE)
    finally:
        # Release the curl handle even when the transfer raises
        c.close()

    if r != 200:
        logger.error("There was a critical error : %s" % response.getvalue())
        sys.exit(2)
    else:
        # The body is un-escaped ('\/' -> '/') before being parsed
        ret = json.loads(response.getvalue().replace('\\/', '/'))
        status = ret.get('status')
        text = ret.get('text')
        if status == 200:
            logger.log(text)
        else:
            logger.error(text)
Example #25
0
 def main(self):
     """Run loop turns until interrupted, then stop.

     Installs the signal handler, calls ``do_loop_turn`` for as long as
     ``self.interrupted`` is false and finally calls ``do_stop``.
     """
     self.set_signal_handler()
     running_msg = "[%s[%d]]: Now running.." % (self.name, os.getpid())
     logger.log(running_msg)
     while True:
         if self.interrupted:
             break
         self.do_loop_turn()
     self.do_stop()
     exiting_msg = "[%s]: exiting now.." % (self.name)
     logger.log(exiting_msg)
Example #26
0
    def main(self):
        
        self.load_config_file()
        
        for line in self.get_header():
            self.log.log(line)

        logger.log("[Broker] Using working directory : %s" % os.path.abspath(self.workdir))
        
        self.do_daemon_init_and_start()

        self.uri2 = self.pyro_daemon.register(self.interface, "ForArbiter")
        print "The Arbtier uri it at", self.uri2

        #  We wait for initial conf
        self.wait_for_initial_conf()
        if not self.new_conf:
            return

        self.setup_new_conf()

        # Set modules, init them and start external ones
        self.modules_manager.set_modules(self.modules)
        self.do_load_modules()
        self.modules_manager.start_external_instances()

        # Do the modules part, we have our modules in self.modules
        # REF: doc/broker-modules.png (1)
        self.hook_point('load_retention')

        # Now the main loop
        self.do_mainloop()
Example #27
0
    def main(self):

        self.load_config_file()

        for line in self.get_header():
            self.log.log(line)

        logger.log("[Receiver] Using working directory : %s" %
                   os.path.abspath(self.workdir))

        self.do_daemon_init_and_start()

        self.uri2 = self.pyro_daemon.register(self.interface, "ForArbiter")
        print "The Arbtier uri it at", self.uri2

        #  We wait for initial conf
        self.wait_for_initial_conf()
        if not self.new_conf:
            return

        self.setup_new_conf()

        self.modules_manager.set_modules(self.modules)
        self.do_load_modules()
        # and start external modules too
        self.modules_manager.start_external_instances()

        # Do the modules part, we have our modules in self.modules
        # REF: doc/receiver-modules.png (1)

        # Now the main loop
        self.do_mainloop()
Example #28
0
 def main(self):
     """Module "main" method, only used by external modules.

     Installs the signal handler, keeps calling ``do_loop_turn`` until
     ``self.interrupted`` becomes true, then calls ``do_stop``.
     """
     self.set_signal_handler()
     logger.log("[%s[%d]]: Now running.." % (self.name, os.getpid()))
     keep_running = not self.interrupted
     while keep_running:
         self.do_loop_turn()
         keep_running = not self.interrupted
     self.do_stop()
     logger.log("[%s]: exiting now.." % (self.name))
Example #29
0
 def create_connection(self):
     try:
         self.uri = pyro.create_uri(self.address, self.port, "ForArbiter", self.__class__.use_ssl)
         self.con = pyro.getProxy(self.uri)
         pyro.set_timeout(self.con, self.timeout)
     except Pyro_exp_pack , exp:
         self.con = None
         logger.log('Error : in creation connexion for %s : %s' % (self.get_name(), str(exp)))
Example #30
0
 def main(self):
     """Module "main" method, only used by external modules.

     Sets the signal handler up, loops on ``do_loop_turn`` while the module
     is not interrupted and calls ``do_stop`` before returning.
     """
     self.set_signal_handler()
     pid_banner = "[%s[%d]]: Now running.." % (self.name, os.getpid())
     logger.log(pid_banner)
     while True:
         if self.interrupted:
             break
         self.do_loop_turn()
     self.do_stop()
     logger.log("[%s]: exiting now.." % (self.name))
Example #31
0
    def hook_early_configuration(self, arb):
        logger.log("[IpTag] in hook late config")
        for h in arb.conf.hosts:
            if not hasattr(h, 'address') and not hasattr(h, 'host_name'):
                continue
            # The address to resolve
            addr = None
            
            #By default take the address, if not, take host_name
            if not hasattr(h, 'address'):
                addr = h.host_name
            else:
                addr = h.address
                
            print "Looking for h", h.get_name()
            print addr
            h_ip = None
            try:
                IP(addr)
                # If we reach here, it's it was a real IP :)
                h_ip = addr
            except:
                pass

            # Ok, try again with name resolution
            if not h_ip:
                try:
                    h_ip = socket.gethostbyname(addr)
                except:
                    pass

            # Ok, maybe we succeed :)
            print "Host ip is:", h_ip
            # If we got an ip that match and the object do not already got
            # the property, tag it!
            if h_ip and h_ip in self.ip_range:
                print "Is in the range"
                # 2 cases : append or replace.
                # append will join with the value if exist
                # replace will replace it if NOT existing
                if self.method == 'append':
                    orig_v = getattr(h, self.property, '')
                    print "Orig_v", orig_v
                    new_v = ','.join([orig_v, self.value])
                    print "Newv", new_v
                    setattr(h, self.property, new_v)
                    # If it's a poller_tag, remember to also tag commands!
                    if(self.property == 'poller_tag'):
                        h.check_command.poller_tag = self.value

                if self.method == 'replace':
                    if not hasattr(h, self.property):

                        # Ok, set the value!
                        setattr(h, self.property, self.value)
                        # If it's a poller_tag, remember to also tag commands!
                        if(self.property == 'poller_tag'):
                            h.check_command.poller_tag = self.value
Example #32
0
    def pynag_con_init(self, id, type="scheduler"):
        # Get teh good links tab for looping..
        links = self.get_links_from_type(type)
        if links is None:
            logger.log("DBG: Type unknown for connection! %s" % type)
            return

        if type == "scheduler":
            # If sched is not active, I do not try to init
            # it is just useless
            is_active = links[id]["active"]
            if not is_active:
                return

        # If we try to connect too much, we slow down our tests
        if self.is_connection_try_too_close(links[id]):
            return

        # Ok, we can now update it
        links[id]["last_connection"] = time.time()

        # DBG: print "Init connection with", links[id]['uri']
        running_id = links[id]["running_id"]
        # DBG: print "Running id before connection", running_id
        uri = links[id]["uri"]
        links[id]["con"] = Pyro.core.getProxyForURI(uri)

        try:
            # intial ping must be quick
            pyro.set_timeout(links[id]["con"], 5)
            links[id]["con"].ping()
            new_run_id = links[id]["con"].get_running_id()
            # data transfert can be longer
            pyro.set_timeout(links[id]["con"], 120)

            # The schedulers have been restart : it has a new run_id.
            # So we clear all verifs, they are obsolete now.
            if new_run_id != running_id:
                print "[%s] New running id for the %s %s : %s (was %s)" % (
                    self.name,
                    type,
                    links[id]["name"],
                    new_run_id,
                    running_id,
                )
                links[id]["broks"].clear()
                # we must ask for a enw full broks if
                # it's a scheduler
                if type == "scheduler":
                    print "[%s] I ask for a broks generation to the scheduler %s" % (self.name, links[id]["name"])
                    links[id]["con"].fill_initial_broks()
            # else:
            #     print "I do nto ask for brok generation"
            links[id]["running_id"] = new_run_id
        except (Pyro.errors.ProtocolError, Pyro.errors.CommunicationError), exp:
            logger.log("[%s] Connexion problem to the %s %s : %s" % (self.name, type, links[id]["name"], str(exp)))
            links[id]["con"] = None
            return
Example #33
0
 def register_local_log(self):
     # The arbiter don't have such an attribute
     if hasattr(self, 'use_local_log') and self.use_local_log:
         try:
             self.local_log_fd = self.log.register_local_log(self.local_log)
         except IOError, exp:
             print "Error : opening the log file '%s' failed with '%s'" % (self.local_log, exp)
             sys.exit(2)
         logger.log("Using the local log file '%s'" % self.local_log)
Example #34
0
 def stop_process(self):
     """Terminate the module process, if any, and forget it.

     The process gets ``terminate()`` and a join with a timeout of 1; when
     it is still alive afterwards ``self.__kill()`` is called. In every
     case ``self.process`` ends up set to None.
     """
     if not self.process:
         return
     stopping_msg = "I'm stopping module '%s' process pid:%s " % (self.get_name(), self.process.pid)
     logger.log(stopping_msg)
     self.process.terminate()
     self.process.join(timeout=1)
     still_alive = self.process.is_alive()
     if still_alive:
         logger.log("The process is still alive, I help it to die")
         self.__kill()
     self.process = None
Example #35
0
    def pynag_con_init(self, id, type='scheduler'):
        # Get teh good links tab for looping..
        links = self.get_links_from_type(type)
        if links is None:
            logger.log('DBG: Type unknown for connexion! %s' % type)
            return

        if type == 'scheduler':
            # If sched is not active, I do not try to init
            # it is just useless
            is_active = links[id]['active']
            if not is_active:
                return

        # If we try to connect too much, we slow down our tests
        if self.is_connexion_try_too_close(links[id]):
            return

        # Ok, we can now update it
        links[id]['last_connexion'] = time.time()

        # DBG: print "Init connexion with", links[id]['uri']
        running_id = links[id]['running_id']
        # DBG: print "Running id before connexion", running_id
        uri = links[id]['uri']
        links[id]['con'] = Pyro.core.getProxyForURI(uri)

        try:
            # intial ping must be quick
            pyro.set_timeout(links[id]['con'], 5)
            links[id]['con'].ping()
            new_run_id = links[id]['con'].get_running_id()
            # data transfert can be longer
            pyro.set_timeout(links[id]['con'], 120)

            # The schedulers have been restart : it has a new run_id.
            # So we clear all verifs, they are obsolete now.
            if new_run_id != running_id:
                print "[%s] New running id for the %s %s : %s (was %s)" % (
                    self.name, type, links[id]['name'], new_run_id, running_id)
                links[id]['broks'].clear()
                # we must ask for a enw full broks if
                # it's a scheduler
                if type == 'scheduler':
                    print "[%s] I ask for a broks generation to the scheduler %s" % (
                        self.name, links[id]['name'])
                    links[id]['con'].fill_initial_broks()
            # else:
            #     print "I do nto ask for brok generation"
            links[id]['running_id'] = new_run_id
        except (Pyro.errors.ProtocolError,
                Pyro.errors.CommunicationError), exp:
            logger.log("[%s] Connexion problem to the %s %s : %s" %
                       (self.name, type, links[id]['name'], str(exp)))
            links[id]['con'] = None
            return
Example #36
0
 def start(self):
     """Start the process of this module when it is an external one.

     Does nothing for a non external module. Otherwise any previous
     process is stopped first, then a new ``Process`` targeting
     ``self.main`` is created, stored and started.
     """
     if not self.is_external:
         return
     self.stop_process()
     logger.log("Starting external process for instance %s" % (self.name))
     proc = Process(target=self.main, args=())
     self.process = proc
     self.properties['process'] = proc  ## TODO: temporary
     proc.start()
     started_msg = "%s is now started ; pid=%d" % (self.name, proc.pid)
     logger.log(started_msg)
Example #37
0
 def register_local_log(self):
     # The arbiter don't have such an attribute
     if hasattr(self, 'use_local_log') and self.use_local_log:
         try:
             self.local_log_fd = self.log.register_local_log(self.local_log)
         except IOError, exp:
             print "Error : opening the log file '%s' failed with '%s'" % (
                 self.local_log, exp)
             sys.exit(2)
         logger.log("Using the local log file '%s'" % self.local_log)
Example #38
0
 def hook_point(self, hook_name):
     for inst in self.modules_manager.instances:
         full_hook_name = 'hook_' + hook_name
         if hasattr(inst, full_hook_name):
             f = getattr(inst, full_hook_name)
             try :
                 f(self)
             except Exception, exp:
                 logger.log('The instance %s raise an exception %s. I disable, and set it to restart later' % (inst.get_name(), str(exp)))
                 self.modules_manager.set_to_restart(inst)
Example #39
0
 def start(self):
     """Launch the external process of this module.

     A module that is not external is left alone. For an external one the
     current process (if any) is stopped, a ``Process`` running
     ``self.main`` is created, recorded in ``self.process`` and in
     ``self.properties['process']``, then started.
     """
     if self.is_external:
         self.stop_process()
         logger.log("Starting external process for instance %s" % (self.name))
         new_process = Process(target=self.main, args=())
         self.process = new_process
         self.properties['process'] = new_process  ## TODO: temporary
         new_process.start()
         logger.log("%s is now started ; pid=%d" % (self.name, new_process.pid))
Example #40
0
 def raise_notification_log_entry(self, n):
     """Log a "HOST NOTIFICATION" line for the notification ``n``.

     For the downtime, custom, acknowledgement and flapping notification
     types the state is written as "<type> (<state>)"; otherwise it is the
     plain state. Nothing is logged when the class attribute
     ``log_notifications`` is false.
     """
     contact = n.contact
     command = n.command_call
     typed_notifications = ('DOWNTIMESTART', 'DOWNTIMEEND', 'CUSTOM', 'ACKNOWLEDGEMENT', 'FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED')
     if n.type in typed_notifications:
         state = '%s (%s)' % (n.type, self.state)
     else:
         state = self.state
     if not self.__class__.log_notifications:
         return
     entry = "HOST NOTIFICATION: %s;%s;%s;%s;%s" % (
         contact.get_name(), self.get_name(), state,
         command.get_name(), self.output)
     logger.log(entry)
    def set_dead(self):
        """Mark the satellite as dead and drop its connection.

        When the satellite was alive before the call, a warning is logged
        and an update status brok is appended to ``self.broks``.
        """
        previously_alive = self.alive
        self.alive = False
        self.con = None

        # Already dead: nothing new to report
        if not previously_alive:
            return

        logger.log("Warning : Setting the satellite %s to a dead state." % self.get_name())
        status_brok = self.get_update_status_brok()
        self.broks.append(status_brok)
 def add_failed_check_attempt(self, reason=''):
     """Record one more failed check of this satellite.

     The satellite becomes unreachable and its attempt counter is
     incremented, capped at ``max_check_attempts``. The failure is logged
     only while the satellite is still alive; once the cap is reached
     ``set_dead`` is called.
     """
     self.reachable = False
     self.attempt += 1
     if self.attempt > self.max_check_attempts:
         self.attempt = self.max_check_attempts
     # No need to warn again and again about a satellite already dead
     if self.alive:
         logger.log("Info : Add failed attempt to %s (%d/%d) %s" % (self.get_name(), self.attempt, self.max_check_attempts, reason))
     # The last allowed attempt just failed: go dead
     if self.attempt == self.max_check_attempts:
         self.set_dead()
Example #43
0
 def stop_process(self):
     """ Request the module process to stop and release it """
     if self.process:
         logger.log("I'm stopping process pid:%s " % self.process.pid)
         self.process.terminate()
         self.process.join(timeout=1)
         print dir(self.process)
         if self.process.is_alive():
             logger.log("The process is still alive, I help it to die")
             self.__kill()
         self.process = None
Example #44
0
 def stop_process(self):
     """ Request the module process to stop and release it """
     if self.process:
         logger.log("I'm stopping process pid:%s " % self.process.pid)
         self.process.terminate()
         self.process.join(timeout=1)
         print dir(self.process)
         if self.process.is_alive():
             logger.log("The process is still alive, I help it to die")
             self.__kill()
         self.process = None
Example #45
0
 def start_external_instances(self):
     """Init and start every external module instance.

     An instance whose init fails is logged and queued in
     ``self.to_restart`` instead of being started.
     """
     externals = [candidate for candidate in self.instances if candidate.is_external]
     for inst in externals:
         if self.try_instance_init(inst):
             # Init went fine: the module can be started
             logger.log("Starting external module %s" % inst.get_name())
             inst.start()
         else:
             # Init failed: skip it for now, it will be retried later
             logger.log("Warning : the module '%s' failed to init, I will try to restart it later" % inst.get_name())
             self.to_restart.append(inst)
Example #46
0
 def hook_point(self, hook_name):
     for inst in self.modules_manager.instances:
         full_hook_name = 'hook_' + hook_name
         if hasattr(inst, full_hook_name):
             f = getattr(inst, full_hook_name)
             try:
                 f(self)
             except Exception, exp:
                 logger.log(
                     'The instance %s raise an exception %s. I disable, and set it to restart later'
                     % (inst.get_name(), str(exp)))
                 self.modules_manager.set_to_restart(inst)
Example #47
0
    def start_external_instances(self):
        for inst in [inst for inst in self.instances if inst.is_external]:
            # But maybe the init failed a bit, so bypass this ones from now
            if not self.try_instance_init(inst):
                logger.log(
                    "Warning : the module '%s' failed to init, I will try to restart it later"
                    % inst.get_name())
                self.to_restart.append(inst)
                continue

            # ok, init succeed
            print "Starting external module %s" % inst.get_name(), inst.from_q
            inst.start()
Example #48
0
 def do_stop(self):
     """Stop the modules, shut the Pyro daemon down and close the logger.

     With a modules manager, the 'save_retention' hook is fired first
     (except when the object has a ``sched`` attribute) and all modules
     are stopped.
     """
     if self.modules_manager:
         # Retention is not saved for the scheduler: its current sched
         # object is a dummy one and the old one already did the job.
         is_scheduler = hasattr(self, 'sched')
         if not is_scheduler:
             self.hook_point('save_retention')
         # Then the modules can go
         logger.log('Stopping all modules')
         self.modules_manager.stop_all()
     daemon = self.pyro_daemon
     if daemon:
         pyro.shutdown(daemon)  #.shutdown(True)
     logger.quit()
Example #49
0
 def wait_for_initial_conf(self, timeout=1.0):
     """Handle requests until a configuration arrives or we are interrupted.

     Requests are served with the time left of the current ``timeout``
     window. Each time a window is used up (or a call reports no elapsed
     time) a dot is written to stdout and a fresh window starts.
     """
     logger.log("Waiting for initial configuration")
     remaining = timeout
     # The arbiter did not give us a configuration yet
     while not (self.new_conf or self.interrupted):
         elapsed, _, _ = self.handleRequests(remaining)
         if elapsed:
             remaining -= elapsed
             if remaining > 0:
                 # Still some time left in this window
                 continue
             remaining = timeout
         sys.stdout.write(".")
         sys.stdout.flush()
Example #50
0
 def check_alive_instances(self):
     """Queue for restart the external modules whose process is gone.

     Instances already in ``self.to_restart`` and non external instances
     are ignored. For an external instance whose process is not alive,
     the event is logged, its queues are cleared and it is appended to
     ``self.to_restart``.
     """
     for inst in self.instances:
         if inst in self.to_restart:
             continue
         if not inst.is_external:
             continue
         if inst.process.is_alive():
             continue
         module_name_msg = (
             "Error : the external module %s goes down unexpectly!"
             % inst.get_name())
         logger.log(module_name_msg)
         logger.log("Setting the module %s to restart" %
                    inst.get_name())
         # Its queues are of no use anymore
         inst.clear_queues()
         self.to_restart.append(inst)
Example #51
0
    def is_correct(self):
        state = True
        properties = self.__class__.properties

        # Raised all previously saw errors like unknown contacts and co
        if self.configuration_errors != []:
            state = False
            for err in self.configuration_errors:
                logger.log(err)

        for prop, entry in properties.items():
            if not hasattr(self, prop) and entry.required:
                print self.get_name(), "missing property :", prop
                state = False
        return state
Example #52
0
    def check_and_do_archive(self, first_pass=False):
        now = int(time.time())
        #first check if the file last mod (or creation) was
        #not our day
        try:
            t_last_mod = int(float(str(os.path.getmtime(self.path))))
        except OSError:  #there should be no path from now, so no move :)
            return False
        #print "Ctime %d" % os.path.getctime(self.path)
        t_last_mod_day = get_day(t_last_mod)
        today = get_day(now)
        #print "Dates: t_last_mod : %d, t_last_mod_day: %d, today : %d" % (t_last_mod, t_last_mod_day, today)
        if t_last_mod_day != today:
            logger.log("We are archiving the old log file")

            #For the first pass, it's not already open
            if not first_pass:
                self.file.close()

            #Now we move it
            #Get a new name like MM

            #f_name is like nagios.log
            f_name = os.path.basename(self.path)
            #remove the ext -> (nagios,.log)
            (f_base_name, ext) = os.path.splitext(f_name)
            #make the good looking day for archive name
            #like -05-09-2010-00
            d = datetime.datetime.fromtimestamp(today)
            s_day = d.strftime("-%m-%d-%Y-00")
            archive_name = f_base_name + s_day + ext
            file_archive_path = os.path.join(self.archive_path, archive_name)
            logger.log("Moving the old log file from %s to %s" %
                       (self.path, file_archive_path))

            shutil.move(self.path, file_archive_path)

            #and we overwrite it
            print "I open the log file %s" % self.path
            self.file = open(self.path, 'a')

            return True
        return False
Example #53
0
 def test_utf8log(self):
     """Send non-ASCII byte and unicode strings through ``logger.log``."""
     samples = [
         # Byte string: Latin Small Letter E with acute in Latin-1
         'h\351h\351',
         # Unicode literal: dollar, pound, currency
         u'I love myself $£¤',
         # Unicode string built from code points
         unichr(40960) + u'abcd' + unichr(1972),
     ]
     for sample in samples:
         logger.log(sample)
Example #54
0
 def dump_memory(self):
     """Log a heap report produced by guppy, when guppy can be imported.

     An ImportError is turned into a log line asking to install the
     module.
     """
     logger.log("I dump my memory, it can ask some seconds to do")
     try:
         from guppy import hpy
         heap_report = hpy().heap()
         logger.log(heap_report)
     except ImportError:
         missing_msg = 'I do not have the module guppy for memory dump, please install it'
         logger.log(missing_msg)
Example #55
0
    def check_bad_dispatch(self):
        """Ask the daemons to drop the configurations they should not hold.

        First pass, over ``self.elements``: a reachable element for which
        we have no conf (``elt.conf is None``) but which reports having one
        is deactivated and asked to wait for a new conf.

        Second pass, over ``self.satellites``: each reachable satellite is
        asked which configuration ids it manages. Every id found in a realm
        where the satellite is not in ``to_satellites_managed_by`` for its
        kind is marked for removal. When all its ids are marked the
        satellite is deactivated and asked to wait for a new conf; otherwise
        only the marked ids are removed from it.
        """
        for elt in self.elements:
            if hasattr(elt, 'conf'):
                # If element have a conf, I do not care, it's a good dispatch
                # If die : I do not ask it something, it won't respond..
                if elt.conf is None and elt.reachable:
                    if elt.have_conf():
                        logger.log(
                            'Warning : The element %s have a conf and should not have one! I ask it to idle now'
                            % elt.get_name())
                        elt.active = False
                        elt.wait_new_conf()
                        # I do not care about order not send or not. If not,
                        # The next loop wil resent it

        # I ask satellite witch sched_id they manage. If I am not agree, I ask
        # them to remove it
        for satellite in self.satellites:
            kind = satellite.get_my_type()
            if satellite.reachable:
                cfg_ids = satellite.what_i_managed()
                # I do not care about satellites that do nothing, it already
                # do what I want :)
                if len(cfg_ids) != 0:
                    id_to_delete = []
                    for cfg_id in cfg_ids:
                        # Ok, we search for realm that have the conf
                        for r in self.realms:
                            if cfg_id in r.confs:
                                # Ok we've got the realm, we check it's to_satellites_managed_by
                                # to see if the satellite is in. If not, we remove the cfg_id for it
                                if not satellite in r.to_satellites_managed_by[
                                        kind][cfg_id]:
                                    id_to_delete.append(cfg_id)
                    # Maybe we removed all cfg_id of this satellite
                    # We can make it idle, no active and wait_new_conf
                    if len(id_to_delete) == len(cfg_ids):
                        satellite.active = False
                        logger.log("I ask %s to wait a new conf" %
                                   satellite.get_name())
                        satellite.wait_new_conf()
                    else:  # It is not fully idle, just less cfg
                        # Use the loop variable itself: the leftover cfg_id of
                        # the scan above would remove the same (last scanned)
                        # configuration on every iteration.
                        for id_to_remove in id_to_delete:
                            logger.log(
                                "I ask to remove configuration N%d from %s" %
                                (id_to_remove, satellite.get_name()))
                            satellite.remove_from_conf(id_to_remove)
Example #56
0
 def manage_brok(self, b):
     # Call all modules if they catch the call
     for mod in self.modules_manager.get_internal_instances():
         try:
             mod.manage_brok(b)
         except Exception, exp:
             print exp.__dict__
             logger.log(
                 "[%s] Warning : The mod %s raise an exception: %s, I'm tagging it to restart later"
                 % (self.name, mod.get_name(), str(exp)))
             logger.log("[%s] Exception type : %s" % (self.name, type(exp)))
             logger.log("Back trace of this kill: %s" %
                        (traceback.format_exc()))
             self.modules_manager.set_to_restart(inst)
Example #57
0
 def manage_brok(self, b):
     """Hand the brok ``b`` to every internal module instance.

     A module whose ``manage_brok`` raises is logged, together with the
     exception type and the traceback, and collected in ``to_del``.
     """
     # NOTE(review): to_del is filled but never read in the code visible
     # here; the removal step may be missing -- confirm.
     to_del = []
     # Call all modules if they catch the call
     for mod in self.modules_manager.get_internal_instances():
         try:
             mod.manage_brok(b)
         except Exception, exp:
             print exp.__dict__
             logger.log(
                 "[%s] Warning : The mod %s raise an exception: %s, I kill it"
                 % (self.name, mod.get_name(), str(exp)))
             logger.log("[%s] Exception type : %s" % (self.name, type(exp)))
             logger.log("Back trace of this kill: %s" %
                        (traceback.format_exc()))
             to_del.append(mod)
Example #58
0
    def get_instances(self):
        """Create the module instances for every (configuration, module) pair.

        The previous instances are cleared first. For each pair of
        ``self.modules_assoc`` the module properties are copied onto the
        configuration, ``module.get_instance`` is called and the result is
        appended to ``self.instances``. A module that returns None, that
        does not return a ``BaseModule`` or that raises is only logged and
        skipped.

        NOTE(review): the previous docstring stated that the list of
        instances is returned, but no return statement is visible in this
        block -- confirm whether the end of the method is missing.
        """
        self.clear_instances()
        for (mod_conf, module) in self.modules_assoc:
            try:
                # The configuration gets its own copy of the module properties
                mod_conf.properties = module.properties.copy()
                inst = module.get_instance(mod_conf)
                if inst is None:  # None = Bad thing happened :)
                    logger.log("get_instance for module %s returned None !" %
                               (mod_conf.get_name()))
                    continue
                # A failed assert is caught by the except clause below
                assert (isinstance(inst, BaseModule))
                self.instances.append(inst)
            except Exception, exp:
                logger.log(
                    "Error : the module %s raised an exception %s, I remove it!"
                    % (mod_conf.get_name(), str(exp)))
                # Capture the traceback as text so it can go through the logger
                output = cStringIO.StringIO()
                traceback.print_exc(file=output)
                logger.log("Back trace of this remove : %s" %
                           (output.getvalue()))
                output.close()
 def hook_save_retention(self, daemon):
     """Retention save hook; the code visible here only logs a refusal."""
     logger.log(
         "[NagiosRetention] asking me to update the retention objects, but I won't do it."
     )