コード例 #1
0
class Poller(Satellite):
    do_checks = True  # I do checks
    do_actions = False  # but no actions

    properties = Satellite.properties.copy()
    properties.update({
        'pidfile': PathProp(default='pollerd.pid'),
        'port': IntegerProp(default='7771'),
        'local_log': PathProp(default='pollerd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file):
        super(Poller, self).__init__('poller', config_file, is_daemon,
                                     do_replace, debug, debug_file)
コード例 #2
0
class Reactionner(Satellite):
    do_checks = False  # I do not do checks
    do_actions = True
    my_type = 'reactionner'

    properties = Satellite.properties.copy()
    properties.update({
        'pidfile':   PathProp(default='reactionnerd.pid'),
        'port':      IntegerProp(default=7769),
        'local_log': PathProp(default='reactionnerd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file, profile=''):
        super(Reactionner, self).__init__('reactionner', config_file, is_daemon, do_replace, debug,
                                          debug_file)
コード例 #3
0
class Reactionner(Satellite):
    do_checks = False  # I do not do checks
    do_actions = True  # just actions like notifications

    properties = Satellite.properties.copy()
    properties.update({
        'pidfile':
        PathProp(default='/usr/local/shinken/var/reactionnerd.pid'),
        'port':
        IntegerProp(default='7769'),
        'local_log':
        PathProp(default='/usr/local/shinken/var/reactionnerd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file):
        super(Reactionner,
              self).__init__('reactionner', config_file, is_daemon, do_replace,
                             debug, debug_file)
コード例 #4
0
ファイル: receiverdaemon.py プロジェクト: zoranzaric/shinken
class Receiver(BaseSatellite):

    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile':
        PathProp(default='/usr/local/shinken/var/receiverd.pid'),
        'port':
        IntegerProp(default='7773'),
        'local_log':
        PathProp(default='/usr/local/shinken/var/receiverd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file):

        super(Receiver, self).__init__('receiver', config_file, is_daemon,
                                       do_replace, debug, debug_file)

        # Our arbiters
        self.arbiters = {}

        # Our pollers and reactionners
        self.pollers = {}
        self.reactionners = {}

        # Modules are load one time
        self.have_modules = False

        # Can have a queue of external_commands give by modules
        # will be taken by arbiter to process
        self.external_commands = []

        # All broks to manage
        self.broks = []  # broks to manage
        # broks raised this turn and that need to be put in self.broks
        self.broks_internal_raised = []

    # Schedulers have some queues. We can simplify call by adding
    # elements into the proper queue just by looking at their type
    # Brok -> self.broks
    # TODO : better tag ID?
    # External commands -> self.external_commands
    def add(self, elt):
        cls_type = elt.__class__.my_type
        if cls_type == 'brok':
            # For brok, we TAG brok with our instance_id
            elt.data['instance_id'] = 0
            self.broks_internal_raised.append(elt)
            return
        elif cls_type == 'externalcommand':
            print "Adding in queue an external command", ExternalCommand.__dict__
            self.external_commands.append(elt)

#    # Get teh good tabs for links by the kind. If unknown, return None
#    def get_links_from_type(self, type):
#        t = {'scheduler' : self.schedulers, 'arbiter' : self.arbiters, \
#             'poller' : self.pollers, 'reactionner' : self.reactionners}
#        if type in t :
#            return t[type]
#        return None

# Call by arbiter to get our external commands

    def get_external_commands(self):
        res = self.external_commands
        self.external_commands = []
        return res

    # Get a brok. Our role is to put it in the modules
    # THEY MUST DO NOT CHANGE data of b !!!
    # REF: doc/receiver-modules.png (4-5)
    def manage_brok(self, b):
        to_del = []
        # Call all modules if they catch the call
        for mod in self.modules_manager.get_internal_instances():
            try:
                mod.manage_brok(b)
            except Exception, exp:
                print exp.__dict__
                logger.log(
                    "[%s] Warning : The mod %s raise an exception: %s, I kill it"
                    % (self.name, mod.get_name(), str(exp)))
                logger.log("[%s] Exception type : %s" % (self.name, type(exp)))
                logger.log("Back trace of this kill: %s" %
                           (traceback.format_exc()))
                to_del.append(mod)
        # Now remove mod that raise an exception
        self.modules_manager.clear_instances(to_del)
コード例 #5
0
class Receiver(Satellite):
    my_type = 'receiver'
    
    properties = Satellite.properties.copy()
    properties.update({
        'pidfile':   PathProp(default='receiverd.pid'),
        'port':      IntegerProp(default=7773),
        'local_log': PathProp(default='receiverd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file):

        super(Receiver, self).__init__('receiver', config_file, is_daemon, do_replace, debug, debug_file)

        # Our arbiters
        self.arbiters = {}

        # Our pollers and reactionners
        self.pollers = {}
        self.reactionners = {}

        # Modules are load one time
        self.have_modules = False

        # Can have a queue of external_commands give by modules
        # will be taken by arbiter to process
        self.external_commands = []
        # and the unprocessed one, a buffer
        self.unprocessed_external_commands = []

        self.host_assoc = {}
        self.direct_routing = False
        self.accept_passive_unknown_check_results = False

        self.istats = IStats(self)
        self.ibroks = IBroks(self)

        # Now create the external commander. It's just here to dispatch
        # the commands to schedulers
        e = ExternalCommandManager(None, 'receiver')
        e.load_receiver(self)
        self.external_command = e

    # Schedulers have some queues. We can simplify call by adding
    # elements into the proper queue just by looking at their type
    # Brok -> self.broks
    # TODO: better tag ID?
    # External commands -> self.external_commands
    def add(self, elt):
        cls_type = elt.__class__.my_type
        if cls_type == 'brok':
            # For brok, we TAG brok with our instance_id
            elt.instance_id = 0
            self.broks[elt.id] = elt
            return
        elif cls_type == 'externalcommand':
            logger.debug("Enqueuing an external command: %s", str(ExternalCommand.__dict__))
            self.unprocessed_external_commands.append(elt)


    def push_host_names(self, sched_id, hnames):
        for h in hnames:
            self.host_assoc[h] = sched_id


    def get_sched_from_hname(self, hname):
        i = self.host_assoc.get(hname, None)
        e = self.schedulers.get(i, None)
        return e


    # Get a brok. Our role is to put it in the modules
    # THEY MUST DO NOT CHANGE data of b!!!
    # REF: doc/receiver-modules.png (4-5)
    def manage_brok(self, b):
        to_del = []
        # Call all modules if they catch the call
        for mod in self.modules_manager.get_internal_instances():
            try:
                mod.manage_brok(b)
            except Exception, exp:
                logger.warning("The mod %s raise an exception: %s, I kill it", mod.get_name(), str(exp))
                logger.warning("Exception type: %s", type(exp))
                logger.warning("Back trace of this kill: %s", traceback.format_exc())
                to_del.append(mod)
        # Now remove mod that raise an exception
        self.modules_manager.clear_instances(to_del)
コード例 #6
0
ファイル: brokerdaemon.py プロジェクト: yoyox0123/shinken
class Broker(BaseSatellite):
    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile':   PathProp(default='brokerd.pid'),
        'port':      IntegerProp(default=7772),
        'local_log': PathProp(default='brokerd.log'),
    })


    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file, profile=''):

        super(Broker, self).__init__('broker', config_file, is_daemon, do_replace, debug,
                                     debug_file)

        # Our arbiters
        self.arbiters = {}

        # Our pollers, reactionners and receivers
        self.pollers = {}
        self.reactionners = {}
        self.receivers = {}

        # Modules are load one time
        self.have_modules = False

        # Can have a queue of external_commands given by modules
        # will be processed by arbiter
        self.external_commands = []

        # All broks to manage
        self.broks = deque()  # broks to manage
        self.external_module_broks = deque()  # broks during this loop to send to external modules
        self.broks_lock = threading.RLock()  # to manage lock when managing broks
        # broks raised this turn and that needs to be put in self.broks
        self.broks_internal_raised = []
        # broks raised by the arbiters, we need a lock so the push can be in parallel
        # to our current activities and won't lock the arbiter
        self.arbiter_broks = []
        self.arbiter_broks_lock = threading.RLock()

        self.timeout = 1.0

        self.istats = IStats(self)


    # Schedulers have some queues. We can simplify the call by adding
    # elements into the proper queue just by looking at their type
    # Brok -> self.broks
    # TODO: better tag ID?
    # External commands -> self.external_commands
    def add(self, elt):
        cls_type = elt.__class__.my_type
        if cls_type == 'brok':
            # For brok, we TAG brok with our instance_id
            elt.instance_id = 0
            self.broks_internal_raised.append(elt)
            return
        elif cls_type == 'externalcommand':
            logger.debug("Enqueuing an external command '%s'", str(ExternalCommand.__dict__))
            self.external_commands.append(elt)
        # Maybe we got a Message from the modules, it's way to ask something
        # like from now a full data from a scheduler for example.
        elif cls_type == 'message':
            # We got a message, great!
            logger.debug(str(elt.__dict__))
            if elt.get_type() == 'NeedData':
                data = elt.get_data()
                # Full instance id means: I got no data for this scheduler
                # so give me all dumbass!
                if 'full_instance_id' in data:
                    c_id = data['full_instance_id']
                    source = elt.source
                    logger.info('The module %s is asking me to get all initial data '
                                'from the scheduler %d',
                                source, c_id)
                    # so we just reset the connection and the running_id,
                    # it will just get all new things
                    try:
                        self.schedulers[c_id]['con'] = None
                        self.schedulers[c_id]['running_id'] = 0
                    except KeyError:  # maybe this instance was not known, forget it
                        logger.warning("the module %s ask me a full_instance_id "
                                       "for an unknown ID (%d)!", source, c_id)
            # Maybe a module tells me that it's dead, I must log it's last words...
            if elt.get_type() == 'ICrash':
                data = elt.get_data()
                logger.error('the module %s just crash! Please look at the traceback:',
                             data['name'])
                logger.error(data['trace'])

                # The module death will be looked for elsewhere and restarted.


    # Get the good tabs for links by the kind. If unknown, return None
    def get_links_from_type(self, d_type):
        t = {'scheduler':   self.schedulers,
             'arbiter':     self.arbiters,
             'poller':      self.pollers,
             'reactionner': self.reactionners,
             'receiver':    self.receivers
             }
        if d_type in t:
            return t[d_type]
        return None


    # Check if we do not connect to often to this
    def is_connection_try_too_close(self, elt):
        now = time.time()
        last_connection = elt['last_connection']
        if now - last_connection < 5:
            return True
        return False


    # wrapper function for the real function do_
    # just for timing the connection
    def pynag_con_init(self, id, type='scheduler'):
        _t = time.time()
        r = self.do_pynag_con_init(id, type)
        statsmgr.timing('con-init.%s' % type, time.time() - _t, 'perf')
        return r


    # initialize or re-initialize connection with scheduler or
    # arbiter if type == arbiter
    def do_pynag_con_init(self, id, type='scheduler'):
        # Get the good links tab for looping..
        links = self.get_links_from_type(type)
        if links is None:
            logger.debug('Type unknown for connection! %s', type)
            return

        # default timeout for daemons like pollers/reactionners/...
        timeout = 3
        data_timeout = 120

        if type == 'scheduler':
            # If sched is not active, I do not try to init
            # it is just useless
            is_active = links[id]['active']
            if not is_active:
                return
            # schedulers also got real timeout to respect
            timeout = links[id]['timeout']
            data_timeout = links[id]['data_timeout']

        # If we try to connect too much, we slow down our tests
        if self.is_connection_try_too_close(links[id]):
            return

        # Ok, we can now update it
        links[id]['last_connection'] = time.time()

        # DBG: print "Init connection with", links[id]['uri']
        running_id = links[id]['running_id']
        # DBG: print "Running id before connection", running_id
        uri = links[id]['uri']
        try:
            con = links[id]['con'] = HTTPClient(uri=uri,
                                                strong_ssl=links[id]['hard_ssl_name_check'],
                                                timeout=timeout, data_timeout=data_timeout)
        except HTTPExceptions, exp:
            # But the multiprocessing module is not compatible with it!
            # so we must disable it immediately after
            logger.info("Connection problem to the %s %s: %s", type, links[id]['name'], str(exp))
            links[id]['con'] = None
            return

        try:
            # initial ping must be quick
            con.get('ping')
            new_run_id = con.get('get_running_id')
            new_run_id = float(new_run_id)
            # data transfer can be longer

            # The schedulers have been restarted: it has a new run_id.
            # So we clear all verifs, they are obsolete now.
            if new_run_id != running_id:
                logger.debug("[%s] New running id for the %s %s: %s (was %s)",
                             self.name, type, links[id]['name'], new_run_id, running_id)
                del links[id]['broks'][:]
                # we must ask for a new full broks if
                # it's a scheduler
                if type == 'scheduler':
                    logger.debug("[%s] I ask for a broks generation to the scheduler %s",
                                 self.name, links[id]['name'])
                    con.get('fill_initial_broks', {'bname': self.name}, wait='long')
            # Ok all is done, we can save this new running id
            links[id]['running_id'] = new_run_id
        except HTTPExceptions, exp:
            logger.info("Connection problem to the %s %s: %s", type, links[id]['name'], str(exp))
            links[id]['con'] = None
            return
コード例 #7
0
class Shinken(BaseSatellite):

    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile':   PathProp(default='schedulerd.pid'),
        'port':      IntegerProp(default='7768'),
        'local_log': PathProp(default='schedulerd.log'),
    })

    # Create the shinken class:
    # Create a Pyro server (port = arvg 1)
    # then create the interface for arbiter
    # Then, it wait for a first configuration
    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file, profile=''):

        BaseSatellite.__init__(self, 'scheduler', config_file, is_daemon, do_replace, debug, debug_file)

        self.interface = IForArbiter(self)
        self.istats = IStats(self)
        self.sched = Scheduler(self)

        self.ichecks = None
        self.ibroks = None
        self.must_run = True

        # Now the interface
        self.uri = None
        self.uri2 = None

        # And possible links for satellites
        # from now only pollers
        self.pollers = {}
        self.reactionners = {}
        self.brokers = {}


    def do_stop(self):
        if self.http_daemon:
            if self.ibroks:
                self.http_daemon.unregister(self.ibroks)
            if self.ichecks:
                self.http_daemon.unregister(self.ichecks)
        super(Shinken, self).do_stop()


    def compensate_system_time_change(self, difference):
        """ Compensate a system time change of difference for all hosts/services/checks/notifs """
        logger.warning("A system time change of %d has been detected. Compensating..." % difference)
        # We only need to change some value
        self.program_start = max(0, self.program_start + difference)

        if not hasattr(self.sched, "conf"):
            # Race condition where time change before getting conf
            return

        # Then we compensate all host/services
        for h in self.sched.hosts:
            h.compensate_system_time_change(difference)
        for s in self.sched.services:
            s.compensate_system_time_change(difference)

        # Now all checks and actions
        for c in self.sched.checks.values():
            # Already launch checks should not be touch
            if c.status == 'scheduled' and c.t_to_go is not None:
                t_to_go = c.t_to_go
                ref = c.ref
                new_t = max(0, t_to_go + difference)
                if ref.check_period is not None:
                    # But it's no so simple, we must match the timeperiod
                    new_t = ref.check_period.get_next_valid_time_from_t(new_t)
                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    c.state = 'waitconsume'
                    c.exit_status = 2
                    c.output = '(Error: there is no available check time after time change!)'
                    c.check_time = time.time()
                    c.execution_time = 0
                else:
                    c.t_to_go = new_t
                    ref.next_chk = new_t

        # Now all checks and actions
        for c in self.sched.actions.values():
            # Already launch checks should not be touch
            if c.status == 'scheduled':
                t_to_go = c.t_to_go

                #  Event handler do not have ref
                ref = getattr(c, 'ref', None)
                new_t = max(0, t_to_go + difference)

                # Notification should be check with notification_period
                if c.is_a == 'notification':
                    if ref.notification_period:
                        # But it's no so simple, we must match the timeperiod
                        new_t = ref.notification_period.get_next_valid_time_from_t(new_t)
                    # And got a creation_time variable too
                    c.creation_time = c.creation_time + difference

                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    c.state = 'waitconsume'
                    c.exit_status = 2
                    c.output = '(Error: there is no available check time after time change!)'
                    c.check_time = time.time()
                    c.execution_time = 0
                else:
                    c.t_to_go = new_t

    def manage_signal(self, sig, frame):
        logger.warning("Received a SIGNAL %s" % sig)
        # If we got USR1, just dump memory
        if sig == signal.SIGUSR1:
            self.sched.need_dump_memory = True
        elif sig == signal.SIGUSR2: #usr2, dump objects
            self.sched.need_objects_dump = True
        else:  # if not, die :)
            self.sched.die()
            self.must_run = False
            Daemon.manage_signal(self, sig, frame)


    def do_loop_turn(self):
        # Ok, now the conf
        self.wait_for_initial_conf()
        if not self.new_conf:
            return
        logger.info("New configuration received")
        self.setup_new_conf()
        logger.info("New configuration loaded")
        self.sched.run()


    def setup_new_conf(self):
        pk = self.new_conf
        conf_raw = pk['conf']
        override_conf = pk['override_conf']
        modules = pk['modules']
        satellites = pk['satellites']
        instance_name = pk['instance_name']
        push_flavor = pk['push_flavor']
        skip_initial_broks = pk['skip_initial_broks']
        accept_passive_unknown_check_results = pk['accept_passive_unknown_check_results']
        
        # horay, we got a name, we can set it in our stats objects
        statsmgr.register(instance_name, 'scheduler')
        
        t0 = time.time()
        conf = cPickle.loads(conf_raw)
        logger.debug("Conf received at %d. Unserialized in %d secs" % (t0, time.time() - t0))
        self.new_conf = None

        # Tag the conf with our data
        self.conf = conf
        self.conf.push_flavor = push_flavor
        self.conf.instance_name = instance_name
        self.conf.skip_initial_broks = skip_initial_broks
        self.conf.accept_passive_unknown_check_results = accept_passive_unknown_check_results

        self.cur_conf = conf
        self.override_conf = override_conf
        self.modules = modules
        self.satellites = satellites
        #self.pollers = self.app.pollers

        if self.conf.human_timestamp_log:
            logger.set_human_format()

        # Now We create our pollers
        for pol_id in satellites['pollers']:
            # Must look if we already have it
            already_got = pol_id in self.pollers
            p = satellites['pollers'][pol_id]
            self.pollers[pol_id] = p

            if p['name'] in override_conf['satellitemap']:
                p = dict(p)  # make a copy
                p.update(override_conf['satellitemap'][p['name']])

            proto = 'http'
            if p['use_ssl']:
                proto = 'https'
            uri = '%s://%s:%s/' % (proto, p['address'], p['port'])
            self.pollers[pol_id]['uri'] = uri
            self.pollers[pol_id]['last_connection'] = 0

        # First mix conf and override_conf to have our definitive conf
        for prop in self.override_conf:
            #print "Overriding the property %s with value %s" % (prop, self.override_conf[prop])
            val = self.override_conf[prop]
            setattr(self.conf, prop, val)

        if self.conf.use_timezone != '':
            logger.debug("Setting our timezone to %s" % str(self.conf.use_timezone))
            os.environ['TZ'] = self.conf.use_timezone
            time.tzset()

        if len(self.modules) != 0:
            logger.debug("I've got %s modules" % str(self.modules))

        # TODO: if scheduler had previous modules instanciated it must clean them!
        self.modules_manager.set_modules(self.modules)
        self.do_load_modules()

        # give it an interface
        # But first remove previous interface if exists
        if self.ichecks is not None:
            logger.debug("Deconnecting previous Check Interface")
            self.http_daemon.unregister(self.ichecks)
        # Now create and connect it
        self.ichecks = IChecks(self.sched)
        self.http_daemon.register(self.ichecks)
        logger.debug("The Scheduler Interface uri is: %s" % self.uri)
        
        # Same for Broks
        if self.ibroks is not None:
            logger.debug("Deconnecting previous Broks Interface")
            self.http_daemon.unregister(self.ibroks)
        # Create and connect it
        self.ibroks = IBroks(self.sched)
        self.http_daemon.register(self.ibroks)

        logger.info("Loading configuration.")
        self.conf.explode_global_conf()

        # we give sched it's conf
        self.sched.reset()
        self.sched.load_conf(self.conf)
        self.sched.load_satellites(self.pollers, self.reactionners)

        # We must update our Config dict macro with good value
        # from the config parameters
        self.sched.conf.fill_resource_macros_names_macros()
        #print "DBG: got macros", self.sched.conf.macros

        # Creating the Macroresolver Class & unique instance
        m = MacroResolver()
        m.init(self.conf)
        
        #self.conf.dump()
        #self.conf.quick_debug()
        
        # Now create the external commander
        # it's a applyer: it role is not to dispatch commands,
        # but to apply them
        e = ExternalCommandManager(self.conf, 'applyer')
        
        # Scheduler need to know about external command to
        # activate it if necessary
        self.sched.load_external_command(e)

        # External command need the sched because he can raise checks
        e.load_scheduler(self.sched)

        # We clear our schedulers managed (it's us :) )
        # and set ourself in it
        self.schedulers = {self.conf.instance_id: self.sched}


    # Give the arbiter the data about what I manage
    # for me it's just my instance_id and my push flavor
    def what_i_managed(self):
        if hasattr(self, 'conf'):
            return {self.conf.instance_id: self.conf.push_flavor}
        else:
            return {}

    # our main function, launch after the init
    def main(self):
        try:
            self.load_config_file()
            self.look_for_early_exit()
            self.do_daemon_init_and_start()
            self.load_modules_manager()
            self.http_daemon.register(self.interface)
            self.http_daemon.register(self.istats)

            #self.inject = Injector(self.sched)
            #self.http_daemon.register(self.inject)

            self.http_daemon.unregister(self.interface)
            self.uri = self.http_daemon.uri
            logger.info("[scheduler] General interface is at: %s" % self.uri)
            self.do_mainloop()
        except Exception, exp:
            logger.critical("I got an unrecoverable error. I have to exit")
            logger.critical("You can log a bug ticket at https://github.com/naparuba/shinken/issues/new to get help")
            logger.critical("Back trace of it: %s" % (traceback.format_exc()))
            raise
コード例 #8
0
class Broker(BaseSatellite):

    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile': PathProp(default='brokerd.pid'),
        'port': IntegerProp(default='7772'),
        'local_log': PathProp(default='brokerd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file):

        super(Broker, self).__init__('broker', config_file, is_daemon,
                                     do_replace, debug, debug_file)

        # Our arbiters
        self.arbiters = {}

        # Our pollers and reactionners
        self.pollers = {}
        self.reactionners = {}

        # Modules are load one time
        self.have_modules = False

        # Can have a queue of external_commands given by modules
        # will be processed by arbiter
        self.external_commands = []

        # All broks to manage
        self.broks = []  # broks to manage
        # broks raised this turn and that needs to be put in self.broks
        self.broks_internal_raised = []

        self.timeout = 1.0

    # Schedulers have some queues. We can simplify the call by adding
    # elements into the proper queue just by looking at their type
    # Brok -> self.broks
    # TODO: better tag ID?
    # External commands -> self.external_commands
    def add(self, elt):
        cls_type = elt.__class__.my_type
        if cls_type == 'brok':
            # For brok, we TAG brok with our instance_id
            elt.instance_id = 0
            self.broks_internal_raised.append(elt)
            return
        elif cls_type == 'externalcommand':
            logger.debug("Enqueuing an external command '%s'" %
                         str(ExternalCommand.__dict__))
            self.external_commands.append(elt)
        # Maybe we got a Message from the modules, it's way to ask something
        # like from now a full data from a scheduler for example.
        elif cls_type == 'message':
            # We got a message, great!
            logger.debug(str(elt.__dict__))
            if elt.get_type() == 'NeedData':
                data = elt.get_data()
                # Full instance id means: I got no data for this scheduler
                # so give me all dumbass!
                if 'full_instance_id' in data:
                    c_id = data['full_instance_id']
                    source = elt.source
                    logger.info(
                        'The module %s is asking me to get all initial data from the scheduler %d'
                        % (source, c_id))
                    # so we just reset the connection and the running_id, it will just get all new things
                    try:
                        self.schedulers[c_id]['con'] = None
                        self.schedulers[c_id]['running_id'] = 0
                    except KeyError:  # maybe this instance was not known, forget it
                        logger.warning(
                            "the module %s ask me a full_instance_id for an unknown ID (%d)!"
                            % (source, c_id))
            # Maybe a module tells me that it's dead, I must log it's last words...
            if elt.get_type() == 'ICrash':
                data = elt.get_data()
                logger.error(
                    'the module %s just crash! Please look at the traceback:' %
                    data['name'])
                logger.error(data['trace'])

                # The module death will be looked for elsewhere and restarted.

    # Get the good tabs for links by the kind. If unknown, return None
    def get_links_from_type(self, type):
        t = {'scheduler': self.schedulers, 'arbiter': self.arbiters, \
             'poller': self.pollers, 'reactionner': self.reactionners}
        if type in t:
            return t[type]
        return None

    # Call by arbiter to get our external commands
    def get_external_commands(self):
        res = self.external_commands
        self.external_commands = []
        return res

    # Check if we do not connect to often to this
    def is_connection_try_too_close(self, elt):
        now = time.time()
        last_connection = elt['last_connection']
        if now - last_connection < 5:
            return True
        return False

    # initialize or re-initialize connection with scheduler or
    # arbiter if type == arbiter
    def pynag_con_init(self, id, type='scheduler'):
        # Get the good links tab for looping..
        links = self.get_links_from_type(type)
        if links is None:
            logger.debug('Type unknown for connection! %s' % type)
            return

        if type == 'scheduler':
            # If sched is not active, I do not try to init
            # it is just useless
            is_active = links[id]['active']
            if not is_active:
                return

        # If we try to connect too much, we slow down our tests
        if self.is_connection_try_too_close(links[id]):
            return

        # Ok, we can now update it
        links[id]['last_connection'] = time.time()

        # DBG: print "Init connection with", links[id]['uri']
        running_id = links[id]['running_id']
        # DBG: print "Running id before connection", running_id
        uri = links[id]['uri']

        try:
            socket.setdefaulttimeout(3)
            links[id]['con'] = Pyro.core.getProxyForURI(uri)
            socket.setdefaulttimeout(None)
        except Pyro_exp_pack, exp:
            # But the multiprocessing module is not compatible with it!
            # so we must disable it immediately after
            socket.setdefaulttimeout(None)
            logger.info("Connection problem to the %s %s: %s" %
                        (type, links[id]['name'], str(exp)))
            links[id]['con'] = None
            return

        try:
            # initial ping must be quick
            pyro.set_timeout(links[id]['con'], 5)
            links[id]['con'].ping()
            new_run_id = links[id]['con'].get_running_id()
            # data transfer can be longer
            pyro.set_timeout(links[id]['con'], 120)

            # The schedulers have been restarted: it has a new run_id.
            # So we clear all verifs, they are obsolete now.
            if new_run_id != running_id:
                logger.debug("[%s] New running id for the %s %s: %s (was %s)" %
                             (self.name, type, links[id]['name'], new_run_id,
                              running_id))
                links[id]['broks'].clear()
                # we must ask for a new full broks if
                # it's a scheduler
                if type == 'scheduler':
                    logger.debug(
                        "[%s] I ask for a broks generation to the scheduler %s"
                        % (self.name, links[id]['name']))
                    links[id]['con'].fill_initial_broks(self.name)
            # Ok all is done, we can save this new running id
            links[id]['running_id'] = new_run_id
        except Pyro_exp_pack, exp:
            logger.info("Connection problem to the %s %s: %s" %
                        (type, links[id]['name'], str(exp)))
            links[id]['con'] = None
            return
コード例 #9
0
ファイル: schedulerdaemon.py プロジェクト: zoranzaric/shinken
class Shinken(BaseSatellite):

    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile':   PathProp(default='/usr/local/shinken/var/schedulerd.pid'),
        'port':      IntegerProp(default='7768'),
        'local_log': PathProp(default='/usr/local/shinken/var/schedulerd.log'),
    })
    
    
    #Create the shinken class:
    #Create a Pyro server (port = arvg 1)
    #then create the interface for arbiter
    #Then, it wait for a first configuration
    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file):
        
        BaseSatellite.__init__(self, 'scheduler', config_file, is_daemon, do_replace, debug, debug_file)

        self.interface = IForArbiter(self)
        self.sched = Scheduler(self)
        
        self.ichecks = None
        self.ibroks = None
        self.must_run = True

        # Now the interface
        self.uri = None
        self.uri2 = None

        # And possible links for satellites
        # from now only pollers
        self.pollers = {}
        self.reactionners = {}


    def do_stop(self):
        self.pyro_daemon.unregister(self.ibroks)
        self.pyro_daemon.unregister(self.ichecks)
        super(Shinken, self).do_stop()


    def compensate_system_time_change(self, difference):
        """ Compensate a system time change of difference for all hosts/services/checks/notifs """
        logger.log('Warning: A system time change of %d has been detected.  Compensating...' % difference)
        # We only need to change some value
        self.program_start = max(0, self.program_start + difference)

        # Then we compasate all host/services
        for h in self.sched.hosts:
            h.compensate_system_time_change(difference)
        for s in self.sched.services:
            s.compensate_system_time_change(difference)

        # Now all checks and actions
        for c in self.sched.checks.values():
            # Already launch checks should not be touch
            if c.status == 'scheduled':
                t_to_go = c.t_to_go
                ref = c.ref
                new_t = max(0, t_to_go + difference)
                # But it's no so simple, we must match the timeperiod
                new_t = ref.check_period.get_next_valid_time_from_t(new_t)
                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    c.state = 'waitconsume'
                    c.exit_status = 2
                    c.output = '(Error: there is no available check time after time change!)'
                    c.check_time = time.time()
                    c.execution_time = 0
                else:
                    c.t_to_go = new_t
                    ref.next_chk = new_t

        # Now all checks and actions
        for c in self.sched.actions.values():
            # Already launch checks should not be touch
            if c.status == 'scheduled':
                t_to_go = c.t_to_go

                #  Event handler do not have ref
                ref = getattr(c, 'ref', None)
                new_t = max(0, t_to_go + difference)

                # Notification should be check with notification_period
                if c.is_a == 'notification':
                    # But it's no so simple, we must match the timeperiod
                    new_t = ref.notification_period.get_next_valid_time_from_t(new_t)
                    # And got a creation_time variable too
                    c.creation_time = c.creation_time + difference

                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    c.state = 'waitconsume'
                    c.exit_status = 2
                    c.output = '(Error: there is no available check time after time change!)'
                    c.check_time = time.time()
                    c.execution_time = 0
                else:
                    c.t_to_go = new_t


    def manage_signal(self, sig, frame):
        # If we got USR1, just dump memory
        if sig == 10:
            self.sched.need_dump_memory = True
        else: # if not, die :)
            self.sched.die()
            self.must_run = False
            Daemon.manage_signal(self, sig, frame)


    def do_loop_turn(self):        
        # Ok, now the conf
        self.wait_for_initial_conf()
        if not self.new_conf:
            return
        print "Ok we've got conf"
        self.setup_new_conf()
        print "Configuration Loaded"
        self.sched.run()


    def setup_new_conf(self):
        #self.use_ssl = self.app.use_ssl
        (conf, override_conf, modules, satellites) = self.new_conf
        self.new_conf = None
        
        # In fact it make the scheduler just DIE as a bad guy. 
        # Must manage it better or not manage it at all!
        #if self.cur_conf and self.cur_conf.magic_hash == conf.magic_hash:
        #    print("I received a conf with same hash than me, I skip it.")
        #    return
        
        self.conf = conf
        self.cur_conf = conf
        self.override_conf = override_conf
        self.modules = modules
        self.satellites = satellites
        #self.pollers = self.app.pollers

        # Now We create our pollers
        for pol_id in satellites['pollers']:
            # Must look if we already have it
            already_got = pol_id in self.pollers
            p = satellites['pollers'][pol_id]
            self.pollers[pol_id] = p
            uri = pyro.create_uri(p['address'], p['port'], 'Schedulers', self.use_ssl)
            self.pollers[pol_id]['uri'] = uri
            self.pollers[pol_id]['last_connexion'] = 0
            print "Got a poller", p

        #First mix conf and override_conf to have our definitive conf
        for prop in self.override_conf:
            print "Overriding the property %s with value %s" % (prop, self.override_conf[prop])
            val = self.override_conf[prop]
            setattr(self.conf, prop, val)

        if self.conf.use_timezone != 'NOTSET':
            print "Setting our timezone to", self.conf.use_timezone
            os.environ['TZ'] = self.conf.use_timezone
            time.tzset()

        print "I've got modules", self.modules

        # TODO: if scheduler had previous modules instanciated it must clean them !
        self.modules_manager.set_modules(self.modules)
        self.do_load_modules()
        # And start external ones too
        self.modules_manager.start_external_instances()
        
        # give it an interface
        # But first remove previous interface if exists
        if self.ichecks is not None:
            print "Deconnecting previous Check Interface from pyro_daemon"
            self.pyro_daemon.unregister(self.ichecks)
        #Now create and connect it
        self.ichecks = IChecks(self.sched)
        self.uri = self.pyro_daemon.register(self.ichecks, "Checks")
        print "The Checks Interface uri is:", self.uri

        #Same for Broks
        if self.ibroks is not None:
            print "Deconnecting previous Broks Interface from pyro_daemon"
            self.pyro_daemon.unregister(self.ibroks)
        #Create and connect it
        self.ibroks = IBroks(self.sched)
        self.uri2 = self.pyro_daemon.register(self.ibroks, "Broks")
        print "The Broks Interface uri is:", self.uri2

        print("Loading configuration..")
        self.conf.explode_global_conf()
        
        #we give sched it's conf
        self.sched.reset()
        self.sched.load_conf(self.conf)
        self.sched.load_satellites(self.pollers, self.reactionners)

        #We must update our Config dict macro with good value
        #from the config parameters
        self.sched.conf.fill_resource_macros_names_macros()
        #print "DBG: got macors", self.sched.conf.macros

        #Creating the Macroresolver Class & unique instance
        m = MacroResolver()
        m.init(self.conf)

        #self.conf.dump()
        #self.conf.quick_debug()

        #Now create the external commander
        #it's a applyer : it role is not to dispatch commands,
        #but to apply them
        e = ExternalCommandManager(self.conf, 'applyer')

        #Scheduler need to know about external command to
        #activate it if necessery
        self.sched.load_external_command(e)

        #External command need the sched because he can raise checks
        e.load_scheduler(self.sched)


    # our main function, launch after the init
    def main(self):

        self.load_config_file()
        
        self.do_daemon_init_and_start()
        self.uri2 = self.pyro_daemon.register(self.interface, "ForArbiter")
        print "The Arbiter Interface is at:", self.uri2
        
        self.do_mainloop()
            
コード例 #10
0
class Broker(BaseSatellite):

    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile':
        PathProp(default='/usr/local/shinken/var/brokerd.pid'),
        'port':
        IntegerProp(default='7772'),
        'local_log':
        PathProp(default='/usr/local/shinken/var/brokerd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file):

        super(Broker, self).__init__('broker', config_file, is_daemon,
                                     do_replace, debug, debug_file)

        # Our arbiters
        self.arbiters = {}

        # Our pollers and reactionners
        self.pollers = {}
        self.reactionners = {}

        # Modules are load one time
        self.have_modules = False

        # Can have a queue of external_commands give by modules
        # will be taken by arbiter to process
        self.external_commands = []

        # All broks to manage
        self.broks = []  # broks to manage
        # broks raised this turn and that need to be put in self.broks
        self.broks_internal_raised = []

        self.timeout = 1.0

    # Schedulers have some queues. We can simplify call by adding
    # elements into the proper queue just by looking at their type
    # Brok -> self.broks
    # TODO : better tag ID?
    # External commands -> self.external_commands
    def add(self, elt):
        cls_type = elt.__class__.my_type
        if cls_type == 'brok':
            # For brok, we TAG brok with our instance_id
            elt.data['instance_id'] = 0
            self.broks_internal_raised.append(elt)
            return
        elif cls_type == 'externalcommand':
            print "Adding in queue an external command", ExternalCommand.__dict__
            self.external_commands.append(elt)
        # Maybe we got a Message from the modules, it's way to ask something
        #like from now a full data from a scheduler for example.
        elif cls_type == 'message':
            # We got a message, great!
            print elt.__dict__
            if elt.get_type() == 'NeedData':
                data = elt.get_data()
                # Full instance id mean : I got no data for this scheduler
                # so give me all dumbass!
                if 'full_instance_id' in data:
                    c_id = data['full_instance_id']
                    logger.log(
                        'A module is asking me to get all initial data from the scheduler %d'
                        % c_id)
                    # so we just reset the connexion adn the running_id, it will just get all new things
                    self.schedulers[c_id]['con'] = None
                    self.schedulers[c_id]['running_id'] = 0

    # Get teh good tabs for links by the kind. If unknown, return None
    def get_links_from_type(self, type):
        t = {'scheduler' : self.schedulers, 'arbiter' : self.arbiters, \
             'poller' : self.pollers, 'reactionner' : self.reactionners}
        if type in t:
            return t[type]
        return None

    # Call by arbiter to get our external commands
    def get_external_commands(self):
        res = self.external_commands
        self.external_commands = []
        return res

    # Check if we do not connect to ofthen to this
    def is_connexion_try_too_close(self, elt):
        now = time.time()
        last_connexion = elt['last_connexion']
        if now - last_connexion < 5:
            return True
        return False

    # initialise or re-initialise connexion with scheduler or
    # arbiter if type == arbiter
    def pynag_con_init(self, id, type='scheduler'):
        # Get teh good links tab for looping..
        links = self.get_links_from_type(type)
        if links is None:
            logger.log('DBG: Type unknown for connexion! %s' % type)
            return

        if type == 'scheduler':
            # If sched is not active, I do not try to init
            # it is just useless
            is_active = links[id]['active']
            if not is_active:
                return

        # If we try to connect too much, we slow down our tests
        if self.is_connexion_try_too_close(links[id]):
            return

        # Ok, we can now update it
        links[id]['last_connexion'] = time.time()

        # DBG: print "Init connexion with", links[id]['uri']
        running_id = links[id]['running_id']
        # DBG: print "Running id before connexion", running_id
        uri = links[id]['uri']
        links[id]['con'] = Pyro.core.getProxyForURI(uri)

        try:
            # intial ping must be quick
            pyro.set_timeout(links[id]['con'], 5)
            links[id]['con'].ping()
            new_run_id = links[id]['con'].get_running_id()
            # data transfert can be longer
            pyro.set_timeout(links[id]['con'], 120)

            # The schedulers have been restart : it has a new run_id.
            # So we clear all verifs, they are obsolete now.
            if new_run_id != running_id:
                print "[%s] New running id for the %s %s : %s (was %s)" % (
                    self.name, type, links[id]['name'], new_run_id, running_id)
                links[id]['broks'].clear()
                # we must ask for a enw full broks if
                # it's a scheduler
                if type == 'scheduler':
                    print "[%s] I ask for a broks generation to the scheduler %s" % (
                        self.name, links[id]['name'])
                    links[id]['con'].fill_initial_broks()
            # else:
            #     print "I do nto ask for brok generation"
            links[id]['running_id'] = new_run_id
        except (Pyro.errors.ProtocolError,
                Pyro.errors.CommunicationError), exp:
            logger.log("[%s] Connexion problem to the %s %s : %s" %
                       (self.name, type, links[id]['name'], str(exp)))
            links[id]['con'] = None
            return
        except Pyro.errors.NamingError, exp:
            logger.log("[%s] the %s '%s' is not initilised : %s" %
                       (self.name, type, links[id]['name'], str(exp)))
            links[id]['con'] = None
            return
コード例 #11
0
class Daemon(object):

    properties = {
        'workdir': PathProp(default='/usr/local/shinken/var'),
        'host': StringProp(default='0.0.0.0'),
        'user': StringProp(default='shinken'),
        'group': StringProp(default='shinken'),
        'use_ssl': BoolProp(default='0'),
        'certs_dir': StringProp(default='etc/certs'),
        'ca_cert': StringProp(default='etc/certs/ca.pem'),
        'server_cert': StringProp(default='etc/certs/server.pem'),
        'use_local_log': BoolProp(default='0'),
        'hard_ssl_name_check': BoolProp(default='0'),
        'idontcareaboutsecurity': BoolProp(default='0'),
        'spare': BoolProp(default='0')
    }

    def __init__(self, name, config_file, is_daemon, do_replace, debug,
                 debug_file):

        self.check_shm()

        self.name = name
        self.config_file = config_file
        self.is_daemon = is_daemon
        self.do_replace = do_replace
        self.debug = debug
        self.debug_file = debug_file
        self.interrupted = False

        # Track time
        now = time.time()
        self.program_start = now
        self.t_each_loop = now  # used to track system time change
        self.sleep_time = 0.0  #used to track the time we wait

        self.pyro_daemon = None

        # Log init
        self.log = logger
        self.log.load_obj(self)

        self.new_conf = None  # used by controller to push conf
        self.cur_conf = None

        # Flag to know if we need to dump memory or not
        self.need_dump_memory = False

        #Keep a trace of the local_log file desc if need
        self.local_log_fd = None

        self.modules_manager = ModulesManager(name, self.find_modules_path(),
                                              [])

        os.umask(UMASK)
        self.set_exit_handler()

    # At least, lose the local log file if need
    def do_stop(self):
        if self.modules_manager:
            # We save what we can but NOT for the scheduler
            # because the current sched object is a dummy one
            # and the old one aleady do it!
            if not hasattr(self, 'sched'):
                self.hook_point('save_retention')
            # And we quit
            logger.log('Stopping all modules')
            self.modules_manager.stop_all()
        if self.pyro_daemon:
            pyro.shutdown(self.pyro_daemon)  #.shutdown(True)
        logger.quit()

    def request_stop(self):
        self.unlink()  ## unlink first
        self.do_stop()
        print("Exiting")
        sys.exit(0)

    def do_loop_turn(self):
        raise NotImplementedError()

    # Main loop for nearly all daemon
    # the scheduler is not managed by it :'(
    def do_mainloop(self):
        while True:
            self.do_loop_turn()
            # If ask us to dump memory, do it
            if self.need_dump_memory:
                self.dump_memory()
                self.need_dump_memory = False
            # Maybe we ask us to die, if so, do it :)
            if self.interrupted:
                break
        self.request_stop()

    def do_load_modules(self):
        self.modules_manager.load_and_init()
        self.log.log("I correctly loaded the modules : [%s]" % (','.join(
            [inst.get_name() for inst in self.modules_manager.instances])))

    def add(self, elt):
        """ Dummy method for adding broker to this daemon """
        pass

    def dump_memory(self):
        logger.log("I dump my memory, it can ask some seconds to do")
        try:
            from guppy import hpy
            hp = hpy()
            logger.log(hp.heap())
        except ImportError:
            logger.log(
                'I do not have the module guppy for memory dump, please install it'
            )

    def load_config_file(self):
        self.parse_config_file()
        if self.config_file is not None:
            # Some paths can be relatives. We must have a full path by taking
            # the config file by reference
            self.relative_paths_to_full(os.path.dirname(self.config_file))
        # Then start to log all in the local file if asked so
        self.register_local_log()

    def change_to_workdir(self):
        try:
            os.chdir(self.workdir)
        except Exception, e:
            raise InvalidWorkDir(e)
        print("Successfully changed to workdir: %s" % (self.workdir))