Ejemplo n.º 1
0
    def __init__(self,
                 config_file,
                 is_daemon,
                 do_replace,
                 debug,
                 debug_file,
                 profile=''):

        BaseSatellite.__init__(self, 'scheduler', config_file, is_daemon,
                               do_replace, debug, debug_file)

        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        self.must_run = True

        # Now the interface
        self.uri = None
        self.uri2 = None

        # And possible links for satellites
        # from now only pollers
        self.pollers = {}
        self.reactionners = {}
        self.brokers = {}
Ejemplo n.º 2
0
    def __init__(self,
                 config_file,
                 is_daemon,
                 do_replace,
                 debug,
                 debug_file,
                 port=None,
                 local_log=None,
                 daemon_name=None):
        self.daemon_name = 'scheduler'
        if daemon_name:
            self.daemon_name = daemon_name

        BaseSatellite.__init__(self, self.daemon_name, config_file, is_daemon,
                               do_replace, debug, debug_file, port, local_log)

        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        self.must_run = True

        # Now the interface
        self.uri = None
        self.uri2 = None

        # stats part
        # --- copied from scheduler.py
        self.nb_pulled_checks = 0
        self.nb_pulled_actions = 0
        # self.nb_checks_send = 0

        self.nb_pushed_checks = 0
        self.nb_pushed_actions = 0

        self.nb_broks_send = 0
        self.nb_pulled_broks = 0
        # ---

        # And possible links for satellites
        # from now only pollers
        self.pollers = {}
        self.reactionners = {}
        self.brokers = {}
Ejemplo n.º 3
0
    def __init__(self, **kwargs):
        """Scheduler daemon initialisation

        :param kwargs: command line arguments
        """
        super(Alignak,
              self).__init__(kwargs.get('daemon_name', 'Default-scheduler'),
                             **kwargs)

        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        # stats part
        # --- copied from scheduler.py
        self.nb_pulled_checks = 0
        self.nb_pulled_actions = 0
        # self.nb_checks_send = 0

        self.nb_pushed_checks = 0
        self.nb_pushed_actions = 0

        self.nb_pulled_broks = 0
        # ---

        # And possible links for satellites
        self.brokers = {}
        self.pollers = {}
        self.reactionners = {}
        self.receivers = {}

        # This because it is the Satellite that has thes properties and I am a Satellite
        # todo: change this?
        # Broks are stored in each broker link, not locally
        # self.broks = []
        self.broks_lock = threading.RLock()

        # Modules are only loaded one time
        self.have_modules = False

        self.first_scheduling = False
Ejemplo n.º 4
0
    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file, profile=''):

        BaseSatellite.__init__(self, 'scheduler', config_file, is_daemon, do_replace, debug,
                               debug_file)

        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        self.must_run = True

        # Now the interface
        self.uri = None
        self.uri2 = None

        # And possible links for satellites
        # from now only pollers
        self.pollers = {}
        self.reactionners = {}
        self.brokers = {}
Ejemplo n.º 5
0
    def __init__(self, **kwargs):
        """Scheduler daemon initialisation

        :param kwargs: command line arguments
        """
        super(Alignak, self).__init__(kwargs.get('daemon_name', 'Default-scheduler'), **kwargs)

        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        # stats part
        # --- copied from scheduler.py
        self.nb_pulled_checks = 0
        self.nb_pulled_actions = 0
        # self.nb_checks_send = 0

        self.nb_pushed_checks = 0
        self.nb_pushed_actions = 0

        self.nb_pulled_broks = 0
        # ---

        # And possible links for satellites
        self.brokers = {}
        self.pollers = {}
        self.reactionners = {}
        self.receivers = {}

        # This because it is the Satellite that has thes properties and I am a Satellite
        # todo: change this?
        # Broks are stored in each broker link, not locally
        # self.broks = []
        self.broks_lock = threading.RLock()

        # Modules are only loaded one time
        self.have_modules = False

        self.first_scheduling = False
Ejemplo n.º 6
0
class Alignak(BaseSatellite):
    # pylint: disable=too-many-instance-attributes
    """Scheduler class. Referenced as "app" in most Interface

    """

    properties = BaseSatellite.properties.copy()
    properties.update({
        'type': StringProp(default='scheduler'),
        'port': IntegerProp(default=7768)
    })

    def __init__(self, **kwargs):
        """Scheduler daemon initialisation

        :param kwargs: command line arguments
        """
        super(Alignak,
              self).__init__(kwargs.get('daemon_name', 'Default-scheduler'),
                             **kwargs)

        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        # stats part
        # --- copied from scheduler.py
        self.nb_pulled_checks = 0
        self.nb_pulled_actions = 0
        # self.nb_checks_send = 0

        self.nb_pushed_checks = 0
        self.nb_pushed_actions = 0

        self.nb_pulled_broks = 0
        # ---

        # And possible links for satellites
        self.brokers = {}
        self.pollers = {}
        self.reactionners = {}
        self.receivers = {}

        # This because it is the Satellite that has thes properties and I am a Satellite
        # todo: change this?
        # Broks are stored in each broker link, not locally
        # self.broks = []
        self.broks_lock = threading.RLock()

        # Modules are only loaded one time
        self.have_modules = False

        self.first_scheduling = False

    def get_broks(self, broker_name):
        """Send broks to a specific broker

        :param broker_name: broker name to send broks
        :type broker_name: str
        :greturn: dict of brok for this broker
        :rtype: dict[alignak.brok.Brok]
        """
        logger.debug("Broker %s requests my broks list", broker_name)
        res = []
        if not broker_name:
            return res

        for broker_link in list(self.brokers.values()):
            if broker_name == broker_link.name:
                for brok in sorted(broker_link.broks,
                                   key=lambda x: x.creation_time):
                    # Only provide broks that did not yet sent to our external modules
                    if getattr(brok, 'sent_to_externals', False):
                        res.append(brok)
                        brok.got = True
                broker_link.broks = [
                    b for b in broker_link.broks
                    if not getattr(b, 'got', False)
                ]
                logger.debug("Providing %d broks to %s", len(res), broker_name)
                break
        else:
            logger.warning("Got a brok request from an unknown broker: %s",
                           broker_name)

        return res

    def compensate_system_time_change(self, difference):  # pragma: no cover,
        # pylint: disable=too-many-branches
        # not with unit tests
        """Compensate a system time change of difference for all hosts/services/checks/notifs

        :param difference: difference in seconds
        :type difference: int
        :return: None
        """
        super(Alignak, self).compensate_system_time_change(difference)

        # We only need to change some value
        self.program_start = max(0, self.program_start + difference)

        if not hasattr(self.sched, "conf"):
            # Race condition where time change before getting conf
            return

        # Then we compensate all host/services
        for host in self.sched.hosts:
            host.compensate_system_time_change(difference)
        for serv in self.sched.services:
            serv.compensate_system_time_change(difference)

        # Now all checks and actions
        for chk in list(self.sched.checks.values()):
            # Already launch checks should not be touch
            if chk.status == u'scheduled' and chk.t_to_go is not None:
                t_to_go = chk.t_to_go
                ref = self.sched.find_item_by_id(chk.ref)
                new_t = max(0, t_to_go + difference)
                timeperiod = self.sched.timeperiods[ref.check_period]
                if timeperiod is not None:
                    # But it's no so simple, we must match the timeperiod
                    new_t = timeperiod.get_next_valid_time_from_t(new_t)
                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    chk.state = u'waitconsume'
                    chk.exit_status = 2
                    chk.output = '(Error: there is no available check time after time change!)'
                    chk.check_time = time.time()
                    chk.execution_time = 0
                else:
                    chk.t_to_go = new_t
                    ref.next_chk = new_t

        # Now all checks and actions
        for act in list(self.sched.actions.values()):
            # Already launch checks should not be touch
            if act.status == u'scheduled':
                t_to_go = act.t_to_go

                #  Event handler do not have ref
                ref_id = getattr(act, 'ref', None)
                new_t = max(0, t_to_go + difference)

                # Notification should be check with notification_period
                if act.is_a == u'notification':
                    ref = self.sched.find_item_by_id(ref_id)
                    if ref.notification_period:
                        # But it's no so simple, we must match the timeperiod
                        notification_period = self.sched.timeperiods[
                            ref.notification_period]
                        new_t = notification_period.get_next_valid_time_from_t(
                            new_t)
                    # And got a creation_time variable too
                    act.creation_time += difference

                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    act.state = 'waitconsume'
                    act.exit_status = 2
                    act.output = '(Error: there is no available check time after time change!)'
                    act.check_time = time.time()
                    act.execution_time = 0
                else:
                    act.t_to_go = new_t

    def do_before_loop(self):
        """Stop the scheduling process"""
        if self.sched:
            self.sched.stop_scheduling()

    def do_loop_turn(self):
        """Scheduler loop turn

        Simply run the Alignak scheduler loop

        This is called when a configuration got received by the scheduler daemon. As of it,
        check if the first scheduling has been done... and manage this.

        :return: None
        """
        if not self.first_scheduling:
            # Ok, now all is initialized, we can make the initial broks
            logger.info("First scheduling launched")
            _t0 = time.time()
            # Program start brok
            self.sched.initial_program_status()
            # First scheduling
            self.sched.schedule()
            statsmgr.timer('first_scheduling', time.time() - _t0)
            logger.info("First scheduling done")

            # Connect to our passive satellites if needed
            for satellite in [
                    s for s in list(self.pollers.values()) if s.passive
            ]:
                if not self.daemon_connection_init(satellite):
                    logger.error("Passive satellite connection failed: %s",
                                 satellite)

            for satellite in [
                    s for s in list(self.reactionners.values()) if s.passive
            ]:
                if not self.daemon_connection_init(satellite):
                    logger.error("Passive satellite connection failed: %s",
                                 satellite)

            # Ticks are for recurrent function call like consume, del zombies etc
            self.sched.ticks = 0
            self.first_scheduling = True

        # Each loop turn, execute the daemon specific treatment...
        # only if the daemon has a configuration to manage
        if self.sched.pushed_conf:
            # If scheduling is not yet enabled, enable scheduling
            if not self.sched.must_schedule:
                self.sched.start_scheduling()
                self.sched.before_run()
            self.sched.run()
        else:
            logger.warning("#%d - No monitoring configuration to scheduler...",
                           self.loop_count)

    def get_managed_configurations(self):
        """Get the configurations managed by this scheduler

        The configuration managed by a scheduler is the self configuration got
        by the scheduler during the dispatching.

        :return: a dict of scheduler links with instance_id as key and
        hash, push_flavor and configuration identifier as values
        :rtype: dict
        """
        # for scheduler_link in list(self.schedulers.values()):
        #     res[scheduler_link.instance_id] = {
        #         'hash': scheduler_link.hash,
        #         'push_flavor': scheduler_link.push_flavor,
        #         'managed_conf_id': scheduler_link.managed_conf_id
        #     }

        res = {}
        if self.sched.pushed_conf and self.cur_conf and 'instance_id' in self.cur_conf:
            res[self.cur_conf['instance_id']] = {
                'hash': self.cur_conf['hash'],
                'push_flavor': self.cur_conf['push_flavor'],
                'managed_conf_id': self.cur_conf['managed_conf_id']
            }
        logger.debug("Get managed configuration: %s", res)
        return res

    def setup_new_conf(self):
        # pylint: disable=too-many-statements, too-many-branches, too-many-locals
        """Setup new conf received for scheduler

        :return: None
        """
        # Execute the base class treatment...
        super(Alignak, self).setup_new_conf()

        # ...then our own specific treatment!
        with self.conf_lock:
            # self_conf is our own configuration from the alignak environment
            # self_conf = self.cur_conf['self_conf']
            logger.debug("Got config: %s", self.cur_conf)
            if 'conf_part' not in self.cur_conf:
                self.cur_conf['conf_part'] = None
            conf_part = self.cur_conf['conf_part']

            # Ok now we can save the retention data
            if self.sched.pushed_conf is not None:
                self.sched.update_retention()

            # Get the monitored objects configuration
            t00 = time.time()
            received_conf_part = None
            try:
                received_conf_part = unserialize(conf_part)
                assert received_conf_part is not None
            except AssertionError as exp:
                # This to indicate that no configuration is managed by this scheduler...
                logger.warning(
                    "No managed configuration received from arbiter")
            except AlignakClassLookupException as exp:  # pragma: no cover
                # This to indicate that the new configuration is not managed...
                self.new_conf = {
                    "_status":
                    "Cannot un-serialize configuration received from arbiter",
                    "_error": str(exp)
                }
                logger.error(self.new_conf)
                logger.error("Back trace of the error:\n%s",
                             traceback.format_exc())
                return
            except Exception as exp:  # pylint: disable=broad-except
                # This to indicate that the new configuration is not managed...
                self.new_conf = {
                    "_status":
                    "Cannot un-serialize configuration received from arbiter",
                    "_error": str(exp)
                }
                logger.error(self.new_conf)
                self.exit_on_exception(exp, str(self.new_conf))

            # if not received_conf_part:
            #     return

            logger.info(
                "Monitored configuration %s received at %d. Un-serialized in %d secs",
                received_conf_part, t00,
                time.time() - t00)
            logger.info("Scheduler received configuration : %s",
                        received_conf_part)

            # Now we create our pollers, reactionners and brokers
            for link_type in ['pollers', 'reactionners', 'brokers']:
                if link_type not in self.cur_conf['satellites']:
                    logger.error("Missing %s in the configuration!", link_type)
                    continue

                my_satellites = getattr(self, link_type, {})
                received_satellites = self.cur_conf['satellites'][link_type]
                for link_uuid in received_satellites:
                    rs_conf = received_satellites[link_uuid]
                    logger.debug("- received %s - %s: %s",
                                 rs_conf['instance_id'], rs_conf['type'],
                                 rs_conf['name'])

                    # Must look if we already had a configuration and save our broks
                    already_got = rs_conf['instance_id'] in my_satellites
                    broks = []
                    actions = {}
                    wait_homerun = {}
                    external_commands = {}
                    running_id = 0
                    if already_got:
                        logger.warning("I already got: %s",
                                       rs_conf['instance_id'])
                        # Save some information
                        running_id = my_satellites[link_uuid].running_id
                        (broks, actions,
                         wait_homerun, external_commands) = \
                            my_satellites[link_uuid].get_and_clear_context()
                        # Delete the former link
                        del my_satellites[link_uuid]

                    # My new satellite link...
                    new_link = SatelliteLink.get_a_satellite_link(
                        link_type[:-1], rs_conf)
                    my_satellites[new_link.uuid] = new_link
                    logger.info("I got a new %s satellite: %s", link_type[:-1],
                                new_link)

                    new_link.running_id = running_id
                    new_link.external_commands = external_commands
                    new_link.broks = broks
                    new_link.wait_homerun = wait_homerun
                    new_link.actions = actions

                    # Replacing the satellite address and port by those defined in satellite_map
                    if new_link.name in self.cur_conf['override_conf'].get(
                            'satellite_map', {}):
                        override_conf = self.cur_conf['override_conf']
                        overriding = override_conf.get('satellite_map')[
                            new_link.name]
                        logger.warning(
                            "Do not override the configuration for: %s, with: %s. "
                            "Please check whether this is necessary!",
                            new_link.name, overriding)

            # First mix conf and override_conf to have our definitive conf
            for prop in getattr(self.cur_conf, 'override_conf', []):
                logger.debug("Overriden: %s / %s ", prop,
                             getattr(received_conf_part, prop, None))
                logger.debug("Overriding: %s / %s ", prop,
                             self.cur_conf['override_conf'])
                setattr(received_conf_part, prop,
                        self.cur_conf['override_conf'].get(prop, None))

            # Scheduler modules
            if not self.have_modules:
                try:
                    logger.debug("Modules configuration: %s",
                                 self.cur_conf['modules'])
                    self.modules = unserialize(self.cur_conf['modules'],
                                               no_load=True)
                except AlignakClassLookupException as exp:  # pragma: no cover, simple protection
                    logger.error(
                        'Cannot un-serialize modules configuration '
                        'received from arbiter: %s', exp)
                if self.modules:
                    logger.debug("I received some modules configuration: %s",
                                 self.modules)
                    self.have_modules = True

                    self.do_load_modules(self.modules)
                    # and start external modules too
                    self.modules_manager.start_external_instances()
                else:
                    logger.info("I do not have modules")

            if received_conf_part:
                logger.info("Loading configuration...")

                # Propagate the global parameters to the configuration items
                received_conf_part.explode_global_conf()

                # We give the configuration to our scheduler
                self.sched.reset()
                self.sched.load_conf(self.cur_conf['instance_id'],
                                     self.cur_conf['instance_name'],
                                     received_conf_part)

                # Once loaded, the scheduler has an inner pushed_conf object
                logger.info("Loaded: %s", self.sched.pushed_conf)

                # Update the scheduler ticks according to the daemon configuration
                self.sched.update_recurrent_works_tick(self)

                # We must update our pushed configuration macros with correct values
                # from the configuration parameters
                # self.sched.pushed_conf.fill_resource_macros_names_macros()

                # Creating the Macroresolver Class & unique instance
                m_solver = MacroResolver()
                m_solver.init(received_conf_part)

                # Now create the external commands manager
                # We are an applyer: our role is not to dispatch commands, but to apply them
                ecm = ExternalCommandManager(
                    received_conf_part, 'applyer', self.sched,
                    received_conf_part.accept_passive_unknown_check_results,
                    received_conf_part.log_external_commands)

                # Scheduler needs to know about this external command manager to use it if necessary
                self.sched.external_commands_manager = ecm

                # Ok now we can load the retention data
                self.sched.retention_load()

                # Log hosts/services initial states
                self.sched.log_initial_states()

            # Create brok new conf
            brok = Brok({'type': 'new_conf', 'data': {}})
            self.sched.add_brok(brok)

            # Initialize connection with all our satellites
            logger.info("Initializing connection with my satellites:")
            my_satellites = self.get_links_of_type(s_type='')
            for satellite in list(my_satellites.values()):
                logger.info("- : %s/%s", satellite.type, satellite.name)
                if not self.daemon_connection_init(satellite):
                    logger.error("Satellite connection failed: %s", satellite)

            if received_conf_part:
                # Enable the scheduling process
                logger.info("Loaded: %s", self.sched.pushed_conf)
                self.sched.start_scheduling()

        # Now I have a configuration!
        self.have_conf = True

    def clean_previous_run(self):
        """Clean variables from previous configuration

        :return: None
        """
        # Execute the base class treatment...
        super(Alignak, self).clean_previous_run()

        # Clean all lists
        self.pollers.clear()
        self.reactionners.clear()
        self.brokers.clear()

    def get_daemon_stats(self, details=False):
        """Increase the stats provided by the Daemon base class

        :return: stats dictionary
        :rtype: dict
        """
        # Call the base Daemon one
        res = super(Alignak, self).get_daemon_stats(details=details)

        res.update({
            'name': self.name,
            'type': self.type,
            'monitored_objects': {}
        })

        counters = res['counters']

        # Satellites counters
        counters['brokers'] = len(self.brokers)
        counters['pollers'] = len(self.pollers)
        counters['reactionners'] = len(self.reactionners)
        counters['receivers'] = len(self.receivers)

        if not self.sched:
            return res

        # # Hosts/services problems counters
        # m_solver = MacroResolver()
        # counters['hosts_problems'] = m_solver._get_total_host_problems()
        # counters['hosts_unhandled_problems'] = m_solver._get_total_host_problems_unhandled()
        # counters['services_problems'] = m_solver._get_total_service_problems()
        # counters['services_unhandled_problems'] = m_solver._get_total_service_problems_unhandled()

        # Get statistics from the scheduler
        scheduler_stats = self.sched.get_scheduler_stats(details=details)
        res['counters'].update(scheduler_stats['counters'])
        scheduler_stats.pop('counters')
        res.update(scheduler_stats)

        return res

    def get_monitoring_problems(self):
        """Get the current scheduler livesynthesis

        :return: live synthesis and problems dictionary
        :rtype: dict
        """
        res = {}
        if not self.sched:
            return res

        # Get statistics from the scheduler
        scheduler_stats = self.sched.get_scheduler_stats(details=True)
        if 'livesynthesis' in scheduler_stats:
            res['livesynthesis'] = scheduler_stats['livesynthesis']
        if 'problems' in scheduler_stats:
            res['problems'] = scheduler_stats['problems']

        return res

    def main(self):
        """Main function for Scheduler, launch after the init::

        * Init daemon
        * Load module manager
        * Launch main loop
        * Catch any Exception that occurs

        :return: None
        """
        try:
            # Start the daemon mode
            if not self.do_daemon_init_and_start():
                self.exit_on_error(message="Daemon initialization error",
                                   exit_code=3)

            #  We wait for initial conf
            self.wait_for_initial_conf()
            if self.new_conf:
                # Setup the received configuration
                self.setup_new_conf()

                # Now the main loop
                self.do_main_loop()
                logger.info("Exited from the main loop.")

                # On main loop exit, call the scheduler after run process
                self.sched.after_run()

            self.request_stop()
        except Exception:  # pragma: no cover, this should never happen indeed ;)
            self.exit_on_exception(traceback.format_exc())
            raise
Ejemplo n.º 7
0
class Alignak(BaseSatellite):
    # pylint: disable=too-many-instance-attributes
    """Scheduler class. Referenced as "app" in most Interface

    """

    properties = BaseSatellite.properties.copy()
    properties.update({
        'type':
            StringProp(default='scheduler'),
        'port':
            IntegerProp(default=7768)
    })

    def __init__(self, **kwargs):
        """Scheduler daemon initialisation

        :param kwargs: command line arguments
        """
        super(Alignak, self).__init__(kwargs.get('daemon_name', 'Default-scheduler'), **kwargs)

        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        # stats part
        # --- copied from scheduler.py
        self.nb_pulled_checks = 0
        self.nb_pulled_actions = 0
        # self.nb_checks_send = 0

        self.nb_pushed_checks = 0
        self.nb_pushed_actions = 0

        self.nb_pulled_broks = 0
        # ---

        # And possible links for satellites
        self.brokers = {}
        self.pollers = {}
        self.reactionners = {}
        self.receivers = {}

        # This because it is the Satellite that has thes properties and I am a Satellite
        # todo: change this?
        # Broks are stored in each broker link, not locally
        # self.broks = []
        self.broks_lock = threading.RLock()

        # Modules are only loaded one time
        self.have_modules = False

        self.first_scheduling = False

    def get_broks(self, broker_name):
        """Send broks to a specific broker

        :param broker_name: broker name to send broks
        :type broker_name: str
        :greturn: dict of brok for this broker
        :rtype: dict[alignak.brok.Brok]
        """
        logger.debug("Broker %s requests my broks list", broker_name)
        res = []
        if not broker_name:
            return res

        for broker_link in list(self.brokers.values()):
            if broker_name == broker_link.name:
                for brok in sorted(broker_link.broks, key=lambda x: x.creation_time):
                    # Only provide broks that did not yet sent to our external modules
                    if getattr(brok, 'sent_to_externals', False):
                        res.append(brok)
                        brok.got = True
                broker_link.broks = [b for b in broker_link.broks if not getattr(b, 'got', False)]
                logger.debug("Providing %d broks to %s", len(res), broker_name)
                break
        else:
            logger.warning("Got a brok request from an unknown broker: %s", broker_name)

        return res

    def compensate_system_time_change(self, difference):  # pragma: no cover,
        # pylint: disable=too-many-branches
        # not with unit tests
        """Compensate a system time change of difference for all hosts/services/checks/notifs

        :param difference: difference in seconds
        :type difference: int
        :return: None
        """
        super(Alignak, self).compensate_system_time_change(difference)

        # We only need to change some value
        self.program_start = max(0, self.program_start + difference)

        if not hasattr(self.sched, "conf"):
            # Race condition where time change before getting conf
            return

        # Then we compensate all host/services
        for host in self.sched.hosts:
            host.compensate_system_time_change(difference)
        for serv in self.sched.services:
            serv.compensate_system_time_change(difference)

        # Now all checks and actions
        for chk in list(self.sched.checks.values()):
            # Already launch checks should not be touch
            if chk.status == u'scheduled' and chk.t_to_go is not None:
                t_to_go = chk.t_to_go
                ref = self.sched.find_item_by_id(chk.ref)
                new_t = max(0, t_to_go + difference)
                timeperiod = self.sched.timeperiods[ref.check_period]
                if timeperiod is not None:
                    # But it's no so simple, we must match the timeperiod
                    new_t = timeperiod.get_next_valid_time_from_t(new_t)
                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    chk.state = u'waitconsume'
                    chk.exit_status = 2
                    chk.output = '(Error: there is no available check time after time change!)'
                    chk.check_time = time.time()
                    chk.execution_time = 0
                else:
                    chk.t_to_go = new_t
                    ref.next_chk = new_t

        # Now all checks and actions
        for act in list(self.sched.actions.values()):
            # Already launch checks should not be touch
            if act.status == u'scheduled':
                t_to_go = act.t_to_go

                #  Event handler do not have ref
                ref_id = getattr(act, 'ref', None)
                new_t = max(0, t_to_go + difference)

                # Notification should be check with notification_period
                if act.is_a == u'notification':
                    ref = self.sched.find_item_by_id(ref_id)
                    if ref.notification_period:
                        # But it's no so simple, we must match the timeperiod
                        notification_period = self.sched.timeperiods[ref.notification_period]
                        new_t = notification_period.get_next_valid_time_from_t(new_t)
                    # And got a creation_time variable too
                    act.creation_time += difference

                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    act.state = 'waitconsume'
                    act.exit_status = 2
                    act.output = '(Error: there is no available check time after time change!)'
                    act.check_time = time.time()
                    act.execution_time = 0
                else:
                    act.t_to_go = new_t

    def do_before_loop(self):
        """Stop the scheduling process"""
        if self.sched:
            self.sched.stop_scheduling()

    def do_loop_turn(self):
        """Scheduler loop turn

        Simply run the Alignak scheduler loop

        This is called when a configuration got received by the scheduler daemon. As of it,
        check if the first scheduling has been done... and manage this.

        :return: None
        """
        if not self.first_scheduling:
            # Ok, now all is initialized, we can make the initial broks
            logger.info("First scheduling launched")
            _t0 = time.time()
            # Program start brok
            self.sched.initial_program_status()
            # First scheduling
            self.sched.schedule()
            statsmgr.timer('first_scheduling', time.time() - _t0)
            logger.info("First scheduling done")

            # Connect to our passive satellites if needed
            for satellite in [s for s in list(self.pollers.values()) if s.passive]:
                if not self.daemon_connection_init(satellite):
                    logger.error("Passive satellite connection failed: %s", satellite)

            for satellite in [s for s in list(self.reactionners.values()) if s.passive]:
                if not self.daemon_connection_init(satellite):
                    logger.error("Passive satellite connection failed: %s", satellite)

            # Ticks are for recurrent function call like consume, del zombies etc
            self.sched.ticks = 0
            self.first_scheduling = True

        # Each loop turn, execute the daemon specific treatment...
        # only if the daemon has a configuration to manage
        if self.sched.pushed_conf:
            # If scheduling is not yet enabled, enable scheduling
            if not self.sched.must_schedule:
                self.sched.start_scheduling()
                self.sched.before_run()
            self.sched.run()
        else:
            logger.warning("#%d - No monitoring configuration to scheduler...",
                           self.loop_count)

    def get_managed_configurations(self):
        """Get the configurations managed by this scheduler

        The configuration managed by a scheduler is the self configuration got
        by the scheduler during the dispatching.

        :return: a dict of scheduler links with instance_id as key and
        hash, push_flavor and configuration identifier as values
        :rtype: dict
        """
        # for scheduler_link in list(self.schedulers.values()):
        #     res[scheduler_link.instance_id] = {
        #         'hash': scheduler_link.hash,
        #         'push_flavor': scheduler_link.push_flavor,
        #         'managed_conf_id': scheduler_link.managed_conf_id
        #     }

        res = {}
        if self.sched.pushed_conf and self.cur_conf and 'instance_id' in self.cur_conf:
            res[self.cur_conf['instance_id']] = {
                'hash': self.cur_conf['hash'],
                'push_flavor': self.cur_conf['push_flavor'],
                'managed_conf_id': self.cur_conf['managed_conf_id']
            }
        logger.debug("Get managed configuration: %s", res)
        return res

    def setup_new_conf(self):
        # pylint: disable=too-many-statements, too-many-branches, too-many-locals
        """Setup new conf received for scheduler

        :return: None
        """
        # Execute the base class treatment...
        super(Alignak, self).setup_new_conf()

        # ...then our own specific treatment!
        with self.conf_lock:
            # self_conf is our own configuration from the alignak environment
            # self_conf = self.cur_conf['self_conf']
            logger.debug("Got config: %s", self.cur_conf)
            if 'conf_part' not in self.cur_conf:
                self.cur_conf['conf_part'] = None
            conf_part = self.cur_conf['conf_part']

            # Ok now we can save the retention data
            if self.sched.pushed_conf is not None:
                self.sched.update_retention()

            # Get the monitored objects configuration
            t00 = time.time()
            received_conf_part = None
            try:
                received_conf_part = unserialize(conf_part)
                assert received_conf_part is not None
            except AssertionError as exp:
                # This to indicate that no configuration is managed by this scheduler...
                logger.warning("No managed configuration received from arbiter")
            except AlignakClassLookupException as exp:  # pragma: no cover
                # This to indicate that the new configuration is not managed...
                self.new_conf = {
                    "_status": "Cannot un-serialize configuration received from arbiter",
                    "_error": str(exp)
                }
                logger.error(self.new_conf)
                logger.error("Back trace of the error:\n%s", traceback.format_exc())
                return
            except Exception as exp:  # pylint: disable=broad-except
                # This to indicate that the new configuration is not managed...
                self.new_conf = {
                    "_status": "Cannot un-serialize configuration received from arbiter",
                    "_error": str(exp)
                }
                logger.error(self.new_conf)
                self.exit_on_exception(exp, str(self.new_conf))

            # if not received_conf_part:
            #     return

            logger.info("Monitored configuration %s received at %d. Un-serialized in %d secs",
                        received_conf_part, t00, time.time() - t00)
            logger.info("Scheduler received configuration : %s", received_conf_part)

            # Now we create our pollers, reactionners and brokers
            for link_type in ['pollers', 'reactionners', 'brokers']:
                if link_type not in self.cur_conf['satellites']:
                    logger.error("Missing %s in the configuration!", link_type)
                    continue

                my_satellites = getattr(self, link_type, {})
                received_satellites = self.cur_conf['satellites'][link_type]
                for link_uuid in received_satellites:
                    rs_conf = received_satellites[link_uuid]
                    logger.debug("- received %s - %s: %s", rs_conf['instance_id'],
                                 rs_conf['type'], rs_conf['name'])

                    # Must look if we already had a configuration and save our broks
                    already_got = rs_conf['instance_id'] in my_satellites
                    broks = []
                    actions = {}
                    wait_homerun = {}
                    external_commands = {}
                    running_id = 0
                    if already_got:
                        logger.warning("I already got: %s", rs_conf['instance_id'])
                        # Save some information
                        running_id = my_satellites[link_uuid].running_id
                        (broks, actions,
                         wait_homerun, external_commands) = \
                            my_satellites[link_uuid].get_and_clear_context()
                        # Delete the former link
                        del my_satellites[link_uuid]

                    # My new satellite link...
                    new_link = SatelliteLink.get_a_satellite_link(link_type[:-1],
                                                                  rs_conf)
                    my_satellites[new_link.uuid] = new_link
                    logger.info("I got a new %s satellite: %s", link_type[:-1], new_link)

                    new_link.running_id = running_id
                    new_link.external_commands = external_commands
                    new_link.broks = broks
                    new_link.wait_homerun = wait_homerun
                    new_link.actions = actions

                    # Replacing the satellite address and port by those defined in satellite_map
                    if new_link.name in self.cur_conf['override_conf'].get('satellite_map', {}):
                        override_conf = self.cur_conf['override_conf']
                        overriding = override_conf.get('satellite_map')[new_link.name]
                        logger.warning("Do not override the configuration for: %s, with: %s. "
                                       "Please check whether this is necessary!",
                                       new_link.name, overriding)

            # First mix conf and override_conf to have our definitive conf
            for prop in getattr(self.cur_conf, 'override_conf', []):
                logger.debug("Overriden: %s / %s ", prop, getattr(received_conf_part, prop, None))
                logger.debug("Overriding: %s / %s ", prop, self.cur_conf['override_conf'])
                setattr(received_conf_part, prop, self.cur_conf['override_conf'].get(prop, None))

            # Scheduler modules
            if not self.have_modules:
                try:
                    logger.debug("Modules configuration: %s", self.cur_conf['modules'])
                    self.modules = unserialize(self.cur_conf['modules'], no_load=True)
                except AlignakClassLookupException as exp:  # pragma: no cover, simple protection
                    logger.error('Cannot un-serialize modules configuration '
                                 'received from arbiter: %s', exp)
                if self.modules:
                    logger.debug("I received some modules configuration: %s", self.modules)
                    self.have_modules = True

                    self.do_load_modules(self.modules)
                    # and start external modules too
                    self.modules_manager.start_external_instances()
                else:
                    logger.info("I do not have modules")

            if received_conf_part:
                logger.info("Loading configuration...")

                # Propagate the global parameters to the configuration items
                received_conf_part.explode_global_conf()

                # We give the configuration to our scheduler
                self.sched.reset()
                self.sched.load_conf(self.cur_conf['instance_id'],
                                     self.cur_conf['instance_name'],
                                     received_conf_part)

                # Once loaded, the scheduler has an inner pushed_conf object
                logger.info("Loaded: %s", self.sched.pushed_conf)

                # Update the scheduler ticks according to the daemon configuration
                self.sched.update_recurrent_works_tick(self)

                # We must update our pushed configuration macros with correct values
                # from the configuration parameters
                # self.sched.pushed_conf.fill_resource_macros_names_macros()

                # Creating the Macroresolver Class & unique instance
                m_solver = MacroResolver()
                m_solver.init(received_conf_part)

                # Now create the external commands manager
                # We are an applyer: our role is not to dispatch commands, but to apply them
                ecm = ExternalCommandManager(
                    received_conf_part, 'applyer', self.sched,
                    received_conf_part.accept_passive_unknown_check_results,
                    received_conf_part.log_external_commands)

                # Scheduler needs to know about this external command manager to use it if necessary
                self.sched.external_commands_manager = ecm

                # Ok now we can load the retention data
                self.sched.retention_load()

                # Log hosts/services initial states
                self.sched.log_initial_states()

            # Create brok new conf
            brok = Brok({'type': 'new_conf', 'data': {}})
            self.sched.add_brok(brok)

            # Initialize connection with all our satellites
            logger.info("Initializing connection with my satellites:")
            my_satellites = self.get_links_of_type(s_type='')
            for satellite in list(my_satellites.values()):
                logger.info("- : %s/%s", satellite.type, satellite.name)
                if not self.daemon_connection_init(satellite):
                    logger.error("Satellite connection failed: %s", satellite)

            if received_conf_part:
                # Enable the scheduling process
                logger.info("Loaded: %s", self.sched.pushed_conf)
                self.sched.start_scheduling()

        # Now I have a configuration!
        self.have_conf = True

    def clean_previous_run(self):
        """Clean variables from previous configuration

        :return: None
        """
        # Execute the base class treatment...
        super(Alignak, self).clean_previous_run()

        # Clean all lists
        self.pollers.clear()
        self.reactionners.clear()
        self.brokers.clear()

    def get_daemon_stats(self, details=False):
        """Increase the stats provided by the Daemon base class

        :return: stats dictionary
        :rtype: dict
        """
        # Call the base Daemon one
        res = super(Alignak, self).get_daemon_stats(details=details)

        res.update({'name': self.name, 'type': self.type, 'monitored_objects': {}})

        counters = res['counters']

        # Satellites counters
        counters['brokers'] = len(self.brokers)
        counters['pollers'] = len(self.pollers)
        counters['reactionners'] = len(self.reactionners)
        counters['receivers'] = len(self.receivers)

        if not self.sched:
            return res

        # # Hosts/services problems counters
        # m_solver = MacroResolver()
        # counters['hosts_problems'] = m_solver._get_total_host_problems()
        # counters['hosts_unhandled_problems'] = m_solver._get_total_host_problems_unhandled()
        # counters['services_problems'] = m_solver._get_total_service_problems()
        # counters['services_unhandled_problems'] = m_solver._get_total_service_problems_unhandled()

        # Get statistics from the scheduler
        scheduler_stats = self.sched.get_scheduler_stats(details=details)
        res['counters'].update(scheduler_stats['counters'])
        scheduler_stats.pop('counters')
        res.update(scheduler_stats)

        return res

    def get_monitoring_problems(self):
        """Get the current scheduler livesynthesis

        :return: live synthesis and problems dictionary
        :rtype: dict
        """
        res = {}
        if not self.sched:
            return res

        # Get statistics from the scheduler
        scheduler_stats = self.sched.get_scheduler_stats(details=True)
        if 'livesynthesis' in scheduler_stats:
            res['livesynthesis'] = scheduler_stats['livesynthesis']
        if 'problems' in scheduler_stats:
            res['problems'] = scheduler_stats['problems']

        return res

    def main(self):
        """Main function for Scheduler, launch after the init::

        * Init daemon
        * Load module manager
        * Launch main loop
        * Catch any Exception that occurs

        :return: None
        """
        try:
            # Start the daemon mode
            if not self.do_daemon_init_and_start():
                self.exit_on_error(message="Daemon initialization error", exit_code=3)

            #  We wait for initial conf
            self.wait_for_initial_conf()
            if self.new_conf:
                # Setup the received configuration
                self.setup_new_conf()

                # Now the main loop
                self.do_main_loop()
                logger.info("Exited from the main loop.")

                # On main loop exit, call the scheduler after run process
                self.sched.after_run()

            self.request_stop()
        except Exception:  # pragma: no cover, this should never happen indeed ;)
            self.exit_on_exception(traceback.format_exc())
            raise
Ejemplo n.º 8
0
class Alignak(BaseSatellite):
    """Scheduler class. Referenced as "app" in most Interface

    """

    properties = BaseSatellite.properties.copy()
    properties.update({
        'daemon_type': StringProp(default='scheduler'),
        'pidfile': PathProp(default='schedulerd.pid'),
        'port': IntegerProp(default=7768),
        'local_log': PathProp(default='schedulerd.log'),
    })

    def __init__(self,
                 config_file,
                 is_daemon,
                 do_replace,
                 debug,
                 debug_file,
                 port=None,
                 local_log=None,
                 daemon_name=None):
        self.daemon_name = 'scheduler'
        if daemon_name:
            self.daemon_name = daemon_name

        BaseSatellite.__init__(self, self.daemon_name, config_file, is_daemon,
                               do_replace, debug, debug_file, port, local_log)

        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        self.must_run = True

        # Now the interface
        self.uri = None
        self.uri2 = None

        # stats part
        # --- copied from scheduler.py
        self.nb_pulled_checks = 0
        self.nb_pulled_actions = 0
        # self.nb_checks_send = 0

        self.nb_pushed_checks = 0
        self.nb_pushed_actions = 0

        self.nb_broks_send = 0
        self.nb_pulled_broks = 0
        # ---

        # And possible links for satellites
        # from now only pollers
        self.pollers = {}
        self.reactionners = {}
        self.brokers = {}

    def compensate_system_time_change(self, difference,
                                      timeperiods):  # pragma: no cover,
        # not with unit tests
        """Compensate a system time change of difference for all hosts/services/checks/notifs

        :param difference: difference in seconds
        :type difference: int
        :return: None
        """
        logger.warning(
            "A system time change of %d has been detected. Compensating...",
            difference)
        # We only need to change some value
        self.program_start = max(0, self.program_start + difference)

        if not hasattr(self.sched, "conf"):
            # Race condition where time change before getting conf
            return

        # Then we compensate all host/services
        for host in self.sched.hosts:
            host.compensate_system_time_change(difference)
        for serv in self.sched.services:
            serv.compensate_system_time_change(difference)

        # Now all checks and actions
        for chk in self.sched.checks.values():
            # Already launch checks should not be touch
            if chk.status == 'scheduled' and chk.t_to_go is not None:
                t_to_go = chk.t_to_go
                ref = self.sched.find_item_by_id(chk.ref)
                new_t = max(0, t_to_go + difference)
                timeperiod = timeperiods[ref.check_period]
                if timeperiod is not None:
                    # But it's no so simple, we must match the timeperiod
                    new_t = timeperiod.get_next_valid_time_from_t(new_t)
                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    chk.state = 'waitconsume'
                    chk.exit_status = 2
                    chk.output = '(Error: there is no available check time after time change!)'
                    chk.check_time = time.time()
                    chk.execution_time = 0
                else:
                    chk.t_to_go = new_t
                    ref.next_chk = new_t

        # Now all checks and actions
        for act in self.sched.actions.values():
            # Already launch checks should not be touch
            if act.status == 'scheduled':
                t_to_go = act.t_to_go

                #  Event handler do not have ref
                ref_id = getattr(act, 'ref', None)
                new_t = max(0, t_to_go + difference)

                # Notification should be check with notification_period
                if act.is_a == 'notification':
                    ref = self.sched.find_item_by_id(ref_id)
                    if ref.notification_period:
                        # But it's no so simple, we must match the timeperiod
                        notification_period = self.sched.timeperiods[
                            ref.notification_period]
                        new_t = notification_period.get_next_valid_time_from_t(
                            new_t)
                    # And got a creation_time variable too
                    act.creation_time += difference

                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    act.state = 'waitconsume'
                    act.exit_status = 2
                    act.output = '(Error: there is no available check time after time change!)'
                    act.check_time = time.time()
                    act.execution_time = 0
                else:
                    act.t_to_go = new_t

    def manage_signal(self, sig, frame):
        """Manage signals caught by the daemon
        signal.SIGUSR1 : dump_memory
        signal.SIGUSR2 : dump_object (nothing)
        signal.SIGTERM, signal.SIGINT : terminate process

        :param sig: signal caught by daemon
        :type sig: str
        :param frame: current stack frame
        :type frame:
        :return: None
        TODO: Refactor with Daemon one
        """
        logger.info("scheduler process %d received a signal: %s", os.getpid(),
                    str(sig))
        # If we got USR1, just dump memory
        if sig == signal.SIGUSR1:
            self.sched.need_dump_memory = True
        elif sig == signal.SIGUSR2:  # usr2, dump objects
            self.sched.need_objects_dump = True
        else:  # if not, die :)
            logger.info("scheduler process %d is dying...", os.getpid())
            self.sched.die()
            self.must_run = False
            Daemon.manage_signal(self, sig, frame)

    def do_loop_turn(self):
        """Scheduler loop turn
        Basically wait initial conf and run

        :return: None
        """
        # Ok, now the conf
        self.wait_for_initial_conf()
        if not self.new_conf:
            return
        logger.info("New configuration received")
        self.setup_new_conf()
        logger.info(
            "[%s] New configuration loaded, scheduling for Alignak: %s",
            self.name, self.sched.alignak_name)
        self.sched.run()

    def setup_new_conf(self):  # pylint: disable=too-many-statements
        """Setup new conf received for scheduler

        :return: None
        """
        with self.conf_lock:
            self.clean_previous_run()
            new_conf = self.new_conf
            logger.info("[%s] Sending us a configuration", self.name)
            conf_raw = new_conf['conf']
            override_conf = new_conf['override_conf']
            modules = new_conf['modules']
            satellites = new_conf['satellites']
            instance_name = new_conf['instance_name']

            # Ok now we can save the retention data
            if hasattr(self.sched, 'conf'):
                self.sched.update_retention_file(forced=True)

            # horay, we got a name, we can set it in our stats objects
            statsmgr.register(instance_name,
                              'scheduler',
                              statsd_host=new_conf['statsd_host'],
                              statsd_port=new_conf['statsd_port'],
                              statsd_prefix=new_conf['statsd_prefix'],
                              statsd_enabled=new_conf['statsd_enabled'])

            t00 = time.time()
            try:
                conf = unserialize(conf_raw)
            except AlignakClassLookupException as exp:  # pragma: no cover, simple protection
                logger.error(
                    'Cannot un-serialize configuration received from arbiter: %s',
                    exp)
            logger.debug("Conf received at %d. Un-serialized in %d secs", t00,
                         time.time() - t00)
            self.new_conf = None

            if 'scheduler_name' in new_conf:
                name = new_conf['scheduler_name']
            else:
                name = instance_name
            self.name = name

            # Set my own process title
            self.set_proctitle(self.name)

            logger.info("[%s] Received a new configuration, containing: ",
                        self.name)
            for key in new_conf:
                logger.info("[%s] - %s", self.name, key)
            logger.info("[%s] configuration identifiers: %s (%s)", self.name,
                        new_conf['conf_uuid'], new_conf['push_flavor'])

            # Tag the conf with our data
            self.conf = conf
            self.conf.push_flavor = new_conf['push_flavor']
            self.conf.alignak_name = new_conf['alignak_name']
            self.conf.instance_name = instance_name
            self.conf.skip_initial_broks = new_conf['skip_initial_broks']
            self.conf.accept_passive_unknown_check_results = \
                new_conf['accept_passive_unknown_check_results']

            self.cur_conf = conf
            self.override_conf = override_conf
            self.modules = unserialize(modules, True)
            self.satellites = satellites

            # Now We create our pollers, reactionners and brokers
            for sat_type in ['pollers', 'reactionners', 'brokers']:
                if sat_type not in satellites:
                    continue
                for sat_id in satellites[sat_type]:
                    # Must look if we already have it
                    sats = getattr(self, sat_type)
                    sat = satellites[sat_type][sat_id]

                    sats[sat_id] = sat

                    if sat['name'] in override_conf['satellitemap']:
                        sat = dict(sat)  # make a copy
                        sat.update(override_conf['satellitemap'][sat['name']])

                    proto = 'http'
                    if sat['use_ssl']:
                        proto = 'https'
                    uri = '%s://%s:%s/' % (proto, sat['address'], sat['port'])

                    sats[sat_id]['uri'] = uri
                    sats[sat_id]['con'] = None
                    sats[sat_id]['running_id'] = 0
                    sats[sat_id]['last_connection'] = 0
                    sats[sat_id]['connection_attempt'] = 0
                    sats[sat_id]['max_failed_connections'] = 3
                    setattr(self, sat_type, sats)
                logger.debug("We have our %s: %s ", sat_type,
                             satellites[sat_type])
                logger.info("We have our %s:", sat_type)
                for daemon in satellites[sat_type].values():
                    logger.info(" - %s ", daemon['name'])

            # First mix conf and override_conf to have our definitive conf
            for prop in self.override_conf:
                val = self.override_conf[prop]
                setattr(self.conf, prop, val)

            if self.conf.use_timezone != '':
                logger.info("Setting our timezone to %s",
                            str(self.conf.use_timezone))
                os.environ['TZ'] = self.conf.use_timezone
                time.tzset()

            self.do_load_modules(self.modules)

            logger.info("Loading configuration.")
            self.conf.explode_global_conf()  # pylint: disable=E1101

            # we give sched it's conf
            self.sched.reset()
            self.sched.load_conf(self.conf)
            self.sched.load_satellites(self.pollers, self.reactionners,
                                       self.brokers)

            # We must update our Config dict macro with good value
            # from the config parameters
            self.sched.conf.fill_resource_macros_names_macros()

            # Creating the Macroresolver Class & unique instance
            m_solver = MacroResolver()
            m_solver.init(self.conf)

            # self.conf.dump()
            # self.conf.quick_debug()

            # Now create the external commands manager
            # We are an applyer: our role is not to dispatch commands, but to apply them
            ecm = ExternalCommandManager(self.conf, 'applyer', self.sched)

            # Scheduler needs to know about this external command manager to use it if necessary
            self.sched.set_external_commands_manager(ecm)
            # Update External Commands Manager
            self.sched.external_commands_manager.accept_passive_unknown_check_results = \
                self.sched.conf.accept_passive_unknown_check_results

            # We clear our schedulers managed (it's us :) )
            # and set ourselves in it
            self.schedulers = {self.conf.uuid: self.sched}  # pylint: disable=E1101

            # Ok now we can load the retention data
            self.sched.retention_load()

            # Create brok new conf
            brok = Brok({'type': 'new_conf', 'data': {}})
            self.sched.add_brok(brok)

    def what_i_managed(self):
        # pylint: disable=no-member
        """Get my managed dict (instance id and push_flavor)

        :return: dict containing instance_id key and push flavor value
        :rtype: dict
        """
        if hasattr(self, 'conf'):
            return {self.conf.uuid: self.conf.push_flavor}  # pylint: disable=E1101

        return {}

    def clean_previous_run(self):
        """Clean variables from previous configuration

        :return: None
        """
        # Clean all lists
        self.pollers.clear()
        self.reactionners.clear()
        self.brokers.clear()

    def main(self):
        """Main function for Scheduler, launch after the init::

        * Init daemon
        * Load module manager
        * Launch main loop
        * Catch any Exception that occurs

        :return: None
        """
        try:
            self.setup_alignak_logger()

            # Look if we are enabled or not. If ok, start the daemon mode
            self.look_for_early_exit()

            # todo:
            # This function returns False if some problem is detected during initialization
            # (eg. communication port not free)
            # Perharps we should stop the initialization process and exit?
            if not self.do_daemon_init_and_start():
                return

            self.load_modules_manager(self.name)

            self.uri = self.http_daemon.uri
            logger.info("[Scheduler] General interface is at: %s", self.uri)

            self.do_mainloop()
        except Exception:
            self.print_unrecoverable(traceback.format_exc())
            raise
Ejemplo n.º 9
0
class Alignak(BaseSatellite):
    """Scheduler class. Referenced as "app" in most Interface

    """

    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile': PathProp(default='schedulerd.pid'),
        'port': IntegerProp(default=7768),
        'local_log': PathProp(default='schedulerd.log'),
    })

    def __init__(self,
                 config_file,
                 is_daemon,
                 do_replace,
                 debug,
                 debug_file,
                 profile=''):

        BaseSatellite.__init__(self, 'scheduler', config_file, is_daemon,
                               do_replace, debug, debug_file)

        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        self.must_run = True

        # Now the interface
        self.uri = None
        self.uri2 = None

        # And possible links for satellites
        # from now only pollers
        self.pollers = {}
        self.reactionners = {}
        self.brokers = {}

    def compensate_system_time_change(self, difference):
        """Compensate a system time change of difference for all hosts/services/checks/notifs

        :param difference: difference in seconds
        :type difference: int
        :return: None
        """
        logger.warning(
            "A system time change of %d has been detected. Compensating...",
            difference)
        # We only need to change some value
        self.program_start = max(0, self.program_start + difference)

        if not hasattr(self.sched, "conf"):
            # Race condition where time change before getting conf
            return

        # Then we compensate all host/services
        for host in self.sched.hosts:
            host.compensate_system_time_change(difference)
        for serv in self.sched.services:
            serv.compensate_system_time_change(difference)

        # Now all checks and actions
        for chk in self.sched.checks.values():
            # Already launch checks should not be touch
            if chk.status == 'scheduled' and chk.t_to_go is not None:
                t_to_go = chk.t_to_go
                ref = chk.ref
                new_t = max(0, t_to_go + difference)
                if ref.check_period is not None:
                    # But it's no so simple, we must match the timeperiod
                    new_t = ref.check_period.get_next_valid_time_from_t(new_t)
                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    chk.state = 'waitconsume'
                    chk.exit_status = 2
                    chk.output = '(Error: there is no available check time after time change!)'
                    chk.check_time = time.time()
                    chk.execution_time = 0
                else:
                    chk.t_to_go = new_t
                    ref.next_chk = new_t

        # Now all checks and actions
        for act in self.sched.actions.values():
            # Already launch checks should not be touch
            if act.status == 'scheduled':
                t_to_go = act.t_to_go

                #  Event handler do not have ref
                ref = getattr(act, 'ref', None)
                new_t = max(0, t_to_go + difference)

                # Notification should be check with notification_period
                if act.is_a == 'notification':
                    if ref.notification_period:
                        # But it's no so simple, we must match the timeperiod
                        new_t = ref.notification_period.get_next_valid_time_from_t(
                            new_t)
                    # And got a creation_time variable too
                    act.creation_time += difference

                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    act.state = 'waitconsume'
                    act.exit_status = 2
                    act.output = '(Error: there is no available check time after time change!)'
                    act.check_time = time.time()
                    act.execution_time = 0
                else:
                    act.t_to_go = new_t

    def manage_signal(self, sig, frame):
        """Manage signals caught by the daemon
        signal.SIGUSR1 : dump_memory
        signal.SIGUSR2 : dump_object (nothing)
        signal.SIGTERM, signal.SIGINT : terminate process

        :param sig: signal caught by daemon
        :type sig: str
        :param frame: current stack frame
        :type frame:
        :return: None
        TODO: Refactor with Daemon one
        """
        logger.warning("%s > Received a SIGNAL %s", process.current_process(),
                       sig)
        # If we got USR1, just dump memory
        if sig == signal.SIGUSR1:
            self.sched.need_dump_memory = True
        elif sig == signal.SIGUSR2:  # usr2, dump objects
            self.sched.need_objects_dump = True
        else:  # if not, die :)
            self.sched.die()
            self.must_run = False
            Daemon.manage_signal(self, sig, frame)

    def do_loop_turn(self):
        """Scheduler loop turn
        Basically wait initial conf and run

        :return: None
        """
        # Ok, now the conf
        self.wait_for_initial_conf()
        if not self.new_conf:
            return
        logger.info("New configuration received")
        self.setup_new_conf()
        logger.info("New configuration loaded")
        self.sched.run()

    def setup_new_conf(self):
        """Setup new conf received for scheduler

        :return: None
        """
        with self.conf_lock:
            new_c = self.new_conf
            conf_raw = new_c['conf']
            override_conf = new_c['override_conf']
            modules = new_c['modules']
            satellites = new_c['satellites']
            instance_name = new_c['instance_name']
            push_flavor = new_c['push_flavor']
            skip_initial_broks = new_c['skip_initial_broks']
            accept_passive_unknown_chk_res = new_c[
                'accept_passive_unknown_check_results']
            api_key = new_c['api_key']
            secret = new_c['secret']
            http_proxy = new_c['http_proxy']
            statsd_host = new_c['statsd_host']
            statsd_port = new_c['statsd_port']
            statsd_prefix = new_c['statsd_prefix']
            statsd_enabled = new_c['statsd_enabled']

            # horay, we got a name, we can set it in our stats objects
            statsmgr.register(self.sched,
                              instance_name,
                              'scheduler',
                              api_key=api_key,
                              secret=secret,
                              http_proxy=http_proxy,
                              statsd_host=statsd_host,
                              statsd_port=statsd_port,
                              statsd_prefix=statsd_prefix,
                              statsd_enabled=statsd_enabled)

            t00 = time.time()
            conf = cPickle.loads(conf_raw)
            logger.debug("Conf received at %d. Unserialized in %d secs", t00,
                         time.time() - t00)
            self.new_conf = None

            # Tag the conf with our data
            self.conf = conf
            self.conf.push_flavor = push_flavor
            self.conf.instance_name = instance_name
            self.conf.skip_initial_broks = skip_initial_broks
            self.conf.accept_passive_unknown_check_results = accept_passive_unknown_chk_res

            self.cur_conf = conf
            self.override_conf = override_conf
            self.modules = modules
            self.satellites = satellites
            # self.pollers = self.app.pollers

            if self.conf.human_timestamp_log:
                # pylint: disable=E1101
                logger.set_human_format()

            # Now We create our pollers
            for pol_id in satellites['pollers']:
                # Must look if we already have it
                already_got = pol_id in self.pollers
                poll = satellites['pollers'][pol_id]
                self.pollers[pol_id] = poll

                if poll['name'] in override_conf['satellitemap']:
                    poll = dict(poll)  # make a copy
                    poll.update(override_conf['satellitemap'][poll['name']])

                proto = 'http'
                if poll['use_ssl']:
                    proto = 'https'
                uri = '%s://%s:%s/' % (proto, poll['address'], poll['port'])
                self.pollers[pol_id]['uri'] = uri
                self.pollers[pol_id]['last_connection'] = 0

            # Now We create our reactionners
            for reac_id in satellites['reactionners']:
                # Must look if we already have it
                already_got = reac_id in self.reactionners
                reac = satellites['reactionners'][reac_id]
                self.reactionners[reac_id] = reac

                if reac['name'] in override_conf['satellitemap']:
                    reac = dict(reac)  # make a copy
                    reac.update(override_conf['satellitemap'][reac['name']])

                proto = 'http'
                if poll['use_ssl']:
                    proto = 'https'
                uri = '%s://%s:%s/' % (proto, reac['address'], reac['port'])
                self.reactionners[reac_id]['uri'] = uri
                self.reactionners[reac_id]['last_connection'] = 0

            # First mix conf and override_conf to have our definitive conf
            for prop in self.override_conf:
                val = self.override_conf[prop]
                setattr(self.conf, prop, val)

            if self.conf.use_timezone != '':
                logger.debug("Setting our timezone to %s",
                             str(self.conf.use_timezone))
                os.environ['TZ'] = self.conf.use_timezone
                time.tzset()

            if len(self.modules) != 0:
                logger.debug("I've got %s modules", str(self.modules))

            # TODO: if scheduler had previous modules instanciated it must clean them!
            self.do_load_modules(self.modules)

            logger.info("Loading configuration.")
            self.conf.explode_global_conf()

            # we give sched it's conf
            self.sched.reset()
            self.sched.load_conf(self.conf)
            self.sched.load_satellites(self.pollers, self.reactionners)

            # We must update our Config dict macro with good value
            # from the config parameters
            self.sched.conf.fill_resource_macros_names_macros()
            # print "DBG: got macros", self.sched.conf.macros

            # Creating the Macroresolver Class & unique instance
            m_solver = MacroResolver()
            m_solver.init(self.conf)

            # self.conf.dump()
            # self.conf.quick_debug()

            # Now create the external commander
            # it's a applyer: it role is not to dispatch commands,
            # but to apply them
            ecm = ExternalCommandManager(self.conf, 'applyer')

            # Scheduler need to know about external command to
            # activate it if necessary
            self.sched.load_external_command(ecm)

            # External command need the sched because he can raise checks
            ecm.load_scheduler(self.sched)

            # We clear our schedulers managed (it's us :) )
            # and set ourself in it
            self.schedulers = {self.conf.instance_id: self.sched}

    def what_i_managed(self):
        """Get my managed dict (instance id and push_flavor)

        :return: dict containing instance_id key and push flavor value
        :rtype: dict
        """
        if hasattr(self, 'conf'):
            return {self.conf.instance_id: self.conf.push_flavor}
        else:
            return {}

    def main(self):
        """Main function for Scheduler, launch after the init::

        * Init daemon
        * Load module manager
        * Launch main loop
        * Catch any Exception that occurs

        :return: None
        """
        try:
            self.load_config_file()
            # Setting log level
            logger.setLevel(self.log_level)
            # Force the debug level if the daemon is said to start with such level
            if self.debug:
                logger.setLevel('DEBUG')

            self.look_for_early_exit()
            self.do_daemon_init_and_start()
            self.load_modules_manager()

            self.uri = self.http_daemon.uri
            logger.info("[scheduler] General interface is at: %s", self.uri)
            self.do_mainloop()
        except Exception, exp:
            self.print_unrecoverable(traceback.format_exc())
            raise
Ejemplo n.º 10
0
class Alignak(BaseSatellite):
    """Scheduler class. Referenced as "app" in most Interface

    """

    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile':   PathProp(default='schedulerd.pid'),
        'port':      IntegerProp(default=7768),
        'local_log': PathProp(default='schedulerd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file, profile=''):

        BaseSatellite.__init__(self, 'scheduler', config_file, is_daemon, do_replace, debug,
                               debug_file)

        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        self.must_run = True

        # Now the interface
        self.uri = None
        self.uri2 = None

        # And possible links for satellites
        # from now only pollers
        self.pollers = {}
        self.reactionners = {}
        self.brokers = {}

    def compensate_system_time_change(self, difference):
        """Compensate a system time change of difference for all hosts/services/checks/notifs

        :param difference: difference in seconds
        :type difference: int
        :return: None
        """
        logger.warning("A system time change of %d has been detected. Compensating...", difference)
        # We only need to change some value
        self.program_start = max(0, self.program_start + difference)

        if not hasattr(self.sched, "conf"):
            # Race condition where time change before getting conf
            return

        # Then we compensate all host/services
        for host in self.sched.hosts:
            host.compensate_system_time_change(difference)
        for serv in self.sched.services:
            serv.compensate_system_time_change(difference)

        # Now all checks and actions
        for chk in self.sched.checks.values():
            # Already launch checks should not be touch
            if chk.status == 'scheduled' and chk.t_to_go is not None:
                t_to_go = chk.t_to_go
                ref = chk.ref
                new_t = max(0, t_to_go + difference)
                if ref.check_period is not None:
                    # But it's no so simple, we must match the timeperiod
                    new_t = ref.check_period.get_next_valid_time_from_t(new_t)
                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    chk.state = 'waitconsume'
                    chk.exit_status = 2
                    chk.output = '(Error: there is no available check time after time change!)'
                    chk.check_time = time.time()
                    chk.execution_time = 0
                else:
                    chk.t_to_go = new_t
                    ref.next_chk = new_t

        # Now all checks and actions
        for act in self.sched.actions.values():
            # Already launch checks should not be touch
            if act.status == 'scheduled':
                t_to_go = act.t_to_go

                #  Event handler do not have ref
                ref = getattr(act, 'ref', None)
                new_t = max(0, t_to_go + difference)

                # Notification should be check with notification_period
                if act.is_a == 'notification':
                    if ref.notification_period:
                        # But it's no so simple, we must match the timeperiod
                        new_t = ref.notification_period.get_next_valid_time_from_t(new_t)
                    # And got a creation_time variable too
                    act.creation_time += difference

                # But maybe no there is no more new value! Not good :(
                # Say as error, with error output
                if new_t is None:
                    act.state = 'waitconsume'
                    act.exit_status = 2
                    act.output = '(Error: there is no available check time after time change!)'
                    act.check_time = time.time()
                    act.execution_time = 0
                else:
                    act.t_to_go = new_t

    def manage_signal(self, sig, frame):
        """Manage signals caught by the daemon
        signal.SIGUSR1 : dump_memory
        signal.SIGUSR2 : dump_object (nothing)
        signal.SIGTERM, signal.SIGINT : terminate process

        :param sig: signal caught by daemon
        :type sig: str
        :param frame: current stack frame
        :type frame:
        :return: None
        TODO: Refactor with Daemon one
        """
        logger.warning("%s > Received a SIGNAL %s", process.current_process(), sig)
        # If we got USR1, just dump memory
        if sig == signal.SIGUSR1:
            self.sched.need_dump_memory = True
        elif sig == signal.SIGUSR2:  # usr2, dump objects
            self.sched.need_objects_dump = True
        else:  # if not, die :)
            self.sched.die()
            self.must_run = False
            Daemon.manage_signal(self, sig, frame)

    def do_loop_turn(self):
        """Scheduler loop turn
        Basically wait initial conf and run

        :return: None
        """
        # Ok, now the conf
        self.wait_for_initial_conf()
        if not self.new_conf:
            return
        logger.info("New configuration received")
        self.setup_new_conf()
        logger.info("New configuration loaded")
        self.sched.run()

    def setup_new_conf(self):
        """Setup new conf received for scheduler

        :return: None
        """
        with self.conf_lock:
            new_c = self.new_conf
            conf_raw = new_c['conf']
            override_conf = new_c['override_conf']
            modules = new_c['modules']
            satellites = new_c['satellites']
            instance_name = new_c['instance_name']
            push_flavor = new_c['push_flavor']
            skip_initial_broks = new_c['skip_initial_broks']
            accept_passive_unknown_chk_res = new_c['accept_passive_unknown_check_results']
            api_key = new_c['api_key']
            secret = new_c['secret']
            http_proxy = new_c['http_proxy']
            statsd_host = new_c['statsd_host']
            statsd_port = new_c['statsd_port']
            statsd_prefix = new_c['statsd_prefix']
            statsd_enabled = new_c['statsd_enabled']

            # horay, we got a name, we can set it in our stats objects
            statsmgr.register(self.sched, instance_name, 'scheduler',
                              api_key=api_key, secret=secret, http_proxy=http_proxy,
                              statsd_host=statsd_host, statsd_port=statsd_port,
                              statsd_prefix=statsd_prefix, statsd_enabled=statsd_enabled)

            t00 = time.time()
            conf = cPickle.loads(conf_raw)
            logger.debug("Conf received at %d. Unserialized in %d secs", t00, time.time() - t00)
            self.new_conf = None

            # Tag the conf with our data
            self.conf = conf
            self.conf.push_flavor = push_flavor
            self.conf.instance_name = instance_name
            self.conf.skip_initial_broks = skip_initial_broks
            self.conf.accept_passive_unknown_check_results = accept_passive_unknown_chk_res

            self.cur_conf = conf
            self.override_conf = override_conf
            self.modules = modules
            self.satellites = satellites
            # self.pollers = self.app.pollers

            if self.conf.human_timestamp_log:
                logger.set_human_format()

            # Now We create our pollers
            for pol_id in satellites['pollers']:
                # Must look if we already have it
                already_got = pol_id in self.pollers
                poll = satellites['pollers'][pol_id]
                self.pollers[pol_id] = poll

                if poll['name'] in override_conf['satellitemap']:
                    poll = dict(poll)  # make a copy
                    poll.update(override_conf['satellitemap'][poll['name']])

                proto = 'http'
                if poll['use_ssl']:
                    proto = 'https'
                uri = '%s://%s:%s/' % (proto, poll['address'], poll['port'])
                self.pollers[pol_id]['uri'] = uri
                self.pollers[pol_id]['last_connection'] = 0

            # Now We create our reactionners
            for reac_id in satellites['reactionners']:
                # Must look if we already have it
                already_got = reac_id in self.reactionners
                reac = satellites['reactionners'][reac_id]
                self.reactionners[reac_id] = reac

                if reac['name'] in override_conf['satellitemap']:
                    reac = dict(reac)  # make a copy
                    reac.update(override_conf['satellitemap'][reac['name']])

                proto = 'http'
                if poll['use_ssl']:
                    proto = 'https'
                uri = '%s://%s:%s/' % (proto, reac['address'], reac['port'])
                self.reactionners[reac_id]['uri'] = uri
                self.reactionners[reac_id]['last_connection'] = 0

            # First mix conf and override_conf to have our definitive conf
            for prop in self.override_conf:
                val = self.override_conf[prop]
                setattr(self.conf, prop, val)

            if self.conf.use_timezone != '':
                logger.debug("Setting our timezone to %s", str(self.conf.use_timezone))
                os.environ['TZ'] = self.conf.use_timezone
                time.tzset()

            if len(self.modules) != 0:
                logger.debug("I've got %s modules", str(self.modules))

            # TODO: if scheduler had previous modules instanciated it must clean them!
            self.modules_manager.set_modules(self.modules)
            self.do_load_modules()

            logger.info("Loading configuration.")
            self.conf.explode_global_conf()

            # we give sched it's conf
            self.sched.reset()
            self.sched.load_conf(self.conf)
            self.sched.load_satellites(self.pollers, self.reactionners)

            # We must update our Config dict macro with good value
            # from the config parameters
            self.sched.conf.fill_resource_macros_names_macros()
            # print "DBG: got macros", self.sched.conf.macros

            # Creating the Macroresolver Class & unique instance
            m_solver = MacroResolver()
            m_solver.init(self.conf)

            # self.conf.dump()
            # self.conf.quick_debug()

            # Now create the external commander
            # it's a applyer: it role is not to dispatch commands,
            # but to apply them
            ecm = ExternalCommandManager(self.conf, 'applyer')

            # Scheduler need to know about external command to
            # activate it if necessary
            self.sched.load_external_command(ecm)

            # External command need the sched because he can raise checks
            ecm.load_scheduler(self.sched)

            # We clear our schedulers managed (it's us :) )
            # and set ourself in it
            self.schedulers = {self.conf.instance_id: self.sched}

    def what_i_managed(self):
        """Get my managed dict (instance id and push_flavor)

        :return: dict containing instance_id key and push flavor value
        :rtype: dict
        """
        if hasattr(self, 'conf'):
            return {self.conf.instance_id: self.conf.push_flavor}
        else:
            return {}

    def main(self):
        """Main function for Scheduler, launch after the init::

        * Init daemon
        * Load module manager
        * Launch main loop
        * Catch any Exception that occurs

        :return: None
        """
        try:
            self.load_config_file()
            # Setting log level
            logger.setLevel(self.log_level)
            # Force the debug level if the daemon is said to start with such level
            if self.debug:
                logger.setLevel('DEBUG')

            self.look_for_early_exit()
            self.do_daemon_init_and_start()
            self.load_modules_manager()

            self.uri = self.http_daemon.uri
            logger.info("[scheduler] General interface is at: %s", self.uri)
            self.do_mainloop()
        except Exception, exp:
            self.print_unrecoverable(traceback.format_exc())
            raise