コード例 #1
0
class SchedulerLink(SatelliteLink):
    """Link object describing a scheduler daemon.

    The generic satellite link properties are extended with the scheduler
    specific ones (name, port, weight, broks and passive checks options).
    """

    # Strictly speaking this object is only a link, but it is typed as the
    # daemon it points to
    my_type = 'scheduler'

    properties = SatelliteLink.properties.copy()
    properties.update({
        'type': StringProp(default=u'scheduler', fill_brok=['full_status'], to_send=True),
        'scheduler_name': StringProp(default='', fill_brok=['full_status']),
        'port': IntegerProp(default=7768, fill_brok=['full_status'], to_send=True),
        'weight': IntegerProp(default=1, fill_brok=['full_status']),
        'skip_initial_broks': BoolProp(default=False, fill_brok=['full_status'], to_send=True),
        'accept_passive_unknown_check_results': BoolProp(
            default=False, fill_brok=['full_status'], to_send=True),
    })

    running_properties = SatelliteLink.running_properties.copy()
    running_properties.update({
        # NOTE(review): both entries are declared as StringProp although their
        # defaults are a boolean and a list - confirm this is intended
        'need_conf': StringProp(default=True),
        'external_commands': StringProp(default=[]),
    })

    def get_override_configuration(self):
        """Collect the current value of every property flagged as `override`.

        Such parameters (e.g. use_timezone) are meant to be mixed, in the scheduler,
        with the standard configuration sent by the arbiter.

        :return: property name -> current value on this link
        :rtype: dict
        """
        return {
            name: getattr(self, name)
            for name, definition in self.__class__.properties.items()
            if definition.override
        }
コード例 #2
0
class BrokerLink(SatelliteLink):
    """Link object describing a broker daemon."""

    my_type = 'broker'

    properties = SatelliteLink.properties.copy()
    properties.update({
        'type': StringProp(default=u'broker', fill_brok=['full_status'], to_send=True),
        'broker_name': StringProp(default='', fill_brok=['full_status']),
        'port': IntegerProp(default=7772, fill_brok=['full_status'], to_send=True),
        'initialized': BoolProp(default=False, fill_brok=['full_status'], to_send=True),
    })

    def prepare_for_conf(self):
        """Prepare the configuration dictionary that will be pushed to the broker.

        The parent preparation is run first, then a 'satellites' entry is added
        with an empty dictionary for the receivers, pollers and reactionners.

        :return: None
        """
        super(BrokerLink, self).prepare_for_conf()

        # One (initially empty) bucket per type of satellite
        satellites = {
            'receivers': {},
            'pollers': {},
            'reactionners': {}
        }
        self.cfg.update({'satellites': satellites})
コード例 #3
0
class ArbiterLink(SatelliteLink):
    """Link to an Arbiter daemon.

    With it, a master arbiter can communicate with a spare Arbiter daemon.
    """

    my_type = 'arbiter'

    properties = SatelliteLink.properties.copy()
    properties.update({
        'type': StringProp(default=u'arbiter', fill_brok=['full_status'], to_send=True),
        'arbiter_name': StringProp(default='', fill_brok=['full_status']),
        # The default host name is resolved once, when the class is defined
        'host_name': StringProp(default=socket.gethostname(), to_send=True),
        'port': IntegerProp(default=7770, to_send=True),
        'last_master_speak': FloatProp(default=0.0)
    })

    def is_me(self):  # pragma: no cover, seems not to be used anywhere
        """Tell whether this link designates the local machine.

        The configured host_name is compared with the local fully qualified
        domain name and, if it does not match, with the local host name.

        TODO: is it useful?

        :return: True if host_name is the local FQDN or the local host name
        :rtype: bool
        """
        logger.info("And arbiter is launched with the hostname:%s "
                    "from an arbiter point of view of addr:%s",
                    self.host_name, socket.getfqdn())
        if self.host_name == socket.getfqdn():
            return True
        return self.host_name == socket.gethostname()

    def do_not_run(self):
        """Send the '_do_not_run' request to the linked arbiter.

        Every HTTP client failure is recorded as a failed check attempt; a
        connection error additionally marks the link as dead.

        :return: True if the request was sent without error, else False
        :rtype: bool
        """
        logger.debug("[%s] do_not_run", self.name)

        try:
            self.con.get('_do_not_run')
        except HTTPClientConnectionException as exp:  # pragma: no cover, simple protection
            self.add_failed_check_attempt(
                "Connection error when sending do not run: %s" % str(exp))
            self.set_dead()
        except HTTPClientTimeoutException as exp:  # pragma: no cover, simple protection
            self.add_failed_check_attempt(
                "Connection timeout when sending do not run: %s" % str(exp))
        except HTTPClientException as exp:
            self.add_failed_check_attempt(
                "Error when sending do not run: %s" % str(exp))
        else:
            return True

        return False
コード例 #4
0
class Reactionner(Satellite):
    """Reactionner daemon: launches the actions requested by the schedulers.

    Actions can be:
       Notifications
       Event handlers

    When running the Reactionner will :
      Respond to pings from Arbiter
      Listen for new configurations from Arbiter

    The configuration consists of a list of Schedulers for which
    the Reactionner will launch actions for.
    """

    # A reactionner only runs actions, never checks
    do_checks = False
    do_actions = True
    my_type = 'reactionner'

    properties = Satellite.properties.copy()
    properties.update({
        'type': StringProp(default='reactionner'),
        'port': IntegerProp(default=7769)
    })

    def __init__(self, **kwargs):
        """Reactionner daemon initialisation

        The daemon name is read from the `daemon_name` keyword argument and
        falls back to 'Default-reactionner'.

        :param kwargs: command line arguments
        """
        daemon_name = kwargs.get('daemon_name', 'Default-reactionner')
        super(Reactionner, self).__init__(daemon_name, **kwargs)
コード例 #5
0
class PollerLink(SatelliteLink):
    """Link between the Arbiter and a Poller daemon.

    With this, an arbiter can communicate with a poller.
    """

    my_type = 'poller'

    # to_send: whether the property is sent in the satellite configuration
    properties = SatelliteLink.properties.copy()
    properties.update({
        'type': StringProp(default=u'poller', fill_brok=['full_status'], to_send=True),
        'poller_name': StringProp(default='', fill_brok=['full_status']),
        'port': IntegerProp(default=7771, fill_brok=['full_status'], to_send=True),
        # The worker tuning properties (min_workers, max_workers,
        # processes_by_worker, worker_polling_interval) are currently
        # not declared on the link.
        'poller_tags': ListProp(default=['None'], to_send=True),
    })
コード例 #6
0
class Businessimpactmodulation(Item):
    """Modulation of the business impact value (of a Host/Service)
    during a modulation period.
    """

    my_type = 'businessimpactmodulation'

    properties = Item.properties.copy()
    properties.update({
        'business_impact_modulation_name': StringProp(),
        'business_impact': IntegerProp(),
        'modulation_period': StringProp(default=''),
    })

    def __init__(self, params=None, parsing=True):
        """Build the modulation and make sure a modulation period is set.

        :param params: parameters forwarded to the Item constructor
        :param parsing: forwarded to the Item constructor
        """
        super(Businessimpactmodulation, self).__init__(params, parsing=parsing)

        # No modulation period after the item initialisation: use '24x7'
        if not hasattr(self, 'modulation_period'):
            self.modulation_period = '24x7'

    def get_name(self):
        """Accessor to business_impact_modulation_name attribute

        :return: business impact modulation name, 'Unnamed' if the attribute is not set
        :rtype: str
        """
        return getattr(self, 'business_impact_modulation_name', 'Unnamed')
コード例 #7
0
class Serviceescalation(Item):
    """Notification escalation definition for services.

    TODO: Why this class does not inherit from fusionsupervision.objects.Escalation.
          Maybe we can merge it
    """

    my_type = 'serviceescalation'

    properties = Item.properties.copy()
    properties.update({
        'host_name': StringProp(),
        'hostgroup_name': StringProp(),
        'service_description': StringProp(),
        'first_notification': IntegerProp(),
        'last_notification': IntegerProp(),
        # like Nagios value
        'notification_interval': IntegerProp(default=30),
        'escalation_period': StringProp(default=''),
        'escalation_options': ListProp(default=['w', 'x', 'c', 'r'], split_on_comma=True),
        'contacts': ListProp(default=[], merging='join', split_on_comma=True),
        'contact_groups': ListProp(default=[], merging='join', split_on_comma=True),
        'first_notification_time': IntegerProp(),
        'last_notification_time': IntegerProp(),
    })

    def __init__(self, params=None, parsing=True):
        """Normalize the escalation options, then build the item.

        In every provided `escalation_options` value, each 'u' is replaced
        with 'x' before the parameters are handed to the Item constructor.
        Note that the provided dictionary is updated in place.

        :param params: parameters forwarded to the Item constructor
        :param parsing: forwarded to the Item constructor
        """
        params = {} if params is None else params

        if 'escalation_options' in params:
            params['escalation_options'] = [
                option.replace('u', 'x') for option in params['escalation_options']
            ]

        super(Serviceescalation, self).__init__(params, parsing=parsing)
コード例 #8
0
class ReceiverLink(SatelliteLink):
    """Link object describing a receiver daemon."""

    my_type = 'receiver'

    properties = SatelliteLink.properties.copy()
    properties.update({
        'type': StringProp(default='receiver', fill_brok=['full_status'], to_send=True),
        'receiver_name': StringProp(default='', fill_brok=['full_status'], to_send=True),
        # NOTE(review): 7772 is also the default port declared by BrokerLink -
        # confirm that sharing this default is intended
        'port': IntegerProp(default=7772, fill_brok=['full_status'], to_send=True),
    })
コード例 #9
0
class Poller(Satellite):
    """Poller daemon class. Referenced as "app" in most Interface"""

    # A poller only runs checks, never actions
    do_checks = True
    do_actions = False
    my_type = 'poller'

    properties = Satellite.properties.copy()
    properties.update({
        'type': StringProp(default='poller'),
        'port': IntegerProp(default=7771)
    })

    def __init__(self, **kwargs):
        """Poller daemon initialisation

        The daemon name is read from the `daemon_name` keyword argument and
        falls back to 'Default-poller'.

        :param kwargs: command line arguments
        """
        daemon_name = kwargs.get('daemon_name', 'Default-poller')
        super(Poller, self).__init__(daemon_name, **kwargs)
コード例 #10
0
class ReactionnerLink(SatelliteLink):
    """Link object describing a reactionner daemon."""

    my_type = 'reactionner'

    properties = SatelliteLink.properties.copy()
    properties.update({
        'type': StringProp(default='reactionner', fill_brok=['full_status'], to_send=True),
        'reactionner_name': StringProp(default='', fill_brok=['full_status']),
        'port': IntegerProp(default=7769, fill_brok=['full_status'], to_send=True),
        # The worker tuning properties (min_workers, max_workers,
        # processes_by_worker, worker_polling_interval) are currently
        # not declared on the link.
        'reactionner_tags': ListProp(default=['None'], to_send=True),
    })
コード例 #11
0
class ActionBase(FusionsupervisionObject):
    # pylint: disable=too-many-instance-attributes
    """
    This abstract class is used to have a common base for both actions (event handlers and
    notifications) and checks.

    The Action may be an internal one if it does not require a Worker process to run the
    action because the Scheduler is able to resolve the action by itself.

    This class is specialized according to the running OS. Currently, only Linux/Unix like OSes
    are tested
    """
    # Process handle polled by check_finished(); presumably set by the OS specific
    # _execute() implementation - TODO confirm
    process = None

    # Declared properties and their default values; fill_default() is called
    # at the end of __init__
    properties = {
        'is_a': StringProp(default=u''),
        'type': StringProp(default=u''),
        'internal': BoolProp(default=False),
        'creation_time': FloatProp(default=0.0),
        '_is_orphan': BoolProp(default=False),
        '_in_timeout': BoolProp(default=False),
        'status': StringProp(default=ACT_STATUS_SCHEDULED),
        # 3 is also the value check_finished() forces on timeout or invalid exit code
        'exit_status': IntegerProp(default=3),
        'output': StringProp(default=u'', fill_brok=['full_status']),
        'long_output': StringProp(default=u'', fill_brok=['full_status']),
        'perf_data': StringProp(default=u'', fill_brok=['full_status']),
        't_to_go': FloatProp(default=0.0),
        'check_time': IntegerProp(default=0),
        'last_poll': IntegerProp(default=0),
        'execution_time': FloatProp(default=0.0),
        'wait_time': FloatProp(default=0.001),
        'u_time': FloatProp(default=0.0),
        's_time': FloatProp(default=0.0),
        'reactionner_tag': StringProp(default=u'None'),
        # Extra environment variables, merged with os.environ by get_local_environnement()
        'env': DictProp(default={}),
        'module_type': StringProp(default=u'fork', fill_brok=['full_status']),
        'my_worker': StringProp(default=u'none'),
        'command': StringProp(default=''),
        # Compared with the elapsed seconds since check_time in check_finished()
        'timeout': IntegerProp(default=10),
        'ref': StringProp(default=u'unset'),
        'ref_type': StringProp(default=u'unset'),
        'my_scheduler': StringProp(default=u'unassigned'),
    }

    def __init__(self, params=None, parsing=False):
        """Build the action and set the values that were not provided.

        The creation time defaults to the current time, and the actions log flag
        defaults to the presence of ALIGNAK_LOG_ACTIONS in the environment.

        :param params: parameters forwarded to the parent constructor
        :param parsing: forwarded to the parent constructor
        """
        super(ActionBase, self).__init__(params, parsing=parsing)

        # An empty / missing parameters set provides nothing
        provided = params if params else {}

        # Set a creation time only if not provided
        if 'creation_time' not in provided:
            self.creation_time = time.time()

        # Set actions log only if not provided
        if 'log_actions' not in provided:
            self.log_actions = 'ALIGNAK_LOG_ACTIONS' in os.environ

        # Fill default parameters
        self.fill_default()

    def is_launchable(self, timestamp):
        """Tell whether the action launch time is reached.

        :param timestamp: time to compare
        :type timestamp: int
        :return: False if t_to_go is None, else the result of timestamp >= t_to_go
        :rtype: bool
        """
        return self.t_to_go is not None and timestamp >= self.t_to_go

    def get_local_environnement(self):
        """Build the environment of the action.

        The result is a copy of the process environment in which the action
        own variables (self.env) are added / replaced.

        Note: the global os.environ is not updated because this would affect
        all the other checks.

        :return: local environment variables
        :rtype: dict
        """
        # os.environ.copy() is used on purpose: copy.copy() would return another
        # os._Environment instance that still changes the real environment.
        merged_env = os.environ.copy()
        merged_env.update(self.env)
        return merged_env

    def execute(self):
        """Start this action command in a subprocess.

        The action is marked as launched, its timers and output buffers are reset,
        its local environment is built, then the OS specific `_execute` is called.

        As documented for the OS specific implementations, they may raise an
        ActionError:
            'toomanyopenfiles' if too many opened files on the system
            'no_process_launched' if arguments parsing failed
            'process_launch_failed': if the process launch failed

        :return: what the OS specific `_execute` returns (reference to the started process)
        :rtype: psutil.Process
        """
        self.status = ACT_STATUS_LAUNCHED
        self.check_time = time.time()
        # Initial polling delay, increased by check_finished while the process is running
        self.wait_time = 0.0001
        self.last_poll = self.check_time

        # Get a local env variables with our additional values
        self.local_env = self.get_local_environnement()

        # Initialize stdout and stderr.
        self.stdoutdata = ''
        self.stderrdata = ''

        logger.debug("Launch command: '%s', ref: %s, timeout: %s",
                     self.command, self.ref, self.timeout)
        if self.log_actions:
            # log_actions may be set although the variable is not defined in this
            # process environment (e.g. provided in the parameters): never index
            # os.environ directly, else a KeyError is raised.
            if os.environ.get('ALIGNAK_LOG_ACTIONS') == 'WARNING':
                log_action = logger.warning
            else:
                log_action = logger.info
            log_action("Launch command: '%s'", self.command)

        return self._execute()  # OS specific part

    def get_outputs(self, out, max_plugins_output_length):
        """Get check outputs from single output (split perfdata etc).

        Updates output, perf_data and long_output attributes:
        - output is the first line, before its first unescaped pipe
        - perf_data is what follows the pipe on the first line, plus everything
          after the first pipe found in the next lines, joined with spaces
        - long_output is made of the next lines, up to the first pipe, joined
          with newlines

        Escaped pipes (backslash + pipe) are restored as plain pipes in the results.

        :param out: output data of a check
        :type out: str
        :param max_plugins_output_length: max plugin data length, the rest is dropped
        :type max_plugins_output_length: int
        :return: None
        """
        pipe_marker = '___PROTECT_PIPE___'

        def _clean(text):
            """Strip the text and restore the escaped pipes"""
            return text.strip().replace(pipe_marker, '|')

        # Squeeze all output after max_plugins_output_length
        out = out[:max_plugins_output_length]
        # manage escaped pipes
        out = out.replace(r'\|', pipe_marker)
        # Then cuts by lines
        lines = out.split('\n')
        # For perf data
        first_line_parts = lines[0].split('|')

        # First line before | is output, strip it
        self.output = _clean(first_line_parts[0])
        try:
            # Only meaningful for byte strings (Python 2); a text string has no decode
            self.output = self.output.decode('utf8', 'ignore')
        except (UnicodeEncodeError, AttributeError):
            pass

        # After | it is perfdata, else it is empty
        self.perf_data = ''
        if len(first_line_parts) > 1:
            self.perf_data = _clean(first_line_parts[1])

        # Now manage others lines. Before the | it's long_output
        # And after it's all perf_data, space joined
        long_output = []
        in_perfdata = False
        for line in lines[1:]:
            if in_perfdata:
                # Already in perfdata, direct append
                self.perf_data += ' ' + _clean(line)
                continue

            # Not already in perf_data, search for the | part
            line_parts = line.split('|', 1)
            # The first part will always be long_output
            long_output.append(_clean(line_parts[0]))
            if len(line_parts) > 1:
                in_perfdata = True
                self.perf_data += ' ' + _clean(line_parts[1])

        # long_output is all non output and performance data, joined with \n
        self.long_output = '\n'.join(long_output)
        # Get sure the performance data are stripped
        self.perf_data = self.perf_data.strip()

        logger.debug("Command result for '%s': %d, %s", self.command,
                     self.exit_status, self.output)

        if self.log_actions:
            # log_actions may be set although the variable is not defined in this
            # process environment: never index os.environ directly (KeyError).
            if os.environ.get('ALIGNAK_LOG_ACTIONS') == 'WARNING':
                log_action = logger.warning
            else:
                log_action = logger.info

            log_action("Check result for '%s': %d, %s", self.command,
                       self.exit_status, self.output)
            if self.perf_data:
                log_action("Performance data for '%s': %s",
                           self.command, self.perf_data)

    def check_finished(self, max_plugins_output_length):
        # pylint: disable=too-many-branches
        """Handle action if it is finished (get stdout, stderr, exit code...)

        Three situations are handled:
        - the process is still running and not in timeout: the available outputs
          are stored, the polling delay is increased and nothing else happens
        - the process is still running after `timeout` seconds: it is killed, the
          status is set to timeout and the exit status to 3
        - the process exited: its exit status and outputs are stored and the
          status is set to done

        In the two last situations, the outputs are parsed with get_outputs and
        the children user / system times consumed during this call are stored.

        :param max_plugins_output_length: max plugin data length
        :type max_plugins_output_length: int
        :return: None
        """
        def _as_text(data):
            """Get text from a stream chunk (bytes, text or nothing).

            Nothing is lost nor raised whatever the stream reader returned:
            undecodable bytes are replaced rather than raising.
            """
            if not data:
                return ''
            if isinstance(data, bytes):
                return data.decode('utf-8', 'replace')
            return data

        def _log_action(message, *args):
            """Log, if actions log is enabled, at the level set in the environment."""
            if not self.log_actions:
                return
            # The variable may be missing in this process environment even when
            # log_actions is set: never index os.environ directly (KeyError).
            if os.environ.get('ALIGNAK_LOG_ACTIONS') == 'WARNING':
                logger.warning(message, *args)
            else:
                logger.info(message, *args)

        def _parse_outputs():
            """Parse the collected outputs, then drop the raw buffers."""
            # Replace stdout with stderr if stdout is empty
            self.stdoutdata = self.stdoutdata.strip()
            if not self.stdoutdata:
                self.stdoutdata = self.stderrdata

            # Now grep what we want in the output
            self.get_outputs(self.stdoutdata, max_plugins_output_length)

            # We can clean the useless properties now
            del self.stdoutdata
            del self.stderrdata

        def _store_cpu_times():
            """Store the children user and system times spent during this call."""
            _, _, n_child_utime, n_child_stime, _ = os.times()
            self.u_time = n_child_utime - child_utime
            self.s_time = n_child_stime - child_stime

        self.last_poll = time.time()

        _, _, child_utime, child_stime, _ = os.times()

        # Not yet finished...
        if self.process.poll() is None:
            # We must wait, but checks are variable in time so we do not wait the same
            # for a little check or a long ping. So we do like TCP: slow start with a very
            # short time (0.0001 s) increased *2 but do not wait more than 0.5 s.
            self.wait_time = min(self.wait_time * 2, 0.5)
            now = time.time()

            # Get standard outputs in non blocking mode from the process streams
            stdout = no_block_read(self.process.stdout)
            stderr = no_block_read(self.process.stderr)

            # Each stream is handled on its own: an unusable stdout chunk
            # must not make us lose the stderr one.
            self.stdoutdata += _as_text(stdout)
            self.stderrdata += _as_text(stderr)

            elapsed = now - self.check_time
            if elapsed <= self.timeout:
                return

            logger.warning("Process pid=%d spent too much time: %.2f seconds",
                           self.process.pid, elapsed)
            self._in_timeout = True
            self._kill()
            self.status = ACT_STATUS_TIMEOUT
            self.execution_time = elapsed
            self.exit_status = 3

            _log_action("Action '%s' exited on timeout (%d s)",
                        self.command, self.timeout)

            # Do not keep the process object
            del self.process

            _parse_outputs()
            _store_cpu_times()
            return

        logger.debug("Process pid=%d exited with %d", self.process.pid,
                     self.process.returncode)

        if fcntl:
            # Get standard outputs in non blocking mode from the process streams
            stdout = no_block_read(self.process.stdout)
            stderr = no_block_read(self.process.stderr)
        else:
            # Get standard outputs from the communicate function
            (stdout, stderr) = self.process.communicate()

        self.stdoutdata += _as_text(stdout)
        self.stderrdata += _as_text(stderr)

        self.exit_status = self.process.returncode
        _log_action("Action '%s' exited with code %d", self.command, self.exit_status)

        # We do not need the process now
        del self.process

        # check for bad syntax in command line:
        errors = self.stderrdata
        bad_syntax = (
            'sh: -c: line 0: unexpected EOF' in errors
            or ('sh: -c: ' in errors and ': Syntax' in errors)
            or 'Syntax error: Unterminated quoted string' in errors
        )
        if bad_syntax:
            logger.warning("Bad syntax in command line!")
            # Very, very ugly. But subprocess._handle_exitstatus does
            # not see a difference between a regular "exit 1" and a
            # bailing out shell. Strange, because strace clearly shows
            # a difference. (exit_group(1) vs. exit_group(257))
            self.stdoutdata = self.stdoutdata + self.stderrdata
            self.exit_status = 3

        # Make sure that exit code is a valid exit code
        if self.exit_status not in VALID_EXIT_STATUS:
            self.exit_status = 3

        _parse_outputs()

        self.status = ACT_STATUS_DONE
        self.execution_time = time.time() - self.check_time

        # Also get the system and user times
        _store_cpu_times()

    def copy_shell__(self, new_i):
        """Copy the attributes listed in 'ONLY_COPY_PROP' from this action to `new_i`.

        :param new_i: object that receives the attributes
        :type new_i: object
        :return: `new_i`, with the copied attributes set
        :rtype: object
        """
        for attribute in ONLY_COPY_PROP:
            value = getattr(self, attribute)
            setattr(new_i, attribute, value)
        return new_i

    def got_shell_characters(self):
        """Tell whether the command line contains at least one shell character.

        Shell characters are : '!', '$', '^', '&', '*', '(', ')', '~', '[', ']',
                               '|', '{', '}', ';', '<', '>', '?', '`'

        :return: True if one shell character is found, False otherwise
        :rtype: bool
        """
        for character in self.command:
            if character in SHELLCHARS:
                return True
        return False

    def _execute(self, force_shell=False):
        """Execute action in a subprocess

        Nothing is done here: this is the OS specific part called by execute().

        :param force_shell: not used in this base implementation
        :return: None
        """
        return None

    def _kill(self):
        """Kill the action and close fds

        Nothing is done here: check_finished() calls this hook on timeout.

        :return: None
        """
        return None
コード例 #12
0
class Acknowledge(FusionsupervisionObject):  # pylint: disable=R0903
    """
    Allows you to acknowledge the current problem for the specified service.
    By acknowledging the current problem, future notifications (for the same
    service state) are disabled.

    If the acknowledge is "sticky", the acknowledgement will remain until
    the service returns to an OK state. Otherwise the acknowledgement will automatically
    be removed when the service state changes.

    If the acknowledge is "notify", a notification will be sent out to contacts
    indicating that the current service problem has been acknowledged and when the
    acknowledge is cleared.
    """

    my_type = 'acknowledge'

    properties = {
        'sticky': BoolProp(default=True),
        'notify': BoolProp(default=False),
        'end_time': IntegerProp(default=0),
        'author': StringProp(default=u'FusionSupervision Engine'),
        'comment': StringProp(default=u''),
        'comment_id': StringProp(default=u'')
    }

    def __init__(self, params=None, parsing=False):
        """Build the acknowledge, then fill the properties that were not provided.

        :param params: parameters forwarded to the parent constructor
        :param parsing: forwarded to the parent constructor
        """
        super(Acknowledge, self).__init__(params, parsing=parsing)

        self.fill_default()

    def serialize(self):
        """This function serialize into a simple dict object.
        It is used when transferring data to other daemons over the network (http)

        Only the attributes listed below are returned (comment_id is not part of them).

        :return: json representation of a Acknowledge
        :rtype: dict
        """
        exported = ('uuid', 'ref', 'sticky', 'notify', 'end_time', 'author', 'comment')
        return {name: getattr(self, name) for name in exported}

    def _build_brok(self, brok_type, host_name, service_name):
        """Build a brok of the given type from the serialized acknowledge.

        The host name is always added to the data; the service name is only
        added when it is not an empty string.
        """
        data = self.serialize()
        data['host'] = host_name
        if service_name != '':
            data['service'] = service_name

        return Brok({'type': brok_type, 'data': data})

    def get_raise_brok(self, host_name, service_name=''):
        """Get a start acknowledge brok

        :param host_name: name of the concerned host
        :param service_name: name of the concerned service, empty for an host
        :return: brok with wanted data
        :rtype: fusionsupervision.brok.Brok
        """
        return self._build_brok('acknowledge_raise', host_name, service_name)

    def get_expire_brok(self, host_name, service_name=''):
        """Get an expire acknowledge brok

        :param host_name: name of the concerned host
        :param service_name: name of the concerned service, empty for an host
        :return: brok with wanted data
        :rtype: fusionsupervision.brok.Brok
        """
        return self._build_brok('acknowledge_expire', host_name, service_name)
コード例 #13
0
class Timeperiod(Item):
    """
    Class to manage a timeperiod

    A timeperiod is made of date ranges (week days, calendar dates, ... each one with
    its hours ranges) during which it is valid, and of other timeperiods that are
    excluded from it (like non working days).
    """
    # Type tag identifying this kind of object
    my_type = 'timeperiod'

    # Start from a copy of the parent properties so that the parent dictionary
    # is not modified by the update below
    properties = Item.properties.copy()
    properties.update({
        'timeperiod_name':
        StringProp(fill_brok=['full_status']),
        'alias':
        StringProp(default=u'', fill_brok=['full_status']),
        'use':
        ListProp(default=[]),
        'register':
        IntegerProp(default=1),

        # These are needed if a broker module calls methods on timeperiod objects
        'dateranges':
        ListProp(default=[], fill_brok=['full_status']),
        'exclude':
        ListProp(default=[], fill_brok=['full_status']),
        # Raw entries not yet turned into dateranges (see explode)
        'unresolved':
        ListProp(default=[], fill_brok=['full_status']),
        # Raw entries that resolve_daterange could not match
        'invalid_entries':
        ListProp(default=[], fill_brok=['full_status']),
        # Last validity state computed by check_and_log_activation_change
        'is_active':
        BoolProp(default=False),
        # Set once the first activation change has been logged
        'activated_once':
        BoolProp(default=False),
    })
    # Copy of the parent running properties, nothing specific is added
    running_properties = Item.running_properties.copy()

    def __init__(self, params=None, parsing=True):
        """Create a timeperiod from its parameters

        The parameters whose name is a declared property are handled by the parent
        class. All the others (monday, tuesday, ...) are considered as raw time
        entries and are stored in `unresolved` until explode() is called.

        When `params` contains a 'uuid', the object is assumed to be rebuilt from a
        serialized timeperiod: the serialized dateranges are turned back into objects
        and the raw time entries are ignored.

        :param params: timeperiod parameters
        :type params: dict or None
        :param parsing: flag forwarded as is to the parent constructor
        :type parsing: bool
        """
        if params is None:
            params = {}

        declared = self.__class__.properties
        # Parameters that are declared properties...
        standard_params = {name: val for name, val in params.items()
                           if name in declared}
        # ... and timeperiod specific parameters (monday, tuesday, ...)
        timeperiod_params = {name: val for name, val in params.items()
                             if name not in declared}

        serialized = standard_params.get('dateranges')
        if isinstance(serialized, list) and serialized \
                and isinstance(serialized[0], dict):
            # The dateranges are serialized, rebuild the objects
            rebuilt = []
            for item in serialized:
                daterange_class = get_fusionsupervision_class(
                    item['__sys_python_module__'])
                if daterange_class:
                    rebuilt.append(daterange_class(item['content']))
            self.dateranges = rebuilt
            # Remove the property so that the parent does not override our objects
            del standard_params['dateranges']

        super(Timeperiod, self).__init__(params=standard_params,
                                         parsing=parsing)
        self.cache = {}  # For tuning purpose only
        self.invalid_cache = {}  # same but for invalid search

        # The uuid presence means that we are rebuilding a serialized object
        if 'uuid' in params:
            self.uuid = params['uuid']
            return

        # Initial creation, the uuid got created by the parent class
        self.unresolved = []
        self.dateranges = []
        self.exclude = []
        self.invalid_entries = []
        self.is_active = False
        self.activated_once = False

        # Store the raw time entries as "<key> <value>" strings
        for key, value in timeperiod_params.items():
            if isinstance(value, list):
                # Only the last value of a list is used
                value = value[-1] if value else ''
            self.unresolved.append(key + ' ' + value)

    def serialize(self):
        """Build the plain dictionary representing this timeperiod.

        It is used when transferring data to other daemons over the network (http)

        The dictionary built by the parent class is completed with the dateranges:
        each one is stored with the dotted path of its class and its own serialization,
        which is the structure expected by the constructor to rebuild the objects.

        :return: json representation of a Timeperiod
        :rtype: dict
        """
        res = super(Timeperiod, self).serialize()

        res['dateranges'] = [
            {
                '__sys_python_module__':
                "%s.%s" % (daterange.__module__, daterange.__class__.__name__),
                'content':
                daterange.serialize()
            }
            for daterange in self.dateranges
        ]

        return res

    def get_name(self):
        """
        Get the timeperiod name, or 'unknown_timeperiod' when the
        `timeperiod_name` attribute does not exist

        :return: the timeperiod name string
        :rtype: str
        """
        try:
            return self.timeperiod_name
        except AttributeError:
            return 'unknown_timeperiod'

    def get_raw_import_values(self):  # pragma: no cover, deprecation
        """
        Get the raw values of a timeperiod: the timeperiod_name, alias, use and
        register properties that exist on the object, plus one key per unresolved entry.

        TODO: never called anywhere, still useful?

        :return: a dictionary of some properties
        :rtype: dict
        """
        missing = object()
        res = {}
        for prop in ('timeperiod_name', 'alias', 'use', 'register'):
            val = getattr(self, prop, missing)
            if val is not missing:
                res[prop] = val
        # The unresolved entries may share the same first word: the only way to keep
        # them all is to use the full entry as the key, with an empty value
        res.update((other, '') for other in self.unresolved)
        return res

    def is_time_valid(self, timestamp):
        """
        Check if a time is valid or not

        The time is not valid as soon as one element of `exclude` reports it as valid.
        Else, it is valid when at least one daterange reports it as valid.

        :param timestamp: time to check
        :return: time is valid or not
        :rtype: bool
        """
        excluded = getattr(self, 'exclude', ())
        if any(period.is_time_valid(timestamp) for period in excluded):
            return False
        return any(daterange.is_time_valid(timestamp)
                   for daterange in self.dateranges)

    # will give the first time > t which is valid
    def get_min_from_t(self, timestamp):
        """
        Get the first time > timestamp which is valid

        Each daterange is asked for its own minimum and the smallest one is returned.
        The dateranges that answer None are ignored; when there is no daterange, or
        when none of them has an answer, None is returned instead of raising.

        :param timestamp: number of seconds
        :type timestamp: int
        :return: number of seconds, None if no daterange provides a time
        :rtype: int or None
        TODO: not used, so delete it
        """
        candidates = [daterange.get_min_from_t(timestamp)
                      for daterange in self.dateranges]
        # None cannot be compared with a number, and min() refuses an empty list
        candidates = [candidate for candidate in candidates
                      if candidate is not None]
        if not candidates:
            return None
        return min(candidates)

    # will give the first time > t which is not valid
    def get_not_in_min_from_t(self, first):
        """
        Not implemented: this method does nothing

        :param first: ignored
        :return: None
        TODO: not used, so delete it
        """
        return None

    def find_next_valid_time_from_cache(self, timestamp):
        """
        Look for the next valid time stored in the cache for this timestamp

        :param timestamp: number of seconds
        :type timestamp: int
        :return: the cached value, None if the timestamp is not in the cache
        :rtype: None or int
        """
        return self.cache.get(timestamp)

    def find_next_invalid_time_from_cache(self, timestamp):
        """
        Look for the next invalid time stored in the invalid cache for this timestamp

        :param timestamp: number of seconds
        :type timestamp: int
        :return: the cached value, None if the timestamp is not in the cache
        :rtype: None or int
        """
        return self.invalid_cache.get(timestamp)

    def check_and_log_activation_change(self):
        """
        Update `is_active` with the validity of the current time and, when it changed,
        build a monitoring log like:
        [1327392000] TIMEPERIOD TRANSITION: <name>;<from>;<to>

        Values used for <from> and <to> in the log:
        -1: <from> only, the timeperiod was not active and it is its first logged change
        0: the timeperiod is not active
        1: the timeperiod is active

        :return: None or a brok if TP changed
        """
        now = int(time.time())

        previously_active = self.is_active
        self.is_active = self.is_time_valid(now)

        # Nothing to log if the state is the same
        if self.is_active == previously_active:
            return None

        origin_code = 0
        # The very first change is logged with a special origin
        if not self.activated_once:
            origin_code = -1
            self.activated_once = True
        if previously_active:
            origin_code = 1
        target_code = 1 if self.is_active else 0

        message = 'TIMEPERIOD TRANSITION: %s;%d;%d' % (
            self.get_name(), origin_code, target_code)
        return make_monitoring_log('info', message)

    def clean_cache(self):
        """
        Remove from both caches the entries whose timestamp is older than now,
        because they will not be used in the future

        :return: None
        """
        now = int(time.time())
        for store in (self.cache, self.invalid_cache):
            # Collect the keys first: a dict cannot be modified while iterating on it
            outdated = [timestamp for timestamp in store if timestamp < now]
            for timestamp in outdated:
                del store[timestamp]

    def get_next_valid_time_from_t(self, timestamp):
        # pylint: disable=too-many-branches
        """
        Get next valid time. If it's in cache, get it, otherwise define it.
        The limit to find it is 1 year.

        The search asks each daterange for its next valid time and keeps the earliest
        one that is accepted by the exclude check. When no candidate is accepted, the
        search restarts from the earliest next invalid time of the excluded timeperiods.
        The result (even None) is stored in the cache with the requested timestamp as key.

        :param timestamp: number of seconds
        :type timestamp: int or float
        :return: Nothing or time in seconds
        :rtype: None or int
        """
        timestamp = int(timestamp)
        # Kept for the cache key and for the one year limit
        original_t = timestamp

        # A cached None is not distinguished from a missing entry: the search is done again
        res_from_cache = self.find_next_valid_time_from_cache(timestamp)
        if res_from_cache is not None:
            return res_from_cache

        still_loop = True

        # Loop until a time is found, or until the search is abandoned
        while still_loop:
            local_min = None

            # Ok, not in cache...
            dr_mins = []

            # Next valid time of each daterange from the current search position
            for daterange in self.dateranges:
                dr_mins.append(daterange.get_next_valid_time_from_t(timestamp))

            # Candidates, earliest first, without the dateranges that have no answer
            s_dr_mins = sorted([d for d in dr_mins if d is not None])

            # The still_loop flag makes that only the first accepted candidate is kept
            for t01 in s_dr_mins:
                if not self.exclude and still_loop:
                    # No Exclude so we are good
                    local_min = t01
                    still_loop = False
                else:
                    for timeperiod in self.exclude:
                        # NOTE(review): the candidate is accepted as soon as ONE excluded
                        # timeperiod is not valid at t01, even if another one is valid -
                        # confirm that it is the expected behavior
                        if not timeperiod.is_time_valid(t01) and still_loop:
                            # OK we found a date that is not valid in any exclude timeperiod
                            local_min = t01
                            still_loop = False

            if local_min is None:
                # Looking for next invalid date
                # All the candidates are excluded: get the time when each excluded
                # timeperiod ends, starting from the earliest candidate
                exc_mins = []
                if s_dr_mins != []:
                    for timeperiod in self.exclude:
                        exc_mins.append(
                            timeperiod.get_next_invalid_time_from_t(
                                s_dr_mins[0]))

                s_exc_mins = sorted([d for d in exc_mins if d is not None])

                # The search will restart from the earliest one
                if s_exc_mins != []:
                    local_min = s_exc_mins[0]

            if local_min is None:
                # Neither a candidate nor a new search position: give up
                still_loop = False
            else:
                timestamp = local_min
                # No loop more than one year
                if timestamp > original_t + 3600 * 24 * 366 + 1:
                    still_loop = False
                    local_min = None

        # Ok, we update the cache...
        self.cache[original_t] = local_min
        return local_min

    def get_next_invalid_time_from_t(self, timestamp):
        # pylint: disable=too-many-branches
        """
        Get the next invalid time

        The valid periods of the dateranges, and the ones of the dateranges of the
        excluded timeperiods, are collected from the timestamp and merged. Then:

        * with no valid period, or if the first one starts after the timestamp,
          the timestamp itself is returned
        * if the first excluded period starts before the end of the first valid
          period, the start of this excluded period is returned
        * else the end of the first valid period is returned

        :param timestamp: timestamp in seconds (of course)
        :type timestamp: int or float
        :return: timestamp of next invalid time
        :rtype: int or float
        """
        origin = int(timestamp)
        # Periods are not searched after this limit
        horizon = origin + (3600 * 24 * 365)

        def valid_spans(daterange):
            """Collect the (start, end) valid periods of a daterange from the origin"""
            spans = []
            cursor = origin
            while True:
                begin = daterange.get_next_valid_time_from_t(cursor)
                if begin is None:
                    break
                finish = daterange.get_next_invalid_time_from_t(begin)
                spans.append((begin, finish))
                cursor = finish
                if cursor > horizon:
                    break
            return spans

        included = []
        for daterange in self.dateranges:
            included.extend(valid_spans(daterange))
        periods = merge_periods(included)

        # manage exclude periods
        excluded = []
        for exclude in self.exclude:
            for daterange in exclude.dateranges:
                excluded.extend(valid_spans(daterange))
        periods_exclude = merge_periods(excluded) if excluded else []

        if len(periods) == 0:
            return origin

        # if first valid period is after original timestamp, the first invalid time
        # is the original timestamp
        if periods[0][0] > origin:
            return origin
        # check the first period + first period of exclude
        if len(periods_exclude) >= 1 and periods_exclude[0][0] < periods[0][1]:
            return periods_exclude[0][0]
        return periods[0][1]

    def is_correct(self):
        """Check if this object configuration is correct ::

        * Check if dateranges of timeperiod are valid
        * Add an error for each entry that could not be resolved
        * Call our parent class is_correct checker

        :return: True if the configuration is correct, otherwise False if at least one daterange
        is not correct
        :rtype: bool
        """
        dateranges_ok = True
        for daterange in self.dateranges:
            verdict = daterange.is_correct()
            if not verdict:
                message = "[timeperiod::%s] invalid daterange '%s'" % (
                    self.get_name(), daterange)
                self.add_error(message)
            dateranges_ok = dateranges_ok & verdict

        # Warn about non correct entries
        for entry in self.invalid_entries:
            message = "[timeperiod::%s] invalid entry '%s'" % (
                self.get_name(), entry)
            self.add_error(message)

        # The parent checker is always called, whatever the dateranges state
        parent_ok = super(Timeperiod, self).is_correct()
        return parent_ok and dateranges_ok

    def __str__(self):  # pragma: no cover
        """
        Get readable object

        :return: this object in readable format
        :rtype: str
        """
        string = ''
        string += str(self.__dict__) + '\n'
        for elt in self.dateranges:
            string += str(elt)
            (start, end) = elt.get_start_and_end_time()
            start = time.asctime(time.localtime(start))
            end = time.asctime(time.localtime(end))
            string += "\nStart and end:" + str((start, end))
        string += '\nExclude'
        for elt in self.exclude:
            string += str(elt)

        return string

    @staticmethod
    def _daterange_data(other, skip_interval=0, **fields):
        """
        Build the parameters dictionary provided to the dateranges constructors

        All the date fields are 0 except the ones provided in `fields`.

        :param other: the time ranges part of the entry
        :type other: str
        :param skip_interval: the skip interval found in the entry, 0 if none
        :param fields: date fields to set (syear, smon, smday, swday, swday_offset,
            eyear, emon, emday, ewday, ewday_offset)
        :return: daterange parameters
        :rtype: dict
        """
        data = {
            'syear': 0,
            'smon': 0,
            'smday': 0,
            'swday': 0,
            'swday_offset': 0,
            'eyear': 0,
            'emon': 0,
            'emday': 0,
            'ewday': 0,
            'ewday_offset': 0,
        }
        data.update(fields)
        data['skip_interval'] = skip_interval
        data['other'] = other
        return data

    @classmethod
    def _month_weekday_daterange(cls, swday, swday_offset, smon, ewday, ewday_offset, emon,
                                 skip_interval, other):
        """
        Build the daterange of a '<weekday> N <month> - <weekday> N <month>' entry

        :return: the daterange
        :rtype: MonthWeekDayDaterange
        """
        smon_id = Daterange.get_month_id(smon)
        emon_id = Daterange.get_month_id(emon)
        swday_id = Daterange.get_weekday_id(swday)
        ewday_id = Daterange.get_weekday_id(ewday)
        return MonthWeekDayDaterange(cls._daterange_data(
            other, skip_interval,
            smon=smon_id, swday=swday_id, swday_offset=swday_offset,
            emon=emon_id, ewday=ewday_id, ewday_offset=ewday_offset))

    @classmethod
    def _keyword_pair_daterange(cls, t00, smday, t01, emday, skip_interval, other):
        """
        Build the daterange of a '<keyword> N - <keyword> N' entry, where both keywords
        are week days, or months, or the 'day' word

        :return: the daterange, None if the keywords are not of the same kind
        """
        if t00 in Daterange.weekdays and t01 in Daterange.weekdays:
            swday = Daterange.get_weekday_id(t00)
            ewday = Daterange.get_weekday_id(t01)
            return WeekDayDaterange(cls._daterange_data(
                other, skip_interval,
                swday=swday, swday_offset=smday, ewday=ewday, ewday_offset=emday))

        if t00 in Daterange.months and t01 in Daterange.months:
            smon = Daterange.get_month_id(t00)
            emon = Daterange.get_month_id(t01)
            return MonthDateDaterange(cls._daterange_data(
                other, skip_interval,
                smon=smon, smday=smday, emon=emon, emday=emday))

        if t00 == 'day' and t01 == 'day':
            return MonthDayDaterange(cls._daterange_data(
                other, skip_interval, smday=smday, emday=emday))

        return None

    @classmethod
    def _single_keyword_daterange(cls, t00, smday, emday, skip_interval, other):
        """
        Build the daterange of a '<keyword> N - N' entry, where the keyword is a
        week day, a month, or the 'day' word, used for both the start and the end

        :return: the daterange, None if the keyword is not a known one
        """
        if t00 in Daterange.weekdays:
            wday = Daterange.get_weekday_id(t00)
            return WeekDayDaterange(cls._daterange_data(
                other, skip_interval,
                swday=wday, swday_offset=smday, ewday=wday, ewday_offset=emday))

        if t00 in Daterange.months:
            mon = Daterange.get_month_id(t00)
            return MonthDateDaterange(cls._daterange_data(
                other, skip_interval,
                smon=mon, smday=smday, emon=mon, emday=emday))

        if t00 == 'day':
            return MonthDayDaterange(cls._daterange_data(
                other, skip_interval, smday=smday, emday=emday))

        return None

    def resolve_daterange(self, dateranges, entry):
        # pylint: disable=too-many-return-statements,too-many-branches
        """
        Try to solve dateranges (special cases)

        The entry is checked against the known formats, from the most specific one
        to the most generic one. The first format that matches, and whose keywords
        are known, creates a daterange that is appended to `dateranges`:

        * calendar dates: 'YYYY-MM-DD [- YYYY-MM-DD] [/ skip] ranges'
        * week day of a month: 'weekday N month [- weekday N month] [/ skip] ranges'
        * week day, month date or month day with an end:
          'keyword N - [keyword] N [/ skip] ranges'
        * week day, month date or month day: 'keyword N ranges'
        * standard week day: 'weekday ranges'

        An entry that matches none of them is logged and stored in `invalid_entries`.

        :param dateranges: dateranges
        :type dateranges: list
        :param entry: property of timeperiod
        :type entry: string
        :return: None
        """
        build = self._daterange_data

        # Calendar dates, from the most complete form to the simplest one
        res = re.search(
            r'(\d{4})-(\d{2})-(\d{2}) - (\d{4})-(\d{2})-(\d{2}) / (\d+)[\s\t]*([0-9:, -]+)',
            entry)
        if res is not None:
            (syear, smon, smday, eyear, emon, emday, skip_interval,
             other) = res.groups()
            dateranges.append(CalendarDaterange(build(
                other, skip_interval,
                syear=syear, smon=smon, smday=smday,
                eyear=eyear, emon=emon, emday=emday)))
            return

        res = re.search(r'(\d{4})-(\d{2})-(\d{2}) / (\d+)[\s\t]*([0-9:, -]+)',
                        entry)
        if res is not None:
            (syear, smon, smday, skip_interval, other) = res.groups()
            # A single date: the end is the start
            dateranges.append(CalendarDaterange(build(
                other, skip_interval,
                syear=syear, smon=smon, smday=smday,
                eyear=syear, emon=smon, emday=smday)))
            return

        res = re.search(
            r'(\d{4})-(\d{2})-(\d{2}) - (\d{4})-(\d{2})-(\d{2})[\s\t]*([0-9:, -]+)',
            entry)
        if res is not None:
            (syear, smon, smday, eyear, emon, emday, other) = res.groups()
            dateranges.append(CalendarDaterange(build(
                other,
                syear=syear, smon=smon, smday=smday,
                eyear=eyear, emon=emon, emday=emday)))
            return

        res = re.search(r'(\d{4})-(\d{2})-(\d{2})[\s\t]*([0-9:, -]+)', entry)
        if res is not None:
            (syear, smon, smday, other) = res.groups()
            # A single date: the end is the start
            dateranges.append(CalendarDaterange(build(
                other,
                syear=syear, smon=smon, smday=smday,
                eyear=syear, emon=smon, emday=smday)))
            return

        # Forms with a skip interval
        res = re.search(
            r'([a-z]*) ([\d-]+) ([a-z]*) - ([a-z]*) ([\d-]+) ([a-z]*) / (\d+)[\s\t]*([0-9:, -]+)',
            entry)
        if res is not None:
            (swday, swday_offset, smon, ewday, ewday_offset, emon,
             skip_interval, other) = res.groups()
            dateranges.append(self._month_weekday_daterange(
                swday, swday_offset, smon, ewday, ewday_offset, emon,
                skip_interval, other))
            return

        res = re.search(
            r'([a-z]*) ([\d-]+) - ([a-z]*) ([\d-]+) / (\d+)[\s\t]*([0-9:, -]+)',
            entry)
        if res is not None:
            (t00, smday, t01, emday, skip_interval, other) = res.groups()
            daterange = self._keyword_pair_daterange(
                t00, smday, t01, emday, skip_interval, other)
            # Unknown keywords: the next formats are tried
            if daterange is not None:
                dateranges.append(daterange)
                return

        res = re.search(
            r'([a-z]*) ([\d-]+) - ([\d-]+) / (\d+)[\s\t]*([0-9:, -]+)', entry)
        if res is not None:
            (t00, smday, emday, skip_interval, other) = res.groups()
            daterange = self._single_keyword_daterange(
                t00, smday, emday, skip_interval, other)
            if daterange is not None:
                dateranges.append(daterange)
                return

        # Same forms, without a skip interval
        res = re.search(
            r'([a-z]*) ([\d-]+) ([a-z]*) - ([a-z]*) ([\d-]+) ([a-z]*) [\s\t]*([0-9:, -]+)',
            entry)
        if res is not None:
            (swday, swday_offset, smon, ewday, ewday_offset, emon,
             other) = res.groups()
            dateranges.append(self._month_weekday_daterange(
                swday, swday_offset, smon, ewday, ewday_offset, emon,
                0, other))
            return

        res = re.search(r'([a-z]*) ([\d-]+) - ([\d-]+)[\s\t]*([0-9:, -]+)',
                        entry)
        if res is not None:
            (t00, smday, emday, other) = res.groups()
            daterange = self._single_keyword_daterange(
                t00, smday, emday, 0, other)
            if daterange is not None:
                dateranges.append(daterange)
                return

        res = re.search(
            r'([a-z]*) ([\d-]+) - ([a-z]*) ([\d-]+)[\s\t]*([0-9:, -]+)', entry)
        if res is not None:
            (t00, smday, t01, emday, other) = res.groups()
            daterange = self._keyword_pair_daterange(
                t00, smday, t01, emday, 0, other)
            if daterange is not None:
                dateranges.append(daterange)
                return

        # Forms without an end: the end is the start
        res = re.search(r'([a-z]*) ([\d-]+) ([a-z]*)[\s\t]*([0-9:, -]+)',
                        entry)
        if res is not None:
            (t00, t02, t01, other) = res.groups()
            if t00 in Daterange.weekdays and t01 in Daterange.months:
                swday = Daterange.get_weekday_id(t00)
                smon = Daterange.get_month_id(t01)
                dateranges.append(MonthWeekDayDaterange(build(
                    other,
                    smon=smon, swday=swday, swday_offset=t02,
                    emon=smon, ewday=swday, ewday_offset=t02)))
                return
            if not t01:
                daterange = self._single_keyword_daterange(
                    t00, t02, t02, 0, other)
                if daterange is not None:
                    dateranges.append(daterange)
                    return

        # Standard week day
        res = re.search(r'([a-z]*)[\s\t]+([0-9:, -]+)', entry)
        if res is not None:
            (t00, other) = res.groups()
            if t00 in Daterange.weekdays:
                dateranges.append(
                    StandardDaterange({'day': t00, 'other': other}))
                return

        logger.info("[timeentry::%s] no match for %s", self.get_name(), entry)
        self.invalid_entries.append(entry)

    def apply_inheritance(self):
        """
        Do nothing: a timeperiod inherits neither properties nor custom variables

        :return: None
        """
        return None

    def explode(self):
        """
        Try to resolve all unresolved elements: each one is provided to
        resolve_daterange with the dateranges list, then the unresolved list is reset

        :return: None
        """
        pending = self.unresolved
        for raw_entry in pending:
            self.resolve_daterange(self.dateranges, raw_entry)
        self.unresolved = []

    def linkify(self, timeperiods):
        """
        Replace the names of the excluded timeperiods with the uuid of the
        timeperiods found with these names

        An error is added for each name that is not found, and this name is removed
        from the exclude list.

        :param timeperiods: object used to find a timeperiod by its name
        :type timeperiods:
        :return: None
        """
        resolved = []
        excluded_names = getattr(self, 'exclude', [])
        if excluded_names != []:
            logger.debug("[timeentry::%s] have excluded %s", self.get_name(),
                         self.exclude)
            for tp_name in excluded_names:
                found = timeperiods.find_by_name(tp_name.strip())
                if found is None:
                    self.add_error("[timeentry::%s] unknown %s timeperiod" % (
                        self.get_name(), tp_name))
                    continue
                resolved.append(found.uuid)
        self.exclude = resolved

    def check_exclude_rec(self):
        # pylint: disable=access-member-before-definition
        """
        Check if this timeperiod is tagged

        A timeperiod that is not yet tagged gets tagged, then the check is run on
        each of its excluded timeperiods (their results are not used). A timeperiod
        that is already tagged gets an error about a loop in the exclude parameter.

        :return: if tagged return false, if not true
        :rtype: bool
        """
        if not self.rec_tag:
            self.rec_tag = True
            for excluded in self.exclude:
                excluded.check_exclude_rec()
            return True

        self.add_error("[timeentry::%s] is in a loop in exclude parameter" % (
            self.get_name()))
        return False

    def fill_data_brok_from(self, data, brok_type):
        """
        Fill the brok data with the properties declared for this brok type

        For each of these properties, the value of the object attribute is used if it
        exists, else the default value of the property if it has one.

        :param data: dictionary to fill
        :type data: dict
        :param brok_type: brok type
        :type brok_type: string
        :return: None
        """
        for prop, entry in self.__class__.properties.items():
            # Is this property intended for broking?
            if brok_type not in entry.fill_brok:
                continue
            if hasattr(self, prop):
                data[prop] = getattr(self, prop)
            elif entry.has_default:
                data[prop] = entry.default
コード例 #14
0
class Module(Item):
    """
    Class to manage a module

    The property table is declared on the class and built again on every
    instance in ``__init__``, so that each module owns its own table.
    """
    my_type = 'module'

    properties = Item.properties.copy()
    properties.update({
        'name':
        StringProp(default=u'unset'),
        'type':
        StringProp(default=u'unset'),
        'daemon':
        StringProp(default=u'unset'),
        'python_name':
        StringProp(),
        'enabled':
        BoolProp(default=True),

        # Old "deprecated" property - replaced with name
        'module_alias':
        StringProp(),
        # Old "deprecated" property - replaced with type
        'module_types':
        ListProp(default=[u''], split_on_comma=True),
        # Allow a module to be related some other modules
        'modules':
        ListProp(default=[''], split_on_comma=True),

        # Module log level
        'log_level':
        StringProp(default=u'INFO'),

        # Local statsd daemon for collecting daemon metrics
        'statsd_host':
        StringProp(default=u'localhost'),
        'statsd_port':
        IntegerProp(default=8125),
        'statsd_prefix':
        StringProp(default=u'fusionsupervision'),
        'statsd_enabled':
        BoolProp(default=False)
    })

    macros = {}

    def __init__(self, params=None, parsing=True):
        """
        Build the module from its configuration parameters.

        When ``params`` is not empty, it is completed in place so that it
        always holds both ``name`` and ``module_alias``: the missing one is
        copied from the other, and ``name`` falls back to "Unnamed" when
        neither is given.

        :param params: module configuration parameters
        :type params: dict | None
        :param parsing: forwarded to the parent class constructor
        :type parsing: bool
        """
        # Must be declared in this function rather than as class variable. This because the
        # modules may have some properties that are not the same from one instance to another.
        # Other objects very often have the same properties... but not the modules!
        self.properties = Item.properties.copy()
        self.properties.update({
            'name':
            StringProp(default=u'unset'),
            'type':
            StringProp(default=u'unset'),
            'daemon':
            StringProp(default=u'unset'),
            'python_name':
            StringProp(),
            # Old "deprecated" property - replaced with name
            'module_alias':
            StringProp(),
            # Old "deprecated" property - replaced with type
            'module_types':
            ListProp(default=[''], split_on_comma=True),
            # Allow a module to be related some other modules
            'modules':
            ListProp(default=[''], split_on_comma=True),
            'enabled':
            BoolProp(default=True),

            # Module log level
            'log_level':
            StringProp(default=u'INFO'),

            # Local statsd daemon for collecting daemon metrics
            'statsd_host':
            StringProp(default=u'localhost'),
            'statsd_port':
            IntegerProp(default=8125),
            'statsd_prefix':
            StringProp(default=u'fusionsupervision'),
            'statsd_enabled':
            BoolProp(default=False)
        })

        # Manage the missing module name
        if params:
            if 'name' not in params:
                params['name'] = params.get('module_alias', "Unnamed")
            # 'name' is always set at this point, so the alias can mirror it
            if 'module_alias' not in params:
                params['module_alias'] = params['name']

        super(Module, self).__init__(params, parsing=parsing)

        self.fill_default()

        # Remove extra Item base class properties...
        # Only the attributes that currently hold a truthy value are deleted.
        for prop in [
                'customs', 'plus', 'downtimes', 'old_properties',
                'configuration_errors', 'configuration_warnings'
        ]:
            if getattr(self, prop, None):
                delattr(self, prop)

    def __repr__(self):  # pragma: no cover
        return '<%r %r, module: %r, type(s): %r />' % \
               (self.__class__.__name__, self.name, getattr(self, 'python_name', 'Unknown'),
                getattr(self, 'type', 'Unknown'))

    __str__ = __repr__

    def get_name(self):
        """
        Get name of module

        The ``name`` attribute is returned when it exists, else the
        deprecated ``module_alias`` is used. The alias is only read when
        ``name`` is missing, so a module without ``module_alias`` still
        reports its name.

        :return: Name of module
        :rtype: str
        """
        try:
            return self.name
        except AttributeError:
            return self.module_alias

    def get_types(self):
        """
        Get types of the module

        :return: the ``module_types`` attribute, or the string
                 'Untyped module' when the attribute does not exist
        :rtype: list | str
        """
        return getattr(self, 'module_types', 'Untyped module')

    def is_a_module(self, module_type):
        """
        Is the module of the required type?

        The check is a containment test against ``type`` when this
        attribute exists, else against ``module_types``.

        :param module_type: module type to check
        :type: str
        :return: True / False
        """
        # NOTE(review): `type` is declared as a StringProp, so this is presumably
        # a substring test ('web' matches 'webui') - confirm this is intended.
        if hasattr(self, 'type'):
            return module_type in self.type
        return module_type in self.module_types

    def serialize(self):
        """A module may have some properties that are not defined in the class properties list.
        Serializing a module is the same as serializing an Item but we also include all the
        existing properties that are not defined in the properties or running_properties
        class list.

        We must also exclude the reference to the daemon that loaded the module!

        :return: the parent class serialization completed with the extra instance attributes
        :rtype: dict
        """
        res = super(Module, self).serialize()

        cls = self.__class__
        for prop in self.__dict__:
            # Declared properties are left to the parent serialization; the instance
            # property table and the daemon reference are never sent
            if prop in cls.properties or prop in cls.running_properties or prop in [
                    'properties', 'my_daemon'
            ]:
                continue
            res[prop] = getattr(self, prop)

        return res
コード例 #15
0
    def __init__(self, params=None, parsing=True):
        """
        Build the module from its configuration parameters.

        A non-empty ``params`` is completed in place so that it holds both
        ``name`` and ``module_alias``: the missing one is copied from the
        other, and ``name`` falls back to "Unnamed" when neither is given.

        :param params: module configuration parameters
        :type params: dict | None
        :param parsing: forwarded to the parent class constructor
        :type parsing: bool
        """
        # The property table is built per instance and not only as a class variable:
        # modules may have properties that differ from one instance to another,
        # which is seldom the case for the other objects.
        self.properties = Item.properties.copy()
        self.properties.update({
            'name': StringProp(default=u'unset'),
            'type': StringProp(default=u'unset'),
            'daemon': StringProp(default=u'unset'),
            'python_name': StringProp(),
            # Old "deprecated" property - replaced with name
            'module_alias': StringProp(),
            # Old "deprecated" property - replaced with type
            'module_types': ListProp(default=[''], split_on_comma=True),
            # Allow a module to be related some other modules
            'modules': ListProp(default=[''], split_on_comma=True),
            'enabled': BoolProp(default=True),

            # Module log level
            'log_level': StringProp(default=u'INFO'),

            # Local statsd daemon for collecting daemon metrics
            'statsd_host': StringProp(default=u'localhost'),
            'statsd_port': IntegerProp(default=8125),
            'statsd_prefix': StringProp(default=u'fusionsupervision'),
            'statsd_enabled': BoolProp(default=False)
        })

        # Make sure that a name and an alias are both defined
        if params:
            if 'name' not in params:
                params['name'] = params.get('module_alias', "Unnamed")
            # The name is known at this point, the alias simply mirrors it
            if 'module_alias' not in params:
                params['module_alias'] = params['name']

        super(Module, self).__init__(params, parsing=parsing)

        self.fill_default()

        # Drop the Item base class attributes that a module does not need;
        # only the ones currently holding a truthy value are deleted
        unwanted = ('customs', 'plus', 'downtimes', 'old_properties',
                    'configuration_errors', 'configuration_warnings')
        for attribute in unwanted:
            if getattr(self, attribute, None):
                delattr(self, attribute)
コード例 #16
0
class Resultmodulation(Item):
    """A result modulation replaces the exit code of a check result with
    another one while its modulation period is active.

    """
    my_type = 'resultmodulation'

    properties = Item.properties.copy()
    properties.update({
        'resultmodulation_name': StringProp(),
        'exit_codes_match': IntListProp(default=[]),
        'exit_code_modulation': IntegerProp(default=None),
        'modulation_period': StringProp(default=None),
    })

    special_properties = ('modulation_period', )

    def get_name(self):
        """Get the name of this result modulation

        :return: the resultmodulation_name attribute, or 'Unnamed' if it is not set
        :rtype: str
        """
        return getattr(self, 'resultmodulation_name', 'Unnamed')

    def is_active(self, timperiods):
        """
        Tell whether this result modulation applies right now

        :param timperiods: container indexed with the modulation_period attribute
        :return: True if the period found is falsy or if the current time is
                 valid for it, otherwise False
        :rtype: bool
        """
        now = int(time.time())
        period = timperiods[self.modulation_period]
        # A missing period means that the modulation always applies
        if not period:
            return True
        if period.is_time_valid(now):
            return True
        return False

    def module_return(self, return_code, timeperiods):
        """Modulate the exit code when all of the following hold ::

        * the modulation is active (see is_active)
        * exit_code_modulation is defined
        * return_code is one of exit_codes_match

        :param return_code: actual code returned by the check
        :type return_code: int
        :param timeperiods: container handed over to is_active
        :return: exit_code_modulation if the modulation applies, else return_code
        :rtype: int
        """
        # Not in the modulation period: nothing to do
        if not self.is_active(timeperiods):
            return return_code

        # No replacement code defined: nothing to do
        if self.exit_code_modulation is None:
            return return_code

        # Replace only the exit codes we are configured to match
        if return_code in self.exit_codes_match:
            return self.exit_code_modulation

        return return_code
コード例 #17
0
class Command(Item):
    """
    Class to manage a command
    A command is an external command that a poller module runs to
    check if something is ok or not
    """
    __metaclass__ = AutoSlots

    my_type = "command"

    properties = Item.properties.copy()
    properties.update({
        'command_name': StringProp(fill_brok=['full_status']),
        'command_line': StringProp(fill_brok=['full_status']),
        'poller_tag': StringProp(default=u'None'),
        'reactionner_tag': StringProp(default=u'None'),
        'module_type': StringProp(default=None),
        'timeout': IntegerProp(default=-1),
        'enable_environment_macros': BoolProp(default=False),
    })

    def __init__(self, params=None, parsing=True):
        """
        Build the command and set the attributes the parent constructor left undefined.

        ``module_type`` is set to 'internal' when the command line starts
        with an underscore, else to 'fork'.

        :param params: command configuration parameters
        :type params: dict | None
        :param parsing: forwarded to the parent class constructor
        :type parsing: bool
        """
        if params is None:
            params = {}
        super(Command, self).__init__(params, parsing=parsing)

        if not hasattr(self, 'timeout'):
            self.timeout = -1

        if not hasattr(self, 'enable_environment_macros'):
            self.enable_environment_macros = False
        if not hasattr(self, 'poller_tag'):
            self.poller_tag = u'None'
        if not hasattr(self, 'reactionner_tag'):
            self.reactionner_tag = u'None'
        if not hasattr(self, 'module_type'):
            # A command line starting with a _ is an internal command,
            # any other command is forked
            if getattr(self, 'command_line', '').startswith('_'):
                self.module_type = u'internal'
            else:
                self.module_type = u'fork'

    def get_name(self):
        """
        Get the name of the command

        :return: the command name string
        :rtype: str
        """
        return self.command_name

    def fill_data_brok_from(self, data, brok_type):
        """
        Add properties to data if fill_brok of these class properties
        is same as brok_type

        Only the attributes that exist on the object are copied; the
        property defaults are not used.

        :param data: dictionary of this command (modified in place)
        :type data: dict
        :param brok_type: type of brok
        :type brok_type: str
        :return: None
        """
        cls = self.__class__
        # Now config properties
        for prop, entry in list(cls.properties.items()):
            # Is this property intended for broking?
            if brok_type in entry.fill_brok:
                if hasattr(self, prop):
                    data[prop] = getattr(self, prop)

    def is_correct(self):
        """Check if this object configuration is correct ::

        * Check our own specific properties
        * Call our parent class is_correct checker

        For an ``_internal_host_check`` command, the ';' separated command
        line is inspected and the command name is rewritten, with a warning,
        when the state or the output is missing or when there are too many
        parameters. These rewrites never make the configuration incorrect.

        :return: True if the configuration is correct, otherwise False
        :rtype: bool
        """
        state = True

        # _internal_host_check is for having an host check result
        # without running a check plugin
        if self.command_name.startswith('_internal_host_check'):
            # Command line may contain: [state_id][;output]
            # NOTE(review): the command line is parsed but it is the command name
            # that is rewritten below - confirm that this is the intended target.
            parameters = self.command_line.split(';')
            if len(parameters) < 2:
                self.command_name = "_internal_host_check;0;Host assumed to be UP"
                self.add_warning(
                    "[%s::%s] has no defined state nor output. Changed to %s" %
                    (self.my_type, self.command_name, self.command_name))
            elif len(parameters) < 3:
                # The required host state is kept in its own variable: it must not
                # overwrite `state`, which is the validity flag returned below
                # (a required state of 0 would else make the command incorrect).
                host_state = 3
                try:
                    host_state = int(parameters[1])
                except ValueError:
                    self.add_warning(
                        "[%s::%s] required a non integer state: %s. Using 3." %
                        (self.my_type, self.command_name, parameters[1]))

                if not 0 <= host_state <= 4:
                    self.add_warning(
                        "[%s::%s] required an impossible state: %d. Using 3." %
                        (self.my_type, self.command_name, host_state))
                    # Really fall back to 3, as announced in the warning
                    host_state = 3

                output = {
                    0: "UP",
                    1: "DOWN",
                    2: "DOWN",
                    3: "UNKNOWN",
                    4: "UNREACHABLE",
                }[host_state]
                # NOTE(review): unlike the other rewrites, this name does not
                # include the state id - confirm.
                self.command_name = "_internal_host_check;Host assumed to be %s" % output

                self.add_warning(
                    "[%s::%s] has no defined output. Changed to %s" %
                    (self.my_type, self.command_name, self.command_name))
            elif len(parameters) > 3:
                # Only keep the first three parameters
                self.command_name = "%s;%s;%s" % (parameters[0], parameters[1],
                                                  parameters[2])

                self.add_warning(
                    "[%s::%s] has too many parameters. Changed to %s" %
                    (self.my_type, self.command_name, self.command_name))

        return super(Command, self).is_correct() and state
コード例 #18
0
class Notification(Action):  # pylint: disable=R0902
    """A notification is an action that is used to notify a contact by
    running the notification command defined in the configuration

    """

    # AutoSlots create the __slots__ with properties and
    # running_properties names
    __metaclass__ = AutoSlots

    my_type = 'notification'

    properties = Action.properties.copy()
    properties.update({
        'is_a': StringProp(default=u'notification'),
        'start_time': IntegerProp(default=0, fill_brok=['full_status']),
        'end_time': IntegerProp(default=0, fill_brok=['full_status']),
        'contact_name': StringProp(default=u'', fill_brok=['full_status']),
        'host_name': StringProp(default=u'', fill_brok=['full_status']),
        'service_description': StringProp(default=u'', fill_brok=['full_status']),
        'reason_type': IntegerProp(default=1, fill_brok=['full_status']),
        'state': IntegerProp(default=0, fill_brok=['full_status']),
        'ack_author': StringProp(default=u'', fill_brok=['full_status']),
        'ack_data': StringProp(default=u'', fill_brok=['full_status']),
        'escalated': BoolProp(default=False, fill_brok=['full_status']),
        'command_call': StringProp(default=None),
        'contact': StringProp(default=None),
        'notif_nb': IntegerProp(default=1),
        'command': StringProp(default=u'UNSET'),
        'enable_environment_macros': BoolProp(default=False),
        # Keep a list of currently active escalations
        'already_start_escalations': SetProp(default=set()),
        'type': StringProp(default=u'PROBLEM'),

        # For authored notifications (eg. downtime...)
        'author': StringProp(default=u'n/a', fill_brok=['full_status']),
        'author_name': StringProp(default=u'n/a', fill_brok=['full_status']),
        'author_alias': StringProp(default=u'n/a', fill_brok=['full_status']),
        'author_comment': StringProp(default=u'n/a', fill_brok=['full_status']),

        # All contacts that were notified
        'recipients': ListProp(default=[])
    })

    # Macro name -> name of the attribute that provides its value
    macros = {
        'NOTIFICATIONTYPE': 'type',
        'NOTIFICATIONRECIPIENTS': 'recipients',
        'NOTIFICATIONISESCALATED': 'escalated',
        'NOTIFICATIONAUTHOR': 'author',
        'NOTIFICATIONAUTHORNAME': 'author_name',
        'NOTIFICATIONAUTHORALIAS': 'author_alias',
        'NOTIFICATIONCOMMENT': 'author_comment',
        'NOTIFICATIONNUMBER': 'notif_nb',
        'NOTIFICATIONID': 'uuid',
        'HOSTNOTIFICATIONNUMBER': 'notif_nb',
        'HOSTNOTIFICATIONID': 'uuid',
        'SERVICENOTIFICATIONNUMBER': 'notif_nb',
        'SERVICENOTIFICATIONID': 'uuid'
    }

    def __init__(self, params=None, parsing=False):
        """Build the notification, then apply the default values

        :param params: notification parameters
        :param parsing: forwarded to the parent class constructor
        :type parsing: bool
        """
        super(Notification, self).__init__(params, parsing=parsing)
        self.fill_default()

    def __str__(self):  # pragma: no cover
        details = (self.uuid, self.ref, self.type, self.status, self.command)
        return "Notification %s, item: %s, type: %s, status: %s, command:'%s'" % details

    def is_administrative(self):
        """Tell whether this notification is an "administrative" one

        :return: False if type is 'PROBLEM' or 'RECOVERY', True otherwise
        :rtype: bool
        """
        return self.type not in ('PROBLEM', 'RECOVERY')

    def get_return_from(self, notif):
        """Copy the exit_status and execution_time attributes of another notification

        :param notif: notification to get data from
        :type notif: fusionsupervision.notification.Notification
        :return: None
        """
        self.exit_status = notif.exit_status
        self.execution_time = notif.execution_time

    def fill_data_brok_from(self, data, brok_type):
        """Copy into data the value of each class property whose fill_brok
        contains brok_type

        :param data: dictionary to fill (modified in place)
        :type data: dict
        :param brok_type: type of brok
        :type brok_type: str
        :return: None
        """
        for name, definition in list(self.__class__.properties.items()):
            if brok_type not in definition.fill_brok:
                continue
            data[name] = getattr(self, name)

    def get_initial_status_brok(self):
        """Build a 'notification_raise' brok holding the uuid and the
        'full_status' properties of this notification

        :return: brok with wanted data
        :rtype: fusionsupervision.brok.Brok
        """
        payload = {'uuid': self.uuid}
        self.fill_data_brok_from(payload, 'full_status')
        return Brok({'type': 'notification_raise', 'data': payload})

    def serialize(self):
        """Serialize the notification into a simple dict object.
        It is used when transferring data to other daemons over the network (http)

        The parent class serialization is used; its command_call entry is
        also serialized unless it is None, a string or a dictionary.

        :return: dictionary representation of the notification
        :rtype: dict
        """
        res = super(Notification, self).serialize()

        command_call = res['command_call']
        if command_call is None:
            return res

        # A string or a dictionary is already in a transferable form
        if not isinstance(command_call, (string_types, dict)):
            res['command_call'] = command_call.serialize()
        return res
コード例 #19
0
class Escalation(Item):
    """Escalation class is used to implement notification escalation

    """
    my_type = 'escalation'

    properties = Item.properties.copy()
    properties.update({
        'escalation_name':
            StringProp(),
        'host_name':
            StringProp(default=''),
        'hostgroup_name':
            StringProp(''),
        'service_description':
            StringProp(default=''),
        'first_notification':
            IntegerProp(),
        'last_notification':
            IntegerProp(),
        'first_notification_time':
            IntegerProp(),
        'last_notification_time':
            IntegerProp(),
        # As a default don't use the notification_interval defined in
        # the escalation, but the one defined in the object
        'notification_interval':
            IntegerProp(default=-1),
        'escalation_period':
            StringProp(default=''),
        'escalation_options':
            ListProp(default=['d', 'x', 'r', 'w', 'c'], split_on_comma=True),
        'contacts':
            ListProp(default=[], split_on_comma=True),
        'contact_groups':
            ListProp(default=[], split_on_comma=True),
    })

    running_properties = Item.running_properties.copy()
    running_properties.update({
        'time_based': BoolProp(default=False),
    })

    special_properties = ('contacts', 'contact_groups',
                          'first_notification_time', 'last_notification_time')
    special_properties_time_based = ('contacts', 'contact_groups',
                                     'first_notification', 'last_notification')

    # Status name -> one letter escalation option, shared by is_eligible and
    # get_next_notif_time so that both always agree. UNREACHABLE is 'x' because
    # __init__ rewrites every 'u' found in the escalation options into 'x'.
    _SHORT_STATES = {
        u'WARNING': 'w', u'UNKNOWN': 'u', u'CRITICAL': 'c',
        u'RECOVERY': 'r', u'FLAPPING': 'f', u'DOWNTIME': 's',
        u'DOWN': 'd', u'UNREACHABLE': 'x', u'OK': 'o', u'UP': 'o'
    }

    def __init__(self, params=None, parsing=True):
        """Build the escalation from its configuration parameters

        In the escalation_options parameter, when provided, every 'u'
        character is replaced with 'x' before calling the parent constructor.

        :param params: escalation configuration parameters
        :type params: dict | None
        :param parsing: forwarded to the parent class constructor
        :type parsing: bool
        """
        if params is None:
            params = {}

        for prop in ['escalation_options']:
            if prop in params:
                params[prop] = [p.replace('u', 'x') for p in params[prop]]
        super(Escalation, self).__init__(params, parsing=parsing)

    def get_name(self):
        """Accessor to escalation_name attribute

        :return: escalation name
        :rtype: str
        """
        return self.escalation_name

    def is_eligible(self, timestamp, status, notif_number, in_notif_time, interval, escal_period):
        # pylint: disable=too-many-return-statements
        """Check if the escalation is eligible (notification is escalated or not)

        Escalation is NOT eligible in ONE of the following condition is fulfilled::

        * escalation is not time based and notification number not in range
          [first_notification;last_notification] (if last_notif == 0, it's infinity)
        * escalation is time based and notification time not in range
          [first_notification_time;last_notification_time] (if last_notif_time == 0, it's infinity)
        * status does not matches escalation_options ('WARNING' <=> 'w' ...)
        * escalation_period is not legit for this time (now usually)

        :param timestamp: timestamp to check if timeperiod is valid
        :type timestamp: int
        :param status: item status (one of the small_states key)
        :type status: str
        :param notif_number: current notification number
        :type notif_number: int
        :param in_notif_time: current notification time
        :type in_notif_time: int
        :param interval: time interval length
        :type interval: int
        :param escal_period: escalation period; None disables the period check
        :return: True if no condition has been fulfilled, otherwise False
        :rtype: bool
        """
        short_states = self._SHORT_STATES

        # If we are not time based, we check notification numbers:
        if not self.time_based:
            # Begin with the easy cases
            if notif_number < self.first_notification:
                return False

            # self.last_notification = 0 mean no end
            if self.last_notification and notif_number > self.last_notification:
                return False
        # Else we are time based, we must check for the good value
        else:
            # Begin with the easy cases
            if in_notif_time < self.first_notification_time * interval:
                return False

            if self.last_notification_time and \
                    in_notif_time > self.last_notification_time * interval:
                return False

        # If our status is not good, we bail out too
        # (a status that is not a known one is not filtered)
        if status in short_states and short_states[status] not in self.escalation_options:
            return False

        # Maybe the time is not in our escalation_period
        if escal_period is not None and not escal_period.is_time_valid(timestamp):
            return False

        # Ok, I do not see why not escalade. So it's True :)
        return True

    def get_next_notif_time(self, t_wished, status, creation_time, interval, escal_period):
        """Get the next notification time for the escalation
        Only legit for time based escalation

        :param t_wished: time we would like to send a new notification (usually now)
        :type t_wished:
        :param status: status of the host or service
        :type status:
        :param creation_time: time the notification was created
        :type creation_time:
        :param interval: time interval length
        :type interval: int
        :param escal_period: escalation period; None disables the period check
        :return: timestamp for next notification or None
        :rtype: int | None
        """
        # Same status mapping as is_eligible: UNREACHABLE matches the 'x' option
        short_states = self._SHORT_STATES

        # If we are not time based, we bail out!
        if not self.time_based:
            return None

        # Check if we are valid
        if status in short_states and short_states[status] not in self.escalation_options:
            return None

        # Look for the min of our future validity
        start = self.first_notification_time * interval + creation_time

        # If we are after the classic next time, we are not asking for a smaller interval
        if start > t_wished:
            return None

        # Maybe the time we found is not a valid one....
        if escal_period is not None and not escal_period.is_time_valid(start):
            return None

        # Ok so I ask for my start as a possibility for the next notification time
        return start

    def is_correct(self):
        """Check if this object configuration is correct ::

        * Check our own specific properties
        * Call our parent class is_correct checker

        The escalation becomes time based as soon as one of the
        first_notification_time / last_notification_time attributes exists.

        :return: True if the configuration is correct, otherwise False
        :rtype: bool
        """
        state = True

        # Internal checks before executing inherited function...

        # If we got the _time parameters, we are time based. Unless, we are not :)
        if hasattr(self, 'first_notification_time') or hasattr(self, 'last_notification_time'):
            self.time_based = True

        # Ok now we manage special cases...
        if not hasattr(self, 'contacts') and not hasattr(self, 'contact_groups'):
            self.add_error('%s: I do not have contacts nor contact_groups' % (self.get_name()))
            state = False

        # If time_based or not, we do not check all properties
        if self.time_based:
            if not hasattr(self, 'first_notification_time'):
                self.add_error('%s: I do not have first_notification_time' % (self.get_name()))
                state = False
            if not hasattr(self, 'last_notification_time'):
                self.add_error('%s: I do not have last_notification_time' % (self.get_name()))
                state = False
        else:  # we check classical properties
            if not hasattr(self, 'first_notification'):
                self.add_error('%s: I do not have first_notification' % (self.get_name()))
                state = False
            if not hasattr(self, 'last_notification'):
                self.add_error('%s: I do not have last_notification' % (self.get_name()))
                state = False

        # Change the special_properties definition according to time_based ...
        save_special_properties = self.special_properties
        if self.time_based:
            self.special_properties = self.special_properties_time_based

        try:
            state_parent = super(Escalation, self).is_correct()
        finally:
            # Restore the definition even if the parent check raised
            if self.time_based:
                self.special_properties = save_special_properties

        return state_parent and state
コード例 #20
0
class Check(Action):  # pylint: disable=R0902
    """A check is an action that stores the data of a monitoring plugin
    execution (exit status, output...)

    """
    # AutoSlots create the __slots__ with properties and
    # running_properties names

    # FIXME : re-enable AutoSlots if possible
    # __metaclass__ = AutoSlots

    my_type = 'check'

    properties = Action.properties.copy()
    properties.update({
        'is_a': StringProp(default=u'check'),
        'state': IntegerProp(default=0),
        'depend_on': ListProp(default=[]),
        'depend_on_me': ListProp(default=[], split_on_comma=False),
        'passive_check': BoolProp(default=False),
        'freshness_expiry_check': BoolProp(default=False),
        'poller_tag': StringProp(default=u'None'),
        'dependency_check': BoolProp(default=False),
    })

    def __init__(self, params=None, parsing=False):
        """Build the check and flag it as internal when its command starts
        with an underscore

        :param params: check parameters
        :param parsing: forwarded to the parent class constructor
        :type parsing: bool
        """
        super(Check, self).__init__(params, parsing=parsing)

        if self.command.startswith('_'):
            self.internal = True

    def __str__(self):  # pragma: no cover
        details = (self.uuid, "passive" if self.passive_check else "active",
                   self.ref, self.status, self.command)
        return "Check %s %s, item: %s, status: %s, command:'%s'" % details

    def get_return_from(self, check):
        """Copy the execution result attributes of another action into this check

        :param check: action to get data from
        :type check: fusionsupervision.action.Action
        :return: None
        """
        copied = ('exit_status', 'output', 'long_output', 'check_time',
                  'execution_time', 'perf_data', 'u_time', 's_time')
        for name in copied:
            setattr(self, name, getattr(check, name))

    def set_type_active(self):
        """Flag this check as an active one (passive_check is False)

        :return: None
        """
        self.passive_check = False

    def set_type_passive(self):
        """Flag this check as a passive one (passive_check is True)

        :return: None
        """
        self.passive_check = True

    def is_dependent(self):
        """Get the dependency_check attribute

        :return: True if this check was created for a dependent one, False otherwise
        :rtype: bool
        """
        return self.dependency_check

    def serialize(self):
        """Serialize the check into a simple dict object.

        The only usage is to send to poller, and it does not need to have the
        depend_on and depend_on_me properties: both are removed from the
        parent class serialization.

        :return: dictionary representation of a Check
        :rtype: dict
        """
        res = super(Check, self).serialize()
        for key in ('depend_on', 'depend_on_me'):
            if key in res:
                del res[key]
        return res
コード例 #21
0
class Contact(Item):
    """Contact class implements monitoring concepts for contact.
    For example it defines host_notification_period, service_notification_period etc.
    """
    my_type = 'contact'

    properties = Item.properties.copy()
    properties.update({
        'contact_name':
        StringProp(fill_brok=['full_status']),
        'alias':
        StringProp(default=u'', fill_brok=['full_status']),
        'contactgroups':
        ListProp(default=[], fill_brok=['full_status']),
        'host_notifications_enabled':
        BoolProp(default=True, fill_brok=['full_status']),
        'service_notifications_enabled':
        BoolProp(default=True, fill_brok=['full_status']),
        'host_notification_period':
        StringProp(default='', fill_brok=['full_status']),
        'service_notification_period':
        StringProp(default='', fill_brok=['full_status']),
        'host_notification_options':
        ListProp(default=[''], fill_brok=['full_status'], split_on_comma=True),
        'service_notification_options':
        ListProp(default=[''], fill_brok=['full_status'], split_on_comma=True),
        # To be consistent with notificationway object attributes
        'host_notification_commands':
        ListProp(default=[], fill_brok=['full_status']),
        'service_notification_commands':
        ListProp(default=[], fill_brok=['full_status']),
        'min_business_impact':
        IntegerProp(default=0, fill_brok=['full_status']),
        'email':
        StringProp(default=u'none', fill_brok=['full_status']),
        'pager':
        StringProp(default=u'none', fill_brok=['full_status']),
        'address1':
        StringProp(default=u'none', fill_brok=['full_status']),
        'address2':
        StringProp(default=u'none', fill_brok=['full_status']),
        'address3':
        StringProp(default=u'none', fill_brok=['full_status']),
        'address4':
        StringProp(default=u'none', fill_brok=['full_status']),
        'address5':
        StringProp(default=u'none', fill_brok=['full_status']),
        'address6':
        StringProp(default=u'none', fill_brok=['full_status']),
        'can_submit_commands':
        BoolProp(default=False, fill_brok=['full_status']),
        'is_admin':
        BoolProp(default=False, fill_brok=['full_status']),
        'expert':
        BoolProp(default=False, fill_brok=['full_status']),
        'retain_status_information':
        BoolProp(default=True, fill_brok=['full_status']),
        'notificationways':
        ListProp(default=[], fill_brok=['full_status']),
        'password':
        StringProp(default=u'NOPASSWORDSET', fill_brok=['full_status']),
    })

    running_properties = Item.running_properties.copy()
    running_properties.update({
        'modified_attributes':
        IntegerProp(default=0, fill_brok=['full_status'], retention=True),
        'modified_host_attributes':
        IntegerProp(default=0, fill_brok=['full_status'], retention=True),
        'modified_service_attributes':
        IntegerProp(default=0, fill_brok=['full_status'], retention=True),
        'in_scheduled_downtime':
        BoolProp(default=False,
                 fill_brok=['full_status', 'check_result'],
                 retention=True),
        'broks':
        ListProp(default=[]),  # and here broks raised
        'customs':
        DictProp(default={}, fill_brok=['full_status']),
    })

    # This table is used to transform old parameters names into the new ones,
    # so from Nagios2 format to Nagios3 one,
    # or FusionSupervision Engine deprecated names like criticity
    old_properties = {
        'min_criticity': 'min_business_impact',
    }

    # Macro name -> name of the attribute, or of the method, providing its value
    macros = {
        'CONTACTNAME': 'contact_name',
        'CONTACTALIAS': 'alias',
        'CONTACTEMAIL': 'email',
        'CONTACTPAGER': 'pager',
        'CONTACTADDRESS1': 'address1',
        'CONTACTADDRESS2': 'address2',
        'CONTACTADDRESS3': 'address3',
        'CONTACTADDRESS4': 'address4',
        'CONTACTADDRESS5': 'address5',
        'CONTACTADDRESS6': 'address6',
        'CONTACTGROUPNAME': 'get_groupname',
        'CONTACTGROUPNAMES': 'get_groupnames'
    }

    # Properties that is_correct requires when the contact has no notification way
    special_properties = ('service_notification_commands',
                          'host_notification_commands',
                          'service_notification_period',
                          'host_notification_period',
                          'service_notification_options',
                          'host_notification_options', 'contact_name')

    simple_way_parameters = ('service_notification_period',
                             'host_notification_period',
                             'service_notification_options',
                             'host_notification_options',
                             'service_notification_commands',
                             'host_notification_commands',
                             'min_business_impact')

    def __init__(self, params=None, parsing=True):
        """Create a contact from its parameters

        When the notification commands are provided as a non-empty list of
        dictionaries (the format produced by `serialize`), they are rebuilt as
        CommandCall objects, set on the contact and removed from `params` before
        the parent constructor is called.

        :param params: contact parameters; the rebuilt notification commands are
                       deleted from this dictionary
        :type params: dict | None
        :param parsing: parsing flag, handed over to CommandCall and to the parent constructor
        :type parsing: bool
        """
        if params is None:
            params = {}

        # At deserialization, those are dict
        # TODO: Separate parsing instance from recreated ones
        for prop in [
                'service_notification_commands', 'host_notification_commands'
        ]:
            if prop in params and isinstance(params[prop], list) and params[prop] \
                    and isinstance(params[prop][0], dict):
                new_list = [
                    CommandCall(elem, parsing=parsing) for elem in params[prop]
                ]
                # We recreate the object
                setattr(self, prop, new_list)
                # And remove prop, to prevent from being overridden
                del params[prop]
        super(Contact, self).__init__(params, parsing=parsing)

    def __str__(self):  # pragma: no cover
        return '<Contact %s, uuid=%s, use: %s />' \
               % (self.get_name(), self.uuid, getattr(self, 'use', None))

    __repr__ = __str__

    def serialize(self):
        """Serialize the contact, including its notification commands

        The parent serialization is completed with the host and service
        notification commands: each command is serialized with its own
        `serialize` method, and a None property is kept as None.

        :return: serialized contact
        :rtype: dict
        """
        res = super(Contact, self).serialize()

        for prop in [
                'service_notification_commands', 'host_notification_commands'
        ]:
            if getattr(self, prop) is None:
                res[prop] = None
            else:
                res[prop] = [elem.serialize() for elem in getattr(self, prop)]

        return res

    def get_name(self):
        """Get contact name

        For a template, this is the template name prefixed with "tpl-".

        :return: contact name, or 'unnamed' if the name is not defined
        :rtype: str
        """
        if self.is_tpl():
            return "tpl-%s" % (getattr(self, 'name', 'unnamed'))
        return getattr(self, 'contact_name', 'unnamed')

    def get_groupname(self):
        """
        Get the first group name whose contact belongs to
        :return: group name, or 'Unknown' if the contact is in no group
        :rtype: str
        """
        if self.contactgroups:
            return self.contactgroups[0]
        return 'Unknown'

    def get_groupnames(self):
        """
        Get all the groups name whose contact belongs to
        :return: comma separated list of the groups names, or 'Unknown' if the
                 contact is in no group
        :rtype: str
        """
        if self.contactgroups:
            return ', '.join(self.contactgroups)
        return 'Unknown'

    def _update_scheduled_downtime(self):
        """Refresh the `in_scheduled_downtime` flag from the contact downtimes

        :return: True if at least one downtime is currently in effect, otherwise False
        :rtype: bool
        """
        downtimes = self.downtimes
        # The downtimes are indexed by their identifier: the downtime objects
        # must be checked, not the dictionary keys
        if isinstance(downtimes, dict):
            downtimes = downtimes.values()
        self.in_scheduled_downtime = any(downtime.is_in_effect for downtime in downtimes)
        return self.in_scheduled_downtime

    def want_service_notification(self,
                                  notifways,
                                  timeperiods,
                                  timestamp,
                                  state,
                                  n_type,
                                  business_impact,
                                  cmd=None):
        """Check if notification options match the state of the service

        The `in_scheduled_downtime` attribute is updated as a side effect.

        :param notifways: notification ways, indexed by the identifiers stored
                          in the `notificationways` property
        :param timeperiods: time periods, handed over to the notification ways
        :param timestamp: time we want to notify the contact (usually now)
        :type timestamp: int
        :param state: host or service state ("WARNING", "CRITICAL" ..)
        :type state: str
        :param n_type: type of notification ("PROBLEM", "RECOVERY" ..)
        :type n_type: str
        :param business_impact: impact of this service
        :type business_impact: int
        :param cmd: command launched to notify the contact
        :type cmd: str
        :return: True if contact wants notification, otherwise False
        :rtype: bool
        """
        if not self.service_notifications_enabled:
            return False

        # If we are in downtime, we do not want notification
        if self._update_scheduled_downtime():
            return False

        # Now the rest is for sub notificationways. If one is OK, we are ok
        # We will filter in another phase
        for notifway_id in self.notificationways:
            notifway = notifways[notifway_id]
            nw_b = notifway.want_service_notification(timeperiods, timestamp,
                                                      state, n_type,
                                                      business_impact, cmd)
            if nw_b:
                return True

        # Oh... no one is ok for it? so no, sorry
        return False

    def want_host_notification(self,
                               notifways,
                               timeperiods,
                               timestamp,
                               state,
                               n_type,
                               business_impact,
                               cmd=None):
        """Check if notification options match the state of the host

        The `in_scheduled_downtime` attribute is updated as a side effect.

        :param notifways: notification ways, indexed by the identifiers stored
                          in the `notificationways` property
        :param timeperiods: time periods, handed over to the notification ways
        :param timestamp: time we want to notify the contact (usually now)
        :type timestamp: int
        :param state: host or service state ("UP", "DOWN" ..)
        :type state: str
        :param n_type: type of notification ("PROBLEM", "RECOVERY" ..)
        :type n_type: str
        :param business_impact: impact of this host
        :type business_impact: int
        :param cmd: command launch to notify the contact
        :type cmd: str
        :return: True if contact wants notification, otherwise False
        :rtype: bool
        """
        if not self.host_notifications_enabled:
            return False

        # If we are in downtime, we do not want notification.
        # Same downtime lookup as for the services: the former code read
        # `is_in_effect` on the downtimes dictionary keys
        if self._update_scheduled_downtime():
            return False

        # Now it's all for sub notificationways. If one is OK, we are OK
        # We will filter in another phase
        for notifway_id in self.notificationways:
            notifway = notifways[notifway_id]
            nw_b = notifway.want_host_notification(timeperiods, timestamp,
                                                   state, n_type,
                                                   business_impact, cmd)
            if nw_b:
                return True

        # Oh, nobody..so NO :)
        return False

    def get_notification_commands(self, notifways, n_type, command_name=False):
        """Get notification commands for object type

        The commands of all the contact notification ways are gathered and the
        `<n_type>_notification_commands` attribute of the contact is updated.

        :param notifways: list of fusionsupervision.objects.NotificationWay objects
        :type notifways: NotificationWays
        :param n_type: object type (host or service)
        :type n_type: string
        :param command_name: True to update the inner property with the name of the command,
                             False to update with the Command objects list
        :type command_name: bool
        :return: command list
        :rtype: list[fusionsupervision.objects.command.Command]
        """
        res = []

        for notifway_id in self.notificationways:
            notifway = notifways[notifway_id]
            res.extend(notifway.get_notification_commands(n_type))

        # Update inner notification commands property with command name or command
        if command_name:
            setattr(self, n_type + '_notification_commands',
                    [c.get_name() for c in res])
        else:
            setattr(self, n_type + '_notification_commands', res)

        return res

    def is_correct(self):
        """Check if this object configuration is correct ::

        * Check our own specific properties
        * Call our parent class is_correct checker

        :return: True if the configuration is correct, otherwise False
        :rtype: bool
        """
        state = True
        cls = self.__class__

        # Internal checks before executing inherited function...

        # There is a case where there is no nw: when there is not special_prop defined
        # at all!!
        if not self.notificationways:
            for prop in self.special_properties:
                if not hasattr(self, prop):
                    msg = "[contact::%s] %s property is missing" % (
                        self.get_name(), prop)
                    self.add_error(msg)
                    state = False

        if not hasattr(self, 'contact_name') and hasattr(self, 'alias'):
            # Use the alias if we miss the contact_name
            self.contact_name = self.alias

        # The name may still be missing (no alias to fall back on): do not crash
        # on the characters check in this case
        contact_name = getattr(self, 'contact_name', '')
        for char in cls.illegal_object_name_chars:
            if char not in contact_name:
                continue

            msg = "[contact::%s] %s character not allowed in contact_name" \
                  % (self.get_name(), char)
            self.add_error(msg)
            state = False

        return super(Contact, self).is_correct() and state

    def raise_enter_downtime_log_entry(self):
        """Raise CONTACT DOWNTIME ALERT entry (info level)
        Format is : "CONTACT DOWNTIME ALERT: *get_name()*;STARTED;
                      Contact has entered a period of scheduled downtime"
        Example : "CONTACT DOWNTIME ALERT: test_contact;STARTED;
                    Contact has entered a period of scheduled downtime"

        :return: None
        """
        brok = make_monitoring_log(
            'info', "CONTACT DOWNTIME ALERT: %s;STARTED; "
            "Contact has entered a period of scheduled downtime" %
            self.get_name())
        self.broks.append(brok)

    def raise_exit_downtime_log_entry(self):
        """Raise CONTACT DOWNTIME ALERT entry (info level)
        Format is : "CONTACT DOWNTIME ALERT: *get_name()*;STOPPED;
                      Contact has exited from a period of scheduled downtime"
        Example : "CONTACT DOWNTIME ALERT: test_contact;STOPPED;
                    Contact has exited from a period of scheduled downtime"

        :return: None
        """
        brok = make_monitoring_log(
            'info', "CONTACT DOWNTIME ALERT: %s;STOPPED; "
            "Contact has exited from a period of scheduled downtime" %
            self.get_name())
        self.broks.append(brok)

    def raise_cancel_downtime_log_entry(self):
        """Raise CONTACT DOWNTIME ALERT entry (info level)
        Format is : "CONTACT DOWNTIME ALERT: *get_name()*;CANCELLED;
                      Scheduled downtime for contact has been cancelled."
        Example : "CONTACT DOWNTIME ALERT: test_contact;CANCELLED;
                    Scheduled downtime for contact has been cancelled."

        :return: None
        """
        brok = make_monitoring_log(
            'info', "CONTACT DOWNTIME ALERT: %s;CANCELLED; "
            "Scheduled downtime for contact has been cancelled." %
            self.get_name())
        self.broks.append(brok)
コード例 #22
0
class Fusionsupervision(BaseSatellite):
    # pylint: disable=too-many-instance-attributes
    """Scheduler class. Referenced as "app" in most Interface

    """

    properties = BaseSatellite.properties.copy()
    properties.update({
        'type':
            StringProp(default='scheduler'),
        'port':
            IntegerProp(default=7768)
    })

    def __init__(self, **kwargs):
        """Initialize the scheduler daemon

        The daemon name is read from the `daemon_name` keyword argument and
        defaults to 'Default-scheduler'.

        :param kwargs: command line arguments
        """
        daemon_name = kwargs.get('daemon_name', 'Default-scheduler')
        super(Fusionsupervision, self).__init__(daemon_name, **kwargs)

        # The HTTP interface first, then the scheduler object
        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        # Statistics counters (copied from scheduler.py), all starting from zero
        self.nb_pulled_checks = self.nb_pulled_actions = 0
        self.nb_pushed_checks = self.nb_pushed_actions = 0
        self.nb_pulled_broks = 0

        # Possible links to the satellites: one distinct dictionary per type
        self.brokers, self.pollers = {}, {}
        self.reactionners, self.receivers = {}, {}

        # Broks are stored in each broker link, not locally: no `broks` list
        # is created here, only the lock
        self.broks_lock = threading.RLock()

        # Modules are only loaded one time
        self.have_modules = False

        # Set to True by the first loop turn once the first scheduling is done
        self.first_scheduling = False

    def get_broks(self, broker_name):
        """Get the broks to send to a specific broker

        The broks of the matching broker link are provided sorted by creation
        time. The provided broks are flagged as got and removed from the link.

        :param broker_name: broker name to send broks
        :type broker_name: str
        :return: list of broks for this broker (empty if the broker name is
                 empty or unknown)
        :rtype: list[fusionsupervision.brok.Brok]
        """
        logger.debug("Broker %s requests my broks list", broker_name)
        provided = []
        if not broker_name:
            return provided

        # First link which name is the requested one
        requester = next((link for link in list(self.brokers.values())
                          if broker_name == link.name), None)
        if requester is None:
            logger.warning("Got a brok request from an unknown broker: %s", broker_name)
            return provided

        for brok in sorted(requester.broks, key=lambda x: x.creation_time):
            # Only provide the broks flagged as sent to our external modules
            if getattr(brok, 'sent_to_externals', False):
                provided.append(brok)
                brok.got = True
        # The link only keeps the broks that were not provided
        requester.broks = [b for b in requester.broks if not getattr(b, 'got', False)]
        logger.debug("Providing %d broks to %s", len(provided), broker_name)

        return provided

    def compensate_system_time_change(self, difference):  # pragma: no cover,
        # pylint: disable=too-many-branches
        # not with unit tests
        """Compensate a system time change of difference for all hosts/services/checks/notifs

        The daemon start time, the hosts and the services are shifted, then the
        scheduled checks and actions. A shifted check time is realigned on the
        check period of its item and a shifted notification time on the
        notification period of its item.

        :param difference: difference in seconds
        :type difference: int
        :return: None
        """
        super(Fusionsupervision, self).compensate_system_time_change(difference)

        # We only need to change some value
        self.program_start = max(0, self.program_start + difference)

        if not hasattr(self.sched, "conf"):
            # Race condition where time change before getting conf
            return

        # Then we compensate all host/services
        for host in self.sched.hosts:
            host.compensate_system_time_change(difference)
        for serv in self.sched.services:
            serv.compensate_system_time_change(difference)

        # Now all the checks
        for chk in list(self.sched.checks.values()):
            # Already launched checks should not be touched
            if chk.status != u'scheduled' or chk.t_to_go is None:
                continue

            ref = self.sched.find_item_by_id(chk.ref)
            new_t = max(0, chk.t_to_go + difference)
            timeperiod = self.sched.timeperiods[ref.check_period]
            if timeperiod is not None:
                # But it's not so simple, we must match the timeperiod
                new_t = timeperiod.get_next_valid_time_from_t(new_t)

            if new_t is None:
                # There is no more valid time: report this as an error result.
                # A check is selected above through its `status`: the former code
                # only set `state` and the check remained scheduled with its
                # outdated time. `state` is still set for backward compatibility.
                chk.status = u'waitconsume'
                chk.state = u'waitconsume'
                chk.exit_status = 2
                chk.output = '(Error: there is no available check time after time change!)'
                chk.check_time = time.time()
                chk.execution_time = 0
            else:
                chk.t_to_go = new_t
                ref.next_chk = new_t

        # Now all the actions
        for act in list(self.sched.actions.values()):
            # Already launched actions should not be touched, and an action with
            # no planned time cannot be shifted (same guard as for the checks)
            if act.status != u'scheduled' or act.t_to_go is None:
                continue

            new_t = max(0, act.t_to_go + difference)

            # Notification should be checked with notification_period
            if act.is_a == u'notification':
                # Event handlers do not have a ref, hence the getattr
                ref = self.sched.find_item_by_id(getattr(act, 'ref', None))
                if ref.notification_period:
                    # But it's not so simple, we must match the timeperiod
                    notification_period = self.sched.timeperiods[ref.notification_period]
                    new_t = notification_period.get_next_valid_time_from_t(new_t)
                # And got a creation_time variable too
                act.creation_time += difference

            if new_t is None:
                # There is no more valid time: report this as an error result.
                # NOTE(review): only `state` is set here whereas the selection
                # above relies on `status` - confirm which attribute the
                # scheduler expects for the actions before changing it
                act.state = 'waitconsume'
                act.exit_status = 2
                act.output = '(Error: there is no available check time after time change!)'
                act.check_time = time.time()
                act.execution_time = 0
            else:
                act.t_to_go = new_t

    def do_before_loop(self):
        """Stop the scheduling process, if a scheduler object exists

        :return: None
        """
        if not self.sched:
            return
        self.sched.stop_scheduling()

    def do_loop_turn(self):
        """Scheduler loop turn

        On the first turn, the initial program status is raised, the first
        scheduling is run and the passive pollers and reactionners are
        connected. Then, on every turn, the scheduler loop is run provided that
        the scheduler has a pushed configuration.

        :return: None
        """
        if not self.first_scheduling:
            # Ok, now all is initialized, we can make the initial broks
            logger.info("First scheduling launched")
            started_at = time.time()
            # Program start brok, then first scheduling
            self.sched.initial_program_status()
            self.sched.schedule()
            statsmgr.timer('first_scheduling', time.time() - started_at)
            logger.info("First scheduling done")

            # Connect to our passive satellites if needed: pollers, then reactionners
            for link_type in ('pollers', 'reactionners'):
                links = getattr(self, link_type)
                for satellite in [link for link in list(links.values()) if link.passive]:
                    if not self.daemon_connection_init(satellite):
                        logger.error("Passive satellite connection failed: %s", satellite)

            # Ticks are for recurrent function call like consume, del zombies etc
            self.sched.ticks = 0
            self.first_scheduling = True

        # The daemon specific treatment is only executed if the daemon has a
        # configuration to manage
        if not self.sched.pushed_conf:
            logger.warning("#%d - No monitoring configuration to scheduler...",
                           self.loop_count)
            return

        # If scheduling is not yet enabled, enable scheduling
        if not self.sched.must_schedule:
            self.sched.start_scheduling()
            self.sched.before_run()
        self.sched.run()

    def get_managed_configurations(self):
        """Get the configurations managed by this scheduler

        The configuration managed by a scheduler is the self configuration got
        by the scheduler during the dispatching. The result is empty when the
        scheduler has no pushed configuration or when the current configuration
        has no instance identifier.

        :return: a dict with the instance_id as key and the hash, push_flavor
                 and managed_conf_id of the current configuration as values
        :rtype: dict
        """
        managed = {}
        is_managing = (self.sched.pushed_conf and self.cur_conf
                       and 'instance_id' in self.cur_conf)
        if is_managing:
            managed[self.cur_conf['instance_id']] = {
                key: self.cur_conf[key]
                for key in ('hash', 'push_flavor', 'managed_conf_id')
            }
        logger.debug("Get managed configuration: %s", managed)
        return managed

    def setup_new_conf(self):
        # pylint: disable=too-many-statements, too-many-branches, too-many-locals
        """Setup new conf received for scheduler

        :return: None
        """
        # Execute the base class treatment...
        super(Fusionsupervision, self).setup_new_conf()

        # ...then our own specific treatment!
        with self.conf_lock:
            # self_conf is our own configuration from the fusionsupervision environment
            # self_conf = self.cur_conf['self_conf']
            logger.debug("Got config: %s", self.cur_conf)
            if 'conf_part' not in self.cur_conf:
                self.cur_conf['conf_part'] = None
            conf_part = self.cur_conf['conf_part']

            # Ok now we can save the retention data
            if self.sched.pushed_conf is not None:
                self.sched.update_retention()

            # Get the monitored objects configuration
            t00 = time.time()
            received_conf_part = None
            try:
                received_conf_part = unserialize(conf_part)
                assert received_conf_part is not None
            except AssertionError as exp:
                # This to indicate that no configuration is managed by this scheduler...
                logger.warning("No managed configuration received from arbiter")
            except FusionsupervisionClassLookupException as exp:  # pragma: no cover
                # This to indicate that the new configuration is not managed...
                self.new_conf = {
                    "_status": "Cannot un-serialize configuration received from arbiter",
                    "_error": str(exp)
                }
                logger.error(self.new_conf)
                logger.error("Back trace of the error:\n%s", traceback.format_exc())
                return
            except Exception as exp:  # pylint: disable=broad-except
                # This to indicate that the new configuration is not managed...
                self.new_conf = {
                    "_status": "Cannot un-serialize configuration received from arbiter",
                    "_error": str(exp)
                }
                logger.error(self.new_conf)
                self.exit_on_exception(exp, str(self.new_conf))

            # if not received_conf_part:
            #     return

            logger.info("Monitored configuration %s received at %d. Un-serialized in %d secs",
                        received_conf_part, t00, time.time() - t00)
            logger.info("Scheduler received configuration : %s", received_conf_part)

            # Now we create our pollers, reactionners and brokers
            for link_type in ['pollers', 'reactionners', 'brokers']:
                if link_type not in self.cur_conf['satellites']:
                    logger.error("Missing %s in the configuration!", link_type)
                    continue

                my_satellites = getattr(self, link_type, {})
                received_satellites = self.cur_conf['satellites'][link_type]
                for link_uuid in received_satellites:
                    rs_conf = received_satellites[link_uuid]
                    logger.debug("- received %s - %s: %s", rs_conf['instance_id'],
                                 rs_conf['type'], rs_conf['name'])

                    # Must look if we already had a configuration and save our broks
                    already_got = rs_conf['instance_id'] in my_satellites
                    broks = []
                    actions = {}
                    wait_homerun = {}
                    external_commands = {}
                    running_id = 0
                    if already_got:
                        logger.warning("I already got: %s", rs_conf['instance_id'])
                        # Save some information
                        running_id = my_satellites[link_uuid].running_id
                        (broks, actions,
                         wait_homerun, external_commands) = \
                            my_satellites[link_uuid].get_and_clear_context()
                        # Delete the former link
                        del my_satellites[link_uuid]

                    # My new satellite link...
                    new_link = SatelliteLink.get_a_satellite_link(link_type[:-1],
                                                                  rs_conf)
                    my_satellites[new_link.uuid] = new_link
                    logger.info("I got a new %s satellite: %s", link_type[:-1], new_link)

                    new_link.running_id = running_id
                    new_link.external_commands = external_commands
                    new_link.broks = broks
                    new_link.wait_homerun = wait_homerun
                    new_link.actions = actions

                    # Replacing the satellite address and port by those defined in satellite_map
                    if new_link.name in self.cur_conf['override_conf'].get('satellite_map', {}):
                        override_conf = self.cur_conf['override_conf']
                        overriding = override_conf.get('satellite_map')[new_link.name]
                        logger.warning("Do not override the configuration for: %s, with: %s. "
                                       "Please check whether this is necessary!",
                                       new_link.name, overriding)

            # First mix conf and override_conf to have our definitive conf
            for prop in getattr(self.cur_conf, 'override_conf', []):
                logger.debug("Overriden: %s / %s ", prop, getattr(received_conf_part, prop, None))
                logger.debug("Overriding: %s / %s ", prop, self.cur_conf['override_conf'])
                setattr(received_conf_part, prop, self.cur_conf['override_conf'].get(prop, None))

            # Scheduler modules
            if not self.have_modules:
                try:
                    logger.debug("Modules configuration: %s", self.cur_conf['modules'])
                    self.modules = unserialize(self.cur_conf['modules'], no_load=True)
                except FusionsupervisionClassLookupException as exp:  # pragma: no cover, simple protection
                    logger.error('Cannot un-serialize modules configuration '
                                 'received from arbiter: %s', exp)
                if self.modules:
                    logger.debug("I received some modules configuration: %s", self.modules)
                    self.have_modules = True

                    self.do_load_modules(self.modules)
                    # and start external modules too
                    self.modules_manager.start_external_instances()
                else:
                    logger.info("I do not have modules")

            if received_conf_part:
                logger.info("Loading configuration...")

                # Propagate the global parameters to the configuration items
                received_conf_part.explode_global_conf()

                # We give the configuration to our scheduler
                self.sched.reset()
                self.sched.load_conf(self.cur_conf['instance_id'],
                                     self.cur_conf['instance_name'],
                                     received_conf_part)

                # Once loaded, the scheduler has an inner pushed_conf object
                logger.info("Loaded: %s", self.sched.pushed_conf)

                # Update the scheduler ticks according to the daemon configuration
                self.sched.update_recurrent_works_tick(self)

                # We must update our pushed configuration macros with correct values
                # from the configuration parameters
                # self.sched.pushed_conf.fill_resource_macros_names_macros()

                # Creating the Macroresolver Class & unique instance
                m_solver = MacroResolver()
                m_solver.init(received_conf_part)

                # Now create the external commands manager
                # We are an applyer: our role is not to dispatch commands, but to apply them
                ecm = ExternalCommandManager(
                    received_conf_part, 'applyer', self.sched,
                    received_conf_part.accept_passive_unknown_check_results,
                    received_conf_part.log_external_commands)

                # Scheduler needs to know about this external command manager to use it if necessary
                self.sched.external_commands_manager = ecm

                # Ok now we can load the retention data
                self.sched.retention_load()

                # Log hosts/services initial states
                self.sched.log_initial_states()

            # Create brok new conf
            brok = Brok({'type': 'new_conf', 'data': {}})
            self.sched.add_brok(brok)

            # Initialize connection with all our satellites
            logger.info("Initializing connection with my satellites:")
            my_satellites = self.get_links_of_type(s_type='')
            for satellite in list(my_satellites.values()):
                logger.info("- : %s/%s", satellite.type, satellite.name)
                if not self.daemon_connection_init(satellite):
                    logger.error("Satellite connection failed: %s", satellite)

            if received_conf_part:
                # Enable the scheduling process
                logger.info("Loaded: %s", self.sched.pushed_conf)
                self.sched.start_scheduling()

        # Now I have a configuration!
        self.have_conf = True

    def clean_previous_run(self):
        """Forget the satellite links kept from the previous configuration

        The base class treatment is executed first, then the pollers, reactionners
        and brokers containers of this daemon are emptied in place.

        :return: None
        """
        super(Fusionsupervision, self).clean_previous_run()

        # Empty each container in place, one after the other
        for links_name in ('pollers', 'reactionners', 'brokers'):
            getattr(self, links_name).clear()

    def get_daemon_stats(self, details=False):
        """Complete the base Daemon statistics with the scheduler daemon ones

        The dictionary built by the base class is enriched with the daemon name and
        type, an empty `monitored_objects` dictionary and the number of known links
        for each satellite type. When a scheduler object exists, its own statistics
        are merged into the result: its counters go into the `counters` dictionary
        and its other keys go to the top level of the result.

        :param details: forwarded as is to the base class and to the scheduler
        :type details: bool
        :return: stats dictionary
        :rtype: dict
        """
        # Start from what the base Daemon provides
        stats = super(Fusionsupervision, self).get_daemon_stats(details=details)
        stats['name'] = self.name
        stats['type'] = self.type
        stats['monitored_objects'] = {}

        # Satellites counters
        counters = stats['counters']
        for links_name in ('brokers', 'pollers', 'reactionners', 'receivers'):
            counters[links_name] = len(getattr(self, links_name))

        if self.sched:
            # # Hosts/services problems counters (kept disabled)
            # m_solver = MacroResolver()
            # counters['hosts_problems'] = m_solver._get_total_host_problems()
            # counters['hosts_unhandled_problems'] = m_solver._get_total_host_problems_unhandled()
            # counters['services_problems'] = m_solver._get_total_service_problems()
            # counters['services_unhandled_problems'] = m_solver._get_total_service_problems_unhandled()

            # The scheduler counters are merged with ours, the remaining
            # scheduler statistics are copied at the top level
            sched_stats = self.sched.get_scheduler_stats(details=details)
            counters.update(sched_stats.pop('counters'))
            stats.update(sched_stats)

        return stats

    def get_monitoring_problems(self):
        """Extract the live synthesis and the problems from the scheduler statistics

        The detailed scheduler statistics are requested and only their
        `livesynthesis` and `problems` entries, when present, are returned.
        An empty dictionary is returned when there is no scheduler object.

        :return: live synthesis and problems dictionary
        :rtype: dict
        """
        if not self.sched:
            return {}

        # Only these two entries of the detailed statistics are of interest
        detailed_stats = self.sched.get_scheduler_stats(details=True)
        return {
            wanted: detailed_stats[wanted]
            for wanted in ('livesynthesis', 'problems')
            if wanted in detailed_stats
        }

    def main(self):
        """Scheduler daemon entry point, to be called once the object is created::

        * Init and start the daemon
        * Wait for the initial configuration and set it up
        * Run the main loop
        * Catch any Exception that occurs

        When no configuration was received, the daemon only requests its stop.

        :return: None
        """
        try:
            # Start the daemon mode
            daemon_started = self.do_daemon_init_and_start()
            if not daemon_started:
                self.exit_on_error(message="Daemon initialization error", exit_code=3)

            # Wait for the initial configuration
            self.wait_for_initial_conf()
            if not self.new_conf:
                # Nothing to set up nor to run
                self.request_stop()
                return

            # Set up what we received, then run until the main loop exits
            self.setup_new_conf()
            self.do_main_loop()
            logger.info("Exited from the main loop.")

            # The scheduler has its own treatment when the loop is over
            self.sched.after_run()

            self.request_stop()
        except Exception:  # pragma: no cover, this should never happen indeed ;)
            # Report the traceback and let the exception go up
            self.exit_on_exception(traceback.format_exc())
            raise
コード例 #23
0
class Broker(BaseSatellite):
    """
    Class to manage a Broker daemon
    A Broker is used to get data from Scheduler and send them to modules. These modules in most
    cases export to other software, databases...

    The broks to manage are kept in three lists:
    - `external_broks`: the broks waiting to be managed by the modules
    - `internal_broks`: the broks raised internally by the broker
    - `arbiter_broks`: the broks pushed by the arbiters (protected by `arbiter_broks_lock`)
    """
    properties = BaseSatellite.properties.copy()
    properties.update({
        'type': StringProp(default='broker'),
        'port': IntegerProp(default=7772),
        'got_initial_broks': BoolProp(default=False)
    })

    def __init__(self, **kwargs):
        """Broker daemon initialisation

        :param kwargs: command line arguments. The `daemon_name` entry, when present,
            is used as the daemon name (default is 'Default-broker')
        """
        super(Broker,
              self).__init__(kwargs.get('daemon_name', 'Default-broker'),
                             **kwargs)

        # Our schedulers and arbiters are initialized in the base class

        # Our pollers, reactionners and receivers
        self.pollers = {}
        self.reactionners = {}
        self.receivers = {}

        # Modules are load one time
        self.have_modules = False

        # All broks to manage
        self.external_broks = []  # broks to manage

        # broks raised internally by the broker
        self.internal_broks = []
        # broks raised by the arbiters, we need a lock so the push can be in parallel
        # to our current activities and won't lock the arbiter
        self.arbiter_broks = []
        self.arbiter_broks_lock = threading.RLock()

        self.timeout = 1.0

        self.http_interface = BrokerInterface(self)

    def add(self, elt):
        """Generic function to add objects to the daemon internal lists.
        Manage Broks, External commands and Messages (from modules queues)

        - a Brok is tagged with our instance_id, then stored in `self.events` when it is
          a 'monitoring_log' brok, else in `self.broks`
        - an ExternalCommand is appended to `self.external_commands`
        - a Message of type 'NeedData' resets the connection and running_id of the
          concerned scheduler, a Message of type 'ICrash' gets the module last words logged

        Objects of any other type are silently ignored.

        :param elt: object to add
        :type elt: fusionsupervision.FusionsupervisionObject
        :return: None
        """
        if isinstance(elt, Brok):
            # For brok, we tag the brok with our instance_id
            elt.instance_id = self.instance_id
            # NOTE(review): events, events_lock, broks and broks_lock are not created in
            # this class; presumably they are provided by the base class - confirm
            if elt.type == 'monitoring_log':
                # The brok is a monitoring event
                with self.events_lock:
                    self.events.append(elt)
                statsmgr.counter('events', 1)
            else:
                with self.broks_lock:
                    self.broks.append(elt)
            statsmgr.counter('broks.added', 1)
        elif isinstance(elt, ExternalCommand):
            logger.debug("Queuing an external command '%s'", str(elt.__dict__))
            with self.external_commands_lock:
                self.external_commands.append(elt)
                statsmgr.counter('external-commands.added', 1)
        # Maybe we got a Message from the modules, it's way to ask something
        # like from now a full data from a scheduler for example.
        elif isinstance(elt, Message):
            # We got a message, great!
            logger.debug(str(elt.__dict__))
            message_type = elt.get_type()
            if message_type == 'NeedData':
                data = elt.get_data()
                # Full instance id means: I got no data for this scheduler
                # so give me all dumb-ass!
                if 'full_instance_id' in data:
                    c_id = data['full_instance_id']
                    source = getattr(elt, 'source',
                                     getattr(elt, '_source', None))
                    # The identifier is formatted with %s because it is only used as a
                    # dictionary key here: nothing guarantees that it is an integer
                    logger.info(
                        'The module %s is asking me to get all initial data '
                        'from the scheduler %s', source, c_id)
                    # so we just reset the connection and the running_id,
                    # it will just get all new things
                    # NOTE(review): this uses item assignment on the scheduler entry
                    # whereas other methods of this class use the scheduler links as
                    # objects (satellite.name, satellite_link.get_broks) - confirm that
                    # the links support item assignment
                    try:
                        self.schedulers[c_id]['con'] = None
                        self.schedulers[c_id]['running_id'] = 0
                    except KeyError:  # maybe this instance was not known, forget it
                        logger.warning(
                            "the module %s ask me a full_instance_id "
                            "for an unknown ID (%s)!", source, c_id)
            # Maybe a module tells me that it's dead, I must log its last words...
            elif message_type == 'ICrash':
                data = elt.get_data()
                logger.error(
                    'the module %s just crash! Please look at the traceback:',
                    data['name'])
                logger.error(data['trace'])

            statsmgr.counter('message.added', 1)
            # The module death will be looked for elsewhere and restarted.

    def manage_brok(self, brok):
        """Get a brok.
        We put brok data to the modules

        The brok is prepared, then given to each internal module instance. A module
        that raises an exception is logged and tagged to be restarted later; the
        remaining modules still get the brok.

        :param brok: object with data
        :type brok: object
        :return: None
        """
        # Unserialize the brok before consuming it
        brok.prepare()

        for module in self.modules_manager.get_internal_instances():
            try:
                _t0 = time.time()
                module.manage_brok(brok)
                statsmgr.timer('manage-broks.internal.%s' % module.get_name(),
                               time.time() - _t0)
            except Exception as exp:  # pylint: disable=broad-except
                logger.warning(
                    "The module %s raised an exception: %s, "
                    "I'm tagging it to restart later", module.get_name(),
                    str(exp))
                logger.exception(exp)
                self.modules_manager.set_to_restart(module)

    def get_internal_broks(self):
        """Get all broks from self.internal_broks and append them to our broks
        to manage (self.external_broks). The internal list is emptied.

        :return: None
        """
        statsmgr.gauge('get-new-broks-count.broker', len(self.internal_broks))
        # Add the broks to our global list
        self.external_broks.extend(self.internal_broks)
        self.internal_broks = []

    def get_arbiter_broks(self):
        """Get the broks from the arbiters,
        but as the arbiter_broks list can be push by arbiter without Global lock,
        we must protect this with a lock

        The broks are moved to our broks to manage (self.external_broks) and the
        arbiters list is emptied.

        TODO: really? check this arbiter behavior!

        :return: None
        """
        with self.arbiter_broks_lock:
            statsmgr.gauge('get-new-broks-count.arbiter',
                           len(self.arbiter_broks))
            # Add the broks to our global list
            self.external_broks.extend(self.arbiter_broks)
            self.arbiter_broks = []

    def get_new_broks(self):
        """Get new broks from our satellites

        Each scheduler, poller, reactionner and receiver link is requested for its
        broks. The received broks are tagged with the instance_id of the link they
        come from and appended to our broks to manage. A link that raises a LinkError
        is logged and ignored for this turn.

        :return: None
        """
        for satellites in [
                self.schedulers, self.pollers, self.reactionners,
                self.receivers
        ]:
            # Iterate on a copy of the links
            for satellite_link in list(satellites.values()):
                logger.debug("Getting broks from %s", satellite_link)

                _t0 = time.time()
                try:
                    tmp_broks = satellite_link.get_broks(self.name)
                except LinkError:
                    logger.warning(
                        "Daemon %s connection failed, I could not get the broks!",
                        satellite_link)
                else:
                    if tmp_broks:
                        logger.debug("Got %d Broks from %s in %s",
                                     len(tmp_broks), satellite_link.name,
                                     time.time() - _t0)
                        statsmgr.gauge(
                            'get-new-broks-count.%s' % (satellite_link.name),
                            len(tmp_broks))
                        statsmgr.timer(
                            'get-new-broks-time.%s' % (satellite_link.name),
                            time.time() - _t0)
                        for brok in tmp_broks:
                            brok.instance_id = satellite_link.instance_id

                        # Add the broks to our global list
                        self.external_broks.extend(tmp_broks)

    # def do_stop(self):
    #     """Stop all children of this process
    #
    #     :return: None
    #     """
    #     # my_active_children = active_children()
    #     # for child in my_active_children:
    #     #     child.terminate()
    #     #     child.join(1)
    #     super(Broker, self).do_stop()

    def setup_new_conf(self):
        # pylint: disable=too-many-branches, too-many-locals
        """Broker custom setup_new_conf method

        This function calls the base satellite treatment and manages the configuration needed
        for a broker daemon:
        - get and configure its pollers, reactionners and receivers relation
        - configure the modules
        - initialize the connection with the schedulers, then with the other satellites

        When a received satellite is already known, the context of the former link
        (running identifier, broks, actions, wait_homerun and external commands) is
        transferred to the new link.

        :return: None
        """
        # Execute the base class treatment...
        super(Broker, self).setup_new_conf()

        # ...then our own specific treatment!
        with self.conf_lock:
            # # self_conf is our own configuration from the fusionsupervision environment
            # self_conf = self.cur_conf['self_conf']
            self.got_initial_broks = False

            # Now we create our pollers, reactionners and receivers
            for link_type in ['pollers', 'reactionners', 'receivers']:
                if link_type not in self.cur_conf['satellites']:
                    logger.error("No %s in the configuration!", link_type)
                    continue

                my_satellites = getattr(self, link_type, {})
                received_satellites = self.cur_conf['satellites'][link_type]
                for link_uuid in received_satellites:
                    rs_conf = received_satellites[link_uuid]
                    logger.debug("- received %s - %s: %s",
                                 rs_conf['instance_id'], rs_conf['type'],
                                 rs_conf['name'])

                    # Must look if we already had a configuration and save our broks
                    instance_id = rs_conf['instance_id']
                    already_got = instance_id in my_satellites
                    broks = []
                    actions = {}
                    wait_homerun = {}
                    external_commands = {}
                    running_id = 0
                    if already_got:
                        logger.warning("I already got: %s", instance_id)
                        # The former link is fetched with the very same key that was
                        # used for the membership test, so that the lookup cannot fail
                        former_link = my_satellites[instance_id]
                        # Save some information
                        running_id = former_link.running_id
                        (broks, actions,
                         wait_homerun, external_commands) = \
                            former_link.get_and_clear_context()
                        # Delete the former link
                        del my_satellites[instance_id]

                    # My new satellite link...
                    # link_type[:-1] is the singular form: 'pollers' -> 'poller'
                    new_link = SatelliteLink.get_a_satellite_link(
                        link_type[:-1], rs_conf)
                    my_satellites[new_link.uuid] = new_link
                    logger.info("I got a new %s satellite: %s", link_type[:-1],
                                new_link)

                    # Restore the context saved from the former link (if any)
                    new_link.running_id = running_id
                    new_link.external_commands = external_commands
                    new_link.broks = broks
                    new_link.wait_homerun = wait_homerun
                    new_link.actions = actions

                    # Replace satellite address and port by those defined in satellite_map
                    # todo: check if it is really necessary! Add a unit test for this
                    # Not sure about this because of the daemons/satellites configuration
                    # if new_link.name in self_conf.get('satellite_map', {}):
                    #     new_link = dict(new_link)  # make a copy
                    #     new_link.update(self_conf.get('satellite_map', {})[new_link.name])

            # The modules are only loaded once, see self.have_modules
            if not self.have_modules:
                try:
                    self.modules = unserialize(self.cur_conf['modules'],
                                               no_load=True)
                except FusionsupervisionClassLookupException as exp:  # pragma: no cover, simple protection
                    logger.error(
                        'Cannot un-serialize modules configuration '
                        'received from arbiter: %s', exp)
                if self.modules:
                    logger.info("I received some modules configuration: %s",
                                self.modules)
                    self.have_modules = True

                    # Ok now start, or restart them!
                    # Set modules, init them and start external ones
                    self.do_load_modules(self.modules)
                    # and start external modules too
                    self.modules_manager.start_external_instances()
                else:
                    logger.info("I do not have modules")

            # Initialize connection with my schedulers first
            logger.info("Initializing connection with my schedulers:")
            my_satellites = self.get_links_of_type(s_type='scheduler')
            for satellite in list(my_satellites.values()):
                logger.info("- %s/%s", satellite.type, satellite.name)
                if not self.daemon_connection_init(satellite):
                    logger.error("Satellite connection failed: %s", satellite)

            # Initialize connection with all our satellites
            logger.info("Initializing connection with my satellites:")
            for sat_type in ['arbiter', 'reactionner', 'poller', 'receiver']:
                my_satellites = self.get_links_of_type(s_type=sat_type)
                for satellite in list(my_satellites.values()):
                    logger.info("- %s/%s", satellite.type, satellite.name)
                    if not self.daemon_connection_init(satellite):
                        logger.error("Satellite connection failed: %s",
                                     satellite)

        # Now I have a configuration!
        self.have_conf = True

    def clean_previous_run(self):
        """Clean all (when we received new conf)

        The pollers, reactionners and receivers relations are emptied. The broks and
        external commands lists are replaced with shallow copies of themselves: their
        content is kept.

        :return: None
        """
        # Execute the base class treatment...
        super(Broker, self).clean_previous_run()

        # Clean all satellites relations
        self.pollers.clear()
        self.reactionners.clear()
        self.receivers.clear()

        # Clean our internal objects
        # NOTE(review): these statements copy the lists, they do not empty them.
        # Confirm whether keeping the pending broks and commands is intended
        self.external_broks = self.external_broks[:]
        self.internal_broks = self.internal_broks[:]
        with self.arbiter_broks_lock:
            self.arbiter_broks = self.arbiter_broks[:]
        self.external_commands = self.external_commands[:]

        # And now modules
        # self.have_modules = False
        # self.modules_manager.clear_instances()

    def do_loop_turn(self):
        # pylint: disable=too-many-branches
        """Loop used to:
         * get initial status broks
         * check if modules are alive, if not restart them
         * get broks from ourself, the arbiters and our satellites
         * add broks to the queue of each external module
         * manage broks with each internal module

         If the internal broks management is longer than 0.8 seconds, postpone to the next
         loop turn to avoid overloading the broker daemon.

         The loop turn is given up when a scheduler answers that it has no
         initial broks yet.

         :return: None
        """
        if not self.got_initial_broks:
            # Asking initial broks from my schedulers
            my_satellites = self.get_links_of_type(s_type='scheduler')
            for satellite in list(my_satellites.values()):
                logger.info("Asking my initial broks from '%s'",
                            satellite.name)
                _t0 = time.time()
                try:
                    my_initial_broks = satellite.get_initial_broks(self.name)
                    statsmgr.timer('broks.initial.%s.time' % satellite.name,
                                   time.time() - _t0)
                    if not my_initial_broks:
                        logger.info("No initial broks were raised, "
                                    "my scheduler is not yet ready...")
                        return

                    self.got_initial_broks = True
                    logger.debug("Got %d initial broks from '%s'",
                                 my_initial_broks, satellite.name)
                    statsmgr.gauge('broks.initial.%s.count' % satellite.name,
                                   my_initial_broks)
                except LinkError:
                    logger.warning(
                        "Scheduler connection failed, I could not get initial broks!"
                    )

        logger.debug("Begin Loop: still some old broks to manage (%d)",
                     len(self.external_broks))
        if self.external_broks:
            statsmgr.gauge('unmanaged.broks', len(self.external_broks))

        # Try to see if one of my module is dead, and restart previously dead modules
        self.check_and_del_zombie_modules()

        # Call modules that manage a starting tick pass
        _t0 = time.time()
        self.hook_point('tick')
        statsmgr.timer('hook.tick', time.time() - _t0)

        # Maybe the last loop we did raised some broks internally
        self.get_internal_broks()

        # Also reap broks sent from the arbiters
        self.get_arbiter_broks()

        # Now get broks from our distant daemons
        self.get_new_broks()

        # Get the list of broks not yet sent to our external modules
        # A brok without the to_be_sent attribute is considered as not yet sent
        _t0 = time.time()
        broks_to_send = [
            brok for brok in self.external_broks
            if getattr(brok, 'to_be_sent', True)
        ]
        statsmgr.gauge('get-new-broks-count.to_send', len(broks_to_send))

        # Send the broks to all external modules to_q queue so they can get the whole packet
        # beware, the sub-process/queue can be die/close, so we put to restart the whole module
        # instead of killing ourselves :)
        for module in self.modules_manager.get_external_instances():
            try:
                _t00 = time.time()
                queue_size = module.to_q.qsize()
                statsmgr.gauge(
                    'queues.external.%s.to.size' % module.get_name(),
                    queue_size)
                module.to_q.put(broks_to_send)
                statsmgr.timer('queues.external.%s.to.put' % module.get_name(),
                               time.time() - _t00)
            except Exception as exp:  # pylint: disable=broad-except
                # first we must find the modules
                logger.warning(
                    "Module %s queue exception: %s, I'm tagging it to restart later",
                    module.get_name(), str(exp))
                logger.exception(exp)
                self.modules_manager.set_to_restart(module)

        # No more need to send them
        for brok in broks_to_send:
            brok.to_be_sent = False
        # The duration is a float number of seconds: do not truncate it to an integer
        logger.debug("Time to send %s broks (%.2f secs)", len(broks_to_send),
                     time.time() - _t0)

        # Make the internal modules manage the broks
        start = time.time()
        while self.external_broks:
            now = time.time()
            # Do not 'manage' more than 0.8s, we must get new broks almost every second
            if now - start > 0.8:
                logger.info(
                    "I did not yet managed all my broks, still %d broks",
                    len(self.external_broks))
                break

            # Get the first brok in the list
            brok = self.external_broks.pop(0)
            if self.modules_manager.get_internal_instances():
                self.manage_brok(brok)
                # Make a very short pause to avoid overloading
                self.make_a_pause(0.01, check_time_change=False)
            else:
                # No internal module: the brok is only kept (queued again) when
                # it is still flagged as to be sent
                if getattr(brok, 'to_be_sent', False):
                    self.external_broks.append(brok)

        # Maybe our external modules raised 'objects', so get them
        if self.get_objects_from_from_queues():
            statsmgr.gauge('external-commands.got.count',
                           len(self.external_commands))
            statsmgr.gauge('broks.got.count', len(self.external_broks))

    def get_daemon_stats(self, details=False):
        """Increase the stats provided by the Daemon base class

        The daemon name and type are added to the base class dictionary and its
        `counters` are completed with the size of our broks lists and the number
        of our pollers, reactionners and receivers.

        :param details: forwarded as is to the base class
        :type details: bool
        :return: stats dictionary
        :rtype: dict
        """
        # Call the base Daemon one
        res = super(Broker, self).get_daemon_stats(details=details)

        res.update({'name': self.name, 'type': self.type})

        counters = res['counters']
        counters['broks-external'] = len(self.external_broks)
        counters['broks-internal'] = len(self.internal_broks)
        counters['broks-arbiter'] = len(self.arbiter_broks)
        counters['satellites.pollers'] = len(self.pollers)
        counters['satellites.reactionners'] = len(self.reactionners)
        counters['satellites.receivers'] = len(self.receivers)

        return res

    def main(self):
        """Main function, will loop forever

        * Init and start the daemon
        * Wait for the initial configuration and set it up
        * Call the 'load_retention' hook point
        * Run the main loop

        Any exception is reported with its traceback and raised again.

        :return: None
        """
        try:
            # Start the daemon mode
            if not self.do_daemon_init_and_start():
                self.exit_on_error(message="Daemon initialization error",
                                   exit_code=3)

            #  We wait for initial conf
            self.wait_for_initial_conf()
            if self.new_conf:
                # Setup the received configuration
                self.setup_new_conf()

                # Restore retention data
                self.hook_point('load_retention')

                # Now the main loop
                self.do_main_loop()
                logger.info("Exited from the main loop.")

            self.request_stop()
        except Exception:  # pragma: no cover, this should never happen indeed ;)
            self.exit_on_exception(traceback.format_exc())
            raise
コード例 #24
0
class Satellite(BaseSatellite):  # pylint: disable=R0902
    """Satellite class.
    Sub-classed by Receiver, Reactionner and Poller

    A satellite gets actions from its schedulers, dispatches them to worker
    queues and pushes the results back to the schedulers.
    """
    # Flags sent to the schedulers when requesting new actions
    # (see do_get_new_actions); both are disabled at this level
    do_checks = False
    do_actions = False
    my_type = ''

    # Start from a copy of the base satellite properties and add our own ones
    properties = BaseSatellite.properties.copy()
    properties.update({
        # When set, the satellite does not push results / get actions by itself
        # (see do_loop_turn)
        'passive':
            BoolProp(default=False),
        # Given to each Worker created by create_and_launch_worker
        'max_plugins_output_length':
            IntegerProp(default=8192),
        # For min_workers and max_workers, 0 means: computed in __init__
        # from the detected CPUs count
        'min_workers':
            IntegerProp(default=0, fill_brok=['full_status'], to_send=True),
        'max_workers':
            IntegerProp(default=0, fill_brok=['full_status'], to_send=True),
        # Given to each Worker created by create_and_launch_worker
        'processes_by_worker':
            IntegerProp(default=256, fill_brok=['full_status'], to_send=True),
        'worker_polling_interval':
            IntegerProp(default=1, to_send=True),
        # Tags sent to the schedulers when requesting new actions
        'poller_tags':
            ListProp(default=['None'], to_send=True),
        'reactionner_tags':
            ListProp(default=['None'], to_send=True),
    })

    def __init__(self, name, **kwargs):
        """Initialize the satellite: broks storage, workers registry and workers limits

        The logger is not yet initialized when this function runs, so all the
        messages are stored in self.pre_log.

        When min_workers / max_workers are 0, they are computed from the number of
        detected CPUs (one CPU is preserved when more than one is detected). If the
        CPUs count cannot be determined, one CPU is assumed.

        :param name: daemon name, forwarded to the base class
        :param kwargs: extra parameters, forwarded to the base class
        """
        super(Satellite, self).__init__(name, **kwargs)

        # Move these properties to the base Daemon ?
        # todo: change this?
        # Keep broks so they can be eaten by a broker
        self.broks = []
        self.broks_lock = threading.RLock()

        # My active workers
        self.workers = {}

        # May be we are a passive daemon
        if self.passive:
            self.pre_log.append(("INFO", "Passive mode enabled."))

        # Our tags
        # ['None'] is the default tags
        if self.type in ['poller'] and self.poller_tags:
            self.pre_log.append(("INFO", "Poller tags: %s" % self.poller_tags))
        if self.type in ['reactionner'] and self.reactionner_tags:
            self.pre_log.append(("INFO", "Reactionner tags: %s" % self.reactionner_tags))

        # Now the limit part, 0 means the number of cpu of this machine :)
        # Do not use the logger in this function because it is not yet initialized...
        cpu_count = psutil.cpu_count()
        if cpu_count is None:
            # psutil returns None (it does not raise) when the count is undetermined
            self.pre_log.append(("INFO",
                                 "CPUs count could not be detected, assuming 1 CPU"))
            cpu_count = 1
        else:
            self.pre_log.append(("INFO",
                                 "Detected %d CPUs" % cpu_count))

        # Preserve one CPU if more than one detected
        default_workers = max(cpu_count - 1, 1)
        if self.max_workers == 0:
            self.max_workers = default_workers
        if self.min_workers == 0:
            self.min_workers = default_workers
        self.pre_log.append(("INFO",
                             "Using minimum %d workers, maximum %d workers, %d processes/worker"
                             % (self.min_workers, self.max_workers, self.processes_by_worker)))

        self.slave_q = None

        # Created later, in do_post_daemon_init
        self.returns_queue = None
        # Workers queues, indexed by module name then by worker id
        self.q_by_mod = {}

        # Modules are only loaded one time
        self.have_modules = False

        # Round robin index used to choose a worker queue
        self.rr_qid = 0

    def manage_action_return(self, action):
        """Manage action return from Workers
        We just put them into the corresponding sched
        and we clean unused properties like my_scheduler

        An object which is not a check, a notification nor an event handler is
        simply given to self.add().

        Errors are logged and do not stop the processing: missing my_scheduler /
        my_worker attributes on the action, an unknown scheduler or an action
        that is not known by the scheduler link.

        :param action: the action to manage
        :type action: fusionsupervision.action.Action
        :return: None
        """
        # Maybe our workers send us something else than an action
        # if so, just add this in other queues and return
        # todo: test a class instance
        if action.__class__.my_type not in ['check', 'notification', 'eventhandler']:
            self.add(action)
            return

        # Ok, it's a result. Get the concerned scheduler uuid
        scheduler_uuid = action.my_scheduler
        logger.debug("Got action return: %s / %s", scheduler_uuid, action.uuid)

        try:
            # Now that we know where to put the action result, we do not need any reference to
            # the scheduler nor the worker
            del action.my_scheduler
            del action.my_worker
        except AttributeError:  # pragma: no cover, simple protection
            logger.error("AttributeError Got action return: %s / %s", scheduler_uuid, action)

        # And we remove it from the actions queue of the scheduler too
        try:
            del self.schedulers[scheduler_uuid].actions[action.uuid]
        except KeyError as exp:
            logger.error("KeyError del scheduler action: %s / %s - %s",
                         scheduler_uuid, action.uuid, str(exp))

        # We tag it as "return wanted", and move it in the wait return queue
        try:
            self.schedulers[scheduler_uuid].wait_homerun[action.uuid] = action
        except KeyError as exp:  # pragma: no cover, simple protection
            # The exception must be bound here: a name bound by a former
            # 'except ... as' clause does not exist anymore once that clause is left
            logger.error("KeyError Add home run action: %s / %s - %s",
                         scheduler_uuid, action.uuid, str(exp))

    def push_results(self):
        """Send the waiting results (wait_homerun) back to each active scheduler

        Inactive schedulers are skipped with a warning and schedulers with no
        waiting result are silently skipped. Once pushed, the waiting results of
        the scheduler are cleared.

        :return: None
        """
        for link in self.schedulers.values():
            if not link.active:
                logger.warning("My scheduler '%s' is not active currently", link.name)
                continue

            # NB: no lock is used around the waiting results; it is **mostly** safe.
            # New results are only added by the thread that runs this function
            # (the main satellite thread), in self.manage_action_return().
            # The waiting results may however be cleared from an (http) client
            # thread: ISchedulers.get_results() -> Satelitte.get_return_for_passive()
            # At worst, some results would be received twice on the scheduler level,
            # which shouldn't be a problem given they are indexed by their "action_id".
            waiting = link.wait_homerun
            if not waiting:
                # Nothing to push back...
                continue

            logger.debug("Pushing %d results to '%s'", len(waiting), link.name)
            link.push_results(list(waiting.values()), self.name)
            waiting.clear()

    def create_and_launch_worker(self, module_name='fork'):
        """Create and launch a new worker, and put it into self.workers
         It can be mortal or not

        For a module other than 'fork', the worker target is the `work` function of
        the module instance that has this name. If no such instance exists, no
        worker is created and a warning is logged, only once per module name for
        this daemon instance.

        :param module_name: the module name related to the worker
                            default is "fork" for no module
                            Indeed, it is actually the module 'python_name'
        :type module_name: str
        :return: None
        :raises NotWorkerMod: if the matching module is not 'worker_capable'
        """
        logger.info("Allocating new '%s' worker...", module_name)

        # If we are in the fork module, we do not specify a target
        target = None
        if module_name != 'fork':
            for module in self.modules_manager.instances:
                # First, see if the module name matches...
                if module.get_name() != module_name:
                    continue
                # ... and then if is a 'worker' module one or not
                if not module.properties.get('worker_capable', False):
                    raise NotWorkerMod
                target = module.work

            if target is None:
                # The already warned modules are kept on the instance: a list local
                # to this function would be empty on each call and the warning
                # would be raised each time.
                warned = getattr(self, '_no_target_warned', None)
                if warned is None:
                    warned = self._no_target_warned = set()
                if module_name not in warned:
                    logger.warning("No target found for %s, NOT creating a worker for it...",
                                   module_name)
                    warned.add(module_name)
                return

        # We give to the Worker the instance name of the daemon (eg. poller-master)
        # and not the daemon type (poller)
        queue = Queue()
        worker = Worker(module_name, queue, self.returns_queue, self.processes_by_worker,
                        max_plugins_output_length=self.max_plugins_output_length,
                        target=target, loaded_into=self.name)
        # save this worker
        self.workers[worker.get_id()] = worker

        # And save the Queue of this worker, with key = worker id
        self.q_by_mod[module_name][worker.get_id()] = queue

        # Ok, all is good. Start it!
        worker.start()

        logger.info("Started '%s' worker: %s (pid=%d)",
                    module_name, worker.get_id(), worker.get_pid())

    def do_stop_workers(self):
        """Terminate each known worker and wait (at most 1 second) for it

        A failure with one worker is logged, or ignored for an AttributeError
        or an AssertionError, and the next workers are still stopped.

        :return: None
        """
        logger.info("Stopping all workers (%d)", len(self.workers))
        # Work on a snapshot of the workers
        for running in tuple(self.workers.values()):
            try:
                logger.info(" - stopping '%s'", running.get_id())
                running.terminate()
                running.join(timeout=1)
                logger.info(" - stopped")
            except (AttributeError, AssertionError):
                # An already dead worker, or we are in a worker
                continue
            except Exception as exp:  # pylint: disable=broad-except
                logger.error("exception: %s", str(exp))

    def do_stop(self):
        """Stop my workers and stop

        The workers are stopped first (do_stop_workers), then the base class
        stop treatment is executed.

        :return: None
        """
        # Our own part: the workers
        self.do_stop_workers()

        # ... then the base class part
        super(Satellite, self).do_stop()

    def add(self, elt):
        """Store an object in the matching internal list of the daemon

        * a Brok is tagged with our instance_id, then stored in the events list when
          its type is 'monitoring_log', else in the broks list
        * an ExternalCommand is stored in the external commands list

        Each list is updated under its own lock. Any other object is ignored.

        :param elt: object to add
        :type elt: fusionsupervision.FusionsupervisionObject
        :return: None
        """
        if isinstance(elt, Brok):
            # For brok, we tag the brok with our instance_id
            elt.instance_id = self.instance_id
            if elt.type != 'monitoring_log':
                with self.broks_lock:
                    self.broks.append(elt)
            else:
                # The brok is a monitoring event
                with self.events_lock:
                    self.events.append(elt)
                statsmgr.counter('events', 1)
            # Counted for the events as well as for the other broks
            statsmgr.counter('broks.added', 1)
            return

        if not isinstance(elt, ExternalCommand):
            # Not an object we are managing
            return

        logger.debug("Queuing an external command '%s'", str(elt.__dict__))
        with self.external_commands_lock:
            self.external_commands.append(elt)
        statsmgr.counter('external-commands.added', 1)

    def get_broks(self):
        """Give the stored broks and empty the internal broks list

        The internal list object is kept (emptied in place), the returned list
        is a shallow copy made before the cleaning.

        :return: A copy of the broks list
        :rtype: list
        """
        snapshot = copy.copy(self.broks)
        # Empty the list in place, the list object itself remains the same
        self.broks[:] = []
        return snapshot

    def check_and_del_zombie_workers(self):  # pragma: no cover, not with unit tests...
        # pylint: disable= not-callable
        """Find the workers that died unexpectedly and forget them

        Unless the daemon is interrupted, each worker that is no more alive is
        terminated and joined. Its queue and itself are then removed from our
        lists, and the queued actions it was in charge of are assigned to a queue
        again.

        TODO: see if unit tests would allow to check this code?

        :return: None
        """
        # Active children make a join with everyone, useful :)
        for child in active_children():
            logger.debug("got child: %s", child)

        # First pass: find the workers that went down whereas we did not ask for it.
        # Keeping them would make us believe that we have workers that do not exist.
        dead_ids = []
        for candidate in list(self.workers.values()):
            logger.debug("checking if worker %s (pid=%d) is alive",
                         candidate.get_id(), candidate.get_pid())
            if self.interrupted or candidate.is_alive():
                continue

            logger.warning("The worker %s (pid=%d) went down unexpectedly!",
                           candidate.get_id(), candidate.get_pid())
            # Terminate immediately
            candidate.terminate()
            candidate.join(timeout=1)
            dead_ids.append(candidate.get_id())

        # Second pass: really forget the dead workers
        for dead_id in dead_ids:
            dead = self.workers[dead_id]

            # Remove the worker queue from its module queues
            del self.q_by_mod[dead.module_name][dead.get_id()]

            # An action queued for this worker will NEVER return if we do not
            # give it to another queue
            for link in self.schedulers.values():
                for act in list(link.actions.values()):
                    if act.status == ACT_STATUS_QUEUED and act.my_worker == dead_id:
                        self.assign_to_a_queue(act)

            # So now we can really forget it
            del self.workers[dead_id]

    def adjust_worker_number_by_load(self):
        """Create the missing workers to reach min_workers for each module

        Nothing is done when the daemon is interrupted. A module that raises
        NotWorkerMod on worker creation is removed from the workers queues.

        :return: None
        """
        if self.interrupted:
            logger.debug("Trying to adjust worker number. Ignoring because we are stopping.")
            return

        logger.debug("checking worker count."
                     " Currently: %d workers, min per module : %d, max per module : %d",
                     len(self.workers), self.min_workers, self.max_workers)

        # I want at least min_workers for each module
        not_worker_modules = []
        for mod, queues in self.q_by_mod.items():
            missing = max(0, self.min_workers - len(queues))
            for _ in range(missing):
                try:
                    self.create_and_launch_worker(module_name=mod)
                except NotWorkerMod:
                    # This module is not a true worker one: no need to try again,
                    # it will be removed once all the modules are checked
                    not_worker_modules.append(mod)
                    break

        for mod in not_worker_modules:
            logger.warning("The module %s is not a worker one, I remove it from the worker list.",
                           mod)
            del self.q_by_mod[mod]
        # TODO: if len(workers) > 2*wish, maybe we can kill a worker?

    def _get_queue_for_the_action(self, action):
        """Find action queue for the action depending on the module.
        The id is found with action modulo on action id

        :param a: the action that need action queue to be assigned
        :type action: object
        :return: worker id and queue. (0, None) if no queue for the module_type
        :rtype: tuple
        """
        # get the module name, if not, take fork
        mod = getattr(action, 'module_type', 'fork')
        queues = list(self.q_by_mod[mod].items())

        # Maybe there is no more queue, it's very bad!
        if not queues:
            return (0, None)

        # if not get action round robin index to get action queue based
        # on the action id
        self.rr_qid = (self.rr_qid + 1) % len(queues)
        (worker_id, queue) = queues[self.rr_qid]

        # return the id of the worker (i), and its queue
        return (worker_id, queue)

    def add_actions(self, actions_list, scheduler_instance_id):
        """Attach some actions to a scheduler link and give them to the worker queues

        The scheduler link is the one whose `instance_id` equals the provided
        identifier; if there is none, an error is logged and nothing is added.

        An action without an `uuid` attribute is un-serialized first. An action
        that is already known by the scheduler link is ignored.

        :param actions_list: Actions list to add
        :type actions_list: list
        :param scheduler_instance_id: instance_id of the scheduler link to assign the actions to
        :return: None
        """
        # Search for the scheduler link that has the provided instance identifier
        scheduler_link = None
        for candidate in self.schedulers.values():
            logger.debug("Trying to add an action, scheduler: %s", candidate)
            if scheduler_instance_id == candidate.instance_id:
                scheduler_link = candidate
                break
        else:
            # All the links were checked and none matched
            logger.error("Trying to add actions from an unknwown scheduler: %s",
                         scheduler_instance_id)
            return
        if not scheduler_link:
            logger.error("Trying to add actions, but scheduler link is not found for: %s, "
                         "actions: %s", scheduler_instance_id, actions_list)
            return
        logger.debug("Found scheduler link: %s", scheduler_link)

        for action in actions_list:
            # An object with no uuid is considered as a serialized action
            action_uuid = getattr(action, 'uuid', None)
            if action_uuid is None:
                try:
                    action = unserialize(action, no_load=True)
                    action_uuid = action.uuid
                except FusionsupervisionClassLookupException:
                    logger.error('Cannot un-serialize action: %s', action)
                    continue

            if action_uuid not in scheduler_link.actions:
                # Attach the action to its scheduler and queue it for a worker
                action.my_scheduler = scheduler_link.uuid
                scheduler_link.actions[action.uuid] = action
                self.assign_to_a_queue(action)
            # else, we are already working for this action!

    def assign_to_a_queue(self, action):
        """Send an action to the queue of one of our workers

        The action is tagged with the chosen worker identifier and with the
        queued status, then sent in a 'Do' message. Nothing is done when no
        worker could be found for the action.

        :param action: action to put
        :type action: fusionsupervision.action.Action
        :return: None
        """
        worker_id, worker_queue = self._get_queue_for_the_action(action)
        if worker_id:
            # Tag the action as "in the worker worker_id"
            action.my_worker = worker_id
            action.status = ACT_STATUS_QUEUED

            message = Message(_type='Do', data=action, source=self.name)
            logger.debug("Queuing message: %s", message)
            worker_queue.put_nowait(message)
            logger.debug("Queued")

    def get_new_actions(self):
        """Run do_get_new_actions and send its duration to the statistics

        A RuntimeError raised while getting the actions is logged and not
        propagated.

        :return: None
        TODO: Use a decorator for timing this function
        """
        try:
            started_at = time.time()
            self.do_get_new_actions()
            elapsed = time.time() - started_at
            statsmgr.timer('actions.got.time', elapsed)
        except RuntimeError:
            logger.error("Exception like issue #1007")

    def do_get_new_actions(self):
        """Get new actions from schedulers
        Create a Message and put into the module queue
        REF: doc/fusionsupervision-action-queues.png (1)

        Each active scheduler is requested with our class flags (do_checks,
        do_actions), our tags, our name and the module types we have queues for.
        The received actions are given to add_actions and their count is sent to
        the statistics (0 when nothing was received).

        :return: None
        """
        # Here are the differences between a poller and a reactionner:
        # Poller will only do checks,
        # Reactionner will do actions (notifications and event handlers)
        do_checks = self.__class__.do_checks
        do_actions = self.__class__.do_actions

        # We check and get the new actions to execute in each of our schedulers
        for scheduler_link_uuid in self.schedulers:
            scheduler_link = self.schedulers[scheduler_link_uuid]

            if not scheduler_link.active:
                logger.warning("My scheduler '%s' is not active currently", scheduler_link.name)
                continue

            logger.debug("get new actions, scheduler: %s", scheduler_link.name)

            # OK, go for it :)
            _t0 = time.time()
            actions = scheduler_link.get_actions({'do_checks': do_checks, 'do_actions': do_actions,
                                                  'poller_tags': self.poller_tags,
                                                  'reactionner_tags': self.reactionner_tags,
                                                  'worker_name': self.name,
                                                  'module_types': list(self.q_by_mod.keys())})
            # Do not call len() on a falsy result: it may be something else than
            # an empty list (eg. None) - NOTE(review): confirm what get_actions
            # returns when the request fails
            actions_count = len(actions) if actions else 0
            if actions:
                logger.debug("Got %d actions from %s", actions_count, scheduler_link.name)
                # We 'tag' them with my_scheduler and put into queue for workers
                self.add_actions(actions, scheduler_link.instance_id)
                logger.debug("Got %d actions from %s in %s",
                             actions_count, scheduler_link.name, time.time() - _t0)
            statsmgr.gauge('actions.added.count.%s' % (scheduler_link.name), actions_count)

    def clean_previous_run(self):
        """Clean variables from previous configuration

        The base class treatment is executed first, then our broks and events
        lists are emptied in place (the list objects are kept).

        :return: None
        """
        # Execute the base class treatment...
        super(Satellite, self).clean_previous_run()

        # Clean my lists (in place)
        del self.broks[:]
        del self.events[:]

    def do_loop_turn(self):  # pylint: disable=too-many-branches
        """Satellite main loop turn:

        * Check and delete zombie modules and zombie workers
        * Call the 'tick' hook point
        * Log and send to the statistics the queues sizes of the workers
        * Adjust the workers number
        * Manage the messages that are present in the returns queue
        * If we are not passive, push the results to our schedulers and get the
          new actions from them
        * Get the objects from our modules queues and update the statistics

        :return: None
        """
        # Try to see if one of my module is dead, and restart previously dead modules
        self.check_and_del_zombie_modules()

        # Also if some zombie workers exist...
        self.check_and_del_zombie_workers()

        # Call modules that manage a starting tick pass
        self.hook_point('tick')

        # Print stats for debug
        # Note that the gauges do not depend upon the scheduler: they are sent
        # once for each scheduler, and never sent if we have no scheduler
        for _, sched in self.schedulers.items():
            for mod in self.q_by_mod:
                # In workers we've got actions sent to queue - queue size
                for (worker_id, queue) in list(self.q_by_mod[mod].items()):
                    try:
                        actions_count = queue.qsize()
                        results_count = self.returns_queue.qsize()
                        logger.debug("[%s][%s][%s] actions queued: %d, results queued: %d",
                                     sched.name, mod, worker_id, actions_count, results_count)
                        # Update the statistics
                        statsmgr.gauge('worker.%s.actions-queue-size' % worker_id,
                                       actions_count)
                        statsmgr.gauge('worker.%s.results-queue-size' % worker_id,
                                       results_count)
                    except (IOError, EOFError):
                        # Best effort: ignore a queue that cannot be read
                        pass

        # todo temporary deactivate all this stuff!
        # Before return or get new actions, see how we managed
        # the former ones: are they still in queue(s)? If so, we
        # must wait more or at least have more workers
        # wait_ratio = self.wait_ratio.get_load()
        # total_q = 0
        # try:
        #     for mod in self.q_by_mod:
        #         for queue in list(self.q_by_mod[mod].values()):
        #             total_q += queue.qsize()
        # except (IOError, EOFError):
        #     pass
        # if total_q != 0 and wait_ratio < 2 * self.worker_polling_interval:
        #     logger.debug("I decide to increase the wait ratio")
        #     self.wait_ratio.update_load(wait_ratio * 2)
        #     # self.wait_ratio.update_load(self.worker_polling_interval)
        # else:
        #     # Go to self.worker_polling_interval on normal run, if wait_ratio
        #     # was >2*self.worker_polling_interval,
        #     # it make it come near 2 because if < 2, go up :)
        #     self.wait_ratio.update_load(self.worker_polling_interval)
        # wait_ratio = self.wait_ratio.get_load()
        # statsmgr.timer('core.wait-ratio', wait_ratio)
        # if self.log_loop:
        #     logger.debug("[%s] wait ratio: %f", self.name, wait_ratio)

        # Maybe we do not have enough workers, we check for it
        # and launch the new ones if needed
        self.adjust_worker_number_by_load()

        # Manage all messages we've got in the last timeout
        # A problem with the returns queue is logged and does not stop the loop turn
        try:
            logger.debug("[%s] manage action results: %d results",
                         self.name, self.returns_queue.qsize())
            while self.returns_queue.qsize():
                msg = self.returns_queue.get_nowait()
                if msg is None:
                    continue
                if not isinstance(msg, Message):
                    logger.warning("Should have received a Message, got a %s!", type(msg))
                    continue
                logger.debug("Got a message: %s", msg)
                if msg.get_type() == 'Done':
                    # An action result
                    logger.debug("Got (from %s) an action result: %s",
                                 msg.get_source(), msg.get_data())
                    self.manage_action_return(msg.get_data())
                elif msg.get_type() == 'Stats':
                    # Some statistics, stored in the worker that sent them
                    logger.debug("Got (from %s) stats: %s",
                                 msg.get_source(), msg.get_data())
                    if msg.get_source() in self.workers:
                        self.workers[msg.get_source()].stats = msg.get_data()
                else:
                    logger.warning("Ignoring message of type: %s", msg.get_type())
        except Full:
            logger.warning("Returns queue is full")
        except Empty:
            logger.debug("Returns queue is empty")
        except (IOError, EOFError) as exp:
            logger.warning("My returns queue is no more available: %s", str(exp))
        except Exception as exp:  # pylint: disable=broad-except
            logger.error("Failed getting messages in returns queue: %s", str(exp))
            logger.error(traceback.format_exc())

        for _, sched in self.schedulers.items():
            if sched.wait_homerun:
                logger.debug("scheduler home run: %d results", len(sched.wait_homerun))

        if not self.passive:
            # We are an active (not passive) satellite: we initiate by ourselves
            # the results sending and the new actions getting.
            # A failure of the first one does not prevent to try the second one.
            try:
                # We send to our schedulers the results of all finished checks
                logger.debug("pushing results...")
                self.push_results()
            except LinkError as exp:
                logger.warning("Scheduler connection failed, I could not send my results!")

            try:
                # And we get the new actions from our schedulers
                logger.debug("getting new actions...")
                self.get_new_actions()
            except LinkError as exp:
                logger.warning("Scheduler connection failed, I could not get new actions!")

        # Get objects from our modules that are not Worker based
        if self.log_loop:
            logger.debug("[%s] get objects from queues", self.name)
        self.get_objects_from_from_queues()
        statsmgr.gauge('external-commands.count', len(self.external_commands))
        statsmgr.gauge('broks.count', len(self.broks))
        statsmgr.gauge('events.count', len(self.events))

    def do_post_daemon_init(self):
        """Do this satellite (poller or reactionner) post "daemonize" init

        Creates the (empty) workers queues dictionary of the 'fork' module and
        the queue used to receive the workers messages.

        :return: None
        """
        # The queues of the 'fork' workers can only be declared once the daemon
        # is initialized; the dictionary is filled by create_and_launch_worker
        self.q_by_mod['fork'] = {}

        # todo: check if this is always useful?
        # The queue that is given to each worker for its returns
        self.returns_queue = Queue()

    def setup_new_conf(self):
        # pylint: disable=too-many-branches
        """Setup the new configuration received from Arbiter

        This function calls the base satellite treatment and then, with the
        configuration lock held:

        - loads the modules, only if no modules were loaded yet: un-serialize the
          modules configuration, declare the workers queues of each module, load the
          modules and start the external instances
        - initializes the connection with each of our satellites

        Finally, the daemon is flagged as having a configuration (have_conf).

        :return: None
        """
        # Execute the base class treatment...
        super(Satellite, self).setup_new_conf()

        # ...then our own specific treatment!
        with self.conf_lock:
            logger.info("Received a new configuration")

            # self_conf is our own configuration from the fusionsupervision environment
            # self_conf = self.cur_conf['self_conf']

            # Now manage modules (they are only loaded one time)
            if not self.have_modules:
                try:
                    self.modules = unserialize(self.cur_conf['modules'], no_load=True)
                except FusionsupervisionClassLookupException as exp:  # pragma: no cover, simple protection
                    # NOTE(review): self.modules is not assigned in this case, the
                    # next test uses its former value - confirm it always exists
                    logger.error('Cannot un-serialize modules configuration '
                                 'received from arbiter: %s', exp)
                if self.modules:
                    logger.info("I received some modules configuration: %s", self.modules)
                    self.have_modules = True

                    # Declare the workers queues of each module
                    for module in self.modules:
                        if module.name not in self.q_by_mod:
                            self.q_by_mod[module.name] = {}

                    self.do_load_modules(self.modules)
                    # and start external modules too
                    self.modules_manager.start_external_instances()
                else:
                    logger.info("I do not have modules")

            # Initialize connection with all our satellites
            logger.info("Initializing connection with my satellites:")
            # NOTE(review): an empty type presumably means all the links - confirm
            my_satellites = self.get_links_of_type(s_type='')
            for satellite in list(my_satellites.values()):
                logger.info("- : %s/%s", satellite.type, satellite.name)
                # A failed connection is only logged
                if not self.daemon_connection_init(satellite):
                    logger.error("Satellite connection failed: %s", satellite)

        # Now I have a configuration! (set once the configuration lock is released)
        self.have_conf = True

    def get_daemon_stats(self, details=False):
        """Complete the statistics built by the base class with the satellite ones

        The 'counters' entry receives the broks, events and workers counts. When
        we have some workers, a 'workers' entry contains the statistics of each
        worker that has some, indexed by the worker identifier.

        :param details: forwarded as is to the base class implementation
        :return: stats dictionary
        :rtype: dict
        """
        # Start from what the base class provides
        stats = super(Satellite, self).get_daemon_stats(details=details)

        counters = stats['counters']
        counters['broks'] = len(self.broks)
        counters['events'] = len(self.events)
        counters['workers'] = len(self.workers)

        if not self.workers:
            return stats

        per_worker = {}
        stats['workers'] = per_worker
        for worker in list(self.workers.values()):
            worker_id = worker.get_id()
            # A worker has no stats attribute until it sent some statistics
            worker_stats = getattr(self.workers[worker_id], 'stats', None)
            if worker_stats:
                per_worker[worker_id] = worker_stats

        return stats

    def main(self):
        """Main satellite function. Do init and then mainloop

        Sequence:

        * initialize and start the daemon (exit_on_error is called with exit code 3
          when this fails)
        * do the post daemon init (workers queues)
        * wait for the initial configuration
        * if a new configuration is flagged (self.new_conf): set it up, create the
          initial workers and run the main loop
        * request the daemon stop

        Any exception raised in this sequence is reported through exit_on_exception
        (with the formatted traceback) and then re-raised.

        :return: None
        """
        try:
            # Start the daemon mode
            if not self.do_daemon_init_and_start():
                self.exit_on_error(message="Daemon initialization error", exit_code=3)

            # The queues are created once the daemon is initialized
            self.do_post_daemon_init()

            # We wait for initial conf
            self.wait_for_initial_conf()
            if self.new_conf:
                # Setup the received configuration
                self.setup_new_conf()

                # Create the workers needed to reach the configured minimum
                self.adjust_worker_number_by_load()

                # Now main loop; we only get past it when the loop is exited
                self.do_main_loop()
                logger.info("Exited from the main loop.")

            # Reached with or without a configuration
            self.request_stop()
        except Exception:  # pragma: no cover, this should never happen indeed ;)
            # Report the traceback, then let the exception propagate
            self.exit_on_exception(traceback.format_exc())
            raise
コード例 #25
0
class SatelliteLink(Item):
    # pylint: disable=too-many-instance-attributes
    """SatelliteLink is a common Class for links between
    Arbiter and other satellites. Used by the Dispatcher object.

    The `properties` dictionary describes the configurable attributes of a link; the ones
    declared with `to_send=True` are part of the configuration pushed to the satellite.
    The `running_properties` dictionary describes the runtime state of the link
    (connection, liveness, managed configuration...).
    """
    # Next value used for auto generated instance_id
    _next_id = 1

    # All the class properties that are 'to_send' are stored in the 'global'
    # configuration to be pushed to the satellite when the configuration is dispatched
    properties = Item.properties.copy()
    properties.update({
        'instance_id':
        StringProp(to_send=True),

        # When this property is set, the Arbiter will launch the corresponding daemon
        'fusionsupervision_launched':
        BoolProp(default=False, fill_brok=['full_status'], to_send=True),
        # This property is set by the Arbiter when it detects that this daemon
        # is needed but not declared in the configuration
        'missing_daemon':
        BoolProp(default=False, fill_brok=['full_status']),

        # Sent to the satellites and used to check the managed configuration
        # Those are not to_send=True because they are updated by the configuration Dispatcher
        # and set when the daemon receives its configuration
        'managed_conf_id':
        StringProp(default=u''),
        'push_flavor':
        StringProp(default=u''),
        'hash':
        StringProp(default=u''),

        # A satellite link has the type/name of the daemon it is related to
        'type':
        StringProp(default=u'', fill_brok=['full_status'], to_send=True),
        'name':
        StringProp(default=u'', fill_brok=['full_status'], to_send=True),

        # Listening interface and address used by the other daemons
        'host':
        StringProp(default=u'0.0.0.0', to_send=True),
        'address':
        StringProp(default=u'127.0.0.1',
                   fill_brok=['full_status'],
                   to_send=True),
        'active':
        BoolProp(default=True, fill_brok=['full_status'], to_send=True),
        'short_timeout':
        IntegerProp(default=3, fill_brok=['full_status'], to_send=True),
        'long_timeout':
        IntegerProp(default=120, fill_brok=['full_status'], to_send=True),

        # the delay (seconds) between two ping retries
        'ping_period':
        IntegerProp(default=5),

        # The maximum number of retries before setting the daemon as dead
        'max_check_attempts':
        IntegerProp(default=3, fill_brok=['full_status']),

        # For a spare daemon link
        'spare':
        BoolProp(default=False, fill_brok=['full_status'], to_send=True),
        'spare_check_interval':
        IntegerProp(default=5, fill_brok=['full_status']),
        'spare_max_check_attempts':
        IntegerProp(default=3, fill_brok=['full_status']),
        'manage_sub_realms':
        BoolProp(default=True, fill_brok=['full_status'], to_send=True),
        'manage_arbiters':
        BoolProp(default=False, fill_brok=['full_status'], to_send=True),
        'modules':
        ListProp(default=[''], split_on_comma=True),
        # The minimum delay (seconds) between two information updates (see update_infos)
        'polling_interval':
        IntegerProp(default=5, fill_brok=['full_status'], to_send=True),
        'use_timezone':
        StringProp(default=u'NOTSET', to_send=True),
        'realm':
        StringProp(default=u'',
                   fill_brok=['full_status'],
                   brok_transformation=get_obj_name_two_args_and_void),
        'realm_name':
        StringProp(default=u''),
        # Connection parameters as seen by the owner of this link
        # (see set_arbiter_satellite_map)
        'satellite_map':
        DictProp(default={}, elts_prop=AddrProp, to_send=True, override=True),
        'use_ssl':
        BoolProp(default=False, fill_brok=['full_status'], to_send=True),
        'hard_ssl_name_check':
        BoolProp(default=True, fill_brok=['full_status'], to_send=True),
        'passive':
        BoolProp(default=False, fill_brok=['full_status'], to_send=True),
    })

    # Runtime state of the link: not part of the configuration
    running_properties = Item.running_properties.copy()
    running_properties.update(
        {
            # The HTTP client used to communicate with the daemon and its URI
            'con': StringProp(default=None),
            'uri': StringProp(default=None),
            'reachable':  # Can be reached (HTTP connection is valid) - False until confirmed
            BoolProp(default=False, fill_brok=['full_status']),
            'alive':  # Is alive (the attached process is launched...)
            BoolProp(default=False, fill_brok=['full_status']),
            'valid':  # Is valid (the daemon is the expected one)
            BoolProp(default=False, fill_brok=['full_status']),
            'need_conf':  # The daemon needs to receive a configuration
            BoolProp(default=True, fill_brok=['full_status']),
            'have_conf':  # The daemon has received a configuration
            BoolProp(default=False, fill_brok=['full_status']),
            'stopping':  # The daemon is requested to stop
            BoolProp(default=False, fill_brok=['full_status']),
            'running_id':  # The running identifier of my related daemon
            FloatProp(default=0, fill_brok=['full_status']),

            # the number of poll attempt from the arbiter dispatcher
            'attempt': IntegerProp(default=0, fill_brok=['full_status']),

            # the last connection attempt timestamp
            'last_connection': IntegerProp(default=0,
                                           fill_brok=['full_status']),
            # the number of failed attempt for the connection
            'connection_attempt': IntegerProp(default=0,
                                              fill_brok=['full_status']),
            # the timestamp of the last information update (see update_infos)
            'last_check': IntegerProp(default=0, fill_brok=['full_status']),
            # the configurations managed by the daemon, as returned by get_conf
            'cfg_managed': DictProp(default=None),
            'cfg_to_manage': DictProp(default={}),
            'configuration_sent': BoolProp(default=False),
            'statistics': DictProp(default={}),
        })

    def __init__(self, params=None, parsing=True):
        """Initialize a SatelliteLink

        If parsing is True, we are initializing from a configuration, else we are initializing
        from a copy of another satellite link data. This is used when the daemons receive their
        configuration from the arbiter.

        When not parsing (link data received from the arbiter), an instance_id property must
        exist in the provided parameters else a LinkError exception is raised!

        If a satellite_map property exists in the provided parameters, it will update
        the default existing one

        :param params: the link parameters; None is handled as an empty dictionary
        :type params: dict | None
        :param parsing: True when initializing from a parsed configuration
        :type parsing: bool
        :raises LinkError: if not parsing and no instance_id exists in the parameters, or if
            the daemon connection cannot be created (see create_connection)
        """
        super(SatelliteLink, self).__init__(params, parsing)

        logger.debug("Initialize a %s, params: %s", self.__class__.__name__,
                     params)

        # params defaults to None but it is used as a dictionary here after:
        # without this guard, membership test and .get() would raise on None
        if params is None:
            params = {}

        # My interface context
        self.broks = []
        self.actions = {}
        self.wait_homerun = {}
        self.pushed_commands = []

        self.init_running_properties()

        if parsing:
            # Create a new satellite link identifier
            self.instance_id = u'%s_%d' % (self.__class__.__name__,
                                           self.__class__._next_id)
            self.__class__._next_id += 1
        elif 'instance_id' not in params:
            raise LinkError(
                "When not parsing a configuration, "
                "an instance_id must exist in the provided parameters")

        self.fill_default()

        # Hack for ascending compatibility with Shinken configuration
        try:
            # We received a configuration with a 'name' property...
            if self.name:
                setattr(self, "%s_name" % self.type, self.name)
            else:
                # We received a configuration without a 'name' property... old form!
                if getattr(self, "%s_name" % self.type, None):
                    setattr(self, 'name', getattr(self, "%s_name" % self.type))
                else:
                    self.name = "Unnamed %s" % self.type
                    setattr(self, "%s_name" % self.type, self.name)
        except KeyError:
            setattr(self, 'name', getattr(self, "%s_name" % self.type))

        # Initialize our satellite map, and update if required
        self.set_arbiter_satellite_map(params.get('satellite_map', {}))

        self.cfg = {'self_conf': {}, 'schedulers': {}, 'arbiters': {}}

        # Create the daemon connection
        self.create_connection()

    def __repr__(self):  # pragma: no cover
        return '<%s - %s/%s, %s//%s:%s, rid: %s, spare: %s, realm: %s, sub-realms: %s, ' \
               'managing: %s (%s) />' \
               % (self.instance_id, self.type, self.name,
                  self.scheme, self.address, self.port, self.running_id,
                  self.spare, self.realm, self.manage_sub_realms,
                  self.managed_conf_id, self.push_flavor)

    __str__ = __repr__

    @property
    def scheme(self):
        """Daemon interface scheme

        :return: 'https' if the daemon uses SSL (use_ssl is set), else 'http'
        :rtype: str
        """
        return 'https' if self.use_ssl else 'http'

    @staticmethod
    def get_a_satellite_link(sat_type, sat_dict):
        """Build a link object for a given satellite type from a dictionary

        The link class is looked up from the satellite type, e.g. the 'scheduler' type
        gives 'fusionsupervision.objects.schedulerlink.SchedulerLink'. The link is created
        in the not parsing mode.

        :param sat_type: type of satellite
        :type sat_type: str
        :param sat_dict: satellite configuration data
        :type sat_dict: dict
        :return: an instance of the link class matching the satellite type
        """
        link_class_path = 'fusionsupervision.objects.%slink.%sLink' % (
            sat_type, sat_type.capitalize())
        link_class = get_fusionsupervision_class(link_class_path)
        return link_class(params=sat_dict, parsing=False)

    def get_livestate(self):
        """Get the SatelliteLink live state.

        The live state is a tuple made of a state identifier and a message, where
        the state is:
            - 0 for an up and running satellite
            - 1 if the satellite is not reachable
            - 2 if the satellite is dead
            - 3 else (not active)

        :return: (state identifier, state message)
        :rtype: tuple
        """
        if not self.active:
            livestate = 3
        elif not self.reachable:
            livestate = 1
        elif not self.alive:
            livestate = 2
        else:
            livestate = 0

        # Messages are indexed on the state identifier
        descriptions = (
            "up and running.",
            "warning because not reachable.",
            "critical because not responding.",
            "not active by configuration.",
        )
        livestate_output = "%s/%s is %s" % (self.type, self.name, descriptions[livestate])

        return livestate, livestate_output

    def set_arbiter_satellite_map(self, satellite_map=None):
        """Set the satellite map of this link

        satellite_map is the satellites map in current context:
            - A SatelliteLink is owned by an Arbiter
            - satellite_map attribute of a SatelliteLink is the map defined
            IN THE satellite configuration but for creating connections,
            we need to have the satellites map from the Arbiter point of view

        The map is first built from the link address, port and SSL parameters, then
        it is updated with the provided map, if any.

        :param satellite_map: values overriding the link own connection parameters
        :type satellite_map: dict | None
        :return: None
        """
        own_parameters = ('address', 'port', 'use_ssl', 'hard_ssl_name_check')
        self.satellite_map = {key: getattr(self, key) for key in own_parameters}
        if not satellite_map:
            return
        self.satellite_map.update(satellite_map)

    def get_and_clear_context(self):
        """Get and reset the link context: broks, actions, homerun and pushed commands

        :return: the former (broks, actions, wait_homerun, pushed_commands)
        :rtype: tuple
        """
        former_context = (self.broks, self.actions, self.wait_homerun,
                          self.pushed_commands)
        # Start again with fresh empty containers
        self.broks, self.actions, self.wait_homerun, self.pushed_commands = [], {}, {}, []
        return former_context

    def get_and_clear_broks(self):
        """Get the broks of the link and reset the link broks list

        :return: list of all broks of the satellite link
        :rtype: list
        """
        pending_broks, self.broks = self.broks, []
        return pending_broks

    def prepare_for_conf(self):
        """Reset the configuration dictionary that will be pushed to the satellite

        The 'self_conf' part is filled with the link properties that are to be
        propagated to the satellite (see give_satellite_cfg), the 'schedulers' and
        'arbiters' parts are empty.

        :return: None
        """
        logger.debug("- preparing: %s", self)
        own_configuration = self.give_satellite_cfg()
        self.cfg = {
            'self_conf': own_configuration,
            'schedulers': {},
            'arbiters': {}
        }
        logger.debug("- prepared: %s", self.cfg)

    def give_satellite_cfg(self):
        """Get the default information for a satellite.

        Overridden by the specific satellites links

        All the class properties declared as 'to_send', and that exist on this link,
        are gathered in a dictionary to be pushed to the satellite when the
        configuration is dispatched.

        :return: dictionary of information common to all the links
        :rtype: dict
        """
        declared_properties = self.__class__.properties
        return {
            name: getattr(self, name)
            for name, definition in list(declared_properties.items())
            if hasattr(self, name) and definition.to_send
        }

    def give_satellite_json(self):
        """Get the json information for a satellite.

        This to provide information that will be exposed by a daemon on its HTTP interface.

        The result contains the link live state (see get_livestate) and a fixed list of
        link attributes; an attribute that does not exist is reported as 'not_yet_defined'.

        :return: dictionary of information common to all the links
        :rtype: dict
        """
        exposed_properties = (
            'type', 'name', 'uri', 'spare', 'configuration_sent', 'realm_name',
            'manage_sub_realms', 'active', 'reachable', 'alive', 'passive',
            'last_check', 'polling_interval', 'max_check_attempts',
        )

        state, state_output = self.get_livestate()
        res = {"livestate": state, "livestate_output": state_output}
        res.update((key, getattr(self, key, 'not_yet_defined'))
                   for key in exposed_properties)
        return res

    def manages(self, cfg_part):
        """Tell if the satellite is managing this configuration part

        The managed configuration is formed as a dictionary indexed on the link instance_id:
         {
            u'SchedulerLink_1': {
                u'hash': u'4d08630a3483e1eac7898e7a721bd5d7768c8320',
                u'push_flavor': u'4d08630a3483e1eac7898e7a721bd5d7768c8320',
                u'managed_conf_id': [u'Config_1']
            }
        }

        Note that the managed configuration identifier may be a string array rather than
        a simple string (probably due to the serialization when the configuration is pushed).
        Both forms are accepted: a list matches when it contains the configuration part
        identifier, a string matches when it is equal to this identifier.

        :param cfg_part: configuration part as prepared by the Dispatcher
        :type cfg_part: Conf
        :return: True if the satellite manages this configuration
        :rtype: bool
        """
        logger.debug(
            "Do I (%s/%s) manage: %s, my managed configuration(s): %s",
            self.type, self.name, cfg_part, self.cfg_managed)

        # If we do not yet manage a configuration
        if not self.cfg_managed:
            logger.info("I (%s/%s) do not manage (yet) any configuration!",
                        self.type, self.name)
            return False

        # Check in the schedulers list configurations
        for managed_cfg in list(self.cfg_managed.values()):
            managed_conf_id = managed_cfg['managed_conf_id']
            if isinstance(managed_conf_id, (list, tuple)):
                # Array form: a plain equality with a string would never match
                same_configuration = cfg_part.instance_id in managed_conf_id
            else:
                same_configuration = managed_conf_id == cfg_part.instance_id

            # Both the configuration identifier and the push flavor must match
            if same_configuration and managed_cfg['push_flavor'] == cfg_part.push_flavor:
                logger.debug("I do manage this configuration: %s", cfg_part)
                return True

        logger.warning("I (%s/%s) do not manage this configuration: %s",
                       self.type, self.name, cfg_part)
        return False

    def create_connection(self):
        """Create the HTTP client used to communicate with the satellite (con attribute)
        and set the link uri attribute from this client

        The connection parameters are the ones of the link satellite map.

        This is called on the satellite link initialization

        :return: None
        :raises LinkError: if the HTTP client cannot be created; the link is set as dead before
        """
        try:
            target = self.satellite_map
            client = HTTPClient(address=target['address'],
                                port=target['port'],
                                short_timeout=self.short_timeout,
                                long_timeout=self.long_timeout,
                                use_ssl=target['use_ssl'],
                                strong_ssl=target['hard_ssl_name_check'])
            self.con = client
            self.uri = self.con.uri
        except HTTPClientException as exp:
            # No client, so the satellite is considered as dead
            self.set_dead()
            message = "Error with '%s' when creating client: %s" % (self.name, str(exp))
            raise LinkError(message)

    def set_alive(self):
        """Set alive, reachable, and reset attempts.
        If we change state, raise a status brok update

        alive, means the daemon is present in the system
        reachable, means that the HTTP connection is valid

        With this function we confirm that the daemon is reachable and, thus, we assume it is alive!

        :return: None
        """
        previously_alive = self.alive
        self.alive = self.reachable = True
        self.attempt = 0

        if previously_alive:
            # No state change, nothing to propagate
            return

        # We came from dead to alive! We must propagate the good news
        logger.info("Setting %s satellite as alive :)", self.name)
        self.broks.append(self.get_update_status_brok())

    def set_dead(self):
        """Set the satellite into dead state: not alive, not reachable, no more
        connection and attempts reset.
        If we change state (and the daemon is not stopping), raise a status brok update

        :return: None
        """
        previously_alive = self.alive
        self.alive = self.reachable = False
        self.attempt = 0
        # We will have to create a new connection...
        self.con = None

        if not previously_alive or self.stopping:
            return

        # We are dead now! We must propagate the sad news...
        logger.warning("Setting the satellite %s as dead :(", self.name)
        self.broks.append(self.get_update_status_brok())

    def add_failed_check_attempt(self, reason=''):
        """Set the daemon as unreachable and count one more failed attempt;
        when the maximum attempts count is reached, set the daemon as dead

        :param reason: the reason of adding an attempts (stack trace sometimes)
        :type reason: str
        :return: None
        """
        self.reachable = False
        self.attempt += 1

        logger.debug("Failed attempt for %s (%d/%d), reason: %s", self.name,
                     self.attempt, self.max_check_attempts, reason)

        # Only warn when the satellite is alive: no need to warn again
        # and again if it is already dead
        if self.alive and not self.stopping:
            logger.warning("Add failed attempt for %s (%d/%d) - %s",
                           self.name, self.attempt,
                           self.max_check_attempts, reason)
        elif self.alive:
            logger.info(
                "Stopping... failed attempt for %s (%d/%d) - also probably stopping",
                self.name, self.attempt, self.max_check_attempts)

        if self.attempt < self.max_check_attempts:
            return

        # We reached the maximum attempts, the daemon is considered as dead
        if self.stopping:
            logger.info(
                "Stopping... set %s as dead, too much failed attempts (%d)",
                self.name, self.max_check_attempts)
        else:
            logger.warning(
                "Set %s as dead, too much failed attempts (%d), last problem is: %s",
                self.name, self.max_check_attempts, reason)

        self.set_dead()

    def valid_connection(*outer_args, **outer_kwargs):
        # pylint: disable=unused-argument, no-method-argument
        """Decorator factory: check that the daemon connection is established and valid
        before calling the decorated function

        The first positional argument of the decorated function must be the link.
        The decorated function raises a LinkError, without calling the original function,
        if the link has no connection (con) or no running identifier (running_id).

        The decorator arguments are accepted but they are not used.

        :return: the decorator to apply to a link function
        """
        # Imported here because only the decorators of this class need it
        import functools

        def decorator(func):  # pylint: disable=missing-docstring
            # Keep the name and docstring of the decorated function
            @functools.wraps(func)
            def decorated(*args, **kwargs):  # pylint: disable=missing-docstring
                # outer_args and outer_kwargs are the decorator arguments
                # args and kwargs are the decorated function arguments
                link = args[0]
                if not link.con:
                    raise LinkError("The connection is not created for %s" %
                                    link.name)
                if not link.running_id:
                    raise LinkError(
                        "The connection is not initialized for %s" % link.name)

                return func(*args, **kwargs)

            return decorated

        return decorator

    def communicate(*outer_args, **outer_kwargs):
        # pylint: disable=unused-argument, no-method-argument
        """Decorator factory: check that the daemon connection is authorized and valid,
        and handle the communication errors raised by the decorated function

        The first positional argument of the decorated function must be the link.
        The decorated function returns:
        - None, without calling the original function, if the link is not alive
        - the original function result if no exception is raised
        - None and the link is set as dead on a connection exception
        - False and a failed attempt is added if the link is not reachable or on a timeout
        - None and a failed attempt is added on any other HTTP client exception

        On an HTTP client data exception, the whole process is exited.

        The decorator arguments are accepted but they are not used.

        :return: the decorator to apply to a link function
        """
        # Imported here because only the decorators of this class need it
        import functools

        def decorator(func):  # pylint: disable=missing-docstring
            # Keep the name and docstring of the decorated function
            @functools.wraps(func)
            def decorated(*args, **kwargs):  # pylint: disable=missing-docstring
                # outer_args and outer_kwargs are the decorator arguments
                # args and kwargs are the decorated function arguments
                fn_name = func.__name__
                link = args[0]
                if not link.alive:
                    logger.warning("%s is not alive for %s", link.name,
                                   fn_name)
                    return None

                try:
                    # Raised inside the try so that it is counted as a failed attempt
                    if not link.reachable:
                        raise LinkError("The %s %s is not reachable" %
                                        (link.type, link.name))

                    logger.debug("[%s] Calling: %s, %s, %s", link.name,
                                 fn_name, args, kwargs)
                    return func(*args, **kwargs)
                except HTTPClientConnectionException as exp:
                    # A Connection error is raised when the daemon connection cannot be established
                    # No way with the configuration parameters!
                    if not link.stopping:
                        logger.warning(
                            "A daemon (%s/%s) that we must be related with "
                            "cannot be connected: %s", link.type, link.name,
                            exp)
                    else:
                        logger.info(
                            "Stopping... daemon (%s/%s) cannot be connected. "
                            "It is also probably stopping or yet stopped.",
                            link.type, link.name)
                    link.set_dead()
                except (LinkError, HTTPClientTimeoutException) as exp:
                    link.add_failed_check_attempt("Connection timeout "
                                                  "with '%s': %s" %
                                                  (fn_name, str(exp)))
                    return False
                except HTTPClientDataException as exp:
                    # A Data error is raised when the daemon HTTP response is not 200!
                    # No way with the communication if some problems exist in the daemon interface!
                    # Abort all
                    err = "Some daemons that we must be related with " \
                          "have some interface problems. Sorry, I bail out"
                    logger.error(err)
                    os.sys.exit(err)
                except HTTPClientException as exp:
                    link.add_failed_check_attempt("Error with '%s': %s" %
                                                  (fn_name, str(exp)))

                return None

            return decorated

        return decorator

    @communicate()
    def get_running_id(self):
        """Send a HTTP request to the satellite (GET /identity)
        Used to get the daemon running identifier that allows to know if the daemon got restarted

        This is called on connection initialization or re-connection

        If the running identifier changed since the former request, the link context
        (broks, actions, ...) is cleared. The daemon is then set as alive.

        If the daemon is not reachable, the request raises an exception that is expected
        to be handled by the communicate decorator applied to this method.

        :return: Boolean indicating if the running id was received
        :type: bool
        """
        former_running_id = self.running_id

        logger.info("  get the running identifier for %s %s.", self.type,
                    self.name)
        # An exception is raised in this function if the daemon is not reachable
        received = self.con.get('identity')
        self.running_id = received
        if isinstance(received, dict):
            # The identity may be a dictionary containing the running identifier
            self.running_id = received['running_id']

        # First identifier we get: this is not considered as a change
        if former_running_id == 0 and self.running_id:
            logger.info("  -> got: %s.", self.running_id)
            former_running_id = self.running_id

        # If the daemon has just started or has been restarted: it has a new running_id.
        if former_running_id != self.running_id:
            if former_running_id:
                logger.info(
                    "  -> The %s %s running identifier changed: %s. "
                    "The daemon was certainly restarted!", self.type,
                    self.name, self.running_id)
            # So we clear all verifications, they are obsolete now.
            logger.info(
                "The running id of the %s %s changed (%s), "
                "we must clear its context.", self.type, self.name,
                self.running_id)
            self.get_and_clear_context()

        # Set the daemon as alive
        self.set_alive()

        return True

    @valid_connection()
    @communicate()
    def stop_request(self, stop_now=False):
        """Send a stop request to the daemon (GET stop_request)

        :param stop_now: stop now or go to stop wait mode
        :type stop_now: bool
        :return: the daemon response (True)
        """
        logger.debug("Sending stop request to %s, stop now: %s", self.name,
                     stop_now)

        # The flag is sent as a '1' / '0' string
        stop_flag = '1' if stop_now else '0'
        return self.con.get('stop_request', {'stop_now': stop_flag})

    @valid_connection()
    @communicate()
    def update_infos(self, forced=False, test=False):
        """Update satellite info each self.polling_interval seconds
        so we smooth arbiter actions for just useful actions.

        Raise a satellite update status Brok

        If forced is True, then ignore the polling interval. This is used when the
        configuration has not yet been dispatched to the Arbiter satellites.

        If test is True, do not really ping the daemon (useful for the unit tests only)

        :param forced: ignore the ping smoothing
        :type forced: bool
        :param test: forwarded to get_conf
        :type test: bool
        :return:
        None if the last request is too recent,
        False if a timeout was raised during the request,
        else the managed configurations dictionary
        """
        logger.debug("Update informations, forced: %s", forced)

        # First look if it's not too early to ping
        now = time.time()
        too_early = (not forced and self.last_check
                     and self.last_check + self.polling_interval > now)
        if too_early:
            logger.debug(
                "Too early to ping %s, ping period is %ds!, last check: %d, now: %d",
                self.name, self.polling_interval, self.last_check, now)
            return None

        # Refresh the managed configuration (cfg_managed attribute)
        self.get_conf(test=test)

        # Update the daemon last check timestamp
        self.last_check = time.time()

        # Update the state of this element
        status_brok = self.get_update_status_brok()
        self.broks.append(status_brok)

        return self.cfg_managed

    @valid_connection()
    @communicate()
    def get_daemon_stats(self, details=False):
        """Send a HTTP request to the satellite (GET stats, or stats?details=1)

        :param details: if set, the details=1 query parameter is added to the request
        :type details: bool
        :return: Daemon statistics
        :rtype: dict
        """
        logger.debug("Get daemon statistics for %s, %s %s", self.name,
                     self.alive, self.reachable)
        query = '?details=1' if details else ''
        return self.con.get('stats%s' % query)

    @valid_connection()
    @communicate()
    def get_initial_broks(self, broker_name):
        """Send a HTTP request to the satellite (GET /_initial_broks)

        Used to build the initial broks for a broker connecting to a scheduler

        :param broker_name: the concerned broker name
        :type broker_name: str
        :return: the daemon response to the request
        """
        logger.debug("Getting initial broks for %s, %s %s", self.name,
                     self.alive, self.reachable)
        request_parameters = {'broker_name': broker_name}
        return self.con.get('_initial_broks', request_parameters, wait=True)

    @valid_connection()
    @communicate()
    def wait_new_conf(self):
        """Send a HTTP request to the satellite (GET /_wait_new_conf)

        :return: the daemon response to the request
        :rtype: bool
        """
        logger.debug("Wait new configuration for %s, %s %s", self.name,
                     self.alive, self.reachable)
        response = self.con.get('_wait_new_conf')
        return response

    @valid_connection()
    @communicate()
    def put_conf(self, configuration, test=False):
        """Send the configuration to the satellite
        HTTP request to the satellite (POST /_push_configuration)

        If test is True, the configuration is not sent but only stored in the link
        unit_test_pushed_configuration attribute

        :param configuration: The conf to send (data depend on the satellite)
        :type configuration:
        :param test: unit tests mode, do not send the request
        :type test: bool
        :return: True in the test mode, else the daemon response to the request
        """
        logger.debug("Sending configuration to %s, %s %s", self.name,
                     self.alive, self.reachable)

        if not test:
            return self.con.post('_push_configuration', {'conf': configuration},
                                 wait=True)

        # ----------
        # Unit tests: keep the configuration, it is read back by get_conf
        self.unit_test_pushed_configuration = configuration
        return True
        # ----------

    @valid_connection()
    @communicate()
    def has_a_conf(self, magic_hash=None):  # pragma: no cover
        """Send a HTTP request to the satellite (GET /_have_conf)
        Used to know if the satellite has a conf; the response is stored
        in the have_conf attribute

        :param magic_hash: Config hash. Only used for HA arbiter communication
        :type magic_hash: int
        :return: Boolean indicating if the satellite has a (specific) configuration
        :type: bool
        """
        logger.debug("Have a configuration for %s, %s %s", self.name,
                     self.alive, self.reachable)
        request_parameters = {'magic_hash': magic_hash}
        self.have_conf = self.con.get('_have_conf', request_parameters)
        return self.have_conf

    @valid_connection()
    @communicate()
    def get_conf(self, test=False):
        """Send a HTTP request to the satellite (GET /managed_configurations)
        and update the cfg_managed attribute with the new information

        The managed configurations are a dictionary which keys are the scheduler link
        instance id and the values are a dictionary with the hash, the push_flavor and
        the managed_conf_id.

        The have_conf attribute is set to True only if the managed configuration is not
        empty: an empty dictionary, or no response at all (None), means no configuration.

        If test is True, no request is sent and the managed configuration is built from the
        unit test internally stored configuration (see put_conf)

        :param test: unit tests mode, do not send the request
        :type test: bool
        :return: see @communicate, or the managed configuration
        """
        logger.debug("Get managed configuration for %s, %s %s", self.name,
                     self.alive, self.reachable)
        # ----------
        if test:
            self.cfg_managed = {}
            self.have_conf = True
            logger.debug("Get managed configuration test ...")
            if getattr(self, 'unit_test_pushed_configuration',
                       None) is not None:
                # Note this is a dict not a SatelliteLink object !
                for scheduler_link in self.unit_test_pushed_configuration[
                        'schedulers'].values():
                    self.cfg_managed[scheduler_link['instance_id']] = {
                        'hash': scheduler_link['hash'],
                        'push_flavor': scheduler_link['push_flavor'],
                        'managed_conf_id': scheduler_link['managed_conf_id']
                    }
            # print("*** unit tests - get managed configuration %s: %s"
            #       % (self.name, self.cfg_managed))
        # ----------
        else:
            self.cfg_managed = self.con.get('managed_configurations')
            logger.debug("My (%s) fresh managed configuration: %s", self.name,
                         self.cfg_managed)

        # Truthiness rather than a comparison with {}: a None response
        # must not be considered as an existing configuration
        self.have_conf = bool(self.cfg_managed)

        return self.cfg_managed

    @valid_connection()
    @communicate()
    def push_broks(self, broks):
        """Post some broks to the satellite (POST /_push_broks)

        The request is sent with the `wait` option set.

        :param broks: the broks to send
        :type broks: list
        :return: the result of the POST request (see also @communicate)
        :rtype: bool
        """
        logger.debug("[%s] Pushing %d broks", self.name, len(broks))
        payload = {'broks': broks}
        return self.con.post('_push_broks', payload, wait=True)

    @valid_connection()
    @communicate()
    def push_actions(self, actions, scheduler_instance_id):
        """Post some actions to execute to the satellite (POST /_push_actions)

        The actions are sent with the identifier of the scheduler they are coming from.
        The request is sent with the `wait` option set.

        :param actions: the actions to send
        :type actions: list
        :param scheduler_instance_id: identifier of the scheduler instance
        :type scheduler_instance_id: uuid
        :return: the result of the POST request (see also @communicate)
        :rtype: bool
        """
        logger.debug("Pushing %d actions from %s", len(actions),
                     scheduler_instance_id)
        payload = {
            'actions': actions,
            'scheduler_instance_id': scheduler_instance_id
        }
        return self.con.post('_push_actions', payload, wait=True)

    @valid_connection()
    @communicate()
    def push_results(self, results, scheduler_name):
        """Post some actions results to the satellite (POST /put_results)

        The results are sent with the `from` field set to the provided scheduler name.
        The request is sent with the `wait` option set.

        :param results: the results to send
        :type results: list
        :param scheduler_name: value sent in the `from` field of the request
        :type scheduler_name: uuid
        :return: the result of the POST request (see also @communicate)
        :rtype: bool
        """
        logger.debug("Pushing %d results", len(results))
        payload = {
            'results': results,
            'from': scheduler_name
        }
        return self.con.post('put_results', payload, wait=True)

    @valid_connection()
    @communicate()
    def push_external_commands(self, commands):
        """Post some external commands to the satellite (POST /_run_external_commands)

        The request is sent with the `wait` option set.

        :param commands: the external commands to send
        :type commands: list
        :return: the result of the POST request (see also @communicate)
        :rtype: bool
        """
        logger.debug("Pushing %d external commands", len(commands))
        payload = {'cmds': commands}
        return self.con.post('_run_external_commands', payload, wait=True)

    @valid_connection()
    @communicate()
    def get_external_commands(self):
        """Get the external commands of the satellite (GET /_external_commands)

        The received data are un-serialized before being returned.

        :return: the un-serialized answer of the satellite (see also @communicate)
        :rtype: list
        """
        raw_commands = self.con.get('_external_commands', wait=False)
        logger.debug("Got %d external commands from %s: %s", len(raw_commands),
                     self.name, raw_commands)
        return unserialize(raw_commands, True)

    @valid_connection()
    @communicate()
    def get_broks(self, broker_name):
        """Get the broks of the satellite (GET /_broks)

        The provided broker name is sent as the `broker_name` parameter of the request.
        The received data are un-serialized before being returned.

        :param broker_name: value of the `broker_name` request parameter
        :type broker_name: BrokerLink
        :return: the un-serialized answer of the satellite (see also @communicate)
        :rtype: list
        """
        query = {'broker_name': broker_name}
        raw_broks = self.con.get('_broks', query, wait=False)
        logger.debug("Got broks from %s: %s", self.name, raw_broks)
        return unserialize(raw_broks, True)

    @valid_connection()
    @communicate()
    def get_events(self):
        """Get the monitoring events of the satellite (GET /_events)

        The received data are un-serialized before being returned.

        :return: the un-serialized answer of the satellite (see also @communicate)
        :rtype: list
        """
        raw_events = self.con.get('_events', wait=False)
        logger.debug("Got events from %s: %s", self.name, raw_events)
        return unserialize(raw_events, True)

    @valid_connection()
    def get_results(self, scheduler_instance_id):
        """Get the actions results from the satellite (GET /_results)

        The scheduler instance identifier is sent as the `scheduler_instance_id`
        parameter of the request. The answer is returned as is (not un-serialized).

        :param scheduler_instance_id: scheduler instance identifier
        :type scheduler_instance_id: str
        :return: the answer of the satellite
        :rtype: list
        """
        query = {'scheduler_instance_id': scheduler_instance_id}
        results = self.con.get('_results', query, wait=True)
        logger.debug("Got %d results from %s: %s", len(results), self.name,
                     results)
        return results

    @valid_connection()
    def get_actions(self, params):
        """Get the actions to execute from the satellite (GET /_checks)

        The received data are un-serialized before being returned.

        :param params: the request parameters, forwarded as is to the connection
        :type params: str
        :return: the un-serialized answer of the satellite
        :rtype: list
        """
        raw_actions = self.con.get('_checks', params, wait=True)
        logger.debug("Got checks to execute from %s: %s", self.name,
                     raw_actions)
        return unserialize(raw_actions, True)
コード例 #26
0
class Realm(Itemgroup):
    """Realm class is used to implement realm.
    It is basically a group of Hosts assigned to a specific Scheduler/Poller
    (other daemons are optional)

    A realm may have some sub-realms (`realm_members`); the whole hierarchy
    below a realm is stored in `all_sub_members` (uuids) and
    `all_sub_members_names` once `set_level` got called.
    """
    my_type = 'realm'
    members_property = "members"
    group_members_property = "realm_members"

    properties = Itemgroup.properties.copy()
    properties.update({
        'realm_name':
        StringProp(default=u'', fill_brok=['full_status']),
        'alias':
        StringProp(default=u'', fill_brok=['full_status']),
        'realm_members':
        ListProp(default=[], split_on_comma=True),
        'group_members':
        ListProp(default=[], split_on_comma=True),
        'higher_realms':
        ListProp(default=[], split_on_comma=True),
        'default':
        BoolProp(default=False)
    })

    running_properties = Itemgroup.running_properties.copy()
    running_properties.update({
        # Indicate if some only passively or actively checks host exist in the realm
        'passively_checked_hosts': BoolProp(default=None),
        'actively_checked_hosts': BoolProp(default=None),
        # Those lists contain only the uuid of the satellite link, not the whole object!
        'arbiters': ListProp(default=[]),
        'schedulers': ListProp(default=[]),
        'brokers': ListProp(default=[]),
        'pollers': ListProp(default=[]),
        'reactionners': ListProp(default=[]),
        'receivers': ListProp(default=[]),
        'potential_schedulers': ListProp(default=[]),
        'potential_brokers': ListProp(default=[]),
        'potential_pollers': ListProp(default=[]),
        'potential_reactionners': ListProp(default=[]),
        'potential_receivers': ListProp(default=[]),
        # Once configuration is prepared, the count of the hosts in the realm
        'hosts_count': IntegerProp(default=0),
        'packs': DictProp(default={}),
        'parts': DictProp(default={}),
        # Realm level in the realms hierarchy
        'level': IntegerProp(default=-1),
        # All the sub realms (children and grand-children)
        'all_sub_members': ListProp(default=[]),
        'all_sub_members_names': ListProp(default=[]),
    })

    macros = {
        'REALMNAME': 'realm_name',
        'REALMDEFAULT': 'default',
        'REALMMEMBERS': 'members',
        'REALMREALM_MEMBERS': 'realm_members',
        'REALMGROUP_MEMBERS': 'group_members',
        'REALMHOSTS_COUNT': 'hosts_count',
    }

    def __init__(self, params=None, parsing=True):
        """Initialize the realm

        The base class initialization is called, the default values are filled and
        the satellites related attributes are created.

        :param params: forwarded as is to the base class
        :param parsing: forwarded as is to the base class
        """
        super(Realm, self).__init__(params, parsing)

        self.fill_default()

        # Define a packs list for the configuration preparation
        self.packs = []
        # Once the configuration got prepared, packs becomes a dictionary!
        # packs is a dictionary indexed with the configuration part
        # number and containing the list of hosts

        # List of satellites related to the realm
        self.to_satellites = {
            'reactionner': {},
            'poller': {},
            'broker': {},
            'receiver': {}
        }

        # List of satellites that need a configuration dispatch
        self.to_satellites_need_dispatch = {
            'reactionner': {},
            'poller': {},
            'broker': {},
            'receiver': {}
        }

        # List of satellites with their managed configuration
        self.to_satellites_managed_by = {
            'reactionner': {},
            'poller': {},
            'broker': {},
            'receiver': {}
        }

        # Attributes depending of the satellite type
        for sat_type in [
                'arbiter', 'scheduler', 'reactionner', 'poller', 'broker',
                'receiver'
        ]:
            # Minimum is to have one satellite
            setattr(self, "nb_%ss" % sat_type, 0)
            setattr(self, 'potential_%ss' % sat_type, [])

    def __repr__(self):
        """Short description of the realm: class name, realm name, level and, when
        they are not empty, the sub-realms, hosts count, parts and packs
        """
        res = '<%r %r (%d)' % (self.__class__.__name__, self.get_name(),
                               self.level)
        if self.realm_members:
            res = res + ', %d sub-realms: %r' \
                        % (len(self.realm_members),
                           ', '.join(str(s) for s in self.realm_members))
            # The whole hierarchy is only displayed if some direct children exist
            if self.all_sub_members_names:
                res = res + ', %d all sub-realms: %r' \
                            % (len(self.all_sub_members_names),
                               ', '.join(str(s) for s in self.all_sub_members_names))
        if self.hosts_count:
            res = res + ', %d hosts' % self.hosts_count
        if getattr(self, 'parts', None):
            res = res + ', %d parts' % len(self.parts)
        if getattr(self, 'packs', None):
            res = res + ', %d packs' % len(self.packs)
        return res + '/>'

    __str__ = __repr__

    @property
    def name(self):
        """Get the realm name"""
        return self.get_name()

    def get_name(self):
        """Accessor to realm_name attribute

        :return: realm name, or 'unset' if the attribute does not exist
        :rtype: str
        """
        return getattr(self, 'realm_name', 'unset')

    def add_group_members(self, members):
        """Add some new group members to the groups list

        :param members: member name, or list of members names
        :type members: str | list
        :return: None
        """
        if not isinstance(members, list):
            members = [members]

        if not getattr(self, 'group_members', None):
            # Store a copy: keeping a reference to the caller's list would make
            # the next call to this function extend (mutate) the caller's list.
            self.group_members = list(members)
        else:
            self.group_members.extend(members)

    def prepare_satellites(self, satellites):
        """Update the following attributes of a realm::

        * nb_*satellite type*s

        (satellite types are scheduler, reactionner, poller, broker and receiver)

        The satellites declared in the realm that are not found in the provided
        satellites are ignored.

        :param satellites: dict of SatelliteLink objects (indexed with their uuid)
        :type satellites: dict
        :return: None
        """
        for sat_type in [
                "scheduler", "reactionner", "poller", "broker", "receiver"
        ]:
            # We get potential TYPE at realm level first
            for sat_link_uuid in getattr(self, "%ss" % sat_type):
                if sat_link_uuid not in satellites:
                    continue
                sat_link = satellites[sat_link_uuid]

                # Found our declared satellite in the provided satellites
                # NOTE(review): as written, the counter is incremented at most once
                # per satellite type (break), and a satellite that is found but
                # inactive or spare is reported as "not found" - it looks like the
                # else clause was meant for the "not in satellites" case. To be
                # confirmed against the callers before changing this behavior.
                if sat_link.active and not sat_link.spare:
                    # Generic increment : realm.nb_TYPE += 1
                    setattr(self, "nb_%ss" % sat_type,
                            getattr(self, "nb_%ss" % sat_type) + 1)
                    break
                else:
                    self.add_error(
                        "Realm %s, satellite %s declared in the realm is not found "
                        "in the allowed satellites!" %
                        (self.name, sat_link.name))
                    logger.error(
                        "Satellite %s declared in the realm %s not found "
                        "in the allowed satellites!", sat_link.name, self.name)

        logger.info(
            " Realm %s: (in/potential) (schedulers:%d/%d) (pollers:%d/%d) "
            "(reactionners:%d/%d) (brokers:%d/%d) (receivers:%d/%d)",
            self.name, self.nb_schedulers, len(self.potential_schedulers),
            self.nb_pollers, len(self.potential_pollers), self.nb_reactionners,
            len(self.potential_reactionners), self.nb_brokers,
            len(self.potential_brokers), self.nb_receivers,
            len(self.potential_receivers))

    def get_realms_by_explosion(self, realms):
        """Walk through the sub-realms hierarchy to detect loops and unknown members

        The realm is tagged (`rec_tag`) when it is visited: finding an already tagged
        realm means that there is a loop in the realms definition. The realm members
        list is sorted by name and the members that are not found in `realms` are
        declared as unknown members.

        :param realms: realms list, used to look for a specific one
        :type realms: fusionsupervision.objects.realm.Realms
        :return: the `all_sub_members` attribute, or None if a loop is detected
        :rtype: list | None
        """
        # If rec_tag is already set, then we detected a loop in the realms hierarchy!
        if getattr(self, 'rec_tag', False):
            self.add_error(
                "Error: there is a loop in the realm definition %s" %
                self.get_name())
            return None

        # Ok, not in a loop, we tag the realm and parse its members
        self.rec_tag = True

        # Order realm members list by name
        self.realm_members = sorted(self.realm_members)
        for member in self.realm_members:
            realm = realms.find_by_name(member)
            if not realm:
                self.add_unknown_members(member)
                continue

            children = realm.get_realms_by_explosion(realms)
            if children is None:
                # We got a loop in our children definition
                self.all_sub_members = []
                self.realm_members = []
                return None

        # Return the list of all unique members
        return self.all_sub_members

    def set_level(self, level, realms):
        """Set the realm level in the realms hierarchy

        The function is recursively called for the realm members with an incremented
        level. It also (re)builds the `all_sub_members` (uuids) and
        `all_sub_members_names` lists with the children and grand-children.

        :param level: level of this realm
        :type level: int
        :param realms: realms list, used to look for a realm by its name
        :return: names of all the sub-realms (children and grand-children)
        :rtype: list
        """
        self.level = level
        if not self.level:
            logger.info("- %s", self.get_name())
        else:
            logger.info(" %s %s", '+' * self.level, self.get_name())
        self.all_sub_members = []
        self.all_sub_members_names = []
        for child in sorted(self.realm_members):
            child = realms.find_by_name(child)
            if not child:
                continue

            self.all_sub_members.append(child.uuid)
            self.all_sub_members_names.append(child.get_name())
            # The child returns the names of its own whole sub-hierarchy
            grand_children = child.set_level(self.level + 1, realms)
            for grand_child in grand_children:
                if grand_child in self.all_sub_members_names:
                    continue
                grand_child = realms.find_by_name(grand_child)
                if grand_child:
                    self.all_sub_members_names.append(grand_child.get_name())
                    self.all_sub_members.append(grand_child.uuid)
        return self.all_sub_members_names

    def get_all_subs_satellites_by_type(self, sat_type, realms):
        """Get all satellites of the wanted type in this realm recursively

        NOTE(review): `all_sub_members` already contains the grand-children (see
        `set_level`) and this function recurses on each member, so the returned list
        may contain the same satellite several times.

        :param sat_type: name of the realm attribute to get (pollers, reactionners ..)
        :type sat_type: str
        :param realms: all realms, indexed with the realm uuid
        :type realms: list of realm object
        :return: list of satellite in this realm
        :rtype: list
        """
        # Copy to avoid extending our own attribute
        res = copy.copy(getattr(self, sat_type))
        for member in self.all_sub_members:
            res.extend(realms[member].get_all_subs_satellites_by_type(
                sat_type, realms))
        return res

    def get_satellites_by_type(self, s_type):
        """Generic function to access one of the satellite attribute
        ie : self.pollers, self.reactionners ...

        :param s_type: satellite type wanted
        :type s_type: str
        :return: self.*type*s, or an empty list if the attribute does not exist
        :rtype: list
        """

        if hasattr(self, s_type + 's'):
            return getattr(self, s_type + 's')

        logger.debug("[realm %s] do not have this kind of satellites: %s",
                     self.name, s_type)
        return []

    def get_potential_satellites_by_type(self, satellites, s_type):
        """Get the satellite links of the provided list that this realm may use

        The links which uuid is declared in self.*type*s are returned if some exist,
        else the ones which uuid is in self.potential_*type*s are returned.

        :param satellites: list of SatelliteLink objects
        :type satellites: SatelliteLink list
        :param s_type: satellite type wanted
        :type s_type: str
        :return: list of the matching satellite links
        :rtype: list
        """
        if not hasattr(self, 'potential_' + s_type + 's'):
            logger.debug("[realm %s] do not have this kind of satellites: %s",
                         self.name, s_type)
            return []

        # First, the satellites of the realm
        matching_satellites = []
        for sat_link in satellites:
            if sat_link.uuid in getattr(self, s_type + 's'):
                matching_satellites.append(sat_link)
        if matching_satellites:
            logger.debug("- found %ss: %s", s_type, matching_satellites)
            return matching_satellites

        # None found, so the potential satellites (do not limit to one satellite!)
        for sat_link in satellites:
            if sat_link.uuid in getattr(self, 'potential_' + s_type + 's'):
                matching_satellites.append(sat_link)

        logger.debug("- potential %ss: %s", s_type, matching_satellites)
        return matching_satellites

    def get_nb_of_must_have_satellites(self, s_type):
        """Generic function to access one of the number satellite attribute
        ie : self.nb_pollers, self.nb_reactionners ...

        :param s_type: satellite type wanted
        :type s_type: str
        :return: self.nb_*type*s, or 0 if the attribute does not exist
        :rtype: int
        """
        if hasattr(self, 'nb_' + s_type + 's'):
            return getattr(self, 'nb_' + s_type + 's')

        logger.debug("[realm %s] do not have this kind of satellites: %s",
                     self.name, s_type)
        return 0

    def get_links_for_a_broker(self,
                               pollers,
                               reactionners,
                               receivers,
                               realms,
                               manage_sub_realms=False):
        """Get a configuration dictionary with pollers, reactionners and receivers links
        for a broker

        :param pollers: pollers, indexed with the poller identifier
        :type pollers:
        :param reactionners: reactionners, indexed with the reactionner identifier
        :type reactionners:
        :param receivers: receivers, indexed with the receiver identifier
        :type receivers:
        :param realms: realms
        :type realms:
        :param manage_sub_realms: True if the broker manages sub realms
        :type manage_sub_realms: bool

        :return: dict containing pollers, reactionners and receivers links (key is satellite id)
        :rtype: dict
        """

        # Create void satellite links
        cfg = {
            'pollers': {},
            'reactionners': {},
            'receivers': {},
        }

        # Our self.daemons are only identifiers... that we use to fill the satellite links
        for poller_id in self.pollers:
            poller = pollers[poller_id]
            cfg['pollers'][poller.uuid] = poller.give_satellite_cfg()

        for reactionner_id in self.reactionners:
            reactionner = reactionners[reactionner_id]
            cfg['reactionners'][
                reactionner.uuid] = reactionner.give_satellite_cfg()

        for receiver_id in self.receivers:
            receiver = receivers[receiver_id]
            cfg['receivers'][receiver.uuid] = receiver.give_satellite_cfg()

        # If the broker manages sub realms, fill the satellite links...
        if manage_sub_realms:
            # Now pollers
            for poller_id in self.get_all_subs_satellites_by_type(
                    'pollers', realms):
                poller = pollers[poller_id]
                cfg['pollers'][poller.uuid] = poller.give_satellite_cfg()

            # Now reactionners
            for reactionner_id in self.get_all_subs_satellites_by_type(
                    'reactionners', realms):
                reactionner = reactionners[reactionner_id]
                cfg['reactionners'][
                    reactionner.uuid] = reactionner.give_satellite_cfg()

            # Now receivers
            for receiver_id in self.get_all_subs_satellites_by_type(
                    'receivers', realms):
                receiver = receivers[receiver_id]
                cfg['receivers'][receiver.uuid] = receiver.give_satellite_cfg()

        return cfg

    def get_links_for_a_scheduler(self, pollers, reactionners, brokers):
        """Get a configuration dictionary with pollers, reactionners and brokers links
        for a scheduler

        For each satellite type, the realm own satellites (identifiers) and the
        satellites got from `get_potential_satellites_by_type` (link objects) are used.

        If an exception is raised while building the links, it is logged and the
        dictionary is returned as it was filled when the exception occurred.

        :param pollers: pollers
        :param reactionners: reactionners
        :param brokers: brokers
        :return: dict containing pollers, reactionners and brokers links (key is satellite id)
        :rtype: dict
        """

        # Create void satellite links
        cfg = {
            'pollers': {},
            'reactionners': {},
            'brokers': {},
        }

        # Our self.daemons are only identifiers... that we use to fill the satellite links
        try:
            for poller in self.pollers + self.get_potential_satellites_by_type(
                    pollers, "poller"):
                # An identifier is replaced with the corresponding link
                if poller in pollers:
                    poller = pollers[poller]
                cfg['pollers'][poller.uuid] = poller.give_satellite_cfg()

            for reactionner in self.reactionners + self.get_potential_satellites_by_type(
                    reactionners, "reactionner"):
                if reactionner in reactionners:
                    reactionner = reactionners[reactionner]
                cfg['reactionners'][
                    reactionner.uuid] = reactionner.give_satellite_cfg()

            for broker in self.brokers + self.get_potential_satellites_by_type(
                    brokers, "broker"):
                if broker in brokers:
                    broker = brokers[broker]
                cfg['brokers'][broker.uuid] = broker.give_satellite_cfg()
        except Exception as exp:  # pylint: disable=broad-except
            # Best effort: log and return what we got
            logger.exception("realm.get_links_for_a_scheduler: %s", exp)

        return cfg
コード例 #27
0
class Receiver(Satellite):
    """Receiver class. Referenced as "app" in most Interface

    """
    my_type = 'receiver'

    properties = Satellite.properties.copy()
    properties.update({
        'type': StringProp(default='receiver'),
        'port': IntegerProp(default=7773)
    })

    def __init__(self, **kwargs):
        """Initialize the receiver daemon

        The base class is initialized with the `daemon_name` found in the arguments
        ('Default-receiver' if it is not provided).

        :param kwargs: command line arguments
        """
        daemon_name = kwargs.get('daemon_name', 'Default-receiver')
        super(Receiver, self).__init__(daemon_name, **kwargs)

        # The schedulers and arbiters are initialized by the base class

        # The modules are loaded only once
        self.have_modules = False

        # The external commands manager, created when a configuration is received
        self.external_commands_manager = None

        # Buffer for the external commands that are not yet processed
        self.unprocessed_external_commands = []

        self.accept_passive_unknown_check_results = False

        self.http_interface = GenericInterface(self)

    def add(self, elt):
        """Generic function to add objects to the daemon internal lists.
        Manage Broks, External commands

        - a dictionary which `my_type` is 'externalcommand' is transformed to an
          ExternalCommand (it is ignored if it has no `cmd_line`)
        - a Brok is tagged with our instance_id and stored in the events list if it is
          a 'monitoring_log' brok, else in the broks list
        - an ExternalCommand is stored in the unprocessed external commands list
        - any other object is ignored

        :param elt: object to add
        :type elt: fusionsupervision.FusionsupervisionObject
        :return: None
        """
        # external commands may be received as a dictionary when pushed from the WebUI
        if isinstance(elt, dict) and elt.get('my_type') == "externalcommand":
            if 'cmd_line' not in elt:
                logger.debug(
                    "Received a bad formated external command: %s. "
                    "No cmd_line!", elt)
                return

            logger.debug("Received a dictionary external command: %s", elt)
            # No creation timestamp is provided as None; the received
            # dictionary is not modified
            elt = ExternalCommand(elt['cmd_line'],
                                  elt.get('creation_timestamp'))

        if isinstance(elt, Brok):
            # For brok, we tag the brok with our instance_id
            elt.instance_id = self.instance_id
            if elt.type == 'monitoring_log':
                # The brok is a monitoring event
                with self.events_lock:
                    self.events.append(elt)
                statsmgr.counter('events', 1)
            else:
                with self.broks_lock:
                    self.broks.append(elt)
            # Counted for the monitoring events and for the other broks
            statsmgr.counter('broks.added', 1)
        elif isinstance(elt, ExternalCommand):
            # Log the queued command itself (and not the ExternalCommand class content)
            logger.debug("Queuing an external command: %s", elt.cmd_line)
            self.unprocessed_external_commands.append(elt)
            statsmgr.counter('external-commands.added', 1)

    def setup_new_conf(self):
        """Set up the receiver with the configuration stored in `cur_conf`

        The base class treatment is executed first. Then, with the configuration
        lock acquired:
        - if no modules were loaded yet, the modules configuration is un-serialized,
          the modules are loaded and their external instances are started
        - an external commands manager is created with the options found in the
          `global_conf` part of the configuration (some default options are used
          if this part is missing or empty)
        - the connection with each of our satellites is initialized

        Finally, the daemon is declared as having a configuration.

        :return: None
        """
        # The base class does the generic part of the job...
        super(Receiver, self).setup_new_conf()

        # ...and now what is specific to a receiver
        with self.conf_lock:
            logger.debug("Got config: %s", self.cur_conf)

            # The modules are configured and started only once
            if not self.have_modules:
                try:
                    self.modules = unserialize(self.cur_conf['modules'],
                                               no_load=True)
                except FusionsupervisionClassLookupException as exp:  # pragma: no cover, simple protection
                    logger.error(
                        'Cannot un-serialize modules configuration '
                        'received from arbiter: %s', exp)

                if not self.modules:
                    logger.info("I do not have modules")
                else:
                    logger.info("I received some modules configuration: %s",
                                self.modules)
                    self.have_modules = True

                    self.do_load_modules(self.modules)
                    # The external modules are started too
                    self.modules_manager.start_external_instances()

            # The external commands manager: as a receiver, we get
            # commands and we dispatch them to the schedulers
            global_conf = self.cur_conf.get('global_conf', None)
            if not global_conf:
                logger.error(
                    "Received a configuration without any global_conf! "
                    "This may hide a configuration problem with the "
                    "realms and the manage_sub_realms of the satellites!")
                global_conf = {
                    'accept_passive_unknown_check_results': False,
                    'log_external_commands': True
                }
            accept_unknown_results = global_conf.get(
                'accept_passive_unknown_check_results', False)
            log_commands = global_conf.get('log_external_commands', False)
            self.external_commands_manager = ExternalCommandManager(
                None, 'receiver', self, accept_unknown_results, log_commands)

            # Connect to all our satellites
            logger.info("Initializing connection with my satellites:")
            for link in list(self.get_links_of_type(s_type='').values()):
                logger.info("- : %s/%s", link.type, link.name)
                if self.daemon_connection_init(link):
                    continue
                logger.error("Satellite connection failed: %s", link)

        # From now, I have a configuration!
        self.have_conf = True

    def get_external_commands_from_arbiters(self):
        """Get the external commands from our arbiters

        Each active arbiter link is requested for its external commands and every
        received command is added to the daemon (see the `add` function). The
        arbiters that are not active are ignored.

        A connection problem with an arbiter is only logged and the next
        arbiter is then requested.

        :return: None
        """
        for arbiter_link_uuid in self.arbiters:
            arbiter = self.arbiters[arbiter_link_uuid]

            if not arbiter.active:
                logger.debug(
                    "The arbiter '%s' is not active, it is not possible to get "
                    "its external commands!", arbiter.name)
                continue

            try:
                logger.debug("Getting external commands from: %s",
                             arbiter.name)
                # A None (or empty) answer is managed as no commands
                commands = arbiter.get_external_commands() or []
                if commands:
                    logger.debug("Got %d commands from: %s", len(commands),
                                 arbiter.name)
                for command in commands:
                    self.add(command)
            except LinkError:
                logger.warning(
                    "Arbiter connection failed, I could not get external commands!"
                )
            except Exception as exp:  # pylint: disable=broad-except
                logger.error(
                    "Arbiter connection failed, I could not get external commands!"
                )
                logger.exception("Exception: %s", exp)

    def push_external_commands_to_schedulers(self):
        """Push the received external commands to the schedulers

        The unprocessed external commands list is emptied and each command is resolved
        by the external commands manager. The commands resolved as global are queued
        for every known scheduler link. Then, the queued commands of each active
        scheduler link are pushed through the link and the pushed / failed
        statistics are updated.

        :return: None
        """
        pending = self.unprocessed_external_commands
        if not pending:
            return

        # Take the pending commands and restart from an empty list
        self.unprocessed_external_commands = []
        logger.debug("Commands: %s", pending)

        # Resolve each command and queue the global ones for all the schedulers
        logger.debug("Commands to process: %d commands", len(pending))
        for ext_cmd in pending:
            resolved = self.external_commands_manager.resolve_command(ext_cmd)
            logger.debug("Resolved command: %s, result: %s", ext_cmd.cmd_line, resolved)
            if not resolved or not resolved['global']:
                continue
            for link_uuid in self.schedulers:
                self.schedulers[link_uuid].pushed_commands.append(ext_cmd)

        # Push the queued commands to each active scheduler
        pushed_total = 0
        failed_total = 0
        for link_uuid in self.schedulers:
            link = self.schedulers[link_uuid]

            if not link.active:
                logger.debug("The scheduler '%s' is not active, it is not possible to push "
                             "external commands to its connection!", link.name)
                continue

            # Only the command lines are sent to the scheduler
            cmd_lines = [queued.cmd_line for queued in link.pushed_commands]
            if not cmd_lines:
                logger.debug("The scheduler '%s' has no commands.", link.name)
                continue

            logger.debug("Sending %d commands to scheduler %s", len(cmd_lines), link.name)
            try:
                sent = link.push_external_commands(cmd_lines)
            except LinkError:
                # A failed connection is handled as "nothing sent"
                sent = []
                logger.warning("Scheduler connection failed, I could not push external commands!")

            # The scheduler queue is emptied whatever the push result
            link.pushed_commands = []

            if sent:
                statsmgr.gauge('external-commands.pushed.%s' % link.name, len(cmd_lines))
                pushed_total += len(cmd_lines)
            else:
                failed_total += len(cmd_lines)
                statsmgr.gauge('external-commands.failed.%s' % link.name, len(cmd_lines))
                # Keep the command lines that were not sent in our own list
                self.external_commands.extend(cmd_lines)

        statsmgr.gauge('external-commands.pushed.all', pushed_total)
        statsmgr.gauge('external-commands.failed.all', failed_total)

    def do_loop_turn(self):
        """Receiver daemon main loop

        One loop turn cleans the zombie modules, sets up a new configuration if one
        is detected, gets the objects from the queues, gets the external commands
        from the arbiters, pushes them to the schedulers and runs the modules
        'tick' hook. The duration of each timed step is sent to the statistics
        manager.

        :return: None
        """
        def _timed(metric, action, *args):
            # Run the action and report its duration under the given metric name
            started = time.time()
            action(*args)
            statsmgr.timer(metric, time.time() - started)

        # First, clean the modules
        self.check_and_del_zombie_modules()

        # The arbiter may have pushed a new configuration
        got_new_conf = self.watch_for_new_conf(timeout=0.05)
        if got_new_conf:
            logger.info("I got a new configuration...")
            self.setup_new_conf()

        # The external modules may have raised some objects
        _timed('core.get-objects-from-queues', self.get_objects_from_from_queues)

        # External commands: get from the arbiters...
        _timed('external-commands.got.time', self.get_external_commands_from_arbiters)
        statsmgr.gauge('external-commands.got.count',
                       len(self.unprocessed_external_commands))

        # ... and push to the schedulers
        _timed('external-commands.pushed.time', self.push_external_commands_to_schedulers)

        # Tell the modules that it is a new tick
        _timed('hook.tick', self.hook_point, 'tick')

    def get_daemon_stats(self, details=False):
        """Get the base class daemon statistics, completed with the receiver data

        The daemon name and type are added to the statistics and the 'counters'
        entry receives the count of the external commands and of the unprocessed
        external commands.

        :param details: forwarded as is to the base class get_daemon_stats
        :return: stats dictionary
        :rtype: dict
        """
        # Start from the statistics of the base class
        stats = super(Receiver, self).get_daemon_stats(details=details)

        identity = {'name': self.name, 'type': self.type}
        stats.update(identity)

        # The counters entry is provided by the base class statistics
        daemon_counters = stats['counters']
        daemon_counters['external-commands'] = len(self.external_commands)
        daemon_counters['external-commands-unprocessed'] = \
            len(self.unprocessed_external_commands)

        return stats

    def main(self):
        """Main receiver function

        Initialize and start the daemon, wait for the initial configuration and,
        when a configuration is received, set it up and run the main loop. A stop
        is requested when the main loop is over or when no configuration was
        received.

        Any exception is reported with its traceback through exit_on_exception and
        is then raised again.

        :return: None
        """
        try:
            # Daemon mode initialization
            started = self.do_daemon_init_and_start()
            if not started:
                self.exit_on_error(message="Daemon initialization error", exit_code=3)

            # Wait for the initial configuration
            self.wait_for_initial_conf()
            if self.new_conf:
                # Use the received configuration, then enter the main loop
                self.setup_new_conf()
                self.do_main_loop()
                logger.info("Exited from the main loop.")

            self.request_stop()
        except Exception:  # pragma: no cover, this should never happen indeed ;)
            error_trace = traceback.format_exc()
            self.exit_on_exception(error_trace)
            raise
コード例 #28
0
class NotificationWay(Item):
    """NotificationWay class is used to implement way of sending notifications (command, periods..)

    A notification way holds, for the hosts and for the services, the notification
    commands, the notification period and the notification options. It is used to
    decide whether a notification is wanted (see want_host_notification and
    want_service_notification).
    """
    my_type = 'notificationway'

    properties = Item.properties.copy()
    properties.update({
        'notificationway_name':
            StringProp(fill_brok=['full_status']),
        'host_notifications_enabled':
            BoolProp(default=True, fill_brok=['full_status']),
        'service_notifications_enabled':
            BoolProp(default=True, fill_brok=['full_status']),
        'host_notification_period':
            StringProp(fill_brok=['full_status']),
        'service_notification_period':
            StringProp(fill_brok=['full_status']),
        'host_notification_options':
            ListProp(default=[''], fill_brok=['full_status'], split_on_comma=True),
        'service_notification_options':
            ListProp(default=[''], fill_brok=['full_status'], split_on_comma=True),
        'host_notification_commands':
            ListProp(default=[], fill_brok=['full_status']),
        'service_notification_commands':
            ListProp(default=[], fill_brok=['full_status']),
        'min_business_impact':
            IntegerProp(default=0, fill_brok=['full_status']),
    })

    running_properties = Item.running_properties.copy()

    # This tab is used to transform old parameters name into new ones
    # so from Nagios2 format, to Nagios3 ones.
    # Or FusionSupervision Engine deprecated names like criticity
    old_properties = {
        'min_criticity': 'min_business_impact',
    }

    macros = {}

    special_properties = ('service_notification_commands', 'host_notification_commands',
                          'service_notification_period', 'host_notification_period')

    def __init__(self, params=None, parsing=True):
        """Create a notification way from its parameters

        When the notification commands are provided as a list of dictionaries
        (deserialization), the CommandCall objects are created again, set on the
        object and the property is removed from the provided parameters.

        :param params: parameters of the object (the commands properties may be removed)
        :type params: dict | None
        :param parsing: forwarded to CommandCall and to the parent class constructor
        :type parsing: bool
        """
        if params is None:
            params = {}

        # At deserialization, thoses are dict
        # TODO: Separate parsing instance from recreated ones
        for prop in ['service_notification_commands', 'host_notification_commands']:
            if prop in params and isinstance(params[prop], list) and params[prop] \
                    and isinstance(params[prop][0], dict):
                new_list = [CommandCall(elem, parsing=parsing) for elem in params[prop]]
                # We recreate the object
                setattr(self, prop, new_list)
                # And remove prop, to prevent from being overridden
                del params[prop]
        super(NotificationWay, self).__init__(params, parsing=parsing)

    def serialize(self):
        """Serialize the object, including its notification commands

        The result of the parent class serialization is completed with the
        serialization of each host / service notification command (or None
        when the commands property is None).

        :return: the serialized object
        """
        res = super(NotificationWay, self).serialize()

        for prop in ['service_notification_commands', 'host_notification_commands']:
            commands = getattr(self, prop)
            if commands is None:
                res[prop] = None
            else:
                res[prop] = [elem.serialize() for elem in commands]

        return res

    def get_name(self):
        """Accessor to notificationway_name attribute

        :return: notificationway name
        :rtype: str
        """
        return self.notificationway_name

    def _want_notification(self, o_type, short_states, timeperiods, timestamp,
                           state, n_type, business_impact, cmd):
        # pylint: disable=too-many-arguments, too-many-return-statements
        """Shared implementation of want_host_notification and want_service_notification

        :param o_type: 'host' or 'service', used to select the <o_type>_notification* properties
        :type o_type: str
        :param short_states: mapping of a state name to its notification option letter
        :type short_states: dict
        :param timeperiods: container indexed with the notification period of this object
        :param timestamp: time we want to notify the contact (usually now)
        :type timestamp: int
        :param state: host or service state
        :type state: str
        :param n_type: type of notification ("PROBLEM", "RECOVERY" ..)
        :type n_type: str
        :param business_impact: impact of the concerned item
        :type business_impact: int
        :param cmd: command launched to notify the contact
        :return: True if the notification is wanted, otherwise False
        :rtype: bool
        """
        if not getattr(self, '%s_notifications_enabled' % o_type):
            return False

        # Maybe the command we ask for are not for us, but for another notification ways
        # on the same contact. If so, bail out
        if cmd and cmd not in getattr(self, '%s_notification_commands' % o_type):
            return False

        # If the business_impact is not high enough, we bail out
        if business_impact < self.min_business_impact:
            return False

        # The 'n' option disables all: check it before any time period lookup
        options = getattr(self, '%s_notification_options' % o_type)
        if 'n' in options:
            return False

        notif_period = timeperiods[getattr(self, '%s_notification_period' % o_type)]
        if not notif_period.is_time_valid(timestamp):
            return False

        if n_type == u'PROBLEM':
            return state in short_states and short_states[state] in options
        if n_type == u'RECOVERY':
            return 'r' in options
        if n_type == u'ACKNOWLEDGEMENT':
            # Always wanted when we are in the notification period
            return True
        if n_type in (u'FLAPPINGSTART', u'FLAPPINGSTOP', u'FLAPPINGDISABLED'):
            return 'f' in options
        if n_type in (u'DOWNTIMESTART', u'DOWNTIMEEND', u'DOWNTIMECANCELLED'):
            # No notification when a downtime was cancelled. Is that true??
            # According to the documentation we need to look at _host_ options
            return 's' in self.host_notification_options

        return False

    def want_service_notification(self, timeperiods, timestamp, state, n_type,
                                  business_impact, cmd=None):
        """Check if notification options match the state of the service
        Notification is NOT wanted in ONE of the following case::

        * service notifications are disabled
        * cmd is not in service_notification_commands
        * business_impact < self.min_business_impact
        * service_notification_options contains 'n'
        * service_notification_period is not valid
        * state does not match service_notification_options for problem, recovery and flapping
        * state does not match host_notification_options for downtime

        :param timeperiods: container indexed with the service notification period
        :param timestamp: time we want to notify the contact (usually now)
        :type timestamp: int
        :param state: host or service state ("WARNING", "CRITICAL" ..)
        :type state: str
        :param n_type: type of notification ("PROBLEM", "RECOVERY" ..)
        :type n_type: str
        :param business_impact: impact of this service
        :type business_impact: int
        :param cmd: command launched to notify the contact
        :type cmd: str
        :return: True if no condition is matched, otherwise False
        :rtype: bool
        """
        short_states = {
            u'WARNING': 'w', u'UNKNOWN': 'u', u'CRITICAL': 'c',
            u'RECOVERY': 'r', u'FLAPPING': 'f', u'DOWNTIME': 's'
        }
        return self._want_notification('service', short_states, timeperiods, timestamp,
                                       state, n_type, business_impact, cmd)

    def want_host_notification(self, timperiods, timestamp,
                               state, n_type, business_impact, cmd=None):
        """Check if notification options match the state of the host
        Notification is NOT wanted in ONE of the following case::

        * host notifications are disabled
        * cmd is not in host_notification_commands
        * business_impact < self.min_business_impact
        * host_notification_options contains 'n'
        * host_notification_period is not valid
        * state does not match host_notification_options for problem, recovery, flapping and dt

        :param timperiods: container indexed with the host notification period
        :param timestamp: time we want to notify the contact (usually now)
        :type timestamp: int
        :param state: host or service state ("WARNING", "CRITICAL" ..)
        :type state: str
        :param n_type: type of notification ("PROBLEM", "RECOVERY" ..)
        :type n_type: str
        :param business_impact: impact of this service
        :type business_impact: int
        :param cmd: command launched to notify the contact
        :type cmd: str
        :return: True if no condition is matched, otherwise False
        :rtype: bool
        """
        short_states = {
            u'DOWN': 'd', u'UNREACHABLE': 'u', u'RECOVERY': 'r',
            u'FLAPPING': 'f', u'DOWNTIME': 's'
        }
        return self._want_notification('host', short_states, timperiods, timestamp,
                                       state, n_type, business_impact, cmd)

    def get_notification_commands(self, o_type):
        """Get notification commands for object type

        :param o_type: object type (host or service)
        :type o_type: str
        :return: command list
        :rtype: list[fusionsupervision.objects.command.Command]
        """
        # service_notification_commands for service
        notif_commands_prop = o_type + '_notification_commands'
        notif_commands = getattr(self, notif_commands_prop)
        return notif_commands

    def is_correct(self):
        # pylint: disable=too-many-branches
        """Check if this object configuration is correct ::

        * Check our own specific properties
        * Call our parent class is_correct checker

        When both the service and the host notification options are exactly ['n'],
        the object is considered as correct without any further check.

        :return: True if the configuration is correct, otherwise False
        :rtype: bool
        """
        state = True

        # Do not execute checks if notifications are disabled
        if (getattr(self, 'service_notification_options', None) == ['n'] and
                getattr(self, 'host_notification_options', None) == ['n']):
            return True

        # Internal checks before executing inherited function...

        # Service part
        if not hasattr(self, 'service_notification_commands'):
            msg = "[notificationway::%s] do not have any service_notification_commands defined" % (
                self.get_name()
            )
            self.add_error(msg)
            state = False
        else:
            for cmd in self.service_notification_commands:
                if cmd is None:
                    msg = "[notificationway::%s] a service_notification_command is missing" % (
                        self.get_name()
                    )
                    self.add_error(msg)
                    state = False
                elif not cmd.is_valid():
                    msg = "[notificationway::%s] a service_notification_command is invalid" % (
                        self.get_name()
                    )
                    self.add_error(msg)
                    state = False

        if getattr(self, 'service_notification_period', None) is None:
            msg = "[notificationway::%s] the service_notification_period is invalid" % (
                self.get_name()
            )
            self.add_error(msg)
            state = False

        # Now host part
        if not hasattr(self, 'host_notification_commands'):
            msg = "[notificationway::%s] do not have any host_notification_commands defined" % (
                self.get_name()
            )
            self.add_error(msg)
            state = False
        else:
            for cmd in self.host_notification_commands:
                if cmd is None:
                    msg = "[notificationway::%s] a host_notification_command is missing" % (
                        self.get_name()
                    )
                    self.add_error(msg)
                    state = False
                elif not cmd.is_valid():
                    # The message prefix is our own name, as for all the other messages;
                    # the command itself is identified by the dumped attributes
                    msg = "[notificationway::%s] a host_notification_command is invalid (%s)" % (
                        self.get_name(), str(cmd.__dict__)
                    )
                    self.add_error(msg)
                    state = False

        if getattr(self, 'host_notification_period', None) is None:
            msg = "[notificationway::%s] the host_notification_period is invalid" % (
                self.get_name()
            )
            self.add_error(msg)
            state = False

        return super(NotificationWay, self).is_correct() and state
コード例 #29
0
class Downtime(FusionsupervisionObject):
    """ Schedules downtime for a specified service. If the "fixed" argument is set
    to one (1), downtime will start and end at the times specified by the
    "start" and "end" arguments.
    Otherwise, downtime will begin between the "start" and "end" times and last
    for "duration" seconds. The "start" and "end" arguments are specified
    in time_t format (seconds since the UNIX epoch). The specified service
    downtime can be triggered by another downtime entry if the "trigger_id"
    is set to the ID of another scheduled downtime entry.
    Set the "trigger_id" argument to zero (0) if the downtime for the
    specified service should not be triggered by another downtime entry.

    """

    my_type = 'downtime'
    properties = {
        'activate_me':
            StringProp(default=u''),
        'entry_time':
            IntegerProp(default=0, fill_brok=['full_status']),
        'fixed':
            BoolProp(default=True, fill_brok=['full_status']),
        'start_time':
            IntegerProp(default=0, fill_brok=['full_status']),
        'duration':
            IntegerProp(default=0, fill_brok=['full_status']),
        'trigger_id':
            StringProp(default=u''),
        'end_time':
            IntegerProp(default=0, fill_brok=['full_status']),
        'real_end_time':
            IntegerProp(default=0),
        'author':
            StringProp(default=u'FusionSupervision Engine',
                       fill_brok=['full_status']),
        'comment':
            StringProp(default=u''),
        'is_in_effect':
            BoolProp(default=False),
        'has_been_triggered':
            BoolProp(default=False),
        'can_be_deleted':
            BoolProp(default=False),
        'ref':
            StringProp(default=u'unset'),
        'ref_type':
            StringProp(default=u'unset'),
        'comment_id':
            StringProp(default=u''),
    }

    def __init__(self, params, parsing=False):
        """Create a downtime from its parameters

        When the parameters do not contain an 'uuid', the downtime is considered as
        a newly created one and its running data are initialized: entry time,
        triggered downtimes list, duration of a fixed downtime, real end time...

        :param params: parameters of the downtime
        :type params: dict
        :param parsing: forwarded to the parent class constructor
        :type parsing: bool
        """
        creating = 'uuid' not in params

        super(Downtime, self).__init__(params, parsing=parsing)

        self.fill_default()

        if creating:
            self.activate_me = []  # The other downtimes i need to activate
            self.entry_time = int(time.time())
            # triggered plus fixed makes no sense
            if self.trigger_id not in ['', '0']:
                self.fixed = False
            if self.fixed:
                self.duration = self.end_time - self.start_time
            # This is important for flexible downtimes. Here start_time and
            # end_time mean: in this time interval it is possible to trigger
            # the beginning of the downtime which lasts for duration.
            # Later, when a non-ok event happens, real_end_time will be
            # recalculated from now+duration
            # end_time will be displayed in the web interface, but real_end_time
            # is used internally
            self.real_end_time = self.end_time
            self.is_in_effect = False

            self.has_been_triggered = False  # another downtime has triggered me
            self.can_be_deleted = False

    def __str__(self):  # pragma: no cover
        active = "active" if self.is_in_effect is True else "inactive"
        d_type = "fixed" if self.fixed is True else "flexible"
        return "%s %s Downtime id=%s %s - %s" % (active, d_type, self.uuid,
                                                 time.ctime(self.start_time),
                                                 time.ctime(self.end_time))

    def trigger_me(self, other_downtime):
        """Wrapper to activate_me.append function
        Used to add another downtime to activate

        :param other_downtime: other downtime to activate/cancel
        :type other_downtime:
        :return: None
        """
        self.activate_me.append(other_downtime)

    def in_scheduled_downtime(self):
        """Getter for is_in_effect attribute

        :return: True if downtime is in effect, False otherwise
        :rtype: bool
        """
        return self.is_in_effect

    def _get_ref_item(self, hosts, services):
        """Get the host or the service this downtime is attached to (self.ref)"""
        if self.ref in hosts:
            return hosts[self.ref]
        return services[self.ref]

    def _get_triggered_downtimes(self, hosts, services):
        """Iterate the downtime objects matching the activate_me entries

        An entry which is a Downtime object is used as is, any other entry is
        considered as a downtime identifier searched in the downtimes of the
        hosts and then of the services.
        """
        for entry in self.activate_me:
            if isinstance(entry, Downtime):
                yield entry
                continue
            for items in (hosts, services):
                for item in items:
                    if entry in item.downtimes:
                        yield item.downtimes[entry]

    @staticmethod
    def _get_notification_period(item, timeperiods):
        """Get the notification period of the item, None if the item has no period"""
        if getattr(item, 'notification_period', None) is None:
            return None
        return timeperiods[item.notification_period]

    def _get_author_data(self):
        """Build the author data used for the downtime notifications"""
        # todo: note that alias and name are not implemented yet
        return {
            'author': self.author,
            'author_name': u'Not available',
            'author_alias': u'Not available',
            'author_comment': self.comment
        }

    def enter(self, timeperiods, hosts, services):
        """Set ref in scheduled downtime and raise downtime log entry (start)

        The downtimes registered with trigger_me are also entered.

        :param timeperiods: container indexed with the item notification period
        :param hosts: hosts objects to get item ref
        :type hosts: fusionsupervision.objects.host.Hosts
        :param services: services objects to get item ref
        :type services: fusionsupervision.objects.service.Services
        :return: broks
        :rtype: list of broks
        """
        item = self._get_ref_item(hosts, services)
        broks = []
        self.is_in_effect = True
        if self.fixed is False:
            # A flexible downtime lasts for its duration from now
            now = time.time()
            self.real_end_time = now + self.duration
        item.scheduled_downtime_depth += 1
        item.in_scheduled_downtime = True
        if item.scheduled_downtime_depth == 1:
            item.raise_enter_downtime_log_entry()
            notification_period = self._get_notification_period(item, timeperiods)
            item.create_notifications('DOWNTIMESTART',
                                      notification_period,
                                      hosts,
                                      services,
                                      author_data=self._get_author_data())
            if self.ref in hosts:
                broks.append(self.get_raise_brok(item.get_name()))

                # For an host, acknowledge the host problem (and its services problems)
                # Acknowledge the host with a sticky ack and notifications
                # The acknowledge will expire at the same time as the downtime end
                item.acknowledge_problem(
                    notification_period, hosts, services, 2, 1,
                    "FusionSupervision Engine",
                    "Acknowledged because of an host downtime")
            else:
                broks.append(
                    self.get_raise_brok(item.host_name, item.get_name()))
        for downtime in self._get_triggered_downtimes(hosts, services):
            broks.extend(downtime.enter(timeperiods, hosts, services))
        return broks

    def exit(self, timeperiods, hosts, services):
        """Remove ref in scheduled downtime and raise downtime log entry (exit)

        :param timeperiods: container indexed with the item notification period
        :param hosts: hosts objects to get item ref
        :type hosts: fusionsupervision.objects.host.Hosts
        :param services: services objects to get item ref
        :type services: fusionsupervision.objects.service.Services
        :return: broks (empty when the downtime was not in effect or when the
                 item is still in another downtime)
        :rtype: list
        """
        item = self._get_ref_item(hosts, services)

        broks = []
        # If not is_in_effect means that it was probably a flexible downtime which was
        # not triggered. In this case, nothing special to do...
        if self.is_in_effect is True:
            # This was a fixed or a flexible+triggered downtime
            self.is_in_effect = False
            item.scheduled_downtime_depth -= 1
            if item.scheduled_downtime_depth == 0:
                item.raise_exit_downtime_log_entry()
                # Same protection as in enter: the item may have no notification period
                notification_period = self._get_notification_period(item, timeperiods)
                item.create_notifications(u'DOWNTIMEEND',
                                          notification_period,
                                          hosts,
                                          services,
                                          author_data=self._get_author_data())
                item.in_scheduled_downtime = False
                if self.ref in hosts:
                    broks.append(self.get_expire_brok(item.get_name()))
                else:
                    broks.append(
                        self.get_expire_brok(item.host_name, item.get_name()))

        item.del_comment(self.comment_id)
        self.can_be_deleted = True

        # when a downtime ends and the concerned item was a problem
        # a notification should be sent with the next critical check

        # So we should set a flag here which informs the consume_result function
        # to send a notification
        item.in_scheduled_downtime_during_last_check = True
        return broks

    def cancel(self, timeperiods, hosts, services):
        """Remove ref in scheduled downtime and raise downtime log entry (cancel)

        The downtimes registered with trigger_me are also cancelled.

        :param timeperiods: forwarded to the cancel of the triggered downtimes
        :param hosts: hosts objects to get item ref
        :type hosts: fusionsupervision.objects.host.Hosts
        :param services: services objects to get item ref
        :type services: fusionsupervision.objects.service.Services
        :return: broks
        :rtype: list
        """
        item = self._get_ref_item(hosts, services)
        broks = []
        # The item downtime depth is only increased when the downtime enters, so
        # it must only be decreased for a downtime which is currently in effect
        was_in_effect = self.is_in_effect
        self.is_in_effect = False
        if was_in_effect:
            item.scheduled_downtime_depth -= 1
            if item.scheduled_downtime_depth == 0:
                item.raise_cancel_downtime_log_entry()
                item.in_scheduled_downtime = False
                if self.ref in hosts:
                    broks.append(self.get_expire_brok(item.get_name()))
                else:
                    broks.append(
                        self.get_expire_brok(item.host_name, item.get_name()))
        self.del_automatic_comment(item)
        self.can_be_deleted = True
        item.in_scheduled_downtime_during_last_check = True
        # Nagios does not notify on canceled downtimes
        # Also cancel other downtimes triggered by me
        for downtime in self._get_triggered_downtimes(hosts, services):
            broks.extend(downtime.cancel(timeperiods, hosts, services))
        return broks

    def add_automatic_comment(self, ref):
        """Add comment on ref for downtime

        The created comment is stored in the ref comments and its uuid is kept
        in the comment_id attribute.

        :param ref: the host/service we want to link a comment to
        :type ref: fusionsupervision.objects.schedulingitem.SchedulingItem

        :return: the created comment
        """
        time_format = "%Y-%m-%d %H:%M:%S"
        start = time.strftime(time_format, time.localtime(self.start_time))
        end = time.strftime(time_format, time.localtime(self.end_time))
        if self.fixed is True:
            text = DOWNTIME_FIXED_MESSAGE % (ref.my_type, start, end, ref.my_type)
        else:
            hours, remainder = divmod(self.duration, 3600)
            minutes, _ = divmod(remainder, 60)
            text = DOWNTIME_FLEXIBLE_MESSAGE % (ref.my_type, start, end,
                                                hours, minutes, ref.my_type)

        data = {
            'comment': text,
            'comment_type': 1 if ref.my_type == 'host' else 2,
            'entry_type': 2,
            'source': 0,
            'expires': False,
            'ref': ref.uuid
        }
        comment = Comment(data)
        self.comment_id = comment.uuid
        ref.comments[comment.uuid] = comment
        return comment

    def del_automatic_comment(self, item):
        """Remove automatic comment on ref previously created

        :param item: item service or host
        :type item: object
        :return: None
        """
        item.del_comment(self.comment_id)
        self.comment_id = ''

    def fill_data_brok_from(self, data, brok_type):
        """Fill data with info of item by looking at brok_type
        in props of properties

        :param data: data to fill
        :type data: dict
        :param brok_type: type of brok
        :type brok_type: str
        :return: None
        TODO: Duplicate from Notification.fill_data_brok_from
        """
        # The brok types are an attribute of the property definition object
        for prop, entry in self.__class__.properties.items():
            if brok_type in (getattr(entry, 'fill_brok', None) or ()):
                data[prop] = getattr(self, prop)

    def _get_brok(self, brok_type, host_name, service_name):
        """Build a brok of the given type with our serialized data and the item names"""
        data = self.serialize()
        data['host'] = host_name
        if service_name != '':
            data['service'] = service_name

        return Brok({'type': brok_type, 'data': data})

    def get_raise_brok(self, host_name, service_name=''):
        """Get a start downtime brok

        :param host_name: host concerned by the downtime
        :type host_name
        :param service_name: service concerned by the downtime
        :type service_name
        :return: brok with wanted data
        :rtype: fusionsupervision.brok.Brok
        """
        return self._get_brok('downtime_raise', host_name, service_name)

    def get_expire_brok(self, host_name, service_name=''):
        """Get an expire downtime brok

        :param host_name: host concerned by the downtime
        :type host_name
        :param service_name: service concerned by the downtime
        :type service_name
        :return: brok with wanted data
        :rtype: fusionsupervision.brok.Brok
        """
        return self._get_brok('downtime_expire', host_name, service_name)
コード例 #30
0
class Comment(FusionsupervisionObject):
    """A comment attached to a monitored item.

    The declared properties carry the author, the text, the comment and entry
    types, the source, the expiration flag and the reference of the commented item.
    """

    my_type = 'comment'
    properties = {
        'entry_time': IntegerProp(default=0),
        'entry_type': IntegerProp(),
        'author': StringProp(default=u'FusionSupervision Engine'),
        'comment': StringProp(default=u''),
        'comment_type': IntegerProp(),
        'source': IntegerProp(default=0),
        'expires': BoolProp(default=False),
        'ref': StringProp(default=u'unset'),
        'ref_type': StringProp(default=u'unset'),
    }

    def __init__(self, params, parsing=False):
        """Create a comment from a dictionary of parameters.

        Both arguments are handed over unchanged to the parent constructor. The
        entry time is then set to the current time when it is missing or falsy,
        and ``fill_default`` is called (presumably to apply the defaults declared
        in ``properties`` - see the parent class).

        The values below are presumably provided as keys of ``params``
        (to be confirmed against the parent constructor):

        * ref: reference object (host / service)
        * author: author of this comment (str)
        * comment: text of the comment itself (str)
        * comment_type (int) ::

                            * 1 <=> HOST_COMMENT
                            * 2 <=> SERVICE_COMMENT

        * entry_type (int), type of entry linked to this comment ::

                          * 1 <=> USER_COMMENT
                          * 2 <=> DOWNTIME_COMMENT
                          * 3 <=> FLAPPING_COMMENT
                          * 4 <=> ACKNOWLEDGEMENT_COMMENT

        * source (int), source of this comment ::

                      * 0 <=> COMMENTSOURCE_INTERNAL
                      * 1 <=> COMMENTSOURCE_EXTERNAL

        * expires: comment expires or not (bool)

        :param params: parameters used to build the comment
        :type params: dict
        :param parsing: forwarded as is to the parent constructor
        :type parsing: bool
        :return: None
        """
        super(Comment, self).__init__(params, parsing)

        # A missing, zero or otherwise falsy entry time is replaced with "now"
        if not getattr(self, 'entry_time', None):
            self.entry_time = int(time.time())

        self.fill_default()

    def __str__(self):  # pragma: no cover
        description = (self.uuid, self.comment)
        return "Comment id=%s %s" % description

    def get_comment_brok(self, host_name, service_name=''):
        """Build the 'comment' brok for this comment

        The brok data is the serialized comment, completed with the host name and,
        when service_name is truthy, with the service name.

        :param host_name: name of the host the comment relates to
        :param service_name: name of the service the comment relates to, if any
        :return: brok with wanted data
        :rtype: fusionsupervision.brok.Brok
        """
        payload = self.serialize()
        payload['host'] = host_name
        if service_name:
            payload['service'] = service_name

        brok_content = {'type': 'comment', 'data': payload}
        return Brok(brok_content)