Example #1
0
    def _check_disabled(self):
        """Queue the configured action when the health check is disabled.

        Nothing is done unless the 'check_disabled' setting is truthy. When
        it is, the 'on_disabled' setting selects what is put on the action
        queue for this service's IP prefix: 'withdraw' queues a
        DeleteOperation and 'advertise' queues an AddOperation. Any other
        value of 'on_disabled' queues nothing.

        Returns:
            True if the check is disabled and an operation was queued,
            otherwise False.

        """
        if not self.config['check_disabled']:
            return False

        on_disabled = self.config['on_disabled']
        if on_disabled == 'withdraw':
            self.log.info("Check is disabled and ip_prefix will be withdrawn")
            operation = DeleteOperation(name=self.name,
                                        ip_prefix=self.ip_with_prefixlen,
                                        ip_version=self.ip_version)
            final_message = "Check is now permanently disabled"
        elif on_disabled == 'advertise':
            self.log.info("check is disabled, ip_prefix wont be withdrawn")
            operation = AddOperation(name=self.name,
                                     ip_prefix=self.ip_with_prefixlen,
                                     ip_version=self.ip_version)
            final_message = 'check is now permanently disabled'
        else:
            # Neither known policy matched: report the check as not disabled.
            return False

        # Both policies finish the same way: announce, enqueue, confirm.
        self.log.info("adding %s in the queue", self.ip_with_prefixlen)
        self.action.put(operation)
        self.log.info(final_message)
        return True
Example #2
0
    def _check_disabled(self):
        """Queue the configured action when the health check is disabled.

        Nothing is done unless the 'check_disabled' setting is truthy. When
        it is, the 'on_disabled' setting selects what is put on the action
        queue for this service's IP prefix: 'withdraw' queues a
        DeleteOperation and 'advertise' queues an AddOperation. Any other
        value of 'on_disabled' queues nothing.

        Returns:
            True if the check is disabled and an operation was queued,
            otherwise False.
        """
        if not self.config['check_disabled']:
            return False

        on_disabled = self.config['on_disabled']
        if on_disabled == 'withdraw':
            self.log.info("Check is disabled and ip_prefix will be withdrawn",
                          priority=20,
                          **self.extra)
            operation_cls = DeleteOperation
            final_message = "Check is now permanently disabled"
        elif on_disabled == 'advertise':
            self.log.info("check is disabled, ip_prefix wont be withdrawn",
                          priority=80,
                          **self.extra)
            operation_cls = AddOperation
            final_message = 'check is now permanently disabled'
        else:
            # Neither known policy matched: report the check as not disabled.
            return False

        # Both policies build their operation from the same arguments and
        # finish the same way: announce, enqueue, confirm.
        operation = operation_cls(name=self.name,
                                  ip_prefix=self.ip_with_prefixlen,
                                  log=self.log,
                                  ip_version=self.ip_version,
                                  **self.extra)
        announcement = "adding {i} in the queue".format(
            i=self.ip_with_prefixlen)
        self.log.info(announcement, **self.extra)
        self.action.put(operation)
        self.log.info(final_message,
                      priority=20,
                      status='disabled',
                      **self.extra)
        return True
 def __init__(self, service, config, action, splay_startup):
     """Set up the check thread for one service.

     The thread takes the service name as its own name and is flagged as a
     daemon. The IP prefix from the configuration is parsed once and its
     address, prefix length, prefix notation and IP version are kept as
     attributes. One AddOperation and one DeleteOperation are prepared up
     front with identical arguments.

     Arguments:
         service: name of the service, also used as the thread name.
         config: settings of the check. This method reads 'ip_prefix',
             'ip_check_disabled', 'custom_bird_reconfigure_cmd_timeout'
             and, optionally, 'custom_bird_reconfigure_cmd'.
         action: stored on the instance as ``self.action``.
         splay_startup: stored on the instance as ``self.splay_startup``.
     """
     super(ServiceCheck, self).__init__()
     self.name = service  # Used by Thread()
     self.daemon = True  # Used by Thread()
     self.config = config
     self.action = action
     self.splay_startup = splay_startup

     # The configuration was sanity checked earlier, so parsing the prefix
     # is not expected to raise here.
     # NOTE: Without an explicit mask the ipaddress module assumes /32 for
     # IPv4 and /128 for IPv6, which makes network_address the IP address
     # itself.
     network = ipaddress.ip_network(self.config['ip_prefix'])
     self.ip_address = str(network.network_address)
     self.prefix_length = network.prefixlen
     self.ip_with_prefixlen = network.with_prefixlen
     self.ip_version = network.version
     self.ip_check_disabled = self.config['ip_check_disabled']
     self.log = logging.getLogger(PROGRAM_NAME)
     self.extra = dict(
         ip_address=self.ip_address,
         prefix_length=self.prefix_length,
         ip_check_disabled=self.ip_check_disabled,
         status='unknown',
     )

     # Add and delete operations take exactly the same arguments, so the
     # keyword set is assembled once and shared.
     operation_kwargs = dict(
         name=self.name,
         ip_prefix=self.ip_with_prefixlen,
         ip_version=self.ip_version,
         bird_reconfigure_timeout=(
             config['custom_bird_reconfigure_cmd_timeout']),
         bird_reconfigure_cmd=config.get('custom_bird_reconfigure_cmd'),
     )
     self.add_operation = AddOperation(**operation_kwargs)
     self.del_operation = DeleteOperation(**operation_kwargs)
     self.log.info("loading check for %s", self.name, extra=self.extra)
Example #4
0
    def run(self):
        """Discover the health of a service and queue the matching action.

        Runs until it is killed by the main program, unless the check is
        disabled, in which case it returns right away. On every cycle it
        first verifies that the IP prefix is assigned and then runs the
        health check. The service is considered UP after 'check_rise'
        consecutive successful checks and DOWN after 'check_fail'
        consecutive failed checks, or immediately when the IP prefix isn't
        assigned. An AddOperation (UP) or a DeleteOperation (DOWN) is put on
        the action queue only when the established state changes, which
        prevents unnecessary configuration changes when a check is flapping.

        Cycles are aligned to 'check_interval' seconds, using the wall clock
        offset observed at startup.
        """
        up_cnt = 0
        down_cnt = 0
        # The current established state of the service check, it can be
        # either UP or DOWN but only after a number of consecutive successful
        # or unsuccessful health checks.
        check_state = 'Unknown'

        for key, value in self.config.items():
            self.log.debug("%s=%s:%s", key, value, type(value))

        # Service check will abort if it is disabled.
        if self._check_disabled():
            return

        interval = self.config['check_interval']
        start_offset = time.time() % interval
        # Go in a loop until we are told to stop
        while True:
            timestamp = time.time()
            if not self._ip_assigned():
                # A missing IP takes the service DOWN at once, without
                # waiting for 'check_fail' consecutive failures.
                up_cnt = 0
                self.extra['status'] = 'down'
                self.log.warning(
                    "status DOWN because %s isn't assigned to "
                    "loopback interface.",
                    self.ip_with_prefixlen,
                    extra=self.extra)
                if check_state != 'DOWN':
                    check_state = 'DOWN'
                    del_operation = DeleteOperation(
                        name=self.name,
                        ip_prefix=self.ip_with_prefixlen,
                        ip_version=self.ip_version)
                    self.log.info("adding %s in the queue",
                                  self.ip_with_prefixlen,
                                  extra=self.extra)
                    self.action.put(del_operation)
            elif self._run_check():
                if up_cnt == (self.config['check_rise'] - 1):
                    self.extra['status'] = 'up'
                    self.log.info("status UP", extra=self.extra)
                    # Service exceeded all consecutive checks. Set its state
                    # accordingly and put an item in queue. But do it only if
                    # previous state was different, to prevent unnecessary bird
                    # reloads when a service flaps between states.
                    if check_state != 'UP':
                        check_state = 'UP'
                        operation = AddOperation(
                            name=self.name,
                            ip_prefix=self.ip_with_prefixlen,
                            ip_version=self.ip_version)
                        self.log.info("adding %s in the queue",
                                      self.ip_with_prefixlen,
                                      extra=self.extra)
                        self.action.put(operation)
                elif up_cnt < self.config['check_rise']:
                    up_cnt += 1
                    self.log.info("going up %s", up_cnt, extra=self.extra)
                else:
                    self.log.error("up_cnt is higher %s, it's a BUG!",
                                   up_cnt,
                                   extra=self.extra)
                # A successful check breaks any streak of failures.
                down_cnt = 0
            else:
                if down_cnt == (self.config['check_fail'] - 1):
                    self.extra['status'] = 'down'
                    self.log.info("status DOWN", extra=self.extra)
                    # Service exceeded all consecutive checks.
                    # Set its state accordingly and put an item in queue.
                    # But do it only if previous state was different, to
                    # prevent unnecessary bird reloads when a service flaps
                    # between states
                    if check_state != 'DOWN':
                        check_state = 'DOWN'
                        del_operation = DeleteOperation(
                            name=self.name,
                            ip_prefix=self.ip_with_prefixlen,
                            ip_version=self.ip_version)
                        self.log.info("adding %s in the queue",
                                      self.ip_with_prefixlen,
                                      extra=self.extra)
                        self.action.put(del_operation)
                elif down_cnt < self.config['check_fail']:
                    down_cnt += 1
                    self.log.info("going down %s", down_cnt, extra=self.extra)
                else:
                    # This branch guards the failure counter, so it must
                    # report down_cnt rather than up_cnt.
                    self.log.error("down_cnt is higher %s, it's a BUG!",
                                   down_cnt,
                                   extra=self.extra)
                # A failed check breaks any streak of successes.
                up_cnt = 0

            self.log.info("wall clock time %.3fms",
                          (time.time() - timestamp) * 1000,
                          extra=self.extra)

            # Sleep until the next point in time that has the same offset
            # within the interval as the moment the loop was started.
            sleep = start_offset - time.time() % interval
            if sleep < 0:
                sleep += interval
            self.log.debug("sleeping for %.3fsecs", sleep, extra=self.extra)
            time.sleep(sleep)
Example #5
0
    def run(self):
        """Discover the health of a service and queue the matching action.

        Runs until it is killed by the main program, unless the check is
        disabled, in which case it returns right away. On every cycle it
        first verifies that the IP prefix is assigned and then runs the
        health check. The service is considered UP after 'check_rise'
        consecutive successful checks and DOWN after 'check_fail'
        consecutive failed checks, or immediately when the IP prefix isn't
        assigned. An AddOperation (UP) or a DeleteOperation (DOWN) is put on
        the action queue only when the established state changes, which
        prevents unnecessary configuration changes when a check is flapping.

        Cycles are aligned to 'check_interval' seconds, using the wall clock
        offset observed at startup.
        """
        successes = 0
        failures = 0
        # Established state of the check: becomes 'UP' or 'DOWN' only after
        # enough consecutive results (or a missing IP) have settled it.
        state = 'Unknown'

        for option, setting in self.config.items():
            self.log.debug("{}={}:{}".format(option, setting, type(setting)),
                           json_blob=False)

        # A disabled check has nothing to monitor.
        if self._check_disabled():
            return

        def enqueue(operation_cls):
            """Build an operation for this service and put it in the queue."""
            operation = operation_cls(
                name=self.name,
                ip_prefix=self.ip_with_prefixlen,
                log=self.log,
                ip_version=self.ip_version,
                **self.extra)
            self.log.info(
                "adding {i} in the queue".format(i=self.ip_with_prefixlen),
                **self.extra)
            self.action.put(operation)

        interval = self.config['check_interval']
        start_offset = time.time() % interval
        # Loop until the main program stops us.
        while True:
            started_at = time.time()
            if not self._ip_assigned():
                # A missing IP takes the service DOWN at once, without
                # waiting for 'check_fail' consecutive failures.
                successes = 0
                self.log.warning(
                    "status DOWN because {i} isn't assigned to loopback "
                    "interface.".format(i=self.ip_with_prefixlen),
                    priority=80,
                    status='down',
                    **self.extra)
                if state != 'DOWN':
                    state = 'DOWN'
                    enqueue(DeleteOperation)
            elif self._run_check():
                rise = self.config['check_rise']
                if successes == rise - 1:
                    self.log.info("status UP", status='up', **self.extra)
                    # Enough consecutive successes. Queue an update only on
                    # a real state change, so a flapping service doesn't
                    # trigger needless bird reloads.
                    if state != 'UP':
                        state = 'UP'
                        enqueue(AddOperation)
                elif successes < rise:
                    successes += 1
                    self.log.info("going up {n}".format(n=successes),
                                  **self.extra)
                else:
                    self.log.error(
                        "up_cnt higher, it's a BUG! {n}".format(n=successes),
                        priority=70,
                        **self.extra)
                # A successful check breaks any streak of failures.
                failures = 0
            else:
                fail = self.config['check_fail']
                if failures == fail - 1:
                    self.log.info("status DOWN",
                                  priority=100,
                                  status='down',
                                  **self.extra)
                    # Enough consecutive failures. Queue an update only on
                    # a real state change, so a flapping service doesn't
                    # trigger needless bird reloads.
                    if state != 'DOWN':
                        state = 'DOWN'
                        enqueue(DeleteOperation)
                elif failures < fail:
                    failures += 1
                    self.log.info("going down {n}".format(n=failures),
                                  priority=40,
                                  **self.extra)
                else:
                    self.log.error(
                        "down_cnt higher, it's a BUG! {n}".format(n=failures),
                        priority=70,
                        **self.extra)
                # A failed check breaks any streak of successes.
                successes = 0

            elapsed_ms = (time.time() - started_at) * 1000
            self.log.info("wall clock time {t:.3f}ms".format(t=elapsed_ms),
                          json_blob=False)

            # Sleep until the next point in time that has the same offset
            # within the interval as the moment the loop was started.
            pause = start_offset - time.time() % interval
            if pause < 0:
                pause += interval
            self.log.debug("sleep for {t:.3f}secs".format(t=pause),
                           json_blob=False)
            time.sleep(pause)