def _check_disabled(self):
    """Tell whether the health check is disabled, queueing a final action.

    Nothing happens unless ``check_disabled`` is set in the configuration.
    When it is set, the ``on_disabled`` setting picks the operation that is
    put on the action queue for this service's IP prefix:

    * ``'withdraw'``  -> a DeleteOperation is queued,
    * ``'advertise'`` -> an AddOperation is queued.

    Any other ``on_disabled`` value queues nothing and is reported as
    "not disabled".

    Returns:
        True if an operation was queued because the check is disabled,
        otherwise False.
    """
    if not self.config['check_disabled']:
        return False

    on_disabled = self.config['on_disabled']
    if on_disabled == 'withdraw':
        self.log.info("Check is disabled and ip_prefix will be withdrawn")
        operation = DeleteOperation(name=self.name,
                                    ip_prefix=self.ip_with_prefixlen,
                                    ip_version=self.ip_version)
        closing_message = "Check is now permanently disabled"
    elif on_disabled == 'advertise':
        self.log.info("check is disabled, ip_prefix wont be withdrawn")
        operation = AddOperation(name=self.name,
                                 ip_prefix=self.ip_with_prefixlen,
                                 ip_version=self.ip_version)
        closing_message = 'check is now permanently disabled'
    else:
        # Disabled, but with an 'on_disabled' value this method does not
        # act upon: nothing is queued.
        return False

    # Both modes finish the same way: announce, enqueue, confirm.
    self.log.info("adding %s in the queue", self.ip_with_prefixlen)
    self.action.put(operation)
    self.log.info(closing_message)
    return True
def _check_disabled(self):
    """Tell whether the health check is disabled, queueing a final action.

    Nothing happens unless ``check_disabled`` is set in the configuration.
    When it is set, ``on_disabled`` selects what is put on the action queue
    for this service's IP prefix: ``'withdraw'`` queues a DeleteOperation,
    ``'advertise'`` queues an AddOperation.  Any other value queues nothing.

    Every log call forwards ``self.extra`` as keyword arguments, and the
    final message is logged with ``status='disabled'``.

    Returns:
        True if an operation was queued because the check is disabled,
        otherwise False.
    """
    if not self.config['check_disabled']:
        return False

    mode = self.config['on_disabled']
    if mode == 'withdraw':
        self.log.info("Check is disabled and ip_prefix will be withdrawn",
                      priority=20,
                      **self.extra)
        queued = DeleteOperation(name=self.name,
                                 ip_prefix=self.ip_with_prefixlen,
                                 log=self.log,
                                 ip_version=self.ip_version,
                                 **self.extra)
        farewell = "Check is now permanently disabled"
    elif mode == 'advertise':
        self.log.info("check is disabled, ip_prefix wont be withdrawn",
                      priority=80,
                      **self.extra)
        queued = AddOperation(name=self.name,
                              ip_prefix=self.ip_with_prefixlen,
                              log=self.log,
                              ip_version=self.ip_version,
                              **self.extra)
        farewell = 'check is now permanently disabled'
    else:
        # Disabled, but with an 'on_disabled' value this method does not
        # act upon: nothing is queued.
        return False

    # Shared tail of both modes: announce, enqueue, confirm.
    self.log.info("adding {i} in the queue".format(i=self.ip_with_prefixlen),
                  **self.extra)
    self.action.put(queued)
    self.log.info(farewell, priority=20, status='disabled', **self.extra)
    return True
def __init__(self, service, config, action, splay_startup):
    """Prepare a service check named after the service it monitors.

    Args:
        service: Name of the service; stored as ``self.name``.
        config: Mapping with the settings of this check.  Read here:
            ``ip_prefix``, ``ip_check_disabled``,
            ``custom_bird_reconfigure_cmd_timeout`` (required) and
            ``custom_bird_reconfigure_cmd`` (optional).
        action: Stored as ``self.action``.
        splay_startup: Stored as ``self.splay_startup``.

    Besides storing the arguments, this derives the IP address, prefix
    length, prefix string and IP version from ``ip_prefix``, builds the
    ``extra`` dictionary handed to log calls (status starts as
    ``'unknown'``) and pre-builds one AddOperation and one DeleteOperation
    for the prefix.
    """
    super(ServiceCheck, self).__init__()
    self.name = service  # Used by Thread()
    self.daemon = True  # Used by Thread()
    self.config = config
    self.action = action
    self.splay_startup = splay_startup

    # The configuration has been sanity checked beforehand, so parsing the
    # prefix *should* not raise an exception.
    network = ipaddress.ip_network(self.config['ip_prefix'])
    # NOTE: Without an explicit subnet mask the ipaddress module assumes
    # /32 for IPv4 and /128 for IPv6, so the prefix length is 32 or 128 and
    # the network_address attribute is the IP address itself.
    self.ip_address = str(network.network_address)
    self.prefix_length = network.prefixlen
    self.ip_with_prefixlen = network.with_prefixlen
    self.ip_version = network.version
    self.ip_check_disabled = self.config['ip_check_disabled']

    self.log = logging.getLogger(PROGRAM_NAME)
    self.extra = dict(
        ip_address=self.ip_address,
        prefix_length=self.prefix_length,
        ip_check_disabled=self.ip_check_disabled,
        status='unknown',
    )

    # Both operations are described by exactly the same arguments.
    operation_kwargs = dict(
        name=self.name,
        ip_prefix=self.ip_with_prefixlen,
        ip_version=self.ip_version,
        bird_reconfigure_timeout=(
            config['custom_bird_reconfigure_cmd_timeout']),
        bird_reconfigure_cmd=config.get('custom_bird_reconfigure_cmd', None),
    )
    self.add_operation = AddOperation(**operation_kwargs)
    self.del_operation = DeleteOperation(**operation_kwargs)

    self.log.info("loading check for %s", self.name, extra=self.extra)
def run(self):
    """Discover the health of a service and queue the matching operation.

    Runs until it is killed from the main program and is responsible for
    putting an item into the action queue based on the status of the
    health check.

    The service is considered UP after ``check_rise`` consecutive
    successful health checks, in which case an AddOperation for the IP
    prefix is queued (asking the main program to add the prefix to the BIRD
    configuration).  It is considered DOWN after ``check_fail`` consecutive
    failed checks, or as soon as ``_ip_assigned()`` reports False; a
    DeleteOperation is queued in those cases.  An operation is queued only
    when the established state actually changes, so a flapping check does
    not cause unnecessary configuration changes.

    Returns:
        None.  Returns right away when ``_check_disabled()`` is true;
        otherwise loops forever.
    """
    up_cnt = 0
    down_cnt = 0
    # The current established state of the service check, it can be
    # either UP or DOWN but only after a number of consecutive successful
    # or unsuccessful health checks.
    check_state = 'Unknown'

    for key, value in self.config.items():
        self.log.debug("%s=%s:%s", key, value, type(value))

    # Service check will abort if it is disabled.
    if self._check_disabled():
        return

    interval = self.config['check_interval']
    # Position inside the interval at which this loop started; every
    # iteration sleeps until that same position comes around again.
    start_offset = time.time() % interval

    # Go in a loop until we are told to stop
    while True:
        timestamp = time.time()
        if not self._ip_assigned():
            up_cnt = 0
            self.extra['status'] = 'down'
            self.log.warning(
                "status DOWN because %s isn't assigned to "
                "loopback interface.",
                self.ip_with_prefixlen,
                extra=self.extra)
            if check_state != 'DOWN':
                check_state = 'DOWN'
                del_operation = DeleteOperation(
                    name=self.name,
                    ip_prefix=self.ip_with_prefixlen,
                    ip_version=self.ip_version)
                self.log.info("adding %s in the queue",
                              self.ip_with_prefixlen,
                              extra=self.extra)
                self.action.put(del_operation)
        elif self._run_check():
            if up_cnt == (self.config['check_rise'] - 1):
                self.extra['status'] = 'up'
                self.log.info("status UP", extra=self.extra)
                # Service exceeded all consecutive checks. Set its state
                # accordingly and put an item in queue. But do it only if
                # previous state was different, to prevent unnecessary bird
                # reloads when a service flaps between states.
                if check_state != 'UP':
                    check_state = 'UP'
                    operation = AddOperation(
                        name=self.name,
                        ip_prefix=self.ip_with_prefixlen,
                        ip_version=self.ip_version)
                    self.log.info("adding %s in the queue",
                                  self.ip_with_prefixlen,
                                  extra=self.extra)
                    self.action.put(operation)
            elif up_cnt < self.config['check_rise']:
                up_cnt += 1
                self.log.info("going up %s", up_cnt, extra=self.extra)
            else:
                self.log.error("up_cnt is higher %s, it's a BUG!",
                               up_cnt,
                               extra=self.extra)
            # A successful check breaks any streak of failures.
            down_cnt = 0
        else:
            if down_cnt == (self.config['check_fail'] - 1):
                self.extra['status'] = 'down'
                self.log.info("status DOWN", extra=self.extra)
                # Service exceeded all consecutive checks.
                # Set its state accordingly and put an item in queue.
                # But do it only if previous state was different, to
                # prevent unnecessary bird reloads when a service flaps
                # between states
                if check_state != 'DOWN':
                    check_state = 'DOWN'
                    del_operation = DeleteOperation(
                        name=self.name,
                        ip_prefix=self.ip_with_prefixlen,
                        ip_version=self.ip_version)
                    self.log.info("adding %s in the queue",
                                  self.ip_with_prefixlen,
                                  extra=self.extra)
                    self.action.put(del_operation)
            elif down_cnt < self.config['check_fail']:
                down_cnt += 1
                self.log.info("going down %s", down_cnt, extra=self.extra)
            else:
                # This branch guards the failure counter, so it must report
                # down_cnt (it used to report up_cnt by mistake).
                self.log.error("down_cnt is higher %s, it's a BUG!",
                               down_cnt,
                               extra=self.extra)
            # A failed check breaks any streak of successes.
            up_cnt = 0

        self.log.info("wall clock time %.3fms",
                      (time.time() - timestamp) * 1000,
                      extra=self.extra)

        # calculate sleep time
        sleep = start_offset - time.time() % interval
        if sleep < 0:
            sleep += interval
        self.log.debug("sleeping for %.3fsecs", sleep, extra=self.extra)
        time.sleep(sleep)
def run(self):
    """Monitor the service forever and queue prefix operations.

    Runs until it is killed from the main program.  After ``check_rise``
    consecutive successful checks the service is established as UP and an
    AddOperation for its IP prefix is queued; after ``check_fail``
    consecutive failed checks, or as soon as ``_ip_assigned()`` reports
    False, it is established as DOWN and a DeleteOperation is queued.  An
    operation is queued only when the established state changes, which
    prevents unnecessary configuration changes while a check is flapping.

    Returns:
        None.  Returns right away when ``_check_disabled()`` is true;
        otherwise loops forever.
    """
    successes = 0
    failures = 0
    # Established state of the check; becomes 'UP' or 'DOWN' only after
    # enough consecutive successful or unsuccessful health checks.
    state = 'Unknown'

    def enqueue(operation_cls):
        """Build an operation for this prefix, log it and queue it."""
        operation = operation_cls(name=self.name,
                                  ip_prefix=self.ip_with_prefixlen,
                                  log=self.log,
                                  ip_version=self.ip_version,
                                  **self.extra)
        self.log.info(
            "adding {i} in the queue".format(i=self.ip_with_prefixlen),
            **self.extra)
        self.action.put(operation)

    for option, setting in self.config.items():
        self.log.debug("{}={}:{}".format(option, setting, type(setting)),
                       json_blob=False)

    # A disabled check has nothing to monitor.
    if self._check_disabled():
        return

    interval = self.config['check_interval']
    # Position inside the interval at which the loop started; each
    # iteration sleeps until that position comes around again.
    start_offset = time.time() % interval

    while True:
        started_at = time.time()

        if not self._ip_assigned():
            successes = 0
            self.log.warning(
                "status DOWN because {i} isn't assigned to loopback "
                "interface.".format(i=self.ip_with_prefixlen),
                priority=80,
                status='down',
                **self.extra)
            if state != 'DOWN':
                state = 'DOWN'
                enqueue(DeleteOperation)
        elif self._run_check():
            rise = self.config['check_rise']
            if successes == rise - 1:
                self.log.info("status UP", status='up', **self.extra)
                # Enough consecutive successes.  Queue the change only
                # when the established state differs, so a flapping
                # service does not trigger needless bird reloads.
                if state != 'UP':
                    state = 'UP'
                    enqueue(AddOperation)
            elif successes < rise:
                successes += 1
                self.log.info("going up {n}".format(n=successes),
                              **self.extra)
            else:
                self.log.error(
                    "up_cnt higher, it's a BUG! {n}".format(n=successes),
                    priority=70,
                    **self.extra)
            failures = 0
        else:
            fail = self.config['check_fail']
            if failures == fail - 1:
                self.log.info("status DOWN",
                              priority=100,
                              status='down',
                              **self.extra)
                # Enough consecutive failures.  Queue the change only
                # when the established state differs, so a flapping
                # service does not trigger needless bird reloads.
                if state != 'DOWN':
                    state = 'DOWN'
                    enqueue(DeleteOperation)
            elif failures < fail:
                failures += 1
                self.log.info("going down {n}".format(n=failures),
                              priority=40,
                              **self.extra)
            else:
                self.log.error(
                    "down_cnt higher, it's a BUG! {n}".format(n=failures),
                    priority=70,
                    **self.extra)
            successes = 0

        elapsed_ms = (time.time() - started_at) * 1000
        self.log.info("wall clock time {t:.3f}ms".format(t=elapsed_ms),
                      json_blob=False)

        # Work out how long to sleep until the next slot.
        pause = start_offset - time.time() % interval
        if pause < 0:
            pause += interval
        self.log.debug("sleep for {t:.3f}secs".format(t=pause),
                       json_blob=False)
        time.sleep(pause)