def set_as_down(self, channels: ChannelSet, logger: logging.Logger) -> None:
    """Register a failed attempt to reach the node and alert accordingly.

    The action taken depends on the state found on entry:

    * node not yet marked as down: an informational "experiencing delays"
      alert is sent, the current time is stored as the start of the
      downtime and the initial-alert flag is cleared;
    * node down, initial alert still pending: a "cannot access node" alert
      is sent, the downtime alert limiter is notified and the flag is set;
    * node down, initial alert already sent: a "still cannot access node"
      reminder (with the elapsed downtime) is sent whenever the downtime
      alert limiter reports that it can do its task.

    Downtime alerts are raised as major for validators and as minor for
    any other node.

    :param channels: channel set through which alerts are sent
    :param logger: logger that receives the debug trace
    """
    logger.debug('%s set_as_down: is_down(currently)=%s, channels=%s',
                 self, self.is_down, channels)

    if not self.is_down:
        # First failure: only an informational alert for now, in case this
        # is just a connection hiccup, but remember when the downtime began
        channels.alert_info(ExperiencingDelaysAlert(self.name))
        self._went_down_at = datetime.now()
        self._initial_downtime_alert_sent = False
        return

    if not self._initial_downtime_alert_sent:
        # Second consecutive failure: send the initial downtime alert
        notify = (channels.alert_major if self.is_validator
                  else channels.alert_minor)
        notify(CannotAccessNodeAlert(self.name))
        self._downtime_alert_limiter.did_task()
        self._initial_downtime_alert_sent = True
        return

    if self._downtime_alert_limiter.can_do_task():
        # Still down and the limiter allows another alert: send a reminder
        elapsed = datetime.now() - self._went_down_at
        downtime = strfdelta(elapsed, "{hours}h, {minutes}m, {seconds}s")
        notify = (channels.alert_major if self.is_validator
                  else channels.alert_minor)
        notify(StillCannotAccessNodeAlert(
            self.name, self._went_down_at, downtime))
        self._downtime_alert_limiter.did_task()
def set_no_of_peers(self, new_no_of_peers: int, channels: ChannelSet,
                    logger: logging.Logger) -> None:
    """Store a new peer count, alerting if it changed in a notable way.

    No alert is sent when no previous count is known (``None``) or when
    the count is unchanged. Otherwise:

    * validator, new count at or below the safe boundary: info on an
      increase; on a decrease, minor while still above the danger
      boundary and major once at or below it;
    * validator, new count above the safe boundary: info only when the
      previous count was at or below the safe boundary;
    * non-validator, increase: info while still at or below the danger
      boundary, and info when the count crosses above it;
    * non-validator, decrease: minor when the new count is at or below
      the danger boundary, nothing otherwise.

    The stored peer count is updated in every case.

    :param new_no_of_peers: the freshly observed number of peers
    :param channels: channel set through which alerts are sent
    :param logger: logger that receives the debug trace
    """
    logger.debug('%s set_no_of_peers: before=%s, new=%s, channels=%s',
                 self, self.no_of_peers, new_no_of_peers, channels)

    # Pick the boundaries that apply to this kind of node
    validator = self.is_validator
    if validator:
        danger = self._validator_peer_danger_boundary
        safe = self._validator_peer_safe_boundary
    else:
        danger = self._full_node_peer_danger_boundary
        safe = None

    previous = self.no_of_peers
    changed = previous not in (None, new_no_of_peers)

    if changed and validator:
        if new_no_of_peers > safe:
            # Above the safe boundary: only report the crossing itself.
            # NOTE(review): this reads the backing attribute directly
            # rather than the no_of_peers accessor used elsewhere.
            if self._no_of_peers <= safe:
                channels.alert_info(
                    PeersIncreasedOutsideSafeRangeAlert(self.name, safe))
        elif new_no_of_peers > previous:
            # Increase that stays at or below the safe boundary
            channels.alert_info(PeersIncreasedAlert(
                self.name, previous, new_no_of_peers))
        elif new_no_of_peers > danger:
            # Decrease that stays above the danger boundary
            channels.alert_minor(PeersDecreasedAlert(
                self.name, previous, new_no_of_peers))
        else:
            # Decrease into (or within) the danger range
            channels.alert_major(PeersDecreasedAlert(
                self.name, previous, new_no_of_peers))
    elif changed:
        if new_no_of_peers > previous:
            if new_no_of_peers <= danger:
                # Increase that is still inside the danger range
                channels.alert_info(PeersIncreasedAlert(
                    self.name, previous, new_no_of_peers))
            elif previous <= danger:
                # Increase that leaves the danger range
                channels.alert_info(PeersIncreasedOutsideDangerRangeAlert(
                    self.name, danger))
        elif new_no_of_peers <= danger:
            # Decrease into (or within) the danger range; decreases that
            # stay above the danger boundary are deliberately not reported
            channels.alert_minor(PeersDecreasedAlert(
                self.name, previous, new_no_of_peers))

    # Remember the new count for the next comparison
    self._no_of_peers = new_no_of_peers
def set_as_down(self, channels: ChannelSet, logger: logging.Logger) -> None:
    """Register a failed attempt to reach the node and alert accordingly.

    Each call advances through at most one of the following stages,
    depending on the state found on entry:

    1. node not yet marked as down: nothing is sent; the start of the
       downtime is recorded, both alert flags are cleared and the
       initial-alert delayer is notified;
    2. "experiencing delays" not yet sent: that informational alert is
       sent and flagged;
    3. initial downtime alert not yet sent: once the initial-alert delayer
       reports it can do its task, a "cannot access node" alert is sent,
       the reminder limiter is notified and the flag is set;
    4. otherwise: a "still cannot access node" reminder is sent whenever
       the reminder limiter reports it can do its task.

    Downtime alerts (stages 3 and 4) are major for validators and minor
    for any other node, and carry the downtime start and elapsed time.

    :param channels: channel set through which alerts are sent
    :param logger: logger that receives the debug trace
    """
    logger.debug('%s set_as_down: is_down(currently)=%s, channels=%s',
                 self, self.is_down, channels)

    if not self.is_down:
        # Stage 1: might only be a connection hiccup, so stay silent but
        # take note of when the downtime started
        self._went_down_at = datetime.now()
        self._experiencing_delays_alert_sent = False
        self._initial_downtime_alert_sent = False
        self._downtime_initial_alert_delayer.did_task()
        return

    if not self._experiencing_delays_alert_sent:
        # Stage 2: warn, at informational level, that the node is slow
        channels.alert_info(ExperiencingDelaysAlert(self.name))
        self._experiencing_delays_alert_sent = True
        return

    if not self._initial_downtime_alert_sent:
        # Stage 3: wait until the delayer allows the initial alert
        if not self._downtime_initial_alert_delayer.can_do_task():
            return
        elapsed = datetime.now() - self._went_down_at
        downtime = strfdelta(elapsed, "{hours}h, {minutes}m, {seconds}s")
        notify = (channels.alert_major if self.is_validator
                  else channels.alert_minor)
        notify(CannotAccessNodeAlert(
            self.name, self._went_down_at, downtime))
        self._downtime_reminder_limiter.did_task()
        self._initial_downtime_alert_sent = True
        return

    # Stage 4: periodic reminders, rate-limited by the reminder limiter
    if self._downtime_reminder_limiter.can_do_task():
        elapsed = datetime.now() - self._went_down_at
        downtime = strfdelta(elapsed, "{hours}h, {minutes}m, {seconds}s")
        notify = (channels.alert_major if self.is_validator
                  else channels.alert_minor)
        notify(StillCannotAccessNodeAlert(
            self.name, self._went_down_at, downtime))
        self._downtime_reminder_limiter.did_task()
def set_catching_up(self, now_catching_up: bool, channels: ChannelSet,
                    logger: logging.Logger) -> None:
    """Store the node's catching-up state and alert when it flips.

    A minor alert is sent when the node starts catching up and an info
    alert when it stops; an unchanged state sends nothing. The stored
    state is updated in every case.

    :param now_catching_up: whether the node is currently catching up
    :param channels: channel set through which alerts are sent
    :param logger: logger that receives the debug trace
    """
    logger.debug('%s set_catching_up: before=%s, new=%s, channels=%s',
                 self, self.catching_up, now_catching_up, channels)

    was_catching_up = self.catching_up

    if now_catching_up and not was_catching_up:
        # Transition: started catching up
        channels.alert_minor(IsCatchingUpAlert(self.name))
    elif was_catching_up and not now_catching_up:
        # Transition: finished catching up
        channels.alert_info(IsNoLongerCatchingUpAlert(self.name))

    # Remember the new state for the next comparison
    self._catching_up = now_catching_up
def add_missed_block(self, block_height: int, block_time: datetime,
                     missing_validators: int, channels: ChannelSet,
                     logger: logging.Logger) -> None:
    """Record one more consecutively missed block and alert accordingly.

    NOTE: This function assumes that the node is a validator.

    With ``danger`` being the configured missed-blocks danger boundary
    and ``n`` the consecutive miss count including this block:

    * nothing is sent for the first missed block;
    * info while ``2 <= n < danger``;
    * minor when ``n == danger`` (the danger range has been reached);
    * major when ``n >= max(10, danger)`` and ``n`` is a multiple of 10,
      after which the timed miss tracker is reset.

    Independently of the above, the miss is fed to the timed miss tracker
    and, if the tracker reports too many occurrences as of ``block_time``,
    a major timed-missed-blocks alert is sent and the tracker is reset.

    The consecutive miss counter is updated in every case.

    :param block_height: height of the block that was missed
    :param block_time: timestamp of the missed block, passed to the
        timed miss tracker
    :param missing_validators: number of validators missing from the
        block, forwarded to the alerts
    :param channels: channel set through which alerts are sent
    :param logger: logger that receives the debug trace
    """
    # Consecutive misses as of this call, including the current block
    blocks_missed = self._consecutive_blocks_missed + 1

    # Variable alias for improved readability
    danger = self._missed_blocks_danger_boundary

    logger.debug(
        '%s add_missed_block: before=%s, new=%s, missing_validators = %s, '
        'channels=%s', self, self.consecutive_blocks_missed_so_far,
        blocks_missed, missing_validators, channels)

    # Let the timed tracker know that a block was missed
    self._timed_block_miss_tracker.action_happened(at_time=block_time)

    # Alert (varies depending on whether was already missing blocks);
    # the first missed block is deliberately not alerted on
    if self.is_missing_blocks:
        if 2 <= blocks_missed < danger:
            # 2+ blocks missed, still below the danger boundary
            channels.alert_info(MissedBlocksAlert(
                self.name, blocks_missed, block_height,
                missing_validators))
        elif blocks_missed == danger:
            # Reached the danger range. Compared against the configured
            # boundary rather than a literal 5, so the alert fires at the
            # boundary whatever value it is configured to.
            channels.alert_minor(MissedBlocksAlert(
                self.name, blocks_missed, block_height,
                missing_validators))
        elif blocks_missed >= max(10, danger) and blocks_missed % 10 == 0:
            # Every (10N)th block missed for N >= 1 inside danger range
            channels.alert_major(MissedBlocksAlert(
                self.name, blocks_missed, block_height,
                missing_validators))
            # Presumably reset so the timed check below does not raise a
            # second major alert for the same block -- TODO confirm
            self._timed_block_miss_tracker.reset()

    # More blocks missed than is acceptable in the time interval
    if self._timed_block_miss_tracker.too_many_occurrences(block_time):
        blocks_in_interval = self._timed_block_miss_tracker.max_occurrences
        time_interval = self._timed_block_miss_tracker.time_interval_pretty
        channels.alert_major(TimedMissedBlocksAlert(
            self.name, blocks_in_interval, time_interval, block_height,
            missing_validators))
        self._timed_block_miss_tracker.reset()

    # Update consecutive blocks missed
    self._consecutive_blocks_missed = blocks_missed