def set_as_down(self, channels: ChannelSet, logger: logging.Logger) -> None:
    """Register a failed access attempt and send the matching alert.

    While ``self.is_down`` is still false, only an informational
    'experiencing delays' alert is sent and the start of the downtime is
    recorded. Once the node is flagged as down, the first call sends the
    initial 'cannot access' alert; later calls send 'still cannot access'
    reminders whenever the downtime alert limiter allows it. Validators
    alert with major severity, any other node with minor severity.

    :param channels: channel set the alerts are sent through
    :param logger: logger receiving the debug trace of this call
    """
    logger.debug('%s set_as_down: is_down(currently)=%s, channels=%s',
                 self, self.is_down, channels)

    if not self.is_down:
        # First failure: keep it informational in case this is only a
        # connection hiccup, but remember when the trouble started
        channels.alert_info(ExperiencingDelaysAlert(self.name))
        self._went_down_at = datetime.now()
        self._initial_downtime_alert_sent = False
        return

    if not self._initial_downtime_alert_sent:
        # Node was already down and nobody has been told yet
        send = channels.alert_major if self.is_validator \
            else channels.alert_minor
        send(CannotAccessNodeAlert(self.name))
        self._downtime_alert_limiter.did_task()
        self._initial_downtime_alert_sent = True
        return

    if self._downtime_alert_limiter.can_do_task():
        # Initial alert already sent; the limiter says a reminder is due
        downtime = strfdelta(datetime.now() - self._went_down_at,
                             "{hours}h, {minutes}m, {seconds}s")
        send = channels.alert_major if self.is_validator \
            else channels.alert_minor
        send(StillCannotAccessNodeAlert(
            self.name, self._went_down_at, downtime))
        self._downtime_alert_limiter.did_task()
def set_as_down(self, channels: ChannelSet, logger: logging.Logger) -> None:
    """Register a failed access attempt and escalate alerts step by step.

    Successive calls move through four stages:

    1. node not flagged as down yet: note the downtime start, clear the
       alert flags and start the initial-alert delayer; no alert is sent;
    2. 'experiencing delays' not sent yet: send it as an info alert;
    3. initial downtime alert not sent yet: send 'cannot access' once the
       initial-alert delayer allows it;
    4. afterwards: send 'still cannot access' reminders whenever the
       reminder limiter allows it.

    Validators alert with major severity, any other node with minor.

    :param channels: channel set the alerts are sent through
    :param logger: logger receiving the debug trace of this call
    """
    logger.debug('%s set_as_down: is_down(currently)=%s, channels=%s',
                 self, self.is_down, channels)

    # Stage 1: might only be a connection hiccup, so just take note
    if not self.is_down:
        self._went_down_at = datetime.now()
        self._experiencing_delays_alert_sent = False
        self._initial_downtime_alert_sent = False
        self._downtime_initial_alert_delayer.did_task()
        return

    # Stage 2: informational warning that the node is having trouble
    if not self._experiencing_delays_alert_sent:
        channels.alert_info(ExperiencingDelaysAlert(self.name))
        self._experiencing_delays_alert_sent = True
        return

    # Stages 3 and 4 differ only in the gate consulted and the alert sent
    is_reminder = self._initial_downtime_alert_sent
    if is_reminder:
        gate = self._downtime_reminder_limiter
        alert_type = StillCannotAccessNodeAlert
    else:
        gate = self._downtime_initial_alert_delayer
        alert_type = CannotAccessNodeAlert

    if not gate.can_do_task():
        return

    downtime = strfdelta(datetime.now() - self._went_down_at,
                         "{hours}h, {minutes}m, {seconds}s")
    send = channels.alert_major if self.is_validator else channels.alert_minor
    send(alert_type(self.name, self._went_down_at, downtime))

    # Either alert restarts the reminder countdown
    self._downtime_reminder_limiter.did_task()
    if not is_reminder:
        self._initial_downtime_alert_sent = True
def set_voting_power(self, new_voting_power: int, channels: ChannelSet,
                     logger: logging.Logger) -> None:
    """Store the new voting power and alert if it changed noticeably.

    No alert is sent when the previous value is ``None`` or equal to the new
    one. For a validator, a drop to 0 is a major alert and a rise from 0 is
    an info alert. Any other change is an info alert only if its absolute
    size exceeds ``self._change_in_voting_power_threshold``.

    :param new_voting_power: the latest voting power reading
    :param channels: channel set the alerts are sent through
    :param logger: logger receiving the debug trace of this call
    """
    # NOTE: This function assumes that the node is a validator

    previous = self.voting_power
    logger.debug(
        '%s set_voting_power: before=%s, new=%s, channels=%s',
        self, previous, new_voting_power, channels)

    if previous not in (None, new_voting_power):
        if self.is_validator and new_voting_power == 0:
            # N to 0
            channels.alert_major(VotingPowerDecreasedAlert(
                self.name, previous, new_voting_power))
        elif self.is_validator and previous == 0:
            # 0 to N
            channels.alert_info(VotingPowerIncreasedAlert(
                self.name, previous, new_voting_power))
        else:
            # Any other change only matters beyond the threshold
            delta = new_voting_power - previous
            if abs(delta) > self._change_in_voting_power_threshold:
                alert_type = VotingPowerIncreasedByAlert if delta > 0 \
                    else VotingPowerDecreasedByAlert
                channels.alert_info(alert_type(
                    self.name, previous, new_voting_power))

    # The stored value is refreshed whether or not an alert was sent
    self._voting_power = new_voting_power
def clear_missed_blocks(self, channels: ChannelSet,
                        logger: logging.Logger) -> None:
    """Reset the consecutive missed-block counter, alerting on recovery.

    An info alert is sent only when the node was missing blocks and the
    streak was longer than a single block.

    :param channels: channel set the alert is sent through
    :param logger: logger receiving the debug trace of this call
    """
    # NOTE: This function assumes that the node is a validator

    logger.debug('%s clear_missed_blocks: channels=%s', self, channels)

    # A single missed block never raised an alert, so its recovery is silent
    streak = self._consecutive_blocks_missed
    if self.is_missing_blocks and streak > 1:
        channels.alert_info(NoLongerMissingBlocksAlert(self.name, streak))

    self._consecutive_blocks_missed = 0
def set_catching_up(self, now_catching_up: bool, channels: ChannelSet,
                    logger: logging.Logger) -> None:
    """Store the catching-up state and alert when it flips.

    Starting to catch up is a minor alert; no longer catching up is an info
    alert. Nothing is sent when the state is unchanged.

    :param now_catching_up: the latest catching-up reading
    :param channels: channel set the alerts are sent through
    :param logger: logger receiving the debug trace of this call
    """
    was_catching_up = self.catching_up
    logger.debug('%s set_catching_up: before=%s, new=%s, channels=%s',
                 self, was_catching_up, now_catching_up, channels)

    # Only a transition between the two states is worth an alert
    if bool(was_catching_up) != bool(now_catching_up):
        if now_catching_up:
            channels.alert_minor(IsCatchingUpAlert(self.name))
        else:
            channels.alert_info(IsNoLongerCatchingUpAlert(self.name))

    self._catching_up = now_catching_up
def setUp(self) -> None:
    """Create a GitHubMonitor fixture backed by an emptied test Redis.

    Connectivity is checked before any other Redis command so that an
    unreachable server is reported through ``self.fail`` with a clear
    message.
    """
    self.logger = logging.getLogger('dummy')
    self.monitor_name = 'testmonitor'
    self.counter_channel = CounterChannel(self.logger)
    self.channel_set = ChannelSet([self.counter_channel])
    self.repo_name = 'dummy/repository/'
    self.releases_page = 'dummy.releases.page'
    self.redis_prefix = TestInternalConf.redis_github_releases_key_prefix

    self.db = TestInternalConf.redis_test_database
    self.host = TestUserConf.redis_host
    self.port = TestUserConf.redis_port
    self.password = TestUserConf.redis_password
    self.redis = RedisApi(self.logger, self.db, self.host,
                          self.port, self.password)

    # Ping first. NOTE(review): delete_all_unsafe() presumably raises on an
    # unreachable server too (confirm against RedisApi); running it before
    # the guarded ping would make the friendly failure below unreachable.
    try:
        self.redis.ping_unsafe()
    except RedisConnectionError:
        self.fail('Redis is not online.')

    # Start every test from an empty database
    self.redis.delete_all_unsafe()

    self.monitor = GitHubMonitor(self.monitor_name, self.channel_set,
                                 self.logger, self.redis, self.repo_name,
                                 self.releases_page, self.redis_prefix)
    # Swap in the test configuration after construction
    self.monitor._internal_conf = TestInternalConf
def setUp(self) -> None:
    """Build the fixtures shared by the node tests.

    Creates a full node and a validator with the same name, a counter
    channel wrapped in a channel set, dummy block and peer values, and
    boundary values read from ``TestInternalConf`` together with values
    two above and two below each danger and safe boundary.
    """
    conf = TestInternalConf
    error_margin = timedelta(seconds=0.5)
    boundary_offset = 2

    def make_node(node_type):
        # The two fixture nodes differ only in their node type
        return Node(name=self.node_name, rpc_url=None, node_type=node_type,
                    pubkey=None, network='', redis=None,
                    internal_conf=conf)

    self.node_name = 'testnode'
    self.logger = logging.getLogger('dummy')

    # Time intervals, plus a copy padded by a small error margin
    downtime_interval = conf.downtime_alert_time_interval
    self.downtime_alert_time_interval = downtime_interval
    self.downtime_alert_time_interval_with_error_margin = \
        downtime_interval + error_margin

    missed_blocks_interval = conf.max_missed_blocks_time_interval
    self.max_missed_blocks_time_interval = missed_blocks_interval
    self.max_missed_blocks_time_interval_with_error_margin = \
        missed_blocks_interval + error_margin
    self.max_missed_blocks_in_time_interval = \
        conf.max_missed_blocks_in_time_interval

    # Nodes under test
    self.full_node = make_node(NodeType.NON_VALIDATOR_FULL_NODE)
    self.validator = make_node(NodeType.VALIDATOR_FULL_NODE)

    # Channel set
    self.counter_channel = CounterChannel(self.logger)
    self.channel_set = ChannelSet([self.counter_channel])

    # Dummy values
    self.dummy_exception = DummyException()
    self.dummy_block_height = -1
    self.dummy_block_time = datetime.min + timedelta(days=123)
    self.dummy_block_time_after_time_interval = \
        self.dummy_block_time + \
        self.max_missed_blocks_time_interval_with_error_margin
    self.dummy_missing_validators = -1
    self.dummy_voting_power = 1000
    self.dummy_no_of_peers = 1000

    # Validator peer boundaries and values on either side of them
    validator_danger = conf.validator_peer_danger_boundary
    self.peers_validator_danger_boundary = validator_danger
    self.peers_less_than_validator_danger_boundary = \
        validator_danger - boundary_offset
    self.peers_more_than_validator_danger_boundary = \
        validator_danger + boundary_offset

    validator_safe = conf.validator_peer_safe_boundary
    self.peers_validator_safe_boundary = validator_safe
    self.peers_less_than_validator_safe_boundary = \
        validator_safe - boundary_offset
    self.peers_more_than_validator_safe_boundary = \
        validator_safe + boundary_offset

    # Full node peer boundary and values on either side of it
    full_node_danger = conf.full_node_peer_danger_boundary
    self.peers_full_node_danger_boundary = full_node_danger
    self.peers_less_than_full_node_danger_boundary = \
        full_node_danger - boundary_offset
    self.peers_more_than_full_node_danger_boundary = \
        full_node_danger + boundary_offset
def set_as_up(self, channels: ChannelSet, logger: logging.Logger) -> None:
    """Register a successful access and clear any recorded downtime.

    Does nothing beyond logging when the node is not flagged as down.
    Otherwise a 'now accessible' info alert is sent, but only if the
    initial downtime alert had gone out, and the downtime state is reset.

    :param channels: channel set the alert is sent through
    :param logger: logger receiving the debug trace of this call
    """
    logger.debug('%s set_as_up: is_down(currently)=%s, channels=%s',
                 self, self.is_down, channels)

    if not self.is_down:
        return

    # Recovery is only announced if the outage itself was announced
    if self._initial_downtime_alert_sent:
        went_down_at = self._went_down_at
        downtime = strfdelta(datetime.now() - went_down_at,
                             "{hours}h, {minutes}m, {seconds}s")
        channels.alert_info(
            NowAccessibleAlert(self.name, went_down_at, downtime))

    # Reset downtime-related values
    self._downtime_alert_limiter.reset()
    self._went_down_at = None
def setUp(self) -> None:
    """Create a PeriodicAliveReminder fixture that has no Redis instance.

    The reminder is built with a zero-length interval, a channel set
    holding a single counter channel, the mute key from
    ``TestInternalConf``, and ``None`` in place of Redis.
    """
    logger = logging.getLogger('dummy')
    counter_channel = CounterChannel(logger)
    channel_set = ChannelSet([counter_channel])
    mute_key = TestInternalConf.redis_periodic_alive_reminder_mute_key

    self.alerter_name = 'testalerter'
    self.logger = logger
    self.counter_channel = counter_channel
    self.channel_set = channel_set
    self.mute_key = mute_key

    # Last argument is the Redis instance, deliberately absent here
    self.par = PeriodicAliveReminder(timedelta(), channel_set, mute_key, None)
def get_periodic_alive_reminder_channel_set(
        channel_name: str, logger_general: logging.Logger,
        redis: Optional[RedisApi], alerts_log_file: str,
        internal_conf: InternalConfig = InternalConf,
        user_conf: UserConfig = UserConf) -> ChannelSet:
    """Build the channel set used by the periodic alive reminder.

    The console and log channels are always included. Telegram and email
    channels are added when both their alerts flag and their enabled flag
    are set in ``user_conf``. The telegram channel gets a backup channel set
    made of the default channels plus the email channel, if there is one.

    :param channel_name: name given to every channel that is created
    :param logger_general: logger handed to every channel
    :param redis: Redis instance handed to the telegram and email channels,
        or ``None``
    :param alerts_log_file: file the log channel writes alerts to
    :param internal_conf: internal configuration, used for the log channel
    :param user_conf: user configuration holding the enable flags
    :return: a channel set with all enabled channels
    """
    # Initialise list of channels with default channels
    channels = [
        _get_console_channel(channel_name, logger_general),
        _get_log_channel(alerts_log_file, channel_name, logger_general,
                         internal_conf)
    ]

    # Initialise backup channel set with its own copy of the default
    # channels. NOTE(review): whether ChannelSet copies its input is not
    # visible here; if it keeps the reference, sharing `channels` would make
    # the telegram channel its own backup and would append the email channel
    # to the main list a second time via add_channel().
    backup_channels_for_telegram = ChannelSet(list(channels))

    # Add telegram alerts to channel set if they are enabled from config file
    if user_conf.telegram_alerts_enabled and user_conf.telegram_enabled:
        telegram_channel = _get_telegram_channel(
            channel_name, logger_general, redis,
            backup_channels_for_telegram, user_conf)
        channels.append(telegram_channel)

    # Add email alerts to channel set if they are enabled from config file
    if user_conf.email_alerts_enabled and user_conf.email_enabled:
        email_channel = _get_email_channel(channel_name, logger_general,
                                           redis, user_conf)
        channels.append(email_channel)

        # Set up email channel as backup channel for telegram
        backup_channels_for_telegram.add_channel(email_channel)

    return ChannelSet(channels)
def add_missed_block(self, block_height: int, block_time: datetime,
                     missing_validators: int, channels: ChannelSet,
                     logger: logging.Logger) -> None:
    """Record one more consecutive missed block and alert as appropriate.

    Consecutive-miss alerts, with ``danger`` being
    ``self._missed_blocks_danger_boundary``:

    * first missed block: no alert;
    * 2 or more but fewer than ``danger``: info alert;
    * exactly ``danger``: minor alert;
    * every multiple of 10 that is at least ``max(10, danger)``: major
      alert, after which the timed tracker is reset.

    Independently, a major alert is sent (and the timed tracker reset) when
    the timed tracker reports too many misses at ``block_time``.

    :param block_height: height of the missed block, included in the alerts
    :param block_time: time of the missed block, fed to the timed tracker
    :param missing_validators: value included in the alerts and debug trace
    :param channels: channel set the alerts are sent through
    :param logger: logger receiving the debug trace of this call
    """
    # NOTE: This function assumes that the node is a validator

    # Calculate the actual blocks missed as of when this function was called
    blocks_missed = self._consecutive_blocks_missed + 1

    # Variable alias for improved readability
    danger = self._missed_blocks_danger_boundary

    logger.debug(
        '%s add_missed_block: before=%s, new=%s, missing_validators = %s, '
        'channels=%s', self, self.consecutive_blocks_missed_so_far,
        blocks_missed, missing_validators, channels)

    # Let timed tracker know that block missed
    self._timed_block_miss_tracker.action_happened(at_time=block_time)

    # Alert (varies depending on whether was already missing blocks)
    if not self.is_missing_blocks:
        pass  # Do not alert on first missed block
    elif 2 <= blocks_missed < danger:
        # 2+ blocks missed, danger boundary not reached yet
        channels.alert_info(
            MissedBlocksAlert(self.name, blocks_missed, block_height,
                              missing_validators))
    elif blocks_missed == danger:
        # Reached the danger boundary. Compared against the configured
        # boundary, not a literal 5, so the alert follows the configuration.
        channels.alert_minor(
            MissedBlocksAlert(self.name, blocks_missed, block_height,
                              missing_validators))
    elif blocks_missed >= max(10, danger) and blocks_missed % 10 == 0:
        # Every (10N)th block missed for N >= 1 inside danger range
        channels.alert_major(
            MissedBlocksAlert(self.name, blocks_missed, block_height,
                              missing_validators))
        # A major alert was just sent, so restart the timed count to avoid
        # a second major alert from the timed check below
        self._timed_block_miss_tracker.reset()

    if self._timed_block_miss_tracker.too_many_occurrences(block_time):
        # More blocks missed than is acceptable in the time interval
        blocks_in_interval = self._timed_block_miss_tracker.max_occurrences
        time_interval = self._timed_block_miss_tracker.time_interval_pretty
        channels.alert_major(
            TimedMissedBlocksAlert(self.name, blocks_in_interval,
                                   time_interval, block_height,
                                   missing_validators))
        self._timed_block_miss_tracker.reset()

    # Update consecutive blocks missed
    self._consecutive_blocks_missed = blocks_missed
def setUp(self) -> None:
    """Create a GitHubMonitor fixture that has no Redis instance.

    The Redis connection settings are still stored on the test case, but the
    monitor itself is built with ``None`` in place of Redis and with
    ``TestInternalConf`` as its last constructor argument.
    """
    logger = logging.getLogger('dummy')
    counter_channel = CounterChannel(logger)
    channel_set = ChannelSet([counter_channel])

    self.logger = logger
    self.monitor_name = 'testmonitor'
    self.counter_channel = counter_channel
    self.channel_set = channel_set
    self.repo_name = 'dummy/repository/'
    self.releases_page = 'dummy.releases.page'
    self.redis_prefix = TestInternalConf.redis_github_releases_key_prefix

    # Connection settings are recorded but no connection is opened here
    self.db = TestInternalConf.redis_test_database
    self.host = TestUserConf.redis_host
    self.port = TestUserConf.redis_port
    self.password = TestUserConf.redis_password

    self.monitor = GitHubMonitor(
        self.monitor_name, channel_set, logger, None, self.repo_name,
        self.releases_page, self.redis_prefix, TestInternalConf)
def setUp(self) -> None:
    """Create a PeriodicAliveReminder fixture backed by an emptied Redis.

    Connectivity is checked before any other Redis command so that an
    unreachable server is reported through ``self.fail`` with a clear
    message.
    """
    self.alerter_name = 'testalerter'
    self.logger = logging.getLogger('dummy')
    self.counter_channel = CounterChannel(self.logger)
    self.channel_set = ChannelSet([self.counter_channel])

    self.db = TestInternalConf.redis_test_database
    self.host = TestUserConf.redis_host
    self.port = TestUserConf.redis_port
    self.password = TestUserConf.redis_password
    self.redis = RedisApi(self.logger, self.db, self.host,
                          self.port, self.password)

    # Ping first. NOTE(review): delete_all_unsafe() presumably raises on an
    # unreachable server too (confirm against RedisApi); running it before
    # the guarded ping would make the friendly failure below unreachable.
    try:
        self.redis.ping_unsafe()
    except RedisConnectionError:
        self.fail('Redis is not online.')

    # Start every test from an empty database
    self.redis.delete_all_unsafe()

    self.mute_key = TestInternalConf.redis_periodic_alive_reminder_mute_key
    self.par = PeriodicAliveReminder(
        timedelta(), self.channel_set, self.mute_key, self.redis)
def set_no_of_peers(self, new_no_of_peers: int, channels: ChannelSet,
                    logger: logging.Logger) -> None:
    """Store the new peer count and alert according to the node's boundaries.

    No alert is sent when the previous count is ``None`` or unchanged.

    Validators (danger and safe boundaries):

    * new count at or below the safe boundary: info on an increase, minor
      on a decrease that stays above the danger boundary, major on a
      decrease to or below it;
    * new count above the safe boundary: a single info alert when the
      previous count was still at or below it.

    Other nodes (danger boundary only):

    * increase: info while still at or below the danger boundary, and an
      info alert when the boundary is crossed upwards;
    * decrease: minor only when the new count is at or below the boundary.

    :param new_no_of_peers: the latest peer count
    :param channels: channel set the alerts are sent through
    :param logger: logger receiving the debug trace of this call
    """
    previous = self.no_of_peers
    logger.debug(
        '%s set_no_of_peers: before=%s, new=%s, channels=%s',
        self, previous, new_no_of_peers, channels)

    # Boundaries that apply to this kind of node
    validator = self.is_validator
    if validator:
        danger = self._validator_peer_danger_boundary
        safe = self._validator_peer_safe_boundary
    else:
        danger = self._full_node_peer_danger_boundary
        safe = None

    if previous not in (None, new_no_of_peers):
        # The count changed, so anything that is not an increase is a drop
        increased = new_no_of_peers > previous

        if validator:
            if new_no_of_peers > safe:
                # Only the first move out of the safe range is reported
                if self._no_of_peers <= safe:
                    channels.alert_info(
                        PeersIncreasedOutsideSafeRangeAlert(self.name, safe))
            elif increased:
                channels.alert_info(PeersIncreasedAlert(
                    self.name, previous, new_no_of_peers))
            elif new_no_of_peers > danger:
                # Decrease outside danger range
                channels.alert_minor(PeersDecreasedAlert(
                    self.name, previous, new_no_of_peers))
            else:
                # Decrease inside danger range
                channels.alert_major(PeersDecreasedAlert(
                    self.name, previous, new_no_of_peers))
        elif increased:
            if new_no_of_peers <= danger:
                # Increase inside danger range
                channels.alert_info(PeersIncreasedAlert(
                    self.name, previous, new_no_of_peers))
            elif previous <= danger:
                # Increase that leaves the danger range
                channels.alert_info(
                    PeersIncreasedOutsideDangerRangeAlert(self.name, danger))
        elif new_no_of_peers <= danger:
            # Decrease inside danger range; decreases above it are silent
            channels.alert_minor(PeersDecreasedAlert(
                self.name, previous, new_no_of_peers))

    # Update number of peers
    self._no_of_peers = new_no_of_peers
def get_full_channel_set(channel_name: str, logger_general: logging.Logger,
                         redis: Optional[RedisApi], alerts_log_file: str,
                         internal_conf: InternalConfig = InternalConf,
                         user_conf: UserConfig = UserConf) -> ChannelSet:
    """Build the channel set containing every alerting channel that is enabled.

    The console and log channels are always included. Telegram, email and
    twilio channels are added when their ``*_alerts_enabled`` flag is set in
    ``user_conf``. The telegram and twilio channels each get a backup
    channel set: the default channels plus email (if enabled), and for
    twilio also telegram (if enabled).

    :param channel_name: name given to every channel that is created
    :param logger_general: logger handed to every channel
    :param redis: Redis instance handed to the telegram, email and twilio
        channels, or ``None``
    :param alerts_log_file: file the alerts logger writes to
    :param internal_conf: internal configuration (logging level, twilio
        settings)
    :param user_conf: user configuration holding flags and credentials
    :return: a channel set with all enabled channels
    """
    # Logger initialisation
    logger_alerts = create_logger(alerts_log_file, 'alerts',
                                  internal_conf.logging_level)

    # Initialise list of channels with default channels
    channels = [
        ConsoleChannel(channel_name, logger_general),
        LogChannel(channel_name, logger_general, logger_alerts)
    ]

    # Initialise backup channel sets, each with its own copy of the default
    # channels. NOTE(review): whether ChannelSet copies its input is not
    # visible here; if it keeps the reference, sharing `channels` would turn
    # every channel appended below into a backup of itself and would append
    # the backups to the main list again via add_channel().
    backup_channels_for_telegram = ChannelSet(list(channels))
    backup_channels_for_twilio = ChannelSet(list(channels))

    # Add telegram alerts to channel set
    if user_conf.telegram_alerts_enabled:
        telegram_bot = TelegramBotApi(user_conf.telegram_alerts_bot_token,
                                      user_conf.telegram_alerts_bot_chat_id)
        telegram_channel = TelegramChannel(channel_name, logger_general,
                                           redis, telegram_bot,
                                           backup_channels_for_telegram)
        channels.append(telegram_channel)
    else:
        telegram_channel = None

    # Add email alerts to channel set
    if user_conf.email_alerts_enabled:
        email = EmailSender(user_conf.email_smtp, user_conf.email_from)
        email_channel = EmailChannel(channel_name, logger_general,
                                     redis, email, user_conf.email_to)
        channels.append(email_channel)
    else:
        email_channel = None

    # Add twilio alerts to channel set
    if user_conf.twilio_alerts_enabled:
        twilio = TwilioApi(user_conf.twilio_account_sid,
                           user_conf.twilio_auth_token)
        twilio_channel = TwilioChannel(channel_name, logger_general, redis,
                                       twilio, user_conf.twilio_phone_number,
                                       user_conf.twilio_dial_numbers,
                                       internal_conf.twiml_instructions_url,
                                       internal_conf.redis_twilio_snooze_key,
                                       backup_channels_for_twilio)
        channels.append(twilio_channel)

    # Set up email channel as backup channel for telegram and twilio
    if email_channel is not None:
        backup_channels_for_telegram.add_channel(email_channel)
        backup_channels_for_twilio.add_channel(email_channel)

    # Set up telegram channel as backup channel for twilio
    if telegram_channel is not None:
        backup_channels_for_twilio.add_channel(telegram_channel)

    return ChannelSet(channels)