def set_as_down(self, channels: ChannelSet, logger: logging.Logger) -> None:
    """Mark this node as inaccessible, raising the appropriate downtime alert.

    On the first observation only an info alert is raised (it may be a
    connection hiccup); once downtime is confirmed, the severity depends on
    whether the node is a validator, and reminders are rate-limited.
    """
    logger.debug('%s set_as_down: is_down(currently)=%s, channels=%s',
                 self, self.is_down, channels)

    if not self.is_down:
        # First observation: record when downtime started but keep the
        # alert severity low in case this is only a connection hiccup.
        channels.alert_info(ExperiencingDelaysAlert(self.name))
        self._went_down_at = datetime.now().timestamp()
        self._initial_downtime_alert_sent = False
    elif not self._initial_downtime_alert_sent:
        # Downtime confirmed: validators warrant a critical alert.
        raise_alert = (channels.alert_critical if self.is_validator
                       else channels.alert_warning)
        raise_alert(CannotAccessNodeAlert(self.name))
        self._downtime_alert_limiter.did_task()
        self._initial_downtime_alert_sent = True
    elif self._downtime_alert_limiter.can_do_task():
        # Periodic reminder that the node is still down, with the total
        # downtime rendered as a human-readable duration.
        since = datetime.fromtimestamp(self._went_down_at)
        duration = strfdelta(datetime.now() - since,
                             "{hours}h, {minutes}m, {seconds}s")
        raise_alert = (channels.alert_critical if self.is_validator
                       else channels.alert_warning)
        raise_alert(StillCannotAccessNodeAlert(self.name, since, duration))
        self._downtime_alert_limiter.did_task()
def update_finalized_block_height(self, new_finalized_height: int,
                                  logger: logging.Logger,
                                  channels: ChannelSet):
    """Record a new finalized block height and alert if the height is stuck.

    If the reported height differs from the stored one, the stored height and
    the change/check timestamps are refreshed, and any previously raised
    "height did not change" warning is cleared with an info alert. If the
    height is unchanged, a warning is sent once the configured first-warning
    interval elapses, after which the alert limiter periodically re-raises the
    alert (critical for validators, warning otherwise).
    """
    logger.debug(
        '%s update_finalized_block_height: finalized_block_height'
        ' (currently)=%s', self, self._finalized_block_height)
    current_timestamp = datetime.now().timestamp()
    if self._finalized_block_height != new_finalized_height:
        if self.is_no_change_in_height_warning_sent:
            # Height is moving again: clear the warning state and inform.
            self._no_change_in_height_warning_sent = False
            channels.alert_info(
                NodeFinalizedBlockHeightHasNowBeenUpdatedAlert(self.name))
        if self._finalized_block_height > new_finalized_height:
            # BUGFIX: this previously logged the old (pre-update) height as
            # the value the node "decreased to"; report both old and new.
            logger.info(
                'The finalized height of node {} decreased from {} to {}.'
                .format(self, self._finalized_block_height,
                        new_finalized_height))
        self._finalized_block_height = new_finalized_height
        self._time_of_last_height_change = current_timestamp
        self._time_of_last_height_check_activity = current_timestamp
        self._finalized_height_alert_limiter.set_last_time_that_did_task(
            datetime.fromtimestamp(current_timestamp))
    else:
        timestamp_difference = current_timestamp - \
            self._time_of_last_height_change
        time_interval = strfdelta(
            timedelta(seconds=int(timestamp_difference)),
            "{hours}h, {minutes}m, {seconds}s")
        if not self.is_no_change_in_height_warning_sent and \
                timestamp_difference > \
                self._no_change_in_height_first_warning_seconds:
            # First time the no-change threshold is exceeded: warn once.
            self._no_change_in_height_warning_sent = True
            channels.alert_warning(
                NodeFinalizedBlockHeightDidNotChangeInAlert(
                    self.name, time_interval))
        elif self._finalized_height_alert_limiter.can_do_task() and \
                self.is_no_change_in_height_warning_sent:
            # Periodic re-alert while the height remains stuck.
            if self.is_validator:
                channels.alert_critical(
                    NodeFinalizedBlockHeightDidNotChangeInAlert(
                        self.name, time_interval))
            else:
                channels.alert_warning(
                    NodeFinalizedBlockHeightDidNotChangeInAlert(
                        self.name, time_interval))
            self._time_of_last_height_check_activity = current_timestamp
            self._finalized_height_alert_limiter. \
                set_last_time_that_did_task(
                    datetime.fromtimestamp(current_timestamp))
def set_no_of_peers(self, new_no_of_peers: int, channels: ChannelSet,
                    logger: logging.Logger) -> None:
    """Update the stored peer count, alerting on changes relative to the
    configured boundaries (validators and full nodes use different
    boundaries and severities)."""
    logger.debug('%s set_no_of_peers: before=%s, new=%s, channels=%s',
                 self, self.no_of_peers, new_no_of_peers, channels)

    # Aliases for improved readability
    if self.is_validator:
        danger = self._validator_peer_danger_boundary
        safe = self._validator_peer_safe_boundary
    else:
        danger = self._full_node_peer_danger_boundary
        safe = None

    changed = self.no_of_peers is not None and \
        self.no_of_peers != new_no_of_peers
    if changed:
        if self.is_validator:
            if new_no_of_peers <= self._validator_peer_safe_boundary:
                # At or below the safe boundary: severity scales with
                # direction and proximity to the danger boundary.
                if new_no_of_peers > self.no_of_peers:
                    channels.alert_info(PeersIncreasedAlert(
                        self.name, self.no_of_peers, new_no_of_peers))
                elif new_no_of_peers > danger:
                    channels.alert_warning(PeersDecreasedAlert(
                        self.name, self.no_of_peers, new_no_of_peers))
                else:
                    channels.alert_critical(PeersDecreasedAlert(
                        self.name, self.no_of_peers, new_no_of_peers))
            elif self._no_of_peers <= self._validator_peer_safe_boundary \
                    < new_no_of_peers:
                # Crossed above the safe boundary for the first time.
                channels.alert_info(
                    PeersIncreasedOutsideSafeRangeAlert(self.name, safe))
        else:
            if new_no_of_peers > self.no_of_peers:
                # Increase: only interesting while inside the danger range
                # or when leaving it.
                if new_no_of_peers <= danger:
                    channels.alert_info(PeersIncreasedAlert(
                        self.name, self.no_of_peers, new_no_of_peers))
                elif self.no_of_peers <= danger < new_no_of_peers:
                    channels.alert_info(
                        PeersIncreasedOutsideDangerRangeAlert(
                            self.name, danger))
            elif new_no_of_peers <= danger:
                # Decrease into or inside the danger range.
                channels.alert_warning(PeersDecreasedAlert(
                    self.name, self.no_of_peers, new_no_of_peers))

    # Update number of peers
    self._no_of_peers = new_no_of_peers
def set_open_file_descriptors(self, new_open_file_descriptors: int,
                              channels: ChannelSet,
                              logger: logging.Logger) \
        -> None:
    """Store the latest open-file-descriptors reading and alert when the
    value changes, with severity determined by the safe/danger boundaries
    (validator and non-validator nodes use different boundaries)."""
    logger.debug(
        '%s set_open_file_descriptors: '
        'set_open_file_descriptors(currently)=%s, channels=%s',
        self, self._open_file_descriptors, channels)

    previous = self._open_file_descriptors
    if previous is None:
        # First reading: nothing to compare against yet.
        self._open_file_descriptors = new_open_file_descriptors
        return

    if self.node.is_validator:
        danger = self._validator_open_file_descriptors_danger_boundary
        safe = self._validator_open_file_descriptors_safe_boundary
    else:
        danger = self._node_open_file_descriptors_danger_boundary
        safe = self._node_open_file_descriptors_safe_boundary

    if previous != new_open_file_descriptors:
        increased = new_open_file_descriptors > previous
        if safe <= new_open_file_descriptors < danger:
            # Warning range: increases warn, decreases are informational.
            if increased:
                channels.alert_warning(
                    OpenFileDescriptorsIncreasedInsideWarningRangeAlert(
                        self.name, new_open_file_descriptors, safe))
            else:
                channels.alert_info(
                    OpenFileDescriptorsDecreasedAlert(
                        self.name, previous, new_open_file_descriptors))
        elif new_open_file_descriptors >= danger:
            # Danger range: any movement is critical.
            if increased:
                channels.alert_critical(
                    OpenFileDescriptorsIncreasedInsideDangerRangeAlert(
                        self.name, new_open_file_descriptors, danger))
            else:
                channels.alert_critical(
                    OpenFileDescriptorsDecreasedAlert(
                        self.name, previous, new_open_file_descriptors))
        else:
            # Below the warning range: informational either way.
            if increased:
                channels.alert_info(
                    OpenFileDescriptorsIncreasedAlert(
                        self.name, previous, new_open_file_descriptors))
            else:
                channels.alert_info(
                    OpenFileDescriptorsDecreasedAlert(
                        self.name, previous, new_open_file_descriptors))

    self._open_file_descriptors = new_open_file_descriptors
def set_system_storage_usage(self, new_system_storage_usage: int,
                             channels: ChannelSet,
                             logger: logging.Logger) \
        -> None:
    """Store the latest system storage usage reading and alert when the
    value changes, with severity determined by the safe/danger boundaries
    (validator and non-validator nodes use different boundaries)."""
    logger.debug(
        '%s set_system_storage_usage: '
        'set_system_storage_usage(currently)=%s, channels=%s',
        self, self._system_storage_usage, channels)

    previous = self._system_storage_usage
    if previous is None:
        # First reading: nothing to compare against yet.
        self._system_storage_usage = new_system_storage_usage
        return

    if self.node.is_validator:
        danger = self._validator_system_storage_usage_danger_boundary
        safe = self._validator_system_storage_usage_safe_boundary
    else:
        danger = self._node_system_storage_usage_danger_boundary
        safe = self._node_system_storage_usage_safe_boundary

    if previous != new_system_storage_usage:
        increased = new_system_storage_usage > previous
        if safe <= new_system_storage_usage < danger:
            # Warning range: increases warn, decreases are informational.
            if increased:
                channels.alert_warning(
                    SystemStorageUsageIncreasedInsideWarningRangeAlert(
                        self.name, new_system_storage_usage, safe))
            else:
                channels.alert_info(
                    SystemStorageUsageDecreasedAlert(
                        self.name, previous, new_system_storage_usage))
        elif new_system_storage_usage >= danger:
            # Danger range: any movement is critical.
            if increased:
                channels.alert_critical(
                    SystemStorageUsageIncreasedInsideDangerRangeAlert(
                        self.name, new_system_storage_usage, danger))
            else:
                channels.alert_critical(
                    SystemStorageUsageDecreasedAlert(
                        self.name, previous, new_system_storage_usage))
        else:
            # Below the warning range: informational either way.
            if increased:
                channels.alert_info(
                    SystemStorageUsageIncreasedAlert(
                        self.name, previous, new_system_storage_usage))
            else:
                channels.alert_info(
                    SystemStorageUsageDecreasedAlert(
                        self.name, previous, new_system_storage_usage))

    self._system_storage_usage = new_system_storage_usage
def disconnect_from_api(self, channels: ChannelSet, logger: logging.Logger):
    """Flag the node as disconnected from the API server, alerting once on
    the transition (critical for validators, warning otherwise)."""
    logger.debug('%s disconnect_from_api: channels=%s', self, channels)
    if self.is_connected_to_api_server:
        alert = NodeWasNotConnectedToApiServerAlert(self.name)
        raise_alert = (channels.alert_critical if self.is_validator
                       else channels.alert_warning)
        raise_alert(alert)
    self._connected_to_api_server = False
def set_is_syncing(self, now_is_syncing: bool, channels: ChannelSet,
                   logger: logging.Logger) -> None:
    """Record the node's syncing state, alerting on transitions only
    (warning when syncing starts, info when it stops)."""
    logger.debug('%s set_is_syncing: before=%s, new=%s, channels=%s',
                 self, self.is_syncing, now_is_syncing, channels)
    was_syncing = self.is_syncing
    if now_is_syncing and not was_syncing:
        channels.alert_warning(IsSyncingAlert(self.name))
    elif was_syncing and not now_is_syncing:
        channels.alert_info(IsNoLongerSyncingAlert(self.name))
    self._is_syncing = now_is_syncing
def add_missed_block(self, block_height: int, block_time: datetime,
                     missing_validators: int, channels: ChannelSet,
                     logger: logging.Logger) -> None:
    """Register one more consecutively-missed block and alert accordingly.

    NOTE: This function assumes that the node is a validator.

    Severity escalates with consecutive misses: no alert on the first miss,
    info below the danger boundary, warning on reaching it, and critical on
    every 10th miss from 10 onwards. Independently, the timed tracker raises
    a critical alert when too many blocks are missed within its interval.
    """
    # Calculate the actual blocks missed as of when this function was called
    blocks_missed = self._consecutive_blocks_missed + 1

    # Variable alias for improved readability
    danger = self._missed_blocks_danger_boundary

    logger.debug(
        '%s add_missed_block: before=%s, new=%s, missing_validators = %s, '
        'channels=%s', self, self.consecutive_blocks_missed_so_far,
        blocks_missed, missing_validators, channels)

    # Let timed tracker know that block missed
    self._timed_block_miss_tracker.action_happened(at_time=block_time)

    # Alert (varies depending on whether was already missing blocks)
    if not self.is_missing_blocks:
        pass  # Do not alert on first missed block
    elif 2 <= blocks_missed < danger:
        channels.alert_info(
            MissedBlocksAlert(self.name, blocks_missed, block_height,
                              missing_validators)
        )  # 2+ blocks missed below the danger boundary
    elif blocks_missed == danger:
        # BUGFIX: this branch previously compared against a hard-coded 5,
        # which made it unreachable whenever the configured boundary
        # exceeded 5 (the info branch above matched first).
        channels.alert_warning(
            MissedBlocksAlert(self.name, blocks_missed, block_height,
                              missing_validators))  # reached danger range
    elif blocks_missed >= max(10, danger) and blocks_missed % 10 == 0:
        channels.alert_critical(
            MissedBlocksAlert(self.name, blocks_missed, block_height,
                              missing_validators)
        )  # Every (10N)th block missed for N >= 1 inside danger range
        self._timed_block_miss_tracker.reset()

    if self._timed_block_miss_tracker.too_many_occurrences(block_time):
        blocks_in_interval = self._timed_block_miss_tracker.max_occurrences
        time_interval = self._timed_block_miss_tracker.time_interval_pretty
        channels.alert_critical(
            TimedMissedBlocksAlert(self.name, blocks_in_interval,
                                   time_interval, block_height,
                                   missing_validators)
        )  # More blocks missed than is acceptable in the time interval
        self._timed_block_miss_tracker.reset()

    # Update consecutive blocks missed
    self._consecutive_blocks_missed = blocks_missed
def set_elected(self, now_is_elected: bool, channels: ChannelSet,
                logger: logging.Logger) -> None:
    """Record whether this validator is elected for the next session,
    alerting only when the (known) state actually changes.

    NOTE: This function assumes that the node is a validator.
    """
    logger.debug('%s set_elected: elected(currently)=%s, channels=%s',
                 self, self.is_elected, channels)
    previous = self.is_elected
    if previous is not None and previous != now_is_elected:
        if now_is_elected:
            channels.alert_info(
                ValidatorIsElectedForTheNextSessionAlert(self.name))
        else:
            channels.alert_warning(
                ValidatorIsNotElectedForNextSessionAlert(self.name))
    self._elected = now_is_elected
def set_api_as_down(self, monitor: str, is_validator_monitor,
                    channels: ChannelSet) -> None:
    """Mark the API server as down, warning on the up-to-down transition and
    escalating to a one-off critical alert for validator monitors once the
    down-time limiter's grace period has elapsed."""
    self._logger.debug('%s set_api_as_down: api_down(currently)=%s, '
                       'channels=%s', self, self._api_down, channels)

    if not self._api_down:
        # Transition from up to down: warn and start the escalation timer.
        channels.alert_warning(ApiIsDownAlert(monitor))
        self._api_down_limiter.did_task()

    # Escalate once per outage for validator monitors, after the limiter
    # allows it (note: the limiter is only consulted for validator monitors).
    escalate = (is_validator_monitor
                and self._api_down_limiter.can_do_task()
                and not self._critical_alert_sent)
    if escalate:
        channels.alert_critical(ApiIsDownAlert(monitor))
        self._critical_alert_sent = True

    self._api_down = True
def set_no_of_blocks_authored(self, channels: ChannelSet,
                              logger: logging.Logger,
                              new_no_of_blocks_authored: int,
                              era_index: int):
    """Track blocks authored by this validator in the current era and alert
    when it appears to have stopped authoring.

    NOTE: This function assumes that the node is a validator.
    """
    logger.debug(
        '%s set_no_of_blocks_authored: no_of_blocks_'
        'authored(currently)=%s, channels=%s', self,
        self._no_of_blocks_authored, channels)
    if self.is_active:
        if self._no_of_blocks_authored < new_no_of_blocks_authored:
            # A new block was authored: refresh counters/timestamps and, if
            # the validator was previously flagged as not authoring, clear
            # that state with an info alert.
            self._no_of_blocks_authored = new_no_of_blocks_authored
            self._time_of_last_block = datetime.now().timestamp()
            self.blocks_authored_alert_limiter.did_task()
            self._time_of_last_block_check_activity = \
                datetime.now().timestamp()
            if self._is_authoring is False:
                self._is_authoring = True
                channels.alert_info(
                    ANewBlockHasNowBeenAuthoredByValidatorAlert(self.name))
        elif self._no_of_blocks_authored == \
                new_no_of_blocks_authored and \
                self.blocks_authored_alert_limiter.can_do_task():
            # No new blocks since the last reading and the limiter allows
            # another alert.
            # BUGFIX: this previously compared against `NONE`, a name not
            # defined in this module (NameError at runtime); `None` is
            # intended.
            if self._time_of_last_block is not None:
                time_interval = strfdelta(
                    datetime.now() -
                    datetime.fromtimestamp(self._time_of_last_block),
                    "{hours}h, {minutes}m, {seconds}s")
                channels.alert_warning(
                    LastAuthoredBlockInEraAlert(self.name, time_interval,
                                                era_index))
            else:
                channels.alert_warning(
                    NoBlocksHaveYetBeenAuthoredInEraAlert(
                        self.name, era_index))
            self._is_authoring = False
            self.blocks_authored_alert_limiter.did_task()
            self._time_of_last_block_check_activity = \
                datetime.now().timestamp()
class TestChannelSet(unittest.TestCase):
    """Tests that ChannelSet honours the severity-enabled and alert-enabled
    maps from the internal configuration.

    The tests mutate the class-level maps on
    TestInternalConfSomeAlertsDisabled, so setUp snapshots both maps and
    tearDown restores them to keep tests independent.
    """

    def setUp(self) -> None:
        self.alerter_name = 'testalerter'
        self.logger = logging.getLogger('dummy')
        self.counter_channel = CounterChannel(self.logger)
        self.channel_set = ChannelSet([self.counter_channel],
                                      TestInternalConfSomeAlertsDisabled)
        self.dummy_alert = Alert(AlertCode.TestAlert, 'dummy')
        # Backups of the shared config maps, restored in tearDown (the
        # enable/disable helpers below mutate the maps in place).
        self.severities_map_bkp = \
            TestInternalConfSomeAlertsDisabled.severities_enabled_map.copy()
        self.alerts_map_bkp = \
            TestInternalConfSomeAlertsDisabled.alerts_enabled_map.copy()

    @staticmethod
    def enable_severity(severity: SeverityCode):
        # Flip a single severity on in the shared test configuration.
        TestInternalConfSomeAlertsDisabled.severities_enabled_map[
            severity.name] = True

    @staticmethod
    def disable_severity(severity: SeverityCode):
        # Flip a single severity off in the shared test configuration.
        TestInternalConfSomeAlertsDisabled.severities_enabled_map[
            severity.name] = False

    @staticmethod
    def enable_alert(alert: AlertCode):
        # Flip a single alert code on in the shared test configuration.
        TestInternalConfSomeAlertsDisabled.alerts_enabled_map[
            alert.name] = True

    @staticmethod
    def disable_alert(alert: AlertCode):
        # Flip a single alert code off in the shared test configuration.
        TestInternalConfSomeAlertsDisabled.alerts_enabled_map[
            alert.name] = False

    def tearDown(self) -> None:
        self.counter_channel.reset()  # ignore previous alerts
        # Restore the shared config maps mutated by the tests above.
        TestInternalConfSomeAlertsDisabled.severities_enabled_map = \
            self.severities_map_bkp
        TestInternalConfSomeAlertsDisabled.alerts_enabled_map = \
            self.alerts_map_bkp

    def test_info_severity_disabled_from_config(self):
        # As set in test_internal_config_alerts, info is disabled by default
        self.channel_set.alert_info(self.dummy_alert)
        self.assertEqual(self.counter_channel.info_count, 0)

    def test_warning_severity_disabled_from_config(self):
        # As set in test_internal_config_alerts, warning is disabled by default
        self.channel_set.alert_warning(self.dummy_alert)
        self.assertEqual(self.counter_channel.warning_count, 0)

    def test_critical_severity_enabled_from_config(self):
        # As set in test_internal_config_alerts, critical is enabled by default
        self.channel_set.alert_critical(self.dummy_alert)
        self.assertEqual(self.counter_channel.critical_count, 1)

    def test_error_severity_enabled_from_config(self):
        # As set in test_internal_config_alerts, error is enabled by default
        self.channel_set.alert_error(self.dummy_alert)
        self.assertEqual(self.counter_channel.error_count, 1)

    def test_info_severity_does_not_work_if_disabled(self):
        self.disable_severity(SeverityCode.INFO)
        self.channel_set.alert_info(self.dummy_alert)
        self.assertEqual(self.counter_channel.info_count, 0)

    def test_warning_severity_does_not_work_if_disabled(self):
        self.disable_severity(SeverityCode.WARNING)
        self.channel_set.alert_warning(self.dummy_alert)
        self.assertEqual(self.counter_channel.warning_count, 0)

    def test_critical_severity_does_not_work_if_disabled(self):
        self.disable_severity(SeverityCode.CRITICAL)
        self.channel_set.alert_critical(self.dummy_alert)
        self.assertEqual(self.counter_channel.critical_count, 0)

    def test_error_severity_does_not_work_if_disabled(self):
        self.disable_severity(SeverityCode.ERROR)
        self.channel_set.alert_error(self.dummy_alert)
        self.assertEqual(self.counter_channel.error_count, 0)

    def test_info_severity_works_if_enabled(self):
        self.enable_severity(SeverityCode.INFO)
        self.channel_set.alert_info(self.dummy_alert)
        self.assertEqual(self.counter_channel.info_count, 1)

    def test_warning_severity_works_if_enabled(self):
        self.enable_severity(SeverityCode.WARNING)
        self.channel_set.alert_warning(self.dummy_alert)
        self.assertEqual(self.counter_channel.warning_count, 1)

    def test_critical_severity_works_if_enabled(self):
        self.enable_severity(SeverityCode.CRITICAL)
        self.channel_set.alert_critical(self.dummy_alert)
        self.assertEqual(self.counter_channel.critical_count, 1)

    def test_error_severity_works_if_enabled(self):
        self.enable_severity(SeverityCode.ERROR)
        self.channel_set.alert_error(self.dummy_alert)
        self.assertEqual(self.counter_channel.error_count, 1)

    def test_alert_works_if_severity_and_alert_enabled_in_config(self):
        # As set in test_internal_config_alerts,
        # - info, warning are disabled by default
        # - critical, error are enabled by default
        # - test alert code is enabled by default
        self.channel_set.alert_info(self.dummy_alert)
        self.channel_set.alert_warning(self.dummy_alert)
        self.channel_set.alert_critical(self.dummy_alert)
        self.channel_set.alert_error(self.dummy_alert)
        self.assertEqual(self.counter_channel.info_count, 0)
        self.assertEqual(self.counter_channel.warning_count, 0)
        self.assertEqual(self.counter_channel.critical_count, 1)
        self.assertEqual(self.counter_channel.error_count, 1)

    def test_alert_does_not_work_on_any_severity_if_disabled(self):
        # Disabling the alert code suppresses it at every severity,
        # including severities that are themselves enabled.
        self.disable_alert(self.dummy_alert.alert_code)
        self.channel_set.alert_info(self.dummy_alert)
        self.channel_set.alert_warning(self.dummy_alert)
        self.channel_set.alert_critical(self.dummy_alert)
        self.channel_set.alert_error(self.dummy_alert)
        self.assertTrue(self.counter_channel.no_alerts())

    def test_alert_works_on_any_severity_if_enabled(self):
        # With every severity and the alert code enabled, the alert goes
        # through on all four severities.
        self.enable_severity(SeverityCode.INFO)
        self.enable_severity(SeverityCode.WARNING)
        self.enable_severity(SeverityCode.CRITICAL)
        self.enable_severity(SeverityCode.ERROR)
        self.enable_alert(self.dummy_alert.alert_code)
        self.channel_set.alert_info(self.dummy_alert)
        self.channel_set.alert_warning(self.dummy_alert)
        self.channel_set.alert_critical(self.dummy_alert)
        self.channel_set.alert_error(self.dummy_alert)
        self.assertEqual(self.counter_channel.info_count, 1)
        self.assertEqual(self.counter_channel.warning_count, 1)
        self.assertEqual(self.counter_channel.critical_count, 1)
        self.assertEqual(self.counter_channel.error_count, 1)
def process_event(self, event_height: str, event: dict,
                  channels: ChannelSet, logger: logging.Logger):
    """Inspect a staking/token event and alert when it involves this node's
    entity public key.

    Handles escrow (take/add/reclaim), burn and transfer events; any other
    event shape is reported via a warning alert.
    """
    # An escrow event is when tokens are either taken/added or reclaimed
    # from a delegation.
    if self._check_dict_path(event, 'escrow'):
        # Escrow events that take are usually done by the blockchain,
        # such as when a validator is slashed
        if self._check_dict_path(event, 'escrow', 'take'):
            if event['escrow']['take']['owner'] == self.entity_public_key:
                tokens = event['escrow']['take']['amount']
                logger.debug('%s Node %s Slashed %s tokens at height %s',
                             self, self.name, tokens, event_height)
                channels.alert_critical(
                    SlashedAlert(self.name, tokens, event_height))
        # Escrow events that add occur when someone delegates tokens to a
        # validator.
        elif self._check_dict_path(event, 'escrow', 'add'):
            if event['escrow']['add']['owner'] == self.entity_public_key:
                tokens = event['escrow']['add']['amount']
                escrow = event['escrow']['add']['escrow']
                logger.debug('%s Node %s : Added %s tokens at height %s to '
                             '%s .', self, self.name, tokens, event_height,
                             escrow)
                channels.alert_info(
                    EscrowAddEventSelfOwner(self.name, tokens, event_height,
                                            escrow))
            elif event['escrow']['add'][
                    'escrow'] == self.entity_public_key:
                tokens = event['escrow']['add']['amount']
                owner = event['escrow']['add']['owner']
                logger.debug('%s Node %s : Added %s tokens at height %s to '
                             '%s .', self, self.name, tokens, event_height,
                             owner)
                channels.alert_info(
                    EscrowAddEventSelfEscrow(self.name, tokens, event_height,
                                             owner))
        # Escrow events that reclaim occur when someone takes back their
        # delegated tokens from a validator
        elif self._check_dict_path(event, 'escrow', 'reclaim'):
            if event['escrow']['reclaim']['owner'] == \
                    self.entity_public_key:
                tokens = event['escrow']['reclaim']['amount']
                escrow = event['escrow']['reclaim']['escrow']
                logger.debug('%s Node %s : reclaimed %s tokens at height %s'
                             'to %s .', self, self.name, tokens,
                             event_height, escrow)
                channels.alert_info(
                    EscrowReclaimEventSelfOwner(self.name, tokens,
                                                event_height, escrow))
            elif event['escrow']['reclaim']['escrow'] == \
                    self.entity_public_key:
                tokens = event['escrow']['reclaim']['amount']
                owner = event['escrow']['reclaim']['owner']
                logger.debug('%s Node %s : reclaimed %s tokens at height %s'
                             'to %s .', self, self.name, tokens,
                             event_height, owner)
                channels.alert_info(
                    EscrowReclaimEventSelfEscrow(self.name, tokens,
                                                 event_height, owner))
    # Burn events occur when a user decides to destroy their own tokens.
    elif self._check_dict_path(event, 'burn'):
        if event['burn']['owner'] == self.entity_public_key:
            tokens = event['burn']['amount']
            logger.debug('%s Node %s Burned %s tokens at height %s',
                         self, self.name, tokens, event_height)
            channels.alert_critical(
                TokensBurnedAlert(self.name, tokens, event_height))
    # Transfer events occur when a user decides to send tokens to another
    # address.
    elif self._check_dict_path(event, 'transfer'):
        if event['transfer']['from'] == self.entity_public_key:
            tokens = event['transfer']['amount']
            destination = event['transfer']['to']
            logger.debug(
                '%s Node %s transfered %s tokens at height %s ' + 'to %s',
                self, self.name, tokens, event_height, destination)
            channels.alert_info(
                TokensTransferedToAlert(self.name, tokens, event_height,
                                        destination))
        elif event['transfer']['to'] == self.entity_public_key:
            tokens = event['transfer']['amount']
            source = event['transfer']['from']
            # BUGFIX: previously logged the destination address (this node)
            # where the source of the incoming transfer was intended.
            logger.debug(
                '%s Node %s transfered %s tokens at height %s ' + 'from %s',
                self, self.name, tokens, event_height, source)
            channels.alert_info(
                TokensTransferedFromAlert(self.name, tokens, event_height,
                                          source))
    else:
        # BUGFIX: the format string was missing a placeholder for the node
        # name (2 placeholders, 3 arguments), which made this logging call
        # raise an internal formatting error.
        logger.debug('%s Node %s received unknown event : %s', self,
                     self.name, event)
        channels.alert_warning(
            UnknownEventFound(self.name, event, event_height))