def cloudwatch_timeframe(self):
    """
    Guess the time window in which the CloudWatch errors behind this
    alarm are likely to be found.

    The window is centred on the date parsed out of ``self.state_reason``
    and extends 300 seconds either side of it.

    Returns an ``Interval`` with ``start`` and ``end`` set to the two
    edges of that window.
    """
    parsed = ThresholdMessage.from_message(self.state_reason)

    # The same margin is applied before and after the parsed date.
    margin = dt.timedelta(seconds=300)
    window_start = parsed.date - margin
    window_end = parsed.date + margin

    return Interval(start=window_start, end=window_end)
def cloudwatch_timeframe(self):
    """
    Guess the time window in which the CloudWatch errors behind this
    alarm are likely to be found.

    The window is centred on the date parsed out of ``self.state_reason``
    and extends 300 seconds either side of it.

    Raises ``MessageHasNoDateError`` if building the window fails with a
    ``TypeError``, which is what happens when the parsed message carries
    no date (``date`` is ``None``).
    """
    parsed = ThresholdMessage.from_message(self.state_reason)

    # The same margin is applied before and after the parsed date.
    margin = dt.timedelta(seconds=300)

    try:
        window = Interval(
            start=parsed.date - margin,
            end=parsed.date + margin,
        )
    except TypeError:
        # Arithmetic on a missing (None) date lands here; report it as
        # the more descriptive domain error instead.
        raise MessageHasNoDateError()

    return window
def get_human_message(alarm_name, state_reason):
    """
    Build a friendlier sentence than the raw "Threshold Crossed" text,
    when the alarm name follows a naming pattern we recognise.

    ``state_reason`` is always parsed with ``ThresholdMessage.from_message``
    first.  The alarm name is then checked against each known pattern in
    turn, and the first one that matches decides the wording.  If no
    pattern matches, ``state_reason`` is returned unchanged.
    """
    parsed = ThresholdMessage.from_message(state_reason)

    # Dead-letter queues.  The lower threshold is always zero, so the
    # number of items on the queue is all we need to report, e.g.
    #
    #     There is 1 item on the ID minter DLQ.
    #
    dlq_suffix = "_dlq_not_empty"
    if alarm_name.endswith(dlq_suffix):
        queue_name = alarm_name[:-len(dlq_suffix)]
        item_count = parsed.actual_value
        opening = (
            "There is 1 item"
            if item_count == 1
            else f"There are {item_count} items"
        )
        return opening + f" on the {queue_name} DLQ."

    # Unhealthy hosts.  Again the lower threshold is always zero, e.g.
    #
    #     There are 2 unhealthy targets in the id_minter ALB target group.
    #
    unhealthy_suffix = "-alb-unhealthy-hosts"
    if alarm_name.endswith(unhealthy_suffix):
        group_name = alarm_name[:-len(unhealthy_suffix)]
        unhealthy_count = parsed.actual_value
        opening = (
            "There is 1 unhealthy target"
            if unhealthy_count == 1
            else f"There are {unhealthy_count} unhealthy targets"
        )
        return opening + f" in the {group_name} ALB target group."

    # Not enough healthy hosts.  Here the lower threshold can vary, so
    # it is included in the sentence, e.g.
    #
    #     There aren't enough healthy targets in the ingestor
    #     (saw 2, expected at least 3).
    #
    not_enough_suffix = "-alb-not-enough-healthy-hosts"
    if alarm_name.endswith(not_enough_suffix):
        group_name = alarm_name[:-len(not_enough_suffix)]

        # When the parsed message is flagged as breaching, we report
        # that there are no healthy hosts at all.
        if parsed.is_breaching:
            return f"There are no healthy hosts in the {group_name} ALB target group."

        expected = parsed.desired_value
        seen = parsed.actual_value
        return (
            f"There aren't enough healthy targets in the {group_name} ALB target group "
            f"(saw {seen}, expected at least {expected})."
        )

    # 500 errors from a target group -- any number is bad, e.g.
    #
    #     There were multiple 500 errors (3) from the ingestor ALB target group.
    #
    # The numeral goes in brackets purely to keep the sentence readable.
    errors_500_suffix = "-alb-target-500-errors"
    if alarm_name.endswith(errors_500_suffix):
        group_name = alarm_name[:-len(errors_500_suffix)]
        count_500 = parsed.actual_value
        if count_500 == 1:
            return f"There was a 500 error from the {group_name} ALB target group."
        return f"There were multiple 500 errors ({count_500}) from the {group_name} ALB target group."

    # Lambda errors -- likewise, any number is bad, e.g.
    #
    #     There was an error in the post_to_slack Lambda.
    #
    lambda_prefix = "lambda-"
    lambda_suffix = "-errors"
    if alarm_name.startswith(lambda_prefix) and alarm_name.endswith(lambda_suffix):
        lambda_name = alarm_name[len(lambda_prefix):-len(lambda_suffix)]
        lambda_errors = parsed.actual_value
        if lambda_errors == 1:
            return f"There was an error in the {lambda_name} Lambda."
        return f"There were {lambda_errors} errors in the {lambda_name} Lambda."

    # Nothing recognised: fall back to the original text.
    return state_reason
def test_is_breaking(self, message, is_breaching):
    """The parsed message exposes the expected ``is_breaching`` value."""
    parsed = ThresholdMessage.from_message(message)
    actual = parsed.is_breaching
    assert actual == is_breaching
def test_unexpected_message_is_valueerror(self, bad_message):
    """Parsing ``bad_message`` must fail with a ``ValueError``."""
    parse = ThresholdMessage.from_message
    with pytest.raises(ValueError):
        parse(bad_message)
def test_operator(self, message, expected_operator):
    """The parsed message exposes the expected ``operator``."""
    parsed = ThresholdMessage.from_message(message)
    actual = parsed.operator
    assert actual == expected_operator
def test_date(self, message, expected_date):
    """The parsed message exposes the expected ``date``."""
    parsed = ThresholdMessage.from_message(message)
    actual = parsed.date
    assert actual == expected_date
def test_desired_value(self, message, desired_value):
    """The parsed message exposes the expected ``desired_value``."""
    parsed = ThresholdMessage.from_message(message)
    actual = parsed.desired_value
    assert actual == desired_value
def test_actual_value(self, message, actual_value):
    """The parsed message exposes the expected ``actual_value``."""
    parsed = ThresholdMessage.from_message(message)
    observed = parsed.actual_value
    assert observed == actual_value
def get_human_message(alarm_name, state_reason):
    """
    Build a friendlier sentence than the raw "Threshold Crossed" text,
    when the alarm name follows a naming pattern we recognise.

    ``state_reason`` is always parsed with ``ThresholdMessage.from_message``
    first.  The alarm name is then checked against each known pattern in
    turn, and the first one that matches decides the wording.  If no
    pattern matches, ``state_reason`` is returned unchanged.
    """
    parsed = ThresholdMessage.from_message(state_reason)

    # Dead-letter queues.  The lower threshold is always zero, so the
    # number of items on the queue is all we need to report, e.g.
    #
    #     There is 1 item on the ID minter DLQ.
    #
    dlq_suffix = "_dlq_not_empty"
    if alarm_name.endswith(dlq_suffix):
        queue_name = alarm_name[:-len(dlq_suffix)]
        item_count = parsed.actual_value
        opening = (
            "There is 1 item"
            if item_count == 1
            else f"There are {item_count} items"
        )
        return opening + f" on the {queue_name} DLQ."

    # Unhealthy hosts.  Again the lower threshold is always zero, e.g.
    #
    #     There are 2 unhealthy targets in the id_minter ALB target group.
    #
    unhealthy_suffix = "-alb-unhealthy-hosts"
    if alarm_name.endswith(unhealthy_suffix):
        group_name = alarm_name[:-len(unhealthy_suffix)]
        unhealthy_count = parsed.actual_value
        opening = (
            "There is 1 unhealthy target"
            if unhealthy_count == 1
            else f"There are {unhealthy_count} unhealthy targets"
        )
        return opening + f" in the {group_name} ALB target group."

    # Not enough healthy hosts.  Here the lower threshold can vary, so
    # it is included in the sentence, e.g.
    #
    #     There aren't enough healthy targets in the ingestor
    #     (saw 2, expected at least 3).
    #
    not_enough_suffix = "-alb-not-enough-healthy-hosts"
    if alarm_name.endswith(not_enough_suffix):
        group_name = alarm_name[:-len(not_enough_suffix)]

        # When the parsed message is flagged as breaching, we report
        # that there are no healthy hosts at all.
        if parsed.is_breaching:
            return f"There are no healthy hosts in the {group_name} ALB target group."

        expected = parsed.desired_value
        seen = parsed.actual_value
        return (
            f"There aren't enough healthy targets in the {group_name} ALB target group "
            f"(saw {seen}, expected at least {expected})."
        )

    # 500 errors from a target group -- any number is bad, e.g.
    #
    #     There were multiple 500 errors (3) from the ingestor ALB target group.
    #
    # The numeral goes in brackets purely to keep the sentence readable.
    errors_500_suffix = "-alb-target-500-errors"
    if alarm_name.endswith(errors_500_suffix):
        group_name = alarm_name[:-len(errors_500_suffix)]
        count_500 = parsed.actual_value
        if count_500 == 1:
            return f"There was a 500 error from the {group_name} ALB target group."
        return f"There were multiple 500 errors ({count_500}) from the {group_name} ALB target group."

    # Lambda errors -- likewise, any number is bad, e.g.
    #
    #     There was an error in the post_to_slack Lambda.
    #
    lambda_prefix = "lambda-"
    lambda_suffix = "-errors"
    if alarm_name.startswith(lambda_prefix) and alarm_name.endswith(lambda_suffix):
        lambda_name = alarm_name[len(lambda_prefix):-len(lambda_suffix)]
        lambda_errors = parsed.actual_value
        if lambda_errors == 1:
            return f"There was an error in the {lambda_name} Lambda."
        return f"There were {lambda_errors} errors in the {lambda_name} Lambda."

    # Snapshot generator queue.  Items piling up here suggest the
    # snapshot generator isn't running correctly, so report how many
    # items are waiting to be processed.
    if alarm_name == "snapshot_scheduler_queue_not_empty":
        unprocessed = parsed.actual_value
        if unprocessed == 1:
            return "The snapshot generator queue has 1 unprocessed item."
        return f"The snapshot generator queue has {unprocessed} unprocessed items."

    # Nothing recognised: fall back to the original text.
    return state_reason