def test_metric_bounds_fixed_value(self, comparison, threshold_value, expected_bounds):
    # A 'fixed_value' threshold derives its bounds from the threshold value
    # alone; the history argument is irrelevant, so an empty one suffices.
    empty_history = []
    fixed_threshold = metrics.Threshold('fixed_value', threshold_value)
    computed = metrics.metric_bounds(empty_history, fixed_threshold, comparison)
    self.assertSequenceAlmostEqual(computed, expected_bounds)
def test_metric_bounds_stddevs_from_mean(self, comparison, threshold_value, expected_bounds):
    # History of values 1..5 -> mean = 3, stddev = ~1.414.
    history = [
        metrics.MetricPoint(metric_value=v, wall_time=10 * v)
        for v in range(1, 6)
    ]
    stddev_threshold = metrics.Threshold('stddevs_from_mean', threshold_value)
    computed = metrics.metric_bounds(history, stddev_threshold, comparison)
    self.assertSequenceAlmostEqual(computed, expected_bounds, places=3)
def compute_bounds_and_report_errors(self, metrics_history, new_metrics, job_status):
    """Compute the bounds for metrics and report abnormal values.

    Any metric that is currently outside the expected bounds is reported
    to Stackdriver Error Reporting unless `alert_for_oob_metrics` is set
    to False in the regression test config. Even if this reporting is
    turned off, this method computes the upper and lower bounds for each
    metric to provide to BigQuery as a visual aid when rendering metrics
    history into charts.

    Args:
      metrics_history(dict): Historic values of each metric.
      new_metrics(dict): Key is metric name and value is MetricPoint
        containing the latest aggregated value for that metric.
      job_status(string): Final state of the job, should be one of the
        status constants found in job_status_handler.py.

    Returns:
      metric_name_to_visual_bounds (dict): Key is metric name and value is
        a tuple of floats of the form (lower_bound, upper_bound).
    """
    # Bounds/alerting only make sense if a regression config with success
    # conditions exists; otherwise return an empty mapping.
    if not self.regression_test_config:
        return {}
    success_conditions = self.regression_test_config.get(
        'metric_success_conditions')
    if not success_conditions:
        return {}
    # NOTE(review): .copy() is shallow — the per-metric history lists are
    # still shared with the caller's dict, so the append below mutates the
    # caller's lists too. Confirm this aliasing is intended.
    metrics_history = metrics_history.copy()

    # Add the metrics from the latest run. These aren't in Bigquery yet.
    # Assumes missing keys are handled (presumably metrics_history is a
    # defaultdict(list)) — TODO confirm with the caller.
    for metric_name, metric_value in new_metrics.items():
        metrics_history[metric_name].append(metric_value)

    metric_name_to_visual_bounds = {}
    # Optional allowlist: if non-empty, only these metrics get bounds/alerts.
    metric_subset_to_report = set(
        self.regression_test_config.get('metric_subset_to_alert', []))
    for metric_name, value_history in metrics_history.items():
        if metric_subset_to_report and metric_name not in metric_subset_to_report:
            self.logger.info(
                'Skipping alerts and bounds for metric `{}` since '
                'it does not appear in `metric_subset_to_report` in your '
                'regression test config.'.format(metric_name))
            continue
        # Per-metric condition wins; otherwise fall back to 'default'.
        success_condition = success_conditions.get(metric_name) or \
            success_conditions.get('default')
        if not success_condition:
            self.logger.warning(
                'metric: `{}` has an empty success condition in the '
                '`metric_success_conditions` dict in the regression_test_config '
                'but there is no default condition provided. No bounds or '
                'alerts will be computed. See README for config details.'.format(
                    metric_name))
            continue
        # Too little history to enforce bounds yet (default -1 means the
        # guard never trips when the key is absent).
        elif len(value_history) <= success_condition.get(
            'wait_for_n_points_of_history', -1):
            self.logger.info(
                'Metric: {} had only {} points of history. Skipping bounds '
                'enforcement. Success condition: {}'.format(
                    metric_name, len(value_history), success_condition))
            continue
        # 'success_threshold' is a single-entry dict of {type: value};
        # raises AttributeError if the key is missing — presumably the
        # config is validated upstream (TODO confirm).
        threshold_type, threshold_value = list(success_condition.get('success_threshold').items())[0]
        threshold = metrics.Threshold(threshold_type, threshold_value)
        comparison = success_condition.get('comparison')
        lower_bound, upper_bound = metrics.metric_bounds(
            value_history, threshold, comparison)
        metric_name_to_visual_bounds[metric_name] = (lower_bound, upper_bound)

        # The latest point (appended above) is the value under test.
        metric_value = value_history[-1].metric_value
        within_bounds = metrics.within_bounds(metric_value, lower_bound, upper_bound, inclusive=('equal' in comparison))

        # Generate an alert unless one of these is True:
        #   1. metrics are within bounds.
        #   2. alerting is disabled by config.
        #   3. the job failed and therefore metrics are unreliable.
        if within_bounds or not self.regression_test_config.get(
            'alert_for_oob_metrics', True) or \
            job_status != job_status_handler.SUCCESS:
            continue
        self.logger.error(
            'Metric `{}` was out of bounds for test `{}`. Bounds were '
            '({}, {}) and value was {:.2f}'.format(
                metric_name, self.test_name, lower_bound, upper_bound,
                metric_value), debug_info=self.debug_info)

    return metric_name_to_visual_bounds
def test_skip_oob_alerting(self):
    handler_base_args = {
        'test_name': 'test',
        'events_dir': self.temp_dir,
        'debug_info': None,
        'metric_collection_config': {},
        'regression_test_config': {
            'alert_after_second_test_failure': True,
        },
        'test_type': None,
        'accelerator': None,
        'framework_version': None,
        'logger': self.logger,
    }

    def skip(handler, status, values_and_times):
        # Every scenario shares the same threshold and comparison; only the
        # job status and the metric history vary.
        points = [metrics.MetricPoint(v, t) for v, t in values_and_times]
        return handler.skip_oob_alerting(
            status, points, metrics.Threshold('fixed_value', 0.9), 'greater')

    metrics_handler = main.CloudMetricsHandler(**handler_base_args)

    # Both current and previous runs were OOB. Should alert.
    self.assertFalse(skip(
        metrics_handler, job_status_handler.SUCCESS,
        [(0.8, 111), (0.8, 112), (1.0, 113)]))

    # Job was FAILURE; should skip metrics alerting.
    self.assertTrue(skip(
        metrics_handler, job_status_handler.FAILURE,
        [(1.0, 111), (1.0, 112), (1.0, 113)]))

    # Job was TIMEOUT; should skip metrics alerting.
    self.assertTrue(skip(
        metrics_handler, job_status_handler.TIMEOUT,
        [(1.0, 111), (1.0, 112), (1.0, 113)]))

    # Latest run was OOB but previous run was not; should skip alerting.
    self.assertTrue(skip(
        metrics_handler, job_status_handler.SUCCESS,
        [(0.8, 110), (1.0, 112), (1.0, 113)]))

    handler_base_args['regression_test_config'] = {
        'alert_after_second_test_failure': False,
    }
    metrics_handler = main.CloudMetricsHandler(**handler_base_args)

    # Latest run was OOB but previous run was not; should alert since now the
    # config has 'alert_after_second_test_failure': False.
    self.assertFalse(skip(
        metrics_handler, job_status_handler.SUCCESS,
        [(0.8, 110), (1.0, 112), (1.0, 113)]))