def testChangePoint_CanBeMadeAndConvertedToDict(self):
  """Checks that MakeChangePoint fills every field and AsDict mirrors them."""
  series = list(enumerate([4, 4, 4, 8, 8, 8, 8]))
  change_point = find_change_points.MakeChangePoint(series, 3)
  # One source of truth for the expected field values; used both to build
  # the expected ChangePoint and as the expected AsDict() result.
  expected_fields = {
      'x_value': 3,
      'median_before': 4.0,
      'median_after': 8.0,
      'size_before': 3,
      'size_after': 4,
      'window_start': 0,
      'window_end': 6,
      'relative_change': 1.0,
      'std_dev_before': 0.0,
      't_statistic': float('inf'),
      'degrees_of_freedom': 1.0,
      'p_value': 0.001,
  }
  self.assertEqual(
      find_change_points.ChangePoint(**expected_fields), change_point)
  self.assertEqual(expected_fields, change_point.AsDict())
def testFindChangePoints(self):
  """Verifies FindChangePoints output for a series with one clear step.

  Tests for specific aspects of the algorithm are covered separately below;
  this is a simple end-to-end check on an obvious change.
  """
  values = [1, 1, 2, 1, 1, 8, 8, 8, 9, 8, 9]
  series = list(enumerate(values))
  actual = find_change_points.FindChangePoints(
      series,
      max_window_size=10,
      multiple_of_std_dev=3,
      min_relative_change=0.5,
      min_absolute_change=1,
      min_steppiness=0.4,
      min_segment_size=3)
  expected = [
      find_change_points.ChangePoint(
          x_value=5,
          median_before=1,
          median_after=8,
          window_start=1,
          window_end=10,
          size_before=4,
          size_after=6,
          relative_change=7,
          std_dev_before=0.4330127018922193,
          t_statistic=-24.452628375754593,
          degrees_of_freedom=6.9938793160801023,
          p_value=0.001)
  ]
  self.assertEqual(expected, actual)
def _MakeSampleChangePoint(x_value, median_before, median_after):
  """Makes a sample find_change_points.ChangePoint for use in these tests.

  Only the revision number and the medians before/after matter in these
  tests; every other statistic is filled in with None.
  """
  unused_stats = dict.fromkeys(
      ('size_before', 'size_after', 'relative_change', 'std_dev_before',
       't_statistic', 'degrees_of_freedom', 'p_value'))
  return find_change_points.ChangePoint(
      x_value=x_value,
      median_before=median_before,
      median_after=median_after,
      window_start=1,
      window_end=8,
      **unused_stats)
def run(  # pylint: disable=invalid-name
    self, bench_name, description, simulation_results):
  """Generates a comparison report between experimental and base results.

  Note: the original docstring opened with four quote characters, which left
  a stray '"' at the start of the rendered docstring; fixed to three.

  Args:
    bench_name: A string bench name.
    description: A string description of this bench job.
    simulation_results: A list of pairs, each of which is a pair
        (TestBench id, change point results), i.e. the return value of
        SimulateAlertProcessingPipeline.run. But, the ChangePoint objects,
        which are named tuple objects, are automatically converted to lists
        because they're implicitly serialized as JSON.
  """
  bench_id_to_change_points_as_lists = dict(simulation_results)
  results = {
      'bench_name': bench_name,
      'description': description,
  }
  # Alerts found by this simulation, bucketed by how the revision was
  # triaged in the recorded base data.
  total_invalid_alerts = 0
  total_confirmed_alerts = 0
  total_new_alerts = 0
  total_alerts = 0
  # Alert counts from the recorded base data; used as denominators below.
  total_base_alerts = 0
  total_base_invalid_alerts = 0
  total_base_confirmed_alerts = 0
  unconfirmed_alert_links = []
  extra_alert_links = []
  for bench in TestBench.query().fetch():
    bench_id = bench.key.integer_id()
    # Skip benches that weren't part of this simulation run.
    if bench_id not in bench_id_to_change_points_as_lists:
      continue
    change_points_as_lists = bench_id_to_change_points_as_lists[bench_id]
    invalid_anomaly_rev_set = _Flatten(bench.invalid_anomaly_revs)
    confirmed_anomaly_rev_set = _Flatten(bench.confirmed_anomaly_revs)
    base_anomaly_rev_set = _Flatten(bench.base_anomaly_revs)
    unconfirmed_alert_links.extend(
        _UnconfirmedAlertLinks(bench, change_points_as_lists))
    extra_alert_links.extend(
        _ExtraAlertLinks(bench, change_points_as_lists))
    for change_point_as_list in change_points_as_lists:
      # The ChangePoint was serialized to a plain list; rebuild the
      # namedtuple so fields can be accessed by name.
      change_point = find_change_points.ChangePoint(*change_point_as_list)
      end_rev = change_point.x_value
      if end_rev in invalid_anomaly_rev_set:
        total_invalid_alerts += 1
      elif end_rev in confirmed_anomaly_rev_set:
        total_confirmed_alerts += 1
      elif end_rev not in base_anomaly_rev_set:
        total_new_alerts += 1
    total_alerts += len(change_points_as_lists)
    total_base_alerts += len(bench.base_anomaly_revs)
    total_base_invalid_alerts += len(bench.invalid_anomaly_revs)
    total_base_confirmed_alerts += len(bench.confirmed_anomaly_revs)
  results['invalid_alerts'] = (
      '%s/%s' % (total_invalid_alerts, total_base_invalid_alerts))
  results['confirmed_alerts'] = (
      '%s/%s' % (total_confirmed_alerts, total_base_confirmed_alerts))
  results['new_alerts'] = total_new_alerts
  results['total_alerts'] = '%s/%s' % (total_alerts, total_base_alerts)
  # Only include a sample of at most 10 links per category in the report.
  results['unconfirmed_alert_links'] = '\n'.join(
      unconfirmed_alert_links[:10])
  results['extra_alert_links'] = '\n'.join(extra_alert_links[:10])
  _AddReportToLog(results)
  logging.debug(
      'Completed comparison report for bench_name: %s, '
      'description: %s. Results: %s', bench_name, description, results)