def testFindChangePoints(self):
  # End-to-end check of the complete ChangePoint output for a series that
  # contains one obvious step. Tests for specific aspects of the algorithm
  # are below.
  y_values = [1, 1, 2, 1, 1, 8, 8, 8, 9, 8, 9]
  points = list(enumerate(y_values))
  # The single change point expected at x=5, where the values jump.
  step = find_change_points.ChangePoint(
      x_value=5,
      median_before=1,
      median_after=8,
      window_start=1,
      window_end=10,
      size_before=4,
      size_after=6,
      relative_change=7,
      std_dev_before=0.4330127018922193,
      t_statistic=-24.452628375754593,
      degrees_of_freedom=6.9938793160801023,
      p_value=0.001)
  thresholds = {
      'max_window_size': 10,
      'multiple_of_std_dev': 3,
      'min_relative_change': 0.5,
      'min_absolute_change': 1,
      'min_steppiness': 0.4,
      'min_segment_size': 3,
  }
  found = find_change_points.FindChangePoints(points, **thresholds)
  self.assertEqual([step], found)
def SimulateAlertProcessing(chart_series, **config_dict):
  """Finds the same alerts as would be found normally as points are added.

  Each time a new point is added to a data series on dashboard, the
  FindChangePoints function is called with some points from that series.
  In order to simulate this here, we need to repeatedly call FindChangePoints
  on a sliding window over the series.

  Args:
    chart_series: A list of (x, y) pairs.
    **config_dict: An alert threshold config dict. It is forwarded unchanged
        to FindChangePoints; its 'max_window_size' entry, if present, also
        determines how many points are passed in on each call.

  Returns:
    A list of find_change_points.ChangePoint objects, one for each alert found.
  """
  all_change_points = []
  # Highest x-value alerted on so far; used to avoid finding duplicate alerts.
  # None means "no alert yet" and is tested explicitly below, because ordering
  # comparisons between a number and None raise TypeError in Python 3.
  highest_x = None
  # The number of points that are passed in to FindChangePoints normally may
  # depend on either the specific "max_window_size" value or another default
  # used in find_anomalies.
  window = config_dict.get(
      'max_window_size', find_anomalies.DEFAULT_NUM_POINTS)
  # NOTE(review): `end` stops at len(chart_series) - 1 and the slice end is
  # exclusive, so the final point is never passed to FindChangePoints --
  # confirm this is intended before changing it.
  for end in range(1, len(chart_series)):
    start = max(0, end - window)
    series = chart_series[start:end]
    change_points = find_change_points.FindChangePoints(series, **config_dict)
    # Keep only change points beyond everything already reported.
    change_points = [
        c for c in change_points
        if highest_x is None or c.x_value > highest_x
    ]
    if change_points:
      highest_x = max(c.x_value for c in change_points)
      all_change_points.extend(change_points)
  return all_change_points
def FindChangePointsForTest(rows, config_dict):
  """Runs change point detection over row data.

  Each row is unpacked into three items; the first (revision) and third
  (value) form the (x, y) pair handed to FindChangePoints, and the middle
  item is ignored.

  Args:
    rows: The Row entities to find anomalies for, sorted backwards by revision.
    config_dict: Anomaly threshold parameters as a dictionary; expanded into
        keyword arguments for FindChangePoints.

  Returns:
    A list of find_change_points.ChangePoint objects.
  """
  xy_pairs = []
  for revision, _, value in rows:
    xy_pairs.append((revision, value))
  return find_change_points.FindChangePoints(xy_pairs, **config_dict)
def _AssertFindsChangePoints(
    self, y_values, expected_indexes, max_window_size=50, min_segment_size=6,
    min_absolute_change=0, min_relative_change=0.01, min_steppiness=0.4,
    multiple_of_std_dev=2.5):
  """Asserts that change points are found at particular indexes.

  The y-values are paired with their list positions as x-values, so the
  x_value of each resulting change point is an index into y_values.

  Args:
    y_values: A list of y-values making up the series.
    expected_indexes: The list of x-values (indexes) at which change points
        are expected, in the order FindChangePoints returns them.
    max_window_size: Forwarded to FindChangePoints.
    min_segment_size: Forwarded to FindChangePoints.
    min_absolute_change: Forwarded to FindChangePoints.
    min_relative_change: Forwarded to FindChangePoints.
    min_steppiness: Forwarded to FindChangePoints.
    multiple_of_std_dev: Forwarded to FindChangePoints.
  """
  thresholds = {
      'max_window_size': max_window_size,
      'min_segment_size': min_segment_size,
      'min_absolute_change': min_absolute_change,
      'min_relative_change': min_relative_change,
      'min_steppiness': min_steppiness,
      'multiple_of_std_dev': multiple_of_std_dev,
  }
  points = list(enumerate(y_values))
  found = find_change_points.FindChangePoints(points, **thresholds)
  found_indexes = [change_point.x_value for change_point in found]
  self.assertEqual(expected_indexes, found_indexes)
def testFindChangePoints_EmptySeries(self):
  # With no points at all, there is nothing that could be a change point.
  empty_series = []
  found = find_change_points.FindChangePoints(empty_series)
  self.assertEqual([], found)