def MakeChangePoint(series, split_index):
    """Builds a ChangePoint describing a split of the series at an index.

    Args:
      series: A list of (x, y) pairs.
      split_index: Index of the first point after the split. Must satisfy
          0 <= split_index < len(series).

    Returns:
      A ChangePoint object summarising the y-values on each side of the
      split (medians, sizes, relative change, std. deviation of the left
      side) together with the Welch's t-test results for the two sides.
    """
    assert 0 <= split_index < len(series)

    # Separate the (x, y) pairs into parallel tuples of x's and y's.
    xs, ys = zip(*series)
    before = ys[:split_index]
    after = ys[split_index:]

    median_before = math_utils.Median(before)
    median_after = math_utils.Median(after)
    welch = ttest.WelchsTTest(before, after)

    return ChangePoint(
        x_value=xs[split_index],
        median_before=median_before,
        median_after=median_after,
        size_before=len(before),
        size_after=len(after),
        window_start=xs[0],
        window_end=xs[-1],  # inclusive bound
        relative_change=math_utils.RelativeChange(median_before, median_after),
        std_dev_before=math_utils.StandardDeviation(before),
        t_statistic=welch.t,
        degrees_of_freedom=welch.df,
        p_value=welch.p)
def _PassesThresholds(values, split_index, min_segment_size, min_absolute_change, min_relative_change, min_steppiness, multiple_of_std_dev): """Checks whether a point in a series appears to be an change point. Args: values: A list of numbers. split_index: An index in the list of numbers. min_segment_size: Threshold for size of segments before or after a point. min_absolute_change: Minimum absolute median change threshold. min_relative_change: Minimum relative median change threshold. min_steppiness: Threshold for how similar to a step a change point must be. multiple_of_std_dev: Threshold for change as multiple of std. deviation. Returns: A tuple of (bool, string) where the bool indicates whether the split index passes the thresholds and the string being the reason it did not. """ left, right = values[:split_index], values[split_index:] left_median, right_median = math_utils.Median(left), math_utils.Median( right) # 1. Segment size filter. if len(left) < min_segment_size or len(right) < min_segment_size: return (False, 'min_segment_size') # 2. Absolute change filter. absolute_change = abs(left_median - right_median) if absolute_change < min_absolute_change: return (False, 'min_absolute_change') # 3. Relative change filter. relative_change = math_utils.RelativeChange(left_median, right_median) if relative_change < min_relative_change: return (False, 'min_relative_change') # 4. Multiple of standard deviation filter. min_std_dev = min(math_utils.StandardDeviation(left), math_utils.StandardDeviation(right)) if absolute_change < multiple_of_std_dev * min_std_dev: return (False, 'min_std_dev') # 5. Steppiness filter. steppiness = find_step.Steppiness(values, split_index) if steppiness < min_steppiness: return (False, 'min_steppiness') # Passed all filters! return (True, 'passed')
def testGetSimilarHistoricalTimings_Same(self):
    """Timings for a job come from records sharing its exact tags.

    One record is stored for story 'bar1' and ten for story 'bar2' with
    durations of 0..9 seconds. The timings looked up for the last 'bar2'
    job are expected to match the median, standard deviation and 90th
    percentile of 0..9.
    """
    now = datetime.datetime.now()
    one_minute_ago = now - datetime.timedelta(minutes=1)
    self._RecordTiming(
        {
            'configuration': 'linux',
            'benchmark': 'foo',
            'story': 'bar1'
        }, one_minute_ago, now)

    # Expected statistics over the durations (in seconds) recorded below.
    expected_median = math_utils.Median(list(range(10)))
    expected_std_dev = math_utils.StandardDeviation(list(range(10)))
    expected_p90 = math_utils.Percentile(list(range(10)), 0.9)

    for seconds in range(10):
        started = now - datetime.timedelta(seconds=seconds)
        job = self._RecordTiming(
            {
                'configuration': 'linux',
                'benchmark': 'foo',
                'story': 'bar2'
            }, started, now)

    # Only the most recently recorded job is queried.
    timings, tags = timing_record.GetSimilarHistoricalTimings(job)

    self.assertEqual(['try', 'linux', 'foo', 'bar2'], tags)
    actual_median, actual_std_dev, actual_p90 = (
        timings[0].total_seconds(),
        timings[1].total_seconds(),
        timings[2].total_seconds())
    self.assertClose(expected_median, actual_median)
    self.assertClose(expected_std_dev, actual_std_dev)
    self.assertClose(expected_p90, actual_p90)
def testZeroMedian_ResultProperties(self):
    """_ZeroMedian keeps the standard deviation and zeroes the median."""
    samples = [3.4, 8, 100.2, 78, 3, -4, 12, 3.14, 1024]
    shifted = find_change_points._ZeroMedian(samples)

    # The output of _ZeroMedian has the same standard deviation as the input.
    input_std_dev = math_utils.StandardDeviation(samples)
    output_std_dev = math_utils.StandardDeviation(shifted)
    self.assertEqual(input_std_dev, output_std_dev)

    # Also, the median of the output is always zero.
    output_median = math_utils.Median(shifted)
    self.assertEqual(0, output_median)
def FindMagnitudeBetweenCommits(test_key, start_commit, end_commit):
    """Returns the change in median value between two commits for a test.

    Args:
      test_key: Key of the test entity; it is resolved with .get() to look up
          the anomaly config.
      start_commit: Commit identifier, converted with
          _GitHashToCommitPosition.
      end_commit: Commit identifier, converted with _GitHashToCommitPosition.

    Returns:
      The median of the row values fetched around end_commit minus the median
      of the row values fetched around start_commit, or None if either fetch
      produced no rows.
    """
    start_position = _GitHashToCommitPosition(start_commit)
    end_position = _GitHashToCommitPosition(end_commit)

    # The window size comes from the test's anomaly config, falling back to
    # the change-point finder's default segment size.
    config = anomaly_config.GetAnomalyConfigDict(test_key.get())
    num_points = config.get('min_segment_size',
                            find_change_points.MIN_SEGMENT_SIZE)

    # NOTE(review): presumably the last two arguments are the number of rows
    # to fetch before and after the revision -- confirm against
    # graph_data.GetRowsForTestBeforeAfterRev.
    start_rows = graph_data.GetRowsForTestBeforeAfterRev(
        test_key, start_position, num_points, 0)
    end_rows = graph_data.GetRowsForTestBeforeAfterRev(
        test_key, end_position, 0, num_points)

    if not (start_rows and end_rows):
        return None

    values_before = [row.value for row in start_rows]
    values_after = [row.value for row in end_rows]
    median_before = math_utils.Median(values_before)
    median_after = math_utils.Median(values_after)
    return median_after - median_before
def _Estimate(tags, completed_before=None):
    """Estimates timings from the records matching the given tags.

    If no record matches, the last tag is dropped and the search is retried,
    repeating until some records are found or there are no tags left to
    drop.

    Args:
      tags: A sequence of tags passed to _QueryTimingRecords.
      completed_before: Optional cutoff passed to _QueryTimingRecords. It is
          applied to every retry as well, so a widened search never picks up
          records the original query would have excluded.

    Returns:
      An EstimateResult holding the Timings (median, standard deviation and
      90th percentile of the record durations, each as a timedelta) and the
      tags that actually produced records, or None if nothing matched even
      with an empty tag list.
    """
    records = _QueryTimingRecords(tags, completed_before)

    if not records:
        if tags:
            # Widen the search by dropping the last tag, but keep the same
            # cutoff: the fallback must honour completed_before too.
            return _Estimate(tags[:-1], completed_before)
        return None

    times = [(r.completed - r.started).total_seconds() for r in records]

    median = math_utils.Median(times)
    std_dev = math_utils.StandardDeviation(times)
    p90 = math_utils.Percentile(times, 0.9)
    timings = Timings(
        datetime.timedelta(seconds=median),
        datetime.timedelta(seconds=std_dev),
        datetime.timedelta(seconds=p90))

    return EstimateResult(timings, tags)
def _ZeroMedian(values):
    """Returns a new list holding each value minus the median of the list."""
    center = math_utils.Median(values)
    shifted = []
    for value in values:
        shifted.append(value - center)
    return shifted
def testMedian_EvenLengthList_UsesMeanOfMiddleTwoValues(self):
    """For an even-length input the median is the mean of the middle pair."""
    even_length_values = [1, 4, 16, 145]
    # The middle two values are 4 and 16, whose mean is 10.
    result = math_utils.Median(even_length_values)
    self.assertEqual(10.0, result)
def testMedian_OddLengthList_UsesMiddleValue(self):
    """For an odd-length input the median is the middle value."""
    odd_length_values = [1, 4, 16]
    result = math_utils.Median(odd_length_values)
    self.assertEqual(4.0, result)
def testMedian_OneValue(self):
    """The median of a single-element list is that element."""
    single_value = [3]
    result = math_utils.Median(single_value)
    self.assertEqual(3.0, result)
def testMedian_EmptyInput_ReturnsNan(self):
    """The median of an empty list is NaN."""
    result = math_utils.Median([])
    # NaN never compares equal to itself, so test it with math.isnan.
    self.assertTrue(math.isnan(result))