def MakeChangePoint(series, split_index):
    """Builds the ChangePoint describing a split of a series at an index.

    The y-values are divided into the points before |split_index| and the
    points from |split_index| onward. The two segments are then summarized
    by their medians and sizes, the relative change between the medians, the
    standard deviation of the first segment, and the result of
    ttest.WelchsTTest on the two segments.

    Args:
      series: A list of (x, y) pairs.
      split_index: Index of the first point after the split. Must satisfy
        0 <= split_index < len(series).

    Returns:
      A ChangePoint object.
    """
    assert 0 <= split_index < len(series)
    xs, ys = zip(*series)
    before = ys[:split_index]
    after = ys[split_index:]
    median_before = math_utils.Median(before)
    median_after = math_utils.Median(after)
    welch = ttest.WelchsTTest(before, after)
    change = math_utils.RelativeChange(median_before, median_after)
    spread_before = math_utils.StandardDeviation(before)
    return ChangePoint(
        x_value=xs[split_index],
        median_before=median_before,
        median_after=median_after,
        size_before=len(before),
        size_after=len(after),
        window_start=xs[0],
        # The window end is an inclusive bound: it is the last x-value itself.
        window_end=xs[-1],
        relative_change=change,
        std_dev_before=spread_before,
        t_statistic=welch.t,
        degrees_of_freedom=welch.df,
        p_value=welch.p)
def _PassesThresholds(values, split_index, min_segment_size,
                      min_absolute_change, min_relative_change, min_steppiness,
                      multiple_of_std_dev):
    """Checks whether a point in a series appears to be a change point.

    The series is split at |split_index| and a fixed sequence of filters is
    applied. The first filter that fails determines the returned reason.

    Args:
      values: A list of numbers.
      split_index: An index in the list of numbers.
      min_segment_size: Threshold for size of segments before or after a
        point.
      min_absolute_change: Minimum absolute median change threshold.
      min_relative_change: Minimum relative median change threshold.
      min_steppiness: Threshold for how similar to a step a change point
        must be.
      multiple_of_std_dev: Threshold for change as multiple of std.
        deviation.

    Returns:
      A tuple of (bool, string). The bool says whether the split index passed
      every threshold. The string names the first threshold that failed, or
      is 'passed' when none did.
    """
    before = values[:split_index]
    after = values[split_index:]
    median_before = math_utils.Median(before)
    median_after = math_utils.Median(after)

    # 1. Segment size: the shorter of the two segments must be long enough.
    if min(len(before), len(after)) < min_segment_size:
        return (False, 'min_segment_size')

    # 2. Absolute change between the two medians.
    median_gap = abs(median_before - median_after)
    if median_gap < min_absolute_change:
        return (False, 'min_absolute_change')

    # 3. Relative change between the two medians.
    relative_gap = math_utils.RelativeChange(median_before, median_after)
    if relative_gap < min_relative_change:
        return (False, 'min_relative_change')

    # 4. The change must be large compared with the less noisy segment.
    std_dev_before = math_utils.StandardDeviation(before)
    std_dev_after = math_utils.StandardDeviation(after)
    smaller_std_dev = min(std_dev_before, std_dev_after)
    if median_gap < multiple_of_std_dev * smaller_std_dev:
        return (False, 'min_std_dev')

    # 5. Steppiness of the whole series around the split.
    if find_step.Steppiness(values, split_index) < min_steppiness:
        return (False, 'min_steppiness')

    # Every filter passed.
    return (True, 'passed')
Beispiel #3
0
  def testGetSimilarHistoricalTimings_Same(self):
    """Checks the timings and tags returned for a job with matching history.

    One timing is recorded for story 'bar1' and ten for story 'bar2' with
    durations of 0..9 seconds. The estimate for the last 'bar2' job must
    carry the full 'bar2' tag list and the median, standard deviation and
    90th percentile of 0..9.
    """
    end_time = datetime.datetime.now()
    # A one-minute record for a different story ('bar1').
    self._RecordTiming(
        {
            'configuration': 'linux',
            'benchmark': 'foo',
            'story': 'bar1'
        }, end_time - datetime.timedelta(minutes=1), end_time)

    # Durations, in seconds, of the 'bar2' records created below.
    durations = list(range(10))
    expected_median = math_utils.Median(durations)
    expected_std_dev = math_utils.StandardDeviation(durations)
    expected_p90 = math_utils.Percentile(durations, 0.9)

    for seconds in durations:
      # Only the job returned for the final record is used afterwards.
      last_job = self._RecordTiming(
          {
              'configuration': 'linux',
              'benchmark': 'foo',
              'story': 'bar2'
          }, end_time - datetime.timedelta(seconds=seconds), end_time)

    timings, tags = timing_record.GetSimilarHistoricalTimings(last_job)

    self.assertEqual(['try', 'linux', 'foo', 'bar2'], tags)
    self.assertClose(expected_median, timings[0].total_seconds())
    self.assertClose(expected_std_dev, timings[1].total_seconds())
    self.assertClose(expected_p90, timings[2].total_seconds())
Beispiel #4
0
 def testZeroMedian_ResultProperties(self):
     """Checks that _ZeroMedian keeps the std. deviation and zeroes the median."""
     original = [3.4, 8, 100.2, 78, 3, -4, 12, 3.14, 1024]
     shifted = find_change_points._ZeroMedian(original)
     # The output must have the same standard deviation as the input.
     std_dev_in = math_utils.StandardDeviation(original)
     std_dev_out = math_utils.StandardDeviation(shifted)
     self.assertEqual(std_dev_in, std_dev_out)
     # The median of the output must be exactly zero.
     median_out = math_utils.Median(shifted)
     self.assertEqual(0, median_out)
Beispiel #5
0
def FindMagnitudeBetweenCommits(test_key, start_commit, end_commit):
    """Returns the difference in median value around two commits of a test.

    Both commits are converted with _GitHashToCommitPosition. The number of
    rows fetched on each side is the test's 'min_segment_size' anomaly config
    value, falling back to find_change_points.MIN_SEGMENT_SIZE.

    Args:
      test_key: Key of the test; test_key.get() is used to look up its
        anomaly config.
      start_commit: Commit at the start of the range, in whatever form
        _GitHashToCommitPosition accepts.
      end_commit: Commit at the end of the range, in the same form.

    Returns:
      The median of the end rows minus the median of the start rows, or None
      if either set of rows is empty.
    """
    start_position = _GitHashToCommitPosition(start_commit)
    end_position = _GitHashToCommitPosition(end_commit)

    config = anomaly_config.GetAnomalyConfigDict(test_key.get())
    segment_size = config.get('min_segment_size',
                              find_change_points.MIN_SEGMENT_SIZE)

    # NOTE(review): presumably the two count arguments are "rows before" and
    # "rows after" the revision -- confirm against GetRowsForTestBeforeAfterRev.
    rows_at_start = graph_data.GetRowsForTestBeforeAfterRev(
        test_key, start_position, segment_size, 0)
    rows_at_end = graph_data.GetRowsForTestBeforeAfterRev(
        test_key, end_position, 0, segment_size)

    if not (rows_at_start and rows_at_end):
        return None

    values_before = [row.value for row in rows_at_start]
    values_after = [row.value for row in rows_at_end]
    median_before = math_utils.Median(values_before)
    median_after = math_utils.Median(values_after)
    return median_after - median_before
Beispiel #6
0
def _Estimate(tags, completed_before=None):
    """Estimates job timings from timing records matching the given tags.

    If no records match the full tag list, the last tag is dropped and the
    query is retried, until records are found or an empty tag list has also
    been tried.

    Args:
      tags: A list of tags passed to _QueryTimingRecords.
      completed_before: Optional cutoff forwarded to _QueryTimingRecords. It
        applies to every fallback query as well as the first one.

    Returns:
      An EstimateResult holding the Timings (median, standard deviation and
      90th percentile as timedeltas) and the tag list that actually matched,
      or None if no records were found even with an empty tag list.
    """
    records = _QueryTimingRecords(tags, completed_before)

    if not records:
        if tags:
            # Keep the caller's cutoff when retrying with fewer tags.
            # Dropping it would let the fallback estimate use records the
            # caller explicitly excluded.
            return _Estimate(tags[:-1], completed_before)
        return None

    times = [(r.completed - r.started).total_seconds() for r in records]

    median = math_utils.Median(times)
    std_dev = math_utils.StandardDeviation(times)
    p90 = math_utils.Percentile(times, 0.9)
    timings = Timings(datetime.timedelta(seconds=median),
                      datetime.timedelta(seconds=std_dev),
                      datetime.timedelta(seconds=p90))

    return EstimateResult(timings, tags)
def _ZeroMedian(values):
    """Returns a new list with the median of |values| subtracted from each item.

    Args:
      values: A list of numbers.

    Returns:
      A list of the same length as |values|, shifted by the median.
    """
    center = math_utils.Median(values)
    return [item - center for item in values]
 def testMedian_EvenLengthList_UsesMeanOfMiddleTwoValues(self):
     """Median of an even-length list is the mean of its two middle values."""
     even_length_values = [1, 4, 16, 145]
     self.assertEqual(10.0, math_utils.Median(even_length_values))
 def testMedian_OddLengthList_UsesMiddleValue(self):
     """Median of an odd-length list is its middle value."""
     odd_length_values = [1, 4, 16]
     self.assertEqual(4.0, math_utils.Median(odd_length_values))
Beispiel #10
0
 def testMedian_OneValue(self):
     """Median of a single-element list is that element."""
     single_value = [3]
     self.assertEqual(3.0, math_utils.Median(single_value))
Beispiel #11
0
 def testMedian_EmptyInput_ReturnsNan(self):
     """Median of an empty list is NaN."""
     median_of_nothing = math_utils.Median([])
     self.assertTrue(math.isnan(median_of_nothing))