Example #1
0
def Compare(values_a, values_b, attempt_count, mode, magnitude):
    """Classify two samples as DIFFERENT, SAME, or UNKNOWN.

    Arguments:
        values_a: First sample: a list of sortable values, which need not be
            numeric.
        values_b: Second sample, with the same requirements as values_a.
        attempt_count: The average number of attempts made.
        mode: Either 'functional' or 'performance'. Each type gets its own
            significance thresholds.
        magnitude: Estimated size of the differences to look for; smaller
            differences need more values to be found. In 'functional' mode
            this is the failure rate, a float between 0 and 1. In
            'performance' mode it is a multiple of the interquartile range
            (IQR).

    Returns:
        DIFFERENT: The samples are unlikely to share a distribution, so they
            are likely different (the null hypothesis is rejected).
        SAME: The samples are unlikely to come from distributions that differ
            by the given magnitude (the alternative hypothesis is rejected).
        UNKNOWN: The evidence is insufficient to reject either hypothesis;
            more data should be collected before a final decision.
    """
    if not values_a or not values_b:
        # At least one sample is empty, so there is nothing to compare.
        return UNKNOWN

    # Two tests are combined because each has a blind spot: MWU is bad at
    # detecting changes in variance, and K-S is bad with discrete
    # distributions. Low p-values are wanted for both examples below.
    #        a                     b               MWU(a, b)  KS(a, b)
    # [0]*20            [0]*15+[1]*5                0.0097     0.4973
    # range(10, 30)     range(10)+range(30, 40)     0.4946     0.0082
    ks_p_value = kolmogorov_smirnov.KolmogorovSmirnov(values_a, values_b)
    mwu_p_value = mann_whitney_u.MannWhitneyU(values_a, values_b)
    smallest_p_value = min(ks_p_value, mwu_p_value)

    # At or below the low threshold: reject the null hypothesis.
    if smallest_p_value <= thresholds.LowThreshold():
        return DIFFERENT

    # Above the low threshold. A p-value still at or below the high threshold
    # is small enough to be suspicious and worth investigating further;
    # anything larger gives no reason to suspect different distributions.
    suspicion_limit = thresholds.HighThreshold(mode, magnitude, attempt_count)
    return UNKNOWN if smallest_p_value <= suspicion_limit else SAME
Example #2
0
 def testHighThresholdHighSampleSize(self):
     """The 'performance' high threshold at 50 attempts is <= the low one."""
     high_threshold = thresholds.HighThreshold('performance', 1.5, 50)
     low_threshold = thresholds.LowThreshold()
     self.assertLessEqual(high_threshold, low_threshold)
Example #3
0
 def testLowThreshold(self):
   """LowThreshold() is expected to equal 0.01."""
   low_threshold = thresholds.LowThreshold()
   self.assertEqual(low_threshold, 0.01)
Example #4
0
 def testHighThresholdPerformance(self):
     """The 'performance' high threshold at 20 attempts is <= the low one."""
     high_threshold = thresholds.HighThreshold('performance', 1.5, 20)
     low_threshold = thresholds.LowThreshold()
     self.assertLessEqual(high_threshold, low_threshold)