def testBasic(self):
  """Pin the p-values MannWhitneyU returns for two pairs of integer ranges."""
  # Samples whose values do not overlap at all.
  disjoint_p_value = mann_whitney_u.MannWhitneyU(range(10), range(20, 30))
  self.assertAlmostEqual(disjoint_p_value, 0.00018267179110955002)

  # The first sample is a prefix of the second.
  overlapping_p_value = mann_whitney_u.MannWhitneyU(range(5), range(10))
  self.assertAlmostEqual(overlapping_p_value, 0.13986357686781267)
def _CompareValues(values_a, values_b):
  """Classify two samples as different or unknown with a Mann-Whitney U test.

  Arguments:
    values_a: The first sample.
    values_b: The second sample.

  Returns:
    _DIFFERENT: The p-value is below _SIGNIFICANCE_LEVEL.
    _UNKNOWN: Either sample is empty, MannWhitneyU raised ValueError, or the
        p-value is not below _SIGNIFICANCE_LEVEL.
  """
  if not values_a or not values_b:
    # At least one sample has nothing in it.
    return _UNKNOWN

  try:
    p_value = mann_whitney_u.MannWhitneyU(values_a, values_b)
  except ValueError:
    # No p-value could be computed for these samples.
    return _UNKNOWN

  return _DIFFERENT if p_value < _SIGNIFICANCE_LEVEL else _UNKNOWN
def _CompareValues(values_a, values_b):
  """Decide whether two samples are the same, different, or unknown.

  Arguments:
    values_a: A list of sortable values. They don't need to be numeric.
    values_b: A list of sortable values. They don't need to be numeric.

  Returns:
    _DIFFERENT: The samples likely come from different distributions.
        Reject the null hypothesis.
    _SAME: Not enough evidence to say that the samples come from different
        distributions. Fail to reject the null hypothesis.
    _UNKNOWN: Not enough evidence to say that the samples come from different
        distributions, but it looks a little suspicious, and we would like more
        data before making a final decision.
  """
  if not (values_a and values_b):
    # A sample has no values in it.
    return _UNKNOWN

  # MWU is bad at detecting changes in variance, and K-S is bad with discrete
  # distributions. So use both. We want low p-values for the below examples.
  #        a                     b               MWU(a, b)  KS(a, b)
  # [0]*20            [0]*15+[1]*5                0.0097     0.4973
  # range(10, 30)     range(10)+range(30, 40)     0.4946     0.0082
  p_value = min(
      kolmogorov_smirnov.KolmogorovSmirnov(values_a, values_b),
      mann_whitney_u.MannWhitneyU(values_a, values_b))

  if p_value < _SIGNIFICANCE_LEVEL:
    # The p-value is less than the significance level. Reject the null
    # hypothesis.
    return _DIFFERENT

  # Pick the questionable threshold from the size of the smaller sample: one
  # step through _QUESTIONABLE_SIGNIFICANCE_LEVELS per ten values, clamped to
  # the last entry. Floor division keeps the index an integer; true division
  # would produce a float on Python 3, which cannot be used as a sequence
  # index.
  index = min(len(values_a), len(values_b)) // 10
  index = min(index, len(_QUESTIONABLE_SIGNIFICANCE_LEVELS) - 1)
  questionable_significance_level = _QUESTIONABLE_SIGNIFICANCE_LEVELS[index]

  if p_value < questionable_significance_level:
    # The p-value is not less than the significance level, but it's small enough
    # to be suspicious. We'd like to investigate more closely.
    return _UNKNOWN

  # The p-value is quite large. We're not suspicious that the two samples might
  # come from different distributions, and we don't care to investigate more.
  return _SAME
def _CompareValues(values_a, values_b):
  """Classify a pair of samples as same, different, or unknown.

  Arguments:
    values_a: A list of sortable values; they need not be numeric.
    values_b: A list of sortable values; they need not be numeric.

  Returns:
    _DIFFERENT: The Mann-Whitney U p-value is below _SIGNIFICANCE_LEVEL, so
        the null hypothesis is rejected.
    _UNKNOWN: A sample is empty or has fewer than _MINIMUM_VALUE_COUNT values,
        or the p-value is below _QUESTIONABLE_SIGNIFICANCE_LEVEL without
        being below _SIGNIFICANCE_LEVEL. More data is wanted before deciding.
    _SAME: The p-value is at or above both thresholds, so there is no reason
        to suspect the samples come from different distributions.
  """
  if not values_a or not values_b:
    # One of the samples is empty.
    return _UNKNOWN

  smaller_sample_size = min(len(values_a), len(values_b))
  if smaller_sample_size < _MINIMUM_VALUE_COUNT:
    # With so few values the significance test would never reject the null
    # hypothesis, so ask for more information. This can happen when many test
    # runs fail and leave few performance numbers to work with.
    return _UNKNOWN

  p_value = mann_whitney_u.MannWhitneyU(values_a, values_b)

  if p_value < _SIGNIFICANCE_LEVEL:
    # Significant: reject the null hypothesis.
    verdict = _DIFFERENT
  elif p_value < _QUESTIONABLE_SIGNIFICANCE_LEVEL:
    # Not significant, but small enough to be worth a closer look.
    verdict = _UNKNOWN
  else:
    # Large p-value: nothing suspicious, no further investigation needed.
    verdict = _SAME
  return verdict
def testAllValuesIdentical(self):
  """MannWhitneyU raises ValueError when every value in both samples is 0."""
  sample_a = [0] * 5
  sample_b = [0] * 5
  self.assertRaises(
      ValueError, mann_whitney_u.MannWhitneyU, sample_a, sample_b)
def testSmallSamples(self):
  """Two single-value samples produce a p-value of exactly 1.0."""
  p_value = mann_whitney_u.MannWhitneyU([0], [1])
  self.assertEqual(p_value, 1.0)
def testDuplicateValues(self):
  """Samples made only of repeated values yield the pinned p-value."""
  zeros = [0] * 5
  ones = [1] * 5
  self.assertAlmostEqual(
      mann_whitney_u.MannWhitneyU(zeros, ones), 0.0039767517097886512)
def testAllValuesIdentical(self):
  """Two samples consisting solely of zeros produce a p-value of exactly 1.0."""
  sample_a = [0] * 5
  sample_b = [0] * 5
  p_value = mann_whitney_u.MannWhitneyU(sample_a, sample_b)
  self.assertEqual(p_value, 1.0)