Example #1
0
    def test_equal_scalar_data(self):
        # when two scalars are equal, there is an -0.5/0 in the asymptotic
        # approximation. R gives pvalue=1.0 for alternatives 'less' and
        # 'greater' but NA for 'two-sided'. I don't see why, so I don't
        # see a need for a special case to match that behavior.
        assert_equal(mannwhitneyu(1, 1, method="exact"), (0.5, 1))
        assert_equal(mannwhitneyu(1, 1, method="asymptotic"), (0.5, 1))

        # without continuity correction, this becomes 0/0, which really
        # is undefined
        assert_equal(
            mannwhitneyu(1, 1, method="asymptotic", use_continuity=False),
            (0.5, np.nan))
Example #2
0
 def test_gh_9184(self, use_continuity, alternative, method, pvalue_exp):
     # gh-9184 might be considered a doc-only bug. Please see the
     # documentation to confirm that mannwhitneyu correctly notes
     # that the output statistic is that of the first sample (x). In any
     # case, check the case provided there against output from R.
     # R code:
     # options(digits=16)
     # x <- c(0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46)
     # y <- c(1.15, 0.88, 0.90, 0.74, 1.21)
     # wilcox.test(x, y, alternative = "less", exact = FALSE)
     # wilcox.test(x, y, alternative = "greater", exact = FALSE)
     # wilcox.test(x, y, alternative = "two.sided", exact = FALSE)
     # wilcox.test(x, y, alternative = "less", exact = FALSE,
     #             correct=FALSE)
     # wilcox.test(x, y, alternative = "greater", exact = FALSE,
     #             correct=FALSE)
     # wilcox.test(x, y, alternative = "two.sided", exact = FALSE,
     #             correct=FALSE)
     # wilcox.test(x, y, alternative = "less", exact = TRUE)
     # wilcox.test(x, y, alternative = "greater", exact = TRUE)
     # wilcox.test(x, y, alternative = "two.sided", exact = TRUE)
     statistic_exp = 35
     x = (0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46)
     y = (1.15, 0.88, 0.90, 0.74, 1.21)
     res = mannwhitneyu(x,
                        y,
                        use_continuity=use_continuity,
                        alternative=alternative,
                        method=method)
     assert_equal(res.statistic, statistic_exp)
     assert_allclose(res.pvalue, pvalue_exp)
Example #3
0
    def test_gh_11355(self):
        # Check how Inf and NaN entries in the input are handled
        x = [1, 2, 3, 4]
        y = [3, 6, 7, 8, 9, 3, 2, 1, 4, 4, 5]
        baseline = mannwhitneyu(x, y)

        # Swapping the largest entry for Inf must not matter: this is a
        # rank test and the entry keeps its rank.
        y[4] = np.inf
        with_inf = mannwhitneyu(x, y)
        for field in ("statistic", "pvalue"):
            assert_equal(getattr(baseline, field), getattr(with_inf, field))

        # A NaN entry is expected to be rejected with an error
        # (no nan_policy for now).
        y[4] = np.nan
        with assert_raises(ValueError, match="`x` and `y` must not contain"):
            mannwhitneyu(x, y)
Example #4
0
    def test_exact_U_equals_mean(self):
        # Test U == m*n/2 with exact method
        # Without special treatment, two-sided p-value > 1 because both
        # one-sided p-values are > 0.5
        res_l = mannwhitneyu([1, 2, 3], [1.5, 2.5],
                             alternative="less",
                             method="exact")
        res_g = mannwhitneyu([1, 2, 3], [1.5, 2.5],
                             alternative="greater",
                             method="exact")
        assert_equal(res_l.pvalue, res_g.pvalue)
        assert res_l.pvalue > 0.5

        res = mannwhitneyu([1, 2, 3], [1.5, 2.5],
                           alternative="two-sided",
                           method="exact")
        assert_equal(res, (3, 1))
Example #5
0
 def test_gh_2118(self, x, y, alternative, expected):
     # test cases in which U == m*n/2 when method is asymptotic
     # applying continuity correction could result in p-value > 1
     res = mannwhitneyu(x,
                        y,
                        use_continuity=True,
                        alternative=alternative,
                        method="asymptotic")
     assert_allclose(res, expected, rtol=1e-12)
Example #6
0
    def test_asymptotic_behavior(self):
        np.random.seed(0)

        # for small samples, the asymptotic test is not very accurate
        x = np.random.rand(5)
        y = np.random.rand(5)
        res1 = mannwhitneyu(x, y, method="exact")
        res2 = mannwhitneyu(x, y, method="asymptotic")
        assert res1.statistic == res2.statistic
        assert np.abs(res1.pvalue - res2.pvalue) > 1e-2

        # for large samples, they agree reasonably well
        x = np.random.rand(40)
        y = np.random.rand(40)
        res1 = mannwhitneyu(x, y, method="exact")
        res2 = mannwhitneyu(x, y, method="asymptotic")
        assert res1.statistic == res2.statistic
        assert np.abs(res1.pvalue - res2.pvalue) < 1e-3
Example #7
0
    def test_gh_12837_11113(self, method):
        # Test that behavior for broadcastable nd arrays is appropriate:
        # output shape is correct and all values are equal to when the test
        # is performed on one pair of samples at a time.
        # Tests that gh-12837 and gh-11113 (requests for n-d input)
        # are resolved
        np.random.seed(0)

        # arrays are broadcastable except for axis = -3
        axis = -3
        m, n = 7, 10  # sample sizes
        x = np.random.rand(m, 3, 8)
        y = np.random.rand(6, n, 1, 8) + 0.1
        res = mannwhitneyu(x, y, method=method, axis=axis)

        shape = (6, 3, 8)  # appropriate shape of outputs, given inputs
        assert (res.pvalue.shape == shape)
        assert (res.statistic.shape == shape)

        # move axis of test to end for simplicity
        x, y = np.moveaxis(x, axis, -1), np.moveaxis(y, axis, -1)

        x = x[None, ...]  # give x a zeroth dimension
        assert (x.ndim == y.ndim)

        x = np.broadcast_to(x, shape + (m, ))
        y = np.broadcast_to(y, shape + (n, ))
        assert (x.shape[:-1] == shape)
        assert (y.shape[:-1] == shape)

        # loop over pairs of samples
        statistics = np.zeros(shape)
        pvalues = np.zeros(shape)
        for indices in product(*[range(i) for i in shape]):
            xi = x[indices]
            yi = y[indices]
            temp = mannwhitneyu(xi, yi, method=method)
            statistics[indices] = temp.statistic
            pvalues[indices] = temp.pvalue

        np.testing.assert_equal(res.pvalue, pvalues)
        np.testing.assert_equal(res.statistic, statistics)
Example #8
0
 def test_continuity(self, kwds, expected):
     # When x and y are interchanged, less and greater p-values should
     # swap (compare to above). This wouldn't happen if the continuity
     # correction were applied in the wrong direction. Note that less and
     # greater p-values do not sum to 1 when continuity correction is on,
     # which is what we'd expect. Also check that results match R when
     # continuity correction is turned off.
     # Note that method='asymptotic' -> exact=FALSE
     # and use_continuity=False -> correct=FALSE, e.g.:
     # wilcox.test(x, y, alternative="t", exact=FALSE, correct=FALSE)
     res = mannwhitneyu(self.y, self.x, method='asymptotic', **kwds)
     assert_allclose(res, expected)
Example #9
0
 def test_tie_correct(self):
     # Test tie correction against R's wilcox.test
     # options(digits = 16)
     # x = c(1, 2, 3, 4)
     # y = c(1, 2, 3, 4, 5)
     # wilcox.test(x, y, exact=FALSE)
     x = [1, 2, 3, 4]
     y0 = np.array([1, 2, 3, 4, 5])
     dy = np.array([0, 1, 0, 1, 0]) * 0.01
     dy2 = np.array([0, 0, 1, 0, 0]) * 0.01
     y = [y0 - 0.01, y0 - dy, y0 - dy2, y0, y0 + dy2, y0 + dy, y0 + 0.01]
     res = mannwhitneyu(x, y, axis=-1, method="asymptotic")
     U_expected = [10, 9, 8.5, 8, 7.5, 7, 6]
     p_expected = [
         1, 0.9017048037317, 0.804080657472, 0.7086240584439,
         0.6197963884941, 0.5368784563079, 0.3912672792826
     ]
     assert_equal(res.statistic, U_expected)
     assert_allclose(res.pvalue, p_expected)
Example #10
0
 def test_gh_4067(self):
     # Both samples consist of NaN only; this is expected to be rejected
     # with a ValueError.
     all_nan_x = np.array([np.nan] * 5)
     all_nan_y = np.array([np.nan] * 5)
     with assert_raises(ValueError, match="`x` and `y` must not contain"):
         mannwhitneyu(all_nan_x, all_nan_y)
Example #11
0
 def test_gh_6897(self):
     # Two empty samples are expected to be rejected with a ValueError
     empty_x, empty_y = [], []
     with assert_raises(ValueError, match="`x` and `y` must be of nonzero"):
         mannwhitneyu(empty_x, empty_y)
Example #12
0
 def test_gh_11355b(self, x, y, statistic, pvalue):
     # Test for correct behavior with NaN/Inf in input
     res = mannwhitneyu(x, y, method='asymptotic')
     assert_allclose(res.statistic, statistic, atol=1e-12)
     assert_allclose(res.pvalue, pvalue, atol=1e-12)
Example #13
0
 def test_scalar_data(self, kwds, result):
     # just making sure scalars work
     assert_allclose(mannwhitneyu(1, 2, **kwds), result)
Example #14
0
 def test_basic(self, kwds, expected):
     res = mannwhitneyu(self.x, self.y, **kwds)
     assert_allclose(res, expected)
Example #15
0
    def test_auto(self):
        # Test that default method ('auto') chooses intended method

        np.random.seed(1)
        n = 8  # threshold to switch from exact to asymptotic

        # both inputs are smaller than threshold; should use exact
        x = np.random.rand(n - 1)
        y = np.random.rand(n - 1)
        auto = mannwhitneyu(x, y)
        asymptotic = mannwhitneyu(x, y, method='asymptotic')
        exact = mannwhitneyu(x, y, method='exact')
        assert auto.pvalue == exact.pvalue
        assert auto.pvalue != asymptotic.pvalue

        # one input is smaller than threshold; should use exact
        x = np.random.rand(n - 1)
        y = np.random.rand(n + 1)
        auto = mannwhitneyu(x, y)
        asymptotic = mannwhitneyu(x, y, method='asymptotic')
        exact = mannwhitneyu(x, y, method='exact')
        assert auto.pvalue == exact.pvalue
        assert auto.pvalue != asymptotic.pvalue

        # other input is smaller than threshold; should use exact
        auto = mannwhitneyu(y, x)
        asymptotic = mannwhitneyu(x, y, method='asymptotic')
        exact = mannwhitneyu(x, y, method='exact')
        assert auto.pvalue == exact.pvalue
        assert auto.pvalue != asymptotic.pvalue

        # both inputs are larger than threshold; should use asymptotic
        x = np.random.rand(n + 1)
        y = np.random.rand(n + 1)
        auto = mannwhitneyu(x, y)
        asymptotic = mannwhitneyu(x, y, method='asymptotic')
        exact = mannwhitneyu(x, y, method='exact')
        assert auto.pvalue != exact.pvalue
        assert auto.pvalue == asymptotic.pvalue

        # both inputs are smaller than threshold, but there is a tie
        # should use asymptotic
        x = np.random.rand(n - 1)
        y = np.random.rand(n - 1)
        y[3] = x[3]
        auto = mannwhitneyu(x, y)
        asymptotic = mannwhitneyu(x, y, method='asymptotic')
        exact = mannwhitneyu(x, y, method='exact')
        assert auto.pvalue != exact.pvalue
        assert auto.pvalue == asymptotic.pvalue
Example #16
0
 def test_input_validation(self):
     # Every invalid call below is expected to raise a ValueError whose
     # message matches the given pattern.
     x = np.array([1, 2])  # generic, valid inputs
     y = np.array([3, 4])

     # (expected message pattern, positional arguments, keyword arguments)
     invalid_calls = [
         ("`x` and `y` must be of nonzero", ([], y), {}),
         ("`x` and `y` must be of nonzero", (x, []), {}),
         ("`x` and `y` must not contain", ([np.nan, 2], y), {}),
         ("`use_continuity` must be one", (x, y), {"use_continuity": 'ekki'}),
         ("`alternative` must be one of", (x, y), {"alternative": 'ekki'}),
         ("`axis` must be an integer", (x, y), {"axis": 1.5}),
         ("`method` must be one of", (x, y), {"method": 'ekki'}),
     ]
     for message, args, kwargs in invalid_calls:
         with assert_raises(ValueError, match=message):
             mannwhitneyu(*args, **kwargs)