def mannwhitney_u(x, y):
    """
    Return the Mann-Whitney U statistic on the provided scores.

    Copied from scipy.stats.mannwhitneyu except that we only return the U
    such that large U means that population x was systematically larger
    than population y, rather than the smaller U between x and y.

    The two possible U values one can report are related by
    U' = n1*n2 - U.

    Parameters
    ----------
    x, y : array_like
        The two populations of scores; each must be one-dimensional.

    Returns
    -------
    U
        The rank sum of the x scores within the pooled sample, minus
        n1*(n1+1)/2 (algebraically; computed below as n1*n2 - U').

    Raises
    ------
    ValueError
        If either input is not rank 1.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    if x.ndim != 1 or y.ndim != 1:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("populations must be rank 1 collections")
    n1 = len(x)
    n2 = len(y)
    # Rank the pooled sample; x occupies the first n1 positions.
    ranked = rankdata(np.concatenate((x, y)))
    rankx = ranked[0:n1]
    # This is the complementary statistic U': it shrinks as the x ranks grow.
    u_complement = n1 * n2 + (n1 * (n1 + 1)) / 2.0 - rankx.sum()
    # Flip it so that a large result means x ranked above y.
    return n1 * n2 - u_complement
def test_identical_discrete(self):
    """Check rankdata against scipy_rankdata on integer-valued float data.

    Each of the 1000 trials draws 1000 integers from [0, 1000) and casts
    them to float, so repeated values (ties) are very likely in every
    trial. The two rankings must agree element for element.
    """
    # range (not xrange) keeps the test runnable on Python 2 and Python 3.
    for _ in range(1000):
        data = np.random.randint(0, 1000, size=1000).astype(float)
        # Reports the mismatching elements on failure, unlike a bare boolean.
        np.testing.assert_array_equal(rankdata(data), scipy_rankdata(data))
def test_identical_continuous(self):
    """Check rankdata against scipy_rankdata on uniformly drawn float data.

    Each of the 1000 trials draws 1000 samples with
    np.random.uniform(0, 1000). The two rankings must agree element for
    element.
    """
    # range (not xrange) keeps the test runnable on Python 2 and Python 3.
    for _ in range(1000):
        data = np.random.uniform(0, 1000, size=1000)
        # Reports the mismatching elements on failure, unlike a bare boolean.
        np.testing.assert_array_equal(rankdata(data), scipy_rankdata(data))