def test_zprob(self):
    """zprob should match twice the z_high probability for abs(z)"""
    # Reference one-tailed probabilities, paired positionally with the
    # entries of self.values / self.negvalues.
    one_tailed = [
        5.000000e-01, 4.960106e-01, 4.601722e-01, 3.085375e-01,
        1.586553e-01, 2.275013e-02, 2.866516e-07, 7.619853e-24,
        2.753624e-89, 4.906714e-198, 0.000000e+00, 0.000000e+00,
    ]
    # The expected zprob result is double the one-tailed reference value.
    expected = [2 * prob for prob in one_tailed]

    def check(z_values):
        # Compare zprob of every z against its doubled reference value.
        for z, prob in zip(z_values, expected):
            self.assertFloatEqual(zprob(z), prob)

    # The same expectations are asserted for both fixtures.
    check(self.values)
    check(self.negvalues)
def z_tailed_prob(z, tails):
    """Return the p-value for z, choosing the tail from `tails`.

    Arguments:
        - z: the z statistic passed through to the chosen function
        - tails: 'high' selects z_high, 'low' selects z_low; any other
          value falls back to zprob
    """
    if tails == 'high':
        tail_prob = z_high
    elif tails == 'low':
        tail_prob = z_low
    else:
        tail_prob = zprob
    return tail_prob(z)
def kendalls_tau(x, y, return_p=True):
    """returns kendall's tau

    Arguments:
        - x, y: the paired observations; they are converted to paired ranks
          with as_paired_ranks before pairs are compared
        - return_p: returns the probability from the normal approximation
          when True, otherwise just returns tau

    Returns tau on its own when return_p is False, otherwise the tuple
    (tau, p). The denominator of tau leaves out pairs tied in x and pairs
    tied in y, and the variance used for p is tie-corrected whenever any
    pair is tied.

    Raises ZeroDivisionError when no pair differs in x or no pair differs
    in y (this includes fewer than two observations).
    """
    ranked = as_paired_ranks(x, y)
    n = len(ranked)
    con = 0  # concordant pairs
    discor = 0  # discordant pairs
    x_only_tied = 0  # pairs tied in x but different in y
    y_only_tied = 0  # pairs tied in y but different in x
    both_tied = 0  # pairs tied in x and in y at the same time
    for i in range(n - 1):
        x_1, y_1 = ranked[i][0], ranked[i][1]
        for j in range(i + 1, n):
            x_diff = x_1 - ranked[j][0]
            y_diff = y_1 - ranked[j][1]
            if x_diff * y_diff > 0:
                con += 1
            elif x_diff and y_diff:
                discor += 1
            elif x_diff:
                y_only_tied += 1
            elif y_diff:
                x_only_tied += 1
            else:
                both_tied += 1

    diff = con - discor
    total = con + discor
    # total + y_only_tied counts the pairs that differ in x, and
    # total + x_only_tied the pairs that differ in y.
    denom = ((total + y_only_tied) * (total + x_only_tied))**0.5
    tau = diff / denom
    if not return_p:
        # the tie bookkeeping below only feeds the p-value
        return tau

    # NOTE(review): the variance expressions rely on true division; confirm
    # the module enables it if this still runs on Python 2.
    if x_only_tied or y_only_tied or both_tied:
        # Pairs tied in both variables must trigger the correction too:
        # they do not change tau's denominator, but they do change the
        # variance of the test statistic.
        # NOTE(review): Freqs is assumed to map each distinct value to its
        # count - confirm against its definition.
        x_ties = array([v for v in Freqs(x).itervalues() if v > 1])
        y_ties = array([v for v in Freqs(y).itervalues() if v > 1])
        t0 = n * (n - 1) / 2
        t1 = sum(x_ties * (x_ties - 1)) / 2
        t2 = sum(y_ties * (y_ties - 1)) / 2
        # rescale tau so the statistic matches the variance computed below
        stat = tau * sqrt((t0 - t1) * (t0 - t2))
        v0 = n * (n - 1) * (2 * n + 5)
        vt = sum(x_ties * (x_ties - 1) * (2 * x_ties + 5))
        vu = sum(y_ties * (y_ties - 1) * (2 * y_ties + 5))
        v1 = sum(x_ties * (x_ties - 1)) * sum(y_ties * (y_ties - 1))
        v2 = sum(x_ties * (x_ties - 1) * (x_ties - 2)) * \
            sum(y_ties * (y_ties - 1) * (y_ties - 2))
        variance = (v0 - vt - vu) / 18 + v1 / (2 * n * (n - 1)) + \
            v2 / (9 * n * (n - 1) * (n - 2))
    else:
        # no ties at all: tau itself is tested against its own variance
        stat = tau
        variance = (4 * n + 10) / (9 * n * (n - 1))

    return tau, zprob(stat / variance**0.5)
def kendalls_tau(x, y, return_p=True):
    """returns kendall's tau

    Arguments:
        - x, y: the paired observations; they are converted to paired ranks
          with as_paired_ranks before pairs are compared
        - return_p: returns the probability from the normal approximation
          when True, otherwise just returns tau

    Returns tau on its own when return_p is False, otherwise the tuple
    (tau, p). The denominator of tau leaves out pairs tied in x and pairs
    tied in y, and the variance used for p is tie-corrected whenever any
    pair is tied.

    Raises ZeroDivisionError when no pair differs in x or no pair differs
    in y (this includes fewer than two observations).
    """
    ranked = as_paired_ranks(x, y)
    n = len(ranked)
    con = 0  # concordant pairs
    discor = 0  # discordant pairs
    x_only_tied = 0  # pairs tied in x but different in y
    y_only_tied = 0  # pairs tied in y but different in x
    both_tied = 0  # pairs tied in x and in y at the same time
    for i in range(n - 1):
        x_1, y_1 = ranked[i][0], ranked[i][1]
        for j in range(i + 1, n):
            x_diff = x_1 - ranked[j][0]
            y_diff = y_1 - ranked[j][1]
            if x_diff * y_diff > 0:
                con += 1
            elif x_diff and y_diff:
                discor += 1
            elif x_diff:
                y_only_tied += 1
            elif y_diff:
                x_only_tied += 1
            else:
                both_tied += 1

    diff = con - discor
    total = con + discor
    # total + y_only_tied counts the pairs that differ in x, and
    # total + x_only_tied the pairs that differ in y.
    denom = ((total + y_only_tied) * (total + x_only_tied))**0.5
    tau = diff / denom
    if not return_p:
        # the tie bookkeeping below only feeds the p-value
        return tau

    # NOTE(review): the variance expressions rely on true division; confirm
    # the module enables it if this still runs on Python 2.
    if x_only_tied or y_only_tied or both_tied:
        # Pairs tied in both variables must trigger the correction too:
        # they do not change tau's denominator, but they do change the
        # variance of the test statistic.
        # NOTE(review): Freqs is assumed to map each distinct value to its
        # count - confirm against its definition.
        x_ties = array([v for v in Freqs(x).itervalues() if v > 1])
        y_ties = array([v for v in Freqs(y).itervalues() if v > 1])
        t0 = n * (n - 1) / 2
        t1 = sum(x_ties * (x_ties - 1)) / 2
        t2 = sum(y_ties * (y_ties - 1)) / 2
        # rescale tau so the statistic matches the variance computed below
        stat = tau * sqrt((t0 - t1) * (t0 - t2))
        v0 = n * (n - 1) * (2 * n + 5)
        vt = sum(x_ties * (x_ties - 1) * (2 * x_ties + 5))
        vu = sum(y_ties * (y_ties - 1) * (2 * y_ties + 5))
        v1 = sum(x_ties * (x_ties - 1)) * sum(y_ties * (y_ties - 1))
        v2 = sum(x_ties * (x_ties - 1) * (x_ties - 2)) * \
            sum(y_ties * (y_ties - 1) * (y_ties - 2))
        variance = (v0 - vt - vu) / 18 + v1 / (2 * n * (n - 1)) + \
            v2 / (9 * n * (n - 1) * (n - 2))
    else:
        # no ties at all: tau itself is tested against its own variance
        stat = tau
        variance = (4 * n + 10) / (9 * n * (n - 1))

    return tau, zprob(stat / variance**0.5)
def mw_test(x, y):
    """computes the Mann-Whitney U statistic and the probability using the
    normal approximation

    Arguments:
        - x, y: the two samples of numeric observations; the shorter one is
          always used as the sample whose ranks are summed

    Returns the tuple (U, p): U is the larger of the two U statistics and p
    is zprob of the tie-corrected normal deviate.

    Raises ZeroDivisionError when the two samples hold fewer than two
    observations in total.
    """
    if len(x) > len(y):
        x, y = y, x

    num_x = len(x)
    num_y = len(y)

    # One record per observation: its value, a sample label (0 for x, 1 for
    # y) and a rank slot that is filled in below. Plain list concatenation
    # avoids depending on zip() returning a list.
    records = [(value, 0, 0) for value in x] + [(value, 1, 0) for value in y]
    combined = array(records, dtype=[('stat', float), ('sample', int),
                                     ('rank', float)])
    combined.sort(order='stat')

    total = combined.shape[0]
    stats = combined['stat']
    ranks = combined['rank']  # field view: writes land in `combined`

    # Walk the sorted values one run of equal values at a time. Every member
    # of a run gets the run's average rank; T accumulates t**3 - t over the
    # tied runs for the variance correction.
    T = 0.0
    run_start = 0
    while run_start < total:
        run_end = run_start + 1
        while run_end < total and stats[run_end] == stats[run_start]:
            run_end += 1
        num_tied = run_end - run_start
        if num_tied == 1:
            ranks[run_start] = run_start + 1
        else:
            # The run occupies 1-based ranks run_start + 1 .. run_end. Passing
            # exactly those bounds also covers a run that reaches the end of
            # the data, which previously dropped its last rank from the
            # average.
            # NOTE(review): assumes _average_rank takes the first and last
            # rank inclusively - confirm against its definition.
            ranks[run_start:run_end] = _average_rank(run_start + 1, run_end)
            T += num_tied**3 - num_tied
        run_start = run_end

    # rank sum of the (shorter) first sample
    x_ranks_sum = ranks[combined['sample'] == 0].sum()
    prod = num_x * num_y
    U1 = prod + (num_x * (num_x + 1) / 2) - x_ranks_sum
    U2 = prod - U1
    U = max([U1, U2])
    numerator = U - prod / 2
    denominator = sqrt((prod / (total * (total - 1))) *
                       ((total**3 - total - T) / 12))
    z = numerator / denominator
    p = zprob(z)
    return U, p