Beispiel #1
0
 def test_zprob(self):
     """zprob should match twice the z_high probability for abs(z)"""
     
     probs = [2*i for i in [
         5.000000e-01, 4.960106e-01, 4.601722e-01, 3.085375e-01,
         1.586553e-01, 2.275013e-02, 2.866516e-07, 7.619853e-24,
         2.753624e-89, 4.906714e-198, 0.000000e+00, 0.000000e+00]]
     
     for z, p in zip(self.values, probs):
         self.assertFloatEqual(zprob(z), p)
     for z, p in zip(self.negvalues, probs):
         self.assertFloatEqual(zprob(z), p)
Beispiel #2
0
def z_tailed_prob(z, tails):
    """Returns appropriate p-value for given z, depending on tails."""
    if tails == 'high':
        return z_high(z)
    elif tails == 'low':
        return z_low(z)
    else:
        return zprob(z)
Beispiel #3
0
def kendalls_tau(x, y, return_p=True):
    """returns kendall's tau
    
    Arguments:
        - return_p: returns the probability from the normal approximation when
          True, otherwise just returns tau"""
    ranked = as_paired_ranks(x, y)
    n = len(ranked)
    denom = n * (n - 1) / 2
    con = 0
    discor = 0
    x_tied = 0
    y_tied = 0
    for i in range(n - 1):
        x_1 = ranked[i][0]
        y_1 = ranked[i][1]
        for j in range(i + 1, n):
            x_2 = ranked[j][0]
            y_2 = ranked[j][1]
            x_diff = x_1 - x_2
            y_diff = y_1 - y_2
            if x_diff * y_diff > 0:
                con += 1
            elif x_diff and y_diff:
                discor += 1
            else:
                if x_diff:
                    y_tied += 1
                if y_diff:
                    x_tied += 1

    diff = con - discor
    total = con + discor
    denom = ((total + y_tied) * (total + x_tied))**0.5
    variance = (4 * n + 10) / (9 * n * (n - 1))
    tau = diff / denom
    stat = tau

    if x_tied or y_tied:
        x_tied = array([v for v in Freqs(x).itervalues() if v > 1])
        y_tied = array([v for v in Freqs(y).itervalues() if v > 1])
        t0 = n * (n - 1) / 2
        t1 = sum(x_tied * (x_tied - 1)) / 2
        t2 = sum(y_tied * (y_tied - 1)) / 2
        stat = tau * sqrt((t0 - t1) * (t0 - t2))
        v0 = n * (n - 1) * (2 * n + 5)
        vt = sum(x_tied * (x_tied - 1) * (2 * x_tied + 5))
        vu = sum(y_tied * (y_tied - 1) * (2 * y_tied + 5))
        v1 = sum(x_tied * (x_tied - 1)) * sum(y_tied * (y_tied - 1))
        v2 = sum(x_tied * (x_tied - 1) * (x_tied - 2)) * \
               sum(y_tied * (y_tied - 1) * (y_tied - 2))
        variance = (v0 - vt - vu) / 18 + v1 / (2 * n * (n - 1)) + v2 / (9 * n * \
                                                        (n - 1) * (n - 2))
    if return_p:
        return tau, zprob(stat / variance**0.5)
    else:
        return tau
Beispiel #4
0
def kendalls_tau(x, y, return_p=True):
    """returns kendall's tau
    
    Arguments:
        - return_p: returns the probability from the normal approximation when
          True, otherwise just returns tau"""
    ranked = as_paired_ranks(x, y)
    n = len(ranked)
    denom = n * (n-1) / 2
    con = 0
    discor = 0
    x_tied = 0
    y_tied = 0
    for i in range(n-1):
        x_1 = ranked[i][0]
        y_1 = ranked[i][1]
        for j in range(i+1, n):
            x_2 = ranked[j][0]
            y_2 = ranked[j][1]
            x_diff = x_1 - x_2
            y_diff = y_1 - y_2
            if x_diff * y_diff > 0:
                con += 1
            elif x_diff and y_diff:
                discor += 1
            else:
                if x_diff:
                    y_tied += 1
                if y_diff:
                    x_tied += 1
    
    diff = con - discor
    total = con + discor
    denom = ((total + y_tied) * (total + x_tied))**0.5
    variance = (4*n+10) / (9*n*(n-1))
    tau = diff / denom
    stat = tau
    
    if x_tied or y_tied:
        x_tied = array([v for v in Freqs(x).itervalues() if v > 1])
        y_tied = array([v for v in Freqs(y).itervalues() if v > 1])
        t0 = n*(n-1)/2
        t1 = sum(x_tied * (x_tied-1)) / 2
        t2 = sum(y_tied * (y_tied-1)) / 2
        stat = tau * sqrt((t0-t1)*(t0-t2))
        v0 = n * (n - 1) * (2 * n + 5)
        vt = sum(x_tied * (x_tied - 1) * (2 * x_tied + 5))
        vu = sum(y_tied * (y_tied - 1) * (2 * y_tied + 5))
        v1 = sum(x_tied * (x_tied - 1)) * sum(y_tied * (y_tied - 1))
        v2 = sum(x_tied * (x_tied - 1) * (x_tied - 2)) * \
               sum(y_tied * (y_tied - 1) * (y_tied - 2))
        variance = (v0 - vt - vu) / 18 + v1 / (2 * n * (n - 1)) + v2 / (9 * n * \
                                                        (n - 1) * (n - 2))
    if return_p:
        return tau, zprob(stat / variance**0.5)
    else:
        return tau
Beispiel #5
0
def mw_test(x, y):
    """computes the Mann-Whitney U statistic and the probability using the
    normal approximation"""
    if len(x) > len(y):
        x, y = y, x
    
    num_x = len(x)
    num_y = len(y)
    
    x = zip(x, zeros(len(x), int), zeros(len(x), int))
    y = zip(y, ones(len(y), int), zeros(len(y), int))
    combined = x+y
    combined = array(combined, dtype=[('stat', float), ('sample', int),
                                      ('rank', float)])
    combined.sort(order='stat')
    prev = None
    start = None
    ties = False
    T = 0.0
    for index in range(combined.shape[0]):
        value = combined['stat'][index]
        sample = combined['sample'][index]
        if value == prev and start is None:
            start = index
            continue
        
        if value != prev and start is not None:
            ties = True
            ave_rank = _average_rank(start, index)
            num_tied = index - start + 1
            T += (num_tied**3 - num_tied)
            for i in range(start-1, index):
                combined['rank'][i] = ave_rank
            start = None
        combined['rank'][index] = index+1
        prev = value
    
    if start is not None:
        ave_rank = _average_rank(start, index)
        num_tied = index - start + 2
        T += (num_tied**3 - num_tied)
        for i in range(start-1, index+1):
            combined['rank'][i] = ave_rank
    
    total = combined.shape[0]
    x_ranks_sum = sum(combined['rank'][i] for i in range(total) if combined['sample'][i] == 0)
    prod = num_x * num_y
    U1 = prod + (num_x * (num_x+1) / 2) - x_ranks_sum
    U2 = prod - U1
    U = max([U1, U2])
    numerator = U - prod / 2
    denominator = sqrt((prod / (total * (total-1)))*((total**3 - total - T)/12))
    z = (numerator/denominator)
    p = zprob(z)
    return U, p