Beispiel #1
0
def correlation(x_items, y_items):
    """Return ``(r, prob)``: Pearson's r for the paired items and its p-value.

    Items are paired with ``zip``, so pairing stops at the shorter input.

    r is reported as 0.0 when the formula cannot be evaluated (division by
    zero or a math domain error, e.g. one variable has no variation), and
    is clamped to the closed interval [-1.0, 1.0].

    prob is 1 when fewer than three pairs are available, 0 when the
    t statistic cannot be formed because of a division by zero (|r| == 1),
    and otherwise ``tprob(t, n - 2)``.
    """
    count = 0
    total_x = total_y = 0
    total_xx = total_yy = total_xy = 0
    # Single pass accumulating the raw sums needed by the computational
    # formula for r.
    for x_val, y_val in zip(x_items, y_items):
        count += 1
        total_x += x_val
        total_y += y_val
        total_xx += x_val * x_val
        total_yy += y_val * y_val
        total_xy += x_val * y_val

    try:
        numerator = (count * total_xy) - (total_x * total_y)
        spread_x = sqrt((count * total_xx) - (total_x * total_x))
        spread_y = sqrt((count * total_yy) - (total_y * total_y))
        r = 1.0 * numerator / (spread_x * spread_y)
    except (ZeroDivisionError, ValueError):
        # No variation in at least one variable (or a negative radicand).
        r = 0.0

    # Rounding error can push r marginally outside the valid range.
    if r < -1.0:
        r = -1.0
    elif r > 1.0:
        r = 1.0

    # Too few observations for a t test with n - 2 degrees of freedom.
    if count < 3:
        return (r, 1)

    dof = count - 2
    try:
        t = r / sqrt((1 - (r * r)) / dof)
        prob = tprob(t, dof)
    except ZeroDivisionError:
        # r was presumably exactly +/-1, making the denominator zero.
        prob = 0
    return (r, prob)
Beispiel #2
0
def t_tailed_prob(t, df, tails):
    """Return the p-value for t with df degrees of freedom, chosen by tails.

    ``tails == 'high'`` delegates to ``t_high``, ``tails == 'low'`` delegates
    to ``t_low``, and any other value delegates to ``tprob``.
    """
    # Select the probability function first, then make a single call.
    if tails == 'high':
        prob_func = t_high
    elif tails == 'low':
        prob_func = t_low
    else:
        prob_func = tprob
    return prob_func(t, df)
    def test_tprob(self):
        """tprob should equal twice the tabulated probability for each t.

        The table below is keyed by degrees of freedom; each entry is doubled
        before comparison (per the original intent: twice the t_high
        probability for abs(t)).
        """
        # Reference probabilities before doubling, one tuple per df.
        # NOTE(review): entries are assumed to line up positionally with
        # self.values -- confirm against the fixture's setUp.
        reference = {
            1: (
                0.500000000, 0.496817007, 0.468274483, 0.352416382,
                0.250000000, 0.147583618, 0.062832958, 0.031725517,
                0.015902251, 0.010606402, 0.006365349, 0.001591536,
            ),
            10: (
                5.000000e-01, 4.961090e-01, 4.611604e-01, 3.139468e-01,
                1.704466e-01, 3.669402e-02, 2.686668e-04, 7.947766e-07,
                1.073031e-09, 1.980896e-11, 1.237155e-13, 1.200254e-19,
            ),
            100: (
                5.000000e-01, 4.960206e-01, 4.602723e-01, 3.090868e-01,
                1.598621e-01, 2.410609e-02, 1.225087e-06, 4.950844e-17,
                4.997134e-37, 4.190166e-52, 7.236082e-73, 2.774197e-132,
            ),
        }
        for df in self.df:
            pairs = zip(self.values, [2 * single for single in reference[df]])
            for t_value, expected in pairs:
                self.assertFloatEqualRel(tprob(t_value, df), expected, eps=1e-4)