def correlation(x_items, y_items):
    """Return (r, prob): Pearson's r for paired values and its probability.

    The inputs are walked in lockstep with zip, so pairing stops at the end
    of the shorter one.

    r is set to 0.0 when computing it raises ZeroDivisionError or ValueError
    (no variation in at least one variable), and is clamped to [-1.0, 1.0].

    prob is 1 when fewer than three pairs were seen, 0 when computing t
    raises ZeroDivisionError (r presumably at the limit), and otherwise
    the value of tprob(t, n - 2).
    """
    # Single pass accumulating the running sums the formula needs.
    count = 0
    total_x = total_y = 0
    total_xx = total_yy = total_xy = 0
    for x_val, y_val in zip(x_items, y_items):
        count += 1
        total_x += x_val
        total_y += y_val
        total_xx += x_val * x_val
        total_yy += y_val * y_val
        total_xy += x_val * y_val

    try:
        numerator = 1.0 * ((count * total_xy) - (total_x * total_y))
        spread_x = sqrt((count * total_xx) - (total_x * total_x))
        spread_y = sqrt((count * total_yy) - (total_y * total_y))
        r = numerator / (spread_x * spread_y)
    except (ZeroDivisionError, ValueError):
        # No variation in x or y: treat as uncorrelated.
        r = 0.0

    # Rounding error can push r just outside the legal range; pull it back.
    if r > 1.0:
        r = 1.0
    if r < -1.0:
        r = -1.0

    # Too few points to have any degrees of freedom for the t statistic.
    if count < 3:
        return (r, 1)

    deg_freedom = count - 2
    try:
        t = r / sqrt((1 - (r * r)) / deg_freedom)
        prob = tprob(t, deg_freedom)
    except ZeroDivisionError:
        # r was presumably 1 (or -1), which makes the denominator zero.
        prob = 0
    return (r, prob)
def t_tailed_prob(t, df, tails):
    """Return the p-value for t with df degrees of freedom, chosen by tails.

    tails == 'high' uses t_high, tails == 'low' uses t_low, and any other
    value falls back to tprob.
    """
    # Pick the probability function first, then make a single call.
    if tails == 'high':
        prob_func = t_high
    elif tails == 'low':
        prob_func = t_low
    else:
        prob_func = tprob
    return prob_func(t, df)
def test_tprob(self):
    """tprob should equal double the t_high probability for abs(t)"""
    # One-tailed reference probabilities keyed by degrees of freedom;
    # each is doubled below to get the value tprob is checked against.
    one_tailed = {
        1: [
            0.500000000,
            0.496817007,
            0.468274483,
            0.352416382,
            0.250000000,
            0.147583618,
            0.062832958,
            0.031725517,
            0.015902251,
            0.010606402,
            0.006365349,
            0.001591536,
        ],
        10: [
            5.000000e-01,
            4.961090e-01,
            4.611604e-01,
            3.139468e-01,
            1.704466e-01,
            3.669402e-02,
            2.686668e-04,
            7.947766e-07,
            1.073031e-09,
            1.980896e-11,
            1.237155e-13,
            1.200254e-19,
        ],
        100: [
            5.000000e-01,
            4.960206e-01,
            4.602723e-01,
            3.090868e-01,
            1.598621e-01,
            2.410609e-02,
            1.225087e-06,
            4.950844e-17,
            4.997134e-37,
            4.190166e-52,
            7.236082e-73,
            2.774197e-132,
        ],
    }
    for df in self.df:
        expected = [2 * single for single in one_tailed[df]]
        for t_value, two_tailed in zip(self.values, expected):
            self.assertFloatEqualRel(tprob(t_value, df), two_tailed, eps=1e-4)