def calc_spearman_t(r, n, tails='two-tailed', eps=1e-10):
    """Return the probability of a Spearman correlation via a t statistic.

    Computes t = r * sqrt((n - 2) / (1 - r**2)) and looks up its
    probability with n - 2 degrees of freedom. Only the probability is
    returned; the t value itself is not.

    Equation from Wikipedia article on Spearman rank correlation:
    http://en.wikipedia.org/wiki/Spearman's_rank_correlation_coefficient
    TODO: verify with additional / published sources.
    TODO: want to find the correct handling for the case of perfect
    correlation

    Arguments:
        r: the correlation coefficient.
        n: the sample size; n - 2 is used as the degrees of freedom.
        tails: 'two-tailed' (uses tprob), 'high' (uses t_high) or
            'low' (uses t_low).
        eps: amount by which an r of exactly 1.0 or -1.0 is moved toward
            zero. Must be non-zero for the guard to have any effect.

    Raises RuntimeError if tails is not one of the three valid values.
    """
    # A perfect correlation (r == +/-1) makes 1 - r**2 exactly zero, so
    # the t formula would divide by zero. Move r an arbitrarily tiny
    # epsilon toward zero in either direction to avoid that.
    if r == 1.0:
        r = r - eps
    elif r == -1.0:
        r = r + eps

    t = r * (((n - 2) / (1.0 - r ** 2)) ** 0.5)

    if tails == 'two-tailed':
        prob = tprob(t, n - 2)
    elif tails == 'high':
        prob = t_high(t, n - 2)
    elif tails == 'low':
        prob = t_low(t, n - 2)
    else:
        raise RuntimeError(
            "Valid prob. methods are 'two-tailed','high',and 'low'")

    return prob
def calc_spearman_t(r, n, tails='two-tailed', eps=1e-10):
    """Return the probability of a Spearman correlation via a t statistic.

    Computes t = r * sqrt((n - 2) / (1 - r**2)) and looks up its
    probability with n - 2 degrees of freedom. Only the probability is
    returned; the t value itself is not.

    Equation from Wikipedia article on Spearman rank correlation:
    http://en.wikipedia.org/wiki/Spearman's_rank_correlation_coefficient
    TODO: verify with additional / published sources.
    TODO: want to find the correct handling for the case of perfect
    correlation

    Arguments:
        r: the correlation coefficient.
        n: the sample size; n - 2 is used as the degrees of freedom.
        tails: 'two-tailed' (uses tprob), 'high' (uses t_high) or
            'low' (uses t_low).
        eps: amount by which an r of exactly 1.0 or -1.0 is moved toward
            zero. Must be non-zero for the guard to have any effect.

    Raises RuntimeError if tails is not one of the three valid values.
    """
    # A perfect correlation (r == +/-1) makes 1 - r**2 exactly zero, so
    # the t formula would divide by zero. Move r an arbitrarily tiny
    # epsilon toward zero in either direction to avoid that.
    if r == 1.0:
        r = r - eps
    elif r == -1.0:
        r = r + eps

    t = r * (((n - 2) / (1.0 - r ** 2)) ** 0.5)

    if tails == 'two-tailed':
        prob = tprob(t, n - 2)
    elif tails == 'high':
        prob = t_high(t, n - 2)
    elif tails == 'low':
        prob = t_low(t, n - 2)
    else:
        raise RuntimeError(
            "Valid prob. methods are 'two-tailed','high',and 'low'")

    return prob
def t_tailed_prob(t, df, tails):
    """Return the p-value for t with df degrees of freedom.

    tails selects which probability function is used: 'high' calls
    t_high, 'low' calls t_low, and any other value falls back to tprob.
    """
    # Choose the probability function first, then make a single call.
    if tails == 'high':
        prob_func = t_high
    elif tails == 'low':
        prob_func = t_low
    else:
        prob_func = tprob
    return prob_func(t, df)
def correlation(x_items, y_items):
    """Returns Pearson correlation between x and y, and its significance.

    The result is a (r, prob) tuple. With fewer than three items in
    x_items the probability is reported as 1. If computing the t
    statistic or its probability raises ZeroDivisionError or
    FloatingPointError, the probability is reported as 0.

    WARNING: x_items and y_items must be same length!
    """
    coeff = pearson(x_items, y_items)
    num_items = len(x_items)

    # Fewer than three items: report a probability of 1 without
    # attempting the t calculation.
    if num_items < 3:
        return (coeff, 1)

    deg_freedom = num_items - 2
    try:
        t_stat = coeff / sqrt((1 - (coeff * coeff)) / deg_freedom)
        p_value = tprob(t_stat, deg_freedom)
    except (ZeroDivisionError, FloatingPointError):
        # r was presumably 1, which makes the denominator zero.
        p_value = 0
    return (coeff, p_value)
def test_tprob(self):
    """tprob should match twice the t_high probability for abs(t)"""
    # One-tailed reference probabilities keyed by degrees of freedom;
    # each list is paired positionally with self.values.
    one_tailed = {
        1: [
            0.500000000, 0.496817007, 0.468274483, 0.352416382,
            0.250000000, 0.147583618, 0.062832958, 0.031725517,
            0.015902251, 0.010606402, 0.006365349, 0.001591536,
        ],
        10: [
            5.000000e-01, 4.961090e-01, 4.611604e-01, 3.139468e-01,
            1.704466e-01, 3.669402e-02, 2.686668e-04, 7.947766e-07,
            1.073031e-09, 1.980896e-11, 1.237155e-13, 1.200254e-19,
        ],
        100: [
            5.000000e-01, 4.960206e-01, 4.602723e-01, 3.090868e-01,
            1.598621e-01, 2.410609e-02, 1.225087e-06, 4.950844e-17,
            4.997134e-37, 4.190166e-52, 7.236082e-73, 2.774197e-132,
        ],
    }

    for df in self.df:
        # The expected two-tailed value is double the one-tailed one.
        expected = [2 * one_sided for one_sided in one_tailed[df]]
        for x, p in zip(self.values, expected):
            self.assertFloatEqualRel(tprob(x, df), p, eps=1e-4)