def ttest_ind(a: List(float), b: List(float)) -> (float, float):
    """
    Calculates the t-obtained T-test on TWO INDEPENDENT samples of scores,
    a and b, using a pooled variance estimate.  From Numerical Recipes,
    p.483.

    The reporting options (printit, name1, name2, writemode) are fixed
    locals here rather than parameters.  With printit left at 0 the
    reporting call below is never reached and the function only returns
    its result.

    Usage:   ttest_ind(a,b)
    Returns: t-value, two-tailed prob
    """
    # Reporting options, hard-wired in place of optional arguments.
    printit = 0
    name1 = 'Samp1'
    name2 = 'Samp2'
    writemode = 'a'

    mean_a = central_tendency.mean(a)
    mean_b = central_tendency.mean(b)
    var_a = variability.stdev(a)**2
    var_b = variability.stdev(b)**2
    size_a = len(a)
    size_b = len(b)
    df = size_a + size_b - 2

    # Pool the two variances, weighting each by (its sample size - 1).
    pooled_var = ((size_a - 1) * var_a + (size_b - 1) * var_b) / float(df)
    std_err = sqrt(pooled_var * (1.0 / size_a + 1.0 / size_b))
    t = (mean_a - mean_b) / std_err
    prob = probability.betai(0.5 * df, 0.5, df / (df + t * t))

    if printit != 0:
        statname = 'Independent samples T-test.'
        outputpairedstats(printit, writemode,
                          name1, size_a, mean_a, var_a, min(a), max(a),
                          name2, size_b, mean_b, var_b, min(b), max(b),
                          statname, t, prob)
    return t, prob
def ttest_1samp(a: List(float), popmean: int) -> (float, float):
    """
    Calculates the t-obtained for the single-sample T-test on ONE group of
    scores a, given a population mean.

    The reporting options (printit, name, writemode) are fixed locals here
    rather than parameters.  With printit left at 0 the reporting call
    below is never reached and the function only returns its result.

    Usage:   ttest_1samp(a,popmean)
    Returns: t-value, two-tailed prob
    """
    # Reporting options, hard-wired in place of optional arguments.
    printit = 0
    name = 'Sample'
    writemode = 'a'

    sample_mean = central_tendency.mean(a)
    sample_var = variability.var(a)
    size = len(a)
    df = size - 1

    # Same shape as the two-sample pooled formula, reduced to one group.
    scaled_var = ((size - 1) * sample_var) / float(df)
    std_err = sqrt(scaled_var * (1.0 / size))
    t = (sample_mean - popmean) / std_err
    prob = probability.betai(0.5 * df, 0.5, float(df) / (df + t * t))

    if printit != 0:
        statname = 'Single-sample T-test.'
        outputpairedstats(printit, writemode,
                          'Population', '--', popmean, 0, 0, 0,
                          name, size, sample_mean, sample_var, min(a), max(a),
                          statname, t, prob)
    return t, prob
def pointbiserialr(x:List(float),y:List(float))->(float,float):
    """
    Calculates a point-biserial correlation coefficient and the associated
    probability value.  Taken from Heiman's Basic Statistics for the Behav.
    Sci (1st), p.194.

    Usage:   pointbiserialr(x,y)      where x,y are equal-length lists
    Returns: Point-biserial r, two-tailed p-value
    Raises:  ValueError if x and y differ in length, or if pstat.unique(x)
             does not yield exactly 2 categories
    """
    TINY = 1e-30
    if len(x) != len(y):
        raise ValueError('INPUT VALUES NOT PAIRED IN pointbiserialr. ABORTING.')

    paired = pstat.abut(x,y)
    categories = pstat.unique(x)
    if len(categories) != 2:
        raise ValueError("Exactly 2 categories required for pointbiserialr().")

    # Exactly two categories from here on.
    codemap = pstat.abut(categories,list(range(2)))
    # NOTE(review): the recoded result is never read; the call is kept in
    # case pstat.recode has effects beyond its return value -- confirm.
    recoded = pstat.recode(paired,codemap,0)

    # Presumably the rows of `paired` whose column 0 matches each category
    # -- verify against pstat.linexand.
    first_rows = pstat.linexand(paired,0,categories[0])
    second_rows = pstat.linexand(paired,0,categories[1])
    first_mean = central_tendency.mean(pstat.colex(first_rows,1))
    second_mean = central_tendency.mean(pstat.colex(second_rows,1))

    n = len(paired)
    adjust = sqrt((len(first_rows)/float(n))*(len(second_rows)/float(n)))
    spread = variability.samplestdev(pstat.colex(paired,1))
    rpb = (second_mean - first_mean)/spread*adjust

    df = n-2
    # TINY keeps the denominator non-zero when rpb is exactly +1 or -1.
    t = rpb*sqrt(df/((1.0-rpb+TINY)*(1.0+rpb+TINY)))
    prob = probability.betai(0.5*df,0.5,df/(df+t*t))
    return rpb, prob
def linregress(x:List(float),y:List(float))->(float,float,float,float,float):
    """
    Calculates a regression line on x,y pairs.

    The correlation r is clamped to [-1, 1] before it is used further.
    Mathematically it cannot leave that range, but rounding in the sums
    can push it marginally outside, which would make the square roots
    below fail with a math domain error.

    Usage:   linregress(x,y)      x,y are equal-length lists of x-y coordinates
    Returns: slope, intercept, r, two-tailed prob, sterr-of-estimate
    Raises:  ValueError if x and y differ in length
    """
    TINY = 1.0e-20
    if len(x) != len(y):
        raise ValueError('Input values not paired in linregress. Aborting.')
    n = len(x)
    x = list(map(float,x))
    y = list(map(float,y))
    xmean = central_tendency.mean(x)
    ymean = central_tendency.mean(y)

    # These two terms appear in both r and the slope; compute them once.
    x_term = n*support.ss(x) - support.square_of_sums(x)
    y_term = n*support.ss(y) - support.square_of_sums(y)

    r_num = float(n*(support.summult(x,y)) - sum(x)*sum(y))
    r_den = sqrt(x_term*y_term)
    r = r_num / r_den
    # Guard against floating-point overshoot.  Explicit comparisons are
    # used (not min/max) so that a NaN passes through unchanged.
    if r > 1.0:
        r = 1.0
    elif r < -1.0:
        r = -1.0

    df = n-2
    # TINY keeps the denominator non-zero when r is exactly +1 or -1.
    t = r*sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
    prob = probability.betai(0.5*df,0.5,df/(df+t*t))
    slope = r_num / float(x_term)
    intercept = ymean - slope*xmean
    sterrest = sqrt(1-r*r)*variability.samplestdev(y)
    return slope, intercept, r, prob, sterrest
def spearmanr(x:List(float),y:List(float))->(float,float):
    """
    Calculates a Spearman rank-order correlation coefficient.  Taken
    from Heiman's Basic Statistics for the Behav. Sci (1st), p.192.

    A perfect rank correlation (rs of exactly +1 or -1) is handled by
    adding TINY to each factor of the t-statistic's denominator, the same
    guard pearsonr and pointbiserialr use; without it the denominator is
    zero and the division fails.

    Usage:   spearmanr(x,y)      where x and y are equal-length lists
    Returns: Spearman's r, two-tailed p-value
    Raises:  ValueError if x and y differ in length
    """
    TINY = 1e-30
    if len(x) != len(y):
        raise ValueError('Input values not paired in spearmanr. Aborting.')
    n = len(x)
    rankx = rankdata(x)
    ranky = rankdata(y)
    dsq = sumdiffsquared(rankx,ranky)
    rs = 1 - 6*dsq / float(n*(n**2-1))
    # TINY only matters when a factor is exactly zero; for any other rs it
    # is far below float resolution and leaves the value unchanged.
    t = rs * sqrt((n-2) / ((rs+1.0+TINY)*(1.0-rs+TINY)))
    df = n-2
    probrs = probability.betai(0.5*df,0.5,df/(df+t*t))  # t already a float
    # probability values for rs are from part 2 of the spearman function in
    # Numerical Recipes, p.510.  They are close to tables, but not exact. (?)
    return rs, probrs
def ttest_rel(a: List(float), b: List(float)) -> (float, float):
    """
    Calculates the t-obtained T-test on TWO RELATED samples of scores,
    a and b.  From Numerical Recipes, p.483.

    The reporting options (printit, name1, name2, writemode) are fixed
    locals here rather than parameters.  With printit left at 0 the
    reporting call below is never reached and the function only returns
    its result.

    Usage:   ttest_rel(a,b)
    Returns: t-value, two-tailed prob
    Raises:  ValueError if a and b differ in length
    """
    # Reporting options, hard-wired in place of optional arguments.
    printit = 0
    name1 = 'Sample1'
    name2 = 'Sample2'
    writemode = 'a'

    if len(a) != len(b):
        raise ValueError('Unequal length lists in ttest_rel.')

    mean_a = central_tendency.mean(a)
    mean_b = central_tendency.mean(b)
    var_a = variability.var(a)
    var_b = variability.var(b)
    n = len(a)

    # Sum of the paired cross-deviations from the two means.
    cross_dev = sum((p - mean_a) * (q - mean_b) for p, q in zip(a, b))
    df = n - 1
    covariance = cross_dev / float(df)
    std_err = sqrt((var_a + var_b - 2.0 * covariance) / float(n))
    t = (mean_a - mean_b) / std_err
    prob = probability.betai(0.5 * df, 0.5, df / (df + t * t))

    if printit != 0:
        statname = 'Related samples T-test.'
        outputpairedstats(printit, writemode,
                          name1, n, mean_a, var_a, min(a), max(a),
                          name2, n, mean_b, var_b, min(b), max(b),
                          statname, t, prob)
    return t, prob
def pearsonr(x:List(float),y:List(float))->(float,float):
    """
    Calculates a Pearson correlation coefficient and the associated
    probability value.  Taken from Heiman's Basic Statistics for the Behav.
    Sci (2nd), p.195.

    The coefficient is clamped to [-1, 1] before the t-statistic is
    formed.  Mathematically it cannot leave that range, but rounding in
    the sums can push it marginally outside, which would make the square
    root below fail with a math domain error.

    Usage:   pearsonr(x,y)      where x and y are equal-length lists
    Returns: Pearson's r value, two-tailed p-value
    Raises:  ValueError if x and y differ in length
    """
    TINY = 1.0e-30
    if len(x) != len(y):
        raise ValueError('Input values not paired in pearsonr. Aborting.',x,y)
    n = len(x)
    x = list(map(float,x))
    y = list(map(float,y))

    x_term = n*support.ss(x) - support.square_of_sums(x)
    y_term = n*support.ss(y) - support.square_of_sums(y)
    r_num = n*(support.summult(x,y)) - sum(x)*sum(y)
    r_den = sqrt(x_term*y_term)
    r = (r_num / r_den)  # denominator already a float
    # Guard against floating-point overshoot.  Explicit comparisons are
    # used (not min/max) so that a NaN passes through unchanged.
    if r > 1.0:
        r = 1.0
    elif r < -1.0:
        r = -1.0

    df = n-2
    # TINY keeps the denominator non-zero when r is exactly +1 or -1.
    t = r*sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))
    prob = probability.betai(0.5*df,0.5,df/float(df+t*t))
    return r, prob