def welch_t(n1, n2):
    """
    Welch unequal-variance two-sample t-test, as proposed in:

    B.L. Welch. 1947. The generalization of "Student"'s problem when
    several different population variances are involved. Biometrika
    34(1-2): 28-35.

    # "Student"'s famous sleep data
    >>> group1 = [0.7, -1.6, -0.2, -1.2, -0.1, 3.4, 3.7, 0.8, 0.0, 2.0]
    >>> group2 = [1.9, 0.8, 1.1, 0.1, -0.1, 4.4, 5.5, 1.6, 4.6, 3.4]
    >>> t_results = welch_t(group1, group2)
    >>> round(t_results['df.t'], 4)
    17.7765
    >>> round(t_results['t'], 4)
    -1.8608
    >>> round(t_results['p.t'], 4)
    0.0794
    """
    sigma1 = sample_variance(n1)
    sigma2 = sample_variance(n2)
    # Squared standard error of the difference between the two means.
    scaled = sigma1 / len(n1) + sigma2 / len(n2)
    t = (mean(n1) - mean(n2)) / sqrt(scaled)
    df = (scaled ** 2) / ((sigma1 ** 2 / ((len(n1) - 1) * len(n1) ** 2)) +
                          (sigma2 ** 2 / ((len(n2) - 1) * len(n2) ** 2)))
    # Two-tailed p-value.
    p = p_t(t, df)
    if p > .5:
        p = 1. - p
    p *= 2.
    return {'t': t, 'p.t': p, 'df.t': df}
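
# Note on welch_t(): the degrees of freedom follow the Welch-Satterthwaite
# approximation. Writing v1, v2 for the sample variances (sigma1, sigma2
# above) and n1, n2 for the sample sizes:
#
#     df = (v1/n1 + v2/n2) ** 2 / ((v1/n1) ** 2 / (n1 - 1) +
#                                  (v2/n2) ** 2 / (n2 - 1))
#
# This is algebraically identical to the expression used in welch_t(),
# assuming sample_variance() returns the unbiased (n - 1) sample variance.
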
def cohen_d(n1, n2):
    r"""
    Compute Cohen's $d$ for two independent samples n1, n2, defined as:

    d = \frac{\bar{x}_1 - \bar{x}_2}{s}

    where $s$, the pooled standard deviation, is

    s = \sqrt{\frac{\sum_{i=1}^{n_1} (x_{1,i} - \bar{x}_1)^2 +
                    \sum_{i=1}^{n_2} (x_{2,i} - \bar{x}_2)^2}{n_1 + n_2}}

    This latter definition comes from:

    J. Hartung, G. Knapp, & B.K. Sinha. 2008. Statistical meta-analysis
    with applications. Hoboken, NJ: Wiley. (p. 14)

    >>> from csv import DictReader
    >>> from collections import defaultdict
    >>> species2petal_width = defaultdict(list)
    >>> for row in DictReader(open('iris.csv', 'r')):
    ...     species = row['Species']
    ...     width = row['Petal.Width']
    ...     species2petal_width[species].append(float(width))
    >>> round(cohen_d(*species2petal_width.values()), 3)
    2.955
    """
    mu1 = mean(n1)
    mu2 = mean(n2)
    diff = abs(mu1 - mu2)
    size = len(n1) + len(n2)
    return diff / sqrt((sse(n1, mu1) + sse(n2, mu2)) / size)
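
# Hand-checkable sketch for cohen_d(), independent of iris.csv, assuming
# mean() and sse() behave as their names suggest (arithmetic mean and sum of
# squared deviations from a given center):
#
#     cohen_d([1, 2, 3], [2, 3, 4])
#     # means are 2 and 3; sse is 2 for each sample; pooled s = sqrt(4 / 6),
#     # so d = 1 / sqrt(2 / 3) ~= 1.225
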
def chisquare(n):
    """
    Compute the one-way chi-square statistic, using the sample mean of n
    as the expected count for every cell.
    """
    mu = mean(n)
    chisq = sse(n, mu) / mu
    return chisq
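
# Hand-checkable sketch for chisquare(), assuming sse(n, mu) is
# sum((x - mu) ** 2 for x in n) as used in cohen_d() above:
#
#     chisquare([10, 20, 30])
#     # expected count is the mean, 20, so the statistic is
#     # ((10 - 20) ** 2 + 0 + (30 - 20) ** 2) / 20 == 10.0
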
def spearman_rho_tr(m, n):
    """
    Spearman's rho for tied ranks, checked by comparison with Pycluster

    >>> x = [2, 8, 5, 4, 2, 6, 1, 4, 5, 7, 4]
    >>> y = [3, 9, 4, 3, 1, 7, 2, 5, 6, 8, 3]
    >>> round(spearman_rho_tr(x, y), 3)
    0.935
    """
    assert len(m) == len(n), 'args must be the same length'
    # Pearson's correlation computed on the rank-transformed data.
    m = rank(m)
    n = rank(n)
    num = 0.
    den_m = 0.
    den_n = 0.
    m_mean = mean(m)
    n_mean = mean(n)
    for (i, j) in zip(m, n):
        i = i - m_mean
        j = j - n_mean
        num += i * j
        den_m += i ** 2
        den_n += j ** 2
    return num / sqrt(den_m * den_n)
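
# Note on spearman_rho_tr(): computing Pearson's correlation on the ranked
# data is the standard way to handle ties, but it presumes that rank()
# assigns tied values their average (fractional) rank, e.g.
# rank([2, 8, 5, 4, 2]) -> [1.5, 5.0, 4.0, 3.0, 1.5].
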
def glass_Delta(n1, n2):
    r"""
    Compute Glass's $\Delta$, a variant of Cohen's $d$ for two independent
    samples; the denominator is replaced with the standard deviation of the
    control group, which is the first sample here. This definition comes
    from:

    L.V. Hedges & I. Olkin. 1985. Statistical methods for meta-analysis.
    Orlando: Academic Press. (p. 78)

    >>> from csv import DictReader
    >>> from collections import defaultdict
    >>> species2petal_width = defaultdict(list)
    >>> for row in DictReader(open('iris.csv', 'r')):
    ...     species = row['Species']
    ...     width = row['Petal.Width']
    ...     species2petal_width[species].append(float(width))
    >>> round(glass_Delta(*species2petal_width.values()), 3)
    2.575
    """
    mu1 = mean(n1)
    mu2 = mean(n2)
    return abs(mu1 - mu2) / sqrt(sse(n1, mu1) / len(n1))
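
# Hand-checkable sketch for glass_Delta(), independent of iris.csv and again
# assuming mean() and sse() behave as above: with control group [1, 2, 3, 4]
# and treatment group [3, 4, 5, 6], the mean difference is 2 and the
# control-group denominator is sqrt(5 / 4), so
#
#     glass_Delta([1, 2, 3, 4], [3, 4, 5, 6])   # ~= 1.789
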
def moment(n, moment=1):
    """
    Compute the `moment`-th central moment of the sample n; the first
    central moment is zero by definition.
    """
    if moment == 1:
        return 0.
    mu = mean(n)
    return sum((x - mu) ** moment for x in n) / len(n)
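
# Hand-checkable sketch for moment(): the second central moment is the
# population (maximum-likelihood) variance, e.g.
#
#     moment([1, 2, 3, 4], 2)
#     # mean is 2.5; sum of squared deviations is 5; 5 / 4 == 1.25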