예제 #1
0
파일: univariate.py 프로젝트: kzhai/pystats
def welch_t(n1, n2):
    """
    Welch unequal variance two-sample t test, as proposed in:

    B.L. Welch. 1947., The generalization of "Student"'s problem when
    several different population variances are involved. Biometrika
    34(1-2): 28-35.

    n1 and n2 are the two samples; both must support len(). The result
    is a dictionary with three keys:

    't'     the test statistic
    'df.t'  the approximate (Welch-Satterthwaite) degrees of freedom
    'p.t'   the two-tailed p-value

    # "Student"'s famous sleep data
    >>> group1 = [0.7, -1.6, -0.2, -1.2, -0.1, 3.4, 3.7, 0.8, 0.0, 2.0]
    >>> group2 = [1.9,  0.8,  1.1,  0.1, -0.1, 4.4, 5.5, 1.6, 4.6, 3.4]
    >>> t_results = welch_t(group1, group2)
    >>> round(t_results['df.t'], 4)
    17.7765
    >>> round(t_results['t'], 4)
    -1.8608
    >>> round(t_results['p.t'], 4)
    0.0794
    """
    size1 = len(n1)
    size2 = len(n2)
    # NOTE(review): sample_variance is presumably the unbiased (n - 1)
    # estimator -- confirm against its definition
    sigma1 = sample_variance(n1)
    sigma2 = sample_variance(n2)
    # squared standard error of the difference between the two means
    scaled = sigma1 / size1 + sigma2 / size2
    t = (mean(n1) - mean(n2)) / sqrt(scaled)
    df = (scaled ** 2) / ((sigma1 ** 2 / ((size1 - 1) * size1 ** 2)) +
                          (sigma2 ** 2 / ((size2 - 1) * size2 ** 2)))
    # NOTE(review): p_t is presumably the cumulative probability of the
    # t distribution at t with df degrees of freedom -- confirm
    p = p_t(t, df)
    # fold onto the smaller tail, then double for a two-tailed p-value
    if p > .5:
        p = 1. - p
    p *= 2.
    return {'t': t, 'p.t': p, 'df.t': df}
예제 #2
0
def cohen_d(n1, n2):
    r"""
    Compute Cohen's $d$ for two independent samples n1, n2, defined as:

    d = \frac{|\bar{x}_1 - \bar{x}_2|}{s}

    where $s$, the pooled standard deviation, is

    s = \sqrt{\frac{\sum_{i=1}^{n_1} (x_{1,i} - \bar{x}_1) ^ 2 +
        \sum_{i=1}^{n_2} (x_{2,i} - \bar{x}_2) ^ 2}{n_1 + n_2}}

    This latter definition comes from:

    J. Hartung, G. Knapp, & B.K. Sinha. 2008. Statistical meta-analysis
    with application. Hoboken, NJ: Wiley. (p. 14)

    Because the absolute difference of the means is used, the returned
    effect size is never negative. Both samples must support len().

    NOTE(review): the example below unpacks every species found in
    iris.csv into the two positional arguments, so it presumably relies
    on that file holding exactly two species -- confirm.

    >>> from csv import DictReader
    >>> from collections import defaultdict
    >>> species2petal_width = defaultdict(list)
    >>> for row in DictReader(open('iris.csv', 'r')):
    ...     species = row['Species']
    ...     width = row['Petal.Width']
    ...     species2petal_width[species].append(float(width))
    >>> round(cohen_d(*species2petal_width.values()), 3)
    2.955
    """
    mu1 = mean(n1)
    mu2 = mean(n2)
    diff = abs(mu1 - mu2)
    # pooled denominator is n1 + n2, with no degrees-of-freedom correction
    size = len(n1) + len(n2)
    return diff / sqrt((sse(n1, mu1) + sse(n2, mu2)) / size)
예제 #3
0
파일: univariate.py 프로젝트: kzhai/pystats
def chisquare(n):
    """
    Compute one-way chi-square statistic

    The mean of n serves as the expected value for every cell; the
    result is sse(n, mean) divided by that mean.

    NOTE(review): sse is presumably the sum of squared deviations of n
    from its second argument -- confirm against its definition.
    """
    expected = mean(n)
    return sse(n, expected) / expected
예제 #4
0
def spearman_rho_tr(m, n):
    """
    rho for tied ranks, checked by comparison with Pycluster

    Both arguments are replaced by their ranks (via rank()), and the
    product-moment correlation of the two rank vectors is returned.
    The arguments must have the same length; otherwise the assertion
    below fails.

    >>> x = [2, 8, 5, 4, 2, 6, 1, 4, 5, 7, 4]
    >>> y = [3, 9, 4, 3, 1, 7, 2, 5, 6, 8, 3]
    >>> round(spearman_rho_tr(x, y), 3)
    0.935
    """
    assert len(m) == len(n), 'args must be the same length'
    ranks_m = rank(m)
    ranks_n = rank(n)
    centre_m = mean(ranks_m)
    centre_n = mean(ranks_n)
    # running sums: cross-product and the two sums of squared deviations
    cross = 0.
    squares_m = 0.
    squares_n = 0.
    for (rank_m, rank_n) in zip(ranks_m, ranks_n):
        dev_m = rank_m - centre_m
        dev_n = rank_n - centre_n
        cross += dev_m * dev_n
        squares_m += dev_m ** 2
        squares_n += dev_n ** 2
    return cross / sqrt(squares_m * squares_n)
예제 #5
0
def glass_Delta(n1, n2):
    r"""
    Compute Glass's $\Delta$, a variant on Cohen's $d$ for two independent
    samples; the denominator is replaced with standard deviation for the
    control group, which is the first sample here.

    This definition comes from:

    L.V. Hedges & I. Olkin. 1985. Statistical methods for meta-analysis.
    Orlando: Academic Press. (p. 78)

    The absolute difference of the two means is used, so the result is
    never negative. The control-group standard deviation is computed
    with len(n1) as the divisor.

    NOTE(review): the example below unpacks every species found in
    iris.csv into the two positional arguments, and which species acts
    as the control depends on the dictionary's iteration order --
    confirm both against the data file.

    >>> from csv import DictReader
    >>> from collections import defaultdict
    >>> species2petal_width = defaultdict(list)
    >>> for row in DictReader(open('iris.csv', 'r')):
    ...     species = row['Species']
    ...     width = row['Petal.Width']
    ...     species2petal_width[species].append(float(width))
    >>> round(glass_Delta(*species2petal_width.values()), 3)
    2.575
    """
    mu1 = mean(n1)
    mu2 = mean(n2)
    # only the first (control) sample contributes to the denominator
    return abs(mu1 - mu2) / sqrt(sse(n1, mu1) / len(n1))
예제 #6
0
파일: moments.py 프로젝트: kzhai/pystats
def moment(n, moment=1):
    """
    Return a moment of n taken about its mean.

    n is a sized collection of numbers and moment is the order of the
    moment (default 1). For order 1 the result is 0. and the data are
    not inspected; for any other order the result is the sum of
    (x - mean(n)) ** moment over n, divided by len(n).
    """
    if moment != 1:
        centre = mean(n)
        deviations = [(value - centre) ** moment for value in n]
        return sum(deviations) / len(n)
    # the first moment about the mean is zero by definition
    return 0.