예제 #1
0
파일: test.py 프로젝트: Jorge-C/bipy
def G_fit(obs, exp, williams=1):
    """G test for fit between two lists of counts.

    Usage: test, prob = G_fit(obs, exp, williams)

    obs and exp are two lists of numbers of equal length: the observed
    counts (each must be >= 0) and the expected counts (each must be > 0).
    williams is a boolean stating whether to do the Williams correction.

    The statistic is

        G = 2 * SUM(obs * ln(obs / exp))

    where entries with an observed count of zero contribute nothing. When
    williams is true, G is divided by q = 1 + (k + 1) / (6 * n), where k is
    the number of classes and n is the sum of the observed counts.

    Returns the tuple (G, prob), where prob is chi_high(G, k - 1).

    Raises ValueError if the lists differ in length or an observed value is
    negative, and ZeroExpectedError if an expected value is zero or
    negative.

    See Sokal and Rohlf chapter 17.
    """
    k = len(obs)
    if k != len(exp):
        raise ValueError("G_fit requires two lists of equal length.")
    G = 0
    n = 0

    for o, e in zip(obs, exp):
        if o < 0:
            raise ValueError(
                "G_fit requires all observed values to be positive.")
        if e <= 0:
            raise ZeroExpectedError(
                "G_fit requires all expected values to be positive.")
        if o:  # if o is zero, o * log(o/e) must be zero as well.
            G += o * np.log(o / e)
            n += o

    G *= 2
    # When nothing was observed (n == 0), G is exactly zero and the
    # correction factor would divide by zero, so the correction is skipped.
    if williams and n:
        q = 1 + (k + 1) / (6 * n)
        G /= q

    return G, chi_high(G, k - 1)
예제 #2
0
파일: test.py 프로젝트: Jorge-C/bipy
def fisher(probs):
    """Uses Fisher's method to combine multiple tests of a hypothesis.

    -2 * SUM(ln(P)) gives chi-squared distribution with 2n degrees of freedom.

    probs is a sequence of n probabilities (it must support len()).

    Returns chi_high(-2 * SUM(ln(P)), 2 * n), or 0.0 if the computation
    raises OverflowError.
    """
    try:
        # Take the log of the whole sequence in one vectorised call. On
        # Python 3, map() returns a lazy iterator, which np.sum does not
        # add up element by element.
        statistic = -2 * np.sum(np.log(probs))
        return chi_high(statistic, 2 * len(probs))
    except OverflowError:
        return 0.0
예제 #3
0
    def test_chi_high(self):
        """chi_high agrees with R's pchisq(lower.tail=FALSE) reference"""
        # Reference probabilities keyed by degrees of freedom. Each list is
        # paired position-by-position with self.values below.
        reference = {
            1: [
                1.000000e+00, 9.203443e-01, 7.518296e-01, 4.795001e-01,
                3.173105e-01, 1.572992e-01, 2.534732e-02, 1.565402e-03,
                7.744216e-06, 4.320463e-08, 1.537460e-12, 2.088488e-45,
            ],
            10: [
                1.000000e+00, 1.000000e+00, 1.000000e+00, 9.999934e-01,
                9.998279e-01, 9.963402e-01, 8.911780e-01, 4.404933e-01,
                2.925269e-02, 8.566412e-04, 2.669083e-07, 1.613931e-37,
            ],
            100: [
                1.00000e+00, 1.00000e+00, 1.00000e+00, 1.00000e+00,
                1.00000e+00, 1.00000e+00, 1.00000e+00, 1.00000e+00,
                1.00000e+00, 1.00000e+00, 9.99993e-01, 1.17845e-08,
            ],
        }
        tolerance = 10e-7

        for dof in self.df:
            expected = reference[dof]
            for stat, prob in zip(self.values, expected):
                observed = chi_high(stat, dof)
                np.testing.assert_allclose(observed, prob, atol=tolerance)
예제 #4
0
    def test_chi_high(self):
        """Check chi_high against values from R's pchisq(lower.tail=FALSE)"""
        # One list of expected probabilities per degrees-of-freedom value;
        # entry i corresponds to self.values[i].
        expected_by_df = {
            1: [
                1.000000e+00,
                9.203443e-01,
                7.518296e-01,
                4.795001e-01,
                3.173105e-01,
                1.572992e-01,
                2.534732e-02,
                1.565402e-03,
                7.744216e-06,
                4.320463e-08,
                1.537460e-12,
                2.088488e-45,
            ],
            10: [
                1.000000e+00,
                1.000000e+00,
                1.000000e+00,
                9.999934e-01,
                9.998279e-01,
                9.963402e-01,
                8.911780e-01,
                4.404933e-01,
                2.925269e-02,
                8.566412e-04,
                2.669083e-07,
                1.613931e-37,
            ],
            100: [
                1.00000e+00,
                1.00000e+00,
                1.00000e+00,
                1.00000e+00,
                1.00000e+00,
                1.00000e+00,
                1.00000e+00,
                1.00000e+00,
                1.00000e+00,
                1.00000e+00,
                9.99993e-01,
                1.17845e-08,
            ],
        }
        check = np.testing.assert_allclose

        for degrees in self.df:
            pairs = zip(self.values, expected_by_df[degrees])
            for value, wanted in pairs:
                check(chi_high(value, degrees), wanted, atol=10e-7)
예제 #5
0
파일: test.py 프로젝트: Jorge-C/bipy
def G_2_by_2(a, b, c, d, williams=1, directional=1):
    """G test for independence in a 2 x 2 table.

    Usage: G, prob = G_2_by_2(a, b, c, d, williams, directional)

    Cells are in the order:

        a b
        c d

    a, b, c, and d can be int, float, or long, and must all be >= 0.
    williams is a boolean stating whether to do the Williams correction.
    directional is a boolean stating whether the test is 1-tailed.

    Briefly, computes sum(f ln f) for cells - sum(f ln f) for
    rows and columns + f ln f for the table.

    Always has 1 degree of freedom

    To generalize the test to r x c, use the same protocol:
    2*(cells - rows/cols + table), then with (r-1)(c-1) df.

    Note that the G statistic itself is always positive: to get a
    directional test, the appropriate ratio (e.g. a/b > c/d) must be tested
    as a separate procedure. Find the probability for the
    observed G, and then either halve or halve and subtract from
    one depending on whether the directional prediction was
    upheld.

    When directional is true, the table counts as "high" if b == 0, or if
    d != 0 and a/b > c/d. The probability is then passed through
    tail(p, is_high), and the returned G is negated when the table is not
    "high".

    The default test is now one-tailed (Rob Knight 4/21/03).

    Returns the tuple (G, p). Returns (0, 1) when the table is empty or
    when an entire row or column is zero.

    Raises ValueError if any cell count is negative.

    See Sokal & Rohlf (1995), ch. 17. Specifically, see box 17.6 (p731).
    """
    cells = [a, b, c, d]
    # Validate before the empty-table shortcut: counts such as
    # (1, -1, 0, 0) sum to zero and would otherwise be silently accepted.
    if min(cells) < 0:
        raise ValueError(
            "G_2_by_2 got negative cell counts(s): must all be >= 0.")
    n = np.sum(cells)
    # return 0 if table was empty
    if not n:
        return (0, 1)

    # Find totals for rows and cols
    ab = a + b
    cd = c + d
    ac = a + c
    bd = b + d
    rows_cols = [ab, cd, ac, bd]
    # exit if we are missing a row or column entirely: result counts as
    # never significant
    if min(rows_cols) == 0:
        return (0, 1)

    G = 0
    # Add x ln x for items, adding zero for items whose counts are zero
    for i in filter(None, cells):
        G += i * np.log(i)
    # Subtract x ln x for rows and cols (all strictly positive at this point)
    for i in rows_cols:
        G -= i * np.log(i)
    # Add x ln x for table
    G += n * np.log(n)
    # Result needs to be multiplied by 2
    G *= 2

    # apply Williams correction
    if williams:
        q = 1 + \
            ((((n / ab) + (n / cd)) - 1) * (((n / ac) + (n / bd)) - 1)) / \
            (6 * n)
        G /= q

    # Rounding can leave G marginally below zero; clamp it for chi_high.
    p = chi_high(max(G, 0), 1)

    # find which tail we were in if the test was directional
    if directional:
        # b == 0 short-circuits and d != 0 is checked first, so neither
        # division can divide by zero.
        is_high = ((b == 0) or (d != 0 and (a / b > c / d)))
        p = tail(p, is_high)
        if not is_high:
            G = -1 * G
    return G, p