Exemple #1
0
def test_logsumexp():
    """Exercise logsumexp on very negative, infinite, and NaN inputs."""
    inf = float('inf')
    nan = float('nan')
    log2 = math.log(2.)

    # Inputs whose exponentials are far too small to represent directly.
    assert logsumexp([-1000.]) == -1000.
    assert logsumexp([-1000., -1000.]) == -1000. + log2

    # exp(0) + exp(0) = 2, to within rounding.
    assert relerr(log2, logsumexp([0., 0.])) < 1e-15

    # Infinite and NaN entries.
    assert logsumexp([-inf, 1]) == 1
    assert logsumexp([-inf, -inf]) == -inf
    assert logsumexp([inf, inf]) == inf
    assert math.isnan(logsumexp([nan, -inf]))
Exemple #2
0
def test_logsumexp():
    """Check logsumexp against exact and near-exact expected values."""
    pos_inf = float('inf')
    neg_inf = -pos_inf
    not_a_number = float('nan')
    ln_two = math.log(2.)

    # Finite inputs: a singleton, a repeated very negative value, and
    # two zeros (expected log(2) up to rounding error).
    assert logsumexp([-1000.]) == -1000.
    assert logsumexp([-1000., -1000.]) == -1000. + ln_two
    assert relerr(ln_two, logsumexp([0., 0.])) < 1e-15

    # Non-finite inputs.
    assert logsumexp([neg_inf, 1]) == 1
    assert logsumexp([neg_inf, neg_inf]) == neg_inf
    assert logsumexp([pos_inf, pos_inf]) == pos_inf
    assert math.isnan(logsumexp([not_a_number, neg_inf]))
Exemple #3
0
def test_gauss_suff_stats():
    """Check stats.gauss_suff_stats on a large mean with a tiny spread.

    High mean, tiny variance would lead to catastrophic cancellation
    in a naive implementation that maintained the sum of squares.
    """
    center = 400
    delta = 0.0000001
    samples = [center - delta, center, center + delta]

    # Squared deviations from the center are delta**2, 0, delta**2,
    # averaged over the three samples.
    expected_sigma = math.sqrt(2 * delta**2 / 3)

    count, mean, sigma = stats.gauss_suff_stats(samples)
    assert count == 3
    assert mean == center
    assert relerr(expected_sigma, sigma) < 1e-5
Exemple #4
0
def test_logmeanexp():
    """Check logmeanexp against directly computed logarithms of means."""
    # The mean of two equal terms is that term, even when exp underflows.
    assert logmeanexp([-1000., -1000.]) == -1000.

    # log((exp(0) + exp(-1)) / 2), computed directly.
    direct = math.log(0.5 * (1 + math.exp(-1.)))
    assert relerr(direct, logmeanexp([0., -1.])) < 1e-15

    # The reference value here treats the exp(-1000) term as zero.
    log_half = math.log(0.5)
    assert relerr(log_half, logmeanexp([0., -1000.])) < 1e-15
def test_correlation():
    """Check pairwise CORRELATION and CORRELATION PVALUE estimates.

    Two crosscat generators are queried.  ``u_cc`` is built over a table
    into which no rows are inserted; every column pair is expected to
    yield ``None`` for both values.  ``t_cc`` is built over thirty rows
    (three distinct rows repeated ten times) and its results are
    compared against reference values within a relative tolerance.
    """
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        # Built-in metamodels are disabled above, so register crosscat
        # explicitly, backed by an engine with a fixed seed.
        cc = crosscat.LocalEngine.LocalEngine(seed=0)
        ccme = CrosscatMetamodel(cc)
        bayeslite.bayesdb_register_metamodel(bdb, ccme)

        # Table u is created but never populated.
        bdb.sql_execute('CREATE TABLE u(id, c0, c1, n0, n1, r0, r1)')
        bdb.execute('''
            CREATE GENERATOR u_cc FOR u USING crosscat (
                c0 CATEGORICAL,
                c1 CATEGORICAL,
                n0 NUMERICAL,
                n1 NUMERICAL,
                r0 CYCLIC,
                r1 CYCLIC,
            )
        ''')
        empty_result = bdb.execute('ESTIMATE CORRELATION, CORRELATION PVALUE'
                ' FROM PAIRWISE COLUMNS OF u_cc'
                ' WHERE name0 < name1'
                ' ORDER BY name0, name1').fetchall()
        assert empty_result == [
            (1, 'c0', 'c1', None, None),
            (1, 'c0', 'n0', None, None),
            (1, 'c0', 'n1', None, None),
            (1, 'c0', 'r0', None, None),
            (1, 'c0', 'r1', None, None),
            (1, 'c1', 'n0', None, None),
            (1, 'c1', 'n1', None, None),
            (1, 'c1', 'r0', None, None),
            (1, 'c1', 'r1', None, None),
            (1, 'n0', 'n1', None, None),
            (1, 'n0', 'r0', None, None),
            (1, 'n0', 'r1', None, None),
            (1, 'n1', 'r0', None, None),
            (1, 'n1', 'r1', None, None),
            (1, 'r0', 'r1', None, None),
        ]

        # Table t: three distinct rows, each repeated ten times.  The
        # cx, cy, nx and ny columns hold a single constant value.
        bdb.sql_execute('CREATE TABLE t'
            '(id, c0, c1, cx, cy, n0, n1, nc, nl, nx, ny)')
        data = [
            ('foo', 'quagga', 'x', 'y', 0, -1, +1, 1, 0, 13),
            ('bar', 'eland', 'x', 'y', 87, -2, -1, 2, 0, 13),
            ('baz', 'caribou', 'x', 'y', 92.1, -3, +1, 3, 0, 13),
        ] * 10
        for rowid, values in enumerate(data, 1):
            # Prepend a 1-based id to fill the leading id column.
            bdb.sql_execute('INSERT INTO t VALUES (?,?,?,?,?,?,?,?,?,?,?)',
                (rowid,) + values)
        bdb.execute('''
            CREATE GENERATOR t_cc FOR t USING crosscat (
                c0 CATEGORICAL,
                c1 CATEGORICAL,
                cx CATEGORICAL,
                cy CATEGORICAL,
                n0 NUMERICAL,
                n1 NUMERICAL,
                nc NUMERICAL,
                nl NUMERICAL,
                nx NUMERICAL,
                ny NUMERICAL
            )
        ''')
        result = bdb.execute('ESTIMATE CORRELATION, CORRELATION PVALUE'
            ' FROM PAIRWISE COLUMNS OF t_cc'
            ' WHERE name0 < name1'
            ' ORDER BY name0, name1').fetchall()
        expected = [
            (2, 'c0', 'c1', 1., 2.900863120340436e-12),
            (2, 'c0', 'cx', None, None),
            (2, 'c0', 'cy', None, None),
            (2, 'c0', 'n0', 1., 0.),
            (2, 'c0', 'n1', 1., 0.),
            (2, 'c0', 'nc', 1., 0.),
            (2, 'c0', 'nl', 1., 0.),
            (2, 'c0', 'nx', None, None),
            (2, 'c0', 'ny', None, None),
            (2, 'c1', 'cx', None, None),
            (2, 'c1', 'cy', None, None),
            (2, 'c1', 'n0', 1., 0.),
            (2, 'c1', 'n1', 1., 0.),
            (2, 'c1', 'nc', 1., 0.),
            (2, 'c1', 'nl', 1., 0.),
            (2, 'c1', 'nx', None, None),
            (2, 'c1', 'ny', None, None),
            (2, 'cx', 'cy', None, None),
            (2, 'cx', 'n0', None, None),
            (2, 'cx', 'n1', None, None),
            (2, 'cx', 'nc', None, None),
            (2, 'cx', 'nl', None, None),
            (2, 'cx', 'nx', None, None),
            (2, 'cx', 'ny', None, None),
            (2, 'cy', 'n0', None, None),
            (2, 'cy', 'n1', None, None),
            (2, 'cy', 'nc', None, None),
            (2, 'cy', 'nl', None, None),
            (2, 'cy', 'nx', None, None),
            (2, 'cy', 'ny', None, None),
            (2, 'n0', 'n1', 0.7913965673596881, 0.),
            (2, 'n0', 'nc', 0.20860343264031175, 0.0111758925135),
            (2, 'n0', 'nl', 0.7913965673596881, 0.),
            (2, 'n0', 'nx', None, None),
            (2, 'n0', 'ny', None, None),
            (2, 'n1', 'nc', 0., 1.),
            (2, 'n1', 'nl', 1., 0.),
            (2, 'n1', 'nx', None, None),
            (2, 'n1', 'ny', None, None),
            (2, 'nc', 'nl', 0., 1.),
            (2, 'nc', 'nx', None, None),
            (2, 'nc', 'ny', None, None),
            (2, 'nl', 'nx', None, None),
            (2, 'nl', 'ny', None, None),
            (2, 'nx', 'ny', None, None),
        ]

    # zip() stops at the shorter sequence, so without this check missing
    # or surplus result rows would go unnoticed by the loop below.
    assert len(result) == len(expected)
    for expected_item, observed_item in zip(expected, result):
        (xpd_genid, xpd_name0, xpd_name1, xpd_corr, xpd_corr_p) = expected_item
        (obs_genid, obs_name0, obs_name1, obs_corr, obs_corr_p) = observed_item
        assert xpd_genid == obs_genid
        assert xpd_name0 == obs_name0
        assert xpd_name1 == obs_name1
        # The equality test comes first so that None (and exactly matching)
        # values never reach relerr.
        assert xpd_corr == obs_corr or relerr(xpd_corr, obs_corr) < 1e-10
        assert (xpd_corr_p == obs_corr_p or
                relerr(xpd_corr_p, obs_corr_p) < 1e-1)
Exemple #6
0
def test_correlation():
    """Check pairwise CORRELATION and CORRELATION PVALUE estimates.

    Queries two crosscat generators: ``u_cc`` over a table that never
    receives rows (every pair is expected to give ``None`` for both
    values) and ``t_cc`` over thirty rows, whose results are checked
    against reference values within a relative tolerance.
    """
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        # Built-in metamodels are disabled above, so register crosscat
        # explicitly, backed by an engine with a fixed seed.
        cc = crosscat.LocalEngine.LocalEngine(seed=0)
        ccme = CrosscatMetamodel(cc)
        bayeslite.bayesdb_register_metamodel(bdb, ccme)

        # Table u is created but never populated.
        bdb.sql_execute('CREATE TABLE u(id, c0, c1, n0, n1, r0, r1)')
        bdb.execute('''
            CREATE GENERATOR u_cc FOR u USING crosscat (
                c0 CATEGORICAL,
                c1 CATEGORICAL,
                n0 NUMERICAL,
                n1 NUMERICAL,
                r0 CYCLIC,
                r1 CYCLIC,
            )
        ''')
        unpopulated = bdb.execute('ESTIMATE CORRELATION, CORRELATION PVALUE'
                                  ' FROM PAIRWISE COLUMNS OF u_cc'
                                  ' WHERE name0 < name1'
                                  ' ORDER BY name0, name1').fetchall()
        assert unpopulated == [
            (1, 'c0', 'c1', None, None),
            (1, 'c0', 'n0', None, None),
            (1, 'c0', 'n1', None, None),
            (1, 'c0', 'r0', None, None),
            (1, 'c0', 'r1', None, None),
            (1, 'c1', 'n0', None, None),
            (1, 'c1', 'n1', None, None),
            (1, 'c1', 'r0', None, None),
            (1, 'c1', 'r1', None, None),
            (1, 'n0', 'n1', None, None),
            (1, 'n0', 'r0', None, None),
            (1, 'n0', 'r1', None, None),
            (1, 'n1', 'r0', None, None),
            (1, 'n1', 'r1', None, None),
            (1, 'r0', 'r1', None, None),
        ]

        # Table t: three distinct rows, each repeated ten times.  The
        # cx, cy, nx and ny columns hold a single constant value.
        bdb.sql_execute('CREATE TABLE t'
                        '(id, c0, c1, cx, cy, n0, n1, nc, nl, nx, ny)')
        data = [
            ('foo', 'quagga', 'x', 'y', 0, -1, +1, 1, 0, 13),
            ('bar', 'eland', 'x', 'y', 87, -2, -1, 2, 0, 13),
            ('baz', 'caribou', 'x', 'y', 92.1, -3, +1, 3, 0, 13),
        ] * 10
        for rowid, values in enumerate(data, 1):
            # Prepend a 1-based id to fill the leading id column.
            bdb.sql_execute('INSERT INTO t VALUES (?,?,?,?,?,?,?,?,?,?,?)',
                            (rowid, ) + values)
        bdb.execute('''
            CREATE GENERATOR t_cc FOR t USING crosscat (
                c0 CATEGORICAL,
                c1 CATEGORICAL,
                cx CATEGORICAL,
                cy CATEGORICAL,
                n0 NUMERICAL,
                n1 NUMERICAL,
                nc NUMERICAL,
                nl NUMERICAL,
                nx NUMERICAL,
                ny NUMERICAL
            )
        ''')
        result = bdb.execute('ESTIMATE CORRELATION, CORRELATION PVALUE'
                             ' FROM PAIRWISE COLUMNS OF t_cc'
                             ' WHERE name0 < name1'
                             ' ORDER BY name0, name1').fetchall()
        expected = [
            (2, 'c0', 'c1', 1., 2.900863120340436e-12),
            (2, 'c0', 'cx', None, None),
            (2, 'c0', 'cy', None, None),
            (2, 'c0', 'n0', 1., 0.),
            (2, 'c0', 'n1', 1., 0.),
            (2, 'c0', 'nc', 1., 0.),
            (2, 'c0', 'nl', 1., 0.),
            (2, 'c0', 'nx', None, None),
            (2, 'c0', 'ny', None, None),
            (2, 'c1', 'cx', None, None),
            (2, 'c1', 'cy', None, None),
            (2, 'c1', 'n0', 1., 0.),
            (2, 'c1', 'n1', 1., 0.),
            (2, 'c1', 'nc', 1., 0.),
            (2, 'c1', 'nl', 1., 0.),
            (2, 'c1', 'nx', None, None),
            (2, 'c1', 'ny', None, None),
            (2, 'cx', 'cy', None, None),
            (2, 'cx', 'n0', None, None),
            (2, 'cx', 'n1', None, None),
            (2, 'cx', 'nc', None, None),
            (2, 'cx', 'nl', None, None),
            (2, 'cx', 'nx', None, None),
            (2, 'cx', 'ny', None, None),
            (2, 'cy', 'n0', None, None),
            (2, 'cy', 'n1', None, None),
            (2, 'cy', 'nc', None, None),
            (2, 'cy', 'nl', None, None),
            (2, 'cy', 'nx', None, None),
            (2, 'cy', 'ny', None, None),
            (2, 'n0', 'n1', 0.7913965673596881, 0.),
            (2, 'n0', 'nc', 0.20860343264031175, 0.0111758925135),
            (2, 'n0', 'nl', 0.7913965673596881, 0.),
            (2, 'n0', 'nx', None, None),
            (2, 'n0', 'ny', None, None),
            (2, 'n1', 'nc', 0., 1.),
            (2, 'n1', 'nl', 1., 0.),
            (2, 'n1', 'nx', None, None),
            (2, 'n1', 'ny', None, None),
            (2, 'nc', 'nl', 0., 1.),
            (2, 'nc', 'nx', None, None),
            (2, 'nc', 'ny', None, None),
            (2, 'nl', 'nx', None, None),
            (2, 'nl', 'ny', None, None),
            (2, 'nx', 'ny', None, None),
        ]

    # zip() stops at the shorter sequence, so without this check missing
    # or surplus result rows would go unnoticed by the loop below.
    assert len(result) == len(expected)
    for expected_item, observed_item in zip(expected, result):
        (xpd_genid, xpd_name0, xpd_name1, xpd_corr, xpd_corr_p) = expected_item
        (obs_genid, obs_name0, obs_name1, obs_corr, obs_corr_p) = observed_item
        assert xpd_genid == obs_genid
        assert xpd_name0 == obs_name0
        assert xpd_name1 == obs_name1
        # The equality test comes first so that None (and exactly matching)
        # values never reach relerr.
        assert xpd_corr == obs_corr or relerr(xpd_corr, obs_corr) < 1e-10
        assert (xpd_corr_p == obs_corr_p
                or relerr(xpd_corr_p, obs_corr_p) < 1e-1)
def test_correlation():
    """Check pairwise CORRELATION and CORRELATION PVALUE estimates.

    Two populations are queried.  ``q`` is declared over a table into
    which no rows are inserted; every variable pair is expected to yield
    ``None`` for both values.  ``p`` is declared over thirty rows (three
    distinct rows repeated ten times) and its results are compared
    against reference values within absolute/relative tolerances.  The
    ``id`` column is marked IGNORE in both populations.
    """
    with bayeslite.bayesdb_open() as bdb:
        # Table u is created but never populated.
        bdb.sql_execute('CREATE TABLE u(id, c0, c1, n0, n1, r0, r1)')
        bdb.execute('''
            CREATE POPULATION q FOR u (
                id IGNORE;
                c0 NOMINAL;
                c1 NOMINAL;
                n0 NUMERICAL;
                n1 NUMERICAL;
                r0 CYCLIC;
                r1 CYCLIC
            )
        ''')
        unpopulated = bdb.execute('ESTIMATE CORRELATION, CORRELATION PVALUE'
                ' FROM PAIRWISE COLUMNS OF q'
                ' WHERE name0 < name1'
                ' ORDER BY name0, name1').fetchall()
        assert unpopulated == [
            (1, 'c0', 'c1', None, None),
            (1, 'c0', 'n0', None, None),
            (1, 'c0', 'n1', None, None),
            (1, 'c0', 'r0', None, None),
            (1, 'c0', 'r1', None, None),
            (1, 'c1', 'n0', None, None),
            (1, 'c1', 'n1', None, None),
            (1, 'c1', 'r0', None, None),
            (1, 'c1', 'r1', None, None),
            (1, 'n0', 'n1', None, None),
            (1, 'n0', 'r0', None, None),
            (1, 'n0', 'r1', None, None),
            (1, 'n1', 'r0', None, None),
            (1, 'n1', 'r1', None, None),
            (1, 'r0', 'r1', None, None),
        ]

        # Table t: three distinct rows, each repeated ten times.  The
        # cx, cy, nx and ny columns hold a single constant value.
        bdb.sql_execute('CREATE TABLE t'
                        '(id, c0, c1, cx, cy, n0, n1, nc, nl, nx, ny)')
        data = [
            ('foo', 'quagga', 'x', 'y', 0, -1, +1, 1, 0, 13),
            ('bar', 'eland', 'x', 'y', 87, -2, -1, 2, 0, 13),
            ('baz', 'caribou', 'x', 'y', 92.1, -3, +1, 3, 0, 13),
        ] * 10
        for rowid, values in enumerate(data, 1):
            # Prepend a 1-based id to fill the leading id column.
            bdb.sql_execute('INSERT INTO t VALUES (?,?,?,?,?,?,?,?,?,?,?)',
                            (rowid, ) + values)
        bdb.execute('''
            CREATE POPULATION p FOR t (
                id IGNORE;
                c0 NOMINAL;
                c1 NOMINAL;
                cx NOMINAL;
                cy NOMINAL;
                n0 NUMERICAL;
                n1 NUMERICAL;
                nc NUMERICAL;
                nl NUMERICAL;
                nx NUMERICAL;
                ny NUMERICAL
            )
        ''')
        result = bdb.execute('ESTIMATE CORRELATION, CORRELATION PVALUE'
                             ' FROM PAIRWISE COLUMNS OF p'
                             ' WHERE name0 < name1'
                             ' ORDER BY name0, name1').fetchall()
        expected = [
            (2, 'c0', 'c1', 1., 2.900863120340436e-12),
            (2, 'c0', 'cx', None, None),
            (2, 'c0', 'cy', None, None),
            (2, 'c0', 'n0', 1., 0.),
            (2, 'c0', 'n1', 1., 0.),
            (2, 'c0', 'nc', 1., 0.),
            (2, 'c0', 'nl', 1., 0.),
            (2, 'c0', 'nx', None, None),
            (2, 'c0', 'ny', None, None),
            (2, 'c1', 'cx', None, None),
            (2, 'c1', 'cy', None, None),
            (2, 'c1', 'n0', 1., 0.),
            (2, 'c1', 'n1', 1., 0.),
            (2, 'c1', 'nc', 1., 0.),
            (2, 'c1', 'nl', 1., 0.),
            (2, 'c1', 'nx', None, None),
            (2, 'c1', 'ny', None, None),
            (2, 'cx', 'cy', None, None),
            (2, 'cx', 'n0', None, None),
            (2, 'cx', 'n1', None, None),
            (2, 'cx', 'nc', None, None),
            (2, 'cx', 'nl', None, None),
            (2, 'cx', 'nx', None, None),
            (2, 'cx', 'ny', None, None),
            (2, 'cy', 'n0', None, None),
            (2, 'cy', 'n1', None, None),
            (2, 'cy', 'nc', None, None),
            (2, 'cy', 'nl', None, None),
            (2, 'cy', 'nx', None, None),
            (2, 'cy', 'ny', None, None),
            (2, 'n0', 'n1', 0.7913965673596881, 0.),
            (2, 'n0', 'nc', 0.20860343264031175, 0.0111758925135),
            (2, 'n0', 'nl', 0.7913965673596881, 0.),
            (2, 'n0', 'nx', None, None),
            (2, 'n0', 'ny', None, None),
            (2, 'n1', 'nc', 0., 1.),
            (2, 'n1', 'nl', 1., 0.),
            (2, 'n1', 'nx', None, None),
            (2, 'n1', 'ny', None, None),
            (2, 'nc', 'nl', 0., 1.),
            (2, 'nc', 'nx', None, None),
            (2, 'nc', 'ny', None, None),
            (2, 'nl', 'nx', None, None),
            (2, 'nl', 'ny', None, None),
            (2, 'nx', 'ny', None, None),
        ]

    # zip() stops at the shorter sequence, so without this check missing
    # or surplus result rows would go unnoticed by the loop below.
    assert len(result) == len(expected)
    for expected_item, observed_item in zip(expected, result):
        (xpd_genid, xpd_name0, xpd_name1, xpd_corr, xpd_corr_p) = expected_item
        (obs_genid, obs_name0, obs_name1, obs_corr, obs_corr_p) = observed_item
        assert xpd_genid == obs_genid
        assert xpd_name0 == obs_name0
        assert xpd_name1 == obs_name1
        # The equality test comes first so that None (and exactly matching)
        # values never reach relerr.
        assert xpd_corr == obs_corr or relerr(xpd_corr, obs_corr) < 1e-10
        # NOTE(review): the first disjunct compares the correlations, not
        # the p-values, so the p-value goes unchecked whenever the
        # correlations match exactly.  Possibly xpd_corr_p == obs_corr_p
        # was intended -- confirm before tightening.
        assert (xpd_corr == obs_corr or abserr(xpd_corr_p, obs_corr_p) < 1e-10
                or relerr(xpd_corr_p, obs_corr_p) < 1e-1)
Exemple #8
0
def test_f_sf():
    """Check stats.f_sf argument validation and reference values.

    Each case is an ``(x, df1, df2)``-style positional argument triple
    passed straight through to ``stats.f_sf``.
    """
    # Non-positive degrees of freedom should throw an error.
    invalid_args = [
        (0, 0, 0),
        (2, -10, 0),
        (2, 0, -10),
        (2, -1, 1),
        (2, 1, -1),
    ]
    for args in invalid_args:
        with pytest.raises(ValueError):
            stats.f_sf(*args)

    # Survival of x = 0 should be 1.
    at_zero = [(0, 1, 12), (0, 6, 0.5), (0, 130, 121)]
    for args in at_zero:
        assert relerr(1, stats.f_sf(*args)) < .05

    # Survival of x < 0 should be 1.
    below_zero = [(-1, 1, 12), (-100, 6, 0.5), (-0.02, 130, 121)]
    for args in below_zero:
        assert relerr(1, stats.f_sf(*args)) < .05

    # Test against reference values.
    references = [
        (.5173903, (1, 12, 8)),
        (.2618860, (1.9, 1, 3)),
        (.5000000, (1, 100, 100)),
        (.1781364, (19, 14, 1)),
        (.7306588, (0.76, 23, 15)),
        (.0602978, (4.3, 1, 12)),
        (.5590169, (1.1, 2, 1)),
        (.1111111, (8, 2, 2)),
        (.9999999, (0.2, 432, 123)),
        (.9452528, (0.8, 432, 123)),
        (.0434186, (10, 5, 3)),
    ]
    for reference, args in references:
        assert relerr(reference, stats.f_sf(*args)) < .05

    # Test against reference very close to zero; these are compared by
    # absolute rather than relative error.
    near_zero = [
        (.0158130, (11, 19, 4)),
        (.0022310, (14, 9, 6)),
        (.1458691e-112, (200, 432, 123)),
        (.2489256e-13, (29, 23, 29)),
        (.1656276e-06, (31, 11, 13)),
        (.6424023e-5, (18, 14, 12)),
    ]
    for reference, args in near_zero:
        assert abserr(reference, stats.f_sf(*args)) < .01
Exemple #9
0
def test_chi2_sf():
    """Check stats.chi2_sf argument validation and reference values.

    Each case is an ``(x, df)`` positional argument pair passed straight
    through to ``stats.chi2_sf``.
    """
    # Non-positive degrees of freedom should throw an error.
    for args in [(0, 0), (2, -10)]:
        with pytest.raises(ValueError):
            stats.chi2_sf(*args)

    # Survival of x = 0 should be 1.
    for df in (12, 6, 130):
        assert relerr(1., stats.chi2_sf(0, df)) < .05

    references = [
        # Test x < 1, x >= df against reference values.
        (.0357175, (.8, .1)),
        (.2730426, (.6, .6)),
        (.0602823, (.1, .05)),
        # Test x >= 1, x <= df against reference values.
        (.7029304, (9, 12)),
        (.5934191, (1.9, 3)),
        (.9238371, (1, 4.2)),
        # Test x >= 1, x > df against reference values.
        (.3325939, (8, 7)),
        (.0482861, (3.9, 1)),
        (.3464377e-4, (193, 121)),
    ]
    for reference, args in references:
        assert relerr(reference, stats.chi2_sf(*args)) < .05
Exemple #10
0
def test_f_oneway():
    """Check stats.f_oneway on three groups of six observations each."""
    groups = [
        [6, 8, 4, 5, 3, 4],
        [8, 12, 9, 11, 6, 8],
        [13, 9, 11, 8, 7, 12],
    ]
    observed = stats.f_oneway(groups)
    # The reference value 9.3 is matched to within 1% relative error.
    assert relerr(9.3, observed) < 0.01
Exemple #11
0
def test_chi2_contingency():
    """Check stats.chi2_contingency on a 1x1 and a 2x3 table."""
    # A single-cell table is expected to give exactly zero.
    single_cell = [[42]]
    assert stats.chi2_contingency(single_cell) == 0.

    # The reference value 7.66 is matched to within 1% relative error.
    table = [[4, 2, 3], [3, 16, 2]]
    observed = stats.chi2_contingency(table)
    assert relerr(7.66, observed) < 0.01
Exemple #12
0
def test_t_cdf():
    """Check stats.t_cdf argument validation and reference values.

    Each case is an ``(x, df)`` positional argument pair passed straight
    through to ``stats.t_cdf``.
    """
    # Non-positive degrees of freedom should throw an error.
    for args in [(0, 0), (2, -10)]:
        with pytest.raises(ValueError):
            stats.t_cdf(*args)

    # CDF of x = 0 should be 0.5.
    for df in (12, 6, 130):
        assert relerr(.5, stats.t_cdf(0, df)) < .01

    # Test against various reference values.
    references = [
        (.57484842931039226, (.8, .1)),
        (.64922051214061649, (.6, .6)),
        (.51046281131211058, (.1, .05)),
        (.99999944795492968, (9, 12)),
        (.92318422834700042, (1.9, 3)),
        (.81430689864299455, (1, 4.2)),
        (.99995442539414559, (8, 7)),
        (.92010336338282994, (3.9, 1)),
        (1.0, (193, 121)),
        (.42515157068960779, (-.8, .1)),
        (.35077948785938345, (-.6, .6)),
        (.48953718868788948, (-.1, .05)),
        (.076815771652999562, (-1.9, 3)),
        (.18569310135700545, (-1, 4.2)),
        (.17530833141010374, (-1, 7)),
        (.079896636617170003, (-3.9, 1)),
        (.30899158341328747, (-0.5, 121)),
    ]
    for reference, args in references:
        assert relerr(reference, stats.t_cdf(*args)) < .05

    # Test against reference very close to zero.
    # XXX Why are we testing chi2_sf here?  This exercises stats.chi2_sf
    # rather than stats.t_cdf; kept as in the original.
    chi2_tail = stats.chi2_sf(193, 121)
    assert relerr(.346437e-4, chi2_tail) < .01
Exemple #13
0
def test_logmeanexp():
    """Check logmeanexp on equal, nearby, and widely separated inputs."""
    # Equal inputs: the expected result is the common value, exactly.
    equal_case = logmeanexp([-1000., -1000.])
    assert equal_case == -1000.

    # Nearby inputs: compare with the mean of the exponentials computed
    # directly.
    nearby_reference = math.log(0.5 * (1 + math.exp(-1.)))
    assert relerr(nearby_reference, logmeanexp([0., -1.])) < 1e-15

    # Widely separated inputs: the reference is log(1/2).
    separated_reference = math.log(0.5)
    assert relerr(separated_reference, logmeanexp([0., -1000.])) < 1e-15
def test_correlation():
    """Check pairwise CORRELATION and CORRELATION PVALUE estimates.

    Queries two populations: ``q`` over a table that never receives rows
    (every variable pair is expected to give ``None`` for both values)
    and ``p`` over thirty rows, whose results are checked against
    reference values within absolute/relative tolerances.  The ``id``
    column is marked IGNORE in both populations.
    """
    with bayeslite.bayesdb_open() as bdb:
        # Table u is created but never populated.
        bdb.sql_execute('CREATE TABLE u(id, c0, c1, n0, n1, r0, r1)')
        bdb.execute('''
            CREATE POPULATION q FOR u (
                id IGNORE;
                c0 NOMINAL;
                c1 NOMINAL;
                n0 NUMERICAL;
                n1 NUMERICAL;
                r0 CYCLIC;
                r1 CYCLIC
            )
        ''')
        empty_result = bdb.execute('ESTIMATE CORRELATION, CORRELATION PVALUE'
                ' FROM PAIRWISE COLUMNS OF q'
                ' WHERE name0 < name1'
                ' ORDER BY name0, name1').fetchall()
        assert empty_result == [
            (1, 'c0', 'c1', None, None),
            (1, 'c0', 'n0', None, None),
            (1, 'c0', 'n1', None, None),
            (1, 'c0', 'r0', None, None),
            (1, 'c0', 'r1', None, None),
            (1, 'c1', 'n0', None, None),
            (1, 'c1', 'n1', None, None),
            (1, 'c1', 'r0', None, None),
            (1, 'c1', 'r1', None, None),
            (1, 'n0', 'n1', None, None),
            (1, 'n0', 'r0', None, None),
            (1, 'n0', 'r1', None, None),
            (1, 'n1', 'r0', None, None),
            (1, 'n1', 'r1', None, None),
            (1, 'r0', 'r1', None, None),
        ]

        # Table t: three distinct rows, each repeated ten times.  The
        # cx, cy, nx and ny columns hold a single constant value.
        bdb.sql_execute('CREATE TABLE t'
            '(id, c0, c1, cx, cy, n0, n1, nc, nl, nx, ny)')
        data = [
            ('foo', 'quagga', 'x', 'y', 0, -1, +1, 1, 0, 13),
            ('bar', 'eland', 'x', 'y', 87, -2, -1, 2, 0, 13),
            ('baz', 'caribou', 'x', 'y', 92.1, -3, +1, 3, 0, 13),
        ] * 10
        for rowid, values in enumerate(data, 1):
            # Prepend a 1-based id to fill the leading id column.
            bdb.sql_execute('INSERT INTO t VALUES (?,?,?,?,?,?,?,?,?,?,?)',
                (rowid,) + values)
        bdb.execute('''
            CREATE POPULATION p FOR t (
                id IGNORE;
                c0 NOMINAL;
                c1 NOMINAL;
                cx NOMINAL;
                cy NOMINAL;
                n0 NUMERICAL;
                n1 NUMERICAL;
                nc NUMERICAL;
                nl NUMERICAL;
                nx NUMERICAL;
                ny NUMERICAL
            )
        ''')
        result = bdb.execute('ESTIMATE CORRELATION, CORRELATION PVALUE'
            ' FROM PAIRWISE COLUMNS OF p'
            ' WHERE name0 < name1'
            ' ORDER BY name0, name1').fetchall()
        expected = [
            (2, 'c0', 'c1', 1., 2.900863120340436e-12),
            (2, 'c0', 'cx', None, None),
            (2, 'c0', 'cy', None, None),
            (2, 'c0', 'n0', 1., 0.),
            (2, 'c0', 'n1', 1., 0.),
            (2, 'c0', 'nc', 1., 0.),
            (2, 'c0', 'nl', 1., 0.),
            (2, 'c0', 'nx', None, None),
            (2, 'c0', 'ny', None, None),
            (2, 'c1', 'cx', None, None),
            (2, 'c1', 'cy', None, None),
            (2, 'c1', 'n0', 1., 0.),
            (2, 'c1', 'n1', 1., 0.),
            (2, 'c1', 'nc', 1., 0.),
            (2, 'c1', 'nl', 1., 0.),
            (2, 'c1', 'nx', None, None),
            (2, 'c1', 'ny', None, None),
            (2, 'cx', 'cy', None, None),
            (2, 'cx', 'n0', None, None),
            (2, 'cx', 'n1', None, None),
            (2, 'cx', 'nc', None, None),
            (2, 'cx', 'nl', None, None),
            (2, 'cx', 'nx', None, None),
            (2, 'cx', 'ny', None, None),
            (2, 'cy', 'n0', None, None),
            (2, 'cy', 'n1', None, None),
            (2, 'cy', 'nc', None, None),
            (2, 'cy', 'nl', None, None),
            (2, 'cy', 'nx', None, None),
            (2, 'cy', 'ny', None, None),
            (2, 'n0', 'n1', 0.7913965673596881, 0.),
            (2, 'n0', 'nc', 0.20860343264031175, 0.0111758925135),
            (2, 'n0', 'nl', 0.7913965673596881, 0.),
            (2, 'n0', 'nx', None, None),
            (2, 'n0', 'ny', None, None),
            (2, 'n1', 'nc', 0., 1.),
            (2, 'n1', 'nl', 1., 0.),
            (2, 'n1', 'nx', None, None),
            (2, 'n1', 'ny', None, None),
            (2, 'nc', 'nl', 0., 1.),
            (2, 'nc', 'nx', None, None),
            (2, 'nc', 'ny', None, None),
            (2, 'nl', 'nx', None, None),
            (2, 'nl', 'ny', None, None),
            (2, 'nx', 'ny', None, None),
        ]

    # zip() stops at the shorter sequence, so without this check missing
    # or surplus result rows would go unnoticed by the loop below.
    assert len(result) == len(expected)
    for expected_item, observed_item in zip(expected, result):
        (xpd_genid, xpd_name0, xpd_name1, xpd_corr, xpd_corr_p) = expected_item
        (obs_genid, obs_name0, obs_name1, obs_corr, obs_corr_p) = observed_item
        assert xpd_genid == obs_genid
        assert xpd_name0 == obs_name0
        assert xpd_name1 == obs_name1
        # The equality test comes first so that None (and exactly matching)
        # values never reach relerr.
        assert xpd_corr == obs_corr or relerr(xpd_corr, obs_corr) < 1e-10
        # NOTE(review): the first disjunct compares the correlations, not
        # the p-values, so the p-value goes unchecked whenever the
        # correlations match exactly.  Possibly xpd_corr_p == obs_corr_p
        # was intended -- confirm before tightening.
        assert (xpd_corr == obs_corr
            or abserr(xpd_corr_p, obs_corr_p) < 1e-10
            or relerr(xpd_corr_p, obs_corr_p) < 1e-1)