Esempio n. 1
0
def test_math_func_one_param(name, probe):
    """Compare a one-parameter math call made via Python and via SQL.

    The Python-side value comes from ``get_python_math_call(name, probe)``
    and the SQL-side value from executing ``get_sql_math_call(name, probe)``
    on a fresh in-memory database.  Either both sides fail with the same
    category of error, or both succeed and agree to within 1e-4.
    """
    # Python side: remember which category of failure, if any, occurred.
    py_domain_failure = False
    py_arity_failure = False
    try:
        expected = get_python_math_call(name, probe)
    except ValueError:
        py_domain_failure = True
    except TypeError:
        py_arity_failure = True

    # SQL side: apsw.SQLError is counted in the same category as TypeError.
    db_domain_failure = False
    db_arity_failure = False
    try:
        with bayesdb_open(':memory:') as bdb:
            observed = cursor_value(
                bdb.execute(get_sql_math_call(name, probe)))
    except ValueError:
        db_domain_failure = True
    except (TypeError, apsw.SQLError):
        db_arity_failure = True

    # A domain error on one side must be mirrored on the other.
    if py_domain_failure or db_domain_failure:
        assert py_domain_failure and db_domain_failure
        return

    # Likewise for an arity error.
    if py_arity_failure or db_arity_failure:
        assert py_arity_failure and db_arity_failure
        return

    # Neither side failed, so the two values must agree.
    assert abserr(expected, observed) < 1e-4
Esempio n. 2
0
def test_math_func_one_param(name, probe):
    """Compare a one-parameter math call made via Python and via SQL.

    The Python-side value comes from ``get_python_math_call(name, probe)``
    and the SQL-side value from executing ``get_sql_math_call(name, probe)``
    on a fresh in-memory database.  Either both sides fail with the same
    category of error, or both succeed and agree to within 1e-4.
    """
    # Python side: remember which category of failure, if any, occurred.
    py_domain_failure = False
    py_arity_failure = False
    try:
        expected = get_python_math_call(name, probe)
    except ValueError:
        py_domain_failure = True
    except TypeError:
        py_arity_failure = True

    # SQL side: apsw.SQLError is counted in the same category as TypeError.
    db_domain_failure = False
    db_arity_failure = False
    try:
        with bayesdb_open(':memory:') as bdb:
            observed = cursor_value(
                bdb.execute(get_sql_math_call(name, probe)))
    except ValueError:
        db_domain_failure = True
    except (TypeError, apsw.SQLError):
        db_arity_failure = True

    # A domain error on one side must be mirrored on the other.
    if py_domain_failure or db_domain_failure:
        assert py_domain_failure and db_domain_failure
        return

    # Likewise for an arity error.
    if py_arity_failure or db_arity_failure:
        assert py_arity_failure and db_arity_failure
        return

    # Neither side failed, so the two values must agree.
    assert abserr(expected, observed) < 1e-4
def test_correlation():
    """Check ESTIMATE CORRELATION, CORRELATION PVALUE over pairwise columns.

    Two populations are exercised:

    * ``q`` over table ``u``, into which no rows are inserted: every pair
      must report ``None`` for both the correlation and its p-value.
    * ``p`` over table ``t``, filled with three distinct rows repeated ten
      times: the result is compared row by row with a table of expected
      values, using tolerances for the floating-point columns.
    """
    with bayeslite.bayesdb_open() as bdb:
        bdb.sql_execute('CREATE TABLE u(id, c0, c1, n0, n1, r0, r1)')
        bdb.execute('''
            CREATE POPULATION q FOR u (
                id IGNORE;
                c0 NOMINAL;
                c1 NOMINAL;
                n0 NUMERICAL;
                n1 NUMERICAL;
                r0 CYCLIC;
                r1 CYCLIC
            )
        ''')
        # Table u has no rows, so every pair is expected to yield None, None.
        assert bdb.execute('ESTIMATE CORRELATION, CORRELATION PVALUE'
                ' FROM PAIRWISE COLUMNS OF q'
                ' WHERE name0 < name1'
                ' ORDER BY name0, name1').fetchall() == \
            [
                (1, 'c0', 'c1', None, None),
                (1, 'c0', 'n0', None, None),
                (1, 'c0', 'n1', None, None),
                (1, 'c0', 'r0', None, None),
                (1, 'c0', 'r1', None, None),
                (1, 'c1', 'n0', None, None),
                (1, 'c1', 'n1', None, None),
                (1, 'c1', 'r0', None, None),
                (1, 'c1', 'r1', None, None),
                (1, 'n0', 'n1', None, None),
                (1, 'n0', 'r0', None, None),
                (1, 'n0', 'r1', None, None),
                (1, 'n1', 'r0', None, None),
                (1, 'n1', 'r1', None, None),
                (1, 'r0', 'r1', None, None),
            ]
        bdb.sql_execute('CREATE TABLE t'
                        '(id, c0, c1, cx, cy, n0, n1, nc, nl, nx, ny)')
        # Three distinct rows, repeated ten times (30 rows in total).  The
        # cx, cy, nx and ny columns hold the same value in every row.
        data = [
            ('foo', 'quagga', 'x', 'y', 0, -1, +1, 1, 0, 13),
            ('bar', 'eland', 'x', 'y', 87, -2, -1, 2, 0, 13),
            ('baz', 'caribou', 'x', 'y', 92.1, -3, +1, 3, 0, 13),
        ] * 10
        for i, row in enumerate(data):
            # Prepend a 1-based id to fill the first column of t.
            row = (i + 1, ) + row
            bdb.sql_execute('INSERT INTO t VALUES (?,?,?,?,?,?,?,?,?,?,?)',
                            row)
        bdb.execute('''
            CREATE POPULATION p FOR t (
                id IGNORE;
                c0 NOMINAL;
                c1 NOMINAL;
                cx NOMINAL;
                cy NOMINAL;
                n0 NUMERICAL;
                n1 NUMERICAL;
                nc NUMERICAL;
                nl NUMERICAL;
                nx NUMERICAL;
                ny NUMERICAL
            )
        ''')
        result = bdb.execute('ESTIMATE CORRELATION, CORRELATION PVALUE'
                             ' FROM PAIRWISE COLUMNS OF p'
                             ' WHERE name0 < name1'
                             ' ORDER BY name0, name1').fetchall()
        expected = [
            (2, 'c0', 'c1', 1., 2.900863120340436e-12),
            (2, 'c0', 'cx', None, None),
            (2, 'c0', 'cy', None, None),
            (2, 'c0', 'n0', 1., 0.),
            (2, 'c0', 'n1', 1., 0.),
            (2, 'c0', 'nc', 1., 0.),
            (2, 'c0', 'nl', 1., 0.),
            (2, 'c0', 'nx', None, None),
            (2, 'c0', 'ny', None, None),
            (2, 'c1', 'cx', None, None),
            (2, 'c1', 'cy', None, None),
            (2, 'c1', 'n0', 1., 0.),
            (2, 'c1', 'n1', 1., 0.),
            (2, 'c1', 'nc', 1., 0.),
            (2, 'c1', 'nl', 1., 0.),
            (2, 'c1', 'nx', None, None),
            (2, 'c1', 'ny', None, None),
            (2, 'cx', 'cy', None, None),
            (2, 'cx', 'n0', None, None),
            (2, 'cx', 'n1', None, None),
            (2, 'cx', 'nc', None, None),
            (2, 'cx', 'nl', None, None),
            (2, 'cx', 'nx', None, None),
            (2, 'cx', 'ny', None, None),
            (2, 'cy', 'n0', None, None),
            (2, 'cy', 'n1', None, None),
            (2, 'cy', 'nc', None, None),
            (2, 'cy', 'nl', None, None),
            (2, 'cy', 'nx', None, None),
            (2, 'cy', 'ny', None, None),
            (2, 'n0', 'n1', 0.7913965673596881, 0.),
            (2, 'n0', 'nc', 0.20860343264031175, 0.0111758925135),
            (2, 'n0', 'nl', 0.7913965673596881, 0.),
            (2, 'n0', 'nx', None, None),
            (2, 'n0', 'ny', None, None),
            (2, 'n1', 'nc', 0., 1.),
            (2, 'n1', 'nl', 1., 0.),
            (2, 'n1', 'nx', None, None),
            (2, 'n1', 'ny', None, None),
            (2, 'nc', 'nl', 0., 1.),
            (2, 'nc', 'nx', None, None),
            (2, 'nc', 'ny', None, None),
            (2, 'nl', 'nx', None, None),
            (2, 'nl', 'ny', None, None),
            (2, 'nx', 'ny', None, None),
        ]
    # zip() stops at the shorter sequence, so without this check a result
    # with missing (or extra) rows would pass the loop below unnoticed.
    assert len(result) == len(expected)
    for expected_item, observed_item in zip(expected, result):
        (xpd_genid, xpd_name0, xpd_name1, xpd_corr, xpd_corr_p) = expected_item
        (obs_genid, obs_name0, obs_name1, obs_corr, obs_corr_p) = observed_item
        assert xpd_genid == obs_genid
        assert xpd_name0 == obs_name0
        assert xpd_name1 == obs_name1
        # Exact equality also covers the None/None rows, for which relerr
        # is never evaluated.
        assert xpd_corr == obs_corr or relerr(xpd_corr, obs_corr) < 1e-10
        # NOTE(review): the guard below tests the correlation values, not the
        # p-values, so whenever the correlations match exactly (including the
        # None/None rows) the p-value is not compared -- confirm intended.
        assert (xpd_corr == obs_corr or abserr(xpd_corr_p, obs_corr_p) < 1e-10
                or relerr(xpd_corr_p, obs_corr_p) < 1e-1)
Esempio n. 4
0
def test_cgpm_alter_basic():
    """Exercise ALTER GENERATOR statements on a cgpm generator.

    Using the database from ``cgpm_dummy_satellites_pop_bdb()``, a generator
    ``g0`` with four models is created and then altered step by step:
    variable (in)dependence constraints, moving variables into the context
    of other variables, a singleton view, the view and row-cluster
    concentration parameters, and finally forcing all rows into one cluster.
    After each alteration the effect is checked through BQL estimates or by
    inspecting the engine states directly.
    """
    with cgpm_dummy_satellites_pop_bdb() as bdb:
        bdb.execute('''
            create generator g0 for satellites using cgpm(
                subsample 10
            );
        ''')
        bdb.execute('initialize 4 models for g0')

        # Look up the ids needed later for direct engine and catalog access.
        population_id = bayeslite.core.bayesdb_get_population(
            bdb, 'satellites')
        generator_id = bayeslite.core.bayesdb_get_generator(
            bdb, population_id, 'g0')

        # Make all variables independent in models 0-1.
        bdb.execute('''
            alter generator g0 models (0,1)
                ensure variables * independent;
        ''')
        dependencies = bdb.execute('''
            estimate dependence probability from pairwise variables of
            satellites modeled by g0 using models 0-1;
        ''').fetchall()
        # Only a variable paired with itself should be reported as dependent.
        for _, var0, var1, value in dependencies:
            assert value == int(var0 == var1)

        # Make all variables dependent in all models.
        bdb.execute('''
            alter generator g0
                ensure variables * dependent;
        ''')
        dependencies = bdb.execute('''
            estimate dependence probability from pairwise variables of
            satellites
        ''').fetchall()
        for _, _, _, value in dependencies:
            assert value == 1

        # Make all variables independent again.
        bdb.execute('''
            alter generator g0
                ensure variables * independent;
        ''')

        # Put apogee and perigee in the view of period, and launch_mass in
        # the view of class_of_orbit.
        # All other variables should remain independent.
        bdb.execute('''
            alter generator g0
                ensure variables (apogee, period, perigee) in context of period,
                ensure variable launch_mass in context of class_of_orbit;
        ''')
        dependencies = bdb.execute('''
            estimate dependence probability from pairwise variables of
            satellites
        ''').fetchall()
        # The two groups of variables expected to be mutually dependent.
        blocks = [
            ['apogee','perigee','period'],
            ['launch_mass', 'class_of_orbit']
        ]
        for _, var0, var1, value in dependencies:
            if var0 in blocks[0] and var1 in blocks[0]:
                assert value == 1.
            elif var0 in blocks[1] and var1 in blocks[1]:
                assert value == 1.
            elif var0 == var1:
                assert value == 1.
            else:
                assert value == 0.

        # Move period to a singleton in model 3.
        bdb.execute('''
            alter generator g0 model (3)
                ensure variable period in singleton view
        ''')
        # Models 0-2 were not altered: period still depends on its block only.
        dependencies = bdb.execute('''
            estimate dependence probability with period from variables of
            satellites modeled by g0 using models 0,1,2
        ''')
        for _, var0, value in dependencies:
            if var0 in blocks[0]:
                assert value == 1
            else:
                assert value == 0
        # In model 3 every reported dependence with period must be 0.
        dependencies = bdb.execute('''
            estimate dependence probability with period from variables of
            satellites modeled by g0 using models 3
        ''')
        for _, var0, value in dependencies:
            assert value == 0

        # Change the column crp concentration.
        bdb.execute('''
            alter generator g0
                set view concentration parameter to 1000
        ''')
        engine = bdb.backends['cgpm']._engine(bdb, generator_id)
        # NOTE(review): the expected alpha is the reciprocal (1/1000) of the
        # value given in the statement -- presumably a parameterization
        # choice of the backend; confirm.
        for state in engine.states:
            assert abserr(1./1000, state.alpha()) < 1e-5

        # Change row crp concentration in view of period.
        # The engine is fetched, and the per-state alphas recorded, before
        # the ALTER; the same engine object is inspected afterwards.
        engine = bdb.backends['cgpm']._engine(bdb, generator_id)
        varno = bayeslite.core.bayesdb_variable_number(
            bdb, population_id, generator_id, 'period')
        initial_alphas = [s.view_for(varno).alpha() for s in engine.states]
        bdb.execute('''
            alter generator g0 models (1,3)
                set row cluster concentration parameter
                    within view of period to 12;
        ''')
        # Only models 1 and 3 should change (again compared against the
        # reciprocal, 1/12); the others must keep their recorded alpha.
        for i, state in enumerate(engine.states):
            view_alpha = state.view_for(varno).alpha()
            if i in [1,3]:
                assert abserr(1./12, view_alpha) < 1e-5
            else:
                assert abserr(initial_alphas[i], view_alpha) < 1e-8

        # Run 4 iterations of (optimized) analysis.
        bdb.execute('analyze g0 for 4 iteration (optimized)')

        # Retrieve rows in the subsample.
        cursor = bdb.execute('''
            SELECT table_rowid FROM  bayesdb_cgpm_individual
            WHERE generator_id = ?
        ''', (generator_id,))
        subsample_rows = [c[0] for c in cursor]

        # Move all rows to same cluster in view of country_of_operator.
        # The target row id is interpolated into the statement text with %.
        bdb.execute('''
            alter generator g0
                ensure rows * in cluster of row %s
                within context of country_of_operator
        ''' % (subsample_rows[0],))

        similarities = bdb.execute('''
            estimate similarity in the context of country_of_operator
            from pairwise satellites
        ''')
        # Pairs with both rows in the subsample must have similarity 1; any
        # pair involving a row outside the subsample is expected to be None.
        for row0, row1, value in similarities:
            if row0 in subsample_rows and row1 in subsample_rows:
                assert value == 1.
            else:
                assert value is None
Esempio n. 5
0
def test_correlation():
    """Check ESTIMATE CORRELATION, CORRELATION PVALUE over pairwise columns.

    Two populations are exercised:

    * ``q`` over table ``u``, into which no rows are inserted: every pair
      must report ``None`` for both the correlation and its p-value.
    * ``p`` over table ``t``, filled with three distinct rows repeated ten
      times: the result is compared row by row with a table of expected
      values, using tolerances for the floating-point columns.
    """
    with bayeslite.bayesdb_open() as bdb:
        bdb.sql_execute('CREATE TABLE u(id, c0, c1, n0, n1, r0, r1)')
        bdb.execute('''
            CREATE POPULATION q FOR u (
                id IGNORE;
                c0 NOMINAL;
                c1 NOMINAL;
                n0 NUMERICAL;
                n1 NUMERICAL;
                r0 CYCLIC;
                r1 CYCLIC
            )
        ''')
        # Table u has no rows, so every pair is expected to yield None, None.
        assert bdb.execute('ESTIMATE CORRELATION, CORRELATION PVALUE'
                ' FROM PAIRWISE COLUMNS OF q'
                ' WHERE name0 < name1'
                ' ORDER BY name0, name1').fetchall() == \
            [
                (1, 'c0', 'c1', None, None),
                (1, 'c0', 'n0', None, None),
                (1, 'c0', 'n1', None, None),
                (1, 'c0', 'r0', None, None),
                (1, 'c0', 'r1', None, None),
                (1, 'c1', 'n0', None, None),
                (1, 'c1', 'n1', None, None),
                (1, 'c1', 'r0', None, None),
                (1, 'c1', 'r1', None, None),
                (1, 'n0', 'n1', None, None),
                (1, 'n0', 'r0', None, None),
                (1, 'n0', 'r1', None, None),
                (1, 'n1', 'r0', None, None),
                (1, 'n1', 'r1', None, None),
                (1, 'r0', 'r1', None, None),
            ]
        bdb.sql_execute('CREATE TABLE t'
            '(id, c0, c1, cx, cy, n0, n1, nc, nl, nx, ny)')
        # Three distinct rows, repeated ten times (30 rows in total).  The
        # cx, cy, nx and ny columns hold the same value in every row.
        data = [
            ('foo', 'quagga', 'x', 'y', 0, -1, +1, 1, 0, 13),
            ('bar', 'eland', 'x', 'y', 87, -2, -1, 2, 0, 13),
            ('baz', 'caribou', 'x', 'y', 92.1, -3, +1, 3, 0, 13),
        ] * 10
        for i, row in enumerate(data):
            # Prepend a 1-based id to fill the first column of t.
            row = (i + 1,) + row
            bdb.sql_execute('INSERT INTO t VALUES (?,?,?,?,?,?,?,?,?,?,?)',
                row)
        bdb.execute('''
            CREATE POPULATION p FOR t (
                id IGNORE;
                c0 NOMINAL;
                c1 NOMINAL;
                cx NOMINAL;
                cy NOMINAL;
                n0 NUMERICAL;
                n1 NUMERICAL;
                nc NUMERICAL;
                nl NUMERICAL;
                nx NUMERICAL;
                ny NUMERICAL
            )
        ''')
        result = bdb.execute('ESTIMATE CORRELATION, CORRELATION PVALUE'
            ' FROM PAIRWISE COLUMNS OF p'
            ' WHERE name0 < name1'
            ' ORDER BY name0, name1').fetchall()
        expected = [
                (2, 'c0', 'c1', 1., 2.900863120340436e-12),
                (2, 'c0', 'cx', None, None),
                (2, 'c0', 'cy', None, None),
                (2, 'c0', 'n0', 1., 0.),
                (2, 'c0', 'n1', 1., 0.),
                (2, 'c0', 'nc', 1., 0.),
                (2, 'c0', 'nl', 1., 0.),
                (2, 'c0', 'nx', None, None),
                (2, 'c0', 'ny', None, None),
                (2, 'c1', 'cx', None, None),
                (2, 'c1', 'cy', None, None),
                (2, 'c1', 'n0', 1., 0.),
                (2, 'c1', 'n1', 1., 0.),
                (2, 'c1', 'nc', 1., 0.),
                (2, 'c1', 'nl', 1., 0.),
                (2, 'c1', 'nx', None, None),
                (2, 'c1', 'ny', None, None),
                (2, 'cx', 'cy', None, None),
                (2, 'cx', 'n0', None, None),
                (2, 'cx', 'n1', None, None),
                (2, 'cx', 'nc', None, None),
                (2, 'cx', 'nl', None, None),
                (2, 'cx', 'nx', None, None),
                (2, 'cx', 'ny', None, None),
                (2, 'cy', 'n0', None, None),
                (2, 'cy', 'n1', None, None),
                (2, 'cy', 'nc', None, None),
                (2, 'cy', 'nl', None, None),
                (2, 'cy', 'nx', None, None),
                (2, 'cy', 'ny', None, None),
                (2, 'n0', 'n1', 0.7913965673596881, 0.),
                (2, 'n0', 'nc', 0.20860343264031175, 0.0111758925135),
                (2, 'n0', 'nl', 0.7913965673596881, 0.),
                (2, 'n0', 'nx', None, None),
                (2, 'n0', 'ny', None, None),
                (2, 'n1', 'nc', 0., 1.),
                (2, 'n1', 'nl', 1., 0.),
                (2, 'n1', 'nx', None, None),
                (2, 'n1', 'ny', None, None),
                (2, 'nc', 'nl', 0., 1.),
                (2, 'nc', 'nx', None, None),
                (2, 'nc', 'ny', None, None),
                (2, 'nl', 'nx', None, None),
                (2, 'nl', 'ny', None, None),
                (2, 'nx', 'ny', None, None),
            ]
    # zip() stops at the shorter sequence, so without this check a result
    # with missing (or extra) rows would pass the loop below unnoticed.
    assert len(result) == len(expected)
    for expected_item, observed_item in zip(expected, result):
        (xpd_genid, xpd_name0, xpd_name1, xpd_corr, xpd_corr_p) = expected_item
        (obs_genid, obs_name0, obs_name1, obs_corr, obs_corr_p) = observed_item
        assert xpd_genid == obs_genid
        assert xpd_name0 == obs_name0
        assert xpd_name1 == obs_name1
        # Exact equality also covers the None/None rows, for which relerr
        # is never evaluated.
        assert xpd_corr == obs_corr or relerr(xpd_corr, obs_corr) < 1e-10
        # NOTE(review): the guard below tests the correlation values, not the
        # p-values, so whenever the correlations match exactly (including the
        # None/None rows) the p-value is not compared -- confirm intended.
        assert (xpd_corr == obs_corr
            or abserr(xpd_corr_p, obs_corr_p) < 1e-10
            or relerr(xpd_corr_p, obs_corr_p) < 1e-1)