예제 #1
0
def test_schema_compatible():
    """Check schema-version requirements before and after an explicit upgrade.

    For each pair (old_version, new_version) drawn from USABLE_VERSIONS with
    old_version listed first, a temporary database is created at old_version.
    Requiring old_version or any version listed before it must succeed, and
    requiring new_version must raise BayesDBException.  Reopening with
    compatible=True must give the same results.  After an explicit
    bayesdb_upgrade_schema, every entry of USABLE_VERSIONS must be accepted.
    """
    for i, old_version in enumerate(USABLE_VERSIONS[:-1]):
        for new_version in USABLE_VERSIONS[i+1:]:
            with tempfile.NamedTemporaryFile(prefix='bayeslite') as f:
                with bayesdb_open(pathname=f.name, version=old_version) as bdb:
                    for same_or_older_version in USABLE_VERSIONS[:i+1]:
                        # Both placeholders must be %s.  The previous
                        # 'needs% ok' was parsed as a space-flagged octal
                        # conversion ('% o'), which garbled the message.
                        bayesdb_schema_required(
                            bdb, same_or_older_version,
                            'has%s needs%s ok' % (old_version,
                                                  same_or_older_version))
                    msg = 'has%s needs%s should fail' % (
                        old_version, new_version)
                    try:
                        with pytest.raises(BayesDBException):
                            bayesdb_schema_required(bdb, new_version, msg)
                    except:
                        # Bare except on purpose: only print which case
                        # failed, then re-raise the original error unchanged.
                        print(msg)
                        raise
                # Now open it in compatible mode. Nothing should change.
                with bayesdb_open(pathname=f.name, compatible=True) as bdb:
                    bayesdb_schema_required(bdb, old_version,
                                            'opened compatible, old still ok')
                    with pytest.raises(BayesDBException):
                        bayesdb_schema_required(
                            bdb, new_version,
                            'opened compatible, needs%s still fails' % (
                                new_version,))
                    # Now explicitly upgrade. Then everything should be okay.
                    bayesdb_upgrade_schema(bdb)
                with bayesdb_open(pathname=f.name, compatible=True) as bdb:
                    for v in USABLE_VERSIONS:
                        bayesdb_schema_required(
                            bdb, v, 'after explicit upgrade, needs%s ok' % (v,))
예제 #2
0
def test_schema_upgrade_on_open():
    """Check schema requirements across a compatible open and a normal reopen.

    Each version in USABLE_VERSIONS except the last is used to create a
    temporary database opened with compatible=True.  Requirements at or
    below that version must pass; higher ones must raise BayesDBException.
    After reopening with compatible=False, every usable version must be
    accepted (the test name suggests the open itself performs the upgrade)
    and an absurdly large version must still be rejected.
    """
    for old_version in USABLE_VERSIONS[:-1]:
        with tempfile.NamedTemporaryFile(prefix='bayeslite') as tmp:
            try:
                with bayesdb_open(pathname=tmp.name, version=old_version,
                                  compatible=True) as bdb:
                    for needs_version in USABLE_VERSIONS:
                        case = 'has%s needs%s' % (old_version, needs_version)
                        if needs_version <= old_version:
                            bayesdb_schema_required(
                                bdb, needs_version, case + ' ok')
                            continue
                        # Anything newer than the database must be refused.
                        try:
                            with pytest.raises(BayesDBException):
                                bayesdb_schema_required(
                                    bdb, needs_version, case + ' fail')
                        except:
                            # Report the failing case, then re-raise as-is.
                            print(' '.join([case, "should fail"]))
                            raise
                    # Populate the database before it is reopened.
                    test_core.t1_schema(bdb)
                    test_core.t1_data(bdb)
                with bayesdb_open(pathname=tmp.name, compatible=False) as bdb:
                    for needs_version in USABLE_VERSIONS:
                        label = 'needs%s after upgrade' % (needs_version,)
                        bayesdb_schema_required(bdb, needs_version, label)
                    # Nobody'll ever bump the schema version this many
                    # times, right?
                    with pytest.raises(BayesDBException):
                        bayesdb_schema_required(bdb, 1000000, 'a gazillion')
            except:
                # Identify the version and file involved before re-raising.
                print(' '.join(["old_version =", str(old_version),
                                "file =", str(tmp.name)]))
                raise
예제 #3
0
def test_schema_compatible():
    """Check schema-version requirements before and after an explicit upgrade.

    For each pair (old_version, new_version) drawn from USABLE_VERSIONS with
    old_version listed first, a temporary database is created at old_version.
    Requiring old_version or any version listed before it must succeed, and
    requiring new_version must raise BayesDBException.  Reopening with
    compatible=True must give the same results.  After an explicit
    bayesdb_upgrade_schema, every entry of USABLE_VERSIONS must be accepted.
    """
    for i, old_version in enumerate(USABLE_VERSIONS[:-1]):
        for new_version in USABLE_VERSIONS[i + 1:]:
            with tempfile.NamedTemporaryFile(prefix='bayeslite') as f:
                with bayesdb_open(pathname=f.name, version=old_version) as bdb:
                    for same_or_older_version in USABLE_VERSIONS[:i + 1]:
                        # Both placeholders must be %s.  The previous
                        # 'needs% ok' was parsed as a space-flagged octal
                        # conversion ('% o'), which garbled the message.
                        bayesdb_schema_required(
                            bdb, same_or_older_version, 'has%s needs%s ok' %
                            (old_version, same_or_older_version))
                    msg = 'has%s needs%s should fail' % (old_version,
                                                         new_version)
                    try:
                        with pytest.raises(BayesDBException):
                            bayesdb_schema_required(bdb, new_version, msg)
                    except:
                        # Bare except on purpose: only print which case
                        # failed, then re-raise the original error unchanged.
                        print(msg)
                        raise
                # Now open it in compatible mode. Nothing should change.
                with bayesdb_open(pathname=f.name, compatible=True) as bdb:
                    bayesdb_schema_required(bdb, old_version,
                                            'opened compatible, old still ok')
                    with pytest.raises(BayesDBException):
                        bayesdb_schema_required(
                            bdb, new_version,
                            'opened compatible, needs%s still fails' %
                            (new_version, ))
                    # Now explicitly upgrade. Then everything should be okay.
                    bayesdb_upgrade_schema(bdb)
                with bayesdb_open(pathname=f.name, compatible=True) as bdb:
                    for v in USABLE_VERSIONS:
                        bayesdb_schema_required(
                            bdb, v,
                            'after explicit upgrade, needs%s ok' % (v, ))
예제 #4
0
def test_schema_upgrade_on_open():
    """Check schema requirements across a compatible open and a normal reopen.

    Each version in USABLE_VERSIONS except the last is used to create a
    temporary database opened with compatible=True.  Requirements at or
    below that version must pass; higher ones must raise BayesDBException.
    After reopening with compatible=False, every usable version must be
    accepted and an absurdly large version must still be rejected.
    """
    for old_version in USABLE_VERSIONS[:-1]:
        with tempfile.NamedTemporaryFile(prefix='bayeslite') as f:
            try:
                with bayesdb_open(pathname=f.name,
                                  version=old_version,
                                  compatible=True) as bdb:
                    for needs_version in USABLE_VERSIONS:
                        case = 'has%s needs%s' % (old_version, needs_version)
                        if needs_version <= old_version:
                            # The database is already new enough for this.
                            bayesdb_schema_required(bdb, needs_version,
                                                    case + ' ok')
                        else:
                            # Anything newer than the database must be refused.
                            try:
                                with pytest.raises(BayesDBException):
                                    bayesdb_schema_required(
                                        bdb, needs_version, case + ' fail')
                            except:
                                # Report the failing case, then re-raise as-is.
                                print case, "should fail"
                                raise
                    # Populate the database before it is reopened.
                    test_core.t1_schema(bdb)
                    test_core.t1_data(bdb)
                with bayesdb_open(pathname=f.name, compatible=False) as bdb:
                    for needs_version in USABLE_VERSIONS:
                        bayesdb_schema_required(
                            bdb, needs_version,
                            'needs%s after upgrade' % (needs_version, ))
                    with pytest.raises(BayesDBException):
                        # Nobody'll ever bump the schema version this many
                        # times, right?
                        bayesdb_schema_required(bdb, 1000000, 'a gazillion')
            # NOTE(review): the body of this handler is not visible in this
            # excerpt; the snippet is cut off right after the except clause.
            except:
예제 #5
0
def test_example(persist, exname):
    """Run the example named by exname, optionally through a database file.

    When persist is false, _test_example runs against a database opened
    without a pathname.  When persist is true, a temporary file is used:
    _test_example runs on the first open and _retest_example on a second
    open of the same file.
    """
    if not persist:
        with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
            _test_example(bdb, exname)
        return
    with tempfile.NamedTemporaryFile(prefix="bayeslite") as tmp:
        path = tmp.name
        with bayeslite.bayesdb_open(
                pathname=path, builtin_metamodels=False) as bdb:
            _test_example(bdb, exname)
        # Second open of the same file: check what was stored on disk.
        with bayeslite.bayesdb_open(
                pathname=path, builtin_metamodels=False) as bdb:
            _retest_example(bdb, exname)
예제 #6
0
def test_example(persist, exname):
    """Run the example named by exname, optionally through a database file.

    When persist is false, _test_example runs against a database opened
    without a pathname.  When persist is true, a temporary file is used:
    _test_example runs on the first open and _retest_example on a second
    open of the same file.
    """
    if not persist:
        with bayeslite.bayesdb_open(builtin_backends=False) as bdb:
            _test_example(bdb, exname)
        return
    with tempfile.NamedTemporaryFile(prefix='bayeslite') as tmp:
        db_path = tmp.name
        with bayeslite.bayesdb_open(
                pathname=db_path, builtin_backends=False) as first_bdb:
            _test_example(first_bdb, exname)
        # Second open of the same file: check what was stored on disk.
        with bayeslite.bayesdb_open(
                pathname=db_path, builtin_backends=False) as second_bdb:
            _retest_example(second_bdb, exname)
예제 #7
0
def test_schema_incompatible():
    """Check version 6 and 7 requirements on a database created at version 6.

    While opened at version 6, requiring 7 must raise BayesDBException.
    After reopening without a version argument both requirements must pass,
    before and after an explicit bayesdb_upgrade_schema.
    """
    with tempfile.NamedTemporaryFile(prefix='bayeslite') as tmp:
        path = tmp.name
        # Stage 0: created at version 6, so version 7 is out of reach.
        with bayesdb_open(pathname=path, version=6) as bdb:
            bayesdb_schema_required(bdb, 6, 'test incompatible 0/6')
            with pytest.raises(BayesDBException):
                bayesdb_schema_required(bdb, 7, 'test incompatible 0/7')
        # Stage 1: reopened with defaults, then explicitly upgraded.
        with bayesdb_open(pathname=path) as bdb:
            stage_one = [
                (6, 'test incompatible 1/6'),
                (7, 'test incompatible 1/7'),
            ]
            for version, label in stage_one:
                bayesdb_schema_required(bdb, version, label)
            bayesdb_upgrade_schema(bdb)
        # Stage 2: both versions are still accepted after the upgrade.
        with bayesdb_open(pathname=path) as bdb:
            stage_two = [
                (6, 'test incompatible 2/6'),
                (7, 'test incompatible 2/7'),
            ]
            for version, label in stage_two:
                bayesdb_schema_required(bdb, version, label)
예제 #8
0
def test_schema_incompatible():
    """Exercise schema requirements 6 and 7 on a file created at version 6.

    The first open pins version 6 and expects a requirement of 7 to raise
    BayesDBException.  The two later opens pass no version and expect both
    requirements to succeed; bayesdb_upgrade_schema is called in between.
    """
    with tempfile.NamedTemporaryFile(prefix='bayeslite') as db_file:
        db_path = db_file.name

        with bayesdb_open(pathname=db_path, version=6) as old_bdb:
            bayesdb_schema_required(old_bdb, 6, 'test incompatible 0/6')
            with pytest.raises(BayesDBException):
                bayesdb_schema_required(old_bdb, 7, 'test incompatible 0/7')

        with bayesdb_open(pathname=db_path) as reopened_bdb:
            bayesdb_schema_required(reopened_bdb, 6, 'test incompatible 1/6')
            bayesdb_schema_required(reopened_bdb, 7, 'test incompatible 1/7')
            # Explicit upgrade; the checks below run on a fresh handle.
            bayesdb_upgrade_schema(reopened_bdb)

        with bayesdb_open(pathname=db_path) as upgraded_bdb:
            bayesdb_schema_required(upgraded_bdb, 6, 'test incompatible 2/6')
            bayesdb_schema_required(upgraded_bdb, 7, 'test incompatible 2/7')
예제 #9
0
def test_mutinf__ci_slow(seed):
    """Check that variables rank as x, y, z by both dependence measures.

    Builds a 100-row table from two 50-row (x, y) draws plus an
    independently drawn z column, analyzes a generator over it, and then
    orders the variables by mutual-information probability and by
    dependence probability with x.
    """
    with bayesdb_open(':memory:', seed=seed) as bdb:
        rng = bdb.np_prng
        bdb.sql_execute('create table t(x, y, z)')
        # Draw order matters for reproducibility: both xy blocks, then z.
        xy_first = rng.multivariate_normal([10,10], [[0,1],[2,0]], size=50)
        xy_second = rng.multivariate_normal([0,0], [[0,-1],[2,0]], size=50)
        xy = np.concatenate([xy_first, xy_second])
        z = rng.multivariate_normal([5], [[0.5]], size=100)
        for row in np.hstack([xy, z]):
            bdb.sql_execute('INSERT INTO t VALUES(?,?,?)', row)
        bdb.execute(
            'create population p for t(x numerical; y numerical; z numerical)')
        bdb.execute('create generator m for p')
        bdb.execute('initialize 10 models for m')
        bdb.execute('analyze m for 10 iterations (optimized; quiet)')
        mutinf_query = '''
            estimate * from variables of p
                order by probability of (mutual information with x > 0.1) desc
        '''
        depprob_query = '''
            estimate * from variables of p
                order by dependence probability with x desc
        '''
        vars_by_mutinf = bdb.execute(mutinf_query).fetchall()
        vars_by_depprob = bdb.execute(depprob_query).fetchall()
        expected_order = [('x',), ('y',), ('z',)]
        assert vars_by_mutinf == expected_order
        assert vars_by_depprob == expected_order
예제 #10
0
def test_get_metadata():
    """Check crosscat_utils.get_metadata before and after initialization.

    Before any models are initialized the call must raise BLE; for an
    unknown generator name it must raise ValueError; once two models exist
    it must return a dict containing the keys "X_D" and "X_L".
    """
    table_name = "tmp_table"
    generator_name = "tmp_cc"
    pandas_df = get_test_df()

    import os

    # Set for the rest of the process; it is not restored afterwards.
    os.environ["BAYESDB_WIZARD_MODE"] = "1"
    with bayeslite.bayesdb_open() as bdb:
        bayesdb_read_pandas_df(bdb, table_name, pandas_df, create=True)
        create_generator = """
            create generator {} for {} using crosscat(guess(*))
        """.format(generator_name, table_name)
        bdb.execute(create_generator)

        # No models yet, so there is no metadata to fetch.
        with pytest.raises(BLE):
            crosscat_utils.get_metadata(bdb, generator_name, 0)

        bdb.execute("INITIALIZE 2 MODELS FOR {}".format(generator_name))

        with pytest.raises(ValueError):  # XXX from BayesLite: should be a BLE?
            crosscat_utils.get_metadata(bdb, "Peter_Gabriel", 0)

        md = crosscat_utils.get_metadata(bdb, generator_name, 0)
        assert isinstance(md, dict)
        for required_key in ("X_D", "X_L"):
            assert required_key in md.keys()
예제 #11
0
def test_simulate_drawconstraint_error__ci_slow():
    """Check SIMULATE when a variable is both simulated and conditioned on.

    Simulating ttl_mdcr_spnd GIVEN ttl_mdcr_spnd must raise ValueError,
    while simulating only n_death_ill under the same condition must return
    100 single-column rows.
    """
    with bayeslite.bayesdb_open() as bdb:
        with open(dha_csv, 'rU') as csv_file:
            read_csv.bayesdb_read_csv(
                bdb, 'dha', csv_file, header=True, create=True)
        bdb.backends['cgpm'].set_multiprocess(False)
        key_override = [('name', 'key')]
        bayesdb_guess_population(
            bdb, 'hospital', 'dha', overrides=key_override)
        bdb.execute('CREATE GENERATOR hospital_cc FOR hospital USING cgpm;')
        bdb.execute('INITIALIZE 1 MODEL FOR hospital_cc')
        bdb.execute('ANALYZE hospital_cc FOR 1 ITERATION (OPTIMIZED);')
        with pytest.raises(ValueError):
            # Raises a ValueError since the condition variables and query
            # variables both include ttl_mdcr_spnd. ValueError is raised
            # since the CGPM runtime, not cgpm_backend, captures the error.
            bdb.execute('''
                SIMULATE ttl_mdcr_spnd, n_death_ill FROM hospital
                    GIVEN ttl_mdcr_spnd = 40000
                    LIMIT 100
            ''').fetchall()
        valid_cursor = bdb.execute('''
            SIMULATE n_death_ill FROM hospital
                GIVEN ttl_mdcr_spnd = 40000
                LIMIT 100
        ''')
        samples = valid_cursor.fetchall()
        assert len(samples) == 100
        for sample in samples:
            assert len(sample) == 1
예제 #12
0
def test_register():
    """Check that registering a Composer creates its catalog entries.

    After bayesdb_register_metamodel, the composer's name must appear in
    bayesdb_metamodel and every expected table and trigger must be present
    in sqlite_master.

    The database is opened as a context manager so the handle is closed
    even when a check fails part-way through; the previous explicit
    bdb.close() at the end was skipped on any failure.
    """
    with bayeslite.bayesdb_open() as bdb:
        composer = Composer(n_samples=5)
        bayeslite.bayesdb_register_metamodel(bdb, composer)
        # Check if globally registered.
        try:
            bdb.sql_execute('''
                SELECT * FROM bayesdb_metamodel WHERE name={}
            '''.format(quote(composer.name()))).next()
        except StopIteration:
            pytest.fail('Composer not registered in bayesdb_metamodel.')
        # Check all tables/triggers.
        schema = [
            ('table', 'bayesdb_composer_cc_id'),
            ('table', 'bayesdb_composer_column_owner'),
            ('table', 'bayesdb_composer_column_toposort'),
            ('trigger', 'bayesdb_composer_column_toposort_check'),
            ('table', 'bayesdb_composer_column_parents'),
            ('table', 'bayesdb_composer_column_foreign_predictor'),
            ('trigger', 'bayesdb_composer_column_foreign_predictor_check'),
        ]
        for kind, name in schema:
            try:
                # An empty result means the object was never created.
                bdb.sql_execute('''
                    SELECT * FROM sqlite_master WHERE type={} AND name={}
                '''.format(quote(kind), quote(name))).next()
            except StopIteration:
                pytest.fail('Missing from Composer schema: {}'.format(
                    (kind, name)))
예제 #13
0
def _query_into_queue(query_string, queue, bdb_file):
    """
    Execute query_string against the BayesDB file and put the result, as a
    dataframe, into the multiprocessing Manager.Queue().

    For two technical reasons, this function is defined as a toplevel
    function and independently creates a bdb handle:

    1) Multiprocessing workers must be pickleable, and thus must be
       declared as toplevel functions;
    2) Multiple threads cannot access the same bdb handle, lest concurrency
       issues arise with corrupt data.

    The handle is used as a context manager so that it is released once the
    result has been queued, and also when the query raises.

    Parameters
    ----------
    query_string : str
        Query to execute, determined by estimate_similarity_mp.
    queue : multiprocessing.Manager.Queue
        Queue to place results into
    bdb_file : str
        File location of the BayesDB database. This function will
        independently open a new BayesDB handler.
    """
    with bayesdb_open(pathname=bdb_file) as bdb:
        res = bdb.execute(query_string)
        # Convert while the handle is still open, then hand off the frame.
        queue.put(cursor_to_df(res))
예제 #14
0
def cgpm_smoke_bdb():
    """Yield an in-memory database with table t and population p.

    A CGPM_Metamodel with a 'piecewise' registry entry is registered, table
    t is filled with 27 rows indexed by (i, j, k) in 0..2, and a population
    p is created over it.  A column is stored as NULL whenever its driving
    index is odd: i for output, j for cat, k for input.
    """
    with bayesdb_open(':memory:', builtin_metamodels=False) as bdb:
        registry = {'piecewise': PieceWise}
        metamodel = CGPM_Metamodel(registry, multiprocess=0)
        bayesdb_register_metamodel(bdb, metamodel)

        bdb.sql_execute('CREATE TABLE t (Output, cat, Input)')
        for i in xrange(3):
            for j in xrange(3):
                for k in xrange(3):
                    # Odd indices blank out the corresponding column.
                    output_value = None if i % 2 else i + j / (k + 1)
                    cat_value = None if j % 2 else (
                        -1 if (i + j * k) % 2 else +1)
                    input_value = None if k % 2 else (i * j - k)**2
                    row = (output_value, cat_value, input_value)
                    bdb.sql_execute(
                        '''
                        INSERT INTO t (output, cat, input) VALUES (?, ?, ?)
                    ''', row)

        bdb.execute('''
            CREATE POPULATION p FOR t WITH SCHEMA(
                MODEL output, input AS NUMERICAL;
                MODEL cat AS CATEGORICAL
            )
        ''')

        yield bdb
예제 #15
0
def test_guess_generator():
    """Check bayesdb_guess_generator on a three-column table.

    Column x holds 676 distinct two-letter strings, y a parity bit and z a
    square root.  Guessing must raise ValueError when y and z are both
    ignored, succeed with no overrides, raise ValueError on a second
    attempt with the same generator name, and record y as categorical and
    z as numerical.

    The database is opened as a context manager so the handle is closed
    even when an assertion fails; it was previously never closed.
    """
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        bdb.sql_execute('CREATE TABLE t(x NUMERIC, y NUMERIC, z NUMERIC)')
        a_z = range(ord('a'), ord('z') + 1)
        aa_zz = ((c, d) for c in a_z for d in a_z)
        data = ((chr(c) + chr(d), (c + d) % 2, math.sqrt(c + d))
                for c, d in aa_zz)
        for row in data:
            bdb.sql_execute('INSERT INTO t (x, y, z) VALUES (?, ?, ?)', row)
        cc = crosscat.LocalEngine.LocalEngine(seed=0)
        metamodel = CrosscatMetamodel(cc)
        bayeslite.bayesdb_register_metamodel(bdb, metamodel)
        with pytest.raises(ValueError):
            # No modelled columns.  (x is key.)
            bayesdb_guess_generator(
                bdb, 't_cc', 't', 'crosscat',
                overrides=[('y', 'ignore'), ('z', 'ignore')])
        bayesdb_guess_generator(bdb, 't_cc', 't', 'crosscat')
        with pytest.raises(ValueError):
            # Generator already exists.
            bayesdb_guess_generator(bdb, 't_cc', 't', 'crosscat')
        assert bdb.sql_execute('SELECT *'
                               ' FROM bayesdb_generator_column').fetchall() == [
                                   (1, 1, 'categorical'),
                                   (1, 2, 'numerical'),
                               ]
예제 #16
0
def test_subsample():
    """Smoke-test queries on a generator created with SUBSAMPLE 100.

    Two populations are guessed over the same table; one generator uses
    every row and the other a subsample.  Several ESTIMATE and INFER
    queries are run against the subsampled population, and then the row
    ids recorded in bayesdb_cgpm_individual are compared: the full
    generator's first 100 must be 1..100 and the subsample's must differ.
    """
    with bayeslite.bayesdb_open(builtin_backends=False) as bdb:
        backend = CGPM_Backend(cgpm_registry={}, multiprocess=False)
        bayeslite.bayesdb_register_backend(bdb, backend)
        with open(dha_csv, 'rU') as csv_file:
            read_csv.bayesdb_read_csv(
                bdb, 'dha', csv_file, header=True, create=True)
        for population in ('hospitals_full', 'hospitals_sub'):
            bayesdb_guess_population(
                bdb, population, 'dha', overrides=[('name', 'key')])
        bdb.execute('''
            CREATE GENERATOR hosp_full_cc FOR hospitals_full USING cgpm;
        ''')
        bdb.execute('''
            CREATE GENERATOR hosp_sub_cc FOR hospitals_sub USING cgpm(
                SUBSAMPLE 100
            )
        ''')
        bdb.execute('INITIALIZE 1 MODEL FOR hosp_sub_cc')
        bdb.execute('ANALYZE hosp_sub_cc FOR 1 ITERATION (OPTIMIZED)')
        # Each query only has to run to completion; results are discarded.
        smoke_queries = [
            '''
            ESTIMATE SIMILARITY TO (_rowid_=2) IN THE CONTEXT OF PNEUM_SCORE
            FROM hospitals_sub WHERE _rowid_ = 1 OR _rowid_ = 101
        ''',
            '''
            ESTIMATE SIMILARITY TO (_rowid_=102) IN THE CONTEXT OF
            N_DEATH_ILL FROM hospitals_sub
            WHERE _rowid_ = 1 OR _rowid_ = 101
        ''',
            '''
            ESTIMATE PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc
            FROM hospitals_sub
            WHERE _rowid_ = 1 OR _rowid_ = 101
        ''',
            '''
            ESTIMATE SIMILARITY IN THE CONTEXT OF PNEUM_SCORE
            FROM PAIRWISE hospitals_sub
            WHERE (r0._rowid_ = 1 OR r0._rowid_ = 101) AND
            (r1._rowid_ = 1 OR r1._rowid_ = 101)
        ''',
            '''
            INFER mdcr_spnd_amblnc FROM hospitals_sub
            WHERE _rowid_ = 1 OR _rowid_ = 101
        ''',
        ]
        for query in smoke_queries:
            bdb.execute(query).fetchall()
        sql = '''
            SELECT table_rowid FROM bayesdb_cgpm_individual
                WHERE generator_id = ?
                ORDER BY cgpm_rowid ASC
                LIMIT 100
        '''
        first_hundred = range(1, 100 + 1)
        gid_full = bayesdb_get_generator(bdb, None, 'hosp_full_cc')
        full_rowids = [row[0] for row in bdb.sql_execute(sql, (gid_full,))]
        assert full_rowids == first_hundred
        gid = bayesdb_get_generator(bdb, None, 'hosp_sub_cc')
        sub_rowids = [row[0] for row in bdb.sql_execute(sql, (gid,))]
        assert sub_rowids != first_hundred
        # Generators go before the populations they were created for.
        for statement in (
                'DROP GENERATOR hosp_sub_cc',
                'DROP GENERATOR hosp_full_cc',
                'DROP POPULATION hospitals_sub',
                'DROP POPULATION hospitals_full'):
            bdb.execute(statement)
예제 #17
0
def test_conditional_probability_pathologies():
    """Check probabilities of a value conditioned on the same variable.

    On a six-row, two-column categorical table: the unconditional
    probability of foo = 'x' must be below 1, conditioning on foo = 'x'
    must give exactly 1 (both per-model and per-column forms), and
    conditioning on foo = 'y' must give exactly 0.
    """
    data = [['x', 'a']] * 3 + [['y', 'b']] * 3
    with bayeslite.bayesdb_open() as bdb:

        def probability(query):
            # Run a single-value BQL query and return that value.
            return bdb.execute(query).fetchvalue()

        bdb.sql_execute('create table t(foo, bar)')
        for record in data:
            bdb.sql_execute('insert into t values (?, ?)', record)
        bdb.execute('''
            create generator t_cc for t using crosscat(
                foo categorical,
                bar categorical
            )
        ''')
        bdb.execute('initialize 1 models for t_cc')
        bdb.execute('analyze t_cc for 1 iterations wait')
        assert probability('''
            estimate probability of foo = 'x' by t_cc
        ''') < 1
        assert probability('''
            estimate probability of foo = 'x' given (foo = 'x') by t_cc
        ''') == 1
        assert probability('''
            estimate probability of value 'x' given (foo = 'x')
                from columns of t_cc
                where c.name = 'foo'
        ''') == 1
        assert probability('''
            estimate probability of foo = 'x' given (foo = 'y') by t_cc
        ''') == 0
def test_mix_ratio(seed):
    """Check that simulated samples reproduce the mixing ratio of the data.

    Samples from two Gaussians centred at `means` are mixed 70/30, loaded
    into a database, and simulated back.  Each simulated point is assigned
    to its nearest mean, and the share of points per mean must be within
    0.1 of the original mixing ratio.
    """
    means = ((0,20), (20,0))
    sample_size = 100
    mix_ratio = [0.7, 0.3]
    table = 'data'

    with bayeslite.bayesdb_open(seed=seed) as bdb:
        sample_gaussians = axis_aligned_gaussians(means, sample_size, bdb._np_prng)
        samples = mix(sample_gaussians, mix_ratio, bdb._np_prng)
        register_loom(bdb)
        prepare_bdb(bdb, samples, table)

        cursor = bdb.execute('''
            SIMULATE "0", "1" FROM data LIMIT ?
        ''', (sample_size,))
        simulated_samples = list(cursor)

    counts = collections.Counter(
        (0 if distance((x,y), means[0]) < distance((x,y), means[1]) else 1
            for x, y in simulated_samples))
    # Index by cluster number rather than iterating the Counter: its key
    # order follows first occurrence (or hash order), and a cluster that
    # received no samples would be missing and shorten the list.
    total = float(len(simulated_samples))
    simulated_mix_ratio = [counts[i] / total for i in range(len(means))]

    for expected, simulated in zip(mix_ratio, simulated_mix_ratio):
        difference = abs(expected - simulated)
        assert difference < 0.1
예제 #19
0
def test_table_from_url():
    """Load a table with table_from_url and compare its first rows.

    The string form of each of the first three rows of testTable must
    equal the matching entry of output1; the table named by table_name is
    dropped afterwards.
    """
    with bayeslite.bayesdb_open(pathname=db_pathname) as bdb:
        table_from_url(bdb, table_name, url)
        rows = bdb.execute('SELECT * FROM testTable LIMIT 3;')
        for index, row in enumerate(rows):
            assert str(row) == output1[index]
        # Clean up so a rerun against the same file starts fresh.
        drop_statement = "DROP TABLE IF EXISTS " + table_name
        bdb.execute(drop_statement)
예제 #20
0
def _query_into_queue(query_string, params, queue, bdb_file):
    """
    Execute query_string with params against the BayesDB file and put the
    result, as a dataframe, into the multiprocessing Manager.Queue().

    For two technical reasons, this function is defined as a toplevel
    function and independently creates a bdb handle:

    1) Multiprocessing workers must be pickleable, and thus must be
       declared as toplevel functions;
    2) Multiple threads cannot access the same bdb handle, lest concurrency
       issues arise with corrupt data.

    The handle is used as a context manager so that it is released once the
    result has been queued, and also when the query raises.

    Parameters
    ----------
    query_string : str
        Query to execute, determined by estimate_similarity_mp.
    params
        Bindings passed unchanged to bdb.execute alongside query_string.
    queue : multiprocessing.Manager.Queue
        Queue to place results into
    bdb_file : str
        File location of the BayesDB database. This function will
        independently open a new BayesDB handler.
    """
    with bayesdb_open(pathname=bdb_file) as bdb:
        res = bdb.execute(query_string, params)
        # Convert while the handle is still open, then hand off the frame.
        queue.put(cursor_to_df(res))
예제 #21
0
def test_impossible_duplicate_dependency():
    """Check that a contradictory dependency schema is rejected.

    The generator schema declares columns a and c both independent and
    dependent; executing it must raise bayeslite.BQLError.
    """
    rows = [(0, 1, 0, 0), (1, 0, 0, 1)]

    # Create the database.
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        engine = crosscat.LocalEngine.LocalEngine(seed=0)
        bayeslite.bayesdb_register_metamodel(bdb, CrosscatMetamodel(engine))

        # Read the dataset.
        bdb.sql_execute('CREATE TABLE foo(id,a,b,c)')
        for record in rows:
            bdb.sql_execute('INSERT INTO foo VALUES(?,?,?,?)', record)

        # Create schema, we will force DEP(a c) and IND(a c).
        contradictory_schema = '''
            CREATE GENERATOR bar FOR foo USING crosscat(
                GUESS(*),
                id IGNORE,
                a CATEGORICAL,
                b CATEGORICAL,
                c CATEGORICAL,
                INDEPENDENT(a,b,c),
                DEPENDENT(a,c),
            );
        '''

        # An error should be thrown about impossible schema.
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(contradictory_schema)
예제 #22
0
def test_nullify():
    """Check bayesdb_nullify return values and the resulting table contents.

    Nullifying '' across the table must report 3 and turn those cells
    into NULL; nullifying 'nan' restricted to column x must report 1 and
    leave the 'nan' in column y alone; a value that never occurs must
    report 0.
    """
    initial_rows = [
        ['1',''],
        ['nan','foo'],
        ['2','nan'],
        ['2','""'],
        ['', ''],
    ]
    with bayesdb_open(':memory:') as bdb:

        def table_contents():
            # Snapshot of every row currently in t.
            return bdb.execute('select * from t').fetchall()

        bdb.sql_execute('create table t(x,y)')
        for record in initial_rows:
            bdb.sql_execute('insert into t values(?,?)', record)
        assert table_contents() == [tuple(record) for record in initial_rows]

        # Empty strings anywhere in the table become NULL.
        assert bayesdb_nullify(bdb, 't', '') == 3
        assert table_contents() == [
            ('1',None),
            ('nan','foo'),
            ('2','nan'),
            ('2','""'),
            (None, None),
        ]

        # Only column x is touched; the 'nan' in column y must survive.
        assert bayesdb_nullify(bdb, 't', 'nan', columns=['x']) == 1
        assert table_contents() == [
            ('1',None),
            (None,'foo'),
            ('2','nan'),
            ('2','""'),
            (None, None),
        ]

        assert bayesdb_nullify(bdb, 't', 'fnord') == 0
예제 #23
0
def cgpm_dummy_satellites_bdb():
    """Yield an in-memory database holding a synthetic satellites_ucs table.

    Four groups of 25 rows are inserted, one per (orbit class, period
    function) pair; apogee and perigee range over 0..4, the period is
    computed from them, and country and launch mass are drawn from the
    database's numpy PRNG.
    """
    def quadratic_period(x, y):
        return x + y**2

    def sinusoidal_period(x, y):
        return math.sin(x + y)

    with bayesdb_open(':memory:', builtin_metamodels=False) as bdb:
        bdb.sql_execute('''
            CREATE TABLE satellites_ucs (
                apogee,
                class_of_orbit,
                country_of_operator,
                launch_mass,
                perigee,
                period
        )''')
        groups = [
            ('geo', quadratic_period),
            ('leo', sinusoidal_period),
            (None, quadratic_period),
            (None, sinusoidal_period),
        ]
        countries = ['US', 'Russia', 'China', 'Bulgaria']
        for orbit_class, period_of in groups:
            for x in xrange(5):
                for y in xrange(5):
                    # Draw order is fixed: country index first, then mass.
                    pick = bdb._np_prng.randint(0, len(countries))
                    country = countries[pick]
                    mass = bdb._np_prng.normal(1000, 50)
                    row = (country, mass, orbit_class, x, y, period_of(x, y))
                    bdb.sql_execute(
                        '''
                        INSERT INTO satellites_ucs
                            (country_of_operator, launch_mass, class_of_orbit,
                                apogee, perigee, period)
                            VALUES (?,?,?,?,?,?)
                    ''', row)
        yield bdb
예제 #24
0
def cgpm_smoke_bdb():
    """Yield an in-memory database with table t and population p.

    A CGPM_Backend with a 'piecewise' registry entry is registered, table
    t is filled with 27 rows indexed by (i, j, k) in 0..2, and a population
    p is created over it.  A column is stored as NULL whenever its driving
    index is odd: i for output, j for cat, k for input.
    """
    with bayesdb_open(':memory:', builtin_backends=False) as bdb:
        registry = {'piecewise': PieceWise}
        backend = CGPM_Backend(registry, multiprocess=0)
        bayesdb_register_backend(bdb, backend)

        bdb.sql_execute('CREATE TABLE t (Output, cat, Input)')
        for i in xrange(3):
            for j in xrange(3):
                for k in xrange(3):
                    # Odd indices blank out the corresponding column.
                    output_value = None if i % 2 else i + j/(k + 1)
                    cat_value = None if j % 2 else (
                        -1 if (i + j*k) % 2 else +1)
                    input_value = None if k % 2 else (i*j - k)**2
                    row = (output_value, cat_value, input_value)
                    bdb.sql_execute('''
                        INSERT INTO t (output, cat, input) VALUES (?, ?, ?)
                    ''', row)

        bdb.execute('''
            CREATE POPULATION p FOR t WITH SCHEMA(
                output  NUMERICAL;
                input   NUMERICAL;
                cat     NOMINAL;
            )
        ''')

        yield bdb
예제 #25
0
def cgpm_dummy_satellites_bdb():
    """Yield an in-memory database holding a synthetic satellites_ucs table.

    Four groups of 25 rows are inserted, one per (orbit class, period
    function) pair; apogee and perigee range over 0..4, the period is
    computed from them, and country and launch mass are drawn from the
    database's numpy PRNG.
    """
    def quadratic_period(x, y):
        return x + y**2

    def sinusoidal_period(x, y):
        return math.sin(x + y)

    with bayesdb_open(':memory:', builtin_backends=False) as bdb:
        bdb.sql_execute('''
            CREATE TABLE satellites_ucs (
                apogee,
                class_of_orbit,
                country_of_operator,
                launch_mass,
                perigee,
                period
        )''')
        groups = [
            ('geo', quadratic_period),
            ('leo', sinusoidal_period),
            (None, quadratic_period),
            (None, sinusoidal_period),
        ]
        countries = ['US', 'Russia', 'China', 'Bulgaria']
        for orbit_class, period_of in groups:
            for x in xrange(5):
                for y in xrange(5):
                    # Draw order is fixed: country index first, then mass.
                    pick = bdb._np_prng.randint(0, len(countries))
                    country = countries[pick]
                    mass = bdb._np_prng.normal(1000, 50)
                    row = (country, mass, orbit_class, x, y, period_of(x, y))
                    bdb.sql_execute('''
                        INSERT INTO satellites_ucs
                            (country_of_operator, launch_mass, class_of_orbit,
                                apogee, perigee, period)
                            VALUES (?,?,?,?,?,?)
                    ''', row)
        yield bdb
def test_mix_ratio(seed):
    """Check that simulated samples reproduce the mixing ratio of the data.

    Samples from two Gaussians centred at `means` are mixed 70/30, loaded
    into a database, and simulated back.  Each simulated point is assigned
    to its nearest mean, and the share of points per mean must be within
    0.1 of the original mixing ratio.
    """
    means = ((0, 20), (20, 0))
    sample_size = 100
    mix_ratio = [0.7, 0.3]
    table = 'data'

    with bayeslite.bayesdb_open(seed=seed) as bdb:
        sample_gaussians = axis_aligned_gaussians(means, sample_size,
                                                  bdb._np_prng)
        samples = mix(sample_gaussians, mix_ratio, bdb._np_prng)
        register_loom(bdb)
        prepare_bdb(bdb, samples, table)

        cursor = bdb.execute(
            '''
            SIMULATE "0", "1" FROM data LIMIT ?
        ''', (sample_size, ))
        simulated_samples = list(cursor)

    counts = collections.Counter((0 if distance((x, y), means[0]) < distance(
        (x, y), means[1]) else 1 for x, y in simulated_samples))
    # Index by cluster number rather than iterating the Counter: its key
    # order follows first occurrence (or hash order), and a cluster that
    # received no samples would be missing and shorten the list.
    total = float(len(simulated_samples))
    simulated_mix_ratio = [counts[i] / total for i in range(len(means))]

    for expected, simulated in zip(mix_ratio, simulated_mix_ratio):
        difference = abs(expected - simulated)
        assert difference < 0.1
예제 #27
0
def test_guess_population():
    """Exercise bayesdb_guess_population on a small three-column table.

    The table holds one row per ordered pair of lowercase letters: ``x`` is
    the two-letter string, ``y`` the parity of the summed character codes and
    ``z`` the square root of that sum.
    """
    with bayeslite.bayesdb_open() as bdb:
        bdb.sql_execute('CREATE TABLE t(x NUMERIC, y NUMERIC, z NUMERIC)')
        first, last = ord('a'), ord('z')
        for hi in range(first, last + 1):
            for lo in range(first, last + 1):
                code_sum = hi + lo
                bdb.sql_execute(
                    'INSERT INTO t (x, y, z) VALUES (?, ?, ?)',
                    (chr(hi) + chr(lo), code_sum % 2, math.sqrt(code_sum)))

        # No modeled columns.  (x is key.)
        ignore_rest = [('y', 'ignore'), ('z', 'ignore')]
        with pytest.raises(ValueError):
            bayesdb_guess_population(bdb, 'p', 't', overrides=ignore_rest)

        bayesdb_guess_population(bdb, 'p', 't')

        # Population already exists.
        with pytest.raises(ValueError):
            bayesdb_guess_population(bdb, 'p', 't')

        expected_variables = [
            (1, None, 1, 'y', 'nominal'),
            (1, None, 2, 'z', 'numerical'),
        ]
        stored_variables = bdb.sql_execute(
            'SELECT * FROM bayesdb_variable').fetchall()
        assert stored_variables == expected_variables
예제 #28
0
def bdb_for_checking_cmi(backend, iterations, seed):
    """Yield an in-memory bdb holding an analyzed generator ``m``.

    Table ``t`` is filled with 1000 rows from ``generate_v_structured_data``,
    population ``p`` models a, b and c as nominal, and generator ``m`` is
    created with the requested backend, initialized with 10 models and
    analyzed for ``iterations`` iterations.

    :param backend: ``'loom'`` or ``'cgpm'``; anything else raises ValueError.
    :param iterations: number of ANALYZE iterations.
    :param seed: seed forwarded to ``bayesdb_open``.
    """
    with tempdir('bayeslite-loom') as loom_store_path:
        with bayesdb_open(':memory:', seed=seed) as bdb:
            bdb.sql_execute('CREATE TABLE t (a, b, c)')
            rows = generate_v_structured_data(1000, bdb.np_prng)
            for abc in rows:
                bdb.sql_execute('''
                    INSERT INTO t (a, b, c) VALUES (?, ?, ?)
                ''', abc)

            bdb.execute('''
                CREATE POPULATION p FOR t WITH SCHEMA (
                    SET STATTYPES OF a, b, c TO NOMINAL;
                )
            ''')

            if backend not in ('loom', 'cgpm'):
                raise ValueError('Backend %s unknown' % (backend, ))

            is_cgpm = backend == 'cgpm'
            if is_cgpm:
                bdb.execute('CREATE GENERATOR m FOR p using cgpm')
                bdb.backends['cgpm'].set_multiprocess('on')
            else:
                # Loom is optional; skip the test when it is not importable.
                try:
                    from bayeslite.backends.loom_backend import LoomBackend
                except ImportError:
                    pytest.skip('Failed to import Loom.')
                loom = LoomBackend(loom_store_path=loom_store_path)
                bayesdb_register_backend(bdb, loom)
                bdb.execute('CREATE GENERATOR m FOR p using loom')

            # XXX we may want to downscale this eventually.
            bdb.execute('INITIALIZE 10 MODELS FOR m;')
            bdb.execute('ANALYZE m FOR %d ITERATIONS;' % (iterations, ))
            if is_cgpm:
                bdb.backends['cgpm'].set_multiprocess('off')
            yield bdb
예제 #29
0
def test_hackbackend():
    """Exercise generator creation with only a hand-registered backend.

    The bdb is opened without builtin backends, so ``cgpm`` must be rejected
    throughout, and ``dotdog`` must be rejected until DotdogBackend has been
    registered.  Creating a generator whose name already exists must raise
    BQLError as well.
    """
    # Use the context manager so the bdb is closed even when an assertion
    # fails part-way through; previously the handle was never closed.
    with bayeslite.bayesdb_open(builtin_backends=False) as bdb:
        bdb.sql_execute('CREATE TABLE t(a INTEGER, b TEXT)')
        bdb.sql_execute("INSERT INTO t (a, b) VALUES (42, 'fnord')")
        bdb.sql_execute('CREATE TABLE u AS SELECT * FROM t')
        bdb.execute('CREATE POPULATION p FOR t(b IGNORE; a NUMERICAL)')
        # Nothing is registered yet: both backends are unknown.
        with pytest.raises(bayeslite.BQLError):
            bdb.execute('CREATE GENERATOR p_cc FOR p USING cgpm;')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute('CREATE GENERATOR p_dd FOR p USING dotdog;')
        # Register, deregister and register again to cover all three paths.
        dotdog_backend = DotdogBackend()
        bayeslite.bayesdb_register_backend(bdb, dotdog_backend)
        bayeslite.bayesdb_deregister_backend(bdb, dotdog_backend)
        bayeslite.bayesdb_register_backend(bdb, dotdog_backend)
        with pytest.raises(bayeslite.BQLError):
            bdb.execute('CREATE GENERATOR p_cc FOR p USING cgpm;')
        bdb.execute('CREATE GENERATOR p_dd FOR p USING dotdog(a NUMERICAL)')
        # p_dd now exists, so creating it again must fail.
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR p_dd FOR p USING dotdog(a NUMERICAL)')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute('CREATE GENERATOR p_cc FOR p USING cgpm;')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR p_dd FOR p USING dotdog(a NUMERICAL)')
        # XXX Rest of test originally exercised default backend, but
        # syntax doesn't support that now.  Not clear that's wrong either.
        bdb.execute('CREATE GENERATOR q_dd FOR p USING dotdog(a NUMERICAL)')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR q_dd FOR p USING dotdog(a NUMERICAL)')
예제 #30
0
def test_loom_guess_schema_nominal():
    """Smoke-test LoomBackend on a nominal column with 300 random values.

    The scenario targets nominal variables with more than 256 distinct
    values, for which Loom is expected to pick its unbounded_nominal type.
    The test only checks that the whole create/analyze/drop cycle runs.
    """
    with tempdir('bayeslite-loom') as loom_store_path:
        with bayesdb_open(':memory:') as bdb:
            loom = LoomBackend(loom_store_path=loom_store_path)
            bayesdb_register_backend(bdb, loom)
            bdb.sql_execute('create table t (v)')

            # 300 words of 20 letters each, drawn from the bdb's weak PRNG.
            words = [
                ''.join(
                    string.letters[
                        bdb._prng.weakrandom_uniform(len(string.letters))]
                    for _letter in xrange(20))
                for _word in xrange(300)
            ]
            for word in words:
                bdb.sql_execute('''
                    insert into t (v) values (?)
                ''', (word, ))

            lifecycle = (
                'create population p for t (v nominal)',
                'create generator g for p using loom',
                'initialize 1 model for g',
                'analyze g for 50 iterations',
                'drop models from g',
                'drop generator g',
                'drop population p',
                'drop table t',
            )
            for statement in lifecycle:
                bdb.execute(statement)
예제 #31
0
def test_nig_normal_latent_numbering():
    """Check variable numbering with and without a latent variable.

    Generator ``g0`` is a plain nig_normal generator and must expose
    variables [1, 2].  Generator ``g1`` adds the ``xe deviation(x)`` clause
    and must expose an additional variable numbered -1.
    """
    with bayesdb_open(':memory:') as bdb:
        bayesdb_register_metamodel(bdb, NIGNormalMetamodel())
        bdb.sql_execute('create table t(id integer primary key, x, y)')
        for n in xrange(100):
            bdb.sql_execute('insert into t(x, y) values(?, ?)',
                            (n, n * n - 100))
        bdb.execute('''
            create population p for t(id ignore; model x,y as numerical)
        ''')
        assert core.bayesdb_has_population(bdb, 'p')
        population_id = core.bayesdb_get_population(bdb, 'p')
        observed_only = [1, 2]
        with_latent = [-1, 1, 2]
        assert core.bayesdb_variable_numbers(
            bdb, population_id, None) == observed_only

        bdb.execute('create generator g0 for p using nig_normal')
        bdb.execute('''
            create generator g1 for p using nig_normal(xe deviation(x))
        ''')

        assert core.bayesdb_has_generator(bdb, population_id, 'g0')
        plain_id = core.bayesdb_get_generator(bdb, population_id, 'g0')
        assert core.bayesdb_has_generator(bdb, population_id, 'g1')
        latent_id = core.bayesdb_get_generator(bdb, population_id, 'g1')

        # The population itself is unchanged by creating the generators.
        assert core.bayesdb_variable_numbers(
            bdb, population_id, None) == observed_only
        assert core.bayesdb_variable_numbers(
            bdb, population_id, plain_id) == observed_only
        assert core.bayesdb_generator_column_numbers(
            bdb, plain_id) == observed_only
        assert core.bayesdb_variable_numbers(
            bdb, population_id, latent_id) == with_latent
        assert core.bayesdb_generator_column_numbers(
            bdb, latent_id) == with_latent
예제 #32
0
def test_conditional_probability_pathologies():
    """Check degenerate conditional probabilities on a tiny two-column table.

    Conditioning ``foo`` on its own value must give probability 1 when the
    values agree and 0 when they differ, while the unconditional probability
    must stay below 1.
    """
    rows = [['x', 'a'] for _ in range(3)] + [['y', 'b'] for _ in range(3)]
    with bayeslite.bayesdb_open() as bdb:
        bdb.sql_execute('create table t(foo, bar)')
        for foo_bar in rows:
            bdb.sql_execute('insert into t values (?, ?)', foo_bar)
        bdb.execute('''
            create population p for t (
                model foo, bar as categorical
            )
        ''')
        for statement in (
                'create generator p_cc for p using crosscat()',
                'initialize 1 models for p_cc',
                'analyze p_cc for 1 iterations wait'):
            bdb.execute(statement)

        marginal = bdb.execute('''
            estimate probability of foo = 'x' by p
        ''').fetchvalue()
        assert marginal < 1

        given_same = bdb.execute('''
            estimate probability of foo = 'x' given (foo = 'x') by p
        ''').fetchvalue()
        assert given_same == 1

        given_same_by_column = bdb.execute('''
            estimate probability of value 'x' given (foo = 'x')
                from columns of p
                where c.name = 'foo'
        ''').fetchvalue()
        assert given_same_by_column == 1

        given_other = bdb.execute('''
            estimate probability of foo = 'x' given (foo = 'y') by p
        ''').fetchvalue()
        assert given_other == 0
예제 #33
0
def test_hackmetamodel():
    """Exercise generator creation with only a hand-registered metamodel.

    The bdb is opened without builtin metamodels, so ``crosscat`` must be
    rejected throughout, and ``dotdog`` must be rejected until
    DotdogMetamodel has been registered.  Creating a generator whose name
    already exists must raise BQLError as well.
    """
    # Use the context manager so the bdb is closed even when an assertion
    # fails part-way through; previously the handle was never closed.
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        bdb.sql_execute('CREATE TABLE t(a INTEGER, b TEXT)')
        bdb.sql_execute("INSERT INTO t (a, b) VALUES (42, 'fnord')")
        bdb.sql_execute('CREATE TABLE u AS SELECT * FROM t')
        # Nothing is registered yet: both metamodels are unknown.
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR t_cc FOR t USING crosscat(a NUMERICAL)')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR t_dd FOR t USING dotdog(a NUMERICAL)')
        # Register, deregister and register again to cover all three paths.
        dotdog_metamodel = DotdogMetamodel()
        bayeslite.bayesdb_register_metamodel(bdb, dotdog_metamodel)
        bayeslite.bayesdb_deregister_metamodel(bdb, dotdog_metamodel)
        bayeslite.bayesdb_register_metamodel(bdb, dotdog_metamodel)
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR t_cc FOR t USING crosscat(a NUMERICAL)')
        bdb.execute('CREATE GENERATOR t_dd FOR t USING dotdog(a NUMERICAL)')
        # t_dd now exists, so creating it again must fail.
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR t_dd FOR t USING dotdog(a NUMERICAL)')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR t_cc FOR t USING crosscat(a NUMERICAL)')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR t_dd FOR t USING dotdog(a NUMERICAL)')
        # XXX Rest of test originally exercised default metamodel, but
        # syntax doesn't support that now.  Not clear that's wrong either.
        bdb.execute('CREATE GENERATOR u_dd FOR u USING dotdog(a NUMERICAL)')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR u_dd FOR u USING dotdog(a NUMERICAL)')
예제 #34
0
def test_mutinf__ci_slow(seed):
    """Check variable orderings by mutual information and dependence with x.

    Columns x and y are drawn jointly from two multivariate normals (50 rows
    each) and z is drawn separately.  After analysis, ordering the variables
    by either criterion must give x, y, z.

    :param seed: seed forwarded to ``bayesdb_open``.
    """
    with bayesdb_open(':memory:', seed=seed) as bdb:
        rng = bdb.np_prng
        bdb.sql_execute('create table t(x, y, z)')

        # Draw order matters for reproducibility under a fixed seed.
        xy_first = rng.multivariate_normal(
            [10, 10], [[0, 1], [2, 0]], size=50)
        xy_second = rng.multivariate_normal(
            [0, 0], [[0, -1], [2, 0]], size=50)
        xy = np.concatenate([xy_first, xy_second])
        z = rng.multivariate_normal([5], [[0.5]], size=100)
        table_rows = np.hstack([xy, z])
        for xyz in table_rows:
            bdb.sql_execute('INSERT INTO t VALUES(?,?,?)', xyz)

        for statement in (
                'create population p for t'
                '(x numerical; y numerical; z numerical)',
                'create generator m for p',
                'initialize 10 models for m',
                'analyze m for 10 iterations (optimized; quiet)'):
            bdb.execute(statement)

        expected_order = [('x', ), ('y', ), ('z', )]
        vars_by_mutinf = bdb.execute('''
            estimate * from variables of p
                order by probability of (mutual information with x > 0.1) desc
        ''').fetchall()
        vars_by_depprob = bdb.execute('''
            estimate * from variables of p
                order by dependence probability with x desc
        ''').fetchall()
        assert vars_by_mutinf == expected_order
        assert vars_by_depprob == expected_order
예제 #35
0
 def initialize(self):
   """Open the bdb if necessary, load the table and ensure a generator.

   If ``self.bdb`` is already set, only ``check_representation()`` runs.
   Otherwise the bdb at ``self.bdb_path`` is opened and, when it has no
   table called ``self.name``, the table is created from ``self.df`` (tried
   first) or from the CSV file at ``self.csv_path``.  When the
   ``bayesdb_generator`` table is empty, a crosscat generator with guessed
   column types is created.

   Raises:
     BLE: wrapping a ValueError when the table is missing and neither a
       data frame nor a CSV path was given.
   """
   if self.bdb:
     self.check_representation()
     return
   self.bdb = bayeslite.bayesdb_open(self.bdb_path)
   if not bayeslite.core.bayesdb_has_table(self.bdb, self.name):
     if self.df is not None:
       bayeslite.read_pandas.bayesdb_read_pandas_df(
         self.bdb, self.name, self.df, create=True, ifnotexists=True)
     elif self.csv_path:
       bayeslite.bayesdb_read_csv_file(
         self.bdb, self.name, self.csv_path,
         header=True, create=True, ifnotexists=True)
     else:
       # No data source: tell an empty bdb apart from a wrong table name.
       tables = self.list_tables()
       metamodels = self.list_metamodels()
       if len(tables) + len(metamodels) == 0:
         raise BLE(ValueError("No data sources specified, and an empty bdb."))
       else:
         raise BLE(ValueError("The name of the population must be the same"
                              " as a table in the bdb, one of: " +
                              ", ".join(tables) +
                              "\nNote also that the bdb has the following"
                              " metamodels defined: " + ", ".join(metamodels)))
   self.generators = self.query('''SELECT * FROM bayesdb_generator''')
   if len(self.generators) == 0:
     # NOTE(review): %t and %g look like placeholders that self.query
     # expands to the table and generator names -- confirm.
     # .iloc replaces the positional use of .ix, which pandas deprecated and
     # later removed; the first cell of the result holds the row count.
     size = self.query('''SELECT COUNT(*) FROM %t''').iloc[0, 0]
     assert 0 < size
     self.query('''
       CREATE GENERATOR %g IF NOT EXISTS FOR %t USING crosscat( GUESS(*) )''')
   self.check_representation()
예제 #36
0
def test_impossible_duplicate_dependency():
    """Reject a schema that makes two columns dependent and independent.

    The generator schema declares a, b and c mutually independent and also
    declares a and c dependent; creating it must raise BQLError.
    """
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        # Register a crosscat metamodel backed by a seeded local engine.
        engine = crosscat.LocalEngine.LocalEngine(seed=0)
        bayeslite.bayesdb_register_metamodel(bdb, CrosscatMetamodel(engine))

        # Load a two-row dataset.
        bdb.sql_execute('CREATE TABLE foo(id,a,b,c)')
        for record in [(0, 1, 0, 0), (1, 0, 0, 1)]:
            bdb.sql_execute('INSERT INTO foo VALUES(?,?,?,?)', record)

        # Contradictory schema: IND(a b c) together with DEP(a c).
        contradictory_schema = '''
            CREATE GENERATOR bar FOR foo USING crosscat(
                GUESS(*),
                id IGNORE,
                a CATEGORICAL,
                b CATEGORICAL,
                c CATEGORICAL,
                INDEPENDENT(a,b,c),
                DEPENDENT(a,c),
            );
        '''

        # The impossible schema must be reported as a BQL error.
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(contradictory_schema)
예제 #37
0
def test_simulate_drawconstraint():
    """Simulating a column that is also constrained must echo the constraint.

    With ``TTL_MDCR_SPND = 40000`` given, every one of the 100 simulated rows
    must carry 40000 in its first column.
    """
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        engine = crosscat.LocalEngine.LocalEngine(seed=0)
        bayeslite.bayesdb_register_metamodel(bdb, CrosscatMetamodel(engine))
        with open(dha_csv, "rU") as csv_file:
            read_csv.bayesdb_read_csv(
                bdb, "dha", csv_file, header=True, create=True)
        bdb.execute("""
            CREATE GENERATOR dha_cc FOR dha USING crosscat (
                GUESS(*),
                name KEY
            )
        """)
        bdb.execute("INITIALIZE 1 MODEL FOR dha_cc")
        bdb.execute("ANALYZE dha_cc FOR 1 ITERATION WAIT")
        simulated = bdb.execute("""
            SIMULATE ttl_mdcr_spnd, n_death_ill FROM dha_cc
                GIVEN TTL_MDCR_SPND = 40000
                LIMIT 100
        """).fetchall()
        constrained_column = [row[0] for row in simulated]
        assert constrained_column == [40000] * 100
예제 #38
0
def test_simulate_drawconstraint_error__ci_slow():
    """A variable may not be both simulated and constrained under cgpm.

    Simulating ``ttl_mdcr_spnd`` while also conditioning on it must raise
    ValueError; simulating only ``n_death_ill`` under the same condition must
    return 100 single-column rows.
    """
    with bayeslite.bayesdb_open() as bdb:
        with open(dha_csv, 'rU') as csv_file:
            read_csv.bayesdb_read_csv(
                bdb, 'dha', csv_file, header=True, create=True)
        bdb.backends['cgpm'].set_multiprocess(False)
        bayesdb_guess_population(
            bdb, 'hospital', 'dha', overrides=[('name', 'key')])
        for statement in (
                'CREATE GENERATOR hospital_cc FOR hospital USING cgpm;',
                'INITIALIZE 1 MODEL FOR hospital_cc',
                'ANALYZE hospital_cc FOR 1 ITERATION (OPTIMIZED);'):
            bdb.execute(statement)

        # ttl_mdcr_spnd appears among both the query and the condition
        # variables.  The error surfaces as a ValueError because the CGPM
        # runtime, not cgpm_backend, detects it.
        with pytest.raises(ValueError):
            bdb.execute('''
                SIMULATE ttl_mdcr_spnd, n_death_ill FROM hospital
                    GIVEN ttl_mdcr_spnd = 40000
                    LIMIT 100
            ''').fetchall()

        simulated = bdb.execute('''
            SIMULATE n_death_ill FROM hospital
                GIVEN ttl_mdcr_spnd = 40000
                LIMIT 100
        ''').fetchall()
        assert len(simulated) == 100
        for row in simulated:
            assert len(row) == 1
예제 #39
0
def test_math_func_one_param(name, probe):
    """Compare a one-argument math function between Python and SQL.

    Both sides must agree: either both report a domain error (ValueError),
    both report an arity/type error, or both succeed with results that
    differ by less than 1e-4.

    :param name: name of the math function under test.
    :param probe: argument passed to the function.
    """
    # Evaluate on the Python side.
    python_value_error = python_type_error = None
    try:
        result_python = get_python_math_call(name, probe)
    except ValueError:
        python_value_error = True
    except TypeError:
        python_type_error = True

    # Evaluate on the SQL side.
    sql_value_error = sql_type_error = None
    try:
        with bayesdb_open(':memory:') as bdb:
            result_sql = cursor_value(
                bdb.execute(get_sql_math_call(name, probe)))
    except ValueError:
        sql_value_error = True
    except (TypeError, apsw.SQLError):
        sql_type_error = True

    # Domain error: must occur on both sides.
    if python_value_error or sql_value_error:
        assert python_value_error and sql_value_error
        return

    # Arity error: must occur on both sides.
    if python_type_error or sql_type_error:
        assert python_type_error and sql_type_error
        return

    # Both calls succeeded, so the results must match.
    assert abserr(result_python, result_sql) < 1e-4
def test_simulate_conflict():
    """SIMULATE must reject a GIVEN value that conflicts with a stored one.

    A row with "0" = 0 is inserted after the generator is initialized;
    simulating for that rowid while giving values for "0" and "1" must raise
    BQLError.
    """
    with bayeslite.bayesdb_open() as bdb:
        bdb.sql_execute('''
            CREATE TABLE data (
                "0" NUMERIC PRIMARY KEY,
                "1" NUMERIC
            );
        ''')
        insert_row(bdb, 'data', 1, 1)
        bdb.execute('''
            CREATE POPULATION FOR data WITH SCHEMA (
                "0" NUMERICAL;
                "1" NUMERICAL;
            );
        ''')
        for statement in ('CREATE GENERATOR FOR data USING cgpm;',
                          'INITIALIZE 1 MODELS FOR data;'):
            bdb.execute(statement)

        new_rowid = insert_row(bdb, 'data', 0, None)
        bindings = (new_rowid, )
        with pytest.raises(bayeslite.BQLError):
            bdb.execute('''
                SIMULATE "0" FROM data GIVEN rowid=?, "0"= 0, "1"=0 LIMIT 1;
            ''', bindings)
예제 #41
0
def test_hackbackend():
    """Exercise generator creation with only a hand-registered backend.

    The bdb is opened without builtin backends, so ``cgpm`` must be rejected
    throughout, and ``dotdog`` must be rejected until DotdogBackend has been
    registered.  Creating a generator whose name already exists must raise
    BQLError as well.
    """
    # Use the context manager so the bdb is closed even when an assertion
    # fails part-way through; previously the handle was never closed.
    with bayeslite.bayesdb_open(builtin_backends=False) as bdb:
        bdb.sql_execute('CREATE TABLE t(a INTEGER, b TEXT)')
        bdb.sql_execute("INSERT INTO t (a, b) VALUES (42, 'fnord')")
        bdb.sql_execute('CREATE TABLE u AS SELECT * FROM t')
        bdb.execute('CREATE POPULATION p FOR t(b IGNORE; a NUMERICAL)')
        # Nothing is registered yet: both backends are unknown.
        with pytest.raises(bayeslite.BQLError):
            bdb.execute('CREATE GENERATOR p_cc FOR p USING cgpm;')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute('CREATE GENERATOR p_dd FOR p USING dotdog;')
        # Register, deregister and register again to cover all three paths.
        dotdog_backend = DotdogBackend()
        bayeslite.bayesdb_register_backend(bdb, dotdog_backend)
        bayeslite.bayesdb_deregister_backend(bdb, dotdog_backend)
        bayeslite.bayesdb_register_backend(bdb, dotdog_backend)
        with pytest.raises(bayeslite.BQLError):
            bdb.execute('CREATE GENERATOR p_cc FOR p USING cgpm;')
        bdb.execute('CREATE GENERATOR p_dd FOR p USING dotdog(a NUMERICAL)')
        # p_dd now exists, so creating it again must fail.
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR p_dd FOR p USING dotdog(a NUMERICAL)')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute('CREATE GENERATOR p_cc FOR p USING cgpm;')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR p_dd FOR p USING dotdog(a NUMERICAL)')
        # XXX Rest of test originally exercised default backend, but
        # syntax doesn't support that now.  Not clear that's wrong either.
        bdb.execute('CREATE GENERATOR q_dd FOR p USING dotdog(a NUMERICAL)')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR q_dd FOR p USING dotdog(a NUMERICAL)')
예제 #42
0
def test_math_func_one_param(name, probe):
    """Check that Python and SQL agree on a one-argument math function.

    Agreement means: both raise a domain error (ValueError), or both raise an
    arity/type error, or both return values within 1e-4 of each other.

    :param name: name of the math function under test.
    :param probe: argument passed to the function.
    """
    # Python side.
    python_value_error, python_type_error = None, None
    try:
        result_python = get_python_math_call(name, probe)
    except ValueError:
        python_value_error = True
    except TypeError:
        python_type_error = True

    # SQL side.
    sql_value_error, sql_type_error = None, None
    try:
        with bayesdb_open(':memory:') as bdb:
            sql_call = get_sql_math_call(name, probe)
            result_sql = cursor_value(bdb.execute(sql_call))
    except ValueError:
        sql_value_error = True
    except (TypeError, apsw.SQLError):
        sql_type_error = True

    domain_error_seen = python_value_error or sql_value_error
    arity_error_seen = python_type_error or sql_type_error
    if domain_error_seen:
        # A domain error on one side must be mirrored on the other.
        assert python_value_error and sql_value_error
    elif arity_error_seen:
        # Likewise for arity errors.
        assert python_type_error and sql_type_error
    else:
        # Both invocations succeeded; the results must match.
        assert abserr(result_python, result_sql) < 1e-4
예제 #43
0
def bdb_for_checking_cmi(backend, iterations, seed):
    """Yield an in-memory bdb with generator ``m`` analyzed on table ``t``.

    The table receives 1000 rows from ``generate_v_structured_data``; the
    population ``p`` treats a, b and c as nominal.  Generator ``m`` uses the
    requested backend, gets 10 models and is analyzed for ``iterations``
    iterations.

    :param backend: ``'loom'`` or ``'cgpm'``; anything else raises ValueError.
    :param iterations: number of ANALYZE iterations.
    :param seed: seed forwarded to ``bayesdb_open``.
    """
    with tempdir('bayeslite-loom') as loom_store_path:
        with bayesdb_open(':memory:', seed=seed) as bdb:
            bdb.sql_execute('CREATE TABLE t (a, b, c)')
            for observation in generate_v_structured_data(1000, bdb.np_prng):
                bdb.sql_execute(
                    '''
                    INSERT INTO t (a, b, c) VALUES (?, ?, ?)
                ''', observation)

            bdb.execute('''
                CREATE POPULATION p FOR t WITH SCHEMA (
                    SET STATTYPES OF a, b, c TO NOMINAL;
                )
            ''')

            # Remember whether cgpm multiprocessing has to be switched off
            # again once analysis is done.
            multiprocess_enabled = False
            if backend == 'loom':
                try:
                    from bayeslite.backends.loom_backend import LoomBackend
                except ImportError:
                    pytest.skip('Failed to import Loom.')
                loom_backend = LoomBackend(loom_store_path=loom_store_path)
                bayesdb_register_backend(bdb, loom_backend)
                bdb.execute('CREATE GENERATOR m FOR p using loom')
            elif backend == 'cgpm':
                bdb.execute('CREATE GENERATOR m FOR p using cgpm')
                bdb.backends['cgpm'].set_multiprocess('on')
                multiprocess_enabled = True
            else:
                raise ValueError('Backend %s unknown' % (backend,))

            # XXX we may want to downscale this eventually.
            bdb.execute('INITIALIZE 10 MODELS FOR m;')
            bdb.execute('ANALYZE m FOR %d ITERATIONS;' % (iterations,))
            if multiprocess_enabled:
                bdb.backends['cgpm'].set_multiprocess('off')
            yield bdb
예제 #44
0
def smoke_loom():
    """Yield an in-memory bdb with a one-model Loom generator ``m``.

    Table ``t`` holds every combination of four binary columns (a-d) and one
    two-valued text column (e); population ``p`` models a-d as numerical and
    e as nominal.  The test is skipped when Loom cannot be imported.
    """
    with tempdir('bayeslite-loom') as loom_store_path:
        with bayesdb_open(':memory:') as bdb:
            try:
                from bayeslite.backends.loom_backend import LoomBackend
            except ImportError:
                pytest.skip('Failed to import Loom.')
            loom = LoomBackend(loom_store_path=loom_store_path)
            bayesdb_register_backend(bdb, loom)
            bdb.sql_execute('CREATE TABLE t (a, b, c, d, e)')

            binary = range(2)
            combinations = itertools.product(
                binary, binary, binary, binary, ['x', 'y'])
            for combination in combinations:
                # XXX Insert synthetic data generator here.
                bdb.sql_execute('''
                    INSERT INTO t (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)
                ''', combination)

            bdb.execute('''
                CREATE POPULATION p FOR t WITH SCHEMA (
                    SET STATTYPES OF a, b, c, d TO NUMERICAL;
                    SET STATTYPES OF e TO NOMINAL
                )
            ''')

            for statement in ('CREATE GENERATOR m FOR p using loom;',
                              'INITIALIZE 1 MODELS FOR m;'):
                bdb.execute(statement)

            yield bdb
예제 #45
0
def smoke_loom():
    """Yield an in-memory bdb prepared for Loom smoke tests.

    The bdb contains table ``t`` (all combinations of four 0/1 columns and
    one 'x'/'y' column), population ``p`` and a Loom generator ``m`` with one
    initialized model.  The test is skipped when Loom cannot be imported.
    """
    with tempdir('bayeslite-loom') as loom_store_path:
        with bayesdb_open(':memory:') as bdb:
            try:
                from bayeslite.backends.loom_backend import LoomBackend
            except ImportError:
                pytest.skip('Failed to import Loom.')
            backend = LoomBackend(loom_store_path=loom_store_path)
            bayesdb_register_backend(bdb, backend)
            bdb.sql_execute('CREATE TABLE t (a, b, c, d, e)')

            column_domains = [range(2)] * 4 + [['x', 'y']]
            for values in itertools.product(*column_domains):
                a, b, c, d, e = values
                # XXX Insert synthetic data generator here.
                bdb.sql_execute(
                    '''
                    INSERT INTO t (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)
                ''', (a, b, c, d, e))

            bdb.execute('''
                CREATE POPULATION p FOR t WITH SCHEMA (
                    SET STATTYPES OF a, b, c, d TO NUMERICAL;
                    SET STATTYPES OF e TO NOMINAL
                )
            ''')

            bdb.execute('CREATE GENERATOR m FOR p using loom;')
            bdb.execute('INITIALIZE 1 MODELS FOR m;')

            yield bdb
예제 #46
0
def test_nig_normal_latent_numbering():
    """Check variable numbering with and without a latent variable.

    Generator ``g0`` is a plain nig_normal generator and must expose
    variables [1, 2].  Generator ``g1`` adds the ``xe deviation(x)`` clause
    and must expose an additional variable numbered -1.
    """
    with bayesdb_open(':memory:') as bdb:
        bayesdb_register_backend(bdb, NIGNormalBackend())
        bdb.sql_execute('create table t(id integer primary key, x, y)')
        for n in xrange(100):
            xy = (n, n*n - 100)
            bdb.sql_execute('insert into t(x, y) values(?, ?)', xy)
        bdb.execute('''
            create population p for t(
                id ignore;
                set stattypes of x,y to numerical;
            )
        ''')
        assert core.bayesdb_has_population(bdb, 'p')
        population_id = core.bayesdb_get_population(bdb, 'p')
        observed_only = [1, 2]
        assert core.bayesdb_variable_numbers(
            bdb, population_id, None) == observed_only

        bdb.execute('create generator g0 for p using nig_normal')
        bdb.execute('''
            create generator g1 for p using nig_normal(xe deviation(x))
        ''')

        assert core.bayesdb_has_generator(bdb, population_id, 'g0')
        plain_id = core.bayesdb_get_generator(bdb, population_id, 'g0')
        assert core.bayesdb_has_generator(bdb, population_id, 'g1')
        latent_id = core.bayesdb_get_generator(bdb, population_id, 'g1')

        # The population itself is unchanged by creating the generators.
        assert core.bayesdb_variable_numbers(
            bdb, population_id, None) == observed_only
        assert core.bayesdb_variable_numbers(
            bdb, population_id, plain_id) == observed_only
        assert core.bayesdb_variable_numbers(
            bdb, population_id, latent_id) == [-1, 1, 2]
예제 #47
0
def test_hackmetamodel():
    """Exercise generator creation with only a hand-registered metamodel.

    The bdb is opened without builtin metamodels, so ``crosscat`` must be
    rejected throughout, and ``dotdog`` must be rejected until
    DotdogMetamodel has been registered.  Creating a generator whose name
    already exists must raise BQLError as well.
    """
    # Use the context manager so the bdb is closed even when an assertion
    # fails part-way through; previously the handle was never closed.
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        bdb.sql_execute('CREATE TABLE t(a INTEGER, b TEXT)')
        bdb.sql_execute("INSERT INTO t (a, b) VALUES (42, 'fnord')")
        bdb.sql_execute('CREATE TABLE u AS SELECT * FROM t')
        # Nothing is registered yet: both metamodels are unknown.
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR t_cc FOR t USING crosscat(a NUMERICAL)')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR t_dd FOR t USING dotdog(a NUMERICAL)')
        # NOTE(review): a crosscat metamodel is built here but never
        # registered in this test; the calls are kept in case construction
        # has side effects -- confirm whether they can be dropped.
        crosscat = local_crosscat()
        crosscat_metamodel = CrosscatMetamodel(crosscat)
        # Register, deregister and register again to cover all three paths.
        dotdog_metamodel = DotdogMetamodel()
        bayeslite.bayesdb_register_metamodel(bdb, dotdog_metamodel)
        bayeslite.bayesdb_deregister_metamodel(bdb, dotdog_metamodel)
        bayeslite.bayesdb_register_metamodel(bdb, dotdog_metamodel)
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR t_cc FOR t USING crosscat(a NUMERICAL)')
        bdb.execute('CREATE GENERATOR t_dd FOR t USING dotdog(a NUMERICAL)')
        # t_dd now exists, so creating it again must fail.
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR t_dd FOR t USING dotdog(a NUMERICAL)')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR t_cc FOR t USING crosscat(a NUMERICAL)')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR t_dd FOR t USING dotdog(a NUMERICAL)')
        # XXX Rest of test originally exercised default metamodel, but
        # syntax doesn't support that now.  Not clear that's wrong either.
        bdb.execute('CREATE GENERATOR u_dd FOR u USING dotdog(a NUMERICAL)')
        with pytest.raises(bayeslite.BQLError):
            bdb.execute(
                'CREATE GENERATOR u_dd FOR u USING dotdog(a NUMERICAL)')
예제 #48
0
def test_nonintegral_noindex():
    """Loading this data frame without further options must raise ValueError.

    The frame has four rows labelled 42, 78, 62 and 43, and its first row
    holds the string 'foo' in the third column.
    """
    records = [(1, 2, 'foo'), (4, 5, 6), (7, 8, 9), (10, 11, 12)]
    row_labels = [42, 78, 62, 43]
    with bayesdb_open() as bdb:
        frame = pandas.DataFrame(records, index=row_labels)
        with pytest.raises(ValueError):
            bayesdb_read_pandas_df(bdb, 't', frame)
예제 #49
0
def main():
    """Load the CSV into a table, declare its population and one model.

    Uses the module-level ``db_handle`` (bdb path), ``csv_handle`` (CSV path)
    and ``table_name``.  The population ignores the date/time columns and
    assigns every other listed column exactly one statistical type, then a
    generator named ``crimegen`` is created and initialized with one model.
    """
    with bayeslite.bayesdb_open(pathname=db_handle) as bdb:
        with open(csv_handle, 'r') as f:
            table_from_csv(bdb, table_name, f)

        ignore = ["date", "time"]
        numerical = [
            "distance_to_nearest_traffic_light", "speed_limit",
            "estimated_speed_of_collision"
        ]
        # distance_to_nearest_traffic_light used to be listed here as well,
        # which declared the column twice with conflicting stattypes; it is
        # a distance, so it stays numerical only.
        nominal = [
            "vehicle_type", "second_vehicle_type", "road_has_pavement",
            "seat_belt_used", "injury_sustained", "lethal", "land_use",
            "city"
        ]

        # One "<column> <stattype>" clause per column, joined with "; ".
        clauses = ["ignore " + column for column in ignore]
        clauses += [column + " numerical" for column in numerical]
        clauses += [column + " nominal" for column in nominal]
        schema = "(" + "; ".join(clauses) + ")"

        # Single-argument print(...) behaves the same with or without
        # print_function.
        print("Creating population...")
        bdb.execute("CREATE POPULATION FOR " + table_name + " " + schema)
        print("Creating generator...")
        bdb.execute("CREATE GENERATOR crimegen FOR " + table_name)
        print("Initialising model...")
        bdb.execute("INITIALIZE 1 MODEL FOR crimegen")
예제 #50
0
    def do_POST(self):
        """Handle a POST request whose body carries queries for a bdb.

        The body is read using the Content-Length header and decoded as
        UTF-8.  Unless it contains the 'xxxGET POPULATION COLUMNS' marker, it
        is split by ``RequestHandler.text_to_queries`` into a database name
        and a list of queries.  Each query is run with ``bdb.execute``, or
        with ``bdb.sql_execute`` when it starts with "SQL" (the first four
        characters are then stripped).  Decode errors, an empty query list
        and BQL/BayesDB errors are reported through ``self.send_err``.
        """
        self.send_response(200)
        # Column name spelling fixed: "traffic_light", not "traffic_tight".
        popcolumns = "vehicle_type,second_vehicle_type,road_has_pavement,distance_to_nearest_traffic_light,speed_limit,estimated_speed_of_collision,seat_belt_used,injury_sustained,lethal,land_use,city"
        sentinel = 0
        try:
            content_length = int(self.headers['Content-Length'])
            text = self.rfile.read(content_length).decode("utf8")
            if 'xxxGET POPULATION COLUMNS' in text:
                sentinel = 1
                print("SET SENTINEL")
            if sentinel == 0:
                db_name, queries = RequestHandler.text_to_queries(text)
        except UnicodeDecodeError:
            self.send_err("Error: Couldn't decode request, make sure it's utf8")
            return
        except AssertionError:
            self.send_err("Error: You must give at least 1 query.")
            return

        if sentinel != 1:
            print("Running queries.")
            # NOTE(review): nothing visible here sends `results` (or
            # `popcolumns`) back to the client -- confirm the response is
            # written elsewhere.
            results = []
            with bayeslite.bayesdb_open(pathname=db_name) as bdb:
                for query in queries:
                    try:
                        if query[0:3].upper() != "SQL":
                            results.append(conv_cursor_to_json(bdb.execute(query)))
                        else:
                            results.append(conv_cursor_to_json(bdb.sql_execute(query[4:])))
                    except (BQLError, BQLParseError, BayesDBException) as e:
                        self.send_err(e)
                        return
예제 #51
0
def test_register():
    """Check that registering a Composer creates its catalog entries.

    After registration the composer's name must appear in
    ``bayesdb_metamodel`` and every table and trigger of the composer schema
    must be present in ``sqlite_master``.
    """
    # Use the context manager so the bdb is closed even when pytest.fail
    # fires; the explicit close() at the end was skipped on failure.
    with bayeslite.bayesdb_open() as bdb:
        composer = Composer(n_samples=5)
        bayeslite.bayesdb_register_metamodel(bdb, composer)
        # Check if globally registered.
        registered = bdb.sql_execute('''
            SELECT * FROM bayesdb_metamodel WHERE name={}
        '''.format(quote(composer.name())))
        try:
            registered.next()
        except StopIteration:
            pytest.fail('Composer not registered in bayesdb_metamodel.')
        # Check all tables/triggers.
        schema = [
            ('table', 'bayesdb_composer_cc_id'),
            ('table', 'bayesdb_composer_column_owner'),
            ('table', 'bayesdb_composer_column_toposort'),
            ('trigger', 'bayesdb_composer_column_toposort_check'),
            ('table', 'bayesdb_composer_column_parents'),
            ('table', 'bayesdb_composer_column_foreign_predictor'),
            ('trigger', 'bayesdb_composer_column_foreign_predictor_check')
        ]
        for kind, name in schema:
            found = bdb.sql_execute('''
                SELECT * FROM sqlite_master WHERE type={} AND name={}
            '''.format(quote(kind), quote(name)))
            try:
                found.next()
            except StopIteration:
                pytest.fail(
                    'Missing from Composer schema: {}'.format((kind,name)))
예제 #52
0
def test_geweke_iid_gaussian():
    """Run ``geweke_kl`` against the ``std_normal`` metamodel.

    The metamodel is registered on a database opened without the builtin
    metamodels, and the call is expected to return exactly ``(2, 0, 0)``.
    """
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        std_normal = gauss.StdNormalMetamodel()
        bayeslite.bayesdb_register_metamodel(bdb, std_normal)
        schema = [['column', 'numerical']]
        columns = ['column']
        pairs = [(1, 0), (2, 0)]
        kl_est = geweke.geweke_kl(
            bdb, "std_normal", schema, columns, pairs, 2, 2, 2, 2)
        expected = (2, 0, 0)
        assert kl_est == expected
예제 #53
0
def smoke_bdb():
    """Yield an in-memory BayesDB holding a small table and two generators.

    Table ``t`` receives one row for every combination of four 0/1 columns
    (``a``..``d``) and one ``'x'``/``'y'`` column (``e``).  Population ``p``
    is declared over it, and generators ``m1`` and ``m2`` are each created
    and initialized with 10 models.  The handle is yielded from inside the
    ``with`` block.

    NOTE(review): this is a bare generator here; presumably it is wrapped as
    a fixture or context manager by a decorator outside this view -- confirm.
    """
    binary = range(2)
    letters = ['x', 'y']
    with bayesdb_open(':memory:') as bdb:
        bdb.sql_execute('CREATE TABLE t (a, b, c, d, e)')

        # XXX Insert synthetic data generator here.
        for row in itertools.product(binary, binary, binary, binary, letters):
            bdb.sql_execute(
                '''
                INSERT INTO t (a, b, c, d, e) VALUES (?, ?, ?, ?, ?)
            ''', row)

        bdb.execute('''
            CREATE POPULATION p FOR t WITH SCHEMA (
                SET STATTYPES OF a, b, c, d TO NUMERICAL;
                SET STATTYPES OF e TO NOMINAL
            )
        ''')

        # Each generator is created and then initialized, in that order.
        for statement in (
                'CREATE GENERATOR m1 FOR p;',
                'INITIALIZE 10 MODELS FOR m1;',
                'CREATE GENERATOR m2 FOR p;',
                'INITIALIZE 10 MODELS FOR m2;'):
            bdb.execute(statement)
        yield bdb
예제 #54
0
def test_legacy_models_slow():
    """Load legacy crosscat models for ``dha`` and check two BQL rankings.

    Registers a seeded crosscat metamodel, loads the ``dha`` table and its
    legacy models, loads the codebook twice, and compares the results of two
    ``ESTIMATE`` queries against fixed expected lists.
    """
    similarity_bql = '''
        ESTIMATE name FROM dha_cc
            ORDER BY SIMILARITY TO (name = ?) DESC
            LIMIT 10
    '''
    # Tickles an issue in case-folding of column names.
    predictive_bql = '''
        ESTIMATE name
            FROM dha_cc
            ORDER BY PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc ASC
            LIMIT 10
    '''
    # Open the database under ``with`` so the handle is closed when the test
    # finishes, whether it passes or fails.
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        cc = crosscat.LocalEngine.LocalEngine(seed=0)
        metamodel = CrosscatMetamodel(cc)
        bayeslite.bayesdb_register_metamodel(bdb, metamodel)
        # Before the 'dha' table has been read in, loading the models is
        # expected to raise ValueError.
        with pytest.raises(ValueError):
            bayeslite.bayesdb_load_legacy_models(
                bdb, 'dha_cc', 'dha', 'crosscat', dha_models, create=True)
        with open(dha_csv, 'rU') as f:
            read_csv.bayesdb_read_csv(bdb, 'dha', f, header=True, create=True)
        bayeslite.bayesdb_load_legacy_models(
            bdb, 'dha_cc', 'dha', 'crosscat', dha_models, create=True)
        # Make sure guessing also works.
        bdb.execute(
            'create generator dha_cc0 for dha using crosscat(guess(*))')
        bayeslite.bayesdb_load_codebook_csv_file(bdb, 'dha', dha_codebook)
        # Need to be able to overwrite existing codebook.
        #
        # XXX Not sure this is the right API.  What if overwrite is a
        # mistake?
        bayeslite.bayesdb_load_codebook_csv_file(bdb, 'dha', dha_codebook)
        with bdb.savepoint():
            assert bdb.execute(similarity_bql, ('Albany NY',)).fetchall() == [
                ('Albany NY',),
                ('Scranton PA',),
                ('United States US',),
                ('Norfolk VA',),
                ('Reading PA',),
                ('Salisbury MD',),
                ('Louisville KY',),
                ('Cleveland OH',),
                ('Covington KY',),
                ('Akron OH',),
            ]
        with bdb.savepoint():
            assert bdb.execute(predictive_bql).fetchall() == [
                ('McAllen TX',),
                ('Worcester MA',),
                ('Beaumont TX',),
                ('Temple TX',),
                ('Corpus Christi TX',),
                ('Takoma Park MD',),
                ('Kingsport TN',),
                ('Bangor ME',),
                ('Lebanon NH',),
                ('Panama City FL',),
            ]
예제 #55
0
def test_engine_stamp_two_clients():
    """Check that analysis through one handle invalidates another's cache.

    Several handles are opened on the same database file; the engine stamp
    is expected to advance after each INITIALIZE/ANALYZE regardless of which
    handle ran it, and the engine cached for ``bdb0`` must no longer count as
    the latest once ``bdb2`` has analyzed.
    """
    with tempfile.NamedTemporaryFile(prefix='bayeslite') as f:
        with bayeslite.bayesdb_open(f.name) as bdb0:
            csv_stream = StringIO(test_csv.csv_data)
            bayeslite.bayesdb_read_csv(
                bdb0, 't', csv_stream, header=True, create=True)
            bdb0.execute('''
                CREATE POPULATION p FOR t (
                    age NUMERICAL;
                    gender CATEGORICAL;
                    salary NUMERICAL;
                    height IGNORE;
                    division CATEGORICAL;
                    rank CATEGORICAL
                )
            ''')

            bdb0.execute('CREATE METAMODEL m FOR p WITH BASELINE crosscat;')
            cgpm_metamodel = bdb0.metamodels['cgpm']
            population_id = bayeslite.core.bayesdb_get_population(bdb0, 'p')
            generator_id = bayeslite.core.bayesdb_get_generator(
                bdb0, population_id, 'm')

            # Shorthand for the stamp lookup; the handle is passed per call.
            stamp = cgpm_metamodel._engine_stamp

            assert stamp(bdb0, generator_id) == 0

            # A second handle initializes the model; both handles see stamp 1.
            with bayeslite.bayesdb_open(f.name) as bdb1:
                bdb1.execute('INITIALIZE 1 MODEL FOR m')
                assert stamp(bdb0, generator_id) == 1
                assert stamp(bdb1, generator_id) == 1

            bdb0.execute('ANALYZE m FOR 1 ITERATION WAIT')
            assert stamp(bdb0, generator_id) == 2
            cached_engine = cgpm_metamodel._get_cache_entry(
                bdb0, generator_id, 'engine')
            assert cached_engine is not None

            # A third handle analyzes; both handles see stamp 3.
            with bayeslite.bayesdb_open(f.name) as bdb2:
                bdb2.execute('ANALYZE m FOR 1 ITERATION WAIT')
                assert stamp(bdb2, generator_id) == 3
                assert stamp(bdb0, generator_id) == 3

            # Engine in cache of bdb0 should be stale, since bdb2 analyzed.
            latest = cgpm_metamodel._engine_latest(bdb0, generator_id)
            assert latest is None
예제 #56
0
def bayesdb(backend=None, **kwargs):
    """Yield a BayesDB handle with one registered backend, then close it.

    Args:
        backend: Backend to register on the handle.  When ``None``, a
            ``CGPM_Backend(cgpm_registry={}, multiprocess=False)`` is used.
        **kwargs: Forwarded to ``bayeslite.bayesdb_open`` alongside
            ``builtin_backends=False``.

    Yields:
        The opened handle; it is closed when the generator is resumed or
        torn down.

    NOTE(review): this is a bare generator here; presumably it is wrapped by
    a context-manager decorator outside this view -- confirm.
    """
    if backend is None:
        backend = CGPM_Backend(cgpm_registry={}, multiprocess=False)
    bdb = bayeslite.bayesdb_open(builtin_backends=False, **kwargs)
    # Registration sits inside the ``try`` so that a failure to register the
    # backend still closes the handle that was just opened.
    try:
        bayeslite.bayesdb_register_backend(bdb, backend)
        yield bdb
    finally:
        bdb.close()
예제 #57
0
def test_estimate_pairwise_similarity_long():
    """
    Tests larger queries that need to be broken into batch inserts of 500
    values each, as well as the N parameter.

    The parallel estimator is run for several values of N against a
    file-backed database, the shape of ``t_similarity`` is checked after each
    run, and the final table is compared with the result of
    ``ESTIMATE SIMILARITY FROM PAIRWISE``.
    """
    # NOTE(review): this is set process-wide and never restored, so it stays
    # in effect for anything that runs afterwards in the same process.
    os.environ['BAYESDB_WIZARD_MODE'] = '1'

    # Open the database in the same ``with`` as its backing file: context
    # managers exit in reverse order, so the handle is closed before the
    # temporary file is removed.
    with tempfile.NamedTemporaryFile(suffix='.bdb') as bdb_file, \
            bayeslite.bayesdb_open(bdb_file.name) as bdb:
        with tempfile.NamedTemporaryFile() as temp:
            # n = 40 -> 40**2 -> 1600 rows total
            temp.write(_bigger_csv_data(40))
            temp.seek(0)
            bayeslite.bayesdb_read_csv_file(
                bdb, 't', temp.name, header=True, create=True)
        bdb.execute('''
            CREATE GENERATOR t_cc FOR t USING crosscat (
                GUESS(*),
                id IGNORE
            )
        ''')

        bdb.execute('INITIALIZE 3 MODELS FOR t_cc')
        bdb.execute('ANALYZE t_cc MODELS 0-2 FOR 10 ITERATIONS WAIT')

        # test N = 0
        parallel.estimate_pairwise_similarity(
            bdb_file.name, 't', 't_cc', N=0
        )
        assert cursor_to_df(
            bdb.execute('SELECT * FROM t_similarity')
        ).shape == (0, 0)

        # test other values of N
        for N in [1, 2, 10, 20, 40]:
            parallel.estimate_pairwise_similarity(
                bdb_file.name, 't', 't_cc', N=N, overwrite=True
            )
            assert cursor_to_df(
                bdb.execute('SELECT * FROM t_similarity')
            ).shape == (N**2, 3)
        # N too high should fail
        with pytest.raises(BLE):
            parallel.estimate_pairwise_similarity(
                bdb_file.name, 't', 't_cc', N=41, overwrite=True
            )

        # Sort and reindex the parallel result before comparing the frames.
        parallel_sim = cursor_to_df(
            bdb.execute('SELECT * FROM t_similarity')
        ).sort_values(by=['rowid0', 'rowid1'])
        parallel_sim.index = range(parallel_sim.shape[0])

        std_sim = cursor_to_df(
            bdb.execute('ESTIMATE SIMILARITY FROM PAIRWISE t_cc')
        )

        assert_frame_equal(std_sim, parallel_sim, check_column_type=True)
예제 #58
0
def test_legacy_models__ci_slow():
    """Load legacy crosscat models for ``dha`` and check two BQL rankings.

    Registers a seeded crosscat metamodel, reads the ``dha`` table, loads its
    legacy models and codebook (the codebook twice, to exercise overwriting),
    then compares two ``ESTIMATE`` queries against fixed expected results.
    """
    by_similarity = """
        ESTIMATE name FROM dha_cc
            ORDER BY SIMILARITY TO (name = ?) DESC
            LIMIT 10
    """
    # Tickles an issue in case-folding of column names.
    by_predictive_probability = """
        ESTIMATE name
            FROM dha_cc
            ORDER BY PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc ASC
            LIMIT 10
    """
    # The handle is managed by ``with`` so that it is always closed at the
    # end of the test, including when an assertion fails.
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        cc = crosscat.LocalEngine.LocalEngine(seed=0)
        metamodel = CrosscatMetamodel(cc)
        bayeslite.bayesdb_register_metamodel(bdb, metamodel)
        # Until the "dha" table has been read in, loading the models is
        # expected to raise ValueError.
        with pytest.raises(ValueError):
            bayeslite.bayesdb_load_legacy_models(bdb, "dha_cc", "dha", "crosscat", dha_models, create=True)
        with open(dha_csv, "rU") as f:
            read_csv.bayesdb_read_csv(bdb, "dha", f, header=True, create=True)
        bayeslite.bayesdb_load_legacy_models(bdb, "dha_cc", "dha", "crosscat", dha_models, create=True)
        # Make sure guessing also works.
        bdb.execute("create generator dha_cc0 for dha using crosscat(guess(*))")
        bayeslite.bayesdb_load_codebook_csv_file(bdb, "dha", dha_codebook)
        # Need to be able to overwrite existing codebook.
        #
        # XXX Not sure this is the right API.  What if overwrite is a
        # mistake?
        bayeslite.bayesdb_load_codebook_csv_file(bdb, "dha", dha_codebook)
        with bdb.savepoint():
            assert bdb.execute(by_similarity, ("Albany NY",)).fetchall() == [
                ("Albany NY",),
                ("Scranton PA",),
                ("United States US",),
                ("Norfolk VA",),
                ("Reading PA",),
                ("Salisbury MD",),
                ("Louisville KY",),
                ("Cleveland OH",),
                ("Covington KY",),
                ("Akron OH",),
            ]
        with bdb.savepoint():
            assert bdb.execute(by_predictive_probability).fetchall() == [
                ("McAllen TX",),
                ("Worcester MA",),
                ("Beaumont TX",),
                ("Temple TX",),
                ("Corpus Christi TX",),
                ("Takoma Park MD",),
                ("Kingsport TN",),
                ("Bangor ME",),
                ("Lebanon NH",),
                ("Panama City FL",),
            ]
예제 #59
0
def test_geweke_nig_normal():
    """Run ``geweke_kl`` against the ``nig_normal`` metamodel (seed 1).

    The returned triple is unpacked as ``(ct, kl, error)``; ``ct`` must be 2
    and both ``kl`` and ``error`` must lie strictly between 0 and 10.
    """
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        nig = normal.NIGNormalMetamodel(seed=1)
        bayeslite.bayesdb_register_metamodel(bdb, nig)
        schema = [['column', 'numerical']]
        columns = ['column']
        pairs = [(1, 0), (2, 0)]
        result = geweke.geweke_kl(
            bdb, "nig_normal", schema, columns, pairs, 2, 2, 2, 2)
        ct, kl, error = result
        assert ct == 2
        assert 0 < kl < 10  # KL should be positive
        assert 0 < error < 10  # KL error estimate too