예제 #1
0
def test_subsample():
    """Smoke-test a CGPM generator created with SUBSAMPLE 100.

    Two populations are guessed from the same dha table.  hosp_full_cc
    is created without options, hosp_sub_cc with SUBSAMPLE 100.  After
    initializing one model and analyzing hosp_sub_cc for one iteration,
    several BQL queries restricted to rowids 1 and 101 are executed
    against hospitals_sub; their results are fetched and discarded.
    Finally the first 100 table rowids recorded in
    bayesdb_cgpm_individual are compared for both generators.
    """
    with bayeslite.bayesdb_open(builtin_backends=False) as bdb:
        # Register a single-process CGPM backend by hand, since the
        # built-in backends were disabled when opening the database.
        backend = CGPM_Backend(cgpm_registry={}, multiprocess=False)
        bayeslite.bayesdb_register_backend(bdb, backend)
        with open(dha_csv, 'rU') as f:
            read_csv.bayesdb_read_csv(bdb, 'dha', f, header=True, create=True)
        bayesdb_guess_population(bdb, 'hospitals_full', 'dha',
            overrides=[('name', 'key')])
        bayesdb_guess_population(bdb, 'hospitals_sub', 'dha',
            overrides=[('name', 'key')])
        bdb.execute('''
            CREATE GENERATOR hosp_full_cc FOR hospitals_full USING cgpm;
        ''')
        bdb.execute('''
            CREATE GENERATOR hosp_sub_cc FOR hospitals_sub USING cgpm(
                SUBSAMPLE 100
            )
        ''')
        bdb.execute('INITIALIZE 1 MODEL FOR hosp_sub_cc')
        bdb.execute('ANALYZE hosp_sub_cc FOR 1 ITERATION (OPTIMIZED)')
        # The queries below only need to run to completion; the rows they
        # return are not inspected.
        bdb.execute('''
            ESTIMATE SIMILARITY TO (_rowid_=2) IN THE CONTEXT OF PNEUM_SCORE
            FROM hospitals_sub WHERE _rowid_ = 1 OR _rowid_ = 101
        ''').fetchall()
        bdb.execute('''
            ESTIMATE SIMILARITY TO (_rowid_=102) IN THE CONTEXT OF
            N_DEATH_ILL FROM hospitals_sub
            WHERE _rowid_ = 1 OR _rowid_ = 101
        ''').fetchall()
        bdb.execute('''
            ESTIMATE PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc
            FROM hospitals_sub
            WHERE _rowid_ = 1 OR _rowid_ = 101
        ''').fetchall()
        bdb.execute('''
            ESTIMATE SIMILARITY IN THE CONTEXT OF PNEUM_SCORE
            FROM PAIRWISE hospitals_sub
            WHERE (r0._rowid_ = 1 OR r0._rowid_ = 101) AND
            (r1._rowid_ = 1 OR r1._rowid_ = 101)
        ''').fetchall()
        bdb.execute('''
            INFER mdcr_spnd_amblnc FROM hospitals_sub
            WHERE _rowid_ = 1 OR _rowid_ = 101
        ''').fetchall()
        sql = '''
            SELECT table_rowid FROM bayesdb_cgpm_individual
                WHERE generator_id = ?
                ORDER BY cgpm_rowid ASC
                LIMIT 100
        '''
        # Materialize the expected rowids as a list.  A list never compares
        # equal to a bare range object on Python 3, which would make the
        # `==` check always fail and the `!=` check pass vacuously.
        first_hundred = list(range(1, 100 + 1))
        gid_full = bayesdb_get_generator(bdb, None, 'hosp_full_cc')
        cursor = bdb.sql_execute(sql, (gid_full,))
        assert [row[0] for row in cursor] == first_hundred
        gid = bayesdb_get_generator(bdb, None, 'hosp_sub_cc')
        cursor = bdb.sql_execute(sql, (gid,))
        assert [row[0] for row in cursor] != first_hundred
        # Tear down generators before the populations they belong to.
        bdb.execute('DROP GENERATOR hosp_sub_cc')
        bdb.execute('DROP GENERATOR hosp_full_cc')
        bdb.execute('DROP POPULATION hospitals_sub')
        bdb.execute('DROP POPULATION hospitals_full')
예제 #2
0
def test_simulate_drawconstraint_error__ci_slow():
    """SIMULATE rejects a variable that is both drawn and constrained.

    Builds a one-model CGPM generator over the dha data, checks that
    simulating ttl_mdcr_spnd while conditioning on it raises ValueError,
    and then checks that the same query without the conflicting target
    yields 100 single-column rows.
    """
    # The SQL text (including its whitespace) is kept exactly as the
    # statements were originally written inline.
    conflicting_query = '''
                SIMULATE ttl_mdcr_spnd, n_death_ill FROM hospital
                    GIVEN ttl_mdcr_spnd = 40000
                    LIMIT 100
            '''
    valid_query = '''
            SIMULATE n_death_ill FROM hospital
                GIVEN ttl_mdcr_spnd = 40000
                LIMIT 100
        '''
    with bayeslite.bayesdb_open() as bdb:
        with open(dha_csv, 'rU') as csv_file:
            read_csv.bayesdb_read_csv(
                bdb, 'dha', csv_file, header=True, create=True)
        cgpm_backend = bdb.backends['cgpm']
        cgpm_backend.set_multiprocess(False)
        key_override = [('name', 'key')]
        bayesdb_guess_population(
            bdb, 'hospital', 'dha', overrides=key_override)
        for statement in (
                'CREATE GENERATOR hospital_cc FOR hospital USING cgpm;',
                'INITIALIZE 1 MODEL FOR hospital_cc',
                'ANALYZE hospital_cc FOR 1 ITERATION (OPTIMIZED);',
        ):
            bdb.execute(statement)
        # ttl_mdcr_spnd appears both as a query variable and as a
        # condition variable, so the query must fail.  The error is a
        # ValueError because the CGPM runtime, not cgpm_backend, is what
        # detects the conflict.
        with pytest.raises(ValueError):
            bdb.execute(conflicting_query).fetchall()
        rows = bdb.execute(valid_query).fetchall()
        assert len(rows) == 100
        assert all(len(row) == 1 for row in rows)
예제 #3
0
def test_simulate_drawconstraint_error__ci_slow():
    """Drawing a variable that is also a GIVEN constraint must fail.

    After building a one-model CGPM generator on the dha data, the test
    expects ValueError when ttl_mdcr_spnd is both simulated and
    conditioned on, and expects 100 one-column rows when only
    n_death_ill is simulated under the same condition.
    """
    with bayeslite.bayesdb_open() as bdb:
        with open(dha_csv, 'rU') as source:
            read_csv.bayesdb_read_csv(
                bdb, 'dha', source, header=True, create=True)
        bdb.backends['cgpm'].set_multiprocess(False)
        bayesdb_guess_population(
            bdb, 'hospital', 'dha', overrides=[('name', 'key')])

        setup_statements = [
            'CREATE GENERATOR hospital_cc FOR hospital USING cgpm;',
            'INITIALIZE 1 MODEL FOR hospital_cc',
            'ANALYZE hospital_cc FOR 1 ITERATION (OPTIMIZED);',
        ]
        for bql in setup_statements:
            bdb.execute(bql)

        # Query text is reproduced verbatim, whitespace included.
        overlapping = '''
                SIMULATE ttl_mdcr_spnd, n_death_ill FROM hospital
                    GIVEN ttl_mdcr_spnd = 40000
                    LIMIT 100
            '''
        # The condition variables and the query variables both contain
        # ttl_mdcr_spnd.  A ValueError is raised since the CGPM runtime,
        # not cgpm_backend, catches the problem.
        with pytest.raises(ValueError):
            bdb.execute(overlapping).fetchall()

        disjoint = '''
            SIMULATE n_death_ill FROM hospital
                GIVEN ttl_mdcr_spnd = 40000
                LIMIT 100
        '''
        draws = bdb.execute(disjoint).fetchall()
        assert len(draws) == 100
        assert all(len(draw) == 1 for draw in draws)
예제 #4
0
def test_subsample():
    """Smoke-test crosscat generators with SUBSAMPLE(OFF) and SUBSAMPLE(100).

    Two populations are guessed from the same dha table.  hosp_full_cc
    is created with SUBSAMPLE(OFF) and hosp_sub_cc with SUBSAMPLE(100).
    After initializing one model and analyzing hosp_sub_cc for one
    iteration, several BQL queries restricted to rowids 1 and 101 are run
    against hospitals_sub; their results are fetched and discarded.
    Finally the first 100 sql_rowid values recorded in
    bayesdb_crosscat_subsample are compared for both generators.
    """
    with bayeslite.bayesdb_open(builtin_metamodels=False) as bdb:
        # Register a seeded local crosscat engine by hand, since the
        # built-in metamodels were disabled when opening the database.
        cc = crosscat.LocalEngine.LocalEngine(seed=0)
        metamodel = CrosscatMetamodel(cc)
        bayeslite.bayesdb_register_metamodel(bdb, metamodel)
        with open(dha_csv, 'rU') as f:
            read_csv.bayesdb_read_csv(bdb, 'dha', f, header=True, create=True)
        bayesdb_guess_population(bdb,
                                 'hospitals_full',
                                 'dha',
                                 overrides=[('name', 'key')])
        bayesdb_guess_population(bdb,
                                 'hospitals_sub',
                                 'dha',
                                 overrides=[('name', 'key')])
        bdb.execute('''
            CREATE GENERATOR hosp_full_cc FOR hospitals_full USING crosscat (
                SUBSAMPLE(OFF)
            )
        ''')
        bdb.execute('''
            CREATE GENERATOR hosp_sub_cc FOR hospitals_sub USING crosscat (
                SUBSAMPLE(100)
            )
        ''')
        bdb.execute('INITIALIZE 1 MODEL FOR hosp_sub_cc')
        bdb.execute('ANALYZE hosp_sub_cc FOR 1 ITERATION WAIT')
        # The queries below only need to run to completion; the rows they
        # return are not inspected.
        bdb.execute('ESTIMATE SIMILARITY TO (_rowid_=2) FROM hospitals_sub'
                    ' WHERE _rowid_ = 1 OR _rowid_ = 101').fetchall()
        bdb.execute('ESTIMATE SIMILARITY TO (_rowid_=102) FROM hospitals_sub'
                    ' WHERE _rowid_ = 1 OR _rowid_ = 101').fetchall()
        bdb.execute('ESTIMATE PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc'
                    ' FROM hospitals_sub'
                    ' WHERE _rowid_ = 1 OR _rowid_ = 101').fetchall()
        bdb.execute('ESTIMATE SIMILARITY FROM PAIRWISE hospitals_sub'
                    ' WHERE (r0._rowid_ = 1 OR r0._rowid_ = 101) AND'
                    ' (r1._rowid_ = 1 OR r1._rowid_ = 101)').fetchall()
        bdb.execute('INFER mdcr_spnd_amblnc FROM hospitals_sub'
                    ' WHERE _rowid_ = 1 OR _rowid_ = 101').fetchall()
        sql = '''
            SELECT sql_rowid FROM bayesdb_crosscat_subsample
                WHERE generator_id = ?
                ORDER BY cc_row_id ASC
                LIMIT 100
        '''
        # Materialize the expected rowids as a list.  A list never compares
        # equal to a bare range object on Python 3, which would make the
        # `==` check always fail and the `!=` check pass vacuously.
        first_hundred = list(range(1, 100 + 1))
        gid_full = bayesdb_get_generator(bdb, None, 'hosp_full_cc')
        cursor = bdb.sql_execute(sql, (gid_full, ))
        assert [row[0] for row in cursor] == first_hundred
        gid = bayesdb_get_generator(bdb, None, 'hosp_sub_cc')
        cursor = bdb.sql_execute(sql, (gid, ))
        assert [row[0] for row in cursor] != first_hundred
        # Tear down generators before the populations they belong to.
        bdb.execute('DROP GENERATOR hosp_sub_cc')
        bdb.execute('DROP GENERATOR hosp_full_cc')
        bdb.execute('DROP POPULATION hospitals_sub')
        bdb.execute('DROP POPULATION hospitals_full')
예제 #5
0
def test_simulate_drawconstraint():
    """Simulating a constrained variable returns the constrained value.

    Builds a one-model crosscat metamodel over the dha data, simulates
    ttl_mdcr_spnd and n_death_ill given TTL_MDCR_SPND = 40000, and checks
    that the first column of each of the 100 rows is exactly 40000.
    """
    # Query text is reproduced verbatim, whitespace included.
    simulate_query = '''
            SIMULATE ttl_mdcr_spnd, n_death_ill FROM hospital
                GIVEN TTL_MDCR_SPND = 40000
                LIMIT 100
        '''
    with bayeslite.bayesdb_open() as bdb:
        with open(dha_csv, 'rU') as csv_file:
            read_csv.bayesdb_read_csv(
                bdb, 'dha', csv_file, header=True, create=True)
        key_override = [('name', 'key')]
        bayesdb_guess_population(
            bdb, 'hospital', 'dha', overrides=key_override)
        for statement in (
                'CREATE METAMODEL hospital_cc FOR hospital USING crosscat()',
                'INITIALIZE 1 MODEL FOR hospital_cc',
                'ANALYZE hospital_cc FOR 1 ITERATION WAIT',
        ):
            bdb.execute(statement)
        rows = bdb.execute(simulate_query).fetchall()
        constrained_column = [row[0] for row in rows]
        assert constrained_column == [40000] * 100
예제 #6
0
    def dot_guess(self, line):
        '''guess population schema
        <population> <table>

        Create a population named <population> with variables
        corresponding to columns in table <table>, heuristically
        guessing their statistical types.
        '''
        # NOTE(review): the docstring above is left untouched; it reads like
        # shell help text and may be displayed at runtime -- confirm.
        # XXX Crude tokenizer: plain whitespace splitting, no quoting.
        try:
            population, table = line.split()
        except ValueError:
            # Unpacking fails unless there are exactly two tokens.
            self.stdout.write('Usage: .guess <population> <table>\n')
            return
        try:
            guess.bayesdb_guess_population(self._bdb, population, table)
        except Exception:
            # Print the traceback to the shell's stdout rather than letting
            # the exception propagate to the caller.
            self.stdout.write(traceback.format_exc())
예제 #7
0
파일: core.py 프로젝트: probcomp/bayeslite
    def dot_guess(self, line):
        '''guess population schema
        <population> <table>

        Create a population named <population> with variables
        corresponding to columns in table <table>, heuristically
        guessing their statistical types.
        '''
        # NOTE(review): the docstring above is left untouched; it reads like
        # shell help text and may be displayed at runtime -- confirm.
        # XXX Crude tokenizer: whitespace splitting only.
        words = line.split()
        if len(words) == 2:
            population, table = words
            try:
                guess.bayesdb_guess_population(self._bdb, population, table)
            except Exception:
                # Any failure is reported as a traceback on the shell's
                # stdout instead of being raised to the caller.
                self.stdout.write(traceback.format_exc())
        else:
            # Wrong number of arguments: show the usage line and do nothing.
            self.stdout.write('Usage: .guess <population> <table>\n')
예제 #8
0
def test_guess_population():
    """Check bayesdb_guess_population on a small synthetic table.

    Table t is filled with one row per ordered pair of lowercase letters:
    x is the two-letter string, y is the parity of the summed character
    codes, and z is the square root of that sum.  The test expects
    ValueError when y and z are both overridden to 'ignore' and when the
    population is guessed a second time, and checks the resulting rows of
    bayesdb_variable.
    """
    insert_sql = 'INSERT INTO t (x, y, z) VALUES (?, ?, ?)'
    expected_variables = [
        (1, None, 1, 'y', 'nominal'),
        (1, None, 2, 'z', 'numerical'),
    ]
    with bayeslite.bayesdb_open() as bdb:
        bdb.sql_execute('CREATE TABLE t(x NUMERIC, y NUMERIC, z NUMERIC)')
        letter_codes = range(ord('a'), ord('z') + 1)
        for first in letter_codes:
            for second in letter_codes:
                code_sum = first + second
                record = (
                    chr(first) + chr(second),
                    code_sum % 2,
                    math.sqrt(code_sum),
                )
                bdb.sql_execute(insert_sql, record)
        ignore_rest = [('y', 'ignore'), ('z', 'ignore')]
        with pytest.raises(ValueError):
            # No modeled columns.  (x is key.)
            bayesdb_guess_population(bdb, 'p', 't', overrides=ignore_rest)
        bayesdb_guess_population(bdb, 'p', 't')
        with pytest.raises(ValueError):
            # Population already exists.
            bayesdb_guess_population(bdb, 'p', 't')
        variables = bdb.sql_execute(
            'SELECT * FROM bayesdb_variable').fetchall()
        assert variables == expected_variables
예제 #9
0
def test_guess_population():
    """Exercise bayesdb_guess_population against a generated table.

    Every ordered pair of lowercase letters contributes one row to t:
    x holds the two letters, y the parity of the sum of their character
    codes, z the square root of that sum.  Guessing with y and z both
    set to 'ignore' must raise ValueError, as must guessing population p
    twice; afterwards bayesdb_variable is expected to hold exactly the
    two rows asserted at the end.
    """
    with bayeslite.bayesdb_open() as bdb:
        bdb.sql_execute('CREATE TABLE t(x NUMERIC, y NUMERIC, z NUMERIC)')

        codes = range(ord('a'), ord('z') + 1)
        for c in codes:
            for d in codes:
                total = c + d
                values = (chr(c) + chr(d), total % 2, math.sqrt(total))
                bdb.sql_execute(
                    'INSERT INTO t (x, y, z) VALUES (?, ?, ?)', values)

        # No modeled columns.  (x is key.)
        with pytest.raises(ValueError):
            bayesdb_guess_population(
                bdb, 'p', 't',
                overrides=[('y', 'ignore'), ('z', 'ignore')])

        bayesdb_guess_population(bdb, 'p', 't')

        # Population already exists.
        with pytest.raises(ValueError):
            bayesdb_guess_population(bdb, 'p', 't')

        stored = bdb.sql_execute('SELECT * FROM bayesdb_variable').fetchall()
        assert stored == [
            (1, None, 1, 'y', 'nominal'),
            (1, None, 2, 'z', 'numerical'),
        ]
예제 #10
0
def test_subsample():
    """Smoke-test a CGPM generator created with SUBSAMPLE 100.

    Two populations are guessed from the same dha table.  hosp_full_cc
    is created without options, hosp_sub_cc with SUBSAMPLE 100.  After
    initializing one model and analyzing hosp_sub_cc for one iteration,
    several BQL queries restricted to rowids 1 and 101 are executed
    against hospitals_sub; their results are fetched and discarded.
    Finally the first 100 table rowids recorded in
    bayesdb_cgpm_individual are compared for both generators.
    """
    with bayeslite.bayesdb_open(builtin_backends=False) as bdb:
        # Register a single-process CGPM backend by hand, since the
        # built-in backends were disabled when opening the database.
        backend = CGPM_Backend(cgpm_registry={}, multiprocess=False)
        bayeslite.bayesdb_register_backend(bdb, backend)
        with open(dha_csv, 'rU') as f:
            read_csv.bayesdb_read_csv(bdb, 'dha', f, header=True, create=True)
        bayesdb_guess_population(bdb,
                                 'hospitals_full',
                                 'dha',
                                 overrides=[('name', 'key')])
        bayesdb_guess_population(bdb,
                                 'hospitals_sub',
                                 'dha',
                                 overrides=[('name', 'key')])
        bdb.execute('''
            CREATE GENERATOR hosp_full_cc FOR hospitals_full USING cgpm;
        ''')
        bdb.execute('''
            CREATE GENERATOR hosp_sub_cc FOR hospitals_sub USING cgpm(
                SUBSAMPLE 100
            )
        ''')
        bdb.execute('INITIALIZE 1 MODEL FOR hosp_sub_cc')
        bdb.execute('ANALYZE hosp_sub_cc FOR 1 ITERATION (OPTIMIZED)')
        # The queries below only need to run to completion; the rows they
        # return are not inspected.
        bdb.execute('''
            ESTIMATE SIMILARITY TO (_rowid_=2) IN THE CONTEXT OF PNEUM_SCORE
            FROM hospitals_sub WHERE _rowid_ = 1 OR _rowid_ = 101
        ''').fetchall()
        bdb.execute('''
            ESTIMATE SIMILARITY TO (_rowid_=102) IN THE CONTEXT OF
            N_DEATH_ILL FROM hospitals_sub
            WHERE _rowid_ = 1 OR _rowid_ = 101
        ''').fetchall()
        bdb.execute('''
            ESTIMATE PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc
            FROM hospitals_sub
            WHERE _rowid_ = 1 OR _rowid_ = 101
        ''').fetchall()
        bdb.execute('''
            ESTIMATE SIMILARITY IN THE CONTEXT OF PNEUM_SCORE
            FROM PAIRWISE hospitals_sub
            WHERE (r0._rowid_ = 1 OR r0._rowid_ = 101) AND
            (r1._rowid_ = 1 OR r1._rowid_ = 101)
        ''').fetchall()
        bdb.execute('''
            INFER mdcr_spnd_amblnc FROM hospitals_sub
            WHERE _rowid_ = 1 OR _rowid_ = 101
        ''').fetchall()
        sql = '''
            SELECT table_rowid FROM bayesdb_cgpm_individual
                WHERE generator_id = ?
                ORDER BY cgpm_rowid ASC
                LIMIT 100
        '''
        # Materialize the expected rowids as a list.  A list never compares
        # equal to a bare range object on Python 3, which would make the
        # `==` check always fail and the `!=` check pass vacuously.
        first_hundred = list(range(1, 100 + 1))
        gid_full = bayesdb_get_generator(bdb, None, 'hosp_full_cc')
        cursor = bdb.sql_execute(sql, (gid_full, ))
        assert [row[0] for row in cursor] == first_hundred
        gid = bayesdb_get_generator(bdb, None, 'hosp_sub_cc')
        cursor = bdb.sql_execute(sql, (gid, ))
        assert [row[0] for row in cursor] != first_hundred
        # Tear down generators before the populations they belong to.
        bdb.execute('DROP GENERATOR hosp_sub_cc')
        bdb.execute('DROP GENERATOR hosp_full_cc')
        bdb.execute('DROP POPULATION hospitals_sub')
        bdb.execute('DROP POPULATION hospitals_full')