Esempio n. 1
0
def test_register():
    """Check that registering a Composer installs its metamodel row and schema.

    Opens a BayesDB handle, registers a ``Composer`` with it, then verifies
    that a row named after the composer exists in ``bayesdb_metamodel`` and
    that every expected table and trigger is listed in ``sqlite_master``.
    The handle is closed even when one of the checks fails.
    """
    bdb = bayeslite.bayesdb_open()
    try:
        composer = Composer(n_samples=5)
        bayeslite.bayesdb_register_metamodel(bdb, composer)
        # Check if globally registered.  The value is bound as a parameter
        # rather than spliced into the SQL text.
        try:
            bdb.sql_execute('''
                SELECT * FROM bayesdb_metamodel WHERE name = ?
            ''', (composer.name(),)).next()
        except StopIteration:
            pytest.fail('Composer not registered in bayesdb_metamodel.')
        # Check all tables/triggers.
        schema = [
            ('table', 'bayesdb_composer_cc_id'),
            ('table', 'bayesdb_composer_column_owner'),
            ('table', 'bayesdb_composer_column_toposort'),
            ('trigger', 'bayesdb_composer_column_toposort_check'),
            ('table', 'bayesdb_composer_column_parents'),
            ('table', 'bayesdb_composer_column_foreign_predictor'),
            ('trigger', 'bayesdb_composer_column_foreign_predictor_check'),
        ]
        for kind, name in schema:
            try:
                bdb.sql_execute('''
                    SELECT * FROM sqlite_master WHERE type = ? AND name = ?
                ''', (kind, name)).next()
            except StopIteration:
                pytest.fail('Missing from Composer schema: {}'.format(
                    (kind, name)))
    finally:
        # pytest.fail raises, so without this the handle would stay open
        # whenever a check above fails.
        bdb.close()
Esempio n. 2
0
def test_register():
    """Verify the rows and schema objects created by registering a Composer.

    After ``bayesdb_register_metamodel`` the composer's name must appear in
    ``bayesdb_metamodel`` and each expected table/trigger must be present in
    ``sqlite_master``.  The database handle is always closed, including when
    a check fails.
    """
    expected_schema = [
        ('table', 'bayesdb_composer_cc_id'),
        ('table', 'bayesdb_composer_column_owner'),
        ('table', 'bayesdb_composer_column_toposort'),
        ('trigger', 'bayesdb_composer_column_toposort_check'),
        ('table', 'bayesdb_composer_column_parents'),
        ('table', 'bayesdb_composer_column_foreign_predictor'),
        ('trigger', 'bayesdb_composer_column_foreign_predictor_check'),
    ]
    bdb = bayeslite.bayesdb_open()
    try:
        composer = Composer(n_samples=5)
        bayeslite.bayesdb_register_metamodel(bdb, composer)
        # Check if globally registered.  Lookup values are passed as bound
        # parameters instead of being formatted into the statement.
        try:
            bdb.sql_execute('''
                SELECT * FROM bayesdb_metamodel WHERE name = ?
            ''', (composer.name(),)).next()
        except StopIteration:
            pytest.fail('Composer not registered in bayesdb_metamodel.')
        # Check all tables/triggers.
        for kind, name in expected_schema:
            try:
                bdb.sql_execute('''
                    SELECT * FROM sqlite_master WHERE type = ? AND name = ?
                ''', (kind, name)).next()
            except StopIteration:
                pytest.fail('Missing from Composer schema: {}'.format(
                    (kind, name)))
    finally:
        # Runs on failure too: pytest.fail raises out of the checks above.
        bdb.close()
Esempio n. 3
0
 def drop_generator(self, bdb, genid):
     """Delete composer generator `genid` and its internal crosscat generator.

     Everything runs inside a single savepoint on `bdb`: cached predictors
     whose key starts with `genid` are evicted, the generator's rows are
     deleted from the composer bookkeeping tables, and the internal
     crosscat generator is dropped last.
     """
     with bdb.savepoint():
         # Evict cache entries belonging to this generator.  The keys are
         # collected first so the cache is not mutated while it is iterated.
         stale = [key for key in self._predictor_cache(bdb)
                  if key[0] == genid]
         for key in stale:
             del self._predictor_cache(bdb)[key]
         # Resolve the internal crosscat generator's name before the rows
         # below are deleted.
         crosscat_name = core.bayesdb_generator_name(
             bdb, self.cc_id(bdb, genid))
         # One DELETE per bookkeeping table, executed in the order listed.
         purge_statements = ['''
             DELETE FROM bayesdb_composer_column_foreign_predictor
                 WHERE generator_id = ?
         ''', '''
             DELETE FROM bayesdb_composer_cc_id
                 WHERE generator_id = ?
         ''', '''
             DELETE FROM bayesdb_composer_column_toposort
                 WHERE generator_id = ?
         ''', '''
             DELETE FROM bayesdb_composer_column_parents
                 WHERE generator_id = ?
         ''', '''
             DELETE FROM bayesdb_composer_column_owner
                 WHERE generator_id = ?
         ''']
         for statement in purge_statements:
             bdb.sql_execute(statement, (genid,))
         # Drop internal crosscat.
         bdb.execute('''
             DROP GENERATOR {}
         '''.format(quote(crosscat_name)))
Esempio n. 4
0
 def drop_models(self, bdb, genid, modelnos=None):
     """Drop models from the internal crosscat generator behind `genid`.

     With `modelnos` left as None a ``DROP MODELS FROM ...`` statement is
     issued; otherwise the given model numbers are listed, comma separated,
     in a ``DROP MODELS ... FROM ...`` statement.
     """
     crosscat_genid = self.cc_id(bdb, genid)
     qg = quote(core.bayesdb_generator_name(bdb, crosscat_genid))
     if modelnos is None:
         bdb.execute('DROP MODELS FROM {};'.format(qg))
         return
     numbers = ",".join(map(str, modelnos))
     bdb.execute('DROP MODELS {} FROM {};'.format(numbers, qg))
Esempio n. 5
0
 def initialize_models(self, bdb, genid, modelnos, model_config):
     """Initialize the internal crosscat models and every foreign predictor.

     First issues ``INITIALIZE n MODELS`` on the internal crosscat
     generator with ``n = max(modelnos) + 1``.  Then, for each foreign
     column, builds a predictor with the registered builder and stores its
     serialized form in ``bayesdb_composer_column_foreign_predictor``.
     `model_config` is accepted but not used here.
     """
     # INITIALIZE guarantees that models exist up to the requested count and
     # BayesDB works out which numbers still need filling in, so asking for
     # max(modelnos)+1 models keeps the crosscat model numbers equal to ours.
     qg = quote(core.bayesdb_generator_name(bdb, self.cc_id(bdb, genid)))
     bdb.execute('INITIALIZE {} MODELS FOR {};'.format(max(modelnos)+1, qg))
     # Initialize the foreign predictors.
     for fcol in self.fcols(bdb, genid):
         # Describe the target column as a (name, stattype) pair.
         target_name = core.bayesdb_generator_column_name(bdb, genid, fcol)
         target_stattype = core.bayesdb_generator_column_stattype(
             bdb, genid, fcol)
         targets = [(target_name, target_stattype)]
         # Describe each conditioning (parent) column the same way.
         conditions = []
         for pcol in self.pcols(bdb, genid, fcol):
             parent_name = core.bayesdb_generator_column_name(
                 bdb, genid, pcol)
             parent_stattype = core.bayesdb_generator_column_stattype(
                 bdb, genid, pcol)
             conditions.append((parent_name, parent_stattype))
         # Build the predictor with the builder registered under its name.
         table_name = core.bayesdb_generator_table(bdb, genid)
         builder = self.predictor_builder[
             self.predictor_name(bdb, genid, fcol)]
         predictor = builder.create(bdb, table_name, targets, conditions)
         # Persist the serialized predictor for this column.
         with bdb.savepoint():
             blob = builder.serialize(bdb, predictor)
             bdb.sql_execute('''
                 UPDATE bayesdb_composer_column_foreign_predictor SET
                     predictor_binary = :predictor_binary
                     WHERE generator_id = :genid AND colno = :colno
             ''', {
                 'genid': genid,
                 'predictor_binary': sqlite3.Binary(blob),
                 'colno': fcol,
             })
Esempio n. 6
0
def test_drop_generator():
    """Check that DROP GENERATOR removes a composer generator completely.

    The checks themselves live in ``_check_drop_generator``; this wrapper
    only owns the database handle so that it is closed even when a check
    fails.
    """
    bdb = bayeslite.bayesdb_open()
    try:
        _check_drop_generator(bdb)
    finally:
        bdb.close()


def _check_drop_generator(bdb):
    """Create composer generator ``t1`` in `bdb`, drop it, verify cleanup."""
    # Initialize the database
    bayeslite.bayesdb_read_csv_file(bdb, 'satellites', PATH_SATELLITES_CSV,
        header=True, create=True)
    composer = Composer(n_samples=5)
    bayeslite.bayesdb_register_metamodel(bdb, composer)
    composer.register_foreign_predictor(random_forest.RandomForest)
    composer.register_foreign_predictor(multiple_regression.MultipleRegression)
    composer.register_foreign_predictor(keplers_law.KeplersLaw)
    bdb.execute('''
        CREATE GENERATOR t1 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                Apogee_km NUMERICAL, Eccentricity NUMERICAL,
                Launch_Mass_kg NUMERICAL, Dry_Mass_kg NUMERICAL,
                Power_watts NUMERICAL, Date_of_Launch NUMERICAL,
                Contractor CATEGORICAL,
                Country_of_Contractor CATEGORICAL, Launch_Site CATEGORICAL,
                Launch_Vehicle CATEGORICAL,
                Source_Used_for_Orbital_Data CATEGORICAL,
                longitude_radians_of_geo NUMERICAL,
                Inclination_radians NUMERICAL,
            ),
            random_forest (
                Type_of_Orbit CATEGORICAL
                    GIVEN Apogee_km, Perigee_km,
                        Eccentricity, Period_minutes, Launch_Mass_kg,
                        Power_watts, Anticipated_Lifetime, Class_of_orbit
            ),
            keplers_law (
                Period_minutes NUMERICAL
                    GIVEN Perigee_km, Apogee_km
            ),
            multiple_regression (
                Anticipated_Lifetime NUMERICAL
                    GIVEN Dry_Mass_kg, Power_watts, Launch_Mass_kg,
                    Contractor
            ),
            DEPENDENT(Apogee_km, Perigee_km, Eccentricity),
            DEPENDENT(Contractor, Country_of_Contractor),
            INDEPENDENT(Country_of_Operator, Date_of_Launch)
        );''')
    generator_id = bayeslite.core.bayesdb_get_generator(bdb, 't1')
    schema = [
        ('table', 'bayesdb_composer_cc_id'),
        ('table', 'bayesdb_composer_column_owner'),
        ('table', 'bayesdb_composer_column_toposort'),
        ('table', 'bayesdb_composer_column_parents'),
        ('table', 'bayesdb_composer_column_foreign_predictor'),
    ]
    # Before dropping, every table must hold at least one row for the
    # generator; .next() raises StopIteration otherwise.
    for _, name in schema:
        bdb.sql_execute('''
            SELECT * FROM {} WHERE generator_id=?
        '''.format(quote(name)), (generator_id,)).next()
    # Drop generator and ensure table lookups with generator_id throw error.
    bdb.execute('DROP GENERATOR t1')
    for _, name in schema:
        with pytest.raises(StopIteration):
            bdb.sql_execute('''
                SELECT * FROM {} WHERE generator_id=?
            '''.format(quote(name)), (generator_id,)).next()
    # Both the composer generator and its internal crosscat must be gone.
    assert not bayeslite.core.bayesdb_has_generator(bdb, 't1')
    assert not bayeslite.core.bayesdb_has_generator(bdb, 't1_cc')
Esempio n. 7
0
 def create_generator(self, bdb, table, schema, instantiate):
     """Create a composer generator for `table` from `schema`.

     Parses `schema`, instantiates this generator through the `instantiate`
     callback, and creates an internal crosscat generator (this generator's
     name plus a ``_cc`` suffix) over the columns in ``lcols``.  Then, in
     one savepoint, records the composer bookkeeping rows: the crosscat
     generator id, column ownership, parents of foreign columns, the
     topological order, and the predictor name of each foreign column.
     """
     # Parse the schema.
     (columns, lcols, _fcols, fcol_to_pcols, fcol_to_fpred,
         dependencies) = self.parse(schema)
     # Instantiate **this** generator.
     genid, bdbcolumns = instantiate(columns.items())
     # The internal crosscat generator shares this generator's name, with a
     # _cc suffix appended.
     cc_name = bayeslite.core.bayesdb_generator_name(bdb, genid) + '_cc'
     # Assemble the column and dependency clauses of the crosscat schema.
     column_specs = []
     for colname in lcols:
         column_specs.append(
             '{} {}'.format(quote(colname), quote(columns[colname])))
     cc_cols = ','.join(column_specs)
     cc_dep = [
         ('DEPENDENT({})' if dep else 'INDEPENDENT({})').format(
             ','.join(map(quote, colnames)))
         for dep, colnames in dependencies]
     bdb.execute("""
         CREATE GENERATOR {} FOR {} USING crosscat(
             {}, {}
         );
     """.format(quote(cc_name), quote(table), cc_cols, ','.join(cc_dep)))
     # Translate column names into column numbers.
     fcolno_to_pcolnos = {}
     for fcol in fcol_to_pcols:
         fcolno = core.bayesdb_generator_column_number(bdb, genid, fcol)
         pcolnos = []
         for pcol in fcol_to_pcols[fcol]:
             pcolnos.append(
                 core.bayesdb_generator_column_number(bdb, genid, pcol))
         fcolno_to_pcolnos[fcolno] = pcolnos
     with bdb.savepoint():
         # Record the id of the internal crosscat generator.
         cc_genid = core.bayesdb_get_generator(bdb, cc_name)
         bdb.sql_execute('''
             INSERT INTO bayesdb_composer_cc_id
                 (generator_id, crosscat_generator_id) VALUES (?,?)
         ''', (genid, cc_genid,))
         # Record, per column, whether it is local (1) or foreign (0).
         for colno, _, _ in bdbcolumns:
             is_local = int(colno not in fcolno_to_pcolnos)
             bdb.sql_execute('''
                 INSERT INTO bayesdb_composer_column_owner
                     (generator_id, colno, local) VALUES (?,?,?)
             ''', (genid, colno, is_local,))
         # Record the parents of each foreign column.
         for fcolno, pcolnos in fcolno_to_pcolnos.items():
             for pcolno in pcolnos:
                 bdb.sql_execute('''
                     INSERT INTO bayesdb_composer_column_parents
                         (generator_id, fcolno, pcolno) VALUES (?,?,?)
                 ''', (genid, fcolno, pcolno,))
         # Record the topological order.
         ordering = self.topological_sort(fcolno_to_pcolnos)
         for position, (colno, _) in enumerate(ordering):
             bdb.sql_execute('''
                 INSERT INTO bayesdb_composer_column_toposort
                     (generator_id, colno, position) VALUES (?,?,?)
                 ''', (genid, colno, position,))
         # Record which predictor serves each foreign column.
         for fcolno in fcolno_to_pcolnos:
             fcol_name = core.bayesdb_generator_column_name(
                 bdb, genid, fcolno)
             fp_name = fcol_to_fpred[casefold(fcol_name)]
             bdb.sql_execute('''
                 INSERT INTO bayesdb_composer_column_foreign_predictor
                     (generator_id, colno, predictor_name) VALUES (?,?,?)
             ''', (genid, fcolno, casefold(fp_name)))
Esempio n. 8
0
def test_drop_generator():
    """Check that DROP GENERATOR removes a composer generator completely.

    The database handle is owned here and closed in a ``finally`` clause,
    so a failing check inside ``_check_drop_generator`` no longer leaves it
    open.
    """
    bdb = bayeslite.bayesdb_open()
    try:
        _check_drop_generator(bdb)
    finally:
        bdb.close()


def _check_drop_generator(bdb):
    """Create composer generator ``t1`` in `bdb`, drop it, verify cleanup."""
    # Initialize the database
    bayeslite.bayesdb_read_csv_file(bdb,
                                    'satellites',
                                    PATH_SATELLITES_CSV,
                                    header=True,
                                    create=True)
    composer = Composer(n_samples=5)
    bayeslite.bayesdb_register_metamodel(bdb, composer)
    composer.register_foreign_predictor(random_forest.RandomForest)
    composer.register_foreign_predictor(multiple_regression.MultipleRegression)
    composer.register_foreign_predictor(keplers_law.KeplersLaw)
    bdb.execute('''
        CREATE GENERATOR t1 FOR satellites USING composer(
            default (
                Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                Users CATEGORICAL, Purpose CATEGORICAL,
                Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                Apogee_km NUMERICAL, Eccentricity NUMERICAL,
                Launch_Mass_kg NUMERICAL, Dry_Mass_kg NUMERICAL,
                Power_watts NUMERICAL, Date_of_Launch NUMERICAL,
                Contractor CATEGORICAL,
                Country_of_Contractor CATEGORICAL, Launch_Site CATEGORICAL,
                Launch_Vehicle CATEGORICAL,
                Source_Used_for_Orbital_Data CATEGORICAL,
                longitude_radians_of_geo NUMERICAL,
                Inclination_radians NUMERICAL,
            ),
            random_forest (
                Type_of_Orbit CATEGORICAL
                    GIVEN Apogee_km, Perigee_km,
                        Eccentricity, Period_minutes, Launch_Mass_kg,
                        Power_watts, Anticipated_Lifetime, Class_of_orbit
            ),
            keplers_law (
                Period_minutes NUMERICAL
                    GIVEN Perigee_km, Apogee_km
            ),
            multiple_regression (
                Anticipated_Lifetime NUMERICAL
                    GIVEN Dry_Mass_kg, Power_watts, Launch_Mass_kg,
                    Contractor
            ),
            DEPENDENT(Apogee_km, Perigee_km, Eccentricity),
            DEPENDENT(Contractor, Country_of_Contractor),
            INDEPENDENT(Country_of_Operator, Date_of_Launch)
        );''')
    generator_id = bayeslite.core.bayesdb_get_generator(bdb, 't1')
    schema = [
        ('table', 'bayesdb_composer_cc_id'),
        ('table', 'bayesdb_composer_column_owner'),
        ('table', 'bayesdb_composer_column_toposort'),
        ('table', 'bayesdb_composer_column_parents'),
        ('table', 'bayesdb_composer_column_foreign_predictor'),
    ]
    # Before dropping, every table must hold at least one row for the
    # generator; .next() raises StopIteration otherwise.
    for _, name in schema:
        bdb.sql_execute(
            '''
            SELECT * FROM {} WHERE generator_id=?
        '''.format(quote(name)), (generator_id, )).next()
    # Drop generator and ensure table lookups with generator_id throw error.
    bdb.execute('DROP GENERATOR t1')
    for _, name in schema:
        with pytest.raises(StopIteration):
            bdb.sql_execute(
                '''
                SELECT * FROM {} WHERE generator_id=?
            '''.format(quote(name)), (generator_id, )).next()
    # Both the composer generator and its internal crosscat must be gone.
    assert not bayeslite.core.bayesdb_has_generator(bdb, 't1')
    assert not bayeslite.core.bayesdb_has_generator(bdb, 't1_cc')