def test_register():
    """Check that registering a Composer creates its row and its schema.

    Opens a BayesDB with ``bayesdb_open()``, registers a ``Composer`` as a
    metamodel, and then verifies that

    * ``bayesdb_metamodel`` has a row whose name is ``composer.name()``, and
    * ``sqlite_master`` lists every composer table and trigger named below.

    The test fails through ``pytest.fail`` on the first missing item.
    """
    bdb = bayeslite.bayesdb_open()
    # The handle is closed in ``finally``: ``pytest.fail`` raises, so a
    # trailing ``bdb.close()`` would be skipped on every failing run.
    try:
        composer = Composer(n_samples=5)
        bayeslite.bayesdb_register_metamodel(bdb, composer)

        # Check if globally registered.
        # The name is bound as a parameter rather than spliced into the SQL
        # text, so it is compared as a string value and the result does not
        # depend on how the quoting helper renders it.
        cursor = bdb.sql_execute('''
            SELECT * FROM bayesdb_metamodel WHERE name = ?
        ''', (composer.name(),))
        try:
            # Builtin next() instead of the Python 2-only ``.next()`` method.
            next(cursor)
        except StopIteration:
            pytest.fail('Composer not registered in bayesdb_metamodel.')

        # Check all tables/triggers.
        schema = [
            ('table', 'bayesdb_composer_cc_id'),
            ('table', 'bayesdb_composer_column_owner'),
            ('table', 'bayesdb_composer_column_toposort'),
            ('trigger', 'bayesdb_composer_column_toposort_check'),
            ('table', 'bayesdb_composer_column_parents'),
            ('table', 'bayesdb_composer_column_foreign_predictor'),
            ('trigger', 'bayesdb_composer_column_foreign_predictor_check'),
        ]
        for kind, name in schema:
            cursor = bdb.sql_execute('''
                SELECT * FROM sqlite_master WHERE type = ? AND name = ?
            ''', (kind, name))
            try:
                next(cursor)
            except StopIteration:
                pytest.fail('Missing from Composer schema: {}'.format(
                    (kind, name)))
    finally:
        bdb.close()
def test_register():
    """Verify the metamodel row and schema objects created by registration.

    A ``Composer`` is registered on a BayesDB opened with ``bayesdb_open()``.
    The test then requires a ``bayesdb_metamodel`` row named
    ``composer.name()`` and a ``sqlite_master`` entry for each composer table
    and trigger listed in ``schema``; the first missing one fails the test
    via ``pytest.fail``.
    """
    bdb = bayeslite.bayesdb_open()

    def has_row(sql, bindings):
        # True when the query yields at least one row.  Uses the builtin
        # next() rather than the Python 2-only ``.next()`` method.
        try:
            next(bdb.sql_execute(sql, bindings))
        except StopIteration:
            return False
        return True

    # ``pytest.fail`` raises, so the close has to live in ``finally`` to run
    # on failing paths as well.
    try:
        composer = Composer(n_samples=5)
        bayeslite.bayesdb_register_metamodel(bdb, composer)

        # Check if globally registered.  Lookup values are bound as SQL
        # parameters instead of being formatted into the statement, so they
        # are always compared as string values.
        registered = has_row(
            'SELECT * FROM bayesdb_metamodel WHERE name = ?',
            (composer.name(),))
        if not registered:
            pytest.fail('Composer not registered in bayesdb_metamodel.')

        # Check all tables/triggers.
        schema = [
            ('table', 'bayesdb_composer_cc_id'),
            ('table', 'bayesdb_composer_column_owner'),
            ('table', 'bayesdb_composer_column_toposort'),
            ('trigger', 'bayesdb_composer_column_toposort_check'),
            ('table', 'bayesdb_composer_column_parents'),
            ('table', 'bayesdb_composer_column_foreign_predictor'),
            ('trigger', 'bayesdb_composer_column_foreign_predictor_check'),
        ]
        for kind, name in schema:
            present = has_row(
                'SELECT * FROM sqlite_master WHERE type = ? AND name = ?',
                (kind, name))
            if not present:
                pytest.fail(
                    'Missing from Composer schema: {}'.format((kind, name)))
    finally:
        bdb.close()
def drop_generator(self, bdb, genid):
    """Remove a composer generator's bookkeeping and its internal crosscat.

    All work happens inside one ``bdb.savepoint()``:

    1. predictor-cache entries whose key starts with ``genid`` are removed;
    2. the name of the internal crosscat generator is looked up;
    3. the rows for ``genid`` are deleted from the five composer tables;
    4. the internal crosscat generator is dropped with ``DROP GENERATOR``.
    """
    # Per-generator cleanup, executed top to bottom with ``genid`` bound to
    # the single parameter of each statement.
    cleanup_statements = (
        ''' DELETE FROM bayesdb_composer_column_foreign_predictor WHERE generator_id = ? ''',
        ''' DELETE FROM bayesdb_composer_cc_id WHERE generator_id = ? ''',
        ''' DELETE FROM bayesdb_composer_column_toposort WHERE generator_id = ? ''',
        ''' DELETE FROM bayesdb_composer_column_parents WHERE generator_id = ? ''',
        ''' DELETE FROM bayesdb_composer_column_owner WHERE generator_id = ? ''',
    )
    with bdb.savepoint():
        # Snapshot the matching keys first so the cache is not mutated while
        # it is being iterated.
        stale_keys = [
            key for key in self._predictor_cache(bdb) if key[0] == genid
        ]
        for key in stale_keys:
            del self._predictor_cache(bdb)[key]

        # Resolve the internal crosscat's name before any row is deleted;
        # presumably cc_id() relies on rows removed below -- confirm.
        internal_genid = self.cc_id(bdb, genid)
        internal_name = core.bayesdb_generator_name(bdb, internal_genid)

        for statement in cleanup_statements:
            bdb.sql_execute(statement, (genid,))

        # Drop internal crosscat.
        bdb.execute(''' DROP GENERATOR {} '''.format(quote(internal_name)))
def drop_models(self, bdb, genid, modelnos=None):
    """Drop models from the internal crosscat generator behind ``genid``.

    When ``modelnos`` is None the statement is ``DROP MODELS FROM <name>;``
    with no model list.  Otherwise the given model numbers are joined with
    commas and placed between ``DROP MODELS`` and ``FROM``.
    """
    internal_genid = self.cc_id(bdb, genid)
    internal_name = quote(core.bayesdb_generator_name(bdb, internal_genid))
    if modelnos is None:
        statement = 'DROP MODELS FROM {};'.format(internal_name)
    else:
        numbers = ",".join(map(str, modelnos))
        statement = 'DROP MODELS {} FROM {};'.format(numbers, internal_name)
    bdb.execute(statement)
def initialize_models(self, bdb, genid, modelnos, model_config):
    """Initialize the internal crosscat models and each foreign predictor.

    First issues ``INITIALIZE <max(modelnos)+1> MODELS FOR <internal cc>``.
    Then, for every column returned by ``self.fcols``, builds a predictor
    with the registered builder, serializes it, and stores the bytes in
    ``bayesdb_composer_column_foreign_predictor``.

    ``model_config`` is accepted but not used by this method.

    Raises ValueError (from ``max``) if ``modelnos`` is empty.
    """
    # Initialize internal crosscat, maintaining equality of model numbers.
    # The semantics of INITIALIZE are that it guarantees the existence
    # of a sequence of models up to the requested number of them,
    # and BayesDB computes the numbers that need to be filled in.
    # The inverse of that computation is max(modelnos)+1.
    qg = quote(core.bayesdb_generator_name(bdb, self.cc_id(bdb, genid)))
    bql = 'INITIALIZE {} MODELS FOR {};'.format(max(modelnos)+1, qg)
    bdb.execute(bql)

    def column_spec(colno):
        # Convert a column number to its (name, stattype) pair.
        return (core.bayesdb_generator_column_name(bdb, genid, colno),
                core.bayesdb_generator_column_stattype(bdb, genid, colno))

    # Neither the table name nor the SQL text changes between columns, so
    # both are set up once here instead of on every loop iteration.
    table_name = core.bayesdb_generator_table(bdb, genid)
    sql = '''
        UPDATE bayesdb_composer_column_foreign_predictor
            SET predictor_binary = :predictor_binary
            WHERE generator_id = :genid AND colno = :colno
    '''

    # Initialize the foreign predictors.
    for fcol in self.fcols(bdb, genid):
        targets = [column_spec(fcol)]
        conditions = [
            column_spec(pcol) for pcol in self.pcols(bdb, genid, fcol)
        ]
        # Initialize the foreign predictor.
        predictor_name = self.predictor_name(bdb, genid, fcol)
        builder = self.predictor_builder[predictor_name]
        predictor = builder.create(bdb, table_name, targets, conditions)
        # Store in the database.  Only serialization and the write sit
        # inside the savepoint; the predictor is created before it opens.
        with bdb.savepoint():
            predictor_binary = builder.serialize(bdb, predictor)
            bdb.sql_execute(sql, {
                'genid': genid,
                'predictor_binary': sqlite3.Binary(predictor_binary),
                'colno': fcol
            })
def test_drop_generator():
    """Check that DROP GENERATOR removes every composer row for a generator.

    Loads the satellites CSV, registers a ``Composer`` with three foreign
    predictors, and creates generator ``t1``.  Each composer table must hold
    at least one row for ``t1`` before the drop and none afterwards, and no
    generator named ``t1`` or ``t1_cc`` may remain.
    """
    bdb = bayeslite.bayesdb_open()
    # Close in ``finally`` so the handle is released when an assertion or
    # lookup fails, not only on the passing path.
    try:
        # Initialize the database
        bayeslite.bayesdb_read_csv_file(
            bdb, 'satellites', PATH_SATELLITES_CSV, header=True, create=True)
        composer = Composer(n_samples=5)
        bayeslite.bayesdb_register_metamodel(bdb, composer)
        composer.register_foreign_predictor(random_forest.RandomForest)
        composer.register_foreign_predictor(
            multiple_regression.MultipleRegression)
        composer.register_foreign_predictor(keplers_law.KeplersLaw)
        bdb.execute('''
            CREATE GENERATOR t1 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL,
                    Launch_Mass_kg NUMERICAL, Dry_Mass_kg NUMERICAL,
                    Power_watts NUMERICAL, Date_of_Launch NUMERICAL,
                    Contractor CATEGORICAL, Country_of_Contractor CATEGORICAL,
                    Launch_Site CATEGORICAL, Launch_Vehicle CATEGORICAL,
                    Source_Used_for_Orbital_Data CATEGORICAL,
                    longitude_radians_of_geo NUMERICAL,
                    Inclination_radians NUMERICAL,
                ),
                random_forest (
                    Type_of_Orbit CATEGORICAL
                        GIVEN Apogee_km, Perigee_km,
                            Eccentricity, Period_minutes, Launch_Mass_kg,
                            Power_watts, Anticipated_Lifetime, Class_of_orbit
                ),
                keplers_law (
                    Period_minutes NUMERICAL
                        GIVEN Perigee_km, Apogee_km
                ),
                multiple_regression (
                    Anticipated_Lifetime NUMERICAL
                        GIVEN Dry_Mass_kg, Power_watts, Launch_Mass_kg,
                        Contractor
                ),
                DEPENDENT(Apogee_km, Perigee_km, Eccentricity),
                DEPENDENT(Contractor, Country_of_Contractor),
                INDEPENDENT(Country_of_Operator, Date_of_Launch)
            );''')
        generator_id = bayeslite.core.bayesdb_get_generator(bdb, 't1')
        schema = [
            ('table', 'bayesdb_composer_cc_id'),
            ('table', 'bayesdb_composer_column_owner'),
            ('table', 'bayesdb_composer_column_toposort'),
            ('table', 'bayesdb_composer_column_parents'),
            ('table', 'bayesdb_composer_column_foreign_predictor'),
        ]
        # Iterate through tables before dropping.  next() raises
        # StopIteration, failing the test, if a table has no row for t1.
        # The builtin is used instead of the Python 2-only ``.next()``.
        for _, name in schema:
            next(bdb.sql_execute('''
                SELECT * FROM {} WHERE generator_id=?
            '''.format(quote(name)), (generator_id,)))
        # Drop generator and ensure table lookups with generator_id throw
        # error.
        bdb.execute('DROP GENERATOR t1')
        for _, name in schema:
            with pytest.raises(StopIteration):
                next(bdb.sql_execute('''
                    SELECT * FROM {} WHERE generator_id=?
                '''.format(quote(name)), (generator_id,)))
        assert not bayeslite.core.bayesdb_has_generator(bdb, 't1')
        assert not bayeslite.core.bayesdb_has_generator(bdb, 't1_cc')
    finally:
        bdb.close()
def create_generator(self, bdb, table, schema, instantiate):
    """Create a composer generator together with its internal crosscat.

    Steps, in order:

    1. parse ``schema`` with ``self.parse``;
    2. instantiate this generator through the ``instantiate`` callback,
       which returns the generator id and its column records;
    3. create a crosscat generator named ``<generator name>_cc`` over the
       local columns, carrying the parsed dependency constraints (this
       happens before the savepoint below is opened);
    4. inside one ``bdb.savepoint()``, record the internal crosscat id,
       the local/foreign flag of every column, the parents of each foreign
       column, the topological order, and each foreign column's predictor
       name.
    """
    # SQL for the bookkeeping inserts performed in the savepoint.
    insert_cc_id = ''' INSERT INTO bayesdb_composer_cc_id (generator_id, crosscat_generator_id) VALUES (?,?) '''
    insert_owner = ''' INSERT INTO bayesdb_composer_column_owner (generator_id, colno, local) VALUES (?,?,?) '''
    insert_parent = ''' INSERT INTO bayesdb_composer_column_parents (generator_id, fcolno, pcolno) VALUES (?,?,?) '''
    insert_toposort = ''' INSERT INTO bayesdb_composer_column_toposort (generator_id, colno, position) VALUES (?,?,?) '''
    insert_predictor = ''' INSERT INTO bayesdb_composer_column_foreign_predictor (generator_id, colno, predictor_name) VALUES (?,?,?) '''

    # Parse the schema.
    parsed = self.parse(schema)
    (columns, lcols, _fcols, fcol_to_pcols, fcol_to_fpred,
        dependencies) = parsed

    # Instantiate **this** generator.
    genid, bdbcolumns = instantiate(columns.items())

    # The internal crosscat generator shares this generator's name, with a
    # _cc suffix appended.
    SUFFIX = '_cc'
    cc_name = bayeslite.core.bayesdb_generator_name(bdb, genid) + SUFFIX

    # Build the pieces of the crosscat schema: one "<column> <type>" entry
    # per local column, then one clause per dependency constraint.
    column_specs = [
        '{} {}'.format(quote(colname), quote(columns[colname]))
        for colname in lcols
    ]
    dependency_clauses = [
        ('DEPENDENT({})' if dep else 'INDEPENDENT({})').format(
            ','.join(map(quote, colnames)))
        for dep, colnames in dependencies
    ]
    bql = """ CREATE GENERATOR {} FOR {} USING crosscat( {}, {} ); """.format(
        quote(cc_name), quote(table), ','.join(column_specs),
        ','.join(dependency_clauses))
    bdb.execute(bql)

    # Convert column names to column numbers.
    fcolno_to_pcolnos = {}
    for fname in fcol_to_pcols:
        fcolno = core.bayesdb_generator_column_number(bdb, genid, fname)
        pcolnos = []
        for pname in fcol_to_pcols[fname]:
            pcolnos.append(
                core.bayesdb_generator_column_number(bdb, genid, pname))
        fcolno_to_pcolnos[fcolno] = pcolnos

    with bdb.savepoint():
        # Save internal cc generator id.
        cc_genid = core.bayesdb_get_generator(bdb, cc_name)
        bdb.sql_execute(insert_cc_id, (genid, cc_genid,))

        # Save which columns are local: a column is local exactly when it
        # is not one of the foreign columns.
        for colno, _, _ in bdbcolumns:
            is_local = int(colno not in fcolno_to_pcolnos)
            bdb.sql_execute(insert_owner, (genid, colno, is_local,))

        # Save parents of foreign columns.
        for fcolno, pcolnos in fcolno_to_pcolnos.items():
            for pcolno in pcolnos:
                bdb.sql_execute(insert_parent, (genid, fcolno, pcolno,))

        # Save topological order.
        ordering = self.topological_sort(fcolno_to_pcolnos)
        for position, (colno, _) in enumerate(ordering):
            bdb.sql_execute(insert_toposort, (genid, colno, position,))

        # Save predictor names of foreign columns; the parsed mapping is
        # indexed by the casefolded column name.
        for fcolno in fcolno_to_pcolnos:
            fname = core.bayesdb_generator_column_name(bdb, genid, fcolno)
            fp_name = fcol_to_fpred[casefold(fname)]
            bdb.sql_execute(
                insert_predictor, (genid, fcolno, casefold(fp_name)))
def test_drop_generator():
    """Verify that dropping a composer generator clears its bookkeeping.

    After loading the satellites CSV and creating generator ``t1`` with the
    composer metamodel, each composer table is required to contain a row
    for ``t1``.  Once ``DROP GENERATOR t1`` has run, the same lookups must
    come back empty and neither ``t1`` nor ``t1_cc`` may still exist as a
    generator.
    """
    bdb = bayeslite.bayesdb_open()

    def rows_for(table_name, generator_id):
        # Cursor over the rows of one composer table for one generator.
        return bdb.sql_execute('''
            SELECT * FROM {} WHERE generator_id=?
        '''.format(quote(table_name)), (generator_id,))

    # The database is closed in ``finally`` so that a failing assertion or
    # lookup does not leave the handle open.
    try:
        # Initialize the database
        bayeslite.bayesdb_read_csv_file(
            bdb, 'satellites', PATH_SATELLITES_CSV, header=True, create=True)
        composer = Composer(n_samples=5)
        bayeslite.bayesdb_register_metamodel(bdb, composer)
        composer.register_foreign_predictor(random_forest.RandomForest)
        composer.register_foreign_predictor(
            multiple_regression.MultipleRegression)
        composer.register_foreign_predictor(keplers_law.KeplersLaw)
        bdb.execute('''
            CREATE GENERATOR t1 FOR satellites USING composer(
                default (
                    Country_of_Operator CATEGORICAL, Operator_Owner CATEGORICAL,
                    Users CATEGORICAL, Purpose CATEGORICAL,
                    Class_of_orbit CATEGORICAL, Perigee_km NUMERICAL,
                    Apogee_km NUMERICAL, Eccentricity NUMERICAL,
                    Launch_Mass_kg NUMERICAL, Dry_Mass_kg NUMERICAL,
                    Power_watts NUMERICAL, Date_of_Launch NUMERICAL,
                    Contractor CATEGORICAL, Country_of_Contractor CATEGORICAL,
                    Launch_Site CATEGORICAL, Launch_Vehicle CATEGORICAL,
                    Source_Used_for_Orbital_Data CATEGORICAL,
                    longitude_radians_of_geo NUMERICAL,
                    Inclination_radians NUMERICAL,
                ),
                random_forest (
                    Type_of_Orbit CATEGORICAL
                        GIVEN Apogee_km, Perigee_km,
                            Eccentricity, Period_minutes, Launch_Mass_kg,
                            Power_watts, Anticipated_Lifetime, Class_of_orbit
                ),
                keplers_law (
                    Period_minutes NUMERICAL
                        GIVEN Perigee_km, Apogee_km
                ),
                multiple_regression (
                    Anticipated_Lifetime NUMERICAL
                        GIVEN Dry_Mass_kg, Power_watts, Launch_Mass_kg,
                        Contractor
                ),
                DEPENDENT(Apogee_km, Perigee_km, Eccentricity),
                DEPENDENT(Contractor, Country_of_Contractor),
                INDEPENDENT(Country_of_Operator, Date_of_Launch)
            );''')
        generator_id = bayeslite.core.bayesdb_get_generator(bdb, 't1')
        schema = [
            ('table', 'bayesdb_composer_cc_id'),
            ('table', 'bayesdb_composer_column_owner'),
            ('table', 'bayesdb_composer_column_toposort'),
            ('table', 'bayesdb_composer_column_parents'),
            ('table', 'bayesdb_composer_column_foreign_predictor'),
        ]
        # Iterate through tables before dropping.  The builtin next() is
        # used in place of the Python 2-only ``.next()`` method; it raises
        # StopIteration, failing the test, when a table has no row.
        for _, name in schema:
            next(rows_for(name, generator_id))
        # Drop generator and ensure table lookups with generator_id throw
        # error.
        bdb.execute('DROP GENERATOR t1')
        for _, name in schema:
            with pytest.raises(StopIteration):
                next(rows_for(name, generator_id))
        assert not bayeslite.core.bayesdb_has_generator(bdb, 't1')
        assert not bayeslite.core.bayesdb_has_generator(bdb, 't1_cc')
    finally:
        bdb.close()