def _retest_example(bdb, exname):
    """Check the stored state for example `exname` and analyze it again.

    Registers the example's metamodel with `bdb`, asserts that the
    example's table, generator, and models 0 and 1 are present, and
    then runs one analysis iteration over all models and over models 0
    and 1 individually.

    :param bdb: BayesDB handle the example lives in.
    :param exname: key into the module-level `examples` mapping.
    """
    # Only the metamodel factory, table name, and generator name are
    # needed here; the rest of the example record is ignored.
    mm, t, _t_sql, _data_sql, _data, g, _g_bql, _g_bqlbad0, _g_bqlbad1 = \
        examples[exname]
    qg = bql_quote_name(g)

    bayeslite.bayesdb_register_metamodel(bdb, mm())

    assert core.bayesdb_has_table(bdb, t)
    assert core.bayesdb_has_generator(bdb, g)
    gid = core.bayesdb_get_generator(bdb, g)
    assert core.bayesdb_generator_has_model(bdb, gid, 0)
    assert core.bayesdb_generator_has_model(bdb, gid, 1)

    bdb.execute("ANALYZE %s FOR 1 ITERATION WAIT" % (qg,))
    bdb.execute("ANALYZE %s MODEL 0 FOR 1 ITERATION WAIT" % (qg,))
    bdb.execute("ANALYZE %s MODEL 1 FOR 1 ITERATION WAIT" % (qg,))
def _retest_example(bdb, exname):
    """Check the stored state for example `exname` and analyze it again.

    Registers the example's metamodel with `bdb`, asserts that the
    example's table, generator, and models 0 and 1 are present, and
    then runs one analysis iteration over all models and over models 0
    and 1 individually.

    :param bdb: BayesDB handle the example lives in.
    :param exname: key into the module-level `examples` mapping.
    """
    example = examples[exname]
    # Unpack the full record so a change in its arity is noticed, but
    # only the factory and the two names are used below.
    mm, t, _t_sql, _data_sql, _data, g, _g_bql, _g_bqlbad0, _g_bqlbad1 = \
        example
    qg = bql_quote_name(g)

    bayeslite.bayesdb_register_metamodel(bdb, mm())

    assert core.bayesdb_has_table(bdb, t)
    assert core.bayesdb_has_generator(bdb, g)
    gid = core.bayesdb_get_generator(bdb, g)
    for modelno in (0, 1):
        assert core.bayesdb_generator_has_model(bdb, gid, modelno)

    bdb.execute('ANALYZE %s FOR 1 ITERATION WAIT' % (qg,))
    bdb.execute('ANALYZE %s MODEL 0 FOR 1 ITERATION WAIT' % (qg,))
    bdb.execute('ANALYZE %s MODEL 1 FOR 1 ITERATION WAIT' % (qg,))
def _retest_example(bdb, exname):
    """Check the stored state for example `exname` and analyze it again.

    Registers the example's backend with `bdb`, asserts that the
    example's table, generator, and models 0 and 1 are present, and
    then runs one analysis iteration, first over all models and then
    over models 0 and 1 individually.

    :param bdb: BayesDB handle the example lives in.
    :param exname: key into the module-level `examples` mapping.
    """
    (be, t, _t_sql, _data_sql, _data, p, g, _p_bql, _g_bql, _g_bqlbad0,
        _g_bqlbad1, _cleanup) = examples[exname]
    qg = bql_quote_name(g)

    backend = be()
    bayeslite.bayesdb_register_backend(bdb, backend)
    p_id = core.bayesdb_get_population(bdb, p)

    assert core.bayesdb_has_table(bdb, t)
    assert core.bayesdb_has_generator(bdb, p_id, g)
    gid = core.bayesdb_get_generator(bdb, p_id, g)
    assert core.bayesdb_generator_has_model(bdb, gid, 0)
    assert core.bayesdb_generator_has_model(bdb, gid, 1)

    bdb.execute('ANALYZE %s FOR 1 ITERATION' % (qg,))
    try:
        # Test analyzing models.
        bdb.execute('ANALYZE %s MODEL 0 FOR 1 ITERATION' % (qg,))
        bdb.execute('ANALYZE %s MODEL 1 FOR 1 ITERATION' % (qg,))
    except bayeslite.BQLError:
        # loom does not allow model numbers to be specified in analyze
        # models, so a BQLError is tolerated for that example only.
        assert exname == 'loom'
def _retest_example(bdb, exname):
    """Check the stored state for example `exname` and analyze it again.

    Registers the example's backend with `bdb`, asserts that the
    example's table, generator, and models 0 and 1 are present, and
    then runs one analysis iteration, first over all models and then
    over models 0 and 1 individually.

    :param bdb: BayesDB handle the example lives in.
    :param exname: key into the module-level `examples` mapping.
    """
    (mm, t, _t_sql, _data_sql, _data, p, g, _p_bql, _g_bql, _g_bqlbad0,
        _g_bqlbad1, _cleanup) = examples[exname]
    qg = bql_quote_name(g)

    # Register the instance we just built rather than constructing a
    # second, throwaway one.
    metamodel = mm()
    bayeslite.bayesdb_register_backend(bdb, metamodel)
    p_id = core.bayesdb_get_population(bdb, p)

    assert core.bayesdb_has_table(bdb, t)
    assert core.bayesdb_has_generator(bdb, p_id, g)
    gid = core.bayesdb_get_generator(bdb, p_id, g)
    assert core.bayesdb_generator_has_model(bdb, gid, 0)
    assert core.bayesdb_generator_has_model(bdb, gid, 1)

    bdb.execute('ANALYZE %s FOR 1 ITERATION' % (qg,))
    try:
        # Test analyzing models.
        bdb.execute('ANALYZE %s MODEL 0 FOR 1 ITERATION' % (qg,))
        bdb.execute('ANALYZE %s MODEL 1 FOR 1 ITERATION' % (qg,))
    except bayeslite.BQLError:
        # loom does not allow model numbers to be specified in analyze
        # models, so a BQLError is tolerated for that example only.
        assert exname == 'loom'
def _test_example(bdb, exname):
    """Walk example `exname` through table, population, generator, and
    model creation and deletion, checking savepoint behaviour throughout.

    Each creation step is first run inside `savepoint_rollback` to
    confirm it is undone, then run for real.  On return the table,
    population, and generator exist and two models are initialized.

    :param bdb: BayesDB handle to run the example in.
    :param exname: key into the module-level `examples` mapping.
    """
    (mm, t, t_sql, data_sql, data, p, g, p_bql, g_bql, g_bqlbad0,
        g_bqlbad1, _cleanup) = examples[exname]
    qt = bql_quote_name(t)
    qg = bql_quote_name(g)

    metamodel = mm()
    bayeslite.bayesdb_register_backend(bdb, metamodel)

    # Create a table.
    assert not core.bayesdb_has_table(bdb, t)
    with bdb.savepoint_rollback():
        bdb.sql_execute(t_sql)
        assert core.bayesdb_has_table(bdb, t)
    assert not core.bayesdb_has_table(bdb, t)
    bdb.sql_execute(t_sql)
    assert core.bayesdb_has_table(bdb, t)

    # Insert data into the table.
    count_sql = 'SELECT COUNT(*) FROM %s' % (qt,)
    assert bdb.execute(count_sql).fetchvalue() == 0
    for row in data:
        bdb.sql_execute(data_sql, row)
    n = len(data)
    assert bdb.execute(count_sql).fetchvalue() == n

    # Create a population.
    assert not core.bayesdb_has_population(bdb, p)
    bdb.execute(p_bql)
    p_id = core.bayesdb_get_population(bdb, p)

    # Create a generator.  Make sure savepoints work for this.
    assert not core.bayesdb_has_generator(bdb, p_id, g)
    with pytest.raises(Exception):
        with bdb.savepoint():
            bdb.execute(g_bqlbad0)
    assert not core.bayesdb_has_generator(bdb, p_id, g)
    with pytest.raises(Exception):
        with bdb.savepoint():
            bdb.execute(g_bqlbad1)
    assert not core.bayesdb_has_generator(bdb, p_id, g)
    with bdb.savepoint_rollback():
        bdb.execute(g_bql)
        assert core.bayesdb_has_generator(bdb, p_id, g)
    assert not core.bayesdb_has_generator(bdb, p_id, g)
    bdb.execute(g_bql)
    assert core.bayesdb_has_generator(bdb, p_id, g)
    assert not core.bayesdb_has_generator(bdb, p_id + 1, g)
    # Creating the same generator a second time must fail.
    with pytest.raises(Exception):
        bdb.execute(g_bql)
    assert core.bayesdb_has_generator(bdb, p_id, g)

    # Initialize models, again checking that rollback undoes it.
    gid = core.bayesdb_get_generator(bdb, p_id, g)
    assert not core.bayesdb_generator_has_model(bdb, gid, 0)
    assert [] == core.bayesdb_generator_modelnos(bdb, gid)
    with bdb.savepoint_rollback():
        bdb.execute('INITIALIZE 1 MODEL FOR %s' % (qg,))
        assert core.bayesdb_generator_has_model(bdb, gid, 0)
        assert [0] == core.bayesdb_generator_modelnos(bdb, gid)
    with bdb.savepoint_rollback():
        bdb.execute('INITIALIZE 10 MODELS FOR %s' % (qg,))
        for i in range(10):
            assert core.bayesdb_generator_has_model(bdb, gid, i)
        # list(...) so the comparison also holds where range() is lazy.
        assert list(range(10)) == core.bayesdb_generator_modelnos(bdb, gid)
    bdb.execute('INITIALIZE 2 MODELS FOR %s' % (qg,))

    # Test dropping things.
    with pytest.raises(bayeslite.BQLError):
        bdb.execute('DROP TABLE %s' % (qt,))
    with bdb.savepoint_rollback():
        # Note that sql_execute does not protect us!
        bdb.sql_execute('DROP TABLE %s' % (qt,))
        assert not core.bayesdb_has_table(bdb, t)
    assert core.bayesdb_has_table(bdb, t)
    # XXX Should we reject dropping a generator when there remain
    # models?  Should we not reject dropping a table when there remain
    # generators?  A table can be dropped when there remain indices.
    #
    # with pytest.raises(bayeslite.BQLError):
    #     # Models remain.
    #     bdb.execute('DROP GENERATOR %s' % (qg,))
    with bdb.savepoint_rollback():
        bdb.execute('DROP GENERATOR %s' % (qg,))
        assert not core.bayesdb_has_generator(bdb, None, g)
    assert core.bayesdb_has_generator(bdb, p_id, g)
    with bdb.savepoint_rollback():
        bdb.execute('DROP GENERATOR %s' % (qg,))
        assert not core.bayesdb_has_generator(bdb, None, g)
        bdb.execute(g_bql)
        assert core.bayesdb_has_generator(bdb, None, g)
    assert core.bayesdb_has_generator(bdb, p_id, g)
    assert core.bayesdb_has_generator(bdb, None, g)
    assert gid == core.bayesdb_get_generator(bdb, p_id, g)

    # Test dropping models.
    with bdb.savepoint_rollback():
        try:
            bdb.execute('DROP MODEL 1 FROM %s' % (qg,))
            assert core.bayesdb_generator_has_model(bdb, gid, 0)
            assert not core.bayesdb_generator_has_model(bdb, gid, 1)
            assert [0] == core.bayesdb_generator_modelnos(bdb, gid)
        except bayeslite.BQLError:
            # loom does not allow model numbers to be specified in drop
            # models, so a BQLError is tolerated for that example only.
            assert exname == 'loom'
def _test_example(bdb, exname):
    """Walk example `exname` through table, population, generator, and
    model creation and deletion, checking savepoint behaviour throughout.

    Each creation step is first run inside `savepoint_rollback` to
    confirm it is undone, then run for real.  On return the table,
    population, and generator exist and two models are initialized.

    :param bdb: BayesDB handle to run the example in.
    :param exname: key into the module-level `examples` mapping.
    """
    (be, t, t_sql, data_sql, data, p, g, p_bql, g_bql, g_bqlbad0,
        g_bqlbad1, _cleanup) = examples[exname]
    qt = bql_quote_name(t)
    qg = bql_quote_name(g)

    backend = be()
    bayeslite.bayesdb_register_backend(bdb, backend)

    # Create a table.
    assert not core.bayesdb_has_table(bdb, t)
    with bdb.savepoint_rollback():
        bdb.sql_execute(t_sql)
        assert core.bayesdb_has_table(bdb, t)
    assert not core.bayesdb_has_table(bdb, t)
    bdb.sql_execute(t_sql)
    assert core.bayesdb_has_table(bdb, t)

    # Insert data into the table.
    assert bdb.execute('SELECT COUNT(*) FROM %s' % (qt,)).fetchvalue() == 0
    for row in data:
        bdb.sql_execute(data_sql, row)
    n = len(data)
    assert bdb.execute('SELECT COUNT(*) FROM %s' % (qt,)).fetchvalue() == n

    # Create a population.
    assert not core.bayesdb_has_population(bdb, p)
    bdb.execute(p_bql)
    p_id = core.bayesdb_get_population(bdb, p)

    # Create a generator.  Make sure savepoints work for this: both
    # malformed definitions must fail and leave no generator behind.
    assert not core.bayesdb_has_generator(bdb, p_id, g)
    for bad_bql in (g_bqlbad0, g_bqlbad1):
        with pytest.raises(Exception):
            with bdb.savepoint():
                bdb.execute(bad_bql)
        assert not core.bayesdb_has_generator(bdb, p_id, g)
    with bdb.savepoint_rollback():
        bdb.execute(g_bql)
        assert core.bayesdb_has_generator(bdb, p_id, g)
    assert not core.bayesdb_has_generator(bdb, p_id, g)
    bdb.execute(g_bql)
    assert core.bayesdb_has_generator(bdb, p_id, g)
    assert not core.bayesdb_has_generator(bdb, p_id + 1, g)
    # Creating the same generator a second time must fail.
    with pytest.raises(Exception):
        bdb.execute(g_bql)
    assert core.bayesdb_has_generator(bdb, p_id, g)

    # Initialize models, again checking that rollback undoes it.
    gid = core.bayesdb_get_generator(bdb, p_id, g)
    assert not core.bayesdb_generator_has_model(bdb, gid, 0)
    assert [] == core.bayesdb_generator_modelnos(bdb, gid)
    with bdb.savepoint_rollback():
        bdb.execute('INITIALIZE 1 MODEL FOR %s' % (qg,))
        assert core.bayesdb_generator_has_model(bdb, gid, 0)
        assert [0] == core.bayesdb_generator_modelnos(bdb, gid)
    with bdb.savepoint_rollback():
        bdb.execute('INITIALIZE 10 MODELS FOR %s' % (qg,))
        for i in range(10):
            assert core.bayesdb_generator_has_model(bdb, gid, i)
        # list(...) so the comparison also holds where range() is lazy.
        assert list(range(10)) == core.bayesdb_generator_modelnos(bdb, gid)
    bdb.execute('INITIALIZE 2 MODELS FOR %s' % (qg,))

    # Test dropping things.
    with pytest.raises(bayeslite.BQLError):
        bdb.execute('DROP TABLE %s' % (qt,))
    with bdb.savepoint_rollback():
        # Note that sql_execute does not protect us!
        bdb.sql_execute('DROP TABLE %s' % (qt,))
        assert not core.bayesdb_has_table(bdb, t)
    assert core.bayesdb_has_table(bdb, t)
    # XXX Should we reject dropping a generator when there remain
    # models?  Should we not reject dropping a table when there remain
    # generators?  A table can be dropped when there remain indices.
    #
    # with pytest.raises(bayeslite.BQLError):
    #     # Models remain.
    #     bdb.execute('DROP GENERATOR %s' % (qg,))
    with bdb.savepoint_rollback():
        bdb.execute('DROP GENERATOR %s' % (qg,))
        assert not core.bayesdb_has_generator(bdb, None, g)
    assert core.bayesdb_has_generator(bdb, p_id, g)
    with bdb.savepoint_rollback():
        bdb.execute('DROP GENERATOR %s' % (qg,))
        assert not core.bayesdb_has_generator(bdb, None, g)
        bdb.execute(g_bql)
        assert core.bayesdb_has_generator(bdb, None, g)
    assert core.bayesdb_has_generator(bdb, p_id, g)
    assert core.bayesdb_has_generator(bdb, None, g)
    assert gid == core.bayesdb_get_generator(bdb, p_id, g)

    # Test dropping models.
    with bdb.savepoint_rollback():
        try:
            bdb.execute('DROP MODEL 1 FROM %s' % (qg,))
            assert core.bayesdb_generator_has_model(bdb, gid, 0)
            assert not core.bayesdb_generator_has_model(bdb, gid, 1)
            assert [0] == core.bayesdb_generator_modelnos(bdb, gid)
        except bayeslite.BQLError:
            # loom does not allow model numbers to be specified in drop
            # models, so a BQLError is tolerated for that example only.
            assert exname == 'loom'
def execute_phrase(bdb, phrase, bindings=()):
    """Execute the BQL AST phrase `phrase` and return a cursor of results.

    Dispatches on the type of `phrase`: queries are compiled to SQL and
    executed; transaction commands, table commands, generator commands,
    and model commands are each handled by a dedicated branch below.

    :param bdb: BayesDB handle to execute against.
    :param phrase: a BQL AST node, optionally wrapped in
        `ast.Parametrized`.
    :param bindings: parameter bindings for the phrase.
    :return: a cursor; commands with no results return `empty_cursor(bdb)`.
    :raises BQLError: when a named table, generator, column, metamodel,
        or model does not exist or a name is already taken.
    :raises NotImplementedError: for column renames and for ANALYZE
        without WAIT.
    """
    if isinstance(phrase, ast.Parametrized):
        n_numpar = phrase.n_numpar
        nampar_map = phrase.nampar_map
        phrase = phrase.phrase
        assert 0 < n_numpar
    else:
        n_numpar = 0
        nampar_map = None
        # Ignore extraneous bindings.  XXX Bad idea?

    if ast.is_query(phrase):
        # Compile the query in the transaction in case we need to
        # execute subqueries to determine column lists.  Compiling is
        # a quick tree descent, so this should be fast.
        out = compiler.Output(n_numpar, nampar_map, bindings)
        with bdb.savepoint():
            compiler.compile_query(bdb, phrase, out)
        winders, unwinders = out.getwindings()
        return execute_wound(
            bdb, winders, unwinders, out.getvalue(), out.getbindings())

    if isinstance(phrase, ast.Begin):
        txn.bayesdb_begin_transaction(bdb)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.Rollback):
        txn.bayesdb_rollback_transaction(bdb)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.Commit):
        txn.bayesdb_commit_transaction(bdb)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.CreateTabAs):
        assert ast.is_query(phrase.query)
        with bdb.savepoint():
            out = compiler.Output(n_numpar, nampar_map, bindings)
            qt = sqlite3_quote_name(phrase.name)
            temp = "TEMP " if phrase.temp else ""
            ifnotexists = "IF NOT EXISTS " if phrase.ifnotexists else ""
            out.write("CREATE %sTABLE %s%s AS " % (temp, ifnotexists, qt))
            compiler.compile_query(bdb, phrase.query, out)
            winders, unwinders = out.getwindings()
            with compiler.bayesdb_wind(bdb, winders, unwinders):
                bdb.sql_execute(out.getvalue(), out.getbindings())
        return empty_cursor(bdb)

    if isinstance(phrase, ast.CreateTabSim):
        assert isinstance(phrase.simulation, ast.Simulate)
        with bdb.savepoint():
            if core.bayesdb_has_generator(bdb, phrase.name):
                raise BQLError(
                    bdb,
                    "Name already defined as generator: %s"
                    % (repr(phrase.name),))
            if core.bayesdb_has_table(bdb, phrase.name):
                raise BQLError(
                    bdb,
                    "Name already defined as table: %s"
                    % (repr(phrase.name),))
            if not core.bayesdb_has_generator_default(
                    bdb, phrase.simulation.generator):
                raise BQLError(
                    bdb,
                    "No such generator: %s" % (phrase.simulation.generator,))
            generator_id = core.bayesdb_get_generator_default(
                bdb, phrase.simulation.generator)
            metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
            table = core.bayesdb_generator_table(bdb, generator_id)
            qn = sqlite3_quote_name(phrase.name)
            qt = sqlite3_quote_name(table)
            column_names = phrase.simulation.columns
            # Materialize as a list: the quoted names are iterated
            # several times below (column definitions, INSERT column
            # list, and INSERT placeholders).
            qcns = [sqlite3_quote_name(name) for name in column_names]

            # Map each case-folded column name of the generator's table
            # to its declared SQL type.
            cursor = bdb.sql_execute("PRAGMA table_info(%s)" % (qt,))
            column_sqltypes = {}
            for _colno, name, sqltype, _nonnull, _default, _primary in cursor:
                assert casefold(name) not in column_sqltypes
                column_sqltypes[casefold(name)] = sqltype
            assert 0 < len(column_sqltypes)
            for column_name in column_names:
                if casefold(column_name) not in column_sqltypes:
                    raise BQLError(
                        bdb,
                        "No such column in generator %s table %s: %s"
                        % (repr(phrase.simulation.generator), repr(table),
                           repr(column_name)))
            for column_name, _expression in phrase.simulation.constraints:
                if casefold(column_name) not in column_sqltypes:
                    raise BQLError(
                        bdb,
                        "No such column in generator %s table %s: %s"
                        % (repr(phrase.simulation.generator), repr(table),
                           repr(column_name)))

            # Evaluate nsamples, modelno, and the constraint values with
            # a single SELECT.
            # XXX Move to compiler.py.
            # XXX Copypasta of this in compile_simulate!
            out = compiler.Output(n_numpar, nampar_map, bindings)
            out.write("SELECT ")
            with compiler.compiling_paren(bdb, out, "CAST(", " AS INTEGER)"):
                compiler.compile_nobql_expression(
                    bdb, phrase.simulation.nsamples, out)
            out.write(", ")
            with compiler.compiling_paren(bdb, out, "CAST(", " AS INTEGER)"):
                compiler.compile_nobql_expression(
                    bdb, phrase.simulation.modelno, out)
            for _column_name, expression in phrase.simulation.constraints:
                out.write(", ")
                compiler.compile_nobql_expression(bdb, expression, out)
            winders, unwinders = out.getwindings()
            with compiler.bayesdb_wind(bdb, winders, unwinders):
                cursor = bdb.sql_execute(
                    out.getvalue(), out.getbindings()).fetchall()
            assert len(cursor) == 1
            nsamples = cursor[0][0]
            assert isinstance(nsamples, int)
            modelno = cursor[0][1]
            assert modelno is None or isinstance(modelno, int)
            constraints = [
                (core.bayesdb_generator_column_number(
                    bdb, generator_id, name), value)
                for (name, _expression), value in zip(
                    phrase.simulation.constraints, cursor[0][2:])
            ]
            colnos = [
                core.bayesdb_generator_column_number(bdb, generator_id, name)
                for name in column_names
            ]

            # Create the output table with the source columns' SQL
            # types, then fill it with simulated rows.
            bdb.sql_execute(
                "CREATE %sTABLE %s%s (%s)"
                % (
                    "TEMP " if phrase.temp else "",
                    "IF NOT EXISTS " if phrase.ifnotexists else "",
                    qn,
                    ",".join(
                        "%s %s" % (qcn, column_sqltypes[casefold(column_name)])
                        for qcn, column_name in zip(qcns, column_names)
                    ),
                )
            )
            insert_sql = """
                INSERT INTO %s (%s) VALUES (%s)
            """ % (
                qn,
                ",".join(qcns),
                ",".join("?" for _qcn in qcns),
            )
            for row in bqlfn.bayesdb_simulate(
                    bdb, generator_id, constraints, colnos,
                    modelno=modelno, numpredictions=nsamples):
                bdb.sql_execute(insert_sql, row)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.DropTab):
        with bdb.savepoint():
            sql = "SELECT COUNT(*) FROM bayesdb_generator WHERE tabname = ?"
            cursor = bdb.sql_execute(sql, (phrase.name,))
            if 0 < cursor_value(cursor):
                # XXX Automatically delete the generators?  Generators
                # are more interesting than triggers and indices, so
                # automatic deletion is not obviously right.
                raise BQLError(
                    bdb,
                    "Table still in use by generators: %s"
                    % (repr(phrase.name),))
            bdb.sql_execute(
                "DELETE FROM bayesdb_column WHERE tabname = ?",
                (phrase.name,))
            ifexists = "IF EXISTS " if phrase.ifexists else ""
            qt = sqlite3_quote_name(phrase.name)
            return bdb.sql_execute("DROP TABLE %s%s" % (ifexists, qt))

    if isinstance(phrase, ast.AlterTab):
        with bdb.savepoint():
            table = phrase.table
            if not core.bayesdb_has_table(bdb, table):
                raise BQLError(bdb, "No such table: %s" % (repr(table),))
            for cmd in phrase.commands:
                if isinstance(cmd, ast.AlterTabRenameTab):
                    # If the names differ only in case, we have to do
                    # some extra work because SQLite will reject the
                    # table rename.  Note that we may even have table
                    # == cmd.name here, but if the stored table name
                    # differs in case from cmd.name, we want to update
                    # it anyway.
                    if casefold(table) == casefold(cmd.name):
                        # Go via a temporary table.
                        temp = table + "_temp"
                        while (core.bayesdb_has_table(bdb, temp)
                               or core.bayesdb_has_generator(bdb, temp)):
                            temp += "_temp"
                        rename_table(bdb, table, temp)
                        rename_table(bdb, temp, cmd.name)
                    else:
                        # Make sure nothing else has this name and
                        # rename it.
                        if core.bayesdb_has_table(bdb, cmd.name):
                            raise BQLError(
                                bdb,
                                "Name already defined as table: %s"
                                % (repr(cmd.name),))
                        if core.bayesdb_has_generator(bdb, cmd.name):
                            raise BQLError(
                                bdb,
                                "Name already defined as generator: %s"
                                % (repr(cmd.name),))
                        rename_table(bdb, table, cmd.name)
                    # Remember the new name for subsequent commands.
                    table = cmd.name
                elif isinstance(cmd, ast.AlterTabRenameCol):
                    # XXX Need to deal with this in the compiler.
                    raise NotImplementedError(
                        "Renaming columns not yet implemented.")
                    # NOTE: everything below in this branch is
                    # unreachable until the raise above is removed; it
                    # is kept as the intended implementation.
                    # Make sure the old name exist and the new name does not.
                    old_folded = casefold(cmd.old)
                    new_folded = casefold(cmd.new)
                    if old_folded != new_folded:
                        if not core.bayesdb_table_has_column(
                                bdb, table, cmd.old):
                            raise BQLError(
                                bdb,
                                "No such column in table %s: %s"
                                % (repr(table), repr(cmd.old)))
                        if core.bayesdb_table_has_column(
                                bdb, table, cmd.new):
                            raise BQLError(
                                bdb,
                                "Column already exists in table %s: %s"
                                % (repr(table), repr(cmd.new)))
                    # Update bayesdb_column.  Everything else refers
                    # to columns by (tabname, colno) pairs rather than
                    # by names.
                    update_column_sql = """
                        UPDATE bayesdb_column SET name = :new
                            WHERE tabname = :table AND name = :old
                    """
                    total_changes = bdb.sqlite3.total_changes
                    bdb.sql_execute(
                        update_column_sql,
                        {"table": table, "old": cmd.old, "new": cmd.new})
                    assert bdb.sqlite3.total_changes - total_changes == 1
                    # ...except metamodels may have the (case-folded)
                    # name cached.
                    if old_folded != new_folded:
                        generators_sql = """
                            SELECT id FROM bayesdb_generator
                                WHERE tabname = ?
                        """
                        cursor = bdb.sql_execute(generators_sql, (table,))
                        for (generator_id,) in cursor:
                            metamodel = core.bayesdb_generator_metamodel(
                                bdb, generator_id)
                            metamodel.rename_column(
                                bdb, generator_id, old_folded, new_folded)
                elif isinstance(cmd, ast.AlterTabSetDefGen):
                    if not core.bayesdb_has_generator(bdb, cmd.generator):
                        raise BQLError(
                            bdb,
                            "No such generator: %s" % (repr(cmd.generator),))
                    generator_id = core.bayesdb_get_generator(
                        bdb, cmd.generator)
                    # Clear the table's current default, if any, before
                    # marking the requested generator as default.
                    unset_default_sql = """
                        UPDATE bayesdb_generator SET defaultp = 0
                            WHERE tabname = ? AND defaultp
                    """
                    total_changes = bdb.sqlite3.total_changes
                    bdb.sql_execute(unset_default_sql, (table,))
                    assert bdb.sqlite3.total_changes - total_changes in (0, 1)
                    set_default_sql = """
                        UPDATE bayesdb_generator SET defaultp = 1
                            WHERE id = ?
                    """
                    total_changes = bdb.sqlite3.total_changes
                    bdb.sql_execute(set_default_sql, (generator_id,))
                    assert bdb.sqlite3.total_changes - total_changes == 1
                elif isinstance(cmd, ast.AlterTabUnsetDefGen):
                    unset_default_sql = """
                        UPDATE bayesdb_generator SET defaultp = 0
                            WHERE tabname = ? AND defaultp
                    """
                    total_changes = bdb.sqlite3.total_changes
                    bdb.sql_execute(unset_default_sql, (table,))
                    assert bdb.sqlite3.total_changes - total_changes in (0, 1)
                else:
                    assert False, "Invalid alter table command: %s" % (cmd,)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.CreateGen):
        # Find the metamodel.
        if phrase.metamodel not in bdb.metamodels:
            raise BQLError(
                bdb, "No such metamodel: %s" % (repr(phrase.metamodel),))
        metamodel = bdb.metamodels[phrase.metamodel]

        # Let the metamodel parse the schema itself and call
        # create_generator with the modelled columns.
        with bdb.savepoint():
            def instantiate(columns):
                return instantiate_generator(
                    bdb,
                    phrase.name,
                    phrase.table,
                    metamodel,
                    columns,
                    ifnotexists=phrase.ifnotexists,
                    default=phrase.default,
                )
            metamodel.create_generator(
                bdb, phrase.table, phrase.schema, instantiate)

        # All done.  Nothing to return.
        return empty_cursor(bdb)

    if isinstance(phrase, ast.DropGen):
        with bdb.savepoint():
            if not core.bayesdb_has_generator(bdb, phrase.name):
                if phrase.ifexists:
                    return empty_cursor(bdb)
                raise BQLError(
                    bdb, "No such generator: %s" % (repr(phrase.name),))
            generator_id = core.bayesdb_get_generator(bdb, phrase.name)
            metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)

            # Metamodel-specific destruction.
            metamodel.drop_generator(bdb, generator_id)

            # Drop the columns, models, and, finally, generator.
            drop_columns_sql = """
                DELETE FROM bayesdb_generator_column WHERE generator_id = ?
            """
            bdb.sql_execute(drop_columns_sql, (generator_id,))
            drop_model_sql = """
                DELETE FROM bayesdb_generator_model WHERE generator_id = ?
            """
            bdb.sql_execute(drop_model_sql, (generator_id,))
            drop_generator_sql = """
                DELETE FROM bayesdb_generator WHERE id = ?
            """
            bdb.sql_execute(drop_generator_sql, (generator_id,))
        return empty_cursor(bdb)

    if isinstance(phrase, ast.AlterGen):
        with bdb.savepoint():
            generator = phrase.generator
            if not core.bayesdb_has_generator(bdb, generator):
                raise BQLError(
                    bdb, "No such generator: %s" % (repr(generator),))
            generator_id = core.bayesdb_get_generator(bdb, generator)
            for cmd in phrase.commands:
                if isinstance(cmd, ast.AlterGenRenameGen):
                    # Make sure nothing else has this name.
                    if casefold(generator) != casefold(cmd.name):
                        if core.bayesdb_has_table(bdb, cmd.name):
                            raise BQLError(
                                bdb,
                                "Name already defined as table: %s"
                                % (repr(cmd.name),))
                        if core.bayesdb_has_generator(bdb, cmd.name):
                            raise BQLError(
                                bdb,
                                "Name already defined as generator: %s"
                                % (repr(cmd.name),))
                    # Update bayesdb_generator.  Everything else
                    # refers to it by id.
                    update_generator_sql = """
                        UPDATE bayesdb_generator SET name = ? WHERE id = ?
                    """
                    total_changes = bdb.sqlite3.total_changes
                    bdb.sql_execute(
                        update_generator_sql, (cmd.name, generator_id))
                    assert bdb.sqlite3.total_changes - total_changes == 1
                    # Remember the new name for subsequent commands.
                    generator = cmd.name
                else:
                    assert False, \
                        "Invalid ALTER GENERATOR command: %s" % (repr(cmd),)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.InitModels):
        if not core.bayesdb_has_generator_default(bdb, phrase.generator):
            raise BQLError(
                bdb, "No such generator: %s" % (phrase.generator,))
        generator_id = core.bayesdb_get_generator_default(
            bdb, phrase.generator)
        modelnos = range(phrase.nmodels)
        model_config = None  # XXX For now.

        with bdb.savepoint():
            # Find the model numbers.  Omit existing ones for
            # ifnotexists; reject existing ones otherwise.
            if phrase.ifnotexists:
                modelnos = set(
                    modelno for modelno in modelnos
                    if not core.bayesdb_generator_has_model(
                        bdb, generator_id, modelno)
                )
            else:
                existing = set(
                    modelno for modelno in modelnos
                    if core.bayesdb_generator_has_model(
                        bdb, generator_id, modelno)
                )
                if 0 < len(existing):
                    raise BQLError(
                        bdb,
                        "Generator %s already has models: %s"
                        % (repr(phrase.generator), sorted(existing)))

            # Stop now if there's nothing to initialize.  Return an
            # empty cursor, not None, so callers always get a cursor.
            if len(modelnos) == 0:
                return empty_cursor(bdb)

            # Create the bayesdb_generator_model records.
            modelnos = sorted(modelnos)
            insert_model_sql = """
                INSERT INTO bayesdb_generator_model
                    (generator_id, modelno, iterations)
                    VALUES (:generator_id, :modelno, :iterations)
            """
            for modelno in modelnos:
                bdb.sql_execute(
                    insert_model_sql,
                    {
                        "generator_id": generator_id,
                        "modelno": modelno,
                        "iterations": 0,
                    })

            # Do metamodel-specific initialization.
            metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
            metamodel.initialize_models(
                bdb, generator_id, modelnos, model_config)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.AnalyzeModels):
        if not phrase.wait:
            raise NotImplementedError("No background analysis -- use WAIT.")
        # WARNING: It is the metamodel's responsibility to work in a
        # transaction.
        #
        # WARNING: It is the metamodel's responsibility to update the
        # iteration count in bayesdb_generator_model records.
        #
        # We do this so that the metamodel can save incremental
        # progress in case of ^C in the middle.
        #
        # XXX Put these warning somewhere more appropriate.
        if not core.bayesdb_has_generator_default(bdb, phrase.generator):
            raise BQLError(
                bdb, "No such generator: %s" % (phrase.generator,))
        generator_id = core.bayesdb_get_generator_default(
            bdb, phrase.generator)
        metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
        # XXX Should allow parameters for iterations and ckpt/iter.
        metamodel.analyze_models(
            bdb,
            generator_id,
            modelnos=phrase.modelnos,
            iterations=phrase.iterations,
            max_seconds=phrase.seconds,
            ckpt_iterations=phrase.ckpt_iterations,
            ckpt_seconds=phrase.ckpt_seconds,
        )
        return empty_cursor(bdb)

    if isinstance(phrase, ast.DropModels):
        with bdb.savepoint():
            generator_id = core.bayesdb_get_generator_default(
                bdb, phrase.generator)
            metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
            # modelnos stays None to mean "all models of the generator".
            modelnos = None
            if phrase.modelnos is not None:
                lookup_model_sql = """
                    SELECT COUNT(*) FROM bayesdb_generator_model
                        WHERE generator_id = :generator_id
                        AND modelno = :modelno
                """
                modelnos = sorted(phrase.modelnos)
                for modelno in modelnos:
                    cursor = bdb.sql_execute(
                        lookup_model_sql,
                        {"generator_id": generator_id, "modelno": modelno})
                    if cursor_value(cursor) == 0:
                        raise BQLError(
                            bdb,
                            "No such model in generator %s: %s"
                            % (repr(phrase.generator), repr(modelno)))
            metamodel.drop_models(bdb, generator_id, modelnos=modelnos)
            if modelnos is None:
                drop_models_sql = """
                    DELETE FROM bayesdb_generator_model WHERE generator_id = ?
                """
                bdb.sql_execute(drop_models_sql, (generator_id,))
            else:
                drop_model_sql = """
                    DELETE FROM bayesdb_generator_model
                        WHERE generator_id = :generator_id
                        AND modelno = :modelno
                """
                for modelno in modelnos:
                    bdb.sql_execute(
                        drop_model_sql,
                        {"generator_id": generator_id, "modelno": modelno})
        return empty_cursor(bdb)

    assert False  # XXX
def dot_describe(self, line):
    '''describe BayesDB entities
    [table(s)|population(s)|variables|generator(s)|model(s)] [<name>...]

    Print a human-readable description of the specified BayesDB
    entities.
    '''
    def write_usage():
        # Single source for the usage text, so the "no arguments" and
        # "unknown entity" paths cannot drift apart.
        self.stdout.write('Usage: .describe table(s) [<table>...]\n')
        self.stdout.write(' .describe population(s) [<pop>...]\n')
        self.stdout.write(' .describe variables <pop>\n')
        self.stdout.write(' .describe generator(s) [<gen>...]\n')
        self.stdout.write(' .describe model(s) <gen> [<model>...]\n')

    # XXX Lousy, lousy tokenizer.
    tokens = line.split()
    if len(tokens) == 0:
        write_usage()
        return
    entity = casefold(tokens[0])
    if entity in ('table', 'tables'):
        # With no names, describe every table ('1' is an always-true
        # SQL condition); otherwise restrict to the named tables.
        if len(tokens) == 1:
            params = ()
            qualifier = '1'
        else:
            params = tokens[1:]
            qualifier = \
                '(' + ' OR '.join(['tabname = ?' for _p in params]) + ')'
        # Report every unknown table before giving up.
        ok = True
        for table in params:
            if not core.bayesdb_has_table(self._bdb, table):
                self.stdout.write('No such table: %s\n' % (repr(table),))
                ok = False
        if not ok:
            return
        for table in params:
            core.bayesdb_table_guarantee_columns(self._bdb, table)
        sql = '''
            SELECT tabname, colno, name, shortname
                FROM bayesdb_column
                WHERE %s
                ORDER BY tabname ASC, colno ASC
        ''' % (qualifier,)
        with self._bdb.savepoint():
            pretty.pp_cursor(self.stdout, self._bdb.execute(sql, params))
    elif entity in ('population', 'populations'):
        if len(tokens) == 1:
            params = ()
            qualifier = '1'
        else:
            params = tokens[1:]
            # Numbered placeholders ?1, ?2, ... one per requested name.
            names = ','.join('?%d' % (i + 1,) for i in range(len(params)))
            qualifier = '(name IN (%s))' % (names,)
        ok = True
        for population in params:
            if not core.bayesdb_has_population(self._bdb, population):
                self.stdout.write(
                    'No such population: %s\n' % (repr(population),))
                ok = False
        if not ok:
            return
        with self._bdb.savepoint():
            cursor = self._bdb.sql_execute('''
                SELECT id, name, tabname
                    FROM bayesdb_population
                    WHERE %s
            ''' % (qualifier,), params)
            pretty.pp_cursor(self.stdout, cursor)
    elif entity in ('generator', 'generators'):
        if len(tokens) == 1:
            params = ()
            qualifier = '1'
        else:
            params = tokens[1:]
            names = ','.join('?%d' % (i + 1,) for i in range(len(params)))
            qualifier = '(name IN (%s))' % (names,)
        ok = True
        for generator in params:
            if not core.bayesdb_has_generator(self._bdb, None, generator):
                self.stdout.write(
                    'No such generator: %s\n' % (repr(generator),))
                ok = False
        if not ok:
            return
        sql = '''
            SELECT id, name, tabname, backend
                FROM bayesdb_generator
                WHERE %s
        ''' % (qualifier,)
        with self._bdb.savepoint():
            pretty.pp_cursor(self.stdout, self._bdb.sql_execute(sql, params))
    elif entity == 'variables':
        if len(tokens) != 2:
            self.stdout.write('Usage: .describe variables <population>\n')
            return
        population = tokens[1]
        with self._bdb.savepoint():
            if not core.bayesdb_has_population(self._bdb, population):
                self.stdout.write('No such population: %r\n' % (population,))
                return
            population_id = core.bayesdb_get_population(
                self._bdb, population)
            sql = '''
                SELECT c.colno AS colno, c.name AS name,
                        v.stattype AS stattype, c.shortname AS shortname
                    FROM bayesdb_population AS p,
                        (bayesdb_column AS c LEFT OUTER JOIN
                            bayesdb_variable AS v
                            USING (colno))
                    WHERE p.id = ? AND p.id = v.population_id
                        AND p.tabname = c.tabname
                    ORDER BY colno ASC;
            '''
            cursor = self._bdb.sql_execute(sql, (population_id,))
            pretty.pp_cursor(self.stdout, cursor)
    elif entity in ('model', 'models'):
        if len(tokens) < 2:
            self.stdout.write('Describe models of what generator?\n')
            return
        generator = tokens[1]
        with self._bdb.savepoint():
            if not core.bayesdb_has_generator(self._bdb, None, generator):
                self.stdout.write(
                    'No such generator: %s\n' % (repr(generator),))
                return
            generator_id = core.bayesdb_get_generator(
                self._bdb, None, generator)
            if len(tokens) == 2:
                qualifier = '1'
            else:
                modelnos = []
                for token in tokens[2:]:
                    try:
                        modelno = int(token)
                    except ValueError:
                        self.stdout.write(
                            'Invalid model number: %s\n' % (repr(token),))
                        return
                    if not core.bayesdb_generator_has_model(
                            self._bdb, generator_id, modelno):
                        self.stdout.write('No such model: %d\n' % (modelno,))
                        return
                    modelnos.append(modelno)
                # Every entry went through int() above, so formatting
                # the numbers directly into the SQL text is safe.
                qualifier = 'modelno IN (%s)' % \
                    (','.join(map(str, modelnos)),)
            sql = '''
                SELECT modelno, iterations FROM bayesdb_generator_model
                    WHERE generator_id = ? AND %s
            ''' % (qualifier,)
            cursor = self._bdb.sql_execute(sql, (generator_id,))
            pretty.pp_cursor(self.stdout, cursor)
    else:
        write_usage()
def _test_example(bdb, exname):
    """Exercise the table/generator/model lifecycle for one example.

    Looks up `exname` in the module-level `examples` table, registers
    its metamodel with `bdb`, and then checks, step by step, that
    creating, dropping and analyzing tables, generators and models
    behaves as expected and that savepoint rollbacks undo each step.

    :param bdb: open BayesDB handle the statements are executed against
    :param exname: key into `examples` selecting the fixture to run
    """
    mm, t, t_sql, data_sql, data, g, g_bql, g_bqlbad0, g_bqlbad1 = \
        examples[exname]
    qt = bql_quote_name(t)
    qg = bql_quote_name(g)

    bayeslite.bayesdb_register_metamodel(bdb, mm())

    # Create a table.
    assert not core.bayesdb_has_table(bdb, t)
    with bdb.savepoint_rollback():
        bdb.sql_execute(t_sql)
        assert core.bayesdb_has_table(bdb, t)
    assert not core.bayesdb_has_table(bdb, t)
    bdb.sql_execute(t_sql)
    assert core.bayesdb_has_table(bdb, t)

    # Insert data into the table.
    assert bdb.execute('SELECT COUNT(*) FROM %s' % (qt,)).fetchvalue() == 0
    for row in data:
        bdb.sql_execute(data_sql, row)
    n = len(data)
    assert bdb.execute('SELECT COUNT(*) FROM %s' % (qt,)).fetchvalue() == n

    # Create a generator.  Make sure savepoints work for this.
    assert not core.bayesdb_has_generator(bdb, g)
    with pytest.raises(Exception):
        with bdb.savepoint():
            bdb.execute(g_bqlbad0)
    assert not core.bayesdb_has_generator(bdb, g)
    with pytest.raises(Exception):
        with bdb.savepoint():
            bdb.execute(g_bqlbad1)
    assert not core.bayesdb_has_generator(bdb, g)
    with bdb.savepoint_rollback():
        bdb.execute(g_bql)
        assert core.bayesdb_has_generator(bdb, g)
    assert not core.bayesdb_has_generator(bdb, g)
    bdb.execute(g_bql)
    assert core.bayesdb_has_generator(bdb, g)
    # Creating the same generator a second time must fail and must not
    # disturb the existing one.
    with pytest.raises(Exception):
        bdb.execute(g_bql)
    assert core.bayesdb_has_generator(bdb, g)

    # Initialize models.
    gid = core.bayesdb_get_generator(bdb, g)
    assert not core.bayesdb_generator_has_model(bdb, gid, 0)
    assert [] == core.bayesdb_generator_modelnos(bdb, gid)
    with bdb.savepoint_rollback():
        bdb.execute('INITIALIZE 1 MODEL FOR %s' % (qg,))
        assert core.bayesdb_generator_has_model(bdb, gid, 0)
        assert [0] == core.bayesdb_generator_modelnos(bdb, gid)
    with bdb.savepoint_rollback():
        bdb.execute('INITIALIZE 10 MODELS FOR %s' % (qg,))
        for i in range(10):
            assert core.bayesdb_generator_has_model(bdb, gid, i)
        # Compare against an explicit list: a bare range object is only
        # equal to a list where range() itself returns a list.
        assert list(range(10)) == core.bayesdb_generator_modelnos(bdb, gid)
    bdb.execute('INITIALIZE 2 MODELS FOR %s' % (qg,))

    # Test dropping things.
    with pytest.raises(bayeslite.BQLError):
        bdb.execute('DROP TABLE %s' % (qt,))
    with bdb.savepoint_rollback():
        # Note that sql_execute does not protect us!
        bdb.sql_execute('DROP TABLE %s' % (qt,))
        assert not core.bayesdb_has_table(bdb, t)
    assert core.bayesdb_has_table(bdb, t)
    # XXX Should we reject dropping a generator when there remain
    # models?  Should we not reject dropping a table when there remain
    # generators?  A table can be dropped when there remain indices.
    #
    # with pytest.raises(bayeslite.BQLError):
    #     # Models remain.
    #     bdb.execute('DROP GENERATOR %s' % (qg,))
    with bdb.savepoint_rollback():
        bdb.execute('DROP GENERATOR %s' % (qg,))
        assert not core.bayesdb_has_generator(bdb, g)
    assert core.bayesdb_has_generator(bdb, g)
    with bdb.savepoint_rollback():
        bdb.execute('DROP GENERATOR %s' % (qg,))
        assert not core.bayesdb_has_generator(bdb, g)
        bdb.execute(g_bql)
        assert core.bayesdb_has_generator(bdb, g)
    assert core.bayesdb_has_generator(bdb, g)
    # The rollback must have restored the original generator, id and all.
    assert gid == core.bayesdb_get_generator(bdb, g)

    # Test dropping models.
    with bdb.savepoint_rollback():
        bdb.execute('DROP MODEL 1 FROM %s' % (qg,))
        assert core.bayesdb_generator_has_model(bdb, gid, 0)
        assert not core.bayesdb_generator_has_model(bdb, gid, 1)
        assert [0] == core.bayesdb_generator_modelnos(bdb, gid)

    # Test analyzing models.
    bdb.execute('ANALYZE %s FOR 1 ITERATION WAIT' % (qg,))
    bdb.execute('ANALYZE %s MODEL 0 FOR 1 ITERATION WAIT' % (qg,))
    bdb.execute('ANALYZE %s MODEL 1 FOR 1 ITERATION WAIT' % (qg,))
def execute_phrase(bdb, phrase, bindings=()):
    """Execute the BQL AST phrase `phrase` and return a cursor of results.

    The phrase is dispatched on its AST node type: queries are compiled
    to SQL and executed; transaction commands are forwarded to `txn`;
    the CREATE/DROP/ALTER TABLE, CREATE/DROP/ALTER GENERATOR and
    INITIALIZE/ANALYZE/DROP MODELS commands update the bayesdb_*
    bookkeeping tables and call into the generator's metamodel.

    :param bdb: BayesDB handle to execute against
    :param phrase: BQL AST node, optionally wrapped in `ast.Parametrized`
    :param bindings: parameter values handed to the compiler output
    :return: the cursor from `execute_wound` for queries, the cursor of
        the underlying ``DROP TABLE`` statement for DROP TABLE, and
        `empty_cursor(bdb)` for every other command
    :raises BQLError: when a name is already taken, or when a referenced
        table, generator, metamodel, column or model does not exist
    :raises NotImplementedError: for column renames and for ANALYZE
        without WAIT
    """
    if isinstance(phrase, ast.Parametrized):
        n_numpar = phrase.n_numpar
        nampar_map = phrase.nampar_map
        phrase = phrase.phrase
        assert 0 < n_numpar
    else:
        n_numpar = 0
        nampar_map = None
        # Ignore extraneous bindings.  XXX Bad idea?

    if ast.is_query(phrase):
        # Compile the query in the transaction in case we need to
        # execute subqueries to determine column lists.  Compiling is
        # a quick tree descent, so this should be fast.
        out = compiler.Output(n_numpar, nampar_map, bindings)
        with bdb.savepoint():
            compiler.compile_query(bdb, phrase, out)
        winders, unwinders = out.getwindings()
        return execute_wound(bdb, winders, unwinders, out.getvalue(),
            out.getbindings())

    if isinstance(phrase, ast.Begin):
        txn.bayesdb_begin_transaction(bdb)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.Rollback):
        txn.bayesdb_rollback_transaction(bdb)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.Commit):
        txn.bayesdb_commit_transaction(bdb)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.CreateTabAs):
        assert ast.is_query(phrase.query)
        with bdb.savepoint():
            out = compiler.Output(n_numpar, nampar_map, bindings)
            qt = sqlite3_quote_name(phrase.name)
            temp = 'TEMP ' if phrase.temp else ''
            ifnotexists = 'IF NOT EXISTS ' if phrase.ifnotexists else ''
            out.write('CREATE %sTABLE %s%s AS ' % (temp, ifnotexists, qt))
            compiler.compile_query(bdb, phrase.query, out)
            winders, unwinders = out.getwindings()
            with compiler.bayesdb_wind(bdb, winders, unwinders):
                bdb.sql_execute(out.getvalue(), out.getbindings())
        return empty_cursor(bdb)

    if isinstance(phrase, ast.CreateTabSim):
        assert isinstance(phrase.simulation, ast.Simulate)
        with bdb.savepoint():
            if core.bayesdb_has_generator(bdb, phrase.name):
                raise BQLError(bdb, 'Name already defined as generator: %s' %
                    (repr(phrase.name),))
            if core.bayesdb_has_table(bdb, phrase.name):
                raise BQLError(bdb, 'Name already defined as table: %s' %
                    (repr(phrase.name),))
            if not core.bayesdb_has_generator_default(bdb,
                    phrase.simulation.generator):
                raise BQLError(bdb, 'No such generator: %s' %
                    (phrase.simulation.generator,))
            generator_id = core.bayesdb_get_generator_default(bdb,
                phrase.simulation.generator)
            metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
            table = core.bayesdb_generator_table(bdb, generator_id)
            qn = sqlite3_quote_name(phrase.name)
            qt = sqlite3_quote_name(table)
            qgn = sqlite3_quote_name(phrase.simulation.generator)
            column_names = phrase.simulation.columns
            # Materialize the quoted names: the sequence is traversed
            # several times below (CREATE TABLE and INSERT), so it must
            # not be a one-shot iterator.
            qcns = [sqlite3_quote_name(name) for name in column_names]
            # Collect the SQL types of the base table's columns, keyed
            # by case-folded name, to type the simulated table.
            cursor = bdb.sql_execute('PRAGMA table_info(%s)' % (qt,))
            column_sqltypes = {}
            for _colno, name, sqltype, _nonnull, _default, _primary in cursor:
                assert casefold(name) not in column_sqltypes
                column_sqltypes[casefold(name)] = sqltype
            assert 0 < len(column_sqltypes)
            for column_name in column_names:
                if casefold(column_name) not in column_sqltypes:
                    raise BQLError(bdb, 'No such column'
                        ' in generator %s table %s: %s' %
                        (repr(phrase.simulation.generator),
                         repr(table),
                         repr(column_name)))
            for column_name, _expression in phrase.simulation.constraints:
                if casefold(column_name) not in column_sqltypes:
                    raise BQLError(bdb, 'No such column'
                        ' in generator %s table %s: %s' %
                        (repr(phrase.simulation.generator),
                         repr(table),
                         repr(column_name)))
            # XXX Move to compiler.py.
            # XXX Copypasta of this in compile_simulate!
            #
            # Evaluate the sample count, the model number and every
            # constraint expression with one SELECT.
            out = compiler.Output(n_numpar, nampar_map, bindings)
            out.write('SELECT ')
            with compiler.compiling_paren(bdb, out, 'CAST(', ' AS INTEGER)'):
                compiler.compile_nobql_expression(bdb,
                    phrase.simulation.nsamples, out)
            out.write(', ')
            with compiler.compiling_paren(bdb, out, 'CAST(', ' AS INTEGER)'):
                compiler.compile_nobql_expression(bdb,
                    phrase.simulation.modelno, out)
            for _column_name, expression in phrase.simulation.constraints:
                out.write(', ')
                compiler.compile_nobql_expression(bdb, expression, out)
            winders, unwinders = out.getwindings()
            with compiler.bayesdb_wind(bdb, winders, unwinders):
                cursor = bdb.sql_execute(out.getvalue(),
                    out.getbindings()).fetchall()
            assert len(cursor) == 1
            nsamples = cursor[0][0]
            assert isinstance(nsamples, int)
            modelno = cursor[0][1]
            assert modelno is None or isinstance(modelno, int)
            constraints = \
                [(core.bayesdb_generator_column_number(bdb, generator_id,
                        name),
                  value)
                 for (name, _expression), value in
                     zip(phrase.simulation.constraints, cursor[0][2:])]
            colnos = \
                [core.bayesdb_generator_column_number(bdb, generator_id, name)
                 for name in column_names]
            bdb.sql_execute('CREATE %sTABLE %s%s (%s)' %
                ('TEMP ' if phrase.temp else '',
                 'IF NOT EXISTS ' if phrase.ifnotexists else '',
                 qn,
                 ','.join('%s %s' %
                        (qcn, column_sqltypes[casefold(column_name)])
                    for qcn, column_name in zip(qcns, column_names))))
            insert_sql = '''
                INSERT INTO %s (%s) VALUES (%s)
            ''' % (qn, ','.join(qcns), ','.join('?' for qcn in qcns))
            for row in bqlfn.bayesdb_simulate(bdb, generator_id, constraints,
                    colnos, modelno=modelno, numpredictions=nsamples):
                bdb.sql_execute(insert_sql, row)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.DropTab):
        with bdb.savepoint():
            sql = 'SELECT COUNT(*) FROM bayesdb_generator WHERE tabname = ?'
            cursor = bdb.sql_execute(sql, (phrase.name,))
            if 0 < cursor_value(cursor):
                # XXX Automatically delete the generators?  Generators
                # are more interesting than triggers and indices, so
                # automatic deletion is not obviously right.
                raise BQLError(bdb, 'Table still in use by generators: %s' %
                    (repr(phrase.name),))
            bdb.sql_execute('DELETE FROM bayesdb_column WHERE tabname = ?',
                (phrase.name,))
            ifexists = 'IF EXISTS ' if phrase.ifexists else ''
            qt = sqlite3_quote_name(phrase.name)
            return bdb.sql_execute('DROP TABLE %s%s' % (ifexists, qt))

    if isinstance(phrase, ast.AlterTab):
        with bdb.savepoint():
            table = phrase.table
            if not core.bayesdb_has_table(bdb, table):
                raise BQLError(bdb, 'No such table: %s' % (repr(table),))
            for cmd in phrase.commands:
                if isinstance(cmd, ast.AlterTabRenameTab):
                    # If the names differ only in case, we have to do
                    # some extra work because SQLite will reject the
                    # table rename.  Note that we may even have table
                    # == cmd.name here, but if the stored table name
                    # differs in case from cmd.name, we want to update
                    # it anyway.
                    if casefold(table) == casefold(cmd.name):
                        # Go via a temporary table.
                        temp = table + '_temp'
                        while core.bayesdb_has_table(bdb, temp) or \
                              core.bayesdb_has_generator(bdb, temp):
                            temp += '_temp'
                        rename_table(bdb, table, temp)
                        rename_table(bdb, temp, cmd.name)
                    else:
                        # Make sure nothing else has this name and
                        # rename it.
                        if core.bayesdb_has_table(bdb, cmd.name):
                            raise BQLError(bdb,
                                'Name already defined as table'
                                ': %s' % (repr(cmd.name),))
                        if core.bayesdb_has_generator(bdb, cmd.name):
                            raise BQLError(bdb,
                                'Name already defined'
                                ' as generator: %s' % (repr(cmd.name),))
                        rename_table(bdb, table, cmd.name)
                    # Remember the new name for subsequent commands.
                    table = cmd.name
                elif isinstance(cmd, ast.AlterTabRenameCol):
                    # XXX Need to deal with this in the compiler.
                    raise NotImplementedError('Renaming columns'
                        ' not yet implemented.')
                    # NOTE: everything below in this branch is
                    # unreachable until the raise above is removed; it
                    # is kept as the draft of the eventual
                    # implementation.
                    #
                    # Make sure the old name exist and the new name does not.
                    old_folded = casefold(cmd.old)
                    new_folded = casefold(cmd.new)
                    if old_folded != new_folded:
                        if not core.bayesdb_table_has_column(bdb, table,
                                cmd.old):
                            raise BQLError(bdb,
                                'No such column in table %s'
                                ': %s' % (repr(table), repr(cmd.old)))
                        if core.bayesdb_table_has_column(bdb, table, cmd.new):
                            raise BQLError(bdb,
                                'Column already exists'
                                ' in table %s: %s' %
                                (repr(table), repr(cmd.new)))
                    # Update bayesdb_column.  Everything else refers
                    # to columns by (tabname, colno) pairs rather than
                    # by names.
                    update_column_sql = '''
                        UPDATE bayesdb_column SET name = :new
                            WHERE tabname = :table AND name = :old
                    '''
                    total_changes = bdb._sqlite3.totalchanges()
                    bdb.sql_execute(update_column_sql, {
                        'table': table,
                        'old': cmd.old,
                        'new': cmd.new,
                    })
                    assert bdb._sqlite3.totalchanges() - total_changes == 1
                    # ...except metamodels may have the (case-folded)
                    # name cached.
                    if old_folded != new_folded:
                        generators_sql = '''
                            SELECT id FROM bayesdb_generator WHERE tabname = ?
                        '''
                        cursor = bdb.sql_execute(generators_sql, (table,))
                        for (generator_id,) in cursor:
                            metamodel = core.bayesdb_generator_metamodel(bdb,
                                generator_id)
                            metamodel.rename_column(bdb, generator_id,
                                old_folded, new_folded)
                elif isinstance(cmd, ast.AlterTabSetDefGen):
                    if not core.bayesdb_has_generator(bdb, cmd.generator):
                        raise BQLError(bdb, 'No such generator: %s' %
                            (repr(cmd.generator),))
                    generator_id = core.bayesdb_get_generator(bdb,
                        cmd.generator)
                    bayesdb_schema_required(bdb, 6, "generator defaults")
                    # Clear any current default for the table first, then
                    # mark the requested generator as the default.
                    unset_default_sql = '''
                        UPDATE bayesdb_generator SET defaultp = 0
                            WHERE tabname = ? AND defaultp
                    '''
                    total_changes = bdb._sqlite3.totalchanges()
                    bdb.sql_execute(unset_default_sql, (table,))
                    assert bdb._sqlite3.totalchanges() - total_changes in (0, 1)
                    set_default_sql = '''
                        UPDATE bayesdb_generator SET defaultp = 1 WHERE id = ?
                    '''
                    total_changes = bdb._sqlite3.totalchanges()
                    bdb.sql_execute(set_default_sql, (generator_id,))
                    assert bdb._sqlite3.totalchanges() - total_changes == 1
                elif isinstance(cmd, ast.AlterTabUnsetDefGen):
                    unset_default_sql = '''
                        UPDATE bayesdb_generator SET defaultp = 0
                            WHERE tabname = ? AND defaultp
                    '''
                    total_changes = bdb._sqlite3.totalchanges()
                    bdb.sql_execute(unset_default_sql, (table,))
                    assert bdb._sqlite3.totalchanges() - total_changes in (0, 1)
                else:
                    assert False, 'Invalid alter table command: %s' % \
                        (cmd,)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.CreateGen):
        # Find the metamodel.
        if phrase.metamodel not in bdb.metamodels:
            raise BQLError(bdb, 'No such metamodel: %s' %
                (repr(phrase.metamodel),))
        metamodel = bdb.metamodels[phrase.metamodel]

        # Let the metamodel parse the schema itself and call
        # create_generator with the modelled columns.
        with bdb.savepoint():
            if core.bayesdb_has_generator(bdb, phrase.name):
                if not phrase.ifnotexists:
                    raise BQLError(bdb,
                        'Name already defined as generator: %s' %
                        (repr(phrase.name),))
            else:
                def instantiate(columns):
                    return instantiate_generator(bdb, phrase.name,
                        phrase.table, metamodel, columns,
                        default=phrase.default)
                metamodel.create_generator(bdb, phrase.table, phrase.schema,
                    instantiate)

        # All done.  Nothing to return.
        return empty_cursor(bdb)

    if isinstance(phrase, ast.DropGen):
        with bdb.savepoint():
            if not core.bayesdb_has_generator(bdb, phrase.name):
                if phrase.ifexists:
                    return empty_cursor(bdb)
                raise BQLError(bdb, 'No such generator: %s' %
                    (repr(phrase.name),))
            generator_id = core.bayesdb_get_generator(bdb, phrase.name)
            metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)

            # Metamodel-specific destruction.
            metamodel.drop_generator(bdb, generator_id)

            # Drop the columns, models, and, finally, generator.
            drop_columns_sql = '''
                DELETE FROM bayesdb_generator_column WHERE generator_id = ?
            '''
            bdb.sql_execute(drop_columns_sql, (generator_id,))
            drop_model_sql = '''
                DELETE FROM bayesdb_generator_model WHERE generator_id = ?
            '''
            bdb.sql_execute(drop_model_sql, (generator_id,))
            drop_generator_sql = '''
                DELETE FROM bayesdb_generator WHERE id = ?
            '''
            bdb.sql_execute(drop_generator_sql, (generator_id,))
        return empty_cursor(bdb)

    if isinstance(phrase, ast.AlterGen):
        with bdb.savepoint():
            generator = phrase.generator
            if not core.bayesdb_has_generator(bdb, generator):
                raise BQLError(bdb, 'No such generator: %s' %
                    (repr(generator),))
            generator_id = core.bayesdb_get_generator(bdb, generator)
            for cmd in phrase.commands:
                if isinstance(cmd, ast.AlterGenRenameGen):
                    # Make sure nothing else has this name.
                    if casefold(generator) != casefold(cmd.name):
                        if core.bayesdb_has_table(bdb, cmd.name):
                            raise BQLError(bdb,
                                'Name already defined as table'
                                ': %s' % (repr(cmd.name),))
                        if core.bayesdb_has_generator(bdb, cmd.name):
                            raise BQLError(bdb,
                                'Name already defined'
                                ' as generator: %s' % (repr(cmd.name),))
                    # Update bayesdb_generator.  Everything else
                    # refers to it by id.
                    update_generator_sql = '''
                        UPDATE bayesdb_generator SET name = ? WHERE id = ?
                    '''
                    total_changes = bdb._sqlite3.totalchanges()
                    bdb.sql_execute(update_generator_sql,
                        (cmd.name, generator_id))
                    assert bdb._sqlite3.totalchanges() - total_changes == 1
                    # Remember the new name for subsequent commands.
                    generator = cmd.name
                else:
                    assert False, 'Invalid ALTER GENERATOR command: %s' % \
                        (repr(cmd),)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.InitModels):
        if not core.bayesdb_has_generator_default(bdb, phrase.generator):
            raise BQLError(bdb, 'No such generator: %s' %
                (phrase.generator,))
        generator_id = core.bayesdb_get_generator_default(bdb,
            phrase.generator)
        modelnos = range(phrase.nmodels)
        model_config = None         # XXX For now.

        with bdb.savepoint():
            # Find the model numbers.  Omit existing ones for
            # ifnotexists; reject existing ones otherwise.
            if phrase.ifnotexists:
                modelnos = set(modelno for modelno in modelnos
                    if not core.bayesdb_generator_has_model(bdb, generator_id,
                        modelno))
            else:
                existing = set(modelno for modelno in modelnos
                    if core.bayesdb_generator_has_model(bdb, generator_id,
                        modelno))
                if 0 < len(existing):
                    raise BQLError(bdb,
                        'Generator %s already has models: %s' %
                        (repr(phrase.generator), sorted(existing)))

            # Stop now if there's nothing to initialize.  Return an
            # empty cursor, like every other command, so callers always
            # receive a cursor rather than None.
            if len(modelnos) == 0:
                return empty_cursor(bdb)

            # Create the bayesdb_generator_model records.
            modelnos = sorted(modelnos)
            insert_model_sql = '''
                INSERT INTO bayesdb_generator_model
                    (generator_id, modelno, iterations)
                    VALUES (:generator_id, :modelno, :iterations)
            '''
            for modelno in modelnos:
                bdb.sql_execute(insert_model_sql, {
                    'generator_id': generator_id,
                    'modelno': modelno,
                    'iterations': 0,
                })

            # Do metamodel-specific initialization.
            metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
            metamodel.initialize_models(bdb, generator_id, modelnos,
                model_config)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.AnalyzeModels):
        if not phrase.wait:
            raise NotImplementedError('No background analysis -- use WAIT.')
        # WARNING: It is the metamodel's responsibility to work in a
        # transaction.
        #
        # WARNING: It is the metamodel's responsibility to update the
        # iteration count in bayesdb_generator_model records.
        #
        # We do this so that the metamodel can save incremental
        # progress in case of ^C in the middle.
        #
        # XXX Put these warning somewhere more appropriate.
        if not core.bayesdb_has_generator_default(bdb, phrase.generator):
            raise BQLError(bdb, 'No such generator: %s' %
                (phrase.generator,))
        generator_id = core.bayesdb_get_generator_default(bdb,
            phrase.generator)
        metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
        # XXX Should allow parameters for iterations and ckpt/iter.
        metamodel.analyze_models(bdb, generator_id,
            modelnos=phrase.modelnos,
            iterations=phrase.iterations,
            max_seconds=phrase.seconds,
            ckpt_iterations=phrase.ckpt_iterations,
            ckpt_seconds=phrase.ckpt_seconds)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.DropModels):
        with bdb.savepoint():
            generator_id = core.bayesdb_get_generator_default(bdb,
                phrase.generator)
            metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
            # modelnos stays None when no specific models were named,
            # which means "drop all models of this generator".
            modelnos = None
            if phrase.modelnos is not None:
                lookup_model_sql = '''
                    SELECT COUNT(*) FROM bayesdb_generator_model
                        WHERE generator_id = :generator_id
                        AND modelno = :modelno
                '''
                modelnos = sorted(phrase.modelnos)
                for modelno in modelnos:
                    cursor = bdb.sql_execute(lookup_model_sql, {
                        'generator_id': generator_id,
                        'modelno': modelno,
                    })
                    if cursor_value(cursor) == 0:
                        raise BQLError(bdb, 'No such model'
                            ' in generator %s: %s' %
                            (repr(phrase.generator), repr(modelno)))
            metamodel.drop_models(bdb, generator_id, modelnos=modelnos)
            if modelnos is None:
                drop_models_sql = '''
                    DELETE FROM bayesdb_generator_model WHERE generator_id = ?
                '''
                bdb.sql_execute(drop_models_sql, (generator_id,))
            else:
                drop_model_sql = '''
                    DELETE FROM bayesdb_generator_model
                        WHERE generator_id = :generator_id
                        AND modelno = :modelno
                '''
                for modelno in modelnos:
                    bdb.sql_execute(drop_model_sql, {
                        'generator_id': generator_id,
                        'modelno': modelno,
                    })
        return empty_cursor(bdb)

    assert False                # XXX
assert False, 'Invalid ALTER GENERATOR command: %s' % \ (repr(cmd),) return empty_cursor(bdb) if isinstance(phrase, ast.InitModels): if not core.bayesdb_has_generator(bdb, None, phrase.generator): raise BQLError(bdb, 'No such generator: %s' % (phrase.generator, )) generator_id = core.bayesdb_get_generator(bdb, None, phrase.generator) modelnos = range(phrase.nmodels) with bdb.savepoint(): # Find the model numbers. Omit existing ones for # ifnotexists; reject existing ones otherwise. if phrase.ifnotexists: modelnos = set(modelno for modelno in modelnos if not core.bayesdb_generator_has_model( bdb, generator_id, modelno)) else: existing = set(modelno for modelno in modelnos if core.bayesdb_generator_has_model( bdb, generator_id, modelno)) if 0 < len(existing): raise BQLError( bdb, 'Generator %s already has models: %s' % (repr(phrase.generator), sorted(existing))) # Stop now if there's nothing to initialize. if len(modelnos) == 0: return # Create the bayesdb_generator_model records. modelnos = sorted(modelnos)
def dot_describe(self, line):
    """describe BayesDB entities
    [table(s)|generator(s)|columns|model(s)] [<name>...]

    Print a human-readable description of the specified BayesDB
    entities.
    """
    # NOTE(review): the docstring above is kept verbatim because it may
    # double as the shell's help text -- confirm before rewording it.
    emit = self.stdout.write
    usage = (
        'Usage: .describe table(s) [<table>...]\n',
        ' .describe generator(s) [<gen>...]\n',
        ' .describe columns <gen>\n',
        ' .describe model(s) <gen> [<model>...]\n',
    )

    # XXX Lousy, lousy tokenizer.
    tokens = line.split()
    if not tokens:
        for text in usage:
            emit(text)
        return

    # Fold the sub-command once; each branch below handles one entity
    # kind and returns when it is done.
    kind = casefold(tokens[0])
    args = tokens[1:]

    if kind in ('table', 'tables'):
        bdb = self._bdb
        if not args:
            # No names given: describe every table.
            params = ()
            where = '1'
        else:
            params = args
            where = '(' + ' OR '.join(['tabname = ?'] * len(params)) + ')'
            # Report every unknown table before giving up.
            n_missing = 0
            for table in params:
                if core.bayesdb_has_table(bdb, table):
                    continue
                emit('No such table: %r\n' % (table,))
                n_missing += 1
            if n_missing:
                return
            for table in params:
                core.bayesdb_table_guarantee_columns(bdb, table)
        sql = '''
            SELECT tabname, colno, name, shortname
                FROM bayesdb_column
                WHERE %s
                ORDER BY tabname ASC, colno ASC
        ''' % (where,)
        with bdb.savepoint():
            pretty.pp_cursor(self.stdout, bdb.execute(sql, params))
        return

    if kind in ('generator', 'generators'):
        bdb = self._bdb
        if not args:
            # No names given: describe every generator.
            params = ()
            where = '1'
        else:
            params = args
            # Numbered placeholders let the same parameters be used in
            # both IN lists.
            names = ','.join(
                '?%d' % (k,) for k in range(1, len(params) + 1))
            where = '''
                (name IN ({names}) OR (defaultp AND tabname IN ({names})))
            '''.format(names=names)
            # Report every unknown generator before giving up.
            n_missing = 0
            for generator in params:
                if core.bayesdb_has_generator_default(bdb, generator):
                    continue
                emit('No such generator: %r\n' % (generator,))
                n_missing += 1
            if n_missing:
                return
        sql = '''
            SELECT id, name, tabname, metamodel
                FROM bayesdb_generator
                WHERE %s
        ''' % (where,)
        with bdb.savepoint():
            pretty.pp_cursor(self.stdout, bdb.sql_execute(sql, params))
        return

    if kind == 'columns':
        if len(tokens) != 2:
            emit('Describe columns of what generator?\n')
            return
        bdb = self._bdb
        generator = tokens[1]
        with bdb.savepoint():
            if not core.bayesdb_has_generator_default(bdb, generator):
                emit('No such generator: %r\n' % (generator,))
                return
            generator_id = core.bayesdb_get_generator_default(bdb, generator)
            sql = '''
                SELECT c.colno AS colno, c.name AS name,
                        gc.stattype AS stattype, c.shortname AS shortname
                    FROM bayesdb_generator AS g,
                        (bayesdb_column AS c LEFT OUTER JOIN
                            bayesdb_generator_column AS gc
                            USING (colno))
                    WHERE g.id = ? AND g.id = gc.generator_id
                        AND g.tabname = c.tabname
                    ORDER BY colno ASC;
            '''
            rows = bdb.sql_execute(sql, (generator_id,))
            pretty.pp_cursor(self.stdout, rows)
        return

    if kind in ('model', 'models'):
        if len(tokens) < 2:
            emit('Describe models of what generator?\n')
            return
        bdb = self._bdb
        generator = tokens[1]
        with bdb.savepoint():
            if not core.bayesdb_has_generator_default(bdb, generator):
                emit('No such generator: %r\n' % (generator,))
                return
            generator_id = core.bayesdb_get_generator_default(bdb, generator)
            # Validate each requested model number; stop at the first
            # one that is malformed or unknown.
            wanted = []
            for token in tokens[2:]:
                try:
                    modelno = int(token)
                except ValueError:
                    emit('Invalid model number: %r\n' % (token,))
                    return
                if not core.bayesdb_generator_has_model(bdb, generator_id,
                        modelno):
                    emit('No such model: %d\n' % (modelno,))
                    return
                wanted.append(modelno)
            if len(tokens) == 2:
                # No model numbers given: describe every model.
                where = '1'
            else:
                where = 'modelno IN (%s)' % (
                    ','.join(str(modelno) for modelno in wanted),)
            sql = '''
                SELECT modelno, iterations FROM bayesdb_generator_model
                    WHERE generator_id = ? AND %s
            ''' % (where,)
            rows = bdb.sql_execute(sql, (generator_id,))
            pretty.pp_cursor(self.stdout, rows)
        return

    # Unrecognized sub-command.
    for text in usage:
        emit(text)
def execute_phrase(bdb, phrase, bindings=()): """Execute the BQL AST phrase `phrase` and return a cursor of results.""" if isinstance(phrase, ast.Parametrized): n_numpar = phrase.n_numpar nampar_map = phrase.nampar_map phrase = phrase.phrase assert 0 < n_numpar else: n_numpar = 0 nampar_map = None # Ignore extraneous bindings. XXX Bad idea? if ast.is_query(phrase): # Compile the query in the transaction in case we need to # execute subqueries to determine column lists. Compiling is # a quick tree descent, so this should be fast. out = compiler.Output(n_numpar, nampar_map, bindings) with bdb.savepoint(): compiler.compile_query(bdb, phrase, out) winders, unwinders = out.getwindings() return execute_wound(bdb, winders, unwinders, out.getvalue(), out.getbindings()) if isinstance(phrase, ast.Begin): txn.bayesdb_begin_transaction(bdb) return empty_cursor(bdb) if isinstance(phrase, ast.Rollback): txn.bayesdb_rollback_transaction(bdb) return empty_cursor(bdb) if isinstance(phrase, ast.Commit): txn.bayesdb_commit_transaction(bdb) return empty_cursor(bdb) if isinstance(phrase, ast.CreateTabAs): assert ast.is_query(phrase.query) with bdb.savepoint(): if core.bayesdb_has_table(bdb, phrase.name): if phrase.ifnotexists: return empty_cursor(bdb) else: raise BQLError( bdb, 'Name already defined as table: %s' % (repr(phrase.name), )) out = compiler.Output(n_numpar, nampar_map, bindings) qt = sqlite3_quote_name(phrase.name) temp = 'TEMP ' if phrase.temp else '' ifnotexists = 'IF NOT EXISTS ' if phrase.ifnotexists else '' out.write('CREATE %sTABLE %s%s AS ' % (temp, ifnotexists, qt)) compiler.compile_query(bdb, phrase.query, out) winders, unwinders = out.getwindings() with compiler.bayesdb_wind(bdb, winders, unwinders): bdb.sql_execute(out.getvalue(), out.getbindings()) return empty_cursor(bdb) if isinstance(phrase, ast.CreateTabCsv): with bdb.savepoint(): table_exists = core.bayesdb_has_table(bdb, phrase.name) if table_exists: if phrase.ifnotexists: return empty_cursor(bdb) else: 
raise BQLError( bdb, 'Table already exists: %s' % (repr(phrase.name), )) bayesdb_read_csv_file(bdb, phrase.name, phrase.csv, header=True, create=True) return empty_cursor(bdb) if isinstance(phrase, ast.CreateTabSimModels): assert isinstance(phrase.simulation, ast.SimulateModels) with bdb.savepoint(): # Check if table exists. if core.bayesdb_has_table(bdb, phrase.name): if phrase.ifnotexists: return empty_cursor(bdb) raise BQLError( bdb, 'Name already defined as table: %s' % (phrase.name), ) # Set up schema and create the new table. qn = sqlite3_quote_name(phrase.name) qcns = map(sqlite3_quote_name, [ simcol.name if simcol.name is not None else str(simcol.col) for simcol in phrase.simulation.columns ]) temp = '' if phrase.temp is None else 'TEMP' bdb.sql_execute(''' CREATE %s TABLE %s (%s) ''' % (temp, qn, str.join(',', qcns))) # Retrieve the rows. rows = simulate_models_rows(bdb, phrase.simulation) # Insert the rows into the table. insert_sql = ''' INSERT INTO %s (%s) VALUES (%s) ''' % (qn, ','.join(qcns), ','.join('?' for qcn in qcns)) for row in rows: bdb.sql_execute(insert_sql, row) return empty_cursor(bdb) if isinstance(phrase, ast.DropTab): with bdb.savepoint(): sql = 'SELECT COUNT(*) FROM bayesdb_population WHERE tabname = ?' 
cursor = bdb.sql_execute(sql, (phrase.name, )) if 0 < cursor_value(cursor): raise BQLError( bdb, 'Table still in use by populations: %s' % (repr(phrase.name), )) bdb.sql_execute('DELETE FROM bayesdb_column WHERE tabname = ?', (phrase.name, )) ifexists = 'IF EXISTS ' if phrase.ifexists else '' qt = sqlite3_quote_name(phrase.name) return bdb.sql_execute('DROP TABLE %s%s' % (ifexists, qt)) if isinstance(phrase, ast.AlterTab): with bdb.savepoint(): table = phrase.table if not core.bayesdb_has_table(bdb, table): raise BQLError(bdb, 'No such table: %s' % (repr(table), )) for cmd in phrase.commands: if isinstance(cmd, ast.AlterTabRenameTab): # If the names differ only in case, we have to do # some extra work because SQLite will reject the # table rename. Note that we may even have table # == cmd.name here, but if the stored table name # differs in case from cmd.name, we want to update # it anyway. if casefold(table) == casefold(cmd.name): # Go via a temporary table. temp = table + '_temp' while core.bayesdb_has_table(bdb, temp): temp += '_temp' rename_table(bdb, table, temp) rename_table(bdb, temp, cmd.name) else: # Make sure nothing else has this name and # rename it. if core.bayesdb_has_table(bdb, cmd.name): raise BQLError( bdb, 'Name already defined as table' ': %s' % (repr(cmd.name), )) rename_table(bdb, table, cmd.name) # Remember the new name for subsequent commands. table = cmd.name elif isinstance(cmd, ast.AlterTabRenameCol): # XXX Need to deal with this in the compiler. raise NotImplementedError('Renaming columns' ' not yet implemented.') # Make sure the old name exist and the new name does not. 
old_folded = casefold(cmd.old) new_folded = casefold(cmd.new) if old_folded != new_folded: if not core.bayesdb_table_has_column( bdb, table, cmd.old): raise BQLError( bdb, 'No such column in table %s' ': %s' % (repr(table), repr(cmd.old))) if core.bayesdb_table_has_column(bdb, table, cmd.new): raise BQLError( bdb, 'Column already exists' ' in table %s: %s' % (repr(table), repr(cmd.new))) # Update bayesdb_column. Everything else refers # to columns by (tabname, colno) pairs rather than # by names. update_column_sql = ''' UPDATE bayesdb_column SET name = :new WHERE tabname = :table AND name = :old ''' total_changes = bdb._sqlite3.totalchanges() bdb.sql_execute(update_column_sql, { 'table': table, 'old': cmd.old, 'new': cmd.new, }) assert bdb._sqlite3.totalchanges() - total_changes == 1 # ...except metamodels may have the (case-folded) # name cached. if old_folded != new_folded: generators_sql = ''' SELECT id FROM bayesdb_generator WHERE tabname = ? ''' cursor = bdb.sql_execute(generators_sql, (table, )) for (generator_id, ) in cursor: metamodel = core.bayesdb_generator_metamodel( bdb, generator_id) metamodel.rename_column(bdb, generator_id, old_folded, new_folded) else: assert False, 'Invalid alter table command: %s' % \ (cmd,) return empty_cursor(bdb) if isinstance(phrase, ast.GuessSchema): if not core.bayesdb_has_table(bdb, phrase.table): raise BQLError(bdb, 'No such table : %s' % phrase.table) schema = guess.guess_to_schema(guess.bayesdb_guess_stattypes, bdb, phrase.table) # Print schema to console, so user can edit it and/or copy/paste it into # the schema definition when creating a population. 
print schema return empty_cursor(bdb) if isinstance(phrase, ast.CreatePop): with bdb.savepoint(): _create_population(bdb, phrase) return empty_cursor(bdb) if isinstance(phrase, ast.DropPop): with bdb.savepoint(): if not core.bayesdb_has_population(bdb, phrase.name): if phrase.ifexists: return empty_cursor(bdb) raise BQLError(bdb, 'No such population: %r' % (phrase.name, )) population_id = core.bayesdb_get_population(bdb, phrase.name) if core.bayesdb_population_generators(bdb, population_id): raise BQLError( bdb, 'Population still has generators: %r' % (phrase.name, )) # XXX helpful error checking if generators still exist # XXX check change counts bdb.sql_execute( ''' DELETE FROM bayesdb_variable WHERE population_id = ? ''', (population_id, )) bdb.sql_execute( ''' DELETE FROM bayesdb_population WHERE id = ? ''', (population_id, )) return empty_cursor(bdb) if isinstance(phrase, ast.AlterPop): with bdb.savepoint(): population = phrase.population if not core.bayesdb_has_population(bdb, population): raise BQLError(bdb, 'No such population: %s' % (repr(population), )) population_id = core.bayesdb_get_population(bdb, population) for cmd in phrase.commands: if isinstance(cmd, ast.AlterPopStatType): # Check the no metamodels are defined for this population. generators = core.bayesdb_population_generators( bdb, population_id) if generators: raise BQLError( bdb, 'Cannot update statistical types ' 'for population %s, it has metamodels: %s' % ( repr(population), repr(generators), )) # Check all the variables are in the population. unknown = [ c for c in cmd.names if not core.bayesdb_has_variable( bdb, population_id, None, c) ] if unknown: raise BQLError( bdb, 'No such variables in population' ': %s' % (repr(unknown))) # Check the statistical type is valid. if not core.bayesdb_has_stattype(bdb, cmd.stattype): raise BQLError( bdb, 'Invalid statistical type' ': %r' % (repr(cmd.stattype), )) # Perform the stattype update. 
colnos = [ core.bayesdb_variable_number(bdb, population_id, None, c) for c in cmd.names ] qcolnos = ','.join('%d' % (colno, ) for colno in colnos) update_stattype_sql = ''' UPDATE bayesdb_variable SET stattype = ? WHERE population_id = ? AND colno IN (%s) ''' % (qcolnos, ) bdb.sql_execute(update_stattype_sql, ( casefold(cmd.stattype), population_id, )) else: assert False, 'Invalid ALTER POPULATION command: %s' % \ (repr(cmd),) return empty_cursor(bdb) if isinstance(phrase, ast.CreateGen): # Find the population. if not core.bayesdb_has_population(bdb, phrase.population): raise BQLError(bdb, 'No such population: %r' % (phrase.population, )) population_id = core.bayesdb_get_population(bdb, phrase.population) table = core.bayesdb_population_table(bdb, population_id) # Find the metamodel, or use the default. metamodel_name = phrase.metamodel if phrase.metamodel is None: metamodel_name = 'cgpm' if metamodel_name not in bdb.metamodels: raise BQLError(bdb, 'No such metamodel: %s' % (repr(metamodel_name), )) metamodel = bdb.metamodels[metamodel_name] with bdb.savepoint(): if core.bayesdb_has_generator(bdb, population_id, phrase.name): if not phrase.ifnotexists: raise BQLError( bdb, 'Name already defined as generator: %s' % (repr(phrase.name), )) else: # Insert a record into bayesdb_generator and get the # assigned id. bdb.sql_execute( ''' INSERT INTO bayesdb_generator (name, tabname, population_id, metamodel) VALUES (?, ?, ?, ?) ''', (phrase.name, table, population_id, metamodel.name())) generator_id = core.bayesdb_get_generator( bdb, population_id, phrase.name) # Populate bayesdb_generator_column. # # XXX Omit needless bayesdb_generator_column table -- # Github issue #441. 
bdb.sql_execute( ''' INSERT INTO bayesdb_generator_column (generator_id, colno, stattype) SELECT :generator_id, colno, stattype FROM bayesdb_variable WHERE population_id = :population_id AND generator_id IS NULL ''', { 'generator_id': generator_id, 'population_id': population_id, }) # Do any metamodel-specific initialization. metamodel.create_generator(bdb, generator_id, phrase.schema, baseline=phrase.baseline) # Populate bayesdb_generator_column with any latent # variables that metamodel.create_generator has added # with bayesdb_add_latent. bdb.sql_execute( ''' INSERT INTO bayesdb_generator_column (generator_id, colno, stattype) SELECT :generator_id, colno, stattype FROM bayesdb_variable WHERE population_id = :population_id AND generator_id = :generator_id ''', { 'generator_id': generator_id, 'population_id': population_id, }) # All done. Nothing to return. return empty_cursor(bdb) if isinstance(phrase, ast.DropGen): with bdb.savepoint(): if not core.bayesdb_has_generator(bdb, None, phrase.name): if phrase.ifexists: return empty_cursor(bdb) raise BQLError(bdb, 'No such generator: %s' % (repr(phrase.name), )) generator_id = core.bayesdb_get_generator(bdb, None, phrase.name) metamodel = core.bayesdb_generator_metamodel(bdb, generator_id) # Metamodel-specific destruction. metamodel.drop_generator(bdb, generator_id) # Drop the columns, models, and, finally, generator. drop_columns_sql = ''' DELETE FROM bayesdb_generator_column WHERE generator_id = ? ''' bdb.sql_execute(drop_columns_sql, (generator_id, )) drop_model_sql = ''' DELETE FROM bayesdb_generator_model WHERE generator_id = ? ''' bdb.sql_execute(drop_model_sql, (generator_id, )) drop_generator_sql = ''' DELETE FROM bayesdb_generator WHERE id = ? 
''' bdb.sql_execute(drop_generator_sql, (generator_id, )) return empty_cursor(bdb) if isinstance(phrase, ast.AlterGen): with bdb.savepoint(): generator = phrase.generator if not core.bayesdb_has_generator(bdb, None, generator): raise BQLError(bdb, 'No such generator: %s' % (repr(generator), )) generator_id = core.bayesdb_get_generator(bdb, None, generator) for cmd in phrase.commands: if isinstance(cmd, ast.AlterGenRenameGen): # Make sure nothing else has this name. if casefold(generator) != casefold(cmd.name): if core.bayesdb_has_table(bdb, cmd.name): raise BQLError( bdb, 'Name already defined as table' ': %s' % (repr(cmd.name), )) if core.bayesdb_has_generator(bdb, None, cmd.name): raise BQLError( bdb, 'Name already defined' ' as generator: %s' % (repr(cmd.name), )) # Update bayesdb_generator. Everything else # refers to it by id. update_generator_sql = ''' UPDATE bayesdb_generator SET name = ? WHERE id = ? ''' total_changes = bdb._sqlite3.totalchanges() bdb.sql_execute(update_generator_sql, (cmd.name, generator_id)) assert bdb._sqlite3.totalchanges() - total_changes == 1 # Remember the new name for subsequent commands. generator = cmd.name else: assert False, 'Invalid ALTER GENERATOR command: %s' % \ (repr(cmd),) return empty_cursor(bdb) if isinstance(phrase, ast.InitModels): if not core.bayesdb_has_generator(bdb, None, phrase.generator): raise BQLError(bdb, 'No such generator: %s' % (phrase.generator, )) generator_id = core.bayesdb_get_generator(bdb, None, phrase.generator) modelnos = range(phrase.nmodels) with bdb.savepoint(): # Find the model numbers. Omit existing ones for # ifnotexists; reject existing ones otherwise. 
if phrase.ifnotexists: modelnos = set(modelno for modelno in modelnos if not core.bayesdb_generator_has_model( bdb, generator_id, modelno)) else: existing = set(modelno for modelno in modelnos if core.bayesdb_generator_has_model( bdb, generator_id, modelno)) if 0 < len(existing): raise BQLError( bdb, 'Generator %s already has models: %s' % (repr(phrase.generator), sorted(existing))) # Stop now if there's nothing to initialize. if len(modelnos) == 0: return # Create the bayesdb_generator_model records. modelnos = sorted(modelnos) insert_model_sql = ''' INSERT INTO bayesdb_generator_model (generator_id, modelno, iterations) VALUES (:generator_id, :modelno, :iterations) ''' for modelno in modelnos: bdb.sql_execute( insert_model_sql, { 'generator_id': generator_id, 'modelno': modelno, 'iterations': 0, }) # Do metamodel-specific initialization. metamodel = core.bayesdb_generator_metamodel(bdb, generator_id) metamodel.initialize_models(bdb, generator_id, modelnos) return empty_cursor(bdb) if isinstance(phrase, ast.AnalyzeModels): if not phrase.wait: raise NotImplementedError('No background analysis -- use WAIT.') # WARNING: It is the metamodel's responsibility to work in a # transaction. # # WARNING: It is the metamodel's responsibility to update the # iteration count in bayesdb_generator_model records. # # We do this so that the metamodel can save incremental # progress in case of ^C in the middle. # # XXX Put these warning somewhere more appropriate. if not core.bayesdb_has_generator(bdb, None, phrase.generator): raise BQLError(bdb, 'No such generator: %s' % (phrase.generator, )) generator_id = core.bayesdb_get_generator(bdb, None, phrase.generator) metamodel = core.bayesdb_generator_metamodel(bdb, generator_id) # XXX Should allow parameters for iterations and ckpt/iter. 
metamodel.analyze_models(bdb, generator_id, modelnos=phrase.modelnos, iterations=phrase.iterations, max_seconds=phrase.seconds, ckpt_iterations=phrase.ckpt_iterations, ckpt_seconds=phrase.ckpt_seconds, program=phrase.program) return empty_cursor(bdb) if isinstance(phrase, ast.DropModels): with bdb.savepoint(): generator_id = core.bayesdb_get_generator(bdb, None, phrase.generator) metamodel = core.bayesdb_generator_metamodel(bdb, generator_id) modelnos = None if phrase.modelnos is not None: lookup_model_sql = ''' SELECT COUNT(*) FROM bayesdb_generator_model WHERE generator_id = :generator_id AND modelno = :modelno ''' modelnos = sorted(list(phrase.modelnos)) for modelno in modelnos: cursor = bdb.sql_execute(lookup_model_sql, { 'generator_id': generator_id, 'modelno': modelno, }) if cursor_value(cursor) == 0: raise BQLError( bdb, 'No such model' ' in generator %s: %s' % (repr(phrase.generator), repr(modelno))) metamodel.drop_models(bdb, generator_id, modelnos=modelnos) if modelnos is None: drop_models_sql = ''' DELETE FROM bayesdb_generator_model WHERE generator_id = ? ''' bdb.sql_execute(drop_models_sql, (generator_id, )) else: drop_model_sql = ''' DELETE FROM bayesdb_generator_model WHERE generator_id = :generator_id AND modelno = :modelno ''' for modelno in modelnos: bdb.sql_execute(drop_model_sql, { 'generator_id': generator_id, 'modelno': modelno, }) return empty_cursor(bdb) assert False # XXX
def execute_phrase(bdb, phrase, bindings=()):
    """Execute the BQL AST phrase `phrase` and return a cursor of results.

    The phrase is dispatched on its AST node type: queries, transaction
    control (BEGIN/ROLLBACK/COMMIT), and the CREATE/DROP/ALTER commands
    for tables, populations, generators and models, plus GUESS SCHEMA
    and REGRESS.

    :param bdb: BayesDB handle used for savepoints and SQL execution.
    :param phrase: parsed BQL phrase, optionally wrapped in
        `ast.Parametrized`.
    :param bindings: parameter bindings handed to the compiler output
        for phrases that compile a query.

    :return: a cursor.  Commands with no result rows return
        `empty_cursor(bdb)`.
    :raises BQLError: if a named table, population, generator, model,
        variable, backend or statistical type is missing, already
        defined, or otherwise unusable for the command.
    :raises NotImplementedError: for ALTER TABLE ... RENAME COLUMN.
    """
    if isinstance(phrase, ast.Parametrized):
        n_numpar = phrase.n_numpar
        nampar_map = phrase.nampar_map
        phrase = phrase.phrase
        assert 0 < n_numpar
    else:
        n_numpar = 0
        nampar_map = None
        # Ignore extraneous bindings.  XXX Bad idea?

    if ast.is_query(phrase):
        # Compile the query in the transaction in case we need to
        # execute subqueries to determine column lists.  Compiling is
        # a quick tree descent, so this should be fast.
        out = compiler.Output(n_numpar, nampar_map, bindings)
        with bdb.savepoint():
            compiler.compile_query(bdb, phrase, out)
        winders, unwinders = out.getwindings()
        return execute_wound(bdb, winders, unwinders,
            out.getvalue(), out.getbindings())

    if isinstance(phrase, ast.Begin):
        txn.bayesdb_begin_transaction(bdb)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.Rollback):
        txn.bayesdb_rollback_transaction(bdb)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.Commit):
        txn.bayesdb_commit_transaction(bdb)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.CreateTabAs):
        assert ast.is_query(phrase.query)
        with bdb.savepoint():
            if core.bayesdb_has_table(bdb, phrase.name):
                if phrase.ifnotexists:
                    return empty_cursor(bdb)
                else:
                    raise BQLError(bdb,
                        'Name already defined as table: %s' %
                        (repr(phrase.name),))
            out = compiler.Output(n_numpar, nampar_map, bindings)
            qt = sqlite3_quote_name(phrase.name)
            temp = 'TEMP ' if phrase.temp else ''
            ifnotexists = 'IF NOT EXISTS ' if phrase.ifnotexists else ''
            out.write('CREATE %sTABLE %s%s AS ' % (temp, ifnotexists, qt))
            compiler.compile_query(bdb, phrase.query, out)
            winders, unwinders = out.getwindings()
            with compiler.bayesdb_wind(bdb, winders, unwinders):
                bdb.sql_execute(out.getvalue(), out.getbindings())
        return empty_cursor(bdb)

    if isinstance(phrase, ast.CreateTabCsv):
        with bdb.savepoint():
            table_exists = core.bayesdb_has_table(bdb, phrase.name)
            if table_exists:
                if phrase.ifnotexists:
                    return empty_cursor(bdb)
                else:
                    raise BQLError(bdb, 'Table already exists: %s' %
                        (repr(phrase.name),))
            bayesdb_read_csv_file(
                bdb, phrase.name, phrase.csv, header=True, create=True)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.DropTab):
        with bdb.savepoint():
            sql = 'SELECT COUNT(*) FROM bayesdb_population WHERE tabname = ?'
            cursor = bdb.sql_execute(sql, (phrase.name,))
            if 0 < cursor_value(cursor):
                raise BQLError(bdb, 'Table still in use by populations: %s' %
                    (repr(phrase.name),))
            bdb.sql_execute('DELETE FROM bayesdb_column WHERE tabname = ?',
                (phrase.name,))
            ifexists = 'IF EXISTS ' if phrase.ifexists else ''
            qt = sqlite3_quote_name(phrase.name)
            return bdb.sql_execute('DROP TABLE %s%s' % (ifexists, qt))

    if isinstance(phrase, ast.AlterTab):
        with bdb.savepoint():
            table = phrase.table
            if not core.bayesdb_has_table(bdb, table):
                raise BQLError(bdb, 'No such table: %s' % (repr(table),))
            for cmd in phrase.commands:
                if isinstance(cmd, ast.AlterTabRenameTab):
                    # If the names differ only in case, we have to do
                    # some extra work because SQLite will reject the
                    # table rename.  Note that we may even have table
                    # == cmd.name here, but if the stored table name
                    # differs in case from cmd.name, we want to update
                    # it anyway.
                    if casefold(table) == casefold(cmd.name):
                        # Go via a temporary table.
                        temp = table + '_temp'
                        while core.bayesdb_has_table(bdb, temp):
                            temp += '_temp'
                        rename_table(bdb, table, temp)
                        rename_table(bdb, temp, cmd.name)
                    else:
                        # Make sure nothing else has this name and
                        # rename it.
                        if core.bayesdb_has_table(bdb, cmd.name):
                            raise BQLError(bdb,
                                'Name already defined as table: %s' %
                                (repr(cmd.name),))
                        rename_table(bdb, table, cmd.name)
                    # If table has implicit population, rename it too.
                    if core.bayesdb_table_has_implicit_population(
                            bdb, cmd.name):
                        populations = \
                            core.bayesdb_table_populations(bdb, cmd.name)
                        assert len(populations) == 1
                        population_name = core.bayesdb_population_name(
                            bdb, populations[0])
                        qt = sqlite3_quote_name(cmd.name)
                        qp = sqlite3_quote_name(population_name)
                        bdb.execute('ALTER POPULATION %s RENAME TO %s'
                            % (qp, qt))
                    # Remember the new name for subsequent commands.
                    table = cmd.name
                elif isinstance(cmd, ast.AlterTabRenameCol):
                    # XXX Need to deal with this in the compiler.
                    raise NotImplementedError('Renaming columns'
                        ' not yet implemented.')
                    # NOTE: everything below in this branch is
                    # unreachable until the raise above is removed; it
                    # is kept as the intended implementation.
                    #
                    # Make sure the old name exist and the new name does not.
                    old_folded = casefold(cmd.old)
                    new_folded = casefold(cmd.new)
                    if old_folded != new_folded:
                        if not core.bayesdb_table_has_column(bdb, table,
                                cmd.old):
                            raise BQLError(bdb, 'No such column in table %s'
                                ': %s' %
                                (repr(table), repr(cmd.old)))
                        if core.bayesdb_table_has_column(bdb, table, cmd.new):
                            raise BQLError(bdb, 'Column already exists'
                                ' in table %s: %s' %
                                (repr(table), repr(cmd.new)))
                    # Update bayesdb_column.  Everything else refers
                    # to columns by (tabname, colno) pairs rather than
                    # by names.
                    update_column_sql = '''
                        UPDATE bayesdb_column SET name = :new
                            WHERE tabname = :table AND name = :old
                    '''
                    total_changes = bdb._sqlite3.totalchanges()
                    bdb.sql_execute(update_column_sql, {
                        'table': table,
                        'old': cmd.old,
                        'new': cmd.new,
                    })
                    assert bdb._sqlite3.totalchanges() - total_changes == 1
                    # ...except backends may have the (case-folded) name cached.
                    if old_folded != new_folded:
                        populations_sql = '''
                            SELECT id FROM bayesdb_population WHERE tabname = ?
                        '''
                        cursor = bdb.sql_execute(populations_sql, (table,))
                        # Flatten the per-population generator lists into
                        # one set of ids; lists themselves are unhashable.
                        generator_ids = set(
                            generator_id
                            for (population_id,) in cursor
                            for generator_id in
                            core.bayesdb_population_generators(
                                bdb, population_id)
                        )
                        for generator_id in generator_ids:
                            backend = core.bayesdb_generator_backend(
                                bdb, generator_id)
                            backend.rename_column(bdb, generator_id,
                                old_folded, new_folded)
                else:
                    assert False, 'Invalid alter table command: %s' % \
                        (cmd,)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.GuessSchema):
        if not core.bayesdb_has_table(bdb, phrase.table):
            raise BQLError(bdb, 'No such table : %s' % phrase.table)
        out = compiler.Output(0, {}, {})
        with bdb.savepoint():
            qt = sqlite3_quote_name(phrase.table)
            temptable = bdb.temp_table_name()
            qtt = sqlite3_quote_name(temptable)
            cursor = bdb.sql_execute('SELECT * FROM %s' % (qt,))
            column_names = [d[0] for d in cursor.description]
            rows = cursor.fetchall()
            stattypes = bayesdb_guess_stattypes(column_names, rows)
            distinct_value_counts = [
                len(set(row[i] for row in rows))
                for i in range(len(column_names))
            ]
            # The guesses are materialized in a temp table which the
            # unwinder drops again.
            out.winder('''
                CREATE TEMP TABLE %s (
                    column TEXT,
                    stattype TEXT,
                    num_distinct INTEGER,
                    reason TEXT
                )
            ''' % (qtt,), ())
            for cn, st, ct in zip(column_names, stattypes,
                    distinct_value_counts):
                out.winder('''
                    INSERT INTO %s VALUES (?, ?, ?, ?)
                ''' % (qtt), (cn, st[0], ct, st[1]))
            out.write('SELECT * FROM %s' % (qtt,))
            out.unwinder('DROP TABLE %s' % (qtt,), ())
        winders, unwinders = out.getwindings()
        return execute_wound(
            bdb, winders, unwinders, out.getvalue(), out.getbindings())

    if isinstance(phrase, ast.CreatePop):
        with bdb.savepoint():
            _create_population(bdb, phrase)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.DropPop):
        with bdb.savepoint():
            if not core.bayesdb_has_population(bdb, phrase.name):
                if phrase.ifexists:
                    return empty_cursor(bdb)
                raise BQLError(bdb, 'No such population: %r' %
                    (phrase.name,))
            population_id = core.bayesdb_get_population(bdb, phrase.name)
            generator_ids = core.bayesdb_population_generators(
                bdb, population_id)
            if generator_ids:
                generators = [core.bayesdb_generator_name(bdb, gid)
                    for gid in generator_ids]
                raise BQLError(bdb, 'Population %r still has generators: %r' %
                    (phrase.name, generators))
            # XXX helpful error checking if generators still exist
            # XXX check change counts
            bdb.sql_execute('''
                DELETE FROM bayesdb_variable WHERE population_id = ?
            ''', (population_id,))
            bdb.sql_execute('''
                DELETE FROM bayesdb_population WHERE id = ?
            ''', (population_id,))
        return empty_cursor(bdb)

    if isinstance(phrase, ast.AlterPop):
        with bdb.savepoint():
            population = phrase.population
            if not core.bayesdb_has_population(bdb, population):
                raise BQLError(bdb, 'No such population: %s' %
                    (repr(population),))
            population_id = core.bayesdb_get_population(bdb, population)
            for cmd in phrase.commands:
                if isinstance(cmd, ast.AlterPopRenamePop):
                    table = core.bayesdb_population_table(bdb, population_id)
                    # Prevent renaming of implicit population directly, unless
                    # being called by ast.AlterTabRenameTab in which case the
                    # table name and population name will not be matching.
                    if core.bayesdb_population_is_implicit(
                            bdb, population_id) \
                            and casefold(population) == casefold(table):
                        raise BQLError(bdb, 'Cannot rename implicit '
                            'population %s; rename base table instead'
                            % (population,))
                    # Make sure nothing else has this name.
                    if casefold(population) != casefold(cmd.name):
                        if core.bayesdb_has_population(bdb, cmd.name):
                            raise BQLError(bdb,
                                'Name already defined as population' ': %s'
                                % (repr(cmd.name),))
                    # Update bayesdb_population.  Everything else
                    # refers to it by id.
                    update_generator_sql = '''
                        UPDATE bayesdb_population SET name = ? WHERE id = ?
                    '''
                    total_changes = bdb._sqlite3.totalchanges()
                    bdb.sql_execute(update_generator_sql,
                        (cmd.name, population_id))
                    assert bdb._sqlite3.totalchanges() - total_changes == 1
                    # If population has implicit generator, rename it too.
                    if core.bayesdb_population_has_implicit_generator(
                            bdb, population_id):
                        generators = core.bayesdb_population_generators(
                            bdb, population_id)
                        assert len(generators) == 1
                        generator_name = core.bayesdb_generator_name(
                            bdb, generators[0])
                        qp = sqlite3_quote_name(cmd.name)
                        qg = sqlite3_quote_name(generator_name)
                        bdb.execute('ALTER GENERATOR %s RENAME TO %s'
                            % (qg, qp,))
                    # Remember the new name for subsequent commands.
                    population = cmd.name
                elif isinstance(cmd, ast.AlterPopAddVar):
                    # Ensure column exists in base table.
                    table = core.bayesdb_population_table(bdb, population_id)
                    if not core.bayesdb_table_has_column(
                            bdb, table, cmd.name):
                        raise BQLError(bdb,
                            'No such variable in base table: %s'
                            % (cmd.name,))
                    # Ensure variable not already in population.
                    if core.bayesdb_has_variable(
                            bdb, population_id, None, cmd.name):
                        raise BQLError(bdb,
                            'Variable already in population: %s'
                            % (cmd.name,))
                    # Ensure there is at least one observation in the column.
                    qt = sqlite3_quote_name(table)
                    qc = sqlite3_quote_name(cmd.name)
                    cursor = bdb.sql_execute(
                        'SELECT COUNT(*) FROM %s WHERE %s IS NOT NULL' %
                        (qt, qc))
                    if cursor_value(cursor) == 0:
                        raise BQLError(bdb,
                            'Cannot add variable without any values: %s'
                            % (cmd.name,))
                    # If stattype is None, guess.
                    if cmd.stattype is None:
                        cursor = bdb.sql_execute(
                            'SELECT %s FROM %s' % (qc, qt))
                        rows = cursor.fetchall()
                        [stattype, _reason] = bayesdb_guess_stattypes(
                            [cmd.name], rows)[0]
                        # Fail if trying to model a key.
                        if stattype == 'key':
                            raise BQLError(bdb,
                                'Values in column %s appear to be keys.'
                                % (cmd.name,))
                        # Fail if cannot determine a stattype.
                        elif stattype == 'ignore':
                            raise BQLError(bdb,
                                'Failed to determine a stattype for %s, '
                                'please specify one manually.' % (cmd.name,))
                    # If user specified stattype, ensure it exists.
                    elif not core.bayesdb_has_stattype(bdb, cmd.stattype):
                        raise BQLError(bdb,
                            'Invalid stattype: %s' % (cmd.stattype,))
                    else:
                        stattype = cmd.stattype
                    # Check that strings are not being modeled as numerical.
                    if stattype == 'numerical' \
                            and _column_contains_string(bdb, table, cmd.name):
                        raise BQLError(bdb,
                            'Numerical column contains string values: %r '
                            % (qc,))
                    with bdb.savepoint():
                        # Add the variable to the population.
                        core.bayesdb_add_variable(
                            bdb, population_id, cmd.name, stattype)
                        colno = core.bayesdb_variable_number(
                            bdb, population_id, None, cmd.name)
                        # Add the variable to each (initialized) generator in
                        # the population, i.e. each one that has models.
                        generator_ids = [
                            generator_id
                            for generator_id in
                            core.bayesdb_population_generators(
                                bdb, population_id)
                            if core.bayesdb_generator_modelnos(
                                bdb, generator_id)
                        ]
                        for generator_id in generator_ids:
                            backend = core.bayesdb_generator_backend(
                                bdb, generator_id)
                            backend.add_column(bdb, generator_id, colno)
                elif isinstance(cmd, ast.AlterPopStatType):
                    # Check that no generators are defined for this
                    # population.
                    generators = core.bayesdb_population_generators(
                        bdb, population_id)
                    if generators:
                        raise BQLError(bdb,
                            'Cannot update statistical types for population '
                            '%s, it has generators: %s'
                            % (repr(population), repr(generators),))
                    # Check all the variables are in the population.
                    unknown = [
                        c for c in cmd.names if not
                        core.bayesdb_has_variable(bdb, population_id, None, c)
                    ]
                    if unknown:
                        raise BQLError(bdb,
                            'No such variables in population: %s'
                            % (repr(unknown),))
                    # Check the statistical type is valid.
                    if not core.bayesdb_has_stattype(bdb, cmd.stattype):
                        raise BQLError(bdb,
                            'Invalid statistical type: %r'
                            % (repr(cmd.stattype),))
                    # Check that strings are not being modeled as numerical.
                    if cmd.stattype == 'numerical':
                        table = core.bayesdb_population_table(
                            bdb, population_id)
                        numerical_string_vars = [
                            col for col in cmd.names
                            if _column_contains_string(bdb, table, col)
                        ]
                        if numerical_string_vars:
                            raise BQLError(bdb,
                                'Columns with string values modeled as '
                                'numerical: %r'
                                % (numerical_string_vars,))
                    # Perform the stattype update.  The column numbers
                    # are integers formatted with %d, so splicing them
                    # into the IN list is safe.
                    colnos = [
                        core.bayesdb_variable_number(
                            bdb, population_id, None, c)
                        for c in cmd.names
                    ]
                    qcolnos = ','.join('%d' % (colno,) for colno in colnos)
                    update_stattype_sql = '''
                        UPDATE bayesdb_variable SET stattype = ?
                            WHERE population_id = ? AND colno IN (%s)
                    ''' % (qcolnos,)
                    bdb.sql_execute(
                        update_stattype_sql,
                        (casefold(cmd.stattype), population_id,))
                else:
                    assert False, 'Invalid ALTER POPULATION command: %s' % \
                        (repr(cmd),)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.CreateGen):
        # Find the population.
        if not core.bayesdb_has_population(bdb, phrase.population):
            raise BQLError(bdb, 'No such population: %r' %
                (phrase.population,))
        population_id = core.bayesdb_get_population(bdb, phrase.population)

        # Find the backend, or use the default.
        backend_name = phrase.backend
        if phrase.backend is None:
            backend_name = 'cgpm'
        if backend_name not in bdb.backends:
            raise BQLError(bdb, 'No such backend: %s' %
                (repr(backend_name),))
        backend = bdb.backends[backend_name]

        # Retrieve the (possibly implicit) generator name.
        generator_name = phrase.name or phrase.population
        implicit = 1 if phrase.name is None else 0

        with bdb.savepoint():
            if core.bayesdb_has_generator(bdb, population_id, generator_name):
                if not phrase.ifnotexists:
                    raise BQLError(
                        bdb, 'Name already defined as generator: %s' %
                        (repr(generator_name),))
            else:
                # Insert a record into bayesdb_generator and get the
                # assigned id.
                bdb.sql_execute('''
                    INSERT INTO bayesdb_generator
                        (name, population_id, backend, implicit)
                        VALUES (?, ?, ?, ?)
                ''', (generator_name, population_id, backend.name(), implicit))
                generator_id = core.bayesdb_get_generator(
                    bdb, population_id, generator_name)
                # Do any backend-specific initialization.
                backend.create_generator(bdb, generator_id, phrase.schema)

        # All done.  Nothing to return.
        return empty_cursor(bdb)

    if isinstance(phrase, ast.DropGen):
        with bdb.savepoint():
            if not core.bayesdb_has_generator(bdb, None, phrase.name):
                if phrase.ifexists:
                    return empty_cursor(bdb)
                raise BQLError(bdb, 'No such generator: %s' %
                    (repr(phrase.name),))
            generator_id = core.bayesdb_get_generator(bdb, None, phrase.name)
            backend = core.bayesdb_generator_backend(bdb, generator_id)

            # Backend-specific destruction.
            backend.drop_generator(bdb, generator_id)

            # Drop latent variables, models, and, finally, generator.
            drop_columns_sql = '''
                DELETE FROM bayesdb_variable WHERE generator_id = ?
            '''
            bdb.sql_execute(drop_columns_sql, (generator_id,))
            drop_model_sql = '''
                DELETE FROM bayesdb_generator_model WHERE generator_id = ?
            '''
            bdb.sql_execute(drop_model_sql, (generator_id,))
            drop_generator_sql = '''
                DELETE FROM bayesdb_generator WHERE id = ?
            '''
            bdb.sql_execute(drop_generator_sql, (generator_id,))
        return empty_cursor(bdb)

    if isinstance(phrase, ast.AlterGen):
        with bdb.savepoint():
            generator = phrase.generator
            if not core.bayesdb_has_generator(bdb, None, generator):
                raise BQLError(bdb, 'No such generator: %s' %
                    (repr(generator),))
            generator_id = core.bayesdb_get_generator(bdb, None, generator)
            cmds_generic = []
            for cmd in phrase.commands:
                if isinstance(cmd, ast.AlterGenRenameGen):
                    population_id = core.bayesdb_generator_population(
                        bdb, generator_id)
                    population = core.bayesdb_population_name(
                        bdb, population_id)
                    # Prevent renaming of implicit generator directly, unless
                    # being called by ast.AlterPopRenamePop in which case the
                    # population name and generator name will not be matching.
                    #
                    # The check is keyed on the owning population: a
                    # population with an implicit generator has exactly
                    # that one generator (asserted by ALTER POPULATION
                    # ... RENAME above).
                    if core.bayesdb_population_has_implicit_generator(
                            bdb, population_id) \
                            and casefold(generator) == casefold(population):
                        raise BQLError(bdb, 'Cannot rename implicit '
                            'generator; rename base population instead')
                    # Disable modelnos with AlterGenRenameGen.
                    if phrase.modelnos is not None:
                        raise BQLError(bdb,
                            'Cannot specify models for RENAME')
                    # Make sure nothing else has this name.
                    if casefold(generator) != casefold(cmd.name):
                        if core.bayesdb_has_generator(bdb, None, cmd.name):
                            raise BQLError(bdb, 'Name already defined'
                                ' as generator: %s' %
                                (repr(cmd.name),))
                    # Update bayesdb_generator.  Everything else
                    # refers to it by id.
                    update_generator_sql = '''
                        UPDATE bayesdb_generator SET name = ? WHERE id = ?
                    '''
                    total_changes = bdb._sqlite3.totalchanges()
                    bdb.sql_execute(update_generator_sql,
                        (cmd.name, generator_id))
                    assert bdb._sqlite3.totalchanges() - total_changes == 1
                    # Remember the new name for subsequent commands.
                    generator = cmd.name
                elif isinstance(cmd, ast.AlterGenGeneric):
                    cmds_generic.append(cmd.command)
                else:
                    assert False, 'Invalid ALTER GENERATOR command: %s' % \
                        (repr(cmd),)
            if cmds_generic:
                modelnos = phrase.modelnos
                modelnos_invalid = None if modelnos is None else [
                    modelno for modelno in modelnos if not
                    core.bayesdb_generator_has_model(bdb, generator_id, modelno)
                ]
                if modelnos_invalid:
                    # Report only the models that are actually missing.
                    raise BQLError(bdb,
                        'No such models in generator %s: %s' %
                        (repr(phrase.generator), repr(modelnos_invalid)))
                # Call generic alterations on the backend.
                backend = core.bayesdb_generator_backend(bdb, generator_id)
                backend.alter(bdb, generator_id, modelnos, cmds_generic)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.InitModels):
        if not core.bayesdb_has_generator(bdb, None, phrase.generator):
            raise BQLError(bdb, 'No such generator: %s' %
                (phrase.generator,))
        generator_id = core.bayesdb_get_generator(bdb, None, phrase.generator)
        modelnos = range(phrase.nmodels)

        with bdb.savepoint():
            # Find the model numbers.  Omit existing ones for
            # ifnotexists; reject existing ones otherwise.
            if phrase.ifnotexists:
                modelnos = set(modelno for modelno in modelnos
                    if not core.bayesdb_generator_has_model(bdb, generator_id,
                        modelno))
            else:
                existing = set(modelno for modelno in modelnos
                    if core.bayesdb_generator_has_model(bdb, generator_id,
                        modelno))
                if 0 < len(existing):
                    raise BQLError(bdb,
                        'Generator %s already has models: %s' %
                        (repr(phrase.generator), sorted(existing)))

            # Stop now if there's nothing to initialize.  Still hand
            # back a cursor, as every other branch does.
            if len(modelnos) == 0:
                return empty_cursor(bdb)

            # Create the bayesdb_generator_model records.
            modelnos = sorted(modelnos)
            insert_model_sql = '''
                INSERT INTO bayesdb_generator_model
                    (generator_id, modelno)
                    VALUES (:generator_id, :modelno)
            '''
            for modelno in modelnos:
                bdb.sql_execute(insert_model_sql, {
                    'generator_id': generator_id,
                    'modelno': modelno,
                })

            # Do backend-specific initialization.
            backend = core.bayesdb_generator_backend(bdb, generator_id)
            backend.initialize_models(bdb, generator_id, modelnos)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.AnalyzeModels):
        # WARNING: It is the backend's responsibility to work in a
        # transaction.
        #
        # WARNING: It is the backend's responsibility to update the
        # iteration count in bayesdb_generator_model records.
        #
        # We do this so that the backend can save incremental
        # progress in case of ^C in the middle.
        #
        # XXX Put these warning somewhere more appropriate.
        if not core.bayesdb_has_generator(bdb, None, phrase.generator):
            raise BQLError(bdb, 'No such generator: %s' %
                (phrase.generator,))
        generator_id = core.bayesdb_get_generator(bdb, None, phrase.generator)
        backend = core.bayesdb_generator_backend(bdb, generator_id)
        # XXX Should allow parameters for iterations and ckpt/iter.
        backend.analyze_models(bdb, generator_id,
            modelnos=phrase.modelnos,
            iterations=phrase.iterations,
            max_seconds=phrase.seconds,
            ckpt_iterations=phrase.ckpt_iterations,
            ckpt_seconds=phrase.ckpt_seconds,
            program=phrase.program)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.DropModels):
        with bdb.savepoint():
            generator_id = core.bayesdb_get_generator(
                bdb, None, phrase.generator)
            backend = core.bayesdb_generator_backend(bdb, generator_id)
            # modelnos of None means "drop every model of the generator".
            modelnos = None
            if phrase.modelnos is not None:
                lookup_model_sql = '''
                    SELECT COUNT(*) FROM bayesdb_generator_model
                        WHERE generator_id = :generator_id
                        AND modelno = :modelno
                '''
                modelnos = sorted(phrase.modelnos)
                for modelno in modelnos:
                    cursor = bdb.sql_execute(lookup_model_sql, {
                        'generator_id': generator_id,
                        'modelno': modelno,
                    })
                    if cursor_value(cursor) == 0:
                        raise BQLError(bdb, 'No such model'
                            ' in generator %s: %s' %
                            (repr(phrase.generator), repr(modelno)))
            backend.drop_models(bdb, generator_id, modelnos=modelnos)
            if modelnos is None:
                drop_models_sql = '''
                    DELETE FROM bayesdb_generator_model WHERE generator_id = ?
                '''
                bdb.sql_execute(drop_models_sql, (generator_id,))
            else:
                drop_model_sql = '''
                    DELETE FROM bayesdb_generator_model
                        WHERE generator_id = :generator_id
                        AND modelno = :modelno
                '''
                for modelno in modelnos:
                    bdb.sql_execute(drop_model_sql, {
                        'generator_id': generator_id,
                        'modelno': modelno,
                    })
        return empty_cursor(bdb)

    if isinstance(phrase, ast.Regress):
        # Retrieve the population.
        if not core.bayesdb_has_population(bdb, phrase.population):
            raise BQLError(bdb, 'No such population: %r' %
                (phrase.population,))
        population_id = core.bayesdb_get_population(bdb, phrase.population)
        # Retrieve the generator
        generator_id = None
        if phrase.generator:
            if not core.bayesdb_has_generator(bdb, population_id,
                    phrase.generator):
                raise BQLError(bdb,
                    'No such generator: %r' % (phrase.generator,))
            generator_id = core.bayesdb_get_generator(
                bdb, population_id, phrase.generator)
        # Retrieve the target variable.
        if not core.bayesdb_has_variable(
                bdb, population_id, None, phrase.target):
            raise BQLError(bdb, 'No such variable: %r' % (phrase.target,))
        colno_target = core.bayesdb_variable_number(
            bdb, population_id, None, phrase.target)
        stattype = core.bayesdb_variable_stattype(bdb, population_id,
            generator_id, colno_target)
        if stattype != 'numerical':
            raise BQLError(bdb,
                'Target variable is not numerical: %r' % (phrase.target,))
        # Build the given variables.
        if any(isinstance(col, ast.SelColAll) for col in phrase.givens):
            # Using * is not allowed to be mixed with other variables.
            if len(phrase.givens) > 1:
                raise BQLError(bdb, 'Cannot use (*) with other givens.')
            colno_givens = core.bayesdb_variable_numbers(
                bdb, population_id, None)
        else:
            if any(isinstance(col, ast.SelColSub) for col in phrase.givens):
                # Subexpression needs special compiling.
                out = compiler.Output(n_numpar, nampar_map, bindings)
                bql_compiler = compiler.BQLCompiler_None()
                givens = compiler.expand_select_columns(
                    bdb, phrase.givens, True, bql_compiler, out)
            else:
                givens = phrase.givens
            colno_givens = [
                core.bayesdb_variable_number(
                    bdb, population_id, None, given.expression.column)
                for given in givens
            ]
        # Build the arguments to bqlfn.bayesdb_simulate.  The target is
        # excluded from the givens so it is never regressed on itself.
        colno_givens_unique = set(
            colno for colno in colno_givens if colno != colno_target
        )
        if len(colno_givens_unique) == 0:
            raise BQLError(bdb, 'No matching given columns.')
        constraints = []
        colnos = [colno_target] + list(colno_givens_unique)
        nsamp = 100 if phrase.nsamp is None else phrase.nsamp.value.value
        modelnos = None if phrase.modelnos is None else str(phrase.modelnos)
        rows = bqlfn.bayesdb_simulate(
            bdb, population_id, generator_id, modelnos, constraints, colnos,
            numpredictions=nsamp)
        # Retrieve the stattypes.
        stattypes = [
            core.bayesdb_variable_stattype(
                bdb, population_id, generator_id, colno_given)
            for colno_given in colno_givens_unique
        ]
        # Separate the target values from the given values.
        target_values = [row[0] for row in rows]
        given_values = [row[1:] for row in rows]
        given_names = [
            core.bayesdb_variable_name(bdb, population_id, generator_id, given)
            for given in colno_givens_unique
        ]
        # Compute the coefficients. The import to regress_ols is here since the
        # feature depends on pandas + sklearn, so avoid module-wide import.
        from bayeslite.regress import regress_ols
        coefficients = regress_ols(
            target_values, given_values, given_names, stattypes)
        # Store the results in a winder.
        temptable = bdb.temp_table_name()
        qtt = sqlite3_quote_name(temptable)
        out = compiler.Output(0, {}, {})
        out.winder('''
            CREATE TEMP TABLE %s (variable TEXT, coefficient REAL);
        ''' % (qtt,), ())
        for variable, coef in coefficients:
            out.winder('''
                INSERT INTO %s VALUES (?, ?)
            ''' % (qtt), (variable, coef,))
        out.write('SELECT * FROM %s ORDER BY variable' % (qtt,))
        out.unwinder('DROP TABLE %s' % (qtt,), ())
        winders, unwinders = out.getwindings()
        return execute_wound(
            bdb, winders, unwinders, out.getvalue(), out.getbindings())

    # No branch above recognized the phrase type.
    assert False                # XXX
def _test_example(bdb, exname):
    """Exercise the table/generator/model lifecycle for one example.

    Looks up the example named `exname` in the module-level `examples`
    table, registers its metamodel with `bdb`, and then checks that
    creating, dropping, and analyzing tables, generators, and models
    behaves as expected -- in particular that work done inside
    `bdb.savepoint_rollback()` is undone afterwards.

    :param bdb: an open BayesDB handle to run the statements against.
    :param exname: key into `examples` selecting the metamodel under test.
    """
    mm, t, t_sql, data_sql, data, g, g_bql, g_bqlbad0, g_bqlbad1 = \
        examples[exname]
    qt = bql_quote_name(t)
    qg = bql_quote_name(g)

    bayeslite.bayesdb_register_metamodel(bdb, mm())

    # Create a table.  The first creation is rolled back, so the table
    # must be absent again before we create it for real.
    assert not core.bayesdb_has_table(bdb, t)
    with bdb.savepoint_rollback():
        bdb.sql_execute(t_sql)
        assert core.bayesdb_has_table(bdb, t)
    assert not core.bayesdb_has_table(bdb, t)
    bdb.sql_execute(t_sql)
    assert core.bayesdb_has_table(bdb, t)

    # Insert data into the table.
    assert bdb.execute('SELECT COUNT(*) FROM %s' % (qt,)).fetchvalue() == 0
    for row in data:
        bdb.sql_execute(data_sql, row)
    n = len(data)
    assert bdb.execute('SELECT COUNT(*) FROM %s' % (qt,)).fetchvalue() == n

    # Create a generator.  Make sure savepoints work for this.
    assert not core.bayesdb_has_generator(bdb, g)
    with pytest.raises(Exception):
        with bdb.savepoint():
            bdb.execute(g_bqlbad0)
    assert not core.bayesdb_has_generator(bdb, g)
    with pytest.raises(Exception):
        with bdb.savepoint():
            bdb.execute(g_bqlbad1)
    assert not core.bayesdb_has_generator(bdb, g)
    with bdb.savepoint_rollback():
        bdb.execute(g_bql)
        assert core.bayesdb_has_generator(bdb, g)
    assert not core.bayesdb_has_generator(bdb, g)
    bdb.execute(g_bql)
    assert core.bayesdb_has_generator(bdb, g)
    # Creating the same generator twice must fail and leave it in place.
    with pytest.raises(Exception):
        bdb.execute(g_bql)
    assert core.bayesdb_has_generator(bdb, g)

    # Initialize models, rolling back the first two attempts.
    gid = core.bayesdb_get_generator(bdb, g)
    assert not core.bayesdb_generator_has_model(bdb, gid, 0)
    assert [] == core.bayesdb_generator_modelnos(bdb, gid)
    with bdb.savepoint_rollback():
        bdb.execute('INITIALIZE 1 MODEL FOR %s' % (qg,))
        assert core.bayesdb_generator_has_model(bdb, gid, 0)
        assert [0] == core.bayesdb_generator_modelnos(bdb, gid)
    with bdb.savepoint_rollback():
        bdb.execute('INITIALIZE 10 MODELS FOR %s' % (qg,))
        for i in range(10):
            assert core.bayesdb_generator_has_model(bdb, gid, i)
        # Compare against a real list: on Python 3 a bare range object
        # never compares equal to a list.
        assert list(range(10)) == core.bayesdb_generator_modelnos(bdb, gid)
    bdb.execute('INITIALIZE 2 MODELS FOR %s' % (qg,))

    # Test dropping things.
    with pytest.raises(bayeslite.BQLError):
        bdb.execute('DROP TABLE %s' % (qt,))
    with bdb.savepoint_rollback():
        # Note that sql_execute does not protect us!
        bdb.sql_execute('DROP TABLE %s' % (qt,))
        assert not core.bayesdb_has_table(bdb, t)
    assert core.bayesdb_has_table(bdb, t)
    # XXX Should we reject dropping a generator when there remain
    # models?  Should we not reject dropping a table when there remain
    # generators?  A table can be dropped when there remain indices.
    #
    # with pytest.raises(bayeslite.BQLError):
    #     # Models remain.
    #     bdb.execute('DROP GENERATOR %s' % (qg,))
    with bdb.savepoint_rollback():
        bdb.execute('DROP GENERATOR %s' % (qg,))
        assert not core.bayesdb_has_generator(bdb, g)
    assert core.bayesdb_has_generator(bdb, g)
    with bdb.savepoint_rollback():
        bdb.execute('DROP GENERATOR %s' % (qg,))
        assert not core.bayesdb_has_generator(bdb, g)
        bdb.execute(g_bql)
        assert core.bayesdb_has_generator(bdb, g)
    assert core.bayesdb_has_generator(bdb, g)
    # The rollback must have restored the original generator id.
    assert gid == core.bayesdb_get_generator(bdb, g)

    # Test dropping models.
    with bdb.savepoint_rollback():
        bdb.execute('DROP MODEL 1 FROM %s' % (qg,))
        assert core.bayesdb_generator_has_model(bdb, gid, 0)
        assert not core.bayesdb_generator_has_model(bdb, gid, 1)
        assert [0] == core.bayesdb_generator_modelnos(bdb, gid)

    # Test analyzing models.
    bdb.execute('ANALYZE %s FOR 1 ITERATION WAIT' % (qg,))
    bdb.execute('ANALYZE %s MODEL 0 FOR 1 ITERATION WAIT' % (qg,))
    bdb.execute('ANALYZE %s MODEL 1 FOR 1 ITERATION WAIT' % (qg,))
def dot_describe(self, line):
    '''describe BayesDB entities
    [table(s)|generator(s)|columns|model(s)] [<name>...]

    Print a human-readable description of the specified BayesDB entities.
    '''
    write = self.stdout.write

    def usage():
        # Shared by the "no arguments" and "unknown entity" paths.
        write('Usage: .describe table(s) [<table>...]\n')
        write(' .describe generator(s) [<gen>...]\n')
        write(' .describe columns <gen>\n')
        write(' .describe model(s) <gen> [<model>...]\n')

    # XXX Lousy, lousy tokenizer.
    tokens = line.split()
    if len(tokens) == 0:
        usage()
        return

    entity = casefold(tokens[0])

    if entity in ('table', 'tables'):
        if len(tokens) == 1:
            # No names given: describe every table.
            params = ()
            qualifier = '1'
        else:
            params = tokens[1:]
            clauses = ['tabname = ?' for _name in params]
            qualifier = '(' + ' OR '.join(clauses) + ')'
        # Report every unknown table before giving up.
        n_missing = 0
        for table in params:
            if core.bayesdb_has_table(self._bdb, table):
                continue
            write('No such table: %s\n' % (repr(table),))
            n_missing += 1
        if n_missing:
            return
        for table in params:
            core.bayesdb_table_guarantee_columns(self._bdb, table)
        sql = '''
            SELECT tabname, colno, name, shortname
                FROM bayesdb_column
                WHERE %s
                ORDER BY tabname ASC, colno ASC
        ''' % (qualifier,)
        with self._bdb.savepoint():
            cursor = self._bdb.execute(sql, params)
            pretty.pp_cursor(self.stdout, cursor)
        return

    if entity in ('generator', 'generators'):
        if len(tokens) == 1:
            # No names given: describe every generator.
            params = ()
            qualifier = '1'
        else:
            params = tokens[1:]
            # Numbered placeholders so each name can be matched twice.
            names = ','.join('?%d' % (i + 1,) for i in range(len(params)))
            qualifier = '''
                (name IN ({names}) OR (defaultp AND tabname IN ({names})))
            '''.format(names=names)
        n_missing = 0
        for generator in params:
            if core.bayesdb_has_generator_default(self._bdb, generator):
                continue
            write('No such generator: %s\n' % (repr(generator),))
            n_missing += 1
        if n_missing:
            return
        sql = '''
            SELECT id, name, tabname, metamodel
                FROM bayesdb_generator
                WHERE %s
        ''' % (qualifier,)
        with self._bdb.savepoint():
            cursor = self._bdb.sql_execute(sql, params)
            pretty.pp_cursor(self.stdout, cursor)
        return

    if entity == 'columns':
        if len(tokens) != 2:
            write('Describe columns of what generator?\n')
            return
        generator = tokens[1]
        with self._bdb.savepoint():
            if not core.bayesdb_has_generator_default(self._bdb, generator):
                write('No such generator: %s\n' % (repr(generator),))
                return
            generator_id = core.bayesdb_get_generator_default(
                self._bdb, generator)
            sql = '''
                SELECT c.colno AS colno, c.name AS name,
                        gc.stattype AS stattype, c.shortname AS shortname
                    FROM bayesdb_generator AS g,
                        (bayesdb_column AS c LEFT OUTER JOIN
                            bayesdb_generator_column AS gc
                            USING (colno))
                    WHERE g.id = ? AND g.id = gc.generator_id
                        AND g.tabname = c.tabname
                    ORDER BY colno ASC;
            '''
            cursor = self._bdb.sql_execute(sql, (generator_id,))
            pretty.pp_cursor(self.stdout, cursor)
        return

    if entity in ('model', 'models'):
        if len(tokens) < 2:
            write('Describe models of what generator?\n')
            return
        generator = tokens[1]
        with self._bdb.savepoint():
            if not core.bayesdb_has_generator_default(self._bdb, generator):
                write('No such generator: %s\n' % (repr(generator),))
                return
            generator_id = core.bayesdb_get_generator_default(
                self._bdb, generator)
            if len(tokens) == 2:
                # No model numbers given: describe all of them.
                qualifier = '1'
            else:
                modelnos = []
                for token in tokens[2:]:
                    try:
                        modelno = int(token)
                    except ValueError:
                        write('Invalid model number: %s\n' % (repr(token),))
                        return
                    if not core.bayesdb_generator_has_model(
                            self._bdb, generator_id, modelno):
                        write('No such model: %d\n' % (modelno,))
                        return
                    modelnos.append(modelno)
                # Safe to interpolate: every entry went through int().
                qualifier = 'modelno IN (%s)' % \
                    (','.join(str(modelno) for modelno in modelnos),)
            sql = '''
                SELECT modelno, iterations FROM bayesdb_generator_model
                    WHERE generator_id = ? AND %s
            ''' % (qualifier,)
            cursor = self._bdb.sql_execute(sql, (generator_id,))
            pretty.pp_cursor(self.stdout, cursor)
        return

    # Unrecognized entity kind.
    usage()
def execute_phrase(bdb, phrase, bindings=()):
    """Execute the BQL AST phrase `phrase` and return a cursor of results.

    Dispatches on the AST node type of `phrase`.  Queries are compiled
    and executed; transaction commands, and the CREATE/DROP/ALTER
    commands for tables, populations, and generators, as well as the
    model commands (INITIALIZE, ANALYZE, DROP MODELS) and REGRESS, are
    handled inline.  Most mutating commands run inside a
    `bdb.savepoint()` so that a failure leaves the database unchanged.

    :param bdb: BayesDB handle to execute against.
    :param phrase: a BQL AST node, possibly wrapped in `ast.Parametrized`.
    :param bindings: parameter bindings for the phrase; ignored unless
        the phrase is parametrized.
    :return: a cursor -- an empty cursor for commands that produce no
        rows, otherwise whatever `execute_wound` / `bdb.sql_execute`
        returns.
    :raises BQLError: when a named table, population, generator, model,
        variable, or statistical type is missing, already exists, or is
        otherwise invalid for the command.
    :raises NotImplementedError: for column renames and for ANALYZE
        without WAIT.
    """
    if isinstance(phrase, ast.Parametrized):
        n_numpar = phrase.n_numpar
        nampar_map = phrase.nampar_map
        phrase = phrase.phrase
        assert 0 < n_numpar
    else:
        n_numpar = 0
        nampar_map = None
        # Ignore extraneous bindings.  XXX Bad idea?

    if ast.is_query(phrase):
        # Compile the query in the transaction in case we need to
        # execute subqueries to determine column lists.  Compiling is
        # a quick tree descent, so this should be fast.
        out = compiler.Output(n_numpar, nampar_map, bindings)
        with bdb.savepoint():
            compiler.compile_query(bdb, phrase, out)
        winders, unwinders = out.getwindings()
        return execute_wound(
            bdb, winders, unwinders, out.getvalue(), out.getbindings())

    if isinstance(phrase, ast.Begin):
        txn.bayesdb_begin_transaction(bdb)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.Rollback):
        txn.bayesdb_rollback_transaction(bdb)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.Commit):
        txn.bayesdb_commit_transaction(bdb)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.CreateTabAs):
        assert ast.is_query(phrase.query)
        with bdb.savepoint():
            if core.bayesdb_has_table(bdb, phrase.name):
                if phrase.ifnotexists:
                    return empty_cursor(bdb)
                else:
                    raise BQLError(
                        bdb, 'Name already defined as table: %s'
                        % (repr(phrase.name),))
            out = compiler.Output(n_numpar, nampar_map, bindings)
            qt = sqlite3_quote_name(phrase.name)
            temp = 'TEMP ' if phrase.temp else ''
            ifnotexists = 'IF NOT EXISTS ' if phrase.ifnotexists else ''
            out.write('CREATE %sTABLE %s%s AS ' % (temp, ifnotexists, qt))
            compiler.compile_query(bdb, phrase.query, out)
            winders, unwinders = out.getwindings()
            with compiler.bayesdb_wind(bdb, winders, unwinders):
                bdb.sql_execute(out.getvalue(), out.getbindings())
        return empty_cursor(bdb)

    if isinstance(phrase, ast.CreateTabCsv):
        with bdb.savepoint():
            table_exists = core.bayesdb_has_table(bdb, phrase.name)
            if table_exists:
                if phrase.ifnotexists:
                    return empty_cursor(bdb)
                else:
                    raise BQLError(
                        bdb, 'Table already exists: %s'
                        % (repr(phrase.name),))
            bayesdb_read_csv_file(
                bdb, phrase.name, phrase.csv, header=True, create=True)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.DropTab):
        with bdb.savepoint():
            sql = 'SELECT COUNT(*) FROM bayesdb_population WHERE tabname = ?'
            cursor = bdb.sql_execute(sql, (phrase.name,))
            if 0 < cursor_value(cursor):
                raise BQLError(
                    bdb, 'Table still in use by populations: %s'
                    % (repr(phrase.name),))
            bdb.sql_execute(
                'DELETE FROM bayesdb_column WHERE tabname = ?',
                (phrase.name,))
            ifexists = 'IF EXISTS ' if phrase.ifexists else ''
            qt = sqlite3_quote_name(phrase.name)
            return bdb.sql_execute('DROP TABLE %s%s' % (ifexists, qt))

    if isinstance(phrase, ast.AlterTab):
        with bdb.savepoint():
            table = phrase.table
            if not core.bayesdb_has_table(bdb, table):
                raise BQLError(bdb, 'No such table: %s' % (repr(table),))
            for cmd in phrase.commands:
                if isinstance(cmd, ast.AlterTabRenameTab):
                    # If the names differ only in case, we have to do
                    # some extra work because SQLite will reject the
                    # table rename.  Note that we may even have table
                    # == cmd.name here, but if the stored table name
                    # differs in case from cmd.name, we want to update
                    # it anyway.
                    if casefold(table) == casefold(cmd.name):
                        # Go via a temporary table.
                        temp = table + '_temp'
                        while core.bayesdb_has_table(bdb, temp):
                            temp += '_temp'
                        rename_table(bdb, table, temp)
                        rename_table(bdb, temp, cmd.name)
                    else:
                        # Make sure nothing else has this name and
                        # rename it.
                        if core.bayesdb_has_table(bdb, cmd.name):
                            raise BQLError(
                                bdb, 'Name already defined as table'
                                ': %s' % (repr(cmd.name),))
                        rename_table(bdb, table, cmd.name)
                    # Remember the new name for subsequent commands.
                    table = cmd.name
                elif isinstance(cmd, ast.AlterTabRenameCol):
                    # XXX Need to deal with this in the compiler.
                    raise NotImplementedError('Renaming columns'
                                              ' not yet implemented.')
                    # NOTE: everything below in this branch is
                    # unreachable until the raise above is removed; it
                    # is retained as the intended implementation.
                    # Make sure the old name exist and the new name does not.
                    old_folded = casefold(cmd.old)
                    new_folded = casefold(cmd.new)
                    if old_folded != new_folded:
                        if not core.bayesdb_table_has_column(
                                bdb, table, cmd.old):
                            raise BQLError(
                                bdb, 'No such column in table %s'
                                ': %s' % (repr(table), repr(cmd.old)))
                        if core.bayesdb_table_has_column(
                                bdb, table, cmd.new):
                            raise BQLError(
                                bdb, 'Column already exists'
                                ' in table %s: %s'
                                % (repr(table), repr(cmd.new)))
                    # Update bayesdb_column.  Everything else refers
                    # to columns by (tabname, colno) pairs rather than
                    # by names.
                    update_column_sql = '''
                        UPDATE bayesdb_column SET name = :new
                            WHERE tabname = :table AND name = :old
                    '''
                    total_changes = bdb._sqlite3.totalchanges()
                    bdb.sql_execute(update_column_sql, {
                        'table': table,
                        'old': cmd.old,
                        'new': cmd.new,
                    })
                    assert bdb._sqlite3.totalchanges() - total_changes == 1
                    # ...except metamodels may have the (case-folded)
                    # name cached.
                    if old_folded != new_folded:
                        generators_sql = '''
                            SELECT id FROM bayesdb_generator WHERE tabname = ?
                        '''
                        cursor = bdb.sql_execute(generators_sql, (table,))
                        for (generator_id,) in cursor:
                            metamodel = core.bayesdb_generator_metamodel(
                                bdb, generator_id)
                            metamodel.rename_column(
                                bdb, generator_id, old_folded, new_folded)
                else:
                    assert False, 'Invalid alter table command: %s' % \
                        (cmd,)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.GuessSchema):
        if not core.bayesdb_has_table(bdb, phrase.table):
            raise BQLError(bdb, 'No such table : %s' % (phrase.table,))
        out = compiler.Output(0, {}, {})
        with bdb.savepoint():
            qt = sqlite3_quote_name(phrase.table)
            temptable = bdb.temp_table_name()
            qtt = sqlite3_quote_name(temptable)
            cursor = bdb.sql_execute('SELECT * FROM %s' % (qt,))
            column_names = [d[0] for d in cursor.description]
            rows = cursor.fetchall()
            stattypes = bayesdb_guess_stattypes(column_names, rows)
            distinct_value_counts = [
                len(set([row[i] for row in rows]))
                for i in range(len(column_names))
            ]
            # The guesses are materialized into a temporary table that
            # is created on wind and dropped on unwind.
            out.winder('''
                CREATE TEMP TABLE %s (column TEXT, stattype TEXT, num_distinct INTEGER, reason TEXT)
            ''' % (qtt,), ())
            for cn, st, ct in zip(
                    column_names, stattypes, distinct_value_counts):
                out.winder('''
                    INSERT INTO %s VALUES (?, ?, ?, ?)
                ''' % (qtt,), (cn, st[0], ct, st[1]))
            out.write('SELECT * FROM %s' % (qtt,))
            out.unwinder('DROP TABLE %s' % (qtt,), ())
        winders, unwinders = out.getwindings()
        return execute_wound(
            bdb, winders, unwinders, out.getvalue(), out.getbindings())

    if isinstance(phrase, ast.CreatePop):
        with bdb.savepoint():
            _create_population(bdb, phrase)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.DropPop):
        with bdb.savepoint():
            if not core.bayesdb_has_population(bdb, phrase.name):
                if phrase.ifexists:
                    return empty_cursor(bdb)
                raise BQLError(
                    bdb, 'No such population: %r' % (phrase.name,))
            population_id = core.bayesdb_get_population(bdb, phrase.name)
            generator_ids = core.bayesdb_population_generators(
                bdb, population_id)
            if generator_ids:
                generators = [
                    core.bayesdb_generator_name(bdb, gid)
                    for gid in generator_ids
                ]
                raise BQLError(
                    bdb, 'Population %r still has metamodels: %r'
                    % (phrase.name, generators))
            # XXX helpful error checking if generators still exist
            # XXX check change counts
            bdb.sql_execute('''
                DELETE FROM bayesdb_variable WHERE population_id = ?
            ''', (population_id,))
            bdb.sql_execute('''
                DELETE FROM bayesdb_population WHERE id = ?
            ''', (population_id,))
        return empty_cursor(bdb)

    if isinstance(phrase, ast.AlterPop):
        with bdb.savepoint():
            population = phrase.population
            if not core.bayesdb_has_population(bdb, population):
                raise BQLError(
                    bdb, 'No such population: %s' % (repr(population),))
            population_id = core.bayesdb_get_population(bdb, population)
            for cmd in phrase.commands:
                if isinstance(cmd, ast.AlterPopAddVar):
                    # Ensure column exists in base table.
                    table = core.bayesdb_population_table(bdb, population_id)
                    if not core.bayesdb_table_has_column(
                            bdb, table, cmd.name):
                        raise BQLError(
                            bdb, 'No such variable in base table: %s'
                            % (cmd.name,))
                    # Ensure variable not already in population.
                    if core.bayesdb_has_variable(
                            bdb, population_id, None, cmd.name):
                        raise BQLError(
                            bdb, 'Variable already in population: %s'
                            % (cmd.name,))
                    # Ensure there is at least observation in the column.
                    qt = sqlite3_quote_name(table)
                    qc = sqlite3_quote_name(cmd.name)
                    cursor = bdb.sql_execute(
                        'SELECT COUNT(*) FROM %s WHERE %s IS NOT NULL'
                        % (qt, qc))
                    if cursor_value(cursor) == 0:
                        raise BQLError(
                            bdb, 'Cannot add variable without any values: %s'
                            % (cmd.name,))
                    # If stattype is None, guess.
                    if cmd.stattype is None:
                        cursor = bdb.sql_execute(
                            'SELECT %s FROM %s' % (qc, qt))
                        rows = cursor.fetchall()
                        [stattype, reason] = bayesdb_guess_stattypes(
                            [cmd.name], rows)[0]
                        # Fail if trying to model a key.
                        if stattype == 'key':
                            raise BQLError(
                                bdb, 'Values in column %s appear to be keys.'
                                % (cmd.name,))
                        # Fail if cannot determine a stattype.
                        elif stattype == 'ignore':
                            raise BQLError(
                                bdb, 'Failed to determine a stattype for %s, '
                                'please specify one manually.' % (cmd.name,))
                    # If user specified stattype, ensure it exists.
                    elif not core.bayesdb_has_stattype(bdb, cmd.stattype):
                        raise BQLError(
                            bdb, 'Invalid stattype: %s' % (cmd.stattype,))
                    else:
                        stattype = cmd.stattype
                    # Check that strings are not being modeled as numerical.
                    if stattype == 'numerical' \
                            and _column_contains_string(bdb, table, cmd.name):
                        raise BQLError(
                            bdb, 'Numerical column contains string values: %r '
                            % (qc,))
                    with bdb.savepoint():
                        # Add the variable to the population.
                        core.bayesdb_add_variable(
                            bdb, population_id, cmd.name, stattype)
                        colno = core.bayesdb_variable_number(
                            bdb, population_id, None, cmd.name)
                        # Add the variable to each (initialized) metamodel in
                        # the population.
                        generator_ids = [
                            g for g in core.bayesdb_population_generators(
                                bdb, population_id)
                            if core.bayesdb_generator_modelnos(bdb, g)
                        ]
                        for generator_id in generator_ids:
                            # XXX Omit needless bayesdb_generator_column table
                            # Github issue #441.
                            bdb.sql_execute('''
                                INSERT INTO bayesdb_generator_column
                                    VALUES (:generator_id, :colno, :stattype)
                            ''', {
                                'generator_id': generator_id,
                                'colno': colno,
                                'stattype': stattype,
                            })
                            metamodel = core.bayesdb_generator_metamodel(
                                bdb, generator_id)
                            metamodel.add_column(bdb, generator_id, colno)
                elif isinstance(cmd, ast.AlterPopStatType):
                    # Check the no metamodels are defined for this population.
                    generators = core.bayesdb_population_generators(
                        bdb, population_id)
                    if generators:
                        raise BQLError(
                            bdb,
                            'Cannot update statistical types for population '
                            '%s, it has metamodels: %s' % (
                                repr(population),
                                repr(generators),
                            ))
                    # Check all the variables are in the population.
                    unknown = [
                        c for c in cmd.names
                        if not core.bayesdb_has_variable(
                            bdb, population_id, None, c)
                    ]
                    if unknown:
                        raise BQLError(
                            bdb, 'No such variables in population: %s'
                            % (repr(unknown),))
                    # Check the statistical type is valid.
                    if not core.bayesdb_has_stattype(bdb, cmd.stattype):
                        raise BQLError(
                            bdb, 'Invalid statistical type: %r'
                            % (repr(cmd.stattype),))
                    # Check that strings are not being modeled as numerical.
                    if cmd.stattype == 'numerical':
                        table = core.bayesdb_population_table(
                            bdb, population_id)
                        numerical_string_vars = [
                            col for col in cmd.names
                            if _column_contains_string(bdb, table, col)
                        ]
                        if numerical_string_vars:
                            raise BQLError(
                                bdb, 'Columns with string values modeled as '
                                'numerical: %r' % (numerical_string_vars,))
                    # Perform the stattype update.
                    colnos = [
                        core.bayesdb_variable_number(
                            bdb, population_id, None, c)
                        for c in cmd.names
                    ]
                    # Column numbers are formatted with %d, so only
                    # integers can reach the SQL text.
                    qcolnos = ','.join('%d' % (colno,) for colno in colnos)
                    update_stattype_sql = '''
                        UPDATE bayesdb_variable SET stattype = ?
                            WHERE population_id = ? AND colno IN (%s)
                    ''' % (qcolnos,)
                    bdb.sql_execute(update_stattype_sql, (
                        casefold(cmd.stattype),
                        population_id,
                    ))
                else:
                    assert False, 'Invalid ALTER POPULATION command: %s' % \
                        (repr(cmd),)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.CreateGen):
        # Find the population.
        if not core.bayesdb_has_population(bdb, phrase.population):
            raise BQLError(
                bdb, 'No such population: %r' % (phrase.population,))
        population_id = core.bayesdb_get_population(bdb, phrase.population)
        table = core.bayesdb_population_table(bdb, population_id)

        # Find the metamodel, or use the default.
        metamodel_name = phrase.metamodel
        if phrase.metamodel is None:
            metamodel_name = 'cgpm'
        if metamodel_name not in bdb.metamodels:
            raise BQLError(
                bdb, 'No such metamodel: %s' % (repr(metamodel_name),))
        metamodel = bdb.metamodels[metamodel_name]

        with bdb.savepoint():
            if core.bayesdb_has_generator(bdb, population_id, phrase.name):
                if not phrase.ifnotexists:
                    raise BQLError(
                        bdb, 'Name already defined as generator: %s'
                        % (repr(phrase.name),))
            else:
                # Insert a record into bayesdb_generator and get the
                # assigned id.
                bdb.sql_execute('''
                    INSERT INTO bayesdb_generator
                        (name, tabname, population_id, metamodel)
                        VALUES (?, ?, ?, ?)
                ''', (phrase.name, table, population_id, metamodel.name()))
                generator_id = core.bayesdb_get_generator(
                    bdb, population_id, phrase.name)

                # Populate bayesdb_generator_column.
                #
                # XXX Omit needless bayesdb_generator_column table --
                # Github issue #441.
                bdb.sql_execute('''
                    INSERT INTO bayesdb_generator_column
                        (generator_id, colno, stattype)
                        SELECT :generator_id, colno, stattype
                            FROM bayesdb_variable
                            WHERE population_id = :population_id
                                AND generator_id IS NULL
                ''', {
                    'generator_id': generator_id,
                    'population_id': population_id,
                })

                # Do any metamodel-specific initialization.
                metamodel.create_generator(
                    bdb, generator_id, phrase.schema,
                    baseline=phrase.baseline)

                # Populate bayesdb_generator_column with any latent
                # variables that metamodel.create_generator has added
                # with bayesdb_add_latent.
                bdb.sql_execute('''
                    INSERT INTO bayesdb_generator_column
                        (generator_id, colno, stattype)
                        SELECT :generator_id, colno, stattype
                            FROM bayesdb_variable
                            WHERE population_id = :population_id
                                AND generator_id = :generator_id
                ''', {
                    'generator_id': generator_id,
                    'population_id': population_id,
                })

        # All done.  Nothing to return.
        return empty_cursor(bdb)

    if isinstance(phrase, ast.DropGen):
        with bdb.savepoint():
            if not core.bayesdb_has_generator(bdb, None, phrase.name):
                if phrase.ifexists:
                    return empty_cursor(bdb)
                raise BQLError(
                    bdb, 'No such generator: %s' % (repr(phrase.name),))
            generator_id = core.bayesdb_get_generator(bdb, None, phrase.name)
            metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)

            # Metamodel-specific destruction.
            metamodel.drop_generator(bdb, generator_id)

            # Drop the columns, models, and, finally, generator.
            drop_columns_sql = '''
                DELETE FROM bayesdb_generator_column WHERE generator_id = ?
            '''
            bdb.sql_execute(drop_columns_sql, (generator_id,))
            drop_model_sql = '''
                DELETE FROM bayesdb_generator_model WHERE generator_id = ?
            '''
            bdb.sql_execute(drop_model_sql, (generator_id,))
            drop_generator_sql = '''
                DELETE FROM bayesdb_generator WHERE id = ?
            '''
            bdb.sql_execute(drop_generator_sql, (generator_id,))
        return empty_cursor(bdb)

    if isinstance(phrase, ast.AlterGen):
        with bdb.savepoint():
            generator = phrase.generator
            if not core.bayesdb_has_generator(bdb, None, generator):
                raise BQLError(
                    bdb, 'No such generator: %s' % (repr(generator),))
            generator_id = core.bayesdb_get_generator(bdb, None, generator)
            cmds_generic = []
            for cmd in phrase.commands:
                if isinstance(cmd, ast.AlterGenRenameGen):
                    # Disable modelnos with AlterGenRenameGen.
                    if phrase.modelnos is not None:
                        raise BQLError(
                            bdb, 'Cannot specify models for RENAME')
                    # Make sure nothing else has this name.
                    if casefold(generator) != casefold(cmd.name):
                        if core.bayesdb_has_table(bdb, cmd.name):
                            raise BQLError(
                                bdb, 'Name already defined as table'
                                ': %s' % (repr(cmd.name),))
                        if core.bayesdb_has_generator(bdb, None, cmd.name):
                            raise BQLError(
                                bdb, 'Name already defined'
                                ' as generator: %s' % (repr(cmd.name),))
                    # Update bayesdb_generator.  Everything else
                    # refers to it by id.
                    update_generator_sql = '''
                        UPDATE bayesdb_generator SET name = ? WHERE id = ?
                    '''
                    total_changes = bdb._sqlite3.totalchanges()
                    bdb.sql_execute(
                        update_generator_sql, (cmd.name, generator_id))
                    assert bdb._sqlite3.totalchanges() - total_changes == 1
                    # Remember the new name for subsequent commands.
                    generator = cmd.name
                elif isinstance(cmd, ast.AlterGenGeneric):
                    cmds_generic.append(cmd.command)
                else:
                    assert False, 'Invalid ALTER GENERATOR command: %s' % \
                        (repr(cmd),)
            if cmds_generic:
                modelnos = phrase.modelnos
                modelnos_invalid = None if modelnos is None else [
                    modelno for modelno in modelnos
                    if not core.bayesdb_generator_has_model(
                        bdb, generator_id, modelno)
                ]
                if modelnos_invalid:
                    # Report only the models that are actually missing.
                    raise BQLError(
                        bdb, 'No such models in generator %s: %s'
                        % (repr(phrase.generator), repr(modelnos_invalid)))
                # Call generic alternations on the metamodel.
                metamodel = core.bayesdb_generator_metamodel(
                    bdb, generator_id)
                metamodel.alter(bdb, generator_id, modelnos, cmds_generic)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.InitModels):
        if not core.bayesdb_has_generator(bdb, None, phrase.generator):
            raise BQLError(
                bdb, 'No such generator: %s' % (phrase.generator,))
        generator_id = core.bayesdb_get_generator(
            bdb, None, phrase.generator)
        modelnos = range(phrase.nmodels)

        with bdb.savepoint():
            # Find the model numbers.  Omit existing ones for
            # ifnotexists; reject existing ones otherwise.
            if phrase.ifnotexists:
                modelnos = set(
                    modelno for modelno in modelnos
                    if not core.bayesdb_generator_has_model(
                        bdb, generator_id, modelno))
            else:
                existing = set(
                    modelno for modelno in modelnos
                    if core.bayesdb_generator_has_model(
                        bdb, generator_id, modelno))
                if 0 < len(existing):
                    raise BQLError(
                        bdb, 'Generator %s already has models: %s'
                        % (repr(phrase.generator), sorted(existing)))

            # Stop now if there's nothing to initialize.  Still hand
            # back a cursor so callers get the same type on every path.
            if len(modelnos) == 0:
                return empty_cursor(bdb)

            # Create the bayesdb_generator_model records.
            modelnos = sorted(modelnos)
            insert_model_sql = '''
                INSERT INTO bayesdb_generator_model
                    (generator_id, modelno, iterations)
                    VALUES (:generator_id, :modelno, :iterations)
            '''
            for modelno in modelnos:
                bdb.sql_execute(insert_model_sql, {
                    'generator_id': generator_id,
                    'modelno': modelno,
                    'iterations': 0,
                })

            # Do metamodel-specific initialization.
            metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
            metamodel.initialize_models(bdb, generator_id, modelnos)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.AnalyzeModels):
        if not phrase.wait:
            raise NotImplementedError('No background analysis -- use WAIT.')
        # WARNING: It is the metamodel's responsibility to work in a
        # transaction.
        #
        # WARNING: It is the metamodel's responsibility to update the
        # iteration count in bayesdb_generator_model records.
        #
        # We do this so that the metamodel can save incremental
        # progress in case of ^C in the middle.
        #
        # XXX Put these warning somewhere more appropriate.
        if not core.bayesdb_has_generator(bdb, None, phrase.generator):
            raise BQLError(
                bdb, 'No such generator: %s' % (phrase.generator,))
        generator_id = core.bayesdb_get_generator(
            bdb, None, phrase.generator)
        metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
        # XXX Should allow parameters for iterations and ckpt/iter.
        metamodel.analyze_models(
            bdb, generator_id,
            modelnos=phrase.modelnos,
            iterations=phrase.iterations,
            max_seconds=phrase.seconds,
            ckpt_iterations=phrase.ckpt_iterations,
            ckpt_seconds=phrase.ckpt_seconds,
            program=phrase.program)
        return empty_cursor(bdb)

    if isinstance(phrase, ast.DropModels):
        with bdb.savepoint():
            generator_id = core.bayesdb_get_generator(
                bdb, None, phrase.generator)
            metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
            modelnos = None
            if phrase.modelnos is not None:
                lookup_model_sql = '''
                    SELECT COUNT(*) FROM bayesdb_generator_model
                        WHERE generator_id = :generator_id
                        AND modelno = :modelno
                '''
                modelnos = sorted(phrase.modelnos)
                for modelno in modelnos:
                    cursor = bdb.sql_execute(lookup_model_sql, {
                        'generator_id': generator_id,
                        'modelno': modelno,
                    })
                    if cursor_value(cursor) == 0:
                        raise BQLError(
                            bdb, 'No such model'
                            ' in generator %s: %s'
                            % (repr(phrase.generator), repr(modelno)))
            metamodel.drop_models(bdb, generator_id, modelnos=modelnos)
            if modelnos is None:
                drop_models_sql = '''
                    DELETE FROM bayesdb_generator_model WHERE generator_id = ?
                '''
                bdb.sql_execute(drop_models_sql, (generator_id,))
            else:
                drop_model_sql = '''
                    DELETE FROM bayesdb_generator_model
                        WHERE generator_id = :generator_id
                        AND modelno = :modelno
                '''
                for modelno in modelnos:
                    bdb.sql_execute(drop_model_sql, {
                        'generator_id': generator_id,
                        'modelno': modelno,
                    })
        return empty_cursor(bdb)

    if isinstance(phrase, ast.Regress):
        # Retrieve the population.
        if not core.bayesdb_has_population(bdb, phrase.population):
            raise BQLError(
                bdb, 'No such population: %r' % (phrase.population,))
        population_id = core.bayesdb_get_population(bdb, phrase.population)
        # Retrieve the metamodel.
        generator_id = None
        if phrase.metamodel:
            if not core.bayesdb_has_generator(
                    bdb, population_id, phrase.metamodel):
                # Name the metamodel that was looked up, not the
                # population it was looked up in.
                raise BQLError(
                    bdb, 'No such metamodel: %r' % (phrase.metamodel,))
            generator_id = core.bayesdb_get_generator(
                bdb, population_id, phrase.metamodel)
        # Retrieve the target variable.
        if not core.bayesdb_has_variable(
                bdb, population_id, None, phrase.target):
            raise BQLError(
                bdb, 'No such variable: %r' % (phrase.target,))
        colno_target = core.bayesdb_variable_number(
            bdb, population_id, None, phrase.target)
        if core.bayesdb_variable_stattype(
                bdb, population_id, colno_target) != 'numerical':
            raise BQLError(
                bdb, 'Target variable is not numerical: %r'
                % (phrase.target,))
        # Build the given variables.
        if any(isinstance(col, ast.SelColAll) for col in phrase.givens):
            # Using * is not allowed to be mixed with other variables.
            if len(phrase.givens) > 1:
                raise BQLError(bdb, 'Cannot use (*) with other givens.')
            colno_givens = core.bayesdb_variable_numbers(
                bdb, population_id, None)
        else:
            if any(isinstance(col, ast.SelColSub) for col in phrase.givens):
                # Subexpression needs special compiling.
                out = compiler.Output(n_numpar, nampar_map, bindings)
                bql_compiler = compiler.BQLCompiler_None()
                givens = compiler.expand_select_columns(
                    bdb, phrase.givens, True, bql_compiler, out)
            else:
                givens = phrase.givens
            colno_givens = [
                core.bayesdb_variable_number(
                    bdb, population_id, None, given.expression.column)
                for given in givens
            ]
        # Build the arguments to bqlfn.bayesdb_simulate.  The target
        # itself is excluded from the givens.
        colno_givens_unique = set(
            colno for colno in colno_givens if colno != colno_target)
        if len(colno_givens_unique) == 0:
            raise BQLError(bdb, 'No matching given columns.')
        constraints = []
        colnos = [colno_target] + list(colno_givens_unique)
        nsamp = 100 if phrase.nsamp is None else phrase.nsamp.value.value
        modelnos = None if phrase.modelnos is None else str(phrase.modelnos)
        rows = bqlfn.bayesdb_simulate(
            bdb, population_id, generator_id, modelnos, constraints,
            colnos, numpredictions=nsamp)
        # Retrieve the stattypes.
        stattypes = [
            core.bayesdb_variable_stattype(bdb, population_id, colno_given)
            for colno_given in colno_givens_unique
        ]
        # Separate the target values from the given values.
        target_values = [row[0] for row in rows]
        given_values = [row[1:] for row in rows]
        given_names = [
            core.bayesdb_variable_name(bdb, population_id, given)
            for given in colno_givens_unique
        ]
        # Compute the coefficients. The import to regress_ols is here since the
        # feature depends on pandas + sklearn, so avoid module-wide import.
        from bayeslite.regress import regress_ols
        coefficients = regress_ols(
            target_values, given_values, given_names, stattypes)
        # Store the results in a winder.
        temptable = bdb.temp_table_name()
        qtt = sqlite3_quote_name(temptable)
        out = compiler.Output(0, {}, {})
        out.winder('''
            CREATE TEMP TABLE %s (variable TEXT, coefficient REAL);
        ''' % (qtt,), ())
        for variable, coef in coefficients:
            out.winder('''
                INSERT INTO %s VALUES (?, ?)
            ''' % (qtt,), (variable, coef,))
        out.write('SELECT * FROM %s ORDER BY variable' % (qtt,))
        out.unwinder('DROP TABLE %s' % (qtt,), ())
        winders, unwinders = out.getwindings()
        return execute_wound(
            bdb, winders, unwinders, out.getvalue(), out.getbindings())

    assert False                # XXX