def _cmd_assert(self, query, sql=None):
    '''Displays an HTML div indicating whether a bql/sql test passed or
    failed, i.e. whether the query returned true (1) or false (0).

    Usage: .assert <query>

    The query is run with sql_execute when `sql` is truthy and with
    execute otherwise.  A message is written to stderr, and nothing is
    displayed, if the result is not a single cell or if that cell is
    neither 0 nor 1.
    '''
    if sql:
        cursor = self._bdb.sql_execute(query)
    else:
        cursor = self._bdb.execute(query)
    result = utils_bql.cursor_to_df(cursor)

    # The verdict must be exactly one cell.
    if result.shape != (1, 1):
        self.write_stderr(
            'The query must return a table with exactly one '
            'row and one column, received shape: %s' % (result.shape,))
        return

    verdict = result.iloc[0, 0]
    if verdict not in [0, 1]:
        self.write_stderr(
            'The query must return 1 (True) or 0 (False), received: %s'
            % (repr(verdict),))
        return

    passed_html = (
        ' <div class="alert alert-success">'
        ' <strong>Test passed</strong> </div> ')
    failed_html = (
        ' <div class="alert alert-danger">'
        ' <strong>Test failed</strong> </div> ')
    display_html(passed_html if verdict == 1 else failed_html, raw=True)
def _cmd_table(self, args):
    '''Returns a table of the PRAGMA schema of <table>.

    Usage: .table <table>

    `args` is taken verbatim as the table name; it is quoted with
    bql_quote_name before being placed in the PRAGMA statement.
    '''
    pragma = 'PRAGMA table_info(%s)' % (bql_quote_name(args), )
    return utils_bql.cursor_to_df(self._bdb.sql_execute(pragma))
def _bql(self, lines):
    '''Executes the BQL text assembled from `lines`.

    Lines are accumulated (each followed by a newline) into a buffer.
    Once bql_string_complete_p reports the buffer as complete, the buffer
    is executed and reset as soon as a further line arrives.  Whatever
    remains in the buffer after the last line is always executed, and the
    result of that final execution is returned as a dataframe.
    '''
    pending = StringIO.StringIO()
    flush_due = False
    for line in lines:
        if flush_due:
            # The text gathered so far is a complete phrase and more input
            # follows: run it now and start a fresh buffer.
            self._bdb.execute(pending.getvalue())
            pending = StringIO.StringIO()
        pending.write('%s\n' % (line, ))
        text = pending.getvalue()
        flush_due = bool(text and bql_string_complete_p(text))
    final_cursor = self._bdb.execute(pending.getvalue())
    return utils_bql.cursor_to_df(final_cursor)
def _cmd_interactive_heatmap(self, query, sql=None, **kwargs):
    '''Runs `query` and passes its last three columns to
    jsviz.interactive_heatmap.

    The query is run with sql_execute when `sql` is truthy and with
    execute otherwise.  If the keyword arguments `table`, `label0` and
    `label1` are all given (and truthy), values in the first two retained
    columns are replaced using a mapping from column `label0` to column
    `label1` of `table`.
    '''
    run_query = self._bdb.sql_execute if sql else self._bdb.execute
    frame = utils_bql.cursor_to_df(run_query(query))

    # XXX Only the trailing three columns are kept so that the output of a
    # BQL PAIRWISE query can be handed straight to
    # %bql .interactive_heatmap.  PAIRWISE yields four columns:
    # population_id first, then name0, name1 and value.
    frame = frame.iloc[:, -3:]

    table = kwargs.get('table', None)
    label0 = kwargs.get('label0', None)
    label1 = kwargs.get('label1', None)
    if not (table and label0 and label1):
        return jsviz.interactive_heatmap(frame)

    label_query = ' SELECT %s, %s FROM %s ' % (
        bql_quote_name(label0),
        bql_quote_name(label1),
        bql_quote_name(table),
    )
    labels = utils_bql.cursor_to_df(self._bdb.sql_execute(label_query))
    mapping = dict(zip(labels[label0], labels[label1]))
    frame = frame.replace(
        {frame.columns[0]: mapping, frame.columns[1]: mapping})
    return jsviz.interactive_heatmap(frame)
def sql(self, line, cell=None):
    '''Executes SQL given either as a single `line` or as a `cell`.

    When `cell` is None the whole of `line` is one command; otherwise
    `cell` is split on ';' and every piece is a command.  Each command is
    encoded as US-ASCII and stripped; empty commands are skipped.
    Commands starting with '.' are dispatched to self._cmd with sql=True,
    anything else goes to sql_execute.

    Returns the dataframe for the cursor of the last command when that
    command was an SQL statement, and None otherwise.
    '''
    pieces = [line] if cell is None else cell.split(';')
    # Encode everything up front so an encoding failure happens before any
    # command has been executed.
    commands = []
    for piece in pieces:
        commands.append(piece.encode('US-ASCII').strip())

    last_cursor = None
    for command in commands:
        if len(command) == 0 or command.isspace():
            continue
        if not command.startswith('.'):
            last_cursor = self._bdb.sql_execute(command)
        else:
            self._cmd(command, sql=True)
            last_cursor = None

    if not last_cursor:
        return None
    return utils_bql.cursor_to_df(last_cursor)
def _cmd_interactive_pairplot(self, query, sql=None, **kwargs):
    '''Runs `query` and passes the result, together with the schema of
    the given population, to jsviz.interactive_pairplot.

    Requires the keyword argument `population`; raises ValueError when it
    is missing.  Result columns whose case-folded name matches no schema
    entry are reported and removed.  Matching schema entries have their
    'name' rewritten to the spelling used by the result column.
    '''
    population = kwargs.get('population', None)
    if population is None:
        raise ValueError('Specify --population=<name> argument.')

    if sql:
        cursor = self._bdb.sql_execute(query)
    else:
        cursor = self._bdb.execute(query)
    df = utils_bql.cursor_to_df(cursor)
    schema = utils_mml.get_schema_as_list(self._bdb, population)

    for colname in df.columns:
        modelled = False
        for entry in schema:
            if casefold(entry['name']) != casefold(colname):
                continue
            modelled = True
            # Adopt the column's own spelling in the schema entry.
            entry['name'] = colname
        if not modelled:
            print("Ignoring non-modelled column %s" % (colname, ))
            del df[colname]
    return jsviz.interactive_pairplot(df, schema)
def _cmd_regress_sql(self, args):
    '''Returns an SQL SELECT using the regression coefficients.

    Usage: .regress_sql --table=<table> REGRESS ...

    `args` is the raw argument string.  Every whitespace-delimited token
    of the form --<key>=<value> is treated as a keyword argument and
    removed; the remaining text is executed as a BQL query.  The resulting
    dataframe and the `table` keyword are handed to
    utils_sql.regression_to_sql, and the query it returns is printed.

    If --table is not supplied, a message is written to stderr and
    nothing is executed.
    '''
    # XXX Copypasta, write a proper parser.
    # Capture key and value as separate groups instead of re-splitting the
    # matched text on '--' and '=': re-splitting yields more than two
    # fields (and makes dict() raise ValueError) whenever a value itself
    # contains '=' or '--'.
    option = re.compile(r'--(\S+?)=(\S*)')
    kwargs = dict(match.groups() for match in option.finditer(args))
    # Remove exactly the matched option tokens from the query text.
    args = option.sub('', args).strip()
    if 'table' not in kwargs:
        self.write_stderr('Please specify --table=')
        return
    c = self._bdb.execute(args)
    df = utils_bql.cursor_to_df(c)
    select_query = utils_sql.regression_to_sql(df, table=kwargs['table'])
    # Parenthesised single-argument form behaves identically as a
    # Python 2 print statement and as a Python 3 print call.
    print(select_query)
def _cmd_population(self, args):
    '''Returns a table of the variables and generators for <population>.

    Usage: .population <population>

    `args` is taken verbatim as the population name.  Raises ValueError
    if bayesdb_has_population reports no such population.
    '''
    if not bayesdb_has_population(self._bdb, args):
        raise ValueError('No such population: %r' % (args, ))
    bindings = {'population_id': bayesdb_get_population(self._bdb, args)}
    # One row per variable (with its stattype) and one per generator.
    query = (
        " SELECT 'variable' AS type, name, stattype AS value"
        " FROM bayesdb_variable"
        " WHERE population_id = :population_id"
        " UNION"
        " SELECT 'generator' AS type, name, generator AS value"
        " FROM bayesdb_generator"
        " WHERE population_id = :population_id "
    )
    return utils_bql.cursor_to_df(self._bdb.sql_execute(query, bindings))
def _cmd_interactive_scatter(self, query, sql=None, **kwargs):
    '''Runs `query` and returns jsviz.interactive_scatter of the result.

    The query is run with sql_execute when `sql` is truthy and with
    execute otherwise.  Extra keyword arguments are accepted but unused.
    '''
    if sql:
        cursor = self._bdb.sql_execute(query)
    else:
        cursor = self._bdb.execute(query)
    return jsviz.interactive_scatter(utils_bql.cursor_to_df(cursor))
def _cmd_histogram_numerical(self, query, sql=None, **kwargs):
    '''Runs `query` and passes the result to
    utils_plot.histogram_numerical.

    The query is run with sql_execute when `sql` is truthy and with
    execute otherwise.  Extra keyword arguments are forwarded to the
    plotting function.  Nothing is returned.
    '''
    if sql:
        cursor = self._bdb.sql_execute(query)
    else:
        cursor = self._bdb.execute(query)
    utils_plot.histogram_numerical(utils_bql.cursor_to_df(cursor), **kwargs)
def _cmd_scatter(self, query, sql=None, **kwargs):
    '''Runs `query` and passes the result to utils_plot.scatter.

    The query is run with sql_execute when `sql` is truthy and with
    execute otherwise.  Extra keyword arguments are forwarded to the
    plotting function.  Nothing is returned.
    '''
    run_query = self._bdb.sql_execute if sql else self._bdb.execute
    frame = utils_bql.cursor_to_df(run_query(query))
    utils_plot.scatter(frame, **kwargs)
def _cmd_clustermap(self, query, sql=None, **kwargs):
    '''Runs `query` and passes the result to utils_plot.clustermap.

    The query is run with sql_execute when `sql` is truthy and with
    execute otherwise.  Extra keyword arguments are accepted but not
    forwarded.  Nothing is returned.
    '''
    run_query = self._bdb.sql_execute if sql else self._bdb.execute
    frame = utils_bql.cursor_to_df(run_query(query))
    utils_plot.clustermap(frame)