def cardinality(bdb, table, cols=None):
    """Compute the number of unique values in the columns of a table.

    Parameters
    ----------
    bdb : __population_to_bdb__
    table : __population_name__
        Name of table.
    cols : list<str>, optional
        Columns to compute the unique values. Defaults to all.

    Returns
    -------
    counts : pandas.DataFrame whose .columns are ['name', 'distinct_count'].
        One row per requested column, in the order the columns were
        given (or the order ``PRAGMA table_info`` reports them).
    """
    # If no columns specified, use all. The second field of each
    # ``PRAGMA table_info`` row is the column name.
    if not cols:
        sql = 'PRAGMA table_info(%s)' % (quote(table),)
        cols = [row[1] for row in bdb.sql_execute(sql)]

    names = []
    counts = []
    for col in cols:
        sql = '''
            SELECT COUNT (DISTINCT %s) FROM %s
        ''' % (quote(col), quote(table))
        names.append(col)
        counts.append(cursor_value(bdb.sql_execute(sql)))

    # Pin the column order explicitly: a bare dict leaves the order up to
    # dict iteration / pandas key sorting, which would not reliably match
    # the documented ['name', 'distinct_count'] layout.
    return pd.DataFrame(
        {'name': names, 'distinct_count': counts},
        columns=['name', 'distinct_count'])
def cardinality(bdb, table, cols=None):
    """Count the distinct values held by each column of a table.

    Parameters
    ----------
    bdb : bayeslite.BayesDB
        Active BayesDB instance.
    table : str
        Name of table.
    cols : list<str>, optional
        Columns whose distinct values are counted. When omitted (or
        empty), every column reported by ``PRAGMA table_info`` is used.

    Returns
    -------
    counts : list<tuple<str,int>>
        A list of tuples of the form [(col_1, cardinality_1), ...]
    """
    def distinct_count(column):
        # One COUNT(DISTINCT ...) query per column.
        query = """
            SELECT COUNT (DISTINCT %s) FROM %s
        """ % (
            quote(column),
            quote(table),
        )
        return cursor_value(bdb.sql_execute(query))

    # Fall back to every column of the table when none were requested.
    if not cols:
        pragma = "PRAGMA table_info(%s)" % (quote(table),)
        cols = [info[1] for info in bdb.sql_execute(pragma)]

    return [(column, distinct_count(column)) for column in cols]
def nullify(bdb, table, value):
    """Replace specified values in a SQL table with ``NULL``.

    Every column of `table` is scanned, and each cell equal to `value`
    is set to ``NULL``.

    Parameters
    ----------
    bdb : __population_to_bdb__
    table : str
        The name of the table on which to act
    value : stringable
        The value to replace with ``NULL``. The two-character strings
        ``''`` and ``""`` are treated as a request to nullify empty
        strings.

    Examples
    --------
    >>> import bayeslite
    >>> from bdbcontrib import plotutils
    >>> with bayeslite.bayesdb_open('mydb.bdb') as bdb:
    ...    bdbcontrib.nullify(bdb, 'mytable', 'NaN')
    """
    # Column names are the second field of each table_info row.
    table_info = bdb.sql_execute('pragma table_info({})'.format(quote(table)))
    column_names = [info[1] for info in table_info]

    for name in column_names:
        if value in ["''", '""']:
            # A literal pair of quotes means "the empty string".
            statement = '''
                UPDATE {} SET {} = NULL WHERE {} = '';
            '''.format(quote(table), quote(name), quote(name))
            bdb.sql_execute(statement)
            continue

        # Any other value is bound as a query parameter.
        statement = '''
            UPDATE {} SET {} = NULL WHERE {} = ?;
        '''.format(quote(table), quote(name), quote(name))
        bdb.sql_execute(statement, (value, ))
def nullify(bdb, table, value):
    """Replace specified values in a SQL table with ``NULL``.

    Walks over every column of `table` and issues one ``UPDATE`` per
    column that sets matching cells to ``NULL``.

    Parameters
    ----------
    bdb : bayeslite.BayesDB
        bayesdb database object
    table : str
        The name of the table on which to act
    value : stringable
        The value to replace with ``NULL``. Passing the two-character
        string ``''`` or ``""`` nullifies empty strings.

    Examples
    --------
    >>> import bayeslite
    >>> from bdbcontrib import plotutils
    >>> with bayeslite.bayesdb_open('mydb.bdb') as bdb:
    ...    bdbcontrib.nullify(bdb, 'mytable', 'NaN')
    """
    # Collect the column names (second field of each table_info row).
    pragma_rows = bdb.sql_execute('pragma table_info({})'.format(quote(table)))
    targets = [row[1] for row in pragma_rows]

    for target in targets:
        if value in ["''", '""']:
            # Quoted-empty marker: compare against the empty string
            # literal, no bindings needed.
            template = '''
                UPDATE {} SET {} = NULL WHERE {} = '';
            '''
            bindings = ()
        else:
            # Ordinary value: pass it as a single bound parameter.
            template = '''
                UPDATE {} SET {} = NULL WHERE {} = ?;
            '''
            bindings = ((value,),)

        update = template.format(quote(table), quote(target), quote(target))
        call_args = (update,) + bindings
        bdb.sql_execute(*call_args)
def get_data_as_list(bdb, table_name, column_list=None):
    """Fetch the contents of a table as a list of row lists.

    Parameters
    ----------
    bdb : object providing ``sql_execute``
        Database handle used to run the query.
    table_name : str
        Name of the table to read. It is passed through ``quote`` before
        being placed in the query, whether or not columns are given.
    column_list : list<str>, optional
        Names of the columns to select, in the order they should appear
        in each row. When None, all columns are selected (``SELECT *``).
        If given, it should be non-empty; an empty list yields a query
        with no select list.

    Returns
    -------
    T : list<list>
        One inner list per row, obtained from
        ``cursor_to_df(cursor).values.tolist()``.
    """
    if column_list is None:
        selection = '*'
    else:
        selection = ', '.join(map(quote, column_list))

    # Quote the table name on both paths; previously only the
    # ``SELECT *`` path did so, so a table name that needs quoting
    # failed as soon as explicit columns were requested.
    sql = '''
        SELECT {} FROM {}
    '''.format(selection, quote(table_name))

    cursor = bdb.sql_execute(sql)
    return cursor_to_df(cursor).values.tolist()