Exemple #1
0
def cardinality(bdb, table, cols=None):
    """Compute the number of unique values in the columns of a table.

    Parameters
    ----------
    bdb : bayeslite.BayesDB
        Database handle; only its ``sql_execute`` method is used here.
    table : str
        Name of table.
    cols : list<str>, optional
        Columns to compute the unique values. Defaults to all; an empty
        list is treated the same as ``None``.

    Returns
    -------
    counts : pandas.DataFrame whose .columns are ['name', 'distinct_count'],
        with one row per column, in the order the columns were visited.
    """
    # If no columns specified, use all.
    if not cols:
        sql = 'PRAGMA table_info(%s)' % (quote(table), )
        res = bdb.sql_execute(sql)
        # Field 1 of each table_info row is the column name.
        cols = [r[1] for r in res]

    names = []
    counts = []
    for col in cols:
        sql = '''
            SELECT COUNT (DISTINCT %s) FROM %s
        ''' % (quote(col), quote(table))
        res = bdb.sql_execute(sql)
        names.append(col)
        counts.append(cursor_value(res))

    # Pin the column order explicitly: building the frame from a dict alone
    # leaves the order up to the interpreter's / pandas' dict handling, which
    # does not always yield the documented ['name', 'distinct_count'].
    return pd.DataFrame(
        {'name': names, 'distinct_count': counts},
        columns=['name', 'distinct_count'])
Exemple #2
0
def cardinality(bdb, table, cols=None):
    """Count the distinct values held by each column of a table.

    Parameters
    ----------
    bdb : bayeslite.BayesDB
        Active BayesDB instance.
    table : str
        Name of the table to inspect.
    cols : list<str>, optional
        Columns to count. When omitted (or empty), every column reported
        by ``PRAGMA table_info`` for the table is used.

    Returns
    -------
    counts : list<tuple<str,int>>
        One ``(column, distinct_count)`` pair per column, in the order the
        columns were visited.
    """
    def count_distinct(column):
        # One COUNT(DISTINCT ...) query per column; both identifiers are
        # passed through quote() before being placed in the statement.
        query = """
            SELECT COUNT (DISTINCT %s) FROM %s
        """ % (quote(column), quote(table))
        return cursor_value(bdb.sql_execute(query))

    # No explicit selection: fall back to every column of the table.
    if not cols:
        pragma = "PRAGMA table_info(%s)" % (quote(table),)
        cols = [row[1] for row in bdb.sql_execute(pragma)]

    return [(column, count_distinct(column)) for column in cols]
Exemple #3
0
def nullify(bdb, table, value):
    """Set every cell of a SQL table that equals ``value`` to ``NULL``.

    Each column listed by ``pragma table_info`` for the table is updated
    in turn with its own ``UPDATE`` statement.

    Parameters
    ----------
    bdb : bayeslite.BayesDB
        Database handle; only its ``sql_execute`` method is used here.
    table : str
        The name of the table on which to act
    value : stringable
        The value to replace with ``NULL``. The two-character strings
        ``''`` and ``""`` are special-cased: they match cells holding the
        empty string rather than the quote characters themselves.

    Examples
    --------
    >>> import bayeslite
    >>> with bayeslite.bayesdb_open('mydb.bdb') as bdb:
    ...     nullify(bdb, 'mytable', 'NaN')
    """
    # Collect the column names up front, before issuing any update.
    info = bdb.sql_execute('pragma table_info({})'.format(quote(table)))
    column_names = [row[1] for row in info]

    for name in column_names:
        target, column = quote(table), quote(name)
        if value not in ["''", '""']:
            # Ordinary value: bind it as a statement parameter.
            statement = '''
                UPDATE {} SET {} = NULL WHERE {} = ?;
            '''.format(target, column, column)
            bdb.sql_execute(statement, (value, ))
        else:
            # Quoted-empty-string spelling: compare against '' directly.
            statement = '''
                UPDATE {} SET {} = NULL WHERE {} = '';
            '''.format(target, column, column)
            bdb.sql_execute(statement)
Exemple #4
0
def nullify(bdb, table, value):
    """Set every cell of a SQL table that equals ``value`` to ``NULL``.

    Each column listed by ``pragma table_info`` for the table is updated
    in turn with its own ``UPDATE`` statement.

    Parameters
    ----------
    bdb : bayeslite.BayesDB
        bayesdb database object
    table : str
        The name of the table on which to act
    value : stringable
        The value to replace with ``NULL``. The two-character strings
        ``''`` and ``""`` are special-cased: they match cells holding the
        empty string rather than the quote characters themselves.

    Examples
    --------
    >>> import bayeslite
    >>> with bayeslite.bayesdb_open('mydb.bdb') as bdb:
    ...     nullify(bdb, 'mytable', 'NaN')
    """
    # Collect the column names up front, before issuing any update.
    info = bdb.sql_execute('pragma table_info({})'.format(quote(table)))
    column_names = [row[1] for row in info]

    for name in column_names:
        target, column = quote(table), quote(name)
        if value not in ["''", '""']:
            # Ordinary value: bind it as a statement parameter.
            statement = '''
                UPDATE {} SET {} = NULL WHERE {} = ?;
            '''.format(target, column, column)
            bdb.sql_execute(statement, (value,))
        else:
            # Quoted-empty-string spelling: compare against '' directly.
            statement = '''
                UPDATE {} SET {} = NULL WHERE {} = '';
            '''.format(target, column, column)
            bdb.sql_execute(statement)
Exemple #5
0
def get_data_as_list(bdb, table_name, column_list=None):
    """Fetch the contents of a table as a nested list.

    Parameters
    ----------
    bdb : object
        Database handle; only its ``sql_execute`` method is used here.
    table_name : str
        Name of the table to read. It is passed through ``quote`` before
        being placed in the query, whether or not columns are specified.
    column_list : list<str>, optional
        Names of the columns to select, each quoted individually.
        ``None`` (the default) selects every column with ``*``.

    Returns
    -------
    rows : list
        ``cursor_to_df(cursor).values.tolist()`` for the query result;
        presumably one inner list per row, assuming ``cursor_to_df``
        returns a pandas DataFrame.
    """
    if column_list is None:
        selection = '*'
    else:
        selection = ', '.join(map(quote, column_list))
    # Quote the table name on both paths; previously the explicit-column
    # path interpolated it raw, unlike the SELECT * path.
    sql = 'SELECT {} FROM {}'.format(selection, quote(table_name))
    cursor = bdb.sql_execute(sql)
    return cursor_to_df(cursor).values.tolist()
Exemple #6
0
def get_data_as_list(bdb, table_name, column_list=None):
    """Fetch the contents of a table as a nested list.

    Parameters
    ----------
    bdb : object
        Database handle; only its ``sql_execute`` method is used here.
    table_name : str
        Name of the table to read. It is passed through ``quote`` before
        being placed in the query, whether or not columns are specified.
    column_list : list<str>, optional
        Names of the columns to select, each quoted individually.
        ``None`` (the default) selects every column with ``*``.

    Returns
    -------
    rows : list
        ``cursor_to_df(cursor).values.tolist()`` for the query result;
        presumably one inner list per row, assuming ``cursor_to_df``
        returns a pandas DataFrame.
    """
    if column_list is None:
        selection = '*'
    else:
        selection = ', '.join(map(quote, column_list))
    # Quote the table name on both paths; previously the explicit-column
    # path interpolated it raw, unlike the SELECT * path.
    sql = 'SELECT {} FROM {}'.format(selection, quote(table_name))
    cursor = bdb.sql_execute(sql)
    return cursor_to_df(cursor).values.tolist()