Exemple #1
0
 def dump_session_as_json(self, session_id):
     """Returns a JSON string representing the list of SQL or BQL entries
     (e.g.  queries) executed within session `session_id`."""
     if session_id > self.session_id or session_id < 1:
         raise ValueError('No such session (%d)' % session_id)
     version = cursor_value(self._sql('''
         SELECT version FROM bayesdb_session
             WHERE id = ?
     ''', (session_id,)))
     cursor = self._sql('''
         SELECT * FROM bayesdb_session_entries
             WHERE session_id = ?
             ORDER BY start_time DESC
     ''', (session_id,))
     # XXX Get the description first because apsw cursors, for
     # whatever reason, don't let you get the description after
     # you've gotten all the results.
     # (see also bql.py BayesDBCursor.__init__)
     fields = []
     try:
         fields = [d[0] for d in cursor.description]
     except apsw.ExecutionCompleteError:
         pass # Probably no rows.
     entries = cursor.fetchall()
     session = {
         'entries': entries,
         'fields': fields,
         'version': version,
     }
     return json.dumps(session, sort_keys=True)
Exemple #2
0
 def _get_num_models(self, bdb, generator_id):
     cursor = bdb.sql_execute('''
         SELECT num_models
         FROM bayesdb_loom_generator_model_info
         WHERE generator_id = ?
     ''', (generator_id,))
     return cursor_value(cursor)
Exemple #3
0
def bayesdb_variable_name(bdb, population_id, colno):
    """Return the name a population variable."""
    cursor = bdb.sql_execute(
        '''
        SELECT name FROM bayesdb_variable WHERE population_id = ? AND colno = ?
    ''', (population_id, colno))
    return cursor_value(cursor)
Exemple #4
0
def bayesdb_has_generator(bdb, name):
    """True if there is a generator named `name` in `bdb`.

    Only actual generator names are considered.
    """
    sql = 'SELECT COUNT(*) FROM bayesdb_generator WHERE name = ?'
    return 0 != cursor_value(bdb.sql_execute(sql, (name,)))
Exemple #5
0
def cardinality(bdb, table, cols=None):
    """Compute the number of unique values in the columns of a table.

    Parameters
    ----------
    bdb : bayeslite.BayesDB
        Active BayesDB instance.
    table : str
        Name of table.
    cols : list<str>, optional
        Columns to compute the unique values. Defaults to all.

    Returns
    -------
    counts : list<tuple<str,int>>
        A list of tuples of the form [(col_1, cardinality_1), ...]
    """
    # If no columns specified, use all.
    if not cols:
        sql = "PRAGMA table_info(%s)" % (quote(table),)
        res = bdb.sql_execute(sql)
        cols = [r[1] for r in res]

    counts = []
    for col in cols:
        sql = """
            SELECT COUNT (DISTINCT %s) FROM %s
        """ % (
            quote(col),
            quote(table),
        )
        res = bdb.sql_execute(sql)
        counts.append((col, cursor_value(res)))

    return counts
Exemple #6
0
    def _add_entry(self, qid, type, query, bindings):
        '''Save a session entry into the database. The entry is initially in
        the not-completed state. Return the new entry's id so that it can be
        set to completed when appropriate.

        qid: str, num, or anything that can be used as a hash key.
          Any identifier unique to this query.
        type: str
          The class of query, usually "bql" or "sql". Case-sensitive.
        query: str
          The text of the query, perhaps with unbound positional references.
        bindings: iterable(str)
          Fillers for unbound references in query, if any.
        '''
        # check for errors on this session and suggest if we haven't already
        if not self._suggested_send:
            self._check_error_entries(self.session_id)
        data = query + json.dumps(bindings)
        self._sql('''
            INSERT INTO bayesdb_session_entries
                (session_id, type, data, start_time)
                VALUES (?,?,?,?)
        ''', (self.session_id, type, data, time.time()))
        entry_id = cursor_value(self._sql('SELECT last_insert_rowid()'))
        self._qid_to_entry_id[qid] = entry_id
Exemple #7
0
    def _add_entry(self, qid, type, query, bindings):
        '''Save a session entry into the database. The entry is initially in
        the not-completed state. Return the new entry's id so that it can be
        set to completed when appropriate.

        qid: str, num, or anything that can be used as a hash key.
          Any identifier unique to this query.
        type: str
          The class of query, usually "bql" or "sql". Case-sensitive.
        query: str
          The text of the query, perhaps with unbound positional references.
        bindings: iterable(str)
          Fillers for unbound references in query, if any.
        '''
        # check for errors on this session and suggest if we haven't already
        if not self._suggested_send:
            self._check_error_entries(self.session_id)
        data = query + json.dumps(bindings)
        self._sql(
            '''
            INSERT INTO bayesdb_session_entries
                (session_id, type, data, start_time)
                VALUES (?,?,?,?)
        ''', (self.session_id, type, data, time.time()))
        entry_id = cursor_value(self._sql('SELECT last_insert_rowid()'))
        self._qid_to_entry_id[qid] = entry_id
Exemple #8
0
 def dump_session_as_json(self, session_id):
     """Returns a JSON string representing the list of SQL or BQL entries
     (e.g.  queries) executed within session `session_id`."""
     if session_id > self.session_id or session_id < 1:
         raise ValueError('No such session (%d)' % session_id)
     version = cursor_value(
         self._sql(
             '''
         SELECT version FROM bayesdb_session
             WHERE id = ?
     ''', (session_id, )))
     cursor = self._sql(
         '''
         SELECT * FROM bayesdb_session_entries
             WHERE session_id = ?
             ORDER BY start_time DESC
     ''', (session_id, ))
     # XXX Get the description first because apsw cursors, for
     # whatever reason, don't let you get the description after
     # you've gotten all the results.
     # (see also bql.py BayesDBCursor.__init__)
     fields = []
     try:
         fields = [d[0] for d in cursor.description]
     except apsw.ExecutionCompleteError:
         pass  # Probably no rows.
     entries = cursor.fetchall()
     session = {
         'entries': entries,
         'fields': fields,
         'version': version,
     }
     return json.dumps(session, sort_keys=True)
Exemple #9
0
def bayesdb_generator_column_stattype(bdb, generator_id, colno):
    """Return the statistical type of the column `colno` in `generator_id`."""
    sql = '''
        SELECT stattype FROM bayesdb_generator_column
            WHERE generator_id = ? AND colno = ?
    '''
    cursor = bdb.sql_execute(sql, (generator_id, colno))
    try:
        row = cursor.next()
    except StopIteration:
        generator = bayesdb_generator_name(bdb, generator_id)
        sql = '''
            SELECT COUNT(*)
                FROM bayesdb_generator AS g, bayesdb_column AS c
                WHERE g.id = :generator_id
                    AND g.tabname = c.tabname
                    AND c.colno = :colno
        '''
        cursor = bdb.sql_execute(sql, {
            'generator_id': generator_id,
            'colno': colno,
        })
        if cursor_value(cursor) == 0:
            raise ValueError('No such column in generator %s: %d' %
                (generator, colno))
        else:
            raise ValueError('Column not modelled in generator %s: %d' %
                (generator, colno))
    else:
        assert len(row) == 1
        return row[0]
Exemple #10
0
def bayesdb_has_latent(bdb, population_id, var):
    """True if the population has a latent variable by the given name."""
    cursor = bdb.sql_execute('''
        SELECT COUNT(*) FROM bayesdb_variable
            WHERE population_id = ? AND name = ? AND generator_id IS NOT NULL
    ''', (population_id, var))
    return cursor_value(cursor)
 def _get_name(self, bdb, generator_id):
     return cursor_value(
         bdb.sql_execute(
             '''
         SELECT name FROM bayesdb_loom_generator
         WHERE generator_id = ?
     ''', (generator_id, )))
Exemple #12
0
def bayesdb_variable_stattype(bdb, population_id, generator_id, colno):
    """Return the statistical type of a population variable."""
    sql = '''
        SELECT stattype FROM bayesdb_variable
            WHERE population_id = ?
                AND (generator_id IS NULL OR generator_id = ?)
                AND colno = ?
    '''
    cursor = bdb.sql_execute(sql, (population_id, generator_id, colno))
    try:
        row = cursor.next()
    except StopIteration:
        population = bayesdb_population_name(bdb, population_id)
        sql = '''
            SELECT COUNT(*)
                FROM bayesdb_population AS p, bayesdb_column AS c
                WHERE p.id = :population_id
                    AND p.tabname = c.tabname
                    AND c.colno = :colno
        '''
        cursor = bdb.sql_execute(sql, {
            'population_id': population_id,
            'colno': colno,
        })
        if cursor_value(cursor) == 0:
            raise ValueError('No such variable in population %s: %d'
                % (population, colno))
        else:
            raise ValueError('Variable not modeled in population %s: %d'
                % (population, colno))
    else:
        assert len(row) == 1
        return row[0]
Exemple #13
0
def bayesdb_has_latent(bdb, population_id, var):
    """True if the population has a latent variable by the given name."""
    cursor = bdb.sql_execute('''
        SELECT COUNT(*) FROM bayesdb_variable
            WHERE population_id = ? AND name = ? AND generator_id IS NOT NULL
    ''', (population_id, var))
    return cursor_value(cursor)
Exemple #14
0
def bayesdb_generator_has_model(bdb, generator_id, modelno):
    """True if `generator_id` has a model numbered `modelno`."""
    sql = '''
        SELECT COUNT(*) FROM bayesdb_generator_model AS m
            WHERE generator_id = ? AND modelno = ?
    '''
    return cursor_value(bdb.sql_execute(sql, (generator_id, modelno)))
Exemple #15
0
def cardinality(bdb, table, cols=None):
    """Compute the number of unique values in the columns of a table.

    Parameters
    ----------
    bdb : __population_to_bdb__
    table : __population_name__
        Name of table.
    cols : list<str>, optional
        Columns to compute the unique values. Defaults to all.

    Returns
    -------
    counts : pandas.DataFrame whose .columns are ['name', 'distinct_count'].
    """
    # If no columns specified, use all.
    if not cols:
        sql = 'PRAGMA table_info(%s)' % (quote(table), )
        res = bdb.sql_execute(sql)
        cols = [r[1] for r in res]

    names = []
    counts = []
    for col in cols:
        sql = '''
            SELECT COUNT (DISTINCT %s) FROM %s
        ''' % (quote(col), quote(table))
        res = bdb.sql_execute(sql)
        names.append(col)
        counts.append(cursor_value(res))
    return pd.DataFrame({'name': names, 'distinct_count': counts})
Exemple #16
0
    def _schema(self, bdb, generator_id):
        # Probe the cache.
        cache = self._cache(bdb)
        if cache is not None:
            if generator_id in cache.schema:
                return cache.schema[generator_id]

        # Not cached.  Load the schema from the database.
        cursor = bdb.sql_execute(
            '''
            SELECT schema_json FROM bayesdb_cgpm_generator
                WHERE generator_id = ?
        ''', (generator_id, ))
        schema_json = cursor_value(cursor, nullok=True)
        if schema_json is None:
            generator = core.bayesdb_generator_name(bdb, generator_id)
            raise BQLError(bdb, 'No such CGPM generator: %r' % (generator, ))

        # Deserialize the schema.
        schema = json.loads(schema_json)

        # Cache it, if we can.
        if cache is not None:
            cache.schema[generator_id] = schema
        return schema
Exemple #17
0
def _default_categorical(bdb, generator_id, var):
    table = core.bayesdb_generator_table(bdb, generator_id)
    qt = sqlite3_quote_name(table)
    qv = sqlite3_quote_name(var)
    cursor = bdb.sql_execute('SELECT COUNT(DISTINCT %s) FROM %s' % (qv, qt))
    k = cursor_value(cursor)
    return 'categorical', {'k': k}
Exemple #18
0
def bayesdb_generator_column_stattype(bdb, generator_id, colno):
    """Return the statistical type of the column `colno` in `generator_id`."""
    sql = '''
        SELECT stattype FROM bayesdb_generator_column
            WHERE generator_id = ? AND colno = ?
    '''
    cursor = bdb.sql_execute(sql, (generator_id, colno))
    try:
        row = cursor.next()
    except StopIteration:
        generator = bayesdb_generator_name(bdb, generator_id)
        sql = '''
            SELECT COUNT(*)
                FROM bayesdb_generator AS g, bayesdb_column AS c
                WHERE g.id = :generator_id
                    AND g.tabname = c.tabname
                    AND c.colno = :colno
        '''
        cursor = bdb.sql_execute(sql, {
            'generator_id': generator_id,
            'colno': colno,
        })
        if cursor_value(cursor) == 0:
            raise ValueError('No such column in generator %s: %d' %
                             (generator, colno))
        else:
            raise ValueError('Column not modelled in generator %s: %d' %
                             (generator, colno))
    else:
        assert len(row) == 1
        return row[0]
Exemple #19
0
    def _engine(self, bdb, generator_id):
        # Probe the cache.
        cache = self._cache(bdb)
        if cache is not None and generator_id in cache.engine:
            return cache.engine[generator_id]

        # Not cached.  Load the engine from the database.
        cursor = bdb.sql_execute(
            '''
            SELECT engine_json FROM bayesdb_cgpm_generator
                WHERE generator_id = ?
        ''', (generator_id, ))
        engine_json = cursor_value(cursor)
        if engine_json is None:
            generator = core.bayesdb_generator_name(bdb, generator_id)
            raise BQLError(
                bdb, 'No models initialized for generator: %r' % (generator, ))

        # Deserialize the engine.
        engine = Engine.from_metadata(json.loads(engine_json),
                                      rng=bdb.np_prng,
                                      multiprocess=self._ncpu)

        # Cache it, if we can.
        if cache is not None:
            cache.engine[generator_id] = engine
        return engine
Exemple #20
0
def bayesdb_has_generator_default(bdb, name):
    """True if there is a generator or default-modelled table named `name`."""
    sql = '''
        SELECT COUNT(*) FROM bayesdb_generator
            WHERE name = :name OR (defaultp AND tabname = :name)
    '''
    return 0 != cursor_value(bdb.sql_execute(sql, {'name': name}))
def test_math_func_one_param(name, probe):
    # Retrieve result from python.
    python_value_error = None
    python_type_error = None
    try:
        result_python = get_python_math_call(name, probe)
    except ValueError:
        python_value_error = True
    except TypeError:
        python_type_error = True

    # Retrieve result from SQL.
    sql_value_error = None
    sql_type_error = None
    try:
        with bayesdb_open(':memory:') as bdb:
            cursor = bdb.execute(get_sql_math_call(name, probe))
            result_sql = cursor_value(cursor)
    except ValueError:
        sql_value_error = True
    except (TypeError, apsw.SQLError):
        sql_type_error = True

    # Domain error on both.
    if python_value_error or sql_value_error:
        assert python_value_error and sql_value_error
    # Arity error on both.
    elif python_type_error or sql_type_error:
        assert python_type_error and sql_type_error
    # Both invocations succeeded, confirm results match.
    else:
        assert abserr(result_python, result_sql) < 1e-4
Exemple #22
0
def bayesdb_variable_stattype(bdb, population_id, generator_id, colno):
    """Return the statistical type of a population variable."""
    sql = '''
        SELECT stattype FROM bayesdb_variable
            WHERE population_id = ?
                AND (generator_id IS NULL OR generator_id = ?)
                AND colno = ?
    '''
    cursor = bdb.sql_execute(sql, (population_id, generator_id, colno))
    try:
        row = cursor.next()
    except StopIteration:
        population = bayesdb_population_name(bdb, population_id)
        sql = '''
            SELECT COUNT(*)
                FROM bayesdb_population AS p, bayesdb_column AS c
                WHERE p.id = :population_id
                    AND p.tabname = c.tabname
                    AND c.colno = :colno
        '''
        cursor = bdb.sql_execute(sql, {
            'population_id': population_id,
            'colno': colno,
        })
        if cursor_value(cursor) == 0:
            raise ValueError('No such variable in population %s: %d'
                % (population, colno))
        else:
            raise ValueError('Variable not modeled in population %s: %d'
                % (population, colno))
    else:
        assert len(row) == 1
        return row[0]
Exemple #23
0
def bayesdb_generator_has_model(bdb, generator_id, modelno):
    """True if `generator_id` has a model numbered `modelno`."""
    sql = '''
        SELECT COUNT(*) FROM bayesdb_generator_model AS m
            WHERE generator_id = ? AND modelno = ?
    '''
    return cursor_value(bdb.sql_execute(sql, (generator_id, modelno)))
Exemple #24
0
def test_math_func_one_param(name, probe):
    # Retrieve result from python.
    python_value_error = None
    python_type_error = None
    try:
        result_python = get_python_math_call(name, probe)
    except ValueError:
        python_value_error = True
    except TypeError:
        python_type_error = True

    # Retrieve result from SQL.
    sql_value_error = None
    sql_type_error = None
    try:
        with bayesdb_open(':memory:') as bdb:
            cursor = bdb.execute(get_sql_math_call(name, probe))
            result_sql = cursor_value(cursor)
    except ValueError:
        sql_value_error = True
    except (TypeError, apsw.SQLError):
        sql_type_error = True

    # Domain error on both.
    if python_value_error or sql_value_error:
        assert python_value_error and sql_value_error
    # Arity error on both.
    elif python_type_error or sql_type_error:
        assert python_type_error and sql_type_error
    # Both invocations succeeded, confirm results match.
    else:
        assert abserr(result_python, result_sql) < 1e-4
Exemple #25
0
def bayesdb_has_generator_default(bdb, name):
    """True if there is a generator or default-modelled table named `name`."""
    sql = '''
        SELECT COUNT(*) FROM bayesdb_generator
            WHERE name = :name OR (defaultp AND tabname = :name)
    '''
    return 0 != cursor_value(bdb.sql_execute(sql, {'name': name}))
Exemple #26
0
def bayesdb_has_generator(bdb, name):
    """True if there is a generator named `name` in `bdb`.

    Only actual generator names are considered.
    """
    sql = 'SELECT COUNT(*) FROM bayesdb_generator WHERE name = ?'
    return 0 != cursor_value(bdb.sql_execute(sql, (name, )))
Exemple #27
0
 def _cgpm_rowid(self, bdb, generator_id, table_rowid):
     cursor = bdb.sql_execute(
         '''
         SELECT cgpm_rowid FROM bayesdb_cgpm_individual
             WHERE generator_id = ? AND table_rowid = ?
     ''', (generator_id, table_rowid))
     cgpm_rowid = cursor_value(cursor, nullok=True)
     return cgpm_rowid if cgpm_rowid is not None else -1
Exemple #28
0
def bayesdb_generator_fresh_row_id(bdb, generator_id):
    table_name = bayesdb_generator_table(bdb, generator_id)
    qt = sqlite3_quote_name(table_name)
    cursor = bdb.sql_execute('SELECT MAX(_rowid_) FROM %s' % (qt, ))
    max_rowid = cursor_value(cursor)
    if max_rowid is None:
        max_rowid = 0
    return max_rowid + 1  # Synthesize a non-existent SQLite row id
 def _get_num_models(self, bdb, generator_id):
     cursor = bdb.sql_execute(
         '''
         SELECT num_models
         FROM bayesdb_loom_generator_model_info
         WHERE generator_id = ?
     ''', (generator_id, ))
     return cursor_value(cursor)
Exemple #30
0
 def _get_is_incorporated_rowid(self, bdb, generator_id, rowid):
     """Return True iff the rowid is incorporated in the loom model."""
     cursor = bdb.sql_execute('''
         SELECT COUNT(*)
         FROM bayesdb_loom_rowid_mapping
         WHERE generator_id = ? AND table_rowid = ?
     ''', (generator_id, rowid))
     return cursor_value(cursor) > 0
Exemple #31
0
def bayesdb_generator_fresh_row_id(bdb, generator_id):
    table_name = bayesdb_generator_table(bdb, generator_id)
    qt = sqlite3_quote_name(table_name)
    cursor = bdb.sql_execute('SELECT MAX(_rowid_) FROM %s' % (qt,))
    max_rowid = cursor_value(cursor)
    if max_rowid is None:
        max_rowid = 0
    return max_rowid + 1   # Synthesize a non-existent SQLite row id
Exemple #32
0
def bayesdb_population_fresh_row_id(bdb, population_id):
    """Return one plus maximum rowid in base table of given `population_id`."""
    table_name = bayesdb_population_table(bdb, population_id)
    qt = sqlite3_quote_name(table_name)
    cursor = bdb.sql_execute('SELECT MAX(_rowid_) FROM %s' % (qt,))
    max_rowid = cursor_value(cursor)
    if max_rowid is None:
        max_rowid = 0
    return max_rowid + 1   # Synthesize a non-existent SQLite row id
Exemple #33
0
def bayesdb_variable_number(bdb, population_id, generator_id, name):
    """Return the column number of a population variable."""
    cursor = bdb.sql_execute('''
        SELECT colno FROM bayesdb_variable
            WHERE population_id = ?
                AND (generator_id IS NULL OR generator_id = ?)
                AND name = ?
    ''', (population_id, generator_id, name))
    return cursor_value(cursor)
Exemple #34
0
def bayesdb_variable_name(bdb, population_id, generator_id, colno):
    """Return the name a population variable."""
    cursor = bdb.sql_execute('''
        SELECT name FROM bayesdb_variable
            WHERE population_id = ?
                AND (generator_id IS NULL OR generator_id = ?)
                AND colno = ?
    ''', (population_id, generator_id, colno))
    return cursor_value(cursor)
Exemple #35
0
def bayesdb_variable_number(bdb, population_id, generator_id, name):
    """Return the column number of a population variable."""
    cursor = bdb.sql_execute('''
        SELECT colno FROM bayesdb_variable
            WHERE population_id = ?
                AND (generator_id IS NULL OR generator_id = ?)
                AND name = ?
    ''', (population_id, generator_id, name))
    return cursor_value(cursor)
Exemple #36
0
 def _get_loom_rank(self, bdb, generator_id, colno):
     """Return the loom rank (column number) for the given colno."""
     cursor = bdb.sql_execute('''
         SELECT rank
         FROM bayesdb_loom_column_ordering
         WHERE generator_id = ?
             AND colno = ?
     ''', (generator_id, colno,))
     return cursor_value(cursor)
Exemple #37
0
def bayesdb_population_fresh_row_id(bdb, population_id):
    """Return one plus maximum rowid in base table of given `population_id`."""
    table_name = bayesdb_population_table(bdb, population_id)
    qt = sqlite3_quote_name(table_name)
    cursor = bdb.sql_execute('SELECT MAX(_rowid_) FROM %s' % (qt,))
    max_rowid = cursor_value(cursor)
    if max_rowid is None:
        max_rowid = 0
    return max_rowid + 1   # Synthesize a non-existent SQLite row id
 def _get_is_incorporated_rowid(self, bdb, generator_id, rowid):
     """Return True iff the rowid is incorporated in the loom model."""
     cursor = bdb.sql_execute(
         '''
         SELECT COUNT(*)
         FROM bayesdb_loom_rowid_mapping
         WHERE generator_id = ? AND table_rowid = ?
     ''', (generator_id, rowid))
     return cursor_value(cursor) > 0
Exemple #39
0
 def _get_integer_form(self, bdb, generator_id, colno, string_form):
     """Return integer code representing the string."""
     cursor = bdb.sql_execute('''
         SELECT integer_form
         FROM bayesdb_loom_string_encoding
         WHERE generator_id = ?
             AND colno = ?
             AND string_form = ?
     ''', (generator_id, colno, string_form,))
     return cursor_value(cursor)
Exemple #40
0
def bayesdb_table_has_rowid(bdb, table, rowid):
    """True if the table named `table` has record with given rowid.

    `bdb` must have a table named `table`.  If you're not sure, call
    :func:`bayesdb_has_table` first.
    """
    qt = sqlite3_quote_name(table)
    sql = 'SELECT COUNT(*) FROM %s WHERE oid = ?'
    cursor = bdb.sql_execute(sql % (qt,), (rowid,))
    return cursor_value(cursor) != 0
Exemple #41
0
 def _get_kind_id(self, bdb, generator_id, modelno, colno):
     """Return kind_id (view assignment) of colno in modelno."""
     cursor = bdb.sql_execute('''
         SELECT kind_id
         FROM bayesdb_loom_column_kind_partition
         WHERE generator_id = ?
             AND modelno = ?
             AND colno = ?
     ''', (generator_id, modelno, colno,))
     return cursor_value(cursor)
Exemple #42
0
def bayesdb_table_has_rowid(bdb, table, rowid):
    """True if the table named `table` has record with given rowid.

    `bdb` must have a table named `table`.  If you're not sure, call
    :func:`bayesdb_has_table` first.
    """
    qt = sqlite3_quote_name(table)
    sql = 'SELECT COUNT(*) FROM %s WHERE oid = ?'
    cursor = bdb.sql_execute(sql % (qt,), (rowid,))
    return cursor_value(cursor) != 0
 def _get_partition_id(self, bdb, generator_id, modelno, kind_id, rowid):
     cursor = bdb.sql_execute(
         '''
         SELECT partition_id
         FROM bayesdb_loom_row_kind_partition
         WHERE generator_id = ?
             AND modelno = ?
             AND kind_id = ?
             AND rowid = ?
     ''', (generator_id, modelno, kind_id, rowid))
     return cursor_value(cursor)
Exemple #44
0
 def _simulate_1(self, bdb, generator_id, mus, sigmas, colno):
     if colno < 0:
         dev_colno = colno
         cursor = bdb.sql_execute('''
             SELECT observed_colno FROM bayesdb_nig_normal_deviation
                 WHERE generator_id = ? AND deviation_colno = ?
         ''', (generator_id, dev_colno))
         obs_colno = cursor_value(cursor)
         return self.prng.gauss(0, sigmas[obs_colno])
     else:
         return self.prng.gauss(mus[colno], sigmas[colno])
Exemple #45
0
 def _logpdf_1(self, bdb, generator_id, mus, sigmas, colno, x):
     if colno < 0:
         dev_colno = colno
         cursor = bdb.sql_execute('''
             SELECT observed_colno FROM bayesdb_nig_normal_deviation
                 WHERE generator_id = ? AND deviation_colno = ?
         ''', (generator_id, dev_colno))
         obs_colno = cursor_value(cursor)
         return logpdf_gaussian(x, 0, sigmas[obs_colno])
     else:
         return logpdf_gaussian(x, mus[colno], sigmas[colno])
Exemple #46
0
 def _simulate_1(self, bdb, generator_id, mus, sigmas, colno):
     if colno < 0:
         dev_colno = colno
         cursor = bdb.sql_execute('''
             SELECT observed_colno FROM bayesdb_nig_normal_deviation
                 WHERE generator_id = ? AND deviation_colno = ?
         ''', (generator_id, dev_colno))
         obs_colno = cursor_value(cursor)
         return self.prng.gauss(0, sigmas[obs_colno])
     else:
         return self.prng.gauss(mus[colno], sigmas[colno])
Exemple #47
0
 def _logpdf_1(self, bdb, generator_id, mus, sigmas, colno, x):
     if colno < 0:
         dev_colno = colno
         cursor = bdb.sql_execute('''
             SELECT observed_colno FROM bayesdb_nig_normal_deviation
                 WHERE generator_id = ? AND deviation_colno = ?
         ''', (generator_id, dev_colno))
         obs_colno = cursor_value(cursor)
         return logpdf_gaussian(x, 0, sigmas[obs_colno])
     else:
         return logpdf_gaussian(x, mus[colno], sigmas[colno])
Exemple #48
0
 def _get_partition_id(self, bdb, generator_id, modelno, kind_id, rowid):
     """Return row partition_id of given rowid, within kind_id of modelno."""
     cursor = bdb.sql_execute('''
         SELECT partition_id
         FROM bayesdb_loom_row_kind_partition
         WHERE generator_id = ?
             AND modelno = ?
             AND kind_id = ?
             AND table_rowid = ?
     ''', (generator_id, modelno, kind_id, rowid))
     return cursor_value(cursor)
 def _get_partition_id(self, bdb, generator_id, modelno, kind_id, rowid):
     """Return row partition_id of given rowid, within kind_id of modelno."""
     cursor = bdb.sql_execute(
         '''
         SELECT partition_id
         FROM bayesdb_loom_row_kind_partition
         WHERE generator_id = ?
             AND modelno = ?
             AND kind_id = ?
             AND table_rowid = ?
     ''', (generator_id, modelno, kind_id, rowid))
     return cursor_value(cursor)
 def _get_loom_rank(self, bdb, generator_id, colno):
     cursor = bdb.sql_execute(
         '''
         SELECT rank
         FROM bayesdb_loom_column_ordering
         WHERE generator_id = ?
             AND colno = ?
     ''', (
             generator_id,
             colno,
         ))
     return cursor_value(cursor)
Exemple #51
0
 def _check_error_entries(self, session_id):
     '''Check if the previous session contains queries that resulted in
     errors and suggest sending the session'''
     error_entries = cursor_value(self._sql('''
         SELECT COUNT(*) FROM bayesdb_session_entries
             WHERE error IS NOT NULL AND session_id = ?
     ''', (session_id,)))
     # suggest sending sessions but don't suggest more than once
     if (error_entries > 0 and not self._suggested_send):
         self._logger.warn(_error_previous_session_msg)
         self._suggested_send = True
     return error_entries
Exemple #52
0
def bayesdb_table_has_column(bdb, table, name):
    """True if the table named `table` has a column named `name`.

    `bdb` must have a table named `table`.  If you're not sure, call
    :func:`bayesdb_has_table` first.

    WARNING: This may modify the database by populating the
    ``bayesdb_column`` table if it has not yet been populated.
    """
    bayesdb_table_guarantee_columns(bdb, table)
    sql = 'SELECT COUNT(*) FROM bayesdb_column WHERE tabname = ? AND name = ?'
    return cursor_value(bdb.sql_execute(sql, (table, name)))
Exemple #53
0
def bayesdb_has_generator(bdb, population_id, name):
    """True if there is a generator named `name` in `bdb`."""
    if population_id is None:
        sql = 'SELECT COUNT(*) FROM bayesdb_generator WHERE name = ?'
        cursor = bdb.sql_execute(sql, (name, ))
    else:
        sql = '''
            SELECT COUNT(*) FROM bayesdb_generator
                WHERE name = ? AND population_id = ?
        '''
        cursor = bdb.sql_execute(sql, (name, population_id))
    return 0 != cursor_value(cursor)
Exemple #54
0
def bayesdb_table_has_column(bdb, table, name):
    """True if the table named `table` has a column named `name`.

    `bdb` must have a table named `table`.  If you're not sure, call
    :func:`bayesdb_has_table` first.

    WARNING: This may modify the database by populating the
    ``bayesdb_column`` table if it has not yet been populated.
    """
    bayesdb_table_guarantee_columns(bdb, table)
    sql = 'SELECT COUNT(*) FROM bayesdb_column WHERE tabname = ? AND name = ?'
    return cursor_value(bdb.sql_execute(sql, (table, name)))
Exemple #55
0
def bayesdb_has_variable(bdb, population_id, generator_id, name):
    """True if the population has a given variable.

    generator_id is None for manifest variables and the id of a
    generator for variables that may be latent.
    """
    cursor = bdb.sql_execute('''
        SELECT COUNT(*) FROM bayesdb_variable
            WHERE population_id = ?
                AND (generator_id IS NULL OR generator_id = ?)
                AND name = ?
    ''', (population_id, generator_id, name))
    return cursor_value(cursor) != 0
Exemple #56
0
def bayesdb_has_variable(bdb, population_id, generator_id, name):
    """True if the population has a given variable.

    generator_id is None for manifest variables and the id of a
    generator for variables that may be latent.
    """
    cursor = bdb.sql_execute('''
        SELECT COUNT(*) FROM bayesdb_variable
            WHERE population_id = ?
                AND (generator_id IS NULL OR generator_id = ?)
                AND name = ?
    ''', (population_id, generator_id, name))
    return cursor_value(cursor) != 0
Exemple #57
0
 def run_queries(target, m):
     extra = 'MODELED BY %s' % (m,) if m is not None else ''
     bdb.execute('''
         ESTIMATE PROBABILITY DENSITY OF %s = 1 BY p %s
     ''' % (target, extra,)).fetchall()
     for other in ['age', 'gender', 'salary']:
         cursor = bdb.execute('''
             ESTIMATE DEPENDENCE PROBABILITY OF %s WITH %s
             BY p %s
         ''' % (target, other, extra))
         assert cursor_value(cursor) >= 0
     bdb.execute('''
         ESTIMATE SIMILARITY IN THE CONTEXT OF %s
         FROM PAIRWISE p %s;
     ''' % (target, extra,)).fetchall()
def _basic_test_trace(executor):

    # a new session is automatically initialized with id 1
    assert get_num_sessions(executor) == 1

    # the above select query counts should become one or more entries
    num_entries = cursor_value(executor('''
        SELECT COUNT(*) FROM bayesdb_session_entries
    '''))
    assert num_entries > 0

    # entries are ordered starting from 1
    for id, entry in enumerate(get_entries(executor)):
        assert entry.session_id == 1
        assert entry.id == id + 1
Exemple #59
0
 def _add_entry(self, qid, type, query, bindings):
     '''Save a session entry into the database. The entry is initially in
     the not-completed state. Return the new entry's id so that it can be
     set to completed when appropriate.'''
     # check for errors on this session and suggest if we haven't already
     if not self._suggested_send:
         self._check_error_entries(self.session_id)
     data = query + json.dumps(bindings)
     self._sql('''
         INSERT INTO bayesdb_session_entries
             (session_id, type, data, start_time)
             VALUES (?,?,?,?)
     ''', (self.session_id, type, data, time.time()))
     entry_id = cursor_value(self._sql('SELECT last_insert_rowid()'))
     self._qid_to_entry_id[qid] = entry_id
Exemple #60
0
def bayesdb_generator_has_column(bdb, generator_id, column_name):
    """True if `generator_id` models a column named `name`."""
    sql = '''
        SELECT COUNT(*)
            FROM bayesdb_generator AS g,
                bayesdb_generator_column as gc,
                bayesdb_column AS c
            WHERE g.id = :generator_id AND c.name = :column_name
                AND g.id = gc.generator_id
                AND g.tabname = c.tabname
                AND gc.colno = c.colno
    '''
    cursor = bdb.sql_execute(sql, {
        'generator_id': generator_id,
        'column_name': column_name,
    })
    return cursor_value(cursor)