Esempio n. 1
0
def bql_column_stattypes_and_data(bdb, generator_id, colno0, colno1):
    """Fetch the stattypes and the jointly non-NULL values of two columns.

    Returns a 4-tuple ``(st0, st1, data0, data1)``: the statistical
    types that ``core`` reports for `colno0` and `colno1`, followed by
    two parallel lists with the values of each column, taken only from
    the rows of the generator's table in which neither column is NULL.
    """
    stattype0 = core.bayesdb_generator_column_stattype(
        bdb, generator_id, colno0)
    stattype1 = core.bayesdb_generator_column_stattype(
        bdb, generator_id, colno1)
    quoted_table = sqlite3_quote_name(
        core.bayesdb_generator_table(bdb, generator_id))
    name0 = core.bayesdb_generator_column_name(bdb, generator_id, colno0)
    name1 = core.bayesdb_generator_column_name(bdb, generator_id, colno1)
    quoted0 = sqlite3_quote_name(name0)
    quoted1 = sqlite3_quote_name(name1)
    # Table and column identifiers are spliced into the statement text
    # after quoting; they are not passed as bound parameters.
    query = '''
        SELECT %s, %s FROM %s WHERE %s IS NOT NULL AND %s IS NOT NULL
    ''' % (quoted0, quoted1, quoted_table, quoted0, quoted1)
    values0 = []
    values1 = []
    for record in bdb.sql_execute(query).fetchall():
        values0.append(record[0])
        values1.append(record[1])
    return (stattype0, stattype1, values0, values1)
Esempio n. 2
0
def bql_column_stattypes_and_data(bdb, generator_id, colno0, colno1):
    """Return ``(stattype0, stattype1, values0, values1)`` for two columns.

    The stattypes come from ``core.bayesdb_generator_column_stattype``.
    The two value lists are parallel: entry ``i`` of each comes from the
    same row of the generator's table, and rows in which either column
    is NULL are excluded by the query.
    """
    first_stattype = core.bayesdb_generator_column_stattype(
        bdb, generator_id, colno0)
    second_stattype = core.bayesdb_generator_column_stattype(
        bdb, generator_id, colno1)
    table = core.bayesdb_generator_table(bdb, generator_id)
    table_q = sqlite3_quote_name(table)
    first_q = sqlite3_quote_name(
        core.bayesdb_generator_column_name(bdb, generator_id, colno0))
    second_q = sqlite3_quote_name(
        core.bayesdb_generator_column_name(bdb, generator_id, colno1))
    # Quoted identifiers are interpolated into the SQL text itself.
    statement = '''
        SELECT %s, %s FROM %s WHERE %s IS NOT NULL AND %s IS NOT NULL
    ''' % (first_q, second_q, table_q, first_q, second_q)
    rows = bdb.sql_execute(statement).fetchall()
    return (
        first_stattype,
        second_stattype,
        [r[0] for r in rows],
        [r[1] for r in rows],
    )
Esempio n. 3
0
def bayesdb_generator_cell_value(bdb, generator_id, colno, rowid):
    """Return the value stored in column `colno` of row `rowid`.

    The table and column names are looked up from `generator_id` via
    ``core`` and quoted before being spliced into the query; `rowid` is
    passed as a bound parameter and matched against ``_rowid_``.

    Raises AssertionError if the query yields no row for `rowid`.
    """
    table_name = core.bayesdb_generator_table(bdb, generator_id)
    qt = bql_quote_name(table_name)
    colname = core.bayesdb_generator_column_name(bdb, generator_id, colno)
    qcn = bql_quote_name(colname)
    sql = 'SELECT %s FROM %s WHERE _rowid_ = ?' % (qcn, qt)
    cursor = bdb.sql_execute(sql, (rowid, ))
    try:
        row = cursor.next()
    except StopIteration:
        # Raise explicitly instead of `assert False`: assert statements
        # are removed under `python -O`, which would make a missing row
        # silently return None.  The exception type is unchanged.
        raise AssertionError('Missing row at %d!' % (rowid, ))
    return row[0]
Esempio n. 4
0
def bayesdb_generator_cell_value(bdb, generator_id, colno, rowid):
    """Look up a single cell of the table underlying a generator.

    Selects column number `colno` (resolved to a name through ``core``)
    from the generator's table for the row whose ``_rowid_`` equals
    `rowid`, and returns that one value.

    Raises AssertionError if no such row is returned by the query.
    """
    table_name = core.bayesdb_generator_table(bdb, generator_id)
    qt = bql_quote_name(table_name)
    colname = core.bayesdb_generator_column_name(bdb, generator_id, colno)
    qcn = bql_quote_name(colname)
    sql = 'SELECT %s FROM %s WHERE _rowid_ = ?' % (qcn, qt)
    cursor = bdb.sql_execute(sql, (rowid,))
    try:
        row = cursor.next()
    except StopIteration:
        # An explicit raise survives `python -O`; the former
        # `assert False` would be stripped there and the function would
        # fall through and return None for a missing row.
        raise AssertionError('Missing row at %d!' % (rowid,))
    return row[0]
Esempio n. 5
0
 def initialize_models(self, bdb, genid, modelnos, model_config):
     """Initialize the internal crosscat models and the foreign predictors.

     First issues an ``INITIALIZE ... MODELS FOR ...`` statement against
     the internal crosscat generator, then builds one predictor per
     foreign column and stores its serialized form in
     ``bayesdb_composer_column_foreign_predictor``.

     `model_config` is accepted but not used in this method.
     """
     # Keep the crosscat model numbers equal to ours.  INITIALIZE
     # guarantees that a sequence of models exists up to the requested
     # count, and BayesDB works out which numbers need filling in; the
     # inverse of that computation is max(modelnos)+1.
     cc_quoted = quote(
         core.bayesdb_generator_name(bdb, self.cc_id(bdb, genid)))
     n_models = max(modelnos)+1
     bdb.execute('INITIALIZE {} MODELS FOR {};'.format(n_models, cc_quoted))

     def describe(colno):
         # (column name, stattype) pair for a column number.
         return (core.bayesdb_generator_column_name(bdb, genid, colno),
                 core.bayesdb_generator_column_stattype(bdb, genid, colno))

     # Initialize the foreign predictors, one per foreign column.
     for fcol in self.fcols(bdb, genid):
         # The builder takes names and stattypes, not column numbers.
         targets = [describe(fcol)]
         conditions = [describe(pcol)
                       for pcol in self.pcols(bdb, genid, fcol)]
         # Build the predictor with the builder registered under its name.
         table_name = core.bayesdb_generator_table(bdb, genid)
         builder = self.predictor_builder[
             self.predictor_name(bdb, genid, fcol)]
         predictor = builder.create(bdb, table_name, targets, conditions)
         # Persist the serialized predictor in the database.
         with bdb.savepoint():
             sql = '''
                 UPDATE bayesdb_composer_column_foreign_predictor SET
                     predictor_binary = :predictor_binary
                     WHERE generator_id = :genid AND colno = :colno
             '''
             blob = builder.serialize(bdb, predictor)
             bindings = {
                 'genid': genid,
                 'predictor_binary': sqlite3.Binary(blob),
                 'colno': fcol,
             }
             bdb.sql_execute(sql, bindings)
Esempio n. 6
0
def test_t1_column_value_probability(colno, rowid):
    """Smoke-test bql_column_value_probability from Python and from SQL.

    Both calls are made only for their side effects: the test passes as
    long as neither raises.  A `rowid` of 0 is replaced by the result of
    ``bayesdb_maxrowid`` for the generator.
    """
    with analyzed_bayesdb_generator(t1(), 1, 1) as (bdb, generator_id):
        if rowid == 0:
            rowid = bayesdb_maxrowid(bdb, generator_id)
        # Direct Python call on the value currently stored in the cell.
        cell = bayesdb_generator_cell_value(bdb, generator_id, colno, rowid)
        bqlfn.bql_column_value_probability(
            bdb, generator_id, None, colno, cell)
        # Same function through SQL, with the cell fetched by a subquery.
        quoted_table = bql_quote_name(
            core.bayesdb_generator_table(bdb, generator_id))
        quoted_column = bql_quote_name(
            core.bayesdb_generator_column_name(bdb, generator_id, colno))
        sql = '''
            select bql_column_value_probability(?, NULL, ?,
                (select %s from %s where rowid = ?))
        ''' % (quoted_column, quoted_table)
        bindings = (generator_id, colno, rowid)
        bdb.sql_execute(sql, bindings).fetchall()
Esempio n. 7
0
def test_t1_column_value_probability(colno, rowid):
    """Exercise bql_column_value_probability directly and via SQL.

    No result is checked; the test only requires that both invocations
    complete without raising.  When `rowid` is 0 it is swapped for
    ``bayesdb_maxrowid(bdb, generator_id)`` before use.
    """
    with analyzed_bayesdb_generator(t1(), 1, 1) as (bdb, generator_id):
        if rowid == 0:
            rowid = bayesdb_maxrowid(bdb, generator_id)
        observed = bayesdb_generator_cell_value(
            bdb, generator_id, colno, rowid)
        # Python-level entry point.
        bqlfn.bql_column_value_probability(
            bdb, generator_id, None, colno, observed)
        # SQL-level entry point; the subquery reads the same cell.
        table = core.bayesdb_generator_table(bdb, generator_id)
        column = core.bayesdb_generator_column_name(
            bdb, generator_id, colno)
        table_q = bql_quote_name(table)
        column_q = bql_quote_name(column)
        sql = '''
            select bql_column_value_probability(?, NULL, ?,
                (select %s from %s where rowid = ?))
        ''' % (column_q, table_q)
        cursor = bdb.sql_execute(sql, (generator_id, colno, rowid))
        cursor.fetchall()
Esempio n. 8
0
 def predictor(self, bdb, genid, fcol):
     """Return the foreign predictor for column `fcol` of generator `genid`.

     Predictors are memoized in ``self._predictor_cache(bdb)`` under the
     key ``(genid, fcol)``.  On a miss, the predictor's name and
     serialized form are read from
     ``bayesdb_composer_column_foreign_predictor`` and deserialized by
     the builder registered under that name in ``self.predictor_builder``.

     Raises LookupError if no builder is registered under the stored
     predictor name.
     """
     if (genid, fcol) not in self._predictor_cache(bdb):
         cursor = bdb.sql_execute('''
             SELECT predictor_name, predictor_binary
                 FROM bayesdb_composer_column_foreign_predictor
                 WHERE generator_id = ? AND colno = ?
         ''', (genid, fcol))
         # NOTE(review): indexing [0] presumably raises IndexError when
         # the table has no row for (genid, fcol) -- confirm callers
         # never reach here in that state.
         name, binary = cursor.fetchall()[0]
         builder = self.predictor_builder.get(name, None)
         if builder is None:
             # The first placeholder is the column, the second is the
             # unregistered predictor name; keep the arguments in that
             # order so the message reads correctly.
             raise LookupError('Foreign predictor for column "{}" '
                 'not registered: "{}".'.format(
                     core.bayesdb_generator_column_name(bdb, genid, fcol),
                     name))
         self._predictor_cache(bdb)[(genid, fcol)] = \
             builder.deserialize(bdb, binary)
     return self._predictor_cache(bdb)[(genid, fcol)]
Esempio n. 9
0
 def _weighted_sample(self, bdb, genid, modelno, row_id, Y, n_samples=None):
     """Draw weighted samples of all network nodes for one row.

     Returns a pair of parallel lists ``([sample ...], [weight ...])``.
     Each `sample` is a dict ``{col: value}`` for row `row_id`.  `Y`
     specifies evidence nodes as ``(row, col, value)`` triples; the
     evidence given for `row_id` is copied into every sample, so all
     returned samples are constrained at those nodes.  Each `weight` is
     the sum of the log densities (``logpdf_joint`` / ``logpdf``) of the
     evidence given the rest of the sample.

     `n_samples` defaults to ``self.n_samples``.
     """
     if n_samples is None:
         n_samples = self.n_samples
     # Create n_samples dicts, each seeded with the evidence for row_id.
     samples = [{c:v for r,c,v in Y if r == row_id}
                for _ in xrange(n_samples)]
     # The local columns do not change during this call: look them up
     # once instead of once per evidence triple and per column.
     lcols = list(self.lcols(bdb, genid))
     w0 = 0
     # Assess the log density of the evidence at the root (crosscat).
     Y_cc = [(r, c, v) for r, c, v in Y if c in lcols]
     if Y_cc:
         w0 += self.cc(bdb, genid).logpdf_joint(bdb, self.cc_id(bdb, genid),
             Y_cc, [], modelno)
     # Simulate the crosscat columns that are not given as evidence.
     Q_cc = [(row_id, c) for c in lcols if c not in samples[0]]
     V_cc = self.cc(bdb, genid).simulate_joint(bdb, self.cc_id(bdb, genid),
         Q_cc, Y_cc, modelno, num_predictions=n_samples)
     # Per foreign column, the parents, the predictor and the parents'
     # names are the same for every sample: resolve them once, in
     # topological order, rather than once per sample.
     plan = []
     for fcol in self.topo(bdb, genid):
         pcols = self.pcols(bdb, genid, fcol)
         predictor = self.predictor(bdb, genid, fcol)
         pnames = {c: core.bayesdb_generator_column_name(bdb, genid, c)
                   for c in pcols}
         plan.append((fcol, pcols, predictor, pnames))
     weights = []
     for k in xrange(n_samples):
         w = w0
         sample = samples[k]
         # Add simulated Q_cc.
         sample.update({c:v for (_, c), v in zip(Q_cc, V_cc[k])})
         for fcol, pcols, predictor, pnames in plan:
             # All parents of the foreign predictor must be known by
             # now, either as evidence or from an earlier simulation.
             assert pcols.issubset(set(sample))
             conditions = {pnames[c]: v for c, v in sample.iteritems()
                           if c in pcols}
             if fcol in sample:
                 # fcol is evidence: add its log density to the weight.
                 w += predictor.logpdf(sample[fcol], conditions)
             else:
                 # fcol is latent: simulate from the conditional.
                 sample[fcol] = predictor.simulate(1, conditions)[0]
         weights.append(w)
     return samples, weights
Esempio n. 10
0
 def create_generator(self, bdb, table, schema, instantiate):
     """Create this generator plus its internal crosscat generator.

     Parses `schema`, instantiates this generator through `instantiate`,
     issues a ``CREATE GENERATOR ... USING crosscat`` statement for the
     local columns, and then records the composer bookkeeping rows: the
     crosscat generator id, which columns are local, the parents of each
     foreign column, the topological order, and the predictor name of
     each foreign column.
     """
     # Split the schema into its parts; the third item is unused here.
     (columns, lcols, _fcols, fcol_to_pcols, fcol_to_fpred,
         dependencies) = self.parse(schema)
     # Instantiate **this** generator.
     genid, bdbcolumns = instantiate(columns.items())
     # The internal crosscat generator takes this generator's name with
     # a _cc suffix appended.
     cc_name = bayeslite.core.bayesdb_generator_name(bdb, genid) + '_cc'
     # Assemble the pieces of the crosscat schema as strings.
     column_specs = ['{} {}'.format(quote(name), quote(columns[name]))
                     for name in lcols]
     cc_cols = ','.join(column_specs)
     dep_clauses = []
     for is_dependent, colnames in dependencies:
         quoted_names = ','.join(map(quote, colnames))
         template = 'DEPENDENT({})' if is_dependent else 'INDEPENDENT({})'
         dep_clauses.append(template.format(quoted_names))
     bql = """
         CREATE GENERATOR {} FOR {} USING crosscat(
             {}, {}
         );
     """.format(quote(cc_name), quote(table), cc_cols, ','.join(dep_clauses))
     bdb.execute(bql)
     # Translate the foreign-column -> parents map from names to numbers.
     fcolno_to_pcolnos = {}
     for fname in fcol_to_pcols:
         fcolno = core.bayesdb_generator_column_number(bdb, genid, fname)
         parent_numbers = []
         for pname in fcol_to_pcols[fname]:
             parent_numbers.append(
                 core.bayesdb_generator_column_number(bdb, genid, pname))
         fcolno_to_pcolnos[fcolno] = parent_numbers
     with bdb.savepoint():
         # Record the id of the internal crosscat generator.
         cc_id = core.bayesdb_get_generator(bdb, cc_name)
         bdb.sql_execute('''
             INSERT INTO bayesdb_composer_cc_id
                 (generator_id, crosscat_generator_id) VALUES (?,?)
         ''', (genid, cc_id))
         # Record, per column, whether it is local (i.e. not foreign).
         for colno, _, _ in bdbcolumns:
             is_local = colno not in fcolno_to_pcolnos
             bdb.sql_execute('''
                 INSERT INTO bayesdb_composer_column_owner
                     (generator_id, colno, local) VALUES (?,?,?)
             ''', (genid, colno, int(is_local)))
         # Record the parents of every foreign column.
         for fcolno, pcolnos in fcolno_to_pcolnos.items():
             for pcolno in pcolnos:
                 bdb.sql_execute('''
                     INSERT INTO bayesdb_composer_column_parents
                         (generator_id, fcolno, pcolno) VALUES (?,?,?)
                 ''', (genid, fcolno, pcolno))
         # Record the topological order of the foreign columns.
         ordering = self.topological_sort(fcolno_to_pcolnos)
         for position, (colno, _) in enumerate(ordering):
             bdb.sql_execute('''
                 INSERT INTO bayesdb_composer_column_toposort
                     (generator_id, colno, position) VALUES (?,?,?)
                 ''', (genid, colno, position))
         # Record the predictor name chosen for every foreign column.
         for fcolno in fcolno_to_pcolnos:
             fcolname = core.bayesdb_generator_column_name(
                 bdb, genid, fcolno)
             fp_name = fcol_to_fpred[casefold(fcolname)]
             bdb.sql_execute('''
                 INSERT INTO bayesdb_composer_column_foreign_predictor
                     (generator_id, colno, predictor_name) VALUES (?,?,?)
             ''', (genid, fcolno, casefold(fp_name)))