Esempio n. 1
0
def test_bayesdb_generator_fresh_row_id():
    """Check the fresh row id before and after `t1_data(bdb)` is called.

    The id is expected to be 1 beforehand and `len(t1_rows) + 1`
    afterwards.
    """
    column_specs = ['label CATEGORICAL', 'age NUMERICAL', 'weight NUMERICAL']
    generator_cm = bayesdb_generator(
        bayesdb(), 't1', 't1_cc', t1_schema, lambda x: 0,
        columns=column_specs)
    with generator_cm as (bdb, generator_id):
        fresh_before = core.bayesdb_generator_fresh_row_id(bdb, generator_id)
        assert fresh_before == 1
        t1_data(bdb)
        fresh_after = core.bayesdb_generator_fresh_row_id(bdb, generator_id)
        assert fresh_after == len(t1_rows) + 1
Esempio n. 2
0
def test_bayesdb_generator_fresh_row_id():
    # The fresh row id must be 1 before t1_data(bdb) runs, and
    # len(t1_rows) + 1 once it has run.
    with bayesdb_generator(
            bayesdb(), 't1', 't1_cc', t1_schema, lambda x: 0,
            columns=[
                'label CATEGORICAL',
                'age NUMERICAL',
                'weight NUMERICAL',
            ]) as (bdb, generator_id):
        fresh_row_id = core.bayesdb_generator_fresh_row_id
        assert fresh_row_id(bdb, generator_id) == 1
        t1_data(bdb)
        assert fresh_row_id(bdb, generator_id) == len(t1_rows) + 1
Esempio n. 3
0
def test_bayesdb_generator_fresh_row_id():
    """Fresh row id is 1 before `t1_data(bdb)` and `len(t1_rows) + 1` after."""
    generator_kwargs = {
        "columns": ["label CATEGORICAL", "age NUMERICAL", "weight NUMERICAL"],
    }
    with bayesdb_generator(
        bayesdb(), "t1", "t1_cc", t1_schema, lambda x: 0, **generator_kwargs
    ) as context:
        bdb, generator_id = context

        # First check: nothing has been loaded through t1_data yet.
        initial_id = core.bayesdb_generator_fresh_row_id(bdb, generator_id)
        assert initial_id == 1

        # Second check: after t1_data(bdb) the id moves past t1_rows.
        t1_data(bdb)
        next_id = core.bayesdb_generator_fresh_row_id(bdb, generator_id)
        assert next_id == len(t1_rows) + 1
Esempio n. 4
0
def bayesdb_simulate(bdb,
                     generator_id,
                     constraints,
                     colnos,
                     modelno=None,
                     numpredictions=1):
    """Simulate rows from a generative model, subject to constraints.

    Every column number in `colnos` becomes a target, and every
    ``(colno, value)`` pair in `constraints` becomes a constraint, all
    attached to a single fresh row id obtained from the generator.
    When `constraints` is None it is forwarded as None.

    The work is delegated to the generator's metamodel via
    ``simulate_joint``, asking for `numpredictions` predictions; its
    result is returned unchanged.

    """
    metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
    fresh_rowid = core.bayesdb_generator_fresh_row_id(bdb, generator_id)

    # One (rowid, colno) target per requested column.
    targets = []
    for target_colno in colnos:
        targets.append((fresh_rowid, target_colno))

    # Attach the same fresh row id to each constraint, if there are any.
    row_constraints = constraints
    if constraints is not None:
        row_constraints = [
            (fresh_rowid, c_colno, c_value)
            for c_colno, c_value in constraints
        ]

    return metamodel.simulate_joint(
        bdb, generator_id, targets, row_constraints, modelno,
        num_predictions=numpredictions)
Esempio n. 5
0
def bql_pdf_joint(bdb, generator_id, modelno, *args):
    """Return ``ieee_exp`` of the joint log-density of targets given constraints.

    `args` is a flat sequence ``colno, value, colno, value, ...`` of
    targets, optionally followed by a ``-1`` in a column position and
    then a flat sequence of constraint ``colno, value`` pairs.  All
    cells are attached to a fresh row id obtained from the generator.

    Raises ValueError if a target or constraint column number has no
    value following it.
    """
    metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
    # Row id obtained from bayesdb_generator_fresh_row_id; shared by
    # every target and constraint cell.
    fresh_rowid = core.bayesdb_generator_fresh_row_id(bdb, generator_id)
    nargs = len(args)

    # Targets: consume (colno, value) pairs until the -1 separator.  The
    # separator is only recognised in a column position, so a target
    # *value* of -1 is not mistaken for it.
    targets = []
    pos = 0
    while pos < nargs:
        colno = args[pos]
        if colno == -1:
            pos += 1
            break
        if pos + 1 == nargs:
            raise ValueError('Missing logpdf target value: %r' % (colno, ))
        targets.append((fresh_rowid, colno, args[pos + 1]))
        pos += 2

    # Constraints: everything after the separator, again in pairs.
    constraints = []
    for j in range(pos, nargs, 2):
        if j + 1 == nargs:
            raise ValueError('Missing logpdf constraint value: %r' %
                             (args[j], ))
        constraints.append((fresh_rowid, args[j], args[j + 1]))

    return ieee_exp(
        metamodel.logpdf_joint(bdb, generator_id, targets, constraints,
                               modelno))
Esempio n. 6
0
def bql_pdf_joint(bdb, generator_id, modelno, *args):
    """Evaluate the joint density of target cells given constraint cells.

    `args` holds alternating column numbers and values for the targets;
    a ``-1`` where a column number is expected ends the targets, and the
    remaining arguments are alternating column numbers and values for
    the constraints.  The metamodel's ``logpdf_joint`` result is passed
    through ``ieee_exp`` and returned.

    Raises ValueError when a column number is the last argument and so
    has no accompanying value.
    """
    metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
    # Row id from bayesdb_generator_fresh_row_id, used for every cell.
    fake_row_id = core.bayesdb_generator_fresh_row_id(bdb, generator_id)

    def cell_at(index, message):
        # Build (rowid, colno, value) from args[index:index + 2], or
        # complain if the value is missing.
        if index + 1 == len(args):
            raise ValueError(message % (args[index],))
        return (fake_row_id, args[index], args[index + 1])

    cursor = 0
    targets = []
    while cursor < len(args) and not (args[cursor] == -1):
        targets.append(cell_at(cursor, 'Missing logpdf target value: %r'))
        cursor += 2
    if cursor < len(args):
        # Stopped on the -1 separator rather than running out: skip it.
        cursor += 1

    constraints = []
    while cursor < len(args):
        constraints.append(
            cell_at(cursor, 'Missing logpdf constraint value: %r'))
        cursor += 2

    logp = metamodel.logpdf_joint(
        bdb, generator_id, targets, constraints, modelno)
    return ieee_exp(logp)
Esempio n. 7
0
 def column_mutual_information(self, bdb, genid, modelno, colno0, colno1,
         numsamples=None):
     """Average conditional_mutual_information of two columns over models.

     Both columns are placed on one fresh row id.  When `modelno` is
     None the average runs over every model number reported for the
     generator; otherwise only over `modelno`.
     """
     if numsamples is None:
         # NOTE(review): numsamples is resolved here but not forwarded
         # to conditional_mutual_information below -- confirm intent.
         numsamples = self.n_samples
     # XXX Aggregator only.
     fresh_rowid = core.bayesdb_generator_fresh_row_id(bdb, genid)
     first_query = [(fresh_rowid, colno0)]
     second_query = [(fresh_rowid, colno1)]
     # A single empty list object is deliberately passed for both of
     # the trailing arguments.
     nothing = []
     if modelno is not None:
         modelnos = [modelno]
     else:
         modelnos = core.bayesdb_generator_modelnos(bdb, genid)
     with bdb.savepoint():
         total = 0
         for each_modelno in modelnos:
             total = total + self.conditional_mutual_information(
                 bdb, genid, each_modelno, first_query, second_query,
                 nothing, nothing)
         mi = total / float(len(modelnos))
     return mi
Esempio n. 8
0
def bql_column_value_probability(bdb, generator_id, modelno, colno, value,
                                 *constraint_args):
    """Return ``ieee_exp`` of the log-density of `value` in column `colno`.

    `constraint_args` is a flat sequence ``colno, value, colno, value,
    ...`` of constraints.  The target and all constraints are attached
    to a fresh row id obtained from the generator.

    Raises ValueError if `constraint_args` has an odd number of items.
    """
    metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
    # Row id from bayesdb_generator_fresh_row_id, shared by all cells.
    fresh_rowid = core.bayesdb_generator_fresh_row_id(bdb, generator_id)

    # An odd count means the last column number has no value.
    if len(constraint_args) % 2 != 0:
        raise ValueError('Odd constraint arguments: %s' %
                         (constraint_args, ))

    # Even positions are column numbers, odd positions their values.
    constraint_colnos = constraint_args[0::2]
    constraint_values = constraint_args[1::2]
    constraints = [
        (fresh_rowid, c_colno, c_value)
        for c_colno, c_value in zip(constraint_colnos, constraint_values)
    ]

    logp = metamodel.logpdf_joint(
        bdb, generator_id, [(fresh_rowid, colno, value)], constraints,
        modelno)
    return ieee_exp(logp)
Esempio n. 9
0
def bql_column_value_probability(bdb, generator_id, modelno, colno, value,
        *constraint_args):
    """Density of `value` in column `colno`, given optional constraints.

    Constraints arrive flattened in `constraint_args` as alternating
    column numbers and values.  The metamodel's ``logpdf_joint`` result
    for the single target cell is passed through ``ieee_exp``.

    Raises ValueError when a constraint column number has no value
    after it.
    """
    metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
    # Row id from bayesdb_generator_fresh_row_id, used for every cell.
    rowid = core.bayesdb_generator_fresh_row_id(bdb, generator_id)
    n_args = len(constraint_args)
    constraints = []
    for start in range(0, n_args, 2):
        if start + 1 == n_args:
            # Dangling column number at the very end.
            raise ValueError('Odd constraint arguments: %s' %
                (constraint_args,))
        c_colno, c_value = constraint_args[start], constraint_args[start + 1]
        constraints.append((rowid, c_colno, c_value))
    target_cell = (rowid, colno, value)
    return ieee_exp(metamodel.logpdf_joint(
        bdb, generator_id, [target_cell], constraints, modelno))
Esempio n. 10
0
def bayesdb_simulate(bdb, generator_id, constraints, colnos,
        modelno=None, numpredictions=1):
    """Simulate values for `colnos` on a fresh row, given `constraints`.

    `constraints` is either None or an iterable of ``(colno, value)``
    pairs; `colnos` is an iterable of column numbers to simulate.  Both
    are rewritten to refer to one fresh row id from the generator and
    handed to the metamodel's ``simulate_joint`` together with
    `modelno` and `numpredictions`.

    Returns whatever ``simulate_joint`` returns.

    """
    metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
    rowid = core.bayesdb_generator_fresh_row_id(bdb, generator_id)
    target_cells = [(rowid, target) for target in colnos]
    if constraints is None:
        constraint_cells = None
    else:
        constraint_cells = []
        for colno, val in constraints:
            constraint_cells.append((rowid, colno, val))
    return metamodel.simulate_joint(
        bdb,
        generator_id,
        target_cells,
        constraint_cells,
        modelno,
        num_predictions=numpredictions,
    )
Esempio n. 11
0
    def predict_confidence(self,
                           bdb,
                           generator_id,
                           modelno,
                           colno,
                           rowid,
                           numsamples=None):
        """Predict a value for cell (`rowid`, `colno`) with a confidence.

        Draws `numsamples` samples for the cell through
        ``self.simulate_joint`` (2 samples when `numsamples` is None or
        otherwise falsy) and summarises them:

        - if ``_is_categorical`` accepts the variable's stattype, the
          prediction is a most frequent sampled value and the confidence
          is its count divided by `numsamples`;
        - otherwise the prediction is the mean of the samples and the
          confidence is 0 (not computed yet).

        When `rowid` is below the generator's fresh row id but above the
        largest ``table_rowid`` recorded for this generator in
        ``bayesdb_cgpm_individual``, the other variables of that row are
        read from the population's table and used as constraints.

        Returns a ``(prediction, confidence)`` pair.
        """
        if not numsamples:
            numsamples = 2
        assert numsamples > 0

        def _impute_categorical(sample):
            # Mode of the first component of each sample; on ties, the
            # first such value in the Counter's iteration order wins.
            counts = Counter(s[0] for s in sample)
            mode_count = max(counts[v] for v in counts)
            # Builtin next() rather than the Python-2-only .next() method.
            pred = next(v for v in counts if counts[v] == mode_count)
            conf = float(mode_count) / numsamples
            return pred, conf

        def _impute_numerical(sample):
            # Mean of the first component of each sample.
            pred = sum(s[0] for s in sample) / float(len(sample))
            conf = 0  # XXX Punt confidence for now
            return pred, conf

        constraints = []
        # If rowid is a hypothetical cell for cgpm (did not exist at the time
        # of INITIALIZE), but exists in the base table (by INSERT INTO), then
        # retrieve all values for rowid as the constraints.
        exists = rowid < core.bayesdb_generator_fresh_row_id(bdb, generator_id)
        max_cgpm_rowid = bdb.sql_execute(
            '''
            SELECT MAX(table_rowid) FROM bayesdb_cgpm_individual
            WHERE generator_id = ?
        ''', (generator_id, )).fetchall()[0][0]
        # NOTE(review): MAX() presumably yields None when no rows match;
        # the comparison below then depends on int-vs-None ordering,
        # which only Python 2 allows -- confirm.
        hypothetical = rowid > max_cgpm_rowid
        if exists and hypothetical:
            population_id = core.bayesdb_generator_population(
                bdb, generator_id)
            # Retrieve all other variables except colno, and ignore latents in
            # generator_id, and place them in the constraints.
            pop_names = core.bayesdb_variable_names(bdb, population_id, None)
            avoid_name = core.bayesdb_variable_name(bdb, population_id, colno)
            constraints_names = [n for n in pop_names if n != avoid_name]
            # Obtain the row.  Identifiers cannot be bound as SQL
            # parameters, so they are quoted and interpolated; rowid is
            # still passed as a bound parameter.
            qt_names = ','.join(map(sqlite3_quote_name, constraints_names))
            qt_table = sqlite3_quote_name(
                core.bayesdb_population_table(bdb, population_id))
            data = bdb.sql_execute(
                '''
                SELECT %s FROM %s WHERE oid = ?
            ''' % (
                    qt_names,
                    qt_table,
                ), (rowid, )).fetchall()[0]
            # Build the constraints.
            pop_nos = core.bayesdb_variable_numbers(bdb, population_id, None)
            constraints_nos = [n for n in pop_nos if n != colno]
            assert len(data) == len(constraints_nos)
            # NOTE(review): the filter drops every falsy value, so 0 and
            # '' are skipped along with None -- confirm this is intended.
            constraints = [(rowid, c, v)
                           for c, v in zip(constraints_nos, data)
                           if (v is not None) and v]

        # Retrieve the samples.
        sample = self.simulate_joint(bdb, generator_id, [(rowid, colno)],
                                     constraints, modelno, numsamples)

        # Determine the imputation strategy (mode or mean).
        stattype = core.bayesdb_variable_stattype(
            bdb, core.bayesdb_generator_population(bdb, generator_id), colno)
        if _is_categorical(stattype):
            return _impute_categorical(sample)
        else:
            return _impute_numerical(sample)