コード例 #1
0
ファイル: test_core.py プロジェクト: utanapishtim/bayeslite
def test_bayesdb_population_fresh_row_id():
    """Check the fresh row id before and after t1_data populates the table."""
    stattypes = [
        'id IGNORE',
        'label NOMINAL',
        'age NUMERICAL',
        'weight NUMERICAL',
    ]
    population = bayesdb_population(
        bayesdb(), 't1', 'p1', 'p1_cc', t1_schema, lambda x: 0,
        columns=stattypes)
    with population as (bdb, population_id, generator_id):
        # Before t1_data runs, the first fresh row id is expected to be 1.
        fresh_before = core.bayesdb_population_fresh_row_id(
            bdb, population_id)
        assert fresh_before == 1
        t1_data(bdb)
        # Afterwards it is expected to be one past the number of t1 rows.
        fresh_after = core.bayesdb_population_fresh_row_id(
            bdb, population_id)
        assert fresh_after == len(t1_rows) + 1
コード例 #2
0
def test_bayesdb_population_fresh_row_id():
    """Check row-id bookkeeping for table t1 before and after t1_data."""
    column_specs = [
        'id IGNORE',
        'label NOMINAL',
        'age NUMERICAL',
        'weight NUMERICAL',
    ]
    population = bayesdb_population(
        bayesdb(), 't1', 'p1', 'p1_cc', t1_schema, lambda x: 0,
        columns=column_specs)
    with population as (bdb, population_id, _generator_id):
        assert core.bayesdb_population_fresh_row_id(bdb, population_id) == 1
        t1_data(bdb)
        n_rows = len(t1_rows)
        # Row ids 1..n_rows must be present; the next one must be absent.
        for rowid in xrange(1, n_rows + 1):
            assert core.bayesdb_table_has_rowid(bdb, 't1', rowid)
        first_absent = n_rows + 1
        assert not core.bayesdb_table_has_rowid(bdb, 't1', first_absent)
        fresh = core.bayesdb_population_fresh_row_id(bdb, population_id)
        assert fresh == first_absent
コード例 #3
0
ファイル: test_core.py プロジェクト: probcomp/bayeslite
def test_bayesdb_population_fresh_row_id():
    """Fresh row id is 1 initially, then one past the rows added by t1_data."""
    def fresh_row_id(bdb, population_id):
        # Thin local alias to keep the assertions below short.
        return core.bayesdb_population_fresh_row_id(bdb, population_id)

    population = bayesdb_population(
        bayesdb(), 't1', 'p1', 'p1_cc', t1_schema, lambda x: 0,
        columns=[
            'id IGNORE', 'label NOMINAL', 'age NUMERICAL', 'weight NUMERICAL',
        ])
    with population as (bdb, population_id, _generator_id):
        assert fresh_row_id(bdb, population_id) == 1
        t1_data(bdb)
        n_rows = len(t1_rows)
        # Every row id from 1 through n_rows should exist in t1 ...
        rowid = 1
        while rowid <= n_rows:
            assert core.bayesdb_table_has_rowid(bdb, 't1', rowid)
            rowid += 1
        # ... and the one after the last should not.
        assert not core.bayesdb_table_has_rowid(bdb, 't1', n_rows + 1)
        assert fresh_row_id(bdb, population_id) == n_rows + 1
コード例 #4
0
def bql_row_column_predictive_probability(bdb, population_id, generator_id,
                                          rowid, colno):
    """Predictive probability of one cell given the rest of its row.

    Returns None when the cell at (`rowid`, `colno`) is NULL.  Otherwise the
    cell's value is the query and every other non-NULL value in the row is a
    constraint; each generator's `logpdf_joint` is evaluated on a fresh row
    id, and the result is `ieee_exp` of the `logmeanexp` over generators.
    """
    target_value = core.bayesdb_population_cell_value(
        bdb, population_id, rowid, colno)
    if target_value is None:
        return None

    # Gather the whole row so the remaining cells can serve as constraints.
    observed = core.bayesdb_population_row_values(bdb, population_id, rowid)
    varnos = core.bayesdb_variable_numbers(bdb, population_id, None)

    # The query is posed against a fresh row id rather than `rowid` itself.
    fresh_rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)
    query = [(colno, target_value)]
    constraints = []
    for varno, cell in zip(varnos, observed):
        if cell is None or varno == colno:
            continue
        constraints.append((varno, cell))

    def generator_predprob(gid):
        metamodel = core.bayesdb_generator_metamodel(bdb, gid)
        return metamodel.logpdf_joint(
            bdb, gid, fresh_rowid, query, constraints, None)

    generator_ids = _retrieve_generator_ids(bdb, population_id, generator_id)
    predprobs = [generator_predprob(gid) for gid in generator_ids]
    return ieee_exp(logmeanexp(predprobs))
コード例 #5
0
def bql_row_column_predictive_probability(bdb, population_id, generator_id,
                                          modelnos, rowid, targets,
                                          constraints):
    """Predictive probability of target cells of a row given constraint cells.

    `targets` and `constraints` are JSON-encoded sequences of column numbers.
    The values stored at `rowid` for those columns are looked up, NULL cells
    are dropped, and each generator's `logpdf_joint` is evaluated on a fresh
    row id.  Returns None if every target cell is NULL; otherwise returns
    `ieee_exp` of the `logmeanexp` over the generators.
    """
    target_colnos = json.loads(targets)
    constraint_colnos = json.loads(constraints)
    modelnos = _retrieve_modelnos(modelnos)
    # The query is posed against a fresh row id rather than `rowid` itself.
    fresh_rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)

    def observed_cells(colnos):
        # Pair each column number with its stored value, skipping NULLs.
        pairs = []
        for colno in colnos:
            cell = core.bayesdb_population_cell_value(
                bdb, population_id, rowid, colno)
            if cell is not None:
                pairs.append((colno, cell))
        return pairs

    cgpm_targets = observed_cells(target_colnos)
    if not cgpm_targets:
        # Nothing to evaluate: all target cells are NULL.
        return None
    cgpm_constraints = observed_cells(constraint_colnos)

    def generator_predprob(gid):
        metamodel = core.bayesdb_generator_metamodel(bdb, gid)
        return metamodel.logpdf_joint(
            bdb, gid, modelnos, fresh_rowid, cgpm_targets, cgpm_constraints)

    generator_ids = _retrieve_generator_ids(bdb, population_id, generator_id)
    predprobs = [generator_predprob(gid) for gid in generator_ids]
    return ieee_exp(logmeanexp(predprobs))
コード例 #6
0
ファイル: bqlfn.py プロジェクト: number0/bayeslite
def bql_pdf_joint(bdb, population_id, generator_id, *args):
    """Evaluate a joint density from a flat argument list.

    `args` is a flat run of ``colno, value`` target pairs, optionally followed
    by a ``None`` in a column-number position and then a flat run of
    ``colno, value`` constraint pairs.  Every cell is attached to a fresh
    (nonexistent) row id.  Returns `ieee_exp` of the value computed by
    `_bql_logpdf`.

    Raises ValueError if a column number is not followed by a value.
    """
    fresh_rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)
    absent = object()
    remaining = iter(args)

    # Targets: consume pairs until the None separator or the end of args.
    targets = []
    for t_colno in remaining:
        if t_colno is None:
            break
        t_value = next(remaining, absent)
        if t_value is absent:
            raise ValueError('Missing logpdf target value: %r' % (t_colno, ))
        targets.append((fresh_rowid, t_colno, t_value))

    # Constraints: whatever pairs are left after the separator.
    constraints = []
    for c_colno in remaining:
        c_value = next(remaining, absent)
        if c_value is absent:
            raise ValueError('Missing logpdf constraint value: %r' %
                             (c_colno, ))
        constraints.append((fresh_rowid, c_colno, c_value))

    logp = _bql_logpdf(bdb, population_id, generator_id, targets, constraints)
    return ieee_exp(logp)
コード例 #7
0
ファイル: bqlfn.py プロジェクト: probcomp/bayeslite
def bql_row_column_predictive_probability(
        bdb, population_id, generator_id, modelnos, rowid, targets,
        constraints):
    """Predictive probability of target cells of a row given constraint cells.

    `targets` and `constraints` arrive as JSON-encoded sequences of column
    numbers.  Their stored values at `rowid` are fetched, NULLs are discarded,
    and each generator's backend evaluates `logpdf_joint` on a fresh row id.
    Returns None when all target cells are NULL; otherwise `ieee_exp` of the
    `logmeanexp` over the generators.
    """
    target_colnos = json.loads(targets)
    constraint_colnos = json.loads(constraints)
    modelnos = _retrieve_modelnos(modelnos)
    # The query is posed against a fresh row id rather than `rowid` itself.
    fresh_rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)
    def cell_at(colno):
        return core.bayesdb_population_cell_value(
            bdb, population_id, rowid, colno)
    def non_null_cells(colnos):
        # Fetch every cell first, then keep only the non-NULL ones.
        cells = [(colno, cell_at(colno)) for colno in colnos]
        return [(colno, cell) for (colno, cell) in cells if cell is not None]
    cgpm_targets = non_null_cells(target_colnos)
    if not cgpm_targets:
        # All targets are NULL, so there is nothing to evaluate.
        return None
    cgpm_constraints = non_null_cells(constraint_colnos)
    def generator_predprob(gid):
        backend = core.bayesdb_generator_backend(bdb, gid)
        return backend.logpdf_joint(
            bdb, gid, modelnos, fresh_rowid, cgpm_targets, cgpm_constraints)
    generator_ids = _retrieve_generator_ids(bdb, population_id, generator_id)
    log_predprobs = [generator_predprob(gid) for gid in generator_ids]
    return ieee_exp(logmeanexp(log_predprobs))
コード例 #8
0
ファイル: bqlfn.py プロジェクト: PeterZs/bayeslite
def bayesdb_simulate(bdb, population_id, constraints, colnos,
        generator_id=None, numpredictions=1, accuracy=None):
    """Simulate rows from a generative model, subject to constraints.

    Returns a list of `numpredictions` tuples, with a value for each
    column specified in the list `colnos`, conditioned on the
    constraints in the list `constraints` of tuples ``(colno,
    value)``.

    A constraint whose first element is one of the rowid tokens
    reported by `core.bayesdb_rowid_tokens` is not treated as a
    column constraint: its value selects the row id to simulate for.
    Without one, the results are simulated from the predictive
    distribution on a fresh row.

    If `generator_id` is None, all generators of the population are
    used and the `numpredictions` samples are split among them by a
    multinomial draw weighted by each generator's likelihood of the
    constraints.

    Raises BQLError if more than one rowid constraint is given, or if
    the constraints have zero total likelihood under the generators.
    """
    rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)
    if constraints is not None:
        # Look the rowid tokens up once instead of once per constraint in
        # each of the two filters below.
        rowid_tokens = list(core.bayesdb_rowid_tokens(bdb))
        user_rowid = [v for c, v in constraints if c in rowid_tokens]
        if len(user_rowid) == 1:
            rowid = user_rowid[0]
        elif len(user_rowid) > 1:
            raise BQLError(bdb, 'Multiple rowids given: %s.' % (constraints,))
        constraints = [
            (rowid, c, v) for c, v in constraints
            if c not in rowid_tokens
        ]
    targets = [(rowid, colno) for colno in colnos]
    def loglikelihood(gid, metamodel):
        # With no constraints every generator is equally likely.
        if not constraints:
            return 0
        return metamodel.logpdf_joint(
            bdb, gid, constraints, [], None)
    def simulate(gid, metamodel, n):
        return metamodel.simulate_joint(
            bdb, gid, targets,
            constraints, None, num_predictions=n, accuracy=accuracy)
    generator_ids = [generator_id] if generator_id is not None else \
        core.bayesdb_population_generators(bdb, population_id)
    # Use a distinct loop name: in Python 2 a list comprehension variable
    # leaks into the enclosing scope and would rebind `generator_id`.
    metamodels = [core.bayesdb_generator_metamodel(bdb, gid)
        for gid in generator_ids]
    if len(generator_ids) > 1:
        # Explicit lists, because `likelihoods` is traversed twice below.
        loglikelihoods = [loglikelihood(gid, metamodel)
            for gid, metamodel in zip(generator_ids, metamodels)]
        likelihoods = [math.exp(ll) for ll in loglikelihoods]
        total_likelihood = sum(likelihoods)
        if total_likelihood == 0:
            # XXX Show the constraints with symbolic names.
            raise BQLError(bdb, 'Impossible constraints: %r' % (constraints,))
        probabilities = [likelihood/total_likelihood
            for likelihood in likelihoods]
        countses = bdb.np_prng.multinomial(
            numpredictions, probabilities, size=1)
        counts = countses[0]
    else:
        counts = [numpredictions]
    rowses = [simulate(gid, metamodel, n)
        for gid, metamodel, n in zip(generator_ids, metamodels, counts)]
    all_rows = [row for rows in rowses for row in rows]
    assert all(isinstance(row, (tuple, list)) for row in all_rows)
    return all_rows
コード例 #9
0
def _retrieve_rowid_constraints(bdb, population_id, constraints):
    """Split a user-supplied row id out of a list of constraints.

    `constraints` is a sequence of ``(c, v)`` pairs.  A pair whose `c` is one
    of the rowid tokens reported by `core.bayesdb_rowid_tokens` supplies the
    row id; all other pairs are kept as constraints.

    Returns ``(rowid, constraints)``.  `rowid` is the user-supplied one if
    present, else the population's fresh row id.  If `constraints` is empty
    or None it is returned unchanged.

    Raises BQLError if more than one row id is supplied.
    """
    rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)
    if not constraints:
        return rowid, constraints

    # Fetch the tokens once rather than once per constraint per pass, and
    # partition the constraints in a single traversal.
    rowid_tokens = list(core.bayesdb_rowid_tokens(bdb))
    user_rowids = []
    remaining = []
    for c, v in constraints:
        if c in rowid_tokens:
            user_rowids.append(v)
        else:
            remaining.append((c, v))

    if len(user_rowids) > 1:
        raise BQLError(bdb, 'Multiple rowids given: %s.' % (constraints, ))
    if user_rowids:
        rowid = user_rowids[0]
    return rowid, remaining
コード例 #10
0
ファイル: bqlfn.py プロジェクト: probcomp/bayeslite
def _retrieve_rowid_constraints(bdb, population_id, constraints):
    """Separate a row id constraint from ordinary column constraints.

    Each element of `constraints` is a ``(c, v)`` pair.  Pairs whose `c` is a
    rowid token (per `core.bayesdb_rowid_tokens`) name the row id to use and
    are removed from the returned constraints.

    Returns ``(rowid, constraints)``, where `rowid` defaults to the
    population's fresh row id when none is supplied.  A falsy `constraints`
    (None or empty) is passed through untouched.

    Raises BQLError when several row ids are given.
    """
    rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)
    if constraints:
        # The token list does not change between constraints, so look it up
        # once instead of inside both comprehensions.
        rowid_tokens = list(core.bayesdb_rowid_tokens(bdb))
        user_rowid = [v for c, v in constraints if c in rowid_tokens]
        if len(user_rowid) == 1:
            rowid = user_rowid[0]
        elif len(user_rowid) > 1:
            raise BQLError(bdb, 'Multiple rowids given: %s.' % (constraints,))
        constraints = [
            (c, v) for c, v in constraints
            if c not in rowid_tokens
        ]
    return rowid, constraints
コード例 #11
0
ファイル: bqlfn.py プロジェクト: number0/bayeslite
def bql_column_value_probability(bdb, population_id, generator_id, colno,
                                 value, *constraint_args):
    """Evaluate the density of `value` for column `colno` under constraints.

    `constraint_args` is a flat run of ``colno, value`` pairs.  The target and
    all constraints are attached to a fresh (nonexistent) row id.  Returns
    `ieee_exp` of the value computed by `_bql_logpdf`.

    Raises ValueError if `constraint_args` has an odd number of elements.
    """
    fresh_rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)
    if len(constraint_args) % 2 != 0:
        raise ValueError('Odd constraint arguments: %s' %
                         (constraint_args, ))
    # Even positions hold column numbers, odd positions hold their values.
    paired = zip(constraint_args[0::2], constraint_args[1::2])
    constraints = [
        (fresh_rowid, c_colno, c_value) for (c_colno, c_value) in paired
    ]
    targets = [(fresh_rowid, colno, value)]
    logp = _bql_logpdf(bdb, population_id, generator_id, targets, constraints)
    return ieee_exp(logp)
コード例 #12
0
def bdb():
    """Build an in-memory BayesDB with an analyzed model and extra probe rows.

    Table `t` gets 40 complementary rows, population `p` and metamodel `m`
    are created and analyzed on them, and then 40 further partially or fully
    NULL rows are inserted into the base table only.
    """
    db = bayesdb_open(':memory:')

    def insert_many(statement, count):
        # Run the same INSERT statement `count` times.
        for _ in xrange(count):
            db.sql_execute(statement)

    # Create the population of complements.
    db.sql_execute('CREATE TABLE t (a TEXT, b TEXT)')
    insert_many('INSERT INTO t (a, b) VALUES (0,1)', 20)
    insert_many('INSERT INTO t (a, b) VALUES (1,0)', 20)

    # Create the population and metamodel on the existing rows.
    for statement in (
            'CREATE POPULATION p FOR t (MODEL a, b AS NOMINAL)',
            'CREATE METAMODEL m FOR p;',
            'INITIALIZE 1 MODELS FOR m;',
            'ANALYZE m FOR 1000 ITERATION WAIT (OPTIMIZED);'):
        db.execute(statement)

    # Add new 'hypothetical' rows into the base table to serve as out-of-
    # sample probe points; only zeros, only ones, and nothing.
    insert_many('INSERT INTO t (a) VALUES (0)', 10)
    insert_many('INSERT INTO t (b) VALUES (1)', 10)
    insert_many('INSERT INTO t (a,b) VALUES (NULL, NULL)', 20)

    # The base table now holds 80 rows, so the fresh row id must be 81; it
    # follows the base table, not the rows seen by the metamodel.
    population_id = bayesdb_get_population(db, 'p')
    assert bayesdb_population_fresh_row_id(db, population_id) == 81

    # Make sure the cgpm only has 40 rowids incorporated.
    generator_id = bayesdb_get_generator(db, population_id, 'm')
    max_rowid_sql = '''
        SELECT MAX(table_rowid) FROM bayesdb_cgpm_individual
        WHERE generator_id = ?
    '''
    cursor = db.sql_execute(max_rowid_sql, (generator_id, ))
    assert cursor_value(cursor) == 40

    # Turn off multiprocessing for sequence of queries.
    db.metamodels['cgpm'].set_multiprocess(False)
    return db
コード例 #13
0
def bdb():
    """Build an in-memory BayesDB with an analyzed generator and probe rows.

    Table `t` is filled with 40 complementary rows, population `p` and
    generator `m` are created and analyzed on them, and 40 more partially or
    fully NULL rows are then added to the base table only.
    """
    db = bayesdb_open(':memory:')
    db.sql_execute('CREATE TABLE t (a TEXT, b TEXT)')

    # Create the population of complements.
    training_inserts = [
        ('INSERT INTO t (a, b) VALUES (0,1)', 20),
        ('INSERT INTO t (a, b) VALUES (1,0)', 20),
    ]
    for statement, count in training_inserts:
        for _ in xrange(count):
            db.sql_execute(statement)

    # Create the population and generator on the existing rows.
    db.execute('CREATE POPULATION p FOR t (SET STATTYPES OF a, b TO NOMINAL)')
    db.execute('CREATE GENERATOR m FOR p;')
    db.execute('INITIALIZE 1 MODELS FOR m;')
    db.execute('ANALYZE m FOR 1000 ITERATION (OPTIMIZED);')

    # Add new 'hypothetical' rows into the base table to serve as out-of-
    # sample probe points; only zeros, only ones, and nothing.
    probe_inserts = [
        ('INSERT INTO t (a) VALUES (0)', 10),
        ('INSERT INTO t (b) VALUES (1)', 10),
        ('INSERT INTO t (a,b) VALUES (NULL, NULL)', 20),
    ]
    for statement, count in probe_inserts:
        for _ in xrange(count):
            db.sql_execute(statement)

    # With 80 rows in the base table the fresh row id must be 81; it comes
    # from the base table, not from the rows the generator has seen.
    population_id = bayesdb_get_population(db, 'p')
    fresh_rowid = bayesdb_population_fresh_row_id(db, population_id)
    assert fresh_rowid == 81

    # Make sure the cgpm only has 40 rowids incorporated.
    generator_id = bayesdb_get_generator(db, population_id, 'm')
    max_rowid_sql = '''
        SELECT MAX(table_rowid) FROM bayesdb_cgpm_individual
        WHERE generator_id = ?
    '''
    max_incorporated = cursor_value(
        db.sql_execute(max_rowid_sql, (generator_id,)))
    assert max_incorporated == 40

    # Turn off multiprocessing for sequence of queries.
    db.backends['cgpm'].set_multiprocess(False)
    return db
コード例 #14
0
    def simulate_joint(self,
                       bdb,
                       generator_id,
                       modelnos,
                       rowid,
                       targets,
                       constraints,
                       num_samples=1,
                       accuracy=None):
        """Draw joint samples of `targets` from the Loom server.

        `targets` is a list of column numbers and `constraints` a list of
        ``(colno, value)`` pairs.  If `rowid` is not the population's fresh
        row id, the non-NULL values stored for that row are added to the
        constraints (on a private copy; the caller's list is not modified).
        `modelnos` and `accuracy` are accepted but not used here.

        Returns a list with one entry per data line returned by the server;
        each entry is a list of the values of `targets`, in order, converted
        to float unless the variable's stattype is nominal.

        Raises BQLError if a constraint names a column that already has a
        value in the existing row.
        """
        # Retrieve the population id.
        population_id = bayesdb_generator_population(bdb, generator_id)

        # Copy so that extending with row data below does not mutate the
        # list object owned by the caller.
        constraints = list(constraints)

        # If rowid exists, retrieve conditioning data from the table.  The
        # fresh row id belongs to the population, so it is looked up with
        # population_id (the original passed generator_id here).
        if rowid != bayesdb_population_fresh_row_id(bdb, population_id):
            row_values_raw = bayesdb_population_row_values(
                bdb, population_id, rowid)
            row_values = [
                str(a) if isinstance(a, unicode) else a for a in row_values_raw
            ]
            # NOTE(review): each value is paired with its position in the
            # row; confirm that positions coincide with column numbers.
            observed = [
                (colno, value) for (colno, value) in enumerate(row_values)
                if value is not None
            ]
            constraints_colnos = [c[0] for c in constraints]
            if any(colno in constraints_colnos for (colno, _) in observed):
                raise BQLError(bdb, 'Overlap between constraints and '
                    'target row in simulate.')
            constraints.extend(observed)

        # Prepare the query row to provide to Loom: targets are left blank
        # and constraints carry their values.  Names are resolved against
        # the population, matching the stattype lookup further down.
        query_row = {}
        target_num_to_name = {}
        for colno in targets:
            name = bayesdb_variable_name(bdb, population_id, None, colno)
            target_num_to_name[colno] = name
            query_row[name] = ''
        for (colno, value) in constraints:
            name = bayesdb_variable_name(bdb, population_id, None, colno)
            query_row[name] = value

        # Fetch the server.
        server = self._get_cache_entry(bdb, generator_id, 'preql_server')

        # Prepare the csv header.  Headers are sent lowercased; build them
        # from the same sequence as the values so that the two stay aligned
        # (taking them from a separate dict's keys does not guarantee that).
        names, values = zip(*query_row.items())
        lower_to_upper = {str(a).lower(): str(a) for a in names}
        csv_headers = [str(a).lower() for a in names]
        csv_values = [str(a) for a in values]

        # Retrieve the samples from the server.
        outfile = StringIO()
        writer = loom.preql.CsvWriter(outfile, returns=outfile.getvalue)
        reader = iter([csv_headers] + [csv_values])
        server._predict(reader, num_samples, writer, False)
        output = writer.result()

        # Parse output: the first line holds the headers, the rest samples.
        lines = output.strip().split('\r\n')
        returned_headers = [
            lower_to_upper[a] for a in lines[0].split(CSV_DELIMITER)
        ]

        # The stattype does not vary between samples, so classify each
        # target once instead of once per sample.
        target_is_nominal = {}
        for colno in targets:
            stattype = bayesdb_variable_stattype(bdb, population_id, None,
                                                 colno)
            target_is_nominal[colno] = _is_nominal(stattype)

        return_list = []
        for line in lines[1:]:
            sample = dict(zip(returned_headers, line.split(CSV_DELIMITER)))
            # Prepare the row.
            sample_values = []
            for colno in targets:
                value = sample[target_num_to_name[colno]]
                if not target_is_nominal[colno]:
                    value = float(value)
                sample_values.append(value)
            # Add this row to the return list.
            return_list.append(sample_values)

        return return_list