Ejemplo n.º 1
0
 def _joint_logpdf(self, bdb, genid, modelno, Q, Y, n_samples=None):
     # XXX Computes the joint probability of query Q given evidence Y
     # for a single model. The function is a likelihood weighted
     # integrator.
     # XXX Determine.
     if n_samples is None:
         n_samples = self.n_samples
     # Validate inputs.
     if modelno is None:
         raise ValueError('Invalid modelno None, integer requried.')
     if len(Q) == 0:
         raise ValueError('Invalid query Q: len(Q) == 0.')
     # Ensure consistency of any duplicates in Q and Y.
     Q = self._queries_consistent_with_constraints(Q, Y)
     if Q is None:
         return float('-inf')
     for r, _, _ in Q+Y:
         assert r == Q[0][0], "Cannot assess more than one row, "\
             "%s and %s requested" % (Q[0][0], r)
     # (Q,Y) marginal joint density.
     _, QY_weights = self._weighted_sample(bdb, genid, modelno,
         Q[0][0], Q+Y, n_samples=n_samples)
     # Y marginal density.
     _, Y_weights = self._weighted_sample(bdb, genid, modelno,
         Q[0][0], Y, n_samples=n_samples)
     # XXX TODO Keep sampling until logpQY <= logpY
     logpQY = logmeanexp(QY_weights)
     logpY = logmeanexp(Y_weights)
     return logpQY - logpY
Ejemplo n.º 2
0
def bql_row_column_predictive_probability(bdb, population_id, generator_id,
                                          rowid, colno):
    """Score the stored value of cell (rowid, colno) against its own row.

    Returns None when the cell holds no value.  Otherwise the cell value
    is the query and every other non-None cell of the same row is a
    constraint, both evaluated on a fresh row id.  Each generator selected
    by ``_retrieve_generator_ids`` contributes one ``logpdf_joint`` result;
    the results are combined with ``logmeanexp`` and passed through
    ``ieee_exp``.
    """
    cell_value = core.bayesdb_population_cell_value(bdb, population_id, rowid,
                                                    colno)
    if cell_value is None:
        return None

    # Retrieve all other values in the row.
    row_values = core.bayesdb_population_row_values(bdb, population_id, rowid)
    variable_numbers = core.bayesdb_variable_numbers(bdb, population_id, None)

    # The query and constraints are attached to a fresh rowid rather than
    # to the row they were read from.
    fresh_rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)
    query = [(colno, cell_value)]
    constraints = [
        (other_colno, other_value)
        for other_colno, other_value in zip(variable_numbers, row_values)
        if (other_value is not None) and (other_colno != colno)
    ]

    # One log density per generator, in the order the ids are returned.
    predprobs = [
        core.bayesdb_generator_metamodel(bdb, gid).logpdf_joint(
            bdb, gid, fresh_rowid, query, constraints, None)
        for gid in _retrieve_generator_ids(bdb, population_id, generator_id)
    ]
    return ieee_exp(logmeanexp(predprobs))
Ejemplo n.º 3
0
def bql_row_column_predictive_probability(bdb, population_id, generator_id,
                                          modelnos, rowid, targets,
                                          constraints):
    """Score the stored values of ``targets`` in ``rowid`` given ``constraints``.

    ``targets`` and ``constraints`` arrive as JSON text and are decoded
    into collections of column numbers.  Cells whose stored value is None
    are dropped from both.  Returns None when no target cell has a value;
    otherwise each generator selected by ``_retrieve_generator_ids``
    contributes one ``logpdf_joint`` result on a fresh row id, and the
    results are combined with ``logmeanexp`` and passed through
    ``ieee_exp``.
    """
    target_colnos = json.loads(targets)
    constraint_colnos = json.loads(constraints)
    modelnos = _retrieve_modelnos(modelnos)
    # Build the constraints and query from rowid, using a fresh rowid.
    fresh_rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)

    def non_null_cells(colnos):
        # Pair each column number with its stored value, skipping NULLs.
        cells = []
        for colno in colnos:
            value = core.bayesdb_population_cell_value(
                bdb, population_id, rowid, colno)
            if value is not None:
                cells.append((colno, value))
        return cells

    cgpm_targets = non_null_cells(target_colnos)
    # If all targets have NULL values, return None.
    if not cgpm_targets:
        return None
    cgpm_constraints = non_null_cells(constraint_colnos)

    predprobs = []
    for gid in _retrieve_generator_ids(bdb, population_id, generator_id):
        metamodel = core.bayesdb_generator_metamodel(bdb, gid)
        predprobs.append(
            metamodel.logpdf_joint(bdb, gid, modelnos, fresh_rowid,
                                   cgpm_targets, cgpm_constraints))
    return ieee_exp(logmeanexp(predprobs))
Ejemplo n.º 4
0
    def logpdf_joint(
        self,
        bdb,
        generator_id,
        modelnos,
        rowid,
        targets,
        _constraints,
    ):
        """Return the logmeanexp over models of the summed target log densities.

        For every model number found in the result of
        ``self._all_mus_sigmas``, the ``self._logpdf_1`` values of each
        ``(colno, x)`` pair in ``targets`` are summed; the per-model sums
        are then combined with ``logmeanexp``.

        ``modelnos``, ``rowid`` and ``_constraints`` are accepted but not
        used.  Note: the constraints are irrelevant for the same reason
        as in simulate_joint.
        """
        all_mus, all_sigmas = self._all_mus_sigmas(bdb, generator_id)

        # XXX Ignore modelnos and aggregate over all of them.
        modelwise = []
        for modelno in sorted(all_mus.keys()):
            mus = all_mus[modelno]
            sigmas = all_sigmas[modelno]
            modelwise.append(sum(
                self._logpdf_1(bdb, generator_id, mus, sigmas, colno, x)
                for (colno, x) in targets))
        return logmeanexp(modelwise)
Ejemplo n.º 5
0
def bql_row_column_predictive_probability(
        bdb, population_id, generator_id, modelnos, rowid, targets,
        constraints):
    """Score the stored values of ``targets`` in ``rowid`` given ``constraints``.

    ``targets`` and ``constraints`` are JSON text decoded into collections
    of column numbers; cells whose stored value is None are dropped.
    Returns None when every target cell is None.  Otherwise the backend of
    each generator selected by ``_retrieve_generator_ids`` is asked for
    ``logpdf_joint`` on a fresh row id, and the results are combined with
    ``logmeanexp`` and passed through ``ieee_exp``.
    """
    target_colnos = json.loads(targets)
    constraint_colnos = json.loads(constraints)
    modelnos = _retrieve_modelnos(modelnos)
    # Build the constraints and query from rowid, using a fresh rowid.
    fresh_rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)

    def stored_value(colno):
        return core.bayesdb_population_cell_value(
            bdb, population_id, rowid, colno)

    def observed_cells(colnos):
        # Keep only the (colno, value) pairs whose value is not NULL.
        pairs = ((colno, stored_value(colno)) for colno in colnos)
        return [(colno, value) for (colno, value) in pairs
                if value is not None]

    cgpm_targets = observed_cells(target_colnos)
    # If all targets have NULL values, return None.
    if not cgpm_targets:
        return None
    cgpm_constraints = observed_cells(constraint_colnos)

    predprobs = [
        core.bayesdb_generator_backend(bdb, gid).logpdf_joint(
            bdb, gid, modelnos, fresh_rowid, cgpm_targets,
            cgpm_constraints)
        for gid in _retrieve_generator_ids(bdb, population_id, generator_id)
    ]
    return ieee_exp(logmeanexp(predprobs))
Ejemplo n.º 6
0
 def logpdf_joint(self, bdb, generator_id, targets, constraints, modelno):
     """Return the logmeanexp of ``self._joint_logpdf`` over the chosen models.

     When ``modelno`` is None, the model numbers come from
     ``core.bayesdb_generator_modelnos``; otherwise only ``modelno`` is
     evaluated.  All evaluations run inside ``bdb.savepoint()``.
     """
     if modelno is not None:
         modelnos = [modelno]
     else:
         modelnos = core.bayesdb_generator_modelnos(bdb, generator_id)
     with bdb.savepoint():
         per_model = []
         for m in modelnos:
             per_model.append(
                 self._joint_logpdf(bdb, generator_id, m, targets,
                     constraints))
         return logmeanexp(per_model)
Ejemplo n.º 7
0
 def logpdf_joint(self, bdb, generator_id, targets, _constraints,
         modelno=None):
     """Return the logmeanexp over models of the summed Gaussian log densities.

     ``targets`` is iterated as 3-tuples whose second item is the column
     number and third item is the value.  For each model number in the
     result of ``self._all_mus_sigmas``, ``logpdf_gaussian`` is evaluated
     per target with that model's mu and sigma for the column and the
     values are summed.

     ``_constraints`` and ``modelno`` are accepted but not used.  Note:
     the constraints are irrelevant for the same reason as in
     simulate_joint.
     """
     all_mus, all_sigmas = self._all_mus_sigmas(bdb, generator_id)
     modelwise = []
     for m in sorted(all_mus.keys()):
         total = 0
         for (_, colno, value) in targets:
             total = total + logpdf_gaussian(
                 value, all_mus[m][colno], all_sigmas[m][colno])
         modelwise.append(total)
     return logmeanexp(modelwise)
Ejemplo n.º 8
0
 def logpdf_joint(self, bdb, generator_id, modelnos, rowid, targets,
         _constraints,):
     """Return the logmeanexp over models of the summed target log densities.

     For each model number in the result of ``self._all_mus_sigmas``, the
     ``self._logpdf_1`` values of every ``(colno, x)`` pair in ``targets``
     are added up; ``logmeanexp`` then combines the per-model totals.

     ``modelnos``, ``rowid`` and ``_constraints`` are accepted but not
     used.  Note: the constraints are irrelevant for the same reason as
     in simulate_joint.
     """
     all_mus, all_sigmas = self._all_mus_sigmas(bdb, generator_id)
     # XXX Ignore modelnos and aggregate over all of them.
     modelwise = []
     for m in sorted(all_mus.keys()):
         mus = all_mus[m]
         sigmas = all_sigmas[m]
         per_target = [
             self._logpdf_1(bdb, generator_id, mus, sigmas, colno, x)
             for (colno, x) in targets
         ]
         modelwise.append(sum(per_target))
     return logmeanexp(modelwise)
Ejemplo n.º 9
0
def bql_row_column_predictive_probability(bdb, population_id, generator_id,
        rowid, colno):
    """Score the stored value of cell (rowid, colno) with no constraints.

    Returns None when the cell holds no value.  Otherwise each generator
    (the given ``generator_id``, or every generator of the population when
    it is None) contributes one ``logpdf_joint`` result for the single
    target ``(rowid, colno, value)``; the results are combined with
    ``logmeanexp`` and passed through ``ieee_exp``.
    """
    value = core.bayesdb_population_cell_value(
        bdb, population_id, rowid, colno)
    if value is None:
        return None

    if generator_id is not None:
        generator_ids = [generator_id]
    else:
        generator_ids = core.bayesdb_population_generators(
            bdb, population_id)

    predprobs = []
    for gid in generator_ids:
        metamodel = core.bayesdb_generator_metamodel(bdb, gid)
        # Fresh target and constraint lists are built for every call.
        predprobs.append(metamodel.logpdf_joint(
            bdb, gid, [(rowid, colno, value)], [], None))
    return ieee_exp(logmeanexp(predprobs))
Ejemplo n.º 10
0
def test_logmeanexp():
    """Check logmeanexp on equal, close, and widely separated inputs."""
    # Two identical entries: the result must be exactly that entry.
    assert logmeanexp([-1000., -1000.]) == -1000.

    # Entries of similar magnitude, compared with the direct formula.
    expected_close = math.log(0.5 * (1 + math.exp(-1.)))
    assert relerr(expected_close, logmeanexp([0., -1.])) < 1e-15

    # The second entry contributes a negligible amount to the mean, so
    # the result should match log(1/2) to the same relative tolerance.
    expected_far = math.log(0.5)
    assert relerr(expected_far, logmeanexp([0., -1000.])) < 1e-15
Ejemplo n.º 11
0
def test_logmeanexp():
    """Exercise logmeanexp on three two-element inputs."""
    tolerance = 1e-15

    # Equal inputs must come back unchanged.
    equal_result = logmeanexp([-1000., -1000.])
    assert equal_result == -1000.

    # Inputs 0 and -1: compare against log of the arithmetic mean.
    reference = math.log(0.5 * (1 + math.exp(-1.)))
    assert relerr(reference, logmeanexp([0., -1.])) < tolerance

    # Inputs 0 and -1000: compare against log(0.5).
    reference = math.log(0.5)
    assert relerr(reference, logmeanexp([0., -1000.])) < tolerance