Example #1
0
def bql_row_predictive_relevance(
        bdb, population_id, generator_id, modelnos, rowid_target, rowid_query,
        colno, *constraint_args):
    if rowid_target is None:
        raise BQLError(bdb, 'No such target row for SIMILARITY')
    rowid_query = json.loads(rowid_query)
    modelnos = _retrieve_modelnos(modelnos)
    # Build the list of hypothetical values.
    # Each sequence of values is separated by None to demarcate between rows.
    splits = [-1] + [i for i, x in enumerate(constraint_args) if x is None]
    assert splits[-1] == len(constraint_args) - 1
    rows_list = [
        constraint_args[splits[i]+1:splits[i+1]]
        for i in range(len(splits)-1)
    ]
    assert all(len(row)%2 == 0 for row in rows_list)
    hypotheticals = [zip(row[::2], row[1::2]) for row in rows_list]
    if len(rowid_query) == 0 and len(hypotheticals) == 0:
        raise BQLError(bdb, 'No matching rows for PREDICTIVE RELEVANCE.')
    def generator_similarity(generator_id):
        backend = core.bayesdb_generator_backend(bdb, generator_id)
        return backend.predictive_relevance(
            bdb, generator_id, modelnos, rowid_target, rowid_query,
            hypotheticals, colno)
    generator_ids = _retrieve_generator_ids(bdb, population_id, generator_id)
    sims = map(generator_similarity, generator_ids)
    return stats.arithmetic_mean([stats.arithmetic_mean(s) for s in sims])
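
For reference, the None-separated encoding described in the comments above can be decoded as follows. This is a minimal standalone sketch with made-up column numbers and values (not taken from the source), showing how the flat constraint arguments become per-row lists of (column, value) pairs:

# Flat argument stream: two hypothetical rows, each terminated by None.
#   row 1: column 3 = 1.5, column 4 = 'x'
#   row 2: column 3 = 2.0
constraint_args = (3, 1.5, 4, 'x', None, 3, 2.0, None)
splits = [-1] + [i for i, x in enumerate(constraint_args) if x is None]
rows_list = [
    constraint_args[splits[i] + 1:splits[i + 1]]
    for i in range(len(splits) - 1)
]
hypotheticals = [list(zip(row[::2], row[1::2])) for row in rows_list]
# hypotheticals == [[(3, 1.5), (4, 'x')], [(3, 2.0)]]
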
Example #2
0
def bql_row_predictive_relevance(bdb, population_id, generator_id, modelnos,
                                 rowid_target, rowid_query, colno,
                                 *constraint_args):
    if rowid_target is None:
        raise BQLError(bdb, 'No such target row for SIMILARITY')
    rowid_query = json.loads(rowid_query)
    modelnos = _retrieve_modelnos(modelnos)
    # Build the list of hypothetical values.
    # Each sequence of values is separated by None to demarcate between rows.
    splits = [-1] + [i for i, x in enumerate(constraint_args) if x is None]
    assert splits[-1] == len(constraint_args) - 1
    rows_list = [
        constraint_args[splits[i] + 1:splits[i + 1]]
        for i in range(len(splits) - 1)
    ]
    assert all(len(row) % 2 == 0 for row in rows_list)
    hypotheticals = [zip(row[::2], row[1::2]) for row in rows_list]
    if len(rowid_query) == 0 and len(hypotheticals) == 0:
        raise BQLError(bdb, 'No matching rows for PREDICTIVE RELEVANCE.')

    def generator_similarity(generator_id):
        metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
        return metamodel.predictive_relevance(bdb, generator_id, modelnos,
                                              rowid_target, rowid_query,
                                              hypotheticals, colno)

    generator_ids = _retrieve_generator_ids(bdb, population_id, generator_id)
    sims = map(generator_similarity, generator_ids)
    return stats.arithmetic_mean([stats.arithmetic_mean(s) for s in sims])
Example #3
0
def bql_column_mutual_information(bdb, population_id, generator_id, colno0,
                                  colno1, numsamples, *constraint_args):
    mutinfs = _bql_column_mutual_information(bdb, population_id, generator_id,
                                             colno0, colno1, numsamples,
                                             *constraint_args)
    # XXX This integral of the CMI returned by each model of all generators
    # in the population is wrong! At least, it does not directly correspond to
    # any meaningful probabilistic quantity, other than literally the mean CMI
    # averaged over all population models.
    return stats.arithmetic_mean([stats.arithmetic_mean(m) for m in mutinfs])
Example #4
0
def bql_column_mutual_information(
        bdb, population_id, generator_id, modelnos, colnos0, colnos1,
        numsamples, *constraint_args):
    colnos0 = json.loads(colnos0)
    colnos1 = json.loads(colnos1)
    modelnos = _retrieve_modelnos(modelnos)
    mutinfs = _bql_column_mutual_information(
        bdb, population_id, generator_id, modelnos, colnos0, colnos1,
        numsamples, *constraint_args)
    # XXX This integral of the CMI returned by each model of all generators
    # in the population is wrong! At least, it does not directly correspond to
    # any meaningful probabilistic quantity, other than literally the mean CMI
    # averaged over all population models.
    return stats.arithmetic_mean([stats.arithmetic_mean(m) for m in mutinfs])
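
The XXX comment above concerns the aggregation itself: each generator contributes a list of per-model CMI estimates, and the result is the mean of the per-generator means. A minimal numeric sketch with made-up values (not from the source), illustrating that the aggregate is an unweighted mean of means rather than a mean over all models:

# Hypothetical per-model CMI estimates from two generators.
mutinfs = [[0.10, 0.14], [0.30]]
per_generator = [sum(m) / float(len(m)) for m in mutinfs]   # [0.12, 0.30]
result = sum(per_generator) / float(len(per_generator))     # 0.21
# A pooled mean over all three models would instead be 0.18.
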
Example #5
0
 def generator_similarity(generator_id):
     metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
     # XXX Change [colno] to colno by updating IBayesDBMetamodel.
     similarity_list = metamodel.row_similarity(bdb, generator_id, modelnos,
                                                rowid, target_rowid,
                                                [colno])
     return stats.arithmetic_mean(similarity_list)
Example #6
0
    def column_mutual_information(self,
                                  bdb,
                                  generator_id,
                                  modelno,
                                  colno0,
                                  colno1,
                                  constraints=None,
                                  numsamples=None):
        # XXX Default number of samples drawn from my arse.
        if numsamples is None:
            numsamples = 1000

        # Get the engine.
        engine = self._engine(bdb, generator_id)

        # Engine gives us a list of samples which it is our
        # responsibility to integrate over.
        mi_list = engine.mutual_information(colno0,
                                            colno1,
                                            evidence=constraints,
                                            N=numsamples,
                                            multiprocess=self._ncpu)

        # XXX Is this integral correct?  Should it be weighted?
        return arithmetic_mean(mi_list)
Example #7
0
 def column_mutual_information(self, bdb, generator_id, modelnos, colnos0,
                               colnos1, constraints, numsamples):
     population_id = bayesdb_generator_population(bdb, generator_id)
     colnames0 = [
         str(bayesdb_variable_name(bdb, population_id, generator_id, colno))
         for colno in colnos0
     ]
     colnames1 = [
         str(bayesdb_variable_name(bdb, population_id, generator_id, colno))
         for colno in colnos1
     ]
     server = self._get_preql_server(bdb, generator_id)
     target_set = server._cols_to_mask(server.encode_set(colnames0))
     query_set = server._cols_to_mask(server.encode_set(colnames1))
     if self._marginize_cmi(constraints):
         inner_numsamples = numsamples
         conditioning_rows_loom_format = self._get_constraint_rows(
             constraints, bdb, generator_id, population_id, modelnos,
             server, inner_numsamples)
     else:
         conditioning_rows_loom_format = [
             self._get_constraint_row(constraints, bdb, generator_id,
                                      population_id, server)
         ]
     mi_estimates = [
         server._query_server.mutual_information(
             target_set,
             query_set,
             entropys=None,
             sample_count=loom.preql.SAMPLE_COUNT,
             conditioning_row=conditioning_row_loom_format).mean
         for conditioning_row_loom_format in conditioning_rows_loom_format
     ]
     # Output requires an iterable.
     return [arithmetic_mean(mi_estimates)]
Example #8
0
def bql_column_dependence_probability(bdb, population_id, generator_id, colno0,
                                      colno1):
    def generator_depprob(generator_id):
        metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
        return metamodel.column_dependence_probability(bdb, generator_id, None,
                                                       colno0, colno1)

    generator_ids = _retrieve_generator_ids(bdb, population_id, generator_id)
    depprobs = map(generator_depprob, generator_ids)
    return stats.arithmetic_mean(depprobs)
Example #9
0
def bql_column_dependence_probability(
        bdb, population_id, generator_id, modelnos, colno0, colno1):
    modelnos = _retrieve_modelnos(modelnos)
    def generator_depprob(generator_id):
        backend = core.bayesdb_generator_backend(bdb, generator_id)
        depprob_list = backend.column_dependence_probability(
            bdb, generator_id, modelnos, colno0, colno1)
        return stats.arithmetic_mean(depprob_list)
    generator_ids = _retrieve_generator_ids(bdb, population_id, generator_id)
    depprobs = map(generator_depprob, generator_ids)
    return stats.arithmetic_mean(depprobs)
Example #10
0
def bql_row_similarity(
        bdb, population_id, generator_id, modelnos, rowid, target_rowid, colno):
    if target_rowid is None:
        raise BQLError(bdb, 'No such target row for SIMILARITY')
    modelnos = _retrieve_modelnos(modelnos)
    def generator_similarity(generator_id):
        metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
        # XXX Change [colno] to colno by updating IBayesDBMetamodel.
        return metamodel.row_similarity(
            bdb, generator_id, modelnos, rowid, target_rowid, [colno])
    generator_ids = _retrieve_generator_ids(bdb, population_id, generator_id)
    similarities = map(generator_similarity, generator_ids)
    return stats.arithmetic_mean(similarities)
Example #11
0
def bql_column_dependence_probability(bdb, population_id, generator_id,
                                      modelnos, colno0, colno1):
    modelnos = _retrieve_modelnos(modelnos)

    def generator_depprob(generator_id):
        backend = core.bayesdb_generator_backend(bdb, generator_id)
        depprob_list = backend.column_dependence_probability(
            bdb, generator_id, modelnos, colno0, colno1)
        return stats.arithmetic_mean(depprob_list)

    generator_ids = _retrieve_generator_ids(bdb, population_id, generator_id)
    depprobs = map(generator_depprob, generator_ids)
    return stats.arithmetic_mean(depprobs)
Example #12
0
def bql_row_similarity(
        bdb, population_id, generator_id, modelnos, rowid, target_rowid, colno):
    if target_rowid is None:
        raise BQLError(bdb, 'No such target row for SIMILARITY')
    modelnos = _retrieve_modelnos(modelnos)
    def generator_similarity(generator_id):
        backend = core.bayesdb_generator_backend(bdb, generator_id)
        # XXX Change [colno] to colno by updating BayesDB_Backend.
        similarity_list = backend.row_similarity(
            bdb, generator_id, modelnos, rowid, target_rowid, [colno])
        return stats.arithmetic_mean(similarity_list)
    generator_ids = _retrieve_generator_ids(bdb, population_id, generator_id)
    similarities = map(generator_similarity, generator_ids)
    return stats.arithmetic_mean(similarities)
Example #13
0
def bql_column_mutual_information(
        bdb, population_id, generator_id, colno0, colno1,
        numsamples, *constraint_args):
    if len(constraint_args) % 2 == 1:
        raise ValueError('Odd constraint arguments: %s.' % (constraint_args))
    constraints = dict(zip(constraint_args[::2], constraint_args[1::2])) \
        if constraint_args else None
    def generator_mutinf(generator_id):
        metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
        return metamodel.column_mutual_information(bdb, generator_id, None,
            colno0, colno1, constraints=constraints, numsamples=numsamples)
    generator_ids = [generator_id] if generator_id is not None else \
        core.bayesdb_population_generators(bdb, population_id)
    mutinfs = map(generator_mutinf, generator_ids)
    return stats.arithmetic_mean(mutinfs)
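
In this variant the constraint arguments are a flat alternation of column numbers and values with no row separators, so a single constraints dict is built by pairing even and odd positions. A small standalone sketch with made-up values (not from the source):

# Hypothetical flat constraint arguments: column 2 = 'yes', column 5 = 3.7.
constraint_args = (2, 'yes', 5, 3.7)
assert len(constraint_args) % 2 == 0
constraints = dict(zip(constraint_args[::2], constraint_args[1::2]))
# constraints == {2: 'yes', 5: 3.7}
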
Example #14
0
def bql_row_similarity(bdb, population_id, generator_id, rowid, target_rowid,
        *colnos):
    if target_rowid is None:
        raise BQLError(bdb, 'No such target row for SIMILARITY')
    if len(colnos) == 0:
        colnos = core.bayesdb_variable_numbers(bdb, population_id,
            generator_id)
    def generator_similarity(generator_id):
        metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
        return metamodel.row_similarity(bdb, generator_id, None, rowid,
            target_rowid, colnos)
    generator_ids = [generator_id] if generator_id is not None else \
        core.bayesdb_population_generators(bdb, population_id)
    similarities = map(generator_similarity, generator_ids)
    return stats.arithmetic_mean(similarities)
Example #15
0
    def column_dependence_probability(self, bdb, generator_id, modelno, colno0,
                                      colno1):
        # Optimize special-case vacuous case of self-dependence.
        # XXX Caller should avoid this.
        if colno0 == colno1:
            return 1

        # Get the engine.
        engine = self._engine(bdb, generator_id)

        # Engine gives us a list of dependence probabilities which it is our
        # responsibility to integrate over.
        depprob_list = engine.dependence_probability(
            colno0, colno1, multiprocess=self._multiprocess)

        return arithmetic_mean(depprob_list)
Example #16
0
    def row_similarity(self, bdb, generator_id, modelno, rowid, target_rowid,
                       colnos):
        # Map the variable and individual indexing.
        cgpm_rowid = self._cgpm_rowid(bdb, generator_id, rowid)
        cgpm_target_rowid = self._cgpm_rowid(bdb, generator_id, target_rowid)

        # Get the engine.
        engine = self._engine(bdb, generator_id)

        # Engine gives us a list of similarities which it is our
        # responsibility to integrate over.
        similarity_list = engine.row_similarity(cgpm_rowid,
                                                cgpm_target_rowid,
                                                colnos,
                                                multiprocess=self._ncpu)

        return arithmetic_mean(similarity_list)
Example #17
0
def bql_row_similarity(bdb, population_id, generator_id, rowid, target_rowid,
                       *colnos):
    if target_rowid is None:
        raise BQLError(bdb, 'No such target row for SIMILARITY')
    if len(colnos) == 0:
        colnos = core.bayesdb_variable_numbers(bdb, population_id,
                                               generator_id)
    if len(colnos) != 1:
        raise BQLError(bdb,
                       'Multiple with respect to columns: %s.' % (colnos, ))

    def generator_similarity(generator_id):
        metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
        return metamodel.row_similarity(bdb, generator_id, None, rowid,
                                        target_rowid, colnos)

    generator_ids = _retrieve_generator_ids(bdb, population_id, generator_id)
    similarities = map(generator_similarity, generator_ids)
    return stats.arithmetic_mean(similarities)
Example #18
0
 def column_mutual_information(self, bdb, generator_id, modelnos, colnos0,
         colnos1, constraints, numsamples):
     population_id = bayesdb_generator_population(bdb, generator_id)
     colnames0 = [
         str(bayesdb_variable_name(bdb, population_id, generator_id, colno))
         for colno in colnos0
     ]
     colnames1 = [
         str(bayesdb_variable_name(bdb, population_id, generator_id, colno))
         for colno in colnos1
     ]
     server = self._get_preql_server(bdb, generator_id)
     target_set = server._cols_to_mask(server.encode_set(colnames0))
     query_set = server._cols_to_mask(server.encode_set(colnames1))
     if self._marginize_cmi(constraints):
         inner_numsamples = numsamples
         conditioning_rows_loom_format = self._get_constraint_rows(
             constraints, bdb, generator_id, population_id, modelnos, server,
             inner_numsamples)
     else:
         conditioning_rows_loom_format = [
             self._get_constraint_row(constraints, bdb, generator_id,
             population_id, server)
         ]
     mi_estimates = [
         server._query_server.mutual_information(
             target_set,
             query_set,
             entropys=None,
             sample_count=loom.preql.SAMPLE_COUNT,
             conditioning_row=conditioning_row_loom_format
         ).mean
         for conditioning_row_loom_format in conditioning_rows_loom_format
     ]
     # Output requires an iterable.
     return [arithmetic_mean(mi_estimates)]
Example #19
0
 def generator_depprob(generator_id):
     backend = core.bayesdb_generator_backend(bdb, generator_id)
     depprob_list = backend.column_dependence_probability(
         bdb, generator_id, modelnos, colno0, colno1)
     return stats.arithmetic_mean(depprob_list)
Example #20
0
 def generator_similarity(generator_id):
     backend = core.bayesdb_generator_backend(bdb, generator_id)
     # XXX Change [colno] to colno by updating BayesDB_Backend.
     similarity_list = backend.row_similarity(
         bdb, generator_id, modelnos, rowid, target_rowid, [colno])
     return stats.arithmetic_mean(similarity_list)
Example #21
0
 def generator_similarity(generator_id):
     backend = core.bayesdb_generator_backend(bdb, generator_id)
     # XXX Change [colno] to colno by updating BayesDB_Backend.
     similarity_list = backend.row_similarity(bdb, generator_id, modelnos,
                                              rowid, target_rowid, [colno])
     return stats.arithmetic_mean(similarity_list)
Example #22
0
 def generator_depprob(generator_id):
     metamodel = core.bayesdb_generator_metamodel(bdb, generator_id)
     depprob_list = metamodel.column_dependence_probability(
         bdb, generator_id, modelnos, colno0, colno1)
     return stats.arithmetic_mean(depprob_list)