Example #1
 def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
     if rowid in self.rowid_to_component:
         assert not constraints or self.indexer not in constraints
         z = self.rowid_to_component[rowid]
         return self._simulate_one(rowid, targets, constraints, inputs, N,
                                   z)
     elif constraints and self.indexer in constraints:
         z = constraints[self.indexer]
         if z not in self.cgpm_row_divide.support():
             raise ValueError('Constrained cluster has 0 density: %s' %
                              (z, ))
         return self._simulate_one(rowid, targets, constraints, inputs, N,
                                   z)
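     # Neither a known nor a constrained cluster assignment: weight each
     # cluster by its probability given the constraints, then sample
     # cluster assignments.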
     z_support = self.cgpm_row_divide.support()
     z_weights = [
         self.logpdf(rowid, {self.indexer: z}, constraints, inputs)
         for z in z_support
     ]
     zs = log_pflip(z_weights, array=z_support, size=(N or 1), rng=self.rng)
     counts = {z: n for z, n in enumerate(np.bincount(zs)) if n}
     samples = [
         self._simulate_one(rowid, targets, constraints, inputs, n, z)
         for z, n in counts.items()
     ]
     return samples[0][0] if N is None else lchain(*samples)
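All of these examples delegate the final draw to log_pflip (imported in several of them as gu.log_pflip from the project's general utilities), which samples from a categorical distribution whose weights are given as unnormalized log probabilities and, when an array argument is supplied, maps the sampled indexes onto those outcomes. The sketch below illustrates the behavior the call sites rely on; it is an illustration written for this page under those assumptions, not the project's actual implementation.

import numpy as np
from scipy.special import logsumexp

def log_pflip(logps, array=None, size=None, rng=None):
    """Sample from a categorical given unnormalized log probabilities (sketch)."""
    rng = rng if rng is not None else np.random.RandomState()
    logps = np.asarray(logps, dtype=float)
    # Normalize in log space before exponentiating, for numerical stability.
    probs = np.exp(logps - logsumexp(logps))
    indexes = rng.choice(len(probs), p=probs, size=size)
    # Map sampled indexes onto the provided outcomes, if any were given.
    return indexes if array is None else np.asarray(array)[indexes]

With size=N the helper returns N draws at once, which is how Examples #1, #13, and #17 convert the draws into per-cluster sample counts with np.bincount; with size omitted it returns a single value.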
Example #2
def transition_hypers(cgpms, grids, rng):
    """Transitions hyperparameters of cgpms greedily."""
    assert all([isinstance(cgpm, DistributionCGPM) for cgpm in cgpms])
    assert all([type(cgpm) is type(cgpms[0]) for cgpm in cgpms])
    hyperparams = cgpms[0].get_hypers()
    shuffled_hypers = rng.permutation(list(hyperparams.keys()))
    # For each hyper.
    for hyper in shuffled_hypers:
        logps = []
        # For each grid point.
        for grid_value in grids[hyper]:
            # Compute the probability of the grid point.
            hyperparams[hyper] = grid_value
            logp_k = 0
            for cgpm in cgpms:
                cgpm.set_hypers(hyperparams)
                logp_k += cgpm.logpdf_score()
            logps.append(logp_k)
        # Sample a new hyperparameter from the grid.
        index = log_pflip(logps, rng=rng)
        hyperparams[hyper] = grids[hyper][index]
    # Set the hyperparameters for each cgpm.
    for cgpm in cgpms:
        cgpm.set_hypers(hyperparams)
    return hyperparams
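Here the cgpms share a single hyperparameter dictionary, so every candidate grid value is pushed into each cgpm with set_hypers and the per-cgpm logpdf_score values are summed before the new value is drawn with log_pflip. A hypothetical call site (the variable names are illustrative, not taken from the project) might be:

# Resample the shared hyperparameters of the component models in one column,
# sweeping one hyperparameter at a time over its grid.
new_hypers = transition_hypers(column_component_models, hyper_grids, rng)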
Example #3
 def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
     DistributionCGPM.simulate(self, rowid, targets, constraints, inputs, N)
     if rowid in self.data and not isnan(self.data[rowid]):
         return {self.outputs[0]: self.data[rowid]}
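     # Posterior predictive log probabilities of x=0 and x=1 (Beta-Bernoulli).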
     p0 = calc_predictive_logp(0, self.N, self.x_sum, self.alpha, self.beta)
     p1 = calc_predictive_logp(1, self.N, self.x_sum, self.alpha, self.beta)
     x = log_pflip([p0, p1], rng=self.rng)
     return {self.outputs[0]: x}
Example #4
 def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
     DistributionCGPM.simulate(self, rowid, targets, constraints, inputs, N)
     if rowid not in self.data:
         K = self.support()
         logps = [self.logpdf(rowid, {targets[0]: x}, None) for x in K]
         x = log_pflip(logps, array=K, rng=self.rng)
     else:
         x = self.data[rowid]
     return {self.outputs[0]: x}
Example #5
 def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
     DistributionGpm.simulate(self, rowid, targets, constraints, inputs, N)
     if rowid in self.data:
         return {self.outputs[0]: self.data[rowid]}
     p0 = Bernoulli.calc_predictive_logp(0, self.N, self.x_sum, self.alpha,
                                         self.beta)
     p1 = Bernoulli.calc_predictive_logp(1, self.N, self.x_sum, self.alpha,
                                         self.beta)
     x = gu.log_pflip([p0, p1], rng=self.rng)
     return {self.outputs[0]: x}
Example #6
File: crp.py Project: wilsondy/cgpm
 def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
     DistributionGpm.simulate(self, rowid, targets, constraints, inputs, N)
     if rowid in self.data:
         x = self.data[rowid]
     else:
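         # Candidate values: the existing CRP tables plus one new table.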
         K = (sorted(self.counts) + [max(self.counts) + 1]
              if self.counts else [0])
         logps = [self.logpdf(rowid, {targets[0]: x}, None) for x in K]
         x = gu.log_pflip(logps, array=K, rng=self.rng)
     return {self.outputs[0]: x}
Example #7
 def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
     assert targets == self.outputs
     assert not constraints
     if rowid in self.data.x:
         return {self.outputs[0]: self.data.x[rowid]}
     logps = [
         self.logpdf(rowid, {targets[0]: x}, None, inputs)
         for x in range(self.k)
     ]
     x = gu.log_pflip(logps, rng=self.rng)
     return {self.outputs[0]: x}
Example #8
def transition_rows(cgpm_mixture, rowid, rng):
    """Performs a Gibbs step on the rowid in the given cgpm_mixture."""
    assert isinstance(cgpm_mixture, (FiniteRowMixture, FlexibleRowMixture))
    observation, inputs = cgpm_mixture.unobserve(rowid)
    zs = cgpm_mixture.cgpm_row_divide.support()
    logps = []
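    # Score the full row under each candidate cluster assignment.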
    for z in zs:
        observation[cgpm_mixture.cgpm_row_divide.outputs[0]] = z
        logp_z = cgpm_mixture.logpdf(None, observation, None, inputs)
        logps.append(logp_z)
    assignment = log_pflip(logps, array=zs, rng=rng)
    observation[cgpm_mixture.cgpm_row_divide.outputs[0]] = assignment
    cgpm_mixture.observe(rowid, observation, inputs)
Example #9
 def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
     constraints = constraints or {}
     inputs = inputs or {}
     # Generate samples and weights.
     samples, weights = zip(*[
         self.weighted_sample(rowid, targets, constraints, inputs)
         for _i in range(self.accuracy)
     ])
     # Sample importance resample.
     if all(isinf(l) for l in weights):
         raise ValueError('Zero density constraints: %s' % (constraints, ))
     index = 0 if self.accuracy == 1 else log_pflip(weights, rng=self.rng)
     return {q: samples[index][q] for q in targets}
Example #10
 def _gibbs_transition_row(self, rowid):
     # Probability of row crp assignment to each cluster.
     K = self.crp.clusters[0].gibbs_tables(rowid)
     logp_crp = self.crp.clusters[0].gibbs_logps(rowid)
     # Probability of row data in each cluster.
     logp_data = self._logpdf_row_gibbs(rowid, K)
     assert len(logp_data) == len(logp_crp)
     # Sample new cluster.
     p_cluster = np.add(logp_data, logp_crp)
     z_b = gu.log_pflip(p_cluster, array=K, rng=self.rng)
     # Migrate the row.
     if self.Zr(rowid) != z_b:
         self._migrate_row(rowid, z_b)
     self._check_partitions()
Example #11
 def transition_params(self, N=None):
     num_transitions = N if N is not None else 1
     for i in range(num_transitions):
         # Transition noise parameter.
         alphas = np.linspace(0.01, 0.99, 30)
         alpha_logps = [
             RandomForest.calc_log_likelihood(
                 list(self.data.x.values()), list(self.data.Y.values()),
                 self.regressor, self.counts, a)
             for a in alphas
         ]
         self.alpha = gu.log_pflip(alpha_logps, array=alphas, rng=self.rng)
         # Transition forest.
         if len(self.data.Y) > 0:
             self.regressor.fit(list(self.data.Y.values()),
                                list(self.data.x.values()))
Example #12
 def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
     if constraints is None:
         constraints = {}
     if inputs is None:
         inputs = {}
     samples, weights = zip(*[
         self.weighted_sample(rowid, targets, constraints, inputs)
         for _i in range(self.accuracy)
     ])
     if all(isinf(l) for l in weights):
         raise ValueError('Zero density constraints: %s' % (constraints, ))
     # Skip an expensive random choice if there is only one option.
     index = 0 if self.accuracy == 1 else \
         gu.log_pflip(weights, rng=self.rng)
     return {q: samples[index][q] for q in targets}
Example #13
def view_simulate(view, rowid, targets, constraints, N):
    if not view.hypothetical(rowid):
        return _simulate_row(view, targets, view.Zr(rowid), N)
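    # Hypothetical row: mix over CRP tables, weighting each table by its CRP
    # prior mass and by the density of the constraints in that table.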
    Nk = view.Nk()
    N_rows = len(view.Zr())
    K = view.crp.clusters[0].gibbs_tables(-1)
    lp_crp = [Crp.calc_predictive_logp(k, N_rows, Nk, view.alpha()) for k in K]
    lp_constraints = [_logpdf_row(view, constraints, k) for k in K]
    if all(np.isinf(lp_constraints)):
        raise ValueError('Zero density constraints: %s' % (constraints, ))
    lp_cluster = np.add(lp_crp, lp_constraints)
    ks = log_pflip(lp_cluster, array=K, size=N, rng=view.rng)
    counts = {k: n for k, n in enumerate(np.bincount(ks)) if n > 0}
    samples = (_simulate_row(view, targets, k, counts[k]) for k in counts)
    return chain.from_iterable(samples)
Example #14
 def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
     assert targets == self.outputs
     assert not constraints
     if rowid in self.data:
         samples = [self.data[rowid]] * (N or 1)
     elif not self.regressor:
         samples = self.rng.choice(range(self.k), size=(N or 1))
     else:
         y_dum = self.process_inputs(inputs)
         y_dum_probe = np.reshape(y_dum, (1, -1))
         logps = self.regressor.predict_log_proba(y_dum_probe)
         samples = log_pflip(logps[0],
                             array=list(self.class_to_index.keys()),
                             size=(N or 1),
                             rng=self.rng)
     return dictify_samples(self.outputs[0], samples, N)
Example #15
 def _likelihood_weighted_resample(self, samples, rowid, constraints=None,
         inputs=None, statenos=None, multiprocess=1):
     assert len(samples) == (
         len(self.states) if statenos is None else len(statenos))
     assert all(len(s) == len(samples[0]) for s in samples[1:])
     N = len(samples[0])
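     # Weight each state's batch of samples by the log density of the
     # constraints under that state.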
     weights = np.zeros(len(samples)) if not constraints else \
         self.logpdf(rowid, constraints, inputs,
             statenos=statenos, multiprocess=multiprocess)
     n_model = np.bincount(gu.log_pflip(weights, size=N, rng=self.rng))
     indexes = [self.rng.choice(N, size=n, replace=False) for n in n_model]
     resamples = [
         [s[i] for i in index]
         for s, index in zip(samples, indexes)
         if len(index) > 0
     ]
     return list(itertools.chain.from_iterable(resamples))
Example #16
def transition_hypers_full(cgpms, grids, rng):
    """Transitions hyperparameters of cgpms using full grid search."""
    assert all([isinstance(cgpm, DistributionCGPM) for cgpm in cgpms])
    assert all([type(cgpm) is type(cgpms[0]) for cgpm in cgpms])
    hypers = list(grids.keys())
    cells = list(itertools.product(*grids.values()))
    logps = []
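    # Score every joint cell of the Cartesian-product hyperparameter grid.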
    for cell in cells:
        proposal = dict(zip(hypers, cell))
        logp_cell = 0
        for cgpm in cgpms:
            cgpm.set_hypers(proposal)
            logp_cell += cgpm.logpdf_score()
        logps.append(logp_cell)
    index = log_pflip(logps, rng=rng)
    selected = dict(zip(hypers, cells[index]))
    for cgpm in cgpms:
        cgpm.set_hypers(selected)
    return selected, cells, logps
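Compared with the greedy per-hyperparameter sweep of transition_hypers in Example #2, this full variant scores every joint cell of the grid before sampling one, so its cost grows with the product of the grid sizes rather than their sum.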
Example #17
 def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
     # Refer to comment in logpdf.
     constraints = self._populate_constraints(rowid, targets, constraints)
     if not self.hypothetical(rowid):
         rowid = None
     network = self.build_network()
     # Condition on the cluster assignment.
     if self.outputs[0] in constraints:
         return network.simulate(rowid, targets, constraints, inputs, N)
     # Determine how many samples to return.
     unwrap_result = N is None
     if unwrap_result:
         N = 1
     # Expose cluster assignments to the samples?
     exposed = self.outputs[0] in targets
     if exposed:
         targets = [q for q in targets if q != self.outputs[0]]
     # Weight clusters by probability of constraints in each cluster.
     K = self.crp.clusters[0].gibbs_tables(-1)
     constr2 = [merged(constraints, {self.outputs[0]: k}) for k in K]
     lp_constraints_unorm = [network.logpdf(rowid, ev) for ev in constr2]
     # Find number of samples in each cluster.
     Ks = gu.log_pflip(lp_constraints_unorm, array=K, size=N, rng=self.rng)
     counts = {k: n for k, n in enumerate(np.bincount(Ks)) if n > 0}
     # Add the cluster assignment to the constraints and sample the rest.
     constr3 = {
         k: merged(constraints, {self.outputs[0]: k})
         for k in counts
     }
     samples = [
         network.simulate(rowid, targets, constr3[k], inputs, counts[k])
         for k in counts
     ]
     # If cluster assignments are exposed, append them to the samples.
     if exposed:
         samples = [[merged(l, {self.outputs[0]: k}) for l in s]
                    for s, k in zip(samples, counts)]
     # Return 1 sample if N is None, otherwise a list.
     result = list(itertools.chain.from_iterable(samples))
     return result[0] if unwrap_result else result
Example #18
 def transition_hypers(self):
     """Transitions the hyperparameters of each cluster."""
     hypers = list(self.hypers.keys())
     self.rng.shuffle(hypers)
     # For each hyper.
     for hyper in hypers:
         logps = []
         # For each grid point.
         for grid_value in self.hyper_grids[hyper]:
             # Compute the probability of the grid point.
             self.hypers[hyper] = grid_value
             logp_k = 0
             for k in self.clusters:
                 self.clusters[k].set_hypers(self.hypers)
                 logp_k += self.clusters[k].logpdf_score()
             logps.append(logp_k)
         # Sample a new hyperparameter from the grid.
         index = gu.log_pflip(logps, rng=self.rng)
         self.hypers[hyper] = self.hyper_grids[hyper][index]
     # Set the hyperparameters in each cluster.
     for k in self.clusters:
         self.clusters[k].set_hypers(self.hypers)
     self.aux_model = self.create_aux_model()