def logpdf_one(cgpm, rowid, targets, constraints, inputs):
    """Evaluate the log density of the targets that belong to one cgpm.

    `targets` and `constraints` are narrowed against `cgpm.outputs`, and
    `inputs` against `cgpm.inputs`, using `get_intersection` before the
    query is forwarded to `cgpm.logpdf`.

    Returns 0 when the narrowed targets are empty (the cgpm contributes
    nothing to the query); otherwise returns the value of `cgpm.logpdf`.
    """
    relevant_targets = get_intersection(cgpm.outputs, targets)
    if relevant_targets:
        relevant_constraints = get_intersection(cgpm.outputs, constraints)
        relevant_inputs = get_intersection(cgpm.inputs, inputs)
        return cgpm.logpdf(
            rowid, relevant_targets, relevant_constraints, relevant_inputs)
    # None of the requested targets is modeled by this cgpm.
    return 0
def observe_one(cgpm, rowid, observation, inputs):
    """Incorporate the part of an observation that belongs to one cgpm.

    The observation is narrowed against `cgpm.outputs`. If nothing remains
    the cgpm is left untouched. Otherwise the cgpm's inputs are assembled
    from two sources -- entries of `observation` that match `cgpm.inputs`
    and entries of `inputs` that match `cgpm.inputs` -- combined with
    `merged`, and `cgpm.observe` is called.

    Returns None.
    """
    own_observation = get_intersection(cgpm.outputs, observation)
    if not own_observation:
        return
    # Inputs may be supplied by other observed variables in the same row ...
    from_observation = get_intersection(cgpm.inputs, observation)
    # ... or given explicitly by the caller.
    from_caller = get_intersection(cgpm.inputs, inputs)
    cgpm.observe(rowid, own_observation, merged(from_observation, from_caller))
def simulate_one(cgpm, rowid, targets, constraints, inputs, N=None):
    """Simulate the targets that belong to one cgpm.

    `targets` and `constraints` are narrowed against `cgpm.outputs`, and
    `inputs` against `cgpm.inputs`, using `get_intersection` before the
    query is forwarded to `cgpm.simulate`.

    When the narrowed targets are empty, no call is made to the cgpm and
    an empty result is returned: `{}` if `N` is None, otherwise a list of
    `N` empty dicts. Each dict in that list is a distinct object so that a
    caller filling in one sample does not silently modify the others.
    """
    targets_cgpm = get_intersection(cgpm.outputs, targets)
    if not targets_cgpm:
        if N is None:
            return {}
        # Do not use [{}] * N: that repeats a reference to a single dict.
        return [{} for _ in range(N)]
    constraints_cgpm = get_intersection(cgpm.outputs, constraints)
    inputs_cgpm = get_intersection(cgpm.inputs, inputs)
    return cgpm.simulate(rowid, targets_cgpm, constraints_cgpm, inputs_cgpm, N)
def _logpdf_one(self, rowid, targets, constraints, inputs, component):
    """Assess logpdf in fixed mixture component.

    `targets` and `constraints` are narrowed against `self.outputs_x`;
    `inputs` is narrowed against `self.inputs_x` (the same filter that
    `observe` applies). The component index is then added to the inputs
    under the key `self.indexer` and the query is forwarded to
    `self.cgpm_components_array.logpdf`.

    Returns 0 when the narrowed targets are empty.
    """
    targets_x = get_intersection(self.outputs_x, targets)
    if not targets_x:
        return 0
    constraints_x = get_intersection(self.outputs_x, constraints)
    # Inputs must be filtered by the input variables, not the outputs;
    # filtering by outputs_x would discard the caller-supplied inputs.
    inputs_x = get_intersection(self.inputs_x, inputs)
    inputs_arr = merged(inputs_x, {self.indexer: component})
    return self.cgpm_components_array.logpdf(
        rowid=rowid,
        targets=targets_x,
        constraints=constraints_x,
        inputs=inputs_arr,
    )
def observe(self, rowid, observation, inputs=None):
    """Incorporate an observation for `rowid` into the mixture.

    The component for the row is determined as follows:
      - if `rowid` is already in `self.rowid_to_component`, the recorded
        component is reused;
      - otherwise, if `self.indexer` is a key of `observation`, that value
        is taken as the component;
      - otherwise a component is drawn with `self.cgpm_row_divide.simulate`.
    For a new row the chosen component is observed in
    `self.cgpm_row_divide` and recorded in `self.rowid_to_component`.

    Finally the entries of `observation` matching `self.outputs_x` are
    observed in `self.cgpm_components_array`, with the component added to
    the inputs matching `self.inputs_x`.

    Returns None.
    """
    if rowid in self.rowid_to_component:
        component = {self.indexer: self.rowid_to_component[rowid]}
    else:
        inputs_z = get_intersection(self.inputs_z, inputs)
        if self.indexer in observation:
            component = {self.indexer: observation[self.indexer]}
        else:
            # Argument order is (rowid, targets, constraints, inputs):
            # inputs_z must go in the inputs slot, with no constraints.
            component = self.cgpm_row_divide.simulate(
                rowid, [self.indexer], None, inputs_z)
        self.cgpm_row_divide.observe(rowid, component, inputs_z)
        self.rowid_to_component[rowid] = component[self.indexer]
    inputs_x = get_intersection(self.inputs_x, inputs)
    observation_x = get_intersection(self.outputs_x, observation)
    inputs_arr = merged(inputs_x, component)
    self.cgpm_components_array.observe(rowid, observation_x, inputs_arr)
def _simulate_one(self, rowid, targets, constraints, inputs, N, component):
    """Simulate from a fixed mixture component.

    `targets` and `constraints` are narrowed against `self.outputs_x`;
    `inputs` is narrowed against `self.inputs_x` (the same filter that
    `observe` applies). If any targets remain they are simulated from
    `self.cgpm_components_array` with the component passed as an input
    under the key `self.indexer`; otherwise an empty result is created
    (`{}` if `N` is None, else a list of `N` distinct empty dicts).

    If `self.indexer` is among `targets`, the fixed `component` is written
    into the sample (or into every sample when `N` is not None).

    Returns a dict when `N` is None, otherwise a list of dicts.
    """
    targets_x = get_intersection(self.outputs_x, targets)
    if targets_x:
        constraints_x = get_intersection(self.outputs_x, constraints)
        # Inputs must be filtered by the input variables, not the outputs.
        inputs_x = get_intersection(self.inputs_x, inputs)
        inputs_arr = merged(inputs_x, {self.indexer: component})
        samples = self.cgpm_components_array.simulate(
            rowid=rowid,
            targets=targets_x,
            constraints=constraints_x,
            inputs=inputs_arr,
            N=N,
        )
    elif N is None:
        samples = {}
    else:
        # Do not use [{}] * N: that repeats a reference to a single dict,
        # so every "sample" would be the same object.
        samples = [{} for _ in range(N)]
    if self.indexer in targets:
        if N is None:
            samples[self.indexer] = component
        else:
            for sample in samples:
                sample[self.indexer] = component
    return samples
def logpdf(self, rowid, targets, constraints=None, inputs=None):
    """Return the log density of `targets` given `constraints` for `rowid`.

    The computation depends on what is known about the row's mixture
    component z (the variable named by `self.indexer`). The cases are
    tried in this order:

      1. `rowid` already has a recorded component: condition on it.
      2. z is one of the targets: joint query over z and the data.
      3. z is one of the constraints: condition on the given component.
      4. Otherwise: marginalize z over `self.cgpm_row_divide.support()`.

    Raises ValueError in case 3 if the constrained component is not in
    the support of `self.cgpm_row_divide`.
    """
    idx = self.indexer

    # Case 1 -- the row is already assigned to a component.
    #   p(xT|xC,z=k)
    if rowid in self.rowid_to_component:
        assert not constraints or self.indexer not in constraints
        assigned = self.rowid_to_component[rowid]
        return self._logpdf_one(rowid, targets, constraints, inputs, assigned)

    # Case 2 -- the component is being queried.
    #   p(z=k,xT|xC)
    #       = p(z=k,xT,xC) / p(xC)              Bayes rule
    #       = p(z=k)p(xT,xC|z=k) / p(xC)        chain rule on numerator
    #   with
    #       p(z=k)                  -> log_prior
    #       p(xT,xC|z=k)            -> log_joint
    #       p(xC) = sum_z p(xC,z)   -> log_evidence (recursive call)
    if idx in targets:
        k = targets[idx]
        inputs_z = get_intersection(self.inputs_z, inputs)
        log_prior = self.cgpm_row_divide.logpdf(
            rowid=rowid,
            targets={idx: k},
            constraints=None,
            inputs=inputs_z,
        )
        everything = merged(targets, constraints or {})
        log_joint = self._logpdf_one(
            rowid=rowid,
            targets=everything,
            constraints=None,
            inputs=inputs,
            component=k,
        )
        if constraints:
            log_evidence = self.logpdf(
                rowid=rowid,
                targets=constraints,
                constraints=None,
                inputs=inputs,
            )
        else:
            log_evidence = 0
        return (log_prior + log_joint) - log_evidence

    # Case 3 -- the component is given as a constraint.
    #   P(xT|xC,z=k)
    #       = P(xT,xC,z=k) / P(xC,z=k)
    #       = P(xT,xC|z=k)P(z=k) / P(xC|z=k)P(z=k)
    #       = P(xT,xC|z=k) / P(xC|z=k)
    #   with
    #       P(xT,xC|z=k)    -> log_joint
    #       P(xC|z=k)       -> log_given
    if constraints and idx in constraints:
        k = constraints[idx]
        if k not in self.cgpm_row_divide.support():
            raise ValueError('Constrained cluster has 0 density: %s' % (k, ))
        everything = merged(targets, constraints)
        log_joint = self._logpdf_one(
            rowid=rowid,
            targets=everything,
            constraints=None,
            inputs=inputs,
            component=k,
        )
        log_given = self._logpdf_one(
            rowid=rowid,
            targets=constraints,
            constraints=None,
            inputs=inputs,
            component=k,
        )
        return log_joint - log_given

    # Case 4 -- marginalize over the component by enumeration.
    # Let K list the support of z:
    #   P(xT|xC)
    #       = sum_i P(xT,z=K[i]|xC)
    #       = sum_i P(xT|xC,z=K[i])P(z=K[i]|xC)     chain rule
    # where the posterior over components is
    #   P(z=K[i]|xC) = P(xC|z=K[i])P(z=K[i]) / sum_i P(xC,z=K[i])
    # The per-component terms are
    #   P(z=K[i])                       -> prior[i]
    #   P(xC|z=K[i])                    -> evidence[i]
    #   P(xC,z=K[i])                    -> unnormalized[i]
    #   P(z=K[i]|xC)                    -> posterior[i]
    #   P(xT|xC,z=K[i])                 -> likelihood[i]
    #   P(xT|xC,z=K[i])P(z=K[i]|xC)     -> weighted[i]
    inputs_z = get_intersection(self.inputs_z, inputs)
    support = self.cgpm_row_divide.support()

    prior = []
    for k in support:
        prior.append(
            self.cgpm_row_divide.logpdf(rowid, {idx: k}, None, inputs_z))

    evidence = []
    for k in support:
        evidence.append(self._logpdf_one(rowid, constraints, None, inputs, k))

    unnormalized = np.add(prior, evidence)
    posterior = log_normalize(unnormalized)

    likelihood = []
    for k in support:
        likelihood.append(
            self._logpdf_one(rowid, targets, constraints, inputs, k))

    weighted = np.add(likelihood, posterior)
    return logsumexp(weighted)