def sample(self, data, partition, alpha):
    """Run one update, delegating to one of the two wrapped samplers.

    A single ``uniform_rvs(0, 1)`` draw is compared with ``self.ratio``:
    a draw strictly below the ratio runs ``self.auxillary_sampler``, any
    other draw runs ``self.split_merge_sampler``. The chosen sampler
    receives ``data``, ``partition`` and ``alpha`` unchanged, and its
    return value is discarded.
    """
    draw = uniform_rvs(0, 1)

    use_auxillary = draw < self.ratio

    chosen = self.auxillary_sampler if use_auxillary else self.split_merge_sampler

    chosen.sample(data, partition, alpha)
def sample(self, data, partition):
    """Metropolis-Hastings update of ``self.cluster_density.params``.

    A candidate is drawn with ``self.proposal_func.random`` from the
    current parameters. Each side of the ratio is the base-measure log
    density of the parameters plus the log density of every data point in
    every cell of ``partition`` (evaluated with the cell's own value),
    minus the proposal log density. The density object is switched to the
    candidate before the candidate likelihood is accumulated, and is left
    holding the candidate on acceptance or the previous parameters on
    rejection.

    Args:
        data: Indexable collection; looked up with the indices stored in
            each cell's ``items``.
        partition: Object exposing ``cells``, each with ``value`` and
            ``items``.
    """
    density = self.cluster_density
    proposal = self.proposal_func

    def add_data_log_likelihood(total):
        # Adds the terms onto ``total`` one by one, in cell/item order,
        # using whatever ``density.params`` holds at the time of the call.
        for cell in partition.cells:
            cell_value = cell.value

            for index in cell.items:
                total += density.log_p(data[index], cell_value)

        return total

    current = density.params
    candidate = proposal.random(current)

    current_ll = self.base_measure.log_p(current)
    candidate_ll = self.base_measure.log_p(candidate)

    current_ll = add_data_log_likelihood(current_ll)

    # The candidate likelihood has to be evaluated with the candidate
    # parameters installed on the density.
    density.params = candidate

    candidate_ll = add_data_log_likelihood(candidate_ll)

    forward_log_ratio = candidate_ll - proposal.log_p(candidate, current)
    reverse_log_ratio = current_ll - proposal.log_p(current, candidate)

    log_ratio = forward_log_ratio - reverse_log_ratio

    u = uniform_rvs(0, 1)

    accepted = log_ratio >= log(u)

    density.params = candidate if accepted else current
def sample(self, data, old_partition, alpha):
    """Propose a split or a merge of clusters and accept it by Metropolis-Hastings.

    Two distinct item indices are drawn with the module-level ``sample``
    (presumably ``random.sample`` -- confirm against the file's imports).
    If both items carry the same label their cell is split with
    ``self._split``; otherwise their two cells are merged with
    ``self._merge``. The move is applied to a copy of ``old_partition``.
    On acceptance the copy's ``cells`` are assigned onto ``old_partition``;
    on rejection ``old_partition`` is left untouched.

    Args:
        data: Sized, indexable collection of data points.
        old_partition: Current partition, updated in place on acceptance.
            Must expose ``labels``, ``cells`` and ``copy()``.
        alpha: Passed through to ``self._compute_prior_log_p``.

    Returns:
        None.
    """
    n_items = len(data)

    # Two distinct items are required to define a move. With fewer, the
    # draw below would raise, so treat the update as a no-op instead.
    if n_items < 2:
        return

    items = range(n_items)

    i, j = sample(items, 2)

    labels = old_partition.labels

    c_i = labels[i]
    c_j = labels[j]

    # The proposal mutates this copy; the original cells are still needed
    # to score the reverse move.
    new_partition = old_partition.copy()

    if c_i == c_j:
        # Same cluster: propose splitting it in two.
        new_cell_i, new_cell_j, forward_log_q, reverse_log_q = self._split(
            i, j, new_partition.cells[c_i], data, new_partition)

        forward_log_p = (self._compute_partition_log_p(new_cell_i, data) +
                         self._compute_partition_log_p(new_cell_j, data))

        reverse_log_p = self._compute_partition_log_p(
            old_partition.cells[c_i], data)
    else:
        # Different clusters: propose merging them into one.
        cell_i = new_partition.cells[c_i]
        cell_j = new_partition.cells[c_j]

        new_cell, forward_log_q, reverse_log_q = self._merge(
            cell_i, cell_j, data, new_partition)

        forward_log_p = self._compute_partition_log_p(new_cell, data)

        reverse_log_p = (
            self._compute_partition_log_p(old_partition.cells[c_i], data) +
            self._compute_partition_log_p(old_partition.cells[c_j], data))

    forward_log_prior = self._compute_prior_log_p(alpha, new_partition)
    reverse_log_prior = self._compute_prior_log_p(alpha, old_partition)

    forward_log_ratio = forward_log_p + forward_log_prior - forward_log_q
    reverse_log_ratio = reverse_log_p + reverse_log_prior - reverse_log_q

    log_ratio = forward_log_ratio - reverse_log_ratio

    u = uniform_rvs(0, 1)

    if log_ratio >= log(u):
        # NOTE: diagnostic output retained so existing behaviour is
        # unchanged. The parenthesised form prints the same text whether
        # ``print`` is a statement or a function.
        print("accepted")

        old_partition.cells = new_partition.cells
def sample_atom(self, data, cell):
    """Metropolis-Hastings update for the value attached to one cell.

    A candidate is drawn with ``self.proposal_func.random`` from
    ``cell.value``. Each side of the ratio is the base-measure log density
    of the value plus the cluster-density log density of every data point
    listed in ``cell.items``, minus the proposal log density.

    Args:
        data: Indexable collection; looked up with the indices in
            ``cell.items``.
        cell: Object exposing ``value`` and ``items``. It is not modified.

    Returns:
        The candidate value if the move is accepted, otherwise the
        original ``cell.value``.
    """
    current = cell.value
    candidate = self.proposal_func.random(current)

    current_ll = self.base_measure.log_p(current)
    candidate_ll = self.base_measure.log_p(candidate)

    for index in cell.items:
        point = data[index]

        current_ll += self.cluster_density.log_p(point, current)
        candidate_ll += self.cluster_density.log_p(point, candidate)

    forward_log_ratio = candidate_ll - self.proposal_func.log_p(candidate, current)
    reverse_log_ratio = current_ll - self.proposal_func.log_p(current, candidate)

    log_ratio = forward_log_ratio - reverse_log_ratio

    u = uniform_rvs(0, 1)

    return candidate if log_ratio >= log(u) else current
def sample(self, data, partition, alpha):
    """Reassign items to clusters in two sweeps over ``data``.

    Sweep 1 (Metropolis-Hastings, one proposal per item):
      * an item that is alone in its cell is offered a move to an existing
        cell, chosen with probability proportional to the cell counts;
      * any other item is offered a brand-new cell whose value is drawn
        from ``self.base_measure``.

    Sweep 2 (Gibbs over existing cells): every item that is not alone in
    its cell is removed and re-added to a cell drawn with probability
    proportional to ``cell.size * exp(log_p(data_point, cell.value))``.

    NOTE(review): this looks like Neal (2000) Algorithm 7 -- confirm.

    Args:
        data: Iterable of data points; position ``k`` corresponds to item
            ``k`` of ``partition``.
        partition: Mutable partition, updated in place.
        alpha: Positive number used in the sweep-1 acceptance ratio.

    Returns:
        None.
    """
    n = partition.number_of_items

    # A lone item has no other cell to move to, and the (n - 1) terms
    # below would divide by zero and take log(0). Nothing can change.
    if n == 1:
        return

    # float() forces true division so the weights are not floored to zero
    # when Python 2 integer division is in effect.
    n_others = float(n - 1)

    for item, data_point in enumerate(data):
        old_cluster_label = partition.labels[item]

        old_value = partition.item_values[item]

        partition.remove_item(item, old_cluster_label)

        if partition.counts[old_cluster_label] == 0:
            # The item was a singleton: propose joining an existing cell.
            # The vacated cell has count 0 and therefore weight 0.
            p = [count / n_others for count in partition.counts]

            new_cluster_label = discrete_rvs(p)

            new_value = partition.cell_values[new_cluster_label]

            old_ll = self.cluster_density.log_p(data_point, old_value)
            new_ll = self.cluster_density.log_p(data_point, new_value)

            log_ratio = log(n - 1) - log(alpha) + new_ll - old_ll

            u = uniform_rvs(0, 1)

            if log_ratio >= log(u):
                partition.add_item(item, new_cluster_label)
            else:
                partition.add_item(item, old_cluster_label)
        else:
            # The item shared its cell: propose opening a new cell.
            new_value = self.base_measure.random()

            old_ll = self.cluster_density.log_p(data_point, old_value)
            new_ll = self.cluster_density.log_p(data_point, new_value)

            log_ratio = log(alpha) - log(n - 1) + new_ll - old_ll

            u = uniform_rvs(0, 1)

            if log_ratio >= log(u):
                partition.add_cell(new_value)

                cell = partition.get_cell_by_value(new_value)

                cell.add_item(item)
            else:
                partition.add_item(item, old_cluster_label)

        # Clear a cell this item may have vacated before the next item.
        partition.remove_empty_cells()

    for item, data_point in enumerate(data):
        old_cluster_label = partition.labels[item]

        # Singletons are left alone in this sweep, so no cell is emptied.
        if partition.cells[old_cluster_label].size == 1:
            continue

        partition.remove_item(item, old_cluster_label)

        log_p = [
            log(cell.size) + self.cluster_density.log_p(data_point, cell.value)
            for cell in partition.cells
        ]

        p = [exp(x) for x in log_space_normalise(log_p)]

        new_cluster_label = discrete_rvs(p)

        partition.add_item(item, new_cluster_label)

    partition.remove_empty_cells()
def random(self):
    """Return a new ``PyCloneParameter`` built from one ``uniform_rvs(0, 1)`` draw."""
    return PyCloneParameter(uniform_rvs(0, 1))