Example #1
    def _set_data_to_clusters(self, data_idx):
        data_point = self.data[data_idx]

        num_clusters = len(self.cluster_params)

        log_p = np.zeros(num_clusters)

        cluster = self.clustering[data_idx]

        for c, block_params in self.cluster_params.items():
            if c == cluster:
                continue

            log_p[c] = self.partition_prior.log_tau_2(block_params.N)

            log_p[c] += self.dist.log_predictive_likelihood(
                data_point, block_params)

        if self.cluster_params[cluster].N == 1:
            log_p[cluster] = float('-inf')

        else:
            log_p[cluster] = log_sum_exp(log_p)

            if num_clusters > 1:
                log_p[cluster] -= np.log(num_clusters - 1)

        p, _ = exp_normalize(log_p)

        self.data_to_clusters[data_idx] = dict(
            zip(self.cluster_params.keys(), p))
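
Every snippet on this page normalises log weights with exp_normalize and reduces them with log_sum_exp, neither of which is shown. The sketch below is an assumption about what these helpers compute (a numerically stable log-sum-exp reduction and a normalisation of log probabilities); the package's own implementations may differ in detail.

import numpy as np


def log_sum_exp(log_x):
    # Numerically stable log(sum(exp(log_x))): shift by the maximum before exponentiating.
    max_val = np.max(log_x)

    if np.isneginf(max_val):
        return max_val

    return max_val + np.log(np.sum(np.exp(log_x - max_val)))


def exp_normalize(log_p):
    # Turn unnormalised log probabilities into probabilities; also return the log normaliser.
    log_norm = log_sum_exp(log_p)

    p = np.exp(log_p - log_norm)

    return p / p.sum(), log_norm

Dividing by p.sum() guards against floating point rounding, since np.random.multinomial can reject probability vectors whose sum drifts above one.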
Example #2
    def update(self, clustering):
        clustering = relabel_clustering(clustering)

        clusters = np.unique(clustering)

        num_clusters = len(clusters)

        self.cluster_probs = np.zeros((num_clusters, num_clusters))

        self.clusters_to_data = {}

        self.data_to_clusters = {}

        margs = {}

        for c in clusters:
            cluster_data = self.data[clustering == c]

            cluster_params = self.dist.create_params_from_data(cluster_data)

            margs[c] = self.dist.log_marginal_likelihood(cluster_params)

            if self.use_prior_weight:
                margs[c] += self.partition_prior.log_tau_2(cluster_params.N)

            self.clusters_to_data[c] = np.where(clustering == c)[0].flatten()

            for i in self.clusters_to_data[c]:
                self.data_to_clusters[i] = c

        for c_i in clusters:
            log_p = np.ones(num_clusters) * float('-inf')

            for c_j in clusters:
                if c_i == c_j:
                    continue

                merged_data = self.data[(clustering == c_i) |
                                        (clustering == c_j)]

                merged_params = self.dist.create_params_from_data(merged_data)

                merge_marg = self.dist.log_marginal_likelihood(merged_params)

                if self.use_prior_weight:
                    merge_marg += self.partition_prior.log_tau_2(
                        merged_params.N)

                log_p[c_j] = merge_marg - (margs[c_i] + margs[c_j])

            if num_clusters == 1:
                log_p[c_i] = 0

            else:
                log_p[c_i] = -np.log(num_clusters - 1) + log_sum_exp(log_p)

            self.cluster_probs[c_i], _ = exp_normalize(log_p)
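
update indexes log_p and cluster_probs directly by cluster label, so it relies on relabel_clustering mapping whatever labels the input uses onto the consecutive integers 0..K-1. A minimal sketch of such a relabelling, by order of first appearance (the package's implementation may differ), is:

import numpy as np


def relabel_clustering(clustering):
    # Map arbitrary cluster labels onto 0..K-1 in order of first appearance.
    clustering = np.asarray(clustering)

    new_labels = {}

    relabelled = np.zeros(len(clustering), dtype=int)

    for i, c in enumerate(clustering):
        if c not in new_labels:
            new_labels[c] = len(new_labels)

        relabelled[i] = new_labels[c]

    return relabelled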
Example #3
    def _resample_customer_table_idx(self, customer_data_point, tables):
        log_p = np.zeros(len(tables) + 1, dtype=np.float64)

        # The extra final slot holds the log probability of opening a new table.
        log_p[-1] = self._log_prob_new_table(customer_data_point, len(tables))

        for c, table in enumerate(tables):
            log_p[c] = self._log_prob_join_table(customer_data_point,
                                                 table.dish)

        p, _ = exp_normalize(log_p)

        new_table_idx = np.random.multinomial(1, p).argmax()

        return new_table_idx
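
np.random.multinomial(1, p).argmax() is the idiom used here (and in the later examples) to draw a single index from the discrete distribution p: a one-trial multinomial returns a one-hot vector, and argmax recovers the sampled index. For illustration only, the same draw could be made with np.random.choice:

import numpy as np

p = np.array([0.2, 0.5, 0.3])

# n=1 multinomial draw gives a one-hot vector; argmax recovers the sampled index.
idx_multinomial = np.random.multinomial(1, p).argmax()

# An equivalent single draw, shown only for comparison.
idx_choice = np.random.choice(len(p), p=p)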
Example #4
def get_exact_posterior(data, dist, partition_prior):
    '''
    Compute the exact posterior of the clustering model.

    Returns a dictionary mapping clusterings to posterior probability.
    '''
    log_p = []

    clusterings = []

    for c in get_all_clusterings(data.shape[0]):
        clusterings.append(tuple(relabel_clustering(c).astype(int)))

        log_p.append(log_joint_probability(c, data, dist, partition_prior))

    p, _ = exp_normalize(np.array(log_p))

    return dict(zip(clusterings, p))
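
get_all_clusterings is assumed to enumerate every partition of the data points as a vector of cluster labels; relabel_clustering then puts each partition into a canonical form so it can serve as a dictionary key. A possible sketch (the package's implementation may differ):

import numpy as np


def get_all_clusterings(num_data_points):
    # Enumerate every partition of the data points as a label vector:
    # point i either joins one of the clusters already used or opens a new one.
    def _extend(labels):
        if len(labels) == num_data_points:
            yield np.array(labels, dtype=int)

            return

        num_used = max(labels) + 1 if labels else 0

        for label in range(num_used + 1):
            yield from _extend(labels + [label])

    yield from _extend([])

The number of partitions grows as the Bell number of the data set size, so computing the exact posterior this way is only practical for small data sets.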
Example #5
    def propose(self, data_point, parent_particle, seed=None):
        '''
        Propose a particle for t given a particle from t - 1 and a data point.
        '''
        if seed is not None:
            random.seed(seed)

            np.random.seed(seed)

        log_q = self.get_log_q(data_point, parent_particle)

        # keys() and values() enumerate the dict in the same order.
        block_probs, log_q_norm = exp_normalize(
            np.array(list(log_q.values())))

        block_idx = np.random.multinomial(1, block_probs).argmax()

        block_idx = list(log_q.keys())[block_idx]

        return self.create_particle(block_idx,
                                    data_point,
                                    parent_particle,
                                    log_q=log_q,
                                    log_q_norm=log_q_norm)
Example #6
    def sample(self, data, kernel):
        constrained_path = kernel.constrained_path

        init_particle = kernel.create_initial_particle(data[0])

        swarm = ImplicitParticleSwarm()

        swarm.add_particle(0,
                           init_particle,
                           multiplicity=self.num_particles - 1)

        swarm.add_particle(0, constrained_path[0], multiplicity=1)

        for constrained_particle, data_point in zip(constrained_path[1:],
                                                    data[1:]):
            new_swarm = ImplicitParticleSwarm()

            for parent_log_W, parent_multiplicity, parent_particle in swarm:
                log_q = kernel.get_log_q(data_point, parent_particle)

                block_probs, log_q_norm = exp_normalize(
                    np.array(list(log_q.values())))

                is_constrained_parent = (
                    parent_particle == constrained_particle.parent_particle)

                if is_constrained_parent:
                    # One offspring of this parent is forced to follow the
                    # constrained path; the rest are sampled as usual.
                    multiplicities = np.random.multinomial(
                        parent_multiplicity - 1, block_probs)

                    multiplicities[list(log_q.keys()).index(
                        constrained_particle.block_idx)] += 1

                else:
                    multiplicities = np.random.multinomial(
                        parent_multiplicity, block_probs)

                for block_idx, multiplicity in zip(log_q.keys(),
                                                   multiplicities):
                    if multiplicity == 0:
                        continue

                    if is_constrained_parent and (
                            block_idx == constrained_particle.block_idx):
                        particle = constrained_particle

                    else:
                        particle = kernel.create_particle(
                            block_idx,
                            data_point,
                            parent_particle,
                            log_q=log_q,
                            log_q_norm=log_q_norm)

                    new_swarm.add_particle(parent_log_W + particle.log_w,
                                           particle,
                                           multiplicity=multiplicity)

            swarm = self.resample_if_necessary(
                new_swarm, conditional_particle=constrained_particle)

            if self._check_collapse(kernel, swarm.particles):
                return {kernel.constrained_path[-1]: 0}

        return swarm.to_dict()
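
The conditional SMC loop above only touches a small part of ImplicitParticleSwarm: add_particle(log_W, particle, multiplicity=...), iteration over (log_W, multiplicity, particle) triples, the particles attribute, and to_dict(). The sketch below is an assumption about that interface, not the package's class; it reuses the log_sum_exp sketched earlier, and resample_if_necessary and _check_collapse are left to the real implementation.

import numpy as np


class ImplicitParticleSwarm(object):
    # Assumed minimal stand-in: each distinct particle is stored once with an
    # unnormalised log weight and an integer multiplicity.

    def __init__(self):
        self._log_W = []

        self._multiplicities = []

        self._particles = []

    @property
    def particles(self):
        return list(self._particles)

    def add_particle(self, log_W, particle, multiplicity=1):
        self._log_W.append(log_W)

        self._multiplicities.append(multiplicity)

        self._particles.append(particle)

    def __iter__(self):
        return iter(zip(self._log_W, self._multiplicities, self._particles))

    def to_dict(self):
        # Map each particle to its normalised log weight, accounting for multiplicity.
        log_W = np.array(self._log_W, dtype=float) + np.log(self._multiplicities)

        return dict(zip(self._particles, log_W - log_sum_exp(log_W)))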