Beispiel #1
0
 def sample(self, data, partition, alpha):
     """Run one update using one of the two wrapped samplers.

     A value obtained from ``uniform_rvs(0, 1)`` is compared with
     ``self.ratio``: when it is smaller the auxiliary sampler is used,
     otherwise the split-merge sampler is used.  The chosen sampler gets
     ``data``, ``partition`` and ``alpha`` unchanged; its return value is
     not propagated.
     """
     if uniform_rvs(0, 1) < self.ratio:
         chosen = self.auxillary_sampler
     else:
         chosen = self.split_merge_sampler

     chosen.sample(data, partition, alpha)
Beispiel #2
0
    def sample(self, data, partition, alpha):
        """Delegate a single update to one of the two wrapped samplers.

        The auxiliary sampler is picked when the value returned by
        ``uniform_rvs(0, 1)`` is below ``self.ratio``; the split-merge
        sampler is picked otherwise.  Arguments are forwarded as given and
        nothing is returned.
        """
        use_auxiliary = uniform_rvs(0, 1) < self.ratio

        delegate = self.auxillary_sampler if use_auxiliary else self.split_merge_sampler

        delegate.sample(data, partition, alpha)
    def sample(self, data, partition):
        """Propose new parameters for ``self.cluster_density`` and accept or reject them.

        A candidate is generated by ``self.proposal_func.random`` from the
        current parameters.  Each parameter set is scored as its
        ``self.base_measure`` log-probability plus the summed
        ``self.cluster_density.log_p`` of every item in every cell of
        ``partition``, evaluated while that parameter set is installed on the
        density.  The candidate is kept when the proposal-corrected score
        difference is at least ``log(u)`` for ``u = uniform_rvs(0, 1)``;
        otherwise the previous parameters are restored.

        :param data: indexable collection of data points, indexed by the items stored in each cell.
        :param partition: object exposing ``cells``; each cell provides ``value`` and ``items``.
        """
        density = self.cluster_density

        def add_data_log_p(total):
            # Adds the log-density of all partitioned items under whatever
            # parameters are currently installed on the density.
            for cell in partition.cells:
                cell_value = cell.value

                for index in cell.items:
                    total += density.log_p(data[index], cell_value)

            return total

        current = density.params
        candidate = self.proposal_func.random(current)

        current_score = self.base_measure.log_p(current)
        candidate_score = self.base_measure.log_p(candidate)

        current_score = add_data_log_p(current_score)

        # The candidate has to be installed before it can be scored.
        density.params = candidate

        candidate_score = add_data_log_p(candidate_score)

        candidate_term = candidate_score - self.proposal_func.log_p(candidate, current)
        current_term = current_score - self.proposal_func.log_p(current, candidate)

        log_ratio = candidate_term - current_term

        accepted = log_ratio >= log(uniform_rvs(0, 1))

        density.params = candidate if accepted else current
Beispiel #4
0
    def sample(self, data, old_partition, alpha):
        """Propose a split or a merge of cells and accept it with a Metropolis-Hastings test.

        Two items are drawn with ``sample(items, 2)``.  When both carry the
        same label, their cell is split via ``self._split``; otherwise their
        two cells are merged via ``self._merge``.  The proposal is built on a
        copy of ``old_partition``.  It is accepted when::

            (forward_log_p + forward_log_prior - forward_log_q)
            - (reverse_log_p + reverse_log_prior - reverse_log_q) >= log(u)

        with ``u = uniform_rvs(0, 1)``.  On acceptance ``"accepted"`` is
        printed and ``old_partition.cells`` is replaced by the proposal's
        cells; on rejection ``old_partition`` is left untouched.

        With fewer than two data points no pair of items exists, so the
        method returns without proposing anything.

        :param data: sized collection of data points; positions are the item ids.
        :param old_partition: current partition, updated in place on acceptance.
        :param alpha: forwarded to ``self._compute_prior_log_p``.
        """
        # NOTE(review): assumes ``sample`` draws two *distinct* items (as
        # ``random.sample`` does), which cannot work with fewer than two.
        if len(data) < 2:
            return

        items = range(len(data))

        i, j = sample(items, 2)

        labels = old_partition.labels

        c_i = labels[i]

        c_j = labels[j]

        new_partition = old_partition.copy()

        if c_i == c_j:
            # Both items share a cell: propose splitting it in two.
            c = c_i

            old_cell = new_partition.cells[c]

            new_cell_i, new_cell_j, forward_log_q, reverse_log_q = self._split(i, j, old_cell, data, new_partition)

            forward_log_p = self._compute_partition_log_p(new_cell_i, data) + \
                self._compute_partition_log_p(new_cell_j, data)

            # The reverse move is scored on the untouched original cell.
            old_cell = old_partition.cells[c]

            reverse_log_p = self._compute_partition_log_p(old_cell, data)

        else:
            # Items live in different cells: propose merging those cells.
            cell_i = new_partition.cells[c_i]
            cell_j = new_partition.cells[c_j]

            new_cell, forward_log_q, reverse_log_q = self._merge(cell_i, cell_j, data, new_partition)

            forward_log_p = self._compute_partition_log_p(new_cell, data)

            old_cell_i = old_partition.cells[c_i]
            old_cell_j = old_partition.cells[c_j]

            reverse_log_p = self._compute_partition_log_p(old_cell_i, data) + \
                self._compute_partition_log_p(old_cell_j, data)

        forward_log_prior = self._compute_prior_log_p(alpha, new_partition)
        reverse_log_prior = self._compute_prior_log_p(alpha, old_partition)

        forward_log_ratio = forward_log_p + forward_log_prior - forward_log_q
        reverse_log_ratio = reverse_log_p + reverse_log_prior - reverse_log_q

        log_ratio = forward_log_ratio - reverse_log_ratio

        u = uniform_rvs(0, 1)

        if log_ratio >= log(u):
            # Call form prints the same text on Python 2 and Python 3.
            print("accepted")

            old_partition.cells = new_partition.cells
Beispiel #5
0
    def sample(self, data, old_partition, alpha):
        """Propose a split or a merge of cells and accept it with a Metropolis-Hastings test.

        Two items are drawn with ``sample(items, 2)``.  Items with the same
        label trigger a split of their cell (``self._split``); items with
        different labels trigger a merge of their cells (``self._merge``).
        The proposal is built on a copy of ``old_partition`` and accepted
        when the forward log ratio (likelihood + prior - proposal) minus the
        reverse log ratio is at least ``log(u)``, ``u = uniform_rvs(0, 1)``.

        On acceptance ``"accepted"`` is printed and ``old_partition.cells``
        is replaced by the proposal's cells; on rejection nothing changes.
        With fewer than two data points there is no pair of items to pick,
        so the method returns immediately.

        :param data: sized collection of data points; positions are the item ids.
        :param old_partition: current partition, updated in place on acceptance.
        :param alpha: forwarded to ``self._compute_prior_log_p``.
        """
        # NOTE(review): assumes ``sample`` draws two *distinct* items (as
        # ``random.sample`` does), which cannot work with fewer than two.
        if len(data) < 2:
            return

        items = range(len(data))

        i, j = sample(items, 2)

        labels = old_partition.labels

        c_i = labels[i]

        c_j = labels[j]

        new_partition = old_partition.copy()

        if c_i == c_j:
            # Same cell for both items: propose a split.
            c = c_i

            old_cell = new_partition.cells[c]

            new_cell_i, new_cell_j, forward_log_q, reverse_log_q = self._split(i, j, old_cell, data, new_partition)

            forward_log_p = self._compute_partition_log_p(new_cell_i, data) + \
                            self._compute_partition_log_p(new_cell_j, data)

            # Score the reverse move on the cell of the unmodified partition.
            old_cell = old_partition.cells[c]

            reverse_log_p = self._compute_partition_log_p(old_cell, data)

        else:
            # Different cells: propose a merge.
            cell_i = new_partition.cells[c_i]
            cell_j = new_partition.cells[c_j]

            new_cell, forward_log_q, reverse_log_q = self._merge(cell_i, cell_j, data, new_partition)

            forward_log_p = self._compute_partition_log_p(new_cell, data)

            old_cell_i = old_partition.cells[c_i]
            old_cell_j = old_partition.cells[c_j]

            reverse_log_p = self._compute_partition_log_p(old_cell_i, data) + \
                            self._compute_partition_log_p(old_cell_j, data)

        forward_log_prior = self._compute_prior_log_p(alpha, new_partition)
        reverse_log_prior = self._compute_prior_log_p(alpha, old_partition)

        forward_log_ratio = forward_log_p + forward_log_prior - forward_log_q
        reverse_log_ratio = reverse_log_p + reverse_log_prior - reverse_log_q

        log_ratio = forward_log_ratio - reverse_log_ratio

        u = uniform_rvs(0, 1)

        if log_ratio >= log(u):
            # Call form prints the same text on Python 2 and Python 3.
            print("accepted")

            old_partition.cells = new_partition.cells
Beispiel #6
0
    def sample_atom(self, data, cell):
        """Return either the cell's current value or a proposed replacement.

        A candidate is generated by ``self.proposal_func.random`` from
        ``cell.value``.  Both values are scored as their
        ``self.base_measure`` log-probability plus the summed
        ``self.cluster_density.log_p`` over the cell's items.  The candidate
        is returned when the proposal-corrected score difference is at least
        ``log(u)`` with ``u = uniform_rvs(0, 1)``; otherwise the current
        value is returned.  Neither ``cell`` nor ``data`` is modified here.

        :param data: indexable collection of data points, indexed by ``cell.items``.
        :param cell: object providing ``value`` and ``items``.
        """
        current = cell.value
        candidate = self.proposal_func.random(current)

        current_score = self.base_measure.log_p(current)
        candidate_score = self.base_measure.log_p(candidate)

        density_log_p = self.cluster_density.log_p

        for index in cell.items:
            point = data[index]

            current_score += density_log_p(point, current)
            candidate_score += density_log_p(point, candidate)

        candidate_term = candidate_score - self.proposal_func.log_p(candidate, current)
        current_term = current_score - self.proposal_func.log_p(current, candidate)

        if candidate_term - current_term >= log(uniform_rvs(0, 1)):
            return candidate

        return current
Beispiel #7
0
    def sample(self, data, partition, alpha):
        """Resample the cell assignment of every item in two passes.

        First pass (one accept/reject proposal per item):

        * an item that is alone in its cell is proposed to move into an
          existing cell drawn with probability ``count / (n - 1)``;
        * any other item is proposed to open a new cell whose value comes
          from ``self.base_measure.random()``.

        A proposal is kept when its log ratio is at least ``log(u)`` for a
        fresh ``u = uniform_rvs(0, 1)``; otherwise the item goes back to the
        cell it came from.  This pass is skipped when the partition holds
        fewer than two items, because it divides by ``n - 1`` and takes
        ``log(n - 1)``.

        Second pass: every item whose cell holds more than one item is
        removed and reassigned to a cell drawn with probability proportional
        to ``cell.size`` times the density of the data point under
        ``cell.value``.

        Empty cells are removed after each pass.  ``partition`` is modified
        in place and nothing is returned.

        NOTE(review): the structure resembles Algorithm 7 of Neal (2000) --
        confirm against the intended reference.

        :param data: iterable of data points; positions are used as item ids.
        :param partition: partition object, updated in place.
        :param alpha: positive number whose log enters the first-pass ratios.
        """
        n = partition.number_of_items

        if n > 1:
            for item, data_point in enumerate(data):
                old_cluster_label = partition.labels[item]
                old_value = partition.item_values[item]

                partition.remove_item(item, old_cluster_label)

                if partition.counts[old_cluster_label] == 0:
                    # The item was alone in its cell: propose joining an
                    # existing cell.  The explicit float keeps the weights
                    # fractional even under Python 2 integer division.
                    p = [x / float(n - 1) for x in partition.counts]

                    new_cluster_label = discrete_rvs(p)

                    new_value = partition.cell_values[new_cluster_label]

                    old_ll = self.cluster_density.log_p(data_point, old_value)
                    new_ll = self.cluster_density.log_p(data_point, new_value)

                    log_ratio = log(n - 1) - log(alpha) + new_ll - old_ll

                    u = uniform_rvs(0, 1)

                    if log_ratio >= log(u):
                        partition.add_item(item, new_cluster_label)
                    else:
                        partition.add_item(item, old_cluster_label)

                else:
                    # The item shared its cell: propose opening a new cell.
                    new_value = self.base_measure.random()

                    old_ll = self.cluster_density.log_p(data_point, old_value)
                    new_ll = self.cluster_density.log_p(data_point, new_value)

                    log_ratio = log(alpha) - log(n - 1) + new_ll - old_ll

                    u = uniform_rvs(0, 1)

                    if log_ratio >= log(u):
                        partition.add_cell(new_value)

                        cell = partition.get_cell_by_value(new_value)

                        cell.add_item(item)
                    else:
                        partition.add_item(item, old_cluster_label)

        partition.remove_empty_cells()

        for item, data_point in enumerate(data):
            old_cluster_label = partition.labels[item]

            # Items alone in their cell are left where they are in this pass.
            if partition.cells[old_cluster_label].size == 1:
                continue

            partition.remove_item(item, old_cluster_label)

            log_p = []

            for cell in partition.cells:
                cluster_log_p = self.cluster_density.log_p(data_point, cell.value)

                counts = cell.size

                log_p.append(log(counts) + cluster_log_p)

            log_p = log_space_normalise(log_p)

            p = [exp(x) for x in log_p]

            new_cluster_label = discrete_rvs(p)

            partition.add_item(item, new_cluster_label)

        partition.remove_empty_cells()
Beispiel #8
0
 def sample(self, data, partition, alpha):
     """Resample the cell assignment of every item in two passes.

     First pass (one accept/reject proposal per item):

     * an item that is alone in its cell is proposed to move into an
       existing cell drawn with probability ``count / (n - 1)``;
     * any other item is proposed to open a new cell whose value comes
       from ``self.base_measure.random()``.

     A proposal is kept when its log ratio is at least ``log(u)`` for a
     fresh ``u = uniform_rvs(0, 1)``; otherwise the item goes back to the
     cell it came from.  This pass is skipped when the partition holds
     fewer than two items, because it divides by ``n - 1`` and takes
     ``log(n - 1)``.

     Second pass: every item whose cell holds more than one item is
     removed and reassigned to a cell drawn with probability proportional
     to ``cell.size`` times the density of the data point under
     ``cell.value``.

     Empty cells are removed after each pass.  ``partition`` is modified
     in place and nothing is returned.

     NOTE(review): the structure resembles Algorithm 7 of Neal (2000) --
     confirm against the intended reference.

     :param data: iterable of data points; positions are used as item ids.
     :param partition: partition object, updated in place.
     :param alpha: positive number whose log enters the first-pass ratios.
     """
     n = partition.number_of_items

     if n > 1:
         for item, data_point in enumerate(data):
             old_cluster_label = partition.labels[item]
             old_value = partition.item_values[item]

             partition.remove_item(item, old_cluster_label)

             if partition.counts[old_cluster_label] == 0:
                 # The item was alone in its cell: propose joining an
                 # existing cell.  The explicit float keeps the weights
                 # fractional even under Python 2 integer division.
                 p = [x / float(n - 1) for x in partition.counts]

                 new_cluster_label = discrete_rvs(p)

                 new_value = partition.cell_values[new_cluster_label]

                 old_ll = self.cluster_density.log_p(data_point, old_value)
                 new_ll = self.cluster_density.log_p(data_point, new_value)

                 log_ratio = log(n - 1) - log(alpha) + new_ll - old_ll

                 u = uniform_rvs(0, 1)

                 if log_ratio >= log(u):
                     partition.add_item(item, new_cluster_label)
                 else:
                     partition.add_item(item, old_cluster_label)

             else:
                 # The item shared its cell: propose opening a new cell.
                 new_value = self.base_measure.random()

                 old_ll = self.cluster_density.log_p(data_point, old_value)
                 new_ll = self.cluster_density.log_p(data_point, new_value)

                 log_ratio = log(alpha) - log(n - 1) + new_ll - old_ll

                 u = uniform_rvs(0, 1)

                 if log_ratio >= log(u):
                     partition.add_cell(new_value)

                     cell = partition.get_cell_by_value(new_value)

                     cell.add_item(item)
                 else:
                     partition.add_item(item, old_cluster_label)

     partition.remove_empty_cells()

     for item, data_point in enumerate(data):
         old_cluster_label = partition.labels[item]

         # Items alone in their cell are left where they are in this pass.
         if partition.cells[old_cluster_label].size == 1:
             continue

         partition.remove_item(item, old_cluster_label)

         log_p = []

         for cell in partition.cells:
             cluster_log_p = self.cluster_density.log_p(data_point, cell.value)

             counts = cell.size

             log_p.append(log(counts) + cluster_log_p)

         log_p = log_space_normalise(log_p)

         p = [exp(x) for x in log_p]

         new_cluster_label = discrete_rvs(p)

         partition.add_item(item, new_cluster_label)

     partition.remove_empty_cells()
Beispiel #9
0
 def random(self):
     """Return a ``PyCloneParameter`` built from the value of ``uniform_rvs(0, 1)``."""
     return PyCloneParameter(uniform_rvs(0, 1))