コード例 #1
0
ファイル: partition.py プロジェクト: niharikag/pydp
    def sample(self, data, partition, alpha):
        """Run one sweep that reassigns every item of ``data`` to a cell.

        Each item is taken out of its current cell and then placed either in
        one of the remaining cells or in a freshly created one.  An existing
        cell is weighted by ``cell.size`` times the cluster density of the data
        point at the cell's value.  The new-cell option is weighted by
        ``alpha`` times ``self.posterior_density`` evaluated at the base
        measure parameters.

        :param data: sequence of data points; position ``k`` belongs to item ``k``.
        :param partition: partition object, updated in place.
        :param alpha: weight given to opening a new cell.
        """
        for item, data_point in enumerate(data):
            # Detach the item from its current cell, then prune empty cells.
            partition.remove_item(item, partition.labels[item])
            partition.remove_empty_cells()

            # One unnormalised log weight per remaining cell.
            log_weights = [
                log(cell.size) + self.cluster_density.log_p(data_point, cell.value)
                for cell in partition.cells
            ]

            # The last entry stands for "open a new cell".
            new_cell_log_p = self.posterior_density.log_p(data_point, self.base_measure.params)
            log_weights.append(log(alpha) + new_cell_log_p)

            probabilities = [exp(w) for w in log_space_normalise(log_weights)]
            chosen_index = discrete_rvs(probabilities)

            # An index one past the existing cells selects the new-cell option.
            if chosen_index == partition.number_of_cells:
                partition.add_cell(self.base_measure.random())

            partition.add_item(item, chosen_index)
コード例 #2
0
ファイル: partition.py プロジェクト: Q-KIM/PyClone
    def sample(self, data, partition, alpha):
        """Reassign every item of ``data`` to a cell in a single pass.

        For each item: remove it from its cell, prune empty cells, then draw a
        destination.  Existing cells get log weight
        ``log(cell.size) + cluster_density.log_p(point, cell.value)``.  One
        extra trailing option, with log weight
        ``log(alpha) + posterior_density.log_p(point, base_measure.params)``,
        means "create a new cell with a value drawn from the base measure".

        :param data: sequence of data points indexed by item.
        :param partition: partition object, modified in place.
        :param alpha: weight of the new-cell option.
        """
        for item, data_point in enumerate(data):
            current_index = partition.labels[item]
            partition.remove_item(item, current_index)
            partition.remove_empty_cells()

            # Log weights of the cells that are still present.
            candidate_log_p = [
                log(cell.size) + self.cluster_density.log_p(data_point, cell.value)
                for cell in partition.cells
            ]

            # Trailing entry: the new-cell option.
            base_params = self.base_measure.params
            candidate_log_p.append(
                log(alpha) + self.posterior_density.log_p(data_point, base_params)
            )

            normalised = log_space_normalise(candidate_log_p)
            drawn_index = discrete_rvs([exp(value) for value in normalised])

            # Drawing the trailing index means a new cell has to be created first.
            if drawn_index == partition.number_of_cells:
                partition.add_cell(self.base_measure.random())

            partition.add_item(item, drawn_index)
コード例 #3
0
ファイル: partition.py プロジェクト: niharikag/pydp
    def _merge(self, old_cell_i, old_cell_j, data, partition):
        """Merge two cells into one new cell and score the proposal.

        Every item of ``old_cell_i`` and ``old_cell_j`` is moved into a new
        cell that takes the value of ``old_cell_i``.  The reverse proposal
        log-probability is accumulated by replaying, in random order, the
        sequential allocation that would rebuild the two original cells from
        one seed item each.

        :param old_cell_i: first cell; its value becomes the merged cell's value.
        :param old_cell_j: second cell.
        :param data: data points indexed by item.
        :param partition: partition holding both cells; modified in place.
        :returns: tuple ``(new_cell, forward_log_q, reverse_log_q)``.
        """
        # NOTE(review): assumes ``items`` returns a fresh list on each access;
        # the loops below empty the cells while iterating these lists - confirm.
        s_i = old_cell_i.items
        s_j = old_cell_j.items

        param_i = old_cell_i.value
        param_j = old_cell_j.value

        # The merged cell simply keeps the value of cell i.
        param_new = param_i

        forward_log_q = self.proposal_func.log_p(param_new, param_i)
        reverse_log_q = self.proposal_func.log_p(param_i, param_new) + self.proposal_func.log_p(param_j, param_new)

        new_cell = partition.add_cell(param_new)

        for k in s_i:
            old_cell_i.remove_item(k)

            new_cell.add_item(k)

        for k in s_j:
            old_cell_j.remove_item(k)

            new_cell.add_item(k)

        # One item from each side seeds the replayed split.
        temp_s_i = {s_i.pop()}
        temp_s_j = {s_j.pop()}

        # Built once so the per-item origin test below is a hash lookup rather
        # than a linear scan of the list.
        origin_i = set(s_i)

        items = s_i + s_j

        shuffle(items)

        for k in items:
            n_i = len(temp_s_i)
            n_j = len(temp_s_j)

            log_p = [
                log(n_i) + self.cluster_density.log_p(data[k], param_i),
                log(n_j) + self.cluster_density.log_p(data[k], param_j)
            ]

            log_p = log_space_normalise(log_p)

            # Add the log-probability of sending k back to the side it came from.
            if k in origin_i:
                temp_s_i.add(k)

                reverse_log_q += log_p[0]

            else:
                temp_s_j.add(k)

                reverse_log_q += log_p[1]

        partition.remove_empty_cells()

        return new_cell, forward_log_q, reverse_log_q
コード例 #4
0
ファイル: partition.py プロジェクト: Q-KIM/PyClone
    def _merge(self, old_cell_i, old_cell_j, data, partition):
        """Combine two cells into a single new cell and score the move.

        All items from both cells are transferred to a new cell whose value is
        that of ``old_cell_i``.  ``reverse_log_q`` is then built by replaying,
        in shuffled order, a sequential assignment of the items back to two
        groups seeded with one item from each original cell.

        :param old_cell_i: first cell; supplies the value of the merged cell.
        :param old_cell_j: second cell.
        :param data: data points indexed by item.
        :param partition: partition that owns the cells; modified in place.
        :returns: tuple ``(new_cell, forward_log_q, reverse_log_q)``.
        """
        # NOTE(review): assumes ``items`` returns a fresh list on each access;
        # the cells are emptied below while these lists are iterated - confirm.
        s_i = old_cell_i.items
        s_j = old_cell_j.items

        param_i = old_cell_i.value
        param_j = old_cell_j.value

        # The merged cell reuses the value of cell i.
        param_new = param_i

        forward_log_q = self.proposal_func.log_p(param_new, param_i)
        reverse_log_q = self.proposal_func.log_p(param_i, param_new) + self.proposal_func.log_p(param_j, param_new)

        new_cell = partition.add_cell(param_new)

        for k in s_i:
            old_cell_i.remove_item(k)

            new_cell.add_item(k)

        for k in s_j:
            old_cell_j.remove_item(k)

            new_cell.add_item(k)

        # Seed each replayed group with one item from its original cell.
        temp_s_i = {s_i.pop()}
        temp_s_j = {s_j.pop()}

        # Set of the remaining cell-i items: constant-time origin test in the
        # loop below instead of scanning the list for every item.
        members_of_i = set(s_i)

        items = s_i + s_j

        shuffle(items)

        for k in items:
            n_i = len(temp_s_i)
            n_j = len(temp_s_j)

            log_p = [
                log(n_i) + self.cluster_density.log_p(data[k], param_i),
                log(n_j) + self.cluster_density.log_p(data[k], param_j)
            ]

            log_p = log_space_normalise(log_p)

            # Accumulate the log-probability of returning k to its original side.
            if k in members_of_i:
                temp_s_i.add(k)

                reverse_log_q += log_p[0]

            else:
                temp_s_j.add(k)

                reverse_log_q += log_p[1]

        partition.remove_empty_cells()

        return new_cell, forward_log_q, reverse_log_q
コード例 #5
0
ファイル: clusters.py プロジェクト: worker000000/pyclone
def _compute_posterior(data, density, mesh_size):
    """Evaluate a normalised log posterior on an evenly spaced grid over [0, 1].

    For each of the ``mesh_size`` grid values the log densities of all data
    points are summed, with the grid value wrapped in ``BetaData`` as the
    density parameter.  The per-grid totals are then passed through
    ``log_space_normalise``.

    :param data: iterable of data points accepted by ``density.log_p``.
    :param density: object exposing ``log_p(data_point, params)``.
    :param mesh_size: number of grid points between 0 and 1 inclusive.
    :returns: dict mapping each grid value to its normalised log value.
    """
    log_totals = {}

    for grid_value in np.linspace(0, 1, mesh_size):
        # Total log density of the whole data set at this grid value.
        log_totals[grid_value] = sum(
            density.log_p(data_point, BetaData(grid_value)) for data_point in data
        )

    normalised = log_space_normalise(log_totals.values())

    return dict(zip(log_totals.keys(), normalised))
コード例 #6
0
ファイル: partition.py プロジェクト: niharikag/pydp
    def _split(self, i, j, old_cell, data, partition):
        """Split ``old_cell`` into two new cells seeded with items ``i`` and ``j``.

        The first new cell keeps the value of ``old_cell``; the second gets a
        value drawn from ``self.proposal_func`` given that value.  The
        remaining items of ``old_cell`` are visited in random order and each is
        assigned to one of the two new cells with probability proportional to
        the cell's current size times the cluster density of the item's data
        point.  The log-probabilities of the choices made are accumulated into
        the forward proposal term.

        :param i: item that seeds the first new cell.
        :param j: item that seeds the second new cell.
        :param old_cell: cell being split; emptied by this call.
        :param data: data points indexed by item.
        :param partition: partition that owns the cells; modified in place.
        :returns: tuple ``(new_cell_i, new_cell_j, forward_log_q, reverse_log_q)``.
        """
        for seed in (i, j):
            old_cell.remove_item(seed)

        kept_value = old_cell.value
        drawn_value = self.proposal_func.random(kept_value)

        forward_log_q = (
            self.proposal_func.log_p(kept_value, kept_value)
            + self.proposal_func.log_p(drawn_value, kept_value)
        )
        reverse_log_q = self.proposal_func.log_p(kept_value, kept_value)

        cell_i = partition.add_cell(kept_value)
        cell_j = partition.add_cell(drawn_value)

        cell_i.add_item(i)
        cell_j.add_item(j)

        # Everything still in the old cell is reallocated in random order.
        leftovers = old_cell.items
        shuffle(leftovers)

        for k in leftovers:
            old_cell.remove_item(k)

            size_i = cell_i.size
            size_j = cell_j.size

            choice_log_p = log_space_normalise([
                log(size_i) + self.cluster_density.log_p(data[k], kept_value),
                log(size_j) + self.cluster_density.log_p(data[k], drawn_value),
            ])

            picked = discrete_rvs([exp(value) for value in choice_log_p])

            destination = cell_i if picked == 0 else cell_j
            destination.add_item(k)

            # Record how likely the realised choice was.
            forward_log_q += choice_log_p[picked]

        partition.remove_empty_cells()

        return cell_i, cell_j, forward_log_q, reverse_log_q
コード例 #7
0
ファイル: partition.py プロジェクト: Q-KIM/PyClone
    def _split(self, i, j, old_cell, data, partition):
        """Break ``old_cell`` into two new cells anchored on items ``i`` and ``j``.

        One new cell inherits the value of ``old_cell``; the other receives a
        value proposed by ``self.proposal_func.random`` from that value.  Each
        remaining item is then allocated, in shuffled order, to one of the two
        cells according to the normalised weights
        ``size * cluster density``, and the log-probability of each draw is
        added to the forward proposal term.

        :param i: item placed in the first new cell.
        :param j: item placed in the second new cell.
        :param old_cell: cell to split; all of its items are removed.
        :param data: data points indexed by item.
        :param partition: partition containing the cells; modified in place.
        :returns: tuple ``(new_cell_i, new_cell_j, forward_log_q, reverse_log_q)``.
        """
        old_cell.remove_item(i)
        old_cell.remove_item(j)

        value_i = old_cell.value
        value_j = self.proposal_func.random(value_i)

        forward_log_q = self.proposal_func.log_p(value_i, value_i) + self.proposal_func.log_p(value_j, value_i)
        reverse_log_q = self.proposal_func.log_p(value_i, value_i)

        first_cell = partition.add_cell(value_i)
        second_cell = partition.add_cell(value_j)

        first_cell.add_item(i)
        second_cell.add_item(j)

        # Remaining members of the old cell, processed in random order.
        pending = old_cell.items
        shuffle(pending)

        for k in pending:
            old_cell.remove_item(k)

            count_first = first_cell.size
            count_second = second_cell.size

            normalised = log_space_normalise([
                log(count_first) + self.cluster_density.log_p(data[k], value_i),
                log(count_second) + self.cluster_density.log_p(data[k], value_j),
            ])

            drawn = discrete_rvs([exp(entry) for entry in normalised])

            # Index 0 means the first cell, anything else the second.
            target_cell = first_cell if drawn == 0 else second_cell
            target_cell.add_item(k)

            forward_log_q += normalised[drawn]

        partition.remove_empty_cells()

        return first_cell, second_cell, forward_log_q, reverse_log_q
コード例 #8
0
ファイル: partition.py プロジェクト: niharikag/pydp
    def sample(self, data, partition, alpha, m=2):
        '''
        Sample a new partition according to algorithm 8 of Neal "Sampling Methods For Dirichlet Process Mixture Models"

        Items are visited in random order. Each item is removed from its cell, new cells with values drawn from the
        base measure are added, and the item is reassigned with probability proportional to the cell weight times the
        cluster density of its data point. Non-empty cells are weighted by their size, empty cells by ``alpha / m``.

        :param data: sequence of data points indexed by item.
        :param partition: partition object, modified in place.
        :param alpha: total weight shared by the empty candidate cells.
        :param m: number of empty candidate cells offered to each item.
        '''
        # shuffle() reorders its argument in place, so it needs a real list; a
        # Python 3 range object is immutable and cannot be shuffled.
        items = list(range(len(data)))

        shuffle(items)

        for item in items:
            data_point = data[item]

            old_cell_index = partition.get_cell_index(item)

            partition.remove_item(item, old_cell_index)

            # If removing the item emptied its cell, that cell is still present
            # and counts as one of the m empty candidates.
            if partition.counts[old_cell_index] == 0:
                num_new_tables = m - 1
            else:
                num_new_tables = m

            for _ in range(num_new_tables):
                partition.add_cell(self.base_measure.random())

            log_p = []

            for cell in partition.cells:
                cluster_log_p = self.cluster_density.log_p(data_point, cell.value)

                counts = cell.size

                # Empty cells share the weight alpha equally.
                if counts == 0:
                    counts = alpha / m

                log_p.append(log(counts) + cluster_log_p)

            log_p = log_space_normalise(log_p)

            p = [exp(x) for x in log_p]

            new_cell_index = discrete_rvs(p)

            partition.add_item(item, new_cell_index)

            # Drop the candidate cells that were not chosen.
            partition.remove_empty_cells()
コード例 #9
0
ファイル: partition.py プロジェクト: Q-KIM/PyClone
 def sample(self, data, partition, alpha, m=2):
     '''
     Sample a new partition according to algorithm 8 of Neal "Sampling Methods For Dirichlet Process Mixture Models"

     Items are visited in random order. Each item is removed from its cell, new cells with values drawn from the
     base measure are added, and the item is reassigned with probability proportional to the cell weight times the
     cluster density of its data point. Non-empty cells are weighted by their size, empty cells by ``alpha / m``.

     :param data: sequence of data points indexed by item.
     :param partition: partition object, modified in place.
     :param alpha: total weight shared by the empty candidate cells.
     :param m: number of empty candidate cells offered to each item.
     '''
     # shuffle() reorders its argument in place, so it needs a real list; a
     # Python 3 range object is immutable and cannot be shuffled.
     items = list(range(len(data)))

     shuffle(items)

     for item in items:
         data_point = data[item]

         old_cell_index = partition.labels[item]

         partition.remove_item(item, old_cell_index)

         # If removing the item emptied its cell, that cell is still present
         # and counts as one of the m empty candidates.
         if partition.counts[old_cell_index] == 0:
             num_new_tables = m - 1
         else:
             num_new_tables = m

         for _ in range(num_new_tables):
             partition.add_cell(self.base_measure.random())

         log_p = []

         for cell in partition.cells:
             cluster_log_p = self.cluster_density.log_p(data_point, cell.value)

             counts = cell.size

             # Empty cells share the weight alpha equally.
             if counts == 0:
                 counts = alpha / m

             log_p.append(log(counts) + cluster_log_p)

         log_p = log_space_normalise(log_p)

         p = [exp(x) for x in log_p]

         new_cell_index = discrete_rvs(p)

         partition.add_item(item, new_cell_index)

         # Drop the candidate cells that were not chosen.
         partition.remove_empty_cells()
コード例 #10
0
ファイル: partition.py プロジェクト: niharikag/pydp
    def sample(self, data, partition, alpha):
        """Update the partition with a Metropolis-Hastings pass followed by a restricted Gibbs pass.

        First pass: an item that is alone in its cell is offered a move to an
        existing cell drawn in proportion to the cell counts; any other item is
        offered a new cell whose value is drawn from the base measure.  Each
        proposal is accepted or rejected with a uniform draw against
        ``log_ratio``.

        Second pass: every item that is not alone in its cell is reassigned
        among the existing cells with probability proportional to cell size
        times cluster density.

        NOTE(review): this looks like algorithm 7 of Neal, "Sampling Methods
        For Dirichlet Process Mixture Models" - confirm.

        :param data: sequence of data points indexed by item.
        :param partition: partition object, modified in place.
        :param alpha: weight favouring new cells in the acceptance ratios.
        """
        n = partition.number_of_items

        # With fewer than two items there is no other cell to move to, and the
        # (n - 1) terms below would divide by zero or take log(0).
        if n < 2:
            partition.remove_empty_cells()

            return

        for item, data_point in enumerate(data):
            old_cluster_label = partition.labels[item]
            old_value = partition.item_values[item]

            partition.remove_item(item, old_cluster_label)

            if partition.counts[old_cluster_label] == 0:
                # The item was alone: propose joining an existing cell.  Its
                # own, now empty, cell has count 0 and so probability 0.
                p = [x / (n - 1) for x in partition.counts]

                new_cluster_label = discrete_rvs(p)

                new_value = partition.cell_values[new_cluster_label]

                old_ll = self.cluster_density.log_p(data_point, old_value)
                new_ll = self.cluster_density.log_p(data_point, new_value)

                log_ratio = log(n - 1) - log(alpha) + new_ll - old_ll

                u = uniform_rvs(0, 1)

                if log_ratio >= log(u):
                    partition.add_item(item, new_cluster_label)
                else:
                    partition.add_item(item, old_cluster_label)

            else:
                # The item shared its cell: propose a brand-new cell.
                new_value = self.base_measure.random()

                old_ll = self.cluster_density.log_p(data_point, old_value)
                new_ll = self.cluster_density.log_p(data_point, new_value)

                log_ratio = log(alpha) - log(n - 1) + new_ll - old_ll

                u = uniform_rvs(0, 1)

                if log_ratio >= log(u):
                    partition.add_cell(new_value)

                    cell = partition.get_cell_by_value(new_value)

                    cell.add_item(item)
                else:
                    partition.add_item(item, old_cluster_label)

        # Cells emptied during the first pass are only cleared here, after the loop.
        partition.remove_empty_cells()

        for item, data_point in enumerate(data):
            old_cluster_label = partition.labels[item]

            # Items alone in their cell are left untouched in this pass.
            if partition.cells[old_cluster_label].size == 1:
                continue

            partition.remove_item(item, old_cluster_label)

            log_p = []

            for cell in partition.cells:
                cluster_log_p = self.cluster_density.log_p(data_point, cell.value)

                counts = cell.size

                log_p.append(log(counts) + cluster_log_p)

            log_p = log_space_normalise(log_p)

            p = [exp(x) for x in log_p]

            new_cluster_label = discrete_rvs(p)

            partition.add_item(item, new_cluster_label)

        partition.remove_empty_cells()
コード例 #11
0
ファイル: partition.py プロジェクト: Q-KIM/PyClone
 def sample(self, data, partition, alpha):
     """Update the partition with a Metropolis-Hastings pass followed by a restricted Gibbs pass.

     First pass: an item that is alone in its cell is offered a move to an
     existing cell drawn in proportion to the cell counts; any other item is
     offered a new cell whose value is drawn from the base measure.  Each
     proposal is accepted or rejected with a uniform draw against
     ``log_ratio``.

     Second pass: every item that is not alone in its cell is reassigned
     among the existing cells with probability proportional to cell size
     times cluster density.

     NOTE(review): this looks like algorithm 7 of Neal, "Sampling Methods
     For Dirichlet Process Mixture Models" - confirm.

     :param data: sequence of data points indexed by item.
     :param partition: partition object, modified in place.
     :param alpha: weight favouring new cells in the acceptance ratios.
     """
     n = partition.number_of_items

     # With fewer than two items there is no other cell to move to, and the
     # (n - 1) terms below would divide by zero or take log(0).
     if n < 2:
         partition.remove_empty_cells()

         return

     for item, data_point in enumerate(data):
         old_cluster_label = partition.labels[item]
         old_value = partition.item_values[item]

         partition.remove_item(item, old_cluster_label)

         if partition.counts[old_cluster_label] == 0:
             # The item was alone: propose joining an existing cell.  Its
             # own, now empty, cell has count 0 and so probability 0.
             p = [x / (n - 1) for x in partition.counts]

             new_cluster_label = discrete_rvs(p)

             new_value = partition.cell_values[new_cluster_label]

             old_ll = self.cluster_density.log_p(data_point, old_value)
             new_ll = self.cluster_density.log_p(data_point, new_value)

             log_ratio = log(n - 1) - log(alpha) + new_ll - old_ll

             u = uniform_rvs(0, 1)

             if log_ratio >= log(u):
                 partition.add_item(item, new_cluster_label)
             else:
                 partition.add_item(item, old_cluster_label)

         else:
             # The item shared its cell: propose a brand-new cell.
             new_value = self.base_measure.random()

             old_ll = self.cluster_density.log_p(data_point, old_value)
             new_ll = self.cluster_density.log_p(data_point, new_value)

             log_ratio = log(alpha) - log(n - 1) + new_ll - old_ll

             u = uniform_rvs(0, 1)

             if log_ratio >= log(u):
                 partition.add_cell(new_value)

                 cell = partition.get_cell_by_value(new_value)

                 cell.add_item(item)
             else:
                 partition.add_item(item, old_cluster_label)

     # Cells emptied during the first pass are only cleared here, after the loop.
     partition.remove_empty_cells()

     for item, data_point in enumerate(data):
         old_cluster_label = partition.labels[item]

         # Items alone in their cell are left untouched in this pass.
         if partition.cells[old_cluster_label].size == 1:
             continue

         partition.remove_item(item, old_cluster_label)

         log_p = []

         for cell in partition.cells:
             cluster_log_p = self.cluster_density.log_p(data_point, cell.value)

             counts = cell.size

             log_p.append(log(counts) + cluster_log_p)

         log_p = log_space_normalise(log_p)

         p = [exp(x) for x in log_p]

         new_cluster_label = discrete_rvs(p)

         partition.add_item(item, new_cluster_label)

     partition.remove_empty_cells()