Ejemplo n.º 1
0
    def _init_partition(self, base_measure):
        '''
        Replace `self.partition` with a fresh partition holding one cell per data point.

        Args:
            base_measure : Object exposing `random()`. It is called once per entry of `self.data` and the
                           returned value is stored as the value of the new cell.
        '''
        partition = Partition()

        self.partition = partition

        for index, _unused in enumerate(self.data):
            cell_value = base_measure.random()

            partition.add_cell(cell_value)

            # Item `index` is placed in cell `index`, i.e. every item gets a cell of its own.
            partition.add_item(index, index)
Ejemplo n.º 2
0
    def initialise_partition(self, data, init_method):
        '''
        Build the starting partition and store it in `self.partition`.

        Cell values are drawn from `self.partition_sampler.base_measure.random()`.

        Args:
            data : (list) Data points. Only the number of points is used here.

            init_method : (str) Initialisation method to use.
                           - 'separate' will allocate each data point to a separate partition.
                           - 'together' will allocate all data points to the same partition.

        Raises:
            ValueError : If `init_method` is not one of the values listed above.
        '''
        # Validate before touching any state: an unknown method used to fall through both branches and
        # silently leave an empty partition behind.
        if init_method not in ('separate', 'together'):
            raise ValueError(
                "Unknown init_method {0!r}; expected 'separate' or 'together'".format(init_method))

        self.partition = Partition()

        base_measure = self.partition_sampler.base_measure

        if init_method == 'separate':
            for item, _ in enumerate(data):
                self.partition.add_cell(base_measure.random())

                # Item `item` goes into cell `item`, so each point has its own cell.
                self.partition.add_item(item, item)

        else:
            # 'together': a single cell (index 0) shared by every item.
            self.partition.add_cell(base_measure.random())

            for item, _ in enumerate(data):
                self.partition.add_item(item, 0)
Ejemplo n.º 3
0
def draw_from_prior(base_measure, size):
    '''
    Draw a value for alpha and build a partition in which all items share one cell.

    Args:
        base_measure : Object exposing `random()`; a single draw from it becomes the value of the only cell.

        size : (int) Number of items; items `0 .. size - 1` are all placed in cell 0.

    Returns:
        Tuple `(alpha, partition)`.
    '''
    # NOTE(review): gamma_rvs(1, 1) is presumably a Gamma draw with both parameters set to 1 - confirm
    # the parameterisation against the helper.
    alpha = gamma_rvs(1, 1)

    # NOTE: an earlier version obtained the partition from sample_from_crp(alpha, size, base_measure);
    # that call is disabled and the partition below does not depend on alpha.
    single_cell = Partition()

    cell_value = base_measure.random()

    single_cell.add_cell(cell_value)

    for index in range(size):
        single_cell.add_item(index, 0)

    result = (alpha, single_cell)

    return result
Ejemplo n.º 4
0
Archivo: dp.py Proyecto: Q-KIM/PyClone
 def initialise_partition(self, data, init_method):
     '''
     Build the starting partition and store it in `self.partition`.

     Cell values are drawn from `self.partition_sampler.base_measure.random()`.

     Args:
         data : (list) Data points. Only the number of points is used here.

         init_method : (str) Initialisation method to use.
                        - 'separate' will allocate each data point to a separate partition.
                        - 'together' will allocate all data points to the same partition.

     Raises:
         ValueError : If `init_method` is not one of the values listed above.
     '''
     # Validate before touching any state: an unknown method used to fall through both branches and
     # silently leave an empty partition behind.
     if init_method not in ('separate', 'together'):
         raise ValueError(
             "Unknown init_method {0!r}; expected 'separate' or 'together'".format(init_method))

     self.partition = Partition()

     base_measure = self.partition_sampler.base_measure

     if init_method == 'separate':
         for item, _ in enumerate(data):
             self.partition.add_cell(base_measure.random())

             # Item `item` goes into cell `item`, so each point has its own cell.
             self.partition.add_item(item, item)

     else:
         # 'together': a single cell (index 0) shared by every item.
         self.partition.add_cell(base_measure.random())

         for item, _ in enumerate(data):
             self.partition.add_item(item, 0)
Ejemplo n.º 5
0
def sample_from_crp(alpha, size, base_measure):
    '''
    Seat `size` customers one at a time and return the resulting seating as a Partition.

    Each customer's table is chosen with `discrete_rvs` from the probabilities returned by
    `_get_table_probabilities(tables, alpha)`. A draw equal to the current number of tables opens a new
    table, whose value is taken from `base_measure.random()`.

    Args:
        alpha : Passed through to `_get_table_probabilities` (presumably the concentration parameter).

        size : (int) Number of customers (items) to seat. If `size <= 0` an empty partition is returned.

        base_measure : Object exposing `random()`, used to draw one value per table.

    Returns:
        Partition with one cell per table and item `i` assigned to the table of customer `i`.
    '''
    # Previously the first customer was seated unconditionally, so a request for zero items still
    # produced a partition containing one item.
    if size <= 0:
        return Partition()

    # Seat the first customer at the first table.
    tables = [[0]]
    labels = [0]
    values = [base_measure.random()]

    for customer in range(1, size):
        p = _get_table_probabilities(tables, alpha)

        table_id = discrete_rvs(p)

        if table_id == len(tables):
            # One past the last existing table: open a new table with a fresh value.
            tables.append([customer])

            values.append(base_measure.random())
        else:
            tables[table_id].append(customer)

        labels.append(table_id)

    partition = Partition()

    # Cells are added in table order so that the labels collected above index them directly.
    for v in values:
        partition.add_cell(v)

    for item, cell_index in enumerate(labels):
        partition.add_item(item, cell_index)

    return partition
Ejemplo n.º 6
0
Archivo: dp.py Proyecto: Q-KIM/PyClone
class DirichletProcessSampler(object):
    '''
    Sampler for a Dirichlet process model that delegates every update to the sampler objects it is given.

    On each iteration it optionally updates `alpha`, then updates the partition and the atoms, and
    optionally updates the global parameters (see `interactive_sample`).
    '''

    def __init__(self, atom_sampler, partition_sampler, alpha=1.0, alpha_priors=None, global_params_sampler=None):
        '''
        Args:
            atom_sampler : Object exposing `sample(data, partition)` and `cluster_density.params`.

            partition_sampler : Object exposing `sample(data, partition, alpha)` and `base_measure.random()`.

        Kwargs:
            alpha : Initial value of alpha. It stays fixed unless `alpha_priors` is given.

            alpha_priors : (dict) Must contain the keys 'shape' and 'rate', which are passed to
                           GammaPriorConcentrationSampler. If None, alpha is never updated.

            global_params_sampler : Object exposing `sample(data, partition)`. If None, global parameters
                                    are never updated.
        '''
        self.atom_sampler = atom_sampler

        self.partition_sampler = partition_sampler

        self.alpha = alpha

        # alpha is only resampled when a prior was supplied for it.
        self.update_alpha = alpha_priors is not None

        if self.update_alpha:
            self.concentration_sampler = GammaPriorConcentrationSampler(alpha_priors['shape'],
                                                                        alpha_priors['rate'])

        self.update_global_params = global_params_sampler is not None

        if self.update_global_params:
            self.global_params_sampler = global_params_sampler

        # Total number of iterations run by `sample` over the lifetime of this object.
        self.num_iters = 0

    @property
    def state(self):
        '''
        Snapshot of the current sampler state as a dict with the keys 'alpha', 'labels', 'params' and
        'global_params'. Requires that a partition has been initialised.
        '''
        return {
                'alpha' : self.alpha,
                'labels' : self.partition.labels,
                'params' : list(self.partition.item_values),
                'global_params' : self.atom_sampler.cluster_density.params
                }

    def initialise_partition(self, data, init_method):
        '''
        Build the starting partition and store it in `self.partition`.

        Args:
            data : (list) Data points. Only the number of points is used here.

            init_method : (str) Initialisation method to use.
                           - 'separate' will allocate each data point to a separate partition.
                           - 'together' will allocate all data points to the same partition.

        Raises:
            ValueError : If `init_method` is not one of the values listed above.
        '''
        # Validate before touching any state: an unknown method used to fall through both branches and
        # silently leave an empty partition behind.
        if init_method not in ('separate', 'together'):
            raise ValueError(
                "Unknown init_method {0!r}; expected 'separate' or 'together'".format(init_method))

        self.partition = Partition()

        base_measure = self.partition_sampler.base_measure

        if init_method == 'separate':
            for item, _ in enumerate(data):
                self.partition.add_cell(base_measure.random())

                # Item `item` goes into cell `item`, so each point has its own cell.
                self.partition.add_item(item, item)

        else:
            # 'together': a single cell (index 0) shared by every item.
            self.partition.add_cell(base_measure.random())

            for item, _ in enumerate(data):
                self.partition.add_item(item, 0)

    def sample(self, data, trace, num_iters, init_method='separate', print_freq=100):
        '''
        Initialise the partition and run `num_iters` iterations, recording the state after each one.

        Args:
            data : (list) Data points.

            trace : Object exposing `update(state)`; called once per iteration with `self.state`.

            num_iters : (int) Number of iterations to run.

        Kwargs:
            init_method : (str) Passed to `initialise_partition`.

            print_freq : (int) Progress is printed on every `print_freq`-th iteration. A falsy value
                         (0 or None) disables progress output.
        '''
        self.initialise_partition(data, init_method)

        for i in range(num_iters):
            # The truthiness guard avoids a modulo-by-zero when printing is switched off with 0.
            if print_freq and i % print_freq == 0:
                self._print_progress()

            self.interactive_sample(data)

            trace.update(self.state)

            self.num_iters += 1

    def interactive_sample(self, data):
        '''
        Run a single iteration: alpha (if enabled), partition, atoms, then global parameters (if enabled).
        '''
        if self.update_alpha:
            self.alpha = self.concentration_sampler.sample(self.alpha,
                                                           self.partition.number_of_cells,
                                                           self.partition.number_of_items)

        self.partition_sampler.sample(data, self.partition, self.alpha)

        self.atom_sampler.sample(data, self.partition)

        if self.update_global_params:
            self.global_params_sampler.sample(data, self.partition)

    def _print_progress(self):
        '''
        Print the iteration count, number of cells and alpha, followed by the global parameters if enabled.

        Raises:
            TypeError : If the cluster density parameters are neither an OrderedDict nor a tuple.
        '''
        # Each print call receives one pre-joined string so the output is the same under the Python 2
        # print statement and the Python 3 print function.
        fields = (self.num_iters, self.partition.number_of_cells, self.alpha)

        print(' '.join(str(field) for field in fields))

        if not self.update_global_params:
            return

        params = self.atom_sampler.cluster_density.params

        if isinstance(params, OrderedDict):
            print(','.join(str(x[0]) for x in params.values()))

        elif isinstance(params, tuple):
            print(params[0])

        else:
            raise TypeError('Object type {0} is not a valid cluster parameter'.format(type(params)))
Ejemplo n.º 7
0
    def initialise_partition(self, data):
        '''
        Replace `self.partition` with a fresh partition holding one cell per data point.

        Args:
            data : Iterable of data points. Only the number of points is used; each new cell takes its
                   value from `self.base_measure.random()`.
        '''
        partition = Partition()

        self.partition = partition

        for index, _unused in enumerate(data):
            cell_value = self.base_measure.random()

            partition.add_cell(cell_value)

            # Item `index` is placed in cell `index`, i.e. every item gets a cell of its own.
            partition.add_item(index, index)
Ejemplo n.º 8
0
class DirichletProcessSampler(object):
    '''
    Sampler for a Dirichlet process model built on PyCloneBaseMeasure.

    The partition is updated with AuxillaryParameterPartitionSampler and the atoms with
    BaseMeasureAtomSampler; alpha is optionally updated with GammaPriorConcentrationSampler.
    '''

    def __init__(self,
                 cluster_density,
                 alpha=None,
                 alpha_shape=None,
                 alpha_rate=None):
        '''
        Args:
            cluster_density : Passed to both the partition sampler and the atom sampler.

        Kwargs:
            alpha : Fixed value for alpha. If None, alpha starts at 1 and is resampled every iteration.

            alpha_shape, alpha_rate : Passed to GammaPriorConcentrationSampler; only used when `alpha`
                                      is None.
        '''
        self.base_measure = PyCloneBaseMeasure()

        self.partition_sampler = AuxillaryParameterPartitionSampler(
            self.base_measure, cluster_density)

        self.atom_sampler = BaseMeasureAtomSampler(self.base_measure,
                                                   cluster_density)

        if alpha is None:
            self.alpha = 1

            self.update_alpha = True

            self.concentration_sampler = GammaPriorConcentrationSampler(
                alpha_shape, alpha_rate)
        else:
            self.alpha = alpha

            self.update_alpha = False

        # Total number of iterations run by `sample` over the lifetime of this object.
        self.num_iters = 0

    @property
    def state(self):
        '''
        Snapshot of the current sampler state as a dict with the keys 'alpha', 'cellular_frequencies'
        (`phi` of every item value), 'labels' and 'phi' (`phi` of every cell value).
        '''
        partition = self.partition

        return {
            'alpha': self.alpha,
            'cellular_frequencies': [param.phi for param in partition.item_values],
            'labels': partition.labels,
            'phi': [param.phi for param in partition.cell_values]
        }

    def initialise_partition(self, data):
        '''
        Replace `self.partition` with a fresh partition holding one cell per data point, each cell value
        drawn from `self.base_measure.random()`.
        '''
        self.partition = Partition()

        for item, _ in enumerate(data):
            self.partition.add_cell(self.base_measure.random())
            self.partition.add_item(item, item)

    def sample(self, data, results_db, num_iters, print_freq=100):
        '''
        Initialise the partition and run `num_iters` iterations, recording the state after each one.

        Args:
            data : Data points.

            results_db : Object exposing `update_trace(state)`; called once per iteration.

            num_iters : (int) Number of iterations to run.

        Kwargs:
            print_freq : (int) Progress is printed on every `print_freq`-th iteration. A falsy value
                         (0 or None) disables progress output.
        '''
        self.initialise_partition(data)

        for i in range(num_iters):
            # The truthiness guard avoids a modulo-by-zero when printing is switched off with 0.
            if print_freq and i % print_freq == 0:
                # One pre-joined string keeps the output the same under the Python 2 print statement
                # and the Python 3 print function.
                fields = (self.num_iters, self.partition.number_of_cells, self.alpha)

                print(' '.join(str(field) for field in fields))

            self.interactive_sample(data)

            results_db.update_trace(self.state)

            self.num_iters += 1

    def interactive_sample(self, data):
        '''
        Run a single iteration: alpha (if enabled), then the partition, then the atoms.
        '''
        if self.update_alpha:
            self.alpha = self.concentration_sampler.sample(
                self.alpha, self.partition.number_of_cells,
                self.partition.number_of_items)

        self.partition_sampler.sample(data, self.partition, self.alpha)

        self.atom_sampler.sample(data, self.partition)

    def _init_partition(self, base_measure):
        '''
        Same initialisation as `initialise_partition`, but driven by `self.data` and an explicit
        base measure.
        '''
        # NOTE(review): nothing in this class assigns `self.data`, so as written this raises
        # AttributeError unless a caller or subclass sets it first - looks like leftover code; confirm.
        self.partition = Partition()

        for item, _ in enumerate(self.data):
            self.partition.add_cell(base_measure.random())

            self.partition.add_item(item, item)
Ejemplo n.º 9
0
# Simulated data: 1000 draws from poisson_rvs(100) followed by 50 draws from poisson_rvs(25).
for _count, _rate in ((1000, 100), (50, 25)):
    for i in range(_count):
        x = poisson_rvs(_rate)
        data.append(PoissonData(x))

alpha = 1

base_measure = GammaBaseMeasure(1, 1)

cluster_density = PoissonDensity()

# Start with every data point in a cell of its own, each cell value drawn from the base measure.
partition = Partition()

for item, data_point in enumerate(data):
    cell_value = base_measure.random()
    partition.add_cell(cell_value)
    partition.add_item(item, item)

concentration_sampler = GammaPriorConcentrationSampler(1, 1)

posterior_density = NegativeBinomialDensity()

# Alternative that was tried here: AuxillaryParameterPartitionSampler(base_measure, cluster_density)
partition_sampler = MarginalGibbsPartitionSampler(
    base_measure, cluster_density, posterior_density)

atom_sampler = GammaPoissonGibbsAtomSampler(base_measure, cluster_density)
Ejemplo n.º 10
0
class DirichletProcessSampler(object):
    '''
    Sampler for a Dirichlet process model that delegates every update to the sampler objects it is given.

    On each iteration it optionally updates `alpha`, then updates the partition and the atoms, and
    optionally updates the global parameters (see `interactive_sample`).
    '''

    def __init__(self,
                 atom_sampler,
                 partition_sampler,
                 alpha=1.0,
                 alpha_priors=None,
                 global_params_sampler=None):
        '''
        Args:
            atom_sampler : Object exposing `sample(data, partition)` and `cluster_density.params`.

            partition_sampler : Object exposing `sample(data, partition, alpha)` and `base_measure.random()`.

        Kwargs:
            alpha : Initial value of alpha. It stays fixed unless `alpha_priors` is given.

            alpha_priors : (dict) Must contain the keys 'shape' and 'rate', which are passed to
                           GammaPriorConcentrationSampler. If None, alpha is never updated.

            global_params_sampler : Object exposing `sample(data, partition)`. If None, global parameters
                                    are never updated.
        '''
        self.atom_sampler = atom_sampler

        self.partition_sampler = partition_sampler

        self.alpha = alpha

        # alpha is only resampled when a prior was supplied for it.
        self.update_alpha = alpha_priors is not None

        if self.update_alpha:
            self.concentration_sampler = GammaPriorConcentrationSampler(
                alpha_priors['shape'], alpha_priors['rate'])

        self.update_global_params = global_params_sampler is not None

        if self.update_global_params:
            self.global_params_sampler = global_params_sampler

        # Total number of iterations run by `sample` over the lifetime of this object.
        self.num_iters = 0

    @property
    def state(self):
        '''
        Snapshot of the current sampler state as a dict with the keys 'alpha', 'labels', 'params' and
        'global_params'. Requires that a partition has been initialised.
        '''
        return {
            'alpha': self.alpha,
            'labels': self.partition.labels,
            'params': list(self.partition.item_values),
            'global_params': self.atom_sampler.cluster_density.params
        }

    def initialise_partition(self, data, init_method):
        '''
        Build the starting partition and store it in `self.partition`.

        Args:
            data : (list) Data points. Only the number of points is used here.

            init_method : (str) Initialisation method to use.
                           - 'separate' will allocate each data point to a separate partition.
                           - 'together' will allocate all data points to the same partition.

        Raises:
            ValueError : If `init_method` is not one of the values listed above.
        '''
        # Validate before touching any state: an unknown method used to fall through both branches and
        # silently leave an empty partition behind.
        if init_method not in ('separate', 'together'):
            raise ValueError(
                "Unknown init_method {0!r}; expected 'separate' or 'together'".
                format(init_method))

        self.partition = Partition()

        base_measure = self.partition_sampler.base_measure

        if init_method == 'separate':
            for item, _ in enumerate(data):
                self.partition.add_cell(base_measure.random())

                # Item `item` goes into cell `item`, so each point has its own cell.
                self.partition.add_item(item, item)

        else:
            # 'together': a single cell (index 0) shared by every item.
            self.partition.add_cell(base_measure.random())

            for item, _ in enumerate(data):
                self.partition.add_item(item, 0)

    def sample(self,
               data,
               trace,
               num_iters,
               init_method='separate',
               print_freq=100):
        '''
        Initialise the partition and run `num_iters` iterations, recording the state after each one.

        Args:
            data : (list) Data points.

            trace : Object exposing `update(state)`; called once per iteration with `self.state`.

            num_iters : (int) Number of iterations to run.

        Kwargs:
            init_method : (str) Passed to `initialise_partition`.

            print_freq : (int) Progress is printed on every `print_freq`-th iteration. A falsy value
                         (0 or None) disables progress output.
        '''
        self.initialise_partition(data, init_method)

        for i in range(num_iters):
            # The truthiness guard avoids a modulo-by-zero when printing is switched off with 0.
            if print_freq and i % print_freq == 0:
                self._print_progress()

            self.interactive_sample(data)

            trace.update(self.state)

            self.num_iters += 1

    def interactive_sample(self, data):
        '''
        Run a single iteration: alpha (if enabled), partition, atoms, then global parameters (if enabled).
        '''
        if self.update_alpha:
            self.alpha = self.concentration_sampler.sample(
                self.alpha, self.partition.number_of_cells,
                self.partition.number_of_items)

        self.partition_sampler.sample(data, self.partition, self.alpha)

        self.atom_sampler.sample(data, self.partition)

        if self.update_global_params:
            self.global_params_sampler.sample(data, self.partition)

    def _print_progress(self):
        '''
        Print the iteration count, number of cells and alpha, followed by the global parameters if enabled.

        Raises:
            TypeError : If the cluster density parameters are neither an OrderedDict nor a tuple.
        '''
        # Each print call receives one pre-joined string so the output is the same under the Python 2
        # print statement and the Python 3 print function.
        fields = (self.num_iters, self.partition.number_of_cells, self.alpha)

        print(' '.join(str(field) for field in fields))

        if not self.update_global_params:
            return

        params = self.atom_sampler.cluster_density.params

        if isinstance(params, OrderedDict):
            print(','.join(str(x[0]) for x in params.values()))

        elif isinstance(params, tuple):
            print(params[0])

        else:
            raise TypeError(
                'Object type {0} is not a valid cluster parameter'.format(
                    type(params)))