def _init_partition(self, base_measure):
    """Seed the sampler with every data point in its own singleton cell.

    One cell value is drawn from ``base_measure`` per item in ``self.data``
    and item ``i`` is assigned to cell ``i``.
    """
    self.partition = Partition()
    for idx in range(len(self.data)):
        self.partition.add_cell(base_measure.random())
        self.partition.add_item(idx, idx)
def initialise_partition(self, data, init_method):
    '''
    Seed ``self.partition`` before sampling begins.

    Args:
        data : (list) Data points.

    Kwargs:
        init_method : (str) Initialisation method to use.
            - 'separate' will allocate each data point to a separate partition.
            - 'together' will allocate all data points to the same partition.

    Raises:
        ValueError : If ``init_method`` is not 'separate' or 'together'.
    '''
    # Validate first so an unrecognised method fails loudly instead of
    # silently leaving the sampler without a usable partition.
    if init_method not in ('separate', 'together'):
        raise ValueError(
            'Unknown init_method {0!r}; expected one of: separate, together'.format(init_method))

    self.partition = Partition()

    if init_method == 'separate':
        # One fresh cell per data point; item i sits alone in cell i.
        for item, _ in enumerate(data):
            self.partition.add_cell(self.partition_sampler.base_measure.random())
            self.partition.add_item(item, item)
    else:
        # Single shared cell; every item is assigned to cell 0.
        self.partition.add_cell(self.partition_sampler.base_measure.random())
        for item, _ in enumerate(data):
            self.partition.add_item(item, 0)
def draw_from_prior(base_measure, size):
    """Draw ``(alpha, partition)`` from the prior.

    The concentration parameter is drawn from a Gamma(1, 1) prior.  The
    partition is seeded with all ``size`` items in one shared cell whose
    value is drawn from ``base_measure``.
    """
    concentration = gamma_rvs(1, 1)
    # NOTE: a CRP draw was considered here instead:
    # partition = sample_from_crp(concentration, size, base_measure)
    prior_partition = Partition()
    prior_partition.add_cell(base_measure.random())
    for idx in range(size):
        prior_partition.add_item(idx, 0)
    return concentration, prior_partition
def initialise_partition(self, data, init_method):
    '''
    Seed ``self.partition`` before sampling begins.

    Args:
        data : (list) Data points.

    Kwargs:
        init_method : (str) Initialisation method to use.
            - 'separate' will allocate each data point to a separate partition.
            - 'together' will allocate all data points to the same partition.

    Raises:
        ValueError : If ``init_method`` is not 'separate' or 'together'.
    '''
    # Validate first so an unrecognised method fails loudly instead of
    # silently leaving the sampler without a usable partition.
    if init_method not in ('separate', 'together'):
        raise ValueError(
            'Unknown init_method {0!r}; expected one of: separate, together'.format(init_method))

    self.partition = Partition()

    if init_method == 'separate':
        # One fresh cell per data point; item i sits alone in cell i.
        for item, _ in enumerate(data):
            self.partition.add_cell(self.partition_sampler.base_measure.random())
            self.partition.add_item(item, item)
    else:
        # Single shared cell; every item is assigned to cell 0.
        self.partition.add_cell(self.partition_sampler.base_measure.random())
        for item, _ in enumerate(data):
            self.partition.add_item(item, 0)
def sample_from_crp(alpha, size, base_measure):
    """Sample a partition of ``size`` items from a CRP with concentration ``alpha``.

    Customers are seated sequentially; each new table receives a cell
    value drawn from ``base_measure``.  Returns the resulting Partition.
    """
    # Seat the first customer at a fresh table.
    seat_of = [0]                            # table index for each customer
    occupants = [[0]]                        # customers grouped by table
    dish_values = [base_measure.random()]    # one cell value per table

    for cust in range(1, size):
        probs = _get_table_probabilities(occupants, alpha)
        choice = discrete_rvs(probs)
        if choice == len(occupants):
            # New table: seat the customer and draw its value.
            occupants.append([cust])
            dish_values.append(base_measure.random())
        else:
            occupants[choice].append(cust)
        seat_of.append(choice)

    result = Partition()
    for value in dish_values:
        result.add_cell(value)
    for item_idx, table_idx in enumerate(seat_of):
        result.add_item(item_idx, table_idx)
    return result
class DirichletProcessSampler(object): def __init__(self, atom_sampler, partition_sampler, alpha=1.0, alpha_priors=None, global_params_sampler=None): self.atom_sampler = atom_sampler self.partition_sampler = partition_sampler self.alpha = alpha if alpha_priors is None: self.update_alpha = False else: self.update_alpha = True self.concentration_sampler = GammaPriorConcentrationSampler(alpha_priors['shape'], alpha_priors['rate']) if global_params_sampler is None: self.update_global_params = False else: self.update_global_params = True self.global_params_sampler = global_params_sampler self.num_iters = 0 @property def state(self): return { 'alpha' : self.alpha, 'labels' : self.partition.labels, 'params' : [param for param in self.partition.item_values], 'global_params' : self.atom_sampler.cluster_density.params } def initialise_partition(self, data, init_method): ''' Args: data : (list) Data points. Kwargs: method : (str) Initialisation method to use. - 'separate' will allocate each data point to a separate partition. - 'together' will allocate all data points to the same partition. 
''' self.partition = Partition() if init_method == 'separate': for item, _ in enumerate(data): self.partition.add_cell(self.partition_sampler.base_measure.random()) self.partition.add_item(item, item) elif init_method == 'together': self.partition.add_cell(self.partition_sampler.base_measure.random()) for item, _ in enumerate(data): self.partition.add_item(item, 0) def sample(self, data, trace, num_iters, init_method='separate', print_freq=100): self.initialise_partition(data, init_method) for i in range(num_iters): if i % print_freq == 0: print self.num_iters, self.partition.number_of_cells, self.alpha if self.update_global_params: params = self.atom_sampler.cluster_density.params if isinstance(params, OrderedDict): print ','.join([str(x[0]) for x in self.atom_sampler.cluster_density.params.values()]) elif isinstance(params, tuple): print params[0] else: raise Exception('Object type {0} is not a valid cluster parameter'.format(type(params))) self.interactive_sample(data) trace.update(self.state) self.num_iters += 1 def interactive_sample(self, data): if self.update_alpha: self.alpha = self.concentration_sampler.sample(self.alpha, self.partition.number_of_cells, self.partition.number_of_items) self.partition_sampler.sample(data, self.partition, self.alpha) self.atom_sampler.sample(data, self.partition) if self.update_global_params: self.global_params_sampler.sample(data, self.partition)
def initialise_partition(self, data):
    """Place each data point in its own cell, valued by a fresh base-measure draw."""
    self.partition = Partition()
    for idx in range(len(data)):
        self.partition.add_cell(self.base_measure.random())
        self.partition.add_item(idx, idx)
class DirichletProcessSampler(object):
    # Dirichlet process Gibbs sampler wired to a PyClone base measure, an
    # auxiliary-parameter partition sampler and a base-measure atom sampler.

    def __init__(self, cluster_density, alpha=None, alpha_shape=None, alpha_rate=None):
        self.base_measure = PyCloneBaseMeasure()
        self.partition_sampler = AuxillaryParameterPartitionSampler(
            self.base_measure, cluster_density)
        self.atom_sampler = BaseMeasureAtomSampler(self.base_measure, cluster_density)
        # When alpha is not supplied it is learned: start at 1 and resample
        # from a Gamma(alpha_shape, alpha_rate) prior each sweep.
        # NOTE(review): alpha_shape/alpha_rate default to None, so calling
        # with no arguments builds GammaPriorConcentrationSampler(None, None)
        # -- confirm callers always pass the prior parameters in that case.
        if alpha is None:
            self.alpha = 1
            self.update_alpha = True
            self.concentration_sampler =\
                GammaPriorConcentrationSampler(alpha_shape, alpha_rate)
        else:
            self.alpha = alpha
            self.update_alpha = False
        self.num_iters = 0

    @property
    def state(self):
        # Snapshot of the current MCMC state, written to the results DB
        # each iteration.
        return {
            'alpha': self.alpha,
            'cellular_frequencies': [param.phi for param in self.partition.item_values],
            'labels': self.partition.labels,
            'phi': [param.phi for param in self.partition.cell_values]
        }

    def initialise_partition(self, data):
        # Seed the sampler with every data point in its own singleton cell.
        self.partition = Partition()
        for item, _ in enumerate(data):
            self.partition.add_cell(self.base_measure.random())
            self.partition.add_item(item, item)

    def sample(self, data, results_db, num_iters, print_freq=100):
        # Run num_iters Gibbs sweeps, recording the state in results_db after
        # each sweep and printing progress every print_freq iterations.
        self.initialise_partition(data)
        for i in range(num_iters):
            if i % print_freq == 0:
                print self.num_iters, self.partition.number_of_cells, self.alpha
            self.interactive_sample(data)
            results_db.update_trace(self.state)
            self.num_iters += 1

    def interactive_sample(self, data):
        # One Gibbs sweep: concentration (optional), then partition, then atoms.
        if self.update_alpha:
            self.alpha = self.concentration_sampler.sample(
                self.alpha, self.partition.number_of_cells, self.partition.number_of_items)
        self.partition_sampler.sample(data, self.partition, self.alpha)
        self.atom_sampler.sample(data, self.partition)

    def _init_partition(self, base_measure):
        # NOTE(review): near-duplicate of initialise_partition, but iterates
        # self.data, which this class never assigns -- looks dead or broken;
        # verify before removing.
        self.partition = Partition()
        for item, _ in enumerate(self.data):
            self.partition.add_cell(base_measure.random())
            self.partition.add_item(item, item)
# Synthetic test data: 1000 draws from Poisson(100) and 50 from Poisson(25),
# so the sampler should recover two clusters of very different sizes.
# NOTE(review): `data` must be a list defined earlier in the file.
for i in range(1000):
    x = poisson_rvs(100)
    data.append(PoissonData(x))
for i in range(50):
    x = poisson_rvs(25)
    data.append(PoissonData(x))

# Model setup: Gamma(1, 1) base measure over Poisson rates.
alpha = 1
base_measure = GammaBaseMeasure(1, 1)
cluster_density = PoissonDensity()

# Start with every data point in its own singleton cell.
partition = Partition()
for item, data_point in enumerate(data):
    partition.add_cell(base_measure.random())
    partition.add_item(item, item)

concentration_sampler = GammaPriorConcentrationSampler(1, 1)

# Marginal (collapsed) Gibbs partition sampler; the Gamma-Poisson posterior
# predictive used here is negative binomial.
posterior_density = NegativeBinomialDensity()
partition_sampler = MarginalGibbsPartitionSampler(base_measure, cluster_density, posterior_density)
#partition_sampler = AuxillaryParameterPartitionSampler(base_measure, cluster_density)
atom_sampler = GammaPoissonGibbsAtomSampler(base_measure, cluster_density)
class DirichletProcessSampler(object): def __init__(self, atom_sampler, partition_sampler, alpha=1.0, alpha_priors=None, global_params_sampler=None): self.atom_sampler = atom_sampler self.partition_sampler = partition_sampler self.alpha = alpha if alpha_priors is None: self.update_alpha = False else: self.update_alpha = True self.concentration_sampler = GammaPriorConcentrationSampler( alpha_priors['shape'], alpha_priors['rate']) if global_params_sampler is None: self.update_global_params = False else: self.update_global_params = True self.global_params_sampler = global_params_sampler self.num_iters = 0 @property def state(self): return { 'alpha': self.alpha, 'labels': self.partition.labels, 'params': [param for param in self.partition.item_values], 'global_params': self.atom_sampler.cluster_density.params } def initialise_partition(self, data, init_method): ''' Args: data : (list) Data points. Kwargs: method : (str) Initialisation method to use. - 'separate' will allocate each data point to a separate partition. - 'together' will allocate all data points to the same partition. 
''' self.partition = Partition() if init_method == 'separate': for item, _ in enumerate(data): self.partition.add_cell( self.partition_sampler.base_measure.random()) self.partition.add_item(item, item) elif init_method == 'together': self.partition.add_cell( self.partition_sampler.base_measure.random()) for item, _ in enumerate(data): self.partition.add_item(item, 0) def sample(self, data, trace, num_iters, init_method='separate', print_freq=100): self.initialise_partition(data, init_method) for i in range(num_iters): if i % print_freq == 0: print self.num_iters, self.partition.number_of_cells, self.alpha if self.update_global_params: params = self.atom_sampler.cluster_density.params if isinstance(params, OrderedDict): print ','.join([ str(x[0]) for x in self.atom_sampler.cluster_density.params.values() ]) elif isinstance(params, tuple): print params[0] else: raise Exception( 'Object type {0} is not a valid cluster parameter'. format(type(params))) self.interactive_sample(data) trace.update(self.state) self.num_iters += 1 def interactive_sample(self, data): if self.update_alpha: self.alpha = self.concentration_sampler.sample( self.alpha, self.partition.number_of_cells, self.partition.number_of_items) self.partition_sampler.sample(data, self.partition, self.alpha) self.atom_sampler.sample(data, self.partition) if self.update_global_params: self.global_params_sampler.sample(data, self.partition)