def sample(self, observations, steps, epsilon_init, n_samples=10000, n_samples_per_param=1, epsilon_percentile=10, covFactor=2, full_output=0, journal_file=None, journal_file_save=None): """Samples from the posterior distribution of the model parameter given the observed data observations. Parameters ---------- observations : list A list, containing lists describing the observed data sets steps : integer Number of iterations in the sequential algoritm ("generations") epsilon_init : numpy.ndarray An array of proposed values of epsilon to be used at each steps. Can be supplied A single value to be used as the threshold in Step 1 or a `steps`-dimensional array of values to be used as the threshold in evry steps. n_samples : integer, optional Number of samples to generate. The default value is 10000. n_samples_per_param : integer, optional Number of data points in each simulated data set. The default value is 1. epsilon_percentile : float, optional A value between [0, 100]. The default value is 10. covFactor : float, optional scaling parameter of the covariance matrix. The default value is 2 as considered in [1]. full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. journal_file: str, optional Filename of a journal file to read an already saved journal file, from which the first iteration will start. The default value is None. Returns ------- abcpy.output.Journal A journal containing simulation results, metadata and optionally intermediate results. """ self.accepted_parameters_manager.broadcast(self.backend, observations) self.n_samples = n_samples self.n_samples_per_param = n_samples_per_param if (journal_file is None): journal = Journal(full_output) journal.configuration["type_model"] = [ type(model).__name__ for model in self.model ] journal.configuration["type_dist_func"] = type( self.distance).__name__ journal.configuration["n_samples"] = self.n_samples journal.configuration[ "n_samples_per_param"] = self.n_samples_per_param journal.configuration["steps"] = steps journal.configuration["epsilon_percentile"] = epsilon_percentile else: journal = Journal.fromFile(journal_file) accepted_parameters = None accepted_weights = None accepted_cov_mats = None # Define epsilon_arr if len(epsilon_init) == steps: epsilon_arr = epsilon_init else: if len(epsilon_init) == 1: epsilon_arr = [None] * steps epsilon_arr[0] = epsilon_init else: raise ValueError( "The length of epsilon_init can only be equal to 1 or steps." ) # main PMCABC algorithm self.logger.info("Starting PMC iterations") for aStep in range(steps): self.logger.debug("iteration {} of PMC algorithm".format(aStep)) if (aStep == 0 and journal_file is not None): accepted_parameters = journal.get_accepted_parameters(-1) accepted_weights = journal.get_weights(-1) self.accepted_parameters_manager.update_broadcast( self.backend, accepted_parameters=accepted_parameters, accepted_weights=accepted_weights) kernel_parameters = [] for kernel in self.kernel.kernels: kernel_parameters.append( self.accepted_parameters_manager. get_accepted_parameters_bds_values(kernel.models)) self.accepted_parameters_manager.update_kernel_values( self.backend, kernel_parameters=kernel_parameters) # 3: calculate covariance self.logger.info("Calculateing covariance matrix") new_cov_mats = self.kernel.calculate_cov( self.accepted_parameters_manager) # Since each entry of new_cov_mats is a numpy array, we can multiply like this accepted_cov_mats = [ covFactor * new_cov_mat for new_cov_mat in new_cov_mats ] seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=n_samples, dtype=np.uint32) rng_arr = np.array( [np.random.RandomState(seed) for seed in seed_arr]) rng_pds = self.backend.parallelize(rng_arr) # 0: update remotely required variables # print("INFO: Broadcasting parameters.") self.logger.info("Broadcasting parameters") self.epsilon = epsilon_arr[aStep] self.accepted_parameters_manager.update_broadcast( self.backend, accepted_parameters, accepted_weights, accepted_cov_mats) # 1: calculate resample parameters # print("INFO: Resampling parameters") self.logger.info("Resampling parameters") params_and_dists_and_counter_pds = self.backend.map( self._resample_parameter, rng_pds) params_and_dists_and_counter = self.backend.collect( params_and_dists_and_counter_pds) new_parameters, distances, counter = [ list(t) for t in zip(*params_and_dists_and_counter) ] new_parameters = np.array(new_parameters) distances = np.array(distances) for count in counter: self.simulation_counter += count # Compute epsilon for next step # print("INFO: Calculating acceptance threshold (epsilon).") self.logger.info("Calculating acceptances threshold") if aStep < steps - 1: if epsilon_arr[aStep + 1] == None: epsilon_arr[aStep + 1] = np.percentile( distances, epsilon_percentile) else: epsilon_arr[aStep + 1] = np.max([ np.percentile(distances, epsilon_percentile), epsilon_arr[aStep + 1] ]) # 2: calculate weights for new parameters self.logger.info("Calculating weights") new_parameters_pds = self.backend.parallelize(new_parameters) self.logger.info("Calculate weights") new_weights_pds = self.backend.map(self._calculate_weight, new_parameters_pds) new_weights = np.array( self.backend.collect(new_weights_pds)).reshape(-1, 1) sum_of_weights = 0.0 for w in new_weights: sum_of_weights += w new_weights = new_weights / sum_of_weights # The calculation of cov_mats needs the new weights and new parameters self.accepted_parameters_manager.update_broadcast( self.backend, accepted_parameters=new_parameters, accepted_weights=new_weights) # The parameters relevant to each kernel have to be used to calculate n_sample times. It is therefore more efficient to broadcast these parameters once, # instead of collecting them at each kernel in each step kernel_parameters = [] for kernel in self.kernel.kernels: kernel_parameters.append( self.accepted_parameters_manager. get_accepted_parameters_bds_values(kernel.models)) self.accepted_parameters_manager.update_kernel_values( self.backend, kernel_parameters=kernel_parameters) # 3: calculate covariance self.logger.info("Calculating covariance matrix") new_cov_mats = self.kernel.calculate_cov( self.accepted_parameters_manager) # Since each entry of new_cov_mats is a numpy array, we can multiply like this new_cov_mats = [ covFactor * new_cov_mat for new_cov_mat in new_cov_mats ] # 4: Update the newly computed values accepted_parameters = new_parameters accepted_weights = new_weights accepted_cov_mats = new_cov_mats self.logger.info("Save configuration to output journal") if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): journal.add_accepted_parameters( copy.deepcopy(accepted_parameters)) journal.add_distances(copy.deepcopy(distances)) journal.add_weights(copy.deepcopy(accepted_weights)) self.accepted_parameters_manager.update_broadcast( self.backend, accepted_parameters=accepted_parameters, accepted_weights=accepted_weights) names_and_parameters = self._get_names_and_parameters() journal.add_user_parameters(names_and_parameters) journal.number_of_simulations.append(self.simulation_counter) print(journal_file_save) if journal_file_save is not None: if full_output == 1: journal.save( journal_file_save + '.jrl') # avoid writing a lot of different files. else: journal.save(journal_file_save + '_' + str(aStep) + '.jrl') # Add epsilon_arr to the journal journal.configuration["epsilon_arr"] = epsilon_arr return journal
def sample(self, observations, steps, epsilon, n_samples=10000, n_samples_per_param=1, beta=2, delta=0.2, v=0.3, ar_cutoff=0.1, resample=None, n_update=None, full_output=0, journal_file=None): """Samples from the posterior distribution of the model parameter given the observed data observations. Parameters ---------- observations : list A list, containing lists describing the observed data sets steps : integer Number of maximum iterations in the sequential algoritm ("generations") epsilon : numpy.float A proposed value of threshold to start with. n_samples : integer, optional Number of samples to generate. The default value is 10000. n_samples_per_param : integer, optional Number of data points in each simulated data set. The default value is 1. beta : numpy.float Tuning parameter of SABC, default value is 2. delta : numpy.float Tuning parameter of SABC, default value is 0.2. v : numpy.float, optional Tuning parameter of SABC, The default value is 0.3. ar_cutoff : numpy.float Acceptance ratio cutoff, The default value is 0.1. resample: int, optional Resample after this many acceptance, The default value is None which takes value inside n_samples n_update: int, optional Number of perturbed parameters at each step, The default value is None which takes value inside n_samples full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. journal_file: str, optional Filename of a journal file to read an already saved journal file, from which the first iteration will start. The default value is None. Returns ------- abcpy.output.Journal A journal containing simulation results, metadata and optionally intermediate results. """ global broken_preemptively self.sample_from_prior(rng=self.rng) self.accepted_parameters_manager.broadcast(self.backend, observations) self.epsilon = epsilon self.n_samples = n_samples self.n_samples_per_param = n_samples_per_param if (journal_file is None): journal = Journal(full_output) journal.configuration["type_model"] = [ type(model).__name__ for model in self.model ] journal.configuration["type_dist_func"] = type( self.distance).__name__ journal.configuration["type_kernel_func"] = type(self.kernel) journal.configuration["n_samples"] = self.n_samples journal.configuration[ "n_samples_per_param"] = self.n_samples_per_param journal.configuration["beta"] = beta journal.configuration["delta"] = delta journal.configuration["v"] = v journal.configuration["ar_cutoff"] = ar_cutoff journal.configuration["resample"] = resample journal.configuration["n_update"] = n_update journal.configuration["full_output"] = full_output else: journal = Journal.fromFile(journal_file) accepted_parameters = None distances = np.zeros(shape=(n_samples, )) smooth_distances = np.zeros(shape=(n_samples, )) accepted_weights = np.ones(shape=(n_samples, 1)) all_distances = None accepted_cov_mat = None if resample == None: resample = n_samples if n_update == None: n_update = n_samples sample_array = np.ones(shape=(steps, )) sample_array[0] = n_samples sample_array[1:] = n_update ## Acceptance counter to determine the resampling step accept = 0 samples_until = 0 ## Counter whether broken preemptively broken_preemptively = False for aStep in range(0, steps): self.logger.debug("step {}".format(aStep)) if (aStep == 0 and journal_file is not None): accepted_parameters = journal.get_accepted_parameters(-1) accepted_weights = journal.get_weights(-1) #Broadcast Accepted parameters and Accedpted weights self.accepted_parameters_manager.update_broadcast( self.backend, accepted_parameters=accepted_parameters, accepted_weights=accepted_weights) kernel_parameters = [] for kernel in self.kernel.kernels: kernel_parameters.append( self.accepted_parameters_manager. get_accepted_parameters_bds_values(kernel.models)) #Broadcast Accepted Kernel parameters self.accepted_parameters_manager.update_kernel_values( self.backend, kernel_parameters=kernel_parameters) new_cov_mats = self.kernel.calculate_cov( self.accepted_parameters_manager) accepted_cov_mats = [] for new_cov_mat in new_cov_mats: if not (new_cov_mat.size == 1): accepted_cov_mats.append(beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(new_cov_mat.shape[0])) else: accepted_cov_mats.append( (beta * new_cov_mat + 0.0001 * new_cov_mat).reshape(1, 1)) # Broadcast Accepted Covariance Matrix self.accepted_parameters_manager.update_broadcast( self.backend, accepted_cov_mats=accepted_cov_mats) # main SABC algorithm # print("INFO: Initialization of SABC") seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=int(sample_array[aStep]), dtype=np.uint32) rng_arr = np.array( [np.random.RandomState(seed) for seed in seed_arr]) index_arr = self.rng.randint(0, self.n_samples, size=int(sample_array[aStep]), dtype=np.uint32) data_arr = [] for i in range(len(rng_arr)): data_arr.append([rng_arr[i], index_arr[i]]) data_pds = self.backend.parallelize(data_arr) # 0: update remotely required variables self.logger.info("Broadcasting parameters") self.epsilon = epsilon # 1: Calculate parameters self.logger.info("Initial accepted parameters") params_and_dists_pds = self.backend.map(self._accept_parameter, data_pds) params_and_dists = self.backend.collect(params_and_dists_pds) new_parameters, filenames, index, acceptance, counter = [ list(t) for t in zip(*params_and_dists) ] # Keeping counter of number of simulations for count in counter: self.simulation_counter += count #new_parameters = np.array(new_parameters) index = np.array(index) acceptance = np.array(acceptance) # Reading all_distances at Initial step if aStep == 0: index = np.linspace(0, n_samples - 1, n_samples).astype(int).reshape( n_samples, ) accept = 0 # Initialize/Update the accepted parameters and their corresponding distances if accepted_parameters is None: accepted_parameters = new_parameters else: for ind in range(len(acceptance)): if acceptance[ind] == 1: accepted_parameters[index[ind]] = new_parameters[ind] # 1.5: Update the distance and recompute distances from observed data self.logger.info("Updating distance") distances = self.distance.distances[0].update( filenames, self.accepted_parameters_manager.observations_bds.value(), self.backend) self._update_broadcasts(distances=distances) # 2: Compute epsilon U = self._average_redefined_distance(distances, epsilon * (1 - delta)) epsilon = self._schedule(U, v) #U = np.mean(distances) #epsilon = np.percentile(distances, .1 * 100) print(epsilon) # 4: Show progress and if acceptance rate smaller than a value break the iteration if aStep > 0: accept = accept + np.sum(acceptance) samples_until = samples_until + sample_array[aStep] acceptance_rate = accept / samples_until msg = ( "updates= {:.2f}, epsilon= {}, u.mean={:e}, acceptance rate: {:.2f}" .format( np.sum(sample_array[1:aStep + 1]) / np.sum(sample_array[1:]) * 100, epsilon, U, acceptance_rate)) self.logger.debug(msg) if acceptance_rate < ar_cutoff: broken_preemptively = True self.logger.debug( "Stopping as acceptance rate is lower than cutoff") break # 5: Resampling if number of accepted particles greater than resample if accept >= resample and U > 1e-100: self.logger.info("Weighted resampling") weight = np.exp(-distances * delta / U) weight = weight / sum(weight) index_resampled = self.rng.choice(np.arange(n_samples, dtype=int), n_samples, replace=1, p=weight) accepted_parameters = [ accepted_parameters[i] for i in index_resampled ] distances = distances[index_resampled] ## Update U and epsilon: # # epsilon = epsilon * (1 - delta) # U = np.mean(distances) # # epsilon = self._schedule(U, v) # epsilon = np.percentile(distances, .1 * 100) U = self._average_redefined_distance(distances, epsilon * (1 - delta)) epsilon = self._schedule(U, v) ## Print effective sampling size print('Resampling: Effective sampling size: ', 1 / sum(pow(weight / sum(weight), 2))) accept = 0 samples_until = 0 ## Compute and broadcast accepted parameters, accepted kernel parameters and accepted Covariance matrix # Broadcast Accepted parameters and add to journal self.accepted_parameters_manager.update_broadcast( self.backend, accepted_weights=accepted_weights, accepted_parameters=accepted_parameters) # Compute Accepetd Kernel parameters and broadcast them kernel_parameters = [] for kernel in self.kernel.kernels: kernel_parameters.append( self.accepted_parameters_manager. get_accepted_parameters_bds_values(kernel.models)) self.accepted_parameters_manager.update_kernel_values( self.backend, kernel_parameters=kernel_parameters) # Compute Kernel Covariance Matrix and broadcast it new_cov_mats = self.kernel.calculate_cov( self.accepted_parameters_manager) accepted_cov_mats = [] for new_cov_mat in new_cov_mats: if not (new_cov_mat.size == 1): accepted_cov_mats.append(beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(new_cov_mat.shape[0])) else: accepted_cov_mats.append( (beta * new_cov_mat + 0.0001 * new_cov_mat).reshape(1, 1)) self.accepted_parameters_manager.update_broadcast( self.backend, accepted_cov_mats=accepted_cov_mats) if (full_output == 1 and aStep <= steps - 1): ## Saving intermediate configuration to output journal. print('Saving after resampling') journal.add_accepted_parameters( copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_distances(copy.deepcopy(distances)) names_and_parameters = self._get_names_and_parameters() journal.add_user_parameters(names_and_parameters) journal.number_of_simulations.append( self.simulation_counter) else: ## Compute and broadcast accepted parameters, accepted kernel parameters and accepted Covariance matrix # Broadcast Accepted parameters self.accepted_parameters_manager.update_broadcast( self.backend, accepted_weights=accepted_weights, accepted_parameters=accepted_parameters) # Compute Accepetd Kernel parameters and broadcast them kernel_parameters = [] for kernel in self.kernel.kernels: kernel_parameters.append( self.accepted_parameters_manager. get_accepted_parameters_bds_values(kernel.models)) self.accepted_parameters_manager.update_kernel_values( self.backend, kernel_parameters=kernel_parameters) # Compute Kernel Covariance Matrix and broadcast it new_cov_mats = self.kernel.calculate_cov( self.accepted_parameters_manager) accepted_cov_mats = [] for new_cov_mat in new_cov_mats: if not (new_cov_mat.size == 1): accepted_cov_mats.append(beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(new_cov_mat.shape[0])) else: accepted_cov_mats.append( (beta * new_cov_mat + 0.0001 * new_cov_mat).reshape(1, 1)) self.accepted_parameters_manager.update_broadcast( self.backend, accepted_cov_mats=accepted_cov_mats) if (full_output == 1 and aStep <= steps - 1): ## Saving intermediate configuration to output journal. journal.add_accepted_parameters( copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_distances(copy.deepcopy(distances)) names_and_parameters = self._get_names_and_parameters() journal.add_user_parameters(names_and_parameters) journal.number_of_simulations.append( self.simulation_counter) # Add epsilon_arr, number of final steps and final output to the journal # print("INFO: Saving final configuration to output journal.") if (full_output == 0) or (full_output == 1 and broken_preemptively and aStep <= steps - 1): journal.add_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_distances(copy.deepcopy(distances)) self.accepted_parameters_manager.update_broadcast( self.backend, accepted_parameters=accepted_parameters, accepted_weights=accepted_weights) names_and_parameters = self._get_names_and_parameters() journal.add_user_parameters(names_and_parameters) journal.number_of_simulations.append(self.simulation_counter) journal.configuration["steps"] = aStep + 1 journal.configuration["epsilon"] = epsilon return journal