def test_load_and_save(self):
    """Round-trip a Journal through save()/fromFile() and verify the weights survive.

    Creates a journal, stores a weights array, pickles it to disk, reloads it,
    and checks the reloaded weights equal the originals. The temporary pickle
    file is removed afterwards so repeated test runs leave no artifacts.
    """
    import os  # local import: only needed for test-file cleanup

    weights1 = np.zeros((2, 4))

    journal = Journal(0)
    # NOTE(review): parameters are deliberately not exercised here — the
    # original test had journal.add_parameters(...) commented out; only the
    # weights round-trip is asserted.
    journal.add_weights(weights1)

    filename = 'journal_tests_testfile.pkl'
    try:
        journal.save(filename)
        new_journal = Journal.fromFile(filename)
        np.testing.assert_equal(journal.weights, new_journal.weights)
    finally:
        # Clean up the on-disk test artifact even if the assertion fails.
        if os.path.isfile(filename):
            os.remove(filename)
def sample(self, observations, steps, epsilon_init, n_samples=10000,
           n_samples_per_param=1, epsilon_percentile=10, covFactor=2,
           full_output=0, journal_file=None, journal_file_save=None):
    """Samples from the posterior distribution of the model parameter given the observed
    data observations.

    Parameters
    ----------
    observations : list
        A list, containing lists describing the observed data sets
    steps : integer
        Number of iterations in the sequential algoritm ("generations")
    epsilon_init : numpy.ndarray
        An array of proposed values of epsilon to be used at each steps. Can be supplied
        A single value to be used as the threshold in Step 1 or a `steps`-dimensional array of values to be
        used as the threshold in evry steps.
    n_samples : integer, optional
        Number of samples to generate. The default value is 10000.
    n_samples_per_param : integer, optional
        Number of data points in each simulated data set. The default value is 1.
    epsilon_percentile : float, optional
        A value between [0, 100]. The default value is 10.
    covFactor : float, optional
        scaling parameter of the covariance matrix. The default value is 2 as considered in [1].
    full_output: integer, optional
        If full_output==1, intermediate results are included in output journal.
        The default value is 0, meaning the intermediate results are not saved.
    journal_file: str, optional
        Filename of a journal file to read an already saved journal file, from which the first
        iteration will start. The default value is None.
    journal_file_save: str, optional
        If provided, the journal is saved to disk at the end of each generation: to a single
        file `journal_file_save + '.jrl'` when full_output==1 (the journal itself accumulates
        the intermediate results), or to one file per generation
        (`journal_file_save + '_<step>.jrl'`) otherwise. The default value is None (no saving).

    Returns
    -------
    abcpy.output.Journal
        A journal containing simulation results, metadata and optionally intermediate results.
    """
    self.accepted_parameters_manager.broadcast(self.backend, observations)
    self.n_samples = n_samples
    self.n_samples_per_param = n_samples_per_param

    if journal_file is None:
        journal = Journal(full_output)
        journal.configuration["type_model"] = [type(model).__name__ for model in self.model]
        journal.configuration["type_dist_func"] = type(self.distance).__name__
        journal.configuration["n_samples"] = self.n_samples
        journal.configuration["n_samples_per_param"] = self.n_samples_per_param
        journal.configuration["steps"] = steps
        journal.configuration["epsilon_percentile"] = epsilon_percentile
    else:
        # Resume from a previously saved journal.
        journal = Journal.fromFile(journal_file)

    accepted_parameters = None
    accepted_weights = None
    accepted_cov_mats = None

    # Define epsilon_arr: either one threshold per generation was supplied, or a
    # single initial threshold whose successors are computed adaptively below.
    if len(epsilon_init) == steps:
        epsilon_arr = epsilon_init
    else:
        if len(epsilon_init) == 1:
            epsilon_arr = [None] * steps
            epsilon_arr[0] = epsilon_init
        else:
            raise ValueError(
                "The length of epsilon_init can only be equal to 1 or steps.")

    # main PMCABC algorithm
    self.logger.info("Starting PMC iterations")
    for aStep in range(steps):
        self.logger.debug("iteration {} of PMC algorithm".format(aStep))

        if aStep == 0 and journal_file is not None:
            # When resuming, rebuild the broadcast state (parameters, weights,
            # kernel values, covariance) from the last generation of the journal.
            accepted_parameters = journal.get_accepted_parameters(-1)
            accepted_weights = journal.get_weights(-1)
            self.accepted_parameters_manager.update_broadcast(
                self.backend,
                accepted_parameters=accepted_parameters,
                accepted_weights=accepted_weights)

            kernel_parameters = []
            for kernel in self.kernel.kernels:
                kernel_parameters.append(
                    self.accepted_parameters_manager.get_accepted_parameters_bds_values(
                        kernel.models))
            self.accepted_parameters_manager.update_kernel_values(
                self.backend, kernel_parameters=kernel_parameters)

            # 3: calculate covariance
            self.logger.info("Calculating covariance matrix")
            new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager)
            # Since each entry of new_cov_mats is a numpy array, we can multiply like this
            accepted_cov_mats = [covFactor * new_cov_mat for new_cov_mat in new_cov_mats]

        # One independent RNG per requested sample so the parallel workers are reproducible.
        seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max,
                                    size=n_samples, dtype=np.uint32)
        rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr])
        rng_pds = self.backend.parallelize(rng_arr)

        # 0: update remotely required variables
        self.logger.info("Broadcasting parameters")
        self.epsilon = epsilon_arr[aStep]
        self.accepted_parameters_manager.update_broadcast(
            self.backend, accepted_parameters, accepted_weights, accepted_cov_mats)

        # 1: calculate resample parameters
        self.logger.info("Resampling parameters")
        params_and_dists_and_counter_pds = self.backend.map(
            self._resample_parameter, rng_pds)
        params_and_dists_and_counter = self.backend.collect(
            params_and_dists_and_counter_pds)
        new_parameters, distances, counter = [
            list(t) for t in zip(*params_and_dists_and_counter)]
        new_parameters = np.array(new_parameters)
        distances = np.array(distances)

        for count in counter:
            self.simulation_counter += count

        # Compute epsilon for next step: the percentile of the current distances,
        # never going below a user-supplied threshold for that step.
        self.logger.info("Calculating acceptance threshold")
        if aStep < steps - 1:
            if epsilon_arr[aStep + 1] is None:
                epsilon_arr[aStep + 1] = np.percentile(distances, epsilon_percentile)
            else:
                epsilon_arr[aStep + 1] = np.max([
                    np.percentile(distances, epsilon_percentile),
                    epsilon_arr[aStep + 1]])

        # 2: calculate weights for new parameters
        self.logger.info("Calculating weights")
        new_parameters_pds = self.backend.parallelize(new_parameters)
        new_weights_pds = self.backend.map(self._calculate_weight, new_parameters_pds)
        new_weights = np.array(self.backend.collect(new_weights_pds)).reshape(-1, 1)
        # Normalize the importance weights so they sum to one.
        new_weights = new_weights / np.sum(new_weights)

        # The calculation of cov_mats needs the new weights and new parameters
        self.accepted_parameters_manager.update_broadcast(
            self.backend,
            accepted_parameters=new_parameters,
            accepted_weights=new_weights)

        # The parameters relevant to each kernel have to be used to calculate n_sample
        # times. It is therefore more efficient to broadcast these parameters once,
        # instead of collecting them at each kernel in each step
        kernel_parameters = []
        for kernel in self.kernel.kernels:
            kernel_parameters.append(
                self.accepted_parameters_manager.get_accepted_parameters_bds_values(
                    kernel.models))
        self.accepted_parameters_manager.update_kernel_values(
            self.backend, kernel_parameters=kernel_parameters)

        # 3: calculate covariance
        self.logger.info("Calculating covariance matrix")
        new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager)
        # Since each entry of new_cov_mats is a numpy array, we can multiply like this
        new_cov_mats = [covFactor * new_cov_mat for new_cov_mat in new_cov_mats]

        # 4: Update the newly computed values
        accepted_parameters = new_parameters
        accepted_weights = new_weights
        accepted_cov_mats = new_cov_mats

        self.logger.info("Save configuration to output journal")
        if (full_output == 1 and aStep <= steps - 1) or \
                (full_output == 0 and aStep == steps - 1):
            journal.add_accepted_parameters(copy.deepcopy(accepted_parameters))
            journal.add_distances(copy.deepcopy(distances))
            journal.add_weights(copy.deepcopy(accepted_weights))
            self.accepted_parameters_manager.update_broadcast(
                self.backend,
                accepted_parameters=accepted_parameters,
                accepted_weights=accepted_weights)
            names_and_parameters = self._get_names_and_parameters()
            journal.add_user_parameters(names_and_parameters)
            journal.number_of_simulations.append(self.simulation_counter)

        if journal_file_save is not None:
            if full_output == 1:
                # avoid writing a lot of different files.
                journal.save(journal_file_save + '.jrl')
            else:
                journal.save(journal_file_save + '_' + str(aStep) + '.jrl')

    # Add epsilon_arr to the journal
    journal.configuration["epsilon_arr"] = epsilon_arr

    return journal