def sample(self, journal_file):
    journal = Journal.fromFile(journal_file)
    accepted_parameters = journal.get_accepted_parameters(-1)
    accepted_weights = journal.get_weights(-1)
    n_samples = journal.configuration["n_samples"]

    self.accepted_parameters_manager.broadcast(self.backend, 1)
    # Broadcast accepted parameters and accepted weights
    self.accepted_parameters_manager.update_broadcast(
        self.backend, accepted_parameters=accepted_parameters,
        accepted_weights=accepted_weights)

    seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=n_samples, dtype=np.uint32)
    rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr])
    index_arr = np.arange(0, n_samples, 1)
    data_arr = []
    for i in range(len(rng_arr)):
        data_arr.append([rng_arr[i], index_arr[i]])
    data_pds = self.backend.parallelize(data_arr)

    parameters_simulations_pds = self.backend.map(self._sample_parameter, data_pds)
    parameters_simulations = self.backend.collect(parameters_simulations_pds)
    parameters, simulations = [list(t) for t in zip(*parameters_simulations)]

    parameters = np.squeeze(np.array(parameters))
    simulations = np.squeeze(np.array(simulations))

    return parameters, simulations
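# Usage sketch for the sample() method above (assumptions: it lives on the
# DrawFromPosterior class used in the prediction script later in this collection, and the
# journal filename is a placeholder for a file written by a previous inference run):
#
#     from abcpy.backends import BackendDummy
#     from model import DrawFromPosterior
#
#     backend = BackendDummy()
#     dp = DrawFromPosterior([Bass], backend)
#     parameters, simulations = dp.sample(journal_file='apmcabc_bass_obs.jrnl')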
def test_load_and_save(self):
    params1 = np.zeros((2, 4))
    weights1 = np.zeros((2, 4))

    journal = Journal(0)
    # journal.add_parameters(params1)
    journal.add_weights(weights1)
    journal.save('journal_tests_testfile.pkl')

    new_journal = Journal.fromFile('journal_tests_testfile.pkl')
    # np.testing.assert_equal(journal.parameters, new_journal.parameters)
    np.testing.assert_equal(journal.weights, new_journal.weights)
def analyse_journal(journal):
    # output parameters and weights
    print(journal.parameters)
    print(journal.weights)

    # do post analysis
    print(journal.posterior_mean())
    print(journal.posterior_cov())
    print(journal.posterior_histogram())

    # print configuration
    print(journal.configuration)

    # save and load journal
    journal.save("experiments.jnl")
    from abcpy.output import Journal
    new_journal = Journal.fromFile('experiments.jnl')
def analyse_journal(journal):
    # output parameters and weights
    print(journal.opt_values)
    print(journal.get_weights())

    # do post analysis
    print(journal.posterior_mean())
    print(journal.posterior_cov())

    # print configuration
    print(journal.configuration)

    # plot posterior
    journal.plot_posterior_distr(path_to_save="posterior.png")

    # save and load journal
    journal.save("experiments.jnl")
    from abcpy.output import Journal
    new_journal = Journal.fromFile('experiments.jnl')
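# A self-contained sketch that produces a journal to feed analyse_journal (modelled on the
# standard ABCpy Gaussian getting-started example; the priors, observation and tuning values
# below are illustrative assumptions, not taken from the original scripts):
import numpy as np
from abcpy.continuousmodels import Normal, Uniform
from abcpy.statistics import Identity
from abcpy.distances import Euclidean
from abcpy.backends import BackendDummy
from abcpy.inferences import PMCABC

y_obs = list(np.random.normal(170, 15, 100))  # synthetic observed data

# model: Gaussian with uniform priors on mean and standard deviation
mu = Uniform([[150], [200]], name='mu')
sigma = Uniform([[5], [25]], name='sigma')
height = Normal([mu, sigma], name='height')

distance = Euclidean(Identity(degree=2, cross=False))
sampler = PMCABC([height], [distance], BackendDummy(), seed=1)
journal = sampler.sample([y_obs], steps=3, epsilon_init=np.array([500]), n_samples=100,
                         n_samples_per_param=1, epsilon_percentile=10)
analyse_journal(journal)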
import numpy as np
from scipy import stats
from scipy.optimize import minimize
from abcpy.output import Journal


def compute_posterior_mode(whichobs, dim=None, type='multiply', data_type='_obs_'):
    print(data_type)
    journal_sabc = Journal.fromFile('Journals/sabc' + data_type + str(whichobs) + '_' + type + '.jrnl')
    # filenamere = '../APMCABC_Results/apmcabc_obs_' + str(whichobs) + '_re.jrnl'
    # if os.path.isfile(filenamere):
    #     filename = '../APMCABC_Results/apmcabc_obs_' + str(whichobs) + '_re.jrnl'
    # else:
    #     filename = '../APMCABC_Results/apmcabc_obs_' + str(whichobs) + '_reweighted.jrnl'
    # journal_sabc = Journal.fromFile(filename)

    # Convert posterior samples to a matrix
    weights = np.concatenate(journal_sabc.get_weights(-1))
    accepted_parameters = journal_sabc.get_accepted_parameters(-1)
    parameters = []
    for ind in range(511):
        parameters.append(np.array([x[0] for x in accepted_parameters[ind]]).reshape(-1, 1))
    parameters = np.array(parameters).squeeze()
    if dim is not None:
        parameters = parameters[:, dim]
    # print(weights.shape, parameters.shape)

    kernel = stats.gaussian_kde(parameters.transpose(), weights=weights, bw_method=0.45)

    def rosen(x):
        return -np.log(kernel(x))

    posterior_mean = np.mean(parameters, axis=0)
    res = minimize(rosen, posterior_mean, method='nelder-mead',
                   options={'xatol': 1e-8, 'disp': False})
    return np.array(res.x).reshape(1, -1)
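# Usage sketch (assumes the Journals/sabc*.jrnl files referenced above exist; the range of
# observation indices is a placeholder):
#
#     modes = np.vstack([compute_posterior_mode(whichobs) for whichobs in range(10)])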
from abcpy.output import Journal
import numpy as np
import pylab as plt
from scipy.stats import gaussian_kde

type = 'simulated'
# type = 'observed'

if type == 'simulated':
    # filename = 'VolcanojournalAPMCABC_triplet_simulated'
    filename = 'VolcanojournalAPMCABC_simulated'
else:
    # filename = 'VolcanojournalAPMCABC_triplet-fromfile_Pululagua'
    filename = 'VolcanojournalAPMCABC_pululagua'

journal = Journal.fromFile(filename + '.jrnl')
# for ind in range(len(journal.weights)):
#     journal.weights[ind] = journal.weights[ind] / sum(journal.weights[ind])
# journal.save(filename + '_normalized.jrnl')

mean = journal.posterior_mean()
print(journal.posterior_mean())
print(journal.posterior_cov())

k = -1
prioru = np.concatenate(journal.get_parameters(0)['U0'])
priorl = np.concatenate(journal.get_parameters(0)['L'])
postu = np.concatenate(journal.get_parameters(k)['U0']).reshape(-1, )
postl = np.concatenate(journal.get_parameters(k)['L']).reshape(-1, )
weights = np.concatenate(journal.get_weights(k))
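# gaussian_kde is imported above but unused in this excerpt; a sketch of a weighted
# posterior density plot for U0 (grid size and output filename are assumed choices):
kde_postu = gaussian_kde(postu, weights=weights)
grid_u = np.linspace(postu.min(), postu.max(), 200)
plt.figure()
plt.plot(grid_u, kde_postu(grid_u))
plt.xlabel('U0')
plt.ylabel('posterior density')
plt.savefig(filename + '_U0_kde.png')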
# load the actual observation
if "fullLorenz95" in model:
    x_obs = np.load(observation_folder + "x_obs{}.npy".format(obs_index + 1))
else:
    x_obs = np.load(observation_folder + "timeseriers_obs{}.npy".format(obs_index + 1))
# theta_obs = np.load(observation_folder + "theta_obs{}.npy".format(obs_index + 1))

# reshape the observation:
x_obs = x_obs.reshape(num_vars_in_Lorenz, -1)
# print(x_obs.shape)

# load now the posterior for that observation
if inference_technique == "ABC":
    jrnl_ABC = Journal.fromFile(inference_folder + "jrnl" + namefile_postfix + ".jnl")
    params, weights = extract_params_and_weights_from_journal(jrnl_ABC)
    # subsample journal according to weights (bootstrap); a sketch of such a helper
    # follows this excerpt:
    # params_ABC_subsampled = subsample_params_according_to_weights(params_ABC, weights_ABC,
    #                                                               size=n_post_samples)
else:
    trace_exchange = np.load(inference_folder + f"exchange_mcmc_trace{obs_index + 1}.npy")
    # subsample trace:
    params = subsample_trace(trace_exchange, size=subsample_size_exchange)
    weights = None
# print("Results loaded correctly")

# now simulate for all the different param values
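# A minimal sketch of the weighted bootstrap the commented-out call above refers to
# (the real subsample_params_according_to_weights helper is defined elsewhere and is
# assumed to behave equivalently):
import numpy as np

def subsample_params_according_to_weights_sketch(params, weights, size, rng=np.random):
    # draw row indices with probability proportional to the journal weights
    idx = rng.choice(len(params), size=size, p=weights.reshape(-1) / np.sum(weights))
    return params[idx]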
# epsilon_percentile = 70
#
# if "epsilon_arr" in jrnl.configuration.keys():
#     eps = np.percentile(jrnl.distances[-1], epsilon_percentile)
#     print("using epsilon from last step: ", eps)
#
# start_journal_path = results_folder + "journal_3.jrl"
# jrnl = ABC_inference("PMCABC", model, observation_france, final_distance, eps=eps, n_samples=500, n_steps=20,
#                      backend=backend, full_output=1, journal_file=start_journal_path,
#                      epsilon_percentile=epsilon_percentile,
#                      journal_file_save=results_folder + "journal_4")
#
# # save the journal
# jrnl.save(results_folder + "PMCABC_inf4.jrl")

print("Inference 5")
jrnl = Journal.fromFile(results_folder + "journal_4.jrl")
epsilon_percentile = 70

if "epsilon_arr" in jrnl.configuration.keys():
    eps = np.percentile(jrnl.distances[-1], epsilon_percentile)
    print("using epsilon from last step: ", eps)

start_journal_path = results_folder + "journal_4.jrl"
jrnl = ABC_inference("PMCABC", model, observation_france, final_distance, eps=eps, n_samples=500, n_steps=20,
                     backend=backend, full_output=1, journal_file=start_journal_path,
                     epsilon_percentile=epsilon_percentile,
                     journal_file_save=results_folder + "journal_5")

# save the journal
jrnl.save(results_folder + "PMCABC_inf5.jrl")
print('SABC Inferring')
# We use resultfakeobs1 as our observed dataset
journal_sabc = sampler.sample([resultfakeobs1], steps=steps, epsilon=epsilon, n_samples=n_samples,
                              n_samples_per_param=n_samples_per_param, ar_cutoff=ar_cutoff,
                              full_output=full_output, journal_file=journal_file)
print(journal_sabc.posterior_mean())
journal_sabc.save('sabc_earthworm_fakeobs1.jrnl')

from abcpy.output import Journal

jrnl = Journal.fromFile('sabc_earthworm_fakeobs1.jrnl')
print(jrnl.configuration)

fig, ax = jrnl.plot_ESS()
fig.savefig('ess.pdf')

true_param_value = [967, 0.25, 3.6, 10.6, 3.6, 3.5, 0.15, 0.011, 0.015, 0.5,
                    0.25, 0.177, 0.182, 0.004]
parameters = ['B_0', 'activation_energy', 'energy_tissue', 'energy_food', 'energy_synthesis',
              'half_saturation_coeff', 'max_ingestion_rate', 'mass_birth', 'mass_cocoon',
              'mass_maximum', 'mass_sexual_maturity', 'growth_constant', 'max_reproduction_rate',
              'speed']
jrnl.plot_posterior_distr(path_to_save='posterior_1.pdf',
                          parameters_to_show=parameters[:5])
def sample(self, observations, steps, epsilon_init, n_samples=10000, n_samples_per_param=1,
           epsilon_percentile=10, covFactor=2, full_output=0, journal_file=None,
           journal_file_save=None):
    """Samples from the posterior distribution of the model parameter given the observed
    data observations.

    Parameters
    ----------
    observations : list
        A list, containing lists describing the observed data sets
    steps : integer
        Number of iterations in the sequential algorithm ("generations")
    epsilon_init : numpy.ndarray
        An array of proposed values of epsilon to be used at each step. Can be supplied as
        a single value to be used as the threshold in step 1, or as a `steps`-dimensional
        array of values to be used as the threshold in every step.
    n_samples : integer, optional
        Number of samples to generate. The default value is 10000.
    n_samples_per_param : integer, optional
        Number of data points in each simulated data set. The default value is 1.
    epsilon_percentile : float, optional
        A value between [0, 100]. The default value is 10.
    covFactor : float, optional
        Scaling parameter of the covariance matrix. The default value is 2 as considered in [1].
    full_output : integer, optional
        If full_output==1, intermediate results are included in the output journal.
        The default value is 0, meaning the intermediate results are not saved.
    journal_file : str, optional
        Filename of a journal file to read an already saved journal file, from which
        the first iteration will start. The default value is None.
    journal_file_save : str, optional
        Filename prefix under which the journal is saved at each step. The default value
        is None, meaning intermediate journals are not written to disk.

    Returns
    -------
    abcpy.output.Journal
        A journal containing simulation results, metadata and optionally intermediate results.
    """
    self.accepted_parameters_manager.broadcast(self.backend, observations)
    self.n_samples = n_samples
    self.n_samples_per_param = n_samples_per_param

    if journal_file is None:
        journal = Journal(full_output)
        journal.configuration["type_model"] = [type(model).__name__ for model in self.model]
        journal.configuration["type_dist_func"] = type(self.distance).__name__
        journal.configuration["n_samples"] = self.n_samples
        journal.configuration["n_samples_per_param"] = self.n_samples_per_param
        journal.configuration["steps"] = steps
        journal.configuration["epsilon_percentile"] = epsilon_percentile
    else:
        journal = Journal.fromFile(journal_file)

    accepted_parameters = None
    accepted_weights = None
    accepted_cov_mats = None

    # Define epsilon_arr
    if len(epsilon_init) == steps:
        epsilon_arr = epsilon_init
    else:
        if len(epsilon_init) == 1:
            epsilon_arr = [None] * steps
            epsilon_arr[0] = epsilon_init
        else:
            raise ValueError("The length of epsilon_init can only be equal to 1 or steps.")

    # main PMCABC algorithm
    self.logger.info("Starting PMC iterations")
    for aStep in range(steps):
        self.logger.debug("iteration {} of PMC algorithm".format(aStep))
        if aStep == 0 and journal_file is not None:
            accepted_parameters = journal.get_accepted_parameters(-1)
            accepted_weights = journal.get_weights(-1)
            self.accepted_parameters_manager.update_broadcast(
                self.backend, accepted_parameters=accepted_parameters,
                accepted_weights=accepted_weights)

            kernel_parameters = []
            for kernel in self.kernel.kernels:
                kernel_parameters.append(
                    self.accepted_parameters_manager.get_accepted_parameters_bds_values(kernel.models))
            self.accepted_parameters_manager.update_kernel_values(
                self.backend, kernel_parameters=kernel_parameters)

            # 3: calculate covariance
            self.logger.info("Calculating covariance matrix")
            new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager)
            # Since each entry of new_cov_mats is a numpy array, we can multiply like this
            accepted_cov_mats = [covFactor * new_cov_mat for new_cov_mat in new_cov_mats]

        seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=n_samples, dtype=np.uint32)
        rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr])
        rng_pds = self.backend.parallelize(rng_arr)

        # 0: update remotely required variables
        # print("INFO: Broadcasting parameters.")
        self.logger.info("Broadcasting parameters")
        self.epsilon = epsilon_arr[aStep]
        self.accepted_parameters_manager.update_broadcast(
            self.backend, accepted_parameters, accepted_weights, accepted_cov_mats)

        # 1: calculate resample parameters
        # print("INFO: Resampling parameters")
        self.logger.info("Resampling parameters")
        params_and_dists_and_counter_pds = self.backend.map(self._resample_parameter, rng_pds)
        params_and_dists_and_counter = self.backend.collect(params_and_dists_and_counter_pds)
        new_parameters, distances, counter = [list(t) for t in zip(*params_and_dists_and_counter)]
        new_parameters = np.array(new_parameters)
        distances = np.array(distances)

        for count in counter:
            self.simulation_counter += count

        # Compute epsilon for next step
        # print("INFO: Calculating acceptance threshold (epsilon).")
        self.logger.info("Calculating acceptance threshold")
        if aStep < steps - 1:
            if epsilon_arr[aStep + 1] is None:
                epsilon_arr[aStep + 1] = np.percentile(distances, epsilon_percentile)
            else:
                epsilon_arr[aStep + 1] = np.max(
                    [np.percentile(distances, epsilon_percentile), epsilon_arr[aStep + 1]])

        # 2: calculate weights for new parameters
        self.logger.info("Calculating weights")
        new_parameters_pds = self.backend.parallelize(new_parameters)
        new_weights_pds = self.backend.map(self._calculate_weight, new_parameters_pds)
        new_weights = np.array(self.backend.collect(new_weights_pds)).reshape(-1, 1)
        sum_of_weights = 0.0
        for w in new_weights:
            sum_of_weights += w
        new_weights = new_weights / sum_of_weights

        # The calculation of cov_mats needs the new weights and new parameters
        self.accepted_parameters_manager.update_broadcast(
            self.backend, accepted_parameters=new_parameters, accepted_weights=new_weights)

        # The parameters relevant to each kernel have to be used to calculate n_sample times.
        # It is therefore more efficient to broadcast these parameters once, instead of
        # collecting them at each kernel in each step.
        kernel_parameters = []
        for kernel in self.kernel.kernels:
            kernel_parameters.append(
                self.accepted_parameters_manager.get_accepted_parameters_bds_values(kernel.models))
        self.accepted_parameters_manager.update_kernel_values(
            self.backend, kernel_parameters=kernel_parameters)

        # 3: calculate covariance
        self.logger.info("Calculating covariance matrix")
        new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager)
        # Since each entry of new_cov_mats is a numpy array, we can multiply like this
        new_cov_mats = [covFactor * new_cov_mat for new_cov_mat in new_cov_mats]

        # 4: Update the newly computed values
        accepted_parameters = new_parameters
        accepted_weights = new_weights
        accepted_cov_mats = new_cov_mats

        self.logger.info("Save configuration to output journal")
        if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1):
            journal.add_accepted_parameters(copy.deepcopy(accepted_parameters))
            journal.add_distances(copy.deepcopy(distances))
            journal.add_weights(copy.deepcopy(accepted_weights))
            self.accepted_parameters_manager.update_broadcast(
                self.backend, accepted_parameters=accepted_parameters,
                accepted_weights=accepted_weights)
            names_and_parameters = self._get_names_and_parameters()
            journal.add_user_parameters(names_and_parameters)
            journal.number_of_simulations.append(self.simulation_counter)

        print(journal_file_save)
        if journal_file_save is not None:
            if full_output == 1:
                # avoid writing a lot of different files
                journal.save(journal_file_save + '.jrl')
            else:
                journal.save(journal_file_save + '_' + str(aStep) + '.jrl')

    # Add epsilon_arr to the journal
    journal.configuration["epsilon_arr"] = epsilon_arr

    return journal
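# Usage sketch for the PMCABC sample() method above (the sampler construction and the
# observed data y_obs are assumed; the resume pattern mirrors the inference scripts in
# this collection):
#
#     eps_init = np.array([500])  # single starting threshold; later ones come from percentiles
#     journal = sampler.sample([y_obs], steps=4, epsilon_init=eps_init, n_samples=1000,
#                              n_samples_per_param=1, epsilon_percentile=10,
#                              journal_file_save='results/journal')
#     journal.save('results/pmcabc_run1.jrl')
#     # resume from the saved journal for further generations:
#     journal = sampler.sample([y_obs], steps=4, epsilon_init=eps_init, n_samples=1000,
#                              journal_file='results/pmcabc_run1.jrl')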
def sample(self, observations, steps, epsilon, n_samples=10000, n_samples_per_param=1, beta=2,
           delta=0.2, v=0.3, ar_cutoff=0.1, resample=None, n_update=None, full_output=0,
           journal_file=None):
    """Samples from the posterior distribution of the model parameter given the observed
    data observations.

    Parameters
    ----------
    observations : list
        A list, containing lists describing the observed data sets
    steps : integer
        Maximum number of iterations in the sequential algorithm ("generations")
    epsilon : numpy.float
        A proposed value of threshold to start with.
    n_samples : integer, optional
        Number of samples to generate. The default value is 10000.
    n_samples_per_param : integer, optional
        Number of data points in each simulated data set. The default value is 1.
    beta : numpy.float
        Tuning parameter of SABC. The default value is 2.
    delta : numpy.float
        Tuning parameter of SABC. The default value is 0.2.
    v : numpy.float, optional
        Tuning parameter of SABC. The default value is 0.3.
    ar_cutoff : numpy.float
        Acceptance ratio cutoff. The default value is 0.1.
    resample : int, optional
        Resample after this many acceptances. The default value is None, in which case
        the value of n_samples is used.
    n_update : int, optional
        Number of perturbed parameters at each step. The default value is None, in which
        case the value of n_samples is used.
    full_output : integer, optional
        If full_output==1, intermediate results are included in the output journal.
        The default value is 0, meaning the intermediate results are not saved.
    journal_file : str, optional
        Filename of a journal file to read an already saved journal file, from which
        the first iteration will start. The default value is None.

    Returns
    -------
    abcpy.output.Journal
        A journal containing simulation results, metadata and optionally intermediate results.
    """
    global broken_preemptively
    self.sample_from_prior(rng=self.rng)
    self.accepted_parameters_manager.broadcast(self.backend, observations)
    self.epsilon = epsilon
    self.n_samples = n_samples
    self.n_samples_per_param = n_samples_per_param

    if journal_file is None:
        journal = Journal(full_output)
        journal.configuration["type_model"] = [type(model).__name__ for model in self.model]
        journal.configuration["type_dist_func"] = type(self.distance).__name__
        journal.configuration["type_kernel_func"] = type(self.kernel)
        journal.configuration["n_samples"] = self.n_samples
        journal.configuration["n_samples_per_param"] = self.n_samples_per_param
        journal.configuration["beta"] = beta
        journal.configuration["delta"] = delta
        journal.configuration["v"] = v
        journal.configuration["ar_cutoff"] = ar_cutoff
        journal.configuration["resample"] = resample
        journal.configuration["n_update"] = n_update
        journal.configuration["full_output"] = full_output
    else:
        journal = Journal.fromFile(journal_file)

    accepted_parameters = None
    distances = np.zeros(shape=(n_samples,))
    smooth_distances = np.zeros(shape=(n_samples,))
    accepted_weights = np.ones(shape=(n_samples, 1))
    all_distances = None
    accepted_cov_mat = None

    if resample is None:
        resample = n_samples
    if n_update is None:
        n_update = n_samples
    sample_array = np.ones(shape=(steps,))
    sample_array[0] = n_samples
    sample_array[1:] = n_update

    ## Acceptance counter to determine the resampling step
    accept = 0
    samples_until = 0

    ## Flag for whether the loop was broken preemptively
    broken_preemptively = False

    for aStep in range(0, steps):
        self.logger.debug("step {}".format(aStep))
        if aStep == 0 and journal_file is not None:
            accepted_parameters = journal.get_accepted_parameters(-1)
            accepted_weights = journal.get_weights(-1)

            # Broadcast accepted parameters and accepted weights
            self.accepted_parameters_manager.update_broadcast(
                self.backend, accepted_parameters=accepted_parameters,
                accepted_weights=accepted_weights)

            kernel_parameters = []
            for kernel in self.kernel.kernels:
                kernel_parameters.append(
                    self.accepted_parameters_manager.get_accepted_parameters_bds_values(kernel.models))

            # Broadcast accepted kernel parameters
            self.accepted_parameters_manager.update_kernel_values(
                self.backend, kernel_parameters=kernel_parameters)

            new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager)
            accepted_cov_mats = []
            for new_cov_mat in new_cov_mats:
                if not new_cov_mat.size == 1:
                    accepted_cov_mats.append(
                        beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(new_cov_mat.shape[0]))
                else:
                    accepted_cov_mats.append((beta * new_cov_mat + 0.0001 * new_cov_mat).reshape(1, 1))

            # Broadcast accepted covariance matrix
            self.accepted_parameters_manager.update_broadcast(
                self.backend, accepted_cov_mats=accepted_cov_mats)

        # main SABC algorithm
        # print("INFO: Initialization of SABC")
        seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=int(sample_array[aStep]),
                                    dtype=np.uint32)
        rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr])
        index_arr = self.rng.randint(0, self.n_samples, size=int(sample_array[aStep]), dtype=np.uint32)
        data_arr = []
        for i in range(len(rng_arr)):
            data_arr.append([rng_arr[i], index_arr[i]])
        data_pds = self.backend.parallelize(data_arr)

        # 0: update remotely required variables
        self.logger.info("Broadcasting parameters")
        self.epsilon = epsilon

        # 1: Calculate parameters
        self.logger.info("Initial accepted parameters")
        params_and_dists_pds = self.backend.map(self._accept_parameter, data_pds)
        params_and_dists = self.backend.collect(params_and_dists_pds)
        new_parameters, filenames, index, acceptance, counter = [list(t) for t in zip(*params_and_dists)]

        # Keep a counter of the number of simulations
        for count in counter:
            self.simulation_counter += count

        # new_parameters = np.array(new_parameters)
        index = np.array(index)
        acceptance = np.array(acceptance)

        # Read all_distances at the initial step
        if aStep == 0:
            index = np.linspace(0, n_samples - 1, n_samples).astype(int).reshape(n_samples, )
            accept = 0

        # Initialize/update the accepted parameters and their corresponding distances
        if accepted_parameters is None:
            accepted_parameters = new_parameters
        else:
            for ind in range(len(acceptance)):
                if acceptance[ind] == 1:
                    accepted_parameters[index[ind]] = new_parameters[ind]

        # 1.5: Update the distance and recompute distances from observed data
        self.logger.info("Updating distance")
        distances = self.distance.distances[0].update(
            filenames, self.accepted_parameters_manager.observations_bds.value(), self.backend)
        self._update_broadcasts(distances=distances)

        # 2: Compute epsilon
        U = self._average_redefined_distance(distances, epsilon * (1 - delta))
        epsilon = self._schedule(U, v)
        # U = np.mean(distances)
        # epsilon = np.percentile(distances, .1 * 100)
        print(epsilon)

        # 4: Show progress, and break the iteration if the acceptance rate is
        # smaller than the cutoff
        if aStep > 0:
            accept = accept + np.sum(acceptance)
            samples_until = samples_until + sample_array[aStep]
            acceptance_rate = accept / samples_until
            msg = ("updates= {:.2f}, epsilon= {}, u.mean={:e}, acceptance rate: {:.2f}"
                   .format(np.sum(sample_array[1:aStep + 1]) / np.sum(sample_array[1:]) * 100,
                           epsilon, U, acceptance_rate))
            self.logger.debug(msg)
            if acceptance_rate < ar_cutoff:
                broken_preemptively = True
                self.logger.debug("Stopping as acceptance rate is lower than cutoff")
                break

        # 5: Resampling if the number of accepted particles is greater than resample
        if accept >= resample and U > 1e-100:
            self.logger.info("Weighted resampling")
            weight = np.exp(-distances * delta / U)
            weight = weight / sum(weight)
            index_resampled = self.rng.choice(np.arange(n_samples, dtype=int), n_samples,
                                              replace=1, p=weight)
            accepted_parameters = [accepted_parameters[i] for i in index_resampled]
            distances = distances[index_resampled]

            ## Update U and epsilon:
            # epsilon = epsilon * (1 - delta)
            # U = np.mean(distances)
            # epsilon = self._schedule(U, v)
            # epsilon = np.percentile(distances, .1 * 100)
            U = self._average_redefined_distance(distances, epsilon * (1 - delta))
            epsilon = self._schedule(U, v)

            ## Print effective sample size
            print('Resampling: Effective sampling size: ', 1 / sum(pow(weight / sum(weight), 2)))
            accept = 0
            samples_until = 0

            ## Compute and broadcast accepted parameters, accepted kernel parameters
            ## and accepted covariance matrix

            # Broadcast accepted parameters and add to journal
            self.accepted_parameters_manager.update_broadcast(
                self.backend, accepted_weights=accepted_weights,
                accepted_parameters=accepted_parameters)

            # Compute accepted kernel parameters and broadcast them
            kernel_parameters = []
            for kernel in self.kernel.kernels:
                kernel_parameters.append(
                    self.accepted_parameters_manager.get_accepted_parameters_bds_values(kernel.models))
            self.accepted_parameters_manager.update_kernel_values(
                self.backend, kernel_parameters=kernel_parameters)

            # Compute kernel covariance matrix and broadcast it
            new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager)
            accepted_cov_mats = []
            for new_cov_mat in new_cov_mats:
                if not new_cov_mat.size == 1:
                    accepted_cov_mats.append(
                        beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(new_cov_mat.shape[0]))
                else:
                    accepted_cov_mats.append((beta * new_cov_mat + 0.0001 * new_cov_mat).reshape(1, 1))
            self.accepted_parameters_manager.update_broadcast(
                self.backend, accepted_cov_mats=accepted_cov_mats)

            if full_output == 1 and aStep <= steps - 1:
                ## Save intermediate configuration to the output journal
                print('Saving after resampling')
                journal.add_accepted_parameters(copy.deepcopy(accepted_parameters))
                journal.add_weights(copy.deepcopy(accepted_weights))
                journal.add_distances(copy.deepcopy(distances))
                names_and_parameters = self._get_names_and_parameters()
                journal.add_user_parameters(names_and_parameters)
                journal.number_of_simulations.append(self.simulation_counter)
        else:
            ## Compute and broadcast accepted parameters, accepted kernel parameters
            ## and accepted covariance matrix

            # Broadcast accepted parameters
            self.accepted_parameters_manager.update_broadcast(
                self.backend, accepted_weights=accepted_weights,
                accepted_parameters=accepted_parameters)

            # Compute accepted kernel parameters and broadcast them
            kernel_parameters = []
            for kernel in self.kernel.kernels:
                kernel_parameters.append(
                    self.accepted_parameters_manager.get_accepted_parameters_bds_values(kernel.models))
            self.accepted_parameters_manager.update_kernel_values(
                self.backend, kernel_parameters=kernel_parameters)

            # Compute kernel covariance matrix and broadcast it
            new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager)
            accepted_cov_mats = []
            for new_cov_mat in new_cov_mats:
                if not new_cov_mat.size == 1:
                    accepted_cov_mats.append(
                        beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(new_cov_mat.shape[0]))
                else:
                    accepted_cov_mats.append((beta * new_cov_mat + 0.0001 * new_cov_mat).reshape(1, 1))
            self.accepted_parameters_manager.update_broadcast(
                self.backend, accepted_cov_mats=accepted_cov_mats)

            if full_output == 1 and aStep <= steps - 1:
                ## Save intermediate configuration to the output journal
                journal.add_accepted_parameters(copy.deepcopy(accepted_parameters))
                journal.add_weights(copy.deepcopy(accepted_weights))
                journal.add_distances(copy.deepcopy(distances))
                names_and_parameters = self._get_names_and_parameters()
                journal.add_user_parameters(names_and_parameters)
                journal.number_of_simulations.append(self.simulation_counter)

    # Add epsilon_arr, number of final steps and final output to the journal
    # print("INFO: Saving final configuration to output journal.")
    if (full_output == 0) or (full_output == 1 and broken_preemptively and aStep <= steps - 1):
        journal.add_accepted_parameters(copy.deepcopy(accepted_parameters))
        journal.add_weights(copy.deepcopy(accepted_weights))
        journal.add_distances(copy.deepcopy(distances))
        self.accepted_parameters_manager.update_broadcast(
            self.backend, accepted_parameters=accepted_parameters,
            accepted_weights=accepted_weights)
        names_and_parameters = self._get_names_and_parameters()
        journal.add_user_parameters(names_and_parameters)
        journal.number_of_simulations.append(self.simulation_counter)

    journal.configuration["steps"] = aStep + 1
    journal.configuration["epsilon"] = epsilon

    return journal
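# Usage sketch for the SABC sample() method above (sampler construction and observed data
# are assumed; the call mirrors the earthworm script earlier in this collection):
#
#     journal = sampler.sample([y_obs], steps=50, epsilon=100.0, n_samples=1000,
#                              n_samples_per_param=1, ar_cutoff=0.001, full_output=1)
#     print(journal.configuration["steps"], journal.configuration["epsilon"])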
import pylab as plt
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics.cluster import adjusted_rand_score
from abcpy.output import Journal
import numpy as np
import os.path
from scipy.stats import gaussian_kde

names = ['pAD', 'pAg', 'pT', 'pF', 'aT', 'v_z_AP', 'v_z_NAP']
margmax, meanpost, ylabel = [], [], []
for ind in range(40):
    whichobs = ind
    filename = 'apmcabc_obs_' + str(whichobs) + '.jrnl'
    if os.path.isfile(filename):
        journal = Journal.fromFile(filename)
        weights = np.concatenate(journal.get_weights())
        post1 = np.concatenate(journal.get_parameters()['pAD'])
        post2 = np.concatenate(journal.get_parameters()['pAg'])
        post3 = np.concatenate(journal.get_parameters()['pT'])
        post4 = np.concatenate(journal.get_parameters()['pF'])
        post5 = np.concatenate(journal.get_parameters()['aT'])
        post6 = np.concatenate(journal.get_parameters()['v_z_AP'])
        post7 = np.concatenate(journal.get_parameters()['v_z_NAP'])
        data = np.hstack((post1, post2, post3, post4, post5, post6, post7))
        meanpost.append(np.array([np.mean(np.concatenate(journal.get_parameters()[x])) for x in names]))
        # meanpost.append(np.array([journal.posterior_mean()[x] for x in names]))
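# AgglomerativeClustering and adjusted_rand_score are imported above but unused in this
# excerpt; a sketch of how the per-observation posterior means could be clustered (the
# number of clusters is an assumed choice, and true_labels is a hypothetical ground truth):
X = np.array(meanpost)
labels = AgglomerativeClustering(n_clusters=2).fit_predict(X)
print(labels)
# print(adjusted_rand_score(true_labels, labels))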
# parameters, simulations = dp.sample(file=dircertory + "/" + 'rejection_one_pcent.csv')
# np.savez(dircertory + "/" + 'rejection_predict.npz', parameters=parameters, simulations=simulations)

from model import DrawFromPosterior

dp = DrawFromPosterior([Bass], backend)
print('Prediction')
parameters, simulations = dp.sample(journal_file=dircertory + "/" + algorithm + '_' + problem + '_obs.jrnl')
np.savez(dircertory + "/" + algorithm + '_' + 'predict.npz', parameters=parameters, simulations=simulations)

if plot:
    from abcpy.output import Journal
    journal_sabc = Journal.fromFile(dircertory + "/" + algorithm + "_" + problem + '_obs.jrnl')

    # Convert posterior samples to a matrix
    accepted_parameters = journal_sabc.get_accepted_parameters(-1)
    parameters = []
    for ind in range(111):
        parameters.append([x[0] for x in accepted_parameters[ind]])
    parameters = np.array(parameters)
    parameters[:, -1] = parameters[:, -1] * 1e+13
    np.savetxt("Figures/sabc_parameters.csv", np.array(parameters), delimiter=",")

    print(journal_sabc.configuration)
    print(journal_sabc.posterior_mean())
    print(journal_sabc.posterior_cov())
def main(epsilon, sigma, filename_prefix, perform_standard_optimal_control=False,
         perform_iterative_strategy=True, use_sample_with_higher_weight=False,
         use_posterior_median=False, n_post_samples=None, shift_each_iteration=1, n_shifts=10,
         window_size=30, only_plot=False, plot_file=None, plot_days=None, loss="deaths_Isc",
         results_folder=None, journal_file_name=None, training_window_length=None, use_mpi=False,
         restart_at_index=None):
    """epsilon is an array with size 3, with order school, work, other.

    If use_sample_with_higher_weight is True: we do the procedure with that sample only,
    no posterior expectation.
    use_posterior_median: do the optimal control with the marginal posterior median.
    n_post_samples: for the posterior expectation. Ignored if use_sample_with_higher_weight
    or use_posterior_median is True.
    shift_each_iteration and n_shifts are for the iterative strategy.
    """
    if use_mpi:
        print("Using MPI")
        backend = BackendMPI()
    else:
        backend = BackendDummy()

    print("Epsilon: ", epsilon)

    logging.basicConfig(level=logging.INFO)

    ############################ Load relevant data #################################################
    if results_folder is None:
        results_folder = "results/SEI4RD_france_infer_1Mar_31Aug/"
    data_folder = "data/france_inference_data_1Mar_to_31Aug/"

    alpha_home = 1  # set this to 1
    mobility_work = np.load(data_folder + "mobility_work.npy")
    mobility_other = np.load(data_folder + "mobility_other.npy")
    mobility_school = np.load(data_folder + "mobility_school.npy")

    france_pop = np.load(data_folder + "france_pop.npy", allow_pickle=True)

    contact_matrix_home = np.load(data_folder + "contact_matrix_home.npy")
    contact_matrix_work = np.load(data_folder + "contact_matrix_work.npy")
    contact_matrix_school = np.load(data_folder + "contact_matrix_school.npy")
    contact_matrix_other = np.load(data_folder + "contact_matrix_other.npy")

    if journal_file_name is None:
        jrnl = Journal.fromFile(results_folder + "PMCABC_inf3.jrl")
    else:
        jrnl = Journal.fromFile(results_folder + journal_file_name)

    #################################### Define Model #################################################
    # parameters
    n = 5  # number of age groups
    dt = 0.1  # integration timestep
    if training_window_length is not None:
        T = training_window_length
    else:
        T = mobility_school.shape[0] - 1  # horizon time in days
    total_population = france_pop  # population for each age group
    # 16th March: Boris Johnson asked old people to isolate; we then learn a new alpha
    # from the 18th March:
    lockdown_day = 17

    # alpha_home = np.repeat(alpha_home, np.int(1 / dt), axis=0)
    mobility_work = np.repeat(mobility_work[0:T + 1], int(1 / dt), axis=0)
    mobility_other = np.repeat(mobility_other[0:T + 1], int(1 / dt), axis=0)
    mobility_school = np.repeat(mobility_school[0:T + 1], int(1 / dt), axis=0)
    # daily_tests = np.repeat(daily_tests, np.int(1 / dt), axis=0)

    # ABC model (priors need to be fixed better):
    beta = Uniform([[0], [0.5]], name='beta')  # controls how fast the epidemics grows. Related to R_0
    d_L = Uniform([[1], [16]], name='d_L')  # average duration of incubation
    d_C = Uniform([[1], [16]], name='d_C')  # average time before going to clinical
    d_R = Uniform([[1], [16]], name='d_R')  # average recovery time
    d_RC = Uniform([[1], [16]], name='d_RC')  # average recovery time
    d_D = Uniform([[1], [16]], name='d_D')  # average duration of infected clinical state (resulting in death)
    p01 = Uniform([[0], [1]], name="p01")
    p02 = Uniform([[0], [1]], name="p02")
    p03 = Uniform([[0], [1]], name="p03")
    p04 = Uniform([[0], [1]], name="p04")
    p05 = Uniform([[0], [1]], name="p05")
    p11 = Uniform([[0], [1]], name="p11")
    p12 = Uniform([[0], [1]], name="p12")
    p13 = Uniform([[0], [1]], name="p13")
    p14 = Uniform([[0], [1]], name="p14")
    p15 = Uniform([[0], [1]], name="p15")
    initial_exposed = Uniform([[0], [500]], name="initial_exposed")
    alpha_123 = Uniform([[0.3], [1]], name="alpha_123")
    alpha_4 = Uniform([[0], [1]], name="alpha_4")
    alpha_5 = Uniform([[0], [1]], name="alpha_5")

    model = SEI4RD(
        [beta, d_L, d_C, d_R, d_RC, d_D, p01, p02, p03, p04, p05, p11, p12, p13, p14, p15,
         initial_exposed, alpha_123, alpha_4, alpha_5],
        tot_population=total_population, T=T,
        contact_matrix_school=contact_matrix_school, contact_matrix_work=contact_matrix_work,
        contact_matrix_home=contact_matrix_home, contact_matrix_other=contact_matrix_other,
        alpha_school=mobility_school, alpha_work=mobility_work, alpha_home=alpha_home,
        alpha_other=mobility_other, modify_alpha_home=False, dt=dt, return_once_a_day=True,
        learn_alphas_old=True, lockdown_day=lockdown_day)

    # guess for a phi function
    NHS_max = 10000

    def phi_func_sc(x):  # this is a hard max function.
        return np.maximum(0, x - NHS_max)

    def phi_func_death(x):  # this is a hard max function.
        return np.maximum(0, x)

    # def phi_func(x):
    #     return np.pow(np.maximum(0, x - NHS_max), 2)

    # def phi_func(x, beta=.1):  # this is the softplus, a smooth version of hard max
    #     threshold = 30
    #     shape = x.shape
    #     x = x.reshape(-1)
    #     new_x = x - NHS_max
    #     indices = new_x * beta < threshold
    #     phi_x = copy.deepcopy(new_x)  # is deepcopy actually needed?
    #     phi_x[indices] = np.log(
    #         1 + np.exp(new_x[indices] * beta)) / beta  # approximate for numerical stability in other places
    #     return phi_x.reshape(shape)

    # extract posterior sample points and bootstrap them:
    seed = 1
    np.random.seed(seed)
    iteration = -1
    weights = jrnl.get_weights(iteration) / np.sum(jrnl.get_weights(iteration))
    params = jrnl.get_parameters(iteration)

    if not use_posterior_median:
        if use_sample_with_higher_weight:
            post_samples = np.where(weights == weights.max())[0]
        else:
            # bootstrap
            if n_post_samples is None:
                n_post_samples = len(weights)
            post_samples = np.random.choice(range(len(weights)), p=weights.reshape(-1),
                                            size=n_post_samples)

        beta_values = np.array([params['beta'][i][0] for i in post_samples])
        kappa_values = np.array([1 / params['d_L'][i][0] for i in post_samples])
        gamma_c_values = np.array([1 / params['d_C'][i][0] for i in post_samples])
        gamma_r_values = np.array([1 / params['d_R'][i][0] for i in post_samples])
        gamma_rc_values = np.array([1 / params['d_RC'][i][0] for i in post_samples])
        nu_values = np.array([1 / params['d_D'][i][0] for i in post_samples])
        rho_values = np.array([
            np.array([params[key][i][0] for key in ['p01', 'p02', 'p03', 'p04', 'p05']]).reshape(-1)
            for i in post_samples])
        rho_prime_values = np.array([
            np.array([params[key][i][0] for key in ['p11', 'p12', 'p13', 'p14', 'p15']]).reshape(-1)
            for i in post_samples])
        alpha_123_values = np.array([params["alpha_123"][i][0] for i in post_samples])
        alpha_4_values = np.array([params["alpha_4"][i][0] for i in post_samples])
        alpha_5_values = np.array([params["alpha_5"][i][0] for i in post_samples])
        initial_exposed_values = np.array([params["initial_exposed"][i][0] for i in post_samples])
    else:
        params_array = np.array(
            [[params[key][i] for i in range(len(params[key]))] for key in params.keys()]).squeeze()
        marginal_medians = {
            key: weighted_quantile(np.array(params[key]).reshape(-1), [0.5], weights.squeeze())
            for i in range(params_array.shape[0]) for key in params.keys()}

        beta_values = np.array([marginal_medians['beta'][0]])
        kappa_values = np.array([1 / marginal_medians['d_L'][0]])
        gamma_c_values = np.array([1 / marginal_medians['d_C'][0]])
        gamma_r_values = np.array([1 / marginal_medians['d_R'][0]])
        gamma_rc_values = np.array([1 / marginal_medians['d_RC'][0]])
        nu_values = np.array([1 / marginal_medians['d_D'][0]])
        rho_values = np.array([
            np.array([marginal_medians[key][0] for key in ['p01', 'p02', 'p03', 'p04', 'p05']]).reshape(-1)])
        rho_prime_values = np.array([
            np.array([marginal_medians[key][0] for key in ['p11', 'p12', 'p13', 'p14', 'p15']]).reshape(-1)])
        alpha_123_values = np.array([marginal_medians["alpha_123"][0]])
        alpha_4_values = np.array([marginal_medians["alpha_4"][0]])
        alpha_5_values = np.array([marginal_medians["alpha_5"][0]])
        initial_exposed_values = np.array([marginal_medians["initial_exposed"][0]])

    # instantiate the posterior cost class:
    posterior_cost = PosteriorCost(
        model, phi_func_sc=phi_func_sc, phi_func_death=phi_func_death, beta_vals=beta_values,
        kappa_vals=kappa_values, gamma_c_vals=gamma_c_values, gamma_r_vals=gamma_r_values,
        gamma_rc_vals=gamma_rc_values, nu_vals=nu_values, rho_vals=rho_values,
        rho_prime_vals=rho_prime_values, alpha_123_vals=alpha_123_values,
        alpha_4_vals=alpha_4_values, alpha_5_vals=alpha_5_values,
        initial_exposed_vals=initial_exposed_values, loss=loss)

    if plot_days is None:
        n_days = 120
    else:
        n_days = plot_days

    end_training_mobility_values = [mobility_school[-1], mobility_work[-1], mobility_other[-1]]
    # the initial mobility is taken assuming values will be kept constant as on the last
    # observed day:
    mobility_initial = copy.deepcopy(
        np.stack((mobility_school[-1] * np.ones(shape=(n_days,)),
                  mobility_work[-1] * np.ones(shape=(n_days,)),
                  mobility_other[-1] * np.ones(shape=(n_days,))))).flatten()

    # Only plot using a mobility file
    if only_plot:
        mobility = np.load(results_folder + plot_file)[:, 0:n_days]
        fig, ax = posterior_cost.produce_plot(mobility, n_days)
        plt.savefig(results_folder + filename_prefix + ".pdf")
        plt.close(fig)
        return

    # try cost computation:
    t = time.time()
    cost_initial = posterior_cost.compute_cost(mobility_initial, n_days, sigma, epsilon, backend)
    # fig, ax = posterior_cost.produce_plot(mobility_initial, n_days)
    # plt.savefig(results_folder + filename_prefix + "evolution_under_final_training_lockdown_conditions.pdf")
    # plt.close(fig)
    cost_no_lockdown = posterior_cost.compute_cost(np.ones_like(mobility_initial), n_days, sigma,
                                                   epsilon, backend)
    # fig, ax = posterior_cost.produce_plot(np.ones_like(mobility_initial), n_days)
    # plt.savefig(results_folder + filename_prefix + "evolution_under_no_lockdown.pdf")
    # plt.close(fig)
    print("Initial cost: {:.2f}, no-lockdown cost: {:.2f}".format(cost_initial, cost_no_lockdown))
    print(time.time() - t)

    # OPTIMAL CONTROL WITH NO MOVING WINDOW APPROACH
    if perform_standard_optimal_control:
        # bounds = different_bounds('startconstrained')
        bounds = different_bounds('realistic', n_days, mobility_initial, end_training_mobility_values)
        results_da = optimize.dual_annealing(posterior_cost.compute_cost, bounds=bounds,
                                             args=(n_days, sigma, epsilon, backend), maxiter=10,
                                             maxfun=1e3, x0=mobility_initial)
        # Plotting the figures
        mobility_initial = mobility_initial.reshape(3, n_days)  # 3 instead of 4 as we are not using alpha_home
        mobility_final = results_da.x.reshape(3, n_days)
        cost_final = posterior_cost.compute_cost(mobility_final, n_days, sigma, epsilon, backend)
        np.save(results_folder + filename_prefix + "mobility_standard", mobility_final)

    # MOVING WINDOW APPROACH
    if perform_iterative_strategy:
        print("Iterative strategy")
        # window_size = 30  # in days
        mobility_initial = copy.deepcopy(
            np.stack((mobility_school[-1] * np.ones(shape=(window_size,)),
                      mobility_work[-1] * np.ones(shape=(window_size,)),
                      mobility_other[-1] * np.ones(shape=(window_size,))))).flatten()

        # shift_each_iteration = 10  # number of days by which to shift the sliding window at each iteration.
        # n_shifts = 10
        total_days = n_shifts * shift_each_iteration
        print(total_days)
        total_mobility = np.zeros((3, total_days))

        if restart_at_index is not None:
            total_mobility = np.load(results_folder + filename_prefix + "mobility_iterative_" +
                                     str(restart_at_index) + ".npy")

        bounds = different_bounds('realistic', n_days=window_size, alpha_initial=mobility_initial,
                                  end_training_alpha_values=end_training_mobility_values)

        for shift_idx in range(n_shifts):
            print('Running shift: ' + str(shift_idx))
            if restart_at_index is not None and shift_idx <= restart_at_index:
                # we exploit the same loop in order to restart, so that the evolution of the
                # model will be the same.
                mobility_final = np.zeros((3, window_size))
                mobility_final[:, 0:shift_each_iteration] = \
                    total_mobility[:, shift_idx * shift_each_iteration:(shift_idx + 1) * shift_each_iteration]
                # keep that constant for the future; this is only used to initialize the next
                # optimal control iteration:
                mobility_final[:, shift_each_iteration:] = \
                    mobility_final[:, shift_each_iteration - 1].reshape(3, 1)
            else:
                # do the optimal control stuff
                results_da = optimize.dual_annealing(posterior_cost.compute_cost, bounds=bounds,
                                                     args=(window_size, sigma, epsilon, backend),
                                                     maxiter=10, maxfun=1e3, x0=mobility_initial)
                # get the result of the optimization in that time window
                mobility_final = results_da.x.reshape(3, window_size)

            # save it to the total_mobility array:
            total_mobility[:, shift_idx * shift_each_iteration:(shift_idx + 1) * shift_each_iteration] = \
                mobility_final[:, 0:shift_each_iteration]

            # Save in-between mobility steps
            np.save(results_folder + filename_prefix + "mobility_iterative_" + str(shift_idx),
                    total_mobility)

            # update now the state of the model:
            posterior_cost.update_states(shift_each_iteration, mobility_final[:, :shift_each_iteration])

            # update mobility_initial as well, with the translated values of mobility_final;
            # it may speed up convergence.
            mobility_initial_tmp = np.zeros_like(mobility_final)
            mobility_initial_tmp[:, 0:window_size - shift_each_iteration] = \
                mobility_final[:, shift_each_iteration:window_size]
            mobility_initial_tmp[:, window_size - shift_each_iteration:] = np.stack(
                [mobility_final[:, window_size - shift_each_iteration - 1]] * shift_each_iteration, axis=1)
            mobility_initial = mobility_initial_tmp.flatten()

        np.save(results_folder + filename_prefix + "mobility_iterative", total_mobility)
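# Example invocation sketch (epsilon is a size-3 array ordered school, work, other, per the
# docstring; the sigma value and filename_prefix here are placeholders):
#
#     if __name__ == '__main__':
#         main(epsilon=np.array([1.0, 1.0, 1.0]), sigma=1.0, filename_prefix="iterative_run",
#              n_post_samples=100, window_size=30, n_shifts=10, shift_each_iteration=10)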
import numpy as np
from abcpy.output import Journal
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

# Read the journal file
claudio_journal = Journal.fromFile('apmcabc_fakeobs1.jrnl')

# Read the parameters used for the specific inference scheme which produced this result,
# stored as key : value pairs
print('Lets find out the configuration of the inference: ' + str(claudio_journal.configuration) + '\n')
# It shows you have run APMCABC for 4 steps with 10000 n_samples etc.
# Most important is epsilon_arr - this tells us the threshold value used at each of the
# 4 steps, which is chosen automatically.
# In this case, your final epsilon value is 0.1696, which is rather large.
# Our goal is to take this value as close to zero as possible.
# TIP: choose more than 4 steps and see how many you need to bring epsilon down to, say, 0.01.
# One thing you can do: start the APMCABC inference from the final samples stored in your
# previous journal file, in this case 'apmcabc_fakeobs1.jrnl'. To do that, pass
# journal_file='apmcabc_fakeobs1.jrnl' to the APMCABC sample method.

# The posterior distribution is approximated by the 10000 (as n_samples=10000) samples drawn
# from the approximate posterior distribution.
# First we read the posterior samples into samples_dictionary, which is a Python dictionary
# (the samples we consider are stored at the end, i.e. after step 4)
samples_dictionary = claudio_journal.get_parameters()

# Check which parameters were inferred; these are stored as the keys
print('The parameters for which inference was done: ' + str(samples_dictionary.keys()) + '\n')

# We also get the weights corresponding to each sample
weights = np.array(claudio_journal.get_weights()).reshape(10000, )
# Normalize weights
weights = weights / sum(weights)
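# With the normalized weights, weighted posterior summaries follow directly from the
# samples (a sketch; the reshape assumes scalar parameters and n_samples=10000 as above):
for name in samples_dictionary.keys():
    samples = np.array(samples_dictionary[name]).reshape(10000, )
    print(name, 'weighted posterior mean:', np.sum(weights * samples))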