def setUp(self):
    if has_torch:
        self.net = createDefaultNN(2, 3)()
        self.net_with_scaler = ScalerAndNet(self.net, None)
        self.net_with_discard_wrapper = DiscardLastOutputNet(self.net)
        self.stat_calc = NeuralEmbedding(self.net)
        self.stat_calc_with_scaler = NeuralEmbedding(self.net_with_scaler)
        self.stat_calc_with_discard_wrapper = NeuralEmbedding(self.net_with_discard_wrapper)

        # reference input and output
        torch.random.manual_seed(1)
        self.tensor = torch.randn(1, 2)
        self.out = self.net(self.tensor)
        self.out_discard = self.net_with_discard_wrapper(self.tensor)

        # try now the statistics rescaling option:
        mu = Uniform([[-5.0], [5.0]], name='mu')
        sigma = Uniform([[0.0], [10.0]], name='sigma')
        # define a Gaussian model
        self.model = Normal([mu, sigma])

        sampler = DrawFromPrior([self.model], BackendDummy(), seed=1)
        reference_parameters, reference_simulations = sampler.sample_par_sim_pairs(30, 1)
        reference_simulations = reference_simulations.reshape(reference_simulations.shape[0],
                                                              reference_simulations.shape[2])

        self.stat_calc_rescaling = NeuralEmbedding(self.net, reference_simulations=reference_simulations,
                                                   previous_statistics=Identity(degree=2))

    if not has_torch:
        self.assertRaises(ImportError, NeuralEmbedding, None)
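# Usage sketch (not part of the test; assumes the same imports as above and that torch
# is available): a NeuralEmbedding maps a list of simulations through the wrapped net,
# returning one row of summary statistics per simulation. The input array is illustrative.
if has_torch:
    sketch_net = createDefaultNN(2, 3)()  # 2 input features -> 3 summary statistics
    sketch_stats = NeuralEmbedding(sketch_net).statistics([np.array([1.0, 2.0])])
    # expected shape: (1, 3), i.e. one simulation, three learned statistics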
def estimate_bandwidth_timeseries(model_abc, backend, num_vars, n_theta=100, seed=42, return_values=["median"]):
    """Estimate the bandwidth for the Gaussian kernel in KernelSR.

    Specifically, it draws n_theta parameter values from the prior and generates one simulation for each;
    at every timestep, it computes the pairwise distances between those simulations and takes their median.
    The returned value is the median (by default; the mean can be requested instead via `return_values`)
    of these per-timestep medians.
    """
    # generate the values of theta from the prior, with one simulation for each theta
    theta_vect, simulations_theta_vect = DrawFromPrior([model_abc], backend, seed=seed).sample_par_sim_pairs(n_theta, 1)
    simulations_theta_vect = simulations_theta_vect.reshape(n_theta, num_vars, -1)  # last index is the timestep
    n_timestep = simulations_theta_vect.shape[2]
    distances_median = np.zeros(n_timestep)
    for timestep_index in range(n_timestep):
        simulations = simulations_theta_vect[:, :, timestep_index]
        # pairwise distances between all simulations at this timestep, excluding the
        # diagonal (the distance of each simulation to itself):
        distances = np.linalg.norm(
            simulations.reshape(1, n_theta, -1) - simulations.reshape(n_theta, 1, -1), axis=-1)[
            ~np.eye(n_theta, dtype=bool)].reshape(-1)
        # take the median of the pairwise distances:
        distances_median[timestep_index] = np.median(distances)

    return_list = []
    if "median" in return_values:
        return_list.append(np.median(distances_median.flatten()))
    if "mean" in return_values:
        return_list.append(np.mean(distances_median.flatten()))
    return return_list[0] if len(return_list) == 1 else return_list
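# Usage sketch for estimate_bandwidth_timeseries (the model and backend below are
# illustrative, not part of the function's API; any ABCpy model whose simulations
# reshape to (n_theta, num_vars, n_timestep) works):
from abcpy.backends import BackendDummy
from abcpy.continuousmodels import Normal, Uniform

mu = Uniform([[-5.0], [5.0]], name='mu')
sigma = Uniform([[0.0], [10.0]], name='sigma')
model = Normal([mu, sigma])

# request both the median (the default) and the mean of the per-timestep medians:
bandwidth_median, bandwidth_mean = estimate_bandwidth_timeseries(
    model, BackendDummy(), num_vars=1, n_theta=50, return_values=["median", "mean"])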
def setUp(self):
    self.coeff = np.array([[3, 4], [5, 6]])
    self.stat_calc = LinearTransformation(self.coeff, degree=1, cross=False)

    # try now the statistics rescaling option:
    mu = Uniform([[-5.0], [5.0]], name='mu')
    sigma = Uniform([[0.0], [10.0]], name='sigma')
    # define a Gaussian model
    self.model = Normal([mu, sigma])

    sampler = DrawFromPrior([self.model], BackendDummy(), seed=1)
    reference_parameters, reference_simulations = sampler.sample_par_sim_pairs(30, 1)
    reference_simulations = reference_simulations.reshape(reference_simulations.shape[0],
                                                          reference_simulations.shape[2])
    reference_simulations_double = np.concatenate([reference_simulations, reference_simulations], axis=1)

    self.stat_calc_rescaling = LinearTransformation(self.coeff, reference_simulations=reference_simulations_double)
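# Sketch of what the linear transformation computes (illustrative input, outside the
# test): with degree=1 and cross=False, LinearTransformation.statistics multiplies each
# sample by the coefficient matrix, so [1, 2] maps to [1*3 + 2*5, 1*4 + 2*6] = [13, 16].
sketch_calc = LinearTransformation(np.array([[3, 4], [5, 6]]), degree=1, cross=False)
sketch_stats = sketch_calc.statistics([np.array([1.0, 2.0])])
# expected: array([[13., 16.]])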
class GenerateFromJournalTests(unittest.TestCase):
    def setUp(self):
        # setup backend
        dummy = BackendDummy()

        # define a uniform prior distribution
        mu = Uniform([[-5.0], [5.0]], name='mu')
        sigma = Uniform([[0.0], [10.0]], name='sigma')
        # define a Gaussian model
        self.model = Normal([mu, sigma])
        # define a simple uniform model as well
        self.model2 = Uniform([[0], [10]])

        self.sampler = DrawFromPrior([self.model], dummy, seed=1)
        self.original_journal = self.sampler.sample(100)

        self.generate_from_journal = GenerateFromJournal([self.model], dummy, seed=2)
        self.generate_from_journal_2 = GenerateFromJournal([self.model2], dummy, seed=2)

        # expected mean values from bootstrapped samples:
        self.mu_mean = -0.2050921750330999
        self.sigma_mean = 5.178647189918053
        # expected mean values from subsampled samples:
        self.mu_mean_2 = -0.021275259024241676
        self.sigma_mean_2 = 5.672004487129107

    def test_generate(self):
        # sample a single simulation for each parameter value
        parameters, simulations, normalized_weights = self.generate_from_journal.generate(
            journal=self.original_journal)
        self.assertEqual(parameters.shape, (100, 2))
        self.assertEqual(simulations.shape, (100, 1, 1))
        self.assertEqual(normalized_weights.shape, (100,))

        # sample multiple simulations for each parameter value
        parameters, simulations, normalized_weights = self.generate_from_journal.generate(
            self.original_journal, n_samples_per_param=3, iteration=-1)
        self.assertEqual(parameters.shape, (100, 2))
        self.assertEqual(simulations.shape, (100, 3, 1))
        self.assertEqual(normalized_weights.shape, (100,))

    def test_errors(self):
        # check whether using a different model leads to errors:
        with self.assertRaises(RuntimeError):
            self.generate_from_journal_2.generate(self.original_journal)
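# Usage sketch outside the test harness (illustrative, mirroring the setup above):
# GenerateFromJournal re-simulates the model at the parameter values stored in a
# journal, e.g. for posterior predictive checks; the returned weights are the
# normalized posterior weights of those parameter values.
dummy = BackendDummy()
mu = Uniform([[-5.0], [5.0]], name='mu')
sigma = Uniform([[0.0], [10.0]], name='sigma')
model = Normal([mu, sigma])

journal = DrawFromPrior([model], dummy, seed=1).sample(100)
generator = GenerateFromJournal([model], dummy, seed=2)
parameters, simulations, weights = generator.generate(journal, n_samples_per_param=5)
# parameters: (100, 2); simulations: (100, 5, 1); weights: (100,)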
def setUp(self):
    self.stat_calc = Identity(degree=1, cross=False)
    self.stat_calc_pipeline = Identity(degree=2, cross=False, previous_statistics=self.stat_calc)

    # try now the statistics rescaling option:
    mu = Uniform([[-5.0], [5.0]], name='mu')
    sigma = Uniform([[0.0], [10.0]], name='sigma')
    # define a Gaussian model
    self.model = Normal([mu, sigma])

    sampler = DrawFromPrior([self.model], BackendDummy(), seed=1)
    reference_parameters, reference_simulations = sampler.sample_par_sim_pairs(30, 1)
    reference_simulations = reference_simulations.reshape(reference_simulations.shape[0],
                                                          reference_simulations.shape[2])
    reference_simulations_double = np.concatenate([reference_simulations, reference_simulations], axis=1)

    self.stat_calc_rescaling = Identity(reference_simulations=reference_simulations_double)
    self.stat_calc_rescaling_2 = Identity(reference_simulations=reference_simulations)
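# Sketch of the rescaling option (illustrative data, not part of the test): when
# `reference_simulations` is passed, each statistic is rescaled by its standard
# deviation computed on that reference dataset, so the two calculators below differ
# column-wise by a factor of 1 / std(reference, axis=0).
reference = np.random.RandomState(1).randn(30, 2)
plain = Identity(degree=1, cross=False)
rescaled = Identity(degree=1, cross=False, reference_simulations=reference)
data = [np.array([1.0, 2.0])]
# plain.statistics(data) / reference.std(axis=0) should approximately equal
# rescaled.statistics(data)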
def test_resample(self):
    # -- setup --
    # setup backend
    dummy = BackendDummy()

    # define a uniform prior distribution
    mu = Uniform([[-5.0], [5.0]], name='mu')
    sigma = Uniform([[0.0], [10.0]], name='sigma')
    # define a Gaussian model
    model = Normal([mu, sigma])

    sampler = DrawFromPrior([model], dummy, seed=1)
    original_journal = sampler.sample(100)

    # expected mean values from bootstrapped samples:
    mu_mean = -0.5631214403709973
    sigma_mean = 5.2341427118053705
    # expected mean values from subsampled samples:
    mu_mean_2 = -0.6414897172489
    sigma_mean_2 = 6.217381777130734

    # -- bootstrap --
    new_j = original_journal.resample(path_to_save_journal="tmp.jnl", seed=42)
    mu_sample = np.array(new_j.get_parameters()['mu'])
    sigma_sample = np.array(new_j.get_parameters()['sigma'])

    accepted_parameters = new_j.get_accepted_parameters()
    self.assertEqual(len(accepted_parameters), 100)
    self.assertEqual(len(accepted_parameters[0]), 2)

    # test shape of samples
    mu_shape, sigma_shape = (len(mu_sample), mu_sample[0].shape[1]), \
                            (len(sigma_sample), sigma_sample[0].shape[1])
    self.assertEqual(mu_shape, (100, 1))
    self.assertEqual(sigma_shape, (100, 1))

    # compute posterior mean
    self.assertAlmostEqual(np.average(mu_sample), mu_mean)
    self.assertAlmostEqual(np.average(sigma_sample), sigma_mean)

    self.assertTrue(new_j.number_of_simulations[0] == 0)

    # check that the parameter dictionary and the accepted-parameter list contain the same data:
    self.assertEqual(new_j.get_parameters()["mu"][9], new_j.get_accepted_parameters()[9][0])
    self.assertEqual(new_j.get_parameters()["sigma"][7], new_j.get_accepted_parameters()[7][1])

    # -- subsample (replace=False, smaller number than the full sample) --
    new_j_2 = original_journal.resample(replace=False, n_samples=10, seed=42)
    mu_sample = np.array(new_j_2.get_parameters()['mu'])
    sigma_sample = np.array(new_j_2.get_parameters()['sigma'])

    accepted_parameters = new_j_2.get_accepted_parameters()
    self.assertEqual(len(accepted_parameters), 10)
    self.assertEqual(len(accepted_parameters[0]), 2)

    # test shape of samples
    mu_shape, sigma_shape = (len(mu_sample), mu_sample[0].shape[1]), \
                            (len(sigma_sample), sigma_sample[0].shape[1])
    self.assertEqual(mu_shape, (10, 1))
    self.assertEqual(sigma_shape, (10, 1))

    # compute posterior mean
    self.assertAlmostEqual(np.average(mu_sample), mu_mean_2)
    self.assertAlmostEqual(np.average(sigma_sample), sigma_mean_2)

    self.assertTrue(new_j_2.number_of_simulations[0] == 0)

    # check that the parameter dictionary and the accepted-parameter list contain the same data:
    self.assertEqual(new_j_2.get_parameters()["mu"][9], new_j_2.get_accepted_parameters()[9][0])
    self.assertEqual(new_j_2.get_parameters()["sigma"][7], new_j_2.get_accepted_parameters()[7][1])

    # -- check that resampling the full sample with replace=False gives the exact same posterior mean and std --
    new_j_3 = original_journal.resample(replace=False, n_samples=100)
    mu_sample = np.array(new_j_3.get_parameters()['mu'])
    sigma_sample = np.array(new_j_3.get_parameters()['sigma'])
    # original journal
    mu_sample_original = np.array(original_journal.get_parameters()['mu'])
    sigma_sample_original = np.array(original_journal.get_parameters()['sigma'])

    # compute posterior mean and std
    self.assertAlmostEqual(np.average(mu_sample), np.average(mu_sample_original))
    self.assertAlmostEqual(np.average(sigma_sample), np.average(sigma_sample_original))
    self.assertAlmostEqual(np.std(mu_sample), np.std(mu_sample_original))
    self.assertAlmostEqual(np.std(sigma_sample), np.std(sigma_sample_original))

    # check that the parameter dictionary and the accepted-parameter list contain the same data:
    self.assertEqual(new_j_3.get_parameters()["mu"][9], new_j_3.get_accepted_parameters()[9][0])
    self.assertEqual(new_j_3.get_parameters()["sigma"][7], new_j_3.get_accepted_parameters()[7][1])

    # -- test the error --
    with self.assertRaises(RuntimeError):
        original_journal.resample(replace=False, n_samples=200)
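# Quick reference for Journal.resample, summarizing the behaviours exercised above
# (sketch; assumes a journal such as `original_journal` from the test is in scope):
#   - default: bootstrap, i.e. draw samples with replacement;
#   - replace=False, n_samples < journal size: subsample without replacement;
#   - replace=False, n_samples == journal size: a permutation, leaving posterior moments unchanged;
#   - replace=False, n_samples > journal size: raises RuntimeError.
bootstrap_journal = original_journal.resample(seed=42)
subsampled_journal = original_journal.resample(replace=False, n_samples=10, seed=42)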
def infer_parameters(steps=2, n_sample=50, n_samples_per_param=1, logging_level=logging.WARN):
    """Perform inference for this example.

    Parameters
    ----------
    steps : integer, optional
        Number of iterations in the sequential PMCABC algorithm ("generations"). The default value is 2.
    n_sample : integer, optional
        Number of posterior samples to generate. The default value is 50.
    n_samples_per_param : integer, optional
        Number of data points in each simulated data set. The default value is 1.
    logging_level : integer, optional
        Logging level to use. The default value is logging.WARN.

    Returns
    -------
    abcpy.output.Journal
        A journal containing simulation results, metadata and optionally intermediate results.
    """
    logging.basicConfig(level=logging_level)

    # define backend
    # Note, the dummy backend does not parallelize the code!
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # define observation for true parameters mean=170, std=15
    height_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084,
                  172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017,
                  183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515,
                  197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633,
                  165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537,
                  206.22458790620766, 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275,
                  171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235,
                  197.42448680731226, 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519,
                  185.30223966014586, 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741,
                  250.19273595481803, 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021,
                  138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626,
                  188.27229523693822, 202.67075179617672, 211.75963110985992, 217.45423324370509]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu = Uniform([[150], [200]], name="mu")
    sigma = Uniform([[5], [25]], name="sigma")

    # define the model
    from abcpy.continuousmodels import Normal
    height = Normal([mu, sigma])

    # 1) generate simulations from prior
    from abcpy.inferences import DrawFromPrior
    draw_from_prior = DrawFromPrior([height], backend=backend)

    # notice the use of the `.sample_par_sim_pairs` method rather than `.sample` to obtain data suitably formatted
    # for the summary statistics learning routines
    parameters, simulations = draw_from_prior.sample_par_sim_pairs(100, n_samples_per_param=1)
    # if you want to use the test loss to do early stopping in the training:
    parameters_val, simulations_val = draw_from_prior.sample_par_sim_pairs(100, n_samples_per_param=1)
    # discard the mid dimension (n_samples_per_param, as the StatisticsLearning classes use that =1)
    simulations = simulations.reshape(simulations.shape[0], simulations.shape[2])
    simulations_val = simulations_val.reshape(simulations_val.shape[0], simulations_val.shape[2])

    # 2) now train the NNs with the different methods with the generated data
    from abcpy.statistics import Identity
    identity = Identity()  # to apply before computing the statistics

    logging.info("semiNN")
    from abcpy.statisticslearning import SemiautomaticNN, TripletDistanceLearning
    semiNN = SemiautomaticNN([height], identity, backend=backend, parameters=parameters, simulations=simulations,
                             parameters_val=parameters_val, simulations_val=simulations_val,
                             early_stopping=True,  # early stopping
                             seed=1, n_epochs=10, scale_samples=False, use_tqdm=False)
    logging.info("triplet")
    triplet = TripletDistanceLearning([height], identity, backend=backend, parameters=parameters,
                                      simulations=simulations, parameters_val=parameters_val,
                                      simulations_val=simulations_val,
                                      early_stopping=True,  # early stopping
                                      seed=1, n_epochs=10, scale_samples=True, use_tqdm=False)

    # 3) save and re-load NNs:
    # get the statistics from the already fitted StatisticsLearning objects:
    learned_seminn_stat = semiNN.get_statistics()
    learned_triplet_stat = triplet.get_statistics()

    # these have a save_net method:
    learned_seminn_stat.save_net("seminn_net.pth")
    # if you used `scale_samples=True` when learning the NN, you also need to provide a path where pickle stores
    # the scaler:
    learned_triplet_stat.save_net("triplet_net.pth", path_to_scaler="scaler.pkl")

    # to reload, use NeuralEmbedding.fromFile; this needs to know which kind of NN it is using. Therefore, pass
    # either the input/output size (i.e. the data size and the number of parameters) or the network class, if
    # that was specified explicitly in the StatisticsLearning class. Check the docstring of
    # NeuralEmbedding.fromFile for more details.
    from abcpy.statistics import NeuralEmbedding
    learned_seminn_stat_loaded = NeuralEmbedding.fromFile("seminn_net.pth", input_size=1, output_size=2)
    learned_triplet_stat_loaded = NeuralEmbedding.fromFile("triplet_net.pth", input_size=1, output_size=2,
                                                           path_to_scaler="scaler.pkl")

    # 4) you can optionally rescale the different summary statistics by their standard deviation on a reference
    # dataset of simulations. To do this, it is enough to pass the reference dataset at initialization; the
    # rescaling will then be applied every time the statistics are computed on some simulation or observation.
    learned_triplet_stat_loaded = NeuralEmbedding.fromFile("triplet_net.pth", input_size=1, output_size=2,
                                                           path_to_scaler="scaler.pkl",
                                                           reference_simulations=simulations_val)

    # 5) perform inference
    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(learned_seminn_stat_loaded)

    # define kernel
    from abcpy.perturbationkernel import DefaultKernel
    kernel = DefaultKernel([mu, sigma])

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1)

    eps_arr = np.array([500])  # starting value of epsilon; the smaller, the slower the algorithm.
    # at each iteration, take as epsilon the epsilon_percentile of the distances obtained by simulations at the
    # previous iteration from the observation
    epsilon_percentile = 10

    journal = sampler.sample([height_obs], steps, eps_arr, n_sample, n_samples_per_param, epsilon_percentile)

    return journal
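# Usage sketch (assumes the module-level imports of this example script, e.g. numpy and
# logging): run the example end to end and inspect the approximate posterior stored in
# the returned journal.
if __name__ == "__main__":
    journal = infer_parameters(logging_level=logging.INFO)
    print(journal.posterior_mean())  # weighted posterior mean of mu and sigma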
args_dict['ar1_bounds'] = ar1_bounds
args_dict['ar2_bounds'] = ar2_bounds

ar1 = Uniform([[ar1_bounds[0]], [ar1_bounds[1]]], name='ar1')
ar2 = Uniform([[ar2_bounds[0]], [ar2_bounds[1]]], name='ar2')
arma_abc_model = ARMAmodel([ar1, ar2], num_AR_params=2, num_MA_params=0, size=arma_size)

if not load_train_data:
    print("Generating data... ({} samples in total)".format(n_samples_training + n_samples_evaluation))
    start = time()
    draw_from_prior = DrawFromPrior([arma_abc_model], backend=backend, seed=seed)
    theta_vect, samples_matrix = draw_from_prior.sample_par_sim_pairs(n_samples_training, 1)
    theta_vect_test, samples_matrix_test = draw_from_prior.sample_par_sim_pairs(n_samples_evaluation, 1)
    samples_matrix = samples_matrix.reshape(samples_matrix.shape[0], samples_matrix.shape[-1])
    samples_matrix_test = samples_matrix_test.reshape(samples_matrix_test.shape[0], samples_matrix_test.shape[-1])
    print("Data generation took {:.2f} seconds".format(time() - start))

    if save_train_data:
        # save data before scalers are applied:
        np.save(datasets_folder + "theta_vect.npy", theta_vect)
        np.save(datasets_folder + "samples_matrix.npy", samples_matrix)
        np.save(datasets_folder + "theta_vect_test.npy", theta_vect_test)