Example #1
    def setUp(self):
        if has_torch:
            self.net = createDefaultNN(2, 3)()
            self.net_with_scaler = ScalerAndNet(self.net, None)
            self.net_with_discard_wrapper = DiscardLastOutputNet(self.net)
            self.stat_calc = NeuralEmbedding(self.net)
            self.stat_calc_with_scaler = NeuralEmbedding(self.net_with_scaler)
            self.stat_calc_with_discard_wrapper = NeuralEmbedding(
                self.net_with_discard_wrapper)
            # reference input and output
            torch.random.manual_seed(1)
            self.tensor = torch.randn(1, 2)
            self.out = self.net(self.tensor)
            self.out_discard = self.net_with_discard_wrapper(self.tensor)

            # now try the statistics rescaling option:
            mu = Uniform([[-5.0], [5.0]], name='mu')
            sigma = Uniform([[0.0], [10.0]], name='sigma')
            # define a Gaussian model
            self.model = Normal([mu, sigma])

            sampler = DrawFromPrior([self.model], BackendDummy(), seed=1)
            reference_parameters, reference_simulations = sampler.sample_par_sim_pairs(
                30, 1)
            reference_simulations = reference_simulations.reshape(
                reference_simulations.shape[0], reference_simulations.shape[2])

            self.stat_calc_rescaling = NeuralEmbedding(
                self.net,
                reference_simulations=reference_simulations,
                previous_statistics=Identity(degree=2))

        if not has_torch:
            self.assertRaises(ImportError, NeuralEmbedding, None)
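A minimal usage sketch for the statistics object built above (not part of the original test; the import path for createDefaultNN is assumed to match the ABCpy test suite):

# hedged sketch: apply a NeuralEmbedding statistics object to raw data through the standard
# ABCpy Statistics interface; the toy net maps 2 inputs to 3 learned summaries
import numpy as np
from abcpy.NN_utilities.networks import createDefaultNN  # assumed import path
from abcpy.statistics import NeuralEmbedding

net = createDefaultNN(2, 3)()                              # same kind of toy network as in the setUp above
stat_calc = NeuralEmbedding(net)
summaries = stat_calc.statistics([np.array([1.0, 2.0])])   # list of samples in, 2D array of summaries out
print(summaries.shape)                                     # expected: (1, 3)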
Example #2
    def setUp(self):
        # setup backend
        dummy = BackendDummy()

        # define a uniform prior distribution
        mu = Uniform([[-5.0], [5.0]], name='mu')
        sigma = Uniform([[0.0], [10.0]], name='sigma')
        # define a Gaussian model
        self.model = Normal([mu, sigma])

        # define a second uniform model, used with a separate GenerateFromJournal instance below
        self.model2 = Uniform([[0], [10]])

        self.sampler = DrawFromPrior([self.model], dummy, seed=1)
        self.original_journal = self.sampler.sample(100)

        self.generate_from_journal = GenerateFromJournal([self.model],
                                                         dummy,
                                                         seed=2)
        self.generate_from_journal_2 = GenerateFromJournal([self.model2],
                                                           dummy,
                                                           seed=2)

        # expected mean values from bootstrapped samples:
        self.mu_mean = -0.2050921750330999
        self.sigma_mean = 5.178647189918053
        # expected mean values from subsampled samples:
        self.mu_mean_2 = -0.021275259024241676
        self.sigma_mean_2 = 5.672004487129107
Example #3
def estimate_bandwidth_timeseries(model_abc, backend, num_vars, n_theta=100, seed=42, return_values=["median"]):
    """Estimate the bandwidth for the Gaussian kernel in KernelSR. Specifically, it draws n_theta parameter values
    from the prior and generates one simulation for each of them; at every timestep it then computes the pairwise
    distances between the simulations and takes their median. The returned value is the median (by default; the mean
    can be requested via return_values) of these per-timestep medians.  """

    # generate the values of theta from prior
    theta_vect, simulations_theta_vect = DrawFromPrior([model_abc], backend, seed=seed).sample_par_sim_pairs(n_theta, 1)
    simulations_theta_vect = simulations_theta_vect.reshape(n_theta, num_vars, -1)  # last index is the timestep
    n_timestep = simulations_theta_vect.shape[2]

    distances_median = np.zeros(n_timestep)
    for timestep_index in range(n_timestep):
        simulations = simulations_theta_vect[:, :, timestep_index]
        distances = np.linalg.norm(
            simulations.reshape(1, n_theta, -1) - simulations.reshape(n_theta, 1, -1), axis=-1)[
            ~np.eye(n_theta, dtype=bool)].reshape(-1)
        # median of all pairwise distances at this timestep:
        distances_median[timestep_index] = np.median(distances)

    return_list = []
    if "median" in return_values:
        return_list.append(np.median(distances_median.flatten()))
    if "mean" in return_values:
        return_list.append(np.mean(distances_median.flatten()))

    return return_list[0] if len(return_list) == 1 else return_list
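A possible way to call the helper above (a sketch, not from the original code; it assumes the ABCpy classes used in the surrounding examples, such as Uniform, Normal, BackendDummy and DrawFromPrior, are imported, and it reuses a plain Normal model as a one-variable, single-timestep stand-in):

# hedged usage sketch for estimate_bandwidth_timeseries
mu = Uniform([[-5.0], [5.0]], name='mu')
sigma = Uniform([[0.0], [10.0]], name='sigma')
model = Normal([mu, sigma])  # one variable, a single "timestep"

bandwidth = estimate_bandwidth_timeseries(model, BackendDummy(), num_vars=1, n_theta=50)
print(bandwidth)  # median of the pairwise distances between prior simulations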
Example #4
    def setUp(self):
        self.coeff = np.array([[3, 4], [5, 6]])
        self.stat_calc = LinearTransformation(self.coeff,
                                              degree=1,
                                              cross=False)

        # now try the statistics rescaling option:
        mu = Uniform([[-5.0], [5.0]], name='mu')
        sigma = Uniform([[0.0], [10.0]], name='sigma')
        # define a Gaussian model
        self.model = Normal([mu, sigma])

        sampler = DrawFromPrior([self.model], BackendDummy(), seed=1)
        reference_parameters, reference_simulations = sampler.sample_par_sim_pairs(
            30, 1)
        reference_simulations = reference_simulations.reshape(
            reference_simulations.shape[0], reference_simulations.shape[2])
        reference_simulations_double = np.concatenate(
            [reference_simulations, reference_simulations], axis=1)

        self.stat_calc_rescaling = LinearTransformation(
            self.coeff, reference_simulations=reference_simulations_double)
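A short worked example of what the transformation above computes (a sketch, assuming LinearTransformation right-multiplies each sample by the coefficient matrix):

import numpy as np
from abcpy.statistics import LinearTransformation

coeff = np.array([[3, 4], [5, 6]])
stat_calc = LinearTransformation(coeff, degree=1, cross=False)
# the sample [1, 2] is mapped to [1*3 + 2*5, 1*4 + 2*6] = [13, 16]
print(stat_calc.statistics([np.array([1.0, 2.0])]))  # expected: [[13. 16.]]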
Example #5
class GenerateFromJournalTests(unittest.TestCase):
    def setUp(self):
        # setup backend
        dummy = BackendDummy()

        # define a uniform prior distribution
        mu = Uniform([[-5.0], [5.0]], name='mu')
        sigma = Uniform([[0.0], [10.0]], name='sigma')
        # define a Gaussian model
        self.model = Normal([mu, sigma])

        # define a second uniform model, used with a separate GenerateFromJournal instance below
        self.model2 = Uniform([[0], [10]])

        self.sampler = DrawFromPrior([self.model], dummy, seed=1)
        self.original_journal = self.sampler.sample(100)

        self.generate_from_journal = GenerateFromJournal([self.model],
                                                         dummy,
                                                         seed=2)
        self.generate_from_journal_2 = GenerateFromJournal([self.model2],
                                                           dummy,
                                                           seed=2)

        # expected mean values from bootstrapped samples:
        self.mu_mean = -0.2050921750330999
        self.sigma_mean = 5.178647189918053
        # expected mean values from subsampled samples:
        self.mu_mean_2 = -0.021275259024241676
        self.sigma_mean_2 = 5.672004487129107

    def test_generate(self):
        # sample single simulation for each par value
        parameters, simulations, normalized_weights = self.generate_from_journal.generate(
            journal=self.original_journal)
        self.assertEqual(parameters.shape, (100, 2))
        self.assertEqual(simulations.shape, (100, 1, 1))
        self.assertEqual(normalized_weights.shape, (100, ))

        # sample multiple simulations for each par value
        parameters, simulations, normalized_weights = self.generate_from_journal.generate(
            self.original_journal, n_samples_per_param=3, iteration=-1)
        self.assertEqual(parameters.shape, (100, 2))
        self.assertEqual(simulations.shape, (100, 3, 1))
        self.assertEqual(normalized_weights.shape, (100, ))

    def test_errors(self):
        # check whether using a different model leads to errors:
        with self.assertRaises(RuntimeError):
            self.generate_from_journal_2.generate(self.original_journal)
Example #6
    def setUp(self):
        self.stat_calc = Identity(degree=1, cross=False)
        self.stat_calc_pipeline = Identity(degree=2,
                                           cross=False,
                                           previous_statistics=self.stat_calc)

        # now try the statistics rescaling option:
        mu = Uniform([[-5.0], [5.0]], name='mu')
        sigma = Uniform([[0.0], [10.0]], name='sigma')
        # define a Gaussian model
        self.model = Normal([mu, sigma])

        sampler = DrawFromPrior([self.model], BackendDummy(), seed=1)
        reference_parameters, reference_simulations = sampler.sample_par_sim_pairs(
            30, 1)
        reference_simulations = reference_simulations.reshape(
            reference_simulations.shape[0], reference_simulations.shape[2])
        reference_simulations_double = np.concatenate(
            [reference_simulations, reference_simulations], axis=1)

        self.stat_calc_rescaling = Identity(
            reference_simulations=reference_simulations_double)
        self.stat_calc_rescaling_2 = Identity(
            reference_simulations=reference_simulations)
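A brief sketch of what the Identity configurations above return (assuming the usual ABCpy convention that degree appends element-wise powers, cross=False omits cross terms, and passing reference_simulations additionally rescales each statistic by its standard deviation on that dataset):

import numpy as np
from abcpy.statistics import Identity

stat_calc = Identity(degree=1, cross=False)
print(stat_calc.statistics([np.array([1.0, 2.0])]))       # expected: [[1. 2.]] (data returned unchanged)
stat_calc_deg2 = Identity(degree=2, cross=False)
print(stat_calc_deg2.statistics([np.array([1.0, 2.0])]))  # expected: [[1. 2. 1. 4.]] (element-wise squares appended)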
Example #7
    def test_resample(self):
        # -- setup --
        # setup backend
        dummy = BackendDummy()

        # define a uniform prior distribution
        mu = Uniform([[-5.0], [5.0]], name='mu')
        sigma = Uniform([[0.0], [10.0]], name='sigma')
        # define a Gaussian model
        model = Normal([mu, sigma])

        sampler = DrawFromPrior([model], dummy, seed=1)
        original_journal = sampler.sample(100)

        # expected mean values from bootstrapped samples:
        mu_mean = -0.5631214403709973
        sigma_mean = 5.2341427118053705
        # expected mean values from subsampled samples:
        mu_mean_2 = -0.6414897172489
        sigma_mean_2 = 6.217381777130734

        # -- bootstrap --
        new_j = original_journal.resample(path_to_save_journal="tmp.jnl",
                                          seed=42)
        mu_sample = np.array(new_j.get_parameters()['mu'])
        sigma_sample = np.array(new_j.get_parameters()['sigma'])

        accepted_parameters = new_j.get_accepted_parameters()
        self.assertEqual(len(accepted_parameters), 100)
        self.assertEqual(len(accepted_parameters[0]), 2)

        # test shape of samples
        mu_shape, sigma_shape = (len(mu_sample), mu_sample[0].shape[1]), \
                                (len(sigma_sample), sigma_sample[0].shape[1])
        self.assertEqual(mu_shape, (100, 1))
        self.assertEqual(sigma_shape, (100, 1))

        # Compute posterior mean
        self.assertAlmostEqual(np.average(mu_sample), mu_mean)
        self.assertAlmostEqual(np.average(sigma_sample), sigma_mean)

        self.assertTrue(new_j.number_of_simulations[0] == 0)

        # check that the parameter dictionary and the accepted-parameters list contain the same data:
        self.assertEqual(new_j.get_parameters()["mu"][9],
                         new_j.get_accepted_parameters()[9][0])
        self.assertEqual(new_j.get_parameters()["sigma"][7],
                         new_j.get_accepted_parameters()[7][1])

        # -- subsample (replace=False, smaller number than the full sample) --
        new_j_2 = original_journal.resample(replace=False,
                                            n_samples=10,
                                            seed=42)
        mu_sample = np.array(new_j_2.get_parameters()['mu'])
        sigma_sample = np.array(new_j_2.get_parameters()['sigma'])

        accepted_parameters = new_j_2.get_accepted_parameters()
        self.assertEqual(len(accepted_parameters), 10)
        self.assertEqual(len(accepted_parameters[0]), 2)

        # test shape of samples
        mu_shape, sigma_shape = (len(mu_sample), mu_sample[0].shape[1]), \
                                (len(sigma_sample), sigma_sample[0].shape[1])
        self.assertEqual(mu_shape, (10, 1))
        self.assertEqual(sigma_shape, (10, 1))

        # Compute posterior mean
        self.assertAlmostEqual(np.average(mu_sample), mu_mean_2)
        self.assertAlmostEqual(np.average(sigma_sample), sigma_mean_2)

        self.assertTrue(new_j_2.number_of_simulations[0] == 0)

        # check that the parameter dictionary and the accepted-parameters list contain the same data:
        self.assertEqual(new_j_2.get_parameters()["mu"][9],
                         new_j_2.get_accepted_parameters()[9][0])
        self.assertEqual(new_j_2.get_parameters()["sigma"][7],
                         new_j_2.get_accepted_parameters()[7][1])

        # -- check that resampling the full samples with replace=False gives the exact same posterior mean and std --
        new_j_3 = original_journal.resample(replace=False, n_samples=100)
        mu_sample = np.array(new_j_3.get_parameters()['mu'])
        sigma_sample = np.array(new_j_3.get_parameters()['sigma'])

        # original journal
        mu_sample_original = np.array(original_journal.get_parameters()['mu'])
        sigma_sample_original = np.array(
            original_journal.get_parameters()['sigma'])

        # Compute posterior mean and std
        self.assertAlmostEqual(np.average(mu_sample),
                               np.average(mu_sample_original))
        self.assertAlmostEqual(np.average(sigma_sample),
                               np.average(sigma_sample_original))
        self.assertAlmostEqual(np.std(mu_sample), np.std(mu_sample_original))
        self.assertAlmostEqual(np.std(sigma_sample),
                               np.std(sigma_sample_original))

        # check that the parameter dictionary and the accepted-parameters list contain the same data:
        self.assertEqual(new_j_3.get_parameters()["mu"][9],
                         new_j_3.get_accepted_parameters()[9][0])
        self.assertEqual(new_j_3.get_parameters()["sigma"][7],
                         new_j_3.get_accepted_parameters()[7][1])

        # -- test the error --
        with self.assertRaises(RuntimeError):
            original_journal.resample(replace=False, n_samples=200)
Example #8
def infer_parameters(steps=2, n_sample=50, n_samples_per_param=1, logging_level=logging.WARN):
    """Perform inference for this example.

    Parameters
    ----------
    steps : integer, optional
        Number of iterations in the sequential PMCABC algorithm ("generations"). The default value is 2.
    n_sample : integer, optional
        Number of posterior samples to generate. The default value is 50.
    n_samples_per_param : integer, optional
        Number of data points in each simulated data set. The default value is 1.
    logging_level : integer, optional
        Logging level to use; the default value is logging.WARN.

    Returns
    -------
    abcpy.output.Journal
        A journal containing simulation results, metadata and optionally intermediate results.
    """
    logging.basicConfig(level=logging_level)
    # define backend
    # Note, the dummy backend does not parallelize the code!
    from abcpy.backends import BackendDummy as Backend
    backend = Backend()

    # define observation for true parameters mean=170, std=15
    height_obs = [160.82499176, 167.24266737, 185.71695756, 153.7045709, 163.40568812, 140.70658699, 169.59102084,
                  172.81041696, 187.38782738, 179.66358934, 176.63417241, 189.16082803, 181.98288443, 170.18565017,
                  183.78493886, 166.58387299, 161.9521899, 155.69213073, 156.17867343, 144.51580379, 170.29847515,
                  197.96767899, 153.36646527, 162.22710198, 158.70012047, 178.53470703, 170.77697743, 164.31392633,
                  165.88595994, 177.38083686, 146.67058471763457, 179.41946565658628, 238.02751620619537,
                  206.22458790620766, 220.89530574344568, 221.04082532837026, 142.25301427453394, 261.37656571434275,
                  171.63761180867033, 210.28121820385866, 237.29130237612236, 175.75558340169619, 224.54340549862235,
                  197.42448680731226, 165.88273684581381, 166.55094082844519, 229.54308602661584, 222.99844054358519,
                  185.30223966014586, 152.69149367593846, 206.94372818527413, 256.35498655339154, 165.43140916577741,
                  250.19273595481803, 148.87781549665536, 223.05547559193792, 230.03418198709608, 146.13611923127021,
                  138.24716809523139, 179.26755740864527, 141.21704876815426, 170.89587081800852, 222.96391329259626,
                  188.27229523693822, 202.67075179617672, 211.75963110985992, 217.45423324370509]

    # define prior
    from abcpy.continuousmodels import Uniform
    mu = Uniform([[150], [200]], name="mu")
    sigma = Uniform([[5], [25]], name="sigma")

    # define the model
    from abcpy.continuousmodels import Normal
    height = Normal([mu, sigma])

    # 1) generate simulations from prior
    from abcpy.inferences import DrawFromPrior
    draw_from_prior = DrawFromPrior([height], backend=backend)

    # notice the use of the `.sample_par_sim_pairs` method rather than `.sample` to obtain data suitably formatted
    # for the summary statistics learning routines
    parameters, simulations = draw_from_prior.sample_par_sim_pairs(100, n_samples_per_param=1)
    # if you want to use the test loss to do early stopping in the training:
    parameters_val, simulations_val = draw_from_prior.sample_par_sim_pairs(100, n_samples_per_param=1)
    # discard the middle dimension (n_samples_per_param), as the StatisticsLearning classes expect it to be 1
    simulations = simulations.reshape(simulations.shape[0], simulations.shape[2])
    simulations_val = simulations_val.reshape(simulations_val.shape[0], simulations_val.shape[2])

    # 2) now train the NNs with the different methods with the generated data
    from abcpy.statistics import Identity
    identity = Identity()  # to apply before computing the statistics

    logging.info("semiNN")
    from abcpy.statisticslearning import SemiautomaticNN, TripletDistanceLearning
    semiNN = SemiautomaticNN([height], identity, backend=backend, parameters=parameters,
                             simulations=simulations, parameters_val=parameters_val, simulations_val=simulations_val,
                             early_stopping=True,  # early stopping
                             seed=1, n_epochs=10, scale_samples=False, use_tqdm=False)
    logging.info("triplet")
    triplet = TripletDistanceLearning([height], identity, backend=backend, parameters=parameters,
                                      simulations=simulations, parameters_val=parameters_val,
                                      simulations_val=simulations_val,
                                      early_stopping=True,  # early stopping
                                      seed=1, n_epochs=10, scale_samples=True, use_tqdm=False)

    # 3) save and re-load NNs:
    # get the statistics from the already fit StatisticsLearning object 'semiNN':
    learned_seminn_stat = semiNN.get_statistics()
    learned_triplet_stat = triplet.get_statistics()

    # this has a save net method:
    learned_seminn_stat.save_net("seminn_net.pth")
    # if you used `scale_samples=True` when learning the NNs, you also need to provide a path where pickle stores the scaler:
    learned_triplet_stat.save_net("triplet_net.pth", path_to_scaler="scaler.pkl")

    # to reload, use the NeuralEmbedding statistics' fromFile method; this needs to know which kind of NN it is
    # using, so you must pass either the input/output sizes (i.e. the data size and the number of parameters) or the
    # network class, if one was specified explicitly in the StatisticsLearning class. Check the docstring of
    # NeuralEmbedding.fromFile for more details.
    from abcpy.statistics import NeuralEmbedding
    learned_seminn_stat_loaded = NeuralEmbedding.fromFile("seminn_net.pth", input_size=1, output_size=2)
    learned_triplet_stat_loaded = NeuralEmbedding.fromFile("triplet_net.pth", input_size=1, output_size=2,
                                                           path_to_scaler="scaler.pkl")

    # 4) you can optionally rescale the different summary statistics by their standard deviation on a reference
    # dataset of simulations. To do this, it is enough to pass the reference dataset at initialization; the rescaling
    # will then be applied every time the statistics are computed on some simulation or observation.
    learned_triplet_stat_loaded = NeuralEmbedding.fromFile("triplet_net.pth", input_size=1, output_size=2,
                                                           path_to_scaler="scaler.pkl",
                                                           reference_simulations=simulations_val)

    # 5) perform inference
    # define distance
    from abcpy.distances import Euclidean
    distance_calculator = Euclidean(learned_seminn_stat_loaded)

    # define kernel
    from abcpy.perturbationkernel import DefaultKernel
    kernel = DefaultKernel([mu, sigma])

    # define sampling scheme
    from abcpy.inferences import PMCABC
    sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1)

    eps_arr = np.array([500])  # starting value of epsilon; the smaller, the slower the algorithm.
    # at each iteration, epsilon is set to the epsilon_percentile-th percentile of the distances between the
    # observation and the simulations obtained at the previous iteration
    epsilon_percentile = 10
    journal = sampler.sample([height_obs], steps, eps_arr, n_sample, n_samples_per_param, epsilon_percentile)

    return journal
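A minimal driver for the function above (a sketch, not part of the original script; posterior_mean and configuration are assumed to be part of the abcpy.output.Journal API):

import logging

if __name__ == "__main__":
    # run the full pipeline and inspect the resulting journal
    journal = infer_parameters(steps=2, n_sample=50, logging_level=logging.INFO)
    print(journal.posterior_mean())   # assumed Journal method: posterior means of 'mu' and 'sigma'
    print(journal.configuration)      # assumed Journal attribute: sampler settings stored with the results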
Example #9
    args_dict['ar1_bounds'] = ar1_bounds
    args_dict['ar2_bounds'] = ar2_bounds

    ar1 = Uniform([[ar1_bounds[0]], [ar1_bounds[1]]], name='ar1')
    ar2 = Uniform([[ar2_bounds[0]], [ar2_bounds[1]]], name='ar2')
    arma_abc_model = ARMAmodel([ar1, ar2],
                               num_AR_params=2,
                               num_MA_params=0,
                               size=arma_size)

    if not load_train_data:
        print("Generating data... ({} samples in total)".format(
            n_samples_training + n_samples_evaluation))
        start = time()
        draw_from_prior = DrawFromPrior([arma_abc_model],
                                        backend=backend,
                                        seed=seed)
        theta_vect, samples_matrix = draw_from_prior.sample_par_sim_pairs(
            n_samples_training, 1)
        theta_vect_test, samples_matrix_test = draw_from_prior.sample_par_sim_pairs(
            n_samples_evaluation, 1)
        samples_matrix = samples_matrix.reshape(samples_matrix.shape[0],
                                                samples_matrix.shape[-1])
        samples_matrix_test = samples_matrix_test.reshape(
            samples_matrix_test.shape[0], samples_matrix_test.shape[-1])
        print("Data generation took {:.2f} seconds".format(time() - start))
        if save_train_data:
            # save data before scalers are applied.
            np.save(datasets_folder + "theta_vect.npy", theta_vect)
            np.save(datasets_folder + "samples_matrix.npy", samples_matrix)
            np.save(datasets_folder + "theta_vect_test.npy", theta_vect_test)