Beispiel #1
0
    def test_load_and_save(self):
        """Check that a journal's weights survive a save/load round trip.

        A journal holding one weights array is written with ``Journal.save``,
        read back with ``Journal.fromFile``, and the ``weights`` attributes of
        both objects are compared.
        """
        import os
        import tempfile

        weights1 = np.zeros((2, 4))

        journal = Journal(0)
        journal.add_weights(weights1)

        # Write into a throw-away directory so the test leaves no artefact
        # behind in the current working directory.
        with tempfile.TemporaryDirectory() as tmp_dir:
            filename = os.path.join(tmp_dir, 'journal_tests_testfile.pkl')
            journal.save(filename)
            new_journal = Journal.fromFile(filename)

        np.testing.assert_equal(journal.weights, new_journal.weights)
Beispiel #2
0
    def sample(self,
               observations,
               steps,
               epsilon_init,
               n_samples=10000,
               n_samples_per_param=1,
               epsilon_percentile=10,
               covFactor=2,
               full_output=0,
               journal_file=None,
               journal_file_save=None):
        """Samples from the posterior distribution of the model parameter given the observed
        data observations.

        Parameters
        ----------
        observations : list
            A list, containing lists describing the observed data sets
        steps : integer
            Number of iterations in the sequential algorithm ("generations")
        epsilon_init : numpy.ndarray
            Proposed acceptance threshold(s). Either a single value (a scalar or a
            one-element sequence) used as the threshold in the first step, or a
            sequence of length `steps` with the threshold for every step. The
            argument itself is not modified.
        n_samples : integer, optional
            Number of samples to generate. The default value is 10000.
        n_samples_per_param : integer, optional
            Number of data points in each simulated data set. The default value is 1.
        epsilon_percentile : float, optional
            A value between [0, 100]. The percentile of the distances of the current
            step that is used to derive the threshold of the next step.
            The default value is 10.
        covFactor : float, optional
            scaling parameter of the covariance matrix. The default value is 2 as considered in [1].
        full_output: integer, optional
            If full_output==1, intermediate results are included in output journal.
            The default value is 0, meaning the intermediate results are not saved.
        journal_file: str, optional
            Filename of a journal file to read an already saved journal file, from which the first iteration will start.
            The default value is None.
        journal_file_save: str, optional
            Base filename used to save the journal whenever results are added to it.
            With full_output==1 the journal is written to ``journal_file_save + '.jrl'``
            after every step (overwriting the previous file); otherwise it is written
            once, after the last step, to
            ``journal_file_save + '_' + str(last_step_index) + '.jrl'``.
            The default value is None, meaning the journal is not saved to disk.

        Returns
        -------
        abcpy.output.Journal
            A journal containing simulation results, metadata and optionally intermediate results.

        Raises
        ------
        ValueError
            If the length of `epsilon_init` is neither 1 nor `steps`.
        """
        self.accepted_parameters_manager.broadcast(self.backend, observations)
        self.n_samples = n_samples
        self.n_samples_per_param = n_samples_per_param

        if journal_file is None:
            journal = Journal(full_output)
            journal.configuration["type_model"] = [
                type(model).__name__ for model in self.model
            ]
            journal.configuration["type_dist_func"] = type(
                self.distance).__name__
            journal.configuration["n_samples"] = self.n_samples
            journal.configuration[
                "n_samples_per_param"] = self.n_samples_per_param
            journal.configuration["steps"] = steps
            journal.configuration["epsilon_percentile"] = epsilon_percentile
        else:
            journal = Journal.fromFile(journal_file)

        accepted_parameters = None
        accepted_weights = None
        accepted_cov_mats = None

        # Define epsilon_arr
        # A bare scalar is documented as valid input; wrap it so len() works.
        if np.isscalar(epsilon_init):
            epsilon_init = [epsilon_init]

        if len(epsilon_init) == steps:
            # Work on a copy: later thresholds are overwritten below and the
            # caller's sequence must not change as a side effect.
            epsilon_arr = copy.copy(epsilon_init)
        elif len(epsilon_init) == 1:
            # Only the first threshold is known; the others are derived from
            # the distances of the preceding step. Store the value itself, not
            # the one-element container, so every entry is a plain threshold.
            epsilon_arr = [None] * steps
            epsilon_arr[0] = epsilon_init[0]
        else:
            raise ValueError(
                "The length of epsilon_init can only be equal to 1 or steps."
            )

        # main PMCABC algorithm
        self.logger.info("Starting PMC iterations")
        for aStep in range(steps):
            self.logger.debug("iteration {} of PMC algorithm".format(aStep))
            if aStep == 0 and journal_file is not None:
                # Resuming: start from the entries at index -1 of the loaded
                # journal instead of from scratch.
                accepted_parameters = journal.get_accepted_parameters(-1)
                accepted_weights = journal.get_weights(-1)

                self.accepted_parameters_manager.update_broadcast(
                    self.backend,
                    accepted_parameters=accepted_parameters,
                    accepted_weights=accepted_weights)

                kernel_parameters = [
                    self.accepted_parameters_manager.
                    get_accepted_parameters_bds_values(kernel.models)
                    for kernel in self.kernel.kernels
                ]
                self.accepted_parameters_manager.update_kernel_values(
                    self.backend, kernel_parameters=kernel_parameters)

                # 3: calculate covariance
                self.logger.info("Calculateing covariance matrix")
                new_cov_mats = self.kernel.calculate_cov(
                    self.accepted_parameters_manager)
                # Since each entry of new_cov_mats is a numpy array, we can multiply like this
                accepted_cov_mats = [
                    covFactor * new_cov_mat for new_cov_mat in new_cov_mats
                ]

            # One independent random state per sample to be generated.
            seed_arr = self.rng.randint(0,
                                        np.iinfo(np.uint32).max,
                                        size=n_samples,
                                        dtype=np.uint32)
            rng_arr = np.array(
                [np.random.RandomState(seed) for seed in seed_arr])
            rng_pds = self.backend.parallelize(rng_arr)

            # 0: update remotely required variables
            self.logger.info("Broadcasting parameters")
            self.epsilon = epsilon_arr[aStep]
            self.accepted_parameters_manager.update_broadcast(
                self.backend, accepted_parameters, accepted_weights,
                accepted_cov_mats)

            # 1: calculate resample parameters
            self.logger.info("Resampling parameters")

            params_and_dists_and_counter_pds = self.backend.map(
                self._resample_parameter, rng_pds)
            params_and_dists_and_counter = self.backend.collect(
                params_and_dists_and_counter_pds)
            # Each collected item is a (parameters, distance, counter) triple;
            # transpose the list of triples into three lists.
            new_parameters, distances, counter = [
                list(t) for t in zip(*params_and_dists_and_counter)
            ]
            new_parameters = np.array(new_parameters)
            distances = np.array(distances)

            self.simulation_counter += sum(counter)

            # Compute epsilon for next step
            self.logger.info("Calculating acceptances threshold")
            if aStep < steps - 1:
                next_epsilon = np.percentile(distances, epsilon_percentile)
                if epsilon_arr[aStep + 1] is None:
                    epsilon_arr[aStep + 1] = next_epsilon
                else:
                    # A user-supplied threshold is only ever raised, never
                    # lowered, by the percentile of the current distances.
                    epsilon_arr[aStep + 1] = np.max(
                        [next_epsilon, epsilon_arr[aStep + 1]])

            # 2: calculate weights for new parameters
            self.logger.info("Calculating weights")

            new_parameters_pds = self.backend.parallelize(new_parameters)
            self.logger.info("Calculate weights")
            new_weights_pds = self.backend.map(self._calculate_weight,
                                               new_parameters_pds)
            new_weights = np.array(
                self.backend.collect(new_weights_pds)).reshape(-1, 1)
            # Normalise the weights. The sum is accumulated sequentially on
            # purpose so the floating-point result stays exactly as before.
            sum_of_weights = 0.0
            for w in new_weights:
                sum_of_weights += w
            new_weights = new_weights / sum_of_weights

            # The calculation of cov_mats needs the new weights and new parameters
            self.accepted_parameters_manager.update_broadcast(
                self.backend,
                accepted_parameters=new_parameters,
                accepted_weights=new_weights)

            # The parameters relevant to each kernel have to be used to calculate n_sample times. It is therefore more efficient to broadcast these parameters once,
            # instead of collecting them at each kernel in each step
            kernel_parameters = [
                self.accepted_parameters_manager.
                get_accepted_parameters_bds_values(kernel.models)
                for kernel in self.kernel.kernels
            ]
            self.accepted_parameters_manager.update_kernel_values(
                self.backend, kernel_parameters=kernel_parameters)

            # 3: calculate covariance
            self.logger.info("Calculating covariance matrix")
            new_cov_mats = self.kernel.calculate_cov(
                self.accepted_parameters_manager)
            # Since each entry of new_cov_mats is a numpy array, we can multiply like this
            new_cov_mats = [
                covFactor * new_cov_mat for new_cov_mat in new_cov_mats
            ]

            # 4: Update the newly computed values
            accepted_parameters = new_parameters
            accepted_weights = new_weights
            accepted_cov_mats = new_cov_mats

            self.logger.info("Save configuration to output journal")

            # Record every step when full output is requested, otherwise only
            # the final one.
            is_last_step = aStep == steps - 1
            if full_output == 1 or (full_output == 0 and is_last_step):
                journal.add_accepted_parameters(
                    copy.deepcopy(accepted_parameters))
                journal.add_distances(copy.deepcopy(distances))
                journal.add_weights(copy.deepcopy(accepted_weights))
                self.accepted_parameters_manager.update_broadcast(
                    self.backend,
                    accepted_parameters=accepted_parameters,
                    accepted_weights=accepted_weights)
                names_and_parameters = self._get_names_and_parameters()
                journal.add_user_parameters(names_and_parameters)
                journal.number_of_simulations.append(self.simulation_counter)
                if journal_file_save is not None:
                    if full_output == 1:
                        journal.save(
                            journal_file_save +
                            '.jrl')  # avoid writing a lot of different files.
                    else:
                        journal.save(journal_file_save + '_' + str(aStep) +
                                     '.jrl')

        # Add epsilon_arr to the journal
        journal.configuration["epsilon_arr"] = epsilon_arr

        return journal