Ejemplo n.º 1
0
    def sample(self,
               observations,
               steps,
               epsilon_init,
               n_samples=10000,
               n_samples_per_param=1,
               epsilon_percentile=10,
               covFactor=2,
               full_output=0,
               journal_file=None,
               journal_file_save=None):
        """Samples from the posterior distribution of the model parameter given the observed
        data observations.

        Parameters
        ----------
        observations : list
            A list, containing lists describing the observed data sets
        steps : integer
            Number of iterations in the sequential algoritm ("generations")
        epsilon_init : numpy.ndarray
            An array of proposed values of epsilon to be used at each steps. Can be supplied
            A single value to be used as the threshold in Step 1 or a `steps`-dimensional array of values to be
            used as the threshold in evry steps.
        n_samples : integer, optional
            Number of samples to generate. The default value is 10000.
        n_samples_per_param : integer, optional
            Number of data points in each simulated data set. The default value is 1.
        epsilon_percentile : float, optional
            A value between [0, 100]. The default value is 10.
        covFactor : float, optional
            scaling parameter of the covariance matrix. The default value is 2 as considered in [1].
        full_output: integer, optional
            If full_output==1, intermediate results are included in output journal.
            The default value is 0, meaning the intermediate results are not saved.
        journal_file: str, optional
            Filename of a journal file to read an already saved journal file, from which the first iteration will start.
            The default value is None.

        Returns
        -------
        abcpy.output.Journal
            A journal containing simulation results, metadata and optionally intermediate results.
        """
        self.accepted_parameters_manager.broadcast(self.backend, observations)
        self.n_samples = n_samples
        self.n_samples_per_param = n_samples_per_param

        if (journal_file is None):
            journal = Journal(full_output)
            journal.configuration["type_model"] = [
                type(model).__name__ for model in self.model
            ]
            journal.configuration["type_dist_func"] = type(
                self.distance).__name__
            journal.configuration["n_samples"] = self.n_samples
            journal.configuration[
                "n_samples_per_param"] = self.n_samples_per_param
            journal.configuration["steps"] = steps
            journal.configuration["epsilon_percentile"] = epsilon_percentile
        else:
            journal = Journal.fromFile(journal_file)

        accepted_parameters = None
        accepted_weights = None
        accepted_cov_mats = None

        # Define epsilon_arr
        if len(epsilon_init) == steps:
            epsilon_arr = epsilon_init
        else:
            if len(epsilon_init) == 1:
                epsilon_arr = [None] * steps
                epsilon_arr[0] = epsilon_init
            else:
                raise ValueError(
                    "The length of epsilon_init can only be equal to 1 or steps."
                )

        # main PMCABC algorithm
        self.logger.info("Starting PMC iterations")
        for aStep in range(steps):
            self.logger.debug("iteration {} of PMC algorithm".format(aStep))
            if (aStep == 0 and journal_file is not None):
                accepted_parameters = journal.get_accepted_parameters(-1)
                accepted_weights = journal.get_weights(-1)

                self.accepted_parameters_manager.update_broadcast(
                    self.backend,
                    accepted_parameters=accepted_parameters,
                    accepted_weights=accepted_weights)

                kernel_parameters = []
                for kernel in self.kernel.kernels:
                    kernel_parameters.append(
                        self.accepted_parameters_manager.
                        get_accepted_parameters_bds_values(kernel.models))
                self.accepted_parameters_manager.update_kernel_values(
                    self.backend, kernel_parameters=kernel_parameters)

                # 3: calculate covariance
                self.logger.info("Calculateing covariance matrix")
                new_cov_mats = self.kernel.calculate_cov(
                    self.accepted_parameters_manager)
                # Since each entry of new_cov_mats is a numpy array, we can multiply like this
                accepted_cov_mats = [
                    covFactor * new_cov_mat for new_cov_mat in new_cov_mats
                ]

            seed_arr = self.rng.randint(0,
                                        np.iinfo(np.uint32).max,
                                        size=n_samples,
                                        dtype=np.uint32)
            rng_arr = np.array(
                [np.random.RandomState(seed) for seed in seed_arr])
            rng_pds = self.backend.parallelize(rng_arr)

            # 0: update remotely required variables
            # print("INFO: Broadcasting parameters.")
            self.logger.info("Broadcasting parameters")
            self.epsilon = epsilon_arr[aStep]
            self.accepted_parameters_manager.update_broadcast(
                self.backend, accepted_parameters, accepted_weights,
                accepted_cov_mats)

            # 1: calculate resample parameters
            # print("INFO: Resampling parameters")
            self.logger.info("Resampling parameters")

            params_and_dists_and_counter_pds = self.backend.map(
                self._resample_parameter, rng_pds)
            params_and_dists_and_counter = self.backend.collect(
                params_and_dists_and_counter_pds)
            new_parameters, distances, counter = [
                list(t) for t in zip(*params_and_dists_and_counter)
            ]
            new_parameters = np.array(new_parameters)
            distances = np.array(distances)

            for count in counter:
                self.simulation_counter += count

            # Compute epsilon for next step
            # print("INFO: Calculating acceptance threshold (epsilon).")
            self.logger.info("Calculating acceptances threshold")
            if aStep < steps - 1:
                if epsilon_arr[aStep + 1] == None:
                    epsilon_arr[aStep + 1] = np.percentile(
                        distances, epsilon_percentile)
                else:
                    epsilon_arr[aStep + 1] = np.max([
                        np.percentile(distances, epsilon_percentile),
                        epsilon_arr[aStep + 1]
                    ])

            # 2: calculate weights for new parameters
            self.logger.info("Calculating weights")

            new_parameters_pds = self.backend.parallelize(new_parameters)
            self.logger.info("Calculate weights")
            new_weights_pds = self.backend.map(self._calculate_weight,
                                               new_parameters_pds)
            new_weights = np.array(
                self.backend.collect(new_weights_pds)).reshape(-1, 1)
            sum_of_weights = 0.0
            for w in new_weights:
                sum_of_weights += w
            new_weights = new_weights / sum_of_weights

            # The calculation of cov_mats needs the new weights and new parameters
            self.accepted_parameters_manager.update_broadcast(
                self.backend,
                accepted_parameters=new_parameters,
                accepted_weights=new_weights)

            # The parameters relevant to each kernel have to be used to calculate n_sample times. It is therefore more efficient to broadcast these parameters once,
            # instead of collecting them at each kernel in each step
            kernel_parameters = []
            for kernel in self.kernel.kernels:
                kernel_parameters.append(
                    self.accepted_parameters_manager.
                    get_accepted_parameters_bds_values(kernel.models))
            self.accepted_parameters_manager.update_kernel_values(
                self.backend, kernel_parameters=kernel_parameters)

            # 3: calculate covariance
            self.logger.info("Calculating covariance matrix")
            new_cov_mats = self.kernel.calculate_cov(
                self.accepted_parameters_manager)
            # Since each entry of new_cov_mats is a numpy array, we can multiply like this
            new_cov_mats = [
                covFactor * new_cov_mat for new_cov_mat in new_cov_mats
            ]

            # 4: Update the newly computed values
            accepted_parameters = new_parameters
            accepted_weights = new_weights
            accepted_cov_mats = new_cov_mats

            self.logger.info("Save configuration to output journal")

            if (full_output == 1
                    and aStep <= steps - 1) or (full_output == 0
                                                and aStep == steps - 1):
                journal.add_accepted_parameters(
                    copy.deepcopy(accepted_parameters))
                journal.add_distances(copy.deepcopy(distances))
                journal.add_weights(copy.deepcopy(accepted_weights))
                self.accepted_parameters_manager.update_broadcast(
                    self.backend,
                    accepted_parameters=accepted_parameters,
                    accepted_weights=accepted_weights)
                names_and_parameters = self._get_names_and_parameters()
                journal.add_user_parameters(names_and_parameters)
                journal.number_of_simulations.append(self.simulation_counter)
                print(journal_file_save)
                if journal_file_save is not None:
                    if full_output == 1:
                        journal.save(
                            journal_file_save +
                            '.jrl')  # avoid writing a lot of different files.
                    else:
                        journal.save(journal_file_save + '_' + str(aStep) +
                                     '.jrl')

        # Add epsilon_arr to the journal
        journal.configuration["epsilon_arr"] = epsilon_arr

        return journal
Ejemplo n.º 2
0
    def sample(self,
               observations,
               steps,
               epsilon,
               n_samples=10000,
               n_samples_per_param=1,
               beta=2,
               delta=0.2,
               v=0.3,
               ar_cutoff=0.1,
               resample=None,
               n_update=None,
               full_output=0,
               journal_file=None):
        """Samples from the posterior distribution of the model parameter given the observed
        data observations.

        Parameters
        ----------
        observations : list
            A list, containing lists describing the observed data sets
        steps : integer
            Number of maximum iterations in the sequential algoritm ("generations")
        epsilon : numpy.float
            A proposed value of threshold to start with.
        n_samples : integer, optional
            Number of samples to generate. The default value is 10000.
        n_samples_per_param : integer, optional
            Number of data points in each simulated data set. The default value is 1.
        beta : numpy.float
            Tuning parameter of SABC, default value is 2.
        delta : numpy.float
            Tuning parameter of SABC, default value is 0.2.
        v : numpy.float, optional
            Tuning parameter of SABC, The default value is 0.3.
        ar_cutoff : numpy.float
            Acceptance ratio cutoff, The default value is 0.1.
        resample: int, optional
            Resample after this many acceptance, The default value is None which takes value inside n_samples
        n_update: int, optional
            Number of perturbed parameters at each step, The default value is None which takes value inside n_samples
        full_output: integer, optional
            If full_output==1, intermediate results are included in output journal.
            The default value is 0, meaning the intermediate results are not saved.
        journal_file: str, optional
            Filename of a journal file to read an already saved journal file, from which the first iteration will start.
            The default value is None.

        Returns
        -------
        abcpy.output.Journal
            A journal containing simulation results, metadata and optionally intermediate results.
        """
        global broken_preemptively
        self.sample_from_prior(rng=self.rng)
        self.accepted_parameters_manager.broadcast(self.backend, observations)
        self.epsilon = epsilon
        self.n_samples = n_samples
        self.n_samples_per_param = n_samples_per_param

        if (journal_file is None):
            journal = Journal(full_output)
            journal.configuration["type_model"] = [
                type(model).__name__ for model in self.model
            ]
            journal.configuration["type_dist_func"] = type(
                self.distance).__name__
            journal.configuration["type_kernel_func"] = type(self.kernel)
            journal.configuration["n_samples"] = self.n_samples
            journal.configuration[
                "n_samples_per_param"] = self.n_samples_per_param
            journal.configuration["beta"] = beta
            journal.configuration["delta"] = delta
            journal.configuration["v"] = v
            journal.configuration["ar_cutoff"] = ar_cutoff
            journal.configuration["resample"] = resample
            journal.configuration["n_update"] = n_update
            journal.configuration["full_output"] = full_output
        else:
            journal = Journal.fromFile(journal_file)

        accepted_parameters = None
        distances = np.zeros(shape=(n_samples, ))
        smooth_distances = np.zeros(shape=(n_samples, ))
        accepted_weights = np.ones(shape=(n_samples, 1))
        all_distances = None
        accepted_cov_mat = None

        if resample == None:
            resample = n_samples
        if n_update == None:
            n_update = n_samples
        sample_array = np.ones(shape=(steps, ))
        sample_array[0] = n_samples
        sample_array[1:] = n_update

        ## Acceptance counter to determine the resampling step
        accept = 0
        samples_until = 0

        ## Counter whether broken preemptively
        broken_preemptively = False

        for aStep in range(0, steps):
            self.logger.debug("step {}".format(aStep))
            if (aStep == 0 and journal_file is not None):
                accepted_parameters = journal.get_accepted_parameters(-1)
                accepted_weights = journal.get_weights(-1)

                #Broadcast Accepted parameters and Accedpted weights
                self.accepted_parameters_manager.update_broadcast(
                    self.backend,
                    accepted_parameters=accepted_parameters,
                    accepted_weights=accepted_weights)

                kernel_parameters = []
                for kernel in self.kernel.kernels:
                    kernel_parameters.append(
                        self.accepted_parameters_manager.
                        get_accepted_parameters_bds_values(kernel.models))

                #Broadcast Accepted Kernel parameters
                self.accepted_parameters_manager.update_kernel_values(
                    self.backend, kernel_parameters=kernel_parameters)

                new_cov_mats = self.kernel.calculate_cov(
                    self.accepted_parameters_manager)
                accepted_cov_mats = []
                for new_cov_mat in new_cov_mats:
                    if not (new_cov_mat.size == 1):
                        accepted_cov_mats.append(beta * new_cov_mat + 0.0001 *
                                                 np.trace(new_cov_mat) *
                                                 np.eye(new_cov_mat.shape[0]))
                    else:
                        accepted_cov_mats.append(
                            (beta * new_cov_mat +
                             0.0001 * new_cov_mat).reshape(1, 1))

                # Broadcast Accepted Covariance Matrix
                self.accepted_parameters_manager.update_broadcast(
                    self.backend, accepted_cov_mats=accepted_cov_mats)

            # main SABC algorithm
            # print("INFO: Initialization of SABC")
            seed_arr = self.rng.randint(0,
                                        np.iinfo(np.uint32).max,
                                        size=int(sample_array[aStep]),
                                        dtype=np.uint32)
            rng_arr = np.array(
                [np.random.RandomState(seed) for seed in seed_arr])
            index_arr = self.rng.randint(0,
                                         self.n_samples,
                                         size=int(sample_array[aStep]),
                                         dtype=np.uint32)
            data_arr = []
            for i in range(len(rng_arr)):
                data_arr.append([rng_arr[i], index_arr[i]])
            data_pds = self.backend.parallelize(data_arr)

            # 0: update remotely required variables
            self.logger.info("Broadcasting parameters")
            self.epsilon = epsilon

            # 1: Calculate  parameters
            self.logger.info("Initial accepted parameters")
            params_and_dists_pds = self.backend.map(self._accept_parameter,
                                                    data_pds)
            params_and_dists = self.backend.collect(params_and_dists_pds)
            new_parameters, filenames, index, acceptance, counter = [
                list(t) for t in zip(*params_and_dists)
            ]

            # Keeping counter of number of simulations
            for count in counter:
                self.simulation_counter += count

            #new_parameters = np.array(new_parameters)
            index = np.array(index)
            acceptance = np.array(acceptance)

            # Reading all_distances at Initial step
            if aStep == 0:
                index = np.linspace(0, n_samples - 1,
                                    n_samples).astype(int).reshape(
                                        n_samples, )
                accept = 0

            # Initialize/Update the accepted parameters and their corresponding distances
            if accepted_parameters is None:
                accepted_parameters = new_parameters
            else:
                for ind in range(len(acceptance)):
                    if acceptance[ind] == 1:
                        accepted_parameters[index[ind]] = new_parameters[ind]

            # 1.5: Update the distance and recompute distances from observed data
            self.logger.info("Updating distance")
            distances = self.distance.distances[0].update(
                filenames,
                self.accepted_parameters_manager.observations_bds.value(),
                self.backend)
            self._update_broadcasts(distances=distances)

            # 2: Compute epsilon
            U = self._average_redefined_distance(distances,
                                                 epsilon * (1 - delta))
            epsilon = self._schedule(U, v)
            #U = np.mean(distances)
            #epsilon = np.percentile(distances, .1 * 100)
            print(epsilon)
            # 4: Show progress and if acceptance rate smaller than a value break the iteration
            if aStep > 0:
                accept = accept + np.sum(acceptance)
                samples_until = samples_until + sample_array[aStep]
                acceptance_rate = accept / samples_until

                msg = (
                    "updates= {:.2f}, epsilon= {}, u.mean={:e}, acceptance rate: {:.2f}"
                    .format(
                        np.sum(sample_array[1:aStep + 1]) /
                        np.sum(sample_array[1:]) * 100, epsilon, U,
                        acceptance_rate))
                self.logger.debug(msg)
                if acceptance_rate < ar_cutoff:
                    broken_preemptively = True
                    self.logger.debug(
                        "Stopping as acceptance rate is lower than cutoff")
                    break

            # 5: Resampling if number of accepted particles greater than resample
            if accept >= resample and U > 1e-100:
                self.logger.info("Weighted resampling")
                weight = np.exp(-distances * delta / U)
                weight = weight / sum(weight)
                index_resampled = self.rng.choice(np.arange(n_samples,
                                                            dtype=int),
                                                  n_samples,
                                                  replace=1,
                                                  p=weight)
                accepted_parameters = [
                    accepted_parameters[i] for i in index_resampled
                ]
                distances = distances[index_resampled]

                ## Update U and epsilon:
                # # epsilon = epsilon * (1 - delta)
                # U = np.mean(distances)
                # # epsilon = self._schedule(U, v)
                # epsilon = np.percentile(distances, .1 * 100)
                U = self._average_redefined_distance(distances,
                                                     epsilon * (1 - delta))
                epsilon = self._schedule(U, v)

                ## Print effective sampling size
                print('Resampling: Effective sampling size: ',
                      1 / sum(pow(weight / sum(weight), 2)))
                accept = 0
                samples_until = 0

                ## Compute and broadcast accepted parameters, accepted kernel parameters and accepted Covariance matrix
                # Broadcast Accepted parameters and add to journal
                self.accepted_parameters_manager.update_broadcast(
                    self.backend,
                    accepted_weights=accepted_weights,
                    accepted_parameters=accepted_parameters)
                # Compute Accepetd Kernel parameters and broadcast them
                kernel_parameters = []
                for kernel in self.kernel.kernels:
                    kernel_parameters.append(
                        self.accepted_parameters_manager.
                        get_accepted_parameters_bds_values(kernel.models))
                self.accepted_parameters_manager.update_kernel_values(
                    self.backend, kernel_parameters=kernel_parameters)
                # Compute Kernel Covariance Matrix and broadcast it
                new_cov_mats = self.kernel.calculate_cov(
                    self.accepted_parameters_manager)
                accepted_cov_mats = []
                for new_cov_mat in new_cov_mats:
                    if not (new_cov_mat.size == 1):
                        accepted_cov_mats.append(beta * new_cov_mat + 0.0001 *
                                                 np.trace(new_cov_mat) *
                                                 np.eye(new_cov_mat.shape[0]))
                    else:
                        accepted_cov_mats.append(
                            (beta * new_cov_mat +
                             0.0001 * new_cov_mat).reshape(1, 1))

                self.accepted_parameters_manager.update_broadcast(
                    self.backend, accepted_cov_mats=accepted_cov_mats)

                if (full_output == 1 and aStep <= steps - 1):
                    ## Saving intermediate configuration to output journal.
                    print('Saving after resampling')
                    journal.add_accepted_parameters(
                        copy.deepcopy(accepted_parameters))
                    journal.add_weights(copy.deepcopy(accepted_weights))
                    journal.add_distances(copy.deepcopy(distances))
                    names_and_parameters = self._get_names_and_parameters()
                    journal.add_user_parameters(names_and_parameters)
                    journal.number_of_simulations.append(
                        self.simulation_counter)
            else:
                ## Compute and broadcast accepted parameters, accepted kernel parameters and accepted Covariance matrix
                # Broadcast Accepted parameters
                self.accepted_parameters_manager.update_broadcast(
                    self.backend,
                    accepted_weights=accepted_weights,
                    accepted_parameters=accepted_parameters)
                # Compute Accepetd Kernel parameters and broadcast them
                kernel_parameters = []
                for kernel in self.kernel.kernels:
                    kernel_parameters.append(
                        self.accepted_parameters_manager.
                        get_accepted_parameters_bds_values(kernel.models))
                self.accepted_parameters_manager.update_kernel_values(
                    self.backend, kernel_parameters=kernel_parameters)
                # Compute Kernel Covariance Matrix and broadcast it
                new_cov_mats = self.kernel.calculate_cov(
                    self.accepted_parameters_manager)
                accepted_cov_mats = []
                for new_cov_mat in new_cov_mats:
                    if not (new_cov_mat.size == 1):
                        accepted_cov_mats.append(beta * new_cov_mat + 0.0001 *
                                                 np.trace(new_cov_mat) *
                                                 np.eye(new_cov_mat.shape[0]))
                    else:
                        accepted_cov_mats.append(
                            (beta * new_cov_mat +
                             0.0001 * new_cov_mat).reshape(1, 1))

                self.accepted_parameters_manager.update_broadcast(
                    self.backend, accepted_cov_mats=accepted_cov_mats)

                if (full_output == 1 and aStep <= steps - 1):
                    ## Saving intermediate configuration to output journal.
                    journal.add_accepted_parameters(
                        copy.deepcopy(accepted_parameters))
                    journal.add_weights(copy.deepcopy(accepted_weights))
                    journal.add_distances(copy.deepcopy(distances))
                    names_and_parameters = self._get_names_and_parameters()
                    journal.add_user_parameters(names_and_parameters)
                    journal.number_of_simulations.append(
                        self.simulation_counter)

        # Add epsilon_arr, number of final steps and final output to the journal
        # print("INFO: Saving final configuration to output journal.")
        if (full_output == 0) or (full_output == 1 and broken_preemptively
                                  and aStep <= steps - 1):
            journal.add_accepted_parameters(copy.deepcopy(accepted_parameters))
            journal.add_weights(copy.deepcopy(accepted_weights))
            journal.add_distances(copy.deepcopy(distances))
            self.accepted_parameters_manager.update_broadcast(
                self.backend,
                accepted_parameters=accepted_parameters,
                accepted_weights=accepted_weights)
            names_and_parameters = self._get_names_and_parameters()
            journal.add_user_parameters(names_and_parameters)
            journal.number_of_simulations.append(self.simulation_counter)

        journal.configuration["steps"] = aStep + 1
        journal.configuration["epsilon"] = epsilon

        return journal