Example #1
def find_clusters(posterior, base_directory='./', threshold=2.0, K_g=1, analysis_file=None):
    """
    Finds clusters among posterior MCMC samples, grouped by Gelman-Rubin R value, and creates a Gaussian mixture density.

    Finding clusters and creating a Gaussian mixture density is a necessary intermediate step before using the sample-pmc subcommand.
    The input files are expected in EOS_BASE_DIRECTORY/POSTERIOR/mcmc-*. All MCMC input files present will be used in the clustering.
    The output files will be stored in EOS_BASE_DIRECTORY/POSTERIOR/clusters.

    :param posterior: The name of the posterior.
    :type posterior: str
    :param base_directory: The base directory for the storage of data files. Can also be set via the EOS_BASE_DIRECTORY environment variable.
    :type base_directory: str, optional
    :param threshold: The R value threshold. If two sample subsets have an R value larger than this threshold, they will be treated as two distinct clusters. Defaults to 2.0.
    :type threshold: float > 1.0, optional
    :param K_g: The number of mixture components per cluster. Defaults to 1.
    :type K_g: int >= 1, optional
    """

    import pathlib
    output_path = os.path.join(base_directory, posterior, 'clusters')
    _set_log_file(output_path, 'log')
    input_paths = [str(p) for p in pathlib.Path(os.path.join(base_directory, posterior)).glob('mcmc-*')]
    chains    = [eos.data.MarkovChain(path).samples for path in input_paths]
    n = len(chains[0])
    for chain in chains:
        assert len(chain) == n, 'Every chain must contain the same number of samples'

    groups = pypmc.mix_adapt.r_value.r_group([_np.mean(chain.T, axis=1) for chain in chains],
                           [_np.var (chain.T, axis=1, ddof=1) for chain in chains],
                           n, threshold)
    eos.info('Found {} groups using an R value threshold of {}'.format(len(groups), threshold))
    density   = pypmc.mix_adapt.r_value.make_r_gaussmix(chains, K_g=K_g, critical_r=threshold)
    eos.info(f'Created mixture density with {len(density.components)} components')
    eos.data.MixtureDensity.create(output_path, density)
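A minimal usage sketch (the posterior name and directory below are placeholders, and the tuning values are only illustrative):

import eos

# cluster the samples found under /tmp/eos-data/my-posterior/mcmc-* and store
# the resulting mixture density under /tmp/eos-data/my-posterior/clusters
eos.find_clusters('my-posterior', base_directory='/tmp/eos-data', threshold=1.5, K_g=2)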
Example #2
    def optimize(self, start_point=None, **kwargs):
        """
        Optimizes the log(posterior) and returns a best-fit-point summary.

        :param start_point: Parameter point from which to start the optimization, with the elements in the same order as in eos.Analysis.varied_parameters. If not specified, optimization starts at the current parameter point.
        :type start_point: iterable, optional
        """
        if start_point is None:
            start_point = [float(p) for p in self.varied_parameters]

        default_kwargs = {'method': 'SLSQP', 'options': {'ftol': 1.0e-13}}
        if not kwargs:
            kwargs = default_kwargs

        res = scipy.optimize.minimize(self.negative_log_pdf,
                                      start_point,
                                      args=None,
                                      bounds=self.bounds,
                                      **kwargs)

        if not res.success:
            eos.warn('Optimization did not succeed')
            eos.warn('  optimizer message reads: {}'.format(res.message))
        else:
            eos.info(
                'Optimization goal achieved after {nfev} function evaluations'.
                format(nfev=res.nfev))

        for p, v in zip(self.varied_parameters, res.x):
            p.set(v)

        return eos.BestFitPoint(self, res.x)
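A hedged usage sketch; `analysis` is assumed to be an existing eos.Analysis instance, and the extra keyword argument only illustrates what gets forwarded to scipy.optimize.minimize:

# start from the current parameter point and use the default SLSQP settings
bfp = analysis.optimize()

# forward keyword arguments to scipy.optimize.minimize, e.g. to switch the method
bfp = analysis.optimize(method='L-BFGS-B')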
Example #3
    def optimize(self, start_point=None, rng=np.random.mtrand, **kwargs):
        """
        Optimizes the log(posterior) and returns a best-fit-point summary.

        :param start_point: Parameter point from which to start the optimization, with the elements in the same order as in eos.Analysis.varied_parameters.
                            If set to "random", optimization starts at a random point in the space of the priors.
                            If not specified, optimization starts at the current parameter point.
        :type start_point: iterable, optional
        :param rng: Optional random number generator

        """
        if start_point is None:
            start_point = [float(p) for p in self.varied_parameters]
        elif start_point == "random":
            start_point = [
                p.inverse_cdf(rng.uniform())
                for p in self._log_posterior.log_priors()
            ]

        default_kwargs = {'method': 'SLSQP', 'options': {'ftol': 1.0e-13}}
        if not kwargs:
            kwargs = default_kwargs

        res = scipy.optimize.minimize(self.negative_log_pdf,
                                      self._par_to_x(start_point),
                                      args=None,
                                      bounds=[(-1.0, 1.0)
                                              for b in self.bounds],
                                      **kwargs)

        if not res.success:
            eos.warn('Optimization did not succeed')
            eos.warn('  optimizer message reads: {}'.format(res.message))
        else:
            eos.info(
                'Optimization goal achieved after {nfev} function evaluations'.
                format(nfev=res.nfev))

        bfp = self._x_to_par(res.x)

        for p, v in zip(self.varied_parameters, bfp):
            p.set(v)

        return eos.BestFitPoint(self, bfp)
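Any generator that exposes uniform() can be passed as rng; a sketch using numpy's legacy RandomState for a reproducible random starting point (the seed and the `analysis` object are assumptions):

import numpy as np

rng = np.random.RandomState(74)
bfp = analysis.optimize(start_point="random", rng=rng)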
Example #4
    def run(self):
        """Runs predefined analysis steps recorded in the analysis file."""
        import inspect
        command_map = {
            'find-clusters': eos.find_clusters,
            'predict-observables': eos.predict_observables,
            'sample-mcmc': eos.sample_mcmc,
            'sample-pmc': eos.sample_pmc,
        }

        for idx, step in enumerate(self._steps):
            if type(step) is not dict:
                raise ValueError("Step #{} is not a key/value map.")

            if 'command' not in step:
                raise ValueError("Step #{} contains no command.")

            command = step['command']
            func = command_map[command]
            params = step['parameters'] if 'parameters' in step else {}
            params = {params_map[(command, k)]: v for k, v in params.items()}
            paramstr = ','.join(
                ['{k}={v}'.format(k=k, v=v) for k, v in params.items()])

            func_sig = inspect.signature(func)
            func_required_args = set()
            for n, p in func_sig.parameters.items():
                if p.default is not p.empty:
                    continue
                func_required_args.add(n)
            for n in func_required_args:
                if n in params.keys():
                    continue
                eos.error('Mandatory argument \'{}\' not provided'.format(n))
                return

            eos.info('Beginning step #{i}: {cmd}({params})'.format(
                i=idx, cmd=command, params=paramstr))
            func(**params)
            eos.info('Step #{i} complete'.format(i=idx))
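The steps consumed by run() are key/value maps with a 'command' entry (one of the keys registered in command_map above) and an optional 'parameters' map; a hypothetical list of steps, with parameter names that are assumptions rather than the actual analysis-file schema, could look like:

steps = [
    {'command': 'sample-mcmc',   'parameters': {'N': 2000, 'stride': 5}},
    {'command': 'find-clusters', 'parameters': {'threshold': 1.5}},
    {'command': 'sample-pmc'},
]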
Example #5
File: signal_pdf.py Project: cBobeth/eos
    def sample_mcmc(self, N, stride, pre_N, preruns, cov_scale=0.1, start_point=None, rng=np.random.mtrand):
        """
        Return samples of the kinematic variables and the log(PDF).

        Obtains random samples of the log(PDF) using an adaptive Markov Chain Monte Carlo with PyPMC.
        A prerun with adaptations is carried out first and its samples are discarded.

        :param N: Number of samples that shall be returned
        :param stride: Stride, i.e., the factor by which the total number of drawn samples is thinned to return N samples.
        :param pre_N: Number of samples in each prerun.
        :param preruns: Number of preruns.
        :param cov_scale: Scale factor for the initial guess of the covariance matrix.
        :param start_point: Optional starting point for the chain
        :type start_point: list-like, optional
        :param rng: Optional random number generator (must be compatible with the requirements of pypmc.sampler.markov_chain.MarkovChain)

        :return: A tuple of the kinematic variables as array of size N and the log(PDF) as array of size N.

        .. note::
           This method requires the PyPMC python module, which can be installed from PyPI.
        """
        import pypmc
        try:
            from tqdm.auto import tqdm
            progressbar = tqdm
        except ImportError:
            progressbar = lambda x, **kw: x

        ind_lower = np.array([bound[0].evaluate() for bound in self.bounds])
        ind_upper = np.array([bound[1].evaluate() for bound in self.bounds])
        ind = pypmc.tools.indicator.hyperrectangle(ind_lower, ind_upper)

        log_target = pypmc.tools.indicator.merge_function_with_indicator(self.log_pdf, ind, -np.inf)

        # create initial covariance
        sigma = np.diag([np.square(bound[1].evaluate() - bound[0].evaluate()) / 12 * cov_scale for bound in self.bounds])
        log_proposal = pypmc.density.gauss.LocalGauss(sigma)

        # create start point, if not provided
        if start_point is None:
            u = np.array([rng.uniform(0.0, 1.0) for j in range(0, len(ind_lower))])
            ubar = 1.0 - u
            start_point = ubar * ind_upper + u * ind_lower

        # create MC sampler
        sampler = pypmc.sampler.markov_chain.AdaptiveMarkovChain(log_target, log_proposal, start_point, save_target_values=True, rng=rng)

        # pre run to adapt markov chains
        for i in progressbar(range(0, preruns), desc="Pre-runs", leave=False):
            eos.info('Prerun {} out of {}'.format(i, preruns))
            accept_count = sampler.run(pre_N)
            accept_rate  = accept_count / pre_N * 100
            eos.info('Prerun {}: acceptance rate is {:3.0f}%'.format(i, accept_rate))
            sampler.adapt()
        sampler.clear()

        # obtain final samples
        eos.info('Main run: started ...')
        sample_total  = N * stride
        sample_chunk  = sample_total // 100
        sample_chunks = [sample_chunk for i in range(0, 99)]
        sample_chunks.append(sample_total - 99 * sample_chunk)
        accept_count  = 0  # count main-run acceptances only, not those from the preruns
        for current_chunk in progressbar(sample_chunks, desc="Main run", leave=False):
            accept_count = accept_count + sampler.run(current_chunk)
        accept_rate  = accept_count / (N * stride) * 100
        eos.info('Main run: acceptance rate is {:3.0f}%'.format(accept_rate))

        parameter_samples = sampler.samples[:][::stride]
        weights = sampler.target_values[:][::stride, 0]

        return (parameter_samples, weights)
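A usage sketch, assuming `pdf` is an object (e.g. an eos.SignalPDF) that provides this method:

import numpy as np

rng = np.random.RandomState(1701)
# draw 5000 kinematic samples, thinned by a stride of 5, after three adaptive preruns
kinematic_samples, log_pdf_values = pdf.sample_mcmc(N=5000, stride=5, pre_N=1000, preruns=3, rng=rng)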
Example #6
    def __init__(self,
                 priors,
                 likelihood,
                 global_options={},
                 manual_constraints={},
                 fixed_parameters={}):
        """Constructor."""
        self.init_args = {
            'priors': priors,
            'likelihood': likelihood,
            'global_options': global_options,
            'manual_constraints': manual_constraints,
            'fixed_parameters': fixed_parameters
        }
        self.parameters = eos.Parameters.Defaults()
        self.global_options = eos.Options()
        self.log_likelihood = eos.LogLikelihood(self.parameters)
        self.log_posterior = eos.LogPosterior(self.log_likelihood)
        self.varied_parameters = []
        self.bounds = []

        eos.info(
            'Creating analysis with {nprior} priors, {nconst} EOS-wide constraints, {nopts} global options, {nmanual} manually-entered constraints and {nparams} fixed parameters.'
            .format(nprior=len(priors),
                    nconst=len(likelihood),
                    nopts=len(global_options),
                    nmanual=len(manual_constraints),
                    nparams=len(fixed_parameters)))
        eos.debug('priors:')
        for p in priors:
            eos.debug(' - {name} ({type}) [{min}, {max}]'.format(
                name=p['parameter'],
                type=p['type'],
                min=p['min'],
                max=p['max']))
        eos.debug('constraints:')
        for cn in likelihood:
            eos.debug(' - {name}'.format(name=cn))
        eos.debug('manual_constraints:')
        for cn, ce in manual_constraints.items():
            eos.debug(' - {name}'.format(name=cn))
        eos.debug('fixed_parameters:')
        for pn, pe in fixed_parameters.items():
            eos.debug(' - {name}'.format(name=pn))

        # collect the global options
        for key, value in global_options.items():
            self.global_options.set(key, value)

        # Fix specified parameters
        for param, value in fixed_parameters.items():
            self.parameters.set(param, value)

        # create the priors
        for prior in priors:
            parameter = prior['parameter']
            minv = prior['min']
            maxv = prior['max']
            prior_type = prior['type'] if 'type' in prior else 'uniform'
            if 'uniform' == prior_type or 'flat' == prior_type:
                self.log_posterior.add(
                    eos.LogPrior.Flat(self.parameters, parameter,
                                      eos.ParameterRange(minv, maxv)), False)
            elif 'gauss' == prior_type or 'gaussian' == prior_type:
                central = prior['central']
                sigma = prior['sigma']
                if type(sigma) is list or type(sigma) is tuple:
                    sigma_lo = sigma[0]
                    sigma_hi = sigma[1]
                else:
                    sigma_lo = sigma
                    sigma_hi = sigma
                self.log_posterior.add(
                    eos.LogPrior.Gauss(self.parameters, parameter,
                                       eos.ParameterRange(minv, maxv),
                                       central - sigma_lo, central,
                                       central + sigma_hi), False)
            else:
                raise ValueError(
                    'Unknown prior type \'{}\''.format(prior_type))

            self.bounds.append((minv, maxv))
            p = self.parameters[parameter]
            p.set_min(minv)
            p.set_max(maxv)
            self.varied_parameters.append(p)

        # create the likelihood
        for constraint_name in likelihood:
            constraint = eos.Constraint.make(constraint_name,
                                             self.global_options)
            self.log_likelihood.add(constraint)

        # add manual constraints to the likelihood
        for constraint_name, constraint_data in manual_constraints.items():
            import yaml
            yaml_string = yaml.dump(constraint_data)
            constraint_entry = eos.ConstraintEntry.deserialize(
                constraint_name, yaml_string)
            constraint = constraint_entry.make(constraint_name,
                                               self.global_options)
            self.log_likelihood.add(constraint)

        # perform some sanity checks
        varied_parameter_names = set(
            [p.name() for p in self.varied_parameters])
        used_parameter_names = set()
        for observable in self.log_likelihood.observable_cache():
            for i in observable.used_parameter_ids():
                used_parameter_names.add(self.parameters.by_id(i).name())

        used_but_unvaried = used_parameter_names - varied_parameter_names
        if (len(used_but_unvaried) > 0):
            eos.info(
                'likelihood probably depends on {} parameter(s) that do not appear in the prior; check prior?'
                .format(len(used_but_unvaried)))
        for n in used_but_unvaried:
            eos.debug('used, but not included in any prior: \'{}\''.format(n))
        for n in varied_parameter_names - used_parameter_names:
            eos.warn(
                'likelihood does not depend on parameter \'{}\'; remove from prior or check options!'
                .format(n))
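A hedged construction example; the parameter names, constraint name, and option below are placeholders, not actual EOS entries:

analysis = eos.Analysis(
    priors=[
        {'parameter': 'example::parameter-1', 'min': 0.0, 'max': 1.0, 'type': 'uniform'},
        {'parameter': 'example::parameter-2', 'min': 3.5, 'max': 4.5, 'type': 'gauss',
         'central': 4.0, 'sigma': 0.1},
    ],
    likelihood=['EXAMPLE::constraint-name'],
    global_options={'example-option': 'value'},
)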
Example #7
    def sample_pmc(self,
                   log_proposal,
                   step_N=1000,
                   steps=10,
                   final_N=5000,
                   rng=np.random.mtrand):
        """
        Return samples of the parameters and log(weights)

        Obtains random samples of the log(posterior) using adaptive importance sampling following
        the Population Monte Carlo approach with PyPMC.

        :param log_proposal: Initial gaussian mixture density that shall be adapted to the posterior density.
        :param step_N: Number of samples that shall be drawn in each adaptation step.
        :param steps: Number of adaptation steps.
        :param final_N: Number of samples that shall be drawn after all adaptation steps.
        :param rng: Optional random number generator (must be compatible with the requirements of pypmc.sampler.importance_sampler.ImportanceSampler)

        :return: A tuple of the parameters as array of length N = step_N * steps + final_N, the (linear) weights as array of length N, and the
            final proposal function as pypmc.density.mixture.MixtureDensity.

        .. note::
           This method requires the PyPMC python module, which can be installed from PyPI.
        """
        import pypmc
        try:
            from tqdm.auto import tqdm
            progressbar = tqdm
        except ImportError:
            progressbar = lambda x, **kw: x

        ind_lower = np.array([bound[0] for bound in self.bounds])
        ind_upper = np.array([bound[1] for bound in self.bounds])
        ind = pypmc.tools.indicator.hyperrectangle(ind_lower, ind_upper)

        log_target = pypmc.tools.indicator.merge_function_with_indicator(
            self.log_pdf, ind, -np.inf)

        # create PMC sampler
        sampler = pypmc.sampler.importance_sampling.ImportanceSampler(
            log_target, log_proposal, save_target_values=True, rng=rng)
        generating_components = []

        eps = np.finfo(float).eps

        # carry out adaptions
        for step in progressbar(range(steps), desc="Adaptions", leave=False):
            origins = sampler.run(step_N, trace_sort=True)
            generating_components.append(origins)
            samples = sampler.samples[:]
            last_weights = np.copy(sampler.weights[-1][:, 0])
            for i, w in enumerate(last_weights):
                if w <= 0 or np.isnan(w):
                    last_weights[i] = eps
            normalized_last_weights = last_weights / np.sum(last_weights)
            last_entropy = -1.0 * np.dot(np.log(normalized_last_weights),
                                         normalized_last_weights)
            last_perplexity = np.exp(last_entropy) / len(
                normalized_last_weights)
            eos.info(
                'Perplexity of the last samples after sampling in step {}: {}'.
                format(step, last_perplexity))
            weights = sampler.weights[:][:, 0]
            adjusted_weights = np.copy(weights)
            for i, w in enumerate(adjusted_weights):
                if w <= 0 or np.isnan(w):
                    adjusted_weights[i] = eps
            normalized_weights = adjusted_weights / np.sum(adjusted_weights)
            entropy = -1.0 * np.dot(np.log(normalized_weights),
                                    normalized_weights)
            perplexity = np.exp(entropy) / len(normalized_weights)
            eos.info(
                'Perplexity of all previous samples after sampling in step {}: {}'
                .format(step, perplexity))
            pypmc.mix_adapt.pmc.gaussian_pmc(samples,
                                             sampler.proposal,
                                             adjusted_weights,
                                             mincount=0,
                                             rb=True,
                                             copy=False)
            sampler.proposal.normalize()

        # draw final samples
        origins = sampler.run(final_N, trace_sort=True)
        generating_components.append(origins)
        samples = sampler.samples[:]
        weights = sampler.weights[:][:, 0]
        adjusted_weights = np.copy(weights)
        for i, w in enumerate(adjusted_weights):
            if w <= 0 or np.isnan(w):
                adjusted_weights[i] = eps
        normalized_weights = adjusted_weights / np.sum(adjusted_weights)
        entropy = -1.0 * np.dot(np.log(normalized_weights), normalized_weights)
        perplexity = np.exp(entropy) / len(normalized_weights)
        eos.info('Perplexity after final samples: {}'.format(perplexity))

        return samples, weights, sampler.proposal
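The later variants of this method (Examples #8 and #10 below) delegate the weight clipping, normalization, and entropy computation shown above to a self._perplexity helper that is not part of this excerpt; a minimal standalone sketch of that computation, assuming it mirrors the inline code here, could be:

import numpy as np

def _perplexity(weights):
    # replace non-positive and NaN weights by machine epsilon, as in the loop above
    eps = np.finfo(float).eps
    weights = np.where(np.logical_or(weights <= 0, np.isnan(weights)), eps, weights)
    normalized = weights / np.sum(weights)
    entropy = -1.0 * np.dot(np.log(normalized), normalized)
    return np.exp(entropy) / len(normalized)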
Example #8
    def sample_pmc(self,
                   log_proposal,
                   step_N=1000,
                   steps=10,
                   final_N=5000,
                   rng=np.random.mtrand,
                   return_final_only=True,
                   final_perplexity_threshold=1.0,
                   weight_threshold=1e-10):
        """
        Return samples of the parameters and log(weights), and a mixture density adapted to the posterior.

        Obtains random samples of the log(posterior) using adaptive importance sampling following
        the Population Monte Carlo approach with PyPMC.

        :param log_proposal: Initial gaussian mixture density that shall be adapted to the posterior density.
        :type log_proposal: pypmc.density.mixture.MixtureDensity
        :param step_N: Number of samples that shall be drawn in each adaptation step.
        :type step_N: int
        :param steps: Number of adaptation steps.
        :type steps: int
        :param final_N: Number of samples that shall be drawn after all adaptation steps.
        :type final_N: int
        :param rng: Optional random number generator (must be compatible with the requirements of pypmc.sampler.importance_sampler.ImportanceSampler)
        :param return_final_only: If set to True, only returns the samples and weights of the final sampling step, after all adaptations have finished.
        :param final_perplexity_threshold: Adaptations are stopped if the perplexity of the last adaptation step is above this threshold value.
        :param weight_threshold: Mixture components with a weight smaller than this threshold are pruned.

        :return: A tuple of the parameters as array of length N = step_N * steps + final_N, the (linear) weights as array of length N, and the
            final proposal function as pypmc.density.mixture.MixtureDensity.

        This method should be called after obtaining approximate samples of the
        log(posterior) by other means, e.g., by using :meth:`eos.Analysis.sample`.
        A possible (incomplete) example could look as follows:

        .. code-block:: python3

           from pypmc.mix_adapt.r_value import make_r_gaussmix
           chains = []
           for i in range(10):
               # run Markov Chains for your problem
               chain, _ = analysis.sample(...)
               chains.append(chain)

           # please consult the pypmc documentation for details on the call below
           proposal_density = make_r_gaussmix(chains, K_g=3, critical_r=1.1)

           # adapt the proposal to the posterior and obtain high-quality samples
           analysis.sample_pmc(proposal_density, ...)


        .. note::
           This method requires the PyPMC python module, which can be installed from PyPI.
        """
        import pypmc
        try:
            from tqdm.auto import tqdm
            progressbar = tqdm
        except ImportError:
            progressbar = lambda x, **kw: x

        # create log_target
        ind_lower = np.array([-1.0 for bound in self.bounds])
        ind_upper = np.array([+1.0 for bound in self.bounds])
        ind = pypmc.tools.indicator.hyperrectangle(ind_lower, ind_upper)

        log_target = pypmc.tools.indicator.merge_function_with_indicator(
            self.log_pdf, ind, -np.inf)

        # rescale log_proposal arguments to [-1, 1]
        for component in log_proposal.components:
            rescaled_mu = self._par_to_x(component.mu)
            rescaled_sigma = np.array([[
                4 * component.sigma[i, j] / (bj[1] - bj[0]) / (bi[1] - bi[0])
                for j, bj in enumerate(self.bounds)
            ] for i, bi in enumerate(self.bounds)])
            component.update(rescaled_mu, rescaled_sigma)

        # create PMC sampler
        sampler = pypmc.sampler.importance_sampling.ImportanceSampler(
            log_target, log_proposal, save_target_values=True, rng=rng)
        generating_components = []

        eps = np.finfo(float).eps

        # carry out adaptions
        for step in progressbar(range(steps), desc="Adaptions", leave=False):
            origins = sampler.run(step_N, trace_sort=True)
            generating_components.append(origins)
            samples = sampler.samples[:]
            last_perplexity = self._perplexity(
                np.copy(sampler.weights[-1][:, 0]))
            eos.info(
                'Perplexity of the last samples after sampling in step {}: {}'.
                format(step, last_perplexity))
            if last_perplexity < 0.05:
                eos.warn(
                    "Last step's perplexity is very low. This could possibly be improved by running the markov chains that are used to form the initial PDF for a bit longer"
                )
            weights = sampler.weights[:][:, 0]
            adjusted_weights = np.copy(weights)
            # replace negative and nan weights by eps
            adjusted_weights = np.where(
                np.logical_or(adjusted_weights <= 0,
                              np.isnan(adjusted_weights)), eps,
                adjusted_weights)
            eos.info(
                'Perplexity of all previous samples after sampling in step {}: {}'
                .format(step, self._perplexity(adjusted_weights)))
            pypmc.mix_adapt.pmc.gaussian_pmc(samples,
                                             sampler.proposal,
                                             adjusted_weights,
                                             mincount=0,
                                             rb=True,
                                             copy=False)
            # Normalize the weights and remove components with a weight smaller than weight_threshold
            sampler.proposal.normalize()
            sampler.proposal.prune(threshold=weight_threshold)
            # stop adaptation if the perplexity of the last step is larger than the threshold
            if last_perplexity > final_perplexity_threshold:
                eos.info(
                    'Perplexity threshold reached after {} step(s)'.format(
                        step))
                break

        # draw final samples
        origins = sampler.run(final_N, trace_sort=True)
        generating_components.append(origins)

        # rescale proposal components back to their physical bounds
        for component in sampler.proposal.components:
            rescaled_mu = self._x_to_par(component.mu)
            rescaled_sigma = np.array([[
                component.sigma[i, j] * (bj[1] - bj[0]) * (bi[1] - bi[0]) / 4
                for j, bj in enumerate(self.bounds)
            ] for i, bi in enumerate(self.bounds)])
            component.update(rescaled_mu, rescaled_sigma)

        # rescale the samples back to their physical bounds
        if return_final_only:
            # only returns the final_N final samples
            samples = np.apply_along_axis(self._x_to_par, 1,
                                          sampler.samples[:][-final_N:])
            weights = sampler.weights[:][-final_N:, 0]
        else:
            # returns all samples
            samples = np.apply_along_axis(self._x_to_par, 1,
                                          sampler.samples[:])
            weights = sampler.weights[:][:, 0]
        perplexity = self._perplexity(np.copy(weights))
        eos.info('Perplexity after final samples: {}'.format(perplexity))

        return samples, weights, sampler.proposal
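A hedged call that exercises the additional keyword arguments; `analysis` and `proposal_density` are assumed to exist (e.g. built as in the docstring example above), and the numerical values are only illustrative:

samples, weights, final_density = analysis.sample_pmc(
    proposal_density,
    step_N=2000, steps=15, final_N=10000,
    return_final_only=True,
    final_perplexity_threshold=0.9,
    weight_threshold=1e-8)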
Example #9
    def sample(self,
               N=1000,
               stride=5,
               pre_N=150,
               preruns=3,
               cov_scale=0.1,
               observables=None,
               start_point=None,
               rng=np.random.mtrand):
        """
        Return samples of the parameters, log(weights), and optionally posterior-predictive samples for a sequence of observables.

        Obtains random samples of the log(posterior) using an adaptive Markov Chain Monte Carlo with PyPMC.
        A prerun with adaptations is carried out first and its samples are discarded.

        :param N: Number of samples that shall be returned
        :param stride: Stride, i.e., the factor by which the total number of drawn samples is thinned to return N samples.
        :param pre_N: Number of samples in each prerun.
        :param preruns: Number of preruns.
        :param cov_scale: Scale factor for the initial guess of the covariance matrix.
        :param observables: Observables for which posterior-predictive samples shall be obtained.
        :type observables: list-like, optional
        :param start_point: Optional starting point for the chain
        :type start_point: list-like, optional
        :param rng: Optional random number generator (must be compatible with the requirements of pypmc.sampler.markov_chain.MarkovChain)

        :return: A tuple of the parameters as array of size N, the logarithmic weights as array of size N, and optionally the posterior-predictive samples of the observables as array of size N x len(observables).

        .. note::
           This method requires the PyPMC python module, which can be installed from PyPI.
        """
        import pypmc
        try:
            from tqdm.auto import tqdm
            progressbar = tqdm
        except ImportError:
            progressbar = lambda x, **kw: x

        ind_lower = np.array([-1.0 for bound in self.bounds])
        ind_upper = np.array([+1.0 for bound in self.bounds])
        ind = pypmc.tools.indicator.hyperrectangle(ind_lower, ind_upper)

        log_target = pypmc.tools.indicator.merge_function_with_indicator(
            self.log_pdf, ind, -np.inf)

        # create initial covariance, assuming that each (rescaled) parameter is uniformly distributed on [-1, +1].
        sigma = np.diag([1.0 / 3.0 * cov_scale for bound in self.bounds
                         ])  # 1 / 3 is the covariance on the interval [-1, +1]
        log_proposal = pypmc.density.gauss.LocalGauss(sigma)

        # create start point, if not provided or rescale a provided start point to [-1, 1]
        if start_point is None:
            start_point = np.array(
                [rng.uniform(-1.0, 1.0) for bound in self.bounds])
        else:
            start_point = self._par_to_x(start_point)

        # create MC sampler
        sampler = pypmc.sampler.markov_chain.AdaptiveMarkovChain(
            log_target,
            log_proposal,
            start_point,
            save_target_values=True,
            rng=rng)

        # pre run to adapt markov chains
        for i in progressbar(range(0, preruns), desc="Pre-runs", leave=False):
            eos.info('Prerun {} out of {}'.format(i, preruns))
            accept_count = sampler.run(pre_N)
            accept_rate = accept_count / pre_N * 100
            eos.info('Prerun {}: acceptance rate is {:3.0f}%'.format(
                i, accept_rate))
            sampler.adapt()
        sampler.clear()

        # obtain final samples
        eos.info('Main run: started ...')
        sample_total = N * stride
        sample_chunk = sample_total // 100
        sample_chunks = [sample_chunk for i in range(0, 99)]
        sample_chunks.append(sample_total - 99 * sample_chunk)
        accept_count = 0  # count main-run acceptances only, not those from the preruns
        for current_chunk in progressbar(sample_chunks,
                                         desc="Main run",
                                         leave=False):
            accept_count = accept_count + sampler.run(current_chunk)
        accept_rate = accept_count / (N * stride) * 100
        eos.info('Main run: acceptance rate is {:3.0f}%'.format(accept_rate))

        # Rescale the parameters back to their original bounds
        parameter_samples = np.apply_along_axis(self._x_to_par, 1,
                                                sampler.samples[:][::stride])
        weights = sampler.target_values[:][::stride, 0]

        if not observables:
            return (parameter_samples, weights)
        else:
            observable_samples = []
            for parameters in parameter_samples:
                for p, v in zip(self.varied_parameters, parameters):
                    p.set(v)

                observable_samples.append([o.evaluate() for o in observables])

            return (parameter_samples, weights, np.array(observable_samples))
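A usage sketch with posterior-predictive samples; `analysis` is assumed to be an eos.Analysis, and obs1, obs2 stand for pre-built observable objects that provide evaluate():

parameter_samples, weights, observable_samples = analysis.sample(
    N=2000, stride=5, pre_N=500, preruns=3,
    observables=[obs1, obs2])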
Example #10
    def sample_pmc(self,
                   log_proposal,
                   step_N=1000,
                   steps=10,
                   final_N=5000,
                   rng=np.random.mtrand):
        """
        Return samples of the parameters and log(weights)

        Obtains random samples of the log(posterior) using adaptive importance sampling following
        the Population Monte Carlo approach with PyPMC.

        :param log_proposal: Initial gaussian mixture density that shall be adapted to the posterior density.
        :param step_N: Number of samples that shall be drawn in each adaptation step.
        :param steps: Number of adaptation steps.
        :param final_N: Number of samples that shall be drawn after all adaptation steps.
        :param rng: Optional random number generator (must be compatible with the requirements of pypmc.sampler.importance_sampler.ImportanceSampler)

        :return: A tuple of the parameters as array of length N = step_N * steps + final_N, the (linear) weights as array of length N, and the
            final proposal function as pypmc.density.mixture.MixtureDensity.

        .. note::
           This method requires the PyPMC python module, which can be installed from PyPI.
        """
        import pypmc
        try:
            from tqdm.auto import tqdm
            progressbar = tqdm
        except ImportError:
            progressbar = lambda x, **kw: x

        # create log_target
        ind_lower = np.array([-1.0 for bound in self.bounds])
        ind_upper = np.array([+1.0 for bound in self.bounds])
        ind = pypmc.tools.indicator.hyperrectangle(ind_lower, ind_upper)

        log_target = pypmc.tools.indicator.merge_function_with_indicator(
            self.log_pdf, ind, -np.inf)

        # rescale log_proposal arguments to [-1, 1]
        for component in log_proposal.components:
            rescaled_mu = self._par_to_x(component.mu)
            rescaled_sigma = np.array([[
                4 * component.sigma[i, j] / (bj[1] - bj[0]) / (bi[1] - bi[0])
                for j, bj in enumerate(self.bounds)
            ] for i, bi in enumerate(self.bounds)])
            component.update(rescaled_mu, rescaled_sigma)

        # create PMC sampler
        sampler = pypmc.sampler.importance_sampling.ImportanceSampler(
            log_target, log_proposal, save_target_values=True, rng=rng)
        generating_components = []

        eps = np.finfo(float).eps

        # carry out adaptions
        for step in progressbar(range(steps), desc="Adaptions", leave=False):
            origins = sampler.run(step_N, trace_sort=True)
            generating_components.append(origins)
            samples = sampler.samples[:]
            last_perplexity = self._perplexity(
                np.copy(sampler.weights[-1][:, 0]))
            eos.info(
                'Perplexity of the last samples after sampling in step {}: {}'.
                format(step, last_perplexity))
            weights = sampler.weights[:][:, 0]
            adjusted_weights = np.copy(weights)
            # replace negative and nan weights by eps
            adjusted_weights = np.where(
                np.logical_or(adjusted_weights <= 0,
                              np.isnan(adjusted_weights)), eps,
                adjusted_weights)
            eos.info(
                'Perplexity of all previous samples after sampling in step {}: {}'
                .format(step, self._perplexity(adjusted_weights)))
            pypmc.mix_adapt.pmc.gaussian_pmc(samples,
                                             sampler.proposal,
                                             adjusted_weights,
                                             mincount=0,
                                             rb=True,
                                             copy=False)
            sampler.proposal.normalize()

        # draw final samples
        origins = sampler.run(final_N, trace_sort=True)
        generating_components.append(origins)
        samples = np.apply_along_axis(
            self._x_to_par, 1, sampler.samples[:]
        )  # Rescale the parameters back to their original bounds
        weights = sampler.weights[:][:, 0]
        perplexity = self._perplexity(np.copy(weights))
        eos.info('Perplexity after final samples: {}'.format(perplexity))

        return samples, weights, sampler.proposal