Code example #1
    def __init__(self, file):
        # open the input file for reading
        self.file = h5py.File(file, 'r')

        # check that the input file has format=MCMC
        if 'format' not in self.file.attrs:
            eos.warn(
                'input file does not have attribute \'format\'; assuming format \'MCMC\''
            )
        elif 'MCMC' != self.file.attrs['format']:
            raise FileFormatError('MCMC', self.file.attrs['format'])

        # extract parameter descriptions of the n-tuples
        self.parameters = None
        if '/descriptions/main run/chain #0/parameters' in self.file:
            self.parameters = self.file[
                '/descriptions/main run/chain #0/parameters']
        elif '/descriptions/prerun/chain #0/parameters' in self.file:
            self.parameters = self.file[
                '/descriptions/prerun/chain #0/parameters']
        else:
            raise RuntimeError(
                'input file has no valid parameter descriptions: is it corrupted?'
            )

        super().__init__()
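A minimal usage sketch for the reader above; the class name MCMCDataFile and the file path are illustrative assumptions, not names taken from the snippet:

    # hypothetical class name and file path
    datafile = MCMCDataFile('mcmc_run.hdf5')

    # the parameter descriptions are an h5py node of the input file
    print(list(datafile.parameters))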
Code example #2
    def optimize(self, start_point=None, **kwargs):
        """
        Optimize the log(posterior) and return a best-fit-point summary.

        :param start_point: Parameter point from which to start the optimization, with the elements in the same order as in eos.Analysis.varied_parameters. If not specified, optimization starts at the current parameter point.
        :type start_point: iterable, optional
        """
        if start_point is None:
            start_point = [float(p) for p in self.varied_parameters]

        # **kwargs always binds to a dict (possibly empty), so apply the
        # defaults when no keyword arguments were given
        default_kwargs = {'method': 'SLSQP', 'options': {'ftol': 1.0e-13}}
        if not kwargs:
            kwargs = default_kwargs

        res = scipy.optimize.minimize(self.negative_log_pdf,
                                      start_point,
                                      args=None,
                                      bounds=self.bounds,
                                      **kwargs)

        if not res.success:
            eos.warn('Optimization did not succeed')
            eos.warn('  optimizer message reads: {}'.format(res.message))
        else:
            eos.info(
                'Optimization goal achieved after {nfev} function evaluations'.
                format(nfev=res.nfev))

        for p, v in zip(self.varied_parameters, res.x):
            p.set(v)

        return eos.BestFitPoint(self, res.x)
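A usage sketch, assuming analysis is an instance of the surrounding class (the docstring suggests eos.Analysis) with its priors and likelihood already set up:

    # keyword arguments are forwarded to scipy.optimize.minimize;
    # with no arguments, the SLSQP defaults above are used
    bfp = analysis.optimize()
    print(bfp)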
Code example #3
    def __init__(self, file):
        self.name = file
        # open the input file for reading
        self.file = h5py.File(file, 'r')

        # check that the input file has format=UNC
        if 'format' not in self.file.attrs:
            eos.warn(
                'input file does not have attribute \'format\'; assuming format \'UNC\''
            )
        elif 'UNC' != self.file.attrs['format']:
            raise FileFormatError('UNC', self.file.attrs['format'])

        # extract parameter descriptions of the n-tuples
        self.parameters = []
        if '/descriptions/observables' in self.file:
            for i in range(len(self.file['/descriptions/observables'])):
                desc = self.file['/descriptions/observables/%d' % i]
                name = desc.attrs.get("name")
                kinematics = desc.attrs.get("kinematics")
                self.parameters.append(
                    [name, kinematics, sys.float_info.min, sys.float_info.max])
        else:
            raise RuntimeError(
                'input file has no valid parameter descriptions: is it corrupted?'
            )

        super().__init__()
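As above, a hypothetical usage sketch; UncertaintyDataFile is an assumed class name:

    # hypothetical class name and file path
    datafile = UncertaintyDataFile('unc_run.hdf5')

    # each entry holds an observable name, its kinematics, and a numeric range
    for name, kinematics, minv, maxv in datafile.parameters:
        print(name, kinematics)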
Code example #4
    def optimize(self, start_point=None, rng=np.random.mtrand, **kwargs):
        """
        Optimize the log(posterior) and return a best-fit-point summary.

        :param start_point: Parameter point from which to start the optimization, with the elements in the same order as in eos.Analysis.varied_parameters.
                            If set to "random", optimization starts at a random point in the space of the priors.
                            If not specified, optimization starts at the current parameter point.
        :type start_point: iterable, optional
        :param rng: Optional random number generator

        """
        if start_point is None:
            start_point = [float(p) for p in self.varied_parameters]
        elif start_point == "random":
            start_point = [
                p.inverse_cdf(rng.uniform())
                for p in self._log_posterior.log_priors()
            ]

        # **kwargs always binds to a dict (possibly empty), so apply the
        # defaults when no keyword arguments were given
        default_kwargs = {'method': 'SLSQP', 'options': {'ftol': 1.0e-13}}
        if not kwargs:
            kwargs = default_kwargs

        res = scipy.optimize.minimize(self.negative_log_pdf,
                                      self._par_to_x(start_point),
                                      args=None,
                                      bounds=[(-1.0, 1.0)
                                              for _ in self.bounds],
                                      **kwargs)

        if not res.success:
            eos.warn('Optimization did not succeed')
            eos.warn('  optimizer message reads: {}'.format(res.message))
        else:
            eos.info(
                'Optimization goal achieved after {nfev} function evaluations'.
                format(nfev=res.nfev))

        bfp = self._x_to_par(res.x)

        for p, v in zip(self.varied_parameters, bfp):
            p.set(v)

        return eos.BestFitPoint(self, bfp)
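A sketch of the start_point='random' option, again assuming analysis is an instance of the surrounding class; seeding the generator makes the random starting point reproducible:

    import numpy as np

    # any generator exposing uniform() works here, e.g. a seeded RandomState
    rng = np.random.RandomState(74)
    bfp = analysis.optimize(start_point='random', rng=rng)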
Code example #5
    def modes(self):
        groupname = 'main run'

        if 'main run' not in self.file:
            eos.warn('input file does not contain results from a main run')
            groupname = 'prerun'

        group = self.file[groupname]

        # start with an empty result
        result = []

        # append each chain's mode and log(posterior) to the result
        for chainname in group:
            chain = group[chainname]
            dset = chain['stats/mode']

            # the last column of the final record holds the log(posterior);
            # the remaining columns hold the parameter values at the mode
            log_posterior = dset[-1][-1]
            mode = dset[-1][0:-1]

            result.append((mode, log_posterior))

        return result
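A sketch that selects the global mode from the per-chain results, assuming datafile is an instance of the class this method belongs to:

    # each entry is a (mode, log_posterior) tuple, one per chain
    modes = datafile.modes()
    best_mode, best_log_posterior = max(modes, key=lambda entry: entry[1])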
Code example #6
    def data(self):
        groupname = 'main run'

        if 'main run' not in self.file:
            eos.warn('input file does not contain results from a main run')
            groupname = 'prerun'

        group = self.file[groupname]

        # start with no data
        data = None

        # append each dataset to data
        for chainname in group:
            chain = group[chainname]
            dset = chain['samples']

            if data is None:
                data = numpy.array(dset[:])
            else:
                data = numpy.append(data, dset[:], axis=0)

        return data
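A sketch of working with the concatenated samples, under the same assumption about datafile:

    # rows are samples, columns are the varied parameters
    samples = datafile.data()
    print(samples.shape)
    print(samples.mean(axis=0))  # per-parameter sample means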
Code example #7
    def __init__(self,
                 priors,
                 likelihood,
                 global_options={},
                 manual_constraints={},
                 fixed_parameters={}):
        """Constructor."""
        self.init_args = {
            'priors': priors,
            'likelihood': likelihood,
            'global_options': global_options,
            'manual_constraints': manual_constraints,
            'fixed_parameters': fixed_parameters
        }
        self.parameters = eos.Parameters.Defaults()
        self.global_options = eos.Options()
        self.log_likelihood = eos.LogLikelihood(self.parameters)
        self.log_posterior = eos.LogPosterior(self.log_likelihood)
        self.varied_parameters = []
        self.bounds = []

        eos.info(
            'Creating analysis with {nprior} priors, {nconst} EOS-wide constraints, {nopts} global options, {nmanual} manually-entered constraints and {nparams} fixed parameters.'
            .format(nprior=len(priors),
                    nconst=len(likelihood),
                    nopts=len(global_options),
                    nmanual=len(manual_constraints),
                    nparams=len(fixed_parameters)))
        eos.debug('priors:')
        for p in priors:
            eos.debug(' - {name} ({type}) [{min}, {max}]'.format(
                name=p['parameter'],
                type=p['type'],
                min=p['min'],
                max=p['max']))
        eos.debug('constraints:')
        for cn in likelihood:
            eos.debug(' - {name}'.format(name=cn))
        eos.debug('manual_constraints:')
        for cn, ce in manual_constraints.items():
            eos.debug(' - {name}'.format(name=cn))
        eos.debug('fixed_parameters:')
        for pn, pe in fixed_parameters.items():
            eos.debug(' - {name}'.format(name=pn))

        # collect the global options
        for key, value in global_options.items():
            self.global_options.set(key, value)

        # Fix specified parameters
        for param, value in fixed_parameters.items():
            self.parameters.set(param, value)

        # create the priors
        for prior in priors:
            parameter = prior['parameter']
            minv = prior['min']
            maxv = prior['max']
            prior_type = prior.get('type', 'uniform')
            if 'uniform' == prior_type or 'flat' == prior_type:
                self.log_posterior.add(
                    eos.LogPrior.Flat(self.parameters, parameter,
                                      eos.ParameterRange(minv, maxv)), False)
            elif 'gauss' == prior_type or 'gaussian' == prior_type:
                central = prior['central']
                sigma = prior['sigma']
                if isinstance(sigma, (list, tuple)):
                    sigma_lo = sigma[0]
                    sigma_hi = sigma[1]
                else:
                    sigma_lo = sigma
                    sigma_hi = sigma
                self.log_posterior.add(
                    eos.LogPrior.Gauss(self.parameters, parameter,
                                       eos.ParameterRange(minv, maxv),
                                       central - sigma_lo, central,
                                       central + sigma_hi), False)
            else:
                raise ValueError(
                    'Unknown prior type \'{}\''.format(prior_type))

            self.bounds.append((minv, maxv))
            p = self.parameters[parameter]
            p.set_min(minv)
            p.set_max(maxv)
            self.varied_parameters.append(p)

        # create the likelihood
        for constraint_name in likelihood:
            constraint = eos.Constraint.make(constraint_name,
                                             self.global_options)
            self.log_likelihood.add(constraint)

        # add manual constraints to the likelihood
        import yaml
        for constraint_name, constraint_data in manual_constraints.items():
            yaml_string = yaml.dump(constraint_data)
            constraint_entry = eos.ConstraintEntry.deserialize(
                constraint_name, yaml_string)
            constraint = constraint_entry.make(constraint_name,
                                               self.global_options)
            self.log_likelihood.add(constraint)

        # perform some sanity checks
        varied_parameter_names = set(
            [p.name() for p in self.varied_parameters])
        used_parameter_names = set()
        for observable in self.log_likelihood.observable_cache():
            for i in observable.used_parameter_ids():
                used_parameter_names.add(self.parameters.by_id(i).name())

        used_but_unvaried = used_parameter_names - varied_parameter_names
        if len(used_but_unvaried) > 0:
            eos.info(
                'likelihood probably depends on {} parameter(s) that do not appear in the prior; check prior?'
                .format(len(used_but_unvaried)))
        for n in used_but_unvaried:
            eos.debug('used, but not included in any prior: \'{}\''.format(n))
        for n in varied_parameter_names - used_parameter_names:
            eos.warn(
                'likelihood does not depend on parameter \'{}\'; remove from prior or check options!'
                .format(n))
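A construction sketch, assuming the class above is exposed as eos.Analysis; the prior, constraint, and option names below are illustrative and may not exist in a given EOS version:

    import eos

    # illustrative prior, constraint, and option names
    analysis = eos.Analysis(
        priors=[
            {'parameter': 'CKM::abs(V_cb)', 'min': 38e-3, 'max': 45e-3, 'type': 'uniform'},
        ],
        likelihood=[
            'B^0->D^+e^-nu::BRs@Belle:2015A',
        ],
        global_options={'form-factors': 'BSZ2015', 'model': 'CKM'}
    )
    bfp = analysis.optimize()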
Code example #8
    def sample_pmc(self,
                   log_proposal,
                   step_N=1000,
                   steps=10,
                   final_N=5000,
                   rng=np.random.mtrand,
                   return_final_only=True,
                   final_perplexity_threshold=1.0,
                   weight_threshold=1e-10):
        """
        Return samples of the parameters and their (linear) weights, and a mixture density adapted to the posterior.

        Obtains random samples of the log(posterior) using adaptive importance sampling following
        the Population Monte Carlo approach with PyPMC.

        :param log_proposal: Initial gaussian mixture density that shall be adapted to the posterior density.
        :type log_proposal: pypmc.density.mixture.MixtureDensity
        :param step_N: Number of samples that shall be drawn in each adaptation step.
        :type step_N: int
        :param steps: Number of adaptation steps.
        :type steps: int
        :param final_N: Number of samples that shall be drawn after all adaptation steps.
        :type final_N: int
        :param rng: Optional random number generator (must be compatible with the requirements of pypmc.sampler.importance_sampler.ImportanceSampler)
        :param return_final_only: If set to True, only returns the samples and weights of the final sampling step, after all adaptations have finished.
        :param final_perplexity_threshold: Adaptations are stopped if the perplexity of the last adaptation step is above this threshold value.
        :param weight_threshold: Mixture components with a weight smaller than this threshold are pruned.

        :return: A tuple of the parameters as array of length N = step_N * steps + final_N (or N = final_N if return_final_only is set), the (linear) weights as array of length N, and the
            final proposal function as pypmc.density.mixture.MixtureDensity.

        This method should be called after obtaining approximate samples of the
        log(posterior) by other means, e.g., by using :meth:`eos.Analysis.sample`.
        A possible (incomplete) example could look as follows:

        .. code-block:: python3

           from pypmc.mix_adapt.r_value import make_r_gaussmix
           chains = []
           for i in range(10):
               # run Markov Chains for your problem
               chain, _ = analysis.sample(...)
               chains.append(chain)

           # please consult the pypmc documentation for details on the call below
           proposal_density = make_r_gaussmix(chains, K_g=3, critical_r=1.1)

           # adapt the proposal to the posterior and obtain high-quality samples
           analysis.sample_pmc(proposal_density, ...)


        .. note::
           This method requires the PyPMC python module, which can be installed from PyPI.
        """
        import pypmc
        try:
            from tqdm.auto import tqdm
            progressbar = tqdm
        except ImportError:
            progressbar = lambda x, **kw: x

        # create log_target
        ind_lower = np.array([-1.0 for _ in self.bounds])
        ind_upper = np.array([+1.0 for _ in self.bounds])
        ind = pypmc.tools.indicator.hyperrectangle(ind_lower, ind_upper)

        log_target = pypmc.tools.indicator.merge_function_with_indicator(
            self.log_pdf, ind, -np.inf)

        # rescale log_proposal arguments to [-1, 1]
        for component in log_proposal.components:
            rescaled_mu = self._par_to_x(component.mu)
            rescaled_sigma = np.array([[
                4 * component.sigma[i, j] / (bj[1] - bj[0]) / (bi[1] - bi[0])
                for j, bj in enumerate(self.bounds)
            ] for i, bi in enumerate(self.bounds)])
            component.update(rescaled_mu, rescaled_sigma)

        # create PMC sampler
        sampler = pypmc.sampler.importance_sampling.ImportanceSampler(
            log_target, log_proposal, save_target_values=True, rng=rng)
        generating_components = []

        eps = np.finfo(float).eps

        # carry out the adaptation steps
        for step in progressbar(range(steps), desc="Adaptations", leave=False):
            origins = sampler.run(step_N, trace_sort=True)
            generating_components.append(origins)
            samples = sampler.samples[:]
            last_perplexity = self._perplexity(
                np.copy(sampler.weights[-1][:, 0]))
            eos.info(
                'Perplexity of the last samples after sampling in step {}: {}'.
                format(step, last_perplexity))
            if last_perplexity < 0.05:
                eos.warn(
                    "Last step's perplexity is very low; running the Markov chains used to form the initial proposal density for longer might improve it"
                )
            weights = sampler.weights[:][:, 0]
            adjusted_weights = np.copy(weights)
            # replace negative and nan weights by eps
            adjusted_weights = np.where(
                np.logical_or(adjusted_weights <= 0,
                              np.isnan(adjusted_weights)), eps,
                adjusted_weights)
            eos.info(
                'Perplexity of all previous samples after sampling in step {}: {}'
                .format(step, self._perplexity(adjusted_weights)))
            pypmc.mix_adapt.pmc.gaussian_pmc(samples,
                                             sampler.proposal,
                                             adjusted_weights,
                                             mincount=0,
                                             rb=True,
                                             copy=False)
            # Normalize the weights and remove components with a weight smaller than weight_threshold
            sampler.proposal.normalize()
            sampler.proposal.prune(threshold=weight_threshold)
            # stop adapting once the perplexity of the last step exceeds the threshold
            if last_perplexity > final_perplexity_threshold:
                eos.info(
                    'Perplexity threshold reached after {} step(s)'.format(
                        step + 1))
                break

        # draw final samples
        origins = sampler.run(final_N, trace_sort=True)
        generating_components.append(origins)

        # rescale proposal components back to their physical bounds
        for component in sampler.proposal.components:
            rescaled_mu = self._x_to_par(component.mu)
            rescaled_sigma = np.array([[
                component.sigma[i, j] * (bj[1] - bj[0]) * (bi[1] - bi[0]) / 4
                for j, bj in enumerate(self.bounds)
            ] for i, bi in enumerate(self.bounds)])
            component.update(rescaled_mu, rescaled_sigma)

        # rescale the samples back to their physical bounds
        if return_final_only:
            # only returns the final_N final samples
            samples = np.apply_along_axis(self._x_to_par, 1,
                                          sampler.samples[:][-final_N:])
            weights = sampler.weights[:][-final_N:, 0]
        else:
            # returns all samples
            samples = np.apply_along_axis(self._x_to_par, 1,
                                          sampler.samples[:])
            weights = sampler.weights[:][:, 0]
        perplexity = self._perplexity(np.copy(weights))
        eos.info('Perplexity after final samples: {}'.format(perplexity))

        return samples, weights, sampler.proposal
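Continuing the docstring's example, a sketch of consuming the return values; proposal_density is the adapted mixture from the docstring snippet:

    import numpy as np

    # run PMC, then compute weighted posterior means of the varied parameters
    parameters, weights, final_density = analysis.sample_pmc(
        proposal_density, step_N=1000, steps=10, final_N=5000)
    means = np.average(parameters, axis=0, weights=weights)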