def __init__(self, file):
    """Open an MCMC result file and locate its parameter descriptions.

    :param file: Passed unchanged to ``h5py.File``, which opens it read-only.
    :raises FileFormatError: If the file has a ``format`` attribute that is not ``'MCMC'``.
    :raises RuntimeError: If neither the main run nor the prerun of chain #0
        provides parameter descriptions.
    """
    # open the input file for reading
    self.file = h5py.File(file, 'r')

    # check that the input file has format=MCMC
    if 'format' not in self.file.attrs:
        eos.warn(
            'input file does not have attribute \'format\'; assuming format \'MCMC\''
        )
    elif 'MCMC' != self.file.attrs['format']:
        raise FileFormatError('MCMC', self.file.attrs['format'])

    # extract parameter descriptions of the n-tuples; the main run takes
    # precedence over the prerun
    self.parameters = None
    candidate_paths = (
        '/descriptions/main run/chain #0/parameters',
        '/descriptions/prerun/chain #0/parameters',
    )
    for path in candidate_paths:
        if path in self.file:
            self.parameters = self.file[path]
            break
    else:
        # the exception used to be constructed without being raised, which
        # silently left self.parameters set to None
        raise RuntimeError(
            'input file has no valid parameter descriptions: is it corrupted?'
        )

    super().__init__()
def optimize(self, start_point=None, **kwargs):
    """
    Optimize the log(posterior) and returns a best-fit-point summary.

    :param start_point: Parameter point from which to start the optimization, with the elements
        in the same order as in eos.Analysis.varied_parameters. If not specified, optimization
        starts at the current parameter point.
    :type start_point: iterable, optional
    :param kwargs: Keyword arguments forwarded to ``scipy.optimize.minimize``. If none are
        given, ``method='SLSQP'`` with ``options={'ftol': 1.0e-13}`` is used.
    :return: An ``eos.BestFitPoint`` built from this object and the optimizer's result ``x``.
    """
    # identity test: '== None' is evaluated element-wise when a numpy array is
    # passed, and the resulting array cannot be used as an 'if' condition
    if start_point is None:
        start_point = [float(p) for p in self.varied_parameters]

    # **kwargs is always a dict (never None), so the defaults must be selected
    # by testing for emptiness
    if not kwargs:
        kwargs = {'method': 'SLSQP', 'options': {'ftol': 1.0e-13}}

    # NOTE(review): args=None is kept from the original call; confirm that
    # negative_log_pdf accepts whatever scipy forwards for a non-tuple 'args'.
    res = scipy.optimize.minimize(self.negative_log_pdf,
                                  start_point,
                                  args=None,
                                  bounds=self.bounds,
                                  **kwargs)

    if not res.success:
        eos.warn('Optimization did not succeed')
        eos.warn(' optimizer' ' message reas: {}'.format(res.message))
    else:
        eos.info(
            'Optimization goal achieved after {nfev} function evaluations'.
            format(nfev=res.nfev))

    # leave the varied parameters at the point returned by the optimizer
    for p, v in zip(self.varied_parameters, res.x):
        p.set(v)

    return eos.BestFitPoint(self, res.x)
def __init__(self, file):
    """Open an uncertainty ('UNC') result file and collect its descriptions.

    Each entry appended to ``self.parameters`` is a list
    ``[name, kinematics, sys.float_info.min, sys.float_info.max]``.

    :param file: Stored as ``self.name`` and passed unchanged to ``h5py.File``,
        which opens it read-only.
    :raises FileFormatError: If the file has a ``format`` attribute that is not ``'UNC'``.
    :raises RuntimeError: If the file does not contain ``/descriptions/parameters``.
    """
    self.name = file

    # open the input file for reading
    self.file = h5py.File(file, 'r')

    # check that the input file has format=UNC
    if 'format' not in self.file.attrs:
        eos.warn(
            'input file does not have attribute \'format\'; assuming format \'UNC\''
        )
    elif 'UNC' != self.file.attrs['format']:
        raise FileFormatError('UNC', self.file.attrs['format'])

    # extract parameter descriptions of the n-tuples
    self.parameters = []
    if '/descriptions/parameters' not in self.file:
        # the exception used to be constructed without being raised, which
        # silently left self.parameters empty
        raise RuntimeError(
            'input file has no valid parameter descriptions: is it corrupted?'
        )

    # NOTE(review): the presence check above is on '/descriptions/parameters',
    # while the entries are read from '/descriptions/observables' -- confirm
    # that this asymmetry is intended.
    n_descriptions = len(self.file['/descriptions/observables'])
    for i in range(n_descriptions):
        desc = self.file['/descriptions/observables/%d' % i]
        name = desc.attrs.get("name")
        kinematics = desc.attrs.get("kinematics")
        # NOTE(review): sys.float_info.min is the smallest positive normalized
        # float, not the most negative one -- confirm it is the intended
        # lower bound.
        self.parameters.append(
            [name, kinematics, sys.float_info.min, sys.float_info.max])

    super().__init__()
def optimize(self, start_point=None, rng=np.random.mtrand, **kwargs):
    """
    Optimize the log(posterior) and returns a best-fit-point summary.

    The optimizer is started from ``self._par_to_x(start_point)`` with every
    coordinate bounded to [-1, 1]; its result is mapped back with
    ``self._x_to_par`` before it is stored and returned.

    :param start_point: Parameter point from which to start the optimization, with the elements
        in the same order as in eos.Analysis.varied_parameters. If set to "random", optimization
        starts at the random point in the space of the priors. If not specified, optimization
        starts at the current parameter point.
    :type start_point: iterable, optional
    :param rng: Optional random number generator; only its ``uniform()`` method is used here.
    :param kwargs: Keyword arguments forwarded to ``scipy.optimize.minimize``. If none are
        given, ``method='SLSQP'`` with ``options={'ftol': 1.0e-13}`` is used.
    :return: An ``eos.BestFitPoint`` built from this object and the best-fit parameter point.
    """
    # identity / type tests: comparing a numpy array with '==' is evaluated
    # element-wise and the result cannot be used as an 'if' condition
    if start_point is None:
        start_point = [float(p) for p in self.varied_parameters]
    elif isinstance(start_point, str) and start_point == "random":
        start_point = [
            p.inverse_cdf(rng.uniform())
            for p in self._log_posterior.log_priors()
        ]

    # **kwargs is always a dict (never None), so the defaults must be selected
    # by testing for emptiness
    if not kwargs:
        kwargs = {'method': 'SLSQP', 'options': {'ftol': 1.0e-13}}

    # NOTE(review): args=None is kept from the original call; confirm that
    # negative_log_pdf accepts whatever scipy forwards for a non-tuple 'args'.
    res = scipy.optimize.minimize(self.negative_log_pdf,
                                  self._par_to_x(start_point),
                                  args=None,
                                  bounds=[(-1.0, 1.0) for _ in self.bounds],
                                  **kwargs)

    if not res.success:
        eos.warn('Optimization did not succeed')
        eos.warn(' optimizer' ' message reas: {}'.format(res.message))
    else:
        eos.info(
            'Optimization goal achieved after {nfev} function evaluations'.
            format(nfev=res.nfev))

    # map the optimizer result back and leave the varied parameters there
    bfp = self._x_to_par(res.x)
    for p, v in zip(self.varied_parameters, bfp):
        p.set(v)

    return eos.BestFitPoint(self, bfp)
def modes(self):
    """Return the most recently recorded mode of every chain.

    The chains are read from the ``'main run'`` group; if the file has no
    such group a warning is issued and ``'prerun'`` is used instead.

    :return: A list with one ``(mode, log_posterior)`` tuple per chain, taken
        from the last row of the chain's ``stats/mode`` dataset: the final
        element of that row is the log(posterior), everything before it is
        the mode.
    """
    if 'main run' in self.file:
        run_name = 'main run'
    else:
        eos.warn('input file does not contain results from a main run')
        run_name = 'prerun'
    run = self.file[run_name]

    def _split_last_row(chain):
        # only the last row of the dataset is of interest
        last_row = chain['stats/mode'][-1]
        return (last_row[0:-1], last_row[-1])

    return [_split_last_row(run[chain_name]) for chain_name in run]
def data(self):
    """Return the samples of all chains, joined along the first axis.

    The chains are read from the ``'main run'`` group; if the file has no
    such group a warning is issued and ``'prerun'`` is used instead.

    :return: A numpy array holding the ``samples`` dataset of every chain,
        joined along axis 0 in iteration order, or ``None`` if the group
        contains no chains.
    """
    groupname = 'main run'
    if 'main run' not in self.file:
        eos.warn('input file does not contain results from a main run')
        groupname = 'prerun'

    group = self.file[groupname]

    # Gather every chain first and join them once: growing the result with
    # numpy.append inside the loop re-copies all previous samples per chain.
    chunks = [numpy.array(group[chainname]['samples'][:]) for chainname in group]

    if not chunks:
        # no chains at all: keep the original "no data" result
        return None
    if len(chunks) == 1:
        return chunks[0]

    return numpy.concatenate(chunks, axis=0)
def __init__(self,
             priors,
             likelihood,
             global_options=None,
             manual_constraints=None,
             fixed_parameters=None):
    """Set up parameters, priors, likelihood and posterior of an analysis.

    :param priors: Iterable of mappings, one per varied parameter. Each needs the keys
        ``'parameter'``, ``'min'`` and ``'max'``. The optional key ``'type'`` selects
        ``'uniform'``/``'flat'`` (the default) or ``'gauss'``/``'gaussian'``; gaussian priors
        additionally need ``'central'`` and ``'sigma'``, where ``'sigma'`` is either a single
        value or a list/tuple ``(sigma_lo, sigma_hi)``.
    :param likelihood: Sized iterable of constraint names, each passed to
        ``eos.Constraint.make``.
    :param global_options: Mapping of option keys to values that are set on ``eos.Options``.
        Defaults to no options.
    :param manual_constraints: Mapping of constraint names to constraint data; the data is
        dumped to YAML and deserialized via ``eos.ConstraintEntry.deserialize``. Defaults to
        no manual constraints.
    :param fixed_parameters: Mapping of parameter names to the values they are set to.
        Defaults to no fixed parameters.
    :raises ValueError: If a prior specifies an unknown ``'type'``.
    """
    # None stands in for "empty" so that no dict object is shared between
    # calls through the default arguments (they end up in self.init_args)
    global_options = {} if global_options is None else global_options
    manual_constraints = {} if manual_constraints is None else manual_constraints
    fixed_parameters = {} if fixed_parameters is None else fixed_parameters

    self.init_args = {
        'priors': priors,
        'likelihood': likelihood,
        'global_options': global_options,
        'manual_constraints': manual_constraints,
        'fixed_parameters': fixed_parameters
    }
    self.parameters = eos.Parameters.Defaults()
    self.global_options = eos.Options()
    self.log_likelihood = eos.LogLikelihood(self.parameters)
    self.log_posterior = eos.LogPosterior(self.log_likelihood)
    self.varied_parameters = []
    self.bounds = []

    eos.info(
        'Creating analysis with {nprior} priors, {nconst} EOS-wide constraints, {nopts} global options, {nmanual} manually-entered constraints and {nparams} fixed parameters.'
        .format(nprior=len(priors),
                nconst=len(likelihood),
                nopts=len(global_options),
                nmanual=len(manual_constraints),
                nparams=len(fixed_parameters)))
    eos.debug('priors:')
    for p in priors:
        # 'type' is optional (see the prior construction below); reading it
        # unconditionally here raised KeyError for priors relying on the default
        eos.debug(' - {name} ({type}) [{min}, {max}]'.format(
            name=p['parameter'],
            type=p['type'] if 'type' in p else 'uniform',
            min=p['min'],
            max=p['max']))
    eos.debug('constraints:')
    for cn in likelihood:
        eos.debug(' - {name}'.format(name=cn))
    eos.debug('manual_constraints:')
    for cn in manual_constraints:
        eos.debug(' - {name}'.format(name=cn))
    eos.debug('fixed_parameters:')
    for pn in fixed_parameters:
        eos.debug(' - {name}'.format(name=pn))

    # collect the global options
    for key, value in global_options.items():
        self.global_options.set(key, value)

    # Fix specified parameters
    for param, value in fixed_parameters.items():
        self.parameters.set(param, value)

    # create the priors
    for prior in priors:
        parameter = prior['parameter']
        minv = prior['min']
        maxv = prior['max']
        prior_type = prior['type'] if 'type' in prior else 'uniform'
        if prior_type in ('uniform', 'flat'):
            self.log_posterior.add(
                eos.LogPrior.Flat(self.parameters, parameter,
                                  eos.ParameterRange(minv, maxv)), False)
        elif prior_type in ('gauss', 'gaussian'):
            central = prior['central']
            sigma = prior['sigma']
            # a pair gives asymmetric lower/upper widths, a single value a
            # symmetric one
            if isinstance(sigma, (list, tuple)):
                sigma_lo = sigma[0]
                sigma_hi = sigma[1]
            else:
                sigma_lo = sigma
                sigma_hi = sigma
            self.log_posterior.add(
                eos.LogPrior.Gauss(self.parameters, parameter,
                                   eos.ParameterRange(minv, maxv),
                                   central - sigma_lo, central,
                                   central + sigma_hi), False)
        else:
            raise ValueError(
                'Unknown prior type \'{}\''.format(prior_type))

        self.bounds.append((minv, maxv))
        p = self.parameters[parameter]
        p.set_min(minv)
        p.set_max(maxv)
        self.varied_parameters.append(p)

    # create the likelihood
    for constraint_name in likelihood:
        constraint = eos.Constraint.make(constraint_name,
                                         self.global_options)
        self.log_likelihood.add(constraint)

    # add manual constraints to the likelihood
    if manual_constraints:
        # imported only when needed, and once rather than per constraint
        import yaml
        for constraint_name, constraint_data in manual_constraints.items():
            yaml_string = yaml.dump(constraint_data)
            constraint_entry = eos.ConstraintEntry.deserialize(
                constraint_name, yaml_string)
            constraint = constraint_entry.make(constraint_name,
                                               self.global_options)
            self.log_likelihood.add(constraint)

    # perform some sanity checks: compare the parameters that appear in a
    # prior with those used by the observables of the likelihood
    varied_parameter_names = {p.name() for p in self.varied_parameters}
    used_parameter_names = set()
    for observable in self.log_likelihood.observable_cache():
        for i in observable.used_parameter_ids():
            used_parameter_names.add(self.parameters.by_id(i).name())

    used_but_unvaried = used_parameter_names - varied_parameter_names
    if used_but_unvaried:
        eos.info(
            'likelihood probably depends on {} parameter(s) that do not appear in the prior; check prior?'
            .format(len(used_but_unvaried)))
    for n in used_but_unvaried:
        eos.debug('used, but not included in any prior: \'{}\''.format(n))
    for n in varied_parameter_names - used_parameter_names:
        eos.warn(
            'likelihood does not depend on parameter \'{}\'; remove from prior or check options!'
            .format(n))
def sample_pmc(self, log_proposal, step_N=1000, steps=10, final_N=5000, rng=np.random.mtrand, return_final_only=True, final_perplexity_threshold=1.0, weight_threshold=1e-10):
    """
    Return samples of the parameters and log(weights), and a mixture density adapted to the posterior.

    Obtains random samples of the log(posterior) using adaptive importance sampling following
    the Population Monte Carlo approach with PyPMC.

    :param log_proposal: Initial gaussian mixture density that shall be adapted to the posterior density.
        Its components are updated in place while being rescaled to the interval [-1, 1].
    :type log_proposal: pypmc.density.mixture.MixtureDensity
    :param step_N: Number of samples that shall be drawn in each adaptation step.
    :type step_N: int
    :param steps: Number of adaptation steps.
    :type steps: int
    :param final_N: Number of samples that shall be drawn after all adaptation steps.
    :type final_N: int
    :param rng: Optional random number generator (must be compatible with the requirements of pypmc.sampler.importance_sampler.ImportanceSampler)
    :param return_final_only: If set to True, only returns the samples and weights of the final sampling step, after all adaptations have finished.
    :param final_perplexity_threshold: Adaptations are stopped if the perplexity of the last adaptation step is above this threshold value.
    :param weight_threshold: Mixture components with a weight smaller than this threshold are pruned.

    :return: A tuple of the parameters as array of length N, the (linear) weights as array of length N,
        and the final proposal function as pypmc.density.mixture.MixtureDensity. If ``return_final_only``
        is set, only the last ``final_N`` samples and weights are returned; otherwise all samples held
        by the sampler are returned, i.e. up to N = step_N * steps + final_N (fewer if the adaptation
        stops early).

    This method should be called after obtaining approximate samples of the log(posterior) by other means, e.g., by using :meth:`eos.Analysis.sample`.
    A possible (incomplete) example could look as follows:

    .. code-block:: python3

       from pypmc.mix_adapt.r_value import make_r_gaussmix
       chains = []
       for i in range(10):
           # run Markov Chains for your problem
           chain, _ = analysis.sample(...)
           chains.append(chain)

       # please consult the pypmc documentation for details on the call below
       proposal_density = make_r_gaussmix(chains, K_g=3, critical_r=1.1)

       # adapt the proposal to the posterior and obtain high-quality samples
       analysis.sample_pmc(proposal_density, ...)

    .. note::
       This method requires the PyPMC python module, which can be installed from PyPI.
    """
    import pypmc
    try:
        from tqdm.auto import tqdm
        progressbar = tqdm
    except ImportError:
        # tqdm is optional: fall back to plain iteration and ignore the
        # keyword arguments that would configure the progress bar
        progressbar = lambda x, **kw: x

    # create log_target: self.log_pdf inside the hyperrectangle [-1, 1]^d
    # (d = number of bounds), -inf outside of it
    ind_lower = np.array([-1.0 for bound in self.bounds])
    ind_upper = np.array([+1.0 for bound in self.bounds])
    ind = pypmc.tools.indicator.hyperrectangle(ind_lower, ind_upper)
    log_target = pypmc.tools.indicator.merge_function_with_indicator(self.log_pdf, ind, -np.inf)

    # rescale log_proposal arguments to [-1, 1]
    # each covariance entry (i, j) is scaled by 4 / (width_i * width_j), where
    # width is (max - min) of the respective bound; presumably this matches
    # _par_to_x being an affine map of [min, max] onto [-1, 1] -- TODO confirm
    for component in log_proposal.components:
        rescaled_mu = self._par_to_x(component.mu)
        rescaled_sigma = np.array([[
            4 * component.sigma[i, j] / (bj[1] - bj[0]) / (bi[1] - bi[0])
            for j, bj in enumerate(self.bounds)
        ] for i, bi in enumerate(self.bounds)])
        component.update(rescaled_mu, rescaled_sigma)

    # create PMC sampler
    sampler = pypmc.sampler.importance_sampling.ImportanceSampler(log_target, log_proposal, save_target_values=True, rng=rng)
    # NOTE(review): generating_components is filled below but neither returned
    # nor read anywhere else in this method
    generating_components = []
    eps = np.finfo(float).eps

    # carry out adaptions
    for step in progressbar(range(steps), desc="Adaptions", leave=False):
        origins = sampler.run(step_N, trace_sort=True)
        generating_components.append(origins)
        samples = sampler.samples[:]
        # perplexity of the most recent run only
        last_perplexity = self._perplexity(np.copy(sampler.weights[-1][:, 0]))
        eos.info('Perplexity of the last samples after sampling in step {}: {}'.format(step, last_perplexity))
        if last_perplexity < 0.05:
            eos.warn("Last step's perplexity is very low. This could possibly be improved by running the markov chains that are used to form the initial PDF for a bit longer")
        weights = sampler.weights[:][:, 0]
        adjusted_weights = np.copy(weights)
        # replace non-positive and nan weights by eps
        adjusted_weights = np.where(np.logical_or(adjusted_weights <= 0, np.isnan(adjusted_weights)), eps, adjusted_weights)
        eos.info('Perplexity of all previous samples after sampling in step {}: {}'.format(step, self._perplexity(adjusted_weights)))
        # adapt the proposal using all samples and adjusted weights accumulated so far
        pypmc.mix_adapt.pmc.gaussian_pmc(samples, sampler.proposal, adjusted_weights, mincount=0, rb=True, copy=False)
        # Normalize the weights and remove components with a weight smaller than weight_threshold
        sampler.proposal.normalize()
        sampler.proposal.prune(threshold=weight_threshold)
        # stop adaptation if the perplexity of the last step is larger than the threshold
        if last_perplexity > final_perplexity_threshold:
            # NOTE(review): 'step' is the zero-based loop index, so this
            # message reports 0 after the first adaptation step
            eos.info('Perplexity threshold reached after {} step(s)'.format(step))
            break

    # draw final samples
    origins = sampler.run(final_N, trace_sort=True)
    generating_components.append(origins)

    # rescale proposal components back to their physical bounds
    # (inverse of the covariance scaling applied before sampling)
    for component in sampler.proposal.components:
        rescaled_mu = self._x_to_par(component.mu)
        rescaled_sigma = np.array([[
            component.sigma[i, j] * (bj[1] - bj[0]) * (bi[1] - bi[0]) / 4
            for j, bj in enumerate(self.bounds)
        ] for i, bi in enumerate(self.bounds)])
        component.update(rescaled_mu, rescaled_sigma)

    # rescale the samples back to their physical bounds
    if return_final_only:
        # only returns the final_N final samples
        samples = np.apply_along_axis(self._x_to_par, 1, sampler.samples[:][-final_N:])
        weights = sampler.weights[:][-final_N:, 0]
    else:
        # returns all samples
        samples = np.apply_along_axis(self._x_to_par, 1, sampler.samples[:])
        weights = sampler.weights[:][:, 0]
    # the perplexity is computed on a copy of the returned weights
    perplexity = self._perplexity(np.copy(weights))
    eos.info('Perplexity after final samples: {}'.format(perplexity))

    return samples, weights, sampler.proposal