def find_clusters(posterior, base_directory='./', threshold=2.0, K_g=1, analysis_file=None):
    """
    Finds clusters among posterior MCMC samples, grouped by Gelman-Rubin R value, and creates a Gaussian mixture density.

    Finding clusters and creating a Gaussian mixture density is a necessary intermediate step before using the sample-pmc subcommand.

    The input files are expected in EOS_BASE_DIRECTORY/POSTERIOR/mcmc-*. All MCMC input files present will be used in the clustering.
    The output files will be stored in EOS_BASE_DIRECTORY/POSTERIOR/clusters.

    :param posterior: The name of the posterior.
    :type posterior: str
    :param base_directory: The base directory for the storage of data files. Can also be set via the EOS_BASE_DIRECTORY environment variable.
    :type base_directory: str, optional
    :param threshold: The R value threshold. If two sample subsets have an R value larger than this threshold, they will be treated as two distinct clusters. Defaults to 2.0.
    :type threshold: float > 1.0, optional
    :param K_g: The number of mixture components per cluster. Defaults to 1.
    :type K_g: int >= 1, optional
    :param analysis_file: Accepted for a uniform command signature; not used by this function.
    :raises ValueError: If no ``mcmc-*`` input is found, or if the chains do not all contain the same number of samples.
    """
    import pathlib

    output_path = os.path.join(base_directory, posterior, 'clusters')
    _set_log_file(output_path, 'log')

    input_dir = pathlib.Path(os.path.join(base_directory, posterior))
    # glob() yields entries in filesystem order; sort so that the chain order
    # (and hence the resulting mixture density) is reproducible across runs.
    input_paths = sorted(str(p) for p in input_dir.glob('mcmc-*'))
    if not input_paths:
        # without this guard the code below fails with a bare IndexError
        raise ValueError('No MCMC input files found matching \'{}\''.format(input_dir / 'mcmc-*'))

    chains = [eos.data.MarkovChain(path).samples for path in input_paths]

    # the R-value grouping is called with a single, common chain length
    n = len(chains[0])
    for path, chain in zip(input_paths, chains):
        if len(chain) != n:
            # explicit raise instead of assert: asserts are stripped under -O
            raise ValueError(
                'Every chain must contain the same number of samples: \'{}\' has {}, expected {}'.format(
                    path, len(chain), n))

    # per-chain means and (unbiased) variances of every parameter
    chain_means = [_np.mean(chain.T, axis=1) for chain in chains]
    chain_variances = [_np.var(chain.T, axis=1, ddof=1) for chain in chains]
    groups = pypmc.mix_adapt.r_value.r_group(chain_means, chain_variances, n, threshold)
    eos.info('Found {} groups using an R value threshold of {}'.format(len(groups), threshold))

    density = pypmc.mix_adapt.r_value.make_r_gaussmix(chains, K_g=K_g, critical_r=threshold)
    eos.info(f'Created mixture density with {len(density.components)} components')
    eos.data.MixtureDensity.create(output_path, density)
def optimize(self, start_point=None, **kwargs):
    """
    Optimize the log(posterior) and returns a best-fit-point summary.

    The varied parameters are set to the optimizer's result before returning,
    regardless of whether the optimizer reports success.

    :param start_point: Parameter point from which to start the optimization, with the elements in the same order as in eos.Analysis.varied_parameters. If not specified, optimization starts at the current parameter point.
    :type start_point: iterable, optional
    :param kwargs: Keyword arguments forwarded to scipy.optimize.minimize. If none are given, the SLSQP method with ``ftol = 1.0e-13`` is used.
    :return: The best-fit point, constructed as ``eos.BestFitPoint(self, res.x)``.
    """
    # identity check: `start_point == None` is an elementwise comparison for
    # numpy arrays and makes the `if` raise
    if start_point is None:
        start_point = [float(p) for p in self.varied_parameters]

    # **kwargs is never None; an empty dict means that the caller did not
    # request any optimizer settings, so fall back to the defaults
    if not kwargs:
        kwargs = {'method': 'SLSQP', 'options': {'ftol': 1.0e-13}}

    # NOTE(review): args=None is kept from the original call; presumably
    # negative_log_pdf tolerates the extra positional argument -- confirm.
    res = scipy.optimize.minimize(self.negative_log_pdf, start_point, args=None,
                                  bounds=self.bounds, **kwargs)

    if not res.success:
        eos.warn('Optimization did not succeed')
        eos.warn(' optimizer message reas: {}'.format(res.message))
    else:
        eos.info('Optimization goal achieved after {nfev} function evaluations'.format(nfev=res.nfev))

    # leave the analysis at the point found by the optimizer
    for p, v in zip(self.varied_parameters, res.x):
        p.set(v)

    return eos.BestFitPoint(self, res.x)
def optimize(self, start_point=None, rng=np.random.mtrand, **kwargs):
    """
    Optimize the log(posterior) and returns a best-fit-point summary.

    The optimization is carried out in rescaled coordinates, in which every
    varied parameter is bounded to [-1, +1]; the result is mapped back with
    ``self._x_to_par`` and the varied parameters are set to it before returning.

    :param start_point: Parameter point from which to start the optimization, with the elements in the same order as in eos.Analysis.varied_parameters. If set to "random", optimization starts at the random point in the space of the priors. If not specified, optimization starts at the current parameter point.
    :type start_point: iterable, optional
    :param rng: Optional random number generator; only its ``uniform()`` method is used, and only for ``start_point="random"``.
    :param kwargs: Keyword arguments forwarded to scipy.optimize.minimize. If none are given, the SLSQP method with ``ftol = 1.0e-13`` is used.
    :return: The best-fit point, constructed as ``eos.BestFitPoint(self, bfp)`` with ``bfp`` in unscaled parameter coordinates.
    """
    # `==` on a numpy array compares elementwise and makes the `if` raise,
    # hence the identity check and the explicit string check
    if start_point is None:
        start_point = [float(p) for p in self.varied_parameters]
    elif isinstance(start_point, str) and start_point == "random":
        start_point = [
            p.inverse_cdf(rng.uniform())
            for p in self._log_posterior.log_priors()
        ]

    # **kwargs is never None; an empty dict means that the caller did not
    # request any optimizer settings, so fall back to the defaults
    if not kwargs:
        kwargs = {'method': 'SLSQP', 'options': {'ftol': 1.0e-13}}

    # NOTE(review): args=None is kept from the original call; presumably
    # negative_log_pdf tolerates the extra positional argument -- confirm.
    res = scipy.optimize.minimize(self.negative_log_pdf, self._par_to_x(start_point), args=None,
                                  bounds=[(-1.0, 1.0) for _ in self.bounds], **kwargs)

    if not res.success:
        eos.warn('Optimization did not succeed')
        eos.warn(' optimizer message reas: {}'.format(res.message))
    else:
        eos.info('Optimization goal achieved after {nfev} function evaluations'.format(nfev=res.nfev))

    # map the optimizer's result back to the unscaled parameters
    bfp = self._x_to_par(res.x)

    for p, v in zip(self.varied_parameters, bfp):
        p.set(v)

    return eos.BestFitPoint(self, bfp)
def run(self):
    """
    Runs predefined analysis steps recorded in the analysis file.

    Each entry of ``self._steps`` must be a key/value map with a ``command``
    key and, optionally, a ``parameters`` map. The parameters are translated
    into keyword arguments and passed to the function registered for the
    command. Steps are executed in order.

    If a step does not provide a mandatory argument of its function, an error
    is logged and the run stops without executing that step or any later one.

    :raises ValueError: If a step is not a key/value map, contains no command, or names an unknown command.
    """
    import inspect

    command_map = {
        'find-clusters': eos.find_clusters,
        'predict-observables': eos.predict_observables,
        'sample-mcmc': eos.sample_mcmc,
        'sample-pmc': eos.sample_pmc,
    }
    variadic_kinds = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)

    for idx, step in enumerate(self._steps):
        if not isinstance(step, dict):
            raise ValueError("Step #{} is not a key/value map.".format(idx))

        if 'command' not in step:
            raise ValueError("Step #{} contains no command.".format(idx))

        command = step['command']
        if command not in command_map:
            raise ValueError("Step #{} uses unknown command '{}'.".format(idx, command))
        func = command_map[command]

        # a missing or empty 'parameters' entry means "no arguments"
        params = step.get('parameters') or {}
        # NOTE(review): params_map is not defined in this method; presumably a
        # module-level map from (command, key) to the argument name -- confirm.
        params = {params_map[(command, k)]: v for k, v in params.items()}
        paramstr = ','.join('{k}={v}'.format(k=k, v=v) for k, v in params.items())

        # mandatory arguments are those without a default value; *args and
        # **kwargs never have a default, but are not mandatory either
        required_args = [
            name for name, p in inspect.signature(func).parameters.items()
            if p.default is inspect.Parameter.empty and p.kind not in variadic_kinds
        ]
        for name in required_args:
            if name in params:
                continue
            eos.error('Mandatory argument \'{}\' not provided'.format(name))
            return

        eos.info('Beginning step #{i}: {cmd}({params})'.format(i=idx, cmd=command, params=paramstr))
        func(**params)
        eos.info('Step #{i} complete'.format(i=idx))
def sample_mcmc(self, N, stride, pre_N, preruns, cov_scale=0.1, start_point=None, rng=np.random.mtrand):
    """
    Return samples of the kinematic variables and the log(PDF).

    Obtains random samples of the log(PDF) using an adaptive Markov Chain Monte Carlo with PyPMC.
    A prerun with adaptations is carried out first and its samples are discarded.

    :param N: Number of samples that shall be returned
    :param stride: Stride, i.e., the number by which the actual amount of samples shall be thinned to return N samples.
    :param pre_N: Number of samples in each prerun.
    :param preruns: Number of preruns. May be zero, in which case no adaptation is carried out.
    :param cov_scale: Scale factor for the initial guess of the covariance matrix.
    :param start_point: Optional starting point for the chain. If not provided, a point is drawn uniformly within the bounds.
    :type start_point: list-like, optional
    :param rng: Optional random number generator (must be compatible with the requirements of pypmc.sampler.markov_chain.MarkovChain)

    :return: A tuple of the kinematic variables as array of size N and the log(PDF) as array of size N.

    .. note::
       This method requires the PyPMC python module, which can be installed from PyPI.
    """
    import pypmc
    try:
        from tqdm.auto import tqdm
        progressbar = tqdm
    except ImportError:
        # tqdm is optional: fall back to plain iteration without a progress bar
        progressbar = lambda x, **kw: x

    # restrict the target to the hyperrectangle spanned by the bounds
    ind_lower = np.array([bound[0].evaluate() for bound in self.bounds])
    ind_upper = np.array([bound[1].evaluate() for bound in self.bounds])
    ind = pypmc.tools.indicator.hyperrectangle(ind_lower, ind_upper)

    log_target = pypmc.tools.indicator.merge_function_with_indicator(self.log_pdf, ind, -np.inf)

    # create initial covariance: width^2 / 12 is the variance of a uniform
    # distribution over the interval, scaled by cov_scale
    sigma = np.diag([np.square(hi - lo) / 12 * cov_scale for lo, hi in zip(ind_lower, ind_upper)])
    log_proposal = pypmc.density.gauss.LocalGauss(sigma)

    # create start point, if not provided
    if start_point is None:
        u = np.array([rng.uniform(0.0, 1.0) for _ in range(len(ind_lower))])
        ubar = 1.0 - u
        start_point = ubar * ind_upper + u * ind_lower

    # create MC sampler
    sampler = pypmc.sampler.markov_chain.AdaptiveMarkovChain(log_target, log_proposal, start_point, save_target_values=True, rng=rng)

    # pre run to adapt markov chains
    for i in progressbar(range(0, preruns), desc="Pre-runs", leave=False):
        eos.info('Prerun {} out of {}'.format(i, preruns))
        prerun_accept_count = sampler.run(pre_N)
        accept_rate = prerun_accept_count / pre_N * 100
        eos.info('Prerun {}: acceptance rate is {:3.0f}%'.format(i, accept_rate))
        sampler.adapt()
    # discard the prerun samples
    sampler.clear()

    # obtain final samples
    eos.info('Main run: started ...')
    sample_total = N * stride
    # 100 chunks so that the progress bar advances; the last chunk absorbs the remainder
    sample_chunk = sample_total // 100
    sample_chunks = [sample_chunk for _ in range(0, 99)]
    sample_chunks.append(sample_total - 99 * sample_chunk)
    # start counting from zero: the counter must neither depend on a prerun
    # having taken place nor include the acceptances of the last prerun
    accept_count = 0
    for current_chunk in progressbar(sample_chunks, desc="Main run", leave=False):
        accept_count = accept_count + sampler.run(current_chunk)
    accept_rate = accept_count / sample_total * 100
    eos.info('Main run: acceptance rate is {:3.0f}%'.format(accept_rate))

    # thin the chain by the stride
    parameter_samples = sampler.samples[:][::stride]
    weights = sampler.target_values[:][::stride, 0]

    return (parameter_samples, weights)
def __init__(self, priors, likelihood, global_options=None, manual_constraints=None, fixed_parameters=None):
    """
    Constructor.

    Sets up the parameters, the log(likelihood) and the log(posterior), and
    logs a summary. A final sanity check reports parameters that are used by
    the likelihood but have no prior, and parameters with a prior that the
    likelihood does not depend on.

    :param priors: Prior descriptions. Each entry is a map with the keys ``parameter``, ``min`` and ``max``, and optionally ``type`` (``uniform``/``flat``, the default, or ``gauss``/``gaussian``). Gaussian priors additionally require ``central`` and ``sigma``; ``sigma`` is either a number or a (lower, upper) pair.
    :type priors: iterable of dict
    :param likelihood: Names of the constraints that form the likelihood.
    :type likelihood: iterable of str
    :param global_options: Options applied to all constraints. Defaults to no options.
    :type global_options: dict, optional
    :param manual_constraints: Map of constraint names to constraint data; each entry is serialized to YAML and deserialized as a constraint entry. Defaults to no manual constraints.
    :type manual_constraints: dict, optional
    :param fixed_parameters: Map of parameter names to the values they shall be set to. Defaults to no fixed parameters.
    :type fixed_parameters: dict, optional
    :raises ValueError: If a prior has an unknown type.
    """
    # fresh dicts per instance: a dict literal as default value would be
    # shared by all instances through self.init_args
    global_options = {} if global_options is None else global_options
    manual_constraints = {} if manual_constraints is None else manual_constraints
    fixed_parameters = {} if fixed_parameters is None else fixed_parameters

    self.init_args = {
        'priors': priors,
        'likelihood': likelihood,
        'global_options': global_options,
        'manual_constraints': manual_constraints,
        'fixed_parameters': fixed_parameters
    }
    self.parameters = eos.Parameters.Defaults()
    self.global_options = eos.Options()
    self.log_likelihood = eos.LogLikelihood(self.parameters)
    self.log_posterior = eos.LogPosterior(self.log_likelihood)
    self.varied_parameters = []
    self.bounds = []

    eos.info(
        'Creating analysis with {nprior} priors, {nconst} EOS-wide constraints, {nopts} global options, {nmanual} manually-entered constraints and {nparams} fixed parameters.'
        .format(nprior=len(priors), nconst=len(likelihood), nopts=len(global_options),
                nmanual=len(manual_constraints), nparams=len(fixed_parameters)))
    eos.debug('priors:')
    for p in priors:
        # 'type' is optional and defaults to 'uniform', as in the prior creation below
        eos.debug(' - {name} ({type}) [{min}, {max}]'.format(
            name=p['parameter'], type=p.get('type', 'uniform'), min=p['min'], max=p['max']))
    eos.debug('constraints:')
    for cn in likelihood:
        eos.debug(' - {name}'.format(name=cn))
    eos.debug('manual_constraints:')
    for cn in manual_constraints:
        eos.debug(' - {name}'.format(name=cn))
    eos.debug('fixed_parameters:')
    for pn in fixed_parameters:
        eos.debug(' - {name}'.format(name=pn))

    # collect the global options
    for key, value in global_options.items():
        self.global_options.set(key, value)

    # Fix specified parameters
    for param, value in fixed_parameters.items():
        self.parameters.set(param, value)

    # create the priors
    for prior in priors:
        parameter = prior['parameter']
        minv = prior['min']
        maxv = prior['max']
        prior_type = prior.get('type', 'uniform')
        if prior_type in ('uniform', 'flat'):
            self.log_posterior.add(
                eos.LogPrior.Flat(self.parameters, parameter, eos.ParameterRange(minv, maxv)),
                False)
        elif prior_type in ('gauss', 'gaussian'):
            central = prior['central']
            sigma = prior['sigma']
            # a pair is read as (lower, upper) uncertainty, a scalar as symmetric
            if isinstance(sigma, (list, tuple)):
                sigma_lo = sigma[0]
                sigma_hi = sigma[1]
            else:
                sigma_lo = sigma
                sigma_hi = sigma
            self.log_posterior.add(
                eos.LogPrior.Gauss(self.parameters, parameter, eos.ParameterRange(minv, maxv),
                                   central - sigma_lo, central, central + sigma_hi),
                False)
        else:
            raise ValueError('Unknown prior type \'{}\''.format(prior_type))

        self.bounds.append((minv, maxv))
        p = self.parameters[parameter]
        p.set_min(minv)
        p.set_max(maxv)
        self.varied_parameters.append(p)

    # create the likelihood
    for constraint_name in likelihood:
        constraint = eos.Constraint.make(constraint_name, self.global_options)
        self.log_likelihood.add(constraint)

    # add manual constraints to the likelihood
    if manual_constraints:
        # yaml is only needed (and only imported) when manual constraints are present
        import yaml
    for constraint_name, constraint_data in manual_constraints.items():
        yaml_string = yaml.dump(constraint_data)
        constraint_entry = eos.ConstraintEntry.deserialize(constraint_name, yaml_string)
        constraint = constraint_entry.make(constraint_name, self.global_options)
        self.log_likelihood.add(constraint)

    # perform some sanity checks
    varied_parameter_names = {p.name() for p in self.varied_parameters}
    used_parameter_names = set()
    for observable in self.log_likelihood.observable_cache():
        for i in observable.used_parameter_ids():
            used_parameter_names.add(self.parameters.by_id(i).name())

    used_but_unvaried = used_parameter_names - varied_parameter_names
    if used_but_unvaried:
        eos.info(
            'likelihood probably depends on {} parameter(s) that do not appear in the prior; check prior?'
            .format(len(used_but_unvaried)))
    for n in used_but_unvaried:
        eos.debug('used, but not included in any prior: \'{}\''.format(n))
    for n in varied_parameter_names - used_parameter_names:
        eos.warn(
            'likelihood does not depend on parameter \'{}\'; remove from prior or check options!'
            .format(n))
def sample_pmc(self, log_proposal, step_N=1000, steps=10, final_N=5000, rng=np.random.mtrand):
    """
    Draw weighted samples of the log(posterior) with Population Monte Carlo.

    An importance sampler from PyPMC is run ``steps`` times with ``step_N``
    draws each; after every run the proposal density is updated from all
    samples accumulated so far. A last run with ``final_N`` draws follows.
    The perplexity of the weights is logged after every run.

    :param log_proposal: Initial proposal density, handed to the importance sampler and adapted by it.
    :param step_N: Number of samples that shall be drawn in each adaptation step.
    :param steps: Number of adaptation steps.
    :param final_N: Number of samples that shall be drawn after all adaptation steps.
    :param rng: Optional random number generator, passed on to the importance sampler.

    :return: A tuple of the samples held by the sampler after the final run, the first column of their weights, and the adapted proposal density.

    .. note::
       This method requires the PyPMC python module, which can be installed from PyPI.
    """
    import pypmc
    try:
        from tqdm.auto import tqdm as progressbar
    except ImportError:
        # no tqdm available: iterate without displaying progress
        def progressbar(iterable, **_ignored):
            return iterable

    # the target is -inf outside of the box spanned by the bounds
    lower_edges = np.array([interval[0] for interval in self.bounds])
    upper_edges = np.array([interval[1] for interval in self.bounds])
    inside_bounds = pypmc.tools.indicator.hyperrectangle(lower_edges, upper_edges)
    bounded_log_pdf = pypmc.tools.indicator.merge_function_with_indicator(
        self.log_pdf, inside_bounds, -np.inf)

    sampler = pypmc.sampler.importance_sampling.ImportanceSampler(
        bounded_log_pdf, log_proposal, save_target_values=True, rng=rng)

    tiny = np.finfo(float).eps

    def positive_copy(raw_weights):
        # copy in which every non-positive or nan weight is replaced by machine epsilon
        fixed = np.copy(raw_weights)
        fixed[~(fixed > 0)] = tiny
        return fixed

    def perplexity_of(positive_weights):
        # exp(Shannon entropy of the normalized weights) / number of weights
        probabilities = positive_weights / np.sum(positive_weights)
        entropy = -1.0 * np.dot(np.log(probabilities), probabilities)
        return np.exp(entropy) / len(probabilities)

    component_origins = []

    # adaptation steps
    for step in progressbar(range(steps), desc="Adaptions", leave=False):
        component_origins.append(sampler.run(step_N, trace_sort=True))
        samples = sampler.samples[:]

        newest_weights = positive_copy(sampler.weights[-1][:, 0])
        eos.info(
            'Perplexity of the last samples after sampling in step {}: {}'.format(
                step, perplexity_of(newest_weights)))

        adjusted_weights = positive_copy(sampler.weights[:][:, 0])
        eos.info(
            'Perplexity of all previous samples after sampling in step {}: {}'.format(
                step, perplexity_of(adjusted_weights)))

        # update the proposal in place from everything sampled so far
        pypmc.mix_adapt.pmc.gaussian_pmc(samples, sampler.proposal, adjusted_weights,
                                         mincount=0, rb=True, copy=False)
        sampler.proposal.normalize()

    # final draw with the adapted proposal
    component_origins.append(sampler.run(final_N, trace_sort=True))
    samples = sampler.samples[:]
    weights = sampler.weights[:][:, 0]
    eos.info('Perplexity after final samples: {}'.format(perplexity_of(positive_copy(weights))))

    # the weights are returned as stored by the sampler, without the epsilon replacement
    return samples, weights, sampler.proposal
def sample_pmc(self, log_proposal, step_N=1000, steps=10, final_N=5000, rng=np.random.mtrand,
               return_final_only=True, final_perplexity_threshold=1.0, weight_threshold=1e-10):
    """
    Return samples of the parameters and log(weights), and a mixture density adapted to the posterior.

    Obtains random samples of the log(posterior) using adaptive importance sampling following
    the Population Monte Carlo approach with PyPMC.

    Sampling is carried out in rescaled coordinates, in which every parameter is bounded to [-1, +1].
    The components of ``log_proposal`` are rescaled in place for this purpose, and rescaled back to the
    physical bounds before returning.

    :param log_proposal: Initial gaussian mixture density that shall be adapted to the posterior density.
    :type log_proposal: pypmc.density.mixture.MixtureDensity
    :param step_N: Number of samples that shall be drawn in each adaptation step.
    :type step_N: int
    :param steps: Number of adaptation steps.
    :type steps: int
    :param final_N: Number of samples that shall be drawn after all adaptation steps.
    :type final_N: int
    :param rng: Optional random number generator (must be compatible with the requirements of pypmc.sampler.importance_sampler.ImportanceSampler)
    :param return_final_only: If set to True, only returns the samples and weights of the final sampling step, after all adaptations have finished.
    :param final_perplexity_threshold: Adaptations are stopped if the perplexity of the last adaptation step is above this threshold value.
    :param weight_threshold: Mixture components with a weight smaller than this threshold are pruned.

    :return: A tuple of the parameters as array, the (linear) weights as array, and the final proposal function as pypmc.density.mixture.MixtureDensity. The arrays cover the last ``final_N`` samples if ``return_final_only`` is set, and all samples of the adaptation steps and the final step otherwise.

    This method should be called after obtaining approximate samples of the log(posterior) by other means, e.g., by using :meth:`eos.Analysis.sample`.
    A possible (incomplete) example could look as follows:

    .. code-block:: python3

       from pypmc.mix_adapt.r_value import make_r_gaussmix
       chains = []
       for i in range(10):
           # run Markov Chains for your problem
           chain, _ = analysis.sample(...)
           chains.append(chain)

       # please consult the pypmc documentation for details on the call below
       proposal_density = make_r_gaussmix(chains, K_g=3, critical_r=1.1)

       # adapt the proposal to the posterior and obtain high-quality samples
       analysis.sample_pmc(proposal_density, ...)

    .. note::
       This method requires the PyPMC python module, which can be installed from PyPI.
    """
    import pypmc
    try:
        from tqdm.auto import tqdm
        progressbar = tqdm
    except ImportError:
        # tqdm is optional: fall back to plain iteration without a progress bar
        progressbar = lambda x, **kw: x

    # create log_target
    ind_lower = np.array([-1.0 for _ in self.bounds])
    ind_upper = np.array([+1.0 for _ in self.bounds])
    ind = pypmc.tools.indicator.hyperrectangle(ind_lower, ind_upper)

    log_target = pypmc.tools.indicator.merge_function_with_indicator(
        self.log_pdf, ind, -np.inf)

    # rescale log_proposal arguments to [-1, 1]; an interval of width w maps
    # to one of width 2, hence the factor 2 / w per index of the covariance
    for component in log_proposal.components:
        rescaled_mu = self._par_to_x(component.mu)
        rescaled_sigma = np.array([[
            4 * component.sigma[i, j] / (bj[1] - bj[0]) / (bi[1] - bi[0])
            for j, bj in enumerate(self.bounds)
        ] for i, bi in enumerate(self.bounds)])
        component.update(rescaled_mu, rescaled_sigma)

    # create PMC sampler
    sampler = pypmc.sampler.importance_sampling.ImportanceSampler(
        log_target, log_proposal, save_target_values=True, rng=rng)

    eps = np.finfo(float).eps

    # carry out adaptions
    for step in progressbar(range(steps), desc="Adaptions", leave=False):
        sampler.run(step_N, trace_sort=True)
        samples = sampler.samples[:]
        last_perplexity = self._perplexity(np.copy(sampler.weights[-1][:, 0]))
        eos.info(
            'Perplexity of the last samples after sampling in step {}: {}'.format(
                step, last_perplexity))
        if last_perplexity < 0.05:
            eos.warn(
                "Last step's perplexity is very low. This could possibly be improved by running the markov chains that are used to form the initial PDF for a bit longer"
            )

        # replace negative and nan weights by eps
        weights = sampler.weights[:][:, 0]
        adjusted_weights = np.where(
            np.logical_or(weights <= 0, np.isnan(weights)),
            eps, weights)
        eos.info(
            'Perplexity of all previous samples after sampling in step {}: {}'.format(
                step, self._perplexity(adjusted_weights)))

        pypmc.mix_adapt.pmc.gaussian_pmc(samples, sampler.proposal, adjusted_weights,
                                         mincount=0, rb=True, copy=False)

        # Normalize the weights and remove components with a weight smaller than weight_threshold
        sampler.proposal.normalize()
        sampler.proposal.prune(threshold=weight_threshold)

        # stop adaptation if the perplexity of the last step is larger than the threshold
        if last_perplexity > final_perplexity_threshold:
            # `step` is zero-based, so step + 1 adaptation steps have been carried out
            eos.info('Perplexity threshold reached after {} step(s)'.format(step + 1))
            break

    # draw final samples
    sampler.run(final_N, trace_sort=True)

    # rescale proposal components back to their physical bounds
    for component in sampler.proposal.components:
        rescaled_mu = self._x_to_par(component.mu)
        rescaled_sigma = np.array([[
            component.sigma[i, j] * (bj[1] - bj[0]) * (bi[1] - bi[0]) / 4
            for j, bj in enumerate(self.bounds)
        ] for i, bi in enumerate(self.bounds)])
        component.update(rescaled_mu, rescaled_sigma)

    # rescale the samples back to their physical bounds
    if return_final_only:
        # only returns the final_N final samples
        samples = np.apply_along_axis(self._x_to_par, 1, sampler.samples[:][-final_N:])
        weights = sampler.weights[:][-final_N:, 0]
    else:
        # returns all samples
        samples = np.apply_along_axis(self._x_to_par, 1, sampler.samples[:])
        weights = sampler.weights[:][:, 0]
    perplexity = self._perplexity(np.copy(weights))
    eos.info('Perplexity after final samples: {}'.format(perplexity))

    return samples, weights, sampler.proposal
def sample(self, N=1000, stride=5, pre_N=150, preruns=3, cov_scale=0.1, observables=None, start_point=None, rng=np.random.mtrand):
    """
    Return samples of the parameters, log(weights), and optionally posterior-predictive samples for a sequence of observables.

    Obtains random samples of the log(posterior) using an adaptive Markov Chain Monte Carlo with PyPMC.
    A prerun with adaptations is carried out first and its samples are discarded.

    Sampling is carried out in rescaled coordinates, in which every parameter is bounded to [-1, +1];
    the returned parameter samples are rescaled back to the original bounds.

    :param N: Number of samples that shall be returned
    :param stride: Stride, i.e., the number by which the actual amount of samples shall be thinned to return N samples.
    :param pre_N: Number of samples in each prerun.
    :param preruns: Number of preruns. May be zero, in which case no adaptation is carried out.
    :param cov_scale: Scale factor for the initial guess of the covariance matrix.
    :param observables: Observables for which posterior-predictive samples shall be obtained. Obtaining them sets the varied parameters to each sample in turn, which leaves them at the last sample.
    :type observables: list-like, optional
    :param start_point: Optional starting point for the chain, in unscaled parameter coordinates. If not provided, a point is drawn uniformly.
    :type start_point: list-like, optional
    :param rng: Optional random number generator (must be compatible with the requirements of pypmc.sampler.markov_chain.MarkovChain)

    :return: A tuple of the parameters as array of size N, the logarithmic weights as array of size N, and optionally the posterior-predictive samples of the observables as array of size N x len(observables).

    .. note::
       This method requires the PyPMC python module, which can be installed from PyPI.
    """
    import pypmc
    try:
        from tqdm.auto import tqdm
        progressbar = tqdm
    except ImportError:
        # tqdm is optional: fall back to plain iteration without a progress bar
        progressbar = lambda x, **kw: x

    ind_lower = np.array([-1.0 for _ in self.bounds])
    ind_upper = np.array([+1.0 for _ in self.bounds])
    ind = pypmc.tools.indicator.hyperrectangle(ind_lower, ind_upper)

    log_target = pypmc.tools.indicator.merge_function_with_indicator(
        self.log_pdf, ind, -np.inf)

    # create initial covariance, assuming that each (rescaled) parameter is uniformly distributed on [-1, +1].
    # 1 / 3 is the covariance on the interval [-1, +1]
    sigma = np.diag([1.0 / 3.0 * cov_scale for _ in self.bounds])
    log_proposal = pypmc.density.gauss.LocalGauss(sigma)

    # create start point, if not provided or rescale a provided start point to [-1, 1]
    if start_point is None:
        start_point = np.array([rng.uniform(-1.0, 1.0) for _ in self.bounds])
    else:
        start_point = self._par_to_x(start_point)

    # create MC sampler
    sampler = pypmc.sampler.markov_chain.AdaptiveMarkovChain(
        log_target, log_proposal, start_point, save_target_values=True, rng=rng)

    # pre run to adapt markov chains
    for i in progressbar(range(0, preruns), desc="Pre-runs", leave=False):
        eos.info('Prerun {} out of {}'.format(i, preruns))
        prerun_accept_count = sampler.run(pre_N)
        accept_rate = prerun_accept_count / pre_N * 100
        eos.info('Prerun {}: acceptance rate is {:3.0f}%'.format(i, accept_rate))
        sampler.adapt()
    # discard the prerun samples
    sampler.clear()

    # obtain final samples
    eos.info('Main run: started ...')
    sample_total = N * stride
    # 100 chunks so that the progress bar advances; the last chunk absorbs the remainder
    sample_chunk = sample_total // 100
    sample_chunks = [sample_chunk for _ in range(0, 99)]
    sample_chunks.append(sample_total - 99 * sample_chunk)
    # start counting from zero: the counter must neither depend on a prerun
    # having taken place nor include the acceptances of the last prerun
    accept_count = 0
    for current_chunk in progressbar(sample_chunks, desc="Main run", leave=False):
        accept_count = accept_count + sampler.run(current_chunk)
    accept_rate = accept_count / sample_total * 100
    eos.info('Main run: acceptance rate is {:3.0f}%'.format(accept_rate))

    # Rescale the parameters back to their original bounds
    parameter_samples = np.apply_along_axis(self._x_to_par, 1, sampler.samples[:][::stride])
    weights = sampler.target_values[:][::stride, 0]

    if not observables:
        return (parameter_samples, weights)

    # evaluate every observable at every parameter sample
    observable_samples = []
    for parameters in parameter_samples:
        for p, v in zip(self.varied_parameters, parameters):
            p.set(v)
        observable_samples.append([o.evaluate() for o in observables])
    return (parameter_samples, weights, np.array(observable_samples))
def sample_pmc(self, log_proposal, step_N=1000, steps=10, final_N=5000, rng=np.random.mtrand):
    """
    Return samples of the parameters and log(weights)

    Obtains random samples of the log(posterior) using adaptive importance sampling following
    the Population Monte Carlo approach with PyPMC.

    Sampling is carried out in rescaled coordinates, in which every parameter is bounded to [-1, +1].
    The components of ``log_proposal`` are rescaled in place for this purpose, and rescaled back to the
    physical bounds before returning, so that the returned density uses the same coordinates as the
    returned samples.

    :param log_proposal: Initial proposal density, given in unscaled parameter coordinates; it is adapted in place.
    :param step_N: Number of samples that shall be drawn in each adaptation step.
    :param steps: Number of adaptation steps.
    :param final_N: Number of samples that shall be drawn after all adaptation steps.
    :param rng: Optional random number generator (must be compatible with the requirements of pypmc.sampler.importance_sampler.ImportanceSampler)

    :return: A tuple of the parameters as array of length N = step_N * steps + final_N, the (linear) weights as array of length N, and the final proposal function as pypmc.density.mixture.MixtureDensity.

    .. note::
       This method requires the PyPMC python module, which can be installed from PyPI.
    """
    import pypmc
    try:
        from tqdm.auto import tqdm
        progressbar = tqdm
    except ImportError:
        # tqdm is optional: fall back to plain iteration without a progress bar
        progressbar = lambda x, **kw: x

    # create log_target
    ind_lower = np.array([-1.0 for _ in self.bounds])
    ind_upper = np.array([+1.0 for _ in self.bounds])
    ind = pypmc.tools.indicator.hyperrectangle(ind_lower, ind_upper)

    log_target = pypmc.tools.indicator.merge_function_with_indicator(
        self.log_pdf, ind, -np.inf)

    # rescale log_proposal arguments to [-1, 1]; an interval of width w maps
    # to one of width 2, hence the factor 2 / w per index of the covariance
    for component in log_proposal.components:
        rescaled_mu = self._par_to_x(component.mu)
        rescaled_sigma = np.array([[
            4 * component.sigma[i, j] / (bj[1] - bj[0]) / (bi[1] - bi[0])
            for j, bj in enumerate(self.bounds)
        ] for i, bi in enumerate(self.bounds)])
        component.update(rescaled_mu, rescaled_sigma)

    # create PMC sampler
    sampler = pypmc.sampler.importance_sampling.ImportanceSampler(
        log_target, log_proposal, save_target_values=True, rng=rng)

    eps = np.finfo(float).eps

    # carry out adaptions
    for step in progressbar(range(steps), desc="Adaptions", leave=False):
        sampler.run(step_N, trace_sort=True)
        samples = sampler.samples[:]
        last_perplexity = self._perplexity(np.copy(sampler.weights[-1][:, 0]))
        eos.info(
            'Perplexity of the last samples after sampling in step {}: {}'.format(
                step, last_perplexity))

        # replace negative and nan weights by eps
        weights = sampler.weights[:][:, 0]
        adjusted_weights = np.where(
            np.logical_or(weights <= 0, np.isnan(weights)),
            eps, weights)
        eos.info(
            'Perplexity of all previous samples after sampling in step {}: {}'.format(
                step, self._perplexity(adjusted_weights)))

        pypmc.mix_adapt.pmc.gaussian_pmc(samples, sampler.proposal, adjusted_weights,
                                         mincount=0, rb=True, copy=False)
        sampler.proposal.normalize()

    # draw final samples
    sampler.run(final_N, trace_sort=True)

    # rescale proposal components back to their physical bounds; without this
    # the returned density stays in the [-1, 1] coordinates
    for component in sampler.proposal.components:
        rescaled_mu = self._x_to_par(component.mu)
        rescaled_sigma = np.array([[
            component.sigma[i, j] * (bj[1] - bj[0]) * (bi[1] - bi[0]) / 4
            for j, bj in enumerate(self.bounds)
        ] for i, bi in enumerate(self.bounds)])
        component.update(rescaled_mu, rescaled_sigma)

    # Rescale the parameters back to their original bounds
    samples = np.apply_along_axis(self._x_to_par, 1, sampler.samples[:])
    weights = sampler.weights[:][:, 0]
    perplexity = self._perplexity(np.copy(weights))
    eos.info('Perplexity after final samples: {}'.format(perplexity))

    return samples, weights, sampler.proposal