def sample_pmc(analysis_file, posterior, base_directory='./', step_N=500, steps=10, final_N=5000,
               perplexity_threshold=1.0, weight_threshold=1e-10, sigma_test_stat=None,
               initial_proposal='clusters'):
    """
    Samples from a named posterior using the Population Monte Carlo (PMC) methods.

    The results of the find-cluster command are expected in EOS_BASE_DIRECTORY/POSTERIOR/clusters.
    The output file will be stored in EOS_BASE_DIRECTORY/POSTERIOR/pmc.

    :param analysis_file: The name of the analysis file that describes the named posterior, or an object of class `eos.AnalysisFile`.
    :type analysis_file: str or `eos.AnalysisFile`
    :param posterior: The name of the posterior.
    :type posterior: str
    :param base_directory: The base directory for the storage of data files. Can also be set via the EOS_BASE_DIRECTORY environment variable.
    :type base_directory: str, optional
    :param step_N: The number of samples to be used in each adaptation step. These samples will be discarded. Defaults to 500.
    :type step_N: int > 0, optional
    :param steps: The number of adaptation steps, which are used to adapt the PMC proposal to the posterior. Defaults to 10.
    :type steps: int > 0, optional
    :param final_N: The number of samples to be stored in the output file. Defaults to 5000.
    :type final_N: int > 0, optional
    :param perplexity_threshold: The threshold for the perplexity in the last step after which further adaptation steps are to be skipped. Defaults to 1.0.
    :type perplexity_threshold: 0.0 < float <= 1.0, optional
    :param weight_threshold: Mixture components with a weight smaller than this threshold are pruned.
    :type weight_threshold: 0.0 < float <= 1.0, optional.
    :param sigma_test_stat: If provided, the inverse CDF of -2*log(PDF) will be evaluated, using the provided values as the respective significance.
    :type sigma_test_stat: list or iterable
    :param initial_proposal: Specify where the initial proposal should be taken from; 'clusters' (default): use the proposal obtained using `find-clusters`;
        'product': use the proposal obtained from `mixture_product`; 'pmc': continue sampling from the previous `sample-pmc` results.
    :type initial_proposal: str, optional
    :raises ValueError: If ``initial_proposal`` is not one of 'clusters', 'product' or 'pmc'.
    """
    # When continuing a previous PMC run, the new samples are appended to the
    # old ones, so the log file is appended to as well instead of truncated.
    continue_previous = (initial_proposal == 'pmc')

    output_path = os.path.join(base_directory, posterior, 'pmc')
    _set_log_file(output_path, 'log', mode='a' if continue_previous else 'w')

    # Accept either a ready-made AnalysisFile (or subclass) or something to build one from.
    if isinstance(analysis_file, eos.AnalysisFile):
        _analysis_file = analysis_file
    else:
        _analysis_file = eos.AnalysisFile(analysis_file)
    analysis = _analysis_file.analysis(posterior)

    # Fixed seed for the random number generator.
    rng = _np.random.mtrand.RandomState(1701)

    previous_sampler = None
    if initial_proposal in ('clusters', 'product'):
        # Both proposals are stored as mixture densities in a directory named
        # after the proposal kind.
        density_path = os.path.join(base_directory, posterior, initial_proposal)
        initial_density = eos.data.MixtureDensity(density_path).density()
    elif continue_previous:
        previous_sampler = eos.data.PMCSampler(os.path.join(base_directory, posterior, 'pmc'))
        initial_density = previous_sampler.density()
    else:
        message = "Could not initialize proposal in sample_pmc: argument {} is not supported.".format(initial_proposal)
        eos.error(message)
        # Without a proposal there is nothing to sample from; fail here with a
        # meaningful error instead of hitting an unbound local variable below.
        raise ValueError(message)

    samples, weights, proposal = analysis.sample_pmc(
        initial_density,
        step_N=step_N,
        steps=steps,
        final_N=final_N,
        rng=rng,
        final_perplexity_threshold=perplexity_threshold,
        weight_threshold=weight_threshold,
    )

    if continue_previous:
        # Prepend the samples and weights of the previous run to the new ones.
        samples = _np.concatenate((previous_sampler.samples, samples), axis=0)
        weights = _np.concatenate((previous_sampler.weights, weights), axis=0)

    eos.data.PMCSampler.create(output_path, analysis.varied_parameters, samples, weights, proposal,
                               sigma_test_stat=sigma_test_stat)
def predict_observables(analysis_file, posterior, prediction, base_directory='./', begin=0, end=-1):
    """
    Predicts a set of observables based on previously obtained PMC samples.

    The input files are expected in EOS_BASE_DIRECTORY/POSTERIOR/pmc.
    The output files will be stored in EOS_BASE_DIRECTORY/POSTERIOR/pred-PREDICTION.

    :param analysis_file: The name of the analysis file that describes the named posterior, or an object of class `eos.AnalysisFile`.
    :type analysis_file: str or `eos.AnalysisFile`
    :param posterior: The name of the posterior.
    :type posterior: str
    :param prediction: The name of the set of observables to predict.
    :type prediction: str
    :param base_directory: The base directory for the storage of data files. Can also be set via the EOS_BASE_DIRECTORY environment variable.
    :type base_directory: str, optional
    :param begin: The index of the first sample to use for the predictions. Defaults to 0.
    :type begin: int
    :param end: The index beyond the last sample to use for the predictions. Defaults to -1.
        The value is used as the stop of a slice ``[begin:end]``, so the default of -1 leaves out the final sample.
    :type end: int
    """
    _parameters = eos.Parameters()

    # Accept either a ready-made AnalysisFile (or subclass) or something to build one from.
    if isinstance(analysis_file, eos.AnalysisFile):
        _analysis_file = analysis_file
    else:
        _analysis_file = eos.AnalysisFile(analysis_file)
    observables = _analysis_file.observables(prediction, _parameters)

    data = eos.data.PMCSampler(os.path.join(base_directory, posterior, 'pmc'))

    # tqdm is optional: fall back to a pass-through wrapper when it is not installed.
    try:
        from tqdm import tqdm as progressbar
    except ImportError:
        def progressbar(iterable):
            return iterable

    # Parameter objects in the same order as the columns of each sample.
    parameters = [_parameters[p['name']] for p in data.varied_parameters]

    observable_samples = []
    # NOTE: ``i`` counts from the start of the selected slice, not from the
    # start of the full sample set.
    for i, sample in enumerate(progressbar(data.samples[begin:end])):
        for parameter, value in zip(parameters, sample):
            parameter.set(value)
        try:
            observable_samples.append([o.evaluate() for o in observables])
        except RuntimeError as e:
            eos.error('skipping prediction for sample {i} due to runtime error ({e}): {s}'.format(i=i, e=e, s=sample))
            # Keep one row per sample so that rows stay aligned with the weights.
            observable_samples.append([_np.nan for _ in observables])
    observable_samples = _np.array(observable_samples)

    output_path = os.path.join(base_directory, posterior, 'pred-{}'.format(prediction))
    eos.data.Prediction.create(output_path, observables, observable_samples, data.weights[begin:end])
def run_steps(analysis_file, base_directory='./'):
    """
    Runs a list of predefined steps recorded in the analysis file.

    Each step corresponds to a call to one of the following common tasks:
     - sample-mcmc
     - find-cluster
     - sample-pmc
     - predict-observables

    :param analysis_file: The name of the analysis file that describes the named posterior, or an object of class `eos.AnalysisFile`.
    :type analysis_file: str or `eos.AnalysisFile`
    :param base_directory: Not used by the body of this function.
    :type base_directory: str, optional
    """
    # Accept either a ready-made AnalysisFile (or subclass) or something to build one from.
    if isinstance(analysis_file, eos.AnalysisFile):
        _analysis_file = analysis_file
    else:
        _analysis_file = eos.AnalysisFile(analysis_file)

    _analysis_file.run()
def sample_mcmc(analysis_file, posterior, chain, base_directory='./', pre_N=150, preruns=3, N=1000,
                stride=5, cov_scale=0.1, start_point=None):
    """
    Samples from a named posterior PDF using Markov Chain Monte Carlo (MCMC) methods.

    The output file will be stored in EOS_BASE_DIRECTORY/POSTERIOR/mcmc-CHAIN.

    If a ``RuntimeError`` occurs while sampling or while storing the chain, the error and the
    current values of the varied parameters are logged via ``eos.error`` and the function
    returns normally; the exception is not re-raised.

    :param analysis_file: The name of the analysis file that describes the named posterior, or an object of class `eos.AnalysisFile`.
    :type analysis_file: str or `eos.AnalysisFile`
    :param posterior: The name of the posterior PDF from which to draw the samples.
    :type posterior: str
    :param chain: The index assigned to the Markov chain. This value is used to seed the RNG for a reproducible analysis.
    :type chain: int >= 0
    :param base_directory: The base directory for the storage of data files. Can also be set via the EOS_BASE_DIRECTORY environment variable.
    :type base_directory: str, optional
    :param pre_N: The number of samples to be used for an adaptation in each prerun step. These samples will be discarded.
    :type pre_N: int, optional
    :param preruns: The number of prerun steps, which are used to adapt the MCMC proposal to the posterior.
    :type preruns: int, optional
    :param N: The number of samples to be stored in the output file. Defaults to 1000.
    :type N: int, optional
    :param stride: The ratio of samples drawn over samples stored. For every S samples, S - 1 will be discarded. Defaults to 5.
    :type stride: int, optional
    :param cov_scale: Scale factor for the initial guess of the covariance matrix.
    :type cov_scale: float, optional
    :param start_point: Optional starting point for the chain
    :type start_point: list-like, optional
    """
    chain_index = int(chain)

    output_path = os.path.join(base_directory, posterior, 'mcmc-{:04}'.format(chain_index))
    _set_log_file(output_path, 'log')

    # Accept either a ready-made AnalysisFile (or subclass) or something to build one from.
    if isinstance(analysis_file, eos.AnalysisFile):
        _analysis_file = analysis_file
    else:
        _analysis_file = eos.AnalysisFile(analysis_file)
    analysis = _analysis_file.analysis(posterior)

    # The seed is offset by the chain index, so each chain gets its own seed.
    rng = _np.random.mtrand.RandomState(chain_index + 1701)

    try:
        samples, weights = analysis.sample(N=N, stride=stride, pre_N=pre_N, preruns=preruns, rng=rng,
                                           cov_scale=cov_scale, start_point=start_point)
        eos.data.MarkovChain.create(output_path, analysis.varied_parameters, samples, weights)
    except RuntimeError as e:
        # Log the failure together with the current parameter point; the
        # exception is deliberately not propagated.
        eos.error('encountered run time error ({e}) in parameter point:'.format(e=e))
        for p in analysis.varied_parameters:
            eos.error(' - {n}: {v}'.format(n=p.name(), v=p.evaluate()))