def visualizing_results(*args):
    """
    Produce the example diagnostic figures for a saved MCMC result.

    Loads ``data/mcmc_result.npz`` as an ``EmceeResults`` object, draws
    five plots in turn and saves each one to a PNG file in the current
    directory with a figure size of ``(8, 6)``.

    Parameters
    ----------
    *args :
        accepted but ignored
    """
    from pyRSD.rsdfit.results import EmceeResults

    chain = EmceeResults.from_npz('data/mcmc_result.npz')
    figure_size = (8, 6)

    # each entry pairs the plotting call with the file it is saved to;
    # the order matches the order in which the figures are produced
    figures = (
        # 2D kernel density plot
        (lambda: chain.kdeplot_2d('b1_cA', 'fsigma8', thin=10),
         'kdeplot.png'),
        # 2D joint plot
        (lambda: chain.jointplot_2d('b1_cA', 'fsigma8', thin=10),
         'jointplot.png'),
        # timeline plot
        (lambda: chain.plot_timeline('fsigma8', 'b1_cA', thin=10),
         'timeline.png'),
        # correlation between free parameters
        (lambda: chain.plot_correlation(params='free'),
         'correlation.png'),
        # triangle plot
        (lambda: chain.plot_triangle('fsigma8', 'alpha_perp', 'alpha_par',
                                     thin=10),
         'triangle.png'),
    )

    for draw, filename in figures:
        draw()
        savefig(filename, size=figure_size)
def result(self):
    """
    The fitting result, either a LBFGSResult or EmceeResult

    The value is cached on ``self._result`` after the first successful
    load. On a cache miss the lookup order is:

    1. ``<fitting_dir>/info/combined_result.npz``, loaded as an
       ``EmceeResults``;
    2. otherwise the most recently modified ``*npz`` file directly inside
       ``fitting_dir``, loaded as an ``LBFGSResults``.

    Raises
    ------
    ValueError
        if no ``*npz`` file exists in ``fitting_dir``, or if the newest
        one cannot be loaded as an ``LBFGSResults``
    """
    try:
        return self._result
    except AttributeError:
        pass

    # check if combined mcmc result is there
    combined = os.path.join(self.fitting_dir, 'info', 'combined_result.npz')
    if os.path.isfile(combined):
        r = EmceeResults.from_npz(combined)
    else:
        files = glob(os.path.join(self.fitting_dir, '*npz'))
        if not files:
            # report the directory that was searched (the original message
            # formatted an undefined name and raised NameError instead)
            raise ValueError("no suitable results files found in directory '%s'" % self.fitting_dir)

        # grab the file modified last (first one wins on identical mtimes)
        newest = max(files, key=lambda f: os.stat(f).st_mtime)
        try:
            r = LBFGSResults.from_npz(newest)
        except Exception:
            # do not trap KeyboardInterrupt/SystemExit here
            raise ValueError("if directory is from mcmc fit, define the `info` directory")

    self._result = r
    return self._result
def load_results(filename):
    """
    Load a result from file

    The file is first read as an ``EmceeResults``; if that fails for any
    ordinary error it is read as an ``LBFGSResults`` instead.

    Parameters
    ----------
    filename : str
        the path passed to ``from_npz``

    Returns
    -------
    EmceeResults or LBFGSResults
        whichever loader succeeded; an error from the ``LBFGSResults``
        loader propagates to the caller
    """
    try:
        return EmceeResults.from_npz(filename)
    except Exception:
        # `Exception` rather than a bare except, so Ctrl+C / SystemExit
        # abort the load instead of triggering the fallback
        return LBFGSResults.from_npz(filename)
def run_global_mcmc(args, theory_model, data_loader):
    """
    Run the mcmc solver once and return its result

    Parameters
    ----------
    args : argparse.Namespace
        the namespace of arguments returned by `parse_global_mcmc`; only
        ``args.param_file`` is read here
    theory_model : lmfit.Model
        the model class that will be called to return the theoretical
        model; forwarded to the objective as ``model``
    data_loader : callable
        called with no arguments; must return a dictionary of keyword
        arguments that are bound to the objective function

    Returns
    -------
    result : EmceeResults
        the result of the mcmc run, as returned by ``emcee_fitter.solve``
        (also printed before returning)
    """
    # setup the mcmc run
    fit_params, theory_params, start = setup_mcmc(args.param_file)

    # optionally restart from the maximum-probability point of an
    # earlier chain, re-ordered to match the current free parameters
    if fit_params['init_from'] == 'chain':
        from pyRSD.rsdfit.results import EmceeResults

        previous = EmceeResults.from_npz(fit_params['start_chain'].value)
        best_fit = dict(zip(previous.free_names, previous.max_lnprob_values()))
        start = np.array([best_fit[name] for name in theory_params.free_names])

    # load the data
    loaded = data_loader()

    # make the objective function
    log_posterior = functools.partial(
        lnprob, model=theory_model, theory=theory_params, **loaded
    )

    # run emcee
    chain = emcee_fitter.solve(
        fit_params, theory_params, log_posterior, init_values=start
    )
    print(chain)
    return chain
def load_joint_data_results(kmin, z_weighted, p, ells=[0, 2]):
    """
    Load a set of data joint NGC + SGC fit results.

    The result directory is located under
    ``$EBOSS_DIR/fits/results/data/v1.9f/<krange>/<params>/<zrange>/`` as
    ``QSO-N+S-<hash>``, where the hash is computed by ``make_hash`` from
    the fit configuration. The most recently modified ``*.npz`` file in
    that directory is loaded.

    Parameters
    ----------
    kmin :
        lower bound of the k-range; formatted as ``'<kmin>-0.3'``
    z_weighted :
        stored under the ``'z_weighted'`` key of the hashed configuration
    p :
        stored under the ``'p'`` key of the hashed configuration
    ells : list of int, optional
        only included in the hashed configuration when equal to ``[0]``;
        the default list is never modified

    Returns
    -------
    EmceeResults
        the newest result found in the matching directory

    Raises
    ------
    KeyError
        if the ``EBOSS_DIR`` environment variable is not set
    AssertionError
        if the results directory does not exist, if there is not exactly
        one matching fit directory, or if it holds no ``.npz`` file
    """
    from eboss_qso.measurements.utils import make_hash
    from pyRSD.rsdfit.results import EmceeResults

    # the data to load (key order kept as-is in case the hash depends on it)
    kws = {
        'version': 'v1.9f',
        'krange': '%s-0.3' % kmin,
        'params': 'basemodel-N-fnl',
        'zrange': '0.8-2.2',
        'z_weighted': z_weighted,
        'p': p,
    }
    if ells == [0]:
        kws['ells'] = ells
    hashstr = make_hash(kws)

    d = os.path.join(os.environ['EBOSS_DIR'], 'fits', 'results', 'data', 'v1.9f')
    d = os.path.join(d, kws['krange'], kws['params'], kws['zrange'])
    assert os.path.isdir(d), "'%s' directory not found" % d

    pattern = os.path.join(d, f'QSO-N+S-{hashstr}')
    matches = glob(pattern)
    # this assert is what actually fires when nothing matches, so it now
    # carries a message (the old `match is None` check was unreachable)
    assert len(matches) == 1, (
        "expected exactly one joint NGC + SGC data fit matching '%s', found %d"
        % (pattern, len(matches))
    )
    match = matches[0]

    npz_files = glob(os.path.join(match, '*.npz'))
    # the message previously formatted an undefined name, turning this
    # failure into a NameError
    assert len(npz_files) > 0, \
        "no npz results found in directory '%s'" % os.path.normpath(match)

    # newest file first; ties resolve to the first one listed, as before
    newest = max(npz_files, key=os.path.getmtime)
    return EmceeResults.from_npz(newest)
def start_from(self, val):
    """
    Set the file that a fit should start from.

    Parameters
    ----------
    val : str or None
        ``None`` clears the setting. Otherwise a path to an existing file
        or directory. A value containing ``$`` is first passed through
        ``replace_vars``. For a directory, every ``*.npz`` file in it is
        loaded and the one with the largest maximum log-probability (or
        smallest ``min_chi2`` for L-BFGS results) is stored.

    Raises
    ------
    RuntimeError
        if the path does not exist, or if a directory contains no
        ``.npz`` files
    """
    if val is None:
        self._start_from = val
        return

    if '$' in val:
        from pyRSD.rsdfit.parameters.tools import replace_vars
        val = replace_vars(val, {})

    import os
    if not os.path.exists(val):
        raise RuntimeError("cannot set `start_from` to `%s`: no such file" %val)

    # a plain file is used as given
    if not os.path.isdir(val):
        self._start_from = val
        return

    from glob import glob
    pattern = os.path.join(val, "*.npz")
    result_files = glob(pattern)
    if not result_files:
        raise RuntimeError("did not find any chain (`.npz`) files matching pattern `%s`" %pattern)

    # only import the result classes once there is something to load
    from pyRSD.rsdfit.results import EmceeResults, LBFGSResults

    # find the chain file which has the maximum log prob in it and use that
    max_lnprobs = []
    for f in result_files:
        try:
            score = EmceeResults.from_npz(f).max_lnprob
        except Exception:
            # not an emcee chain: fall back to L-BFGS, ranking by -chi2.
            # `Exception` (not a bare except) so Ctrl+C aborts the scan.
            score = -LBFGSResults.from_npz(f).min_chi2
        max_lnprobs.append(score)

    # index of the best score; the first file wins on ties
    best = max(range(len(result_files)), key=max_lnprobs.__getitem__)
    self._start_from = result_files[best]
def find_init_result(val):
    """
    Return the name of the file holding the maximum probability from a directory

    Parameters
    ----------
    val : str
        path to an existing file or directory

    Returns
    -------
    str
        ``val`` itself when it is not a directory; otherwise the ``*.npz``
        file in that directory with the largest maximum log-probability
        (or smallest ``min_chi2`` for L-BFGS results)

    Raises
    ------
    RuntimeError
        if the path does not exist, or if a directory contains no
        ``.npz`` files
    """
    import os
    if not os.path.exists(val):
        raise RuntimeError("cannot set `start_from` to `%s`: no such file" % val)

    # a plain file is returned as given
    if not os.path.isdir(val):
        return val

    from glob import glob
    pattern = os.path.join(val, "*.npz")
    result_files = glob(pattern)
    if not result_files:
        raise RuntimeError(
            "did not find any chain (`.npz`) files matching pattern `%s`" % pattern)

    # only import the result classes once there is something to load
    from pyRSD.rsdfit.results import EmceeResults, LBFGSResults

    # find the chain file which has the maximum log prob in it and use that
    max_lnprobs = []
    for f in result_files:
        try:
            score = EmceeResults.from_npz(f).max_lnprob
        except Exception:
            # not an emcee chain: fall back to L-BFGS, ranking by -chi2.
            # `Exception` (not a bare except) so Ctrl+C aborts the scan.
            score = -LBFGSResults.from_npz(f).min_chi2
        max_lnprobs.append(score)

    # index of the best score; the first file wins on ties
    best = max(range(len(result_files)), key=max_lnprobs.__getitem__)
    return result_files[best]
def run_joint_mcmc_fit(kind, iterations, walkers, output, load_kwargs, joint_params=['f_nl']):
    """
    Run a joint mcmc fit to the NGC and SGC sky regions.

    The previous single-region results are loaded for both regions and
    combined into one parameter set in which the ``joint_params`` appear
    once and every other free parameter appears twice, suffixed with
    ``_ngc`` / ``_sgc``. A L-BFGS minimization of the summed objective
    provides the starting point; the walkers are then scattered around it
    and sampled with emcee. The result is printed and written to
    ``output``; nothing is returned.

    Parameters
    ----------
    kind : 'data', 'ezmock'
        the kind of data to load
    iterations : int
        the number of iterations to run
    walkers : int
        the number of walkers to use
    output : str
        the name of the output file to save
    load_kwargs : dict
        the dictionary of keywords to load the previous result
        for each sample; a ``sample`` keyword is added per region on a
        copy, so the caller's dictionary is left untouched
    joint_params : list of str
        the names of parameters that use only a single value shared
        by both sky regions (the default list is only read)

    Raises
    ------
    AssertionError
        if ``kind`` is not ``'data'`` or ``'ezmock'``
    """
    # add a console logger
    rsd_logging.add_console_logger(0)

    # determine the loader; raised explicitly so the check survives `-O`
    if kind == 'data':
        loader = load_data_results
    elif kind == 'ezmock':
        loader = load_ezmock_results
    else:
        raise AssertionError(
            "`kind` must be one of 'data' or 'ezmock', got %r" % (kind,))

    # load the previous results
    samples = ['N', 'S']
    drivers = {}
    for sample in samples:
        # copy rather than write `sample` into the caller's dict
        drivers[sample] = loader(**dict(load_kwargs, sample=sample))
        drivers[sample].set_fit_results()

    # initialize a parameter set to hold combined data set
    theory = ParameterSet()

    # the initial state
    p0 = []
    free_names = []  # track names; stays in lockstep with p0

    # add params that are fit jointly (values taken from the NGC driver)
    for param in joint_params:
        theory[param] = drivers['N'].theory.fit_params[param]
        p0.append(theory[param].value)
        free_names.append(param)

    # slices to set theta properly: for each region, the position in the
    # combined vector of each of that region's free parameters
    slices = collections.defaultdict(list)
    for tag in ['ngc', 'sgc']:
        tag_ = tag[0].upper()
        d = drivers[tag_]  # the driver

        # add all free names
        for name in d.theory.free_names:

            # only fit one version of joint params
            if name in joint_params:
                slices[tag_].append(joint_params.index(name))
                continue

            # add tagged version of other params
            slices[tag_].append(len(free_names))
            free_names.append(name + '_' + tag)
            par = d.theory.fit_params[name]
            p0.append(par.value)
            theory[name + '_' + tag] = par

    # the initial state
    p0 = numpy.array(p0)
    scales = numpy.array([theory[k].scale for k in free_names])
    locs = numpy.array([theory[k].loc for k in free_names])

    # the arrays of free parameters; reused as scratch buffers below
    theta1 = numpy.array([
        drivers['N'].theory.fit_params[name].value
        for name in drivers['N'].theory.free_names
    ])
    theta2 = numpy.array([
        drivers['S'].theory.fit_params[name].value
        for name in drivers['S'].theory.free_names
    ])

    def mcmc_objective(x):
        # get separate values
        theta1[:] = x[slices['N']]
        theta2[:] = x[slices['S']]

        # the joint log-probability is the sum over both regions
        return drivers['N'].lnprob(theta1) + drivers['S'].lnprob(theta2)

    def nlopt_objective(x):
        toret = 0
        x = x * scales + locs  # minimizer works in scaled coordinates
        for tag in ['N', 'S']:
            d = drivers[tag]
            toret += d.minus_lnlike(x[slices[tag]], use_priors=True)
        return toret

    def nlopt_grad(x, **kwargs):
        x = x * scales + locs
        # sized from `free_names`: `p0` is rebound to the 2D walker array
        # further down, so its length is not a stable reference here
        grad = numpy.zeros(len(free_names))
        for tag in ['N', 'S']:
            d = drivers[tag]
            g = d.grad_minus_lnlike(x[slices[tag]], **kwargs)
            # joint parameters accumulate contributions from both regions
            for i, sl in enumerate(slices[tag]):
                grad[sl] += g[i]
        # chain rule for the scaled coordinates
        return grad * scales

    # the gradient
    grad_kws = {
        'epsilon': 0.0001,
        'pool': None,
        'numerical': False,
        'numerical_from_lnlike': True,
    }
    fprime = functools.partial(nlopt_grad, **grad_kws)

    # scale the original values
    p0_scaled = (p0 - locs) / scales

    def unscaler(x):
        return x * scales + locs

    # run
    minimizer = lbfgs.LBFGS(nlopt_objective, fprime, p0_scaled, unscaler=unscaler)
    options = {
        'factr': 100000.0,
        'gtol': 1e-05,
        'max_iter': 1000,
        'test_convergence': False
    }
    # return value unused; the final state is read from `minimizer.data`
    minimizer.run_nlopt(**options)

    # the best fit values
    d = minimizer.data
    final_p0 = d['curr_state'].X
    p0 = final_p0 * scales + locs

    # initialize the sampler: walkers scattered around the best fit
    ndim = len(p0)
    p0 = numpy.array(
        [p0 + 1e-3 * numpy.random.randn(ndim) for _ in range(walkers)])
    sampler = emcee.EnsembleSampler(walkers, ndim, mcmc_objective)

    # iterator interface allows us to tap ctrl+c and know where we are
    niters = iterations
    burnin = 0

    # do the sampling
    with ChainManager(sampler, niters, walkers, free_names, None) as manager:
        for niter, _ in manager.sample(p0, None):

            # check if we need to exit due to exception/convergence
            manager.check_status()

            # update progress and test convergence
            manager.update_progress(niter)

    # re-order the chain to the sorted parameter order
    order = [free_names.index(name) for name in theory.free_names]
    try:
        sampler.chain = sampler.chain[..., order]
    except Exception:
        # presumably `chain` is read-only on some emcee versions; fall
        # back to the private attribute when it exists
        if hasattr(sampler, '_chain'):
            sampler._chain = sampler.chain[..., order]
        else:
            raise

    # create and save the result
    result = EmceeResults(sampler, theory, burnin)
    print(result)
    result.to_npz(output)