Esempio n. 1
0
def visualizing_results(*args):
    """
    Load a saved MCMC result and write a series of diagnostic figures.

    The result is read from ``data/mcmc_result.npz``. Each plotting
    method of the result is called in turn, and ``savefig`` is invoked
    right after it with the output file name and ``size=(8, 6)``.

    Any positional arguments are accepted but ignored.
    """
    from pyRSD.rsdfit.results import EmceeResults

    chain = EmceeResults.from_npz('data/mcmc_result.npz')

    # one entry per figure, in the order the figures are produced:
    # (plotting method, positional args, keyword args, output file)
    figures = [
        # 2D kernel density plot
        ('kdeplot_2d', ('b1_cA', 'fsigma8'), {'thin': 10}, 'kdeplot.png'),
        # 2D joint plot
        ('jointplot_2d', ('b1_cA', 'fsigma8'), {'thin': 10}, 'jointplot.png'),
        # timeline plot
        ('plot_timeline', ('fsigma8', 'b1_cA'), {'thin': 10}, 'timeline.png'),
        # correlation between free parameters
        ('plot_correlation', (), {'params': 'free'}, 'correlation.png'),
        # triangle plot
        ('plot_triangle', ('fsigma8', 'alpha_perp', 'alpha_par'),
         {'thin': 10}, 'triangle.png'),
    ]

    for method_name, names, options, outfile in figures:
        getattr(chain, method_name)(*names, **options)
        savefig(outfile, size=(8, 6))
Esempio n. 2
0
 def result(self):
     """
     The fitting result, either a LBFGSResult or EmceeResult

     The value is cached on ``self._result`` after the first load. On
     the first access it is loaded from ``self.fitting_dir``:

     - if ``info/combined_result.npz`` exists there, it is loaded as
       an ``EmceeResults``
     - otherwise the most recently modified file matching ``*npz`` in
       the directory is loaded as a ``LBFGSResults``

     Raises
     ------
     ValueError
         if the directory holds no ``*npz`` file, or if the most recent
         one cannot be loaded as a ``LBFGSResults``
     """
     try:
         return self._result
     except AttributeError:
         # nothing cached yet; load from disk below
         pass

     # check if combined mcmc result is there
     path = os.path.join(self.fitting_dir, 'info', 'combined_result.npz')
     if os.path.isfile(path):
         r = EmceeResults.from_npz(path)
     else:
         files = glob(os.path.join(self.fitting_dir, '*npz'))
         if not files:
             raise ValueError("no suitable results files found in directory '%s'" % self.fitting_dir)

         # grab the file modified last (first one wins on ties)
         latest = max(files, key=lambda f: os.stat(f).st_mtime)
         try:
             r = LBFGSResults.from_npz(latest)
         except Exception:
             raise ValueError("if directory is from mcmc fit, define the `info` directory")

     self._result = r
     return self._result
Esempio n. 3
0
def load_results(filename):
    """
    Load a result from file

    The file is first read with ``EmceeResults.from_npz``; if that
    raises, it is read with ``LBFGSResults.from_npz`` instead.

    Parameters
    ----------
    filename : str
        the name of the file to load

    Returns
    -------
    result : EmceeResults or LBFGSResults
        whichever loader succeeded; an exception raised by the
        ``LBFGSResults`` fallback propagates to the caller
    """
    try:
        return EmceeResults.from_npz(filename)
    except Exception:
        # only ordinary errors trigger the fallback; KeyboardInterrupt
        # and SystemExit are allowed to propagate
        return LBFGSResults.from_npz(filename)
Esempio n. 4
0
def run_global_mcmc(args, theory_model, data_loader):
    """
    Set up and run the emcee solver, returning its result

    Parameters
    ----------
    args : argparse.Namespace
        the namespace of arguments returned by `parse_global_mcmc`;
        only ``args.param_file`` is read here
    theory_model : lmfit.Model
        the model, handed to the objective function as the ``model``
        keyword
    data_loader : callable
        called with no arguments; the mapping it returns is unpacked
        as extra keyword arguments of the objective function

    Returns
    -------
    result : EmceeResults
        the value returned by ``emcee_fitter.solve``, which is also
        printed before being returned
    """
    # read the driver parameters, theory parameters and initial state
    params, theory_params, init_values = setup_mcmc(args.param_file)

    # when initializing from a chain, start from its maximum
    # probability values, ordered like the free theory parameters
    if params['init_from'] == 'chain':
        from pyRSD.rsdfit.results import EmceeResults
        chain = EmceeResults.from_npz(params['start_chain'].value)
        best_fit = {
            name: value
            for name, value in zip(chain.free_names, chain.max_lnprob_values())
        }
        init_values = np.array(
            [best_fit[name] for name in theory_params.free_names])

    # keywords describing the data
    loaded = data_loader()

    # bind the model, theory and data to the log-probability function
    objective = functools.partial(lnprob,
                                  model=theory_model,
                                  theory=theory_params,
                                  **loaded)

    # sample with emcee
    result = emcee_fitter.solve(params,
                                theory_params,
                                objective,
                                init_values=init_values)
    print(result)

    return result
Esempio n. 5
0
def load_joint_data_results(kmin, z_weighted, p, ells=[0, 2]):
    """
    Load a set of data joint NGC + SGC fit results.

    The results directory is built from the ``EBOSS_DIR`` environment
    variable and the fit configuration, and the most recently modified
    ``.npz`` file inside the single matching ``QSO-N+S-<hash>``
    directory is loaded.

    Parameters
    ----------
    kmin :
        the lower edge of the k-range; formatted into ``'<kmin>-0.3'``
    z_weighted :
        stored under the ``'z_weighted'`` key of the hashed configuration
    p :
        stored under the ``'p'`` key of the hashed configuration
    ells : list of int
        the multipoles; only added to the hashed configuration when
        equal to ``[0]``. The default list is never modified here.

    Returns
    -------
    EmceeResults
        the result loaded from the newest ``.npz`` file

    Raises
    ------
    ValueError
        if no directory matches the configuration hash
    AssertionError
        if the base directory is missing, more than one directory
        matches, or the matching directory holds no ``.npz`` file
    """
    from eboss_qso.measurements.utils import make_hash
    from pyRSD.rsdfit.results import EmceeResults

    # the data to load (key order kept as-is since it is fed to make_hash)
    kws = {}
    kws['version'] = 'v1.9f'
    kws['krange'] = '%s-0.3' % kmin
    kws['params'] = 'basemodel-N-fnl'
    kws['zrange'] = '0.8-2.2'
    kws['z_weighted'] = z_weighted
    kws['p'] = p

    if ells == [0]:
        kws['ells'] = ells

    hashstr = make_hash(kws)

    d = os.path.join(os.environ['EBOSS_DIR'], 'fits', 'results', 'data',
                     'v1.9f')
    d = os.path.join(d, kws['krange'], kws['params'], kws['zrange'])
    assert os.path.isdir(d), "'%s' directory not found" % d

    matches = glob(os.path.join(d, f'QSO-N+S-{hashstr}'))
    if not matches:
        # the original check for this case sat after an assertion that
        # had already failed, so it could never be reached
        raise ValueError("no matches found for joint NGC + SGC data fits!")
    assert len(matches) == 1, \
        "expected a single match in '%s', found %d" % (d, len(matches))
    match = matches[0]

    # newest file first
    r = sorted(glob(os.path.join(match, '*.npz')),
               key=os.path.getmtime,
               reverse=True)
    assert len(r) > 0, \
        "no npz results found in directory '%s'" % os.path.normpath(match)

    return EmceeResults.from_npz(r[0])
Esempio n. 6
0
    def start_from(self, val):
        """
        Validate `val` and store the resulting path on ``self._start_from``

        - ``None`` is stored as is
        - if `val` contains ``'$'``, it is first passed through
          ``replace_vars``
        - a path that is not a directory is stored unchanged
        - for a directory, every ``*.npz`` file in it is loaded and the
          one with the largest log probability is stored; files that
          fail to load as ``EmceeResults`` are loaded as
          ``LBFGSResults`` and ranked by ``-min_chi2``

        Raises
        ------
        RuntimeError
            if the path does not exist, or if it is a directory that
            holds no ``.npz`` file
        """
        if val is None:
            self._start_from = val
            return

        if '$' in val:
            from pyRSD.rsdfit.parameters.tools import replace_vars
            val = replace_vars(val, {})

        import os
        if not os.path.exists(val):
            raise RuntimeError("cannot set `start_from` to `%s`: no such file" %val)

        if not os.path.isdir(val):
            self._start_from = val
            return

        from glob import glob

        pattern = os.path.join(val, "*.npz")
        result_files = glob(pattern)
        if not result_files:
            raise RuntimeError("did not find any chain (`.npz`) files matching pattern `%s`" %pattern)

        # only needed once there is something to load
        from pyRSD.rsdfit.results import EmceeResults, LBFGSResults

        # find the chain file which has the maximum log prob in it and use that
        max_lnprobs = []
        for f in result_files:
            try:
                r = EmceeResults.from_npz(f)
                max_lnprobs.append(r.max_lnprob)
            except Exception:
                # fall back to the LBFGS loader on any ordinary failure
                r = LBFGSResults.from_npz(f)
                max_lnprobs.append(-r.min_chi2)

        # first file wins on ties
        best = max(range(len(max_lnprobs)), key=max_lnprobs.__getitem__)
        self._start_from = result_files[best]
Esempio n. 7
0
def find_init_result(val):
    """
    Return the name of the file holding the maximum probability
    from a directory

    If `val` is not a directory, it is returned unchanged. Otherwise
    every ``*.npz`` file in it is loaded and the name of the one with
    the largest log probability is returned; files that fail to load
    as ``EmceeResults`` are loaded as ``LBFGSResults`` and ranked by
    ``-min_chi2``.

    Parameters
    ----------
    val : str
        the path of a result file, or of a directory of result files

    Returns
    -------
    str
        `val` itself, or the path of the selected file inside it

    Raises
    ------
    RuntimeError
        if `val` does not exist, or if it is a directory that holds
        no ``.npz`` file
    """
    import os
    if not os.path.exists(val):
        raise RuntimeError("cannot set `start_from` to `%s`: no such file" %
                           val)

    if not os.path.isdir(val):
        return val

    from glob import glob

    pattern = os.path.join(val, "*.npz")
    result_files = glob(pattern)
    if not result_files:
        raise RuntimeError(
            "did not find any chain (`.npz`) files matching pattern `%s`" %
            pattern)

    # only needed once there is something to load
    from pyRSD.rsdfit.results import EmceeResults, LBFGSResults

    # find the chain file which has the maximum log prob in it and use that
    max_lnprobs = []
    for f in result_files:
        try:
            r = EmceeResults.from_npz(f)
            max_lnprobs.append(r.max_lnprob)
        except Exception:
            # fall back to the LBFGS loader on any ordinary failure
            r = LBFGSResults.from_npz(f)
            max_lnprobs.append(-r.min_chi2)

    # first file wins on ties
    best = max(range(len(max_lnprobs)), key=max_lnprobs.__getitem__)
    return result_files[best]
Esempio n. 8
0
def run_joint_mcmc_fit(kind,
                       iterations,
                       walkers,
                       output,
                       load_kwargs,
                       joint_params=['f_nl']):
    """
    Run a joint mcmc fit to the NGC and SGC sky regions.

    The previous fit of each region is loaded, the two parameter sets
    are merged (joint parameters once, all others tagged with ``_ngc``
    or ``_sgc``), an LBFGS minimization finds the starting point, and
    an emcee ensemble is then run from a small ball around it. The
    result is printed and saved to `output`.

    Parameters
    ----------
    kind : 'data', 'ezmock'
        the kind of data to load
    iterations : int
        the number of iterations to run
    walkers : int
        the number of walkers to use
    output : str
        the name of the output file to save
    load_kwargs : dict
        the dictionary of keywords to load the previous result for each
        sample; it is not modified, the ``sample`` keyword is added to
        a per-sample copy
    joint_params : list of str
        the names of parameters that use a single value shared by both
        sky regions; the default list is never modified here
    """
    # add a console logger
    rsd_logging.add_console_logger(0)

    # determine the loader
    assert kind in ['data', 'ezmock']
    if kind == 'data':
        loader = load_data_results
    elif kind == 'ezmock':
        loader = load_ezmock_results

    # load the previous results
    samples = ['N', 'S']
    drivers = {}
    for sample in samples:
        # work on a copy so the caller's dictionary is left untouched
        drivers[sample] = loader(**dict(load_kwargs, sample=sample))
        drivers[sample].set_fit_results()

    # initialize a parameter set to hold combined data set
    theory = ParameterSet()

    # the initial state
    p0 = []
    free_names = []  # track names

    # add params that are fit jointly; their values come from the NGC fit
    for param in joint_params:
        theory[param] = drivers['N'].theory.fit_params[param]
        p0.append(theory[param].value)
        free_names.append(param)

    # slices to set theta properly: for each region, the positions in the
    # combined state vector of that region's free parameters
    slices = collections.defaultdict(list)

    for tag in ['ngc', 'sgc']:
        tag_ = tag[0].upper()
        d = drivers[tag_]  # the driver

        # add all free names
        for name in d.theory.free_names:

            # only fit one version of joint params
            if name in joint_params:
                slices[tag_].append(joint_params.index(name))
                continue

            # add tagged version of other params
            slices[tag_].append(len(free_names))
            free_names.append(name + '_' + tag)

            par = d.theory.fit_params[name]
            p0.append(par.value)
            theory[name + '_' + tag] = par

    # the initial state
    p0 = numpy.array(p0)
    scales = numpy.array([theory[k].scale for k in free_names])
    locs = numpy.array([theory[k].loc for k in free_names])

    # the arrays of free parameters; reused as buffers by mcmc_objective
    theta1 = numpy.array([
        drivers['N'].theory.fit_params[name].value
        for name in drivers['N'].theory.free_names
    ])
    theta2 = numpy.array([
        drivers['S'].theory.fit_params[name].value
        for name in drivers['S'].theory.free_names
    ])

    def mcmc_objective(x):

        # get separate values
        theta1[:] = x[slices['N']]
        theta2[:] = x[slices['S']]

        # return
        return drivers['N'].lnprob(theta1) + drivers['S'].lnprob(theta2)

    def nlopt_objective(x):

        toret = 0
        x = x * scales + locs
        for tag in ['N', 'S']:
            d = drivers[tag]
            toret += d.minus_lnlike(x[slices[tag]], use_priors=True)

        return toret

    def nlopt_grad(x, **kwargs):

        x = x * scales + locs
        grad = numpy.zeros(len(p0))
        for tag in ['N', 'S']:
            d = drivers[tag]
            g = d.grad_minus_lnlike(x[slices[tag]], **kwargs)
            # joint parameters receive a contribution from both regions
            for i, sl in enumerate(slices[tag]):
                grad[sl] += g[i]

        # the minimizer works in scaled coordinates
        return grad * scales

    # the gradient
    grad_kws = {}
    grad_kws['epsilon'] = 0.0001
    grad_kws['pool'] = None
    grad_kws['numerical'] = False
    grad_kws['numerical_from_lnlike'] = True
    fprime = functools.partial(nlopt_grad, **grad_kws)

    # scale the original values
    p0_scaled = (p0 - locs) / scales

    def unscaler(x):
        return x * scales + locs

    # run
    minimizer = lbfgs.LBFGS(nlopt_objective,
                            fprime,
                            p0_scaled,
                            unscaler=unscaler)
    options = {
        'factr': 100000.0,
        'gtol': 1e-05,
        'max_iter': 1000,
        'test_convergence': False
    }
    # the return value is not needed; the final state is read from
    # ``minimizer.data`` below
    minimizer.run_nlopt(**options)

    # the best fit values
    d = minimizer.data
    final_p0 = d['curr_state'].X
    p0 = final_p0 * scales + locs

    # initialize the sampler
    ndim = len(p0)
    p0 = numpy.array(
        [p0 + 1e-3 * numpy.random.randn(ndim) for i in range(walkers)])
    sampler = emcee.EnsembleSampler(walkers, ndim, mcmc_objective)

    # iterator interface allows us to tap ctrl+c and know where we are
    niters = iterations
    burnin = 0

    # do the sampling
    with ChainManager(sampler, niters, walkers, free_names, None) as manager:
        for niter, _ in manager.sample(p0, None):

            # check if we need to exit due to exception/convergence
            manager.check_status()

            # update progress and test convergence
            manager.update_progress(niter)

    # re-order the chain to the sorted parameter order
    order = []
    for name in theory.free_names:
        order.append(free_names.index(name))
    try:
        sampler.chain = sampler.chain[..., order]
    except Exception:
        # ``chain`` could not be assigned; write to the private
        # attribute when the sampler has one
        if hasattr(sampler, '_chain'):
            sampler._chain = sampler.chain[..., order]
        else:
            raise

    # create and save the result
    result = EmceeResults(sampler, theory, burnin)
    print(result)
    result.to_npz(output)