Code example #1
def run_chi_mcmc_redux(obs, initial, group_class, mcmc_args):
    # This is the version of the MCMC without the C2 fitting.
    # Group bounds are still important.

    print("... loading arch libs")
    arch_path = "/Users/MasterD/Google Drive/CCSLab/dev/arch_libs/interp/arch_spec_interp.pkl"
    with open(arch_path, 'rb') as f:
        arch_spec_int = pkl.load(f)

    group_dict = {
        "GI": {
            'FEH': -2.5,
            "CARBON": 7.9
        },
        "GII": {
            'FEH': -3.5,
            "CARBON": 5.9
        },
        'GIII': {
            'FEH': -4.3,
            "CARBON": 7.0
        }
    }

    group_bounds = {
        'GI': {
            "T": [4000, 5000],
            "FEH": [-3.5, -1.0],
            'CARBON': [7.0, 9.0]
        },
        'GII': {
            "T": [4000, 5000],
            "FEH": [-4.5, -2.0],
            'CARBON': [-1.0, 1.5]
        },
        'GIII': {
            "T": [4000, 5000],
            "FEH": [-4.5, -3.0],
            'CARBON': [6.0, 7.5]
        }
    }

    print("MCMC params")

    mcmc_args['bounds'] = group_bounds[group_class.split("_")[0]]

    print("Computing beta params")
    CAII_BETA = get_beta_params(obs, [3884, 3923])
    CH_BETA = get_beta_params(obs, [4222, 4322])

    mcmc_args['CAII_alpha'] = CAII_BETA['alpha']
    mcmc_args['CAII_beta'] = CAII_BETA['beta']

    mcmc_args['CH_alpha'] = CH_BETA['alpha']
    mcmc_args['CH_beta'] = CH_BETA['beta']

    pos = initial + 1e-2 * np.random.randn(25, len(initial))  # 25-walker ball around `initial`
    nwalkers, ndim = pos.shape

    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    chi_likelihood_redux,
                                    args=(obs, arch_spec_int[group_class],
                                          mcmc_args))

    _ = sampler.run_mcmc(pos, 1500)

    return sampler, ndim, mcmc_args
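A hypothetical follow-up for this example (names assumed, not from the source): flatten the chain after discarding a burn-in and summarize each parameter with percentiles, using the emcee 2.x attributes the snippet already relies on.

def summarize_chi_mcmc(sampler, ndim, nburn=300):
    # nburn is an assumed burn-in length; drop it, then flatten to (nsamples, ndim)
    samples = sampler.chain[:, nburn:, :].reshape((-1, ndim))
    p16, p50, p84 = np.percentile(samples, [16, 50, 84], axis=0)
    return p50, p84 - p50, p50 - p16  # median, +1 sigma, -1 sigma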
Code example #2
def perform_emcee(time, flux, err, ROW):
    # `err` is the 1-sigma flux uncertainty (the original parameter was named
    # `sigma_sq`, but the body uses it as `err` throughout).
    diff_time = np.diff(time)
    print(diff_time.min())
    plt.figure()
    plt.errorbar(time, flux, err)
    plt.savefig('figure/' + str(ROW) + 'LC' + '.pdf')
    #plt.show()

    X = np.arange(-1, 5, .1)  #tau
    Y = np.arange(-2.5, 1.5, .1)  #variance
    X, Y = np.meshgrid(X, Y)
    lprob_dens = lnprob_dens((Y, X), time, flux, err)
    fig = plt.figure()
    lprob_dens = np.array(lprob_dens)
    plt.pcolormesh(X,
                   Y,
                   lprob_dens.reshape(X.shape),
                   shading='gouraud',
                   cmap=cm.rainbow)
    cbar = plt.colorbar()
    cbar.set_label('log(probability)')
    plt.xlabel("Tau")
    plt.ylabel("Variance")
    plt.savefig('figure/' + str(ROW) + 'logprob_density_norm' + '.pdf')
    #plt.show()

    nll = lambda *args: -lnlike(*args)
    ndim, nwalkers = 2, 100

    if sys.argv[5].lower() == 'normal':
        # V and Tau are assumed module-level initial guesses
        # (they are not defined in this excerpt).
        result = [np.log10(V), np.log10(Tau)]
        pos = [
            result + (-0.5 + np.random.randn(ndim)) for i in range(nwalkers)
        ]
    elif sys.argv[5].lower() == 'grid':
        v_grid = np.arange(-1, 0, 0.1)
        t_grid = np.arange(1, 2, 0.1)
        VG, TG = np.meshgrid(v_grid, t_grid)
        result = [
            np.array(thing) for thing in zip(VG.flatten(), TG.flatten())
        ]  # for python 2.7
        pos = [
            result[i] + 1e-7 * np.random.randn(ndim) for i in range(nwalkers)
        ]
    elif sys.argv[5].lower() == 'optimal':
        result = op.minimize(nll, [np.log10(V), np.log10(Tau)],
                             args=(time, flux, err**2))
        pos = [
            result['x'] + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)
        ]
    else:
        print("What the hell do you want to do?")
        print("'grid', 'optimal', or 'normal' search through MCMC?")
        exit()

    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    lnprob,
                                    args=(time, flux, err**2))
    print(np.array(pos).shape)
    sampler.run_mcmc(pos, 100)
    samples = sampler.chain[:, 20:, :].reshape((-1, ndim))  # drop 20 burn-in steps

    # `logprobs` and `logvals` were undefined in this excerpt; recover them
    # from the sampler (assumed intent: per-sample log-probabilities).
    logprobs = sampler.flatlnprobability.tolist()
    logvals = sampler.flatchain.tolist()

    plt.figure()
    plt.plot(logprobs)
    plt.savefig('figure/' + str(ROW) + sys.argv[5] + 'logprob' + '.pdf')
    #plt.show()

    max_theta = logvals[logprobs.index(max(logprobs))]

    fig = corner.corner(samples,
                        labels=[r"log$_{10}V$", r"log$_{10}\tau$"],
                        truths=[max_theta[0], max_theta[1]])
    fig.savefig("figure/" + str(ROW) + sys.argv[5] + "triangle_np.pdf")

    V_mcmc, Tau_mcmc = map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                           zip(*np.percentile(samples, [16, 50, 84], axis=0)))
    print('V_mcmc:', V_mcmc, 'Tau_mcmc:', Tau_mcmc, max_theta[0], max_theta[1])
    print('ROW:', ROW, 'Tau:', str(max_theta[1]), 'V:', str(max_theta[0]))
    filename = 'scratch_new/' + str(ROW) + sys.argv[5] + 'object' + '.txt'
    with open(filename, 'w+') as fout:
        fout.write('Object: ' + str(ROW) + ' ' + 'Tau: ' + str(max_theta[1]) +
                   ' ' + 'V: ' + str(max_theta[0]) + '\n')

    sausageplot(max_theta[0], time, flux, max_theta[1], 5, err**2, ROW)
Code example #3
    if not np.isfinite(log_pr):
        return -np.inf
    return log_pr - log_likelihood(parameter, x, y, sigma_y)

# `log_likelihood` here returns -ln L, so minimizing it maximizes the
# likelihood (and log_posterior above adds the prior by subtracting it)
from scipy.optimize import minimize
guess = (1.0, 1.0, 1.0)
soln = minimize(log_likelihood, guess, args=(x, y, sigma_y))

# initializing the Markov Chains of parameters
nwalkers, ndim = 50, 3
pos = soln.x + 1e-5 * np.random.randn(nwalkers, ndim)

# MCMC through the emcee library
import emcee
sampling_tool = emcee.EnsembleSampler(nwalkers, ndim,
                                      log_posterior, args=(x, y, sigma_y))
sampling_tool.run_mcmc(pos, 4000)
samples = sampling_tool.get_chain()

# Best-fit values: the median of each marginal posterior PDF
a_best = np.median(samples[:, :, 0])
b_best = np.median(samples[:, :, 1])
c_best = np.median(samples[:, :, 2])

# One-sigma uncertainties: the standard deviation of each
# marginal posterior PDF
one_sigma_a = np.std(samples[:, :, 0])
one_sigma_b = np.std(samples[:, :, 1])
one_sigma_c = np.std(samples[:, :, 2])
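One caveat in this example: the summaries above are computed over the full chain, including the walkers' initial transient. A hedged variant that discards an assumed burn-in first, using the same get_chain API already called above:

nburn = 500  # assumed burn-in length
flat = sampling_tool.get_chain(discard=nburn, flat=True)  # (nsamples, ndim)
a_best, b_best, c_best = np.median(flat, axis=0)
one_sigma_a, one_sigma_b, one_sigma_c = np.std(flat, axis=0)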

Code example #4
def lnprob(params):
    gp.set_parameter_vector(params)
    lp = lnprior(params)
    # lp = gp.log_prior()
    if not np.isfinite(lp):
        return -np.inf
    return gp.log_likelihood(y) + lp


import emcee
initial = gp.get_parameter_vector()
# initial = np.array(initial_params)
# initial = np.array(soln.x)
ndim, nwalkers = len(initial), 32
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, threads=14)

import time
time_start = time.time()

print("Running first burn-in...")
pos = initial + 1e-4 * np.random.randn(nwalkers, ndim)
pos, prob, _ = sampler.run_mcmc(pos, 3000)

print("Running second burn-in...")
pos = pos[np.argmax(prob)] + 1e-4 * np.random.randn(nwalkers, ndim)
pos, prob, _ = sampler.run_mcmc(pos, 2000)

# print("Running third burn-in...")
# pos = pos[np.argmax(prob)] + 1e-8 * np.random.randn(nwalkers, ndim)
# pos, prob, _  = sampler.run_mcmc(pos, 2000)
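A hypothetical production stage to follow the burn-ins above (not in the source); it reuses the emcee 2.x calls from this snippet:

print("Running production...")
sampler.reset()  # clear the stored burn-in chain; we restart from `pos`
pos, prob, _ = sampler.run_mcmc(pos, 5000)  # production length assumed
samples = sampler.flatchain  # (nwalkers * nsteps, ndim)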
Code example #5
    return -0.5 * chisq_sn_cmb([omgM, h, gamma0, gamma1, sigma8])


def lnprob_sn_bao(pars):
    lp = lnprior(pars)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike_sn_bao(pars)


ndim, nwalkers, nsteps = 5, 50, 1000
pos = [[omgM_sn_cmb, h_sn_cmb, gamma0_sn_cmb, gamma1_sn_cmb, sigma8_sn_cmb] +
       1e-4 * np.random.randn(ndim) for i in range(nwalkers)]

# MCMC chain with 50 walkers and 1000 steps
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob_sn_bao, threads=4)
sampler.run_mcmc(pos, nsteps)

# Getting chains
omgM_sn_cmb_chain = sampler.chain[:, :, 0]
h_sn_cmb_chain = sampler.chain[:, :, 1]
gamma0_sn_cmb_chain = sampler.chain[:, :, 2]
gamma1_sn_cmb_chain = sampler.chain[:, :, 3]
sigma8_sn_cmb_chain = sampler.chain[:, :, 4]

# Mean across walkers at each step, and the standard error of that mean
# (std / sqrt(nwalkers)), not the plain standard deviation
h_sn_cmb_chain_mean = np.mean(h_sn_cmb_chain, axis=0)
h_sn_cmb_chain_std = np.std(h_sn_cmb_chain, axis=0) / np.sqrt(nwalkers)

# Reshaping
omgM_sn_cmb_chain_flat = np.reshape(omgM_sn_cmb_chain, (nwalkers * nsteps, ))
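The flattening pattern above generalizes to all five parameters; a short sketch (names follow the snippet) that flattens each chain and prints percentile summaries:

chains = [omgM_sn_cmb_chain, h_sn_cmb_chain, gamma0_sn_cmb_chain,
          gamma1_sn_cmb_chain, sigma8_sn_cmb_chain]
for name, c in zip(['omgM', 'h', 'gamma0', 'gamma1', 'sigma8'], chains):
    flat = np.reshape(c, (nwalkers * nsteps,))
    print(name, np.percentile(flat, [16, 50, 84]))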
Code example #6
File: event_optimize.py  Project: pennucci/PINT
def main(argv=None):

    parser = argparse.ArgumentParser(
        description=
        "PINT tool for MCMC optimization of timing models using event data.")

    parser.add_argument("eventfile", help="event file to use")
    parser.add_argument("parfile", help="par file to read model from")
    parser.add_argument("gaussianfile",
                        help="gaussian file that defines template")
    parser.add_argument("--ft2", help="Path to FT2 file.", default=None)
    parser.add_argument(
        "--weightcol",
        help="name of weight column (or 'CALC' to have them computed",
        default=None,
    )
    parser.add_argument("--nwalkers",
                        help="Number of MCMC walkers (def 200)",
                        type=int,
                        default=200)
    parser.add_argument(
        "--burnin",
        help="Number of MCMC steps for burn in (def 100)",
        type=int,
        default=100,
    )
    parser.add_argument(
        "--nsteps",
        help="Number of MCMC steps to compute (def 1000)",
        type=int,
        default=1000,
    )
    parser.add_argument("--minMJD",
                        help="Earliest MJD to use (def 54680)",
                        type=float,
                        default=54680.0)
    parser.add_argument("--maxMJD",
                        help="Latest MJD to use (def 57250)",
                        type=float,
                        default=57250.0)
    parser.add_argument("--phs",
                        help="Starting phase offset [0-1] (def is to measure)",
                        type=float)
    parser.add_argument("--phserr",
                        help="Error on starting phase",
                        type=float,
                        default=0.03)
    parser.add_argument(
        "--minWeight",
        help="Minimum weight to include (def 0.05)",
        type=float,
        default=0.05,
    )
    parser.add_argument(
        "--wgtexp",
        help=
        "Raise computed weights to this power (or 0.0 to disable any rescaling of weights)",
        type=float,
        default=0.0,
    )
    parser.add_argument(
        "--testWeights",
        help="Make plots to evalute weight cuts?",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--doOpt",
        help="Run initial scipy opt before MCMC?",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--initerrfact",
        help=
        "Multiply par file errors by this factor when initializing walker starting values",
        type=float,
        default=0.1,
    )
    parser.add_argument(
        "--priorerrfact",
        help=
        "Multiple par file errors by this factor when setting gaussian prior widths",
        type=float,
        default=10.0,
    )
    parser.add_argument(
        "--usepickle",
        help="Read events from pickle file, if available?",
        default=False,
        action="store_true",
    )

    global nwalkers, nsteps, ftr

    args = parser.parse_args(argv)

    eventfile = args.eventfile
    parfile = args.parfile
    gaussianfile = args.gaussianfile
    weightcol = args.weightcol

    if args.ft2 is not None:
        # Instantiate Fermi observatory once so it gets added to the observatory registry
        get_satellite_observatory("Fermi", args.ft2)

    nwalkers = args.nwalkers
    burnin = args.burnin
    nsteps = args.nsteps
    if burnin >= nsteps:
        log.error("burnin must be < nsteps")
        sys.exit(1)
    nbins = 256  # For likelihood calculation based on gaussians file
    outprof_nbins = 256  # in the text file, for pygaussfit.py, for instance
    minMJD = args.minMJD
    maxMJD = args.maxMJD  # Usually set by coverage of IERS file

    minWeight = args.minWeight
    do_opt_first = args.doOpt
    wgtexp = args.wgtexp

    # Read in initial model
    modelin = pint.models.get_model(parfile)

    # The custom_timing version below is to manually construct the TimingModel
    # class, which allows it to be pickled. This is needed for parallelizing
    # the emcee call over a number of threads.  So far, it isn't quite working
    # so it is disabled.  The code above constructs the TimingModel class
    # dynamically, as usual.
    # modelin = custom_timing(parfile)

    # Remove the dispersion delay as it is unnecessary
    # modelin.delay_funcs['L1'].remove(modelin.dispersion_delay)
    # Set the target coords for automatic weighting if necessary
    if "ELONG" in modelin.params:
        tc = SkyCoord(
            modelin.ELONG.quantity,
            modelin.ELAT.quantity,
            frame="barycentrictrueecliptic",
        )
    else:
        tc = SkyCoord(modelin.RAJ.quantity,
                      modelin.DECJ.quantity,
                      frame="icrs")

    target = tc if weightcol == "CALC" else None

    # TODO: make this properly handle long double
    ts = None
    if args.usepickle:
        try:
            ts = toa.load_pickle(eventfile)
        except IOError:
            pass
    if ts is None:
        # Read event file and return list of TOA objects
        tl = fermi.load_Fermi_TOAs(eventfile,
                                   weightcolumn=weightcol,
                                   targetcoord=target,
                                   minweight=minWeight)
        # Limit the TOAs to ones in selected MJD range and above minWeight
        tl = [
            tl[ii] for ii in range(len(tl))
            if (tl[ii].mjd.value > minMJD and tl[ii].mjd.value < maxMJD and (
                weightcol is None or tl[ii].flags["weight"] > minWeight))
        ]
        log.info("There are %d events we will use" % len(tl))
        # Now convert to TOAs object and compute TDBs and posvels
        ts = toa.TOAs(toalist=tl)
        ts.filename = eventfile
        ts.compute_TDBs()
        ts.compute_posvels(ephem="DE421", planets=False)
        toa.save_pickle(ts)

    if weightcol is not None:
        if weightcol == "CALC":
            weights = np.asarray([x["weight"] for x in ts.table["flags"]])
            log.info("Original weights have min / max weights %.3f / %.3f" %
                     (weights.min(), weights.max()))
            # Rescale the weights, if requested (by having wgtexp != 0.0)
            if wgtexp != 0.0:
                weights **= wgtexp
                wmx, wmn = weights.max(), weights.min()
                # make the highest weight = 1, but keep min weight the same
                weights = wmn + ((weights - wmn) * (1.0 - wmn) / (wmx - wmn))
            for ii, x in enumerate(ts.table["flags"]):
                x["weight"] = weights[ii]
        weights = np.asarray([x["weight"] for x in ts.table["flags"]])
        log.info("There are %d events, with min / max weights %.3f / %.3f" %
                 (len(weights), weights.min(), weights.max()))
    else:
        weights = None
        log.info("There are %d events, no weights are being used." % ts.ntoas)

    # Now load in the gaussian template and normalize it
    gtemplate = read_gaussfitfile(gaussianfile, nbins)
    gtemplate /= gtemplate.mean()

    # Set the priors on the parameters in the model, before
    # instantiating the emcee_fitter
    # Currently, this adds a gaussian prior on each parameter
    # with width equal to the par file uncertainty * priorerrfact,
    # and then puts in some special cases.
    # *** This should be replaced/supplemented with a way to specify
    # more general priors on parameters that need certain bounds
    phs = 0.0 if args.phs is None else args.phs
    fitkeys, fitvals, fiterrs = get_fit_keyvals(modelin,
                                                phs=phs,
                                                phserr=args.phserr)

    for key, v, e in zip(fitkeys[:-1], fitvals[:-1], fiterrs[:-1]):
        if key == "SINI" or key == "E" or key == "ECC":
            getattr(modelin, key).prior = Prior(uniform(0.0, 1.0))
        elif key == "PX":
            getattr(modelin, key).prior = Prior(uniform(0.0, 10.0))
        elif key.startswith("GLPH"):
            getattr(modelin, key).prior = Prior(uniform(-0.5, 1.0))
        else:
            getattr(modelin, key).prior = Prior(
                norm(loc=float(v), scale=float(e * args.priorerrfact)))

    # Now define the requirements for emcee
    ftr = emcee_fitter(ts, modelin, gtemplate, weights, phs, args.phserr)

    # Use this if you want to see the effect of setting minWeight
    if args.testWeights:
        log.info("Checking H-test vs weights")
        ftr.prof_vs_weights(use_weights=True)
        ftr.prof_vs_weights(use_weights=False)
        sys.exit()

    # Now compute the photon phases and see if we see a pulse
    phss = ftr.get_event_phases()
    maxbin, like_start = marginalize_over_phase(phss,
                                                gtemplate,
                                                weights=ftr.weights,
                                                minimize=True,
                                                showplot=False)
    log.info("Starting pulse likelihood: %f" % like_start)
    if args.phs is None:
        fitvals[-1] = 1.0 - maxbin[0] / float(len(gtemplate))
        if fitvals[-1] > 1.0:
            fitvals[-1] -= 1.0
        if fitvals[-1] < 0.0:
            fitvals[-1] += 1.0
        log.info("Starting pulse phase: %f" % fitvals[-1])
    else:
        log.warning("Measured starting pulse phase is %f, but using %f" %
                    (1.0 - maxbin / float(len(gtemplate)), args.phs))
        fitvals[-1] = args.phs
    ftr.fitvals[-1] = fitvals[-1]
    ftr.phaseogram(plotfile=ftr.model.PSR.value + "_pre.png")
    plt.close()
    # ftr.phaseogram()

    # Write out the starting pulse profile
    vs, xs = np.histogram(ftr.get_event_phases(),
                          outprof_nbins,
                          range=[0, 1],
                          weights=ftr.weights)
    f = open(ftr.model.PSR.value + "_prof_pre.txt", "w")
    for x, v in zip(xs, vs):
        f.write("%.5f  %12.5f\n" % (x, v))
    f.close()

    # Try normal optimization first to see how it goes
    if do_opt_first:
        result = op.minimize(ftr.minimize_func, np.zeros_like(ftr.fitvals))
        newfitvals = np.asarray(result["x"]) * ftr.fiterrs + ftr.fitvals
        like_optmin = -result["fun"]
        log.info("Optimization likelihood: %f" % like_optmin)
        ftr.set_params(dict(zip(ftr.fitkeys, newfitvals)))
        ftr.phaseogram()
    else:
        like_optmin = -np.inf

    # Set up the initial conditions for the emcee walkers.  Use the
    # scipy.optimize newfitvals instead if they are better
    ndim = ftr.n_fit_params
    if like_start > like_optmin:
        # Keep the starting deviations small...
        pos = [
            ftr.fitvals +
            ftr.fiterrs * args.initerrfact * np.random.randn(ndim)
            for ii in range(nwalkers)
        ]
        # Set starting params
        for param in [
                "GLPH_1", "GLEP_1", "SINI", "M2", "E", "ECC", "PX", "A1"
        ]:
            if param in ftr.fitkeys:
                idx = ftr.fitkeys.index(param)
                if param == "GLPH_1":
                    svals = np.random.uniform(-0.5, 0.5, nwalkers)
                elif param == "GLEP_1":
                    svals = np.random.uniform(minMJD + 100, maxMJD - 100,
                                              nwalkers)
                    # svals = 55422.0 + np.random.randn(nwalkers)
                elif param == "SINI":
                    svals = np.random.uniform(0.0, 1.0, nwalkers)
                elif param == "M2":
                    svals = np.random.uniform(0.1, 0.6, nwalkers)
                elif param in ["E", "ECC", "PX", "A1"]:
                    # Ensure all positive
                    svals = np.fabs(ftr.fitvals[idx] + ftr.fiterrs[idx] *
                                    np.random.randn(nwalkers))
                    if param in ["E", "ECC"]:
                        svals[svals > 1.0] = 1.0 - (svals[svals > 1.0] - 1.0)
                for ii in range(nwalkers):
                    pos[ii][idx] = svals[ii]
    else:
        pos = [
            newfitvals + ftr.fiterrs * args.initerrfact * np.random.randn(ndim)
            for i in range(nwalkers)
        ]
    # Set the 0th walker to have the initial pre-fit solution
    # This way, one walker should always be in a good position
    pos[0] = ftr.fitvals

    import emcee

    # Following are for parallel processing tests...
    if 0:

        def unwrapped_lnpost(theta, ftr=ftr):
            return ftr.lnposterior(theta)

        import pathos.multiprocessing as mp

        pool = mp.ProcessPool(nodes=8)
        sampler = emcee.EnsembleSampler(nwalkers,
                                        ndim,
                                        unwrapped_lnpost,
                                        pool=pool,
                                        args=[ftr])
    else:
        sampler = emcee.EnsembleSampler(nwalkers, ndim, ftr.lnposterior)
    # The number is the number of points in the chain
    sampler.run_mcmc(pos, nsteps)

    def chains_to_dict(names, sampler):
        chains = [sampler.chain[:, :, ii].T for ii in range(len(names))]
        return dict(zip(names, chains))

    def plot_chains(chain_dict, file=False):
        npts = len(chain_dict)
        fig, axes = plt.subplots(npts, 1, sharex=True, figsize=(8, 9))
        for ii, name in enumerate(chain_dict.keys()):
            axes[ii].plot(chain_dict[name], color="k", alpha=0.3)
            axes[ii].set_ylabel(name)
        axes[npts - 1].set_xlabel("Step Number")
        fig.tight_layout()
        if file:
            fig.savefig(file)
            plt.close()
        else:
            plt.show()
            plt.close()

    chains = chains_to_dict(ftr.fitkeys, sampler)
    plot_chains(chains, file=ftr.model.PSR.value + "_chains.png")

    # Make the triangle plot.
    samples = sampler.chain[:, burnin:, :].reshape((-1, ndim))
    try:
        import corner

        fig = corner.corner(
            samples,
            labels=ftr.fitkeys,
            bins=50,
            truths=ftr.maxpost_fitvals,
            plot_contours=True,
        )
        fig.savefig(ftr.model.PSR.value + "_triangle.png")
        plt.close()
    except ImportError:
        pass

    # Make a phaseogram with the 50th percentile values
    # ftr.set_params(dict(zip(ftr.fitkeys, np.percentile(samples, 50, axis=0))))
    # Make a phaseogram with the best MCMC result
    ftr.set_params(dict(zip(ftr.fitkeys[:-1], ftr.maxpost_fitvals[:-1])))
    ftr.phaseogram(plotfile=ftr.model.PSR.value + "_post.png")
    plt.close()

    # Write out the output pulse profile
    vs, xs = np.histogram(ftr.get_event_phases(),
                          outprof_nbins,
                          range=[0, 1],
                          weights=ftr.weights)
    f = open(ftr.model.PSR.value + "_prof_post.txt", "w")
    for x, v in zip(xs, vs):
        f.write("%.5f  %12.5f\n" % (x, v))
    f.close()

    # Write out the par file for the best MCMC parameter estimate
    with open(ftr.model.PSR.value + "_post.par", "w") as f:
        f.write(ftr.model.as_parfile())

    # Print the best MCMC values and ranges
    ranges = list(map(
        lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
        zip(*np.percentile(samples, [16, 50, 84], axis=0)),
    ))  # materialize: the map iterator would be exhausted after the first loop
    log.info("Post-MCMC values (50th percentile +/- 84th/16th percentiles):")
    for name, vals in zip(ftr.fitkeys, ranges):
        log.info("%8s:" % name + "%25.15g (+ %12.5g  / - %12.5g)" % vals)

    # Put the same stuff in a file
    f = open(ftr.model.PSR.value + "_results.txt", "w")

    f.write("Post-MCMC values (50th percentile +/- (16th/84th percentile):\n")
    for name, vals in zip(ftr.fitkeys, ranges):
        f.write("%8s:" % name + " %25.15g (+ %12.5g  / - %12.5g)\n" % vals)

    f.write("\nMaximum likelihood par file:\n")
    f.write(ftr.model.as_parfile())
    f.close()

    from six.moves import cPickle as pickle

    with open(ftr.model.PSR.value + "_samples.pickle", "wb") as f:
        pickle.dump(samples, f)
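A hypothetical companion snippet (not part of the script): reloading the pickled samples later, mirroring the dump above; the pulsar name here is an assumption.

from six.moves import cPickle as pickle
with open("J0000+0000_samples.pickle", "rb") as f:  # hypothetical PSR name
    samples = pickle.load(f)
print(samples.shape)  # (nwalkers * (nsteps - burnin), ndim)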
Code example #7
File: fit_sample.py  Project: duncandc/galaxy_shapes
def main():

    nchunk = 25  # number of steps to take before reinitializing the pool

    if len(sys.argv) > 1:
        sample = sys.argv[1]
    else:
        print(
            "The first positional argument must be the galaxy sample, e.g. 'sample_1'."
        )
        sys.exit()

    chain_dir = './chains/'

    # load parameters for sample
    _temp = __import__(sample + '_fitting_params')
    params = _temp.params

    # retrieve parameters for the galaxy sample
    mag_lim = params['mag_lim'][0]
    ndim = params['ndim']
    nwalkers = params['nwalkers']
    nthreads = params['nthreads']
    nsteps = params['nsteps']

    # initialize walkers
    pos0 = [
        params['theta0'] + params['dtheta'] * np.random.randn(params['ndim'])
        for i in range(params['nwalkers'])
    ]

    # check multiprocessing arguments
    ncpu = cpu_count()
    print("Using {0} CPU cores out of a possible {1}.".format(nthreads, ncpu))

    # load sdss measurements
    t = Table.read(params['comparison_fname'], format='ascii')
    y = t['frequency']
    yerr = t['err']

    # Set up the backend
    # Don't forget to clear it in case the file already exists
    filename = chain_dir + sample + '_chain.hdf5'
    backend = emcee.backends.HDFBackend(filename)

    if not params['continue_chain']:
        backend.reset(nwalkers, ndim)
    else:
        print("Initial number of steps: {0}".format(backend.iteration))
        # retrieve final position of chains
        samples = backend.get_chain()
        pos0 = samples[-1]  # final positions, shape (nwalkers, ndim)

    # run first batch of steps
    print('starting initial pool...')
    pool = Pool(processes=nthreads)
    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    lnprob,
                                    backend=backend,
                                    args=(y, yerr, mag_lim),
                                    pool=pool)
    if nchunk > nsteps:
        nsteps0 = nsteps
    else:
        nsteps0 = nchunk
    sampler.run_mcmc(pos0, nsteps0, progress=True)
    print('closing pool...')
    pool.close()

    # loop through the remaining steps
    for i in range(1, nsteps // nchunk):
        print('starting new pool...')
        pool = Pool(processes=nthreads)
        sampler = emcee.EnsembleSampler(nwalkers,
                                        ndim,
                                        lnprob,
                                        backend=backend,
                                        args=(y, yerr, mag_lim),
                                        pool=pool)
        sampler.run_mcmc(None, nchunk, progress=True)
        print('closing pool...')
        pool.close()

    # take remaining steps
    if nchunk > nsteps:
        nremainder = 0
    else:
        nremainder = nsteps % nchunk

    if nremainder > 0:
        print('starting new pool...')
        pool = Pool(processes=nthreads)
        sampler = emcee.EnsembleSampler(nwalkers,
                                        ndim,
                                        lnprob,
                                        backend=backend,
                                        args=(y, yerr, mag_lim),
                                        pool=pool)
        sampler.run_mcmc(None, nremainder, progress=True)
        print('closing pool...')
        pool.close()

    print("Final number of steps: {0}".format(backend.iteration))
Code example #8
def mcmc_negfc_sampling(cube,
                        angs,
                        psfn,
                        ncomp,
                        plsc,
                        initial_state,
                        fwhm=4,
                        annulus_width=8,
                        aperture_radius=1,
                        cube_ref=None,
                        svd_mode='lapack',
                        scaling='temp-mean',
                        algo=pca_annulus,
                        delta_rot=1,
                        fmerit='sum',
                        imlib='opencv',
                        interpolation='lanczos4',
                        collapse='median',
                        nwalkers=1000,
                        bounds=None,
                        a=2.0,
                        burnin=0.3,
                        rhat_threshold=1.01,
                        rhat_count_threshold=1,
                        niteration_min=0,
                        niteration_limit=1e2,
                        niteration_supp=0,
                        check_maxgap=1e4,
                        nproc=1,
                        output_dir='results/',
                        output_file=None,
                        display=False,
                        verbosity=0,
                        save=False):
    r""" Runs an affine invariant mcmc sampling algorithm in order to determine
    the position and the flux of the planet using the 'Negative Fake Companion'
    technique. The result of this procedure is a chain with the samples from the
    posterior distributions of each of the 3 parameters.
    
    This technique can be summarized as follows:
    1) We inject a negative fake companion (one candidate) at a given position
    and characterized by a given flux, both close to the expected values.
    2) We run PCA on a full annulus which passes through the initial guess,
    regardless of the position of the candidate.
    3) We extract the intensity values of all the pixels contained in a
    circular aperture centered on the initial guess.
    4) We calculate the figure of merit. The associated chi^2 is given by
    chi^2 = sum(|I_j|), where j \in {1,...,N} and N is the total number of
    pixels contained in the circular aperture.
    The steps 1) to 4) are looped. At each iteration, the candidate model
    parameters are defined by the emcee Affine Invariant algorithm.
    
    Parameters
    ----------
    cube: numpy.array
        ADI fits cube.
    angs: numpy.array
        The parallactic angle vector.
    psfn: numpy.array
        PSF array. The PSF must be centered and the flux in a 1*FWHM aperture
        must equal 1 (use ``vip_hci.phot.psf_norm``).
    ncomp: int
        The number of principal components.
    plsc: float
        The platescale, in arcsec per pixel.
    annulus_width: float, optional
        The width in pixels of the annulus on which the PCA is performed.
    aperture_radius: float, optional
        The radius in FWHM of the circular aperture.
    nwalkers: int, optional
        The number of Goodman & Weare 'walkers'.
    initial_state: numpy.array
        The first guess for the position and flux of the planet, respectively.
        Each walker will start in a small ball around this preferred position.
    cube_ref : numpy ndarray, 3d, optional
        Reference library cube. For Reference Star Differential Imaging.
    svd_mode : {'lapack', 'randsvd', 'eigen', 'arpack'}, str optional
        Switch for different ways of computing the SVD and selected PCs.
        'randsvd' is not recommended for the negative fake companion technique.
    scaling : {'temp-mean', 'temp-standard'} or None, optional
        With None, no scaling is performed on the input data before SVD. With
        "temp-mean" then temporal px-wise mean subtraction is done and with
        "temp-standard" temporal mean centering plus scaling to unit variance
        is done.
    fmerit : {'sum', 'stddev'}, string optional
        Chooses the figure of merit to be used. stddev works better for close in
        companions sitting on top of speckle noise.
    imlib : str, optional
        See the documentation of the ``vip_hci.preproc.frame_rotate`` function.
    interpolation : str, optional
        See the documentation of the ``vip_hci.preproc.frame_rotate`` function.
    collapse : {'median', 'mean', 'sum', 'trimmean', None}, str or None, optional
        Sets the way of collapsing the frames for producing a final image. If
        None then the cube of residuals is used when measuring the function of
        merit (instead of a single final frame).
    bounds: numpy.array or list, default=None, optional
        The prior knowledge on the model parameters. If None, large bounds will
        be automatically estimated from the initial state.
    a: float, default=2.0
        The proposal scale parameter. See notes.
    burnin: float, default=0.3
        The fraction of a walker which is discarded.
    rhat_threshold: float, default=1.01
        The Gelman-Rubin threshold used for the convergence test.
    rhat_count_threshold: int, optional
        The Gelman-Rubin test must be satisfied 'rhat_count_threshold' times in
        a row before claiming that the chain has converged.
    niteration_min: int, optional
        Steps per walker lower bound. The simulation will run at least this
        number of steps per walker.
    niteration_limit: int, optional
        Steps per walker upper bound. If the simulation runs up to
        'niteration_limit' steps without having reached the convergence
        criterion, the run is stopped.
    niteration_supp: int, optional
        Number of iterations to run after having "reached the convergence".
    check_maxgap: int, optional
        Maximum number of steps per walker between two Gelman-Rubin tests.
    nproc: int, optional
        The number of processes to use for parallelization.
    output_dir: str, optional
        The name of the output directory which contains the output files when
        ``save`` is True.
    output_file: str, optional
        The name of the output file which contains the MCMC results in the case
        ``save`` is True.
    display: bool, optional
        If True, the walk plot is displayed at each evaluation of the Gelman-
        Rubin test.
    verbosity: 0, 1 or 2, optional
        Verbosity level. 0 for no output and 2 for full information.
    save: bool, optional
        If True, the MCMC results are pickled.
                    
    Returns
    -------
    out : numpy.array
        The MCMC chain.
        
    Notes
    -----
    The parameter ``a`` must be > 1. For more theoretical information concerning
    this parameter, see Goodman & Weare, 2010, Comm. App. Math. Comp. Sci.,
    5, 65, Eq. [9] p70.
    
    The parameter 'rhat_threshold' can be a numpy.array with individual
    threshold value for each model parameter.
    """
    if verbosity == 1 or verbosity == 2:
        start_time = time_ini()
        print("        MCMC sampler for the NEGFC technique       ")
        print(sep)

    # If required, create the output folder.
    if save:

        output_file_tmp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

        if output_dir[-1] == '/':
            output_dir = output_dir[:-1]
        try:
            os.makedirs(output_dir)
        except OSError as exc:
            if exc.errno == 17 and os.path.isdir(output_dir):
                # errno.EEXIST == 17 -> File exists
                pass
            else:
                raise

    if not isinstance(cube, np.ndarray) or cube.ndim != 3:
        raise ValueError('`cube` must be a 3D numpy array')

    if cube_ref is not None:
        if not isinstance(cube_ref, np.ndarray) or cube_ref.ndim != 3:
            raise ValueError('`cube_ref` must be a 3D numpy array')

    # #########################################################################
    # Initialization of the variables
    # #########################################################################
    dim = 3  # There are 3 model parameters: rad, theta, flux
    itermin = niteration_min
    limit = niteration_limit
    supp = niteration_supp
    maxgap = check_maxgap
    initial_state = np.array(initial_state)

    if itermin > limit:
        itermin = 0

    fraction = 0.3
    geom = 0
    lastcheck = 0
    konvergence = np.inf
    rhat_count = 0
    chain = np.empty([nwalkers, 1, dim])
    isamples = np.empty(0)
    pos = initial_state + np.random.normal(0, 1e-1, (nwalkers, 3))
    nIterations = limit + supp
    rhat = np.zeros(dim)
    stop = np.inf

    if bounds is None:
        bounds = [
            (initial_state[0] - annulus_width / 2.,
             initial_state[0] + annulus_width / 2.),  # radius
            (initial_state[1] - 10, initial_state[1] + 10),  # angle
            (0, 2 * initial_state[2])
        ]  # flux

    sampler = emcee.EnsembleSampler(nwalkers,
                                    dim,
                                    lnprob,
                                    a,
                                    args=([
                                        bounds, cube, angs, plsc, psfn, fwhm,
                                        annulus_width, ncomp, aperture_radius,
                                        initial_state, cube_ref, svd_mode,
                                        scaling, algo, delta_rot, fmerit,
                                        imlib, interpolation, collapse
                                    ]),
                                    threads=nproc)
    start = datetime.datetime.now()

    # #########################################################################
    # Affine Invariant MCMC run
    # #########################################################################
    if verbosity == 2:
        print('\nStart of the MCMC run ...')
        print(
            'Step  |  Duration/step (sec)  |  Remaining Estimated Time (sec)')

    for k, res in enumerate(
            sampler.sample(pos, iterations=nIterations, storechain=True)):
        elapsed = (datetime.datetime.now() - start).total_seconds()
        if verbosity == 2:
            if k == 0:
                q = 0.5
            else:
                q = 1
            print('{}\t\t{:.5f}\t\t\t{:.5f}'.format(
                k, elapsed * q,
                elapsed * (limit - k - 1) * q))

        start = datetime.datetime.now()

        # ---------------------------------------------------------------------
        # Store the state manually in order to handle a dynamically sized chain
        # ---------------------------------------------------------------------
        # Check if the size of the chain is long enough.
        s = chain.shape[1]
        if k + 1 > s:  # if not, double the chain length
            empty = np.zeros([nwalkers, 2 * s, dim])
            chain = np.concatenate((chain, empty), axis=1)
        # Store the state of the chain
        chain[:, k] = res[0]

        # ---------------------------------------------------------------------
        # If k meets the criterion, run the convergence test.
        # ---------------------------------------------------------------------
        criterion = np.amin([
            np.ceil(itermin * (1 + fraction)**geom),
            lastcheck + np.floor(maxgap)
        ])
        if k == criterion:
            if verbosity == 2:
                print('\n   Gelman-Rubin statistic test in progress ...')

            geom += 1
            lastcheck = k
            if display:
                show_walk_plot(chain)

            if save:
                import pickle
                fname = '{d}/{f}_temp_k{k}'.format(d=output_dir,
                                                   f=output_file_tmp,
                                                   k=k)
                data = {
                    'chain': sampler.chain,
                    'lnprob': sampler.lnprobability,
                    'AR': sampler.acceptance_fraction
                }
                with open(fname, 'wb') as fileSave:
                    pickle.dump(data, fileSave)

            # We only test the rhat if we have reached the min # of steps
            if (k + 1) >= itermin and konvergence == np.inf:
                thr0 = int(np.floor(burnin * k))
                thr1 = int(np.floor((1 - burnin) * k * 0.25))

                # We calculate the rhat for each model parameter.
                for j in range(dim):
                    part1 = chain[:, thr0:thr0 + thr1, j].reshape(-1)
                    part2 = chain[:, thr0 + 3 * thr1:thr0 + 4 * thr1,
                                  j].reshape(-1)
                    series = np.vstack((part1, part2))
                    rhat[j] = gelman_rubin(series)
                if verbosity == 1 or verbosity == 2:
                    print('   r_hat = {}'.format(rhat))
                    cond = rhat <= rhat_threshold
                    print('   r_hat <= threshold = {} \n'.format(cond))
                # We test the rhat.
                if (rhat <= rhat_threshold).all():
                    rhat_count += 1
                    if rhat_count < rhat_count_threshold:
                        if verbosity == 1 or verbosity == 2:
                            msg = "Gelman-Rubin test OK {}/{}"
                            print(msg.format(rhat_count, rhat_count_threshold))
                    elif rhat_count >= rhat_count_threshold:
                        if verbosity == 1 or verbosity == 2:
                            print('... ==> convergence reached')
                        konvergence = k
                        stop = konvergence + supp
                else:
                    rhat_count = 0

        # We have reached the maximum number of steps for our Markov chain.
        if k + 1 >= stop:
            if verbosity == 1 or verbosity == 2:
                print('We break the loop because we have reached convergence')
            break

    if k == nIterations - 1:
        if verbosity == 1 or verbosity == 2:
            print("We have reached the limit # of steps without convergence")

    # #########################################################################
    # Construction of the independent samples
    # #########################################################################
    temp = np.where(chain[0, :, 0] == 0.0)[0]
    if len(temp) != 0:
        idxzero = temp[0]
    else:
        idxzero = chain.shape[1]

    idx = int(np.amin([np.floor(2e5 / nwalkers), np.floor(0.1 * idxzero)]))
    if idx == 0:
        isamples = chain[:, 0:idxzero, :]
    else:
        isamples = chain[:, idxzero - idx:idxzero, :]

    if save:
        import pickle
        frame = inspect.currentframe()
        args, _, _, values = inspect.getargvalues(frame)
        input_parameters = {j: values[j] for j in args[1:]}

        output = {
            'isamples': isamples,
            'chain': chain_zero_truncated(chain),
            'input_parameters': input_parameters,
            'AR': sampler.acceptance_fraction,
            'lnprobability': sampler.lnprobability
        }

        if output_file is None:
            output_file = 'MCMC_results'
        with open(output_dir + '/' + output_file, 'wb') as fileSave:
            pickle.dump(output, fileSave)

        msg = "\nThe file MCMC_results has been stored in the folder {}"
        print(msg.format(output_dir + '/'))

    if verbosity == 1 or verbosity == 2:
        timing(start_time)

    return chain_zero_truncated(chain)
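The helper gelman_rubin called above is not part of this excerpt; below is a minimal sketch of the statistic for a (nchains, nsamples) array, following Gelman & Rubin (1992). The VIP implementation may differ in detail.

import numpy as np

def gelman_rubin(series):
    # series: (nchains, nsamples); returns the potential scale reduction R-hat
    m, n = series.shape
    B = n * series.mean(axis=1).var(ddof=1)   # between-chain variance
    W = series.var(axis=1, ddof=1).mean()     # mean within-chain variance
    var_plus = (n - 1.0) / n * W + B / n      # pooled variance estimate
    return np.sqrt(var_plus / W)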
Code example #9
#plot.show()
#quit()
#
nDim = 3 + nTemplates_eD
nWalkers = 500

p0 = [coefficients + 5e-4 * np.random.randn(nDim) for i in range(nWalkers)]
for walker in p0:
    # reset any non-positive starting values into the allowed region
    for idx, par in enumerate(walker):
        if par <= 0:
            walker[idx] = 1

sampler = emcee.EnsembleSampler(nWalkers, nDim, lnprob,
                                kwargs={'observables': observedTOF,
                                        'standoffs': standoffs,
                                        'tofbinnings': tofRunBins,
                                        'tofranges': tof_range,
                                        'templates': shapeTemplates},
                                threads=8)
# truncate the burn-in log once; the loop below appends to it
open('burninchain.dat', 'w').close()


burninSteps = 10000
for i, samplerOut in enumerate(sampler.sample(p0, iterations=burninSteps)):
    burninPos, burninProb, burninRstate = samplerOut
    if i % 50 == 0:
        print('burn-in step {} of {}'.format(i, burninSteps))
    if i % 10 == 0:  # only save every 10th step
        with open('burninchain.dat', 'a') as fout:
            for k in range(burninPos.shape[0]):
                # walker index, parameter-vector repr, and log-probability
                fout.write('{} {} {}\n'.format(k, burninPos[k], burninProb[k]))
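A hypothetical continuation (not in the excerpt): reset the sampler and launch the production chain from the final burn-in state, following the emcee 2.x API used above.

sampler.reset()  # drop the stored burn-in samples
productionSteps = 50000  # assumed production length
sampler.run_mcmc(burninPos, productionSteps, rstate0=burninRstate)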
Code example #10

if ifmcmc:
    print("enabling Ensemble sampler.")
    # pos0=[para_guess + 1.0e-7*np.random.randn(ndim) for j in range(nwalkers)]
    pos0 = [
        np.array([
            np.random.uniform(low=para_limits[idim][0],
                              high=para_limits[idim][1])
            for idim in range(ndim)
        ]) for iwalker in range(nwalkers)
    ]

    with Pool() as pool:
        sampler = emcee.EnsembleSampler(
            nwalkers, ndim, lnpost,
            pool=pool)  #, args=(para_limits, obj_obs, xpdv, ypdv))

        # burn-in
        print("start burning in. nburn:", nburn)
        for j, result in enumerate(
                sampler.sample(pos0, iterations=nburn, thin=10)):
            display_bar(j, nburn)
        sys.stdout.write("\n")
        pos, _, _ = result
        sampler.reset()

        # actual iteration
        print("start iterating. nsteps:", nsteps)
        for j, result in enumerate(sampler.sample(pos, iterations=nsteps)):
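            display_bar(j, nsteps)  # assumed loop body, mirroring the burn-in loop
        sys.stdout.write("\n")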
Code example #11
def run_emcee_sampler(lnprobf, initial_center, model, verbose=True,
                      postargs=[], postkwargs={}, prob0=None,
                      nwalkers=None, nburn=[16], niter=32,
                      walker_factor=4,
                      nthreads=1, pool=None, hdf5=None, interval=1,
                      **kwargs):
    """Run an emcee sampler, including iterations of burn-in and re -
    initialization.  Returns the production sampler.

    :param lnprobf:
        The posterior probability function.

    :param initial_center:
        The initial center for the sampler ball

    :param model:
        An instance of a models.ProspectorParams object.

    :param postargs:
        Positional arguments for ``lnprobf``.

    :param postkwargs:
        Keyword arguments for ``lnprobf``.

    :param nwalkers:
        The number of walkers to use.  If None, use the nearest power of two to
        ``ndim * walker_factor``.

    :param niter:
        Number of iterations for the production run

    :param nburn:
        List of the number of iterations to run in each round of burn-in (for
        removing stuck walkers)

    :param pool: (optional)
        A ``Pool`` object, either from ``multiprocessing`` or from
        ``emcee.mpi_pool``.

    :param hdf5: (optional)
        H5py.File object that will be used to store the chain in the datasets
        ``"chain"`` and ``"lnprobability"``.  If not set, the chain will instead
        be stored as a numpy array in the returned sampler object

    :param interval:
        Fraction of the full run at which to flush to disk, if using hdf5 for
        output.
    """
    # Get dimensions
    ndim = model.ndim
    if nwalkers is None:
        nwalkers = int(2 ** np.round(np.log2(ndim * walker_factor)))
    if verbose:
        print('number of walkers={}'.format(nwalkers))

    # Initialize sampler
    esampler = emcee.EnsembleSampler(nwalkers, ndim, lnprobf,
                                     args=postargs, kwargs=postkwargs,
                                     threads=nthreads, pool=pool)
    # Burn in sampler
    initial, in_cent, in_prob = emcee_burn(esampler, initial_center, nburn, model,
                                           prob0=prob0, verbose=verbose, **kwargs)
    # Production run
    esampler.reset()
    if hdf5 is not None:
        # Set up hdf5 backend
        sdat = hdf5.create_group('sampling')
        chain = sdat.create_dataset("chain", (nwalkers, niter, ndim))
        lnpout = sdat.create_dataset("lnprobability", (nwalkers, niter))
        # blob = hdf5.create_dataset("blob")
        storechain = False
    else:
        storechain = True

    # Main loop over iterations of the MCMC sampler
    if verbose:
        print('starting production')
    for i, result in enumerate(esampler.sample(initial, iterations=niter,
                                               storechain=storechain)):
        if hdf5 is not None:
            chain[:, i, :] = result[0]
            lnpout[:, i] = result[1]
            if (np.mod(i+1, int(interval*niter)) == 0) or (i+1 == niter):
                # do stuff every once in awhile
                # this would be the place to put some callback functions
                # e.g. [do(result, i, esampler) for do in things_to_do]
                # like, should probably store the random state too.
                hdf5.flush()
    if verbose:
        print('done production')

    return esampler, in_cent, in_prob
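A hypothetical call (names assumed, not from the source), showing what comes back when no hdf5 file is given so storechain=True:

esampler, in_cent, in_prob = run_emcee_sampler(lnprobf, initial_center, model,
                                               nburn=[32, 32], niter=512)
chain = esampler.chain  # (nwalkers, niter, ndim)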
Code example #12
File: fit_massprof.py  Project: alishakundert/bmpmod
def fit_mcmc(ne_data,
             tspec_data,
             nemodel,
             clustermeta,
             ml_results,
             Ncores=params.Ncores,
             Nwalkers=params.Nwalkers,
             Nsteps=params.Nsteps,
             Nburnin=params.Nburnin):
    '''
    Perform a MCMC analysis on the free parameters of the cluster total
    gravitating mass model, utilizing the ensemble sampler of emcee.

    Args:
    -----

    ne_data (astropy table): observed gas density profile
      in the form established by set_prof_data()
    tspec_data (astropy table): observed temperature profile
      in the form established by set_prof_data()

    nemodel (dictionary): dictionary storing the gas density profile model as
        output in fit_density()
    clustermeta (dictionary): dictionary of cluster and analysis info produced
        by set_prof_data()

    ml_results (array): maximum-likelihood parameter estimation for mass model
        free params of the form [c_ml, rs_ml, normsersic_ml]

    Ncores (int): number of cores over which to run the MCMC analysis
    Nwalkers (int): number of MCMC ensemble walkers
    Nsteps (int): number of steps each walker takes
    Nburnin (int): number of steps considered to be a part of the burn-in
        period of the chain; these burn-in steps will be excluded from the
        final MCMC parameter estimation

    Returns:
    --------
    samples (array): MCMC samples of posterior distribution; of the form:
                col 1: c
                col 2: rs
                col 3: log(normsersic)
            NB: length of samples array set by Nwalkers * Nsteps

    References:
    -----------
    emcee: https://github.com/dfm/emcee
        + general setup for using emcee to fit a model to data:
            http://dfm.io/emcee/current/user/line/

    '''

    # initialize walkers - result comes from ML fit before

    if clustermeta['incl_mstar'] == 1:
        ndim, nwalkers = 3, Nwalkers
    elif clustermeta['incl_mstar'] == 0:
        ndim, nwalkers = 2, Nwalkers

    pos = [ml_results + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)]

    # sampler
    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    lnprob,
                                    args=(tspec_data['radius'],
                                          tspec_data['tspec'],
                                          tspec_data['tspec_err'], ne_data,
                                          tspec_data, nemodel, clustermeta),
                                    threads=Ncores)
    # NB: the argument order here must match the signature of lnprob

    # # run ensemble sampler for given number of steps
    # start=time.time()
    # sampler.run_mcmc(pos, Nsteps)
    # end=time.time()
    # print end-start

    for i, result in enumerate(sampler.sample(pos, iterations=Nsteps)):
        # report progress every 10% of the run
        if 100. * (i + 1.) / Nsteps % 10 == 0:
            print('MCMC progress: ' + "{0:5.1%}".format((i + 1.) / Nsteps))

    samples = sampler.chain[:, Nburnin:, :].reshape((-1, ndim))
    # length of samples = walkers*steps

    # check acceptance rate: goal between 0.2-0.5
    # print('acceptance rate of walkers:', sampler.acceptance_fraction)

    # check autocorrelation time
    try:
        print('autocorrelation time:', sampler.acor)
    except Exception:
        print('autocorrelation time cannot be calculated')
    print('')

    # print emcee.autocorr.integrated_time()

    return samples, sampler
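A hypothetical post-processing step (not in the source): 16th/50th/84th percentile summaries of the returned samples, with the column names taken from the docstring above.

import numpy as np
p16, p50, p84 = np.percentile(samples, [16, 50, 84], axis=0)
for name, lo, med, hi in zip(['c', 'rs', 'log(normsersic)'], p16, p50, p84):
    print('{}: {:.3g} +{:.3g} / -{:.3g}'.format(name, med, hi - med, med - lo))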
Code example #13
    def train(self, X, y, do_optimize=True, **kwargs):
        """
        Performs MCMC sampling to sample hyperparameter configurations from the
        likelihood and trains for each sample a GP on X and y

        Parameters
        ----------
        X: np.ndarray (N, D)
            Input data points. The dimensionality of X is (N, D),
            with N as the number of points and D is the number of features.
        y: np.ndarray (N,)
            The corresponding target values.
        do_optimize: boolean
            If set to true, we perform MCMC sampling; otherwise we just use the
            hyperparameters specified in the kernel.
        """

        if self.normalize_input:
            # Normalize input to be in [0, 1]
            self.X, self.lower, self.upper = normalization.zero_one_normalization(X, self.lower, self.upper)

        else:
            self.X = X

        if self.normalize_output:
            # Normalize output to have zero mean and unit standard deviation
            self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(y)
            if self.y_std == 0:
                raise ValueError("Cannot normalize output. All targets have the same value")
        else:
            self.y = y

        # Use the mean of the data as mean for the GP
        self.mean = np.mean(self.y, axis=0)
        self.gp = george.GP(self.kernel, mean=self.mean)

        if do_optimize:
            # We have one walker for each hyperparameter configuration
            sampler = emcee.EnsembleSampler(self.n_hypers,
                                            len(self.kernel.pars) + 1,
                                            self.loglikelihood)
            sampler.random_state = self.rng.get_state()
            # Do a burn-in in the first iteration
            if not self.burned:
                # Initialize the walkers by sampling from the prior
                if self.prior is None:
                    self.p0 = self.rng.rand(self.n_hypers, len(self.kernel.pars) + 1)
                else:
                    self.p0 = self.prior.sample_from_prior(self.n_hypers)
                # Run MCMC sampling
                self.p0, _, _ = sampler.run_mcmc(self.p0,
                                                 self.burnin_steps,
                                                 rstate0=self.rng)

                self.burned = True

            # Start sampling
            pos, _, _ = sampler.run_mcmc(self.p0,
                                         self.chain_length,
                                         rstate0=self.rng)

            # Save the current position, it will be the start point in
            # the next iteration
            self.p0 = pos

            # Take the last samples from each walker
            self.hypers = sampler.chain[:, -1]

        else:
            self.hypers = self.gp.kernel[:].tolist()
            self.hypers.append(self.noise)
            self.hypers = [self.hypers]

        self.models = []
        for sample in self.hypers:

            # Instantiate a GP for each hyperparameter configuration
            kernel = deepcopy(self.kernel)
            kernel.pars = np.exp(sample[:-1])
            noise = np.exp(sample[-1])
            model = GaussianProcess(kernel,
                                    normalize_output=self.normalize_output,
                                    normalize_input=self.normalize_input,
                                    noise=noise,
                                    lower=self.lower,
                                    upper=self.upper,
                                    rng=self.rng)
            model.train(X, y, do_optimize=False)
            self.models.append(model)

        self.is_trained = True
Code example #14
    def __init__(self,
                 X,
                 Y,
                 Sigma,
                 theta0,
                 Niter=100,
                 covfunction=covariance.SquaredExponential,
                 Xstar=None,
                 cXstar=None,
                 mu=None,
                 muargs=(),
                 prior=None,
                 priorargs=(),
                 scale0=None,
                 a=2.0,
                 threads=1,
                 nacor=10,
                 nsample=50,
                 sampling='True'):

        if scale0 is not None:
            assert (len(theta0) == len(scale0)) ,\
                "Lengths of theta0 and scale0 must be identical."
            self.pos = concatenate((theta0, reshape(scale0, (len(scale0), 1))),
                                   axis=1)
            self.sc0 = True
            scale = scale0[0]
        else:
            self.pos = theta0
            self.sc0 = False
            scale = None

        gp.GaussianProcess.__init__(self,
                                    X,
                                    Y,
                                    Sigma,
                                    covfunction,
                                    theta0[0, :],
                                    Xstar,
                                    cXstar,
                                    mu,
                                    muargs,
                                    prior,
                                    gradprior=None,
                                    priorargs=priorargs,
                                    thetatrain='False',
                                    scale=scale,
                                    scaletrain='False')
        self.theta0 = theta0
        self.scale0 = scale0
        self.covfunction = covfunction
        self.Niter = Niter
        self.a = a
        self.threads = threads
        self.nacor = nacor
        self.nsample = nsample
        self.sampling = sampling
        (self.nwalkers, self.ndim) = shape(self.pos)

        if (sampling == 'True'):
            try:
                import emcee
            except ImportError:
                print(
                    "Error: MCMCGaussianProcess requires the python package emcee."
                )
                print(
                    "emcee can be installed from http://github.com/dfm/emcee")
                raise SystemExit
            try:
                import acor
            except ImportError:
                print(
                    "Error: MCMCGaussianProcess requires the python package acor."
                )
                print("acor can be installed from http://github.com/dfm/acor")
                raise SystemExit

            self.sampler = emcee.EnsembleSampler(
                self.nwalkers,
                self.ndim,
                mcmc_log_likelihood,
                args=(self.sc0, self.X, self.Y_mu, self.Sigma, covfunction,
                      prior, priorargs),
                a=a,
                threads=threads)
Code Example #15
File: hierarchical.py Project: hddm-devs/kabuki
    def sample_emcee(self, nwalkers=500, samples=10, dispersion=.1, burn=5, thin=1, stretch_width=2., anneal_stretch=True, pool=None):
        import emcee
        import pymc.progressbar as pbar

        # This is the likelihood function for emcee
        lnprob = LnProb(self)

        # init
        self.mcmc()

        # get current values
        stochs = self.get_stochastics()
        start = [node_descr['node'].value for name, node_descr in stochs.iterrows()]
        ndim = len(start)

        def init_from_priors():
            p0 = np.empty((nwalkers, ndim))
            i = 0
            while i != nwalkers:
                self.mc.draw_from_prior()
                try:
                    self.mc.logp
                    p0[i, :] = [node_descr['node'].value for name, node_descr in stochs.iterrows()]
                    i += 1
                except pm.ZeroProbability:
                    continue
            return p0

        if hasattr(self, 'emcee_dispersions'):
            scale = np.empty_like(start)
            for i, (name, node_descr) in enumerate(stochs.iterrows()):
                knode_name = node_descr['knode_name'].replace('_subj', '')
                scale[i] = self.emcee_dispersions.get(knode_name, 0.1)
        else:
            scale = 0.1

        p0 = np.random.randn(ndim * nwalkers).reshape((nwalkers, ndim)) * scale * dispersion + start
        #p0 = init_from_priors()

        # instantiate sampler passing in the pymc likelihood function
        sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, a=stretch_width, pool=pool)

        bar = pbar.progress_bar(burn + samples)
        i = 0

        annealing = np.linspace(stretch_width, 2, burn)
        sys.stdout.flush()

        for pos, prob, state in sampler.sample(p0, iterations=burn):
            if anneal_stretch:
                sampler.a = annealing[i]
            i += 1
            bar.update(i)

        #print("\nMean acceptance fraction during burn-in: {}".format(np.mean(sampler.acceptance_fraction)))
        sampler.reset()

        # sample
        try:
            for p, lnprob, lnlike in sampler.sample(pos,
                                                    iterations=samples,
                                                    thin=thin):
                i += 1
                bar.update(i)
        except KeyboardInterrupt:
            pass
        finally:
            print(("\nMean acceptance fraction during sampling: {}".format(np.mean(sampler.acceptance_fraction))))
            # restore state
            for val, (name, node_descr) in zip(start, stochs.iterrows()):
                node_descr['node'].set_value(val)

            # Save samples back to pymc model
            self.mc.sample(1, progress_bar=False) # This call is to set up the chains
            for pos, (name, node) in enumerate(stochs.iterrows()):
                node['node'].trace._trace[0] = sampler.flatchain[:, pos]

            return sampler
Code Example #16
def bayesian_odds_ratio(airmasses, filters, astrometric_error=0.020, zshift=2.1):
	plot_points = True
	plot_walkers = False
	intercept_fixed = True
	np.random.seed(0)
	tanZList, RList = calcR(airmasses, filters, zshift=zshift)
	n = len(tanZList)
	def lnlike(theta, x, y, yerr, type):
		if type=="flat":
			b = theta
			model = 0.0 * x + b
		if type=="slope":
			if intercept_fixed == True:
				m = theta
				model = m * x + 0.0
			else:
				m, b = theta
				model = m * x + b
		inv_sigma2 = 1.0/(yerr**2.)
		return -0.5*(np.sum(((y-model)**2.*inv_sigma2 - np.log(inv_sigma2))))
	def lnprior(theta, type):
		if type=="flat":
			b = theta
			if (-1.0 < b < 1.0):
				return 0.0
			return -np.inf
		if type=="slope":
			if intercept_fixed == True:
				m = theta
				if (-1.0 < m < 1.0):
					return 0.0
				return -np.inf
			else:
				m, b = theta
				if (-1.0 < m < 1.0) and (-1.0 < b < 1.0):
					return 0.0
				return -np.inf
	def lnprob(theta, x, y, yerr, type=None):
		if type=="flat" or type=="slope":
			lp = lnprior(theta, type)
			if not np.isfinite(lp):
				return -np.inf
			return lp + lnlike(theta, x, y, yerr, type)
		else:
			print "must specify flat or slope"
			return np.nan
	nll = lambda *args: -lnprob(*args)
	nsteps, nwalkers = 500, 100
	x = np.copy(tanZList)
	y = np.copy(RList)
	yerr = np.sqrt((astrometric_error**2.)+(astrometric_error**2.))
	offset = yerr * np.random.randn(n) + 0.0
	pm = np.random.choice([-1.0,1.0], size=n, replace=True)
	y += offset
	if plot_points == True:
		fig1 = plt.figure(1)
		#plt.plot(tanZList, RList, 'o', color=colors[0])
		plt.errorbar(x, y, yerr=yerr, fmt='.', color=colors[1])
	if intercept_fixed == True:
		ndim = 1
		result = scipy.optimize.minimize(nll, [-0.001], args=(x, y, yerr, "slope"), method="Nelder-Mead")
		m_ml = result["x"]
	else:
		ndim = 2
		result = scipy.optimize.minimize(nll, [-0.001, 0.0], args=(x, y, yerr, "slope"), method="Nelder-Mead")
		m_ml, b_ml = result["x"]
	pos = [result["x"] + 1e-4*np.random.randn(ndim) for i in range(nwalkers)]
	sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(x, y, yerr, "slope"))
	sampler.run_mcmc(pos, nsteps)
	samples = sampler.chain[:, 50:, :].reshape((-1, ndim))
	ms = samples[np.random.randint(len(samples), size=100)][:,0]
	if intercept_fixed == True:
		m_mcmc_slope = np.percentile(samples[:, 0], 50)  # posterior median slope
	else:
		m_mcmc_slope, b_mcmc_slope = np.percentile(samples, 50, axis=0)
	if plot_walkers == True:
		fig2 = plt.figure(2)
		ax1 = plt.subplot(211)
		for i in range(nwalkers):
			ax1.plot(sampler.chain[i,:,0],color='k',alpha=0.05)
			ax1.axhline(y=m_mcmc_slope, xmin=0, xmax=nsteps, color='r')
		#ax2 = plt.subplot(412)
		#for i in range(nwalkers):
		#	ax2.plot(sampler.chain[i,:,1],color='k',alpha=0.05)
		#	ax2.axhline(y=lnf_mcmc_slope, xmin=0, xmax=nsteps, color='r')
	xs = np.arange(min(tanZList), max(tanZList), 0.01)
	if plot_points == True:
		plt.figure(1)
		if intercept_fixed == True:
			plt.plot(xs, m_mcmc_slope*xs + 0.0, color=colors[2], lw=3)
			for m in samples[np.random.randint(len(samples), size=100)]:
				plt.plot(xs, m*xs + 0.0, color=colors[2], lw=1, alpha=0.2)
		else:
			plt.plot(xs, m_mcmc_slope*xs + b_mcmc_slope, color=colors[2], lw=3)
			for m, b in samples[np.random.randint(len(samples), size=100)]:
				plt.plot(xs, m*xs + b, color=colors[2], lw=1, alpha=0.2)
	ndim = 1
	result = scipy.optimize.minimize(nll, [0.0], args=(x, y, yerr, "flat"), method="Nelder-Mead")
	b_ml = result["x"]
	pos = [result["x"] + 1e-4*np.random.randn(ndim) for i in range(nwalkers)]
	sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(x, y, yerr, "flat"))
	sampler.run_mcmc(pos, nsteps)
	samples = sampler.chain[:, 50:, :].reshape((-1, ndim))
	ms = samples[np.random.randint(len(samples), size=100)][:,0]
	b_mcmc_flat = np.percentile(samples[:, 0], 50)  # posterior median intercept
	xs = np.arange(min(tanZList), max(tanZList), 0.01)
	if plot_points == True:
		plt.figure(1)
		plt.plot(xs, 0.0*xs + b_mcmc_flat, color=colors[4], lw=3)
		for b in samples[np.random.randint(len(samples), size=100)]:
			plt.plot(xs, 0.0*xs + b, color=colors[4], lw=1, alpha=0.2)
	if plot_walkers == True:
		plt.figure(2)
		ax3 = plt.subplot(212)
		for i in range(nwalkers):
			ax3.plot(sampler.chain[i,:,0],color='k',alpha=0.05)
			ax3.axhline(y=b_mcmc_flat, xmin=0, xmax=nsteps, color='r')
		#ax4 = plt.subplot(414)
		#for i in range(nwalkers):
		#	ax4.plot(sampler.chain[i,:,1],color='k',alpha=0.05)
		#	ax4.axhline(y=lnf_mcmc_flat, xmin=0, xmax=nsteps, color='r')
		plt.savefig('walkers_modelcomparison_test_' + filters[0] + '_' + str(max(airmasses)-min(airmasses)).replace(".", "") + '_' + str(astrometric_error).replace(".", "")  + '_' + str(zshift).replace(".", "")  + '_' + str(n) + '.png')
		plt.clf()
	if intercept_fixed == True:
		model_slope = m_mcmc_slope * x + 0.0
	else:
		model_slope = m_mcmc_slope * x + b_mcmc_slope
	model_flat = 0.0 * x + b_mcmc_flat
	inv_sigma2 = 1.0/(yerr**2.)
	slope_loglikelihood = (-0.5*(np.sum(((y-model_slope)**2.*inv_sigma2 - np.log(inv_sigma2)))))
	flat_loglikelihood = (-0.5*(np.sum(((y-model_flat)**2.*inv_sigma2 - np.log(inv_sigma2)))))
	bayes_ratio = np.e**(slope_loglikelihood - flat_loglikelihood)
	if plot_points == True:
		plt.figure(1)
		#plt.text(0.1, 0.05, str(bayes_ratio), ha='left', va='center')
		#plt.text(0.1, 0.00, str(slope_loglikelihood), ha='left', va='center')
		#plt.text(0.1, -0.05, str(flat_loglikelihood), ha='left', va='center')
		plt.xticks(np.arange(-0.5, 2.5, 0.25),size=14)
		plt.yticks(np.arange(-0.4, 0.4, 0.10),size=14)
		plt.xlim(-0.01,2.3)
		plt.ylim(-0.35, 0.35)
		plt.xlabel(r'$\tan (Z)$',size=14)
		plt.ylabel(r'$\Delta R_{||}$ (arcsec)',size=14)
		plt.savefig('offset_tanZ_modelcomparison_' + filters[0] + '_' + str(max(airmasses)-min(airmasses)).replace(".", "") + '_' + str(astrometric_error).replace(".", "")  + '_' + str(zshift).replace(".", "")  + '_' + str(n) + '.png')
		plt.clf()
	if not np.isfinite(bayes_ratio):
		print "Redshift: ", zshift, " Airmass Range: ", min(airmasses), " - ", max(airmasses), " Number of Observations: ", n, " Astrometric Error: ", astrometric_error
		print min(x), max(x), min(y), max(y)
		print slope_loglikelihood, flat_loglikelihood
		print bayes_ratio
	return bayes_ratio
Code Example #17
def run_emcee(func,
              pos0,
              dpos=None,
              nwalkers=200,
              nsamps=200,
              nburn=50,
              verbose=False,
              conv_F=50,
              conv_perc=0.01,
              **kwargs):
    """ Function to run the emcee sampler on a given likelihood function.

    Parameters
    ----------
    func: function
        Likelihood function to sample; it must obey the requirements of the
        `emcee` module.
    pos0: list(float)
        A best guess of the parameter values to initiate the sampler.
    dpos: list(float)
        A best guess of the parameter uncertainties to guide the initial steps
        (optional, default=None).
    nwalkers: int
        Number of independent walkers to start (optional, default=200).
    nsamps: int
        Maximum number of samples to be taken by each walker (optional,
        default=200).
    nburn: int
        Number of samples to be taken as burn-in and discarded before
        returning the chain (optional, default=50).
    conv_F: int
        Convergence factor: the chain is considered converged once its length
        exceeds conv_F autocorrelation times (optional, default=50).
    conv_perc: float
        Maximum relative change in the autocorrelation time between checks
        for convergence to be declared (optional, default=0.01).

    Returns
    -------
        Dictionary containing the parameter chains.
    """
    if verbose:
        print("Sampling")
    ndim = len(pos0)

    #Set up initial displacement amplitudes
    if dpos is None:
        dpos = 1e-2 * np.ones(ndim)
    dp = np.zeros(ndim)
    for i, d in enumerate(dpos):
        if ((d is None) or (d <= 0)):
            dp[i] = 1e-2
        else:
            dp[i] = d * 0.1
    # initial positions of the walkers
    pos = [pos0 + dp * np.random.randn(ndim) for i in range(nwalkers)]
    # Do MCMC sampling, with early stopping if the convergence criterion is
    # met.

    sampler = emcee.EnsembleSampler(nwalkers, ndim, func, **kwargs)
    # This will be useful for testing convergence
    old_tau = np.inf
    # Now we'll sample for up to nsamps steps
    autocorr = []
    for _ in sampler.sample(pos, iterations=nsamps):
        # Only check convergence every 100 steps
        if sampler.iteration % 100:
            continue
        # Compute the autocorrelation time so far
        # Using tol=0 means that we'll always get an estimate even
        # if it isn't trustworthy
        tau = sampler.get_autocorr_time(tol=0)
        autocorr.append((sampler.iteration, np.mean(tau)))
        print(autocorr[-1])
        # Check convergence
        converged = np.all(tau * conv_F < sampler.iteration)
        converged &= np.all(np.abs(old_tau - tau) / tau < conv_perc)
        if converged:
            break
        old_tau = tau
    autocorr = np.array(autocorr)
    samples = sampler.chain[:, nburn:, :].reshape((-1, ndim))
    return {'chains': samples, 'autocorr': autocorr}
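A minimal usage sketch for `run_emcee` above, with a toy isotropic Gaussian log-likelihood (the target density and starting point are illustrative only):

import numpy as np

def log_gauss(theta):
    # Standard-normal log-density in 3 dimensions (toy target).
    return -0.5 * np.sum(np.asarray(theta) ** 2)

out = run_emcee(log_gauss, pos0=[0.0, 0.0, 0.0],
                nwalkers=50, nsamps=500, nburn=100, verbose=True)
print(out['chains'].shape)  # (nwalkers * (steps_run - nburn), ndim)
print(out['autocorr'])      # (iteration, mean tau) pairs from the checks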
Code Example #18
def bayesian_fit(func, data_x, data_y,
        lnprior_parameters, init_params,
        n_walkers=10, n_iterations=1000, varnames = None):
    """Main function to perform the fit

    Takes the function and data to fit and returns a dataframe with samples of the fitting parameters. Except for an initial transitory phase, during which the algorithm converges to a steady state (its length should be checked, but is usually no more than a few hundred steps), this sampling gives the probability distribution of the fitting parameters given the data and priors.

    Parameters:
        func (function): function to be fitted that has exactly two arguments, input x and a list of parameters. See functions_library for examples.
        data_x (num array): data array of input to the function
        data_y (num array): data array of the outputs of the function to data_x
        lnprior_parameters (list of functions): prior functions, one per fitted parameter. Each represents the initial probability distribution of that parameter on a logarithmic scale (with -np.inf for zero probability). In practice, for any data that is not mostly noise, the priors should not influence the outcome much (though ideally this should be checked); they should encode the expected scale of the parameter or an allowed range (for instance, a parameter may need to be positive to make sense). See the example of priors defined below.
        init_params (list of functions): functions used to generate the initial parameters for each Markov chain, usually chosen to draw randomly around a value that roughly makes sense for the data.
        n_walkers (int>0): number of Markov chains to use
        n_iterations (int>0): number of iterations for each chain
        varnames (list of str): names of the parameters to fit

    Returns:
       Pandas dataframe: dataframe containing the values of all the parameters (each represented by a data column) for each walker and each iteration (represented with a multi-index, see https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html)
    """

    if len(lnprior_parameters) != len(init_params):
        raise Exception('The length of lnprior_parameters and init_params do not match!') 
    if varnames:
        if len(varnames) != len(init_params):
            raise Exception('The length of varnames does not match the length of init_params!')

    def lnprior(theta):
        """Prior function in the Bayesian inference, in log scale
        """
        sigma = theta[-1]
        lnprior_sum = lnprior_parameters[-1](sigma)
        for p, lnprior_p in zip(theta[:-1], lnprior_parameters[:-1]):
            lnprior_sum += lnprior_p(p)
        return lnprior_sum

    def lnlike(theta, x, y):
        """Likelihood function in the Bayesian inference, in log scale
        """
        sigma = theta[-1]
        ymod = func(x, theta[:-1])
        #return -0.5 * np.sum( ((y-ymod)/sigma)**2 + 2*np.log(sigma) )
        return -0.5 * np.sum( 
                (((y.real - ymod.real)**2 + (y.imag-ymod.imag)**2)/sigma**2) 
                + 2*np.log(sigma) 
                )

    def lnprob(theta, x, y):
        """Unnormalized Bayesian probability in log scale
        """
        lp = lnprior(theta)
        if not np.isfinite(lp):
            return -np.inf
        else:
            return lnprior(theta) + lnlike(theta, x, y)

    params_0 = [
            np.array([ip() for ip in init_params])
            for i in range(n_walkers)]

    ndim = len(init_params)
    sampler = emcee.EnsembleSampler(n_walkers, ndim, lnprob, 
            args=(data_x, data_y))
    sampler.run_mcmc(params_0, n_iterations)

    if varnames is None:
        varnames = [ 'p{}'.format(i) for i in range(ndim-1) ]
        varnames.append('sigma')
    varnames = list(varnames)

    iterations = range(n_iterations)
    walkers = range(n_walkers)
    index = pd.MultiIndex.from_product([walkers, iterations], 
            names=('Walker', 'Iteration'))
    samples_df = pd.DataFrame(
            sampler.chain.reshape((n_walkers*n_iterations, len(varnames))),
                index=index, columns=varnames)

    return samples_df
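The docstring above points to example priors defined elsewhere in the original file. A small illustrative call, fitting y = a*exp(-x/tau) plus noise with flat priors (all names and ranges here are invented for the example):

import numpy as np

def flat_prior(lo, hi):
    # Log-prior that is uniform on [lo, hi] and -inf outside.
    def lnprior(p):
        return 0.0 if lo <= p <= hi else -np.inf
    return lnprior

def decay(x, params):
    a, tau = params
    return a * np.exp(-x / tau)

x = np.linspace(0, 5, 50)
y = decay(x, [2.0, 1.5]) + 0.1 * np.random.randn(50)

samples_df = bayesian_fit(
    decay, x, y,
    lnprior_parameters=[flat_prior(0, 10),     # a
                        flat_prior(0.1, 10),   # tau
                        flat_prior(1e-3, 1)],  # noise scale sigma
    init_params=[lambda: np.random.uniform(1, 3),
                 lambda: np.random.uniform(0.5, 3),
                 lambda: np.random.uniform(0.05, 0.2)],
    varnames=['a', 'tau', 'sigma'])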
Code Example #19
def run_mcmc(model_path,
             Q_uvb,
             ions_to_use,
             true_Q=18,
             uvb='KS18',
             figname='testT.pdf',
             same_error=False):
    # run_mcmc(model_Q= model, ions_to_use= ions)
    # ------------------ here is a way to run code
    truths = [-4, -1]  # (lognH, logZ) true values
    number_of_ions = len(ions_to_use)

    data_col_all = get_true_model(model_path, Q=true_Q)
    # converting astropy table row to a list
    data_col = []
    for name in ions_to_use:
        data_col.append(data_col_all[name][0])

    np.random.seed(0)
    if same_error:
        sigma_col = 0.2 * np.ones(number_of_ions)
    else:
        sigma_col = np.random.uniform(0.1, 0.3, number_of_ions)

    print(np.log10(data_col), sigma_col)

    interp_logf = get_interp_func(model_path=model_path,
                                  ions_to_use=ions_to_use,
                                  Q_uvb=Q_uvb,
                                  uvb=uvb)

    # Here we'll set up the computation. emcee combines multiple "walkers",
    # each of which is its own MCMC chain. The number of trace results will
    # be nwalkers * nsteps

    ndim = 2  # number of parameters in the model
    nwalkers = 50  # number of MCMC walkers
    nsteps = 5000  # number of MCMC steps to take

    # initialise the walkers by drawing uniformly from a plausible range
    # in each parameter
    n_guess = np.random.uniform(-5, -3, nwalkers)
    z_guess = np.random.uniform(-2, 0, nwalkers)
    starting_guesses = np.vstack(
        (n_guess, z_guess)).T  # shape (nwalkers, ndim)

    # Here's the function call where all the work happens:
    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    log_posterior,
                                    args=(interp_logf, np.log10(data_col),
                                          sigma_col))
    sampler.run_mcmc(starting_guesses, nsteps, progress=True)

    # estimate the autocorrelation time: the number of steps needed to
    # forget the starting position
    tau = sampler.get_autocorr_time()
    #print(tau)
    thin = int(np.mean(tau) / 2)  # used below to set the burn-in length
    #thin = 100
    flat_samples = sampler.get_chain(discard=thin * 20, thin=5, flat=True)
    # discard the first ~10 autocorrelation times (thin * 20) as burn-in,
    # then thin by 5 steps for plotting (this thinning step is optional)

    labels = ['log nH', 'log Z']
    #uvb_q= int((model_Q.split('try_Q')[-1]).split('.fits')[0])

    if Q_uvb == true_Q:
        fig = corner.corner(flat_samples,
                            labels=labels,
                            truths=truths,
                            quantiles=[0.16, 0.5, 0.84],
                            show_titles=True,
                            title_kwargs={"fontsize": 12})
    else:
        fig = corner.corner(flat_samples,
                            labels=labels,
                            quantiles=[0.16, 0.5, 0.84],
                            show_titles=True,
                            title_kwargs={"fontsize": 12})

    fig.savefig(figname)

    for i in range(ndim):
        mcmc = np.percentile(flat_samples[:, i], [16, 50, 84])
        q = np.diff(mcmc)
        print(labels[i], '=', mcmc[1], q[0], q[1])

    return flat_samples, ndim
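The discard/thin bookkeeping above can be factored into a small helper. A sketch of the same heuristic (here the thinning also uses tau/2, where the code above hard-codes a thinning of 5):

import numpy as np

def flatten_chain(sampler):
    # Burn-in: discard ~10 mean autocorrelation times (thin * 20 with
    # thin = tau/2), then thin by ~tau/2 before flattening.
    tau = np.mean(sampler.get_autocorr_time(tol=0))
    thin = max(1, int(tau / 2))
    return sampler.get_chain(discard=thin * 20, thin=thin, flat=True)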
Code Example #20
    def calc_mcmc(self, nwalkers, niter, nburn, split_traintest):
        """ Running the MCMC
        :param nwalkers: Number of walkers, recommended at least 100
        :param niter: Number of iterations, recommend at least 500
        :param nburn: Number of iterations for burn-in phase, recommend at least 100
        :return: None; updates and saves class variables
        """
        self.kernel = self.kernel_gp(kernelname=kernelname)
        self.ndim_gp = len(self.kernel)  # same as gpdim
        self.residual_blr = np.zeros_like(self.y)
        # Set up the sampler:
        self.nwalkers = nwalkers
        self.niter = niter
        self.ndim = int(self.ndim_gp + self.ndim_blr + 2)
        sampler = emcee.EnsembleSampler(self.nwalkers, self.ndim, self.lnprob)
        # Initialize the walkers:
        if george.__version__ < '0.3.0':
            p0_gp = np.log(self.kernel.pars)
        else:
            p0_gp = self.kernel.get_parameter_vector()
        alpha0 = np.mean(self.y)
        beta0 = np.zeros((self.ndim_blr))
        sigma0 = np.std(self.y) / 2.
        p0_comb = np.hstack((alpha0, sigma0, beta0, p0_gp))
        print("Initial proposal (alpha, sigma, beta vec, GP vec):")
        print(p0_comb)
        print("Initial log posterior function call")
        print(self.lnprob(p0_comb))
        p0 = [
            p0_comb + 1e-4 * np.random.randn(self.ndim)
            for i in range(self.nwalkers)
        ]

        print("Estimating MCMC time...")
        start_time = dt.datetime.now()
        _, _, _ = sampler.run_mcmc(p0, 10)
        # Reset the chain to remove the burn-in samples.
        sampler.reset()
        burn_time = (dt.datetime.now() - start_time).seconds
        print('Estimated time till completed: {} seconds '.format(
            burn_time * (self.niter + nburn) / 10.))

        print("Running burn-in...")
        p0, _, state = sampler.run_mcmc(p0, nburn)
        # Reset the chain to remove the burn-in samples.
        sampler.reset()

        print("Running MCMC ...")
        pos, prob, state = sampler.run_mcmc(p0, self.niter, rstate0=state)
        # Save the mean acceptance fraction:
        af = sampler.acceptance_fraction
        self.accept_fr = af
        # Get the best model parameters and their respective errors:
        self.sampler_chain = sampler.chain
        self.sampler_flatchain = sampler.flatchain
        maxprob_index = np.argmax(prob)
        self.pos_fit = pos
        self.prob_fit = prob
        # save parameters with largest probability:
        self.params_fit = pos[maxprob_index]
        self.params_mean = np.mean(pos, axis=0)
        # save percentile of posterior distribution:
        self.params_50per = [
            np.percentile(sampler.flatchain[:, i], 50)
            for i in range(self.ndim)
        ]
        self.params_2per = [
            np.percentile(sampler.flatchain[:, i], 2) for i in range(self.ndim)
        ]
        self.params_16per = [
            np.percentile(sampler.flatchain[:, i], 16)
            for i in range(self.ndim)
        ]
        self.params_84per = [
            np.percentile(sampler.flatchain[:, i], 84)
            for i in range(self.ndim)
        ]
        self.params_98per = [
            np.percentile(sampler.flatchain[:, i], 98)
            for i in range(self.ndim)
        ]
        # save standard deviation:
        self.errors_fit = np.asarray(
            [sampler.flatchain[:, i].std() for i in range(self.ndim)])
        # save parameters:
        self.alpha_fit, self.alpha_err = self.params_fit[0], self.errors_fit[0]
        self.sigma_fit, self.sigma_err = self.params_fit[1], self.errors_fit[1]
        self.beta_fit, self.beta_err = self.params_fit[
            2:self.ndim_blr + 2], self.errors_fit[2:self.ndim_blr + 2]
        p_fit, p_err = self.params_fit[self.ndim_blr +
                                       2:], self.errors_fit[self.ndim_blr + 2:]

        # Calculate models and residuals for train data
        self.mu_blr = self.predict_blr(self.X_blr, self.alpha_fit,
                                       self.beta_fit)  # BLR Model
        self.residual_blr = (self.y - self.mu_blr)
        """
        #gp = george.GP(self.kernel, mean=np.mean(self.residual_blr))
        gp = george.GP(self.kernel, mean=0.)
        if george.__version__ < '0.3.0':
            gp.kernel.pars = np.exp(p_fit)
            self.gp_fit = gp.kernel.pars
        else:
            gp.kernel.set_parameter_vector(p_fit)
            self.gp_fit = gp.kernel.get_parameter_vector()
        gp.compute(self.X_gp, self.sigma_fit)
        self.mu_gp, cov_gp = gp.predict(self.residual_blr, self.X_gp) # GP Model
        self.std_gp = np.sqrt(np.diag(cov_gp)) # standard deviation of GP
        self.y_model = self.mu_gp + self.mu_blr # Final Model 
        """
        self.y_model = self.mu_blr  # Final Model

        # Print some MCMC results and additionally saved in text file:
        print("---- MCMC Results and Parameters ----")
        self.results_file.write('---- MCMC Results and Parameters ----\n')
        print("Mean acceptance fraction:", np.mean(af))
        self.results_file.write('Mean acceptance fraction: {0} \n'.format(
            np.mean(af)))
        #print("Kernel: ", gp.kernel)
        #self.results_file.write('Kernel: {0} \n'.format(gp.kernel))
        print("alpha, err:", round(self.alpha_fit, 2),
              round(self.alpha_err, 2))
        self.results_file.write('alpha: {0} , err: {1} \n'.format(
            round(self.alpha_fit, 2), round(self.alpha_err, 2)))
        for i in range(len(self.beta_fit)):
            print('beta' + str(i) + ' , err:', round(self.beta_fit[i], 2),
                  round(self.beta_err[i], 2))
            self.results_file.write('beta {0}: {1} , err: {2} \n'.format(
                str(i), round(self.beta_fit[i], 2), round(self.beta_err[i],
                                                          2)))
        print('sigma, err:', round(self.sigma_fit, 2),
              round(self.sigma_err, 2))
        self.results_file.write('sigma: {0} , err: {1} \n'.format(
            round(self.sigma_fit, 2), round(self.sigma_err, 2)))
        print("Model lnlikelihood: ", prob[maxprob_index])
        self.results_file.write('Model lnlikelihood: {0} \n'.format(
            prob[maxprob_index]))
        """
        print("Std GP: ", np.mean(self.std_gp))
        self.results_file.write('Std GP: {0} \n'.format(np.mean(self.std_gp)))
        if george.__version__ < '0.3.0':
            print("GP lnlikelihood:", gp.lnlikelihood(self.y))
            self.results_file.write('GP lnlikelihood: {0} \n'.format(gp.lnlikelihood(self.y)))
        else:
            print("GP lnlikelihood:", gp.log_likelihood(self.y))
            self.results_file.write('GP lnlikelihood: {0} \n'.format(gp.log_likelihood(self.y)))
        """

        # Calculate models and residuals for test data
        if split_traintest > 0.:
            self.mu_blr_test = self.predict_blr(self.X_blr_test,
                                                self.alpha_fit,
                                                self.beta_fit)  # BLR Model
            self.residual_blr_test = (self.y_test - self.mu_blr_test)
            """
            #gp = george.GP(self.kernel, mean=np.mean(self.residual_blr_test))
            gp = george.GP(self.kernel, mean=0.)
            gp.compute(self.X_gp_test, self.sigma_fit)
            self.mu_gp_test, _ = gp.predict(self.residual_blr_test, self.X_gp_test)  # GP Model 
            self.y_model_test = self.mu_gp_test + self.mu_blr_test  # Final Model 
            """
            self.y_model_test = self.mu_blr_test
        else:
            self.mu_blr_test, self.mu_gp_test, self.residual_blr_test, self.y_model_test = np.zeros(
                4)
Code Example #21
def fit_isochrone_mcmc(obs_file,
                       nwalkers,
                       burn_in,
                       nsteps,
                       thin,
                       guess=False,
                       magcut=17.0):
    seed = np.random.randint(2**25, 2**30)
    print('-------------------------------------------------------------')
    print('Starting MCMC fitting...')
    print('-------------------------------------------------------------')

    obs = np.genfromtxt(obs_file, names=True)

    #remove nans
    cond1 = np.isfinite(obs['Gmag'])
    cond2 = np.isfinite(obs['BPmag'])
    cond3 = np.isfinite(obs['RPmag'])
    cond4 = obs['Gmag'] < magcut

    ind = np.where(cond1 & cond2 & cond3 & cond4)

    obs = obs[ind]

    obs_oc = np.copy(obs[['Gmag', 'BPmag', 'RPmag']])
    obs_oc.dtype.names = ['Gmag', 'G_BPmag', 'G_RPmag']
    obs_oc_er = np.copy(obs[['e_Gmag', 'e_BPmag', 'e_RPmag']])
    obs_oc_er.dtype.names = ['Gmag', 'G_BPmag', 'G_RPmag']
    weight = obs['P'] * obs_oc['Gmag'].min() / obs_oc['Gmag']

    # load full isochrone grid data and arrays of unique Age and Z values
    grid_dir = './grids/'
    mod_grid, age_grid, z_grid = load_mod_grid(grid_dir, isoc_set='MIST-GAIA')
    filters = ['Gmag', 'G_BPmag', 'G_RPmag']
    refmag = 'Gmag'

    labels = ['age', 'dist', 'met', 'Ebv', 'Rv', 'bin', 'alpha']

    prange = np.array([[6.5, 10.3], [0.1, 10.], [2e-06, 0.048], [0.1, 2.0],
                       [2, 4.], [0., 0.8], [1.5, 3.5]])

    ndim = prange.shape[0]

    midpoint = (prange[:, 1] - prange[:, 0]) / 2. + prange[:, 0]

    # define uniformly distributed walker starting positions
    pos = []
    lik = []
    for i in range(nwalkers):
        pars = []
        for k in range(ndim):
            pars.append(np.random.uniform(prange[k, 0], prange[k, 1]))
        pos.append(np.array(pars))
        lik.append(
            lnlikelihood(pars, obs_oc, obs_oc_er, filters, refmag, prange,
                         weight))

    # If there is initial guess generate walkers around it
#    scale=(prange[:,1]-prange[:,0])/10.
#    if guess:
#        pos = [guess + scale*np.random.randn(ndim) for i in range(nwalkers)]

    start_time = timeit.default_timer()

    # setup sampler
    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    lnlikelihood,
                                    a=1.1,
                                    args=(obs_oc, obs_oc_er, filters, refmag,
                                          prange, weight),
                                    threads=mp.cpu_count() - 1,
                                    live_dangerously=True)
    # run sampler in the burn in phase
    sampler.run_mcmc(pos, burn_in)

    # process samples
    samples = sampler.chain[:, :, :].reshape((-1, ndim))

    # get best values and confidence intervals
    best_vals = np.array(
        list(map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                 zip(*np.percentile(samples, [15, 50, 84], axis=0)))))

    # get best solution from the maximum likelihood sample

    best_sol = sampler.flatchain[sampler.flatlnprobability.argmax()]

    # reset sampler
    sampler.reset()

    # setup sampler after burn-in
    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    lnlikelihood,
                                    a=1.1,
                                    args=(obs_oc, obs_oc_er, filters, refmag,
                                          prange, weight, seed),
                                    threads=mp.cpu_count() - 1,
                                    live_dangerously=True)

    print('done burn in phase...')
    print('')
    print('Best solution of burn in phase: ', best_sol)
    print('Average solution of burn in phase: ', best_vals[:, 0])

    # redefine initial positions based on burn in results
    #    scale = 0.01*best_vals[:,0]
    #    pos = [best_vals[:,0] + scale*np.random.randn(ndim) for i in range(nwalkers)]

    # run sampler for final sample
    sampler.run_mcmc(pos, nsteps, thin=thin)

    # get final best solution
    best_sol = sampler.flatchain[sampler.flatlnprobability.argmax()]

    print('Finished sampling')

    samples = sampler.chain[:, nsteps // (2 * thin):, :].reshape((-1, ndim))
    best_vals = np.array(
        list(map(lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
                 zip(*np.percentile(samples, [5, 50, 95], axis=0)))))

    print("Mean acceptance fraction: {0:.3f}".format(
        np.mean(sampler.acceptance_fraction)))

    print('Elapsed time: ', (timeit.default_timer() - start_time) / 60.,
          ' minutes')

    ###############################################################
    # print results

    fig = corner.corner(samples,
                        labels=labels,
                        levels=(0.68, 0.95),
                        smooth=True)

    for i in range(ndim):
        print(labels[i], best_sol[i], '-', best_vals[i, 1], '+', best_vals[i,
                                                                           2])

    print('')
    print('From sample averages:')
    for i in range(ndim):
        print(labels[i], best_vals[i, 0], '-', best_vals[i, 1], '+',
              best_vals[i, 2])

    # plot chains
    fig, axes = plt.subplots(ndim, figsize=(10, 7), sharex=True)
    samples = sampler.chain
    for i in range(ndim):
        ax = axes[i]
        for k in range(nwalkers):
            ax.plot(np.array(samples[k, :, i]), "k", alpha=0.1)
        ax.set_ylabel(labels[i])

    # plot averages
    fig, axes = plt.subplots(ndim, figsize=(10, 7), sharex=True)
    for i in range(ndim):
        ax = axes[i]
        for k in range(nwalkers):
            avgs = np.cumsum(np.array(
                samples[k, :, i])) / (np.arange(len(samples[k, :, i])) + 1)
            ax.plot(avgs, "k", alpha=0.1)
        ax.set_ylabel(labels[i])

    print('-------------------------------------------------------------')
    print(' Final result')
    print('-------------------------------------------------------------')
    print('   '.join('%0.3f' % v for v in best_vals[:, 1]))
    print('   '.join('%0.3f' % v
                     for v in (best_vals[:, 1] + best_vals[:, 2]) / 3))

    return best_sol, (best_vals[:, 1] + best_vals[:, 2]) / 3
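The percentile-summary idiom used twice above (keeping the median plus upper and lower offsets) can be written more directly; an equivalent sketch for any percentile triple:

import numpy as np

def summarize(samples, percentiles=(16, 50, 84)):
    # One row per parameter: (median, upper offset, lower offset).
    lo, med, hi = np.percentile(samples, percentiles, axis=0)
    return np.column_stack((med, hi - med, med - lo))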
Code Example #22
pos = sol + 1e-4 * np.random.randn(12, 4)  # Define the number of walkers.
nwalkers, ndim = pos.shape

#%%
# Set up the backend
os.chdir(path_datos_global + '/Resultados_cadenas/')
filename = "sample_HS_SN_4params.h5"
backend = emcee.backends.HDFBackend(filename)
backend.reset(nwalkers,
              ndim)  # Don't forget to clear it in case the file already exists
textfile_witness = open('witness_2.txt', 'w+')
textfile_witness.close()
#%%
#Initialize the sampler
sampler = emcee.EnsembleSampler(nwalkers,
                                ndim,
                                log_probability,
                                backend=backend)
max_n = 10000
# This will be useful for testing convergence
old_tau = np.inf

# Now we'll sample for up to max_n steps
for sample in sampler.sample(pos, iterations=max_n, progress=True):
    # Only check convergence every 100 steps
    if sampler.iteration % 5:  # interval between convergence checks (originally 100)
        continue

    os.chdir(path_datos_global + '/Resultados_cadenas/')
    textfile_witness = open('witness_2.txt', 'w')
    textfile_witness.write('Iteration number: {} \t'.format(
        sampler.iteration))
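    textfile_witness.close()

    # The snippet is cut off here; given the old_tau initialisation above,
    # it presumably continues with the standard emcee convergence test.
    # A sketch of that pattern (assumed continuation, not in the original):
    tau = sampler.get_autocorr_time(tol=0)
    converged = np.all(tau * 100 < sampler.iteration)
    converged &= np.all(np.abs(old_tau - tau) / tau < 0.01)
    if converged:
        break
    old_tau = tau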
Code Example #23
File: tl.py Project: lunaczp/GalaxyCodeBases
    return log_prior(theta) + log_likelihood(theta, x, y, e, sigma_B)

ndim = 2 + len(x)  # number of parameters in the model
nwalkers = 50  # number of MCMC walkers
nburn  = 100000  # "burn-in" period to let chains stabilize
nsteps = 150000  # number of MCMC steps to take

# set theta near the maximum likelihood
np.random.seed(0)
starting_guesses = np.zeros((nwalkers, ndim))
starting_guesses[:, :2] = np.random.normal(theta1, 1, (nwalkers, 2))
starting_guesses[:, 2:] = np.random.normal(0.5, 0.1, (nwalkers, ndim - 2))

import emcee
import multiprocessing as mp
sampler = emcee.EnsembleSampler(nwalkers, ndim, log_posterior, args=[x, y, e, 50], threads=mp.cpu_count() )
sampler.run_mcmc(starting_guesses, nsteps)

sample = sampler.chain  # shape = (nwalkers, nsteps, ndim)
sample = sampler.chain[:, nburn:, :].reshape(-1, ndim)

theta3 = np.mean(sample[:, :2], axis=0)
g = np.mean(sample[:, 2:], 0)
outliers = (g < 0.5)
#plt.show()

plt.errorbar(x, y, e, fmt='.k', ecolor='gray')
plt.plot(xfit, theta1[0] + theta1[1] * xfit, color='gray',label="y = %sx + %s"%(theta1[1],theta1[0]) )
plt.plot(xfit, theta2[0] + theta2[1] * xfit, color='green',label="Huber: y = %sx + %s"%(theta2[1],theta2[0]) )
plt.plot(xfit, theta3[0] + theta3[1] * xfit, color='navy',label="MCMC: y = %sx + %s"%(theta3[1],theta3[0]) )
plt.plot(x[outliers], y[outliers], 'ro', ms=20, mfc='none', mec='red')
Code Example #24
def parallel(i):
    sampler = emcee.EnsembleSampler(nwalkers, ndim, WD_MCMC_func.ln_prob, 
                                    args=[mass, age, pml, pmb, factor, l, b,
                                          mass_Q, age_Q, pml_Q, pmb_Q, factor_Q, l_Q, b_Q,
                                          NOT_FIT_UVW, NOT_FIT_INDEX, FIXV])
Code Example #25
File: wpvpfmcmc.py Project: abhishek-jana/HODProject
# Fresh run from scratch (this block is disabled in the original source):
'''
    backend = emcee.backends.HDFBackend(filename)
    backend.reset(nwalkers, ndim)
    #print ("Running burn-in...")
    sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability, args=(total_data, total_cov),pool=pool,backend=backend)
    #pos, _, _ = sampler.run_mcmc(pos, 100, progress = True, store = False)
    #sampler.reset()
    print("Running production...")
    sampler.run_mcmc(pos, 5000,store=True, progress=True)
'''

# Resume from saved chain

with Pool() as pool:
    filename = "wpvpfmcmctest.h5"
    backend = emcee.backends.HDFBackend(filename)
    print("Initial size: {0}".format(backend.iteration))
    sampler = emcee.EnsembleSampler(nwalkers,
                                    ndim,
                                    log_probability,
                                    args=(total_data, total_cov),
                                    pool=pool,
                                    backend=backend)
    print("Running production...")
    sampler.run_mcmc(None, 10000, store=True, progress=True)
    print("Final size: {0}".format(backend.iteration))

print("Mean acceptance fraction: {0:.3f}".format(
    np.mean(sampler.acceptance_fraction)))

print(np.median(sampler.flatchain, axis=0))
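Passing `None` as the initial state makes emcee resume from the last position stored in the HDF backend, so the fresh-run block above only needs to execute once; each later invocation appends another 10000 steps to `wpvpfmcmctest.h5`.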
Code Example #26
File: prob10.py Project: natchin76/Assignmentg4
    else:
        return -np.inf
def log_prob(theta,x,y,sigma_y):
    if  np.isinf(log_prior(theta)):
        return -np.inf
    else:
        return log_prior(theta) - log_lik(theta, x, y, sigma_y)  # log_lik is the negative log-likelihood

# obtain a, b, c which minimize the negative log-likelihood
guess = (1, 1, 1)
soln = minimize(log_lik, guess, args=(x, y, sigma_y))

# 50 Markov chains, each starting near the maximum of the probability
# distribution; each chain moves in 3D parameter space
nwalk, ndim = 50, 3
pos = soln.x + 1e-4 * np.random.randn(nwalk, ndim)
sampler = emcee.EnsembleSampler(nwalk, ndim, log_prob, args=(x, y, sigma_y))
sampler.run_mcmc(pos, 4000)
samples = sampler.get_chain()
plt.figure(figsize=(16, 3))
plt.subplot(311)
plt.plot(samples[:, :, 0])  # a
plt.xlabel('step no')
plt.ylabel('a')
plt.subplot(312)
plt.plot(samples[:, :, 1])  # b
plt.xlabel('step no')
plt.ylabel('b')
plt.subplot(313)
plt.plot(samples[:, :, 2])  # c
plt.xlabel('step no')
plt.ylabel('c')
Code Example #27
# Save the initial parameters
save_file_init = open(DIR_TO_SAVE+str(65)+"_init_hyperparams.txt", "w")
for i in range(len(kernel_labels)):
	save_file_init.write(str(kernel_labels[i])+": "+str(inital_params[i]) + ' ' + str(initial_bounds[i]) + '\n')
save_file_init.close()

# Log-probability for emcee: GP log-likelihood plus log-prior
def lnprob(p):
    model.set_parameter_vector(p)
    return model.log_likelihood(y, quiet=True) + model.log_prior()

#MCMC for parameter optimization
initial = model.get_parameter_vector()
ndim, nwalkers = len(initial), 32
p0 = initial + 1e-6 * np.random.randn(nwalkers, ndim)
sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob)
print("Initial ln-likelihood: {0:.2f}".format(model.log_likelihood(y)))
print("Running burn-in...")
p0, _, _ = sampler.run_mcmc(p0, 500) #Define the number of samples to take in the burn-in
#sampler.reset()

print("Running production...")
sampler.run_mcmc(p0,1000); #Define the number of samples

samples = sampler.flatchain
model.set_parameter_vector(np.percentile(samples,[50], axis=0)[0])

#Compute the predictions
model.recompute()
print("\nFinal ln-likelihood: {0:.2f}".format(model.log_likelihood(y)))
period = model.get_parameter_vector()[1]
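A common follow-up here is to overplot predictions for several posterior samples rather than only the median. A sketch assuming a george-style `model` with `set_parameter_vector` and `predict`, and data arrays `x` and `y` from the surrounding script (these names are assumed, not shown above):

import numpy as np
import matplotlib.pyplot as plt

t_grid = np.linspace(x.min(), x.max(), 500)
for s in samples[np.random.randint(len(samples), size=24)]:
    model.set_parameter_vector(s)
    mu = model.predict(y, t_grid, return_cov=False)
    plt.plot(t_grid, mu, color='k', alpha=0.1)  # one curve per posterior draw
plt.scatter(x, y, s=4)
plt.savefig('posterior_draws.png')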
Code Example #28
        PARAM_MAXES,
        method='global-differential-evolution',
    )

    print(f'Maximum likelihood parameters are: {best_fit}')

    ndim = PARAM_MINS.size
    initial_position = best_fit + WALKER_DISPERSION * np.random.randn(
        NWALKERS, ndim)

    chain_filename = generate_chain_filename()
    backend = emcee.backends.HDFBackend(chain_filename)
    backend.reset(NWALKERS, ndim)

    print('Beginning MCMC fit...')

    with pp.ProcessPool(NUM_PROCESSES) as pool:
        sampler = emcee.EnsembleSampler(NWALKERS,
                                        ndim,
                                        log_prob,
                                        args=(sim_stack, ),
                                        backend=backend,
                                        pool=pool)
        sampler.run_mcmc(initial_position, NSTEPS, progress=True)
        pool.close()
        pool.join()
        pool.clear()
        pool.terminate()
        pool.restart()
    print(bcolors.OKGREEN + '--- Analysis complete ---' + bcolors.ENDC)
Code Example #29
def main():
    ################################################################################
    ##########              READ IN THE RAW PHOTOMETRY          ####################
    #################################################################################
    numecl = 0
    plnm = 'WASP_101'
    verbose = 'false'
    fpath = '/Users/rahuljayaraman/Documents/Miscellany/Research (Tucker Group)/Python (Transits)/' + plnm
    aorlist = os.listdir(fpath)

    #aorlist= [item for item in aorlist if not item.startswith('.')]
    #aorlist=aor_from_list(plnm, 1)
    #aorlist=[50494976]
    aorlist = ['62158336', '62159360']
    #aorlist=np.delete(aorlist, [0,1, len(aorlist)-1])
    for aor in aorlist:
        print(aor)
        aor = str(aor)
        prisec = 'primary'
        ramp_style = 'none'
        fpathout = fpath + aor + '/apr_fits/' + ramp_style + '/'
        directory = os.path.dirname(fpathout)
        if not os.path.exists(directory):
            os.makedirs(directory)

        #dd=np.load('/Users/Brian/Desktop/Tucker_Group/t_1/outputs/'+plnm+'/'+aor)
        dd = np.load(fpath + '/' + aor + 'extraction.npz')
        t = dd['time']
        all_lc = dd['lc']
        #hp=dd['hp']
        cp = dd['cp']
        exptime = dd['exptime']
        framtime = 0.1
        orbparams = dd['op']
        holdpos = dd['hold_pos']
        npix = dd['beta_np']
        chnum = dd['ch']
        red_all = []
        orbparams[6] = 2456164.6934  #only for wasp-101b

        ################################################################################
        pred_ecl_time = get_pred_time(orbparams, t, prisec)
        print(orbparams)
        print(pred_ecl_time - t[0])

        freeparams = [pred_ecl_time - t[0], orbparams[2]]

        if prisec == 'secondary':
            freeparams[1] = 0.0011
            ldc = []
        else:
            ldc = find_coeffs(
                orbparams[10], orbparams[9], orbparams[8], 2,
                'quadratic')  #(temp, log_g, metallicity, channel, type_limb)

        for apr in range(0, all_lc.shape[1]):

            directory = os.path.dirname(fpathout)
            if not os.path.exists(directory):
                os.makedirs(directory)
            lc = np.squeeze(all_lc[:, apr] * 2.35481)
            time = (t - t[0])
            time = np.squeeze(time)
            norm = np.nanmedian(lc)
            #print('Photon Noise limit is: ',(np.sqrt(norm*1.002)/(norm*1.002)))

            err = 1.1 * lc**0.5
            lc = lc / norm
            err = err / norm
            err = np.ones(len(lc)) * 0.0045  # overrides the photon-noise estimate above

            xpos = holdpos[:, 0]
            ypos = holdpos[:, 1]
            npix = dd['beta_np']

            ################################################################################
            ##########              NORMALIZE THE PIXEL VALUES          ####################
            ################################################################################
            timelength = len(t)
            #cp1=cp[1:4, 1:4, :]
            cp1 = cp
            dep_ind = cp1.shape[0] * cp1.shape[1]
            cp2 = np.reshape(cp1, (dep_ind, timelength))
            cp3 = cp2  #[:,start:end]
            for p in range(0, len(time)):
                norm = np.sum(cp3[:, p])
                cp3[:, p] /= norm
    ################################################################################
    ##########                  FILTER THE DATA                 ####################
    ################################################################################
    #fpathout='/Users/Brian/Desktop/Tucker_Group/Spitzer/mapping_files/outputs/'+plnm+'/'+aor+'/apr_fits/'
            filt_file = fpathout + 'post_filter_' + str(apr) + '.npz'

            #print(filt_file)
            if os.path.isfile(filt_file):
                if verbose == 'true': print('Found Filter File')
                ff = np.load(filt_file)
                lc = ff['lc']
                #cp3=ff['cp3']
                time = ff['time']
                xpos = ff['xpos']
                ypos = ff['ypos']
                npix = ff['npix']
                err = ff['err']
                found = 'true'

            else:
                found = 'false'
                if verbose == 'true': print('In Filter')
                lc, cp3, time, xpos, ypos, npix, err = filter_data(
                    lc, cp3, time, xpos, ypos, npix, dep_ind, err)
                if verbose == 'true': print('Out of Filter')

            plt.figure()
            plt.title(plnm + ' Ch: ' + str(chnum) + '\n' + str(aor) + '_' +
                      str(apr))
            plt.axvline(x=pred_ecl_time - t[0])
            plt.axvline(x=pred_ecl_time - orbparams[4] * 0.5 - t[0],
                        color='r',
                        linestyle='dashed')
            plt.axvline(x=pred_ecl_time + orbparams[4] * 0.5 - t[0],
                        color='r',
                        linestyle='dashed')
            plt.scatter(time, lc, s=1)
            if prisec == 'secondary': plt.ylim(0.95, 1.05)
            else: plt.ylim(0.95, 1.03)

            #plt.xlim(time[0], np.amax(time))
            plt.savefig(fpathout + 'raw_lc_plot_' + str(apr))
            if verbose == 'true':
                plt.draw()
                plt.pause(1200)
            plt.close('all')

            # time2=np.multiply(time, time)
            # time=time[np.newaxis]
            # time2=time2[np.newaxis]
            # t2hours=time2*24.0**2.0
            # thours=time*24.0

            ################################################################################
            ##########                  TRIM THE DATA                 ####################
            ################################################################################
            trim_time = 0.  #in minutes
            if trim_time != 0.:
                trim_time = trim_time / (60. * 24.0)  #convert to days
                start_index = int(trim_time / (exptime / 86400.0))
                end_ind = np.squeeze(lc)
                end_ind = end_ind.size

                print(exptime)

                lc = lc[start_index:end_ind]
                time = np.squeeze(time[start_index:end_ind])
                xpos = xpos[start_index:end_ind]
                ypos = ypos[start_index:end_ind]
                npix = npix[start_index:end_ind]
                err = err[start_index:end_ind]
                plt.figure()
                plt.scatter(time, lc, s=1)
                plt.draw()
################################################################################
##########             FIND NEIGHBORS                ####################
################################################################################

            if found == 'true':
                gw = ff['gw']
                nbr = ff['nbr']
            else:
                if verbose == 'true': print('In Find NBR')
                gw, nbr = find_nbr_qhull(xpos,
                                         ypos,
                                         npix,
                                         sm_num=50,
                                         a=1.0,
                                         b=1.7777,
                                         c=1.0,
                                         print_space=10000.)
                if verbose == 'true': print('Out of Find NBR')
            np.savez(fpathout + 'post_filter_' + str(apr),
                     lc=lc,
                     cp3=cp3,
                     time=time,
                     xpos=xpos,
                     ypos=ypos,
                     npix=npix,
                     err=err,
                     gw=gw,
                     nbr=nbr,
                     orbparams=orbparams,
                     pred_ecl_time=pred_ecl_time)
            ################################################################################
            ##########                  FIT THE DATA                 ####################
            ################################################################################

            if prisec == 'secondary':
                freeparams = [pred_ecl_time - t[0], orbparams[2], 0.005,
                              0.05]  #the last 2 free params are ramp terms
            else:
                if ramp_style == 'linear':
                    freeparams = [
                        pred_ecl_time - t[0], orbparams[2], 0.00001, 1.000001
                    ]
                if ramp_style == 'exp':
                    freeparams = [
                        pred_ecl_time - t[0], orbparams[2], 0.005, 0.05
                    ]
                if ramp_style == 'none':
                    freeparams = [pred_ecl_time - t[0], orbparams[2], 1.0, 1.0]
            params, m = initialize_model(np.squeeze(time), freeparams,
                                         orbparams, prisec, ldc)
            fluxcurve = m.light_curve(params)
            fit_params, pcov, infodict, mesg, ier = leastsq(
                nnbr_res,
                freeparams,
                args=(time, lc, err, gw, nbr, params, m, prisec, ramp_style),
                full_output=1)
            print('apr# ' + str(apr), fit_params)
            file_name = fpathout + 'apr_fit_' + str(apr)
            fileObject = open(file_name, 'wb')
            pickle.dump([lc, time, err, gw, nbr, fit_params], fileObject)
            fileObject.close()

            ################################################################################
            ##########                  PLOT THE FIT                ####################
            ################################################################################
            if prisec == 'secondary':
                params.t_secondary = fit_params[0]
                params.fp = fit_params[1]
            else:
                params.t0 = fit_params[0]
                params.rp = fit_params[1]
            eclipse_model = m.light_curve(params)
            ramp = ramp_model([fit_params[2], fit_params[3]], time, ramp_style)
            lc2 = np.squeeze(lc / eclipse_model / ramp)

            w1 = lc2[nbr]
            w2 = np.multiply(w1, gw)
            w3 = np.sum(w2, 1)
            w4 = np.divide(lc2, w3)
            w5 = w4 * eclipse_model
            resids = (w4 - 1.)  #/err
            res2 = (lc / eclipse_model - 1.0) / err

            pltbins = 64

            blc = bin_anything(w5, pltbins)
            btime = bin_anything(time, pltbins)

            if prisec == 'secondary':
                phase = 0.5 + (time + t[0] - pred_ecl_time) / orbparams[5]
            if prisec == 'primary':
                phase = 0.0 + (time + t[0] - pred_ecl_time) / orbparams[5]
            bphase = bin_anything(phase, pltbins)

            plt.figure()
            plt.title(plnm + ' Ch: ' + str(chnum) + '\n' + str(aor) + '_' +
                      str(apr))
            plt.scatter(bphase, blc, s=10)
            #plt.scatter(time, lc, alpha=0.1, color='b', s=1)
            plt.plot(np.squeeze(phase), eclipse_model, color='r')
            if prisec == 'secondary':
                plt.ylim(0.9975, 1.0035)
                plt.text(
                    0.47, 1.003, 'T_center O-C (s): ' + str(
                        round((fit_params[0] + t[0] - pred_ecl_time) * 86400.,
                              1)) + '                   Depth: ' +
                    str(round(fit_params[1] * 1.0e6, 0)) + ' ppm')
                plt.text(0.49, 1.0025,
                         'SDNR:  ' + str(round(np.std(resids), 6)))
            else:
                plt.ylim(0.983, 1.005)
                plt.text(
                    0.43, 0.9925, 'T_center O-C (s): ' + str(
                        round((fit_params[0] + t[0] - pred_ecl_time) * 86400.,
                              1)))
                plt.text(
                    0.43, 0.990, 'Transit Depth: ' +
                    str(round(fit_params[1]**2. * 100, 4)) + ' %')
                plt.text(0.43, 0.9875,
                         'SDNR:  ' + str(round(np.std(resids), 6)))
            plt.xlabel('Phase Units')
            plt.ylabel('Relative Flux')

            plt.savefig(fpathout + 'apr_fit_plot_' + str(apr))
            if verbose == 'true':
                plt.draw()
                plt.pause(1.2)

################################################################################
##########                 Get Red Noise                    ####################
################################################################################
            sdnr, beta_red = est_rednoise(resids, framtime, fpathout, aor, apr,
                                          plnm, chnum, prisec)
            if len(red_all) == 0:  # red_all becomes an ndarray later, so avoid == []
                red_all = np.ones(shape=(all_lc.shape[1], 5)) * 1000.
            red_all[apr, :] = [
                sdnr, beta_red * sdnr, beta_red,
                round(fit_params[1] * 1.e6, 1), fit_params[0]
            ]

        best = np.nanargmin(red_all, axis=0)
        best = best[1]

        np.save(fpathout + aor + '_summary', red_all)  # binary .npy copy
        np.savetxt(fpathout + aor + '_summary', red_all)  # human-readable copy
        if verbose == 'true': print(best)

        ################################################################################
        ##########                 Load the best apr results        ####################
        ################################################################################

        filename = fpathout + 'apr_fit_' + str(best)
        with open(filename, 'rb') as fileObject:
            lc, time, err, gw, nbr, fit_params = pickle.load(fileObject)
        err = err * red_all[best, 2]  # inflate the per-point errors by beta_red

        print('Best Beta_red', red_all[best, 2])
        params, m = initialize_model(np.squeeze(time), freeparams, orbparams,
                                     prisec, ldc)

        ################################################################################
        ##########                        run_mcmc                 ####################
        ################################################################################
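        # Re-fit the best aperture's light curve with emcee, seeding the
        # walkers at the least-squares solution loaded above.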
        theta = fit_params
        ndim, nwalkers = len(theta), 20
        sampler = emcee.EnsembleSampler(nwalkers,
                                        ndim,
                                        lnprob,
                                        args=(time, lc, err, gw, nbr, params,
                                              m, prisec, ramp_style))
        pos = [theta + 1.e-4 * np.random.randn(ndim) for _ in range(nwalkers)]
        sampler.run_mcmc(pos, 1500)

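        # Flatten the chains, discarding the first 50 steps per walker as burn-in.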
        samples = sampler.chain[:, 50:, :].reshape((-1, ndim))
        np.save(fpathout + aor + '_samples', samples)
        if prisec == 'primary':
            fig = corner.corner(samples, labels=["t0", "rp", "a1",
                                                 "a2"])  #, "A/R", "inc"])
        else:
            fig = corner.corner(samples, labels=["t0", "Fp", "a1",
                                                 "a2"])  #, "A/R", "inc"])
        fig.savefig(fpathout + aor + '_corner_' + str(best) + '.png')
        #plt.show(block=False)
        #plt.pause(0.5)

        # Derive medians and +/-1-sigma uncertainties from the 16th/50th/84th percentiles
        t0_mcmc, rp_mcmc, a1_mcmc, a2_mcmc = map(
            lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
            zip(*np.percentile(samples, [16, 50, 84], axis=0)))
        print(rp_mcmc, t0_mcmc)
        np.savez(fpathout + aor + '_mcmc_results',
                 rp_mcmc=rp_mcmc,
                 t0_mcmc=t0_mcmc,
                 a1_mcmc=a1_mcmc,
                 a2_mcmc=a2_mcmc,
                 best=best)
        phase = 0.0 + (time + t[0] - pred_ecl_time) / orbparams[5]
        bphase = bin_anything(phase, pltbins)

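        # Overplot the eclipse model for 100 random posterior draws to show
        # the spread of credible fits.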
        plt.figure()
        for t0, rp, a1, a2 in samples[np.random.randint(len(samples),
                                                        size=100)]:
            params.rp = rp
            params.t0 = t0
            ecl_mod = m.light_curve(params)

            plt.plot(phase, ecl_mod, color='k', alpha=0.05)

            ramp = ramp_model([a1, a2], time, ramp_style)
            lc2 = np.squeeze(lc / ecl_mod / ramp)
            w1 = lc2[nbr]
            w2 = np.multiply(w1, gw)
            w3 = np.sum(w2, 1)
            w4 = np.divide(lc2, w3)
            w5 = w4 * ecl_mod
            resids = (w4 - 1.)  #/err
            res2 = (lc / ecl_mod - 1.0) / err

            # only the final draw's binned curve survives for the scatter below
            blc = bin_anything(w5, pltbins)
            btime = bin_anything(time, pltbins)
        plt.scatter(bphase, blc, s=8, alpha=0.5)
        plt.xlabel("Phase Units")
        plt.ylabel("Relative Flux")
        plt.title(plnm + ' Ch: ' + str(chnum))
        plt.show()
        #plt.savefig('/Users/Brian/Desktop/W79_summary/'+str(chnum)+'_mcmc_fit')

    return None
Code example #30
0
def emcee(nsteps=500,
          ndim=8,
          nwalkers=16,
          walker_1=50.0,
          walker_2=30.0,
          walker_3=-6,
          walker_4=1.65e-5,
          walker_5=60,
          walker_6=40,
          walker_7=.2,
          walker_8=.8,
          sigma_1=50.0,
          sigma_2=50.0,
          sigma_3=2,
          sigma_4=8e-6,
          sigma_5=20,
          sigma_6=10,
          sigma_7=.5,
          sigma_8=.5,
          restart=True):
    '''Run affine-invariant MCMC (emcee's EnsembleSampler) over the disk model.

    Note: this function shadows the emcee module name, hence the local
    import further down.

    :param nsteps:     number of iterations
    :param ndim:       number of model dimensions
    :param nwalkers:   number of walkers
    :param walker_1:   initial guess for the 1st dimension - r_in
    :param walker_2:   initial guess for the 2nd dimension - delta_r
    :param walker_3:   initial guess for the 3rd dimension - log_m_disk
    :param walker_4:   initial guess for the 4th dimension - f_star
    :param walker_5:   initial guess for the 5th dimension - position_angle
    :param walker_6:   initial guess for the 6th dimension - inclination
    :param walker_7:   initial guess for the 7th dimension - xoffs for disk
    :param walker_8:   initial guess for the 8th dimension - yoffs for disk
    :param sigma_1 .. sigma_8:  per-dimension scatter used to initialize the walkers
    :param restart:    if True, resume from the positions stored in the chain CSV
    '''
    #r_out = r_in + delta_r
    '''walker_1_array = [walker_1]
    walker_2_array = [walker_2]
    walker_3_array = [walker_3]
    walker_4_array = [walker_4]
    walker_5_array = [walker_5]
    walker_6_array = [walker_6]
    p0 = [walker_1, walker_2, walker_3, walker_4, walker_5, walker_6]'''
    #chi_array = [np.sum(((y_data_1) - (walker_1_array*x_data_1+walker_2_array))**2/sigma_data_1**2)]
    if not restart:
        # fresh start: scatter the walkers around the supplied initial guesses
        p0 = np.random.normal(loc=(walker_1, walker_2, walker_3, walker_4,
                                   walker_5, walker_6, walker_7, walker_8),
                              size=(nwalkers, ndim),
                              scale=(sigma_1, sigma_2, sigma_3, sigma_4,
                                     sigma_5, sigma_6, sigma_7, sigma_8))
    else:
        # resume: take the last nwalkers positions from the previous chain CSV
        dg = pd.read_csv("chain_25steps_new8params.csv")
        p0 = np.zeros([nwalkers, ndim])
        for i in range(nwalkers):
            row = -(nwalkers - i)  # i-th of the last nwalkers rows
            p0[i, 0] = dg['r_in'].iloc[row]
            # the CSV's 'delta_r' column stores r_out, so recover delta_r = r_out - r_in
            p0[i, 1] = dg['delta_r'].iloc[row] - p0[i, 0]
            p0[i, 2] = dg['m_disk'].iloc[row]
            p0[i, 3] = dg['f_star'].iloc[row]
            p0[i, 4] = dg['position_angle'].iloc[row]
            p0[i, 5] = dg['inclination'].iloc[row]
            p0[i, 6] = dg['xoffs'].iloc[row]
            p0[i, 7] = dg['yoffs'].iloc[row]
    import emcee  # local import: this function shadows the emcee module name
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob)  #threads=10, a=4.0
    # emcee 2.x API: sample() is a generator that yields after every iteration
    run = sampler.sample(p0, iterations=nsteps, storechain=True)
    steps = []
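    # Step the sampler one iteration at a time so every step can be logged
    # and appended to the chain table below.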
    for i, result in enumerate(run):
        pos, lnprobs, rstate = result  # emcee 2.x yields (positions, log-probs, random state)

        new_step = [np.append(pos[k], lnprobs[k]) for k in range(nwalkers)]
        steps += new_step
        #print(pos)
        print(lnprobs)
        sys.stdout.write("Completed step {} of {}  \r".format(i + 1, nsteps))
        sys.stdout.flush()

    #steps = steps[5000:]
    df = pd.DataFrame(steps)
    df.columns = [
        'r_in', 'delta_r', 'm_disk', 'f_star', 'position_angle', 'inclination',
        'xoffs', 'yoffs', 'lnprob'
    ]
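    # Persist the full chain (plus lnprob) so a later run can restart from it.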
    df.to_csv('chain_475steps_new8params.csv')
    '''max_lnprob = df['lnprob'].max()
    max_m = df.x[df.lnprob.idxmax()]
    max_b = df.y[df.lnprob.idxmax()]'''

    print(np.shape(sampler.chain))
    '''samples = sampler.chain[:, 1000:, :].reshape((-1, ndim))
    fig = corner.corner(samples, labels=["$m$", "$b$"],truths=[max_m, max_b])
    fig.savefig("triangle1.png")'''
    '''print(max_lnprob)
    print(max_m)
    print(max_b)'''
    print("Finished MCMC.")
    print("Mean acceptance fraction: {0:.3f}".format(
        np.mean(sampler.acceptance_fraction)))
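    # A mean acceptance fraction in roughly the 0.2-0.5 range is the usual
    # rule of thumb for a healthy emcee run.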

    #plt.close()

    cmap_light = sns.diverging_palette(220, 20, center='dark', n=nwalkers)  # per-walker palette (currently unused)
    #colors = ['red', 'blue', 'green', 'purple', 'yellow', 'black']
    fig, ax = plt.subplots()
    for i in range(nwalkers):
        #c = colors[i]
        ax.plot(df['r_in'][i::nwalkers],
                df['delta_r'][i::nwalkers],
                linestyle='-',
                marker='.',
                alpha=0.5)
    plt.show(block=False)

    w1m = df['r_in'][0::nwalkers]  # walker-0 trace of r_in (shape check below)
    w2m = df['delta_r'][1::nwalkers]  # walker-1 trace of delta_r
    fig, (ax0, ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8) = plt.subplots(ndim + 1)
    x = np.arange(0, len(w1m))
    print(np.shape(x), np.shape(w1m))
    print(np.shape(x), np.shape(w2m))
    for i in range(0, nwalkers):
        ax0.plot(x, df['r_in'][i::nwalkers])
        ax1.plot(x, df['delta_r'][i::nwalkers])
        ax2.plot(x, df['m_disk'][i::nwalkers])
        ax3.plot(x, df['f_star'][i::nwalkers])
        ax4.plot(x, df['position_angle'][i::nwalkers])
        ax5.plot(x, df['inclination'][i::nwalkers])
        ax6.plot(x, df['xoffs'][i::nwalkers])
        ax7.plot(x, df['yoffs'][i::nwalkers])
        ax8.plot(x, df['lnprob'][i::nwalkers])
    fig.suptitle(
        'r_in, delta_r, m_disk, f_star, position_angle, inclination, xoffs, yoffs, lnprob'
    )
    plt.show(block=False)

    print(np.shape(x), np.shape(w1m))