def fit(theta_init, x, ys, yerr, plims):
    """
    Fit the GP by MCMC and plot the outcome.

    theta_init must be given in linear space (not log); it is
    log-transformed here before being handed to MCMC. plims should be in
    linear space as well and is forwarded unchanged.
    """
    # "../figs" is handed to both MCMC and make_plot as their directory
    # argument.
    out_dir = "../figs"
    log_theta = np.log(theta_init)
    # 200, 500 and "test" are positional arguments of MCMC (presumably
    # burn-in steps, run steps and a run label -- confirm against MCMC).
    chain = MCMC(log_theta, x, ys, yerr, plims, 200, 500, "test", out_dir)
    make_plot(chain, x, ys, yerr, 1, out_dir, traces=True)
def fit(
    x,
    y,
    yerr,
    id,
    p_init,
    plims,
    burnin=500,
    run=1500,
    npts=48,
    cutoff=100,
    sine_kernel=False,
    acf=False,
    runMCMC=True,
    plot=False,
):
    """
    takes x, y, yerr and initial guesses and priors for period and does
    the full GP MCMC.
    Tuning parameters include cutoff (number of days), npts (number of points
    per bin).
    """

    # measure ACF period
    if acf:
        corr_run(x, y, yerr, id, "/Users/angusr/Python/GProtation/code")

    if sine_kernel:
        print "sine kernel"
        theta_init = [np.exp(-5), np.exp(7), np.exp(0.6), np.exp(-16), p_init]
        print theta_init
        from GProtation import MCMC, make_plot
    else:
        print "cosine kernel"
        theta_init = [1e-2, 1.0, 1e-2, p_init]
        print "theta_init = ", np.log(theta_init)
        from GProtation_cosine import MCMC, make_plot

    xb, yb, yerrb = bin_data(x, y, yerr, npts)  # bin data
    m = xb < cutoff  # truncate

    theta_init = np.log(theta_init)
    DIR = "cosine"
    if sine_kernel:
        DIR = "sine"

    print theta_init
    if runMCMC:
        sampler = MCMC(theta_init, xb[m], yb[m], yerrb[m], plims, burnin, run, id, DIR)

    # make various plots
    if plot:
        with h5py.File("%s/%s_samples.h5" % (DIR, str(int(id)).zfill(4)), "r") as f:
            samples = f["samples"][...]
        m = x < cutoff
        mcmc_result = make_plot(samples, x[m], y[m], yerr[m], id, DIR, traces=False, triangle=False, prediction=True)
Exemple #3
0
def recover_injections(id, x, y, yerr, path, burnin, run, nwalkers=32,
                       plot=True):
    """
    Take x, y, yerr, initialise the period from a periodogram and do MCMC.

    id: star id (used in the names of the files read and written).
    x, y, yerr: time, flux and error arrays.
    path: path where you want to save the output.
    burnin: the number of burnin steps.
    run: the number of steps to run for.
    nwalkers: the number of walkers.
    plot: if True then make_plot is called on the saved samples.
    """

    # Initialise with the periodogram: reuse a previously saved result when
    # it can be read, otherwise compute it. Only read/parse failures trigger
    # the fallback, so KeyboardInterrupt and SystemExit still propagate.
    try:
        p_init = np.genfromtxt("{0}/{1}_pgramresult.txt".format(path, id))
    except (IOError, OSError, ValueError):
        p_init = periodograms(id, x, y, yerr, path, plot=True)

    if p_init < .5:  # small periods raise an error with george.
        p_init = 1.

    # If using lnprob, plims = [pmin, pmax] for a uniform prior.
    # If using Gprob, plims = [mu, sigma] for a Gaussian prior.
    plims = np.log([p_init - .5 * p_init, p_init + 2 * p_init])
#     plims = np.log([p_init, p_init*.1])  # mean, sigma

    print("Initial period and limits:", p_init, np.exp(plims))

    # assign theta_init (log-parameters; the last element is the period)
    theta_init = np.log([np.exp(-5), np.exp(7), np.exp(.6), np.exp(-16),
                         p_init])
    print("\n", "log(theta_init) = ", theta_init)
    print("theta_init = ", np.exp(theta_init), "\n")

    # plot initialisation: GP prediction at the initial parameters
    t = np.exp(theta_init)
    k = t[0] * ExpSquaredKernel(t[1]) * ExpSine2Kernel(t[2], t[3])
    gp = george.GP(k)
    gp.compute(x, yerr)
    xs = np.linspace(x[0], x[-1], 1000)
    mu, cov = gp.predict(y, xs)
    plt.clf()
    plt.errorbar(x, y, yerr=yerr, **reb)
    plt.plot(xs, mu, color=cols.blue)
    plt.savefig("{0}/{1}_init".format(path, id))

    # set up MCMC: walkers start in a small ball around theta_init
    ndim = len(theta_init)
    p0 = [theta_init+1e-4*np.random.rand(ndim) for i in range(nwalkers)]
    args = (x, y, yerr, plims)

    # time one lhf call and extrapolate to the whole run
    start = time.time()
    print("lnprob = ", lnprob(theta_init, x, y, yerr, plims))
#     print("lnprob = ", Gprob(theta_init, x, y, yerr, plims))
    end = time.time()
    tm = end - start
    total = tm*nwalkers*run + tm*nwalkers*burnin
    print("1 lhf call takes ", tm, "seconds")
    print("burn in will take", tm * nwalkers * burnin, "s")
    print("run will take", tm * nwalkers * run, "s")
    print("total = ", total/60, "mins,", total/3600, "hours")

    # run MCMC
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=args)
#     sampler = emcee.EnsembleSampler(nwalkers, ndim, Gprob, args=args)
    print("burning in...")
    start = time.time()
    p0, lp, state = sampler.run_mcmc(p0, burnin)
    sampler.reset()  # discard the burn-in samples
    print("production run...")
    p0, lp, state = sampler.run_mcmc(p0, run)
    end = time.time()
    print("actual time = ", (end - start)/60, "mins")

    # save samples; the context manager closes the file even if the write
    # fails part-way.
    samples_file = "%s/%s_samples.h5" % (path, id)
    with h5py.File(samples_file, "w") as f:
        data = f.create_dataset("samples", np.shape(sampler.chain))
        data[:, :] = np.array(sampler.chain)

    # make various plots
    if plot:
        with h5py.File(samples_file, "r") as f:
            samples = f["samples"][...]
        mcmc_result = make_plot(samples, x, y, yerr, id, path, traces=True,
                                tri=True, prediction=True)
Exemple #4
0
def mcmc_fit(x, y, yerr, p_init, p_max, id, RESULTS_DIR, truths, burnin=500,
             nwalkers=12, nruns=10, full_run=500, diff_threshold=.5,
             n_independent=1000):
    """
    Run the MCMC in repeated chunks until convergence is reported.

    x, y, yerr: the data; x may be a single array or a sequence of light
        curve sections (both cases are handled when reporting the size).
    p_init: initial period in days; its log is passed to MyModel.
    p_max: passed to MyModel unchanged.
    id: identifier used in the names of the output files.
    RESULTS_DIR: directory receiving "<id>.h5" and "<id>_time.txt".
    truths: forwarded to make_plot.
    burnin: number of burn-in steps.
    nwalkers: number of walkers.
    nruns: maximum number of production chunks.
    full_run: number of steps per production chunk.
    diff_threshold, n_independent: forwarded to evaluate_convergence.
    """

    try:
        print("Total number of points  = ", sum([len(i) for i in x]))
        print("Number of light curve sections = ", len(x))
    except TypeError:
        print("Total number of points  = ", len(x))

    theta_init = np.log([np.exp(-12), np.exp(7), np.exp(-1), np.exp(-17),
                         p_init])
    # Step counts are kept as integers: a float count cannot be used as an
    # array dimension or iteration count.
    runs = np.zeros(nruns, dtype=int) + int(full_run)
    ndim = len(theta_init)

    print("p_init = ", p_init, "days, log(p_init) = ", np.log(p_init),
          "p_max = ", p_max)

    # Time the LHF call.
    start = time.time()
    mod = MyModel(x, y, yerr, np.log(p_init), p_max)
    print("lnlike = ", mod.lnlike_split(theta_init), "lnprior = ",
          mod.Glnprior(theta_init), "\n")
    end = time.time()
    tm = end - start
    print("1 lhf call takes ", tm, "seconds")
    print("burn in will take", tm * nwalkers * burnin, "s")
    print("each run will take", tm * nwalkers * runs[0]/60, "mins")
    print("total = ", (tm * nwalkers * np.sum(runs) + tm * nwalkers *
                       burnin)/60, "mins")

    # Run MCMC.
    mod = MyModel(x, y, yerr, np.log(p_init), p_max)
    model = emcee3.SimpleModel(mod.lnlike_split, mod.Glnprior)
    # Walkers start in a small ball around theta_init.
    p0 = [theta_init + 1e-4 * np.random.rand(ndim) for i in range(nwalkers)]
    ensemble = emcee3.Ensemble(model, p0)
    moves = emcee3.moves.KDEMove()
    sampler = emcee3.Sampler(moves)
#     sampler = emcee3.Sampler()

    print("burning in...")
    total_start = time.time()
    ensemble = sampler.run(ensemble, burnin)

    # NOTE(review): flat and logprob are not used below.
    flat = sampler.get_coords(flat=True)
    logprob = sampler.get_log_probability(flat=True)
    # NOTE(review): the ensemble is rebuilt from the initial positions p0
    # here, so the production runs do not start from the burned-in
    # ensemble -- confirm this is intended.
    ensemble = emcee3.Ensemble(model, p0)

    # repeating MCMC runs.
    autocorr_times, mean_ind, mean_diff = [], [], []
    for i, run in enumerate(runs):
        nsteps = int(run)
        print("run {0} of {1}".format(i, len(runs)))
        print("production run, {0} steps".format(nsteps))
        start = time.time()
        ensemble = sampler.run(ensemble, nsteps)
        end = time.time()
        print("time taken = ", (end - start)/60, "minutes")

        # Fetch the flattened chain once per iteration and reuse it.
        coords = sampler.get_coords(flat=True)

        # Overwrite the samples file with everything sampled so far; the
        # context manager closes it even if the write fails.
        with h5py.File(os.path.join(RESULTS_DIR, "{0}.h5".format(id)),
                       "w") as f:
            data = f.create_dataset("samples", np.shape(coords))
            data[:, :] = coords

        print("samples = ", np.shape(coords))
        results = make_plot(sampler, x, y, yerr, id, RESULTS_DIR, truths,
                            traces=True, tri=True, prediction=True)
        conv, autocorr_times, ind_samp, diff = \
                evaluate_convergence(coords, autocorr_times, diff_threshold,
                                     n_independent)
        mean_ind.append(ind_samp)
        mean_diff.append(diff)
        print("Converged?", conv)
        if conv:
            break

    total_end = time.time()
    total_time = total_end - total_start
    print("Total time taken = ", total_time/60., "minutes", total_time/3600.,
          "hours")

    with open(os.path.join(RESULTS_DIR, "{0}_time.txt".format(id)), "w") as f:
        f.write("{}".format(total_time))

#     col = "b"
#     if conv:
#         col = "r"
#     if autocorr_times:
#         plt.clf()
#         plt.plot(autocorr_times, color=col)
#         plt.savefig(os.path.join(RESULTS_DIR, "{0}_acorr".format(id)))
#         plt.clf()
#         plt.plot(mean_ind, color=col)
#         plt.savefig(os.path.join(RESULTS_DIR, "{0}_ind".format(id)))
#         plt.clf()
#         plt.plot(mean_diff, color=col)
#         plt.savefig(os.path.join(RESULTS_DIR, "{0}_diff".format(id)))
Exemple #5
0
                                                 skip_header=2).T
    for i, kid in enumerate(kids[1:]):
        x, y, yerr = load_data(str(int(kid)))
        x -= x[0]
        m = x < 5
        x, y, yerr = x[m], y[m], yerr[m]

        # initialise
        init_file = "../data/%s_init.txt" % int(kid)
        if os.path.exists(init_file): theta_init = np.genfromtxt(init_file).T
        else: theta_init = [1, 1, 1, np.log(p[i]), 1.]

        k = theta_init[0] * ExpSquaredKernel(theta_init[1]) \
                * ExpSine2Kernel(theta_init[2], theta_init[3])
        gp = george.GP(k)
        gp.compute(x, np.sqrt(theta_init[4]**2+yerr**2))

        # predict
        xs = np.linspace(x[0], x[-1], 1000)
        mu, cov = gp.predict(y, xs)
        plt.clf()
        plt.errorbar(x, y, yerr=yerr, **reb)
        plt.plot(xs, mu)
        plt.show()

        plims = [2., 30.]
        DIR = "../figs"
        sampler = MCMC(theta_init, x, y, yerr, plims, 100, 200, int(kid), DIR)
        make_plot(sampler, x, y, yerr, int(kid), DIR, traces=True)
        assert 0
def fit(x, y, yerr, id, p_init, plims, DIR, burnin=500, run=1500, npts=48,
        cutoff=100, sine_kernel=False, acf=False, runMCMC=True, plot=False):
    """
    Run the full GP MCMC on a light curve.

    x, y, yerr: data arrays.
    id: identifier forwarded to corr_run and MCMC and used in the samples
        file name; make_plot receives it zero-padded to 4 digits.
    p_init: initial guess for the period (last element of theta_init).
    plims: period prior limits, forwarded to MCMC unchanged.
    DIR: where to save output.
    burnin, run: step counts forwarded to MCMC.
    npts: number of points per bin (passed to bin_data).
    cutoff: number of days; only points with x < cutoff are used.
    sine_kernel: selects GProtation (True) or GProtation_cosine (False).
    acf: if True, call corr_run first.
    runMCMC: if True, run the sampler.
    plot: if True, load the saved samples and call make_plot.
    """

    if acf:
        # measure ACF period
        corr_run(x, y, yerr, id, "/Users/angusr/Python/GProtation/code")

    # Pick the kernel module and its initial parameters (linear space).
    if not sine_kernel:
        print("cosine kernel")
        theta_init = [1e-2, 1., 1e-2, p_init]
        print("theta_init = ", np.log(theta_init))
        from GProtation_cosine import MCMC, make_plot
    else:
        print("sine kernel")
        theta_init = [np.exp(-5), np.exp(7), np.exp(.6), np.exp(-16), p_init]
        print(theta_init)
        from GProtation import MCMC, make_plot

    # Bin the data, then keep only the binned points before the cutoff.
    xb, yb, yerrb = bin_data(x, y, yerr, npts)
    binned_mask = xb < cutoff

    # The sampler is given log-parameters.
    theta_init = np.log(theta_init)
    print(theta_init)

    if runMCMC:
        sampler = MCMC(theta_init, xb[binned_mask], yb[binned_mask],
                       yerrb[binned_mask], plims, burnin, run, id, DIR)

    if plot:
        # Read the samples back from disk, then plot with both the binned
        # and the unbinned (truncated) data.
        samples_file = "%s/%s_samples.h5" % (DIR, id)
        with h5py.File(samples_file, "r") as f:
            samples = f["samples"][...]
        raw_mask = x < cutoff
        padded_id = str(int(id)).zfill(4)
        mcmc_result = make_plot(
            samples,
            xb[binned_mask], yb[binned_mask], yerrb[binned_mask],
            x[raw_mask], y[raw_mask], yerr[raw_mask],
            padded_id, DIR,
            traces=True, tri=True, prediction=True)
Exemple #7
0
def recover_injections(id, x, y, yerr, fn, burnin, run, interval, tol,
                       npts=10, nwalkers=32, plot=True):
    """
    Take x, y, yerr, calculate ACF period for initialisation and do MCMC.

    id: identifier used in the samples file name and passed to make_plot.
    x, y, yerr: data arrays.
    fn: forwarded to make_plot.
    burnin: the number of burnin steps.
    run: the number of steps to run for.
    interval: not used in this function.
    tol: fractional half-width of the period limits around the ACF period.
    npts: number of points per period (used for the subsampling step).
    nwalkers: the number of walkers.
    plot: if True then make_plot is called on the saved samples.
    """

    p_init, acf_smooth, lags, _, _, _, _, _, _ = simple_acf(x, y)

    print("acf period = ", p_init)

    if p_init < .1:  # prevent unphysical periods
        p_init = 10.

    # Format data
    plims = np.log([p_init - tol*p_init, p_init + tol*p_init])

    print(p_init, np.exp(plims))

    # Subsampling step giving roughly npts points per period, using the
    # factor 48 from the original code (presumably 48 samples per day --
    # confirm). Clamped to at least 1: for short periods the integer
    # truncation gives 0, which would divide by zero below and is not a
    # valid slice step.
    sub = max(1, int(p_init / float(npts) * 48))
    ppd = 48. / sub
    ppp = ppd * p_init
    print("sub = ", sub, "points per day =", ppd, "points per period =", ppp)
    # subsample (used for plotting only)
    xsub, ysub, yerrsub = x[::sub], y[::sub], yerr[::sub]
    # the MCMC itself runs on the first 100 points
    xb, yb, yerrb = x[:100], y[:100], yerr[:100]
    plt.clf()
    plt.plot(xb, yb, "k.")
    plt.savefig("gptest")

    theta_init = np.log([np.exp(-5), np.exp(7), np.exp(.6), np.exp(-16),
                         p_init])

    print("\n", "log(theta_init) = ", theta_init)
    print("theta_init = ", np.exp(theta_init), "\n")

    # set up MCMC: walkers start in a small ball around theta_init
    ndim = len(theta_init)
    p0 = [theta_init+1e-4*np.random.rand(ndim) for i in range(nwalkers)]
    args = (xb, yb, yerrb, plims)

    # time one lhf call and extrapolate to the whole run
    start = time.time()
    print("lnprob = ", lnprob(theta_init, xb, yb, yerrb, plims))
    end = time.time()
    tm = end - start
    print("1 lhf call takes ", tm, "seconds")
    print("burn in will take", tm * nwalkers * burnin, "s")
    print("run will take", tm * nwalkers * run, "s")
    print("total = ", (tm * nwalkers * run + tm * nwalkers * burnin)/60,
          "mins")

    # run MCMC
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=args)
    print("burning in...")
    start = time.time()
    p0, lnp, state = sampler.run_mcmc(p0, burnin)
    sampler.reset()  # discard the burn-in samples
    print("production run...")
    p0, lnp, state = sampler.run_mcmc(p0, run)
    end = time.time()
    print("actual time = ", end - start)

    # save samples; the context manager closes the file even if the write
    # fails part-way.
    samples_file = "%s_samples.h5" % id
    with h5py.File(samples_file, "w") as f:
        data = f.create_dataset("samples", np.shape(sampler.chain))
        data[:, :] = np.array(sampler.chain)

    # make various plots
    if plot:
        with h5py.File(samples_file, "r") as f:
            samples = f["samples"][...]
        mcmc_result = make_plot(samples, xsub, ysub, yerrsub, id, fn,
                                traces=True, tri=True, prediction=True)
Exemple #8
0
# print "acf period, err = ", p_init

# # load samples
# with h5py.File("%s/%s_samples.h5" % (DIR, str(int(id)).zfill(4)),
#                "r") as f:
#     samples = f["samples"][...]
# nwalkers, nsteps, ndims = np.shape(samples)
# flat = np.reshape(samples, (nwalkers * nsteps, ndims))
# mcmc_result = map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]),
#                   zip(*np.percentile(flat, [16, 50, 84], axis=0)))

# bin and truncate data
# npts = int(p_init[0] / 10. * 48)  # 10 points per period
# xb, yb, yerrb = bin_data(x, y, yerr, npts)  # bin data
# m = xb < cutoff  # truncate
# Top-level script: for ids 0..99, load the saved MCMC samples from the
# "sine" directory and plot them against the truncated data.
# x, y and yerr are expected to be defined earlier (not visible here).
m = x < 10  # keep only points with x < 10

DIR = "sine"
for id in range(100):
    # load samples (file name uses the id zero-padded to 4 digits)
    with h5py.File("%s/%s_samples.h5" % (DIR, str(int(id)).zfill(4)),
                   "r") as f:
        samples = f["samples"][...]
    # flatten the (nwalkers, nsteps, ndims) chain to one row per sample
    nwalkers, nsteps, ndims = np.shape(samples)
    flat = np.reshape(samples, (nwalkers * nsteps, ndims))
    # per parameter: (median, 84th - 50th, 50th - 16th percentile).
    # NOTE(review): mcmc_result is not used below; under Python 3 map() is
    # lazy, so nothing is evaluated here.
    mcmc_result = map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]),
                      zip(*np.percentile(flat, [16, 50, 84], axis=0)))
    # make various plots
    make_plot(samples, x[m], y[m], yerr[m], id, DIR, traces=False, tri=True,
              prediction=False)
Exemple #9
0
def recover_injections(id,
                       x,
                       y,
                       yerr,
                       path,
                       burnin,
                       run,
                       nwalkers=32,
                       plot=True):
    """
    Take x, y, yerr, initialise the period from a periodogram and do MCMC.

    id: star id (used in the names of the files read and written).
    x, y, yerr: time, flux and error arrays.
    path: path where you want to save the output.
    burnin: the number of burnin steps.
    run: the number of steps to run for.
    nwalkers: the number of walkers.
    plot: if True then make_plot is called on the saved samples.
    """

    # Initialise with the periodogram: reuse a previously saved result when
    # it can be read, otherwise compute it. Only read/parse failures trigger
    # the fallback, so KeyboardInterrupt and SystemExit still propagate.
    try:
        p_init = np.genfromtxt("{0}/{1}_pgramresult.txt".format(path, id))
    except (IOError, OSError, ValueError):
        p_init = periodograms(id, x, y, yerr, path, plot=True)

    if p_init < .5:  # small periods raise an error with george.
        p_init = 1.

    # If using lnprob, plims = [pmin, pmax] for a uniform prior.
    # If using Gprob, plims = [mu, sigma] for a Gaussian prior.
    plims = np.log([p_init - .5 * p_init, p_init + 2 * p_init])
    #     plims = np.log([p_init, p_init*.1])  # mean, sigma

    print("Initial period and limits:", p_init, np.exp(plims))

    # assign theta_init (log-parameters; the last element is the period)
    theta_init = np.log(
        [np.exp(-5), np.exp(7),
         np.exp(.6), np.exp(-16), p_init])
    print("\n", "log(theta_init) = ", theta_init)
    print("theta_init = ", np.exp(theta_init), "\n")

    # plot initialisation: GP prediction at the initial parameters
    t = np.exp(theta_init)
    k = t[0] * ExpSquaredKernel(t[1]) * ExpSine2Kernel(t[2], t[3])
    gp = george.GP(k)
    gp.compute(x, yerr)
    xs = np.linspace(x[0], x[-1], 1000)
    mu, cov = gp.predict(y, xs)
    plt.clf()
    plt.errorbar(x, y, yerr=yerr, **reb)
    plt.plot(xs, mu, color=cols.blue)
    plt.savefig("{0}/{1}_init".format(path, id))

    # set up MCMC: walkers start in a small ball around theta_init
    ndim = len(theta_init)
    p0 = [theta_init + 1e-4 * np.random.rand(ndim) for i in range(nwalkers)]
    args = (x, y, yerr, plims)

    # time one lhf call and extrapolate to the whole run
    start = time.time()
    print("lnprob = ", lnprob(theta_init, x, y, yerr, plims))
    #     print("lnprob = ", Gprob(theta_init, x, y, yerr, plims))
    end = time.time()
    tm = end - start
    total = tm*nwalkers*run + tm*nwalkers*burnin
    print("1 lhf call takes ", tm, "seconds")
    print("burn in will take", tm * nwalkers * burnin, "s")
    print("run will take", tm * nwalkers * run, "s")
    print("total = ", total/60, "mins,", total/3600, "hours")

    # run MCMC
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=args)
    #     sampler = emcee.EnsembleSampler(nwalkers, ndim, Gprob, args=args)
    print("burning in...")
    start = time.time()
    p0, lp, state = sampler.run_mcmc(p0, burnin)
    sampler.reset()  # discard the burn-in samples
    print("production run...")
    p0, lp, state = sampler.run_mcmc(p0, run)
    end = time.time()
    print("actual time = ", (end - start) / 60, "mins")

    # save samples; the context manager closes the file even if the write
    # fails part-way.
    samples_file = "%s/%s_samples.h5" % (path, id)
    with h5py.File(samples_file, "w") as f:
        data = f.create_dataset("samples", np.shape(sampler.chain))
        data[:, :] = np.array(sampler.chain)

    # make various plots
    if plot:
        with h5py.File(samples_file, "r") as f:
            samples = f["samples"][...]
        mcmc_result = make_plot(samples,
                                x,
                                y,
                                yerr,
                                id,
                                path,
                                traces=True,
                                tri=True,
                                prediction=True)