Example #1
def test_bounds(seed=42):
    bounds = [(-1.0, 0.3), (-2.0, 5.0)]
    kernel = terms.RealTerm(log_a=0.1, log_c=0.5, bounds=bounds)
    b0 = kernel.get_parameter_bounds()
    assert all(np.allclose(a, b) for a, b in zip(b0, bounds))

    kernel = terms.RealTerm(log_a=0.1,
                            log_c=0.5,
                            bounds=dict(zip(["log_a", "log_c"], bounds)))
    assert all(
        np.allclose(a, b) for a, b in zip(b0, kernel.get_parameter_bounds()))
Example #2
def test_predict(seed=42):
    np.random.seed(seed)
    x = np.linspace(1, 59, 300)
    t = np.sort(np.random.uniform(10, 50, 100))
    yerr = np.random.uniform(0.1, 0.5, len(t))
    y = np.sin(t)

    kernel = terms.RealTerm(0.1, 0.5)
    for term in [(0.6, 0.7, 1.0), (0.1, 0.05, 0.5, -0.1)]:
        kernel += terms.ComplexTerm(*term)
    gp = GP(kernel)

    gp.compute(t, yerr)
    K = gp.get_matrix(include_diagonal=True)
    Ks = gp.get_matrix(x, t)
    true_mu = np.dot(Ks, np.linalg.solve(K, y))
    true_cov = gp.get_matrix(x, x) - np.dot(Ks, np.linalg.solve(K, Ks.T))

    mu, cov = gp.predict(y, x)

    _, var = gp.predict(y, x, return_var=True)
    assert np.allclose(mu, true_mu)
    assert np.allclose(cov, true_cov)
    assert np.allclose(var, np.diag(true_cov))

    mu0, cov0 = gp.predict(y, t)
    mu, cov = gp.predict(y)
    assert np.allclose(mu0, mu)
    assert np.allclose(cov0, cov)
Example #3
def autocorr_ml(y, thin=1, c=5.0):
    """Compute the autocorrelation using a GP model."""
    # Compute the initial estimate of tau using the standard method
    init = autocorr_new(y, c=c)
    z = y[:, ::thin]
    N = z.shape[1]

    # Build the GP model
    tau = max(1.0, init / thin)
    bounds = [(-5.0, 5.0), (-np.log(N), 0.0)]
    kernel = terms.RealTerm(
        np.log(0.9 * np.var(z)),
        -np.log(tau),
        bounds=bounds
    )
    kernel += terms.RealTerm(
        np.log(0.1 * np.var(z)),
        -np.log(0.5 * tau),
        bounds=bounds,
    )

    gp = celerite.GP(kernel, mean=np.mean(z))
    gp.compute(np.arange(z.shape[1]))

    # Define the objective
    def nll(p):
        # Update the GP model
        gp.set_parameter_vector(p)
        # Loop over the chains and compute the likelihoods
        v, g = zip(*(gp.grad_log_likelihood(z0, quiet=True) for z0 in z))
        # Combine the datasets
        return -np.sum(v), -np.sum(g, axis=0)

    # Optimize the model
    p0 = gp.get_parameter_vector()
    bounds = gp.get_parameter_bounds()
    soln = minimize(nll, p0, jac=True, bounds=bounds)
    gp.set_parameter_vector(soln.x)

    # Compute the maximum likelihood tau
    a, c = kernel.coefficients[:2]
    tau = thin * 2 * np.sum(a / c) / np.sum(a)

    return tau
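
A minimal usage sketch (not from the original source): autocorr_ml expects a two-dimensional array of chains with shape (n_chains, n_steps); the random array below is only a placeholder for real sampler output.

# Hypothetical usage; `chains` stands in for real MCMC chains of one parameter.
chains = np.random.randn(4, 20000)
tau_est = autocorr_ml(chains, thin=10)
print("ML autocorrelation time estimate: {0:.2f}".format(tau_est))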
Example #4
def find_celerite_MAP(t,
                      y,
                      yerr,
                      sigma0=0.1,
                      tau0=100,
                      prior='None',
                      set_bounds=True,
                      sig_lims=[0.02, 0.7],
                      tau_lims=[1, 550],
                      verbose=False):

    kernel = terms.RealTerm(log_a=2 * np.log(sigma0), log_c=np.log(1.0 / tau0))
    gp = celerite.GP(kernel, mean=np.mean(y))
    gp.compute(t, yerr)

    # set initial params
    initial_params = gp.get_parameter_vector()
    if verbose:
        print(initial_params)

    # set boundaries
    if set_bounds:
        if verbose:
            print('sig_lims:', sig_lims, 'tau_lims:', tau_lims)
        tau_bounds, sigma_bounds = tau_lims, sig_lims
        loga_bounds = (2 * np.log(min(sigma_bounds)),
                       2 * np.log(max(sigma_bounds)))
        logc_bounds = (np.log(1 / max(tau_bounds)),
                       np.log(1 / min(tau_bounds)))
        bounds = [loga_bounds, logc_bounds]

    else:  # - inf to + inf
        bounds = gp.get_parameter_bounds()
    if verbose:
        print(bounds)

    # wrap the neg_log_posterior for a chosen prior
    def neg_log_like(params, y, gp):
        return neg_log_posterior(params, y, gp, prior, 'celerite')

    # find MAP solution
    r = minimize(neg_log_like,
                 initial_params,
                 method="L-BFGS-B",
                 bounds=bounds,
                 args=(y, gp))
    gp.set_parameter_vector(r.x)
    res = gp.get_parameter_dict()

    tau_fit = np.exp(-res['kernel:log_c'])
    sigma_fit = np.exp(res['kernel:log_a'] / 2)
    if verbose:
        print('sigma_fit', sigma_fit, 'tau_fit', tau_fit)
    return sigma_fit, tau_fit, gp
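
A hedged usage sketch with synthetic data (assumed, not from the original source); note that find_celerite_MAP relies on a neg_log_posterior helper defined elsewhere in the source module.

# Assumed usage with synthetic data; neg_log_posterior must be importable.
np.random.seed(0)
t = np.sort(np.random.uniform(0, 1000, 300))
y = np.sin(t / 50.0) + np.random.normal(0, 0.05, len(t))
yerr = np.full_like(t, 0.05)
sigma_fit, tau_fit, gp = find_celerite_MAP(t, y, yerr, sigma0=0.2, tau0=100,
                                           verbose=True)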
Example #5
 def generate_celerite(self,
                       no_chains=4,
                       length=2000000,
                       log_c1=-6.0,
                       log_c2=-2.0):
     # from https://dfm.io/posts/autocorr/ Foreman-Mackey
     # Build the celerite model:
     import celerite
     from celerite import terms
     kernel = terms.RealTerm(log_a=0.0, log_c=log_c1)
     kernel += terms.RealTerm(log_a=0.0, log_c=log_c2)
     # The true autocorrelation time can be calculated analytically:
     true_tau = sum(2 * np.exp(t.log_a - t.log_c) for t in kernel.terms)
     true_tau /= sum(np.exp(t.log_a) for t in kernel.terms)
     self.known_autocorr_time = True
     self.autocorr_time_true = true_tau
     # Simulate a set of chains:
     gp = celerite.GP(kernel)
     t = np.arange(length)
     gp.compute(t)
     self.x = gp.sample(size=no_chains)
Example #6
def test_log_likelihood(method, seed=42):
    np.random.seed(seed)
    x = np.sort(np.random.rand(10))
    yerr = np.random.uniform(0.1, 0.5, len(x))
    y = np.sin(x)

    kernel = terms.RealTerm(0.1, 0.5)
    gp = GP(kernel, method=method)
    with pytest.raises(RuntimeError):
        gp.log_likelihood(y)

    for term in [(0.6, 0.7, 1.0)]:
        kernel += terms.ComplexTerm(*term)
        gp = GP(kernel, method=method)

        assert gp.computed is False

        with pytest.raises(ValueError):
            gp.compute(np.random.rand(len(x)), yerr)

        gp.compute(x, yerr)
        assert gp.computed is True
        assert gp.dirty is False

        ll = gp.log_likelihood(y)
        K = gp.get_matrix(include_diagonal=True)
        ll0 = -0.5 * np.dot(y, np.linalg.solve(K, y))
        ll0 -= 0.5 * np.linalg.slogdet(K)[1]
        ll0 -= 0.5 * len(x) * np.log(2*np.pi)
        assert np.allclose(ll, ll0)

    # Check that changing the parameters "un-computes" the likelihood.
    gp.set_parameter_vector(gp.get_parameter_vector())
    assert gp.dirty is True
    assert gp.computed is False

    # Check that changing the parameters changes the likelihood.
    gp.compute(x, yerr)
    ll1 = gp.log_likelihood(y)
    params = gp.get_parameter_vector()
    params[0] += 0.1
    gp.set_parameter_vector(params)
    gp.compute(x, yerr)
    ll2 = gp.log_likelihood(y)
    assert not np.allclose(ll1, ll2)

    gp[1] += 0.1
    assert gp.dirty is True
    gp.compute(x, yerr)
    ll3 = gp.log_likelihood(y)
    assert not np.allclose(ll2, ll3)
Example #7
def test_pickle(with_general, seed=42):
    solver = celerite.CholeskySolver()
    np.random.seed(seed)
    t = np.sort(np.random.rand(500))
    diag = np.random.uniform(0.1, 0.5, len(t))
    y = np.sin(t)

    if with_general:
        U = np.vander(t - np.mean(t), 4).T
        V = U * np.random.rand(4)[:, None]
        A = np.sum(U * V, axis=0) + 1e-8
    else:
        A = np.empty(0)
        U = np.empty((0, 0))
        V = np.empty((0, 0))

    alpha_real = np.array([1.3, 1.5])
    beta_real = np.array([0.5, 0.2])
    alpha_complex_real = np.array([1.0])
    alpha_complex_imag = np.array([0.1])
    beta_complex_real = np.array([1.0])
    beta_complex_imag = np.array([1.0])

    def compare(solver1, solver2):
        assert solver1.computed() == solver2.computed()
        if not solver1.computed():
            return
        assert np.allclose(solver1.log_determinant(),
                           solver2.log_determinant())
        assert np.allclose(solver1.dot_solve(y), solver2.dot_solve(y))

    s = pickle.dumps(solver, -1)
    solver2 = pickle.loads(s)
    compare(solver, solver2)

    solver.compute(0.0, alpha_real, beta_real, alpha_complex_real,
                   alpha_complex_imag, beta_complex_real, beta_complex_imag, A,
                   U, V, t, diag)
    solver2 = pickle.loads(pickle.dumps(solver, -1))
    compare(solver, solver2)

    # Test that models can be pickled too.
    kernel = terms.RealTerm(0.5, 0.1)
    kernel += terms.ComplexTerm(0.6, 0.7, 1.0)
    gp1 = GP(kernel)
    gp1.compute(t, diag)
    s = pickle.dumps(gp1, -1)
    gp2 = pickle.loads(s)
    assert np.allclose(gp1.log_likelihood(y), gp2.log_likelihood(y))
Example #8
def test_predict(method, seed=42):
    np.random.seed(seed)
    x = np.sort(np.random.rand(10))
    yerr = np.random.uniform(0.1, 0.5, len(x))
    y = np.sin(x)

    kernel = terms.RealTerm(0.1, 0.5)
    for term in [(0.6, 0.7, 1.0)]:
        kernel += terms.ComplexTerm(*term)
    gp = GP(kernel, method=method)
    gp.compute(x, yerr)

    mu0, cov0 = gp.predict(y, x)
    mu, cov = gp.predict(y)
    assert np.allclose(mu0, mu)
    assert np.allclose(cov0, cov)
Example #9
def trappist1_variability(times):
    alpha = 0.973460343001
    log_a = np.log(np.exp(-26.88111923) * alpha)
    log_c = -1.0890621571818671
    # log_sigma = -5.6551601053314622

    # kernel = (terms.JitterTerm(log_sigma=log_sigma) +
    #           terms.RealTerm(log_a=log_a, log_c=log_c))

    kernel = terms.RealTerm(log_a=log_a, log_c=log_c)

    gp = celerite.GP(kernel, mean=0, fit_white_noise=True, fit_mean=True)
    gp.compute(times)

    sample = gp.sample()
    sample -= np.median(sample)
    return sample + 1
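
A minimal usage sketch (assumed): the returned array is a multiplicative variability signal normalised so its median is one.

# Assumed usage: one-minute cadence over ~27 days (hypothetical values).
times = np.arange(0.0, 27.0, 1.0 / 1440.0)
flux = trappist1_variability(times)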
Example #10
def k296_variability(times):
    alpha = 0.854646217641
    log_a = np.log(np.exp(-13.821195) * alpha)
    log_c = -1.0890621571818671
    # log_sigma = -7.3950524

    # kernel = (terms.JitterTerm(log_sigma=log_sigma) +
    #           terms.RealTerm(log_a=log_a, log_c=log_c))

    kernel = terms.RealTerm(log_a=log_a, log_c=log_c)

    gp = celerite.GP(kernel, mean=0, fit_white_noise=True, fit_mean=True)
    gp.compute(times)

    sample = gp.sample()
    sample -= np.median(sample)
    return sample + 1
Example #11
def test_build_gp(method, seed=42):
    kernel = terms.RealTerm(0.5, 0.1)
    kernel += terms.ComplexTerm(0.6, 0.7, 1.0)
    gp = GP(kernel, method=method)

    assert gp.vector_size == 5
    p = gp.get_parameter_vector()
    assert np.allclose(p, [0.5, 0.1, 0.6, 0.7, 1.0])

    gp.set_parameter_vector([0.5, 0.8, 0.6, 0.7, 2.0])
    p = gp.get_parameter_vector()
    assert np.allclose(p, [0.5, 0.8, 0.6, 0.7, 2.0])

    with pytest.raises(ValueError):
        gp.set_parameter_vector([0.5, 0.8, -0.6])

    with pytest.raises(ValueError):
        gp.set_parameter_vector("face1")
Example #12
def test_pickle(method, seed=42):
    solver = get_solver(method)
    np.random.seed(seed)
    t = np.sort(np.random.rand(500))
    diag = np.random.uniform(0.1, 0.5, len(t))
    y = np.sin(t)

    alpha_real = np.array([1.3, 1.5])
    beta_real = np.array([0.5, 0.2])
    alpha_complex_real = np.array([1.0])
    alpha_complex_imag = np.array([0.1])
    beta_complex_real = np.array([1.0])
    beta_complex_imag = np.array([1.0])

    def compare(solver1, solver2):
        assert solver1.computed() == solver2.computed()
        if not solver1.computed():
            return
        assert np.allclose(solver1.log_determinant(),
                           solver2.log_determinant())
        assert np.allclose(solver1.dot_solve(y),
                           solver2.dot_solve(y))

    s = pickle.dumps(solver, -1)
    solver2 = pickle.loads(s)
    compare(solver, solver2)

    if method != "sparse":
        solver.compute(
            alpha_real, beta_real, alpha_complex_real, alpha_complex_imag,
            beta_complex_real, beta_complex_imag, t, diag
        )
        solver2 = pickle.loads(pickle.dumps(solver, -1))
        compare(solver, solver2)

    # Test that models can be pickled too.
    kernel = terms.RealTerm(0.5, 0.1)
    kernel += terms.ComplexTerm(0.6, 0.7, 1.0)
    gp1 = GP(kernel, method=method)
    gp1.compute(t, diag)
    s = pickle.dumps(gp1, -1)
    gp2 = pickle.loads(s)
    assert np.allclose(gp1.log_likelihood(y), gp2.log_likelihood(y))
Example #13
def test_product(seed=42):
    np.random.seed(seed)
    t = np.sort(np.random.uniform(0, 5, 100))
    tau = t[:, None] - t[None, :]

    k1 = terms.RealTerm(log_a=0.1, log_c=0.5)
    k2 = terms.ComplexTerm(0.2, -3.0, 0.5, 0.01)
    k3 = terms.SHOTerm(1.0, 0.2, 3.0)

    K1 = k1.get_value(tau)
    K2 = k2.get_value(tau)
    K3 = k3.get_value(tau)

    assert np.allclose((k1 + k2).get_value(tau), K1 + K2)
    assert np.allclose((k3 + k2).get_value(tau), K3 + K2)
    assert np.allclose((k1 + k2 + k3).get_value(tau), K1 + K2 + K3)

    for (a, b), (A, B) in zip(
            product((k1, k2, k3, k1 + k2, k1 + k3, k2 + k3), (k1, k2, k3)),
            product((K1, K2, K3, K1 + K2, K1 + K3, K2 + K3), (K1, K2, K3))):
        assert np.allclose((a * b).get_value(tau), A * B)
Example #14
def test_bounds(seed=42):
    bounds = [(-1.0, 0.3), (-2.0, 5.0)]
    kernel = terms.RealTerm(log_a=0.1, log_c=0.5, bounds=bounds)
    b0 = kernel.get_parameter_bounds()
    assert all(np.allclose(a, b) for a, b in zip(b0, bounds))

    kernel = terms.RealTerm(log_a=0.1,
                            log_c=0.5,
                            bounds=dict(zip(["log_a", "log_c"], bounds)))
    assert all(
        np.allclose(a, b) for a, b in zip(b0, kernel.get_parameter_bounds()))


@pytest.mark.parametrize("k", [
    terms.RealTerm(log_a=0.1, log_c=0.5),
    terms.RealTerm(log_a=0.1, log_c=0.5) +
    terms.RealTerm(log_a=-0.1, log_c=0.7),
    terms.ComplexTerm(log_a=0.1, log_c=0.5, log_d=0.1),
    terms.ComplexTerm(log_a=0.1, log_b=-0.2, log_c=0.5, log_d=0.1),
    terms.SHOTerm(log_S0=0.1, log_Q=-1, log_omega0=0.5),
    terms.SHOTerm(log_S0=0.1, log_Q=1.0, log_omega0=0.5),
    terms.SHOTerm(log_S0=0.1, log_Q=1.0, log_omega0=0.5) +
    terms.RealTerm(log_a=0.1, log_c=0.4),
    terms.SHOTerm(log_S0=0.1, log_Q=1.0, log_omega0=0.5) *
    terms.RealTerm(log_a=0.1, log_c=0.4),
])
def test_jacobian(k, eps=1.34e-7):
    if not terms.HAS_AUTOGRAD:
        with pytest.raises(ImportError):
            jac = k.get_coeffs_jacobian()
Example #15
def fit_SB1_gp(time, rv, rv_err, emcee=False):
    ###########################################################################
    # The first step will be to iteratively plot starting params until a good
    # initial fit is found.

    # initialise default values
    ###########################################################################
    fs = 0.0
    fc = 0.1
    K1 = 9
    T_tr = 0.0
    P = 1.0
    V0 = 2
    dV0 = 0.0
    A_red_noise = 2
    f_red_noise = 0.2
    happy = 'n'

    e, w = 0, 0

    ###################
    # Static parameters
    ###################
    # Start loop
    plt.ion()
    plt.show()
    while happy == 'n':
        plt.cla()

        #####################
        # Dynamic parameters
        #####################
        K1 = float(input('K1 [{}]:'.format(K1)) or K1)
        e = float(input('e [{}]:'.format(e)) or e)
        w = float(input('w [{}]:'.format(w)) or w)
        fc = np.sqrt(e / (1 + np.tan(w)**2))
        fs = e - fc**2
        T_tr = float(input('T_tr [{}]:'.format(T_tr)) or T_tr)
        P = float(input('P [{}]:'.format(P)) or P)
        V0 = float(input('V0 [{}]:'.format(V0)) or V0)
        dV0 = float(input('dV0 [{}]:'.format(dV0)) or dV0)

        mean_model = _MeanModel(K1=K1,
                                fs=fs,
                                fc=fc,
                                P=P,
                                T0=T_tr,
                                V0=V0,
                                dV0=dV0,
                                A_red_noise=A_red_noise,
                                f_red_noise=f_red_noise,
                                bounds=_bounds)

        plt.errorbar(time, rv, yerr=rv_err, fmt='ko')
        plt.plot(np.linspace(min(time), max(time), 1000),
                 mean_model.get_value(np.linspace(min(time), max(time), 1000)),
                 'r')
        plt.xlabel('Time [d]')
        plt.ylabel('RV [km/s]')
        plt.grid()
        plt.draw()
        plt.pause(0.001)

        happy = input("Are you happy (y/n)? [n] : ") or 'n'

    kernel = terms.RealTerm(log_a=3.0937, log_c=-2.302)
    gp = celerite.GP(kernel, mean=mean_model, fit_mean=True)

    gp.compute(time, rv_err)
    print("\n\nLoglikliehoods\n~~~~~~~~~~~~~~\nInitial log-likelihood: {:.3f}".
          format(gp.log_likelihood(rv)))
    #return gp

    # Make the maximum likelihood prediction
    time_model = np.linspace(min(time), max(time), 1000)
    gp.compute(time_model, yerr=np.interp(time_model, time, rv_err))
    mu, var = gp.predict(np.interp(time_model, time, rv),
                         t=time_model,
                         return_var=True)
    std = np.sqrt(abs(var))
    color = "#ff7f0e"
    plt.close()
    plt.errorbar(time, rv, yerr=rv_err, fmt=".k", capsize=0)
    plt.plot(time_model, mu, color=color)
    plt.fill_between(time_model,
                     mu + std,
                     mu - std,
                     color=color,
                     alpha=0.3,
                     edgecolor="none")
    plt.ylabel(r"$RV [km/s]$")
    plt.xlabel(r"$Time [d]$")
    plt.title("maximum likelihood prediction")
    plt.grid()

    # Define a cost function
    def neg_log_like(params, rv, gp):
        gp.set_parameter_vector(params)
        return -gp.log_likelihood(rv)

    # Fit for the maximum likelihood parameters
    initial_params = gp.get_parameter_vector()
    bounds = gp.get_parameter_bounds()

    gp.compute(time, rv_err)

    soln = minimize(
        neg_log_like,
        initial_params,  #jac=grad_neg_log_like,
        method="L-BFGS-B",
        bounds=bounds,
        args=(rv, gp),
        options={
            'gtol': 1e-6,
            'disp': True
        })
    print("  Final log-likelihood: {:.3f}".format(-soln.fun))
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    for i in range(len(gp.get_parameter_names())):
        print('{}:        {:.3f}'.format(gp.get_parameter_names()[i],
                                         soln.x[i]))

    if emcee:

        def log_probability(params):
            gp.set_parameter_vector(params)
            lp = gp.log_prior()
            if not np.isfinite(lp):
                return -np.inf

            try:
                return gp.log_likelihood(rv) + lp
            except:
                return -np.inf

        import emcee

        initial = np.array(soln.x)
        ndim, nwalkers = len(initial), 32
        sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability)

        print("Running burn-in...")
        p0 = initial + 1e-8 * np.random.randn(nwalkers, ndim)
        p0, lp, _ = sampler.run_mcmc(p0, 500)

        import corner
        names = gp.get_parameter_names()
        cols = mean_model.get_parameter_names()
        inds = np.array([names.index("mean:" + k) for k in cols])

        corner.corner(sampler.flatchain[:, inds], truths=initial, labels=cols)
Example #16
plot_setup.setup(auto=True)

K = 10
J = np.arange(2, 64, 8)
N = 2**np.arange(6, 16)

alpha_error = np.empty((K, len(J), len(N)))
logdet_error = np.empty((K, len(J), len(N)))
logdet_error[:, :, :] = np.nan

for k in range(K):
    t = np.sort(np.random.uniform(0, N.max() * 0.8, N.max()))
    yerr = np.random.uniform(1.0, 1.5, len(t))

    for ix, j in enumerate(J):
        kernel = terms.RealTerm(np.random.uniform(-1, 1),
                                np.random.uniform(-5, -1))
        kernel += terms.RealTerm(np.random.uniform(-1, 1),
                                 np.random.uniform(-5, -1))
        while (len(kernel.coefficients[0]) + 2 * len(kernel.coefficients[2]) <
               2 * j):
            kernel += terms.SHOTerm(
                log_S0=np.random.uniform(-1, 1),
                log_omega0=np.random.uniform(-5, 0),
                log_Q=np.random.uniform(0, 1),
            )
        kernel += terms.JitterTerm(np.random.uniform(-1, 1))
        assert (len(kernel.coefficients[0]) +
                2 * len(kernel.coefficients[2]) == 2 * j)

        gp = celerite.GP(kernel)
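        # Assumed continuation (not the original benchmark code): for each data
        # size n, compare the fast celerite log-likelihood against a dense-matrix
        # evaluation and store the absolute error as a stand-in error metric.
        # The synthetic y introduced here is an assumption for illustration.
        y = np.sin(t)
        for ixn, n in enumerate(N):
            if n > 4096:  # keep the dense comparison tractable (assumption)
                continue
            gp.compute(t[:n], yerr[:n])
            ll = gp.log_likelihood(y[:n])
            Kd = gp.get_matrix(include_diagonal=True)
            ll0 = -0.5 * np.dot(y[:n], np.linalg.solve(Kd, y[:n]))
            ll0 -= 0.5 * np.linalg.slogdet(Kd)[1]
            ll0 -= 0.5 * n * np.log(2 * np.pi)
            logdet_error[k, ix, ixn] = np.abs(ll - ll0)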
Example #17
from celerite import plot_setup
plot_setup.setup()

# Set up the dimensions of the problem
N = 2**np.arange(6, 20)
times = np.empty((len(N), 3))
times[:] = np.nan

# Simulate a "dataset"
np.random.seed(42)
t = np.sort(np.random.rand(np.max(N)))
yerr = np.random.uniform(0.1, 0.2, len(t))
y = np.sin(t)

# Set up the GP model
kernel = terms.RealTerm(1.0, 0.1) + terms.ComplexTerm(0.1, 2.0, 1.6)
gp = GP(kernel)

for i, n in enumerate(N):
    times[i, 0] = benchmark("gp.compute(t[:{0}], yerr[:{0}])".format(n),
                            "from __main__ import gp, t, yerr")

    gp.compute(t[:n], yerr[:n])
    times[i, 1] = benchmark("gp.log_likelihood(y[:{0}])".format(n),
                            "from __main__ import gp, y")

    if n <= 4096:
        times[i, 2] = benchmark(
            """
C = gp.get_matrix(t[:{0}])
C[np.diag_indices_from(C)] += yerr[:{0}]**2
Example #18
from celerite2 import terms


@pytest.fixture
def data():
    # Generate fake data
    np.random.seed(40582)
    x = np.sort(np.random.uniform(0, 10, 50))
    t = np.sort(np.random.uniform(-1, 12, 100))
    diag = np.random.uniform(0.1, 0.3, len(x))
    y = np.sin(x)
    return x, diag, y, t


test_terms = [
    cterms.RealTerm(log_a=np.log(2.5), log_c=np.log(1.1123)),
    cterms.RealTerm(log_a=np.log(12.345), log_c=np.log(1.5)) +
    cterms.RealTerm(log_a=np.log(0.5), log_c=np.log(1.1234)),
    cterms.ComplexTerm(log_a=np.log(10.0),
                       log_c=np.log(5.6),
                       log_d=np.log(2.1)),
    cterms.ComplexTerm(
        log_a=np.log(7.435),
        log_b=np.log(0.5),
        log_c=np.log(1.102),
        log_d=np.log(1.05),
    ),
    cterms.SHOTerm(log_S0=np.log(1.1),
                   log_Q=np.log(0.1),
                   log_omega0=np.log(1.2)),
    cterms.SHOTerm(log_S0=np.log(1.1),
Example #19
def getEclipseTimes(fnames, coords, obsname, myLoc=None):
    '''
    Searches <myLoc> for .log files, and uses them to get the times of the eclipses.

    The technique for this is to make a smoothed plot of the numerical gradient, and look for two mirrored peaks - one
    where the lightcurve enters eclipse (showing as a trough in gradient), and one for egress (showing as a peak in
    gradient). Ideally, they will be mirrors of each other, with the same width and height (though one will be the negative
    of the other).

    A double gaussian is fitted to it using a gaussian process, and the midpoint between their peaks is taken to be the
    eclipse time. To characterise the error of the eclipse time, an MCMC is used to sample the found fit. This is beefy,
    and takes a while, but the Hessian error matrix we were getting out of scipy.optimize was heavily dependent on initial
    conditions, so was untrustworthy.


    Arguments:
    ----------
    coords: str
        The RA and Dec of the stars in the eclipses you're fitting. Note that all data being fitted is assumed to be for
        the same object, hence the RA and Dec used in each log file is the same. i.e., make sure you're not fitting data
        for more than one object at once!
        Note: must be readable by astropy!

    obsname: str
        The observatory name. See coord.EarthLocation.get_site_names() for a list. If a site is not in the registry,
        this string is assumed to be longitude and latitude, and will be attempted again.

    myLoc: str, default None
        The directory to search for eclipses. If None, searches the current working directory.

    Returns:
    --------
    None, but creates a file with eclipse times in it.
    '''
    plt.ion()
    printer("\n\n--- Getting eclipse times from the data ---")

    star = coord.SkyCoord(
        coords,
        frame='icrs',
        unit=(units.hour, units.deg)
    )

    # Where are we working?
    if myLoc == None:
        myLoc = path.curdir
        printer("Defaulting to current directory: {}".format(myLoc))


    # Make the ephemeris dir, if needed
    ephem_dir = path.join(myLoc, "EPHEMERIS")

    # Make the ephemeris directory, where I'll put my stuff
    print("Putting the ephemeris data in {}".format(ephem_dir))
    try:
        mkdir(ephem_dir)
        print("Created the directory!")
    except:
        print("The directory already exists!")

    # Where am I looking for prior data, and saving my new data?
    oname = 'eclipse_times.txt'
    oname = path.join(ephem_dir, oname)



    source_key, tl = read_ecl_file(oname)

    # # What am I using to get new data from?
    # printer("Grabbing log files...")
    # fnames = list(glob.iglob('{}/**/*.log'.format(myLoc), recursive=True))
    # fnames = sorted(fnames)
    #
    # if len(fnames) == 0:
    #     printer("I couldn't find any log files in:")
    #     printer("{}".format(myLoc))
    #     raise FileNotFoundError("No log files in {}".format(myLoc))

    # List the files we found
    printer("Getting eclipse times from these log files: ")
    for i, fname in enumerate(fnames):
        printer("  {:>2d} - {}".format(i, fname))
    printer('  ')

    temp_file = open('eclipse_times.tmp', 'w')

    for lf in fnames:
        printer("  Looking at the file {}".format(lf))
        # lets make the file reading more robust
        log = Hlog.read(lf)
        if log == {}:
            printer("  Failed to get data from Hlog.read function, skipping this file.")
            continue
        aps = log.apnames

        printer("File: {}".format(lf))
        if len(aps['1']) < 2:
            printer("-> Not looking for eclipses in {}, as only one aperture in the file.".format(lf))
            continue

        # Get the first CCD lightcurve, and correct it to the barycentric time
        try:
            inspect = log.tseries('2', '1') / log.tseries('2', aps['1'][1])
        except:
            inspect = log.tseries('1', '1') / log.tseries('1', aps['1'][1])
        printer("Correcting observations from MJD to Barycentric MJD")
        printer("  -> Location: {}".format(obsname))
        printer("  -> Star: {}".format(star))
        inspect_corr = tcorrect(inspect, star, obsname)
        # Discard the first 10 observations, as they're often junk
        inspect_corr = inspect_corr[10:]

        x, y = smooth_derivative(inspect_corr, 9, 5)
        yerr = 0.001*np.ones_like(x)

        fig, ax = plt.subplots(2, figsize=[16,8], sharex=True)
        ax[0].set_title("{}".format(lf))
        ax[0].plot(x, y)

        ax[1].set_title('Lightcurve:')
        inspect_corr.mplot(ax[1])

        gauss = PlotPoints(fig)
        gauss.connect()
        plt.tight_layout()

        plt.show(block=True)

        try:
            lowerlim = gauss.lowerlim
        except:
            lowerlim = x.min()

        try:
            upperlim = gauss.upperlim
        except:
            upperlim = x.max()

        # Apply upper/lower limits
        mask = (x < upperlim) * (x > lowerlim)
        mask = np.where(mask==1)

        y    = y[mask]
        yerr = yerr[mask]
        x    = x[mask]

        if gauss.flag:
            printer("-> No eclipse taken from {}".format(lf))
            continue

        kwargs = gauss.gaussPars()
        # hold values close to initial guesses
        bounds = dict(
            t0=(kwargs['t0']-kwargs['sep']/8, kwargs['t0']+kwargs['sep']/8),
            sep=(0.9*kwargs['sep'], 1.1*kwargs['sep']),
            log_sigma2=(np.log(kwargs['sep']**2/10000), np.log(kwargs['sep']**2/25)),
            peak=(0.9*kwargs['peak'], 1.1*kwargs['peak'])
        )
        kwargs['bounds'] = bounds

        mean_model = TwoGaussians(**kwargs)

        mean, median, std = sigma_clipped_stats(y)
        delta_t = np.mean(np.diff(x))*5
        kernel = terms.RealTerm(log_a=np.log(std**2), log_c=-np.log(delta_t))
        gp = celerite.GP(kernel, mean=mean_model, fit_mean=True)
        gp.compute(x, yerr)
        # print("  Initial log-likelihood: {0}".format(gp.log_likelihood(y)))


        # Fit for the maximum likelihood parameters
        initial_params = gp.get_parameter_vector()
        bounds = gp.get_parameter_bounds()


        # Find a solution using Stu's minimisation method
        soln = minimize(neg_log_like, initial_params, jac=grad_neg_log_like,
                        method="L-BFGS-B", bounds=bounds, args=(y, gp))
        if not soln.success:
            printer('  Warning: may not have converged')
            printer(soln.message)

        print("solution from minimise:")
        print(soln)

        gp.set_parameter_vector(soln.x)
        mean_model.set_parameter_vector(gp.get_parameter_vector()[2:])

        out = soln['x']
        t_ecl = out[2]

        printer("\n\nUsing MCMC to characterise error at peak likelihood...")


        # Use an MCMC model, starting from the solution we found, to model the errors
        ndim     = 6
        nwalkers = 100

        # Initial positions.
        p0 = np.random.rand(ndim * nwalkers).reshape((nwalkers, ndim))
        scatter = 0.0005 # Scatter by ~ 40s in time
        p0 *= scatter
        p0 -= (scatter/2)
        p0 = np.transpose(np.repeat(out, nwalkers).reshape((ndim, nwalkers))) + p0

        try:
            # Construct a sampler
            pool = Pool()
            sampler = emcee.EnsembleSampler(
                nwalkers, ndim,
                log_like,
                args=[y, gp],
                pool=pool
            )

            # Burn in
            width=40
            nsteps = 1000
            for i, result in enumerate(sampler.sample(p0, iterations=nsteps)):
                n = int((width+1) * float(i) / nsteps)
                # print(result[0])
                sys.stdout.write("\r  Sampling...    [{}{}]".format('#'*n, ' '*(width - n)))
            pool.close()
            pos, prob, state = result

            t_ecl = np.mean(sampler.flatchain[:,2])
            err = np.std(sampler.flatchain[:,2])
            sep = np.mean(sampler.flatchain[:,3])

            temp_file.write("{},{},{}\n".format(float(t_ecl), float(err), lf))
            printer("Got a solution: {:.7f}+/-{:.7f}\n".format(t_ecl, err))

            # Make the maximum likelihood prediction
            mu, var = gp.predict(y, x, return_var=True)
            std = np.sqrt(var)

            # Plot the data
            color = "#ff7f0e"
            plt.close('all')
            fig, ax = plt.subplots(2, 1, sharex=True)
            ax[0].plot(x, y, '.', label='Data')
            ax[0].plot(x, mu, color=color, label='Data GP interpolation')
            ax[0].fill_between(x, mu+std, mu-std, color=color, alpha=0.3, edgecolor="none")
            ax[0].plot(x, mean_model.get_value(x), linestyle='--', color='blue', label='MCMC result')
            # mean_model.set_parameter_vector(soln.x[2:])
            # ax[0].plot(x, mean_model.get_value(x), linestyle='--', color='red', label='scipy result')
            ax[0].axvline(t_ecl, color='magenta', label='Eclipse time')

            inspect_corr.mplot(ax[1])
            ax[1].set_title('Lightcurve')
            ax[1].axvline(t_ecl, color='magenta')
            ax[1].axvline(t_ecl+(sep/2.), color='red')
            ax[1].axvline(t_ecl-(sep/2.), color='red')

            ax[0].set_xlim(x[0], x[-1])
            ax[0].set_title("maximum likelihood prediction - {}".format(lf.split('/')[-1]))
            ax[0].legend()
            plt.tight_layout()
            print("  Plotting fit...")
            plt.show(block=False)

            cont = input("  Save these data? y/n: ")
            if cont.lower() == 'y':
                figname = lf
                figname = figname.replace('/', '_').replace(".log", ".png")
                figname = path.join("EPHEMERIS", figname)
                printer("Saved the ephemeral fit to:\n   {}".format(figname))
                plt.savefig(figname)

                locflag = input("    What is the source of these data: ")

                key = '-1' # This ensures that if source_key is empty, the new data are pushed to index '0'
                for key in source_key:
                    if locflag == source_key[key]:
                        locflag = key
                        break
                if locflag != key:
                    key = str(int(key)+1)
                    source_key[key] = locflag
                    locflag = key
                tl.append(['0', float(t_ecl), float(err), locflag])
                printer("Saved the data: {}".format(['0', float(t_ecl), float(err), locflag]))
            else:
                printer("  Did not store eclipse time from {}.".format(lf))

            plt.close()
            printer("")
        except celerite.solver.LinAlgError:
            printer('  Celerite failed to factorize or solve matrix. This can happen when the data are poorly fitted by the double gaussian!')
            printer("  Skipping this file.")
            input("> ")

    printer("\nDone all the files!")

    write_ecl_file(source_key, tl, oname)
    plt.ioff()

    #TODO:
    # Temporary placeholder. Think about this.
    # - Get the rounded ephemeris fit from the period and T0 supplied?
    # - Might be best to force the user to do this manually, to make it more reliable?
    printer("This string might help:\ncode {}".format(path.abspath(oname)))
    printer("Please open the file, and edit in the eclipse numbers for each one.")
    input("Hit enter when you've done this!")

    remove('eclipse_times.tmp')
Example #20
def fit_drw(x,
            y,
            yerr,
            nburn=500,
            nsamp=2000,
            color="#ff7f0e",
            plot=True,
            verbose=True):

    # Sort data
    ind = np.argsort(x)
    x = x[ind]
    y = y[ind]
    yerr = yerr[ind]

    # Model priors
    min_precision = np.min(yerr.value)
    amplitude = np.max(y.value) - np.min(y.value)
    amin = np.log(0.1 * min_precision)
    amax = np.log(10 * amplitude)

    baseline = x[-1] - x[0]
    min_cadence = np.min(np.diff(x.value))
    cmin = np.log(1 / (10 * baseline.value))
    cmax = np.log(1 / min_cadence)

    bounds_drw = dict(log_a=(-15.0, 5.0), log_c=(cmin, cmax))
    kernel = terms.RealTerm(log_a=0,
                            log_c=np.mean([cmin, cmax]),
                            bounds=bounds_drw)

    # Jitter?
    #bounds_jitter = dict(log_sigma=(-25.0, 10.0))
    #kernel_jit = terms.JitterTerm(log_sigma=1.0, bounds=bounds_jitter)

    gp = celerite.GP(kernel, mean=np.mean(y.value), fit_mean=False)
    gp.compute(x.value, yerr.value)
    if verbose:
        print("Initial log-likelihood: {0}".format(gp.log_likelihood(y.value)))

    # Define a cost function
    def neg_log_like(params, y, gp):
        gp.set_parameter_vector(params)
        return -gp.log_likelihood(y)

    def grad_neg_log_like(params, y, gp):
        gp.set_parameter_vector(params)
        return -gp.grad_log_likelihood(y)[1]

    # Fit for the maximum likelihood parameters
    initial_params = gp.get_parameter_vector()
    bounds = gp.get_parameter_bounds()
    soln = minimize(neg_log_like,
                    initial_params,
                    jac=grad_neg_log_like,
                    method="L-BFGS-B",
                    bounds=bounds,
                    args=(y.value, gp))
    gp.set_parameter_vector(soln.x)
    if verbose:
        print("Final log-likelihood: {0}".format(-soln.fun))

    # Make the maximum likelihood prediction
    t = np.linspace(np.min(x.value) - 100, np.max(x.value) + 100, 500)
    mu, var = gp.predict(y.value, t, return_var=True)
    std = np.sqrt(var)

    # Define the log probability
    def log_probability(params):
        gp.set_parameter_vector(params)
        lp = gp.log_prior()
        if not np.isfinite(lp):
            return -np.inf
        return gp.log_likelihood(y) + lp

    initial = np.array(soln.x)
    ndim, nwalkers = len(initial), 32
    sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability)

    if verbose:
        print("Running burn-in...")
    p0 = initial + 1e-8 * np.random.randn(nwalkers, ndim)
    p0, lp, _ = sampler.run_mcmc(p0, nburn)

    if verbose:
        print("Running production...")
    sampler.reset()
    sampler.run_mcmc(p0, nsamp)

    # Get posterior and uncertainty
    samples = sampler.flatchain
    s = np.median(samples, axis=0)
    gp.set_parameter_vector(s)
    mu, var = gp.predict(y.value, t, return_var=True)
    std = np.sqrt(var)
    # Noise level
    noise_level = 2.0 * np.median(np.diff(x.value)) * np.mean(yerr.value**2)
    log_tau_drw = np.log10(1 / np.exp(samples[:, 1]))

    # Lomb-Scargle periodogram with PSD normalization
    freqLS, powerLS = LombScargle(x, y, yerr).autopower(normalization='psd')
    powerLS /= len(x)
    f = np.logspace(np.log10(np.min(freqLS.value)),
                    np.log10(np.max(freqLS.value)), 1000) / u.day
    # Binned Lomb-Scargle periodogram
    num_bins = 12  #len(freqLS)//100
    f_bin = np.logspace(np.log10(np.min(freqLS.value)),
                        np.log10(np.max(freqLS.value)), num_bins + 1)
    psd_binned = np.empty((num_bins, 3))
    f_bin_center = np.empty(num_bins)
    for i in range(num_bins):
        idx = (freqLS.value >= f_bin[i]) & (freqLS.value < f_bin[i + 1])
        len_idx = len(freqLS.value[idx])
        f_bin_center[i] = np.mean(freqLS.value[idx])  # freq center
        meani = np.mean(powerLS.value[idx])
        stdi = meani / np.sqrt(len_idx)
        psd_binned[i, 0] = meani
        psd_binned[i, 1] = meani + stdi  # hi
        psd_binned[i, 2] = meani - stdi  # lo
        # If below the noise level, break
        if meani < noise_level:
            psd_binned = psd_binned[:i + 1, :]
            f_bin_center = f_bin_center[:i + 1]
            break
    # The posterior PSD
    #print(psd_binned[1:, 1])
    #print(psd_binned[1:, 2])
    psd_credint = np.empty((len(f), 3))
    # Compute the PSD and credibility interval at each frequency fi
    # Code adapted from B. C. Kelly carma_pack
    for i, fi in enumerate(f.value):
        omegai = 2 * np.pi * fi  # Convert to angular frequencies
        ai = np.exp(samples[:, 0])
        ci = np.exp(samples[:, 1])
        psd_samples = np.sqrt(2 / np.pi) * ai / ci * (1 + (omegai / ci)**2)**-1
        # Compute credibility interval
        psd_credint[i, 0] = np.percentile(psd_samples, 16, axis=0)
        psd_credint[i, 2] = np.percentile(psd_samples, 84, axis=0)
        psd_credint[i, 1] = np.median(psd_samples, axis=0)
    # Plot
    if plot:
        fig, axs = plt.subplots(1,
                                2,
                                figsize=(15, 5),
                                gridspec_kw={'width_ratios': [1, 1.5]})
        axs[0].set_xlim(np.min(freqLS.value), np.max(freqLS.value))
        axs[0].loglog(freqLS,
                      powerLS,
                      c='grey',
                      lw=2,
                      alpha=0.3,
                      label=r'Lomb$-$Scargle',
                      drawstyle='steps-pre')
        axs[0].fill_between(f_bin_center[1:],
                            psd_binned[1:, 1],
                            psd_binned[1:, 2],
                            alpha=0.8,
                            interpolate=True,
                            label=r'binned Lomb$-$Scargle',
                            color='k',
                            step='mid')
        axs[0].fill_between(f,
                            psd_credint[:, 2],
                            psd_credint[:, 0],
                            alpha=0.3,
                            label='posterior PSD',
                            color=color)
        xlim = axs[0].get_xlim()
        axs[0].hlines(noise_level, xlim[0], xlim[1], color='grey', lw=2)
        axs[0].annotate("Measurement Noise Level",
                        (1.25 * xlim[0], noise_level / 1.9),
                        fontsize=14)
        axs[0].set_ylabel("Power ($\mathrm{ppm}^2 / day^{-1}$)", fontsize=18)
        axs[0].set_xlabel("Frequency (days$^{-1}$)", fontsize=18)
        axs[0].tick_params('both', labelsize=16)
        axs[0].legend(fontsize=16, loc=1)
        axs[0].set_ylim(noise_level / 10.0, 10 * axs[0].get_ylim()[1])

        # Plot light curve prediction
        axs[1].errorbar(x.value,
                        y.value,
                        yerr=yerr.value,
                        c='k',
                        fmt='.',
                        alpha=0.75,
                        elinewidth=1)
        axs[1].fill_between(t,
                            mu + std,
                            mu - std,
                            color=color,
                            alpha=0.3,
                            label='posterior prediction')
        axs[1].set_xlabel('Time (MJD)', fontsize=18)
        axs[1].set_ylabel(r'Magnitude $g$', fontsize=18)
        axs[1].tick_params(labelsize=18)
        axs[1].set_ylim(np.max(y.value) + .1, np.min(y.value) - .1)
        axs[1].set_xlim(np.min(t), np.max(t))
        axs[1].legend(fontsize=16, loc=1)
        fig.tight_layout()

        fig, axs = plt.subplots(1,
                                2,
                                figsize=(15, 5),
                                gridspec_kw={'width_ratios': [1, 1.5]})

        axs[0].hist(log_tau_drw,
                    color=color,
                    alpha=0.8,
                    fill=None,
                    histtype='step',
                    lw=3,
                    bins=50,
                    label=r'posterior distribution')
        ylim = axs[0].get_ylim()
        axs[0].vlines(np.log10(0.2 * baseline.value),
                      ylim[0],
                      ylim[1],
                      color='grey',
                      lw=4)
        axs[0].set_ylim(ylim)
        axs[0].set_xlim(np.min(log_tau_drw), np.max(log_tau_drw))
        axs[0].set_xlabel(r'$\log_{10} \tau_{\rm{DRW}}$', fontsize=18)
        axs[0].set_ylabel('count', fontsize=18)
        axs[0].tick_params(labelsize=18)

        # ACF of sq. res.
        s = np.median(samples, axis=0)
        gp.set_parameter_vector(s)
        mu, var = gp.predict(y.value, x.value, return_var=False)
        res2 = (y.value - mu)**2

        maxlag = 50

        # Plot ACF
        axs[1].acorr(res2 - np.mean(res2), maxlags=maxlag, lw=2)
        # White noise
        wnoise_upper = 1.96 / np.sqrt(len(x))
        wnoise_lower = -1.96 / np.sqrt(len(x))
        axs[1].fill_between([0, maxlag],
                            wnoise_upper,
                            wnoise_lower,
                            facecolor='grey')
        axs[1].set_ylabel(r'ACF $\chi^2$', fontsize=18)
        axs[1].set_xlabel(r'Time Lag [days]', fontsize=18)
        axs[1].set_xlim(0, maxlag)
        axs[1].tick_params('both', labelsize=16)

        fig.tight_layout()
        plt.show()

        # Make corner plot
        # These are natural logs
        #fig = corner.corner(samples, quantiles=[0.16,0.84], show_titles=True,
        #            labels=[r"$\ln\ a$", r"$\ln\ c$"], titlesize=16);
        #for ax in fig.axes:
        #    ax.tick_params('both',labelsize=16)
        #    ax.xaxis.label.set_size(16)
        #    ax.yaxis.label.set_size(16)

        #fig.tight_layout()
        #plt.show()

    # Return the GP model and sample chains
    return gp, samples
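
A hedged usage sketch with synthetic data (assumed, not from the original source): fit_drw expects astropy Quantity arrays, since it accesses .value throughout.

# Assumed usage; units and values are illustrative only.
import astropy.units as u
np.random.seed(1)
t_obs = np.sort(np.random.uniform(0, 1500, 200)) * u.day
mag = np.random.normal(19.0, 0.2, 200) * u.mag
mag_err = np.full(200, 0.02) * u.mag
gp_drw, chain = fit_drw(t_obs, mag, mag_err, nburn=200, nsamp=1000, plot=False)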
Example #21
def test_log_likelihood(with_general, seed=42):
    np.random.seed(seed)
    x = np.sort(np.random.rand(10))
    yerr = np.random.uniform(0.1, 0.5, len(x))
    y = np.sin(x)

    if with_general:
        U = np.vander(x - np.mean(x), 4).T
        V = U * np.random.rand(4)[:, None]
        A = np.sum(U * V, axis=0) + 1e-8
    else:
        A = np.empty(0)
        U = np.empty((0, 0))
        V = np.empty((0, 0))

    # Check quiet argument with a non-positive definite kernel.
    class NPDTerm(terms.Term):
        parameter_names = ("par1", )

        def get_real_coefficients(self, params):  # NOQA
            return [params[0]], [0.1]

    gp = GP(NPDTerm(-1.0))
    with pytest.raises(celerite.solver.LinAlgError):
        gp.compute(x, 0.0)
    with pytest.raises(celerite.solver.LinAlgError):
        gp.log_likelihood(y)
    assert np.isinf(gp.log_likelihood(y, quiet=True))
    if terms.HAS_AUTOGRAD:
        assert np.isinf(gp.grad_log_likelihood(y, quiet=True)[0])

    kernel = terms.RealTerm(0.1, 0.5)
    gp = GP(kernel)
    with pytest.raises(RuntimeError):
        gp.log_likelihood(y)

    termlist = [(0.1 + 10. / j, 0.5 + 10. / j) for j in range(1, 4)]
    termlist += [(1.0 + 10. / j, 0.01 + 10. / j, 0.5, 0.01)
                 for j in range(1, 10)]
    termlist += [(0.6, 0.7, 1.0), (0.3, 0.05, 0.5, 0.6)]
    for term in termlist:
        if len(term) > 2:
            kernel += terms.ComplexTerm(*term)
        else:
            kernel += terms.RealTerm(*term)
        gp = GP(kernel)

        assert gp.computed is False

        with pytest.raises(ValueError):
            gp.compute(np.random.rand(len(x)), yerr)

        gp.compute(x, yerr, A=A, U=U, V=V)
        assert gp.computed is True
        assert gp.dirty is False

        ll = gp.log_likelihood(y)
        K = gp.get_matrix(include_diagonal=True)
        ll0 = -0.5 * np.dot(y, np.linalg.solve(K, y))
        ll0 -= 0.5 * np.linalg.slogdet(K)[1]
        ll0 -= 0.5 * len(x) * np.log(2 * np.pi)
        assert np.allclose(ll, ll0)

    # Check that changing the parameters "un-computes" the likelihood.
    gp.set_parameter_vector(gp.get_parameter_vector())
    assert gp.dirty is True
    assert gp.computed is False

    # Check that changing the parameters changes the likelihood.
    gp.compute(x, yerr, A=A, U=U, V=V)
    ll1 = gp.log_likelihood(y)
    params = gp.get_parameter_vector()
    params[0] += 10.0
    gp.set_parameter_vector(params)
    gp.compute(x, yerr, A=A, U=U, V=V)
    ll2 = gp.log_likelihood(y)
    assert not np.allclose(ll1, ll2)

    gp[1] += 10.0
    assert gp.dirty is True
    gp.compute(x, yerr, A=A, U=U, V=V)
    ll3 = gp.log_likelihood(y)
    assert not np.allclose(ll2, ll3)

    # Test zero delta t
    ind = len(x) // 2
    x = np.concatenate((x[:ind], [x[ind]], x[ind:]))
    y = np.concatenate((y[:ind], [y[ind]], y[ind:]))
    yerr = np.concatenate((yerr[:ind], [yerr[ind]], yerr[ind:]))
    gp.compute(x, yerr)
    ll = gp.log_likelihood(y)
    K = gp.get_matrix(include_diagonal=True)
    ll0 = -0.5 * np.dot(y, np.linalg.solve(K, y))
    ll0 -= 0.5 * np.linalg.slogdet(K)[1]
    ll0 -= 0.5 * len(x) * np.log(2 * np.pi)
    assert np.allclose(ll, ll0)
Example #22
    fn += "_george"
fn += ".csv"
fn = os.path.join(args.outdir, fn)
print("filename: {0}".format(fn))
with open(fn, "w") as f:
    f.write(header)
print(header, end="")

# Simulate a "dataset"
np.random.seed(42)
t = np.sort(np.random.rand(np.max(N)))
yerr = np.random.uniform(0.1, 0.2, len(t))
y = np.sin(t)

for xi, j in enumerate(J):
    kernel = terms.RealTerm(1.0, 0.1)
    for k in range((2*j - 1) % 2):
        kernel += terms.RealTerm(1.0, 0.1)
    for k in range((2*j - 1) // 2):
        kernel += terms.ComplexTerm(0.1, 2.0, 1.6)
    coeffs = kernel.coefficients
    assert 2*j == len(coeffs[0]) + 2*len(coeffs[2]), "Wrong number of terms"

    if args.george:
        george_kernel = None
        for a, c in zip(*(coeffs[:2])):
            k = CeleriteKernel(a=a, b=0.0, c=c, d=0.0)
            george_kernel = k if george_kernel is None else george_kernel + k
        for a, b, c, d in zip(*(coeffs[2:])):
            k = CeleriteKernel(a=a, b=b, c=c, d=d)
            george_kernel = k if george_kernel is None else george_kernel + k
Example #23
	"27dESin2": kernels.ExpSine2Kernel(2 / 1.3**2, 27.0 / 365.25),
	"RatQ": kernels.RationalQuadraticKernel(0.8, 0.1**2),
	"Mat32": kernels.Matern32Kernel((0.5)**2),
	"Exp": kernels.ExpKernel((0.5)**2),
	# "W": kernels.WhiteKernel,  # deprecated, delegated to `white_noise`
	"B": kernels.ConstantKernel,
}
george_solvers = {
	"basic": george.BasicSolver,
	"HODLR": george.HODLRSolver,
}

celerite_terms = {
	"N": terms.Term(),
	"B": terms.RealTerm(log_a=-6., log_c=-np.inf,
				bounds={"log_a": [-30, 30],
						"log_c": [-np.inf, np.inf]}),
	"W": terms.JitterTerm(log_sigma=-25,
				bounds={"log_sigma": [-30, 30]}),
	"Mat32": terms.Matern32Term(
				log_sigma=1.,
				log_rho=1.,
				bounds={"log_sigma": [-30, 30],
						# The `celerite` version of the Matern-3/2
						# kernel has problems with very large `log_rho`
						# values. -7.4 is empirical.
						"log_rho": [-7.4, 16]}),
	"SHO0": terms.SHOTerm(log_S0=-6, log_Q=1.0 / np.sqrt(2.), log_omega0=0.,
				bounds={"log_S0": [-30, 30],
						"log_Q": [-30, 30],
						"log_omega0": [-30, 30]}),
Example #24
    x = np.concatenate((x[:ind], [x[ind]], x[ind:]))
    y = np.concatenate((y[:ind], [y[ind]], y[ind:]))
    yerr = np.concatenate((yerr[:ind], [yerr[ind]], yerr[ind:]))
    gp.compute(x, yerr)
    ll = gp.log_likelihood(y)
    K = gp.get_matrix(include_diagonal=True)
    ll0 = -0.5 * np.dot(y, np.linalg.solve(K, y))
    ll0 -= 0.5 * np.linalg.slogdet(K)[1]
    ll0 -= 0.5 * len(x) * np.log(2*np.pi)
    assert np.allclose(ll, ll0), "face"


@pytest.mark.parametrize(
    "kernel",
    [
        terms.RealTerm(log_a=0.1, log_c=0.5),
        terms.RealTerm(log_a=0.1, log_c=0.5) +
        terms.RealTerm(log_a=-0.1, log_c=0.7),
        terms.ComplexTerm(log_a=0.1, log_c=0.5, log_d=0.1),
        terms.ComplexTerm(log_a=0.1, log_b=-0.2, log_c=0.5, log_d=0.1),
        terms.JitterTerm(log_sigma=0.1),
        terms.SHOTerm(log_S0=0.1, log_Q=-1, log_omega0=0.5) +
        terms.JitterTerm(log_sigma=0.1),
        terms.SHOTerm(log_S0=0.1, log_Q=-1, log_omega0=0.5),
        terms.SHOTerm(log_S0=0.1, log_Q=1.0, log_omega0=0.5),
        terms.SHOTerm(log_S0=0.1, log_Q=1.0, log_omega0=0.5) +
        terms.RealTerm(log_a=0.1, log_c=0.4),
        terms.SHOTerm(log_S0=0.1, log_Q=1.0, log_omega0=0.5) *
        terms.RealTerm(log_a=0.1, log_c=0.4),
    ]
)
Example #25
import os
import datetime

outDir = os.path.join(os.getcwd(), 'logL',
                      datetime.datetime.now().strftime('%Y-%m-%d') + '/')
if not os.path.exists(outDir): os.system('mkdir %s' % outDir)
print('We will save this figure in  %s' % outDir)

DirIn = 'DRWtestCeleriteZI/'
files = os.listdir(DirIn)

# Fitting: each light curve is fit with various settings:

sigma_in = 0.2
tau_in = 100
kernel = terms.RealTerm(log_a=2 * np.log(sigma_in), log_c=np.log(1 / tau_in))

# Make a grid for logL...
# in units of tau / tau_in ,
# sigma / sigma_in
step = 0.01
start = 0.4
stop = 2.5
N = int((stop - start) / step)
grid = np.linspace(start, stop, N)

sigma_grid = grid * sigma_in
tau_grid = grid * tau_in

log_a_grid = 2 * np.log(sigma_grid)
log_c_grid = np.log(1 / tau_grid)
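
A hedged sketch (assumed, not from the original script) of how the log-likelihood surface might then be evaluated; the placeholder arrays stand in for a light curve read from one of the files in DirIn.

# Placeholder light curve standing in for data read from one file in DirIn.
t = np.sort(np.random.uniform(0, 3000, 200))
y = np.random.normal(0, sigma_in, 200)
yerr = np.full(200, 0.01)

gp = celerite.GP(kernel, mean=np.mean(y))
gp.compute(t, yerr)

# Evaluate log L over the (log_a, log_c) grid.
logL = np.empty((len(log_a_grid), len(log_c_grid)))
for i, log_a in enumerate(log_a_grid):
    for j, log_c in enumerate(log_c_grid):
        gp.set_parameter_vector([log_a, log_c])
        logL[i, j] = gp.log_likelihood(y, quiet=True)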
Example #26
def test_log_likelihood(seed=42):
    np.random.seed(seed)
    x = np.sort(np.random.rand(10))
    yerr = np.random.uniform(0.1, 0.5, len(x))
    y = np.sin(x)

    kernel = terms.RealTerm(0.1, 0.5)
    gp = GP(kernel)
    with pytest.raises(RuntimeError):
        gp.log_likelihood(y)

    termlist = [(0.1 + 10./j, 0.5 + 10./j) for j in range(1, 4)]
    termlist += [(1.0 + 10./j, 0.01 + 10./j, 0.5, 0.01) for j in range(1, 10)]
    termlist += [(0.6, 0.7, 1.0), (0.3, 0.05, 0.5, 0.6)]
    for term in termlist:
        if len(term) > 2:
            kernel += terms.ComplexTerm(*term)
        else:
            kernel += terms.RealTerm(*term)
        gp = GP(kernel)

        assert gp.computed is False

        with pytest.raises(ValueError):
            gp.compute(np.random.rand(len(x)), yerr)

        gp.compute(x, yerr)
        assert gp.computed is True
        assert gp.dirty is False

        ll = gp.log_likelihood(y)
        K = gp.get_matrix(include_diagonal=True)
        ll0 = -0.5 * np.dot(y, np.linalg.solve(K, y))
        ll0 -= 0.5 * np.linalg.slogdet(K)[1]
        ll0 -= 0.5 * len(x) * np.log(2*np.pi)
        assert np.allclose(ll, ll0)

    # Check that changing the parameters "un-computes" the likelihood.
    gp.set_parameter_vector(gp.get_parameter_vector())
    assert gp.dirty is True
    assert gp.computed is False

    # Check that changing the parameters changes the likelihood.
    gp.compute(x, yerr)
    ll1 = gp.log_likelihood(y)
    params = gp.get_parameter_vector()
    params[0] += 10.0
    gp.set_parameter_vector(params)
    gp.compute(x, yerr)
    ll2 = gp.log_likelihood(y)
    assert not np.allclose(ll1, ll2)

    gp[1] += 10.0
    assert gp.dirty is True
    gp.compute(x, yerr)
    ll3 = gp.log_likelihood(y)
    assert not np.allclose(ll2, ll3)

    # Test zero delta t
    ind = len(x) // 2
    x = np.concatenate((x[:ind], [x[ind]], x[ind:]))
    y = np.concatenate((y[:ind], [y[ind]], y[ind:]))
    yerr = np.concatenate((yerr[:ind], [yerr[ind]], yerr[ind:]))
    gp.compute(x, yerr)
    ll = gp.log_likelihood(y)
    K = gp.get_matrix(include_diagonal=True)
    ll0 = -0.5 * np.dot(y, np.linalg.solve(K, y))
    ll0 -= 0.5 * np.linalg.slogdet(K)[1]
    ll0 -= 0.5 * len(x) * np.log(2*np.pi)
    assert np.allclose(ll, ll0), "face"
Example #27
    #gp = george.GP(k_out)
    #should return gp but check for wn
    return k_out


def neo_update_kernel(theta, params):
    gp = george.GP(mean=0.0, fit_mean=False, white_noise=jitt)
    pass


from celerite import terms as cterms

#  2 or sp.log(10.) ?
T = {
    'Constant': 1.**2,
    'RealTerm': cterms.RealTerm(log_a=2., log_c=2.),
    'ComplexTerm': cterms.ComplexTerm(log_a=2., log_b=2., log_c=2., log_d=2.),
    'SHOTerm': cterms.SHOTerm(log_S0=2., log_Q=2., log_omega0=2.),
    'Matern32Term': cterms.Matern32Term(log_sigma=2., log_rho=2.0),
    'JitterTerm': cterms.JitterTerm(log_sigma=2.0)
}


def neo_term(terms):
    t_out = T[terms[0][0]]
    for f in range(len(terms[0])):
        if f == 0:
            pass
        else:
            t_out *= T[terms[0][f]]
Example #28
                       tau2=(-50,50), k2=(0,0.2), w2=(-2*np.pi,2*np.pi), e2=(0,0.8))
mean_model = Model(**kwargs)
# mean_model = Model(P1=8., tau1=1., k1=np.std(y)/100, w1=0., e1=0.4, 
#                    P2=100, tau2=1., k2=np.std(y)/100, w2=0., e2=0.4, offset1=0., offset2=0.)


#==============================================================================
# The fit
#==============================================================================
from scipy.optimize import minimize

import celerite
from celerite import terms

# Set up the GP model
kernel = terms.RealTerm(log_a=np.log(np.var(y)), log_c=-np.log(10.0))
gp = celerite.GP(kernel, mean=mean_model, fit_mean=True)
gp.compute(x, yerr)
print("Initial log-likelihood: {0}".format(gp.log_likelihood(y)))

# Define a cost function
def neg_log_like(params, y, gp):
    gp.set_parameter_vector(params)
    return -gp.log_likelihood(y)

# def grad_neg_log_like(params, y, gp):
#     gp.set_parameter_vector(params)
#     return -gp.grad_log_likelihood(y)[1]

# Fit for the maximum likelihood parameters
initial_params = gp.get_parameter_vector()
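
The snippet above is truncated here; a hedged sketch (assumed) of the optimisation step that typically follows this setup:

# Assumed continuation: fit for the maximum likelihood parameters.
bounds = gp.get_parameter_bounds()
soln = minimize(neg_log_like, initial_params, method="L-BFGS-B",
                bounds=bounds, args=(y, gp))
gp.set_parameter_vector(soln.x)
print("Final log-likelihood: {0}".format(-soln.fun))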
Example #29
#log_cadence_min = None # np.log(2*np.pi/(2./24))
#log_cadence_max = np.log(2*np.pi/(0.25/24))

#bounds = dict(log_S0=(-15, 30), log_Q=(-15, 15),
#              log_omega0=(log_cadence_min, log_cadence_max))

#kernel = terms.SHOTerm(log_S0=log_S0, log_Q=np.log(Q),
#                       log_omega0=log_w0, bounds=bounds)

#kernel.freeze_parameter("log_Q")  # We don't want to fit for "Q" in this term
bounds = dict(log_a=(-30, 30))  #, log_c=(np.log(4), np.log(8)))

log_c_median = 1.98108915

kernel = terms.RealTerm(log_a=3, log_c=log_c_median, bounds=bounds)

gp = celerite.GP(kernel, mean=mean_model, fit_mean=True)
gp.compute(times - original_params.t0, errors)


# Define a cost function
def neg_log_like(params, y, gp):
    gp.set_parameter_vector(params)
    return -gp.log_likelihood(y)


def grad_neg_log_like(params, y, gp):
    gp.set_parameter_vector(params)
    return -gp.grad_log_likelihood(y)[1]
Example #30
def getEclipseTimes(coords,
                    obsname,
                    period,
                    T0=None,
                    analyse_new=True,
                    myLoc=None):
    '''
coords  - "ra dec" - string, needs to be in a format that astropy can interpret.
    ra  - Target Right Ascension, in hours
    dec - Target Declination, in degrees
obsname - Observing location. Currently must be the /name/ of the observatory.
period  - Initial guess for the period. Remember - this is a function to REFINE a period, not get one from scratch!! 
T0      - Zero point for ephemeris calculation. If not supplied, then the earliest data point is used.
myLoc   - Working directory.
Searches the current directory for a file containing eclipse times, and fits an ephemeris (T0 and period) to it. 
If <analyse_new> is True, it also searches <myLoc> for log files, and fits them for an eclipse time.
The technique for this is to make a smoothed plot of the numerical gradient, and look for two mirrored peaks - one 
where the lightcurve enters eclipse (showing as a trough in gradient), and one for egress (showing as a peak in 
gradient). Ideally, they will be mirrors of each other, with the same width and height (though one will be the negative
of the other). 
A double gaussian is fitted to it using a gaussian process, and the midpoint between their peaks is taken to be the 
eclipse time. To characterise the error of the eclipse time, an MCMC is used to sample the found fit. This is beefy, 
and takes a while, but the Hessian we were getting out of scipy.optimize was heavily dependant on initial conditions,
so was untrustworthy.
'''
    ### VARIABLES ###
    ### ------------------------------------------------- ###

    star = coord.SkyCoord(coords, unit=(u.hour, u.deg))

    # Initial guess
    period = float(period)

    if myLoc is None:
        print("  Defaulting to current directory")
        myLoc = path.curdir

    oname = 'eclipse_times.txt'
    oname = '/'.join([myLoc, oname])
    if not path.isfile(oname):
        print(
            "  Couldn't find previous eclipse times file, '{}'. Creating that file."
            .format(oname))

    ### ------------------------------------------------- ###

    # tecl list
    tl = []
    if path.isfile(oname):
        print("  Found prior eclipses in '{}'. Using these in my fit.".format(
            oname))
        with open(oname, 'r') as f:
            for line in f:
                line = line.split(',')
                line[:2] = [float(x) for x in line[:2]]
                line[2] = line[2].replace('\n', '')
                tl.append(line)
        if analyse_new:
            for t in tl:
                print("  {:.7f}+/-{:.7f} from {}".format(t[0], t[1], t[2]))
    elif not analyse_new:
        print("  I have no eclipse data to analyse. {}".format(
            'No T0, stopping script.' if T0 is None
            else "Continuing with 'guess' values..."))
        if T0 is None:
            exit()
        return T0, period

    if analyse_new:
        print("  Grabbing log files...")
        fnames = []
        try:
            for filename in listdir('/'.join([myLoc, 'Reduced_Data'])):
                if filename.endswith('.log'):
                    fnames.append('/'.join([myLoc, 'Reduced_Data', filename]))
        except OSError:
            # No Reduced_Data subdirectory; fall back to searching myLoc itself.
            for filename in listdir('/'.join([myLoc])):
                if filename.endswith('.log'):
                    fnames.append('/'.join([myLoc, filename]))

        if len(fnames) == 0:
            print(
                "  I couldn't find any log files! For reference, I searched the following:"
            )
            print("   - {}".format('/'.join([myLoc, 'Reduced_Data'])))
            print("   - {}".format('/'.join([myLoc])))
            exit()
        # List the files we found
        print("  Found these log files: ")
        for i, fname in enumerate(fnames):
            print("  {:2d} - {}".format(i, fname))
        print('  ')

        locflag = input("\n    What is the source of these data: ")

        for lf in fnames:
            # lets make the file reading more robust
            try:
                log = Hlog.from_ascii(lf)
            except Exception:
                log = Hlog.from_ulog(lf)

            # Get the g band lightcurve, and correct it to the barycentric time
            gband = log.tseries('2', '1') / log.tseries('2', '2')
            gband_corr = tcorrect(gband, star, obsname)
            # Discard the first 10 observations, as they're often junk
            gband_corr = gband_corr[10:]

            x, y = smooth_derivative(gband_corr, 9, 5)
            yerr = 0.001 * np.ones_like(x)

            fig, ax = plt.subplots()
            plt.plot(x, y)
            gauss = PlotPoints(fig)
            gauss.connect()
            plt.show()

            if gauss.flag:
                print("  No eclipse taken from these data.")
                continue

            kwargs = gauss.gaussPars()
            # hold values close to initial guesses
            bounds = dict(t0=(kwargs['t0'] - kwargs['sep'] / 8,
                              kwargs['t0'] + kwargs['sep'] / 8),
                          sep=(0.9 * kwargs['sep'], 1.1 * kwargs['sep']),
                          log_sigma2=(np.log(kwargs['sep']**2 / 10000),
                                      np.log(kwargs['sep']**2 / 25)),
                          peak=(0.9 * kwargs['peak'], 1.1 * kwargs['peak']))
            kwargs['bounds'] = bounds

            mean_model = TwoGaussians(**kwargs)

            mean, median, std = sigma_clipped_stats(y)
            delta_t = np.mean(np.diff(x)) * 5
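            # Added note: celerite's RealTerm kernel is k(tau) = a * exp(-c * tau); the
            # choices below set the variance a to the clipped data variance (std**2)
            # and the correlation timescale 1/c to about five samples (delta_t).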
            kernel = terms.RealTerm(log_a=np.log(std**2),
                                    log_c=-np.log(delta_t))
            gp = celerite.GP(kernel, mean=mean_model, fit_mean=True)
            gp.compute(x, yerr)
            # print("  Initial log-likelihood: {0}".format(gp.log_likelihood(y)))

            # Fit for the maximum likelihood parameters
            initial_params = gp.get_parameter_vector()
            bounds = gp.get_parameter_bounds()

            # Find a solution using Stu's method
            soln = minimize(neg_log_like,
                            initial_params,
                            jac=grad_neg_log_like,
                            method="L-BFGS-B",
                            bounds=bounds,
                            args=(y, gp))
            if not soln.success:
                print('  Warning: may not have converged')
                print(soln.message)

            gp.set_parameter_vector(soln.x)
            mean_model.set_parameter_vector(gp.get_parameter_vector()[2:])

            out = soln['x']
            t_ecl = out[2]

            print("  Using MCMC to characterise error at peak likelihood...")

            # Use an MCMC model, starting from the solution we found, to model the errors
            ndim = 6
            nwalkers = 50

            # Initial positions: scatter each walker fractionally (by 0.0001 / t_ecl,
            # i.e. roughly 1e-4 in absolute time for the t_ecl parameter) below the
            # maximum likelihood solution, about an order of magnitude above the error
            # we expect on t_ecl.
            p0 = np.random.rand(ndim * nwalkers).reshape((nwalkers, ndim))
            scatter = 0.0001 / t_ecl
            p0 *= scatter
            p0 += 1. - scatter
            p0 = np.transpose(
                np.repeat(out, nwalkers).reshape((ndim, nwalkers))) * p0

            # Construct a sampler
            sampler = emcee.EnsembleSampler(nwalkers,
                                            ndim,
                                            log_like,
                                            args=[y, gp],
                                            threads=1)

            width = 30

            # Burn in
            print("")
            nsteps = 200
            start_time = time.time()
            for i, result in enumerate(sampler.sample(p0, iterations=nsteps)):
                n = int((width + 1) * float(i) / nsteps)
                sys.stdout.write("\r  Burning in...    [{}{}]".format(
                    '#' * n, ' ' * (width - n)))
            pos, prob, state = result

            # Data
            sampler.reset()
            nsteps = 300

            start_time = time.time()
            for i, result in enumerate(sampler.sample(pos, iterations=nsteps)):
                n = int((width + 1) * float(i) / nsteps)
                sys.stdout.write("\r  Sampling data... [{}{}]".format(
                    '#' * n, ' ' * (width - n)))
            print("")

            # corner.corner(sampler.flatchain, labels=['???', '???', 't_ecl', '???', 'a', 'b'])
            # plt.show()

            t_ecl = np.mean(sampler.flatchain[:, 2])
            err = np.std(sampler.flatchain[:, 2])
            sep = np.mean(sampler.flatchain[:, 3])

            print("    Got a solution: {:.7f}+/-{:.7f}\n".format(t_ecl, err))

            # print("  Got a Jacobian,\n {}".format(soln['jac']))
            # print("  Got a Hessian,\n {}".format(soln['hess_inv'].todense()))
            # print(f"Final log-liklihood: {(soln.fun)}")

            tl.append([float(t_ecl), float(err), locflag])

            # Make the maximum likelihood prediction
            mu, var = gp.predict(y, x, return_var=True)
            std = np.sqrt(var)

            # Plot the data
            color = "#ff7f0e"
            plt.plot(x, y, '.')
            plt.plot(x, mu, color=color)
            plt.fill_between(x,
                             mu + std,
                             mu - std,
                             color=color,
                             alpha=0.3,
                             edgecolor="none")
            plt.plot(x, mean_model.get_value(x), 'k-')
            plt.axvline(t_ecl, color='magenta')
            plt.xlim(t_ecl - sep, t_ecl + sep)

            plt.title("maximum likelihood prediction - {}".format(
                lf.split('/')[-1]))
            plt.show()
        print("  \nDone all the files!")

    # Collect the times
    ts = np.array([x[0] for x in tl])
    t_err = np.array([x[1] for x in tl])

    # data T0, if no T0 is given to us
    if T0 is None:
        print("  No prior T0, using first eclipse in data.")
        T0 = np.min(ts)

    print("  Fitting these eclipse times:")
    for t in tl:
        print("  {:.7f}+/-{:.7f} from {}".format(t[0], t[1], t[2]))
    print("\nStarting from an initial ephem of T0: {}, P: {}".format(
        T0, period))

    def test(params, data):
        # Convert eclipse times into (fractional) cycle numbers for a trial ephemeris.

        # Extract the params
        T = params[0]
        period = params[1]

        # How many periods past the trial T0 is each eclipse?
        comp = (data - T) / period

        return comp

    def errFunc(p, t, t_e):
        # The true cycle number E must be an integer; assign it from the initial ephemeris
        E = np.round((t - T0) / period)

        # Error-weighted residual between the trial cycle numbers and the integer E
        diffs = (test(p, t) - E) / t_e

        return diffs

    out = leastsq(errFunc, [T0, period], args=(ts, t_err), full_output=1)

    pfinal = out[0]
    covar = out[1]

    P, P_err = pfinal[1], np.sqrt(covar[1][1])
    T0, T0_err = pfinal[0], np.sqrt(covar[0][0])

    print("  Got a T0 of {:.10f}+/-{:.2e}".format(T0, T0_err))
    print("  Got a period of {:.10f}+/-{:.2e}".format(P, P_err))

    print("        T - T0           | #phases from predicted")
    with open(oname, 'w') as f:
        for datum in tl:
            t = datum[0]
            t_e = datum[1]
            source = datum[2]

            dt = ((t - T0) / P) % 1  # residual
            if dt > 0.5:
                dt -= 1

            print("    {: 10.6f}+/-{:-9.6f} | {: 9.6f}".
                  format(  # Make this handle errors properly
                      (t), t_e, dt))
            f.write("{}, {},{}\n".format(t, t_e, source))
    print("Wrote eclipse data to {}".format(oname))