Exemple #1
0
    # Second row of the 3x5 grid: the kernel for the i-th column.
    plt.subplot(3, 5, 6 + i)
    plt.plot(
        # Mirror the curve about zero: prepend the negated, reversed inputs. The
        # reversed copy drops its last element (the original first sample) so
        # that point is not drawn twice.
        B.concat(-t[::-1][:-1], t),
        B.concat(k[::-1][:-1], k),
        lw=1,
    )
    # Mark the locations `model.t_u` on the zero line for models that have them.
    if hasattr(model, "t_u"):
        plt.scatter(model.t_u, model.t_u * 0, s=5, marker="o", c="black")
    # plt.xlabel("Time (s)")
    if i == 0:
        # Only the left-most column carries a y-label. A raw string is required:
        # `\,` is not a valid Python escape sequence and triggers a warning in a
        # normal string literal, while the resulting text is the same.
        plt.ylabel(r"$k_{f\,|\,h}$")
    plt.xlim(-6, 6)
    plt.ylim(-0.35, 1.2)
    tweak(legend=False)

    plt.subplot(3, 5, 11 + i)
    plt.plot(t, f, lw=1)
    if hasattr(model, "t_z"):
        plt.scatter(model.t_z, model.t_z * 0, s=5, marker="o", c="black")
    # plt.xlabel("Time (s)")
    if i == 0:
        plt.ylabel("$f$")
    plt.xlim(0, 8)
    tweak(legend=False)

plt.tight_layout()
plt.savefig(wd.file("interpolation.pdf"))
pdfcrop(wd.file("interpolation.pdf"))
plt.show()
Exemple #2
0
    means, lowers, uppers = \
        model.predict(x, num_samples=200, credible_bounds=True, latent=False)

    # For the purpose of comparison, standardise using the mean of the
    # *training* data. This is not how the SMSE usually is defined!
    pred = pd.DataFrame(means, index=train.index, columns=train.columns)
    smse = ((pred - test) ** 2).mean(axis=0) / \
           ((train.mean(axis=0) - test) ** 2).mean(axis=0)

    # Report average SMSE.
    wbml.out.kv('SMSEs', smse.dropna())
    wbml.out.kv('Average SMSE', smse.mean())

    # Plot the result.
    plt.figure(figsize=(15, 3))
    wbml.plot.tex()

    for i, name in enumerate(test.columns):
        p = list(train.columns).index(name)  # Index of output.
        plt.subplot(1, 3, i + 1)
        plt.plot(x, means[:, p], style='pred')
        plt.fill_between(x, lowers[:, p], uppers[:, p], style='pred')
        plt.scatter(x, y[:, p], style='train')
        plt.scatter(test[name].index, test[name], style='test')
        plt.xlabel('Time (year)')
        plt.ylabel(name)
        wbml.plot.tweak(legend=False)

    plt.tight_layout()
    plt.savefig(wd.file('exchange.pdf'))
Exemple #3
0
                          normalise_y=True)
    model.fit(x, y)
    means, lowers, uppers = \
        model.predict(x, num_samples=100, credible_bounds=True, latent=True)

    # Report SMSE.
    pred = pd.DataFrame(means, index=train.index, columns=train.columns)
    smse = wbml.metric.smse(pred, test)
    wbml.out.kv('SMSEs', smse.dropna())
    wbml.out.kv('Average SMSEs', smse.mean())

    # Name of output to plot.
    name = 'F2'

    # Plot the result.
    plt.figure(figsize=(12, 1.75))
    wbml.plot.tex()

    p = list(train.columns).index(name)
    plt.plot(x, means[:, p], style='pred')
    plt.fill_between(x, lowers[:, p], uppers[:, p], style='pred')
    plt.scatter(x, y[:, p], style='train')
    plt.scatter(test[name].index, test[name], style='test')
    plt.xlabel('Time (second)')
    plt.xlim(0.4, 1)
    plt.ylabel(f'{name} (volt)')
    wbml.plot.tweak(legend=False)

    plt.tight_layout()
    plt.savefig(wd.file('eeg.pdf'))
Exemple #4
0
scale = 5
n_u = 60
n_z = len(t)

# Setup, fit, and save model.
model = RGPCM(
    window=window,
    scale=scale,
    noise=0.05,
    n_u=n_u,
    n_z=n_z,
    t=t,
)
if args.train:
    model.fit(t, y, iters=50_000, rate=2e-2, optimise_hypers=20_000)
    model.save(wd.file("model.pickle"))
else:
    model.load(wd.file("model.pickle"))

# Make and save predictions.
if args.predict:
    posterior = model.condition(t, y)
    pred_f = (t, ) + posterior.predict(t)
    pred_psd = posterior.predict_psd()
    pred_psd = (
        pred_psd.x,
        pred_psd.mean,
        pred_psd.err_95_lower,
        pred_psd.err_95_upper,
        pred_psd.all_samples,
    )
Exemple #5
0
# Three side-by-side panels, each plotting one timing quantity against the
# number of latent processes, with one curve per value of n.
plt.figure(figsize=(12, 3))

# One entry per panel, left to right:
# (measurements keyed by n, transform applied to the plotted values, y-label).
timing_panels = [
    (totals, lambda values: values, "Total time (s)"),
    # The middle panel is shown in milliseconds, hence the factor of 1e3.
    (hs, lambda values: values * 1e3, "Time spent on basis (ms)"),
    (percs, lambda values: values, "Time spent on basis (\\%)"),
]

for panel_no, (panel_data, panel_transform, panel_ylabel) in enumerate(
    timing_panels, start=1
):
    plt.subplot(1, 3, panel_no)
    for n in [100, 200, 300]:
        panel_series = panel_data[n]
        plt.plot(
            sorted(panel_series.keys()),
            panel_transform(get(panel_series, 0)),
            "-o",
            label=f"$n={n}$",
        )
    plt.xlim(0, 250)
    plt.xlabel("Number of latent processes $m$")
    plt.ylabel(panel_ylabel)
    wbml.plot.tweak(legend=True, legend_loc="upper left")

# Write the figure to disk, crop the PDF, then display it.
plt.savefig(wd.file("timing_h.pdf"))
wbml.plot.pdfcrop(wd.file("timing_h.pdf"))

plt.show()
Exemple #6
0
                        markov=0,
                        normalise_y=False)
    igp.fit(x_obs, y_obs)
    igp_means, igp_lowers, igp_uppers = \
        igp.predict(x, num_samples=100, credible_bounds=True, latent=True)

    # Plot the result.
    plt.figure(figsize=(15, 3))

    for i in range(3):
        plt.subplot(1, 3, i + 1)

        # Plot observations.
        plt.scatter(x_obs, y_obs[:, i], label='Observations', style='train')
        plt.plot(x, f[:, i], label='Truth', style='test')

        # Plot GPAR.
        plt.plot(x, means[:, i], label='GPAR', style='pred')
        plt.fill_between(x, lowers[:, i], uppers[:, i], style='pred')

        # Plot independent GPs.
        plt.plot(x, igp_means[:, i], label='IGP', style='pred2')
        plt.fill_between(x, igp_lowers[:, i], igp_uppers[:, i], style='pred2')

        plt.xlabel('$t$')
        plt.ylabel(f'$y_{i + 1}$')
        wbml.plot.tweak(legend=i == 2)

    plt.tight_layout()
    plt.savefig(wd.file('synthetic.pdf'))
Exemple #7
0
    igp.fit(x_obs, y_obs)
    igp_means, igp_lowers, igp_uppers = igp.predict(x,
                                                    num_samples=200,
                                                    credible_bounds=True,
                                                    latent=True)

    # Plot the result.
    plt.figure(figsize=(15, 3))

    for i in range(3):
        plt.subplot(1, 3, i + 1)

        # Plot observations.
        plt.scatter(x_obs, y_obs[:, i], label="Observations", style="train")
        plt.plot(x, f[:, i], label="Truth", style="test")

        # Plot GPAR.
        plt.plot(x, means[:, i], label="GPAR", style="pred")
        plt.fill_between(x, lowers[:, i], uppers[:, i], style="pred")

        # Plot independent GPs.
        plt.plot(x, igp_means[:, i], label="IGP", style="pred2")
        plt.fill_between(x, igp_lowers[:, i], igp_uppers[:, i], style="pred2")

        plt.xlabel("$t$")
        plt.ylabel(f"$y_{i + 1}$")
        wbml.plot.tweak(legend=i == 2)

    plt.tight_layout()
    plt.savefig(wd.file("synthetic.pdf"))
Exemple #8
0
    ]:
        with wbml.out.Section(name):
            model = model.condition(torch.tensor(x), torch.tensor(y_norm))
            x_test = np.array(test.index)
            y_test = np.array(test.reindex(train.columns, axis=1))
            logprob = model.logpdf(torch.tensor(x_test),
                                   torch.tensor(normaliser.normalise(y_test)))
            logdet = normaliser.normalise_logdet(y_test)
            pplp = logprob + logdet
            wbml.out.kv("PPLP", pplp / B.length(y_test))

    # Name of output to plot.
    name = "F2"

    # Plot the result.
    plt.figure(figsize=(12, 1.75))
    wbml.plot.tex()

    p = list(train.columns).index(name)
    plt.plot(x, means[:, p], style="pred")
    plt.fill_between(x, lowers[:, p], uppers[:, p], style="pred")
    plt.scatter(x, y[:, p], style="train")
    plt.scatter(test[name].index, test[name], style="test")
    plt.xlabel("Time (second)")
    plt.xlim(0.4, 1)
    plt.ylabel(f"{name} (volt)")
    wbml.plot.tweak(legend=False)

    plt.tight_layout()
    plt.savefig(wd.file("eeg.pdf"))
Exemple #9
0
        scale=scale,
        noise=0.05,
        n_u=n_u,
        n_z=n_z,
        t=t,
    ) for Model in [GPCM, RGPCM, CGPCM]
]
if args.train:
    for model in models:
        # The year 2014 is numerically iffy, so we train with a lower
        # learning rate to prevent the loss from NaNing out.
        if args.year == 2014:
            model.fit(t_train, y_train, rate=2e-2, iters=20_000)
        else:
            model.fit(t_train, y_train, iters=20_000)
        model.save(wd.file(model.name.lower(), "model.pickle"))
else:
    for model in models:
        model.load(wd.file(model.name.lower(), "model.pickle"))

# Make and save predictions.
if args.predict:
    for model in models:
        # Perform predictions.
        posterior = model.condition(t_train, y_train)
        pred_f = (t_pred, ) + normaliser.untransform(posterior.predict(t_pred))
        pred_f_test = (t_test, ) + normaliser.untransform(
            posterior.predict(t_test))
        pred_k = posterior.predict_kernel()
        # Carefully untransform kernel prediction.
        pred_k = (
Exemple #10
0
    plt.subplot(3, 4, 3 + 4 * i)
    plt.plot(t, np.stack(ks).T, lw=1)
    plt.scatter(model.t_u, model.t_u * 0, s=5, marker="o", c="black")
    plt.title("Kernel")
    if i == 2:
        plt.xlabel("Lag (s)")
    plt.xlim(0, 6)
    tweak(legend=False)

    # Estimate the PSD of every kernel in `ks` (requested in dB via `db=True`).
    psd_estimates = [estimate_psd(t, k, db=True) for k in ks]
    freqs, psds = zip(*psd_estimates)
    # Only the first frequency grid is kept; presumably every estimate shares
    # the same grid -- TODO confirm against `estimate_psd`.
    freqs = freqs[0]
    psds = np.stack(psds).T

    # Restrict to the first half of the frequency grid, and within that to
    # frequencies of at most 2, which is also the upper x-limit used below.
    half = len(freqs) // 2
    inds = np.arange(half)
    inds = inds[freqs[inds] <= 2]

    # Fourth column of the 3x4 grid, row `i`.
    plt.subplot(3, 4, 4 + 4 * i)
    plt.title("PSD (dB)")
    plt.plot(freqs[inds], psds[inds, :], lw=1)
    plt.xlim(0, 2)
    plt.ylim(-40, 10)
    # Only the last row (i == 2) carries an x-label.
    if i == 2:
        plt.xlabel("Frequency (Hz)")
    tweak(legend=False)

plt.tight_layout()
plt.savefig(wd.file("sample.pdf"))
pdfcrop(wd.file("sample.pdf"))
plt.show()
Exemple #11
0
t_s, elbo_s = tracker_s.get_xy(start=0)

# Double check that there isn't a huge delay between the first two times e.g. due to
# JIT compilation.
assert t_mf[1] < 1
assert t_cmf[1] < 1
assert t_ca[1] < 1
assert t_s[1] < 1

# Plot the ELBO of each scheme against wall-clock time.
plt.figure(figsize=(5, 4))
# Horizontal reference line at `gp_logpdf`.
plt.axhline(y=gp_logpdf, ls="--", c="black", lw=1, label="GP")
plt.plot(
    # These times should line up exactly with the CA times, but they might not due to
    # natural variability in runtimes. Force them to line up by rescaling the times so
    # that they end at `max(t_ca)`.
    t_s / max(t_s) * max(t_ca),
    elbo_s,
    label="Structured",
)
plt.plot(t_ca, elbo_ca, label="CA")
# For the mean-field schemes, show the best ELBO reached so far (running maximum).
plt.plot(t_cmf, np.maximum.accumulate(elbo_cmf), label="Collapsed MF")
plt.plot(t_mf, np.maximum.accumulate(elbo_mf), label="MF")
plt.xlabel("Time (s)")
plt.ylabel("ELBO")
plt.ylim(-900, -550)
# Round up to the next multiple of five seconds. The limit is based on the times that
# are actually plotted: the structured curve is rescaled to end at `max(t_ca)`, so
# `t_ca` (not the raw `t_s`) determines how far the "Structured" and "CA" curves reach.
plt.xlim(0, 5 * (max(max(t_mf), max(t_cmf), max(t_ca)) // 5 + 1))
tweak(legend_loc="lower right")
plt.savefig(wd.file("elbos.pdf"))
pdfcrop(wd.file("elbos.pdf"))
plt.show()
Exemple #12
0
)
plt.plot(t, err_95_upper, style="pred", lw=1)
plt.plot(t, err_95_lower, style="pred", lw=1)

t, mean, var, err_95_lower, err_95_upper = psd_pred_mf
inds = t <= 1
t = t[inds]
mean = mean[inds]
err_95_lower = err_95_lower[inds]
err_95_upper = err_95_upper[inds]
plt.plot(t, mean, label="Mean-field", style="pred2")
plt.fill_between(
    t,
    err_95_lower,
    err_95_upper,
    style="pred2",
)
plt.plot(t, err_95_upper, style="pred2", lw=1)
plt.plot(t, err_95_lower, style="pred2", lw=1)

plt.xlabel("Frequency (Hz)")
plt.ylabel("Spectral density (dB)")
plt.title("PSD")
plt.xlim(0, 1)
plt.ylim(-20, 10)
tweak(legend=True)

plt.savefig(wd.file("smk.pdf"))
pdfcrop(wd.file("smk.pdf"))
plt.show()
Exemple #13
0
            plt.plot(
                x_proj[-args.n_plot :], y_proj[i_lat][-args.n_plot :], style="train"
            )
            plt.plot(x_proj[-args.n_plot :], mean[-args.n_plot :], style="pred")
            plt.fill_between(
                x_proj[-args.n_plot :],
                lower[-args.n_plot :],
                upper[-args.n_plot :],
                style="pred",
            )
            wbml.plot.tweak(legend=False)

            plt.gca().tick_params(labelsize=10)
    plt.tight_layout()
    plt.savefig(
        wd.file(f"simulators_latents{suffix}.pdf"), format="pdf", bbox_inches="tight"
    )

    # Plot predictions of all latent processes.
    plt.figure(figsize=(25, 8))
    # 5 x 10 grid of panels: `i_s` selects the row and `i_r` the column.
    for i_r in range(10):
        for i_s in range(5):
            plt.subplot(5, 10, i_r + i_s * 10 + 1)
            # Column headers on the top row only.
            if i_s == 0:
                plt.title(f"$i_r={i_r + 1}$", fontsize=12)
            # Row labels on the left-most column only.
            if i_r == 0:
                plt.ylabel(f"$i_s={i_s + 1}$", fontsize=12)
            # X-axis label on the bottom row only. `i_s` runs over range(5), so
            # the bottom row is 4; comparing against 5 never matched and the
            # label was silently never drawn.
            if i_s == 4:
                plt.xlabel("Day", fontsize=10)
            # NOTE(review): the subplot index hard-codes 10 columns while this
            # index uses `m_r`; the two agree only if m_r == 10 -- confirm.
            i_lat = i_r + i_s * m_r
Exemple #14
0
        # Extract test set and extended test set.
        d_test, d_test_ext = d_tests[i], d_tests[i + 2]
        x = list(map(date_to_day, d_test.index))
        x_ext = list(map(date_to_day, d_test_ext.index))

        # Plot prediction.
        y_i = list(d_train.columns).index(d_test.columns[0])
        plt.plot(x_ext, mean[:, y_i], style='pred')
        plt.fill_between(x_ext, lowers[:, y_i], uppers[:, y_i], style='pred')

        # Plot data.
        plt.scatter(x_ext, d_test_ext, style='train', s=4)
        plt.scatter(x, d_test, style='test', s=4)

        # Finalise plot.
        plt.xlim(x_ext[0], x_ext[-1])
        plt.ylim(10, 30)
        plt.yticks([15, 20, 25])
        if i == 0:
            plt.title(lookup_size[d_size] + f' (SMSE: {smse:.3f})')
        if i == 1:
            plt.xlabel('Time (day)')
        if d_size == 0:
            plt.ylabel(f'{lookup_place[d_test.columns[0]]}\nTemp. (celsius)')

        wbml.plot.tweak(legend=False)

plt.tight_layout()
plt.savefig(wd.file('air_temp.pdf'))
Exemple #15
0
    plt.subplot(1, 6, 4 + i)

    def apply_to_psd(f):
        """Apply `f` to the PSDs in linear units and return the result in dB.

        `psds` is read from the enclosing scope when the function is called.
        It is mapped through `10 ** (x / 10)`, `f` is applied to that, and the
        outcome is mapped back through `10 * log10(x)`.
        """
        linear = 10 ** (psds / 10)
        reduced = f(linear)
        # Change of base: log(x) / log(10) is log10(x).
        return 10 * B.log(reduced) / B.log(10)

    for q in [1, 5, 10, 20, 30, 40]:
        plt.fill_between(
            freqs,
            apply_to_psd(lambda x: B.quantile(x, q / 100, axis=1)),
            apply_to_psd(lambda x: B.quantile(x, 1 - q / 100, axis=1)),
            facecolor="tab:blue",
            alpha=0.2,
        )
    # Careful: take the mean in PSD space!
    plt.plot(
        freqs,
        apply_to_psd(lambda x: B.mean(x, axis=1)),
        c="black",
    )
    plt.title(model.name + " (PSD)")
    plt.xlabel("Frequency (Hz)")
    plt.xlim(-3, 3)
    plt.ylim(-30, 5)
    tweak(legend=False)

plt.tight_layout()
plt.savefig(wd.file("priors.pdf"))
pdfcrop(wd.file("priors.pdf"))
plt.show()
Exemple #16
0
    plt.fill_between(
        t,
        mean2 - 1.96 * np.sqrt(var2),
        mean2 + 1.96 * np.sqrt(var2),
        style="pred2",
    )
    plt.plot(t, mean2 + 1.96 * np.sqrt(var2), style="pred2", lw=1)
    plt.plot(t, mean2 - 1.96 * np.sqrt(var2), style="pred2", lw=1)
    plt.yticks([0, 0.5, 1])
    plt.xticks([0, 2, 4])
    plt.xlim(0, 4)
    plt.ylim(-0.25, 1.25)
    if not first:
        plt.gca().set_yticklabels([])
    tweak(legend=legend)


# Three-panel comparison figure: one call to `plot_kernel_predictions` per panel.
plt.figure(figsize=(7.5, 3))

# One entry per panel, left to right:
# (title, positional arguments, keyword arguments). Keywords that are omitted
# fall back to the defaults of `plot_kernel_predictions`.
comparison_panels = [
    ("GPCM on EQ", ("gpcm", "eq"), {"legend": False, "first": True}),
    ("CGPCM on CEQ", ("cgpcm", "ceq-1"), {"legend": False}),
    ("RGPCM on Matern–$\\frac{1}{2}$", ("rgpcm", "matern12"), {}),
]
for panel_no, (panel_title, panel_args, panel_kwargs) in enumerate(
    comparison_panels, start=1
):
    plt.subplot(1, 3, panel_no)
    plt.title(panel_title)
    plot_kernel_predictions(*panel_args, **panel_kwargs)

# Write the figure to disk, crop the PDF, then display it.
plt.savefig(wd.file("comparison.pdf"))
pdfcrop(wd.file("comparison.pdf"))
plt.show()
Exemple #17
0
plt.text(243,
         -1.25,
         "$m=p$",
         horizontalalignment="right",
         verticalalignment="center")
# plt.gca().set_xscale('log')
plt.plot([0, 300], [igp_pplp, igp_pplp],
         "-",
         lw=1.5,
         c="tab:orange",
         label="Independent GPs")
wbml.plot.tweak(legend=True, legend_loc="center")
plt.xlim(0, 250)
plt.xlabel("Number of latent processes $m$")
plt.ylabel("PPLP of held-out data")
plt.savefig(wd.file("temperature_pplp.pdf"))
wbml.plot.pdfcrop(wd.file("temperature_pplp.pdf"))

plt.figure(figsize=(5.5, 3))
plt.axvline(x=247, ymin=0, ymax=1, ls="--", c="black", lw=1)
plt.plot(ms, oilmm_rmses, "o-", lw=1.5, c="tab:blue", label="OILMM")
plt.text(243,
         2.075,
         "$m=p$",
         horizontalalignment="right",
         verticalalignment="center")
# plt.gca().set_xscale('log')
plt.plot([0, 300], [igp_rmse, igp_rmse],
         "-",
         lw=1.5,
         c="tab:orange",
Exemple #18
0
    f, y = gp_f.measure.sample(gp_f(t), gp_y(t))
    f, y = B.flatten(f), B.flatten(y)
    wd.save(
        {
            "t": t,
            "f": f,
            "k": B.flatten(kernel(t_k, 0)),
            "y": y,
            "true_logpdf": gp_y(t).logpdf(y),
        },
        slugify(str(kernel)),
        "data.pickle",
    )

    for scheme in ["mean-field", "structured"]:
        model = model_constructor(scheme)
        prefix = (slugify(str(kernel)), scheme, slugify(model.name))

        # Fit model and predict function and kernel.
        model.fit(t, y, iters=10_000)
        elbo = model.elbo(t, y)
        posterior = model.condition(t, y)
        f_pred = posterior.predict(t)
        k_pred = posterior.predict_kernel(t_k)

        # Save stuff.
        model.save(wd.file(*prefix, "model.pickle"))
        wd.save(elbo, *prefix, "elbo.pickle")
        wd.save((t,) + f_pred, *prefix, "f_pred.pickle")
        wd.save((k_pred.x, k_pred.mean, k_pred.var), *prefix, "k_pred.pickle")
Exemple #19
0
    # Report average SMSE.
    wbml.out.kv("SMSEs", smse.dropna())
    wbml.out.kv("Average SMSE", smse.mean())

    # Compute PPLP.
    x_test = np.array(test.index)
    y_test = np.array(test.reindex(train.columns, axis=1))
    logprob = model.logpdf(torch.tensor(x_test),
                           torch.tensor(normaliser.normalise(y_test)))
    logdet = normaliser.normalise_logdet(y_test)
    pplp = logprob + logdet
    wbml.out.kv("PPLP", pplp / B.length(y_test))

    # Plot the result.
    plt.figure(figsize=(12, 2))
    wbml.plot.tex()

    for i, name in enumerate(test.columns):
        p = list(train.columns).index(name)  # Index of output.
        plt.subplot(1, 3, i + 1)
        plt.plot(x, means[:, p], style="pred")
        plt.fill_between(x, lowers[:, p], uppers[:, p], style="pred")
        plt.scatter(x, y[:, p], style="train")
        plt.scatter(test[name].index, test[name], style="test")
        plt.xlabel("Time (year)")
        plt.ylabel(name)
        wbml.plot.tweak(legend=False)

    plt.tight_layout()
    plt.savefig(wd.file("exchange.pdf"))
Exemple #20
0
        # Extract test set and extended test set.
        d_test, d_test_ext = d_tests[i], d_tests[i + 2]
        x = list(map(date_to_day, d_test.index))
        x_ext = list(map(date_to_day, d_test_ext.index))

        # Plot prediction.
        y_i = list(d_train.columns).index(d_test.columns[0])
        plt.plot(x_ext, mean[:, y_i], style="pred")
        plt.fill_between(x_ext, lowers[:, y_i], uppers[:, y_i], style="pred")

        # Plot data.
        plt.scatter(x_ext, d_test_ext, style="train", s=4)
        plt.scatter(x, d_test, style="test", s=4)

        # Finalise plot.
        plt.xlim(x_ext[0], x_ext[-1])
        plt.ylim(10, 30)
        plt.yticks([15, 20, 25])
        if i == 0:
            plt.title(lookup_size[d_size] + f" (SMSE: {smse:.3f})")
        if i == 1:
            plt.xlabel("Time (day)")
        if d_size == 0:
            plt.ylabel(f"{lookup_place[d_test.columns[0]]}\nTemp. (celsius)")

        wbml.plot.tweak(legend=False)

plt.tight_layout()
plt.savefig(wd.file("air_temp.pdf"))