# Second row of the 3x5 grid (panel 6 + i): plot `k` against `t` together with
# its reflection, i.e. the reversed arrays (minus the duplicated first point)
# are prepended with negated `t`.
plt.subplot(3, 5, 6 + i)
plt.plot(
    B.concat(-t[::-1][:-1], t),
    B.concat(k[::-1][:-1], k),
    lw=1,
)
if hasattr(model, "t_u"):
    # Mark the locations stored in `model.t_u` on the zero line.
    plt.scatter(model.t_u, model.t_u * 0, s=5, marker="o", c="black")
if i == 0:
    # Raw string: `\,` is LaTeX spacing, not a Python escape sequence. Without
    # the `r` prefix Python 3.12+ emits a SyntaxWarning for the invalid escape.
    plt.ylabel(r"$k_{f\,|\,h}$")
plt.xlim(-6, 6)
plt.ylim(-0.35, 1.2)
tweak(legend=False)

# Third row of the grid (panel 11 + i): plot `f` against `t`.
plt.subplot(3, 5, 11 + i)
plt.plot(t, f, lw=1)
if hasattr(model, "t_z"):
    # Mark the locations stored in `model.t_z` on the zero line.
    plt.scatter(model.t_z, model.t_z * 0, s=5, marker="o", c="black")
if i == 0:
    plt.ylabel("$f$")
plt.xlim(0, 8)
tweak(legend=False)

# Write the figure to `interpolation.pdf`, crop it, and display it.
plt.tight_layout()
plt.savefig(wd.file("interpolation.pdf"))
pdfcrop(wd.file("interpolation.pdf"))
plt.show()
# Predict at the inputs `x`, requesting credible bounds next to the means.
means, lowers, uppers = model.predict(
    x,
    num_samples=200,
    credible_bounds=True,
    latent=False,
)

# For the purpose of comparison, standardise using the mean of the
# *training* data. This is not how the SMSE usually is defined!
pred = pd.DataFrame(means, index=train.index, columns=train.columns)
model_error = ((pred - test) ** 2).mean(axis=0)
baseline_error = ((train.mean(axis=0) - test) ** 2).mean(axis=0)
smse = model_error / baseline_error

# Report the per-output SMSEs (without NaN entries) and their average.
wbml.out.kv('SMSEs', smse.dropna())
wbml.out.kv('Average SMSE', smse.mean())

# Plot the result: one panel per column of `test`.
plt.figure(figsize=(15, 3))
wbml.plot.tex()

output_names = list(train.columns)
for panel, name in enumerate(test.columns, start=1):
    # Position of this output among the columns of the predictions.
    column = output_names.index(name)
    plt.subplot(1, 3, panel)
    plt.plot(x, means[:, column], style='pred')
    plt.fill_between(x, lowers[:, column], uppers[:, column], style='pred')
    plt.scatter(x, y[:, column], style='train')
    held_out = test[name]
    plt.scatter(held_out.index, held_out, style='test')
    plt.xlabel('Time (year)')
    plt.ylabel(name)
    wbml.plot.tweak(legend=False)

plt.tight_layout()
plt.savefig(wd.file('exchange.pdf'))
normalise_y=True) model.fit(x, y) means, lowers, uppers = \ model.predict(x, num_samples=100, credible_bounds=True, latent=True) # Report SMSE. pred = pd.DataFrame(means, index=train.index, columns=train.columns) smse = wbml.metric.smse(pred, test) wbml.out.kv('SMSEs', smse.dropna()) wbml.out.kv('Average SMSEs', smse.mean()) # Name of output to plot. name = 'F2' # Plot the result. plt.figure(figsize=(12, 1.75)) wbml.plot.tex() p = list(train.columns).index(name) plt.plot(x, means[:, p], style='pred') plt.fill_between(x, lowers[:, p], uppers[:, p], style='pred') plt.scatter(x, y[:, p], style='train') plt.scatter(test[name].index, test[name], style='test') plt.xlabel('Time (second)') plt.xlim(0.4, 1) plt.ylabel(f'{name} (volt)') wbml.plot.tweak(legend=False) plt.tight_layout() plt.savefig(wd.file('eeg.pdf'))
scale = 5
n_u = 60
n_z = len(t)

# Construct the model, then either train and store it or load a stored copy.
model_settings = dict(
    window=window,
    scale=scale,
    noise=0.05,
    n_u=n_u,
    n_z=n_z,
    t=t,
)
model = RGPCM(**model_settings)
if not args.train:
    # No training requested: load the model from `model.pickle` instead.
    model.load(wd.file("model.pickle"))
else:
    model.fit(t, y, iters=50_000, rate=2e-2, optimise_hypers=20_000)
    model.save(wd.file("model.pickle"))

# Make predictions for the function and for the PSD.
if args.predict:
    posterior = model.condition(t, y)
    pred_f = (t,) + posterior.predict(t)
    psd = posterior.predict_psd()
    # Flatten the PSD prediction into a plain tuple of its fields.
    pred_psd = (
        psd.x,
        psd.mean,
        psd.err_95_lower,
        psd.err_95_upper,
        psd.all_samples,
    )
# Three side-by-side panels; each shows one curve per n in {100, 200, 300},
# drawn against the sorted keys of the corresponding per-n mapping.
plt.figure(figsize=(12, 3))

# (per-n results, optional rescaling factor, y-axis label) for each panel.
panels = (
    (totals, None, "Total time (s)"),
    (hs, 1e3, "Time spent on basis (ms)"),
    (percs, None, "Time spent on basis (\\%)"),
)
for column, (results, factor, ylabel) in enumerate(panels, start=1):
    plt.subplot(1, 3, column)
    for n in [100, 200, 300]:
        xs = sorted(results[n].keys())
        ys = get(results[n], 0)
        if factor is not None:
            # Only the middle panel rescales its values (by 1e3, label in ms).
            ys = ys * factor
        plt.plot(xs, ys, "-o", label=f"$n={n}$")
    plt.xlim(0, 250)
    plt.xlabel("Number of latent processes $m$")
    plt.ylabel(ylabel)
    wbml.plot.tweak(legend=True, legend_loc="upper left")

# Write the figure to `timing_h.pdf`, crop it, and display it.
plt.savefig(wd.file("timing_h.pdf"))
wbml.plot.pdfcrop(wd.file("timing_h.pdf"))
plt.show()
markov=0, normalise_y=False) igp.fit(x_obs, y_obs) igp_means, igp_lowers, igp_uppers = \ igp.predict(x, num_samples=100, credible_bounds=True, latent=True) # Plot the result. plt.figure(figsize=(15, 3)) for i in range(3): plt.subplot(1, 3, i + 1) # Plot observations. plt.scatter(x_obs, y_obs[:, i], label='Observations', style='train') plt.plot(x, f[:, i], label='Truth', style='test') # Plot GPAR. plt.plot(x, means[:, i], label='GPAR', style='pred') plt.fill_between(x, lowers[:, i], uppers[:, i], style='pred') # Plot independent GPs. plt.plot(x, igp_means[:, i], label='IGP', style='pred2') plt.fill_between(x, igp_lowers[:, i], igp_uppers[:, i], style='pred2') plt.xlabel('$t$') plt.ylabel(f'$y_{i + 1}$') wbml.plot.tweak(legend=i == 2) plt.tight_layout() plt.savefig(wd.file('synthetic.pdf'))
# Fit `igp` to the observations and predict at `x` with credible bounds.
igp.fit(x_obs, y_obs)
igp_means, igp_lowers, igp_uppers = igp.predict(
    x,
    num_samples=200,
    credible_bounds=True,
    latent=True,
)

# Plot the result: one panel per output, comparing both sets of predictions.
plt.figure(figsize=(15, 3))

# (legend label, plot style, means, lower bounds, upper bounds) per model.
curves = (
    ("GPAR", "pred", means, lowers, uppers),
    ("IGP", "pred2", igp_means, igp_lowers, igp_uppers),
)
for col in range(3):
    plt.subplot(1, 3, col + 1)

    # Observations and ground truth.
    plt.scatter(x_obs, y_obs[:, col], label="Observations", style="train")
    plt.plot(x, f[:, col], label="Truth", style="test")

    # Mean and credible band of each model, GPAR first and then IGP.
    for label, style, centre, low, high in curves:
        plt.plot(x, centre[:, col], label=label, style=style)
        plt.fill_between(x, low[:, col], high[:, col], style=style)

    plt.xlabel("$t$")
    plt.ylabel(f"$y_{col + 1}$")
    # Only the last panel carries the legend.
    wbml.plot.tweak(legend=col == 2)

plt.tight_layout()
plt.savefig(wd.file("synthetic.pdf"))
]: with wbml.out.Section(name): model = model.condition(torch.tensor(x), torch.tensor(y_norm)) x_test = np.array(test.index) y_test = np.array(test.reindex(train.columns, axis=1)) logprob = model.logpdf(torch.tensor(x_test), torch.tensor(normaliser.normalise(y_test))) logdet = normaliser.normalise_logdet(y_test) pplp = logprob + logdet wbml.out.kv("PPLP", pplp / B.length(y_test)) # Name of output to plot. name = "F2" # Plot the result. plt.figure(figsize=(12, 1.75)) wbml.plot.tex() p = list(train.columns).index(name) plt.plot(x, means[:, p], style="pred") plt.fill_between(x, lowers[:, p], uppers[:, p], style="pred") plt.scatter(x, y[:, p], style="train") plt.scatter(test[name].index, test[name], style="test") plt.xlabel("Time (second)") plt.xlim(0.4, 1) plt.ylabel(f"{name} (volt)") wbml.plot.tweak(legend=False) plt.tight_layout() plt.savefig(wd.file("eeg.pdf"))
scale=scale, noise=0.05, n_u=n_u, n_z=n_z, t=t, ) for Model in [GPCM, RGPCM, CGPCM] ] if args.train: for model in models: # The year 2014 is numerically iffy, so we train with a lower # learning rate to prevent the loss from NaNing out. if args.year == 2014: model.fit(t_train, y_train, rate=2e-2, iters=20_000) else: model.fit(t_train, y_train, iters=20_000) model.save(wd.file(model.name.lower(), "model.pickle")) else: for model in models: model.load(wd.file(model.name.lower(), "model.pickle")) # Make and save predictions. if args.predict: for model in models: # Perform predictions. posterior = model.condition(t_train, y_train) pred_f = (t_pred, ) + normaliser.untransform(posterior.predict(t_pred)) pred_f_test = (t_test, ) + normaliser.untransform( posterior.predict(t_test)) pred_k = posterior.predict_kernel() # Carefully untransform kernel prediction. pred_k = (
# Third panel of row `i` in the 3x4 grid: the kernels in `ks`, one curve each.
row_offset = 4 * i
is_bottom_row = i == 2

plt.subplot(3, 4, 3 + row_offset)
plt.plot(t, np.stack(ks).T, lw=1)
# Mark the locations stored in `model.t_u` on the zero line.
plt.scatter(model.t_u, model.t_u * 0, s=5, marker="o", c="black")
plt.title("Kernel")
if is_bottom_row:
    plt.xlabel("Lag (s)")
plt.xlim(0, 6)
tweak(legend=False)

# Estimate the PSD of every kernel. The frequencies returned for the first
# kernel are used for all of them.
estimates = [estimate_psd(t, k, db=True) for k in ks]
all_freqs, all_psds = zip(*estimates)
freqs = all_freqs[0]
psds = np.stack(all_psds).T

# Fourth panel of row `i`: the estimated PSDs.
plt.subplot(3, 4, 4 + row_offset)
plt.title("PSD (dB)")
# Restrict to the first half of the frequency grid, and there to entries <= 2.
first_half = np.arange(int(len(freqs) / 2))
inds = first_half[freqs[first_half] <= 2]
plt.plot(freqs[inds], psds[inds, :], lw=1)
if is_bottom_row:
    plt.xlabel("Frequency (Hz)")
plt.xlim(0, 2)
plt.ylim(-40, 10)
tweak(legend=False)

# Write the figure to `sample.pdf`, crop it, and display it.
plt.tight_layout()
plt.savefig(wd.file("sample.pdf"))
pdfcrop(wd.file("sample.pdf"))
plt.show()
t_s, elbo_s = tracker_s.get_xy(start=0)

# Double check that there isn't a huge delay between the first two times, e.g.
# due to JIT compilation.
assert t_mf[1] < 1
assert t_cmf[1] < 1
assert t_ca[1] < 1
assert t_s[1] < 1

plt.figure(figsize=(5, 4))
# Horizontal reference line at `gp_logpdf`.
plt.axhline(y=gp_logpdf, ls="--", c="black", lw=1, label="GP")
plt.plot(
    # These times should line up exactly, but they might not due to natural
    # variability in runtimes. Force them to line up exactly by scaling the
    # times so that the last one equals `max(t_ca)`.
    t_s / max(t_s) * max(t_ca),
    elbo_s,
    label="Structured",
)
plt.plot(t_ca, elbo_ca, label="CA")
# For the two mean-field runs, show the running maximum of the ELBO.
plt.plot(t_cmf, np.maximum.accumulate(elbo_cmf), label="Collapsed MF")
plt.plot(t_mf, np.maximum.accumulate(elbo_mf), label="MF")
plt.xlabel("Time (s)")
plt.ylabel("ELBO")
plt.ylim(-900, -550)
# Upper x-limit: the next multiple of five seconds beyond the latest time.
# `max(t_ca)` is included because both the CA curve and the rescaled
# structured curve extend to it; leaving it out could clip those curves.
latest_time = max(max(t_mf), max(t_cmf), max(t_ca), max(t_s))
plt.xlim(0, 5 * (latest_time // 5 + 1))
tweak(legend_loc="lower right")

# Write the figure to `elbos.pdf`, crop it, and display it.
plt.savefig(wd.file("elbos.pdf"))
pdfcrop(wd.file("elbos.pdf"))
plt.show()
) plt.plot(t, err_95_upper, style="pred", lw=1) plt.plot(t, err_95_lower, style="pred", lw=1) t, mean, var, err_95_lower, err_95_upper = psd_pred_mf inds = t <= 1 t = t[inds] mean = mean[inds] err_95_lower = err_95_lower[inds] err_95_upper = err_95_upper[inds] plt.plot(t, mean, label="Mean-field", style="pred2") plt.fill_between( t, err_95_lower, err_95_upper, style="pred2", ) plt.plot(t, err_95_upper, style="pred2", lw=1) plt.plot(t, err_95_lower, style="pred2", lw=1) plt.xlabel("Frequency (Hz)") plt.ylabel("Spectral density (dB)") plt.title("PSD") plt.xlim(0, 1) plt.ylim(-20, 10) tweak(legend=True) plt.savefig(wd.file("smk.pdf")) pdfcrop(wd.file("smk.pdf")) plt.show()
# Show only the last `args.n_plot` points: the projected data of latent
# process `i_lat`, then `mean` with the band between `lower` and `upper`.
recent = slice(-args.n_plot, None)
plt.plot(x_proj[recent], y_proj[i_lat][recent], style="train")
plt.plot(x_proj[recent], mean[recent], style="pred")
plt.fill_between(
    x_proj[recent],
    lower[recent],
    upper[recent],
    style="pred",
)
wbml.plot.tweak(legend=False)
plt.gca().tick_params(labelsize=10)

plt.tight_layout()
plt.savefig(
    wd.file(f"simulators_latents{suffix}.pdf"), format="pdf", bbox_inches="tight"
)

# Plot predictions of all latent processes on a grid of 5 rows (`i_s`) by
# 10 columns (`i_r`).
plt.figure(figsize=(25, 8))
for i_r in range(10):
    for i_s in range(5):
        plt.subplot(5, 10, i_r + i_s * 10 + 1)
        if i_s == 0:
            # Column headers on the top row.
            plt.title(f"$i_r={i_r + 1}$", fontsize=12)
        if i_r == 0:
            # Row labels on the left-most column.
            plt.ylabel(f"$i_s={i_s + 1}$", fontsize=12)
        if i_s == 4:
            # x-label on the bottom row. `i_s` runs over 0..4, so the previous
            # test `i_s == 5` never fired and the label was never drawn.
            plt.xlabel("Day", fontsize=10)
        i_lat = i_r + i_s * m_r
# Extract test set and extended test set, and convert both indices with
# `date_to_day`.
d_test = d_tests[i]
d_test_ext = d_tests[i + 2]
x = [date_to_day(d) for d in d_test.index]
x_ext = [date_to_day(d) for d in d_test_ext.index]

# Plot prediction for the output named by the first column of the test set.
held_out_name = d_test.columns[0]
y_i = list(d_train.columns).index(held_out_name)
plt.plot(x_ext, mean[:, y_i], style='pred')
plt.fill_between(x_ext, lowers[:, y_i], uppers[:, y_i], style='pred')

# Plot data: the extended test set first, the test set on top of it.
plt.scatter(x_ext, d_test_ext, style='train', s=4)
plt.scatter(x, d_test, style='test', s=4)

# Finalise plot. The title, x-label and y-label are each drawn only for
# particular values of `i` and `d_size`.
plt.xlim(x_ext[0], x_ext[-1])
plt.ylim(10, 30)
plt.yticks([15, 20, 25])
if i == 0:
    plt.title(lookup_size[d_size] + f' (SMSE: {smse:.3f})')
elif i == 1:
    plt.xlabel('Time (day)')
if d_size == 0:
    plt.ylabel(f'{lookup_place[held_out_name]}\nTemp. (celsius)')
wbml.plot.tweak(legend=False)

plt.tight_layout()
plt.savefig(wd.file('air_temp.pdf'))
plt.subplot(1, 6, 4 + i)


def apply_to_psd(f):
    """Apply `f` to `psds` on a linear scale and map the result back.

    `psds` is read from the enclosing scope. It is transformed with
    `10 ** (psds / 10)`, handed to `f`, and the output of `f` is transformed
    back with `10 * log(.) / log(10)`.
    """
    linear = 10 ** (psds / 10)
    summary = f(linear)
    return 10 * B.log(summary) / B.log(10)


# One translucent band per level q, spanning the q/100 quantile to the
# (1 - q/100) quantile taken along axis 1.
for q in [1, 5, 10, 20, 30, 40]:
    band_lower = apply_to_psd(lambda samples: B.quantile(samples, q / 100, axis=1))
    band_upper = apply_to_psd(
        lambda samples: B.quantile(samples, 1 - q / 100, axis=1)
    )
    plt.fill_between(
        freqs,
        band_lower,
        band_upper,
        facecolor="tab:blue",
        alpha=0.2,
    )

# Careful: take the mean in PSD space!
mean_curve = apply_to_psd(lambda samples: B.mean(samples, axis=1))
plt.plot(freqs, mean_curve, c="black")
plt.title(model.name + " (PSD)")
plt.xlabel("Frequency (Hz)")
plt.xlim(-3, 3)
plt.ylim(-30, 5)
tweak(legend=False)

# Write the figure to `priors.pdf`, crop it, and display it.
plt.tight_layout()
plt.savefig(wd.file("priors.pdf"))
pdfcrop(wd.file("priors.pdf"))
plt.show()
plt.fill_between( t, mean2 - 1.96 * np.sqrt(var2), mean2 + 1.96 * np.sqrt(var2), style="pred2", ) plt.plot(t, mean2 + 1.96 * np.sqrt(var2), style="pred2", lw=1) plt.plot(t, mean2 - 1.96 * np.sqrt(var2), style="pred2", lw=1) plt.yticks([0, 0.5, 1]) plt.xticks([0, 2, 4]) plt.xlim(0, 4) plt.ylim(-0.25, 1.25) if not first: plt.gca().set_yticklabels([]) tweak(legend=legend) plt.figure(figsize=(7.5, 3)) plt.subplot(1, 3, 1) plt.title("GPCM on EQ") plot_kernel_predictions("gpcm", "eq", legend=False, first=True) plt.subplot(1, 3, 2) plt.title("CGPCM on CEQ") plot_kernel_predictions("cgpcm", "ceq-1", legend=False) plt.subplot(1, 3, 3) plt.title("RGPCM on Matern–$\\frac{1}{2}$") plot_kernel_predictions("rgpcm", "matern12") plt.savefig(wd.file("comparison.pdf")) pdfcrop(wd.file("comparison.pdf")) plt.show()
plt.text(243, -1.25, "$m=p$", horizontalalignment="right", verticalalignment="center") # plt.gca().set_xscale('log') plt.plot([0, 300], [igp_pplp, igp_pplp], "-", lw=1.5, c="tab:orange", label="Independent GPs") wbml.plot.tweak(legend=True, legend_loc="center") plt.xlim(0, 250) plt.xlabel("Number of latent processes $m$") plt.ylabel("PPLP of held-out data") plt.savefig(wd.file("temperature_pplp.pdf")) wbml.plot.pdfcrop(wd.file("temperature_pplp.pdf")) plt.figure(figsize=(5.5, 3)) plt.axvline(x=247, ymin=0, ymax=1, ls="--", c="black", lw=1) plt.plot(ms, oilmm_rmses, "o-", lw=1.5, c="tab:blue", label="OILMM") plt.text(243, 2.075, "$m=p$", horizontalalignment="right", verticalalignment="center") # plt.gca().set_xscale('log') plt.plot([0, 300], [igp_rmse, igp_rmse], "-", lw=1.5, c="tab:orange",
# Jointly sample `gp_f` and `gp_y` at `t`, then flatten both samples.
f, y = gp_f.measure.sample(gp_f(t), gp_y(t))
f = B.flatten(f)
y = B.flatten(y)

# Store the sampled data, the kernel evaluated between `t_k` and 0, and the
# log-density of `y` under `gp_y(t)`.
data = {
    "t": t,
    "f": f,
    "k": B.flatten(kernel(t_k, 0)),
    "y": y,
    "true_logpdf": gp_y(t).logpdf(y),
}
wd.save(data, slugify(str(kernel)), "data.pickle")

for scheme in ["mean-field", "structured"]:
    model = model_constructor(scheme)
    # Path components under which all results of this run are stored.
    prefix = (slugify(str(kernel)), scheme, slugify(model.name))

    # Fit model and predict function and kernel.
    model.fit(t, y, iters=10_000)
    elbo = model.elbo(t, y)
    posterior = model.condition(t, y)
    f_pred = posterior.predict(t)
    k_pred = posterior.predict_kernel(t_k)

    # Save the model, the ELBO, and both predictions.
    model_path = wd.file(*prefix, "model.pickle")
    model.save(model_path)
    wd.save(elbo, *prefix, "elbo.pickle")
    f_payload = (t,) + f_pred
    wd.save(f_payload, *prefix, "f_pred.pickle")
    k_payload = (k_pred.x, k_pred.mean, k_pred.var)
    wd.save(k_payload, *prefix, "k_pred.pickle")
# Report the per-output SMSEs (without NaN entries) and their average.
wbml.out.kv("SMSEs", smse.dropna())
wbml.out.kv("Average SMSE", smse.mean())

# Compute PPLP: the model's log-density of the normalised test data plus the
# normaliser's log-determinant term, reported per `B.length(y_test)`.
x_test = np.array(test.index)
y_test = np.array(test.reindex(train.columns, axis=1))
x_test_tensor = torch.tensor(x_test)
y_test_tensor = torch.tensor(normaliser.normalise(y_test))
logprob = model.logpdf(x_test_tensor, y_test_tensor)
logdet = normaliser.normalise_logdet(y_test)
pplp = logprob + logdet
wbml.out.kv("PPLP", pplp / B.length(y_test))

# Plot the result: one panel per column of `test`.
plt.figure(figsize=(12, 2))
wbml.plot.tex()

output_names = list(train.columns)
for panel, name in enumerate(test.columns, start=1):
    # Position of this output among the columns of the predictions.
    column = output_names.index(name)
    plt.subplot(1, 3, panel)
    plt.plot(x, means[:, column], style="pred")
    plt.fill_between(x, lowers[:, column], uppers[:, column], style="pred")
    plt.scatter(x, y[:, column], style="train")
    held_out = test[name]
    plt.scatter(held_out.index, held_out, style="test")
    plt.xlabel("Time (year)")
    plt.ylabel(name)
    wbml.plot.tweak(legend=False)

plt.tight_layout()
plt.savefig(wd.file("exchange.pdf"))
# Extract test set and extended test set, and convert both indices with
# `date_to_day`.
d_test = d_tests[i]
d_test_ext = d_tests[i + 2]
x = [date_to_day(d) for d in d_test.index]
x_ext = [date_to_day(d) for d in d_test_ext.index]

# Plot prediction for the output named by the first column of the test set.
held_out_name = d_test.columns[0]
y_i = list(d_train.columns).index(held_out_name)
plt.plot(x_ext, mean[:, y_i], style="pred")
plt.fill_between(x_ext, lowers[:, y_i], uppers[:, y_i], style="pred")

# Plot data: the extended test set first, the test set on top of it.
plt.scatter(x_ext, d_test_ext, style="train", s=4)
plt.scatter(x, d_test, style="test", s=4)

# Finalise plot. The title, x-label and y-label are each drawn only for
# particular values of `i` and `d_size`.
plt.xlim(x_ext[0], x_ext[-1])
plt.ylim(10, 30)
plt.yticks([15, 20, 25])
if i == 0:
    plt.title(lookup_size[d_size] + f" (SMSE: {smse:.3f})")
elif i == 1:
    plt.xlabel("Time (day)")
if d_size == 0:
    plt.ylabel(f"{lookup_place[held_out_name]}\nTemp. (celsius)")
wbml.plot.tweak(legend=False)

plt.tight_layout()
plt.savefig(wd.file("air_temp.pdf"))