# Sample times and the white noise driving the sample of the latent function.
t = np.linspace(0, 10, 300)
noise_f = np.random.randn(len(t), 1)

# Construct model.
model = CGPCM(window=2, scale=1, n_u=10, t=t)

# Instantiate model.
models = model()

# Perform sampling. Samples are cached on disk so plotting can be rerun
# without resampling.
if args.train:
    ks, us, fs = sample(model, t, noise_f)
    wd.save((ks, us, fs), "samples.pickle")
else:
    ks, us, fs = wd.load("samples.pickle")

# Plot.
plt.figure(figsize=(15, 4))
for i, (k, u, f) in enumerate(zip(ks, us, fs)):
    plt.subplot(3, 5, 1 + i)
    # Plot `u` against a time axis mirrored about zero, padding the negative
    # half with zeros.
    plt.plot(
        B.concat(-t[::-1][:-1], t),
        B.concat(u[:-1] * 0, u),
        lw=1,
    )
    if hasattr(model, "t_u"):
        # Mark the inducing-point locations on the time axis.
        plt.scatter(model.t_u, model.t_u * 0, s=5, marker="o", c="black")
    # plt.xlabel("Time (s)")
    if i == 0:
        # NOTE(review): the source chunk is cut off here; the body of this
        # `if` lies outside the visible source.
# Perform sampling. if args.train: ks = [ _extract_samples(model.predict_kernel(num_samples=20000)) for model in models ] psds = [ _extract_samples(model.predict_psd(num_samples=20000)) for model in models ] model_ks, model_psds = ks, psds wd.save((model_ks, model_psds), "samples.pickle") else: model_ks, model_psds = wd.load("samples.pickle") # Plot. plt.figure(figsize=(15, 2.5)) for i, (model, (x, ks)) in enumerate(zip(models, model_ks)): plt.subplot(1, 6, 1 + i) for q in [1, 5, 10, 20, 30, 40]: plt.fill_between( x, B.quantile(ks, q / 100, axis=1), B.quantile(ks, 1 - q / 100, axis=1), facecolor="tab:blue", alpha=0.2, ) plt.plot(x, B.mean(ks, axis=1), c="black")
# NOTE(review): this chunk begins mid-expression; the opening of the call
# closed below lies outside the visible source. The `20 * B.log(...)` term
# presumably converts the normaliser's scale into decibels — confirm.
pred_psd.err_95_lower + 20 * B.log(normaliser._scale),
pred_psd.err_95_upper + 20 * B.log(normaliser._scale),
)

# Save predictions.
# NOTE(review): original indentation was lost; in the full file these saves
# likely sit inside a per-model block — confirm against the source.
wd.save(pred_f, model.name.lower(), "pred_f.pickle")
wd.save(pred_f_test, model.name.lower(), "pred_f_test.pickle")
wd.save(pred_k, model.name.lower(), "pred_k.pickle")
wd.save(pred_psd, model.name.lower(), "pred_psd.pickle")

# Load predictions, keyed by model name.
preds_f = {}
preds_f_test = {}
preds_k = {}
preds_psd = {}
for model in models:
    preds_f[model.name] = wd.load(model.name.lower(), "pred_f.pickle")
    preds_f_test[model.name] = wd.load(model.name.lower(), "pred_f_test.pickle")
    preds_k[model.name] = wd.load(model.name.lower(), "pred_k.pickle")
    preds_psd[model.name] = wd.load(model.name.lower(), "pred_psd.pickle")

# Print performances on the test set for every model.
for name in ["GPCM", "CGPCM", "RGPCM"]:
    with out.Section(name):
        t, mean, var = preds_f_test[name]
        out.kv("RMSE", metric.rmse(mean, y_test))
        out.kv("MLL", metric.mll(mean, var, y_test))


def plot_psd(name, y_label=True, style="pred", finish=True):
    """Plot prediction for the PSD."""
    # NOTE(review): this chunk begins inside the training branch of an `if`
    # whose header lies outside the visible source.
    # Predict the function, PSD, and kernel, then cache the predictions.
    pred_f = (t, ) + posterior.predict(t)
    pred_psd = posterior.predict_psd()
    pred_psd = (
        pred_psd.x,
        pred_psd.mean,
        pred_psd.err_95_lower,
        pred_psd.err_95_upper,
        pred_psd.all_samples,
    )
    pred_k = posterior.predict_kernel()
    pred_k = (pred_k.x, pred_k.mean, pred_k.var)
    wd.save(pred_f, "pred_f.pickle")
    wd.save(pred_psd, "pred_psd.pickle")
    wd.save(pred_k, "pred_k.pickle")
else:
    pred_f = wd.load("pred_f.pickle")
    pred_psd = wd.load("pred_psd.pickle")
    pred_k = wd.load("pred_k.pickle")

# Unpack prediction for the PSD and cut off at frequency 0.5.
freqs, mean, lower, upper, samps = pred_psd
upper_freq = 0.5
samps = samps[freqs <= upper_freq, :]
mean = mean[freqs <= upper_freq]
lower = lower[freqs <= upper_freq]
upper = upper[freqs <= upper_freq]
freqs = freqs[freqs <= upper_freq]

# Compute the spectrum of the excitation process.
# NOTE(review): this has the Lorentzian form of the spectrum of a process
# with exponential decay rate `lam` — confirm against the model definition.
instance = model()
spec_x = (2 * instance.lam) / (instance.lam**2 + (2 * B.pi * freqs)**2)
        # NOTE(review): this chunk begins inside a model-constructor call
        # whose opening lies outside the visible source.
        scheme="structured",
        window=window,
        scale=scale,
        noise=noise,
        n_u=n_u,
        n_z=n_z,
        t=t,
    )
    # Fit the structured approximation and extract kernel/PSD predictions.
    model.fit(t, y, iters=30_000)
    k_pred_struc = extract(model.condition(t, y).predict_kernel(t_k))
    psd_pred_struc = extract(model.condition(t, y).predict_psd())
    wd.save((k_pred_mf, psd_pred_mf, k_pred_struc, psd_pred_struc), "preds.pickle")
else:
    k_pred_mf, psd_pred_mf, k_pred_struc, psd_pred_struc = wd.load(
        "preds.pickle")

# Report metrics for both approximation schemes.
with out.Section("Structured"):
    t, mean, var, _, _ = k_pred_struc
    # Only evaluate the kernel fit up to lag 3.
    inds = t <= 3
    out.kv("MLL", metric.mll(mean[inds], var[inds], k[inds]))
    out.kv("RMSE", metric.rmse(mean[inds], k[inds]))
with out.Section("Mean field"):
    t, mean, var, _, _ = k_pred_mf
    inds = t <= 3
    out.kv("MLL", metric.mll(mean[inds], var[inds], k[inds]))
    out.kv("RMSE", metric.rmse(mean[inds], k[inds]))

plt.figure(figsize=(7.5, 3.75))
parser.add_argument(
    "--separable", action="store_true", help="Use a separable model."
)
args = parser.parse_args()

# Determine paths to write things to.
if args.separable:
    suffix = "_separable"
else:
    suffix = ""
wd = WorkingDirectory(
    "_experiments", "simulators", subtle=True, log=f"log_process{suffix}.txt"
)
results = wd.load(f"results_mr{args.mr}_ms{args.ms}{suffix}.pickle")

# Give overview of things that have been stored.
wbml.out.kv("Results", ", ".join(results.keys()))
wbml.out.kv("Parameters", ", ".join(results["learned_parameters"].keys()))

# Print learned scales. The first entry is labelled latitude and the second
# longitude.
scales = results["learned_parameters"]["space/scales"]
wbml.out.kv("Latitude scale", scales[0])
wbml.out.kv("Longitude scale", scales[1])

# Extract everything from the dictionary of results.
m = results["m"]
p = results["p"]
m_s = results["m_s"]
m_r = results["m_r"]
import numpy as np
import wbml.metric as metric
import wbml.out as out
from scipy.stats import ttest_rel
from wbml.experiment import WorkingDirectory

# Setup script.
wd = WorkingDirectory("_experiments", "crude_oil_aggregate")

# Load all experiments and compute metrics. One experiment per year.
names = ["GPCM", "CGPCM", "RGPCM"]
mlls = {name: [] for name in names}
rmses = {name: [] for name in names}
for year in range(2012, 2017 + 1):
    wd_results = WorkingDirectory("_experiments", "crude_oil", str(year), observe=True)
    t, y = wd_results.load("data.pickle")["test"]
    for name in names:
        _, mean, var = wd_results.load(name.lower(), "pred_f_test.pickle")
        mlls[name].append(metric.mll(mean, var, y))
        rmses[name].append(metric.rmse(mean, y))

# Print aggregate results: mean over years plus the standard error of the
# mean (std / sqrt(n)).
for name in names:
    with out.Section(name):
        out.kv("MLL", np.mean(mlls[name]))
        out.kv("MLL (std)", np.std(mlls[name]) / len(mlls[name]) ** 0.5)
        out.kv("RMSE", np.mean(rmses[name]))
        out.kv("RMSE (std)", np.std(rmses[name]) / len(rmses[name]) ** 0.5)

# Compare results pairwise between models.
for name1, name2 in [("RGPCM", "CGPCM"), ("RGPCM", "GPCM"), ("CGPCM", "GPCM")]:
    # NOTE(review): the source chunk is cut off here; the body of this loop
    # (presumably a paired t-test, given the `ttest_rel` import) lies
    # outside the visible source.
# Load data. data = load() # Create lookups. lookup_place = {('temp', 'Chi'): 'Chimet', ('temp', 'Cam'): 'Cambermet'} lookup_size = {0: '10 Days', 1: '15 Days', 2: '1 Month'} # Plot the results. plt.figure(figsize=(15, 4)) for d_size in [0, 1, 2]: d_all, d_train, d_tests = data[d_size] # Load predictions. preds = wd.load(f'results{d_size}.pickle') # Compute SMSEs for the first two data sets; the others are the extended # ones. smses = [] for (mean, _, _), d_test in list(zip(preds, d_tests))[:2]: mean = pd.DataFrame(mean, index=d_test.index, columns=d_train.columns) smse = wbml.metric.smse(mean, d_test).mean() smses.append(smse) smse = np.mean(smses) # Construct plots. for i, (mean, lowers, uppers) in enumerate(preds[2:]): plt.subplot(2, 3, d_size + i * 3 + 1) # Extract test set and extended test set.
# Load data. data = load() # Create lookups. lookup_place = {("temp", "Chi"): "Chimet", ("temp", "Cam"): "Cambermet"} lookup_size = {0: "10 Days", 1: "15 Days", 2: "1 Month"} # Plot the results. plt.figure(figsize=(15, 4)) for d_size in [0, 1, 2]: d_all, d_train, d_tests = data[d_size] # Load predictions. preds = wd.load(f"results{d_size}.pickle") # Compute SMSEs for the first two data sets; the others are the extended # ones. smses = [] for (mean, _, _), d_test in list(zip(preds, d_tests))[:2]: mean = pd.DataFrame(mean, index=d_test.index, columns=d_train.columns) smse = wbml.metric.smse(mean, d_test).mean() smses.append(smse) smse = np.mean(smses) # Construct plots. for i, (mean, lowers, uppers) in enumerate(preds[2:]): plt.subplot(2, 3, d_size + i * 3 + 1) # Extract test set and extended test set.