def setup(name):
    """Set up an experiment.

    Args:
        name (str): Name of the experiment.

    Returns:
        tuple[:class:`argparse.Namespace`, :class:`wbml.experiment.WorkingDirectory`]:
            Tuple containing the parsed arguments and the working directory.
    """
    # Parse the command-line arguments for the experiment.
    parser = argparse.ArgumentParser()
    parser.add_argument("path", nargs="*")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--iters", type=int)
    parser.add_argument("--scheme", type=str, default="structured")
    parser.add_argument("--load", action="store_true")
    parser.add_argument("--fix-hypers", action="store_true")
    # By default, run all three models.
    parser.add_argument(
        "--model",
        choices=["gpcm", "gprv", "cgpcm"],
        type=str,
        default=["gpcm", "gprv", "cgpcm"],
        nargs="+",
    )
    args = parser.parse_args()

    # Create the working directory, seeded for reproducibility.
    wd = WorkingDirectory("_experiments", name, *args.path, seed=args.seed)

    return args, wd
import pandas as pd
import wbml.metric
import wbml.out
from wbml.data.jura import load
from wbml.experiment import WorkingDirectory

from gpar import GPARRegressor, log_transform


def inputs(df):
    """Extract the spatial inputs `x` and `y` from a data frame as an array."""
    return df.reset_index()[["x", "y"]].to_numpy()


if __name__ == "__main__":
    wbml.out.report_time = True
    wd = WorkingDirectory("_experiments", "jura")

    train, test = load()

    # Fit and predict GPAR.
    model = GPARRegressor(
        scale=10.0,
        linear=False,
        nonlinear=True,
        nonlinear_scale=1.0,
        noise=0.1,
        impute=True,
        replace=True,
        normalise_y=True,
        transform_y=log_transform,
    )
import matplotlib.pyplot as plt import numpy as np from gpar.regression import GPARRegressor from wbml.experiment import WorkingDirectory import wbml.plot if __name__ == '__main__': wd = WorkingDirectory('_experiments', 'synthetic') # Create toy data set. n = 200 x = np.linspace(0, 1, n) noise = 0.1 # Draw functions depending on each other in complicated ways. f1 = -np.sin(10 * np.pi * (x + 1)) / (2 * x + 1) - x**4 f2 = np.cos(f1)**2 + np.sin(3 * x) f3 = f2 * f1**2 + 3 * x f = np.stack((f1, f2, f3), axis=0).T # Add noise and subsample. y = f + noise * np.random.randn(n, 3) x_obs, y_obs = x[::8], y[::8] # Fit and predict GPAR. model = GPARRegressor(scale=0.1, linear=True, linear_scale=10., nonlinear=True, nonlinear_scale=0.1, noise=0.1,
from datetime import datetime, timedelta import lab as B import numpy as np import wbml.out as out from probmods import Normaliser from wbml.data.vix import load from wbml.experiment import WorkingDirectory from gpcm import GPCM, CGPCM, RGPCM # Setup script. out.report_time = True B.epsilon = 1e-8 wd = WorkingDirectory("_experiments", f"vix_forecast") # Setup experiment. data = load() def first_monday(year): """Get the first Monday of a year.""" dt = datetime(year, 1, 1) while dt.weekday() != 0: dt += timedelta(days=1) return dt def get_data(lower, upper): """Get data for a certain time range.""" df = data[(data.index >= lower) & (data.index < upper)]
import argparse  # NOTE(review): was missing; `argparse.ArgumentParser()` is used below.

import lab as B
import matplotlib.pyplot as plt
from wbml.experiment import WorkingDirectory
from wbml.plot import tex, tweak, pdfcrop

from gpcm import GPCM, CGPCM, RGPCM

# Parse arguments.
parser = argparse.ArgumentParser()
parser.add_argument("--train", action="store_true")
args = parser.parse_args()

# Setup script.
B.epsilon = 1e-10
tex()
wd = WorkingDirectory("_experiments", "priors", seed=0)

# Construct models.
models = [
    GPCM(window=2, scale=0.5, n_u=30, t=(0, 10)),
    CGPCM(window=2, scale=0.5, n_u=30, t=(0, 10)),
    RGPCM(window=2, scale=0.5, n_u=30, t=(0, 10)),
]

# Instantiate models.
models = [model() for model in models]


def _extract_samples(quantities):
    # Pull out the sample locations and all posterior samples.
    x = quantities.x
    samples = quantities.all_samples
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import wbml.metric
import wbml.out
import wbml.plot
from lab import B
from wbml.data.eeg import load
from wbml.experiment import WorkingDirectory

from gpar import GPARRegressor

if __name__ == '__main__':
    B.epsilon = 1e-8
    wbml.out.report_time = True
    wd = WorkingDirectory('_experiments', 'eeg')

    _, train, test = load()
    x = np.array(train.index)
    y = np.array(train)

    # Fit and predict GPAR.
    model = GPARRegressor(
        scale=0.02,
        linear=False,
        nonlinear=True,
        nonlinear_scale=1.0,
        noise=0.01,
        impute=True,
        replace=False,
        normalise_y=True,
    )
import argparse  # NOTE(review): was missing; used below for the --train/--predict flags.
import datetime  # NOTE(review): was missing; `datetime.datetime(...)` is used below.

import lab as B
import matplotlib.pyplot as plt
import numpy as np
import wbml.out as out
from scipy.signal import periodogram
from wbml.data.vix import load
from wbml.experiment import WorkingDirectory
from wbml.plot import tweak, pdfcrop, tex

from gpcm import RGPCM

# Setup script.
out.report_time = True
B.epsilon = 1e-6
tex()
wd = WorkingDirectory("_experiments", "vix_analyse")

# Parse arguments.
parser = argparse.ArgumentParser()
parser.add_argument("--train", action="store_true")
parser.add_argument("--predict", action="store_true")
args = parser.parse_args()

# Load and process data, restricting to the year 2000.
data = load()
lower = datetime.datetime(2000, 1, 1)
upper = datetime.datetime(2001, 1, 1)
data = data[(data.index >= lower) & (data.index < upper)]

# Convert to days since start.
t = np.array([(ti - lower).days for ti in data.index], dtype=float)
y = np.log(np.array(data.open))
def objective_vectorised(params, *args): vs_copy = vs.copy() vs_copy.set_latent_vector(params) return objective(vs_copy, *args) def objective_wrapped(vs_, *args): return objective_vectorised(vs_.get_latent_vector(), *args) return objective_wrapped # Setup script. out.report_time = True B.epsilon = 1e-8 tex() wd = WorkingDirectory("_experiments", "compare_inference") # Setup experiment. noise = 0.5 t = B.linspace(0, 20, 500) # Setup GPCM models. window = 2 scale = 1 n_u = 40 n_z = 40 # Sample data. kernel = EQ() y = B.flatten(GP(kernel)(t, noise).sample()) gp_logpdf = GP(kernel)(t, noise).logpdf(y)
import pandas as pd import torch import wbml.plot import wbml.metric from matrix import Dense, Diagonal from stheno import EQ from varz import Vars from varz.torch import minimise_l_bfgs_b from wbml.data.eeg import load from wbml.experiment import WorkingDirectory from oilmm import OILMM, Normaliser, ILMMPP if __name__ == "__main__": wbml.out.report_time = True wd = WorkingDirectory("_experiments", "eeg") _, train, test = load() x = np.array(train.index) y = np.array(train) # Normalise data. normaliser = Normaliser(y) y_norm = normaliser.normalise(y) p = B.shape(y)[1] m = 3 vs = Vars(torch.float64) def construct_model(vs):
import argparse  # NOTE(review): was missing; used below to parse the --train flag.

import lab as B
import matplotlib.pyplot as plt
import wbml.metric as metric
import wbml.out as out
from stheno import EQ, GP
from wbml.experiment import WorkingDirectory
from wbml.plot import tweak, pdfcrop, tex

from gpcm import GPCM
from gpcm.util import estimate_psd

# Setup script.
out.report_time = True
B.epsilon = 1e-8
tex()
wd = WorkingDirectory("_experiments", "smk")

# Parse arguments.
parser = argparse.ArgumentParser()
parser.add_argument("--train", action="store_true")
args = parser.parse_args()

# Setup experiment.
noise = 0.1
t = B.linspace(0, 40, 200)
t_k = B.linspace(0, 4, 200)

# Setup GPCM models.
window = 2
scale = 0.25
n_u = 80
from oilmm import OILMM, Normaliser
from stheno import Matern52
from varz import Vars
from varz.torch import minimise_l_bfgs_b
from wbml.data.cmip5 import load
from wbml.experiment import WorkingDirectory

if __name__ == "__main__":
    # Parse arguments of script.
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", type=int, default=13 * 19)
    args = parser.parse_args()

    B.epsilon = 1e-6
    wbml.out.report_time = True
    wd = WorkingDirectory("_experiments", f"temperature_{args.m}")

    loc, temp, _ = load()

    # Smooth and subsample temperature data.
    temp = temp.rolling(window=31, center=True, min_periods=1, win_type="hamming")
    temp = temp.mean().iloc[::31, :]

    # Create train and test splits
    x = np.array([(day - temp.index[0]).days for day in temp.index])
    y = np.array(temp)

    # Divide into training and test set.
parser.add_argument(
    "-ms", type=int, default=5, help="Number of latent processes for simulators."
)
parser.add_argument("--separable", action="store_true", help="Use a separable model.")
args = parser.parse_args()

# Determine paths to write things to.
suffix = "_separable" if args.separable else ""
wd = WorkingDirectory(
    "_experiments", "simulators", subtle=True, log=f"log_process{suffix}.txt"
)
results = wd.load(f"results_mr{args.mr}_ms{args.ms}{suffix}.pickle")

# Give overview of things that have been stored.
wbml.out.kv("Results", ", ".join(results.keys()))
wbml.out.kv("Parameters", ", ".join(results["learned_parameters"].keys()))

# Print learned scales.
scales = results["learned_parameters"]["space/scales"]
wbml.out.kv("Latitude scale", scales[0])
wbml.out.kv("Longitude scale", scales[1])

# Extract everything from the dictionary of results.
m = results["m"]
import numpy as np import wbml.metric as metric import wbml.out as out from scipy.stats import ttest_rel from wbml.experiment import WorkingDirectory # Setup script. wd = WorkingDirectory("_experiments", "crude_oil_aggregate") # Load all experiments and compute metrics. names = ["GPCM", "CGPCM", "RGPCM"] mlls = {name: [] for name in names} rmses = {name: [] for name in names} for year in range(2012, 2017 + 1): wd_results = WorkingDirectory("_experiments", "crude_oil", str(year), observe=True) t, y = wd_results.load("data.pickle")["test"] for name in names: _, mean, var = wd_results.load(name.lower(), "pred_f_test.pickle") mlls[name].append(metric.mll(mean, var, y)) rmses[name].append(metric.rmse(mean, y)) # Print aggregate results. for name in names: with out.Section(name): out.kv("MLL", np.mean(mlls[name])) out.kv("MLL (std)", np.std(mlls[name]) / len(mlls[name]) ** 0.5) out.kv("RMSE", np.mean(rmses[name])) out.kv("RMSE (std)", np.std(rmses[name]) / len(rmses[name]) ** 0.5) # Compare results. for name1, name2 in [("RGPCM", "CGPCM"), ("RGPCM", "GPCM"), ("CGPCM", "GPCM")]:
import lab as B  # NOTE(review): was missing; `B.epsilon` is set below.
import numpy as np
import torch
import wbml.out  # NOTE(review): was missing; `wbml.out.report_time` is set below.
import wbml.plot
from stheno import Matern52
from varz import Vars
from varz.torch import minimise_l_bfgs_b
from wbml.data.cmip5 import load
from wbml.experiment import WorkingDirectory

from oilmm import IGP, Normaliser

if __name__ == "__main__":
    B.epsilon = 1e-8
    wbml.out.report_time = True
    wd = WorkingDirectory("_experiments", "temperature_igp")

    loc, temp, _ = load()

    # Smooth and subsample temperature data.
    temp = temp.rolling(window=31, center=True, min_periods=1, win_type="hamming")
    temp = temp.mean().iloc[::31, :]

    # Create train and test splits
    x = np.array([(day - temp.index[0]).days for day in temp.index])
    y = np.array(temp)

    # Divide into training and test set.
default=5, help="Number of latent processes for simulators.") parser.add_argument("--separable", action="store_true", help="Use a separable model.") args = parser.parse_args() # Determine suffix. if args.separable: suffix = "_separable" else: suffix = "" B.epsilon = 1e-8 wbml.out.report_time = True wd = WorkingDirectory("_experiments", "simulators", log=f"log{suffix}.txt") # Load data. loc, temp, sims = load() sims = {k: v for k, v in list(sims.items())} x_data = np.array([(day - temp.index[0]).days for day in temp.index[:args.n]]) y_data = np.concatenate([sim.to_numpy()[:args.n] for sim in sims.values()], axis=1) wbml.out.out("Data loaded") # Normalise training data. normaliser = Normaliser(y_data) y_data = normaliser.normalise(y_data) # Determine initialisation of spatial length scales.
from lab import B from wbml.data.air_temp import load as load_temp from wbml.experiment import WorkingDirectory from gpar import GPARRegressor def convert_index(df): index = df.index - d_all.index[0] return np.array([td.total_seconds() / 3600 / 24 for td in index]) if __name__ == "__main__": B.epsilon = 1e-6 wbml.out.report_time = True wd = WorkingDirectory("_experiments", "air_temp") # Load data. d_size = 0 if len(sys.argv) < 2 else int(sys.argv[1]) d_all, d_train, d_tests = load_temp()[d_size] # Determine the number of inducing points. n_ind = [10 * 10 + 1, 10 * 15 + 1, 10 * 31 + 1][d_size] # Place inducing points evenly spaced. x = convert_index(d_all) x_ind = np.linspace(x.min(), x.max(), n_ind) # Fit and predict GPAR. NOTE: we use D-GPAR-L-NL here, as opposed to D-GPAR-L, # to make the results a little more drastic. model = GPARRegressor(
import lab as B import wbml.out as out from slugify import slugify from stheno import EQ, CEQ, Exp, GP, Delta from wbml.experiment import WorkingDirectory from gpcm import GPCM, CGPCM, RGPCM # Setup script. out.report_time = True B.epsilon = 1e-8 wd = WorkingDirectory("_experiments", "comparison") # Setup experiment. noise = 1.0 t = B.linspace(0, 40, 400) t_k = B.linspace(0, 4, 200) # Setup GPCM models. window = 2 scale = 0.5 n_u = 30 n_z = 80 for kernel, model_constructor in [ ( EQ(), lambda scheme: GPCM( scheme=scheme, window=window, scale=scale,
# Parse arguments. parser = argparse.ArgumentParser() parser.add_argument("--train", action="store_true") parser.add_argument("--predict", action="store_true") parser.add_argument("--year", type=int, default=2013) args = parser.parse_args() # Setup experiment. out.report_time = True # Year 2014 needs extra stability. if args.year == 2014: B.epsilon = 1e-6 else: B.epsilon = 1e-8 tex() wd = WorkingDirectory("_experiments", "crude_oil", str(args.year)) # Load and process data. data = load() lower = datetime(args.year, 1, 1) upper = datetime(args.year + 1, 1, 1) data = data[(lower <= data.index) & (data.index < upper)] t = np.array([(ti - lower).days for ti in data.index], dtype=float) y = np.array(data.open) t_pred = B.linspace(min(t), max(t), 500) # Split data. test_inds = np.empty(t.shape, dtype=bool) test_inds.fill(False) for lower, upper in [( datetime(args.year, 1, 1) + i * timedelta(weeks=1),
from wbml.parser import Parser, Whitespace, Literal, Float, Integer import wbml.plot import matplotlib.pyplot as plt from wbml.experiment import WorkingDirectory import numpy as np wd = WorkingDirectory("_experiments", "timing_parse") parser = Parser("_experiments/timing/log.txt") # Skip header. for _ in range(10): parser.next_line() totals = {n: {} for n in [100, 200, 300]} hs = {n: {} for n in [100, 200, 300]} percs = {n: {} for n in [100, 200, 300]} while True: try: parser.find_line("n:") n = parser.parse(Literal("n:"), Whitespace(), Integer()) parser.find_line("m:") m = parser.parse(Literal("m:"), Whitespace(), Integer()) # Parse total time. parser.find_line("Total:") parser.find_line("Mean:") total_mean = parser.parse(Whitespace(), Literal("Mean:"), Whitespace(), Float()) parser.find_line("Error:") total_error = parser.parse(
import pickle import numpy as np import wbml.metric import wbml.out from wbml.experiment import WorkingDirectory from gpar import GPARRegressor if __name__ == "__main__": wbml.out.report_time = True wd = WorkingDirectory("_experiments", "ml") # Load data. with open("examples/paper/ml_data/data.pickle", "rb") as f: results = pickle.load(f, encoding="latin1") # Generate inputs and outputs. output_indices = [0, 5, 10, 15, 20] params = results.keys() x = np.array([list(p) for p in params]) y = np.array( [np.take(results[p]["val_loss"], output_indices) for p in params]) # Record number of outputs. num_outputs = len(output_indices) # Filter extreme data points to reduce noise. max_error_at_0 = 5 min_log_learning_rate = -10 keep = np.logical_and(x[:, 3] > min_log_learning_rate,
import wbml.out
from lab import B
from wbml.data.jura import load
from wbml.experiment import WorkingDirectory

from gpar import GPARRegressor, log_transform


def inputs(df):
    """Extract the spatial inputs `x` and `y` from a data frame as an array."""
    return df.reset_index()[['x', 'y']].to_numpy()


if __name__ == '__main__':
    B.epsilon = 1e-8
    wbml.out.report_time = True
    wd = WorkingDirectory('_experiments', 'jura')

    train, test = load()

    # Fit and predict GPAR.
    model = GPARRegressor(
        scale=10.,
        linear=False,
        nonlinear=True,
        nonlinear_scale=1.0,
        noise=0.1,
        impute=True,
        replace=True,
        normalise_y=True,
        transform_y=log_transform,
    )
    model.fit(inputs(train), train.to_numpy(), fix=False)
    means = model.predict(inputs(test), num_samples=200, latent=True)
import time

import lab.torch as B
import numpy as np
import torch
import wbml.plot
from matrix import Dense, Diagonal
from oilmm import OILMM
from stheno import Matern52
from varz import Vars
from wbml.data.cmip5 import load
from wbml.experiment import WorkingDirectory

if __name__ == "__main__":
    B.epsilon = 1e-8
    wd = WorkingDirectory("_experiments", "timing")

    loc, temp, _ = load()

    # Smooth and subsample temperature data.
    temp = temp.rolling(window=31, center=True, min_periods=1, win_type="hamming")
    temp = temp.mean().iloc[::31, :]

    x = np.array([(day - temp.index[0]).days for day in temp.index])
    y = np.array(temp)
    p = B.shape(y)[1]
from stheno import GP from wbml.experiment import WorkingDirectory from wbml.plot import tex, tweak, pdfcrop from gpcm import CGPCM from gpcm.util import closest_psd # Parse arguments. parser = argparse.ArgumentParser() parser.add_argument("--train", action="store_true") args = parser.parse_args() # Setup script. B.epsilon = 1e-10 tex() wd = WorkingDirectory("_experiments", "sample_interpolate", seed=14) def sample(model, t, noise_f): """Sample from a model. Args: model (:class:`gpcm.model.AbstractGPCM`): Model to sample from. t (vector): Time points to sample at. noise_f (vector): Noise for the sample of the function. Should have the same size as `t`. Returns: tuple[vector, ...]: Tuple containing kernel samples, filter samples, and function samples. """
import numpy as np import scipy.stats as st import wbml.metric as metric import wbml.out as out from wbml.experiment import WorkingDirectory # Setup script. wd = WorkingDirectory("_experiments", "vix_forecast_process") wd_results = WorkingDirectory("_experiments", "vix_forecast", observe=True) def compute_metrics(model, summarise=True): """Compute metrics. Args: model (str): Name of the model folder. summarise (bool, optional): Summarise the metrics rather than given the data back. Defaults to `True`. Returns: union[None, tuple[:class:`np.array`, :class:`np.array`]]: The metrics if `summarise` is `False`. Otherwise nothing. """ rmses, mlls = [], [] preds = wd_results.load(model, "preds.pickle") for (y, mean, var) in preds: rmses.append(metric.rmse(mean, y)) mlls.append(metric.mll(mean, var, y)) if summarise: with out.Section(model.upper()): for name, values in [("MLL", mlls), ("RMSE", rmses)]:
import pickle

import numpy as np
import wbml.metric
import wbml.out
from lab import B
from wbml.experiment import WorkingDirectory

from gpar import GPARRegressor

if __name__ == '__main__':
    wbml.out.report_time = True
    wd = WorkingDirectory('_experiments', 'ml')
    B.epsilon = 1e-8

    # Load data.
    with open('examples/paper/ml_data/data.pickle', 'rb') as f:
        results = pickle.load(f, encoding='latin1')

    # Generate inputs and outputs.
    output_indices = [0, 5, 10, 15, 20]
    params = results.keys()
    x = np.array([list(p) for p in params])
    y = np.array([np.take(results[p]['val_loss'], output_indices) for p in params])

    # Record number of outputs.
    num_outputs = len(output_indices)

    # Filter extreme data points to reduce noise.
    max_error_at_0 = 5
from lab import B
from wbml.data.air_temp import load as load_temp
from wbml.experiment import WorkingDirectory

from gpar import GPARRegressor


def convert_index(df):
    """Convert a date index to days since the start of the full data set."""
    deltas = df.index - d_all.index[0]
    return np.array([td.total_seconds() / 3600 / 24 for td in deltas])


if __name__ == '__main__':
    B.epsilon = 1e-6
    wbml.out.report_time = True
    wd = WorkingDirectory('_experiments', 'air_temp')

    # Load data.
    d_size = 0 if len(sys.argv) < 2 else int(sys.argv[1])
    d_all, d_train, d_tests = load_temp()[d_size]

    # Determine the number of inducing points.
    n_ind = [10 * 10 + 1, 10 * 15 + 1, 10 * 31 + 1][d_size]

    # Place inducing points evenly spaced.
    x = convert_index(d_all)
    x_ind = np.linspace(x.min(), x.max(), n_ind)

    # Fit and predict GPAR.
    # Note: we use D-GPAR-L-NL here, as opposed to D-GPAR-L, to make the
    # results a little more drastic.
from wbml.parser import Parser, Whitespace, Literal, Float, SkipUntil import wbml.plot import matplotlib.pyplot as plt from wbml.experiment import WorkingDirectory wd = WorkingDirectory("_experiments", "temperature_parse") def parse(path): parser = Parser(path) parser.find_line("RMSE") rmse = parser.parse(SkipUntil("|"), Whitespace(), Literal("RMSE:"), Whitespace(), Float()) parser.find_line("PPLP") pplp = parser.parse(SkipUntil("|"), Whitespace(), Literal("PPLP:"), Whitespace(), Float()) return rmse, pplp ms = [1, 2, 5, 10, 15, 20, 25, 50, 75, 100, 125, 150, 175, 200, 225, 247] oilmm_rmses, oilmm_pplps = zip( *[parse(f"_experiments/temperature_{m}/log.txt") for m in ms]) igp_rmse, igp_pplp = parse("_experiments/temperature_igp/log.txt") wbml.plot.tex() plt.figure(figsize=(5.5, 3)) plt.axvline(x=247, ymin=0, ymax=1, ls="--", c="black", lw=1) plt.plot(ms, oilmm_pplps, "o-", lw=1.5, c="tab:blue", label="OILMM") plt.text(243,
import matplotlib.pyplot as plt import numpy as np import wbml.out as out from wbml.experiment import WorkingDirectory from wbml.metric import smll, rmse from wbml.plot import tex, tweak, pdfcrop # Setup script. tex() wd = WorkingDirectory("_experiments", "comparison_process") wd_results = WorkingDirectory("_experiments", "comparison", observe=True) def kernel_analysis(data, scheme, model, metric, until=4): """Analyse the prediction for a kernel.""" k = wd_results.load(data, "data.pickle")["k"] t, mean, var = wd_results.load(data, scheme, model, "k_pred.pickle") inds = t <= until if metric == "smll": return smll(mean[inds], var[inds], k[inds]) elif metric == "rmse": return rmse(mean[inds], k[inds]) else: raise ValueError(f'Bad metric "{metric}".') for model, kernel in [("gpcm", "eq"), ("cgpcm", "ceq-1"), ("rgpcm", "matern12")]: with out.Section(model.upper()): with out.Section("SMLL"): out.kv("MF", kernel_analysis(kernel, "mean-field", model, "smll"))
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import wbml.out  # NOTE(review): was missing; `wbml.out.report_time` is set below.
import wbml.plot
from lab import B  # NOTE(review): was missing; `B.epsilon`/`B.shape` are used below.
from matrix import Dense
from oilmm import ILMMPP, Normaliser
from stheno import Matern12
from varz import Vars
from varz.torch import minimise_l_bfgs_b
from wbml.data.exchange import load
from wbml.experiment import WorkingDirectory

if __name__ == "__main__":
    wbml.out.report_time = True
    wd = WorkingDirectory("_experiments", "exchange_ilmm")
    B.epsilon = 1e-8

    _, train, test = load()
    x = np.array(train.index)
    y = np.array(train)

    # Normalise data.
    normaliser = Normaliser(y)
    y_norm = normaliser.normalise(y)

    p = B.shape(y)[1]
    m = 3
    vs = Vars(torch.float64)
import matplotlib.pyplot as plt
import numpy as np
from wbml.data.air_temp import load
from wbml.experiment import WorkingDirectory
import wbml.plot
import wbml.metric
import pandas as pd


def date_to_day(dt):
    """Convert a datetime to a fractional day of the month."""
    fractional_hours = dt.hour + (dt.minute + dt.second / 60) / 60
    return dt.day + fractional_hours / 24


wd = WorkingDirectory('_experiments', 'air_temp', log='log_process.txt', subtle=True)

# Load data.
data = load()

# Create lookups.
lookup_place = {('temp', 'Chi'): 'Chimet', ('temp', 'Cam'): 'Cambermet'}
lookup_size = {0: '10 Days', 1: '15 Days', 2: '1 Month'}

# Plot the results.
plt.figure(figsize=(15, 4))
for d_size in [0, 1, 2]:
    d_all, d_train, d_tests = data[d_size]

    # Load predictions.