# compact and the subsequent matrix operations will use sparse
# matrices. We can perform Gaussian process regression on large
# datasets with this choice of prior, provided that the lengthscale of
# the prior is much less than the size of the domain (which is not
# true for this demo)
basis = spwen32

# define hyperparameters for the prior. Tune these parameters to get a
# satisfactory interpolant. These can also be chosen with maximum
# likelihood methods.
prior_mean = 0.0
prior_var = 1.0
prior_lengthscale = 0.8  # this controls the sparsity

# create the prior Gaussian process
prior_gp = gpiso(basis, var=prior_var, eps=prior_lengthscale)

# add a first order polynomial to the prior to make it suitable for
# data with linear trends
prior_gp += gppoly(1)

# condition the prior on the observations, creating a new Gaussian
# process for the posterior
posterior_gp = prior_gp.condition(xobs, uobs, dcov=np.diag(sobs**2))

# differentiate the posterior with respect to x
derivative_gp = posterior_gp.differentiate((1, 0))

# evaluate the posterior and posterior derivative at the interpolation
# points. Calling the GaussianProcess instances will return their mean
# and standard deviation at the provided points.
post_mean, post_std = posterior_gp(xitp)
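# The comment above also mentions evaluating the posterior derivative. The
# following line is not part of the original excerpt, but a minimal
# continuation would evaluate derivative_gp at the same points, since it is
# itself a GaussianProcess instance
diff_mean, diff_std = derivative_gp(xitp)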
import logging

import numpy as np
import scipy.sparse as sp
import matplotlib.pyplot as plt

from rbf.basis import spwen12
from rbf.gproc import gpiso

logging.basicConfig(level=logging.DEBUG)
np.random.seed(1)

# create synthetic data
n = 10000
y = np.linspace(-20.0, 20.0, n)  # observation points
sigma = np.full(n, 0.5)
d = np.exp(-0.3 * np.abs(y)) * np.sin(y) + np.random.normal(0.0, sigma)

# evaluate the output at a subset of the observation points
x = np.linspace(-20.0, 20.0, 1000)  # interpolation points
u_true = np.exp(-0.3 * np.abs(x)) * np.sin(x)  # true signal

# create a sparse GP
gp = gpiso(spwen12, eps=4.0, var=1.0)

# condition with the observations
gpc = gp.condition(y[:, None], d, dcov=sp.diags(sigma**2))

# find the mean and std of the conditioned GP. The chunk size controls the
# trade-off between speed and memory consumption, and it should be tuned by
# the user.
u, us = gpc(x[:, None], chunk_size=1000)

fig, ax = plt.subplots()
ax.plot(x, u_true, 'k-', label='true signal')
ax.plot(y, d, 'k.', alpha=0.1, mec='none', label='observations')
ax.plot(x, u, 'b-', label='post. mean')
ax.fill_between(x, u - us, u + us, color='b', alpha=0.2,
def objective(x, t, d):
    '''objective function to be minimized'''
    gp = gpiso('exp', eps=x[0], var=x[1])
    return -gp.log_likelihood(t, d)
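# A sketch of how this objective might be used (not part of the original
# excerpt): minimize it with scipy's downhill simplex routine, where `t` and
# `d` are assumed to be the observation points and data defined elsewhere in
# the example. Note that this simple form does not constrain eps and var to
# remain positive during the search.
from scipy.optimize import fmin
eps_opt, var_opt = fmin(objective, [1.0, 1.0], args=(t, d))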
import numpy as np
import matplotlib.pyplot as plt
import logging

from rbf.gproc import gpiso, gppoly

logging.basicConfig(level=logging.DEBUG)
np.random.seed(1)

y = np.linspace(-7.5, 7.5, 50)  # observation points
x = np.linspace(-7.5, 7.5, 1000)  # interpolation points
truth = np.exp(-0.3*np.abs(x))*np.sin(x)  # true signal at interp. points

# form synthetic data
obs_sigma = np.full(50, 0.1)  # noise standard deviation
noise = np.random.normal(0.0, obs_sigma)
noise[20], noise[25] = 2.0, 1.0  # add anomalously large noise
obs_mu = np.exp(-0.3*np.abs(y))*np.sin(y) + noise

# form prior Gaussian process
prior = gpiso('se', eps=1.0, var=1.0) + gppoly(1)

# find outliers which will be removed
toss = prior.outliers(y[:, None], obs_mu, obs_sigma, tol=4.0)

# condition with non-outliers
post = prior.condition(
    y[~toss, None], obs_mu[~toss], dcov=np.diag(obs_sigma[~toss]**2)
)
post_mu, post_sigma = post(x[:, None])

# plot the results
fig, ax = plt.subplots(figsize=(6, 4))
ax.errorbar(y[~toss], obs_mu[~toss], obs_sigma[~toss], fmt='k.',
            capsize=0.0, label='inliers')
ax.errorbar(y[toss], obs_mu[toss], obs_sigma[toss], fmt='r.',
            capsize=0.0, label='outliers')
ax.plot(x, post_mu, 'b-', label='posterior mean')
ax.fill_between(x, post_mu-post_sigma, post_mu+post_sigma,
Gaussian process based on the marginal likelihood. Optimization is
performed in two ways: first with a grid search method and then with a
downhill simplex method.
'''
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import fmin
from rbf.gproc import gpiso

np.random.seed(3)

# True signal which we want to recover. This is an exponential function
# with mean=0.0, variance=2.0, and time-scale=0.1. For graphical
# purposes, we will only estimate the variance and time-scale.
eps = 0.1
var = 2.0
gp = gpiso('exp', eps=eps, var=var)

n = 500  # number of observations
time = np.linspace(-5.0, 5.0, n)[:, None]  # observation points
data = gp.sample(time)  # signal which we want to describe

# find the optimal hyperparameters with a brute-force grid search
eps_search = 10**np.linspace(-2, 1, 30)
var_search = 10**np.linspace(-2, 2, 30)
log_likelihoods = np.zeros((30, 30))
for i, eps_test in enumerate(eps_search):
    for j, var_test in enumerate(var_search):
        gp = gpiso('exp', eps=eps_test, var=var_test)
        log_likelihoods[i, j] = gp.log_likelihood(time, data)

# find the optimal hyperparameters with a positively constrained
# downhill simplex method
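# A sketch of that second step (assumed, not part of the original excerpt):
# start the simplex search from the best grid point and optimize the base-10
# logarithms of the hyperparameters so that they stay positive
def neg_log_likelihood(log_params):
    eps_test, var_test = 10**log_params[0], 10**log_params[1]
    return -gpiso('exp', eps=eps_test, var=var_test).log_likelihood(time, data)

i_best, j_best = np.unravel_index(np.argmax(log_likelihoods),
                                  log_likelihoods.shape)
x0 = [np.log10(eps_search[i_best]), np.log10(var_search[j_best])]
log_eps_opt, log_var_opt = fmin(neg_log_likelihood, x0)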
import numpy as np
import matplotlib.pyplot as plt
from sympy import exp, sin, pi

from rbf.basis import get_r, get_eps, RBF
from rbf.gproc import gpiso

np.random.seed(1)

period = 5.0
cls = 0.5  # characteristic length scale
var = 1.0  # variance

# get the symbolic variables
r = get_r()
eps = get_eps()

# create a symbolic expression of the periodic covariance function
expr = exp(-sin(r * pi / period)**2 / eps**2)

# define a periodic RBF using the symbolic expression
basis = RBF(expr)

# define a Gaussian process using the periodic RBF
gp = gpiso(basis, eps=cls, var=var)

t = np.linspace(-10, 10, 1000)[:, None]
sample = gp.sample(t)  # draw a sample
mu, sigma = gp(t)  # evaluate mean and std. dev.

# plot the results
fig, ax = plt.subplots(figsize=(6, 4))
ax.grid(True)
ax.plot(t[:, 0], mu, 'b-', label='mean')
ax.fill_between(t[:, 0], mu - sigma, mu + sigma, color='b', alpha=0.2,
                edgecolor='none', label='std. dev.')
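# A quick numerical check (not part of the original example): the covariance
# between points separated by one full period should match the zero-lag
# covariance, confirming that the constructed kernel is periodic
c0 = gp.covariance([[0.0]], [[0.0]])
cp = gp.covariance([[0.0]], [[period]])
print(c0, cp)  # both should equal the prior variance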
'''
This script demonstrates how to make a custom *GaussianProcess* by
combining *GaussianProcess* instances. The resulting Gaussian process
has two distinct length-scales.
'''
import numpy as np
import matplotlib.pyplot as plt
from rbf.gproc import gpiso

np.random.seed(1)

dx = np.linspace(0.0, 5.0, 1000)[:, None]
x = np.linspace(-5.0, 5.0, 1000)[:, None]

gp_long = gpiso('se', eps=2.0, var=1.0)
gp_short = gpiso('se', eps=0.25, var=0.5)
gp = gp_long + gp_short

# compute the autocovariances
acov_long = gp_long.covariance(dx, [[0.0]])
acov_short = gp_short.covariance(dx, [[0.0]])
acov = gp.covariance(dx, [[0.0]])

# draw a sample
sample = gp.sample(x)

# mean and uncertainty of the new gp
mean, sigma = gp(x)

# plot the autocovariance functions
fig, axs = plt.subplots(2, 1, figsize=(6, 6))
axs[0].plot(dx, acov_long, 'r--', label='long component')
axs[0].plot(dx, acov_short, 'b--', label='short component')
axs[0].plot(dx, acov, 'k-', label='sum')
axs[0].set_xlabel(r'$\mathregular{\Delta x}$', fontsize=10)
axs[0].set_ylabel('auto-covariance', fontsize=10)
axs[0].legend(fontsize=10)
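# A small numerical check (not in the original script): adding GaussianProcess
# instances adds their covariance functions, so the combined autocovariance
# should equal the sum of the component autocovariances
assert np.allclose(acov, acov_long + acov_short)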
u_true = np.exp(-0.3*np.abs(x))*np.sin(x)

# observation noise covariance
dsigma = np.full(25, 0.1)
dcov = np.diag(dsigma**2)

# noisy observations of the signal
d = np.exp(-0.3*np.abs(y))*np.sin(y) + np.random.normal(0.0, dsigma)

def negative_log_likelihood(params):
    log_eps, log_var = params
    gp = gpiso('se', eps=10**log_eps, var=10**log_var)
    out = -gp.log_likelihood(y[:, None], d, dcov=dcov)
    return out

log_eps, log_var = minimize(negative_log_likelihood, [0.0, 0.0]).x

# create a prior GaussianProcess using the most likely variance and
# lengthscale
gp_prior = gpiso('se', eps=10**log_eps, var=10**log_var)

# generate a sample of the prior
sample_prior = gp_prior.sample(x[:, None])

# find the mean and standard deviation of the prior
mean_prior, std_prior = gp_prior(x[:, None])

# condition the prior on the observations
gp_post = gp_prior.condition(y[:, None], d, dcov=dcov)
sample_post = gp_post.sample(x[:, None])
mean_post, std_post = gp_post(x[:, None])

## Plotting ###################################################################
fig, axs = plt.subplots(2, 1, figsize=(6, 6))
ax = axs[0]
ax.grid(ls=':')
ax.tick_params(labelsize=10)
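# Not part of the original excerpt: it can be useful to report the maximum
# likelihood hyperparameters in their natural (non-log) units before plotting
print('optimal eps (shape parameter): %.3g' % 10**log_eps)
print('optimal variance: %.3g' % 10**log_var)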