def test_features():
    # Test that optimisation runs for a full-fledged GPAR.
    reg = GPARRegressor(
        replace=True,
        scale=1.0,
        per=True,
        per_period=1.0,
        per_decay=10.0,
        input_linear=True,
        input_linear_scale=0.1,
        linear=True,
        linear_scale=1.0,
        nonlinear=True,
        nonlinear_scale=1.0,
        rq=True,
        noise=0.1,
    )
    x = np.stack([np.linspace(0, 10, 20), np.linspace(10, 20, 20)], axis=1)
    y = reg.sample(x, p=2)
    reg.fit(x, y, iters=10)
def test_logpdf_differentiable():
    reg = GPARRegressor(replace=False, impute=False,
                        linear=True, linear_scale=1., nonlinear=False,
                        noise=1e-8, normalise_y=False)
    x = np.linspace(0, 5, 10)
    y = reg.sample(x, p=2, latent=True)

    # Test that gradient calculation works.
    reg.vs.requires_grad(True)
    for var in reg.vs.get_vars():
        assert var.grad is None
    reg.logpdf(torch.tensor(x), torch.tensor(y)).backward()
    for var in reg.vs.get_vars():
        assert var.grad is not None
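# Because `logpdf` is differentiable, the hyperparameters can in principle be
# optimised by hand instead of via `fit`. A minimal sketch, assuming that the
# variables returned by `reg.vs.get_vars()` behave like ordinary PyTorch
# tensors; the optimiser choice and learning rate are illustrative, not part
# of the library.
reg = GPARRegressor(linear=True, noise=1e-2)
x = np.linspace(0, 5, 10)
y = reg.sample(x, p=2)  # Sampling instantiates the variables.
reg.vs.requires_grad(True)
opt = torch.optim.Adam(reg.vs.get_vars(), lr=5e-2)
for _ in range(100):
    opt.zero_grad()
    loss = -reg.logpdf(torch.tensor(x), torch.tensor(y))  # Negative log-pdf.
    loss.backward()
    opt.step()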
def test_logpdf(x, w):
    # Sample some data from a "sensitive" GPAR.
    reg = GPARRegressor(
        replace=False,
        impute=False,
        nonlinear=True,
        nonlinear_scale=0.1,
        linear=True,
        linear_scale=10.0,
        noise=1e-2,
        normalise_y=False,
    )
    y = reg.sample(x, w, p=2, latent=True)

    # Extract models.
    gpar = _construct_gpar(reg, reg.vs, B.shape(B.uprank(x))[1], 2)
    f1, e1 = gpar.layers[0]()
    f2, e2 = gpar.layers[1]()

    # Test computation under the prior.
    x1 = x
    x2 = B.concat(B.uprank(x), y[:, 0:1], axis=1)
    if w is not None:
        x1 = WeightedUnique(x1, w[:, 0])
        x2 = WeightedUnique(x2, w[:, 1])
    logpdf1 = (f1 + e1)(x1).logpdf(y[:, 0])
    logpdf2 = (f2 + e2)(x2).logpdf(y[:, 1])
    approx(reg.logpdf(x, y, w), logpdf1 + logpdf2, atol=1e-6)

    # Test computation under the posterior.
    post1 = f1.measure | ((f1 + e1)(x1), y[:, 0])
    post2 = f2.measure | ((f2 + e2)(x2), y[:, 1])
    e1_post = GP(e1.mean, e1.kernel, measure=post1)
    e2_post = GP(e2.mean, e2.kernel, measure=post2)
    logpdf1 = (post1(f1) + e1_post)(x1).logpdf(y[:, 0])
    logpdf2 = (post2(f2) + e2_post)(x2).logpdf(y[:, 1])
    with pytest.raises(RuntimeError):
        reg.logpdf(x, y, w, posterior=True)
    reg.condition(x, y, w)
    approx(reg.logpdf(x, y, w, posterior=True), logpdf1 + logpdf2, atol=1e-6)

    # Test that sampling missing values gives a stochastic estimate.
    y[::2, 0] = np.nan
    all_different(
        reg.logpdf(x, y, w, sample_missing=True),
        reg.logpdf(x, y, w, sample_missing=True),
    )
def test_logpdf(x, w):
    # Sample some data from a "sensitive" GPAR.
    reg = GPARRegressor(
        replace=False,
        impute=False,
        nonlinear=True,
        nonlinear_scale=0.1,
        linear=True,
        linear_scale=10.0,
        noise=1e-2,
        normalise_y=False,
    )
    y = reg.sample(x, w, p=2, latent=True)

    # Extract models.
    gpar = _construct_gpar(reg, reg.vs, B.shape(B.uprank(x))[1], 2)
    f1, noise1 = gpar.layers[0]()
    f2, noise2 = gpar.layers[1]()
    if w is not None:
        noise1 = noise1 / w[:, 0]
        noise2 = noise2 / w[:, 1]

    # Test computation under the prior.
    x1 = x
    x2 = B.concat(B.uprank(x), y[:, 0:1], axis=1)
    logpdf1 = f1(x1, noise1).logpdf(y[:, 0])
    logpdf2 = f2(x2, noise2).logpdf(y[:, 1])
    approx(reg.logpdf(x, y, w), logpdf1 + logpdf2, atol=1e-6)

    # Test computation under the posterior.
    f1_post = f1 | (f1(x1, noise1), y[:, 0])
    f2_post = f2 | (f2(x2, noise2), y[:, 1])
    logpdf1 = f1_post(x1, noise1).logpdf(y[:, 0])
    logpdf2 = f2_post(x2, noise2).logpdf(y[:, 1])
    with pytest.raises(RuntimeError):
        reg.logpdf(x, y, w, posterior=True)
    reg.condition(x, y, w)
    approx(reg.logpdf(x, y, w, posterior=True), logpdf1 + logpdf2, atol=1e-6)

    # Test that sampling missing values gives a stochastic estimate.
    y[::2, 0] = np.nan
    all_different(
        reg.logpdf(x, y, w, sample_missing=True),
        reg.logpdf(x, y, w, sample_missing=True),
    )
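# The arguments `x` and `w` above are supplied by test parametrisation
# elsewhere in the suite. A sketch of compatible values, assuming that `w`
# holds strictly positive per-output weights of shape `(n, p)` (as the
# indexing `w[:, 0]` and `w[:, 1]` suggests) and that `w=None` selects the
# unweighted model:
n = 10
x = np.linspace(0, 5, n)
w = np.ones((n, 2))  # Unit weights recover the unweighted behaviour.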
def test_logpdf():
    # Sample some data from a "sensitive" GPAR.
    reg = GPARRegressor(replace=False, impute=False,
                        nonlinear=True, nonlinear_scale=0.1,
                        linear=True, linear_scale=10.,
                        noise=1e-4, normalise_y=False)
    x = np.linspace(0, 5, 10)
    y = reg.sample(x, p=2, latent=True)

    # Extract models.
    gpar = _construct_gpar(reg, reg.vs, 1, 2)
    f1, e1 = gpar.layers[0]()
    f2, e2 = gpar.layers[1]()

    # Test computation under the prior.
    logpdf1 = (f1 + e1)(B.array(x)).logpdf(B.array(y[:, 0]))
    x_stack = np.concatenate([x[:, None], y[:, 0:1]], axis=1)
    logpdf2 = (f2 + e2)(B.array(x_stack)).logpdf(B.array(y[:, 1]))
    yield approx, reg.logpdf(x, y), logpdf1 + logpdf2, 6

    # Test computation under the posterior.
    e1_post = GP(e1.kernel, e1.mean, graph=e1.graph)
    e2_post = GP(e2.kernel, e2.mean, graph=e2.graph)
    f1_post = f1 | ((f1 + e1)(B.array(x)), B.array(y[:, 0]))
    f2_post = f2 | ((f2 + e2)(B.array(x_stack)), B.array(y[:, 1]))
    logpdf1 = (f1_post + e1_post)(B.array(x)).logpdf(B.array(y[:, 0]))
    logpdf2 = (f2_post + e2_post)(B.array(x_stack)).logpdf(B.array(y[:, 1]))
    yield raises, RuntimeError, lambda: reg.logpdf(x, y, posterior=True)
    reg.fit(x, y, iters=0)
    yield approx, reg.logpdf(x, y, posterior=True), logpdf1 + logpdf2, 6

    # Test that sampling missing values gives a stochastic estimate.
    y[::2, 0] = np.nan
    yield (ge,
           np.abs(reg.logpdf(x, y, sample_missing=True) -
                  reg.logpdf(x, y, sample_missing=True)),
           1e-3)
# Draw functions depending on each other in complicated ways.
f1 = -np.sin(10 * np.pi * (x + 1)) / (2 * x + 1) - x**4
f2 = np.cos(f1)**2 + np.sin(3 * x)
f3 = f2 * f1**2 + 3 * x
f = np.stack((f1, f2, f3), axis=0).T

# Add noise and subsample.
y = f + noise * np.random.randn(n, 3)
x_obs, y_obs = x[::8], y[::8]

# Fit and predict GPAR.
model = GPARRegressor(scale=0.1,
                      linear=True, linear_scale=10.,
                      nonlinear=True, nonlinear_scale=0.1,
                      noise=0.1,
                      impute=True, replace=False, normalise_y=False)
model.fit(x_obs, y_obs)
means, lowers, uppers = \
    model.predict(x, num_samples=100, credible_bounds=True, latent=True)

# Fit and predict independent GPs: set markov=0.
igp = GPARRegressor(scale=0.1,
                    linear=True, linear_scale=10.,
                    nonlinear=True, nonlinear_scale=0.1,
                    noise=0.1,
def test_sample_and_predict():
    reg = GPARRegressor(replace=False, impute=False,
                        linear=True, linear_scale=1., nonlinear=False,
                        noise=1e-8, normalise_y=False)
    x = np.linspace(0, 5, 10)

    # Test checks.
    yield raises, ValueError, lambda: reg.sample(x)
    yield raises, RuntimeError, lambda: reg.sample(x, posterior=True)

    # Test that output is simplified correctly.
    yield isinstance, reg.sample(x, p=2), np.ndarray
    yield isinstance, reg.sample(x, p=2, num_samples=2), list

    # Test that it produces random samples. Not sure how to test correctness.
    yield ge, np.sum(np.abs(reg.sample(x, p=2) - reg.sample(x, p=2))), 1e-2
    yield ge, np.sum(np.abs(reg.sample(x, p=2, latent=True) -
                            reg.sample(x, p=2, latent=True))), 1e-3

    # Test that the mean of posterior samples is around the data.
    y = reg.sample(x, p=2)
    reg.fit(x, y, iters=0)
    yield approx, y, np.mean(reg.sample(x,
                                        posterior=True,
                                        num_samples=20), axis=0), 4
    yield approx, y, np.mean(reg.sample(x,
                                        latent=True,
                                        posterior=True,
                                        num_samples=20), axis=0), 4

    # Test that prediction is around the data.
    yield approx, y, reg.predict(x, num_samples=20), 4
    yield approx, y, reg.predict(x, latent=True, num_samples=20), 4

    # Test that prediction is confident.
    _, lowers, uppers = reg.predict(x, num_samples=10, credible_bounds=True)
    yield ok, np.less_equal(uppers - lowers, 1e-3).all()
def test_inducing_points_uprank():
    reg = GPARRegressor(x_ind=np.linspace(0, 10, 20))
    assert reg.x_ind is not None
    assert B.rank(reg.x_ind) == 2
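# For context: `B.uprank` from LAB upranks an array to rank two, so a vector
# of inducing-point locations becomes a column matrix. A small illustration
# (a sketch, not part of the test suite):
x_ind = B.uprank(np.linspace(0, 10, 20))
assert B.shape(x_ind) == (20, 1)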
def test_get_variables():
    gpar = GPARRegressor()
    gpar.vs.get(init=1.0, name='variable')
    assert list(gpar.get_variables().items()) == [('variable', 1.0)]
    args.experiment = 'synthetic'

    x1 = np.linspace(1, 5, 5)
    x2 = np.concatenate([np.linspace(1, 9, 9),
                         np.linspace(10, 98, 45),
                         np.linspace(100, 980, 45)])
    xx1, xx2 = np.meshgrid(x1, x2)
    x = np.stack([np.ravel(xx1), np.ravel(xx2)], axis=1)

    model = GPARRegressor(scale=[2., .3], scale_tie=True,
                          linear=True, linear_scale=10.,
                          linear_with_inputs=False,
                          nonlinear=False, nonlinear_with_inputs=False,
                          markov=1,
                          replace=True,
                          noise=args.noise)
    n = 10
    y = model.sample(transform_x(x), p=n, latent=False)
else:
    # Load data.
    x = np.genfromtxt(
        os.path.join(data_dir, args.data, args.experiment,
                     f'x_{"rmse" if args.rmse else "loglik"}.txt'))
    y = np.genfromtxt(
def test_scale_tying():
    reg = GPARRegressor(scale_tie=True)
    reg.sample(np.linspace(0, 10, 20), p=2)  # Instantiate variables.
    vs = reg.get_variables()
    assert '0/input/scales' in vs
    assert '1/input/scales' not in vs
def test_get_variables():
    gpar = GPARRegressor()
    gpar.vs.get(init=1.0, name='variable')
    yield eq, list(gpar.get_variables().items()), [('variable', 1.0)]
    x2 = np.concatenate([np.linspace(1, 9, 9),
                         np.linspace(10, 95, 18),
                         np.linspace(100, 950, 18)])
    xx1, xx2 = np.meshgrid(x1, x2)
    x = np.stack([np.ravel(xx1), np.ravel(xx2)], axis=1)

    if len(args.synthetic_scales) != 2:
        raise ValueError('Requires exactly 2 synthetic scale arguments.')
    model = GPARRegressor(scale=args.synthetic_scales, scale_tie=True,
                          linear=True, linear_scale=10.,
                          input_linear=False,
                          nonlinear=False,
                          markov=None,
                          replace=True,
                          noise=args.noise)
    n = 3
    y = model.sample(transform_x(x), p=n, latent=False)
else:
    # Load data.
    x = np.genfromtxt(
        os.path.join(data_dir, args.data, args.experiment,
                     f'x_{"rmse" if args.rmse else "loglik"}.txt'))
    y = np.genfromtxt(
def test_fit():
    reg = GPARRegressor(replace=False, impute=False,
                        normalise_y=True, transform_y=squishing_transform)
    x = np.linspace(0, 5, 10)
    y = reg.sample(x, p=2)

    # TODO: Remove this once greedy search is implemented.
    yield raises, NotImplementedError, lambda: reg.fit(x, y, greedy=True)

    # Test that data is correctly transformed if it has an output with zero
    # variance.
    reg.fit(x, y, iters=0)
    yield ok, (~B.isnan(reg.y)).numpy().all()
    y_pathological = y.copy()
    y_pathological[:, 0] = 1
    reg.fit(x, y_pathological, iters=0)
    yield ok, (~B.isnan(reg.y)).numpy().all()

    # Test transformation and normalisation of outputs.
    z = B.linspace(-1, 1, 10, dtype=torch.float64)
    z = B.stack([z, 2 * z], axis=1)
    yield allclose, reg._untransform_y(reg._transform_y(z)), z
    yield allclose, reg._unnormalise_y(reg._normalise_y(z)), z

    # Test that fitting runs without issues.
    vs = reg.vs.detach()
    yield lambda x_, y_: reg.fit(x_, y_, fix=False), x, y
    reg.vs = vs
    yield lambda x_, y_: reg.fit(x_, y_, fix=True), x, y
f'{"output_linear-" if args.output_linear else ""}' f'{"output_nonlinear-" if args.output_nonlinear else ""}' f'markov_{args.markov}-' f'{args.seed}') if os.path.isfile(fig_path + '.png'): print('Experiment already run. Exiting') sys.exit(0) np.random.seed(args.seed) model = GPARRegressor(scale=[1., .5], scale_tie=args.scale_tie, linear=args.output_linear, linear_scale=10., input_linear=args.input_linear, input_linear_scale=10., nonlinear=args.output_nonlinear, markov=args.markov, replace=True, noise=0.01) # model2 = GPARRegressor(scale=[1., .5], scale_tie=True, # linear=True, linear_scale=10., input_linear=False, # nonlinear=False, # markov=1, # replace=True, # noise=0.01) # Load data. formatting_args = (data_dir, args.data, args.experiment, 'rmse' if args.rmse else 'loglik')
import matplotlib.pyplot as plt
import numpy as np
import wbml.plot

from gpar.regression import GPARRegressor

x = np.linspace(0, 1, 100)
model = GPARRegressor(scale=0.1,
                      linear=False, nonlinear=True, nonlinear_scale=0.5,
                      impute=True, replace=True, noise=0.1,
                      normalise_y=True)

# Sample observations and discard some.
y = model.sample(x, p=3)
y_obs = y.copy()
y_obs[np.random.permutation(100)[:25], 0] = np.nan
y_obs[np.random.permutation(100)[:50], 1] = np.nan
y_obs[np.random.permutation(100)[:75], 2] = np.nan

# Fit the model on the partial observations and predict.
model.fit(x, y_obs)
means, lowers, uppers = \
    model.predict(x, num_samples=200, latent=False, credible_bounds=True)

# Plot the result.
plt.figure(figsize=(8, 6))
for i in range(3):
    plt.subplot(3, 1, i + 1)
    plt.plot(x, means[:, i], label='Prediction', style='pred')
    plt.fill_between(x, lowers[:, i], uppers[:, i], style='pred')
def test_sample_and_predict(x, w):
    # Use an output transform to ensure that it is handled correctly.
    reg = GPARRegressor(
        replace=False,
        impute=False,
        linear=True,
        linear_scale=1.0,
        nonlinear=False,
        noise=1e-8,
        normalise_y=False,
        transform_y=squishing_transform,
    )

    # Test checks.
    with pytest.raises(ValueError):
        reg.sample(x, w)
    with pytest.raises(RuntimeError):
        reg.sample(x, w, posterior=True)

    # Test that output is simplified correctly.
    assert isinstance(reg.sample(x, w, p=2), np.ndarray)
    assert isinstance(reg.sample(x, w, p=2, num_samples=2), list)

    # Test that it produces random samples. Not sure how to test correctness.
    all_different(reg.sample(x, w, p=2), reg.sample(x, w, p=2))
    all_different(
        reg.sample(x, w, p=2, latent=True),
        reg.sample(x, w, p=2, latent=True),
    )

    # Test that the mean of posterior samples is around the data.
    y = reg.sample(x, w, p=2)
    reg.condition(x, y, w)
    approx(
        y,
        np.mean(reg.sample(x, w, posterior=True, num_samples=100), axis=0),
        atol=5e-2,
    )
    approx(
        y,
        np.mean(
            reg.sample(x, w, latent=True, posterior=True, num_samples=100),
            axis=0,
        ),
        atol=5e-2,
    )

    # Test that prediction is around the data.
    approx(y, reg.predict(x, w, num_samples=100), atol=5e-2)
    approx(y, reg.predict(x, w, latent=True, num_samples=100), atol=5e-2)

    # Test that prediction is confident.
    _, lowers, uppers = reg.predict(x, w, num_samples=100, credible_bounds=True)
    approx(uppers, lowers, atol=5e-2)
def test_scale_tying(x, w):
    reg = GPARRegressor(scale_tie=True)
    reg.sample(x, w, p=2)  # Instantiate variables.
    vs = reg.get_variables()
    assert "0/input/scales" in vs
    assert "1/input/scales" not in vs
def test_scale_tying():
    reg = GPARRegressor(scale_tie=True)
    reg.sample(np.linspace(0, 10, 20), p=2)  # Instantiate variables.
    vs = reg.get_variables()
    yield ok, (0, 'I/scales') in vs
    yield ok, (1, 'I/scales') not in vs
def test_sample_and_predict():
    # Use an output transform to ensure that it is handled correctly.
    reg = GPARRegressor(replace=False, impute=False,
                        linear=True, linear_scale=1., nonlinear=False,
                        noise=1e-8, normalise_y=False,
                        transform_y=squishing_transform)
    x = np.linspace(0, 5, 5)

    # Test checks.
    with pytest.raises(ValueError):
        reg.sample(x)
    with pytest.raises(RuntimeError):
        reg.sample(x, posterior=True)

    # Test that output is simplified correctly.
    assert isinstance(reg.sample(x, p=2), np.ndarray)
    assert isinstance(reg.sample(x, p=2, num_samples=2), list)

    # Test that it produces random samples. Not sure how to test correctness.
    assert np.sum(np.abs(reg.sample(x, p=2) - reg.sample(x, p=2))) >= 1e-2
    assert np.sum(np.abs(reg.sample(x, p=2, latent=True) -
                         reg.sample(x, p=2, latent=True))) >= 1e-3

    # Test that the mean of posterior samples is around the data.
    y = reg.sample(x, p=2)
    reg.condition(x, y)
    approx(y,
           np.mean(reg.sample(x, posterior=True, num_samples=100), axis=0),
           digits=3)
    approx(y,
           np.mean(reg.sample(x,
                              latent=True,
                              posterior=True,
                              num_samples=100), axis=0),
           digits=3)

    # Test that prediction is around the data.
    approx(y, reg.predict(x, num_samples=100), digits=3)
    approx(y, reg.predict(x, latent=True, num_samples=100), digits=3)

    # Test that prediction is confident.
    _, lowers, uppers = reg.predict(x, num_samples=100, credible_bounds=True)
    assert np.less_equal(uppers - lowers, 1e-2).all()
def test_condition_and_fit():
    reg = GPARRegressor(replace=False, impute=False,
                        normalise_y=True, transform_y=squishing_transform)
    x = np.linspace(0, 5, 10)
    y = reg.sample(x, p=2)

    # Test that data is correctly normalised.
    reg.condition(x, y)
    approx(B.mean(reg.y, axis=0), B.zeros(reg.p))
    approx(B.std(reg.y, axis=0), B.ones(reg.p))

    # Test that data is correctly normalised if it has an output with zero
    # variance.
    y_pathological = y.copy()
    y_pathological[:, 0] = 1
    reg.condition(x, y_pathological)
    assert (~B.isnan(reg.y)).numpy().all()

    # Test transformation and normalisation of outputs.
    z = torch.linspace(-1, 1, 10, dtype=torch.float64)
    z = B.stack(z, 2 * z, axis=1)
    allclose(reg._untransform_y(reg._transform_y(z)), z)
    allclose(reg._unnormalise_y(reg._normalise_y(z)), z)

    # Test that fitting runs without issues.
    vs = reg.vs.copy(detach=True)
    reg.fit(x, y, fix=False)
    reg.vs = vs
    reg.fit(x, y, fix=True)

    # TODO: Remove this once greedy search is implemented.
    with pytest.raises(NotImplementedError):
        reg.fit(x, y, greedy=True)
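# For context, the two entry points exercised above differ as follows:
# `condition` conditions on data under the current hyperparameters, whereas
# `fit` additionally optimises the hyperparameters before conditioning. A
# minimal usage sketch; the settings and iteration count are illustrative
# only.
reg = GPARRegressor(linear=True, noise=1e-2)
x = np.linspace(0, 5, 10)
y = reg.sample(x, p=2)
reg.fit(x, y, iters=100)  # Optimise hyperparameters, then condition.
means = reg.predict(x, num_samples=100)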
f1 = -np.sin(10 * np.pi * (x + 1)) / (2 * x + 1) - x**4
f2 = np.cos(f1)**2 + np.sin(3 * x)
f3 = f2 * f1**2 + 3 * x
f = np.stack((f1, f2, f3), axis=0).T

# Add noise and subsample.
y = f + noise * np.random.randn(n, 3)
x_obs, y_obs = x[::8], y[::8]

# Fit and predict GPAR.
model = GPARRegressor(
    scale=0.1,
    linear=True,
    linear_scale=10.0,
    nonlinear=True,
    nonlinear_scale=0.1,
    noise=0.1,
    impute=True,
    replace=False,
    normalise_y=False,
)
model.fit(x_obs, y_obs)
means, lowers, uppers = model.predict(
    x, num_samples=200, credible_bounds=True, latent=True
)

# Fit and predict independent GPs: set `markov=0` in GPAR.
igp = GPARRegressor(
    scale=0.1,
    linear=True,