def test_features():
    # Test that optimisation runs for a full-fledged GPAR.
    reg = GPARRegressor(replace=True,
                        scale=1.0,
                        per=True, per_period=1.0, per_decay=10.0,
                        input_linear=True, input_linear_scale=0.1,
                        linear=True, linear_scale=1.0,
                        nonlinear=True, nonlinear_scale=1.0,
                        rq=True,
                        noise=0.1)
    x = np.stack([np.linspace(0, 10, 20),
                  np.linspace(10, 20, 20)], axis=1)
    y = reg.sample(x, p=2)
    reg.fit(x, y, iters=10)
def test_fit():
    reg = GPARRegressor(replace=False,
                        impute=False,
                        normalise_y=True,
                        transform_y=squishing_transform)
    x = np.linspace(0, 5, 10)
    y = reg.sample(x, p=2)

    # TODO: Remove this once greedy search is implemented.
    yield raises, NotImplementedError, lambda: reg.fit(x, y, greedy=True)

    # Test that data is correctly transformed if it has an output with zero
    # variance.
    reg.fit(x, y, iters=0)
    yield ok, (~B.isnan(reg.y)).numpy().all()
    y_pathological = y.copy()
    y_pathological[:, 0] = 1
    reg.fit(x, y_pathological, iters=0)
    yield ok, (~B.isnan(reg.y)).numpy().all()

    # Test transformation and normalisation of outputs.
    z = B.linspace(-1, 1, 10, dtype=torch.float64)
    z = B.stack([z, 2 * z], axis=1)
    yield allclose, reg._untransform_y(reg._transform_y(z)), z
    yield allclose, reg._unnormalise_y(reg._normalise_y(z)), z

    # Test that fitting runs without issues.
    vs = reg.vs.detach()
    yield lambda x_, y_: reg.fit(x_, y_, fix=False), x, y
    reg.vs = vs
    yield lambda x_, y_: reg.fit(x_, y_, fix=True), x, y
def test_sample_and_predict():
    reg = GPARRegressor(replace=False,
                        impute=False,
                        linear=True, linear_scale=1.,
                        nonlinear=False,
                        noise=1e-8,
                        normalise_y=False)
    x = np.linspace(0, 5, 10)

    # Test checks.
    yield raises, ValueError, lambda: reg.sample(x)
    yield raises, RuntimeError, lambda: reg.sample(x, posterior=True)

    # Test that the output is simplified correctly.
    yield isinstance, reg.sample(x, p=2), np.ndarray
    yield isinstance, reg.sample(x, p=2, num_samples=2), list

    # Test that it produces random samples. Not sure how to test correctness.
    yield ge, np.sum(np.abs(reg.sample(x, p=2) - reg.sample(x, p=2))), 1e-2
    yield ge, np.sum(np.abs(reg.sample(x, p=2, latent=True) -
                            reg.sample(x, p=2, latent=True))), 1e-3

    # Test that the mean of posterior samples is around the data.
    y = reg.sample(x, p=2)
    reg.fit(x, y, iters=0)
    yield approx, y, np.mean(reg.sample(x,
                                        posterior=True,
                                        num_samples=20), axis=0), 4
    yield approx, y, np.mean(reg.sample(x,
                                        latent=True,
                                        posterior=True,
                                        num_samples=20), axis=0), 4

    # Test that the prediction is around the data.
    yield approx, y, reg.predict(x, num_samples=20), 4
    yield approx, y, reg.predict(x, latent=True, num_samples=20), 4

    # Test that the prediction is confident.
    _, lowers, uppers = reg.predict(x, num_samples=10, credible_bounds=True)
    yield ok, np.less_equal(uppers - lowers, 1e-3).all()
def test_logpdf():
    # Sample some data from a "sensitive" GPAR.
    reg = GPARRegressor(replace=False,
                        impute=False,
                        nonlinear=True, nonlinear_scale=0.1,
                        linear=True, linear_scale=10.,
                        noise=1e-4,
                        normalise_y=False)
    x = np.linspace(0, 5, 10)
    y = reg.sample(x, p=2, latent=True)

    # Extract models.
    gpar = _construct_gpar(reg, reg.vs, 1, 2)
    f1, e1 = gpar.layers[0]()
    f2, e2 = gpar.layers[1]()

    # Test computation under the prior.
    logpdf1 = (f1 + e1)(B.array(x)).logpdf(B.array(y[:, 0]))
    x_stack = np.concatenate([x[:, None], y[:, 0:1]], axis=1)
    logpdf2 = (f2 + e2)(B.array(x_stack)).logpdf(B.array(y[:, 1]))
    yield approx, reg.logpdf(x, y), logpdf1 + logpdf2, 6

    # Test computation under the posterior.
    e1_post = GP(e1.kernel, e1.mean, graph=e1.graph)
    e2_post = GP(e2.kernel, e2.mean, graph=e2.graph)
    f1_post = f1 | ((f1 + e1)(B.array(x)), B.array(y[:, 0]))
    f2_post = f2 | ((f2 + e2)(B.array(x_stack)), B.array(y[:, 1]))
    logpdf1 = (f1_post + e1_post)(B.array(x)).logpdf(B.array(y[:, 0]))
    logpdf2 = (f2_post + e2_post)(B.array(x_stack)).logpdf(B.array(y[:, 1]))
    yield raises, RuntimeError, lambda: reg.logpdf(x, y, posterior=True)
    reg.fit(x, y, iters=0)
    yield approx, reg.logpdf(x, y, posterior=True), logpdf1 + logpdf2, 6

    # Test that sampling missing values gives a stochastic estimate.
    y[::2, 0] = np.nan
    yield ge, \
        np.abs(reg.logpdf(x, y, sample_missing=True) -
               reg.logpdf(x, y, sample_missing=True)), \
        1e-3
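# For intuition, the identity that test_logpdf exercises is GPAR's
# autoregressive factorisation log p(y1, y2 | x) = log p(y1 | x) +
# log p(y2 | x, y1). Below is a minimal self-contained sketch of the same
# identity for a plain bivariate Gaussian; the numbers are illustrative and
# this is not part of the test suite.
import numpy as np
from scipy.stats import multivariate_normal, norm

mean = np.array([0.0, 1.0])
cov = np.array([[1.0, 0.6],
                [0.6, 2.0]])
y = np.array([0.3, -0.5])

# Joint log-density.
joint = multivariate_normal(mean, cov).logpdf(y)

# Factorised: p(y1) times p(y2 | y1), via standard Gaussian conditioning.
logp1 = norm(mean[0], np.sqrt(cov[0, 0])).logpdf(y[0])
cond_mean = mean[1] + cov[1, 0] / cov[0, 0] * (y[0] - mean[0])
cond_var = cov[1, 1] - cov[1, 0] ** 2 / cov[0, 0]
logp2 = norm(cond_mean, np.sqrt(cond_var)).logpdf(y[1])

assert np.isclose(joint, logp1 + logp2)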
def test_condition_and_fit():
    reg = GPARRegressor(replace=False,
                        impute=False,
                        normalise_y=True,
                        transform_y=squishing_transform)
    x = np.linspace(0, 5, 10)
    y = reg.sample(x, p=2)

    # Test that data is correctly normalised.
    reg.condition(x, y)
    approx(B.mean(reg.y, axis=0), B.zeros(reg.p))
    approx(B.std(reg.y, axis=0), B.ones(reg.p))

    # Test that data is correctly normalised if it has an output with zero
    # variance.
    y_pathological = y.copy()
    y_pathological[:, 0] = 1
    reg.condition(x, y_pathological)
    assert (~B.isnan(reg.y)).numpy().all()

    # Test transformation and normalisation of outputs.
    z = torch.linspace(-1, 1, 10, dtype=torch.float64)
    z = B.stack(z, 2 * z, axis=1)
    allclose(reg._untransform_y(reg._transform_y(z)), z)
    allclose(reg._unnormalise_y(reg._normalise_y(z)), z)

    # Test that fitting runs without issues.
    vs = reg.vs.copy(detach=True)
    reg.fit(x, y, fix=False)
    reg.vs = vs
    reg.fit(x, y, fix=True)

    # TODO: Remove this once greedy search is implemented.
    with pytest.raises(NotImplementedError):
        reg.fit(x, y, greedy=True)
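# `squishing_transform` above comes from the library. For reference, a
# hypothetical transform/untransform pair of the same shape (a signed-log
# "squish"; not necessarily the library's exact definition) illustrates the
# round-trip property that the _transform_y/_untransform_y checks rely on.
import numpy as np

my_squishing_transform = (
    lambda z: np.sign(z) * np.log(1 + np.abs(z)),    # compress heavy tails
    lambda z: np.sign(z) * (np.exp(np.abs(z)) - 1),  # exact inverse
)

z = np.linspace(-3, 3, 7)
transform, untransform = my_squishing_transform
assert np.allclose(untransform(transform(z)), z)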
# Add noise and subsample.
y = f + noise * np.random.randn(n, 3)
x_obs, y_obs = x[::8], y[::8]

# Fit and predict GPAR.
model = GPARRegressor(scale=0.1,
                      linear=True, linear_scale=10.,
                      nonlinear=True, nonlinear_scale=0.1,
                      noise=0.1,
                      impute=True, replace=False,
                      normalise_y=False)
model.fit(x_obs, y_obs)
means, lowers, uppers = model.predict(x,
                                      num_samples=100,
                                      credible_bounds=True,
                                      latent=True)

# Fit and predict independent GPs: set markov=0.
igp = GPARRegressor(scale=0.1,
                    linear=True, linear_scale=10.,
                    nonlinear=True, nonlinear_scale=0.1,
                    noise=0.1,
                    markov=0,
                    normalise_y=False)
igp.fit(x_obs, y_obs)
igp_means, igp_lowers, igp_uppers = igp.predict(x,
                                                num_samples=100,
                                                credible_bounds=True,
                                                latent=True)
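# The excerpt above fits both models but stops before any comparison. A
# minimal sketch of how the two fits could be scored against the noiseless
# truth `f` from the excerpt; the SMSE metric below is an illustrative
# choice, not part of the original script.
def smse(pred, truth):
    """Standardised MSE: MSE normalised by the variance of the truth."""
    return np.mean((pred - truth) ** 2) / np.var(truth)

print('SMSE GPAR:', smse(means, f))
print('SMSE IGP: ', smse(igp_means, f))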
#                       markov=1,
#                       replace=True,
#                       noise=0.01)
# model = GPARRegressor(scale=args.synthetic_scales, scale_tie=True,
#                       linear=True, linear_scale=10., input_linear=False,
#                       nonlinear=False,
#                       markov=1,
#                       replace=True,
#                       noise=args.noise)

# Time the fitting.
start = time.time()

# Fit model and print hyperparameters.
model.fit(transform_x(x), y, iters=args.iters, trace=False, fix=True)
if args.joint:
    model.fit(transform_x(x), y, iters=args.iters, trace=False, fix=False)
print('Hyperparameters:')
for k, v in model.get_variables().items():
    print(k)
    print(' ', v)

# Predict.
# preds = []
# for i, x_test in enumerate(x_tests):
#     print('Predicting {}/{}'.format(i + 1, max_layers - min_layers + 1))
#     preds.append(model.predict(transform_x(x_test),
#                                num_samples=50,
#                                latent=True,
#                                credible_bounds=True))
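# Hypothetical continuation (not in the excerpt): the timer started above
# would typically be reported once fitting has finished.
elapsed = time.time() - start
print('Fitting took {:.1f} s'.format(elapsed))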
x = np.linspace(0, 1, 100)
model = GPARRegressor(scale=0.1,
                      linear=False,
                      nonlinear=True, nonlinear_scale=0.5,
                      impute=True, replace=True,
                      noise=0.1,
                      normalise_y=True)

# Sample observations and discard some.
y = model.sample(x, p=3)
y_obs = y.copy()
y_obs[np.random.permutation(100)[:25], 0] = np.nan
y_obs[np.random.permutation(100)[:50], 1] = np.nan
y_obs[np.random.permutation(100)[:75], 2] = np.nan

# Fit model to the observations with missing values and predict.
model.fit(x, y_obs)
means, lowers, uppers = model.predict(x,
                                      num_samples=200,
                                      latent=False,
                                      credible_bounds=True)

# Plot the result.
plt.figure(figsize=(8, 6))
for i in range(3):
    plt.subplot(3, 1, i + 1)
    plt.plot(x, means[:, i], label='Prediction', style='pred')
    plt.fill_between(x, lowers[:, i], uppers[:, i], style='pred')
    plt.scatter(x, y[:, i], label='Truth', style='test')
    plt.scatter(x, y_obs[:, i], label='Observations', style='train')
    plt.ylabel(f'Output {i + 1}')
    wbml.plot.tweak(legend=i == 0)
                      nonlinear_with_inputs=False,
                      markov=1,
                      replace=True,
                      noise=0.01)

while y_tested.shape[0] < args.max_evals:
    # Define the GPAR model for this iteration.
    # TODO: Reuse hyperparameters from the previous run as a starting point?
    print(f'Running iteration {iteration}')

    # Fit the GPAR model. If joint, co-fit the parameters across all layers
    # after the first fit. This potentially loses some accuracy on each layer
    # and can become unstable as a result.
    print('\t Fitting model')
    model.fit(transform_x(x_tested), y_tested,
              trace=trace, progressive=True, iters=args.iters)
    if args.joint:
        model.fit(transform_x(x_tested), y_tested,
                  trace=trace, progressive=False, iters=args.iters)

    # Sample the test points so that the form of the GPAR function can be
    # plotted.
    print('\t Predicting test points')
    test_samples = unnormalise(
        model.sample(transform_x(x_test),
                     num_samples=num_function_samples,
                     latent=True,
    print(f'Running iteration {iteration}')

    # Fit the GPAR model. If joint, co-fit the parameters across all layers
    # after the first fit. This potentially loses some accuracy on each layer
    # and can become unstable as a result.
    print('\t Fitting model')
    x_data = x.copy()
    y_data = y_normalised.copy()
    y_data[np.where(~y_tested)] = np.nan
    active_inds = np.where(y_tested[:, 0])
    x_data = x_data[active_inds]
    y_data = y_data[active_inds]
    model.fit(transform_x(x_data), y_data,
              trace=trace, fix=True, iters=args.iters)
    if args.joint:
        model.fit(transform_x(x_data), y_data,
                  trace=trace, fix=False, iters=args.iters)

    # Sample the test points so that the form of the GPAR function can be
    # plotted.
    print('\t Predicting test points')
    test_samples = unnormalise(
        model.sample(transform_x(x_test),
                     num_samples=50,
                     latent=True,