if __name__ == '__main__':
    # Global settings for this run and the directory for its artefacts.
    B.epsilon = 1e-8
    wbml.out.report_time = True
    wd = WorkingDirectory('_experiments', 'eeg')

    # Only the last two items returned by `load` are used here.
    _, train, test = load()
    x, y = np.array(train.index), np.array(train)

    # Fit GPAR on the training frame.
    model = GPARRegressor(
        scale=0.02,
        linear=False,
        nonlinear=True,
        nonlinear_scale=1.0,
        noise=0.01,
        impute=True,
        replace=False,
        normalise_y=True,
    )
    model.fit(x, y)

    # Predict at the training inputs, also requesting credible bounds.
    means, lowers, uppers = model.predict(
        x,
        num_samples=100,
        credible_bounds=True,
        latent=True,
    )

    # Report the SMSE of the predictive means against the test frame; the
    # means are wrapped in a frame that shares the training index and columns.
    pred = pd.DataFrame(means, index=train.index, columns=train.columns)
    smse = wbml.metric.smse(pred, test)
    wbml.out.kv('SMSEs', smse.dropna())
    wbml.out.kv('Average SMSEs', smse.mean())

    # Name of output to plot.
    name = 'F2'
y_train, y_test = y[inds_train], y[inds_test]

# Perform dropping of data. For every output after the first, a fraction
# `prob_drop` of the rows that still remain is selected at random and set to
# NaN for that output and all later ones, so the amount of missing data grows
# with the output index.
prob_drop = 0.3
indices_all = np.arange(y_train.shape[0])
indices_remain = indices_all
for i in range(1, num_outputs):
    # Drop indices randomly.
    n = len(indices_remain)
    num_drop = int(np.round(prob_drop * n))
    perm = np.random.permutation(n)[:num_drop]
    indices_drop = indices_remain[perm]

    # Keep the integer dtype explicitly: an empty remainder would otherwise
    # become a float array, which cannot be used as an index.
    indices_remain = np.array(
        list(set(indices_remain) - set(indices_drop)),
        dtype=indices_all.dtype,
    )

    # Drop data.
    y_train[indices_drop, i:] = np.nan

# Fit and predict GPAR.
model = GPARRegressor(
    scale=0.1,
    linear=True,
    linear_scale=100.,
    nonlinear=True,
    nonlinear_scale=1.0,
    noise=0.01,
    impute=True,
    replace=True,
    normalise_y=True,
)
model.fit(x_train, y_train)
means = model.predict(x_test, num_samples=100, latent=True)

# Print the number of non-NaN training observations left in every column.
wbml.out.kv('Remaining', np.sum(~np.isnan(y_train), axis=0))

# Compute SMSEs for all but the first output.
# NOTE(review): no slicing of the first output is visible here; confirm that
# `wbml.metric.smse` (or the caller) excludes it as this comment claims.
wbml.out.kv('SMSE', wbml.metric.smse(means, y_test))
np.linspace(100, 980, 45)]) xx1, xx2 = np.meshgrid(x1, x2) x = np.stack([np.ravel(xx1), np.ravel(xx2)], axis=1) # model = GPARRegressor(scale=[2., .3], scale_tie=True, # linear=True, linear_scale=10., linear_with_inputs=False, # nonlinear=False, nonlinear_with_inputs=False, # markov=1, # replace=True, # noise=args.noise) model = GPARRegressor( scale=[2., 0.5], scale_tie=True, linear=True, linear_scale=10., input_linear=False, nonlinear=False, # missing non linear inputs now? markov=1, replace=True, noise=args.noise) n = 3 y = model.sample(transform_x(x), p=n, latent=args.latent) plt.figure(figsize=(20, 10)) cs = ['tab:red', 'tab:blue', 'tab:green', 'tab:pink', 'tab:cyan'] for i in range(y.shape[1]): plt.subplot(2, 5, i + 1) plt.title('Output {}'.format(i + 1))
from gpar import GPARRegressor, log_transform


def inputs(df):
    """Return the ``'x'`` and ``'y'`` columns of ``df`` as a NumPy array.

    The index of ``df`` is first moved into regular columns with
    ``reset_index``; the columns ``'x'`` and ``'y'`` are then selected in
    that order and converted with ``to_numpy``.
    """
    flat = df.reset_index()
    return flat[['x', 'y']].to_numpy()


if __name__ == '__main__':
    # Global settings for this run and the directory for its artefacts.
    B.epsilon = 1e-8
    wbml.out.report_time = True
    wd = WorkingDirectory('_experiments', 'jura')

    train, test = load()

    # Fit GPAR on the training frame; outputs go through `log_transform`.
    model = GPARRegressor(
        scale=10.,
        linear=False,
        nonlinear=True,
        nonlinear_scale=1.0,
        noise=0.1,
        impute=True,
        replace=True,
        normalise_y=True,
        transform_y=log_transform,
    )
    model.fit(inputs(train), train.to_numpy(), fix=False)

    # Predict at the test inputs and label the result with the test index
    # and the training columns so it lines up with `test`.
    means = model.predict(inputs(test), num_samples=200, latent=True)
    means = pd.DataFrame(means, index=test.index, columns=train.columns)

    # Report the mean absolute error for the 'Cd' column only.
    wbml.out.kv('MAE', wbml.metric.mae(means, test)['Cd'])
# The optional first command-line argument selects the data set; the default
# is the first one.
d_size = int(sys.argv[1]) if len(sys.argv) >= 2 else 0
d_all, d_train, d_tests = load_temp()[d_size]

# Determine the number of inducing points.
n_ind = (10 * 10 + 1, 10 * 15 + 1, 10 * 31 + 1)[d_size]

# Place inducing points evenly spaced over the range of all inputs.
x = convert_index(d_all)
x_ind = np.linspace(x.min(), x.max(), n_ind)

# Fit and predict GPAR.
# Note: we use D-GPAR-L-NL here, as opposed to D-GPAR-L, to make the
# results a little more drastic.
model = GPARRegressor(
    scale=0.2,
    linear=True,
    linear_scale=10.,
    nonlinear=True,
    nonlinear_scale=1.,
    noise=0.1,
    impute=True,
    replace=True,
    normalise_y=True,
    x_ind=x_ind,
)
model.fit(convert_index(d_train), d_train.to_numpy())

# Predict for the test sets, one prediction per test set and in order.
preds = [
    model.predict(
        convert_index(d),
        num_samples=50,
        credible_bounds=True,
        latent=False,
    )
    for d in d_tests
]

# Save predictions.
wd.save(preds, f'results{d_size}.pickle')
header = [header[i] for i in order]

# Remove regions from training data. A pristine copy is kept in `y_all` so
# that the removed values can serve as ground truth afterwards. Each region
# is a triple of (output name, row indices, column index of that output).
y_all = y.copy()
regions = [('o7', np.arange(301, 1400), header.index('o7'))]
for _, inds, p in regions:
    y[inds, p] = np.nan

# Fit and predict GPAR.
model = GPARRegressor(
    scale=0.1,
    linear=True,
    linear_scale=3.,
    nonlinear=True,
    nonlinear_scale=0.2,
    rq=False,
    noise=1.,
    impute=True,
    replace=True,
    normalise_y=True,
)
model.fit(x, y)
means, lowers, uppers = model.predict(
    x,
    num_samples=200,
    credible_bounds=True,
    latent=False,
)

# Compute SMSEs.
smses = []
for _, inds, p in regions:
    truth = y_all[inds, p]

    # For the purpose of comparison, standardise using the mean of the
    # *training* data! This is *not* how the SMSE usually is defined.
    train_mean = np.nanmean(y[:, p])
    mse_mean = np.nanmean((truth - train_mean) ** 2)
    mse_gpar = np.nanmean((truth - means[inds, p]) ** 2)
    smses.append(mse_gpar / mse_mean)
print('Average SMSE:', np.mean(smses))
if __name__ == "__main__":
    # Global settings for this run and the directory for its artefacts.
    wbml.out.report_time = True
    wd = WorkingDirectory("_experiments", "exchange")

    # Only the last two items returned by `load` are used here.
    _, train, test = load()
    x, y = np.array(train.index), np.array(train)

    # Fit GPAR on the training frame.
    model = GPARRegressor(
        scale=0.1,
        linear=True,
        linear_scale=10.0,
        nonlinear=True,
        nonlinear_scale=1.0,
        rq=True,
        noise=0.01,
        impute=True,
        replace=False,
        normalise_y=True,
    )
    model.fit(x, y)

    # Predict at the training inputs, also requesting credible bounds.
    means, lowers, uppers = model.predict(
        x,
        num_samples=200,
        credible_bounds=True,
        latent=False,
    )

    # For the purpose of comparison, standardise using the mean of the
    # *training* data. This is not how the SMSE usually is defined!
    pred = pd.DataFrame(means, index=train.index, columns=train.columns)
    squared_error = (pred - test) ** 2
    mse = squared_error.mean(axis=0)
d_size = 0 if len(sys.argv) < 2 else int(sys.argv[1]) d_all, d_train, d_tests = load_temp()[d_size] n_ind = [10 * 10 + 1, 10 * 15 + 1, 10 * 31 + 1][d_size] # Place inducing points evenly spaced. x_ind = np.linspace(d_all.x[:, 0].min(), d_all.x[:, 0].max(), n_ind) # Fit and predict GPAR. # Note: we use D-GPAR-L-NL here, as opposed to D-GPAR-L, to make the results # a little more drastic. model = GPARRegressor(scale=0.2, linear=True, linear_scale=10., nonlinear=True, nonlinear_scale=1., noise=0.1, impute=True, replace=True, normalise_y=True, x_ind=x_ind) model.fit(d_train.x, d_train.y) # Predict for the test sets. preds = [] for i, d in enumerate(d_tests): print('Sampling', i + 1) preds.append( model.predict(d.x, num_samples=50, credible_bounds=True, latent=False)) # Save predictions. with open('examples/paper/air_temp_results{}.pickle'.format(d_size),
y = data[:, [header.index(name) for name in ['Ni', 'Zn', 'Cd']]] return x, y # Load and extract data. x_train, y_train = load('examples/data/jura/jura_prediction.dat') x_test, y_test = load('examples/data/jura/jura_validation.dat') # Append first two outputs of test data to training data: the last one is # predicted. x_train = np.concatenate((x_train, x_test), axis=0) y_train_test = y_test.copy() y_train_test[:, -1] = np.nan y_train = np.concatenate((y_train, y_train_test), axis=0) # Fit and predict GPAR. model = GPARRegressor(scale=10., linear=False, nonlinear=True, nonlinear_scale=1.0, noise=0.1, impute=True, replace=True, normalise_y=True, transform_y=log_transform) model.fit(x_train, y_train, fix=False) means_test = model.predict(x_test, num_samples=200, latent=True) # Compute MAE. print('MAE:', np.nanmean(np.abs(y_test[:, -1] - means_test[:, -1])))