Esempio n. 1
0
    # Load the data; the first value returned by `load` is not used here.
    _, train, test = load()

    # Inputs are the index of the training frame and outputs are its values.
    x = np.array(train.index)
    y = np.array(train)

    # Fit GPAR on the training data, then predict at the same inputs.
    model = GPARRegressor(scale=0.02,
                          linear=False,
                          nonlinear=True,
                          nonlinear_scale=1.0,
                          noise=0.01,
                          impute=True,
                          replace=False,
                          normalise_y=True)
    model.fit(x, y)
    # With `credible_bounds=True` the call is unpacked into three values:
    # the means and the lower and upper bounds.
    means, lowers, uppers = \
        model.predict(x, num_samples=100, credible_bounds=True, latent=True)

    # Report SMSE: wrap the means in a frame labelled like `train`, score it
    # against `test`, and log both the non-NaN entries and their mean.
    pred = pd.DataFrame(means, index=train.index, columns=train.columns)
    smse = wbml.metric.smse(pred, test)
    wbml.out.kv('SMSEs', smse.dropna())
    wbml.out.kv('Average SMSEs', smse.mean())

    # Name of output to plot.
    name = 'F2'

    # Plot the result: open the figure and call `wbml.plot.tex()` before any
    # drawing happens.
    plt.figure(figsize=(12, 1.75))
    wbml.plot.tex()
Esempio n. 2
0
    y_train, y_test = y[inds_train], y[inds_test]

    # Perform dropping of data. For every output after the first, a random
    # fraction `prob_drop` of the rows that have not been dropped yet is set
    # to NaN for that output and all outputs after it.
    prob_drop = 0.3
    indices_all = np.arange(y_train.shape[0])
    indices_remain = indices_all
    for i in range(1, num_outputs):
        # Drop indices randomly.
        n = len(indices_remain)
        perm = np.random.permutation(n)[:int(np.round(prob_drop * n))]
        indices_drop = indices_remain[perm]
        # `np.setdiff1d` returns the sorted remaining indices and keeps their
        # integer dtype, so the result is still a valid index when it is empty.
        indices_remain = np.setdiff1d(indices_remain, indices_drop)

        # Drop data.
        y_train[indices_drop, i:] = np.nan

    # Fit and predict GPAR.
    model = GPARRegressor(scale=0.1,
                          linear=True, linear_scale=100.,
                          nonlinear=True, nonlinear_scale=1.0,
                          noise=0.01,
                          impute=True, replace=True, normalise_y=True)
    model.fit(x_train, y_train)
    means = model.predict(x_test, num_samples=100, latent=True)

    # Print the number of non-NaN training observations per output.
    wbml.out.kv('Remaining', np.sum(~np.isnan(y_train), axis=0))

    # Report the SMSE of the predictive means against the test outputs.
    # NOTE(review): every output is passed to `smse` here, including the first
    # one, which never has data dropped -- confirm whether it should be
    # excluded.
    wbml.out.kv('SMSE', wbml.metric.smse(means, y_test))
Esempio n. 3
0
from gpar import GPARRegressor, log_transform


def inputs(df):
    """Return the 'x' and 'y' columns of `df` as a NumPy array.

    The index of `df` is first moved back into ordinary columns, so 'x' and
    'y' may live either in the index or in the columns.

    Args:
        df: Data frame exposing 'x' and 'y' after its index has been reset.

    Returns:
        Array with one row per row of `df` and the columns 'x' and 'y', in
        that order.
    """
    flat = df.reset_index()
    coordinates = flat[['x', 'y']]
    return coordinates.to_numpy()


if __name__ == '__main__':
    # Global settings first, then the working directory of this experiment.
    B.epsilon = 1e-8
    wbml.out.report_time = True
    wd = WorkingDirectory('_experiments', 'jura')

    train, test = load()

    # Fit and predict GPAR. The model settings are gathered in one mapping.
    gpar_kwargs = {
        'scale': 10.,
        'linear': False,
        'nonlinear': True,
        'nonlinear_scale': 1.0,
        'noise': 0.1,
        'impute': True,
        'replace': True,
        'normalise_y': True,
        'transform_y': log_transform,
    }
    model = GPARRegressor(**gpar_kwargs)

    x_train, y_train = inputs(train), train.to_numpy()
    model.fit(x_train, y_train, fix=False)

    # Predict at the test inputs and label the result like the test frame.
    x_test = inputs(test)
    means = model.predict(x_test, num_samples=200, latent=True)
    means = pd.DataFrame(means, index=test.index, columns=train.columns)

    # Report the MAE of the 'Cd' output.
    mae = wbml.metric.mae(means, test)
    wbml.out.kv('MAE', mae['Cd'])
Esempio n. 4
0
    # Select the data set of the requested size: all data, the training
    # data, and a collection of test sets.
    d_all, d_train, d_tests = load_temp()[d_size]

    # Determine the number of inducing points.
    n_ind = [10 * 10 + 1, 10 * 15 + 1, 10 * 31 + 1][d_size]

    # Place inducing points evenly spaced over the range of all inputs.
    x = convert_index(d_all)
    x_ind = np.linspace(x.min(), x.max(), n_ind)

    # Fit and predict GPAR.
    #   Note: we use D-GPAR-L-NL here, as opposed to D-GPAR-L, to make the
    #   results a little more drastic.
    model = GPARRegressor(scale=0.2,
                          linear=True, linear_scale=10.,
                          nonlinear=True, nonlinear_scale=1.,
                          noise=0.1,
                          impute=True, replace=True, normalise_y=True,
                          x_ind=x_ind)
    model.fit(convert_index(d_train), d_train.to_numpy())

    # Predict for the test sets, one prediction per test set and in order.
    preds = [
        model.predict(convert_index(d),
                      num_samples=50,
                      credible_bounds=True,
                      latent=False)
        for d in d_tests
    ]

    # Save predictions.
    wd.save(preds, f'results{d_size}.pickle')
Esempio n. 5
0
    trial = pickle.load(f)
# Unpack the trial: inputs, training and test outputs, and output labels.
x, y_train = trial['x'], trial['y_train']
y_test, y_labels = trial['y_test'], trial['y_labels']

# Fit and predict GPAR.
gpar_kwargs = dict(
    scale=0.02,
    linear=False,
    nonlinear=True,
    nonlinear_scale=1.0,
    noise=0.01,
    impute=True,
    replace=False,
    normalise_y=True,
)
model = GPARRegressor(**gpar_kwargs)
model.fit(x, y_train)
means, lowers, uppers = model.predict(
    x, num_samples=100, credible_bounds=True, latent=True)

# Compute SMSE: the MSE of the predictive means divided by the MSE of each
# output's own mean. Only outputs with at least one non-NaN test value are
# included.
i_test = np.any(~np.isnan(y_test), axis=0)
y_test_observed = y_test[:, i_test]
deviations = (
    y_test_observed - np.nanmean(y_test_observed, axis=0, keepdims=True))
mse_mean = np.nanmean(deviations**2)
mse_gpar = np.nanmean((y_test_observed - means[:, i_test])**2)
print('SMSE:', mse_gpar / mse_mean)

# Plot the result.
plt.figure(figsize=(12, 9))
plt.rcParams.update({
    'font.family': 'serif',
    'mathtext.fontset': 'dejavuserif',
})
Esempio n. 6
0
# Place inducing points evenly spaced over the range of the first input
# column.
first_input = d_all.x[:, 0]
x_ind = np.linspace(first_input.min(), first_input.max(), n_ind)

# Fit and predict GPAR.
#   Note: we use D-GPAR-L-NL here, as opposed to D-GPAR-L, to make the results
#   a little more drastic.
gpar_kwargs = dict(
    scale=0.2,
    linear=True,
    linear_scale=10.,
    nonlinear=True,
    nonlinear_scale=1.,
    noise=0.1,
    impute=True,
    replace=True,
    normalise_y=True,
    x_ind=x_ind,
)
model = GPARRegressor(**gpar_kwargs)
model.fit(d_train.x, d_train.y)

# Predict for the test sets, announcing each one with a one-based counter.
preds = []
for number, d in enumerate(d_tests, start=1):
    print('Sampling', number)
    prediction = model.predict(
        d.x, num_samples=50, credible_bounds=True, latent=False)
    preds.append(prediction)

# Save predictions.
results_path = 'examples/paper/air_temp_results{}.pickle'.format(d_size)
with open(results_path, 'wb') as f:
    pickle.dump(preds, f)
Esempio n. 7
0
    y = data[:, [header.index(name) for name in ['Ni', 'Zn', 'Cd']]]
    return x, y


# Load and extract data.
x_train, y_train = load('examples/data/jura/jura_prediction.dat')
x_test, y_test = load('examples/data/jura/jura_validation.dat')

# Append the test data to the training data with its last output blanked
# out: the first outputs are observed and the last one is predicted.
y_train_test = y_test.copy()
y_train_test[:, -1] = np.nan
x_train = np.concatenate((x_train, x_test), axis=0)
y_train = np.concatenate((y_train, y_train_test), axis=0)

# Fit and predict GPAR.
gpar_kwargs = dict(
    scale=10.,
    linear=False,
    nonlinear=True,
    nonlinear_scale=1.0,
    noise=0.1,
    impute=True,
    replace=True,
    normalise_y=True,
    transform_y=log_transform,
)
model = GPARRegressor(**gpar_kwargs)
model.fit(x_train, y_train, fix=False)
means_test = model.predict(x_test, num_samples=200, latent=True)

# Compute MAE on the last output, ignoring NaN entries.
abs_errors = np.abs(y_test[:, -1] - means_test[:, -1])
print('MAE:', np.nanmean(abs_errors))