Example #1
0
def run_model(n_simulations, n_testing):
    """Fit MICE and LHD emulators and score both on random test points.

    Returns a tuple of four normalised quantities:
    (LHD RMSE, LHD uncertainty, MICE RMSE, MICE uncertainty).
    """

    # Sequential (MICE) design: start from a small initial design and
    # choose the remaining points adaptively.
    print('running MICE')
    base_design = LatinHypercubeDesign(design_space)

    n_init = 5
    mice = MICEDesign(base_design,
                      f,
                      n_samples=n_simulations - n_init,
                      n_init=n_init,
                      n_cand=100)
    mice.run_sequential_design()

    print('fitting GPs')

    gp_mice = fit_GP_MAP(
        GaussianProcess(mice.get_inputs(), np.squeeze(mice.get_targets())))

    # One-shot Latin Hypercube design of the same total size, for comparison.
    lhd_inputs, lhd_targets = generate_training_data(n_simulations)
    gp_lhd = fit_GP_MAP(GaussianProcess(lhd_inputs, lhd_targets))

    print("making predictions")

    testing, test_targets = generate_test_data(n_testing)

    # Normalise all error metrics by the test-target range.
    norm_const = np.max(test_targets) - np.min(test_targets)

    mice_vals, mice_unc, _ = gp_mice.predict(testing, deriv=False, unc=True)
    lhd_vals, lhd_unc, _ = gp_lhd.predict(testing, deriv=False, unc=True)

    def _rms(values):
        # Root-mean-square over the test set.
        return np.sqrt(np.sum(values**2) / float(n_testing))

    return (_rms(lhd_vals - test_targets) / norm_const,
            _rms(lhd_unc) / norm_const**2,
            _rms(mice_vals - test_targets) / norm_const,
            _rms(mice_unc) / norm_const**2)
Example #2
0
def fit_emulators(n_emulators, processes=None):
    """Load the tsunami data, fit a multi-output GP, and time the fit.

    Parameters
    ----------
    n_emulators : number of emulators (outputs) to load and fit
    processes : worker-process count forwarded to ``fit_GP_MAP``
        (``None`` lets the library choose)

    Returns
    -------
    float : elapsed seconds spent inside ``fit_GP_MAP``
    """
    # perf_counter is a monotonic, high-resolution clock made for measuring
    # durations; time.time() is wall-clock time and can jump (e.g. NTP
    # adjustments), which would corrupt the timing result.
    from time import perf_counter

    inputs, targets = load_tsunami_data(n_emulators)

    gp = MultiOutputGP(inputs, targets)

    start_time = perf_counter()
    gp = fit_GP_MAP(gp, processes=processes)
    return perf_counter() - start_time
Example #3
0
def run_mogp_analysis(analysis_points, known_value, threshold, results_dir):
    """Fit a GP to saved simulation results, history-match against a known
    observation, and save NROY / implausibility plots under results/."""

    input_points, results, ed = load_results(results_dir)

    # Emulate the simulator: construct and fit a GP on the stored runs.
    emulator = mogp_emulator.fit_GP_MAP(
        mogp_emulator.GaussianProcess(input_points, results))

    # Once fit, dense predictions are far cheaper than re-running the
    # simulation.
    query_points = ed.sample(analysis_points)
    predictions = emulator.predict(query_points)

    # History matching: rule out inputs whose emulated output is implausible
    # given the observation and threshold.
    hm = mogp_emulator.HistoryMatching(obs=known_value,
                                       expectations=predictions,
                                       threshold=threshold)
    implausibility = hm.get_implausibility()
    nroy_idx = hm.get_NROY()

    # Scatter plot of the Not-Ruled-Out-Yet points.
    plt.figure()
    plt.plot(query_points[nroy_idx, 0], query_points[nroy_idx, 1], 'o')
    plt.xlabel('Normal Stress (MPa)')
    plt.ylabel('Shear to Normal Stress Ratio')
    plt.xlim((-120., -80.))
    plt.ylim((0.1, 0.4))
    plt.title("NROY Points")
    plt.savefig("results/nroy.png")

    # Colour map of the implausibility metric over all query points.
    import matplotlib.tri

    scaled_x = -(query_points[:, 0] - 80.) / 40.
    scaled_y = (query_points[:, 1] - 0.1) / 0.3
    triangulation = matplotlib.tri.Triangulation(scaled_x, scaled_y)

    plt.figure()
    plt.tripcolor(query_points[:, 0],
                  query_points[:, 1],
                  triangulation.triangles,
                  implausibility,
                  vmin=0.,
                  vmax=6.,
                  cmap="viridis_r")
    colorbar = plt.colorbar()
    colorbar.set_label("Implausibility")
    plt.xlabel('Normal Stress (MPa)')
    plt.ylabel('Shear to Normal Stress Ratio')
    plt.title("Implausibility Metric")
    plt.savefig("results/implausibility.png")
def run_model(n_emulators, n_simulations, n_testing, processes=None):
    """Generate training data, fit a multi-output emulator, and evaluate it
    on random test points.

    Returns (normalised RMSE, normalised mean predictive uncertainty).
    """

    inputs, targets, emulator_params = generate_training_data(
        n_emulators, n_simulations)

    surrogate = fit_GP_MAP(MultiOutputGP(inputs, targets),
                           processes=processes)

    # Errors are normalised by the mean target value.
    norm_const = np.mean(targets)

    testing, test_targets = generate_test_data(n_testing, emulator_params)

    predictions, variances, _ = surrogate.predict(testing,
                                                  deriv=False,
                                                  unc=True,
                                                  processes=processes)

    # Mean-square over all emulators and test points (sequential divisions
    # kept to match the original floating-point evaluation order).
    rmse = np.sqrt(
        np.sum((predictions - test_targets)**2) / float(n_emulators) /
        float(n_testing))
    mean_unc = np.sqrt(
        np.sum(variances**2) / float(n_emulators) / float(n_testing))
    return (rmse / norm_const, mean_unc / norm_const**2)
def run_model(n_simulations, n_dimensions, n_testing):
    """Generate training data, fit a GP emulator, and test its accuracy on
    random points.

    Returns (normalised RMSE, normalised mean predictive uncertainty).
    """

    train_inputs, train_targets = generate_training_data(n_simulations,
                                                         n_dimensions)

    # Normalisation constant for both error metrics.
    norm_const = np.mean(train_targets)

    emulator = fit_GP_MAP(GaussianProcess(train_inputs, train_targets))

    test_inputs, test_targets = generate_test_data(n_testing, n_dimensions)

    predictions, variances, _ = emulator.predict(test_inputs,
                                                 deriv=False,
                                                 unc=True)

    mean_sq_err = np.sum((predictions - test_targets)**2) / float(n_testing)
    mean_sq_unc = np.sum(variances**2) / float(n_testing)
    return (np.sqrt(mean_sq_err) / norm_const,
            np.sqrt(mean_sq_unc) / norm_const**2)
Example #6
0
    def train(self, X, y):
        """Fit the configured backend regressor to training data.

        Parameters
        ----------
        X : array of shape (n_samples,) or (n_samples, n_features);
            the feature count must match ``self.n_in`` (1-D means one
            feature).
        y : array of shape (n_samples,) or (n_samples, n_outputs);
            the output count must match ``self.n_out`` (1-D means one
            output).

        Raises
        ------
        RuntimeError
            If the feature or target dimensionality of the data does not
            match the dimensions this object was configured with.
        """

        self.n_train = X.shape[0]

        # EAFP dimension check: shape[1] on a 1-D array raises IndexError,
        # which is taken to mean a single-feature input.
        try:
            n_in = X.shape[1]
            if self.n_in != n_in:
                raise RuntimeError(
                    'Size of training data feature is different from expected')
        except IndexError:
            if self.n_in != 1:
                raise RuntimeError(
                    'Size of training data feature is different from expected default =1'
                )

        # Same check for the targets: 1-D y means a single output.
        try:
            n_out = y.shape[1]
            if self.n_out != n_out:
                raise RuntimeError(
                    'Size of training data target is different from expected')
        except IndexError:
            if self.n_out != 1:
                raise RuntimeError(
                    'Size of training data target is different from expected default =1'
                )

        if self.backend == 'scikit-learn':
            self.instance = GaussianProcessRegressor(
                kernel=self.kernel,
                n_restarts_optimizer=self.n_iter,
                normalize_y=True)
            self.instance.fit(X, y)
            # Keep the optimised kernel so subsequent fits start from it.
            self.kernel = self.instance.kernel_
        elif self.backend == 'mogp':
            if self.n_out == 1:
                # Single-output mogp GP takes a flat target vector.
                self.instance = GaussianProcess(X,
                                                y.reshape(-1),
                                                kernel=self.kernel_argument,
                                                nugget=self.noize_argument)
            else:
                # Targets transposed so each row holds one output series.
                self.instance = MultiOutputGP(X,
                                              y.T,
                                              kernel=self.kernel_argument,
                                              nugget=self.noize_argument)
            self.instance = mogp.fit_GP_MAP(self.instance)
# Run the remaining MICE sequential-design iterations: ask the design for the
# next point, evaluate the simulator there, and feed the result back.
for d in range(n_samples):
    next_point = md2.get_next_point()
    next_target = simulator(next_point)
    md2.set_next_target(next_target)

# look at design and outputs

inputs = md2.get_inputs()
targets = md2.get_targets()

print("Final inputs:\n", inputs)
print("Final targets:\n", targets)

# look at final GP emulator and make some predictions to compare with lhd

lhd_design = lhd.sample(n_init + n_samples)

# Construct-and-fit shorthand: fit_GP_MAP builds a GP from (inputs, targets).
gp_lhd = mogp_emulator.fit_GP_MAP(lhd_design, np.array([simulator(p) for p in lhd_design]))

gp_mice = mogp_emulator.GaussianProcess(inputs, targets)

# Fit the GP constructed above. (The original called
# fit_GP_MAP(inputs, targets), which built and fit a second GP and silently
# discarded the one just created.)
gp_mice = mogp_emulator.fit_GP_MAP(gp_mice)

test_points = lhd.sample(10)

print("LHD:")
print_results(test_points, gp_lhd(test_points))
print()
print("MICE:")
print_results(test_points, gp_mice(test_points))
Example #8
0
# Inspect the design points and their shape.
print(simulation_points)
print(simulation_points.shape)

# Row 4 of the simulation output is dropped -- presumably a failed run, so
# the targets line up with the inputs. TODO confirm against the data source.
simulation_output_fixed = np.delete(simulation_output, 4, axis=0)
print(simulation_output_fixed)
print(simulation_output_fixed.shape)

# Fitting the MO GP. MAP with no prior parameters == uniform prior ==  MLE fitting
mo_gp = mogp_emulator.MultiOutputGP(inputs=simulation_points,
                                    targets=simulation_output_fixed)
print(mo_gp)

print("Number of emulators: " + str(len(mo_gp.emulators)))

# Fit each emulator and record its hyperparameters.
# NOTE(review): only the private ._theta of the GP returned by fit_GP_MAP is
# kept; the fitted objects are not stored back into mo_gp.emulators, so the
# predictions below may use unfitted emulators -- verify.
mo_gp_fit_theta = np.array(
    [mogp_emulator.fit_GP_MAP(gp)._theta for gp in mo_gp.emulators])
np.savetxt("moGP_fit_hyperpars.csv", mo_gp_fit_theta, delimiter=",")

# Per-emulator predictions at the query points `theta` (defined elsewhere in
# this file).
mo_gp_pred_mean = np.array(
    [gp.predict(testing=theta).mean for gp in mo_gp.emulators])
np.savetxt("moGP_prediction_mean.csv", mo_gp_pred_mean, delimiter=",")
mo_gp_pred_uncertainty = np.array(
    [gp.predict(testing=theta).unc for gp in mo_gp.emulators])
np.savetxt("moGP_prediction_uncertainty.csv",
           mo_gp_pred_uncertainty,
           delimiter=",")

#mo_gp_pred = mo_gp.predict(testing = theta) # trying to generate predictions in parallel
#numpy.savetxt("moGP_prediction.csv", mo_gp_pred, delimiter=",")

#mo_gp_fit = mogp_emulator.fit_GP_MAP(mo_gp) # trying to fit in parallel
Example #9
0
# run simulation

# Evaluate the simulator at every design point to obtain training targets.
targets = np.array([simulator(p) for p in inputs])

###################################################################################

# First example -- fit GP using MLE and Squared Exponential Kernel and predict

print("Example 1: Basic GP")

# create GP and then fit using MLE

gp = mogp_emulator.GaussianProcess(inputs, targets)

gp = mogp_emulator.fit_GP_MAP(gp)

# create 20 target points to predict

predict_points = ed.sample(n_preds)

# predict returns (means, variances, derivatives) at the query points.
means, variances, derivs = gp.predict(predict_points)

print_results(predict_points, means)

###################################################################################

# Second Example: How to change the kernel, use a fixed nugget, and create directly using fitting function

print("Example 2: Matern Kernel")
    # Map the unit-hypercube candidate through each marginal's inverse CDF:
    # exponential (scale=10) for columns 2 and 4, normal(0, 2.5) for
    # columns 1, 3 and 5.
    x[d+1,2] = expon(scale=10).ppf(x[d+1, 2])
    x[d+1,4] = expon(scale=10).ppf(x[d+1, 4])
    x[d+1,1] = norm(0, 2.5).ppf(x[d+1, 1])
    x[d+1,3] = norm(0, 2.5).ppf(x[d+1, 3])
    x[d+1,5] = norm(0, 2.5).ppf(x[d+1, 5])
    # Evaluate the simulator at the transformed point and feed the result
    # back into the sequential design.
    next_target = simulator6d_halved(x[d+1,:])
    print(x[d+1, :])
    print(next_target)
    md.set_next_target(next_target)

    X_train = x
    inputs = md.get_inputs()
    targets = md.get_targets()

    # Refit the emulator on the design accumulated so far.
    # NOTE(review): the GaussianProcess built on the next line is discarded --
    # fit_GP_MAP(inputs, targets) constructs and fits a *fresh* GP; fitting
    # the existing object (fit_GP_MAP(gp_mice)) was probably intended. Verify.
    gp_mice = mogp_emulator.GaussianProcess(inputs, targets)
    gp_mice = mogp_emulator.fit_GP_MAP(inputs, targets)

    # Score the current emulator on the held-out test set.
    y_predict = gp_mice(X_test_tran)
    rmse[d] = np.sqrt(mean_squared_error(y_test, y_predict))
    mae[d] = mean_absolute_error(y_test, y_predict)


# Largest absolute error from the final iteration's predictions.
max_error = np.max(np.abs((y_predict - y_test)))

plt.figure(1)
plt.scatter(np.arange(2,102,1), mae)
plt.ylabel('MAE')
plt.xlabel('Number of training points')
plt.savefig('analysis/sequential_design_plots/seq_design_mae_regular_no_rot.png')

plt.figure(2)
Example #11
0
# the parameter to take. By default, we assume a uniform distribution between two endpoints, which
# we will use for this simulation.

# Once we construct the design, can draw a specified number of samples as shown.

# 2-D Latin Hypercube: first parameter in (-5, 1), second in (0, 1000).
lhd = mogp_emulator.LatinHypercubeDesign([(-5., 1.), (0., 1000.)])

n_simulations = 50
simulation_points = lhd.sample(n_simulations)
simulation_output = np.array([simulator(p) for p in simulation_points])

# Next, fit the surrogate GP model using MLE (MAP with uniform priors)
# Print out hyperparameter values as correlation lengths and sigma

gp = mogp_emulator.GaussianProcess(simulation_points, simulation_output)
gp = mogp_emulator.fit_GP_MAP(gp)

# NOTE(review): assumes gp.theta stores log-scale hyperparameters with the
# two correlation lengths first and the covariance scale last -- confirm
# against the mogp_emulator version in use.
print("Correlation lengths = {}".format(np.sqrt(np.exp(-gp.theta[:2]))))
print("Sigma = {}".format(np.sqrt(np.exp(gp.theta[2]))))

# Validate emulator by comparing to true simulated value
# To compare with the emulator, use the predict method to get mean and variance
# values for the emulator predictions and see how many are within 2 standard
# deviations

n_valid = 10
validation_points = lhd.sample(n_valid)
validation_output = np.array([simulator(p) for p in validation_points])

predictions = gp.predict(validation_points)
# Fix the RNG seed so the analysis is reproducible.
np.random.seed(73449)

results_dir = os.path.join(os.getcwd(), "results/training")

input_points, results, ed = load_results(results_dir)

write_lhc_points("lhc_values", input_points, results)

validation_dir = os.path.join(os.getcwd(), "results/validation")

validation_points, validation_results, ed = load_results(validation_dir)

write_lhc_points("validation_points", validation_points, validation_results)

# Construct-and-fit shorthand: fit_GP_MAP builds a GP from (inputs, targets).
gp = fit_GP_MAP(input_points, results)

# NOTE(review): assumes gp.theta holds log-scale hyperparameters, the first
# three being (negated log) correlation lengths and the fourth the covariance
# scale -- confirm against the mogp_emulator version in use.
with open("correlation_lengths.tex", "w") as outfile:
    outfile.write(
        "{:.3f}, {:.3f}, and {:.3f}".format(*np.sqrt(np.exp(-gp.theta[:3]))))

with open("covariance_scale.tex", "w") as outfile:
    outfile.write("{:.3f}".format(np.sqrt(np.exp(gp.theta[3]))))

validations = gp.predict(validation_points)

# Standardised validation error: residual divided by the predictive
# standard deviation.
valid_error = (validations.mean - validation_results) / np.sqrt(
    validations.unc)

analysis_points = 10000
threshold = 3.