def test_y_normalization():
    """Check that ``normalize_y=True`` matches centering the targets by hand.

    A non-normalizing GP fitted on mean-centered targets and a normalizing
    GP fitted on the raw targets should yield identical predictions.
    """
    target_mean = y.mean(0)
    centered_targets = y - target_mean

    for kernel in kernels:
        # Reference: plain GP trained on targets that were centered manually.
        plain_gp = GaussianProcessRegressor(kernel=kernel)
        plain_gp.fit(X, centered_targets)

        # Candidate: the GP centers the raw targets internally.
        normalizing_gp = GaussianProcessRegressor(kernel=kernel,
                                                  normalize_y=True)
        normalizing_gp.fit(X, y)

        # Means (after adding the offset back) and std-devs must agree.
        plain_mean, plain_std = plain_gp.predict(X2, return_std=True)
        plain_mean = target_mean + plain_mean
        norm_mean, norm_std = normalizing_gp.predict(X2, return_std=True)
        assert_almost_equal(plain_mean, norm_mean)
        assert_almost_equal(plain_std, norm_std)

        # Full predictive covariances must agree as well.
        plain_cov = plain_gp.predict(X2, return_cov=True)[1]
        norm_cov = normalizing_gp.predict(X2, return_cov=True)[1]
        assert_almost_equal(plain_cov, norm_cov)
def test_no_fit_default_predict():
    """Unfitted GPR without a kernel must predict like the explicit default.

    The explicit kernel below is a fixed ConstantKernel(1.0) times a fixed
    RBF(1.0); predictions made before any call to ``fit`` are compared.
    """
    explicit_default = (C(1.0, constant_value_bounds="fixed") *
                        RBF(1.0, length_scale_bounds="fixed"))

    implicit_gp = GaussianProcessRegressor()
    std_implicit = implicit_gp.predict(X, return_std=True)[1]
    cov_implicit = implicit_gp.predict(X, return_cov=True)[1]

    explicit_gp = GaussianProcessRegressor(kernel=explicit_default)
    std_explicit = explicit_gp.predict(X, return_std=True)[1]
    cov_explicit = explicit_gp.predict(X, return_cov=True)[1]

    assert_array_almost_equal(std_implicit, std_explicit)
    assert_array_almost_equal(cov_implicit, cov_explicit)
def plot_gp(x_min, x_max, x, y, train_features, train_labels):
    """Plot a GP fit and an upper-confidence-bound utility below it.

    A Matern(nu=2.5) Gaussian process is fitted to the observations and
    evaluated on ``x``. The top panel shows the target curve, the
    observations, the GP mean and a 1.96-sigma band; the bottom panel shows
    ``mean + 5 * std`` and marks its maximum as the next best guess.

    Parameters
    ----------
    x_min, x_max : float
        Limits applied to the x-axis of both panels.
    x : array-like
        Points at which the GP is evaluated and the curves are drawn.
        Passed directly to ``gp.predict`` -- presumably shaped
        (n_points, 1); TODO confirm against callers.
    y : array-like
        Target values plotted against ``x``.
    train_features, train_labels : array-like
        Observations the GP is fitted to; ``train_features`` must support
        ``.flatten()``.
    """
    fig = plt.figure(figsize=(16, 10))
    fig.suptitle('Gaussian Process and Utility Function After {} Steps'.format(len(train_features)), fontdict={'size':30})

    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1])
    axis = plt.subplot(gs[0])
    acq = plt.subplot(gs[1])

    gp = GaussianProcessRegressor(
        kernel=Matern(nu=2.5),
        n_restarts_optimizer=25,
    )
    gp.fit(train_features, train_labels)

    # One prediction serves both panels: predicting again after the same
    # fit returns the same mean and standard deviation.
    mu, sigma = gp.predict(x, return_std=True)

    axis.plot(x, y, linewidth=3, label='Target')
    axis.plot(train_features.flatten(), train_labels, 'D', markersize=8, label=u'Observations', color='r')
    axis.plot(x, mu, '--', color='k', label='Prediction')

    # Closed polygon: lower bound left-to-right, upper bound right-to-left.
    axis.fill(np.concatenate([x, x[::-1]]),
              np.concatenate([mu - 1.9600 * sigma, (mu + 1.9600 * sigma)[::-1]]),
              alpha=.6, fc='c', ec='None', label='95% confidence interval')

    axis.set_xlim((x_min, x_max))
    axis.set_ylim((None, None))
    axis.set_ylabel('f(x)', fontdict={'size':20})
    axis.set_xlabel('x', fontdict={'size':20})

    # Upper confidence bound with a fixed exploration weight.
    acquisition_function_kappa = 5
    acquisition_function_values = mu + acquisition_function_kappa * sigma
    acq.plot(x, acquisition_function_values, label='Utility Function', color='purple')
    acq.plot(x[np.argmax(acquisition_function_values)], np.max(acquisition_function_values), '*', markersize=15,
             label=u'Next Best Guess', markerfacecolor='gold', markeredgecolor='k', markeredgewidth=1)
    acq.set_xlim((x_min, x_max))
    acq.set_ylim((0, np.max(acquisition_function_values) + 0.5))
    acq.set_ylabel('Utility', fontdict={'size':20})
    acq.set_xlabel('x', fontdict={'size':20})

    axis.legend(loc=2, bbox_to_anchor=(1.01, 1), borderaxespad=0.)
    acq.legend(loc=2, bbox_to_anchor=(1.01, 1), borderaxespad=0.)
def test_gpr_interpolation(kernel):
    """Check the interpolating property for the given kernel.

    Predictions at the training inputs must equal the training targets and
    the predictive variances there must be zero.
    """
    estimator = GaussianProcessRegressor(kernel=kernel)
    fitted = estimator.fit(X, y)

    mean_at_train, cov_at_train = fitted.predict(X, return_cov=True)
    variance_at_train = np.diag(cov_at_train)

    assert_almost_equal(mean_at_train, y)
    assert_almost_equal(variance_at_train, 0.)
def test_predict_cov_vs_std():
    """The predicted std-dev must equal the square root of cov's diagonal."""
    for kernel in kernels:
        estimator = GaussianProcessRegressor(kernel=kernel)
        fitted = estimator.fit(X, y)

        # Two separate predict calls: one for the covariance, one for std.
        covariance = fitted.predict(X2, return_cov=True)[1]
        std_dev = fitted.predict(X2, return_std=True)[1]

        std_from_cov = np.sqrt(np.diag(covariance))
        assert_almost_equal(std_from_cov, std_dev)
def test_gpr_interpolation():
    """Check the interpolating property for every kernel in ``kernels``.

    At the training inputs the predicted mean must be close to the targets
    and the diagonal of the predictive covariance close to zero.
    """
    for kernel in kernels:
        estimator = GaussianProcessRegressor(kernel=kernel)
        fitted = estimator.fit(X, y)

        mean_at_train, cov_at_train = fitted.predict(X, return_cov=True)

        mean_matches = np.allclose(mean_at_train, y)
        assert_true(mean_matches)
        variance_vanishes = np.allclose(np.diag(cov_at_train), 0.)
        assert_true(variance_vanishes)
def test_duplicate_input():
    """GPR must cope with two different targets for the same input.

    One model gets an exact duplicate of the first training input (with a
    target shifted by 1); the other gets the same point moved by 1e-15.
    Both must produce the same predictions on a dense test grid.
    """
    X_test = np.linspace(0, 10, 100)[:, None]

    for kernel in kernels:
        # Same targets for both models: original ones plus a conflicting one.
        y_aug_equal = np.hstack((y, y[0] + 1))
        X_aug_equal = np.vstack((X, X[0]))
        gpr_equal_inputs = GaussianProcessRegressor(kernel=kernel, alpha=1e-2)
        gpr_equal_inputs.fit(X_aug_equal, y_aug_equal)

        y_aug_similar = np.hstack((y, y[0] + 1))
        X_aug_similar = np.vstack((X, X[0] + 1e-15))
        gpr_similar_inputs = GaussianProcessRegressor(kernel=kernel,
                                                      alpha=1e-2)
        gpr_similar_inputs.fit(X_aug_similar, y_aug_similar)

        mean_equal, std_equal = gpr_equal_inputs.predict(
            X_test, return_std=True)
        mean_similar, std_similar = gpr_similar_inputs.predict(
            X_test, return_std=True)

        assert_almost_equal(mean_equal, mean_similar)
        assert_almost_equal(std_equal, std_similar)
def test_y_multioutput():
    """GPR must handle multi-dimensional target values.

    With a fixed kernel, the first output of a 2-output GP must match the
    1-output GP and the second output must be twice as large; uncertainty
    must not depend on the outputs. With hyperparameter optimization, two
    identical output columns must give the same kernel parameters as one.
    """
    stacked_targets = np.vstack((y, y*2)).T

    # Fixed kernel, no optimizer: results are directly comparable.
    fixed_settings = dict(kernel=RBF(length_scale=1.0), optimizer=None,
                          normalize_y=False)
    single = GaussianProcessRegressor(**fixed_settings)
    single.fit(X, y)
    double = GaussianProcessRegressor(**fixed_settings)
    double.fit(X, stacked_targets)

    mean_1d, std_1d = single.predict(X2, return_std=True)
    mean_2d, std_2d = double.predict(X2, return_std=True)
    cov_1d = single.predict(X2, return_cov=True)[1]
    cov_2d = double.predict(X2, return_cov=True)[1]

    assert_almost_equal(mean_1d, mean_2d[:, 0])
    assert_almost_equal(mean_1d, mean_2d[:, 1] / 2)

    # Standard deviation and covariance do not depend on the outputs.
    assert_almost_equal(std_1d, std_2d)
    assert_almost_equal(cov_1d, cov_2d)

    samples_1d = single.sample_y(X2, n_samples=10)
    samples_2d = double.sample_y(X2, n_samples=10)
    assert_almost_equal(samples_1d, samples_2d[:, 0])

    # Hyperparameter optimization: duplicated output must not change theta.
    duplicated_targets = np.vstack((y, y)).T
    for kernel in kernels:
        tuned_single = GaussianProcessRegressor(kernel=kernel,
                                                normalize_y=True)
        tuned_single.fit(X, y)

        tuned_double = GaussianProcessRegressor(kernel=kernel,
                                                normalize_y=True)
        tuned_double.fit(X, duplicated_targets)

        assert_almost_equal(tuned_single.kernel_.theta,
                            tuned_double.kernel_.theta, 4)
def test_prior(kernel):
    """The GP prior must have zero mean and identical variances."""
    unfitted = GaussianProcessRegressor(kernel=kernel)
    prior_mean, prior_cov = unfitted.predict(X, return_cov=True)

    assert_almost_equal(prior_mean, 0, 5)

    prior_variance = np.diag(prior_cov)
    if len(unfitted.kernel.theta) <= 1:
        assert_almost_equal(prior_variance, 1, 5)
    else:
        # XXX: hacky -- treats the first hyperparameter as the log of the
        # prior variance, which only holds for the kernels currently tested.
        assert_almost_equal(prior_variance, np.exp(kernel.theta[0]), 5)
def test_sample_statistics():
    """Statistics of samples drawn from the GP must match its predictions."""
    n_draws = 300000

    for kernel in kernels:
        estimator = GaussianProcessRegressor(kernel=kernel)
        fitted = estimator.fit(X, y)

        predicted_mean, predicted_cov = fitted.predict(X2, return_cov=True)
        draws = fitted.sample_y(X2, n_draws)

        # More digits of accuracy would require many more samples.
        assert_almost_equal(predicted_mean, np.mean(draws, 1), 2)

        # Compare variances after scaling both sides by the largest
        # predicted variance.
        predicted_var = np.diag(predicted_cov)
        assert_almost_equal(predicted_var / np.diag(predicted_cov).max(),
                            np.var(draws, 1) / np.diag(predicted_cov).max(),
                            1)
def fit_GP(x_train):
    """Fit a GP to ``gaussian(x_train, mu, sig)`` and predict on ``x``.

    ``gaussian``, ``mu``, ``sig`` and the prediction grid ``x`` are taken
    from the enclosing module.

    Returns
    -------
    tuple
        ``(y_train, y_pred, sigma)``: the training targets, the predicted
        mean on ``x`` and the predicted standard deviation on ``x``.
    """
    targets = gaussian(x_train, mu, sig).ravel()

    # Constant kernel times RBF, each with explicit hyperparameter bounds.
    amplitude = C(1.0, (1e-3, 1e3))
    smoothness = RBF(1, (1e-2, 1e2))
    model = GaussianProcessRegressor(kernel=amplitude * smoothness,
                                     n_restarts_optimizer=9)

    # Hyperparameters are tuned during fit, restarting the optimizer 9 times.
    model.fit(x_train, targets)

    # Predict on the module-level grid, requesting the std-dev as well.
    predicted_mean, predicted_std = model.predict(x, return_std=True)
    return targets, predicted_mean, predicted_std
def test_GP_brownian_motion(self):
    """Fit a GP with a Brownian-motion style kernel and plot the result.

    The observations ``y`` and their errors ``yerr`` are not defined in
    this method and are read from an enclosing scope.
    NOTE(review): confirm where ``y`` and ``yerr`` come from.
    """
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

    t = np.linspace(0, 10, 100)

    # Brownian-motion covariance k(s, t) = min(s, t).
    # NOTE(review): this is a plain Python function, not a scikit-learn
    # Kernel object; presumably GaussianProcessRegressor cannot use it --
    # verify. The previously tried alternative was
    # C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)).
    kernel = lambda x, y: 1. * min(x, y)
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

    # scikit-learn expects a 2-D (n_samples, n_features) input.
    X = np.atleast_2d(t).T
    gp.fit(X, y)

    # Predict on the same 2-D layout used for fitting (the 1-D ``t`` does
    # not match the shape the model was trained on).
    y_pred, sigma = gp.predict(X, return_std=True)

    fig = plt.figure()
    ax = fig.add_axes((0.1, 0.3, 0.8, 0.65))
    ax.invert_yaxis()
    ax.plot(t, y, color='blue', label='L bol', lw=2.5)
    ax.errorbar(t, y, yerr=yerr, fmt='o', color='blue', label='%s obs.')
    ax.plot(t, y_pred, '-', color='gray')
    # Closed polygon: lower bound left-to-right, upper bound right-to-left.
    ax.fill(np.concatenate([t, t[::-1]]),
            np.concatenate([y_pred - 1.9600 * sigma,
                            (y_pred + 1.9600 * sigma)[::-1]]),
            alpha=.5, fc='b', ec='None', label='95% confidence interval')
    plt.show()
def plot_gaussian(data, col):
    '''
    Plots the gaussian process regression with a characteristic length
    scale of 10 years. Essentially this highlights the 'slow trend' in the
    data.

    Parameters
    ----------
    data: dataframe
        pandas dataframe containing 'date', 'linMean' which is the average
        runtime and 'linSD' which is the standard deviation.
    col: string
        the color in which to plot the data
    '''
    # extract the results from the dataframe
    Year = np.array(data[u'date'].tolist())
    Mean = np.array(data[u'linMean'].tolist())
    SD = np.array(data[u'linSD'].tolist())

    # initialize the gaussian process. Note that the process is calculated
    # with a length scale of 10 years to give the 'slow trend' in the
    # results.
    length_scale = 10.
    kernel = 1. * RBF(length_scale)
    # The per-observation variance goes in through ``alpha``, the value
    # added to the diagonal of the kernel matrix; ``sigma_squared_n`` is not
    # a GaussianProcessRegressor parameter.
    gp = GaussianProcessRegressor(kernel=kernel, alpha=SD ** 2,
                                  normalize_y=True)

    # fit the data and get the predicted mean and standard deviation.
    # scikit-learn expects 2-D inputs, so the 1-D data are converted to
    # column vectors here and flattened again for plotting.
    gp.fit(np.atleast_2d(Year).T, np.atleast_2d(Mean).T)
    Year_array = np.atleast_2d(np.linspace(min(Year) - 2, max(Year) + 2, 100)).T
    # Predict on the grid built above (the name ``Year_pred`` never existed).
    Mean_prediction, SD_prediction = gp.predict(Year_array, return_std=True)
    Year_array = Year_array.ravel()
    Mean_prediction = Mean_prediction.ravel()
    # Flatten the std-dev too so the band arithmetic below is 1-D.
    SD_prediction = SD_prediction.ravel()

    # plot the predicted best fit
    plt.plot(Year_array, Mean_prediction, col, alpha=1)
    # plot the 95% confidence interval
    plt.fill_between(Year_array,
                     (Mean_prediction - 1.9600 * SD_prediction),
                     y2=(Mean_prediction + 1.9600 * SD_prediction),
                     alpha=0.5, color=col)
    plt.draw()
def test_K_inv_reset(kernel):
    """The cached ``_K_inv`` must be reset by every new fit."""
    second_targets = f(X2).ravel()

    refitted = GaussianProcessRegressor(kernel=kernel).fit(X, y)
    # Right after fitting the attribute exists but holds nothing yet.
    assert hasattr(refitted, '_K_inv')
    assert refitted._K_inv is None

    # Requesting the std-dev populates the cache.
    refitted.predict(X, return_std=True)
    assert refitted._K_inv is not None

    # A new fit clears it again.
    refitted.fit(X2, second_targets)
    assert refitted._K_inv is None
    refitted.predict(X2, return_std=True)

    fresh = GaussianProcessRegressor(kernel=kernel).fit(X2, second_targets)
    fresh.predict(X2, return_std=True)

    # the value of K_inv should be independent of the first fit
    assert_array_equal(refitted._K_inv, fresh._K_inv)
class GP(BaseTuner):
    """Tuner that scores candidates with a Gaussian process regressor."""

    def __init__(self, tunables, gridding=0, r_minimum=2):
        """
        Extra args:
            r_minimum: the minimum number of past results this selector
                needs in order to use gaussian process for prediction. If
                not enough results are present during a fit(), subsequent
                calls to propose() will revert to uniform selection.
        """
        super(GP, self).__init__(tunables, gridding=gridding)
        self.r_minimum = r_minimum

    def fit(self, X, y):
        """ Use X and y to train a Gaussian process. """
        super(GP, self).fit(X, y)

        # skip training the process if there aren't enough samples
        if X.shape[0] < self.r_minimum:
            return

        self.gp = GaussianProcessRegressor(normalize_y=True)
        self.gp.fit(X, y)

    def predict(self, X):
        """
        Return one (predicted value, standard deviation) row per candidate
        in X. When fewer than r_minimum results were seen, the prediction
        of a Uniform tuner is returned instead.
        """
        if self.X.shape[0] < self.r_minimum:
            # we probably don't have enough
            logger.warning('GP: not enough data, falling back to uniform sampler')
            return Uniform(self.tunables).predict(X)

        y, stdev = self.gp.predict(X, return_std=True)
        return np.array(list(zip(y, stdev)))

    def _acquire(self, predictions):
        """
        Predictions from the GP will be in the form (prediction, error).
        The default acquisition function returns the index with the highest
        predicted value, not factoring in error.
        """
        return np.argmax(predictions[:, 0])
def integrated_sigma(alpha, n_samples, n_restarts_optimizer=16, f=f):
    """Integrate the GP predictive standard deviation over [0, 10].

    ``n_samples`` training points are placed evenly on [1, 9], a Matern GP
    (plus a WhiteKernel when ``alpha`` is given) is fitted to ``f`` at
    those points, and the predicted standard deviation on a 16384-point
    grid over [0, 10] is integrated with Simpson's rule.

    Parameters
    ----------
    alpha : float or None
        Noise level of the additional WhiteKernel; ``None`` means no noise
        term is added.
    n_samples : int
        Number of training points.
    n_restarts_optimizer : int
        Forwarded to GaussianProcessRegressor.
    f : callable
        Function evaluated on the (n_samples, 1) training inputs.

    Returns
    -------
    The value returned by ``simps`` for the standard-deviation curve.
    """
    print("integrated_sigma(n_samples={n_samples}, alpha={alpha})".format(
        n_samples=n_samples,
        alpha=alpha,
    ))

    X = np.atleast_2d(np.linspace(1, 9, n_samples)).T
    y = f(X).ravel()
    x = np.atleast_2d(np.linspace(0, 10, 16 * 1024)).T

    # Only add a noise term when one was requested. Adding the literal 0.0
    # to a kernel wraps it in a ConstantKernel(0.0), whose log-transformed
    # hyperparameter is -inf.
    kernel = kernels.Matern()
    if alpha is not None:
        kernel = kernel + kernels.WhiteKernel(noise_level=alpha)

    gp = GaussianProcessRegressor(
        kernel=kernel,
        n_restarts_optimizer=n_restarts_optimizer,
    )
    gp.fit(X, y)
    y_pred, sigma = gp.predict(x, return_std=True)

    return simps(
        x=x.ravel(),
        y=sigma,
    )
def classifier_GPR(data, session):
    """Decode trial phase from firing rates with Gaussian process regression.

    For every session with more than 20 entries along axis 1 of its firing
    rate array, trial blocks are cut out around the task boundaries, a
    (cos, sin) phase target is built for every time bin, and a GP regressor
    is trained on one block and scored on another, both within a task and
    across a task switch.

    NOTE(review): the ``data`` argument is never used; the function reads a
    module-level ``data_HP`` instead -- confirm whether that is intended.

    Returns
    -------
    tuple
        ``(correct_list_within, correct_list_between, all_y_within_1,
        all_y_between_1, all_Ys)``: scores for within-task and between-task
        decoding, predictions of the task-1 models, and the phase targets.
    """
    y = data_HP['DM']
    X = data_HP['Data']
    length = []
    correct_list_within = []
    correct_list_between = []
    all_y_within_1 = []
    all_y_between_1 = []
    all_Ys = []
    for s, sess in enumerate(X):
        # Design matrix for the session
        DM = y[s]
        firing_rates_all_time = X[s]
        if firing_rates_all_time.shape[1]>20:
            # Task indices
            choices = DM[:,1]
            task = DM[:,4]
            task_1 = np.where(task == 1)[0] #& (choices == 1))[0]
            task_2 = np.where(task == 2)[0] #& (choices == 1))[0]
            task_3 = np.where(task == 3)[0] #& (choices == 1))[0]

            # Half of the shortest task length seen so far.
            # NOTE(review): ``length`` is never cleared, so the minimum runs
            # over all sessions processed up to this point, not only the
            # current one.
            length.append(len(task_1))
            length.append(len(task_2))
            length.append(len(task_3))
            min_trials_in_task = int(np.min(length)/2)

            #firing_rates_all_time = (firing_rates_all_time-firing_rates_all_time_mean)/firing_rates_all_time_std

            # Select the first min_trials_in_task in task one
            firing_rates_mean_task_1_1 = firing_rates_all_time[task_1]
            firing_rates_mean_task_1_1 = firing_rates_mean_task_1_1[:min_trials_in_task,:]

            # Select the last min_trials_in_task before task two starts.
            # (In this and the following pairs the first assignment is
            # immediately overwritten by the second.)
            firing_rates_mean_task_1_2 = firing_rates_all_time[task_1]
            firing_rates_mean_task_1_2 = firing_rates_all_time[task_2[0]-1-min_trials_in_task:task_2[0]-1,:]

            # Select the first min_trials_in_task in task two
            firing_rates_mean_task_2_1 = firing_rates_all_time[task_2]
            firing_rates_mean_task_2_1 = firing_rates_all_time[task_2[0]:task_2[0]+min_trials_in_task,:]

            # Select the last min_trials_in_task before task three starts
            firing_rates_mean_task_2_2 = firing_rates_all_time[task_2]
            firing_rates_mean_task_2_2 = firing_rates_all_time[task_3[0]-1-min_trials_in_task:task_3[0]-1,:]

            # Select the first min_trials_in_task in task three
            firing_rates_mean_task_3_1 = firing_rates_all_time[task_3]
            firing_rates_mean_task_3_1 = firing_rates_all_time[task_3[0]:task_3[0]+min_trials_in_task,:]

            # Select the min_trials_in_task trials before the last task-three index
            firing_rates_mean_task_3_2 = firing_rates_all_time[task_3]
            firing_rates_mean_task_3_2 = firing_rates_all_time[task_3[-1]-min_trials_in_task:task_3[-1],:]

            # Finding the angle between initiation and every time bin.
            # The number of bins is treated as the circumference of a
            # circle (C = 2*pi*r), so bin i+1 maps to 360*(i+1)/C degrees.
            C = session.aligned_rates.shape[2]
            p = math.pi
            r = C/(2*p)
            angle_sin_list = []
            angle_cos_list = []
            for i in range(C):
                L = 0+ (i+1)
                ang = (180*L)/(p*r)
                ang_sin = np.sin(np.deg2rad(ang))
                ang_cos = np.cos(np.deg2rad(ang))
                angle_sin_list.append(ang_sin)
                angle_cos_list.append(ang_cos)

            # Join the selected trials of each block along axis 1.
            firing_rates_mean_1_1 = np.concatenate(firing_rates_mean_task_1_1, axis = 1)
            firing_rates_mean_1_2 = np.concatenate(firing_rates_mean_task_1_2, axis = 1)
            firing_rates_mean_2_1 = np.concatenate(firing_rates_mean_task_2_1, axis = 1)
            firing_rates_mean_2_2 = np.concatenate(firing_rates_mean_task_2_2, axis = 1)
            firing_rates_mean_3_1 = np.concatenate(firing_rates_mean_task_3_1, axis = 1)
            firing_rates_mean_3_2 = np.concatenate(firing_rates_mean_task_3_2, axis = 1)

            l = firing_rates_mean_1_1.shape[1]

            # Creating a vector which identifies trial stage in the firing rate vector
            Y_cos = np.tile(angle_cos_list,int(l/len(angle_cos_list)))
            Y_sin = np.tile(angle_sin_list,int(l/len(angle_sin_list)))
            Y = np.vstack((Y_cos,Y_sin))

            #kernel = RBF(length_scale = 2)
            # NOTE(review): ``3/2`` is 1.5 only under true division; under
            # Python 2 integer division it evaluates to 1.
            kernel = Matern(nu = 3/2)
            model_nb = GPR(kernel = kernel)
            #model_nb = LinearRegression()

            # The same estimator object is refitted for every comparison,
            # so the order of the fit / predict / score calls matters.

            # Task 1 (late) -> task 2 (early): between tasks
            model_nb.fit(np.transpose(firing_rates_mean_1_2), np.transpose(Y))
            y_pred_class_between_t_1_2 = model_nb.predict(np.transpose(firing_rates_mean_2_1))
            correct_between_t_1 = model_nb.score(np.transpose(firing_rates_mean_2_1),np.transpose(Y))

            # Task 1 (early) -> task 1 (late): within task
            model_nb.fit(np.transpose(firing_rates_mean_1_1),np.transpose(Y))
            y_pred_class_within_t_1_2 = model_nb.predict(np.transpose(firing_rates_mean_1_2))
            correct_within_t_1 = model_nb.score(np.transpose(firing_rates_mean_1_2),np.transpose(Y))

            # Task 2 (late) -> task 3 (early): between tasks
            model_nb.fit(np.transpose(firing_rates_mean_2_2),np.transpose(Y))
            y_pred_class_between_t_2_3 = model_nb.predict(np.transpose(firing_rates_mean_3_1))
            correct_between_t_2 = model_nb.score(np.transpose(firing_rates_mean_3_1),np.transpose(Y))

            # Task 2 (early) -> task 2 (late): within task
            model_nb.fit(np.transpose(firing_rates_mean_2_1),np.transpose(Y))
            y_pred_class_within_t_2_3 = model_nb.predict(np.transpose(firing_rates_mean_2_2))
            correct_within_t_2 = model_nb.score(np.transpose(firing_rates_mean_2_2),np.transpose(Y))

            # Task 3 (early) -> task 3 (late): within task
            model_nb.fit(np.transpose(firing_rates_mean_3_1),np.transpose(Y))
            y_pred_class_within_t_3 = model_nb.predict(np.transpose(firing_rates_mean_3_2))
            correct_within_t_3 = model_nb.score(np.transpose(firing_rates_mean_3_2),np.transpose(Y))

            correct_list_within.append(correct_within_t_1)
            correct_list_within.append(correct_within_t_2)
            correct_list_within.append(correct_within_t_3)

            correct_list_between.append(correct_between_t_1)
            correct_list_between.append(correct_between_t_2)

            # Only the task-1 predictions are kept for the caller.
            all_y_within_1.append(y_pred_class_within_t_1_2)
            all_y_between_1.append(y_pred_class_between_t_1_2)
            all_Ys.append(Y)

    print(correct_list_within)
    print(correct_list_between)

    return correct_list_within, correct_list_between,all_y_within_1,all_y_between_1,all_Ys
def fit(self, X, y):
    """Build the ensemble of individual TDE classifiers.

    Parameter sets are first drawn at random from the candidate space;
    once ``randomly_selected_params`` members have been built, a Gaussian
    process fitted on the (parameters, accuracy) history picks the next
    candidate. The search runs until the time limit or the requested
    number of parameter samples is reached, or the candidates run out.

    Parameters
    ----------
    X : training series
        After ``check_X_y(..., coerce_to_numpy=True)`` its shape is
        unpacked as (n_instances, n_dims, series_length).
    y : array-like
        The class labels.

    Returns
    -------
    self : object
    """
    X, y = check_X_y(X, y, coerce_to_numpy=True)

    time_limit = self.time_limit_in_minutes * 60
    self.n_instances, self.n_dims, self.series_length = X.shape
    self.n_classes = np.unique(y).shape[0]
    self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
    for position, label in enumerate(self.classes_):
        self.class_dictionary[label] = position

    self.classifiers = []
    self.weights = []
    self.prev_parameters_x = []
    self.prev_parameters_y = []

    # Window length parameter space dependent on series length
    max_window_searches = self.series_length / 4
    max_window = int(self.series_length * self.max_win_len_prop)
    win_inc = max(
        int((max_window - self.min_window) / max_window_searches), 1)

    if self.min_window > max_window + 1:
        raise ValueError(
            f"Error in TemporalDictionaryEnsemble, min_window ="
            f"{self.min_window} is bigger"
            f" than max_window ={max_window},"
            f" series length is {self.series_length}"
            f" try set min_window to be smaller than series length in "
            f"the constructor, but the classifier may not work at "
            f"all with very short series")

    candidates = self._unique_parameters(max_window, win_inc)
    built = 0
    started_at = time.time()
    elapsed = 0

    subsample_size = int(self.n_instances * 0.7)
    lowest_acc = 1
    lowest_acc_idx = 0

    # A positive time limit replaces the sample-count stopping rule.
    if time_limit > 0:
        self.n_parameter_samples = 0

    rng = check_random_state(self.random_state)

    # Unless set explicitly, bigrams are used for univariate data only.
    if self.bigrams is not None:
        use_bigrams = self.bigrams
    else:
        use_bigrams = not self.n_dims > 1

    # use time limit or n_parameter_samples if limit is 0
    while (
        elapsed < time_limit or built < self.n_parameter_samples
    ) and len(candidates) > 0:
        if built >= self.randomly_selected_params:
            # Guided phase: take a candidate with the best predicted
            # accuracy, breaking ties at random.
            gp = GaussianProcessRegressor(random_state=self.random_state)
            gp.fit(self.prev_parameters_x, self.prev_parameters_y)
            predicted = gp.predict(candidates)
            chosen = candidates.pop(
                rng.choice(np.flatnonzero(predicted == predicted.max())))
        else:
            # Warm-up phase: plain random draw.
            chosen = candidates.pop(rng.randint(0, len(candidates)))

        subsample = rng.choice(self.n_instances, size=subsample_size,
                               replace=False)
        X_subsample = X[subsample]
        y_subsample = y[subsample]

        member = IndividualTDE(
            *chosen,
            alphabet_size=self.alphabet_size,
            bigrams=use_bigrams,
            dim_threshold=self.dim_threshold,
            max_dims=self.max_dims,
            random_state=self.random_state,
        )
        member.fit(X_subsample, y_subsample)
        member.subsample = subsample

        ensemble_has_room = built < self.max_ensemble_size
        member.accuracy = self._individual_train_acc(
            member,
            y_subsample,
            subsample_size,
            -999999 if ensemble_has_room else lowest_acc,
        )
        weight = math.pow(member.accuracy, 4)

        if ensemble_has_room:
            if member.accuracy < lowest_acc:
                lowest_acc = member.accuracy
                lowest_acc_idx = built
            self.weights.append(weight)
            self.classifiers.append(member)
        elif member.accuracy > lowest_acc:
            # Ensemble is full: replace the current worst member.
            self.weights[lowest_acc_idx] = weight
            self.classifiers[lowest_acc_idx] = member
            lowest_acc, lowest_acc_idx = self._worst_ensemble_acc()

        self.prev_parameters_x.append(chosen)
        self.prev_parameters_y.append(member.accuracy)

        built += 1
        elapsed = time.time() - started_at

    self.n_estimators = len(self.classifiers)
    self.weight_sum = np.sum(self.weights)

    self._is_fitted = True
    return self
class GCP(BaseTuner):
    """Tuner based on a Gaussian Copula Process.

    Every feature column and the score are mapped through a kernel-density
    CDF followed by the standard normal quantile function; a Gaussian
    process is then fitted in that transformed space.
    """

    def __init__(self, tunables, gridding=0, r_minimum=2):
        """
        Extra args:
            r_minimum: the minimum number of past results this selector
                needs in order to use gaussian process for prediction. If
                not enough results are present during a fit(), subsequent
                calls to propose() will revert to uniform selection.
        """
        super(GCP, self).__init__(tunables, gridding=gridding)
        self.r_minimum = r_minimum

    def fit(self, X, y):
        """
        Use X and y to train a Gaussian Copula Process.

        Stores the kernel-density models of y (self.y_kernel_model) and of
        every column of X (self.X_kernel_model) and the fitted regressor
        (self.gcp). Nothing is trained when X has fewer than r_minimum rows.
        """
        def jitter(x, value_range):
            # Add uniform noise to an integer-valued column; the noise is
            # scaled down by the mean order of magnitude of the range ends.
            jittered = np.copy(x)
            scale_exp_min = np.abs(np.ceil(np.log10(value_range[0])))
            scale_exp_max = np.abs(np.ceil(np.log10(value_range[1])))
            scale_exp = (scale_exp_max + scale_exp_min) / 2.
            noise = np.random.rand(jittered.size) / (10**scale_exp)
            return jittered + noise

        def kde_model(samples):
            # Kernel-density estimate together with its CDF and PPF.
            kernel_pdf = st.gaussian_kde(samples)
            return {'pdf': kernel_pdf,
                    'cdf': make_cdf(kernel_pdf),
                    'ppf': make_ppf(kernel_pdf)}

        # Print msg. when going into gcp.fit
        strMessage = "rows in X = %d, r_minimum = %d" % (X.shape[0], self.r_minimum)
        logger.debug(strMessage)

        super(GCP, self).fit(X, y)

        # skip training the process if there aren't enough samples
        if X.shape[0] < self.r_minimum:
            return

        # -- Non-parametric model of 'y', estimated with kernel density
        y_kernel_model = kde_model(y)
        self.y_kernel_model = y_kernel_model
        # - Transform y-->F-->vF-->norm.ppf-->v
        vF = y_kernel_model['cdf'](y)
        v = st.norm.ppf(vF)

        # -- Non-parametric model of each feature in 'X', estimated with
        #    kernel density
        X_kernel_model = []
        for ki in range(X.shape[1]):
            columnX = X[:, ki]
            if self.tunables[ki][1].is_integer:
                columnX = jitter(columnX, self.tunables[ki][1].range)
            X_kernel_model.append(kde_model(columnX))
        self.X_kernel_model = X_kernel_model

        # -- Transform X-->F-->uF-->norm.ppf-->U
        # Float storage: inheriting an integer dtype from X would truncate
        # the normal scores.
        U = np.empty_like(X, dtype=float)
        for ki in range(X.shape[1]):
            uF = X_kernel_model[ki]['cdf'](X[:, ki])
            U[:, ki] = st.norm.ppf(uF)

        # - Instantiate a GP and fit it with (U, v)
        self.gcp = GaussianProcessRegressor(normalize_y=True)
        self.gcp.fit(U, v)

    def predict(self, X):
        """
        Return (prediction, error) rows for the candidates in X.

        Rows of X whose transformed features contain an infinite value are
        dropped, so the result can have fewer rows than X. When fewer than
        r_minimum results were seen, the prediction of a Uniform tuner is
        returned instead.
        """
        if self.X.shape[0] < self.r_minimum:
            # we probably don't have enough
            logger.warning('GP: not enough data, falling back to uniform sampler')
            return Uniform(self.tunables).predict(X)

        def get_valid_row(U):
            # Keep only the rows without any infinite entry.
            ind_OK = np.full(U.shape[0], 1, dtype=bool)
            for ki in range(U.shape[1]):
                ind_OK = np.logical_and(ind_OK, np.logical_not(np.isinf(U[:, ki])))
            V = np.copy(U[ind_OK, :])
            return V, ind_OK

        # -- Load non-parametric model
        x_kernel_model = self.X_kernel_model
        y_kernel_model = self.y_kernel_model

        # -- Transform X into U before using the GP learned
        # Float storage, for the same reason as in fit().
        U = np.empty_like(X, dtype=float)
        for ki in range(X.shape[1]):
            uF = x_kernel_model[ki]['cdf'](X[:, ki])
            U[:, ki] = st.norm.ppf(uF)

        # -- Get U_safe and print msg. to inform of how many rows are valid
        U_safe, ind_OK = get_valid_row(U)
        strMessage = "Num. of valid rows in X = %d" % (np.sum(ind_OK))
        logger.debug(strMessage)

        # -- use GP to estimate mean and stdev only of safe U's
        mu_v, stdev_v = self.gcp.predict(U_safe, return_std=True)

        # -- Transform back mu_u-->NormStd-->mu_uF
        mu_vF = st.norm.cdf(mu_v)
        stdev_vF = st.norm.cdf(stdev_v)

        # -- Transform back mu_uF-->F.ppf-->mu_y
        # VERSION 1:
        # It should be used in case of mu_y and stdev_y can have a size
        # lower than X. Otherwise, swap to version 2.
        mu_y = y_kernel_model['ppf'](mu_vF)
        stdev_y = y_kernel_model['ppf'](stdev_vF)

        # VERSION 2 (disabled):
        # It should be used in case of mu_y and stdev_y must have the same
        # length as X. Otherwise, version 1 is faster.
        # mu_y has the same length as U, but is positive only for safe rows
        #     mu_y = np.zeros([U.shape[0]])
        #     stdev_y = np.zeros([U.shape[0]])
        #     mu_y[ind_OK] = y_kernel_model['ppf'](mu_vF)
        #     stdev_y[ind_OK] = y_kernel_model['ppf'](stdev_vF)

        return np.array(list(zip(mu_y, stdev_y)))

    def _acquire(self, predictions):
        """
        Predictions from the GCP will be in the form (prediction, error).
        The default acquisition function returns the index with the highest
        predicted value, not factoring in error.
        """
        return np.argmax(predictions[:, 0])
class GPTS_Learner(Learner):
    '''Gaussian Process Thompson Sampling Learner inheriting from the Learner class.'''

    def __init__(self, n_arms, arms, kernel=None, plain_gp=False):
        '''Initialize the learner with a number of arms, the arm values and a kernel.

        When no kernel is given, a constant times RBF kernel is used and
        the optimizer is restarted 9 times; with a caller-supplied kernel
        no restarts are requested. With plain_gp=True a default
        GaussianProcessRegressor is used and the kernel is ignored.
        '''
        super(GPTS_Learner, self).__init__(n_arms)  # extends the Learner initialisation

        # Assignments and initializations
        self.arms = arms
        self.means = np.linspace(0, 40, self.n_arms)
        self.sigmas = np.ones(self.n_arms) * 10
        self.pulled_arms = []
        alpha = 10.0

        if not kernel:
            # Product of a constant and an RBF kernel, both starting at 1
            # with bounds from 1e-3 to 1e3
            kernel = C(1.0, (1e-3, 1e3)) * RBF(1.0, (1e-3, 1e3))
            n_restarts = 9
        else:
            n_restarts = 0

        if plain_gp:
            self.gp = GaussianProcessRegressor()
        else:
            # alpha**2 is the value added to the diagonal of the kernel matrix
            self.gp = GaussianProcessRegressor(kernel=kernel,
                                               alpha=alpha**2,
                                               normalize_y=True,
                                               n_restarts_optimizer=n_restarts)

    def update_observations(self, pulled_arm, reward):
        '''Record the reward via the parent class and remember the value of the pulled arm.'''
        super(GPTS_Learner, self).update_observations(pulled_arm, reward)
        # Keeps track of the pulled arm (its value, not its index)
        self.pulled_arms.append(self.arms[pulled_arm])

    def update_model(self):
        '''Refit the GP on all observations and refresh the means and sigmas.'''
        # Pulled arm values as a column vector versus the collected rewards
        x = np.atleast_2d(self.pulled_arms).T
        y = self.collected_rewards
        # Fits the Gaussian process
        self.gp.fit(x, y)
        # Evaluates current means and sigmas for every arm
        self.means, self.sigmas = self.gp.predict(np.atleast_2d(self.arms).T,
                                                  return_std=True)
        # Lower bound of 0.01 on the standard deviation
        self.sigmas = np.maximum(self.sigmas, 1e-2)

    def update(self, pulled_arm, reward):
        '''Advance one time step, updating both the observations and the model.'''
        self.t += 1
        self.update_observations(pulled_arm, reward)
        self.update_model()

    def pull_arm(self, budget):
        '''Sample a value for every arm and return the index of the best feasible arm.

        An arm is feasible when its value does not exceed budget. The
        returned value is an index into self.arms.
        '''
        # One draw per arm from a normal with the current means and sigmas
        sampled_values = np.random.normal(self.means, self.sigmas)
        # Indices (into self.arms) of the arms satisfying the budget;
        # asarray lets this work for plain lists as well
        feasible_idxs = np.flatnonzero(np.asarray(self.arms) <= budget)
        # argmax works on the feasible subset, so its result is mapped back
        # to an arm index; the two only coincide when the feasible arms are
        # exactly the first ones
        best_position = np.argmax(sampled_values[feasible_idxs])
        return feasible_idxs[best_position]
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

# Twenty noisy samples of 0.5 * sin(3x) on [0, 5].
rng = np.random.RandomState(0)
X = rng.uniform(0, 5, 20)[:, np.newaxis]
y = 0.5 * np.sin(3 * X[:, 0]) + rng.normal(0, 0.5, X.shape[0])

# First run: start from a long length scale and a high noise level.
plt.figure(0)
kernel = (
    1.0 * RBF(length_scale=100.0, length_scale_bounds=(1e-2, 1e3))
    + WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+1))
)
gp = GaussianProcessRegressor(kernel=kernel,
                              alpha=0.0).fit(X, y)

# Posterior mean with a one-standard-deviation band on a dense grid.
X_ = np.linspace(0, 5, 100)
y_mean, y_cov = gp.predict(X_[:, np.newaxis], return_cov=True)
plt.plot(X_, y_mean, 'k', lw=3, zorder=9)
plt.fill_between(
    X_,
    y_mean - np.sqrt(np.diag(y_cov)),
    y_mean + np.sqrt(np.diag(y_cov)),
    alpha=0.5,
    color='k',
)

# Noise-free target and the training samples.
plt.plot(X_, 0.5*np.sin(3*X_), 'r', lw=3, zorder=9)
plt.scatter(X[:, 0], y, c='r', s=50, zorder=10)

# Title reports the initial kernel, the optimized kernel and its
# log-marginal-likelihood.
plt.title(
    "Initial: %s\nOptimum: %s\nLog-Marginal-Likelihood: %s"
    % (kernel, gp.kernel_,
       gp.log_marginal_likelihood(gp.kernel_.theta))
)
plt.tight_layout()

# Second run: start from a short length scale and a low noise level.
plt.figure(1)
kernel = (
    1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e3))
    + WhiteKernel(noise_level=1e-5, noise_level_bounds=(1e-10, 1e+1))
)
# RBF kernel whose length scale is pinned to l (lower bound == upper bound).
gp = GaussianProcessRegressor(kernel=RBF(l, (l, l)))

# Fit --> training on the residual with respect to the baseline.
g = gp.fit(a[:, np.newaxis], wde - base(a))

# Plotting points (if log use np.logspace)
a_sampling = np.linspace(inia, enda, ODEsteps)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(a_sampling)

# Transform a_sampling into z_sampling: z = -1 + 1 / a, element-wise.
z_sampling = -1 + 1 / a_sampling
print(z_sampling)

# Predict the residual and add the baseline back.
w_pred, sigma = gp.predict(a_sampling[:, np.newaxis], return_std=True)
w_pred = w_pred + base(a_sampling)

# Plot the result: remove it from final versions
fig = plt.figure(figsize=(14, 12))
plt.plot(a_sampling, w_pred, label='l=%s' % l)
plt.legend(fontsize=20)
plt.scatter(a, wde)
fig.savefig('test_figure.png')

# Write two columns (z, predicted w) to the output file.
np.savetxt(filename, np.array([z_sampling, w_pred]).T, fmt="%15.8e")
# Two initial observations of the black-box function.
X = np.atleast_2d([1., 9.]).T
y = blackbox_func(X).ravel()

# Gaussian Process Upper Confidence Bound (GP-UCB) algorithm
# --> repeat until convergence (the convergence criterion is a tuning point)
n_iteration = 50
for i in range(n_iteration):
    # Fit the model to the values observed so far
    # --> kernel function and parameters are left at their defaults
    #     (tuning point)
    # NOTE(review): the GaussianProcessRegressor is replaced by a k-nearest
    # neighbours regressor on the very next line, so the GP is never used.
    gp = GaussianProcessRegressor()
    gp = KNeighborsRegressor(n_neighbors=2)
    gp.fit(X, y)

    # Posterior estimate on the grid
    posterior_mean = gp.predict(x_grid)
    # posterior_sig = dist_knn(X, x_grid)
    posterior_sig = dist_knn(X, x_grid, i + 1)

    # Choose the x that maximizes the acquisition function as the next
    # parameter
    # --> a larger beta favours exploration (e.g. start large and decay it
    #     with the iterations to favour exploitation; tuning point)
    idx = acq_ucb(posterior_mean, posterior_sig, beta=100.0)
    x_next = x_grid[idx]

    # NOTE(review): X and y are not extended with x_next in the visible
    # code -- presumably the loop body continues elsewhere; confirm.
    plot(x_grid, y, X, posterior_mean, posterior_sig, title='Iteration=%2d, x_next = %f' % (i + 2, x_next))
class GPUCB:
    """Bayesian optimiser on the unit hypercube driven by a GP confidence bound.

    The objective is minimised, so the acquisition is the *lower* confidence
    bound ``mean - beta * std`` (kept under the historical name ``ucb``).
    """

    def __init__(self, **params):
        """Set up the surrogate, the search settings and the log.

        Keys read from ``params``: ``seed``, ``B`` (number of evaluations),
        ``obj_func`` (must expose ``dim`` and ``evaluate(x)``) and ``space``
        (must expose ``continuous_space``, whose items have ``label`` and
        ``convert``).
        """
        self.seed = params["seed"]
        np.random.seed(self.seed)
        self.B = params["B"]
        self.obj_func = params["obj_func"]
        self.dim = params["obj_func"].dim

        # GP training data, grown one row per evaluation.
        self.xs = np.zeros((0, self.dim))
        self.ys = np.zeros((0, 1))

        # kernel: constant * anisotropic Matern(2.5) + white noise
        matern = Matern(nu=2.5, length_scale=[1.0 for _ in range(self.dim)])
        white = WhiteKernel(noise_level=1.0)
        constant = ConstantKernel(constant_value=1.0)
        self.gp = GaussianProcessRegressor(kernel=constant * matern + white,
                                           normalize_y=True,
                                           n_restarts_optimizer=15)

        # BO settings
        self.beta = 2.0
        self.num_initial_samples = 5
        self.acq_initial_points = 50

        # for logging
        self.evals = 0
        self.fval = None
        self.best_params = None
        self.best_fval = np.inf
        self.space = params["space"]
        self.log = opt_util.basic_log_setup(self.space)

    def create_theta(self, x):
        """Map the point ``x`` to a ``{label: converted value}`` dict."""
        dims = self.space.continuous_space
        return {dims[i].label: dims[i].convert(x[i]) for i in range(self.dim)}

    def log_generation(self, x, y):
        """Record one evaluation ``(x, y)`` and update the incumbent."""
        self.evals += 1
        self.fval = y
        theta = self.create_theta(x)
        print("theta:{}, fval:{}".format(theta, self.fval))
        if self.fval < self.best_fval:
            self.best_fval = self.fval
            self.best_params = theta
        self.log["evals"].append(self.evals)
        self.log["fval"].append(self.fval)
        self.log["best_fval"].append(self.best_fval)
        for i in range(self.dim):
            self.log[self.space.continuous_space[i].label].append(
                self.space.continuous_space[i].convert(x[i]))

    def ucb(self, x):
        """Return ``mean - beta * std`` of the surrogate at the rows of ``x``."""
        mean, std = self.gp.predict(x, return_std=True)
        # Note that our implementation targets minimization, so this is lcb, in fact.
        return mean - self.beta * std

    def argmin_acq(self):
        """Minimise the acquisition with multi-start L-BFGS-B on [0, 1]^dim.

        Returns the best point found as a 1-D array of length ``dim``.
        """
        x_initials = [
            np.random.uniform(0.0, 1.0, size=self.dim)
            for _ in range(self.acq_initial_points)
        ]
        bounds = [(0.0, 1.0) for _ in range(self.dim)]

        def objective(x):
            # predict() wants a (1, dim) batch, while minimize() wants a plain
            # scalar back, so reshape on the way in and unwrap on the way out.
            return float(np.ravel(self.ucb(x.reshape(1, -1)))[0])

        x_best = None
        fval_best = np.inf
        for xp in x_initials:
            # x0 is passed 1-D, as scipy.optimize.minimize documents it.
            res = minimize(objective, xp, bounds=bounds, method="L-BFGS-B")
            if res.fun < fval_best:
                x_best = res.x
                fval_best = res.fun
        if x_best is None and x_initials:
            # No start produced a comparable value (e.g. all NaN); fall back to
            # a random point rather than handing None to the objective.
            x_best = x_initials[0]
        return x_best

    def optimize(self):
        """Run the optimisation loop for ``B`` evaluations."""
        for t in range(self.B):
            # Step 1. select sample to evaluate
            if t < self.num_initial_samples:
                x = np.random.uniform(0.0, 1.0, size=self.dim)
            else:
                x = self.argmin_acq()

            # Step 2. evaluate the sample selected in Step 1.
            y = self.obj_func.evaluate(x)
            self.log_generation(x, y)

            # Step 3. accumulate the observed data
            self.xs = np.concatenate(
                (self.xs, np.array(x).reshape(1, self.dim)))
            self.ys = np.concatenate((self.ys, np.array(y).reshape(1, 1)))

            # Step 4. refit the surrogate once the initial design is complete,
            # so the acquisition is computed from the data and not the prior.
            if t + 1 >= self.num_initial_samples:
                self.gp.fit(self.xs, self.ys.ravel())
def fit_gp(x,
           y,
           x0=None,
           symmetry_list=None,
           y_errors=None,
           hypercube_rescale=False,
           fname_export="gp_fit"):
    """Fit a Gaussian process to ``(x, y)`` and return a predictor callable.

    x = array so x[0] , x[1], x[2] are points.

    Parameters
    ----------
    x : 2-D array, one sample per row.
    y : target values, one per row of ``x``.
    x0, symmetry_list, y_errors, fname_export : accepted but not used here.
    hypercube_rescale : if true, centre each coordinate on its mean and divide
        by its estimated length scale before fitting; the returned callable
        applies the same transform to its input.

    Returns
    -------
    A callable mapping an array of points to the GP mean prediction.

    Reads the module-level ``opts`` (``fit_load_gp``, ``fit_save_gp``).
    """
    # If we are loading a fit, override everything else
    if opts.fit_load_gp:
        print(
            " WARNING: Do not re-use fits across architectures or versions : pickling is not transferrable "
        )
        # NOTE(review): joblib.load unpickles the file; only load trusted fits.
        my_gp = joblib.load(opts.fit_load_gp)
        return lambda x: my_gp.predict(x)

    # Amplitude:
    #   - We are fitting lnL.
    #   - We know the scale more or less: more than 2 in the log is bad
    # Scale
    #   - because of strong correlations with chirp mass, the length scales can be very short
    #   - they are rarely very long, but at high mass can be long
    #   - I need to allow for a RANGE
    n_dim = len(x[0])
    length_scale_est = []
    length_scale_bounds_est = []
    for indx in range(n_dim):
        column = x[:, indx]
        column_std = np.std(column)
        # These length scales have been tuned by experience
        length_scale_est.append(
            2 * column_std)  # auto-select range based on sampling retained
        length_scale_min_here = np.max(
            [1e-3, 0.2 * np.std(column / np.sqrt(len(x)))])
        # Auto-select range based on sampling *RETAINED* (i.e., passing cut).
        # Note that for the coordinates I usually use, it would be nonsensical
        # to make the range in coordinate too small, as can occasionally happen.
        length_scale_bounds_est.append(
            (length_scale_min_here, 5 * column_std))

    print(" GP: Input sample size ", len(x), len(y))
    print(" GP: Estimated length scales ")
    print(length_scale_est)
    print(length_scale_bounds_est)

    if not hypercube_rescale:
        # These parameters have been hand-tuned by experience to try to set to
        # levels comparable to typical lnL Monte Carlo error
        kernel = WhiteKernel(
            noise_level=0.1, noise_level_bounds=(1e-2, 1)) + C(
                0.5, (1e-3, 1e1)) * RBF(
                    length_scale=length_scale_est,
                    length_scale_bounds=length_scale_bounds_est)
        gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=8)

        gp.fit(x, y)

        print(" Fit: std: ", np.std(y - gp.predict(x)),
              "using number of features ", len(y))

        if opts.fit_save_gp:
            print(" Attempting to save fit ", opts.fit_save_gp + ".pkl")
            joblib.dump(gp, opts.fit_save_gp + ".pkl")

        return lambda x: gp.predict(x)

    # Rescaled fit. The centre must be per-coordinate: np.mean(x) with no axis
    # collapses to one scalar, which broke len(x_center) below and shifted
    # every coordinate by the same amount.
    x_center = np.mean(x, axis=0)
    print(" Scaling data to central point ", x_center)
    x_scaled = (x - x_center) / np.asarray(length_scale_est)  # resize

    # NOTE(review): the initial RBF length scale is the number of coordinates,
    # as originally written -- confirm that is the intended starting value.
    kernel = WhiteKernel(noise_level=0.1, noise_level_bounds=(
        1e-2, 1)) + C(0.5, (1e-3, 1e1)) * RBF(len(x_center), (1e-3, 1e1))
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=8)

    gp.fit(x_scaled, y)
    print(" Fit: std: ", np.std(y - gp.predict(x_scaled)),
          "using number of features ", len(y))  # should NOT be perfect

    return lambda x, x0=x_center, scl=length_scale_est: gp.predict(
        (x - x0) / scl)
class AugmentedGaussianProcess:
    """A pair of Gaussian processes: one for the samples and another for the error.

    Arguments:
      * sample_kernel: the kernel used for the sample predictor
      * error_kernel: the kernel used for the error predictor; if not specified,
        defaults to the sample kernel plus a white noise term.

    Keyword-only arguments:
      * fit_white_noise: if True, add a white noise term to the kernel and
        include the white noise term in the sample error rather than the model
        error.

    Attributes:
      * submodel_samples: the sample predictor (replaced by each call to fit())
      * submodel_errors: the error predictor

    Note that direct access to the submodels doesn't include the corrections
    applied by fit_white_noise.
    """

    def __init__(self, sample_kernel, error_kernel=None, *,
                 fit_white_noise=False):
        if fit_white_noise:
            sample_kernel = sample_kernel + kernels.WhiteKernel()
        if error_kernel is None:
            # With fit_white_noise the sample kernel already carries the
            # white-noise term, so it is reused as-is.
            if fit_white_noise:
                error_kernel = sample_kernel
            else:
                error_kernel = sample_kernel + kernels.WhiteKernel()

        self.fit_white_noise = fit_white_noise
        self.sample_kernel = sample_kernel
        self.submodel_samples = GaussianProcessRegressor(self.sample_kernel)
        self.submodel_errors = GaussianProcessRegressor(error_kernel)

    def _white_noise_level(self):
        """Return the fitted noise level of the white term added in __init__."""
        # The white kernel was added on the right, so it is `k2` of the sum.
        return self.submodel_samples.kernel_.k2.noise_level

    def fit(self, X, Y, Y_err):
        """Fit the model to a set of data with errors."""
        # The squared per-sample errors are passed as `alpha`; asarray lets
        # plain sequences be squared as well.
        self.submodel_samples = GaussianProcessRegressor(
            self.sample_kernel, alpha=np.asarray(Y_err) ** 2)
        self.submodel_samples.fit(X, Y)
        self.submodel_errors.fit(X, Y_err)

    def predict(self, X, return_std=False, return_efficiency=False):
        """Predict the mean, possibly also the standard error and sampling
        efficiency.

        If return_std is False, returns the predicted mean.
        If return_std is True, also returns the standard error of the
        prediction. If return_efficiency is also True, also returns the
        sampling efficiency, defined as the portion of the total sampling
        error attributable to the model uncertainty.
        """
        if not return_std:
            return self.submodel_samples.predict(X)

        mean, std = self.submodel_samples.predict(X, return_std=True)
        if self.fit_white_noise:
            # Remove the white-noise share from the model variance. The
            # difference is clamped at zero so a variance that comes back
            # below the noise level yields std = 0 rather than NaN.
            std = np.sqrt(
                np.maximum(std ** 2 - self._white_noise_level(), 0.0))

        if return_efficiency:
            sigma = self.predict_sample_error(X)
            efficiency = 1 - sigma / np.sqrt(sigma ** 2 + std ** 2)
            return mean, std, efficiency
        return mean, std

    def predict_sample_error(self, X):
        """Predict the sample error."""
        sigma = self.submodel_errors.predict(X)
        if self.fit_white_noise:
            sigma = np.sqrt(sigma ** 2 + self._white_noise_level())
        return sigma
class SW_GPTS_Learner:
    """Sliding-window Gaussian-process Thompson-sampling bandit learner.

    Arms are addressed by index; the GP regresses collected rewards on the
    index of the pulled arm.
    """

    def __init__(self, arms, sigma=5, window_length=0):
        """
        Parameters:
            arms: the sequence of arms
            sigma (float): initial standard deviation assigned to every arm
            window_length (int): number of most recent observations to keep;
                with 0 the history is never truncated
        """
        self.arms = arms
        self.n_arms = len(arms)
        self.means = np.zeros(self.n_arms)
        self.sigmas = np.ones(self.n_arms) * sigma
        self.pulled_arms = np.array([])
        self.collected_rewards = np.array([])
        self.window_length = window_length

        noise_std = 1.5
        theta = 1
        length_scale = 1
        kernel = C(theta, (1e-5, 1e5)) * RBF(length_scale, (1e-10, 1e10))
        self.gp = GaussianProcessRegressor(kernel=kernel,
                                           alpha=noise_std**2,
                                           normalize_y=True,
                                           n_restarts_optimizer=10)

    def sample_values(self):
        """
        Sample the value of all the arms from the learner Gaussian Process

        :return: (numpy.array) An array containing the sampled values
        """
        return np.random.normal(self.means, self.sigmas)

    def update_observations(self, arm_index, reward):
        """
        Update the parameters based on an external sampling on the real function

        Parameters:
            arm_index (int): the index of the arm that has been pulled
            reward (float): the reward received by pulling the arm
        """
        self.pulled_arms = np.append(self.pulled_arms, arm_index)
        self.collected_rewards = np.append(self.collected_rewards, reward)
        # A window length of 0 keeps the whole history (slicing with [-0:]
        # returns everything), so truncate only for a non-zero window.
        if self.window_length and len(self.pulled_arms) > self.window_length:
            self.pulled_arms = self.pulled_arms[-self.window_length:]
            self.collected_rewards = self.collected_rewards[
                -self.window_length:]

    def update_model(self):
        """
        Update the gaussian process based on the recent observations
        """
        x = np.atleast_2d(self.pulled_arms).T
        y = self.collected_rewards
        # A single observation is not fitted; keep the current estimates.
        if len(y) == 1:
            return
        self.gp.fit(x, y)
        # Query every arm index and take BOTH the mean and the standard
        # deviation; without return_std the sigmas were never updated.
        arm_indices = np.arange(len(self.arms)).reshape(-1, 1)
        self.means, self.sigmas = self.gp.predict(arm_indices,
                                                  return_std=True)
        # Floor the spread so sampling keeps some randomness.
        self.sigmas = np.maximum(self.sigmas, 1e-2)

    def update(self, arm_index, reward):
        """
        Update the parameters of the model and the gaussian process based on an
        external sampling of the real function

        Parameters:
            arm_index (int): the index of the arm that has been pulled
            reward (float): the reward received by pulling the arm
        """
        self.update_observations(arm_index, reward)
        self.update_model()

    def pull_arm(self):
        """
        Choose which arm to pull based on the result of a sampling of the
        gaussian process

        Returns:
            (int): the index of the arm that maximizes the observation
        """
        return np.argmax(self.sample_values())

    # FOR TESTING PURPOSES
    def plot(self, ax, function):
        """Draw the true function, the observations and the current means."""
        ax.plot(self.arms, function(self.arms))
        # NOTE(review): pulled_arms holds arm indices while the other curves
        # use self.arms on the x-axis -- confirm the two coincide.
        ax.plot(self.pulled_arms, self.collected_rewards, 'ro')
        ax.plot(self.arms, self.means)

    def predict(self, point):
        """Return the GP prediction at the single input ``point``."""
        return self.gp.predict([[point]])
class BayesianLearning(BasePredictor):
    """Batch Bayesian optimisation helper built around a surrogate regressor.

    The surrogate must provide ``fit(X, Y)`` and
    ``predict(X, return_std=True)``.
    """

    def __init__(self, surrogate=None, n_features=None):
        # The default surrogate is a Gaussian process with a Matern kernel.
        if surrogate is None:
            if n_features is not None:
                # anisotropic kernel
                length_scale = [2.] * n_features
            else:
                length_scale = 2.
            self.surrogate = GaussianProcessRegressor(
                kernel=Matern(nu=2.5,
                              length_scale=length_scale,
                              length_scale_bounds=(0.1, 1024)),
                n_restarts_optimizer=3,
                normalize_y=False)
        else:
            self.surrogate = surrogate

        # keep track of the iteration counts
        self.iteration_count = 0

        # The size of the exploration domain, default to 1000
        self.domain_size = 1000

    # ------------------------------------------------------------------
    # shared pieces of the acquisition computation
    # ------------------------------------------------------------------
    def _predict_columns(self, X):
        """Return surrogate mean and std at ``X`` as (n, 1) column arrays."""
        mu, sigma = self.surrogate.predict(X, return_std=True)
        return mu.reshape(mu.shape[0], 1), sigma.reshape(sigma.shape[0], 1)

    def _exploration_alpha(self):
        """Return the exploration weight for the current iteration.

        Computes sqrt(2 * log(domain_size * t**2 * pi**2 / (6 * delta))) with
        delta = 0.1 and t = ``iteration_count``.

        Raises:
            ValueError: if the logarithm's argument is below 1, which happens
                when ``iteration_count`` is still 0.
        """
        alpha_inter = self.domain_size * (self.iteration_count) * (
            self.iteration_count) * math.pi * math.pi / (6 * 0.1)
        if alpha_inter < 1:
            # Fail with an explicit message; this used to surface as a bare
            # "math domain error" from math.log / math.sqrt.
            raise ValueError(
                'alpha_inter is {} in Upper_Confidence_Bound; iteration_count '
                'must be at least 1'.format(alpha_inter))
        return math.sqrt(2 * math.log(alpha_inter))  # We have set delta = 0.1

    @staticmethod
    def _batch_beta(sigma, alpha):
        """Return the GP-BUCB exploration weight for predictive std ``sigma``."""
        tolerance = 1e-6  # avoids a divide by zero when sigma is 0
        sigma_inv_sq = 1.0 / (tolerance + (sigma * sigma))
        C = 8 / (np.log(1 + sigma_inv_sq))
        return np.sqrt(np.exp(2 * C) * alpha)

    def _best_novel_index(self, X, X_Sample, Value):
        """Return the index of the best-valued row of ``X`` not near ``X_Sample``.

        Candidates are visited in decreasing order of ``Value``. If every
        candidate is close to an existing sample, the best by value is used.
        """
        v_sorting_index = np.argsort(-Value, axis=0)
        for candidate in v_sorting_index:
            if not self.closeness(X[candidate], X_Sample):
                return candidate
        return v_sorting_index[0]

    # ------------------------------------------------------------------
    # acquisition functions
    # ------------------------------------------------------------------
    def Upper_Confidence_Bound(self, X):
        """Select the row of ``X`` maximising the upper confidence bound.

        Compute the upper confidence bound as per UCL paper algorithm 2
        GP-BUCB: C used here is C1 value which empirically works well.

        Returns the selected row of ``X`` and the surrogate mean at that row.
        """
        mu, sigma = self._predict_columns(X)
        beta = self._batch_beta(sigma, self._exploration_alpha())
        Value = mu + (beta) * sigma
        x_index = np.argmax(Value)
        return X[x_index], mu[x_index]

    def Upper_Confidence_Bound_Remove_Duplicates(self, X, X_Sample,
                                                 batch_size):
        """Like ``Upper_Confidence_Bound`` but skip rows already in ``X_Sample``.

        For ``batch_size == 1`` the plain weight alpha is used, otherwise the
        batch weight beta.
        """
        mu, sigma = self._predict_columns(X)
        alpha = self._exploration_alpha()
        if batch_size == 1:
            exploration_factor = alpha
        else:
            exploration_factor = self._batch_beta(sigma, alpha)
        Value = mu + exploration_factor * sigma
        return self.remove_duplicates(X, X_Sample, mu, Value)

    def Get_Upper_Confidence_Bound(self, X):
        """Return the acquisition values ``mu + alpha * sigma`` as an (n, 1) array."""
        mu, sigma = self._predict_columns(X)
        alpha = self._exploration_alpha()
        return mu + (alpha) * sigma

    # ------------------------------------------------------------------
    # duplicate handling
    # ------------------------------------------------------------------
    def remove_duplicates(self, X, X_Sample, mu, Value):
        """Return the best non-duplicate x together with its mean value.

        Used in the batch setting, where the mean is appended as a
        hallucinated observation.
        """
        best = self._best_novel_index(X, X_Sample, Value)
        return X[best], mu[best]

    def remove_duplicates_serial(self, X, X_Sample, Value):
        """Return the best non-duplicate x only.

        Intended for the serial and clustering settings: no mean is returned
        and no hallucination is considered.
        """
        return X[self._best_novel_index(X, X_Sample, Value)]

    def closeness(self, x_optimal, X_Sample):
        """Return True if ``x_optimal`` is within 1e-3 (L1 distance) of a row of ``X_Sample``."""
        tolerance = 1e-3
        for i in range(X_Sample.shape[0]):
            if np.sum(np.absolute(X_Sample[i] - x_optimal)) < tolerance:
                return True
        return False

    # ------------------------------------------------------------------
    # batch selection
    # ------------------------------------------------------------------
    def get_next_batch(self, X, Y, X_tries, batch_size=3):
        """Return the next batch of points to try, as a (batch_size, n_features) array.

        After each pick the surrogate mean at the chosen point is appended as
        a stand-in observation before the surrogate is refitted.
        """
        X_temp = X
        Y_temp = Y
        batch = []
        for _ in range(batch_size):
            self.iteration_count = self.iteration_count + 1
            self.surrogate.fit(X_temp, Y_temp)
            X_next, u_value = self.Upper_Confidence_Bound_Remove_Duplicates(
                X_tries, X_temp, batch_size)
            Y_temp = np.vstack((Y_temp, u_value.reshape(-1, 1)))
            X_temp = np.vstack((X_temp, X_next))
            batch.append([X_next])
        return np.array(batch).reshape(-1, X.shape[1])

    def get_next_batch_clustering(self, X, Y, X_tries, batch_size=3):
        """Select the next batch by clustering the acquisition function.

        The acquisition values are split into 4 clusters and the cluster with
        the largest centre is kept. Its points are then clustered in the input
        space into ``batch_size`` groups, and the best point of each group is
        returned. For ``batch_size == 1`` the best non-duplicate point is
        returned instead.
        """
        self.surrogate.fit(X, Y)
        self.iteration_count = self.iteration_count + 1
        Acquition = self.Get_Upper_Confidence_Bound(X_tries)

        if batch_size > 1:
            kmeans = KMeans(n_clusters=4, random_state=0).fit(Acquition)
            # select the best cluster in the acquisition function
            in_best_cluster = kmeans.labels_ == np.argmax(
                kmeans.cluster_centers_)
            x_best_acq_domain = np.asarray(X_tries)[in_best_cluster]
            x_best_acq_value = Acquition[in_best_cluster]

            # Do the domain space based clustering on the best points
            domain_labels = KMeans(
                n_clusters=batch_size,
                random_state=0).fit(x_best_acq_domain).labels_

            # select the best X from each partition of the input space
            batch = []
            for label in range(batch_size):
                members = domain_labels == label
                x_index = np.argmax(x_best_acq_value[members])
                batch.append([x_best_acq_domain[members][x_index]])
        else:  # batch_size == 1
            batch = [[self.remove_duplicates_serial(X_tries, X, Acquition)]]

        return np.array(batch).reshape(-1, X.shape[1])

    def predict(self, X):
        """Return the surrogate's prediction and standard deviation at ``X``."""
        pred_y, sigma = self.surrogate.predict(X, return_std=True)
        return pred_y, sigma

    def fit(self, X, Y):
        """Fit the surrogate on the X and Y values."""
        self.surrogate.fit(X, Y)
data = pd.read_csv(data_file) list(data.columns) Y = data["target"] data = data.drop(columns=['ID', 'target']) except Exception as e: print(e) # Instanciate a Gaussian Process model kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)) gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9) # Fit to data using Maximum Likelihood Estimation of the parameters gp.fit(data, Y) # Make the prediction on the meshed x-axis (ask for MSE as well) y_pred, sigma = gp.predict(data, return_std=True) from sklearn.metrics import mean_squared_error from math import sqrt y_pred = np.around(y_pred, decimals=2) ypredDF = pd.DataFrame(y_pred.round(), columns=["YHat"]) ypredDF["Y"] = Y ypredDF.shape trainrms = sqrt(mean_squared_error(ypredDF['Y'], ypredDF['YHat'])) print("RBF : trainrms {}".format(trainrms)) print(ypredDF.loc[ypredDF['Y'] == ypredDF['YHat']])
class R_MODEL(object):
    """Learned model of the assembly task, used to score PD-gain candidates.

    DDPG roll-outs on the real environment provide data for two Gaussian
    processes: one from action to position change, one from position to
    contact force. ``trajectory`` then rolls a policy out on those models.
    """

    def __init__(self, env, context_dim, pd_dim=12):
        self.env = env
        self.context_dim = context_dim
        self.pd_dim = pd_dim
        # 12 for single-hole assembly:
        # Px, Py, Pz, Ox, Oy, Oz, Fx, Fy, Fz, Tx, Ty, Tz
        self.observation_dim = 12
        # dimension of the DDPG action; 6 for this task: Px, Py, Pz, Ox, Oy, Oz
        self.action_dim = 6
        self.action_bound = 1  # upper/lower bound of the DDPG action
        self.MAX_EP = 5  # number of DDPG training episodes per context
        self.MAX_EP_STEPS = 400  # maximized DDPG step number
        self.var = 0.6  # control exploration

        # New gpr model
        self.kernel = DotProduct() + WhiteKernel()
        # the GP model between dA and dX
        self.state_transfer_model = GaussianProcessRegressor(
            kernel=self.kernel, random_state=0)
        # the GP model between POS and F
        self.contact_model = GaussianProcessRegressor(kernel=self.kernel,
                                                      random_state=0)

        # New DDPG
        self.ddpg = DDPG(self.observation_dim, self.action_dim,
                         self.action_bound)

    def train_state_model(self, gpreps, n):
        """Run DDPG training to collect data, then fit both GP models."""
        # run DDPG training and collect data for model learning
        memory = self.__run_ddpg(gpreps, n)

        # process the memory for forward training
        I, J, X, Y, U = [], [], [], [], []
        # dA: action in real space of each step
        # dX: position change during each step
        # POS: position of each step
        # F: contact force of each step
        dA, dX, POS, F = [], [], [], []
        uk = np.array([0, 0, 0, 0, 0, 0])
        uk_1 = np.array([0, 0, 0, 0, 0, 0])
        for t in range(len(memory)):
            # episode number, step number, then the stored transition
            episode, step, kp, kd, x, y, u, _reward, _done = memory[t]
            I.append(episode)
            J.append(step)
            X.append(x)
            Y.append(y)
            U.append(u)
            POS.append(X[t][:6])
            F.append(X[t][-6:])
            if t >= 2:
                consecutive = (I[t] == I[t - 1] and I[t - 1] == I[t - 2]
                               and J[t] == J[t - 1] + 1
                               and J[t - 1] == J[t - 2] + 1)
                if consecutive:
                    # the 3 most recent records are 3 consecutive steps of a
                    # single episode
                    dX.append((Y[t] - X[t])[:6])
                    # reproduce the PD control
                    rk = np.array([0, 0, 15, 0, 0, 0])
                    ek = rk - np.array(X[t][-6:])
                    ek_1 = rk - np.array(X[t - 1][-6:])
                    ek_2 = rk - np.array(X[t - 2][-6:])
                    # discrete PD algorithm
                    uk = uk_1 + kp * (ek - ek_1) + kd * (ek - 2 * ek_1 + ek_2)
                    uk_1 = uk
                    # NOTE(review): `da` aliases `uk` (and `uk_1`), so the
                    # rounding below also rounds the value carried into the
                    # next step -- kept as written, confirm it is intended.
                    da = uk
                    for k in range(6):
                        da[k] = round(da[k], 4)
                    da = da + da * U[t]
                    dA.append(da)
                else:
                    # if the episode ends, renew variables
                    uk = np.array([0, 0, 0, 0, 0, 0])
                    uk_1 = np.array([0, 0, 0, 0, 0, 0])
        dA, dX, POS, F = np.array(dA), np.array(dX), np.array(POS), np.array(F)

        # reward model training
        self.state_transfer_model = GaussianProcessRegressor(
            kernel=self.kernel, random_state=0).fit(dA, dX)
        self.contact_model = GaussianProcessRegressor(
            kernel=self.kernel, random_state=0).fit(POS, F)

    def __run_ddpg(self, gpreps, n):
        """Run ``n`` DDPG training cycles and return the DDPG replay buffer."""
        # n training cycles
        for _ in range(n):
            self.env.reset()
            # Choose pd parameters
            s = [1, 2]  # NOTE(review): placeholder context flagged by the author
            w = gpreps.choose_action(s)  # Kp = w[:, :6], Kd = w[:, 6:]
            kp = w[:, :6][0]
            kd = w[:, 6:][0]
            # NOTE(review): arguments are passed as (kd, kp) -- confirm the
            # order against env.pd_control.
            self.env.pd_control(kd, kp)

            # Start DDPG training
            for episode in range(self.MAX_EP):
                self.env.restart()
                observation = self.env.init_state
                ep_reward = 0
                for step in range(self.MAX_EP_STEPS):
                    # Add exploration noise
                    action = self.ddpg.select_action(observation)
                    action = np.clip(np.random.normal(action, self.var),
                                     -self.action_bound, self.action_bound)

                    # Observe and store
                    (observation_, uncode_observation, reward, done,
                     safe) = self.env.step(action)
                    self.ddpg.store_transition(episode, step, kp, kd,
                                               observation, observation_,
                                               action, reward, done)

                    # Sample and learn
                    if self.ddpg.pointer > 200:
                        self.ddpg.train()
                        self.var *= .9995  # decay the action randomness

                    # update data
                    observation = observation_
                    ep_reward += reward

                    # classify the outcome
                    if not safe:
                        print('Episode', episode + 1, 'Assembly Failed',
                              'step', step, 'reward', ep_reward)
                        break
                    if done:
                        print('Episode', episode + 1, 'Assembly Finished',
                              'step', step, 'reward', ep_reward)
                        break
                    if step == self.MAX_EP_STEPS - 1:
                        print('Episode:', episode + 1,
                              ' Assembly Unfinished', 'reward', ep_reward)
                        break
        return self.ddpg.replay_buffer

    def trajectory(self, context, w):
        """Roll out an artificial trajectory on the GP models; return its reward.

        ``w`` holds the PD gains: Kp in ``w[:, :6]`` and Kd in ``w[:, 6:]``.
        ``context`` is accepted but not used here.
        """
        # set pd parameters
        kp = w[:, :6][0]
        kd = w[:, 6:][0]
        self.env.pd_control(kd, kp)

        # Start artificial trajectory
        observation = np.array([
            0., -0.327, -53.77, 0., 0., 0., -0.001, 0., -0.604, 0., 0.001, 0.
        ])  # init observation
        ep_reward = 0
        for j in range(self.MAX_EP_STEPS):
            action = self.ddpg.select_action(observation)
            action = np.clip(np.random.normal(action, self.var),
                             -self.action_bound, self.action_bound)
            action = cal.actions(observation, action, True)
            ds = self.state_transfer_model.predict(np.array([action]),
                                                   return_std=0,
                                                   return_cov=0)[0]
            new_pos = ds + observation[:6]
            new_force = self.contact_model.predict(np.array([new_pos]),
                                                   return_std=0,
                                                   return_cov=0)[0]
            observation_ = np.hstack((new_pos, new_force))

            # judge the safety and done, then calculate the reward
            reward = -0.01
            ep_reward += reward
            unsafe = (observation_[6] >= 50 or observation_[7] >= 50
                      or observation_[8] >= 200 or observation_[9] >= 3
                      or observation_[10] >= 3 or observation_[11] >= 3)
            if unsafe:
                reward = (-1 + (observation_[2] + 52.7) / 40)
                ep_reward += reward
                break
            if observation_[2] > -12:
                reward = 1 - j / self.MAX_EP_STEPS
                ep_reward += reward
                break

            # Advance the simulated state. Without this every step was
            # predicted from the initial observation and never progressed.
            observation = observation_
        return ep_reward
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel from sklearn.metrics import mean_squared_error kernel = DotProduct() + WhiteKernel() kernel_2 = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-1, 1e-1)) gp = GaussianProcessRegressor(kernel=kernel, alpha=2, n_restarts_optimizer=5, random_state=0, normalize_y=True) gp.fit(x_train[:, 0].reshape((-1, 1)), y_train) y_pred, sigma = gp.predict(x_valid[:, 0].reshape((-1, 1)), return_std=True) print("Mean squared Error: ", mean_squared_error(y_pred, y_valid)) #print(gp.score(x_valid,y_valid)) #Plotting the graph with confidence interval plt.figure() plt.title("XGBoost Regression") plt.plot(np.arange(len(y_valid)), y_valid, 'r:', label='original') plt.plot(np.arange(len(y_valid)), y_pred, label='Prediction') x_v = np.arange(len(y_valid)) plt.fill(np.concatenate([x_v, x_v[::-1]]), np.concatenate( [y_pred - 1.9600 * sigma, (y_pred + 1.9600 * sigma)[::-1]]), alpha=.5, fc='b', ec='None',
gp_matern.fit(X, y)
gp_lls.fit(X, y)

# print() with a single pre-formatted string gives the same output on
# Python 2 and Python 3; the bare print statement only parses on Python 2.
print("Learned kernel Matern: %s" % gp_matern.kernel_)
print("Log-marginal-likelihood Matern: %s"
      % gp_matern.log_marginal_likelihood(gp_matern.kernel_.theta))

print("Learned kernel LLS: %s" % gp_lls.kernel_)
print("Log-marginal-likelihood LLS: %s"
      % gp_lls.log_marginal_likelihood(gp_lls.kernel_.theta))

# Compute GP mean and standard deviation on test data
X_ = np.linspace(-1, 1, 500)
y_mean_lls, y_std_lls = gp_lls.predict(X_[:, np.newaxis], return_std=True)
y_mean_matern, y_std_matern = \
    gp_matern.predict(X_[:, np.newaxis], return_std=True)

plt.figure(figsize=(7, 7))
plt.subplot(2, 1, 1)
plt.plot(X_, f(X_), c='k', label="true function")
plt.scatter(X[:, 0], y, color='k', label="samples")
# Mean curve with a one-standard-deviation band for each model.
plt.plot(X_, y_mean_lls, c='r', label="GP LLS")
plt.fill_between(X_, y_mean_lls - y_std_lls, y_mean_lls + y_std_lls,
                 alpha=0.5, color='r')
plt.plot(X_, y_mean_matern, c='b', label="GP Matern")
plt.fill_between(X_, y_mean_matern - y_std_matern,
                 y_mean_matern + y_std_matern, alpha=0.5, color='b')
plt.legend(loc="best")
plt.title("Comparison of learned models")
# Linear model: report the score, then MSE, then plot.
# The score is computed once; it used to be computed a second time with
# identical arguments.
score = reg.score(X_test, y_test)
print(score)
pred = reg.predict(X_test)
print(mean_squared_error(y_test, pred))
visualize_scatterplot(pred, y_test, score, method='linear')

# MLP
regr = MLPRegressor(random_state=1, max_iter=10000).fit(X_train, y_train)
pred = regr.predict(X_test)
score = regr.score(X_test, y_test)
print(mean_squared_error(y_test, pred))
print(score)
visualize_scatterplot(pred, y_test, score, method="MLP")

# Gaussian Process
kernel = DotProduct() + WhiteKernel()
gpr = GaussianProcessRegressor(kernel=kernel,
                               random_state=0).fit(X_train, y_train)
pred = gpr.predict(X_test)
score = gpr.score(X_test, y_test)
print(mean_squared_error(y_test, pred))
print(score)
visualize_scatterplot(pred, y_test, score, method="Gaussian")
def test_GP_example(self):
    """Run the 1-D GP regression example: a noiseless fit, then a noisy fit.

    Each case gets its own figure showing the target function, the
    observations, the GP prediction and a 95% band of 1.96 * sigma.
    """
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

    np.random.seed(1)

    def f(x):
        """The function to predict."""
        return x * np.sin(x)

    # Raw string: "\," and "\s" are not valid escapes in a normal literal.
    target_label = r'$f(x) = x\,\sin(x)$'

    # ----------------------------------------------------------------------
    #  First the noiseless case
    X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T

    # Observations
    y = f(X).ravel()

    # Mesh the input space for evaluations of the real function, the
    # prediction and its MSE
    x = np.atleast_2d(np.linspace(0, 10, 1000)).T

    # Instantiate a Gaussian Process model
    kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, y)

    # Make the prediction on the meshed x-axis (ask for MSE as well)
    y_pred, sigma = gp.predict(x, return_std=True)

    # Plot the function, the prediction and the 95% confidence interval based
    # on the MSE
    plt.figure()
    plt.plot(x, f(x), 'r:', label=target_label)
    plt.plot(X, y, 'r.', markersize=10, label=u'Observations')
    plt.plot(x, y_pred, 'b-', label=u'Prediction')
    plt.fill(np.concatenate([x, x[::-1]]),
             np.concatenate([y_pred - 1.9600 * sigma,
                             (y_pred + 1.9600 * sigma)[::-1]]),
             alpha=.5, fc='b', ec='None', label='95% confidence interval')
    plt.xlabel('$x$')
    plt.ylabel('$f(x)$')
    plt.ylim(-10, 20)
    plt.legend(loc='upper left')

    # ----------------------------------------------------------------------
    # now the noisy case
    X = np.linspace(0.1, 9.9, 20)
    X = np.atleast_2d(X).T

    # Observations and noise
    y = f(X).ravel()
    dy = 0.5 + 1.0 * np.random.random(y.shape)
    noise = np.random.normal(0, dy)
    y += noise

    # Instantiate a Gaussian Process model.
    # `alpha` is added to the diagonal of the kernel matrix and acts as the
    # noise variance of each observation, so it is dy ** 2. The previous
    # (dy / y) ** 2 blew up wherever y was close to zero.
    gp = GaussianProcessRegressor(kernel=kernel, alpha=dy ** 2,
                                  n_restarts_optimizer=10)

    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, y)

    # Make the prediction on the meshed x-axis (ask for MSE as well)
    y_pred, sigma = gp.predict(x, return_std=True)

    # Plot on a fresh figure so the noisy case is not drawn over the
    # noiseless one.
    plt.figure()
    plt.plot(x, f(x), 'r:', label=target_label)
    plt.errorbar(X.ravel(), y, dy, fmt='r.', markersize=10,
                 label=u'Observations')
    plt.plot(x, y_pred, 'b-', label=u'Prediction')
    plt.fill(np.concatenate([x, x[::-1]]),
             np.concatenate([y_pred - 1.9600 * sigma,
                             (y_pred + 1.9600 * sigma)[::-1]]),
             alpha=.5, fc='b', ec='None', label='95% confidence interval')
    plt.xlabel('$x$')
    plt.ylabel('$f(x)$')
    plt.ylim(-10, 20)
    plt.legend(loc='upper left')

    plt.show()
# Autoscale inputs and targets to zero mean and unit standard deviation;
# the prediction inputs are scaled with the training statistics.
mean_of_y = y.mean()
std_of_y = y.std()
autoscaled_x = (x - x.mean()) / x.std()
autoscaled_x_for_prediction = (x_for_prediction - x.mean()) / x.std()
autoscaled_y = (y - y.mean()) / y.std()

# Gaussian process regression, one model per target variable.
estimated_y_for_prediction = np.zeros(
    [x_for_prediction.shape[0], number_of_y_variables])
std_of_estimated_y_for_prediction = np.zeros(
    [x_for_prediction.shape[0], number_of_y_variables])
plt.rcParams['font.size'] = 18
for y_number in range(number_of_y_variables):
    model = GaussianProcessRegressor(ConstantKernel() * RBF() + WhiteKernel())
    model.fit(autoscaled_x, autoscaled_y.iloc[:, y_number])

    # Predictions for the new samples, stored in autoscaled units.
    (estimated_y_for_prediction_tmp,
     std_of_estimated_y_for_prediction_tmp) = model.predict(
         autoscaled_x_for_prediction, return_std=True)
    estimated_y_for_prediction[:, y_number] = estimated_y_for_prediction_tmp
    std_of_estimated_y_for_prediction[:, y_number] = (
        std_of_estimated_y_for_prediction_tmp)

    # Training-set estimates, mapped back to the original scale of y.
    estimated_y = model.predict(autoscaled_x)
    estimated_y = (estimated_y * std_of_y.iloc[y_number]
                   + mean_of_y.iloc[y_number])

    # Actual-versus-estimated scatter with a diagonal reference line, padded
    # by 5% of the value range on both ends.
    plt.figure(figsize=figure.figaspect(1))
    plt.scatter(y.iloc[:, y_number], estimated_y)
    y_max = max(y.iloc[:, y_number].max(), estimated_y.max())
    y_min = min(y.iloc[:, y_number].min(), estimated_y.min())
    axis_low = y_min - 0.05 * (y_max - y_min)
    axis_high = y_max + 0.05 * (y_max - y_min)
    plt.plot([axis_low, axis_high], [axis_low, axis_high], 'k-')
    plt.ylim(axis_low, axis_high)
normalize_y=False, optimizer='fmin_l_bfgs_b', random_state=None) ''' OKish Parameter Values gp = GaussianProcessRegressor(alpha=1e-7, copy_X_train=True, kernel=1**2 + Matern(length_scale=0.2, nu=0.5) + WhiteKernel(noise_level=0.1), n_restarts_optimizer=10, normalize_y=False, optimizer='fmin_l_bfgs_b', random_state=None) ''' # Fit to data using Maximum Likelihood Estimation of the parameters gp.fit(x_train, y_train) #x = np.linspace(min(X),max(X),len(X))[:,np.newaxis] y_pred, sigma = gp.predict(x_test, return_std=True) ## Percentage within the specified factor Fac = 5 IDX = y_pred / y_test >= 1 K = y_pred[IDX] Q = y_test[IDX] L = y_pred[~IDX] M = y_test[~IDX] Upper_indices = [i for i, x in enumerate(K <= Fac * Q) if x == True] Lower_indices = [i for i, x in enumerate(L >= M / Fac) if x == True] Percent_within_Fac = (len(Upper_indices) + len(Lower_indices)) / len(y_pred) * 100 print("Percentage captured within a factor of {} = {:.2f}".format( Fac, Percent_within_Fac))
plt.show()

#################
# SKLearn equivalent method

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel, RBF

rbf = ConstantKernel(1.0) * RBF(length_scale=1.0)

# Reuse training data from previous 1D example
gpr = GaussianProcessRegressor(kernel=rbf, alpha=noise**2).fit(
    X_train, Y_train)

# Compute posterior predictive mean and covariance
mu_s, cov_s = gpr.predict(X, return_cov=True)

# Obtain optimized kernel parameters: k1 is the constant factor of the
# product kernel, k2 the RBF factor.
constant_params = gpr.kernel_.k1.get_params()
rbf_params = gpr.kernel_.k2.get_params()
l = rbf_params['length_scale']
sigma_f = np.sqrt(constant_params['constant_value'])

# Compare with previous results
assert np.isclose(l_opt, l)
assert np.isclose(sigma_f_opt, sigma_f)

# Plot the results
plot_gp(mu_s, cov_s, X, X_train=X_train, Y_train=Y_train)

############
# https://sheffieldml.github.io/GPy/
def test_predict_cov_vs_std(kernel):
    """Check that the predicted std.-dev. matches the covariance diagonal.

    A regressor with the given ``kernel`` is fitted on ``(X, y)`` and queried
    on ``X2`` twice: once for the full covariance and once for the standard
    deviation. The square root of the covariance diagonal must agree with
    the standard deviation.
    """
    model = GaussianProcessRegressor(kernel=kernel)
    model.fit(X, y)

    _, cov = model.predict(X2, return_cov=True)
    _, std = model.predict(X2, return_std=True)

    std_from_cov = np.sqrt(np.diag(cov))
    assert_almost_equal(std_from_cov, std)
# print("Log-marginal-likelihood: %.3f"
#       % gp_test.log_marginal_likelihood(gp_test.kernel_.theta))

# Fit the GP on the transformed training inputs and report the optimized
# kernel together with its log-marginal-likelihood.
gp_test.fit(XT, y)
print("GPML kernel: %s" % gp_test.kernel_)
print("Log-marginal-likelihood: %.3f"
      % gp_test.log_marginal_likelihood(gp_test.kernel_.theta))

# Build 15 query points: the first feature sweeps 0.5, 1.5, ..., 14.5 while
# the remaining six features are held at 0.1.
X_ = []
for i in range(15):
    X_.append([i + 0.5, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])
XT_ = scaler.transform(X_)
# Written as a single %-formatted argument so the output is the same on
# Python 2 and Python 3 (the former ``print 'X_ ', XT_`` statement is a
# SyntaxError on Python 3).
print("X_  %s" % (XT_,))
y_pred, y_std = gp_test.predict(XT_, return_std=True)

# Plot the predict result: training points, predicted mean and a band of
# one standard deviation around it, all against the first feature.
X = np.array(X)
y = np.array(y)
X_ = np.array(X_)
plt.scatter(X[:, 0], y, c='k')
plt.plot(X_[:, 0], y_pred)
plt.fill_between(X_[:, 0], y_pred - y_std, y_pred + y_std,
                 alpha=0.5, color='k')
plt.xlim(X_[:, 0].min(), X_[:, 0].max())
plt.xlabel("x")
plt.ylabel(r"u")
plt.title(r"Test SquareExpWithBool Kernel")
plt.tight_layout()
plt.show()
# :class:`~sklearn.gaussian_process.kernels.RBF` will serve at fitting the # non-linearity between the data and the target. # # However, we will show that the hyperparameter space contains several local # minima. It will highlights the importance of initial hyperparameter values. # # We will create a model using a kernel with a high noise level and a large # length scale, which will explain all variations in the data by noise. from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels import RBF, WhiteKernel kernel = 1.0 * RBF(length_scale=1e1, length_scale_bounds=( 1e-2, 1e3)) + WhiteKernel(noise_level=1, noise_level_bounds=(1e-5, 1e1)) gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.0) gpr.fit(X_train, y_train) y_mean, y_std = gpr.predict(X, return_std=True) # %% plt.plot(X, y, label="Expected signal") plt.scatter(x=X_train[:, 0], y=y_train, color="black", alpha=0.4, label="Observations") plt.errorbar(X, y_mean, y_std) plt.legend() plt.xlabel("X") plt.ylabel("y") _ = plt.title( f"Initial: {kernel}\nOptimum: {gpr.kernel_}\nLog-Marginal-Likelihood: " f"{gpr.log_marginal_likelihood(gpr.kernel_.theta)}",
#plt.ylim(0, 2000) #plt.legend(loc='upper right') #plt.show() x_T = np.atleast_2d(np.linspace(-0.07, 0.01, 1000)).T x_n = np.atleast_2d(np.linspace(-0.075, 0.01, 1000)).T kernel_T = C(10.0, (1e-5, 1e5)) * RBF(10.0, (1e-5, 1e5)) kernel_n = C(0.01, (1e-5, 1e-1)) * RBF(0.01, (1e-5, 1e5)) gp_L_mode_T = GaussianProcessRegressor(kernel=kernel_T, alpha=(200.*noise(L_mode_T_x))**2., n_restarts_optimizer=10) L_mode_T_X = np.atleast_2d(L_mode_T_x).T # Fit to data using Maximum Likelihood Estimation of the parameters gp_L_mode_T.fit(L_mode_T_X, L_mode_T_y) # Make the prediction on the meshed x-axis (ask for MSE as well) L_mode_T_yp, L_mode_T_sigma = gp_L_mode_T.predict(x_T, return_std=True) gp_H_mode_T = GaussianProcessRegressor(kernel=kernel_T, alpha=(200.*noise(H_mode_T_x))**2., n_restarts_optimizer=10) H_mode_T_X = np.atleast_2d(H_mode_T_x).T # Fit to data using Maximum Likelihood Estimation of the parameters gp_H_mode_T.fit(H_mode_T_X, H_mode_T_y) # Make the prediction on the meshed x-axis (ask for MSE as well) H_mode_T_yp, H_mode_T_sigma = gp_H_mode_T.predict(x_T, return_std=True) gp_I_mode_T = GaussianProcessRegressor(kernel=kernel_T, alpha=(200.*noise(I_mode_T_x))**2., n_restarts_optimizer=10) I_mode_T_X = np.atleast_2d(I_mode_T_x).T # Fit to data using Maximum Likelihood Estimation of the parameters gp_I_mode_T.fit(I_mode_T_X, I_mode_T_y) # Make the prediction on the meshed x-axis (ask for MSE as well)
gp_kernel = ExpSineSquared(1.0, 5.0, periodicity_bounds=(1e-2, 1e1)) + WhiteKernel(1e-1) gpr = GaussianProcessRegressor(kernel=gp_kernel) stime = time.time() gpr.fit(X, y) print("Time for GPR fitting: %.3f" % (time.time() - stime)) # Predict using kernel ridge X_plot = np.linspace(0, 20, 10000)[:, None] stime = time.time() y_kr = kr.predict(X_plot) print("Time for KRR prediction: %.3f" % (time.time() - stime)) # Predict using gaussian process regressor stime = time.time() y_gpr = gpr.predict(X_plot, return_std=False) print("Time for GPR prediction: %.3f" % (time.time() - stime)) stime = time.time() y_gpr, y_std = gpr.predict(X_plot, return_std=True) print("Time for GPR prediction with standard-deviation: %.3f" % (time.time() - stime)) # Plot results plt.figure(figsize=(10, 5)) lw = 2 plt.scatter(X, y, c="k", label="data") plt.plot(X_plot, np.sin(X_plot), color="navy", lw=lw, label="True") plt.plot(X_plot, y_kr, color="turquoise",
def test_scikit_GP_SNRefsdal(self):
    """Fit a default scikit-learn GP to an SN Refsdal light curve and plot it.

    Reads the curves of image ``S1`` via ``snrefsdal.read_curves``, takes the
    ``F160W`` band, fits a ``GaussianProcessRegressor`` with its default
    kernel to magnitude versus time, and shows the observations, the GP mean
    and a 95% confidence band (1.96 standard deviations).
    """
    from sklearn.gaussian_process import GaussianProcessRegressor

    # add data
    image = "S1"
    bname = 'F160W'
    curves = snrefsdal.read_curves(snrefsdal.path_data, image)
    lc = curves.get(bname)

    t = lc.Time
    y = lc.Mag
    yerr = lc.MagErr

    # Instantiate a Gaussian Process model with the default kernel
    gp = GaussianProcessRegressor()

    # scikit-learn expects a 2-D (n_samples, n_features) input, so the time
    # axis is turned into a single-column matrix.
    X = np.atleast_2d(t).T

    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, y)

    # Predict on the same 2-D input that was used for fitting (the 1-D ``t``
    # is rejected by ``predict``); ask for the standard deviation as well.
    y_pred, sigma = gp.predict(X, return_std=True)

    fig = plt.figure()
    ax = fig.add_axes((0.1, 0.3, 0.8, 0.65))
    # Magnitudes: brighter is smaller, so the y axis is flipped.
    ax.invert_yaxis()
    ax.plot(t, y, color='blue', label='L bol', lw=2.5)
    ax.errorbar(t, y, yerr=yerr, fmt='o', color='blue',
                label='%s obs.' % bname)
    ax.plot(t, y_pred, '-', color='gray')
    # Closed polygon: lower bound left-to-right, upper bound right-to-left.
    ax.fill(np.concatenate([t, t[::-1]]),
            np.concatenate([y_pred - 1.9600 * sigma,
                            (y_pred + 1.9600 * sigma)[::-1]]),
            alpha=.5, fc='b', ec='None', label='95% confidence interval')
    plt.show()
# Report the optimized kernel and its log-marginal-likelihood, first via the
# kernel hyperparameters and then via log_marginal_likelihood_data on the
# training data.
print("GPML kernel: %s" % gp_test.kernel_)
print("Log-marginal-likelihood: %.3f"
      % gp_test.log_marginal_likelihood(gp_test.kernel_.theta))
print("GPML kernel: %s" % gp_test.kernel_)
print("Log-marginal-likelihood: %.3f"
      % gp_test.log_marginal_likelihood_data(XT, y))

# Time the construction, scaling and prediction of three query points: the
# first feature takes 0.5, 1.5, 2.5 and the other six are held at 0.1.
start_time = time()
X_ = []
for i in range(3):
    X_.append([i + 0.5, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])
XT_ = scaler.transform(X_)
# The prints below use a single %-formatted argument so that they produce
# the same text on Python 2 and Python 3 (the former print statements are a
# SyntaxError on Python 3).
print("X_  %s" % (XT_,))
y_pred, y_std = gp_test.predict(XT_, return_std=True)
print("y_pred:  %s" % (y_pred,))
print("time used for prediction:  %s" % (time() - start_time,))

# Plot the predict result: training points, predicted mean and a band of
# one standard deviation around it, all against the first feature.
X = np.array(X)
y = np.array(y)
X_ = np.array(X_)
plt.scatter(X[:, 0], y, c='k')
plt.plot(X_[:, 0], y_pred)
plt.fill_between(X_[:, 0], y_pred - y_std, y_pred + y_std,
                 alpha=0.5, color='k')
plt.xlim(X_[:, 0].min(), X_[:, 0].max())
plt.xlabel("x")
plt.ylabel(r"u")
plt.title(r"Test SquareExpWithBool Kernel")
# Periodic kernel plus a white-noise term for the GP regressor.
gp_kernel = ExpSineSquared(1.0, 5.0, periodicity_bounds=(1e-2, 1e1)) \
    + WhiteKernel(1e-1)
gpr = GaussianProcessRegressor(kernel=gp_kernel)
# Time the GPR fit.
stime = time.time()
gpr.fit(X, y)
print("Time for GPR fitting: %.3f" % (time.time() - stime))

# Predict using kernel ridge
X_plot = np.linspace(0, 20, 10000)[:, None]
stime = time.time()
y_kr = kr.predict(X_plot)
print("Time for KRR prediction: %.3f" % (time.time() - stime))

# Predict using the Gaussian process regressor (mean only)
stime = time.time()
y_gpr = gpr.predict(X_plot, return_std=False)
print("Time for GPR prediction: %.3f" % (time.time() - stime))

# Predict again, this time also requesting the standard deviation
stime = time.time()
y_gpr, y_std = gpr.predict(X_plot, return_std=True)
print("Time for GPR prediction with standard-deviation: %.3f" % (time.time() - stime))

# Plot results: data, true sine, both model predictions and a band of one
# standard deviation around the GPR mean.
plt.figure(figsize = (10,5))
plt.scatter(X, y, c='k', label='data')
plt.plot(X_plot, np.sin(X_plot), c='k', label='True')
plt.plot(X_plot, y_kr, c='g', label='KRR (%s)' % kr.best_params_)
plt.plot(X_plot, y_gpr, c='r', label='GPR (%s)' % gpr.kernel_)
plt.fill_between(X_plot[:, 0], y_gpr - y_std, y_gpr + y_std, color='r', alpha=0.2)
# heteroscedastic
# Cluster centers of X serve as the prototypes of the heteroscedastic
# noise kernel.
prototypes = KMeans(n_clusters=8).fit(X).cluster_centers_
kernel = (
    C(1.0, (1e-10, 1000))
    * RBF(length_scale=[10., 100.],
          length_scale_bounds=[(1e-3, 1e3), (1e-4, 1e4)])
    + HeteroscedasticKernel.construct(prototypes, 1e-3, (1e-10, 50.0),
                                      gamma=1.0, gamma_bounds="fixed")
)
#gp.fit(X[:, np.newaxis], y)
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=100)

# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(X, y.reshape(-1, 1))  #removing reshape results in a different error

# Make the prediction on the meshed inputs (ask for the standard deviation
# as well)
y_pred, sigma = gp.predict(inputs_x_array, return_std=True)

# 3-D scatter of the predictions together with the training data.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(inputs_x_array[:, 0], inputs_x_array[:, 1], y_pred)
ax.scatter(X[:, 0], X[:, 1], y, color='orange')
ax.set_xlabel('X Label (radius)')
ax.set_ylabel('Y Label (time)')
ax.set_zlabel('Z Label (density)')
plt.show()

# Cut through the prediction grid: every len_x2-th row starting at index_y1.
index_y1 = 3  #only valid until len_x2
print("Time is " + str(inputs_x_array[:, 1][index_y1::len_x2][0]) + "s")
plt.figure()
plt.scatter(inputs_x_array[:, 0][index_y1::len_x2],
            y_pred[index_y1::len_x2])  #from x1_min to x1_max
plt.xlabel('X Label (radius)')
def run(args, bkghist, trainHisto, optKernel):
    """Fit a fixed-kernel GP to a background histogram and run the RooFit fit.

    The GP is fitted on the full histogram (no window is removed in this
    function), so the training arrays are always the full distributions.
    Previously they were only bound when ``args.noWindow`` was true and the
    function failed with a NameError otherwise.

    Parameters
    ----------
    args : namespace providing ``outDir``, the directory the plot is
        written to.
    bkghist : histogram wrapped by ``GPHisto`` and used as the RooFit data.
    trainHisto : histogram handed to ``RooGPBkg`` together with ``bkghist``.
    optKernel : kernel used as-is; hyperparameters are not re-optimized
        (``optimizer=None``).
    """
    GPh = GPHisto(bkghist)

    # The distributions with no window removed.
    X = GPh.getXArr()
    y = GPh.getYArr()
    dy = GPh.getErrArr()

    # Prediction points: the histogram x values themselves.
    x = GPh.getXArr()

    # Per-point noise level is the squared bin error.
    gp = GaussianProcessRegressor(kernel=optKernel,
                                  optimizer=None,
                                  alpha=dy**2)
    gp.fit(X, y)
    print(gp.kernel_)

    y_pred, sigma = gp.predict(x, return_std=True)
    outhist = GPh.getHisto(y_pred, sigma, 'GP Fit')
    #bkg = GPh.getHisto(y, dy, 'Full Background')

    ### RooFit part
    myy = RooRealVar('myy', 'myy', 105, 160)
    #nSig = RooRealVar('nSig','nSig',-200,1000)
    #pdf = RooGP.RooGP("mypdf", "CustomPDF",myy ,nSig, sigMass, trainHisto, dataHisto)
    pdf = RooGPBkg.RooGPBkg("bkgPDF", "bkg only PDF", myy, trainHisto, bkghist)
    data = RooDataHist("dh", "dh", RooArgList(myy), bkghist)

    c1 = TCanvas('c1', 'c1')
    frame = myy.frame()
    data.plotOn(frame, RooFit.MarkerColor(kRed))
    fitResult = pdf.fitTo(data, RooFit.Save())
    pdf.plotOn(frame)
    #fitResult.plotOn(frame)
    frame.Draw()
    #nSig.Print()
    c1.Print(args.outDir + '/test_GP.pdf')
#Run
dy = GPh.getErrArr()

# Prediction grid: 1000 points on [0, 10] as a column vector.
x = np.atleast_2d(np.linspace(start=0., stop=10, num=1000)).T

#kernel = C(1.0, (sigmaMin, 1e5)) * RBF(2.0, (lMin, 1e3)) #squared exponential kernel
# The bounds must be passed as one (lower, upper) tuple; passing them as two
# separate positional arguments gives C one argument too many.
# NOTE(review): C is presumably sklearn's ConstantKernel -- confirm against
# the file's imports.
kernel = C(1.0, (1e-3, 1e5)) * FallExp() * Gibbs()

# Per-point noise level is the squared bin error.
gp = GaussianProcessRegressor(kernel=kernel,
                              #optimizer=None,
                              alpha=(dy**2),
                              n_restarts_optimizer=15)
gp.fit(X, y)
print(gp.kernel_)

y_pred, sigma = gp.predict(x, return_std=True)

# Store the prediction in a histogram with one bin per grid point; the bin
# error is 1.96 standard deviations. ROOT bins are numbered from 1.
outhist = TH1F('GP', 'GP', 1000, 0, 10)
for index, cont in enumerate(y_pred):
    outhist.SetBinContent(index + 1, cont)
    outhist.SetBinError(index + 1, 1.96 * sigma[index])

canv = TCanvas('plot')
canv.cd()

# Rescale the function histogram so that integral times bin width matches
# that of the data histogram.
funcHist = func.GetHistogram()
funcHist.Scale(hist.Integral() * hist.GetBinWidth(2)
               / (funcHist.Integral() * funcHist.GetBinWidth(2)))
hist.SetLineColor(kBlue)
hist.SetMarkerStyle(20)
class SurrogateACESOptimizer(ACESOptimizer):
    """ACES optimizer that maximizes a GP surrogate of the entropy-search
    acquisition instead of evaluating the acquisition directly.

    Parameters
    ----------
    context_boundaries : forwarded to ``ACESOptimizer``.
    n_context_samples : number of representative contexts (k-means centers)
        for which an entropy-search instance is created.
    kappa : weight of the uncertainty term in the surrogate objective.
    active : forwarded to ``ACESOptimizer``.
    **kwargs : forwarded to ``ACESOptimizer``.
    """

    def __init__(self, context_boundaries, n_context_samples, kappa,
                 active=True, **kwargs):
        super(SurrogateACESOptimizer, self).__init__(
            context_boundaries=context_boundaries, active=active, **kwargs)
        self.kappa = kappa
        self.n_context_samples = n_context_samples

    def init(self, n_params, n_context_dims):
        """Delegate initialization to the base class."""
        super(SurrogateACESOptimizer, self).init(n_params, n_context_dims)

    def _determine_contextparams(self, optimizer):
        """Select context and params jointly using ACES."""
        n_ctx = self.context_dims
        lower = self.cx_boundaries[:, 0]
        upper = self.cx_boundaries[:, 1]

        # Choose the first samples uniform randomly
        if len(optimizer.X_) < optimizer.initial_random_samples:
            cx = np.random.uniform(lower, upper)
            return cx[:n_ctx], cx[n_ctx:]

        # Prepare entropy search objective
        self._init_es_ensemble()

        # Generate data for function mapping
        # query_context x query_parameters x eval_context -> entropy reduction
        n_query_points = 500
        n_query_dims = n_ctx + self.dimension
        n_data_dims = 2 * n_ctx + self.dimension
        X = np.empty((n_query_points, n_data_dims))
        y = np.empty(n_query_points)
        for row in range(n_query_points):
            # Select query point and evaluation context randomly
            query = np.random.uniform(lower, upper)
            ind = np.random.choice(self.n_context_samples)
            # Store query point in X and value of entropy-search in y; the
            # trailing columns hold the evaluation context relative to the
            # query context.
            X[row, :n_query_dims] = query
            X[row, n_query_dims:] = self.context_samples[ind] - query[:n_ctx]
            y[row] = self.entropy_search_ensemble[ind](query)[0]

        # Fit GP model to this data
        signal_kernel = C(1.0, (1e-10, 100.0)) * RBF(
            length_scale=(1.0,) * n_data_dims,
            length_scale_bounds=[(0.01, 10.0), ] * n_data_dims)
        noise_kernel = WhiteKernel(1.0, (1e-10, 100.0))
        self.es_surrogate = GaussianProcessRegressor(
            kernel=signal_kernel + noise_kernel)
        self.es_surrogate.fit(X, y)

        # Select query based on mean entropy reduction in surrogate model
        # predictions
        contexts = np.random.uniform(self.context_boundaries[:, 0],
                                     self.context_boundaries[:, 1],
                                     (250, n_ctx))

        def objective_function(cx):
            # One row per sampled evaluation context, all sharing query cx.
            X_query = np.empty((250, n_data_dims))
            X_query[:, :n_query_dims] = cx
            X_query[:, n_query_dims:] = contexts - cx[:n_ctx]
            es_pred, es_cov = self.es_surrogate.predict(X_query,
                                                        return_cov=True)
            exploitation = es_pred.mean()
            exploration = self.kappa * np.sqrt(es_cov.mean())
            return exploitation + exploration

        cx = global_optimization(objective_function,
                                 boundaries=self.cx_boundaries,
                                 optimizer=self.optimizer,
                                 maxf=optimizer.maxf)
        return cx[:n_ctx], cx[n_ctx:]

    def _init_es_ensemble(self):
        """Create one fixed-context copy of the acquisition function per
        representative context."""
        # Determine samples at which CES will be evaluated by
        # 1. uniform random sampling
        n_raw_samples = self.n_context_samples * 25
        self.context_samples = np.random.uniform(
            self.context_boundaries[:, 0],
            self.context_boundaries[:, 1],
            (n_raw_samples, self.context_dims))
        # 2. subsampling via k-means clustering
        kmeans = KMeans(n_clusters=self.n_context_samples, n_jobs=1)
        kmeans.fit(self.context_samples)
        self.context_samples = kmeans.cluster_centers_

        # 3. Create entropy search ensemble
        self.entropy_search_ensemble = []
        for i in range(self.n_context_samples):
            # Collapse the context part of the boundaries onto sample i.
            cx_boundaries_i = np.copy(self.cx_boundaries)
            cx_boundaries_i[:self.context_dims] = \
                self.context_samples[i][:, np.newaxis]
            entropy_search_fixed_context = deepcopy(self.acquisition_function)
            entropy_search_fixed_context.set_boundaries(cx_boundaries_i)
            self.entropy_search_ensemble.append(entropy_search_fixed_context)

    def _create_acquisition_function(self, name, model, **kwargs):
        """Create the acquisition function; only entropy-based ones are
        accepted.

        Raises
        ------
        ValueError
            If ``name`` is neither "EntropySearch" nor "MinimalRegretSearch".
        """
        if name not in ("EntropySearch", "MinimalRegretSearch"):
            raise ValueError("%s acquisition function not supported." % name)
        return create_acquisition_function(name, model, **kwargs)
# Generating sample randomly: 200 points with both coordinates drawn
# uniformly from [-2, 2), and targets y = x0 * sin(3 * x1).
data_x = [
    [4.0 * (-0.5 + random.random()), 4.0 * (-0.5 + random.random())]
    for i in range(200)
]
data_y = [[x[0] * math.sin(3.0 * x[1])] for x in data_x]

# Training GPR (Gaussian Process for Regression) so that GPR can map from x to y.
# You can play with different kernels
#kernel= C(1.0, (1e-3, 1e3)) * RBF(1.0, (0.1, 10.0))
#kernel= C(1.0, (1.0, 1.0)) * RBF(1.0, (0.1, 10.0))
#kernel= C(1.0, (1e-3, 1e3)) * RBF(3.0, (3.0, 3.0))
#kernel= RBF(1.0, (0.1, 10.0))
kernel = RBF(3.0, (3.0, 3.0))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
gp.fit(data_x, data_y)


def f(x):
    """Return the GP prediction for a single input point x."""
    prediction = gp.predict([x])
    return prediction[0, 0]


# Now we can compute y=f(x) for any x
print('f([0.0,0.0])=', f([0.0, 0.0]))
print('f([1.0,1.0])=', f([1.0, 1.0]))
print('f([1.5,2.0])=', f([1.5, 2.0]))

#Plot gp.predict(x)
plot, plot3d = PlotF(f, xmin=[-2, -2], xmax=[2, 2], dx=0.1, show=False)

#Plot data points
sample_columns = np.array(data_x).T
plot3d.scatter(sample_columns[0], sample_columns[1], data_y,
               marker='*', color='red')
plot.show()
def main(_):
  """Train regressors under sampled loss weightings and report the best one.

  Reads the training, testing and validation CSV files named by FLAGS, builds
  an autoencoder over (features, log-price) and `num_parallel_thetas`
  regressors, then for each of `num_theta_batches` batches chooses a batch of
  theta vectors, trains the regressors with them and records validation and
  test metrics. Finally prints the validation metric of the best-observed
  theta (lowest validation metric) and the test metric at the same index.

  Args:
    _: Unused.

  Returns:
    0.
  """
  num_parallel_thetas = FLAGS.num_parallel_thetas
  num_theta_batches = FLAGS.num_theta_batches
  # With uniform weights the autoencoder is not trained at all.
  num_steps_autoencoder = 0 if FLAGS.uniform_weights else TRAINING_STEPS
  input_dim = len(FEATURES)

  training_df = pd.read_csv(FLAGS.training_data_path, header=0, sep=',')
  testing_df = pd.read_csv(FLAGS.testing_data_path, header=0, sep=',')
  validation_df = pd.read_csv(FLAGS.validation_data_path, header=0, sep=',')

  add_price_quantiles(training_df)
  add_price_quantiles(testing_df)
  add_price_quantiles(validation_df)

  # Labels are log-prices; raw prices are kept for the metric computation.
  train_labels = np.log(training_df['price'])
  validation_labels = np.log(validation_df['price'])
  test_labels = np.log(testing_df['price'])
  train_features = training_df[FEATURES]
  validation_features = validation_df[FEATURES]
  test_features = testing_df[FEATURES]
  validation_price = validation_df['price']
  test_price = testing_df['price']

  tf.reset_default_graph()
  x = tf.placeholder(tf.float32, shape=(None, input_dim), name='x')
  y = tf.placeholder(tf.float32, shape=(None, 1), name='y')
  # The autoencoder sees features and label concatenated.
  xy = tf.concat([x, y], axis=1)

  autoencoder_layer1 = tf.layers.dense(
      inputs=xy, units=100, activation=tf.sigmoid)
  autoencoder_embedding_layer = tf.layers.dense(
      inputs=autoencoder_layer1,
      units=FLAGS.embedding_dim,
      activation=tf.sigmoid)
  autoencoder_layer3 = tf.layers.dense(
      inputs=autoencoder_embedding_layer, units=100, activation=tf.sigmoid)
  autoencoder_out_x = tf.layers.dense(
      inputs=autoencoder_layer3, units=input_dim)
  autoencoder_out_y = tf.layers.dense(inputs=autoencoder_layer3, units=1)

  # Reconstruction loss is the sum of the MSE on features and on the label.
  autoencoder_y_loss = tf.losses.mean_squared_error(
      labels=y, predictions=autoencoder_out_y)
  autoencoder_x_loss = tf.losses.mean_squared_error(
      labels=x, predictions=autoencoder_out_x)
  autoencoder_loss = autoencoder_x_loss + autoencoder_y_loss
  autoencoder_optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(
      autoencoder_loss)

  # One regressor (output, loss, train op) per theta in the batch.
  parallel_outputs = []
  parallel_losses = []
  parallel_optimizers = []
  parallel_thetas = tf.placeholder(
      tf.float32,
      shape=(num_parallel_thetas, FLAGS.embedding_dim),
      name='parallel_thetas')
  unstack_parallel_thetas = tf.unstack(parallel_thetas, axis=0)
  embedding = tf.placeholder(
      tf.float32, shape=(None, FLAGS.embedding_dim), name='embedding')

  with tf.variable_scope('regressors'):
    for theta_index in range(num_parallel_thetas):
      output = regressor(x)
      theta = tf.reshape(
          unstack_parallel_thetas[theta_index],
          shape=[FLAGS.embedding_dim, 1])
      optimizer, loss = optimization(output, y, embedding, theta,
                                     LEARNING_RATE)
      parallel_outputs.append(output)
      parallel_losses.append(loss)
      parallel_optimizers.append(optimizer)

  init = tf.global_variables_initializer()
  # Separate initializer so the regressors can be reset for every theta batch
  # without touching the rest of the graph.
  regressors_init = tf.variables_initializer(
      tf.global_variables(scope='regressors'))

  # GP kernel over theta space; the length scale and its bounds are expressed
  # relative to the sampling radius.
  kernel = RBF(
      length_scale=FLAGS.sampling_radius,
      length_scale_bounds=(FLAGS.sampling_radius * 1e-3,
                           FLAGS.sampling_radius * 1e3)) * ConstantKernel(
                               1.0, (1e-3, 1e3))

  # All thetas tried so far and the metrics observed for them.
  thetas = np.zeros(shape=(0, FLAGS.embedding_dim))
  validation_metrics = []
  test_metrics = []

  with tf.Session() as sess:
    sess.run(init)

    # Training autoencoder
    for _ in range(num_steps_autoencoder):
      batch_index = random.sample(range(len(train_labels)), BATCH_SIZE)
      batch_x = train_features.iloc[batch_index, :].values
      batch_y = train_labels.iloc[batch_index].values.reshape(BATCH_SIZE, 1)
      _, _ = sess.run([autoencoder_optimizer, autoencoder_loss],
                      feed_dict={
                          x: batch_x,
                          y: batch_y,
                      })

    # GetCandidatesAlpha (Algorithm 2 in paper)
    for theta_batch_index in range(num_theta_batches):
      sess.run(regressors_init)
      if FLAGS.uniform_weights:
        theta_batch = np.zeros(shape=(num_parallel_thetas, FLAGS.embedding_dim))
      elif theta_batch_index == 0:
        # We first start uniformly.
        theta_batch = sample_from_ball(
            size=(num_parallel_thetas, FLAGS.embedding_dim),
            sampling_radius=FLAGS.sampling_radius)
      else:
        # Pick candidates one at a time by minimizing the lower confidence
        # bound (LCB) of a GP fitted to the metrics observed so far.
        theta_batch = np.zeros(shape=(0, FLAGS.embedding_dim))
        # Work on copies so the hallucinated observations added below do not
        # leak into the real history.
        sample_thetas = np.copy(thetas)
        sample_validation_metrics = validation_metrics[:]
        candidates = sample_from_ball(
            size=(10000, FLAGS.embedding_dim),
            sampling_radius=FLAGS.sampling_radius)
        for theta_index in range(num_parallel_thetas):
          gp = GaussianProcessRegressor(
              kernel=kernel, alpha=1e-4).fit(sample_thetas,
                                             sample_validation_metrics)
          metric_mles, metric_stds = gp.predict(candidates, return_std=True)
          metric_lcbs = metric_mles - FLAGS.p_q_value * metric_stds
          best_index = np.argmin(metric_lcbs)
          best_theta = [candidates[best_index]]
          best_theta_metric_ucb = metric_mles[best_index] \
            + FLAGS.p_q_value * metric_stds[best_index]
          theta_batch = np.concatenate([theta_batch, best_theta])
          # Add candidate to the GP, using its upper confidence bound (UCB)
          # as the assumed metric observation.
          sample_thetas = np.concatenate([sample_thetas, best_theta])
          sample_validation_metrics.append(best_theta_metric_ucb)

      # Training regressors
      for _ in range(TRAINING_STEPS):
        batch_index = random.sample(range(len(train_labels)), BATCH_SIZE)
        batch_x = train_features.iloc[batch_index, :].values
        batch_y = train_labels.iloc[batch_index].values.reshape(BATCH_SIZE, 1)
        # Embedding of the current batch from the autoencoder bottleneck.
        batch_embedding = sess.run(
            autoencoder_embedding_layer, feed_dict={
                x: batch_x,
                y: batch_y,
            })
        _, _ = sess.run(
            [parallel_optimizers, parallel_losses],
            feed_dict={
                x: batch_x,
                y: batch_y,
                embedding: batch_embedding,
                parallel_thetas: theta_batch,
            })

      # Evaluate every regressor of this batch on the validation set.
      parallel_validation_outputs = sess.run(
          parallel_outputs,
          feed_dict={
              x: validation_features.values,
              y: validation_labels.values.reshape(len(validation_labels), 1),
          })
      parallel_validation_metrics = [
          metric(validation_labels, validation_output, validation_price)
          for validation_output in parallel_validation_outputs
      ]
      thetas = np.concatenate([thetas, theta_batch])
      validation_metrics.extend(parallel_validation_metrics)

      # Evaluate the same regressors on the test set; test_metrics stays
      # index-aligned with validation_metrics.
      parallel_test_outputs = sess.run(
          parallel_outputs,
          feed_dict={
              x: test_features.values,
              y: test_labels.values.reshape(len(test_labels), 1),
          })
      parallel_test_metrics = [
          metric(test_labels, test_output, test_price)
          for test_output in parallel_test_outputs
      ]
      test_metrics.extend(parallel_test_metrics)

  # Model selection uses the validation metric only; the test metric is
  # reported for the selected index.
  best_observed_index = np.argmin(validation_metrics)
  print('[metric] validation={}'.format(
      validation_metrics[best_observed_index]))
  print('[metric] test={}'.format(test_metrics[best_observed_index]))

  return 0
# Kernel selected by the hyperparameter search.
best_model_dict = gp_hyper.cv_results_['params'][gp_hyper.best_index_]
best_kernel = best_model_dict["kernel"]

# Define the kernel as product with the constant kernel
# NOTE(review): this product kernel is not passed to the regressor below,
# which is built from best_kernel directly -- confirm which one is intended.
kernel = C(1.0, (1e-3, 1e3)) * best_kernel

best_model = GaussianProcessRegressor(
    kernel=best_kernel,
    alpha=1e-4 * sigma,
    n_restarts_optimizer=args.restarts,
)

# Fit to data using the Maximum Likelihood Estimation of the parameters
print("\nFitting Gaussian Process: {}".format(best_kernel))
best_model.fit(tau, sigma)

# Make the prediction on the meshed x-axis (ask for the standard deviation
# as well)
print("\nMaking predictions")
sigma_pred2, std_dev2 = best_model.predict(tau_exact, return_std=True)

# Computing chi2 estimator: squared residuals divided by the predictive
# variance, averaged over the prediction points.
num = (sigma_exact - sigma_pred2) ** 2
# Floor the variance at 1e-10
corrected_std = np.maximum(std_dev2 ** 2, 1e-10)
chi2 = np.sum(num / corrected_std) / len(sigma_pred2)

# Computing ratio GP vs exact
ratio_GP2 = abs(sigma_pred2 / sigma_exact)

print("sigma pred: {}".format(sigma_pred2.shape))
print("sigma exact: {}".format(sigma_exact.shape))
print("ratio: {}".format(ratio_GP2.shape))
print("chi2: {}".format(chi2))

# Plot the function, the prediction and the confidence intervals for the best model
class GPRPredictor:
    """Evaluate GPR fits constructed by the GPRFitter class in
    pySurrogate/fit_gpr.py.

    The fit is restored from a dictionary of saved attributes rather than
    re-trained: the attributes are set directly on a fresh
    ``GaussianProcessRegressor`` (and, if present, on a ``LinearRegression``).
    """

    def __init__(self, res, **kwargs):
        """Restore a fit from ``res``.

        Parameters
        ----------
        res : mapping with the keys ``'data_mean'``, ``'data_std'``,
            ``'GPR_params'`` and ``'lin_reg_params'`` (the latter may be
            None when no linear model was fitted).
        **kwargs : accepted but not used.
        """
        self.data_mean = res['data_mean']
        self.data_std = res['data_std']

        # load GPR fit
        self.GPR_obj = GaussianProcessRegressor()
        GPR_params = res['GPR_params']
        self._set_gpr_params(self.GPR_obj, GPR_params)

        # load LinearRegression fit
        lin_reg_params = res['lin_reg_params']
        if lin_reg_params is not None:
            self.linearModel = linear_model.LinearRegression()
            self._set_lin_reg_params(self.linearModel, lin_reg_params)
        else:
            self.linearModel = None

    def _set_kernel_params(self, kernel_params):
        """Recursively set parameters for a kernel and return the final kernel.

        ``kernel_params`` holds the kernel class name under ``'name'`` and
        the kernel's parameters under the remaining keys; a parameter that is
        itself a dict describes a nested kernel.
        """
        # copy so as to not overwrite the caller's dictionary
        kernel_params = kernel_params.copy()

        # look up the kernel class by name
        name = kernel_params.pop('name')
        kernel_class = getattr(gaussian_process.kernels, name)

        params = {}
        for key, value in kernel_params.items():
            if isinstance(value, dict):
                # recursively set kernels
                params[key] = self._set_kernel_params(value)
            else:
                params[key] = value

        # Sum and Product need their two operands at construction time; the
        # other kernels are built with defaults and configured afterwards.
        if name in ('Sum', 'Product'):
            kernel = kernel_class(params['k1'], params['k2'])
        else:
            kernel = kernel_class()
        kernel.set_params(**params)
        return kernel

    def _set_gpr_params(self, gp_obj, gp_params):
        """Set the fitted hyperparameters for a GPR object.

        This can be used to load a previously constructed fit.

        NOTE: If you get errors like:
            "AttributeError: 'GaussianProcessRegressor' object has no
            attribute -----",
        try adding that attribute to GPR_SAVE_ATTRS_DICT
        """
        for attr in GPR_SAVE_ATTRS_DICT:
            if attr == 'kernel_':
                param = self._set_kernel_params(gp_params[attr])
            elif attr == '_y_train_std':
                # In scikit-learn versions before 0.23, there was no
                # _y_train_std, which is the same as saying _y_train_std=1.
                # If this fit was constructed using an earlier version of
                # scikit-learn, it would not have an attribute called
                # _y_train_std, so we just set it to 1. This way, the fit
                # can be evaluated with any version of scikit-learn.
                param = gp_params[attr] if attr in gp_params else 1
            else:
                param = gp_params[attr]
            setattr(gp_obj, attr, param)

    def _set_lin_reg_params(self, lr_obj, lr_params):
        """Set the fitted parameters for a LinearRegression object.

        This can be used to load a previously constructed fit.

        NOTE: If you get errors like:
            "AttributeError: 'LinearRegression' object has no
            attribute -----",
        try adding that attribute to LINREG_SAVE_ATTRS_DICT
        """
        for attr in LINREG_SAVE_ATTRS_DICT:
            setattr(lr_obj, attr, lr_params[attr])

    def _reconstruct_normalized_data(self, data_normed, data_normed_err):
        """The inverse operation of 'GPRFitter._normalize()'.

        Returns the reconstructed data and error estimate as a tuple. The
        error is only scaled by the standard deviation; no offset is added.
        """
        data = data_normed * self.data_std + self.data_mean
        data_err = data_normed_err * self.data_std
        return data, data_err

    def GPR_predict(self, x, estimate_err=False):
        """Evaluate a GPR fit.

        First evaluates the GPR fit to get the prediction for the normalized
        data. Then reconstructs the un-normalized data. Finally adds the
        linear model fit if it was done in GPRFitter.

        Parameters
        ----------
        x : input points, passed unchanged to the regressor's ``predict``
            and to the linear model's ``predict``.
        estimate_err : if True, also return the GPR error estimate.

        Returns
        -------
        dict with key ``'y'`` (prediction) and, when ``estimate_err`` is
        True, ``'y_gprErr'`` (one error estimate per input point).
        """
        # Get fit prediction and error estimate for normalized data
        fit_res = self.GPR_obj.predict(x, return_cov=estimate_err)
        if estimate_err:
            y_normalized_pred, cov_normalized_pred = fit_res
            # The per-point variance is the diagonal of the covariance
            # matrix. Flattening the whole matrix would yield n*n values
            # (and NaNs for negative off-diagonal entries) whenever more
            # than one point is evaluated.
            err_normalized_pred = np.sqrt(np.diagonal(cov_normalized_pred))
        else:
            y_normalized_pred = fit_res
            err_normalized_pred = fit_res * 0

        # Reconstruct to get un-normalized prediction
        y_pred, err_pred = self._reconstruct_normalized_data(
            y_normalized_pred, err_normalized_pred)

        if self.linearModel is not None:
            # Add the linear prediction that was subtracted before
            # doing the fit
            y_pred = y_pred + self.linearModel.predict(x)

        val_dict = {
            'y': y_pred,
        }
        if estimate_err:
            val_dict['y_gprErr'] = err_pred
        return val_dict
x_pr_grid, B_postsamples, T_fwdsamples, seed=200) jnp.save('plot_files/ccopula_lidar_logpdf_pr{}'.format(x_pr_val), logpdf_pr) jnp.save('plot_files/ccopula_lidar_logcdf_pr{}'.format(x_pr_val), logcdf_pr) #Convergence plot seed = 200 T_fwdsamples = 10000 logcdf_pr_conv, logpdf_pr_conv, pdiff, cdiff = check_convergence_pr_cregression( copula_cregression_obj, x, y_pr_grid, x_pr_grid, 1, T_fwdsamples, seed) jnp.save('plot_files/ccopula_lidar_pr_pdiff_pr{}'.format(x_pr_val), pdiff) #Gaussian Process print('Method: GP') from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel from sklearn.gaussian_process import GaussianProcessRegressor kernel = ConstantKernel() * RBF() + WhiteKernel() gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, normalize_y=True) gp.fit(x, y) mean_gp, std_gp = gp.predict(x_plot.reshape(-1, 1), return_std=True) jnp.save('plot_files/gp_lidar_mean', mean_gp) jnp.save('plot_files/gp_lidar_std', std_gp)
# Observations
y = f(X).ravel()

# Mesh the input space for evaluations of the real function, the prediction
# and its standard deviation
x = np.atleast_2d(np.linspace(0, 10, 1000)).T

# Instantiate a Gaussian Process model
kernel = C(1.0, (0.001, 1000)) * RBF(10, (0.01, 100))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(X, y)

# Make the prediction on the meshed x-axis (ask for the standard deviation
# as well)
y_pred, sigma = gp.predict(x, return_std=True)

# Plot the function, the prediction and the 95% confidence interval based on
# the predicted standard deviation
fig = plt.figure()
# Raw string: the TeX commands "\," and "\sin" are not valid Python escape
# sequences and must reach matplotlib with their backslashes intact.
plt.plot(x, f(x), 'r:', label=r'$f(x) = x\,\sin(x)$')
plt.plot(X, y, 'r.', markersize=10, label=u'Observations')
plt.plot(x, y_pred, 'b-', label=u'Prediction')
# Closed polygon: lower bound left-to-right, upper bound right-to-left.
plt.fill(np.concatenate([x, x[::-1]]),
         np.concatenate([y_pred - 1.9600 * sigma,
                         (y_pred + 1.9600 * sigma)[::-1]]),
         alpha=.5, fc='b', ec='None', label='95% confidence interval')
plt.xlabel('$x$')
kernel = RBF(length_scale=1, length_scale_bounds=(1e-1, 1e1))
#ESS(length_scale=2, periodicity=10, length_scale_bounds=(5e-3, 5e3),periodicity_bounds=(5e-3, 5e3))
gp = GaussianProcessRegressor(kernel=kernel, normalize_y=False,
                              n_restarts_optimizer=15)

# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(X_avg1, y_avg1)
#print "Fitting done"
#print gp.get_params()
# The prints use a single argument so that they produce the same text on
# Python 2 and Python 3 (the former print statements are a SyntaxError on
# Python 3).
print("Marginal likelihood:  %s" % (gp.log_marginal_likelihood(),))
#print gp.score(X, y)

# Make the prediction on the training inputs and on x_avg1 (ask for the
# standard deviation as well)
y_pred0, sigma0 = gp.predict(X_avg1, return_std=True)
y_pred, sigma = gp.predict(x_avg1, return_std=True)
print("Predicting done")
#print "__________\n", gp.score(y_pred.reshape(-1, 1), y_test)

# Plot the two predictions and the 95% confidence interval (1.96 standard
# deviations) around the prediction on the training inputs
fig = plt.figure()
nounou_minute = []
plt.plot(X_avg1, y_pred0, "g--")
plt.plot(x_avg1, y_pred, 'b--')
# Closed polygon: lower bound left-to-right, upper bound right-to-left.
plt.fill(np.concatenate([X_avg1, X_avg1[::-1]]),
         np.concatenate([y_pred0 - 1.96 * sigma0,
                         (y_pred0 + 1.96 * sigma0)[::-1]]),
         alpha=0.5, fc="g", ec="None")
def run(args, mass, winLow, winHigh):
    """Fit a Gaussian Process to a toy background histogram and plot the fit.

    The template histogram ``hmgg_c0`` is read from ``args.input`` and
    rebinned, a toy histogram is built from it with ``toyModel``, a signal
    from ``buildSignal`` is optionally added, and a
    ``GaussianProcessRegressor`` is fitted to the bin contents returned by
    ``GPHisto``.  The result is drawn either with matplotlib or on ROOT
    canvases.

    Parameters
    ----------
    args : object
        Options holder; the attributes read here are ``input``, ``doSig``,
        ``noWindow``, ``mplot`` and ``tag``.
    mass : number
        Not used by the active code (only by the disabled DSCB fit).
    winLow, winHigh : number
        Forwarded to ``GPHisto.setWindow``.

    Side effects
    ------------
    Prints diagnostics and writes ``<tag>GPFit.pdf`` (matplotlib branch) or
    ``out.root`` plus PDFs under ``<tag>/`` (ROOT branch).
    """
    f = TFile(args.input)
    bkghist_template = f.Get('hmgg_c0')
    bkghist_template.Rebin(8)
    stats = 100000
    seed = 10
    bkghist = toyModel(bkghist_template, stats, seed)

    if args.doSig:
        # get signal hist
        sighist = buildSignal(125, 1000, bkghist.GetNbinsX())
        # inject signal into background
        bkghist.Add(sighist)

    GPh = GPHisto(bkghist)
    GPh.setWindow(winLow, winHigh)
    X = GPh.getXWindowArr()
    y = GPh.getYWindowArr()
    dy = GPh.getErrWindowArr()

    X_t = GPh.getXArr()
    y_t = GPh.getYArr()
    dy_t = GPh.getErrArr()

    if args.noWindow:
        # Train on the full arrays instead of the windowed ones.
        X = X_t
        y = y_t
        dy = dy_t

    # X, y, dy = histoToArrayTest(bkghist, 120, 140)
    # X, y, dy = histoToArrayCut(bkghist, 120, 125)
    # X_t, y_t, dy_t = histoToArray(bkghist)
    # X = np.atleast_2d(X).T
    # y = y.ravel()
    # dy = dy.ravel()

    # Predict a relatively smooth function:
    # x = np.atleast_2d(np.linspace(start=105, stop=160, num=1000)).T
    # Predict at each data point:
    # x = np.atleast_2d(np.linspace(start=105, stop=160, num=219)).T
    x = GPh.getXArr()

    # Squared exponential kernel alternatives:
    # kernel = C(800.0, (1e-3, 1e3)) * RBF(100.0, (1e-3, 1e3))
    # kernel = C(10.0, (1e-3, 1e15)) * RBF(np.sqrt(2)*(7**2), (1e-3, 1e5))
    kernel = (C(1000.0, (1e-3, 1e15))
              * FallExp(1.0, (1e-5, 1e2), 1.0, (1e-3, 1e15))
              * Gibbs(1.0, (1e-3, 1e5), 1.0, (1e-3, 1e5)))
    # kernel = C(10.0, (1e-3, 1e6)) * Gibbs(1.0, (1e-3, 1e5), 1.0, (1e-3, 1e5))

    # Single-argument print() calls emit the same text under Python 2 and 3.
    print("dy[5] = {}".format(dy[5]))
    print("err = {} Original = {}".format(bkghist.GetBinError(5),
                                          bkghist_template.GetBinError(5)))

    # The squared per-point errors are passed as the regressor's ``alpha``.
    gp = GaussianProcessRegressor(kernel=kernel,
                                  optimizer='fmin',
                                  alpha=dy**2,
                                  n_restarts_optimizer=15)
    gp.fit(X, y)
    print(gp.kernel_)
    y_pred, sigma = gp.predict(x, return_std=True)

    if args.mplot:
        plt.figure()
        # plt.plot(X, y, 'r.', markersize=10, label=u'Background')
        plt.errorbar(X.ravel(), y, dy, fmt='r.', markersize=8,
                     label=u'Training Points', zorder=2)
        plt.errorbar(X_t.ravel(), y_t, dy_t, fmt='k.', markersize=7,
                     label=u'Background', zorder=1)
        plt.plot(x, y_pred, 'b-', label=u'Prediction', zorder=3)
        plt.fill(np.concatenate([x, x[::-1]]),
                 np.concatenate([y_pred - 1.9600 * sigma,
                                 (y_pred + 1.9600 * sigma)[::-1]]),
                 alpha=.5, fc='b', ec='None',
                 label='95% confidence interval', zorder=3)
        # Raw string: "\g" is LaTeX, not a Python escape sequence.
        plt.xlabel(r'$M_{\gamma \gamma}$')
        plt.ylabel('$events$')
        plt.title('Optimized Kernel: {}'.format(gp.kernel_))
        # plt.yscale('log')
        # plt.ylim(-10, 20)
        plt.legend(loc='upper right')
        plt.savefig(args.tag + 'GPFit.pdf')
        # plt.show()
    else:
        # NOTE(review): presumably opened so that canv.Write() below lands in
        # out.root; the handle itself is never used or closed here - confirm.
        outfile = TFile('out.root', 'RECREATE')
        # outhist = arrayToHisto('GP Fit', 105, 160, y_pred, sigma)
        outhist = GPh.getHisto(y_pred, 1.96 * sigma, 'GP Fit')
        if args.noWindow:
            bkgWindow = GPh.getHisto(y, dy, 'Full Background')
        else:
            bkgWindow = GPh.getWinHisto(y, dy, 'Full Background')
        bkgSubtracted = bkghist.Clone('bkgSubtracted')
        # Subtract background prediction from background with injected signal.
        bkgSubtracted.Add(outhist, -1)

        canv = TCanvas('canv', 'canv')
        pad1 = TPad("pad1", "pad1", 0, 0.3, 1, 1.0)
        pad1.SetBottomMargin(0)
        pad1.SetGridx()
        pad1.Draw()
        pad1.cd()
        outhist.SetStats(0)
        bkghist_template.SetStats(0)
        bkgWindow.SetStats(0)
        bkgWindow.SetMarkerColor(kBlue)
        bkgWindow.SetLineColor(kBlue)
        outhist.SetMarkerColor(kBlack)
        outhist.SetLineColor(kBlack)
        print(outhist.GetBinError(10))

        # Scale the template to the integral of the fitted histogram.
        # bkgNorm = bkgWindow.Integral(1, bkgWindow.FindBin(winLow))
        # tmpNorm = bkghist_template.Integral(1, bkghist_template.FindBin(winLow))
        bkgNorm = bkgWindow.Integral()
        tmpNorm = bkghist_template.Integral()
        bkghist_template.Scale(bkgNorm / tmpNorm)
        bkghist_template.SetTitle(str(gp.kernel_) + " nToys: " + str(stats))
        print("Bin 24: {0} : {1} : {2}".format(
            (outhist.GetBinContent(24) - outhist.GetBinError(24)),
            bkghist_template.GetBinContent(24),
            (outhist.GetBinContent(24) + outhist.GetBinError(24))))

        ####### Poly2 fit
        # canv4 = TCanvas('c4', 'c4')
        expPol_func = TF1(
            "expPol",
            "[0]*exp((x-100)/100 * ([1] + [2]*(x-100)/100))",
            105, 160)
        expPol_func.SetParameters(0, 0, 0)
        expPol_func.SetParLimits(1, -10., 10.)
        expPol_func.SetParLimits(2, -10., 10.)
        bkgWindow.Fit("expPol", "", "", 105, 160)
        expFitResult = bkgWindow.GetFunction("expPol")
        expPolHist = expFitResult.GetHistogram()
        print(expPolHist.GetNbinsX())
        # expPolHist.Divide(outhist)
        # expPolHist.Draw()
        # canv4.Print(args.tag+'/expPol_GP_ratio.pdf')

        bkghist_template.Draw('')
        bkgWindow.Draw('same')
        outhist.Draw('histsame')

        # outhist.GetYaxis().SetLabelSize(0.)
        axis = TGaxis(-5, 20, -5, 220, 20, 220, 510, "")
        axis.SetLabelFont(43)
        axis.SetLabelSize(15)
        axis.Draw()

        # Lower pad: ratios of the scaled template to the two fits.
        canv.cd()
        pad2 = TPad("pad2", "pad2", 0, 0.02, 1, 0.3)
        pad2.SetTopMargin(0)
        pad2.SetBottomMargin(0.28)
        pad2.SetGridx()
        pad2.Draw()
        pad2.cd()

        # Template / GP fit
        h3 = bkghist_template.Clone("h3")
        h3.SetLineColor(kBlack)
        h3.SetMinimum(0.95)
        h3.SetMaximum(1.05)
        h3.Sumw2()
        h3.SetStats(0)
        h3.Divide(outhist)
        h3.SetMarkerColor(kBlack)
        h3.SetMarkerStyle(20)
        h3.SetMarkerSize(0.5)
        h3.Draw("ep")

        # Template / expPol fit
        h4 = bkghist_template.Clone("h4")
        h4.SetLineColor(kRed)
        h4.SetMinimum(0.95)
        h4.SetMaximum(1.05)
        h4.Sumw2()
        h4.SetStats(0)
        h4.Divide(expFitResult)
        h4.SetMarkerColor(kRed)
        h4.SetMarkerStyle(20)
        h4.SetMarkerSize(0.5)
        h4.Draw("epsame")

        line = TLine(105, 1, 160, 1)
        line.Draw('same')

        # outhist settings
        outhist.SetLineColor(kBlack)
        outhist.SetFillColorAlpha(33, 0.5)
        outhist.SetLineWidth(2)

        # Y axis outhist plot settings
        outhist.GetYaxis().SetTitleSize(20)
        outhist.GetYaxis().SetTitleFont(43)
        outhist.GetYaxis().SetTitleOffset(1.55)

        # bkghist settings
        bkghist.SetLineColor(kBlack)
        bkghist.SetMarkerSize(0.7)
        bkghist.SetLineWidth(2)

        # Ratio plot (h3) settings
        h3.SetTitle("")  # Remove the ratio title

        # Y axis ratio plot settings
        h3.GetYaxis().SetTitle("data/fit ")
        h3.GetYaxis().SetNdivisions(505)
        h3.GetYaxis().SetTitleSize(20)
        h3.GetYaxis().SetTitleFont(43)
        h3.GetYaxis().SetTitleOffset(1.)
        # Absolute font size in pixel (precision 3)
        h3.GetYaxis().SetLabelFont(43)
        h3.GetYaxis().SetLabelSize(15)

        # X axis ratio plot settings
        h3.GetXaxis().SetTitleSize(20)
        h3.GetXaxis().SetTitleFont(43)
        h3.GetXaxis().SetTitleOffset(3.)
        # Absolute font size in pixel (precision 3)
        h3.GetXaxis().SetLabelFont(43)
        h3.GetXaxis().SetLabelSize(15)

        canv.SetBottomMargin(0)
        canv.Write()
        # canv.Print(winLow+'_'+winHigh+'_GPFit.pdf')
        # canv.Print(args.tag+'/GPFit_'+str(winLow)+'_'+str(winHigh)+'.pdf')
        canv.Print(args.tag + '/GPFit_' + str(seed) + '.pdf')

        if args.doSig:
            ### Plot signal stuff
            canv2 = TCanvas('c2', 'c2')
            canv2.cd()
            sighist.SetMarkerColor(kBlack)
            sighist.SetMarkerStyle(20)
            bkgSubtracted.GetXaxis().SetRangeUser(105, 158)
            bkgSubtracted.Draw('hist')
            sighist.Draw('samep')
            # canv2.Write()
            canv2.Print(args.tag + '/SigYield_root.pdf')

            canv3 = TCanvas('c3', 'c3')
            canv3.cd()
            ratio = sighist.Clone('ratio')
            ratio.Divide(bkgSubtracted)
            ratio.GetYaxis().SetRangeUser(-5, 5)
            ratio.Draw()
            # canv3.Write()
            canv3.Print(args.tag + '/SigYield_Ratio_root.pdf')

        # Disabled: DSCB fit of the background-subtracted histogram.
        # dscb_func = TF1("dscb", DSCB, 105, 160, 7)
        # dscb_func.SetParameters(1       # Normalization
        #                         ,mass   # mu
        #                         ,1.475  # alpha_low
        #                         ,1.902  # alpha_high
        #                         ,12.1   # n_low
        #                         ,11.6   # n_high
        #                         ,1.86 ) # sigma
        # #dscb_func.FixParameter(0,1)     #Normalization Dont want to fix this
        # dscb_func.FixParameter(1,mass)   #Mass Fixed to middle of window
        # dscb_func.FixParameter(2, 1.475) #alpha_low
        # dscb_func.FixParameter(3, 1.902) #alpha_high
        # dscb_func.FixParameter(4, 12.1)  #n_low
        # dscb_func.FixParameter(5, 11.6)  #n_high
        # dscb_func.FixParameter(6, 1.68)  # sigma
        # bkgSubtracted.Fit("dscb","","", winLow, winHigh)
        # fitResult = bkgSubtracted.GetFunction("dscb")
        # norm = fitResult.GetParameter(0)
        # ss = fitResult.Integral(winLow,winHigh)

        # canv.cd()
        # bkgSubtracted.GetXaxis().SetRangeUser(120,130)
        # bkgSubtracted.Draw()
        # fitResult.Draw('same')
        # canv.Print(args.tag+'/fitResult.pdf')
        # print fitResult.Integral(120,130)

        # Disabled: ratio of a full-range GP prediction to the windowed one.
        # canv4 = TCanvas('c4','c4')
        # gp_pred_full = GPh.getHisto(y_pred_full, sigma_full, 'GP Fit full')
        # gp_pred_full.Divide(outhist)
        # gp_pred_full.GetYaxis().SetRangeUser(0.95,1.05)
        # gp_pred_full.Draw()
        # canv4.Print(args.tag+'/Full_window_ratio.pdf')

    f.Close()
from matplotlib.colors import LogNorm

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

# Noisy samples of 0.5 * sin(3x) on [0, 5], with a fixed seed
rng = np.random.RandomState(0)
X = rng.uniform(0, 5, 20)[:, np.newaxis]
y = 0.5 * np.sin(3 * X[:, 0]) + rng.normal(0, 0.5, X.shape[0])

# First run
plt.figure(0)
kernel = 1.0 * RBF(length_scale=100.0, length_scale_bounds=(1e-2, 1e3)) \
    + WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+1))
gp = GaussianProcessRegressor(kernel=kernel, alpha=0.0).fit(X, y)
X_ = np.linspace(0, 5, 100)
y_mean, y_cov = gp.predict(X_[:, np.newaxis], return_cov=True)
# One-standard-deviation band taken from the diagonal of the covariance
y_std = np.sqrt(np.diag(y_cov))
plt.plot(X_, y_mean, 'k', lw=3, zorder=9)
plt.fill_between(X_, y_mean - y_std, y_mean + y_std, alpha=0.5, color='k')
plt.plot(X_, 0.5 * np.sin(3 * X_), 'r', lw=3, zorder=9)
plt.scatter(X[:, 0], y, c='r', s=50, zorder=10, edgecolors=(0, 0, 0))
plt.title("Initial: %s\nOptimum: %s\nLog-Marginal-Likelihood: %s"
          % (kernel, gp.kernel_,
             gp.log_marginal_likelihood(gp.kernel_.theta)))
plt.tight_layout()

# Second run
plt.figure(1)
# NOTE(review): this statement originally ended in a dangling line
# continuation with nothing after it, which made the file unparsable. It is
# terminated here; any remaining kernel terms are not present in this file
# and must be restored from the original example.
kernel = 1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e3))
def bo(X, y, n_iter=None):
    """Run a 1-D Bayesian-optimisation loop on the grid [0, 10].

    Each step fits a ``GaussianProcessRegressor`` (Matern + WhiteKernel) to
    the observations, builds an acquisition function with ``a_EI``, picks the
    grid point that maximises it, evaluates the module-level objective ``f``
    there and appends the new observation.

    The original implementation called itself at the end of every step with
    no base case, so it could only stop by exhausting the recursion limit.
    The same steps now run in a loop.

    Parameters
    ----------
    X : array-like
        Observed inputs, one row per observation.
    y : array-like
        Observed objective values.
    n_iter : int or None, optional
        Number of steps to run.  ``None`` (the default) keeps the original
        behaviour of running until interrupted.

    Returns
    -------
    tuple
        ``(X, y)`` including the observations added by this call (only
        reachable when ``n_iter`` is given).
    """
    # Candidate grid; it does not depend on the observations.
    x = np.atleast_2d(np.linspace(0, 10, 1024)).T
    x_ = np.atleast_2d(np.linspace(0, 10, 1024)).T  # for the disabled plot_2d hook

    step = 0
    while n_iter is None or step < n_iter:
        kernel = kernels.Matern() + kernels.WhiteKernel()
        gp = GaussianProcessRegressor(
            kernel=kernel,
            n_restarts_optimizer=16,
        )  # normalize_y=True)
        gp.fit(X, y)

        # FIXME is it possible for mu(x) < min{x \in observed_x}?
        # Is this because the GaussianProcess prior states that mu(x) = 0?
        # Will this affect the performance of GO, since everything not
        # observed automatically gets an additional boost because the prior
        # plays a bigger role (look it up)? We know the loss we are
        # optimizing in the end is in [0, 1].
        # (y_pred and sigma are only consumed by the disabled plot hooks.)
        y_pred, sigma = gp.predict(x, return_std=True)

        # http://www.scipy-lectures.org/advanced/mathematical_optimization/
        # x_min = fmin(negate(silly_f), 5)  # TODO better maximizer
        # Strong points: it is robust to noise, as it does not rely on
        # computing gradients. Thus it can work on functions that are not
        # locally smooth such as experimental data points, as long as they
        # display a large-scale bell-shape behavior. However it is slower
        # than gradient-based methods on smooth, non-noisy functions.
        # opt_result = minimize(negate(silly_f), 5, bounds=[(0, 10)])  # TODO better maximizer
        # print(opt_result)
        # assert(opt_result.success)
        # x_min = opt_result.x
        # x_min = brent(negate(silly_f), brack=(0, 10))
        # NOTE 1D only, NOTE not guaranteed to be within range brack=(0, 10)
        # (see documentation)
        # TODO getting the gradient of the gaussian would unlock all gradient
        # based optimization methods!! (including L_BFGS)
        a = a_EI(gp, x_obs=X, y_obs=y, theta=0.01)
        a_x = np.apply_along_axis(a, 1, x)  # for the disabled plot hooks

        # TODO have a reasonable optimization (this doesn't scale well)
        (x_min_,) = max(x, key=a)
        # (x_min_,) = brute(
        #     negate(a),
        #     ranges=((0, 10),),
        #     Ns=64,
        #     finish=fmin,
        # )
        # FIXME brute can return numbers outside of the range!
        # X = np.linspace(0, 10, 32), Ns=64, ranges=((0, 10) (x_min_ = 10.22...)
        # I think it occurs when the function is pretty flat (but not constant)
        # TODO verify that the finish function gets the same range as brute
        # and doesn't wander off (perhaps this is intended behaviour?)
        # TODO check https://github.com/scipy/scipy/blob/master/scipy/optimize/optimize.py#L2614
        # to see if it's possible for x_min to end up outside of the range
        # (and if so, when)
        print(x_min_)

        # plot_2d(x=x, x_=x_, y_pred=y_pred, sigma=sigma, a_x=a_x)
        # plot(x=x, y_pred=y_pred, x_obs=X, y_obs=y, x_min_=x_min_, sigma=sigma, a_x=a_x)
        # plt.show()

        # evaluate
        fx_min_ = f(x_min_)

        # Append the new observation for the next step.
        X = np.vstack((X, [x_min_]))
        y = np.hstack((y, [fx_min_]))
        step += 1

    return X, y
# Gaussian-process regressor with fixed hyper-parameters
regressor = GaussianProcessRegressor(
    copy_X_train=False,
    alpha=0.01778279410038923,
    kernel=kernels.RationalQuadratic(alpha=1, length_scale=1),
    n_restarts_optimizer=4,
    normalize_y=False,
)

# Disabled grid search over the hyper-parameters (kept as an inert string).
"""
regressor = GaussianProcessRegressor(copy_X_train=False)
parameters = {'kernel':(kernels.RationalQuadratic(), kernels.RBF(), kernels.WhiteKernel()),
              'alpha': np.logspace(-10, 1, 5),
              'n_restarts_optimizer': range(1,5),
              'normalize_y': [True, False]}
clf = GridSearchCV(regressor, parameters, scoring=rmsle_scorer, verbose=10)
X_train, y_train = resample(X, y, n_samples=500)
clf.fit(X_train, y_train)
print("best_estimator_:", clf.best_estimator_)
print("best_score_:", clf.best_score_)
print("best_params_:", clf.best_params_)
print("best_score_:", clf.best_score_)
regressor.set_params(**clf.best_params_)
"""

# Train on a resampled subset of 5000 points
X_train, y_train = resample(X, y, n_samples=5000)
regressor.fit(X_train, y_train)
print("Training done, testing...")

# Since we can't load the whole dataset, do batch testing
batch_size = 5000
X_test, y_test = resample(X, y, n_samples=100000)
# Gather the per-batch predictions behind an empty seed array, stack once.
chunks = [np.ndarray((0,))]
for i in range(0, X_test.shape[0], batch_size):
    chunks.append(regressor.predict(X_test[i: i + batch_size]))
y_pred = np.hstack(chunks)

print("RMSLE =", root_mean_squared_log_error(y_test, y_pred))  # Last result: 0.469685