def test_gpr_interpolation(kernel):
    # Test the interpolating property for different kernels.
    gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
    y_pred, y_cov = gpr.predict(X, return_cov=True)

    assert_almost_equal(y_pred, y)
    assert_almost_equal(np.diag(y_cov), 0.)
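# The test functions in this section refer to module-level fixtures (f, X, X2, y,
# kernels, fixed_kernel) that are not shown here. A minimal sketch of what they
# might look like, modeled on scikit-learn's GPR test setup (values are assumptions):
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C


def f(x):
    return x * np.sin(x)


X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T
X2 = np.atleast_2d([2., 4., 5.5, 6.5, 7.5]).T
y = f(X).ravel()
fixed_kernel = RBF(length_scale=1.0, length_scale_bounds="fixed")
kernels = [RBF(length_scale=1.0), fixed_kernel,
           C(1.0, (1e-2, 1e2)) * RBF(length_scale=1.0,
                                     length_scale_bounds=(1e-3, 1e3))]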
def get_globals():
    X = np.array([
        [0.00, 0.00],
        [0.99, 0.99],
        [0.00, 0.99],
        [0.99, 0.00],
        [0.50, 0.50],
        [0.25, 0.50],
        [0.50, 0.25],
        [0.75, 0.50],
        [0.50, 0.75],
    ])

    def get_y(X):
        return -(X[:, 0] - 0.3) ** 2 - 0.5 * (X[:, 1] - 0.6)**2 + 2

    y = get_y(X)

    mesh = np.dstack(
        np.meshgrid(np.arange(0, 1, 0.01), np.arange(0, 1, 0.01))
    ).reshape(-1, 2)

    GP = GaussianProcessRegressor(
        kernel=Matern(),
        n_restarts_optimizer=25,
    )
    GP.fit(X, y)

    return {'x': X, 'y': y, 'gp': GP, 'mesh': mesh}
def test_y_normalization():
    """ Test normalization of the target values in GP

    Fitting non-normalizing GP on normalized y and fitting normalizing GP
    on unnormalized y should yield identical results
    """
    y_mean = y.mean(0)
    y_norm = y - y_mean
    for kernel in kernels:
        # Fit non-normalizing GP on normalized y
        gpr = GaussianProcessRegressor(kernel=kernel)
        gpr.fit(X, y_norm)
        # Fit normalizing GP on unnormalized y
        gpr_norm = GaussianProcessRegressor(kernel=kernel, normalize_y=True)
        gpr_norm.fit(X, y)

        # Compare predicted mean, std-devs and covariances
        y_pred, y_pred_std = gpr.predict(X2, return_std=True)
        y_pred = y_mean + y_pred
        y_pred_norm, y_pred_std_norm = gpr_norm.predict(X2, return_std=True)

        assert_almost_equal(y_pred, y_pred_norm)
        assert_almost_equal(y_pred_std, y_pred_std_norm)

        _, y_cov = gpr.predict(X2, return_cov=True)
        _, y_cov_norm = gpr_norm.predict(X2, return_cov=True)
        assert_almost_equal(y_cov, y_cov_norm)
def test_predict_cov_vs_std():
    """ Test that predicted std.-dev. is consistent with cov's diagonal."""
    for kernel in kernels:
        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
        y_mean, y_cov = gpr.predict(X2, return_cov=True)
        y_mean, y_std = gpr.predict(X2, return_std=True)
        assert_almost_equal(np.sqrt(np.diag(y_cov)), y_std)
def test_lml_improving():
    """ Test that hyperparameter-tuning improves log-marginal likelihood. """
    for kernel in kernels:
        if kernel == fixed_kernel:
            continue
        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
        assert_greater(gpr.log_marginal_likelihood(gpr.kernel_.theta),
                       gpr.log_marginal_likelihood(kernel.theta))
def bo_(x_obs, y_obs):
    kernel = kernels.Matern() + kernels.WhiteKernel()
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=16)
    gp.fit(x_obs, y_obs)

    xs = list(repeat(np.atleast_2d(np.linspace(0, 10, 128)).T, 2))
    x = cartesian_product(*xs)

    a = a_EI(gp, x_obs=x_obs, y_obs=y_obs)
    argmin_a_x = x[np.argmax(a(x))]

    # heavy evaluation
    print("f({})".format(argmin_a_x))
    f_argmin_a_x = f2d(np.atleast_2d(argmin_a_x))

    plot_2d(gp, x_obs, y_obs, argmin_a_x, a, xs)
    plt.show()

    bo_(
        x_obs=np.vstack((x_obs, argmin_a_x)),
        y_obs=np.hstack((y_obs, f_argmin_a_x)),
    )
def test_gpr_interpolation():
    """Test the interpolating property for different kernels."""
    for kernel in kernels:
        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
        y_pred, y_cov = gpr.predict(X, return_cov=True)

        assert_true(np.allclose(y_pred, y))
        assert_true(np.allclose(np.diag(y_cov), 0.))
def test_acquisition_api():
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)
    y = rng.randn(10)
    gpr = GaussianProcessRegressor()
    gpr.fit(X, y)

    for method in [gaussian_ei, gaussian_lcb, gaussian_pi]:
        assert_array_equal(method(X, gpr).shape, 10)
        assert_raises(ValueError, method, rng.rand(10), gpr)
def test_converged_to_local_maximum(kernel):
    # Test that we are in local maximum after hyperparameter-optimization.
    gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)

    lml, lml_gradient = \
        gpr.log_marginal_likelihood(gpr.kernel_.theta, True)

    assert_true(np.all((np.abs(lml_gradient) < 1e-4) |
                       (gpr.kernel_.theta == gpr.kernel_.bounds[:, 0]) |
                       (gpr.kernel_.theta == gpr.kernel_.bounds[:, 1])))
def test_lml_gradient():
    """ Compare analytic and numeric gradient of log marginal likelihood. """
    for kernel in kernels:
        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)

        lml, lml_gradient = gpr.log_marginal_likelihood(kernel.theta, True)
        lml_gradient_approx = approx_fprime(
            kernel.theta,
            lambda theta: gpr.log_marginal_likelihood(theta, False),
            1e-10
        )

        assert_almost_equal(lml_gradient, lml_gradient_approx, 3)
def test_prior(kernel):
    # Test that GP prior has mean 0 and identical variances.
    gpr = GaussianProcessRegressor(kernel=kernel)

    y_mean, y_cov = gpr.predict(X, return_cov=True)

    assert_almost_equal(y_mean, 0, 5)
    if len(gpr.kernel.theta) > 1:
        # XXX: quite hacky, works only for current kernels
        assert_almost_equal(np.diag(y_cov), np.exp(kernel.theta[0]), 5)
    else:
        assert_almost_equal(np.diag(y_cov), 1, 5)
def test_sample_statistics():
    """ Test that statistics of samples drawn from GP are correct."""
    for kernel in kernels:
        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)

        y_mean, y_cov = gpr.predict(X2, return_cov=True)

        samples = gpr.sample_y(X2, 300000)

        # More digits accuracy would require many more samples
        assert_almost_equal(y_mean, np.mean(samples, 1), 2)
        assert_almost_equal(np.diag(y_cov) / np.diag(y_cov).max(),
                            np.var(samples, 1) / np.diag(y_cov).max(), 1)
class SmoothFunctionCreator():

    def __init__(self, seed=42):
        self._gp = GaussianProcessRegressor()
        x_train = np.array([0.0, 2.0, 6.0, 10.0])[:, np.newaxis]
        source_train = np.array([0.0, 1.0, -1.0, 0.0])
        self._gp.fit(x_train, source_train)
        self._random_state = np.random.RandomState(seed)

    def sample(self, n_samples):
        x = np.linspace(0.0, 10.0, 100)[:, np.newaxis]
        source = self._gp.sample_y(x, n_samples,
                                   random_state=self._random_state)
        target = gaussian_filter1d(source, 1, order=1, axis=0)
        target = np.tanh(10.0 * target)
        return source, target
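# A minimal usage sketch for the class above, assuming numpy, GaussianProcessRegressor
# and scipy.ndimage.gaussian_filter1d are imported as in the original module:
creator = SmoothFunctionCreator(seed=0)
source, target = creator.sample(n_samples=5)
print(source.shape, target.shape)  # both (100, 5): one column per sampled function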
def fit_GP(x_train):
    y_train = gaussian(x_train, mu, sig).ravel()

    # Instantiate a Gaussian Process model
    kernel = C(1.0, (1e-3, 1e3)) * RBF(1, (1e-2, 1e2))
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(x_train, y_train)

    # Make the prediction on the meshed x-axis (ask for MSE as well)
    y_pred, sigma = gp.predict(x, return_std=True)
    return y_train, y_pred, sigma
def test_no_fit_default_predict():
    # Test that GPR predictions without fit do not break by default.
    default_kernel = (C(1.0, constant_value_bounds="fixed") *
                      RBF(1.0, length_scale_bounds="fixed"))
    gpr1 = GaussianProcessRegressor()
    _, y_std1 = gpr1.predict(X, return_std=True)
    _, y_cov1 = gpr1.predict(X, return_cov=True)

    gpr2 = GaussianProcessRegressor(kernel=default_kernel)
    _, y_std2 = gpr2.predict(X, return_std=True)
    _, y_cov2 = gpr2.predict(X, return_cov=True)

    assert_array_almost_equal(y_std1, y_std2)
    assert_array_almost_equal(y_cov1, y_cov2)
def plot_gp(x_min, x_max, x, y, train_features, train_labels):
    fig = plt.figure(figsize=(16, 10))
    fig.suptitle(
        'Gaussian Process and Utility Function After {} Steps'.format(len(train_features)),
        fontdict={'size': 30}
    )

    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1])
    axis = plt.subplot(gs[0])
    acq = plt.subplot(gs[1])

    gp = GaussianProcessRegressor(
        kernel=Matern(nu=2.5),
        n_restarts_optimizer=25,
    )
    gp.fit(train_features, train_labels)
    mu, sigma = gp.predict(x, return_std=True)

    axis.plot(x, y, linewidth=3, label='Target')
    axis.plot(train_features.flatten(), train_labels, 'D', markersize=8,
              label=u'Observations', color='r')
    axis.plot(x, mu, '--', color='k', label='Prediction')
    axis.fill(np.concatenate([x, x[::-1]]),
              np.concatenate([mu - 1.9600 * sigma, (mu + 1.9600 * sigma)[::-1]]),
              alpha=.6, fc='c', ec='None', label='95% confidence interval')
    axis.set_xlim((x_min, x_max))
    axis.set_ylim((None, None))
    axis.set_ylabel('f(x)', fontdict={'size': 20})
    axis.set_xlabel('x', fontdict={'size': 20})

    bounds = np.asarray([[x_min, x_max]])
    acquisition_function_kappa = 5
    mean, std = gp.predict(x, return_std=True)
    acquisition_function_values = mean + acquisition_function_kappa * std
    acq.plot(x, acquisition_function_values, label='Utility Function',
             color='purple')
    acq.plot(x[np.argmax(acquisition_function_values)],
             np.max(acquisition_function_values), '*', markersize=15,
             label=u'Next Best Guess', markerfacecolor='gold',
             markeredgecolor='k', markeredgewidth=1)
    acq.set_xlim((x_min, x_max))
    acq.set_ylim((0, np.max(acquisition_function_values) + 0.5))
    acq.set_ylabel('Utility', fontdict={'size': 20})
    acq.set_xlabel('x', fontdict={'size': 20})

    axis.legend(loc=2, bbox_to_anchor=(1.01, 1), borderaxespad=0.)
    acq.legend(loc=2, bbox_to_anchor=(1.01, 1), borderaxespad=0.)
def test_K_inv_reset(kernel):
    y2 = f(X2).ravel()

    # Test that self._K_inv is reset after a new fit
    gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
    assert hasattr(gpr, '_K_inv')
    assert gpr._K_inv is None
    gpr.predict(X, return_std=True)
    assert gpr._K_inv is not None
    gpr.fit(X2, y2)
    assert gpr._K_inv is None
    gpr.predict(X2, return_std=True)

    gpr2 = GaussianProcessRegressor(kernel=kernel).fit(X2, y2)
    gpr2.predict(X2, return_std=True)
    # the value of K_inv should be independent of the first fit
    assert_array_equal(gpr._K_inv, gpr2._K_inv)
def __init__(self, name, title, Myy, trainHisto, dataHisto): super(RooGPBkg, self).__init__(self, name, title, Myy) self.name = name self.title = title self.Myy = Myy self.trainHisto = trainHisto self.dataHisto = dataHisto self.kernel = C((2.98e4)**2, (1e-3, 1e15)) * RBF(60, (1,1e5 )) #squared exponential kernel trainHisto.Scale(dataHisto.Integral()/trainHisto.Integral()) self.opt_kernel = self.setTrainMC(trainHisto) #Need to think a bit more about how to set the range correctly here. self.sigFunction = ROOT.TF1("dscb", DSCB, 105, 160, 2) self.currentNSig = None self.currentSBHist = None GPh = GPHisto(dataHisto) X = GPh.getXArr() Y = GPh.getYArr() dataErrs = GPh.getErrArr() self.gp = GaussianProcessRegressor(kernel=self.opt_kernel, #previously optimized kernel optimizer=None, # Dont reoptimize hyperparamters. alpha=dataErrs**2) self.gp.fit(X,Y) y_pred, sigma = self.gp.predict(X, return_std=True) #self.gpHisto = arrayToHisto("GPhisto", 105, 160, y_pred) self.gpHisto = GPh.getHisto(y_pred, sigma, "GPhisto")
def fit_gpr_model(self):
    '''
    create and fit the gaussian process model.
    the results in the model object are stored in a class variable
    '''
    # prepare input for fitting function
    X = self.hist[['wness_bin_center', 'dw23_bin_center']].values
    y = self.hist['entries'].values

    # uncertainty of counts from a poisson distribution
    dy = np.sqrt(y)

    # "nugget" is used to inform fitting algorithm of input uncertainty
    nugget = (dy / y) ** 2
    inds = np.where(np.isnan(nugget))
    nugget[inds] = 1.0

    if self.flatten_wness:
        y = self.do_flatten_wness()

    # define kernel
    if self.noise:
        self.kernel = 1.0 * RBF([.1, .1]) + WhiteKernel(0.1)
    else:
        self.kernel = RBF([.1, .1])

    # Instantiate a Gaussian Process model
    self.gp = GaussianProcessRegressor(kernel=self.kernel,
                                       alpha=nugget,
                                       normalize_y=False,
                                       n_restarts_optimizer=10)

    # Fit to data using Maximum Likelihood Estimation of the parameters
    self.gp.fit(X, y)

    print(self.gp.kernel_)
def __init__(self, f, pbounds, random_state=None, verbose=1): """ :param f: Function to be maximized. :param pbounds: Dictionary with parameters names as keys and a tuple with minimum and maximum values. :param verbose: Whether or not to print progress. """ # Store the original dictionary self.pbounds = pbounds self.random_state = ensure_rng(random_state) # Data structure containing the function to be optimized, the bounds of # its domain, and a record of the evaluations we have done so far self.space = TargetSpace(f, pbounds, random_state) # Initialization flag self.initialized = False # Initialization lists --- stores starting points before process begins self.init_points = [] self.x_init = [] self.y_init = [] # Counter of iterations self.i = 0 # Internal GP regressor self.gp = GaussianProcessRegressor( kernel=Matern(nu=2.5), n_restarts_optimizer=25, random_state=self.random_state ) # Utility Function placeholder self.util = None # PrintLog object self.plog = PrintLog(self.space.keys) # Output dictionary self.res = {} # Output dictionary self.res['max'] = {'max_val': None, 'max_params': None} self.res['all'] = {'values': [], 'params': []} # non-public config for maximizing the aquisition function # (used to speedup tests, but generally leave these as is) self._acqkw = {'n_warmup': 100000, 'n_iter': 250} # Verbose self.verbose = verbose
def test_GP_brownian_motion(self): from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C # add data t = np.linspace(0, 10, 100) # # Instanciate a Gaussian Process model # kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)) # Instanciate a Gaussian Process model kernel = lambda x, y: 1. * min(x, y) # kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)) gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9) # gp = GaussianProcessRegressor() # Fit to data using Maximum Likelihood Estimation of the parameters X = np.atleast_2d(t).T gp.fit(X, y) # gp = GaussianProcessRegressor() # Fit to data using Maximum Likelihood Estimation of the parameters # gp.fit(t, y) # Make the prediction on the meshed x-axis (ask for MSE as well) # y_star, err_y_star = gp.predict(t, return_std=True) # Make the prediction on the meshed x-axis (ask for MSE as well) y_pred, sigma = gp.predict(t, return_std=True) fig = plt.figure() ax = fig.add_axes((0.1, 0.3, 0.8, 0.65)) ax.invert_yaxis() ax.plot(t, y, color='blue', label='L bol', lw=2.5) ax.errorbar(t, y, yerr=yerr, fmt='o', color='blue', label='%s obs.') # # ax.plot(t, y_star, color='red', ls='--', lw=1.5, label='GP') ax.plot(t, y_pred, '-', color='gray') # ax.fill_between(t, y_star - 2 * err_y_star, y_star + 2 * err_y_star, color='gray', alpha=0.3) ax.fill(np.concatenate([t, t[::-1]]), np.concatenate([y_pred - 1.9600 * sigma, (y_pred + 1.9600 * sigma)[::-1]]), alpha=.5, fc='b', ec='None', label='95% confidence interval') plt.show()
def plot_gaussian(data, col):
    '''
    Plots the gaussian process regression with a characteristic length scale
    of 10 years. Essentially this highlights the 'slow trend' in the data.

    Parameters
    ----------
    data: dataframe
        pandas dataframe containing 'date', 'linMean' which is the average
        runtime and 'linSD' which is the standard deviation.
    col: string
        the color in which to plot the data
    '''
    # extract the results from the dataframe
    Year = np.array(data[u'date'].tolist())
    Mean = np.array(data[u'linMean'].tolist())
    SD = np.array(data[u'linSD'].tolist())

    # initialize the gaussian process. Note that the process is calculated with a
    # length scale of 10 years to give the 'slow trend' in the results.
    length_scale = 10.
    kernel = 1. * RBF(length_scale)
    gp = GaussianProcessRegressor(kernel=kernel, alpha=SD ** 2,
                                  normalize_y=True)

    # now fit the data and get the predicted mean and standard deviation
    # Note: for reasons that are unclear, GaussianProcessRegressor won't take 1D
    # arrays so the data are converted to 2D and then converted back for plotting
    gp.fit(np.atleast_2d(Year).T, np.atleast_2d(Mean).T)
    Year_array = np.atleast_2d(np.linspace(min(Year) - 2, max(Year) + 2, 100)).T
    Mean_prediction, SD_prediction = gp.predict(Year_array, return_std=True)
    Year_array = Year_array.ravel()
    Mean_prediction = Mean_prediction.ravel()

    # plot the predicted best fit
    plt.plot(Year_array, Mean_prediction, col, alpha=1)
    # plot the 95% confidence interval
    plt.fill_between(Year_array,
                     (Mean_prediction - 1.9600 * SD_prediction),
                     y2=(Mean_prediction + 1.9600 * SD_prediction),
                     alpha=0.5, color=col)
    plt.draw()
def fit(self, X, y):
    """
    Use X and y to train a Gaussian process.
    """
    super(GP, self).fit(X, y)

    # skip training the process if there aren't enough samples
    if X.shape[0] < self.r_minimum:
        return

    self.gp = GaussianProcessRegressor(normalize_y=True)
    self.gp.fit(X, y)
def fit(self, X, y): def jitter(x, range): y = np.copy(x) scale_exp_min = np.abs(np.ceil(np.log10(range[0]))) scale_exp_max = np.abs(np.ceil(np.log10(range[1]))) scale_exp = (scale_exp_max + scale_exp_min) / 2. r = np.random.rand(y.size) / (10**scale_exp) y = y + r return y # Print msg. when going into gcp.fit strMessage = "rows in X = %d, r_minimum = %d" % (X.shape[0], self.r_minimum) logger.debug(strMessage) # Use X and y to train a Gaussian Copula Process. super(GCP, self).fit(X, y) # skip training the process if there aren't enough samples if X.shape[0] < self.r_minimum: return # -- Non-parametric model of 'y', estimated with kernel density kernel_pdf = st.gaussian_kde(y) kernel_cdf = make_cdf(kernel_pdf) kernel_ppf = make_ppf(kernel_pdf) y_kernel_model = {'pdf': kernel_pdf, 'cdf': kernel_cdf, 'ppf': kernel_ppf} self.y_kernel_model = y_kernel_model # - Transform y-->F-->vF-->norm.ppf-->v vF = y_kernel_model['cdf'](y) v = st.norm.ppf(vF) # -- Non-parametric model of each feature in 'X', estimated with kernel density X_kernel_model = [] for ki in range(X.shape[1]): columnX = X[:, ki] if self.tunables[ki][1].is_integer: columnX = jitter(columnX, self.tunables[ki][1].range) kernel_pdf = st.gaussian_kde(columnX) kernel_cdf = make_cdf(kernel_pdf) kernel_ppf = make_ppf(kernel_pdf) kernel_model = {'pdf': kernel_pdf, 'cdf': kernel_cdf, 'ppf': kernel_ppf} X_kernel_model.append(kernel_model) self.X_kernel_model = X_kernel_model # -- Transform X-->F-->uF-->norm.ppf-->U U = np.empty_like(X) for ki in range(X.shape[1]): uF = X_kernel_model[ki]['cdf'](X[:, ki]) U[:, ki] = st.norm.ppf(uF) # - Instantiate a GP and fit it with (U, v) self.gcp = GaussianProcessRegressor(normalize_y=True) self.gcp.fit(U, v)
def theta(self, weights):
    self.weights = np.exp(np.asarray(weights, dtype=float))

    # Parse weights into its components
    self.theta_gp, self.theta_l, self.length_scales = \
        self._parse_weights(self.weights)

    # Train length-scale Gaussian Process
    kernel = RBF(self.theta_l, length_scale_bounds="fixed")
    self.gp_l = GaussianProcessRegressor(kernel=kernel)
    self.gp_l.fit(self.X_, np.log10(self.length_scales))
def test_custom_optimizer():
    """ Test that GPR can use externally defined optimizers. """
    # Define a dummy optimizer that simply tests 50 random hyperparameters
    def optimizer(obj_func, initial_theta, bounds):
        rng = np.random.RandomState(0)
        theta_opt, func_min = \
            initial_theta, obj_func(initial_theta, eval_gradient=False)
        for _ in range(50):
            theta = np.atleast_1d(rng.uniform(np.maximum(-2, bounds[:, 0]),
                                              np.minimum(1, bounds[:, 1])))
            f = obj_func(theta, eval_gradient=False)
            if f < func_min:
                theta_opt, func_min = theta, f
        return theta_opt, func_min

    for kernel in kernels:
        if kernel == fixed_kernel:
            continue
        gpr = GaussianProcessRegressor(kernel=kernel, optimizer=optimizer)
        gpr.fit(X, y)

        # Checks that optimizer improved marginal likelihood
        assert_greater(gpr.log_marginal_likelihood(gpr.kernel_.theta),
                       gpr.log_marginal_likelihood(gpr.kernel.theta))
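# A hedged sketch of another external optimizer with the same
# (obj_func, initial_theta, bounds) -> (theta_opt, func_min) contract as the dummy
# optimizer above, here delegating to scipy's L-BFGS-B:
from scipy.optimize import minimize


def scipy_optimizer(obj_func, initial_theta, bounds):
    # obj_func returns the negative log-marginal likelihood for a given theta
    result = minimize(lambda theta: obj_func(theta, eval_gradient=False),
                      initial_theta, bounds=bounds, method="L-BFGS-B")
    return result.x, result.fun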
def _determine_contextparams(self, optimizer): """Select context and params jointly using ACES.""" # Choose the first samples uniform randomly if len(optimizer.X_) < optimizer.initial_random_samples: cx = np.random.uniform(self.cx_boundaries[:, 0], self.cx_boundaries[:, 1]) return cx[:self.context_dims], cx[self.context_dims:] # Prepare entropy search objective self._init_es_ensemble() # Generate data for function mapping # query_context x query_parameters x eval_context -> entropy reduction n_query_points = 500 n_data_dims = 2 * self.context_dims + self.dimension X = np.empty((n_query_points, n_data_dims)) y = np.empty(n_query_points) for i in range(n_query_points): # Select query point and evaluation context randomly query = np.random.uniform(self.cx_boundaries[:, 0], self.cx_boundaries[:, 1]) ind = np.random.choice(self.n_context_samples) # Store query point in X and value of entropy-search in y X[i, :self.context_dims + self.dimension] = query X[i, self.context_dims + self.dimension:] = \ self.context_samples[ind] - query[:self.context_dims] y[i] = self.entropy_search_ensemble[ind](query)[0] # Fit GP model to this data kernel = C(1.0, (1e-10, 100.0)) \ * RBF(length_scale=(1.0,)*n_data_dims, length_scale_bounds=[(0.01, 10.0),]*n_data_dims) \ + WhiteKernel(1.0, (1e-10, 100.0)) self.es_surrogate = GaussianProcessRegressor(kernel=kernel) self.es_surrogate.fit(X, y) # Select query based on mean entropy reduction in surrogate model # predictions contexts = np.random.uniform(self.context_boundaries[:, 0], self.context_boundaries[:, 1], (250, self.context_dims)) def objective_function(cx): X_query = np.empty((250, n_data_dims)) X_query[:, :self.context_dims + self.dimension] = cx X_query[:, self.context_dims + self.dimension:] = \ contexts - cx[:self.context_dims] es_pred, es_cov = \ self.es_surrogate.predict(X_query, return_cov=True) return es_pred.mean() + self.kappa * np.sqrt(es_cov.mean()) cx = global_optimization( objective_function, boundaries=self.cx_boundaries, optimizer=self.optimizer, maxf=optimizer.maxf) return cx[:self.context_dims], cx[self.context_dims:]
def test_random_starts():
    # Test that an increasing number of random-starts of GP fitting only
    # increases the log marginal likelihood of the chosen theta.
    n_samples, n_features = 25, 2
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features) * 2 - 1
    y = np.sin(X).sum(axis=1) + np.sin(3 * X).sum(axis=1) \
        + rng.normal(scale=0.1, size=n_samples)

    kernel = C(1.0, (1e-2, 1e2)) \
        * RBF(length_scale=[1.0] * n_features,
              length_scale_bounds=[(1e-4, 1e+2)] * n_features) \
        + WhiteKernel(noise_level=1e-5, noise_level_bounds=(1e-5, 1e1))
    last_lml = -np.inf
    for n_restarts_optimizer in range(5):
        gp = GaussianProcessRegressor(
            kernel=kernel, n_restarts_optimizer=n_restarts_optimizer,
            random_state=0,).fit(X, y)
        lml = gp.log_marginal_likelihood(gp.kernel_.theta)
        assert_greater(lml, last_lml - np.finfo(np.float32).eps)
        last_lml = lml
class GP(BaseTuner): def __init__(self, tunables, gridding=0, r_minimum=2): """ Extra args: r_minimum: the minimum number of past results this selector needs in order to use gaussian process for prediction. If not enough results are present during a fit(), subsequent calls to propose() will revert to uniform selection. """ super(GP, self).__init__(tunables, gridding=gridding) self.r_minimum = r_minimum def fit(self, X, y): """ Use X and y to train a Gaussian process. """ super(GP, self).fit(X, y) # skip training the process if there aren't enough samples if X.shape[0] < self.r_minimum: return self.gp = GaussianProcessRegressor(normalize_y=True) self.gp.fit(X, y) def predict(self, X): if self.X.shape[0] < self.r_minimum: # we probably don't have enough logger.warn('GP: not enough data, falling back to uniform sampler') return Uniform(self.tunables).predict(X) y, stdev = self.gp.predict(X, return_std=True) return np.array(list(zip(y, stdev))) def _acquire(self, predictions): """ Predictions from the GP will be in the form (prediction, error). The default acquisition function returns the index with the highest predicted value, not factoring in error. """ return np.argmax(predictions[:, 0])
def bo_(x_obs, y_obs, n_iter):
    if n_iter > 0:
        kernel = kernels.Matern() + kernels.WhiteKernel()
        gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=16)
        gp.fit(x_obs, 1 - y_obs)

        a = a_EI(gp, x_obs=x_obs, y_obs=1 - y_obs)
        argmax_f_x_ = x[np.argmax(a(x))]

        # heavy evaluation
        f_argmax_f_x_ = cross_validation(argmax_f_x_)

        y_ob = np.atleast_2d(mean_mean_validation_scores(f_argmax_f_x_)).T

        return f_argmax_f_x_ + bo_(
            x_obs=np.vstack((x_obs, argmax_f_x_)),
            y_obs=np.vstack((y_obs, y_ob)),
            n_iter=n_iter - 1,
        )
    else:
        return []
def do_expt(seed): np.random.seed(seed) perm = np.random.permutation(nseq) perm = perm[:n_bo_init] Xinit = Xall[perm] yinit = yall[perm] rnd_solver = RandomDiscreteOptimizer(Xall, n_iter=n_bo_iter + n_bo_init) """ # Embed sequence then pass to kernel. # We use Matern kernel 1.5 since this only assumes first-orer differentiability. kernel = ConstantKernel(1.0) * EmbedKernel(length_scale=1.0, nu=1.5, embed_fn=lambda x: embedder.predict(x)) gpr = GaussianProcessRegressor(kernel=kernel, alpha=noise**2) acq_fn = expected_improvement n_seq = np.shape(Xall)[0] acq_solver = EnumerativeDiscreteOptimizer(Xall, n_iter=n_seq) bo_embed_solver_slow = BayesianOptimizer( Xinit, yinit, gpr, acq_fn, acq_solver, n_iter=n_bo_iter) """ kernel = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=1.5) gpr = GaussianProcessRegressor(kernel=kernel, alpha=noise**2) acq_fn = EI bo_oracle_embed_solver = BayesianOptimizerEmbedEnum(Xall, oracle_embed_fn, Xinit, yinit, gpr, acq_fn, n_iter=n_bo_iter) kernel = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=1.5) gpr = GaussianProcessRegressor(kernel=kernel, alpha=noise**2) acq_fn = EI bo_predictor_embed_solver = BayesianOptimizerEmbedEnum(Xall, predictor_embed_fn, Xinit, yinit, gpr, acq_fn, n_iter=n_bo_iter) kernel = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=1.5) gpr = GaussianProcessRegressor(kernel=kernel, alpha=noise**2) acq_fn = EI bo_super_embed_solver = BayesianOptimizerEmbedEnum(Xall, super_embed_fn, Xinit, yinit, gpr, acq_fn, n_iter=n_bo_iter) kernel = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=1.5) gpr = GaussianProcessRegressor(kernel=kernel, alpha=noise**2) acq_fn = EI bo_onehot_embed_solver = BayesianOptimizerEmbedEnum(Xall, onehot_embed_fn, Xinit, yinit, gpr, acq_fn, n_iter=n_bo_iter) """ # Pass integers to kernel. kernel = ConstantKernel(1.0) * EmbedKernel(length_scale=1.0, nu=1.5, embed_fn=lambda x: x) gpr = GaussianProcessRegressor(kernel=kernel, alpha=noise**2) acq_fn = expected_improvement n_seq = 4**seq_len acq_solver = EnumerativeStringOptimizer(seq_len, n_iter=n_seq) bo_int_solver = BayesianOptimizer(Xinit, yinit, gpr, acq_fn, acq_solver, n_iter=n_bo_iter) """ methods = [] methods.append((bo_oracle_embed_solver, 'BO-oracle-embed-enum')) methods.append((bo_predictor_embed_solver, 'BO-predictor_embed-enum')) methods.append((bo_super_embed_solver, 'BO-super-embed-enum')) methods.append((bo_onehot_embed_solver, 'BO-onehot-enum')) #methods.append((bo_int_solver, 'BO-int-enum')) methods.append((rnd_solver, 'RndSolver')) # Always do random last ytrace = dict() for solver, name in methods: print("Running {}".format(name)) time_start = time() solver.maximize(oracle) print('time spent by {} = {:0.3f}\n'.format(name, time() - time_start)) ytrace[name] = np.maximum.accumulate(solver.val_history) plt.figure() styles = ['k-o', 'r:o', 'b--o', 'g-o', 'c:o', 'm--o', 'y-o'] for i, tuple in enumerate(methods): style = styles[i] name = tuple[1] plt.plot(ytrace[name], style, label=name) plt.axvline(n_bo_init) plt.legend() plt.title("seed = {}".format(seed)) plt.show()
class BayesianOptimizer(BaseOptimizer): """ optimize with bayesian optimizer """ def __init__(self, configFile, domain): """ intialize """ defValues = {} defValues["opti.initial.model.training.size"] = (1000, None) defValues["opti.acquisition.samp.size"] = (100, None) defValues["opti.prob.acquisition.strategy"] = ("pi", None) defValues["opti.acquisition.lcb.mult"] = (2.0, None) super(BayesianOptimizer, self).__init__(configFile, defValues, domain) self.model = GaussianProcessRegressor() def run(self): """ run optimizer """ assert Candidare.fixedSz, "BayesianOptimizer works only for fixed size solution" for sampler in self.compDataDistr: assert sampler.isNumeric( ), "BayesianOptimizer works only for numerical data" #inir=tial population and moel fit trSize = self.config.getIntConfig( "opti.initial.model.training.size")[0] features, targets = self.createSamples(trSize) self.model.fit(features, targets) #iterate acqSampSize = self.config.getIntConfig("opti.acquisition.samp.size")[0] prAcqStrategy = self.config.getIntConfig( "opti.prob.acquisition.strategy")[0] acqLcbMult = self.config.getFloatConfig( "opti.prob.acquisition.strategy")[0] for i in range(self.numIter): ofeature, otarget = optAcquire(features, targets, acqSampSize, prAcqStrategy, acqLcbMult) features = np.vstack((features, [ofeature])) targets = np.vstack((targets, [otarget])) self.model.fit(features, targets) ix = np.argmax(targets) def optAcquire(features, targets, acqSampSize, prAcqStrategy, acqLcbMult): """ run optimizer """ mu = self.model.predict(features) best = min(mu) sfeatures, stargets = self.createSamples(acqSampSize) smu, sstd = self.model.predict(sfeatures, return_std=True) if prAcqStrategy == "pi": imp = best - smu z = imp / (sstd + 1E-9) scores = norm.cdf(z) elif prAcqStrategy == "ei": imp = best - smu z = imp / (sstd + 1E-9) scores = imp * norm.cdf(z) + sstd * norm.pdf(z) elif prAcqStrategy == "lcb": scores = smu - acqLcbMult * sstd else: raise ValueError( "invalid acquisition strategy for next best candidate") ix = np.argmax(scores) sfeature = sfeatures[ix] starget = stargets[ix] return (sfeature, starget) def createSamples(self, size): """ sample features and targets """ features = list() targets = list() for i in range(size): cand = self.createCandidate() features.append(cand.getSolnAsFloat()) targets.append(cand.cost) features = np.asarray(features) targets = np.asarray(targets).reshape(size, 1) return (features, targets)
'weights': ['uniform', 'distance'], 'p': np.arange(1, 2, 0.25) } dt_params = { 'criterion': ['mse', 'friedman_mse', 'mae'], 'max_depth': np.arange(1, 50, 5) } models_list = [('LR', LinearRegression(), {}), ('Ridge', Ridge(), ridge_params), ('Lasso', Lasso(), lasso_params), ('ElasticNet', ElasticNet(), elasticnet_params), ('SGDRegressor', SGDRegressor(), sgdregressor_params), ('SVR', SVR(), svr_params), ('KNN', KNeighborsRegressor(), knn_params), ('GaussianProcess', GaussianProcessRegressor(), {}), ('DTree', DecisionTreeRegressor(), dt_params)] rmsle_scores = [] r2_scores = [] model_names = [] best_estimators = [] for name, model, model_params in list(models_list): print('-' * 100) print('Fitting ', name) model_names.append(name) model_grid = GridSearchCV(estimator=model, param_grid=model_params, scoring='neg_root_mean_squared_error', verbose=0,
# Range over which the parameter can vary
x_grid = np.atleast_2d(np.linspace(0, 10, 1001)[:1000]).T

# As initial values, evaluate the two points x=1 and x=9.
X = np.atleast_2d([1., 9.]).T
y = blackbox_func(X).ravel()

# Gaussian Process Upper Confidence Bound (GP-UCB) algorithm
# --> iterate until convergence (the convergence criterion is a tuning point)
n_iteration = 50
for i in range(n_iteration):
    # Fit a Gaussian process to the values observed so far
    # --> the kernel function and its parameters are left at their defaults
    #     (a tuning point)
    gp = GaussianProcessRegressor()
    gp = KNeighborsRegressor(n_neighbors=2)
    gp.fit(X, y)

    # This yields the posterior distribution
    posterior_mean = gp.predict(x_grid)
    # posterior_sig = dist_knn(X, x_grid)
    posterior_sig = dist_knn(X, x_grid, min([i + 1, 5]))

    # Select the x that maximizes the acquisition function as the next parameter
    # --> a larger beta favors exploration (e.g. start large and decay it over the
    #     iterations to shift towards exploitation; another tuning point)
    idx = acq_ucb(posterior_mean, posterior_sig, beta=100.0)
    x_next = x_grid[idx]

    plot(x_grid, y,
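# Hedged sketch of the acq_ucb helper the loop above assumes (not shown in the
# snippet): return the index of the grid point maximizing mean + sqrt(beta) * sigma.
def acq_ucb(mean, sig, beta=1.0):
    return np.argmax(mean + np.sqrt(beta) * sig)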
def generate_gp(points, data, hp0, kernel_type='squaredexponential',
                fixed=False, hyper_limits=None, n_restarts_optimizer=9):
    """Gaussian Process for ndim dimensional parameter space.

    Parameters
    ----------
    points : array of shape (npoints, ndim).
        Coordinates in parameter space of sampled data.
    data : array of shape (npoints,).
        Data at each of the sampled points.
    hp0 : array of shape (ndim+2,)
        Initial hyperparameter guess for optimizer.
        Order is (sigma_f, ls_0, ls_1, ..., sigma_n).
    kernel_type : 'squaredexponential', 'matern32', 'matern52'
    hyper_limits : array of shape (ndim+2, 2)
        Lower and upper bounds on the value of each hyperparameter.
    n_restarts_optimizer : int
        Number of random points in the hyperparameter space to restart
        optimization routine for searching for the maximum log-likelihood.
        Total number of optimizations will be n_restarts_optimizer+1.

    Returns
    -------
    gp : GaussianProcessRegressor
    """
    # ******* Generate kernel *******

    # ConstantKernel = c multiplies *all* elements of kernel matrix by c
    # If you want to specify sigma_f (where c=sigma_f^2) then use
    # sigma_f^2 and bounds (sigma_flow^2, sigma_fhigh^2)

    # WhiteKernel = c \delta_{ij} multiplies *diagonal* elements by c
    # If you want to specify sigma_n (where c=sigma_n^2) then use
    # sigma_n^2 and bounds (sigma_nlow^2, sigma_nhigh^2)

    # radial part uses the length scales [l_0, l_1, ...] not [l_0^2, l_1^2, ...]

    # Constant and noise term
    if fixed == True:
        const = ConstantKernel(hp0[0]**2)
        noise = WhiteKernel(hp0[-1]**2)
    elif fixed == False:
        const = ConstantKernel(hp0[0]**2, hyper_limits[0]**2)
        noise = WhiteKernel(hp0[-1]**2, hyper_limits[-1]**2)
    else:
        raise Exception("'fixed' must be True or False.")

    # Radial term
    if fixed == True:
        if kernel_type == 'squaredexponential':
            radial = RBF(hp0[1:-1])
        elif kernel_type == 'matern32':
            radial = Matern(hp0[1:-1], nu=1.5)
        elif kernel_type == 'matern52':
            radial = Matern(hp0[1:-1], nu=2.5)
        else:
            raise Exception("Options for kernel_type are: "
                            "'squaredexponential', 'matern32', 'matern52'.")
    elif fixed == False:
        if kernel_type == 'squaredexponential':
            radial = RBF(hp0[1:-1], hyper_limits[1:-1])
        elif kernel_type == 'matern32':
            radial = Matern(hp0[1:-1], hyper_limits[1:-1], nu=1.5)
        elif kernel_type == 'matern52':
            radial = Matern(hp0[1:-1], hyper_limits[1:-1], nu=2.5)
        else:
            raise Exception("Options for kernel_type are: "
                            "'squaredexponential', 'matern32', 'matern52'.")
    else:
        raise Exception("'fixed' must be True or False.")

    kernel = const * radial + noise

    # ******* Initialize GaussianProcessRegressor and optimize hyperparameters
    # if not fixed *******
    if fixed == True:
        gp = GaussianProcessRegressor(kernel=kernel, optimizer=None)
        # Supply the points and data, but don't optimize the hyperparameters
        gp.fit(points, data)
        return gp
    elif fixed == False:
        gp = GaussianProcessRegressor(
            kernel=kernel, n_restarts_optimizer=n_restarts_optimizer)
        # Optimize the hyperparameters by maximizing the log-likelihood
        gp.fit(points, data)
        return gp
    else:
        raise Exception("'fixed' must be True or False.")
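# Hedged usage sketch for generate_gp; the sample data and hyperparameter values
# below are purely illustrative (ordering per the docstring: sigma_f, one length
# scale per input dimension, sigma_n):
import numpy as np

rng = np.random.RandomState(0)
points = rng.uniform(-1.0, 1.0, size=(30, 2))
data = np.sin(points[:, 0]) * np.cos(points[:, 1])
hp0 = np.array([1.0, 0.5, 0.5, 0.01])
hyper_limits = np.array([[0.1, 10.0], [0.05, 5.0], [0.05, 5.0], [1e-4, 1.0]])
gp = generate_gp(points, data, hp0, kernel_type='matern52',
                 fixed=False, hyper_limits=hyper_limits)
mean, std = gp.predict(points, return_std=True)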
def HGPfunc(x, y, y_gn, plot, h1low, h1high, h2low, h2high, h1low_z, h1high_z, h2low_z, h2high_z): y = y.reshape(-1, 1) y_gn = y_gn.reshape(-1, 1) x = x.reshape(-1, 1) if plot: plt.plot(x, y, '+') plt.xlabel("Pch (dBm)") plt.ylabel("SNR (dB)") plt.savefig('Adataset.png', dpi=200) plt.show() n = np.size(x) scaler = StandardScaler().fit(y) y = scaler.transform(y) #scaler_gn = StandardScaler().fit(y_gn) #y_gn = scaler_gn.transform(y_gn) def sqexp(X, Y, k1, k2): X = np.atleast_2d(X) if Y is None: dists = pdist(X / k2, metric='sqeuclidean') K = np.exp(-.5 * dists) # convert from upper-triangular matrix to square matrix K = squareform(K) np.fill_diagonal(K, 1) # return gradient K_gradient = (K * squareform(dists))[:, :, np.newaxis] #K_gradient = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 \ # anisotropic case, see https://github.com/scikit-learn/scikit-learn/blob/95d4f0841d57e8b5f6b2a570312e9d832e69debc/sklearn/gaussian_process/kernels.py # / (k2 ** 2) #K_gradient *= K[..., np.newaxis] return k1 * K, K_gradient else: dists = cdist(X / k2, Y / k2, metric='sqeuclidean') K = np.exp(-.5 * dists) return k1 * K # heteroscedastic versions of functions global Kyinvh Kyinvh = 0.0 global Kfh Kfh = 0.0 def lmlh(params, y, R, y_gn): #print(params) # show progress of fit [k1, k2] = params global Kfh Kfh = sqexp(x, None, k1, k2**0.5)[0] #print(np.size(Kfh)) Ky = Kfh + R # calculate initial kernel with noise global Kyinvh Kyinvh = inv(Ky) return -(-0.5 * mul(mul(T(y), Kyinvh), y) - 0.5 * np.log( (det(Ky))) - 0.5 * n * np.log(2 * np.pi)) + -( -0.5 * mul(mul(T(y_gn), Kyinvh), y_gn) - 0.5 * np.log( (det(Ky))) - 0.5 * n * np.log(2 * np.pi) ) # marginal likelihood - (5.8) def lmlgh(params, y, R, y_gn): k1, k2 = params al = mul(Kyinvh, y) al_gn = mul(Kyinvh, y_gn) dKdk1 = Kfh * (1 / k1) dKdk2 = sqexp(x, None, k1, k2**0.5)[1].reshape(n, n) lmlg1 = -(0.5 * np.trace(mul(mul(al, T(al)) - Kyinvh, dKdk1))) + -( 0.5 * np.trace(mul(mul(al_gn, T(al_gn)) - Kyinvh, dKdk1))) lmlg2 = -(0.5 * np.trace(mul(mul(al, T(al)) - Kyinvh, dKdk2))) + -( 0.5 * np.trace(mul(mul(al_gn, T(al_gn)) - Kyinvh, dKdk2))) return np.ndarray((2, ), buffer=np.array([lmlg1, lmlg2]), dtype=float) def GPRfith(xs, k1, k2, R, Rs): Ky = sqexp(x, None, k1, k2**0.5)[0] + R Ks = sqexp(xs, x, k1, k2**0.5) Kss = sqexp(xs, None, k1, k2)[0] L = cholesky(Ky) al = solve(T(L), solve(L, y)) fmst = mul(Ks, al) varfmst = np.empty([n, 1]) for i in range(np.size(xs)): v = solve(L, T(Ks[:, i])) varfmst[i] = Kss[i, i] - mul(T(v), v) + Rs[i, i] lmlopt = -0.5 * mul(T(y), al) - np.trace( np.log(L)) - 0.5 * n * np.log(2 * np.pi) #return fmst, varfmst[::-1], lmlopt return fmst, varfmst, lmlopt def hypopth(y, numrestarts, R, y_gn): numh = 2 # number of hyperparameters in kernel function k1s4 = np.empty([numrestarts, 1]) k2s4 = np.empty([numrestarts, 1]) for i in range(numrestarts): #k1is4 = np.random.uniform(1e-2,1e3) #k2is4 = np.random.uniform(1e-1,1e3) k1is4 = np.random.uniform(h1low, h1high) k2is4 = np.random.uniform(h2low, h2high) kis4 = np.ndarray((numh, ), buffer=np.array([k1is4, k2is4]), dtype=float) s4res = minimize(lmlh, kis4, args=(y, R, y_gn), method='L-BFGS-B', jac=lmlgh, bounds=((h1low, h1high), (h2low, h2high)), options={'maxiter': 1e2}) step4res = [] if s4res.success: step4res.append(s4res.x) print("successful k1:" + str(k1is4)) print("successful k2: " + str(k2is4)) else: print("error " + str(k1is4)) print("error " + str(k2is4)) #raise ValueError(s4res.message) #k1is4 = np.random.uniform(1e-2,1e3) #k2is4 = np.random.uniform(2e-1,1e3) k1is4 = 
np.random.uniform(h1low, h1high) k2is4 = np.random.uniform(h2low, h2high) print("error in hypopth() - reinitialising hyperparameters") continue k1s4[i] = step4res[0][0] k2s4[i] = step4res[0][1] lmltest = [ lmlh([k1s4[i], k2s4[i]], y, R, y_gn) for i in range(numrestarts) ] #k1f = k1s4[np.argmin(lmltest)] #k2f = k2s4[np.argmin(lmltest)] k1f = k1s4[np.argmax(lmltest)] k2f = k2s4[np.argmax(lmltest)] #lml(params,y,sig) return k1f, k2f def hetloopSK(fmst, varfmst, numiters, numrestarts): s = 200 #k1is3, k2is3, k1is4,k2is4 = np.random.uniform(1e-2,1e2,4) MSE = np.empty([numiters, 1]) NLPD = np.empty([numiters, 1]) fmstf = np.empty([numiters, n]) varfmstf = np.empty([numiters, n]) lmloptf = np.empty([numiters, 1]) rf = np.empty([numiters, n]) i = 0 while i < numiters: breakwhile = False # Step 2: estimate empirical noise levels z #k1is4,k2is4 = np.random.uniform(1e-2,1e2,2) #k1is3, k1is4 = np.random.uniform(1e-2,1e2,2) #k2is3, k2is4 = np.random.uniform(1e-1,1e2,2) k1is3 = np.random.uniform(h1low_z, h1high_z, 1) k2is3 = np.random.uniform(h2low_z, h2high_z, 1) z = np.empty([n, 1]) for j in range(n): #np.random.seed() normdraw = normal(fmst[j], varfmst[j]**0.5, s).reshape(s, 1) z[j] = np.log((1 / s) * 0.5 * sum((y[j] - normdraw)**2)) if math.isnan(z[j]): # True for NaN values breakwhile = True break if breakwhile: print("Nan value in z -- skipping iter " + str(i)) i = i + 1 continue # Step 3: estimate GP2 on D' - (x,z) kernel2 = C(k1is3, (h1low_z, h1high_z)) * RBF(k2is3, (h2low_z, h2high_z)) gpr2 = GaussianProcessRegressor(kernel=kernel2, n_restarts_optimizer=numrestarts, normalize_y=False, alpha=np.var(z)) gpr2.fit(x, z) ystar2, sigma2 = gpr2.predict(x, return_std=True) sigma2 = (sigma2**2 + 1)**0.5 # Step 4: train heteroscedastic GP3 using predictive mean of G2 to predict log noise levels r r = exp(ystar2) R = r * np.identity(n) k1s4, k2s4 = hypopth(y, numrestarts, R, y_gn) # needs to be modified fmst4, varfmst4, lmlopt4 = GPRfith(x, k1s4, k2s4, R, R) # needs to be modified # test for convergence MSE[i] = (1 / n) * sum(((y - fmst4)**2) / np.var(y)) #NLPD[i] = sum([(1/n)*(-np.log(norm.pdf(x[j], fmst4[j], varfmst4[j]**0.5))) for j in range(n) ]) nlpdarg = np.zeros([n, 1]) #nlpdtest = np.zeros([n,1]) for k in range(n): nlpdarg[k] = -np.log10( norm.pdf(x[k], fmst4[k], varfmst4[k]**0.5)) #nlpdtest[k] = norm.pdf(x[k], fmst4[k], varfmst4[k]**0.5) #print("mean NLPD log arg " + str(nlpdtest) ) #test3[k] = -np.log(norm.pdf(x[k], fmst[k], varfmst[k]**0.5)) NLPD[i] = sum(nlpdarg) * (1 / n) print("MSE = " + str(MSE[i])) print("NLPD = " + str(NLPD[i])) print("finished iteration " + str(i + 1)) fmstf[i, :] = fmst4.reshape(n) varfmstf[i, :] = varfmst4.reshape(n) lmloptf[i] = lmlopt4 fmst = fmst4 varfmst = varfmst4 rf[i, :] = r.reshape(n) #k1is3 = k1s4 #k2is3 = k2s4 i = i + 1 return fmstf, varfmstf, lmloptf, MSE, rf, NLPD # , NLPD numiters = 10 numrestarts = 20 #kernel1 = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-3, 1e3)) + W(1.0, (1e-5, 1e5)) #gpr1 = GaussianProcessRegressor(kernel=kernel1, n_restarts_optimizer = 0, normalize_y=True) kernel1 = C(1.0, (h1low, h1high)) * RBF(1.0, (h2low, h2high)) gpr1 = GaussianProcessRegressor(kernel=kernel1, n_restarts_optimizer=numrestarts, normalize_y=False, alpha=np.var(y)) gpr1.fit(x, y) ystar1, sigma1 = gpr1.predict(x, return_std=True) var1 = (sigma1**2 + np.var(y)) #sigma1 = np.reshape(sigma1,(np.size(sigma1), 1)) start_time = time.time() fmstf, varfmstf, lmlopt, mse, _, NLPD = hetloopSK(ystar1, var1, numiters, numrestarts) duration = time.time() - start_time ind = numiters - 1 #ind 
= fmst4 = fmstf[ind] varfmst4 = varfmstf[ind] sigs4 = varfmst4**0.5 fmstps4 = fmst4 + sigs4 fmst4i = scaler.inverse_transform(fmst4) fmstps4i = scaler.inverse_transform(fmstps4) print("HGP fitting duration: " + str(duration)) return fmst4i, fmstps4i, lmlopt, mse, NLPD
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PowerTransformer
from tpot.builtins import ZeroCount
from tpot.export_utils import set_param_recursive

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR',
                        dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'], random_state=123)

# Average CV score on the training set was: 0.9858937143431208
exported_pipeline = make_pipeline(
    ZeroCount(),
    PowerTransformer(),
    GaussianProcessRegressor(kernel=Matern(length_scale=2.9000000000000004, nu=1.5),
                             n_restarts_optimizer=155, normalize_y=True)
)
# Fix random state for all the steps in exported pipeline
set_param_recursive(exported_pipeline.steps, 'random_state', 123)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
def test_y_multioutput():
    # Test that GPR can deal with multi-dimensional target values
    y_2d = np.vstack((y, y * 2)).T

    # Test for fixed kernel that first dimension of 2d GP equals the output
    # of 1d GP and that second dimension is twice as large
    kernel = RBF(length_scale=1.0)

    gpr = GaussianProcessRegressor(kernel=kernel, optimizer=None,
                                   normalize_y=False)
    gpr.fit(X, y)

    gpr_2d = GaussianProcessRegressor(kernel=kernel, optimizer=None,
                                      normalize_y=False)
    gpr_2d.fit(X, y_2d)

    y_pred_1d, y_std_1d = gpr.predict(X2, return_std=True)
    y_pred_2d, y_std_2d = gpr_2d.predict(X2, return_std=True)
    _, y_cov_1d = gpr.predict(X2, return_cov=True)
    _, y_cov_2d = gpr_2d.predict(X2, return_cov=True)

    assert_almost_equal(y_pred_1d, y_pred_2d[:, 0])
    assert_almost_equal(y_pred_1d, y_pred_2d[:, 1] / 2)

    # Standard deviation and covariance do not depend on output
    for target in range(y_2d.shape[1]):
        assert_almost_equal(y_std_1d, y_std_2d[..., target])
        assert_almost_equal(y_cov_1d, y_cov_2d[..., target])

    y_sample_1d = gpr.sample_y(X2, n_samples=10)
    y_sample_2d = gpr_2d.sample_y(X2, n_samples=10)

    assert y_sample_1d.shape == (5, 10)
    assert y_sample_2d.shape == (5, 2, 10)
    # Only the first target will be equal
    assert_almost_equal(y_sample_1d, y_sample_2d[:, 0, :])

    # Test hyperparameter optimization
    for kernel in kernels:
        gpr = GaussianProcessRegressor(kernel=kernel, normalize_y=True)
        gpr.fit(X, y)

        gpr_2d = GaussianProcessRegressor(kernel=kernel, normalize_y=True)
        gpr_2d.fit(X, np.vstack((y, y)).T)

        assert_almost_equal(gpr.kernel_.theta, gpr_2d.kernel_.theta, 4)
def test_warning_bounds(): kernel = RBF(length_scale_bounds=[1e-5, 1e-3]) gpr = GaussianProcessRegressor(kernel=kernel) warning_message = ( "The optimal value found for dimension 0 of parameter " "length_scale is close to the specified upper bound " "0.001. Increasing the bound and calling fit again may " "find a better value." ) with pytest.warns(ConvergenceWarning, match=warning_message): gpr.fit(X, y) kernel_sum = WhiteKernel(noise_level_bounds=[1e-5, 1e-3]) + RBF( length_scale_bounds=[1e3, 1e5] ) gpr_sum = GaussianProcessRegressor(kernel=kernel_sum) with pytest.warns(None) as record: with warnings.catch_warnings(): # scipy 1.3.0 uses tostring which is deprecated in numpy warnings.filterwarnings("ignore", "tostring", DeprecationWarning) gpr_sum.fit(X, y) assert len(record) == 2 assert ( record[0].message.args[0] == "The optimal value found for " "dimension 0 of parameter " "k1__noise_level is close to the " "specified upper bound 0.001. " "Increasing the bound and calling " "fit again may find a better value." ) assert ( record[1].message.args[0] == "The optimal value found for " "dimension 0 of parameter " "k2__length_scale is close to the " "specified lower bound 1000.0. " "Decreasing the bound and calling " "fit again may find a better value." ) X_tile = np.tile(X, 2) kernel_dims = RBF(length_scale=[1.0, 2.0], length_scale_bounds=[1e1, 1e2]) gpr_dims = GaussianProcessRegressor(kernel=kernel_dims) with pytest.warns(None) as record: with warnings.catch_warnings(): # scipy 1.3.0 uses tostring which is deprecated in numpy warnings.filterwarnings("ignore", "tostring", DeprecationWarning) gpr_dims.fit(X_tile, y) assert len(record) == 2 assert ( record[0].message.args[0] == "The optimal value found for " "dimension 0 of parameter " "length_scale is close to the " "specified lower bound 10.0. " "Decreasing the bound and calling " "fit again may find a better value." ) assert ( record[1].message.args[0] == "The optimal value found for " "dimension 1 of parameter " "length_scale is close to the " "specified lower bound 10.0. " "Decreasing the bound and calling " "fit again may find a better value." )
def test_gpr_fit_error(params, TypeError, err_msg):
    """Check that the expected errors are raised during fit."""
    gpr = GaussianProcessRegressor(**params)
    with pytest.raises(TypeError, match=err_msg):
        gpr.fit(X, y)
def test_lml_precomputed(kernel):
    # Test that lml of optimized kernel is stored correctly.
    gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
    assert gpr.log_marginal_likelihood(gpr.kernel_.theta) == pytest.approx(
        gpr.log_marginal_likelihood()
    )
# ----------------------------------------------------------------------
#  First the noiseless case
X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T

# Observations
y = f(X).ravel()

# Mesh the input space for evaluations of the real function, the prediction and
# its MSE
x = np.atleast_2d(np.linspace(0, 10, 1000)).T

# Instantiate a Gaussian Process model
kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(X, y)

# Make the prediction on the meshed x-axis (ask for MSE as well)
y_pred, sigma = gp.predict(x, return_std=True)

# Plot the function, the prediction and the 95% confidence interval based on
# the MSE
plt.figure()
plt.plot(x, f(x), 'r:', label=r'$f(x) = x\,\sin(x)$')
plt.plot(X, y, 'r.', markersize=10, label='Observations')
plt.plot(x, y_pred, 'b-', label='Prediction')
plt.fill(np.concatenate([x, x[::-1]]),
         np.concatenate(
class BayesianOptimization(Observable): """ This class takes the function to optimize as well as the parameters bounds in order to find which values for the parameters yield the maximum value using bayesian optimization. Parameters ---------- f: function Function to be maximized. pbounds: dict Dictionary with parameters names as keys and a tuple with minimum and maximum values. random_state: int or numpy.random.RandomState, optional(default=None) If the value is an integer, it is used as the seed for creating a numpy.random.RandomState. Otherwise the random state provieded it is used. When set to None, an unseeded random state is generated. verbose: int, optional(default=2) The level of verbosity. bounds_transformer: DomainTransformer, optional(default=None) If provided, the transformation is applied to the bounds. Methods ------- probe() Evaluates the function on the given points. Can be used to guide the optimizer. maximize() Tries to find the parameters that yield the maximum value for the given function. set_bounds() Allows changing the lower and upper searching bounds """ def __init__(self, f, pbounds, random_state=None, verbose=2, bounds_transformer=None): self._random_state = ensure_rng(random_state) # Data structure containing the function to be optimized, the bounds of # its domain, and a record of the evaluations we have done so far self._space = TargetSpace(f, pbounds, random_state) self._queue = Queue() # Internal GP regressor self._gp = GaussianProcessRegressor( kernel=Matern(nu=2.5), alpha=1e-6, normalize_y=True, n_restarts_optimizer=5, random_state=self._random_state, ) self._verbose = verbose self._bounds_transformer = bounds_transformer if self._bounds_transformer: if hasattr(self._bounds_transformer, "bounds")==False: #raise TypeError('test') try: self._bounds_transformer.initialize(self._space) except (AttributeError, TypeError): raise TypeError('The transformer must be an instance of ' 'DomainTransformer') super(BayesianOptimization, self).__init__(events=DEFAULT_EVENTS) @property def space(self): return self._space @property def max(self): return self._space.max() @property def res(self): return self._space.res() def register(self, params, target): """Expect observation with known target""" self._space.register(params, target) self.dispatch(Events.OPTIMIZATION_STEP) def probe(self, params, lazy=True): """ Evaluates the function on the given points. Useful to guide the optimizer. Parameters ---------- params: dict or list The parameters where the optimizer will evaluate the function. lazy: bool, optional(default=True) If True, the optimizer will evaluate the points when calling maximize(). Otherwise it will evaluate it at the moment. """ if lazy: self._queue.add(params) else: self._space.probe(params) self.dispatch(Events.OPTIMIZATION_STEP) def suggest(self, utility_function): """Most promising point to probe next""" if len(self._space) == 0: return self._space.array_to_params(self._space.random_sample()) # Sklearn's GP throws a large number of warnings at times, but # we don't really need to see them here. with warnings.catch_warnings(): warnings.simplefilter("ignore") self._gp.fit(self._space.params, self._space.target) # Finding argmax of the acquisition function. 
suggestion = acq_max( ac=utility_function.utility, gp=self._gp, y_max=self._space.target.max(), bounds=self._space.bounds, random_state=self._random_state ) return self._space.array_to_params(suggestion) def _prime_queue(self, init_points): """Make sure there's something in the queue at the very beginning.""" if self._queue.empty and self._space.empty: init_points = max(init_points, 1) for _ in range(init_points): self._queue.add(self._space.random_sample()) def _prime_subscriptions(self): if not any([len(subs) for subs in self._events.values()]): _logger = _get_default_logger(self._verbose) self.subscribe(Events.OPTIMIZATION_START, _logger) self.subscribe(Events.OPTIMIZATION_STEP, _logger) self.subscribe(Events.OPTIMIZATION_END, _logger) def maximize(self, init_points=5, n_iter=25, acq='ucb', kappa=2.576, kappa_decay=1, kappa_decay_delay=0, xi=0.0, **gp_params): """ Probes the target space to find the parameters that yield the maximum value for the given function. Parameters ---------- init_points : int, optional(default=5) Number of iterations before the explorations starts the exploration for the maximum. n_iter: int, optional(default=25) Number of iterations where the method attempts to find the maximum value. acq: {'ucb', 'ei', 'poi'} The acquisition method used. * 'ucb' stands for the Upper Confidence Bounds method * 'ei' is the Expected Improvement method * 'poi' is the Probability Of Improvement criterion. kappa: float, optional(default=2.576) Parameter to indicate how closed are the next parameters sampled. Higher value = favors spaces that are least explored. Lower value = favors spaces where the regression function is the highest. kappa_decay: float, optional(default=1) `kappa` is multiplied by this factor every iteration. kappa_decay_delay: int, optional(default=0) Number of iterations that must have passed before applying the decay to `kappa`. xi: float, optional(default=0.0) [unused] """ self._prime_subscriptions() self.dispatch(Events.OPTIMIZATION_START) self._prime_queue(init_points) self.set_gp_params(**gp_params) util = UtilityFunction(kind=acq, kappa=kappa, xi=xi, kappa_decay=kappa_decay, kappa_decay_delay=kappa_decay_delay) iteration = 0 while not self._queue.empty or iteration < n_iter: try: x_probe = next(self._queue) except StopIteration: util.update_params() x_probe = self.suggest(util) iteration += 1 self.probe(x_probe, lazy=False) if self._bounds_transformer: self.set_bounds( self._bounds_transformer.transform(self._space)) self.dispatch(Events.OPTIMIZATION_END) def set_bounds(self, new_bounds): """ A method that allows changing the lower and upper searching bounds Parameters ---------- new_bounds : dict A dictionary with the parameter name and its new bounds """ self._space.set_bounds(new_bounds) def set_gp_params(self, **params): """Set parameters to the internal Gaussian Process Regressor""" self._gp.set_params(**params)
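# A hedged usage sketch for the BayesianOptimization class above; black_box and its
# bounds are illustrative, and the call pattern mirrors the methods defined in the
# class (maximize, max):
def black_box(x, y):
    return -x ** 2 - (y - 1) ** 2 + 1


optimizer = BayesianOptimization(
    f=black_box,
    pbounds={'x': (-2, 2), 'y': (-3, 3)},
    random_state=1,
)
optimizer.maximize(init_points=2, n_iter=10)
print(optimizer.max)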
train_labels = feature_matrices['train_labels'] train_values = feature_matrices['train_values'] train_targets = feature_matrices['train_targets'] test_labels = feature_matrices['test_labels'] test_values = feature_matrices['test_values'] test_targets = feature_matrices['test_targets'] n_features = train_values.shape[1] # Model Specific Information # ----------------------- lbound = 1e-2 rbound = 1e1 n_restarts = 25 kernel = C(1.0, (lbound, rbound)) * Matern(n_features * [1], (lbound, rbound), nu=2.5) gp = GPR(kernel=kernel, n_restarts_optimizer=n_restarts) gp.fit(train_values, train_targets) test_model, sigma2_pred_test = gp.predict(test_values, return_std=True) train_model, sigma2_pred_train = gp.predict(train_values, return_std=True) # ----------------------- # Undo normalization in FP generation test_model = np.multiply(test_model, std_target) + mean_target sigma2_pred_test = np.multiply(sigma2_pred_test, std_target) # GP Specific test_targets = np.multiply(test_targets, std_target) + mean_target train_model = np.multiply(train_model, std_target) + mean_target sigma2_pred_train = np.multiply(sigma2_pred_train, std_target) # GP Specific train_targets = np.multiply(train_targets, std_target) + mean_target
ran = RANSACRegressor() tsr = TheilSenRegressor(random_state=42) br = BayesianRidge(n_iter=300, tol=0.001) bgm = BayesianGaussianMixture() knr = KNeighborsRegressor(n_neighbors=5) rnr = RadiusNeighborsRegressor(radius=1.0) pls = PLSRegression(n_components=1) gnb = GaussianNB() mnb = MultinomialNB() svl = SVR(kernel='linear') svr = SVR() las = Lasso() en = ElasticNet() rr = Ridge() kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)) gpr = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9) estimators = { 'LR ': lr, 'DTR': dtr, 'RFR': rfr, 'OMP': omp, 'RAN': ran, 'BR ': br, 'BGM': bgm, 'KNR': knr, 'RNR': rnr, 'PLS': pls, 'SVL': svl, 'SVR': svr, 'LAS': las,
def test_y_normalization(kernel):
    """
    Test normalization of the target values in GP

    Fitting non-normalizing GP on normalized y and fitting normalizing GP
    on unnormalized y should yield identical results. Note that, here,
    'normalized y' refers to y that has been made zero mean and unit variance.
    """
    y_mean = np.mean(y)
    y_std = np.std(y)
    y_norm = (y - y_mean) / y_std

    # Fit non-normalizing GP on normalized y
    gpr = GaussianProcessRegressor(kernel=kernel)
    gpr.fit(X, y_norm)

    # Fit normalizing GP on unnormalized y
    gpr_norm = GaussianProcessRegressor(kernel=kernel, normalize_y=True)
    gpr_norm.fit(X, y)

    # Compare predicted mean, std-devs and covariances
    y_pred, y_pred_std = gpr.predict(X2, return_std=True)
    y_pred = y_pred * y_std + y_mean
    y_pred_std = y_pred_std * y_std

    y_pred_norm, y_pred_std_norm = gpr_norm.predict(X2, return_std=True)

    assert_almost_equal(y_pred, y_pred_norm)
    assert_almost_equal(y_pred_std, y_pred_std_norm)

    _, y_cov = gpr.predict(X2, return_cov=True)
    y_cov = y_cov * y_std**2

    _, y_cov_norm = gpr_norm.predict(X2, return_cov=True)

    assert_almost_equal(y_cov, y_cov_norm)
def test_no_optimizer():
    # Test that kernel parameters are unmodified when optimizer is None.
    kernel = RBF(1.0)
    gpr = GaussianProcessRegressor(kernel=kernel, optimizer=None).fit(X, y)
    assert np.exp(gpr.kernel_.theta) == 1.0
def Regression(train_data, train_solution, test_data, test_solution, method): ## Fix Data Structure ## train_data = train_data.values train_solution = train_solution.values test_data = test_data.values test_solution = test_solution.values ## List of Method Options with Initialization ## if method == 'lin_reg': # linear regression from sklearn.linear_model import LinearRegression reg = LinearRegression() elif method == 'ply_reg': # polynomial regression from sklearn.linear_model import LinearRegression reg = LinearRegression() poly_features = PolynomialFeatures(degree=2) elif method == 'rdg_reg': # ridge regression from sklearn.linear_model import Ridge reg = Ridge() elif method == 'lso_reg': # lasso regression from sklearn.linear_model import Lasso reg = Lasso(alpha=0.00001) elif method == 'ela_net': # elastic net regression from sklearn.linear_model import ElasticNet reg = ElasticNet() elif method == 'svr_lin': # SVM regression from sklearn.svm import LinearSVR reg = LinearSVR(epsilon=0.01, max_iter=10000) elif method == 'svr_2nd': # SVR regression from sklearn.svm import SVR reg = SVR(kernel='poly', degree=2, epsilon=0.01) #C=100 elif method == 'svr_3rd': # SVR regression from sklearn.svm import SVR reg = SVR(kernel='poly', degree=3, epsilon=0.01) #C=100 elif method == 'dcn_tre': # decision tree from sklearn.tree import DecisionTreeRegressor reg = DecisionTreeRegressor() elif method == 'rdm_for': # random forests from sklearn.ensemble import RandomForestRegressor reg = RandomForestRegressor(n_estimators=100, random_state=3) elif method == 'ada_bst': # AdaBoost Regressor from sklearn.ensemble import AdaBoostRegressor reg = AdaBoostRegressor(n_estimators=100, random_state=3) elif method == 'grd_bst': # Gradient Boosting Regressor from sklearn.ensemble import GradientBoostingRegressor reg = GradientBoostingRegressor(random_state=3) elif method == 'gss_prc': # Gaussian Process Regressor from sklearn.gaussian_process import GaussianProcessRegressor reg = GaussianProcessRegressor(random_state=3) elif method == 'knl_rdg': # Kernel Ridge Regression from sklearn.kernel_ridge import KernelRidge reg = KernelRidge() elif method == 'nst_nbr_uni': # K Nearest Neighbors Regressor from sklearn.neighbors import KNeighborsRegressor reg = KNeighborsRegressor(weights='uniform') elif method == 'nst_nbr_dst': # K Nearest Neighbors Regressor from sklearn.neighbors import KNeighborsRegressor reg = KNeighborsRegressor(weights='distance') elif method == 'rad_nbr_uni': # Radius Neighbor Regressor from sklearn.neighbors import RadiusNeighborsRegressor reg = RadiusNeighborsRegressor(weights='uniform') elif method == 'rad_nbr_dst': # Radius Neighbor Regressor from sklearn.neighbors import RadiusNeighborsRegressor reg = RadiusNeighborsRegressor(weights='distance') elif method == 'mlp_reg': from sklearn.neural_network import MLPRegressor reg = MLPRegressor(random_state=3) else: print( 'Error: Regression method not recognized.\nPlease pick a valid method key (example: xxx_xxx).' 
        )

    ## Preprocessing and Setup ##
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    data = scaler.fit_transform(train_data)
    # Use the scaler fitted on the training data for the test data as well,
    # so both sets share the same scaling (fitting a second scaler on the
    # test set would apply a different, test-derived transformation).
    test_data = scaler.transform(test_data)
    solution = train_solution.reshape(-1, )
    if method == 'ply_reg':
        data = poly_features.fit_transform(data)
    reg.fit(data, solution)
    if len(test_data) < 5:
        predictions = reg.predict(data)
    elif len(test_data) > 5:
        if method == 'ply_reg':
            test_data = poly_features.transform(test_data)
        test_solution = test_solution.reshape(-1, )
        predictions_test = reg.predict(test_data)
        solution = test_solution
        predictions = predictions_test
    else:
        print('Error: test_set undetermined.')
    Matrix_to_save = pd.DataFrame()
    Matrix_to_save['Solution'] = solution
    Matrix_to_save['Predictions'] = predictions
    return Matrix_to_save
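A hedged usage sketch for the Regression helper above; the synthetic DataFrames, the 100/20 split and the choice of the 'gss_prc' method key are invented here purely for illustration.

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
features = pd.DataFrame(rng.rand(120, 4), columns=['f1', 'f2', 'f3', 'f4'])
target = pd.DataFrame(2.0 * features['f1'] - features['f3'] + rng.normal(0, 0.05, 120))

train_data, test_data = features.iloc[:100], features.iloc[100:]
train_solution, test_solution = target.iloc[:100], target.iloc[100:]

# Gaussian Process Regressor branch ('gss_prc'); returns a DataFrame with
# the test solutions and the corresponding predictions
results = Regression(train_data, train_solution, test_data, test_solution, 'gss_prc')
print(results.head())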
    # Find the best optimum by starting from n_restart different random points.
    min_val = np.inf  # track the best (lowest) objective value across restarts
    for x0 in np.random.uniform(bounds[:, 0], bounds[:, 1], size=(25, dim)):
        res = minimize(min_obj, x0=x0, bounds=bounds, method='L-BFGS-B')
        if res.fun < min_val:
            min_val = res.fun[0]
            min_x = res.x
    X_next = min_x.reshape(-1, 1)
    return X_next


X_init = np.array([[-0.9], [1.1]])
y_init = f(X_init)
# the length-scale lower bound must be strictly positive (log(0) breaks the
# hyperparameter optimization), so use a small value instead of 0
kernel = ConstantKernel(1.0) + RBF(length_scale=2.0, length_scale_bounds=(1e-5, 10))
gpr = GaussianProcessRegressor(kernel=kernel, random_state=42).fit(X_init, y_init)

forest = RandomForestRegressor(n_estimators=100, random_state=42, oob_score=True)
X_init = np.linspace(-2, 10, 10).reshape(-1, 1)
y_init = f(X_init)
for i in range(2):
    forest.fit(X_init[i:10], y_init[i:10])
    y_mean = forest.predict(X)
    plt.plot(X_init, y_init, "ro", label="Initial samples")
    plt.plot(X, y_mean, label="Surrogate model")
    plt.plot(X, f(X), label="Objective")
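The restart loop above minimizes an acquisition objective min_obj that is defined elsewhere in the script; a typical choice is the negative expected improvement under the fitted GP surrogate. The sketch below is an assumption for illustration only (the names gpr and X_sample are stand-ins), not the author's min_obj.

import numpy as np
from scipy.stats import norm

def neg_expected_improvement(x, gpr, X_sample, xi=0.01):
    """Negative expected improvement at x under surrogate gpr (maximization convention)."""
    x = np.atleast_2d(x)
    mu, sigma = gpr.predict(x, return_std=True)
    best = np.max(gpr.predict(X_sample))      # best predicted value at sampled points
    imp = mu - best - xi
    z = np.divide(imp, sigma, out=np.zeros_like(imp), where=sigma > 0)
    ei = imp * norm.cdf(z) + sigma * norm.pdf(z)
    ei = np.where(sigma > 0, ei, 0.0)         # zero improvement where the GP is certain
    return -ei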
    kdt = KDTree(Otrain, leaf_size=100, metric='euclidean')
    with open(path + 'kdt_P' + str(l_prior) + '.pkl', 'wb') as f:
        pickle.dump(kdt, f)
else:
    with open(path + 'kdt_P' + str(l_prior) + '.pkl', 'rb') as f:
        kdt = pickle.load(f)

K = 10
kernel = RBF(length_scale=1.0, length_scale_bounds=(1e-1, 10.0))

i = 0
T = []
Err = []
import time
for e, o in zip(Etest, Otest):
    # print(i)
    # print(o)
    st = time.time()
    idx = kdt.query(o[:d].reshape(1, -1), k=K, return_distance=False)
    O_nn = Otrain[idx, :].reshape(K, d)
    E_nn = Etrain[idx].reshape(K, 1)
    gpr = GaussianProcessRegressor(kernel=kernel).fit(O_nn, E_nn)
    # predict with the same d features used for the neighbour search
    e_mean = gpr.predict(o[:d].reshape(1, -1), return_std=False)[0][0]
    T.append(time.time() - st)
    Err.append(np.abs(e - e_mean))
    # print e, e_mean, np.abs(e-e_mean), o[-1]
    if i >= 0:
        print(e, e_mean)
    i += 1

print("Time: " + str(np.mean(T)))
print("Error: " + str(np.mean(Err)))
if opts.analysis_type == "measured": param_array = np.vstack((color,dmdt,dmdti)).T elif opts.analysis_type == "inferred": param_array = np.vstack((mej,vej,Xlan)).T elif opts.analysis_type == "inferred_bulla": param_array = np.vstack((mej,phi,theta)).T param_array_postprocess = np.array(param_array) param_mins, param_maxs = np.min(param_array_postprocess,axis=0),np.max(param_array_postprocess,axis=0) for i in range(len(param_mins)): param_array_postprocess[:,i] = (param_array_postprocess[:,i]-param_mins[i])/(param_maxs[i]-param_mins[i]) nsvds, nparams = param_array_postprocess.shape kernel = 1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1) gp = GaussianProcessRegressor(kernel=kernel,n_restarts_optimizer=0,alpha=1.0) gp.fit(param_array_postprocess, Mag) M, sigma2_pred = gp.predict(np.atleast_2d(param_array_postprocess), return_std=True) sigma_best = np.median(sigma2_pred) sigma = sigma_best*np.ones(M.shape) elif opts.fit_type == "linear": if opts.analysis_type == "combined": parameters = ["K","alpha","beta","gamma","delta","zeta","sigma"] labels = [r'K', r'$\alpha$', r'$\beta$', r'$\gamma$', r"$\delta$",r"$\zeta$",r'$\sigma$'] n_params = len(parameters) pymultinest.run(myloglike_combined, myprior_combined, n_params, importance_nested_sampling = False, resume = True, verbose = True, sampling_efficiency = 'parameter', n_live_points = n_live_points, outputfiles_basename='%s/2-'%plotDir, evidence_tolerance = evidence_tolerance, multimodal = False, max_iter = max_iter)
    ConstantKernel() * Matern(nu=2.5) + WhiteKernel(),
    ConstantKernel() * Matern(nu=2.5) + WhiteKernel() + ConstantKernel() * DotProduct()
]

# autoscaling
autoscaled_y_train = (y_train - y_train.mean()) / y_train.std()
autoscaled_x_train = (x_train - x_train.mean()) / x_train.std()

# kernel selection by cross-validation
cross_validation = KFold(n_splits=fold_number, random_state=9, shuffle=True)  # cross-validation splitting settings
r2cvs = []  # empty list; stores the cross-validated r2 for each kernel
for index, kernel in enumerate(kernels):
    print(index + 1, '/', len(kernels))
    model = GaussianProcessRegressor(alpha=0, kernel=kernel)
    estimated_y_in_cv = np.ndarray.flatten(
        cross_val_predict(model, autoscaled_x_train, autoscaled_y_train,
                          cv=cross_validation))
    estimated_y_in_cv = estimated_y_in_cv * y_train.std(ddof=1) + y_train.mean()
    r2cvs.append(r2_score(y_train, estimated_y_in_cv))
optimal_kernel_number = np.where(
    r2cvs == np.max(r2cvs))[0][0]  # index of the kernel with the highest cross-validated r2
optimal_kernel = kernels[optimal_kernel_number]  # kernel with the highest cross-validated r2
print('Kernel number selected by cross-validation :', optimal_kernel_number)
print('Kernel selected by cross-validation :', optimal_kernel)

# model construction
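The snippet above stops at the model-construction comment. A hedged continuation could refit a GP with the selected kernel and back-transform its predictions; this is an illustration only, not the author's original continuation, and it assumes test data x_test/y_test plus the names already defined in the snippet above.

model = GaussianProcessRegressor(alpha=0, kernel=optimal_kernel)
model.fit(autoscaled_x_train, autoscaled_y_train)

# autoscale the assumed test set with the training statistics
autoscaled_x_test = (x_test - x_train.mean()) / x_train.std()
estimated_y_test, estimated_y_test_std = model.predict(autoscaled_x_test, return_std=True)
estimated_y_test = estimated_y_test * y_train.std(ddof=1) + y_train.mean()
estimated_y_test_std = estimated_y_test_std * y_train.std(ddof=1)
print(r2_score(y_test, estimated_y_test))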
class CVaR(): """CVaR/VaR surrogate p: function handle for distribution p takes in an integer number of points and returns a 2D array of vectors beta: confidence level for CVaR/VaR num_points_MC: number of points used in Monte Carlo """ def __init__(self,kernel,p,beta = 0.95, num_points_MC = 1000): self.dim = 0; # input data dimension self.X = None; # data points self.fX = None; # function evals self.GP = GPR(kernel=kernel) # gaussian process self.p = p # pdf for U self.beta = beta # confidence level for CVaR self.num_points_MC = num_points_MC # number of points for monte carlo # "fit" a GP to the data def fit(self, X,fX): # update data self.X = X; self.fX = fX; self.dim = X.shape[1] # fit sklearn GP self.GP.fit(X,fX) def predict(self, xx, std = False): """predict C(x) = min_alpha G_beta(x,alpha) xx: 2D array of points std: Bool """ if std == True: print('') print("ERROR: CVaR has no variance") quit() # storage N = np.shape(xx)[0] C = np.zeros(N) # for each x in xx calculate C(x) for i in range(N): # f(x+U) with Monte Carlo on surrogate U = self.p(self.num_points_MC) S = self.GP.predict(xx[i]+U) # sort S in ascending order S.sort() # compute the index of the minimizer I = int(np.ceil(self.num_points_MC*self.beta)) # minimizer VaR = S[I] # CVaR C[i] = S[I] + np.sum(S[I+1:]-S[I])/(1.-self.beta)/self.num_points_MC return C def update(self, xx,yy): """ update gp with new points """ self.X = np.vstack((self.X,xx)) self.fX = np.concatenate((self.fX,[yy])) self.fit(self.X,self.fX)
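A hedged usage sketch for the CVaR surrogate above; the toy quadratic objective and the Gaussian perturbation sampler p are assumptions made purely for illustration (GPR inside the class is sklearn's GaussianProcessRegressor).

import numpy as np
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

def p(num_points):
    # perturbations U ~ N(0, 0.1^2 I) in two dimensions (assumed distribution)
    return 0.1 * np.random.randn(num_points, 2)

rng = np.random.RandomState(0)
X_data = rng.uniform(-1, 1, size=(30, 2))
fX_data = np.sum(X_data ** 2, axis=1)            # toy objective f(x) = ||x||^2

surrogate = CVaR(kernel=C(1.0) * RBF(0.5), p=p, beta=0.9, num_points_MC=500)
surrogate.fit(X_data, fX_data)
print(surrogate.predict(rng.uniform(-1, 1, size=(5, 2))))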
# In[ ]:

sub_knn.head(10)

# In[ ]:

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, ConstantKernel

gpr = GaussianProcessRegressor(random_state=5, alpha=5e-9, n_restarts_optimizer=0,
                               optimizer='fmin_l_bfgs_b', copy_X_train=True)
param_grid = {
    'normalize_y': [True, False],
    'kernel': [DotProduct(), ConstantKernel(1.0, (1e-3, 1e3))]
}
grid_gpr = GridSearchCV(gpr, param_grid, cv=nr_cv, verbose=1, scoring=score_calc)
grid_gpr.fit(X_sc, y_sc)
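A short follow-up (assuming grid_gpr has been fitted as above) for inspecting which kernel and normalize_y combination the grid search picked.

print(grid_gpr.best_params_)         # chosen kernel and normalize_y setting
print(grid_gpr.best_score_)          # best cross-validated score under score_calc
best_gpr = grid_gpr.best_estimator_  # refitted GaussianProcessRegressor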
class GP(Estimator): """Wrapper class for the Gaussian Proccess. Uses the Sklearn implementation of the Gaussian Process. """ def __init__( self, kernel=C(constant_value=10, constant_value_bounds=(1, 1000)) * RBF(length_scale=1, length_scale_bounds=(1e-3, 2)), alpha=1e-7, optimizer='fmin_l_bfgs_b', n_restarts_optimizer=10, normalize_y=True, copy_X_train=True, random_state=None, std_min=0., kernel_once=False, ): super().__init__() self.std_min = std_min self.kernel = kernel self.gp = GaussianProcessRegressor( kernel=kernel.kernel, alpha=alpha, optimizer=optimizer, n_restarts_optimizer=n_restarts_optimizer, normalize_y=normalize_y, copy_X_train=copy_X_train, random_state=random_state) self.kernel_once = kernel_once @ignore_warnings(category=ConvergenceWarning) def get_mean(self, samples_x: np.ndarray, samples_y: np.ndarray, test_x: np.ndarray) -> np.ndarray: """ runs the gp estimator to fit the data and evaluate the mean estimate for the test data :param samples_x: All input values of the samples :param samples_y: All target values of the samples :param test_x: test inputs to evaluate the means on :return: mean predictions for the test x """ old_stdout = sys.stdout # backup current stdout sys.stdout = open(os.devnull, "w") self.gp.fit(samples_x, samples_y) sys.stdout = old_stdout return self.gp.predict(test_x) @ignore_warnings(category=ConvergenceWarning) def get_mean_and_cov(self, samples_x: np.ndarray, samples_y: np.ndarray, test_x: np.ndarray) -> List[np.ndarray]: """ runs the gp estimator to fit the data and evaluate the mean estimate and covariance for the test data :param samples_x: All input values of the samples :param samples_y: All target values of the samples :param test_x: test inputs to evaluate the means on :return: mean and covariance for the test x """ old_stdout = sys.stdout # backup current stdout sys.stdout = open(os.devnull, "w") self.gp.fit(samples_x, samples_y) sys.stdout = old_stdout mean, cov = self.gp.predict(test_x, return_cov=True) return [mean, cov] @ignore_warnings(category=ConvergenceWarning) def fit(self, samples_x: np.ndarray, samples_y: np.ndarray) -> 'NoReturn': """fit the the given sample data. 
:param samples_x: All input values of the samples :param samples_y: All target values of the samples """ params = None if not self.context.bo_step == 0 and self.kernel_once: params = self.gp.kernel_.get_params() params = self.gp.kernel.fix_params(params=params) self.gp.kernel = self.gp.kernel.set_params(**params) self.gp.kernel = self.gp.kernel_.set_params(**params) old_stdout = sys.stdout # backup current stdout sys.stdout = open(os.devnull, "w") self.gp.fit(samples_x, samples_y) sys.stdout = old_stdout if self.context.inspector and self.context.inspector.store_estimators: path = "{}/{}/{}".format(self.context.inspector.inspector_path, self.__class__.__name__, self.context.bo_step) os.makedirs(path, exist_ok=True) with open(path + "/gp.pickle", "wb") as f: pickle.dump(self.gp, f) @ignore_warnings(category=ConvergenceWarning) def estimate(self, samples_x: np.ndarray, samples_y: np.ndarray, test_x: np.ndarray, inspect: bool = True) -> Tuple[np.array, np.array]: """train the underlying model with the given samples and then get the estimation for mu and sigma for the test data :param samples_x: input values of the samples :param samples_y: target values of the samples :param test_x: data to estimate, for which mu and sigma should be calculated :param inspect: should the data be stored in the inspector :return: mu and sigma values for the test data """ start_time = datetime.now() old_stdout = sys.stdout # backup current stdout sys.stdout = open(os.devnull, "w") self.gp.fit(samples_x, samples_y) time_elapsed = datetime.now() - start_time mean, sigma = self.regress(test_x) sys.stdout = old_stdout if inspect: self._inspect(mean, sigma, time_elapsed) self._inspect_on_test_data() return mean, sigma @ignore_warnings(category=ConvergenceWarning) def regress(self, test_x: np.ndarray) -> [np.ndarray, np.ndarray]: """only get the estimation for mu and sigma for the test data. 
Assumes that the underlying model is already trained :param test_x: data to estimate, for which mu and sigma should be calculated :return: mu and sigma values for the test data """ mus, sigmas = self.gp.predict(test_x, return_std=True) return mus, np.array([[x + self.std_min] for x in sigmas]) @ignore_warnings(category=ConvergenceWarning) def _inspect(self, mu: np.ndarray, sigma: np.ndarray, time_elapsed: int) -> NoReturn: """create a dictionary containing various interesting information for inspection :param mu: estimated mu :param sigma: estimated sigma :param time_elapsed: time spent for the estimation """ if self.context.inspector and self.context.inspector.inspect_estimation: inspection_data = { "estimator": self.__class__.__name__, "final_mu": mu, "final_sigma": sigma, "time_elapsed": time_elapsed, "samples_x": np.copy(self.context.samples_x), "samples_y": np.copy(self.context.samples_y), } self.context.inspector.add_estimation(inspection_data) if self.context.inspector and self.context.inspector.store_estimators: path = "{}/{}/{}".format(self.context.inspector.inspector_path, self.__class__.__name__, self.context.bo_step) os.makedirs(path, exist_ok=True) with open(path + "/gp.pickle", "wb") as f: pickle.dump(self.gp, f) @ignore_warnings(category=ConvergenceWarning) def _inspect_on_test_data(self) -> NoReturn: """run the estimator on syntetic test data :return: """ if self.context.inspector and self.context.inspector.estimate_test_data: inspection_data = { "estimator": self.__class__.__name__, "final_mu": None, "final_sigma": None, "final_acq": None, "samples_x": np.copy(self.context.samples_x), "samples_y": np.copy(self.context.samples_y), } mus, sigmas = self.regress(self.context.inspector.test_x) inspection_data["final_mu"] = mus inspection_data["final_sigma"] = sigmas inspection_data["final_acq"] = self.context.acq.evaluate( mus, sigmas, np.max(self.context.samples_y), inspect=False) self.context.inspector.add_estimation_test_data(inspection_data) """ @staticmethod def get_inspector_mu_on_test_data(context, step): return context.inspector.estimations_on_test_data[step]["final_mu"] @staticmethod def get_inspector_sigma_on_test_data(context, step): return context.inspector.estimations_on_test_data[step]["final_sigma"] @staticmethod def get_inspector_samples_x_on_test_data(context, step): print(context.inspector.estimations_on_test_data) return context.inspector.estimations_on_test_data[step]["samples_x"] @staticmethod def get_inspector_samples_y_on_test_data(context, step): return context.inspector.estimations_on_test_data[step]["samples_y"] @staticmethod def get_inspector_acq_on_test_data(context, step): return context.inspector.estimations_on_test_data[step]["final_acq"] """ def load_model(self, base_path: str, step: int = None, structured: bool = False) -> NoReturn: """ Load saved model :param base_path: basepath of the save model file :param step: step number, needed for stuctured loading :param structured: specify if file is loaded from the base_path or from base_path/GP/step/gp.pickle :return: """ if not structured: with open(base_path, "rb") as f: self.gp = pickle.load(f) else: path = "{}/{}/{}".format(base_path, self.__class__.__name__, step) with open(path + "/gp.pickle", "rb") as f: self.gp = pickle.load(f) @staticmethod def read_from_config(config: 'ConfigObj') -> NoReturn: """read the config file and construct the GP instance accordingly :param config: config object defining the object :return: """ kernel = Kernel.read_from_config(config["Kernel"]) return 
GP(kernel=kernel, alpha=config.as_float("alpha"), optimizer=config["optimizer"], n_restarts_optimizer=config.as_int("n_restarts_optimizer"), normalize_y=config.as_bool("normalize_y"), copy_X_train=config.as_bool("copy_X_train"), random_state=config_list_int_or_none(config, "random_state"), std_min=config.as_float("std_min"), kernel_once=config.as_bool("kernel_once"))
    x, y, test_size=number_of_test_samples, random_state=0)
#index = np.argsort(boston.target)
#y = boston.target[index]
#x = boston.data[index, :]
#x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=number_of_test_samples, shuffle=False)

# autoscaling
autoscaled_x_train = (x_train - x_train.mean(axis=0)) / x_train.std(axis=0, ddof=1)
autoscaled_y_train = (y_train - y_train.mean()) / y_train.std(ddof=1)
autoscaled_x_test = (x_test - x_train.mean(axis=0)) / x_train.std(axis=0, ddof=1)

# Gaussian process regression
model = GaussianProcessRegressor(ConstantKernel() * RBF() + WhiteKernel(), alpha=0)
model.fit(autoscaled_x_train, autoscaled_y_train)

# AD
ad = ApplicabilityDomain(method_name=method_name, rate_of_outliers=rate_of_outliers)
ad.fit(autoscaled_x_train)

# calculate y in training data
calculated_y_train = model.predict(autoscaled_x_train) * y_train.std(ddof=1) + y_train.mean()

# yy-plot
plt.rcParams['font.size'] = 18  # font size for axis labels, tick labels, etc.
plt.figure(figsize=figure.figaspect(1))
plt.scatter(y_train, calculated_y_train, c='blue')
y_max = np.max(np.array([np.array(y_train), calculated_y_train]))
#~ X = root2array('../no_truecc_cut_stride2_offset0.root', #~ branches='recotrklenact', #~ selection='mustopz<1275&&isnumucc==1', #~ step=scaledown).reshape(-1,1) #~ y = root2array('../no_truecc_cut_stride2_offset0.root', #~ branches='trueemu', #~ selection='mustopz<1275&&isnumucc==1', #~ step=scaledown) scaledown = 50 X = joblib.load( '../../svm/muon/outlier_removed_data/muon_trklen_active_step{}neighbor50.pkl' .format(scaledown)) y = joblib.load( '../../svm/muon/outlier_removed_data/muon_truee_active_step{}neighbor50.pkl' .format(scaledown)) # rescale the regressors scaler = preprocessing.StandardScaler().fit(X) # fit the model gp = GaussianProcessRegressor(kernel=RBF(), n_restarts_optimizer=1) Xnorm = scaler.transform(X) gp.fit(Xnorm, y) # get prediction y_pred = gp.predict(Xnorm) # plot fig = plt.figure() np.histogram2d(y, X)
def __init__(self, RiskKernel):
    self.dim = 0        # input data dimension
    self.X = None       # data points
    self.fX = None      # function evals
    self.RiskKernel = RiskKernel
    self.GP = GPR(kernel=RiskKernel.GPkernel)  # gaussian process
def show_landscape_gp(result, plan_keys, itr=-1, fix_param=None, log_scale=False): """ :param result: a OptimizerResult object :param plan_keys: list, should contain two names of parameters, e.g. ['Jz', 'Jx'] :param itr: int, default -1, the landscape at which iteration to plot :param fix_param: dict or None, default: None specify the parameter value of parameter not in plan_keys, e.g. {'Jy': 1} if None, it will be automatically set to optimal parameter values :param log_scale: bool, default: False, if True, the colorbar will be in logarithmic scale :return: None """ assert isinstance(result, OptimizerResult) bounds = [ result.parameter_space[plan_keys[0]], result.parameter_space[plan_keys[1]] ] xs = np.linspace(bounds[0][0], bounds[0][1], 100) ys = np.linspace(bounds[1][0], bounds[1][1], 100) def get_param(xi, yi): param_names = list(result.parameter_space.keys()) p_point = np.zeros(len(result.parameter_space)) if len(plan_keys) == len(result.parameter_space): return np.array([xi, yi]) elif fix_param: i = 0 for param_name in param_names: if param_name == plan_keys[0]: p_point[i] = xi elif param_name == plan_keys[1]: p_point[i] = yi else: p_point[i] = fix_param[param_name] i += 1 else: i = 0 for param_name in param_names: if param_name == plan_keys[0]: p_point[i] = xi elif param_name == plan_keys[1]: p_point[i] = yi else: p_point[i] = result.BO_record[itr].max['params'][ param_name] i += 1 return p_point X = np.vstack([ np.array([ result.parameter_record[i][list(result.parameter_space.keys())[j]] for j in range(len(result.parameter_space)) ]) for i in range(len(result.loss_record)) ]) Y = result.loss_record GP = GaussianProcessRegressor( kernel=Matern(nu=2.5, length_scale_bounds=(1e-05, 1000)), alpha=1e-6, optimizer='fmin_l_bfgs_b', normalize_y=True, n_restarts_optimizer=200, ) # GP.fit(X, np.power(10, -Y)) GP.fit(X, Y) predict_values = np.vstack( [GP.predict(np.array([get_param(xi, yi) for xi in xs])) for yi in ys]) fig, ax = plt.subplots(figsize=[5, 5]) predict_values[np.where(predict_values < 0)] = 1e-3 # predict_values = -predict_values if log_scale: ctf = ax.contourf(xs, ys, predict_values, cmap=plt.cm.gnuplot_r, norm=colors.LogNorm(vmin=predict_values.min(), vmax=predict_values.max()), levels=np.power( 10, np.linspace(np.log10(predict_values.min()), np.log10(predict_values.max()), 100))) else: ctf = ax.contourf(xs, ys, predict_values, cmap=plt.cm.gnuplot_r, levels=100) fig.colorbar(ctf) ax.set_xlabel(plan_keys[0]) ax.set_ylabel(plan_keys[1]) return fig, ax