def _generate_gaussian_process_sample(self):
    if 'length_scale' in self.params:
        if isinstance(self.params['length_scale'], tuple):
            min_ls = self.params['length_scale'][0]
            max_ls = self.params['length_scale'][1]
            ls = min_ls + (max_ls - min_ls) * np.random.rand()
        else:
            ls = self.params['length_scale']
    else:
        ls = 1.0
    if 'kernel' in self.params:
        if self.params['kernel'] == 'RBF':
            kernel = kernels.RBF(length_scale=ls)
        elif self.params['kernel'] == 'Matern':
            kernel = kernels.Matern(length_scale=ls)
        else:
            raise Exception('unknown kernel')
    else:
        kernel = kernels.RBF(length_scale=ls)
    gpr = GaussianProcessRegressor(kernel=kernel)
    X = np.zeros((1, self.domain_dimension))
    y = np.zeros(1)
    gpr.fit(X, y)
    points = np.random.rand(self.sampling_points_count, self.domain_dimension)
    values = gpr.sample_y(points, random_state=np.random.randint(100000))
    return points, values
def bo_(x_obs, y_obs):
    kernel = kernels.Matern() + kernels.WhiteKernel()
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=16)
    gp.fit(x_obs, y_obs)

    xs = list(repeat(np.atleast_2d(np.linspace(0, 10, 128)).T, 2))
    x = cartesian_product(*xs)

    a = a_EI(gp, x_obs=x_obs, y_obs=y_obs)
    argmin_a_x = x[np.argmax(a(x))]

    # heavy evaluation
    print("f({})".format(argmin_a_x))
    f_argmin_a_x = f2d(np.atleast_2d(argmin_a_x))

    plot_2d(gp, x_obs, y_obs, argmin_a_x, a, xs)
    plt.show()

    bo_(
        x_obs=np.vstack((x_obs, argmin_a_x)),
        y_obs=np.hstack((y_obs, f_argmin_a_x)),
    )
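# The helpers `a_EI`, `cartesian_product`, `f2d` and `plot_2d` used above are not
# defined in this excerpt. A minimal sketch of an expected-improvement acquisition
# built on a fitted sklearn GaussianProcessRegressor could look like the following;
# the name `a_EI_sketch` and the `theta` jitter parameter are assumptions, not the
# original code.
from scipy.stats import norm


def a_EI_sketch(gp, x_obs, y_obs, theta=0.01):
    y_best = np.min(y_obs)  # assumes the objective is being minimised

    def acquisition(x):
        mu, sigma = gp.predict(np.atleast_2d(x), return_std=True)
        with np.errstate(divide='ignore'):
            z = (y_best - mu - theta) / sigma
        ei = (y_best - mu - theta) * norm.cdf(z) + sigma * norm.pdf(z)
        ei[sigma == 0.0] = 0.0
        return ei

    return acquisition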
def cov_function_sklearn(params, nu=5 / 2):
    """Generates a default covariance function.

    Args:
      params: A dictionary with GP hyperparameters.
      nu: Degree of the matern kernel.

    Returns:
      cov_fun: an ARD Matern covariance function with diagonal noise for
        numerical stability.
    """
    amplitude = params['amplitude']
    noise = params['noise']
    lengthscale = params['lengthscale'].flatten()

    amplitude_bounds = PARAMS_BOUNDS['amplitude']
    lengthscale_bounds = PARAMS_BOUNDS['lengthscale']
    noise_bounds = PARAMS_BOUNDS['noise']

    cov_fun = kernels.ConstantKernel(
        amplitude, constant_value_bounds=amplitude_bounds) * kernels.Matern(
            lengthscale, nu=nu,
            length_scale_bounds=lengthscale_bounds) + kernels.WhiteKernel(
                noise, noise_level_bounds=noise_bounds)
    return cov_fun
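# A possible usage sketch of cov_function_sklearn. PARAMS_BOUNDS is assumed to be a
# module-level constant in the original source; the concrete bounds and parameter
# values below are illustrative only.
PARAMS_BOUNDS = {
    'amplitude': (1e-3, 1e3),
    'lengthscale': (1e-2, 1e2),
    'noise': (1e-6, 1e-1),
}
example_params = {
    'amplitude': 1.0,
    'noise': 1e-4,
    'lengthscale': np.ones((1, 3)),  # ARD: one length scale per input dimension
}
example_kernel = cov_function_sklearn(example_params)
print(example_kernel)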
def fit_gaussian(x, y, err):
    # kernel = kr.RBF(length_scale=10.0)
    kernel = kr.Matern(length_scale=0.4, nu=2.0)
    gp = GaussianProcessRegressor(kernel=kernel,
                                  alpha=err**2,
                                  n_restarts_optimizer=5)
    gp.fit(x, y)
    return gp
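# A minimal usage sketch for fit_gaussian on synthetic noisy 1-D data; the toy
# function, noise level and variable names are assumptions for illustration only.
rng = np.random.default_rng(0)
x_train = np.linspace(0, 5, 20).reshape(-1, 1)
noise = 0.1 * np.ones(20)
y_train = np.sin(x_train).ravel() + noise * rng.standard_normal(20)

gp_model = fit_gaussian(x_train, y_train, noise)
y_mean, y_std = gp_model.predict(x_train, return_std=True)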
def _initialise_gp(self, gp, hyper_grid):
    """
    Initialises the Gaussian process surrogate model. If `gp` is `None`
    uses the default kernel and Gaussian process:
        `kernel = sklearn.gaussian_process.kernels.Matern(nu=2.5)`
        `gp = sklearn.gaussian_process.GaussianProcessRegressor(
            kernel, alpha=1e-6, normalize_y=True, n_restarts_optimizer=5,
            random_state=self.generator)`,
    such that `random_state` is always set to the class generator. The data
    is always scaled using `sklearn.preprocessing.StandardScaler`.

    If `hyper_grid` is not `None` the best fit combination will be used as
    a surrogate model (calls `sklearn.model_selection.GridSearchCV`) with
    5-fold cross-validation.

    Parameters
    ----------
    gp : None or `sklearn.gaussian_process.GaussianProcessRegressor`
        Surrogate model Gaussian process.
    hyper_grid : None or dict of dictionaries
        Hyperparameter grid to be explored when fitting the Gaussian
        process.
    """
    # Set up the Gaussian process, pipeline and grid search
    if gp is None:
        kernel = kernels.Matern(nu=2.5)
        gp = GaussianProcessRegressor(kernel, alpha=1e-6, normalize_y=True,
                                      n_restarts_optimizer=5,
                                      random_state=self.generator)
    elif not isinstance(gp, GaussianProcessRegressor):
        raise TypeError(
            "`gp` must be of {} type.".format(GaussianProcessRegressor))
    else:
        # Always overwrite the random state
        gp.random_state = self.generator
    # Set up the pipeline to scale the data
    pipe = Pipeline([('scaler', StandardScaler()), ('gp', gp)])
    # Optionally set the hyperparameter grid
    if hyper_grid is None:
        self._surrogate_model = pipe
    else:
        self._surrogate_model = GridSearchCV(pipe, hyper_grid,
                                             n_jobs=self.nthreads)
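# A sketch of what a `hyper_grid` argument could look like. Because the Gaussian
# process sits in the Pipeline under the step name 'gp', grid keys follow sklearn's
# '<step>__<param>' convention; the specific kernels and alphas below are
# assumptions for illustration, not defaults from the original code.
example_hyper_grid = {
    'gp__kernel': [kernels.Matern(nu=1.5), kernels.Matern(nu=2.5)],
    'gp__alpha': [1e-6, 1e-4],
}
# self._initialise_gp(gp=None, hyper_grid=example_hyper_grid) would then wrap the
# pipeline in GridSearchCV and keep the best combination found by cross-validation.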
def gaussian_fit(x, y, err):
    err[np.isnan(err)] = 1e-1
    kernel = kr.Matern(length_scale=0.2, nu=2.0)
    #kernel = kr.Matern(length_scale=0.15, nu=2.0)
    gp = GaussianProcessRegressor(kernel=kernel,
                                  alpha=3 * err**2,
                                  n_restarts_optimizer=2)
    try:
        gp.fit(x, y)
    except Exception:
        # Fitting failed; print the targets to help diagnose the problem.
        print(y)
    return gp
def gaussian_fit(x, y, err):
    err[np.isnan(err)] = 1e-1
    #kernel = kr.RBF(length_scale=0.2)
    kernel = kr.Matern(100, (0.5, 1e3), nu=2.5)
    # kernel = kr.ConstantKernel(1.0, (1e-3, 1e3)) * kr.RBF(1, (1e-2, 1e2))
    # #kernel = kr.Matern(length_scale=0.15, nu=2.0)
    nerr = err * 1.0
    nerr[-1] *= 0.25
    gp = GaussianProcessRegressor(kernel=kernel,
                                  alpha=nerr**2,
                                  n_restarts_optimizer=20,
                                  normalize_y=True)
    #print y
    gp.fit(x, y)
    print(gp.kernel_)
    return gp
def gaussian_fit(x, y, err):
    err[np.isnan(err)] = 0.0
    noise = np.mean(err)
    #kernel = kr.RBF(length_scale=0.2)
    kernel = kr.Matern(length_scale=0.2, nu=15.0)
    gp = GaussianProcessRegressor(kernel=kernel,
                                  alpha=(3 * err)**2.0,
                                  n_restarts_optimizer=20)
    #print y
    try:
        gp.fit(x, y)
    except Exception:
        # Fitting failed; print the targets to help diagnose the problem.
        print(y)
    return gp
def _gp(self, x, y):
    kernel = sk_kern.Matern(length_scale=1.0,
                            length_scale_bounds=(1e-1, 10.0),
                            nu=1.5)
    clf = GaussianProcessRegressor(kernel=kernel,
                                   alpha=1e-10,
                                   optimizer="fmin_l_bfgs_b",
                                   n_restarts_optimizer=0,
                                   normalize_y=True)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        clf.fit(x, y)

    def _impl(x):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            mu, std = clf.predict(x.reshape(1, -1), return_std=True)
        return mu[0], std[0]

    return _impl
def _initialize_models(self):
    self.models['lr'] = {'model': linear_model.LinearRegression()}
    self.models['pr'] = {
        'model':
        Pipeline([('poly', PolynomialFeatures()),
                  ('linear',
                   linear_model.LinearRegression(fit_intercept=False))])
    }
    self.models['gpr'] = {
        'model':
        gaussian_process.GaussianProcessRegressor(
            kernel=kernels.Matern(), optimizer='fmin_l_bfgs_b')
    }
    self.models['ann'] = {
        'model':
        neural_network.MLPRegressor(random_state=self.random,
                                    solver='lbfgs',
                                    activation='logistic')
    }
    self.models['rf'] = {
        'model': ensemble.RandomForestRegressor(random_state=self.random)
    }
def integrated_sigma(alpha, n_samples, n_restarts_optimizer=16, f=f):
    print("integrated_sigma(n_samples={n_samples}, alpha={alpha})".format(
        n_samples=n_samples,
        alpha=alpha,
    ))
    X = np.atleast_2d(np.linspace(1, 9, n_samples)).T
    y = f(X).ravel()
    x = np.atleast_2d(np.linspace(0, 10, 16 * 1024)).T
    kernel = kernels.Matern() + (kernels.WhiteKernel(
        noise_level=alpha) if alpha is not None else 0.0)
    gp = GaussianProcessRegressor(
        kernel=kernel,
        n_restarts_optimizer=n_restarts_optimizer,
    )
    gp.fit(X, y)
    y_pred, sigma = gp.predict(x, return_std=True)
    return simps(
        x=x.ravel(),
        y=sigma,
    )
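# Usage sketch for integrated_sigma with a toy objective. The function f_toy below
# is an assumption standing in for the module-level `f` that is used as the default
# argument; the alpha and sample count are illustrative.
def f_toy(X):
    return X * np.sin(X)


area_under_sigma = integrated_sigma(alpha=0.1, n_samples=8, f=f_toy)
print(area_under_sigma)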
def bo_(x_obs, y_obs, n_iter):
    if n_iter > 0:
        kernel = kernels.Matern() + kernels.WhiteKernel()
        gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=16)
        gp.fit(x_obs, 1 - y_obs)

        a = a_EI(gp, x_obs=x_obs, y_obs=1 - y_obs)
        argmax_f_x_ = x[np.argmax(a(x))]

        # heavy evaluation
        f_argmax_f_x_ = cross_validation(argmax_f_x_)
        y_ob = np.atleast_2d(mean_mean_validation_scores(f_argmax_f_x_)).T

        return f_argmax_f_x_ + bo_(
            x_obs=np.vstack((x_obs, argmax_f_x_)),
            y_obs=np.vstack((y_obs, y_ob)),
            n_iter=n_iter - 1,
        )
    else:
        return []
def gp(xdata, ydata):
    kernel = [
        kernels.RBF(),
        kernels.Matern(),
        kernels.ConstantKernel(),
        kernels.WhiteKernel(),
        kernels.RationalQuadratic()
    ]
    max_iter_predict = [10, 50, 100, 500, 1000]
    warm_start = [False, True]
    multi_class = ['one_vs_rest', 'one_vs_one']
    with open('gaussianprocess.csv', mode='w', newline='') as file:
        writer = csv.writer(file, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow([
            'kernel', 'max_iter_predict', 'warm_start', 'multi_class',
            'accuracy'
        ])
        for k in kernel:
            for m in max_iter_predict:
                for w in warm_start:
                    for mc in multi_class:
                        accuracy = 0
                        model = GaussianProcessClassifier(kernel=k,
                                                          max_iter_predict=m,
                                                          warm_start=w,
                                                          multi_class=mc,
                                                          random_state=1)
                        kf = StratifiedKFold(n_splits=5, shuffle=True)
                        # Index the function arguments rather than undefined
                        # globals X / y.
                        for i, j in kf.split(xdata, ydata):
                            X_ktrain, X_ktest = xdata[i], xdata[j]
                            y_ktrain, y_ktest = ydata[i], ydata[j]
                            model.fit(X_ktrain, y_ktrain)
                            ypred = model.predict(X_ktest)
                            accuracy += np.mean(ypred == y_ktest)
                        accuracy /= 5
                        writer.writerow([k, m, w, mc, accuracy])
def fit_model(self, train_x, train_y):
    """
    Fit a Gaussian process regressor with noisy Matern kernel to the given data
    """
    train_x, train_y = self.preprocess(train_x, train_y, 1500)
    k = ker.Matern(length_scale=0.01, nu=2.5) + \
        ker.WhiteKernel(noise_level=1e-05)
    gpr = gp.GaussianProcessRegressor(kernel=k,
                                      alpha=0.01,
                                      n_restarts_optimizer=20,
                                      random_state=42,
                                      normalize_y=True)
    noisyMat_gpr = pipeline.Pipeline([("scaler", self.scaler), ("gpr", gpr)])
    print("Fitting noisy Matern GPR")
    start = time()
    noisyMat_gpr.fit(train_x, train_y)
    print("Took {} seconds".format(time() - start))
    self.model = noisyMat_gpr
df[df.columns[-1]].head()
# 'Unnamed: 0' is the spurious index column written by to_csv, so drop it along
# the column axis.
df = df.drop('Unnamed: 0', axis=1)
X, y = pre_processing(df.astype(str))
X = X.astype(float)

skf = StratifiedKFold(n_splits=metrics.folds, shuffle=True)
scorer = make_scorer(accuracy_score)

# models to be trained
nmodels = {
    'gauss': [
        GaussianProcessClassifier(n_jobs=2), {
            'kernel': [
                1 * kernels.RBF(),
                1 * kernels.DotProduct(),
                1 * kernels.Matern(),
                1 * kernels.RationalQuadratic(),
                1 * kernels.WhiteKernel()
            ]
        }
    ],
    'nb': [GaussianNB()],
    'rf': [
        RandomForestClassifier(), {
            'n_estimators': [10, 50, 100, 200, 500],
            'criterion': ["gini", "entropy"]
        }
    ],
    'dt': [
        DecisionTreeClassifier(), {
            "criterion": ["gini", "entropy"],
            "splitter": ["best", "random"]
def bayesian_optimisation(slice_sample_num, coor_sigma, burn_in, input_dimension, n_iters, sample_loss, bounds, x0=None, n_pre_samples=5, acqui_eva_num=10, random_search=False, epsilon=1e-7, greater_is_better=False, mode='OPT', acqui_mode='MCMC', acqui_sample_num=3): """ bayesian_optimisation Uses Gaussian Processes to optimise the loss function `sample_loss`. Arguments: ---------- slice_sample_num: integer. how many samples we draw for each time of slice sampling coor_sigma: numpy array step-size for slice sampling of each coordinate, the dimension is equal to the number of hyperparameters contained in the kernel burn_in: integer. how many iterations we want to wait before draw samples from slice sampling input_dimension: integer. dimension of input data n_iters: integer. Number of iterations to run the search algorithm. sample_loss: function. Function to be optimised. bounds: array-like, shape = [n_params, 2]. Lower and upper bounds on the parameters of the function `sample_loss`. x0: array-like, shape = [n_pre_samples, n_params]. Array of initial points to sample the loss function for. If None, randomly samples from the loss function. n_pre_samples: integer. If x0 is None, samples `n_pre_samples` initial points from the loss function. acqui_eva_num: when evaluating acquisition function, how many points we want to look into gp_params: dictionary. Dictionary of parameters to pass on to the underlying Gaussian Process. random_search: integer. Flag that indicates whether to perform random search or L-BFGS-B optimisation over the acquisition function. alpha: double. Variance of the error term of the GP. epsilon: double. Precision tolerance for floats. greater_is_better: boolean True: maximize the sample_loss function, False: minimize the sample_loss function mode: OPT means using optimizer to optimize the hyperparameters of GP MAP means using sample posterior mean to optimize the hyperparameters of GP acqui_mode: mode controlling the acquisition 'OPT': using one prediction based on previously optimized model 'MCMC': using several samples to sample the expected acquisition function acqui_sample_num: the number of hyperparameter samples we want to use for integrated acquisition function """ # call slice sampler slice_sampler = Slice_sampler(slice_sample_num, coor_sigma, burn_in) acqui_slice_sampler = Slice_sampler( 1, coor_sigma, burn_in) # only sample one sample a time x_list = [] y_list = [] y_dur_list = [] n_params = bounds.shape[0] if x0 is None: # random draw several points as GP prior for params in np.random.uniform(bounds[:, 0], bounds[:, 1], (n_pre_samples, bounds.shape[0])): x_list.append(params) start = time.clock() y_list.append(sample_loss(params)) elapsed = (time.clock() - start) y_dur_list.append(elapsed) else: for params in x0: x_list.append(params) start = time.clock() y_list.append(sample_loss(params)) elapsed = (time.clock() - start) y_dur_list.append(elapsed) xp = np.array(x_list) yp = np.array(y_list) yp_logdur = np.log(np.array(y_dur_list)) #print (xp,yp) # Create the GP #kernel = gp.kernels.Matern() init_length_scale = np.ones((input_dimension, )) kernel = kernels.Sum( kernels.WhiteKernel(), kernels.Product( kernels.ConstantKernel(), kernels.Matern(length_scale=init_length_scale, nu=5. 
/ 2.))) if mode == 'OPT': model = Gaussian_Process(kernel, mode) elif mode == 'MAP': model = Gaussian_Process(kernel, mode) else: raise Exception('Wrong GP model initialization mode!!!') dur = Gaussian_Process(kernel, 'OPT') iter_num = 0 for n in range(n_iters): iter_num += 1 if iter_num % int(n_iters / 2) == 0: print('%d iterations have been run' % iter_num) else: pass # for each iteration, one sample will be drawn and used to train GP model.fit(xp, yp) dur.fit(xp, yp_logdur) # Sample next hyperparameter #print ('One sample start') if random_search: x_random = np.random.uniform(bounds[:, 0], bounds[:, 1], size=(random_search, n_params)) ei = -1 * expected_improvement(x_random, model, yp, greater_is_better=greater_is_better, n_params=n_params) next_sample = x_random[np.argmax(ei), :] else: if acqui_mode == 'OPT': next_sample = sample_next_hyperparameter( expected_improvement, model, yp, greater_is_better=greater_is_better, bounds=bounds, n_restarts=acqui_eva_num) elif acqui_mode == 'MCMC': sample_theta_list = list() for sample_acqui_time in range(acqui_sample_num): initial_log_theta = np.ones((input_dimension + 2, )) initial_theta = np.exp(1.0 + initial_log_theta) one_log_theta = acqui_slice_sampler.sample( init=initial_theta, gp=model) one_theta = np.exp(1.0 + one_log_theta) sample_theta_list.append(one_theta) next_sample = integrate_sample( integrate_EI, sample_theta_list, yp, mode, greater_is_better=greater_is_better, bounds=bounds, n_restarts=acqui_eva_num) elif acqui_mode == 'PERSEC': sample_theta_list = list() for sample_acqui_time in range(acqui_sample_num): initial_log_theta = np.ones((input_dimension + 2, )) initial_theta = np.exp(1.0 + initial_log_theta) one_log_theta = acqui_slice_sampler.sample( init=initial_theta, gp=model) one_theta = np.exp(1.0 + one_log_theta) sample_theta_list.append(one_theta) next_sample = integrate_sample_perSec( integrate_EI_perSec, sample_theta_list, dur, yp, mode, greater_is_better=greater_is_better, bounds=bounds, n_restarts=acqui_eva_num) else: raise Exception('Wrong acquisition mode!!!') #print ('One sample finished') # Duplicates will break the GP. In case of a duplicate, we will randomly sample a next query point. if np.any(np.abs(next_sample - xp) <= epsilon): next_sample = np.random.uniform(bounds[:, 0], bounds[:, 1], bounds.shape[0]) # Sample loss for new set of parameters start = time.clock() func_value = sample_loss(next_sample) elapsed = (time.clock() - start) # Update lists x_list.append(next_sample) y_list.append(func_value) y_dur_list.append(elapsed) # Update xp and yp xp = np.array(x_list) yp = np.array(y_list) yp_logdur = np.log(np.array(y_dur_list)) return xp, yp, yp_logdur
def integrate_EI(x, sample_theta_list, evaluated_loss, mode,
                 greater_is_better=False, n_params=1):
    """ expected_improvement
    Expected improvement acquisition function.
    Arguments:
    ----------
        x: array-like, shape = [n_samples, n_hyperparams]
            The point for which the expected improvement needs to be computed.
        sample_theta_list: hyperparameter samples of the GP model, which will be
            used to calculate the integrated acquisition function
        evaluated_loss: Numpy array.
            Numpy array that contains the values of the loss function for the
            previously evaluated hyperparameters.
        greater_is_better: Boolean.
            Boolean flag that indicates whether the loss function is to be
            maximised or minimised.
        n_params: int.
            Dimension of the hyperparameter space.
    """
    # sample_theta_list contains all samples of hyperparameters
    ei_list = list()
    input_dimension = n_params
    init_length_scale = np.ones((input_dimension, ))
    kernel = kernels.Sum(
        kernels.WhiteKernel(),
        kernels.Product(
            kernels.ConstantKernel(),
            kernels.Matern(length_scale=init_length_scale, nu=5. / 2.)))
    for theta_set in sample_theta_list:
        model = Gaussian_Process(kernel, mode)
        '''
        model = gp.GaussianProcessRegressor(kernel=kernel,
                                            alpha=1e-5,
                                            optimizer=None,
                                            normalize_y=True)
        model.set_params(**{"kernel__k1__noise_level": np.abs(theta_set[0]),
                            "kernel__k2__k1__constant_value": np.abs(theta_set[1]),
                            "kernel__k2__k2__length_scale": theta_set[2:]})
        '''
        model.set_params(theta_set)
        x_to_predict = x.reshape(-1, n_params)

        mu, sigma = model.predict(x_to_predict)
        #mu, sigma = model.predict(x_to_predict, return_std=True)

        if greater_is_better:
            loss_optimum = np.max(evaluated_loss)
        else:
            loss_optimum = np.min(evaluated_loss)

        scaling_factor = (-1)**(not greater_is_better)

        # In case sigma equals zero
        with np.errstate(divide='ignore'):
            Z = scaling_factor * (mu - loss_optimum) / sigma
            expected_improvement = scaling_factor * (
                mu - loss_optimum) * norm.cdf(Z) + sigma * norm.pdf(Z)
            # Assign (not compare) zero improvement where the predictive std is zero.
            expected_improvement[sigma == 0.0] = 0.0

        ei_list.append(expected_improvement[0])
    res_ei = np.mean(ei_list)
    result = np.array([res_ei])
    return -1 * result
def to_sklearn(self):
    """Convert it to a sklearn kernel, if there is one"""
    return (self.variance *
            sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5))
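# Sanity-check sketch: a Matern kernel with nu=0.5 is the exponential
# (Ornstein-Uhlenbeck) covariance, k(r) = exp(-r / length_scale), which is what the
# conversion above relies on. The values below are illustrative only.
import numpy as np
from sklearn.gaussian_process import kernels as sklearn_kern

length_scale, variance = 2.0, 1.5
X = np.linspace(0, 5, 6).reshape(-1, 1)
K = (variance * sklearn_kern.Matern(length_scale=length_scale, nu=0.5))(X)
r = np.abs(X - X.T)
assert np.allclose(K, variance * np.exp(-r / length_scale))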
def bayesian_optimisation(coor_sigma, burn_in, input_dimension, n_iters, sample_loss, bounds, x0=None, n_pre_samples=5, acqui_eva_num=10, alpha=1e-5, epsilon=1e-7, greater_is_better=False, mode='OPT', acqui_mode='MCMC', acqui_sample_num=3, process_sample_mode='normal', prior_mode='normal_prior', likelihood_mode='normal_likelihood'): """ bayesian_optimisation Uses Gaussian Processes to optimise the loss function `sample_loss`. Arguments: ---------- slice_sample_num: integer. how many samples we draw for each time of slice sampling coor_sigma: numpy array step-size for slice sampling of each coordinate, the dimension is equal to the number of hyperparameters contained in the kernel burn_in: integer. how many iterations we want to wait before draw samples from slice sampling input_dimension: integer. dimension of input data n_iters: integer. Number of iterations to run the search algorithm. sample_loss: function. Function to be optimised. bounds: array-like, shape = [n_params, 2]. Lower and upper bounds on the parameters of the function `sample_loss`. x0: array-like, shape = [n_pre_samples, n_params]. Array of initial points to sample the loss function for. If None, randomly samples from the loss function. n_pre_samples: integer. If x0 is None, samples `n_pre_samples` initial points from the loss function. acqui_eva_num: when evaluating acquisition function, how many points we want to look into, number of restarts alpha: double. Variance of the error term of the GP. epsilon: double. Precision tolerance for floats. greater_is_better: boolean True: maximize the sample_loss function, False: minimize the sample_loss function mode: OPT means using optimizer to optimize the hyperparameters of GP MAP means using sample posterior mean to optimize the hyperparameters of GP acqui_mode: mode controlling the acquisition 'OPT': using one prediction based on previously optimized model 'MCMC': using several samples to sample the expected acquisition function acqui_sample_num: the number of hyperparameter samples we want to use for integrated acquisition function process_sample_mode: after getting sample, how to process it 'normal': only accept positive sample and reject negative ones 'abs': accept all samples after taking absolute value 'rho': reparamization trick is used, the samples are rho prior_mode: the prior distribution we want to use 'normal_prior': normal distribution 'exp_prior': exponential distribution likelihood_mode: how to calculate likelihood 'normal_likelihood': directly using input hyperparameter to calculate likelihood 'rho_likelihood': using reparamization trick (theta = np.log(1.0 + np.exp(rho))) """ # call slice sampler acqui_slice_sampler = Slice_sampler( 1, coor_sigma, burn_in, prior_mode, likelihood_mode) # only sample one sample a time x_list = [] y_list = [] y_dur_list = [] time_list = [] n_params = bounds.shape[0] print('Start presampling...') if x0 is None: # random draw several points as GP prior for params in np.random.uniform(bounds[:, 0], bounds[:, 1], (n_pre_samples, bounds.shape[0])): x_list.append(params) start = time.clock() y_list.append(sample_loss(params)) elapsed = (time.clock() - start) y_dur_list.append(elapsed) else: for params in x0: x_list.append(params) start = time.clock() y_list.append(sample_loss(params)) elapsed = (time.clock() - start) y_dur_list.append(elapsed) print('Presampling finished.') xp = np.array(x_list) yp = np.array(y_list) yp_logdur = np.log(np.array(y_dur_list)) # Create the GP init_length_scale = np.ones((input_dimension, )) kernel = 
kernels.Sum( kernels.WhiteKernel(), kernels.Product( kernels.ConstantKernel(), kernels.Matern(length_scale=init_length_scale, nu=5. / 2.))) if mode == 'OPT': model = gp.GaussianProcessRegressor(kernel=kernel, alpha=alpha, n_restarts_optimizer=10, normalize_y=True) elif mode == 'MAP': model = gp.GaussianProcessRegressor(kernel=kernel, alpha=alpha, optimizer=None, n_restarts_optimizer=0, normalize_y=True) else: raise Exception('Wrong GP model initialization mode!!!') dur = gp.GaussianProcessRegressor(kernel=kernel, alpha=alpha, n_restarts_optimizer=10, normalize_y=True) iter_num = 0 for n in range(n_iters): # Start the clock for recording total running time per iteration ite_start = time.clock() iter_num += 1 if iter_num % int(n_iters / 2) == 0: print('%d iterations have been run' % iter_num) else: pass # for each iteration, one sample will be drawn and used to train GP dur.fit(xp, yp_logdur) if mode == 'OPT': # for optimization mode, the hyperparameters are optimized during the process of fitting model.fit(xp, yp) elif mode == 'MAP': # for MAP mode, we use slice sampling to sample the posterior of hyperparameters and use the mean to update GP's hyperparameters model.fit(xp, yp) initial_theta = 10 * np.ones( (input_dimension + 2, ) ) # input_dimension + 2 = number of length_scale + amplitude + noise_sigma else: raise Exception('Wrong GP model initialization mode!!!') # Sample next hyperparameter if acqui_mode == 'OPT': next_sample = sample_next_hyperparameter( expected_improvement, model, yp, greater_is_better=greater_is_better, bounds=bounds, n_restarts=acqui_eva_num) elif acqui_mode == 'MCMC': sample_theta_list = list() while (len(sample_theta_list) < acqui_sample_num): # all samples of theta must be valid one_sample = acqui_slice_sampler.sample(init=initial_theta, gp=model) if process_sample_mode == 'normal': if np.all(one_sample[:, 0] > 0): one_theta = [ np.mean(samples_k) for samples_k in one_sample ] sample_theta_list.append(one_theta) else: continue elif process_sample_mode == 'abs': one_theta = [ np.abs(np.mean(samples_k)) for samples_k in one_sample ] sample_theta_list.append(one_theta) elif process_sample_mode == 'rho': one_theta = [ np.log(1.0 + np.exp((np.mean(samples_k)))) for samples_k in one_sample ] sample_theta_list.append(one_theta) else: raise Exception('Wrong process sample mode!!!') next_sample = integrate_sample(integrate_EI, sample_theta_list, yp, greater_is_better=greater_is_better, bounds=bounds, n_restarts=acqui_eva_num) elif acqui_mode == 'PERSEC': sample_theta_list = list() while (len(sample_theta_list) < acqui_sample_num): # all samples of theta must be valid one_sample = acqui_slice_sampler.sample(init=initial_theta, gp=model) if process_sample_mode == 'normal': if np.all(one_sample[:, 0] > 0): one_theta = [ np.mean(samples_k) for samples_k in one_sample ] sample_theta_list.append(one_theta) else: continue elif process_sample_mode == 'abs': one_theta = [ np.abs(np.mean(samples_k)) for samples_k in one_sample ] sample_theta_list.append(one_theta) elif process_sample_mode == 'rho': one_theta = [ np.log(1.0 + np.exp((np.mean(samples_k)))) for samples_k in one_sample ] sample_theta_list.append(one_theta) else: raise Exception('Wrong process sample mode!!!') next_sample = integrate_sample_perSec( integrate_EI_perSec, sample_theta_list, dur, yp, greater_is_better=greater_is_better, bounds=bounds, n_restarts=acqui_eva_num) elif acqui_mode == 'RANDOM': x_random = np.random.uniform(bounds[:, 0], bounds[:, 1], size=(5, n_params)) ei = -1 * expected_improvement(x_random, 
model, yp, greater_is_better=greater_is_better, n_params=n_params) next_sample = x_random[np.argmax(ei), :] else: raise Exception('Wrong acquisition mode!!!') # Duplicates will break the GP. In case of a duplicate, we will randomly sample a next query point. if np.any(np.abs(next_sample - xp) <= epsilon): next_sample = np.random.uniform(bounds[:, 0], bounds[:, 1], bounds.shape[0]) # Sample loss for new set of parameters start = time.clock() func_value = sample_loss(next_sample) elapsed = (time.clock() - start) # Update lists x_list.append(next_sample) y_list.append(func_value) y_dur_list.append(elapsed) # Update xp and yp xp = np.array(x_list) yp = np.array(y_list) yp_logdur = np.log(np.array(y_dur_list)) ite_elapsed = (time.clock() - ite_start) time_list.append(ite_elapsed) timep = np.array(time_list) return xp, yp, timep
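# The `expected_improvement` helper used by the acquisition branches above is not
# shown in this excerpt. A minimal, single-model sketch (name, signature and
# defaults are assumptions) built on a model exposing sklearn's
# predict(..., return_std=True) could look like this; it mirrors the formula used
# in integrate_EI and, like it, returns the negated EI so callers can take argmax
# of -1 times the result.
from scipy.stats import norm


def expected_improvement_sketch(x, model, evaluated_loss,
                                greater_is_better=False, n_params=1):
    x_to_predict = x.reshape(-1, n_params)
    mu, sigma = model.predict(x_to_predict, return_std=True)
    if greater_is_better:
        loss_optimum = np.max(evaluated_loss)
    else:
        loss_optimum = np.min(evaluated_loss)
    scaling_factor = (-1)**(not greater_is_better)
    with np.errstate(divide='ignore'):
        Z = scaling_factor * (mu - loss_optimum) / sigma
        ei = scaling_factor * (mu - loss_optimum) * norm.cdf(Z) + sigma * norm.pdf(Z)
        ei[sigma == 0.0] = 0.0
    return -1 * ei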
def __init__(self, system, npc=10, nrestarts=0): logging.info( 'training emulator for system %s (%d PC, %d restarts)', system, npc, nrestarts ) Y = [] self._slices = {} self.observables = observables # Build an array of all observables to emulate. nobs = 0 for obs, subobslist in self.observables: self._slices[obs] = {} for subobs in subobslist: Y.append(data_list[system][obs][subobs]['Y']) n = Y[-1].shape[1] self._slices[obs][subobs] = slice(nobs, nobs + n) nobs += n Y = np.concatenate(Y, axis=1) # pickle.dump(Y,open('mod_dat.p','wb')) self.npc = npc self.nobs = nobs self.scaler = StandardScaler(copy=False) self.pca = PCA(copy=False, whiten=True, svd_solver='full') # Standardize observables and transform through PCA. Use the first # `npc` components but save the full PC transformation for later. Z = self.pca.fit_transform(self.scaler.fit_transform(Y))[:, :npc] # Define kernel (covariance function): # Gaussian correlation (RBF) plus a noise term. design = Design(system) # design = joblib.load(filename='cache/lhs/design_s.p') # maxes = np.apply_along_axis(max,0,design) # mins = np.apply_along_axis(min,0,design) # ptp = maxes - mins ptp = design.max - design.min print(ptp) kernel = ( 1. * kernels.Matern( length_scale=ptp, length_scale_bounds=np.outer(ptp, (.1, 10)) ) # kernels.WhiteKernel( # noise_level=.1**2, # noise_level_bounds=(.01**2, 1) # ) ) # Fit a GP (optimize the kernel hyperparameters) to each PC. self.gps = [ GPR( kernel=kernel, alpha=0, n_restarts_optimizer=nrestarts, copy_X_train=False ).fit(design, z) for z in Z.T ] # print('Emulator design:') # print(design.array) # Construct the full linear transformation matrix, which is just the PC # matrix with the first axis multiplied by the explained standard # deviation of each PC and the second axis multiplied by the # standardization scale factor of each observable. self._trans_matrix = ( self.pca.components_ * np.sqrt(self.pca.explained_variance_[:, np.newaxis]) * self.scaler.scale_ ) # Pre-calculate some arrays for inverse transforming the predictive # variance (from PC space to physical space). # Assuming the PCs are uncorrelated, the transformation is # # cov_ij = sum_k A_ki var_k A_kj # # where A is the trans matrix and var_k is the variance of the kth PC. # https://en.wikipedia.org/wiki/Propagation_of_uncertainty # Compute the partial transformation for the first `npc` components # that are actually emulated. A = self._trans_matrix[:npc] self._var_trans = np.einsum( 'ki,kj->kij', A, A, optimize=False).reshape(npc, nobs**2) # Compute the covariance matrix for the remaining neglected PCs # (truncation error). These components always have variance == 1. B = self._trans_matrix[npc:] self._cov_trunc = np.dot(B.T, B) # Add small term to diagonal for numerical stability. self._cov_trunc.flat[::nobs + 1] += 1e-4 * self.scaler.var_
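# Sketch of the uncertainty propagation described in the comments above: with an
# (npc x nobs) transformation matrix A and per-PC predictive variances var_k, the
# physical-space covariance is cov_ij = sum_k A_ki var_k A_kj. The shapes and
# random values below are illustrative only.
import numpy as np

npc, nobs = 3, 5
A = np.random.rand(npc, nobs)   # rows: principal components, columns: observables
var = np.random.rand(npc)       # predictive variance of each PC
cov = np.einsum('ki,k,kj->ij', A, var, A)
# Equivalent to the matrix form A.T @ diag(var) @ A
assert np.allclose(cov, A.T @ np.diag(var) @ A)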
import sklearn.gaussian_process.kernels as kers

#Constants
myKernels = {
    'k_Constant': lambda: kers.ConstantKernel(),
    'k_WN': lambda: kers.WhiteKernel(),
    'k_RBF': lambda: kers.RBF(),
    'k_RQ': lambda: kers.RationalQuadratic(),
    'k_mat0': lambda: kers.Matern(nu=0.5),
    'k_mat1': lambda: kers.Matern(nu=1.5),
    'k_mat2': lambda: kers.Matern(nu=2.5),
    'k_sine': lambda: kers.ExpSineSquared(),
    #now combination kernels
    'k1': lambda: kers.Sum(kers.ConstantKernel(), kers.ExpSineSquared()),
    'k2': lambda: kers.Product(kers.ConstantKernel(), kers.ExpSineSquared()),
    'k3': lambda: kers.Sum(
        kers.ConstantKernel(),
        kers.Product(kers.ConstantKernel(), kers.ExpSineSquared())),
    'k4': lambda: kers.Sum(kers.ConstantKernel(),
                           kers.Product(kers.RBF(), kers.ExpSineSquared())),
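# Usage sketch, assuming the myKernels dictionary is closed as in the full source:
# each entry is a zero-argument factory, so a fresh kernel instance is obtained by
# calling it. The toy data and regressor below are illustrative only.
from sklearn.gaussian_process import GaussianProcessRegressor
import numpy as np

X_demo = np.linspace(0, 1, 10).reshape(-1, 1)
y_demo = np.sin(2 * np.pi * X_demo).ravel()
gpr_demo = GaussianProcessRegressor(kernel=myKernels['k_mat1']())
gpr_demo.fit(X_demo, y_demo)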
def __init__(self, kernel=1.0 * kernels.Matern(), n_restarts_optimizer=5,
             random_state=0):
    super().__init__()
    self._model = Pipeline([
        ('Yscaler', preprocessing.StandardScaler()),
        ('Yreg', MultivariateGPR(kernel=kernel,
                                 random_state=random_state,
                                 n_restarts_optimizer=n_restarts_optimizer))
    ])
x_fill = np.linspace(X[0, 0], X[-1, 0], 1000).reshape(-1, 1)
y_pred, sigma = gpr.predict(x_fill, return_std=True)
y_pred = y_pred.reshape(-1, 1)
sigma = sigma.reshape(-1, 1)

fig = plt.figure()
plt.scatter(X, y, color='r', label='observations')
plt.plot(x_fill, y_pred, 'b-', label='prediction')
upper, lower = y_pred + 1.96 * sigma, y_pred - 1.96 * sigma
# fill_between expects a numeric alpha, not a string.
plt.fill_between(x_fill.squeeze(),
                 upper.squeeze(),
                 lower.squeeze(),
                 color='r',
                 alpha=0.2)
plt.xlabel('time')
plt.ylabel('price')
plt.legend(loc='upper left')
plt.show()


if __name__ == '__main__':
    f = lambda x: x * np.sin(x) + 2
    X = np.atleast_2d([0.3, 1.2, 2.5, 4., 6.2])
    obs = f(X)
    gpr = GaussianProcessRegressor(kernel=kernels.Matern(nu=2.5))
    gpr.fit(X.reshape(-1, 1), obs.reshape(-1, 1))
    plot_gpr(gpr)
import numpy as np
from matplotlib import pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process import kernels

ker = [
    1.0 * kernels.RBF(length_scale=1.0, length_scale_bounds=(1e-1, 10.0)),
    4.0 * kernels.RationalQuadratic(length_scale=3.0, alpha=0.2),
    1.0 * kernels.Matern(length_scale=1.0, length_scale_bounds=(1e-1, 10.0),
                         nu=1.5)
]

data = np.random.rand(7, 1) * 5
y = 2 * np.sin(data)

for k in ker:
    plt.figure()
    gp = GaussianProcessRegressor(kernel=k)
    gp.fit(data, y)
    plt.scatter(data, y)
    X = np.linspace(0, 5, 100)
    y_mean, y_std = gp.predict(X[:, np.newaxis], return_std=True)
    y_mean = y_mean[:, 0]
    plt.plot(X, y_mean)
    plt.fill_between(X, y_mean - y_std, y_mean + y_std, alpha=0.2)
    plt.title("Kernel : %s" % (gp.kernel))
    plt.show()
def trainregressionmodels( training_data_folder: str, test_data_folder: str, optimal_data_folder: str = None, performance_output_folder: str = None, name: str = None, models: dict = None, ): """Train regression models on the datasets found in the path given by *_data_folder. Saves R^2 values in csv file. Note: Only the final objective (named 'f*' where * is an integer) is trained. Parameters ---------- training_data_folder : str, optional Path to the datasets to be used for training, by default None test_data_folder : str, optional Path to the datasets to be used for testing, by default None name : str Name of the output files models : dict, optional Dictionary of sklearn models, by default None. Should be in the format: {"<Model_name>": [model object, {parameters for fit}]}. <model object> is the class that has .fit and .predict methods. """ if performance_output_folder is None: performance_output_folder = "./surrogate_performance/" if not os.path.exists(performance_output_folder): os.mkdir(performance_output_folder) training_data_files = os.listdir(training_data_folder) test_data_files = os.listdir(test_data_folder) test_data_files = [ file.split("/")[-1].split("_") for file in test_data_files ] test_data_files = pd.DataFrame( test_data_files, columns=["problem_name", "num_var", "num_samples", "dist"]) if optimal_data_folder is not None: optimal_data_files = os.listdir(optimal_data_folder) optimal_data_files = [ file.split("/")[-1].split("_") for file in optimal_data_files ] optimal_data_files = pd.DataFrame( optimal_data_files, columns=["problem_name", "num_var", "num_samples", "dist"], ) # DO some magic to get num_samples easily if models is None: models = { "svm_linear": [SVR, { "kernel": "linear" }], "svm_rbf": [SVR, { "gamma": "scale" }], "MLP": [MLPR, {}], "GPR_rbf": [GPR, { "kernel": kernels.RBF() }], "GPR_matern3/2": [GPR, { "kernel": kernels.Matern(nu=1.5) }], "GPR_matern5/2": [GPR, { "kernel": kernels.Matern(nu=2.5) }], # "GPR_ExpSinSq": [GPR, {"kernel": kernels.ExpSineSquared()}], "DecisionTree": [DTR, {}], "RandomForest_10": [RFR, { "n_estimators": 10 }], "RandomForest_100": [RFR, { "n_estimators": 100 }], "AdaBoost_10": [ABR, { "n_estimators": 10 }], "AdaBoost_100": [ABR, { "n_estimators": 100 }], "ExtraTrees_10": [ExTR, { "n_estimators": 10 }], "ExtraTrees_100": [ExTR, { "n_estimators": 100 }], } metrics = {"R^2": r2_score, "MSE": mean_squared_error, "time": []} model_types = models.keys() metrics_types = metrics.keys() performance_on_test = { metric: pd.DataFrame(columns=model_types, index=training_data_files, dtype=float) for metric in metrics_types } if optimal_data_folder is not None: performance_on_optimal = { metric: pd.DataFrame(columns=model_types, index=training_data_files, dtype=float) for metric in metrics_types } # oldfile = "" for file in tqdm(training_data_files): # use the magic above to get validation data efficiently training_data = pd.read_csv(training_data_folder + file) filename = file.split("/")[-1].split(".")[0].split("_") problem_name = filename[0] num_var = filename[1] test_data_file = test_data_files[ (test_data_files["problem_name"] == problem_name) & (test_data_files["num_var"] == num_var)].values test_data_file = "_".join(test_data_file[0].tolist()) test_data = pd.read_csv(test_data_folder + test_data_file) columns = training_data.columns x_columns = [column for column in columns if "x" in column] y_columns = [column for column in columns if "f" in column] X_train = training_data[x_columns].values y_train = training_data[y_columns[-1]].values 
X_test = test_data[x_columns].values y_test = test_data[y_columns[-1]].values if optimal_data_folder is not None: optimal_data_file = optimal_data_files[ (optimal_data_files["problem_name"] == problem_name) & (optimal_data_files["num_var"] == num_var)].values optimal_data_file = "_".join(optimal_data_file[0].tolist()) optimal_data = pd.read_csv(optimal_data_folder + optimal_data_file) X_optimal = optimal_data[x_columns].values y_optimal = optimal_data[y_columns[-1]].values for model_name, (model_type, model_parameters) in models.items(): model = model_type(**model_parameters) time_init = time() model.fit(X_train, y_train) time_delta = time() - time_init y_pred_test = model.predict(X_test) if optimal_data_folder is not None: y_pred_optimal = model.predict(X_optimal) performance_on_test["time"].at[file, model_name] = time_delta performance_on_test["R^2"].at[file, model_name] = r2_score( y_test, y_pred_test) performance_on_test["MSE"].at[file, model_name] = mean_squared_error( y_test, y_pred_test) if optimal_data_folder is not None: performance_on_optimal["time"].at[file, model_name] = time_delta performance_on_optimal["R^2"].at[file, model_name] = r2_score( y_optimal, y_pred_optimal) performance_on_optimal["MSE"].at[ file, model_name] = mean_squared_error(y_optimal, y_pred_optimal) for metric, performance_data in performance_on_test.items(): performance_data.to_csv(performance_output_folder + "/" + name + "_test_" + metric + ".csv") if optimal_data_folder is not None: for metric, performance_data in performance_on_optimal.items(): performance_data.to_csv(performance_output_folder + "/" + name + "_optimal_" + metric + ".csv")
def bo(X, y): data = list(zip(X, y)) x = np.atleast_2d(np.linspace(0, 10, 1024)).T x_= np.atleast_2d(np.linspace(0, 10, 1024)).T kernel = kernels.Matern() + kernels.WhiteKernel() gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=16, )#normalize_y=True) gp.fit(X, y) # FIXME is it possible for mu(x) < min{x \in observed_x}? # is this due to that GaussainProcess's prior states that mu(x) = 0? # will this effect the performance of GO, since everything not observed will automatically give an additional boost since the prior plays a bigger role (look it up) [we know that the loss we in the end are optimizing is \in [0, 1] y_pred, sigma = gp.predict(x, return_std=True) #http://www.scipy-lectures.org/advanced/mathematical_optimization/ # x_min = fmin(negate(silly_f), 5) # TODO better maximizer # Strong points: it is robust to noise, as it does not rely on computing gradients. Thus it can work on functions that are not locally smooth such as experimental data points, as long as they display a large-scale bell-shape behavior. However it is slower than gradient-based methods on smooth, non-noisy functions. #opt_result = minimize(negate(silly_f), 5, bounds=[(0, 10)]) # TODO better maximizer #print(opt_result) #assert(opt_result.success) #x_min = opt_result.x # x_min = brent(negate(silly_f), brack=(0, 10)) # NOTE 1D only, NOTE not guaranteed to be within range brack=(0, 10) (see documentation) # TODO getting the gradient the gaussian would unlock all gradient based optimization methods!! (including L_BFGS) a = a_EI(gp, x_obs=X, y_obs=y, theta=0.01) a_x = np.apply_along_axis(a, 1, x) (x_min_,) = max(x, key=a) # TODO have a reasonable optimization (this doesn't scale well) #(x_min_,) = brute( # negate(a), # ranges=((0, 10),), # Ns=64, # finish=fmin, #) # FIXME brute can return numbers outside of the range! X = np.linspace(0, 10, 32), Ns=64, ranges=((0, 10) (x_min_ = 10.22...) # I think it occurs when the function is pretty flat (but not constant) # TODO verify that finish function gets the same range as brute and don't wonder off (perhaps this is intended behaviour?) # TODO check https://github.com/scipy/scipy/blob/master/scipy/optimize/optimize.py#L2614 to see if it's possible for x_min to end up outside of the range (and if then when) print(x_min_) #plot_2d(x=x, x_=x_, y_pred=y_pred, sigma = sigma, a_x=a_x) #plot(x=x, y_pred=y_pred, x_obs=X, y_obs=y, x_min_=x_min_, sigma=sigma, a_x=a_x) #plt.show() # evaluate fx_min_ = f(x_min_) bo( X=np.vstack( (X,[x_min_,]) ), y=np.hstack( (y,[fx_min_,]) ), )
def trainregressionmodelsCV( training_data_folder: str, performance_output_folder: str = None, name: str = None, models: dict = None, num_splits: int = 5, ): """Train regression models on the datasets found in the path given by *_data_folder, by doing cross validation. Saves R^2 values in csv file. Note: Only the final objective (named 'f*' where * is an integer) is trained. Parameters ---------- training_data_folder : str, optional Path to the datasets to be used for training, by default None test_data_folder : str, optional Path to the datasets to be used for testing, by default None name : str Name of the output files models : dict, optional Dictionary of sklearn models, by default None. Should be in the format: {"<Model_name>": [model object, {parameters for fit}]}. <model object> is the class that has .fit and .predict methods. num_splits : int, optional Number of splits for cross validation. """ if performance_output_folder is None: performance_output_folder = "./surrogate_performance" if not os.path.exists(performance_output_folder): os.mkdir(performance_output_folder) training_data_files = os.listdir(training_data_folder) if models is None: models = { "svm_linear": [SVR, { "kernel": "linear" }], "svm_rbf": [SVR, { "gamma": "scale" }], "MLP": [MLPR, {}], "GPR_rbf": [GPR, { "kernel": kernels.RBF() }], "GPR_matern3/2": [GPR, { "kernel": kernels.Matern(nu=1.5) }], "GPR_matern5/2": [GPR, { "kernel": kernels.Matern(nu=2.5) }], # "GPR_ExpSinSq": [GPR, {"kernel": kernels.ExpSineSquared()}], "DecisionTree": [DTR, {}], "RandomForest_10": [RFR, { "n_estimators": 10 }], "RandomForest_100": [RFR, { "n_estimators": 100 }], "AdaBoost_10": [ABR, { "n_estimators": 10 }], "AdaBoost_100": [ABR, { "n_estimators": 100 }], "ExtraTrees_10": [ExTR, { "n_estimators": 10 }], "ExtraTrees_100": [ExTR, { "n_estimators": 100 }], } model_types = models.keys() CV_score_mean = pd.DataFrame(columns=model_types, index=training_data_files, dtype=float) CV_score_max = pd.DataFrame(columns=model_types, index=training_data_files, dtype=float) scorer = make_scorer(r2_score) # oldfile = "" for file in tqdm(training_data_files): # use the magic above to get validation data efficiently training_data = pd.read_csv(training_data_folder + file) columns = training_data.columns x_columns = [column for column in columns if "x" in column] y_columns = [column for column in columns if "f" in column] X_train = training_data[x_columns].values y_train = training_data[y_columns[-1]].values for model_name, (model_type, model_parameters) in models.items(): model = model_type(**model_parameters) score = cross_val_score(model, X_train, y_train, scoring=scorer, cv=num_splits) CV_score_mean.at[file, model_name] = score.mean() CV_score_max.at[file, model_name] = score.max() CV_score_max.to_csv(performance_output_folder + "/" + name + "CV-score-max.csv") CV_score_mean.to_csv(performance_output_folder + "/" + name + "CV-score-mean.csv")
return np.exp(x) / np.sum(np.exp(x), -1)


def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), 1)[:, np.newaxis]


def logit(x):
    logx = np.log(x)
    return logx - logx[:, -1][:, np.newaxis]


depths_train = np.linspace(0, 3000, 3000)[:, np.newaxis]
depths_ts = depths_train[::30, :]

matk_fabric = k.Matern(length_scale=300.0, nu=0.5)
kern_p = k.WhiteKernel(noise_level=5.0) + matk_fabric
kern_sh = k.WhiteKernel(noise_level=5.0) + matk_fabric
kern_sv = k.WhiteKernel(noise_level=5.0) + matk_fabric

matk_vel_error = 100 * k.RBF(length_scale=600)

kern_a11 = k.WhiteKernel(noise_level=0.0001) + matk_fabric
kern_aii_noise = k.WhiteKernel(noise_level=0.2) + matk_fabric
kern_a22 = k.WhiteKernel(noise_level=10.0) + matk_fabric
kern_a33 = k.WhiteKernel(noise_level=1.0) + matk_fabric

gpr = gp.GaussianProcessRegressor(matk_fabric)
gpr_noise = gp.GaussianProcessRegressor(k.WhiteKernel(0.05))

aii = gpr.sample_y(depths_train, 3)
kernel = args.kernel
multiplier = 1.0
bias = args.bias

kernels = ['RBF', 'Matern', 'RationalQuadratic']
assert kernel in kernels

if kernel == 'RBF':
    print('RBF model')
    kernelfun = multiplier * gpkernels.RBF(
        length_scale,
        length_scale_bounds=(length_scale_lb, length_scale_ub))
elif kernel == 'Matern':
    print('Matern')
    kernelfun = multiplier * gpkernels.Matern(
        length_scale,
        nu=nu,
        length_scale_bounds=(length_scale_lb, length_scale_ub))
elif kernel == 'RationalQuadratic':
    print('RationalQuadratic')
    kernelfun = multiplier * gpkernels.RationalQuadratic(
        length_scale,
        alpha=alpha,
        length_scale_bounds=(length_scale_lb, length_scale_ub),
        alpha_bounds=(alpha_lb, alpha_ub))
else:
    print('It should have not reached here!')
    kernelfun = 1.0 * gpkernels.RBF(1.0)

#RBF, Matern, ConstantKernel, WhiteKernel, RationalQuadratic
# length_scale=1.0, length_scale_bounds=(1e-05, 100000.0), nu=1.5
# length_scale=1.0, alpha=1.0, length_scale_bounds=(1e-05, 100000.0), alpha_bounds=(1e-05, 100000.0)
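# A sketch of how the selected kernelfun could be used downstream; the regressor,
# the toy data and the random seed are assumptions, since the rest of the script is
# not shown in this excerpt.
from sklearn.gaussian_process import GaussianProcessRegressor
import numpy as np

rng = np.random.default_rng(0)
X_demo = rng.random((50, 2))
y_demo = np.sin(X_demo[:, 0]) + 0.1 * rng.standard_normal(50)
gp_demo = GaussianProcessRegressor(kernel=kernelfun, n_restarts_optimizer=5)
gp_demo.fit(X_demo, y_demo)
print(gp_demo.kernel_)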