def gp_base_rmse_mode(train_input, train_target, test_input, test_target):
    """Tune a RationalQuadratic GP by grid search and return its test RMSE.

    The kernel's ``alpha`` is chosen by a repeated 10-fold cross-validation
    scored with negative mean absolute error. A fresh regressor built from
    the winning parameters is then fitted on the whole training set and
    evaluated on the test set.

    Args:
        train_input: Training features handed to ``fit``.
        train_target: Training targets handed to ``fit``.
        test_input: Features to predict on.
        test_target: Reference targets the predictions are compared with.

    Returns:
        The root-mean-squared error of the predictions on the test data.
    """
    # Each candidate appears exactly once. The previous grid listed the same
    # six kernels twice, which doubled the number of cross-validation fits.
    alphas = (0.0001, 0.001, 0.01, 0.1, 1, 10)
    adj_params = {
        'kernel': [RationalQuadratic(alpha=a, length_scale=1) for a in alphas],
        'n_restarts_optimizer': [2],
    }
    param = {
        'kernel': RationalQuadratic(alpha=alphas[0], length_scale=1),
        'n_restarts_optimizer': 2,
    }

    gpr = GaussianProcessRegressor(**param)
    cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
    cscv = GridSearchCV(gpr, adj_params,
                        scoring='neg_mean_absolute_error',
                        cv=cv, n_jobs=-1)
    cscv.fit(train_input, train_target)
    print("best_params_: ", cscv.best_params_)

    # Refit a new estimator with the selected parameters on all training data.
    gpr = GaussianProcessRegressor(**cscv.best_params_)
    gpr.fit(train_input, train_target)

    # Only the posterior mean is needed for the RMSE; the covariance that was
    # requested before was never used.
    test_y = gpr.predict(test_input).ravel()
    gp_base_rmse = np.sqrt(metrics.mean_squared_error(test_target, test_y))
    print(gp_base_rmse)
    return gp_base_rmse
def predict_matches(preprocessed_matches, training_data):
    """Train GP models on past matches and predict the upcoming ones.

    A regressor is fitted on the "Goal Difference" column and a classifier on
    the "Simple Result" column of ``training_data``. Both predictions are
    written onto ``preprocessed_matches`` itself as new columns, and a
    five-column view of that frame is returned.

    Result: 2 - Home Team Wins, 1 - Draw, 0 - Away Team Wins
    """
    feature_cols = ["Overall Home", "rank Home", "Overall Away", "rank Away"]

    # Training algorithms
    train_features = training_data[feature_cols]
    goal_diff = training_data[["Goal Difference"]].values.ravel()
    outcome = training_data[["Simple Result"]].values.ravel()

    regressor = GaussianProcessRegressor(
        RationalQuadratic() + 10 * WhiteKernel(noise_level=10))
    classifier = GaussianProcessClassifier(
        RationalQuadratic() + 10 * WhiteKernel(noise_level=10))
    regressor.fit(train_features, goal_diff)
    classifier.fit(train_features, outcome)
    print("Finished training")

    # Predicting new matches: both predictions are computed before either
    # column is written back.
    new_features = preprocessed_matches[feature_cols]
    predicted_goal_diff = regressor.predict(new_features)
    predicted_outcome = classifier.predict(new_features)
    preprocessed_matches["Pred. Goal Difference"] = predicted_goal_diff
    preprocessed_matches["Pred. Result"] = predicted_outcome

    report_cols = [
        "Date",
        "Home Team Name",
        "Away Team Name",
        "Pred. Goal Difference",
        "Pred. Result",
    ]
    return preprocessed_matches[report_cols]
def __init__(self, t, y, selected_kernel="RatQuad", interpolation_factor=None):
    """Build the catalogue of candidate kernels and validate the selection.

    Args:
        t: Forwarded unchanged to the parent initializer.
        y: Forwarded unchanged to the parent initializer.
        selected_kernel: Key of the kernel to use; must be one of the keys
            of ``self.kernels``.
        interpolation_factor: Stored as-is on the instance.

    Raises:
        KeyError: If ``selected_kernel`` is not a key of ``self.kernels``.
    """
    super().__init__(t, y)
    self.kernels = None
    self.selected_kernel = selected_kernel
    self.interpolation_factor = interpolation_factor

    # TODO: fix this to comply with python standards
    self.A_mean = None
    self.A_std = None

    # Create different kernels that will be explored
    catalogue = {
        "RBF": 1.0 * RBF(length_scale=0.5),
        "RatQuad": 1.0 * RationalQuadratic(length_scale=1.0, alpha=0.2),
        "ExpSineSquared": 1.0 * ExpSineSquared(length_scale=1.0,
                                               periodicity=3),
        "Matern": 1.0 * Matern(length_scale=1.0, nu=1.5),
        # products
        "Matern*ExpSineSquared": (
            1.0 * Matern(length_scale=1.0, nu=1.5)
            * ExpSineSquared(length_scale=1, periodicity=3)),
        "RBF*ExpSineSquared": (
            1.0 * RBF(length_scale=1.0)
            * ExpSineSquared(length_scale=1, periodicity=3)),
        "RatQuad*ExpSineSquared": (
            1.0 * RationalQuadratic(length_scale=1.0, alpha=0.2)
            * ExpSineSquared(length_scale=1, periodicity=3)),
        "Matern*RBF": (
            1.0 * Matern(length_scale=1.0, nu=1.5) * RBF(length_scale=1)),
        # sums: the 1.0 factor scales only the first term
        "Matern+ExpSineSquared": (
            1.0 * Matern(length_scale=1.0, nu=1.5)
            + ExpSineSquared(length_scale=1, periodicity=3)),
        "RBF+ExpSineSquared": (
            1.0 * RBF(length_scale=1.0)
            + ExpSineSquared(length_scale=1, periodicity=3)),
        "RatQuad+ExpSineSquared": (
            1.0 * RationalQuadratic(length_scale=1.0)
            + ExpSineSquared(length_scale=1, periodicity=3)),
    }
    self.kernels = catalogue

    if selected_kernel not in self.kernels.keys():
        raise KeyError(
            f"Unknown kernel: {selected_kernel}, available kernels: {self.kernels.keys()}"
        )

    # Generate the noisy kernels: every catalogue entry plus white noise
    self.noisy_kernels = {
        label: base + WhiteKernel(noise_level=1,
                                  noise_level_bounds=(1e-7, 1e7))
        for label, base in self.kernels.items()
    }
def test_kernel_rational_quadratic_diag(self):
    """ONNX diagonal of a default RationalQuadratic kernel matches ``ker.diag``.

    The kernel is converted with ``convert_kernel_diag``, run through an
    inference session on the float32 test matrix, and compared with the
    scikit-learn diagonal to four decimals.
    """
    kernel = RationalQuadratic()
    converted = convert_kernel_diag(kernel, 'X', output_names=['Y'],
                                    dtype=np.float32)
    onnx_model = converted.to_onnx(
        inputs=[('X', FloatTensorType([None, None]))])

    session = InferenceSession(onnx_model.SerializeToString())
    feeds = {'X': Xtest_.astype(np.float32)}
    got = session.run(None, feeds)[0]

    expected = kernel.diag(Xtest_)
    assert_almost_equal(got, expected, decimal=4)
def gaussian_regressor_param_selection(X, y, X_test, y_test, nfolds):
    """Grid-search two fixed GP kernels and append a report to results.txt.

    A ``GridSearchCV`` over an RBF kernel and a RationalQuadratic kernel is
    fitted on ``X``/``y``. The best CV score, the best parameters, the best
    estimator's ``score`` on the test data and the mean absolute error of its
    test predictions are appended to ``results.txt`` in the working directory.

    Args:
        X: Training features.
        y: Training targets.
        X_test: Held-out features.
        y_test: Held-out targets.
        nfolds: Passed to ``GridSearchCV`` as ``cv``.

    Returns:
        None. The results are only written to the report file.
    """
    kernel_rbf = ConstantKernel(1.0, constant_value_bounds="fixed") * RBF(
        1.0, length_scale_bounds="fixed")
    kernel_rq = ConstantKernel(
        1.0, constant_value_bounds="fixed") * RationalQuadratic(
            alpha=0.1, length_scale=1)
    param_grid = {'kernel': [kernel_rbf, kernel_rq]}

    # The context manager closes the report even when fitting or scoring
    # raises; the handle used to be opened and never closed.
    with open("results.txt", "a") as f:
        # NOTE(review): `iid` is no longer accepted by recent scikit-learn
        # releases - confirm the pinned version before upgrading.
        grid_search = GridSearchCV(GaussianProcessRegressor(random_state=0),
                                   param_grid,
                                   cv=nfolds,
                                   n_jobs=-1,
                                   iid=False)
        grid_search.fit(X, y)

        # NOTE(review): the labels below say "MSE", but no `scoring` is
        # passed to the search (so best_score_ is the estimator's default
        # score) and the last value is a mean absolute error. The text is
        # left untouched so existing report consumers keep working.
        f.write('\nGaussianRegressor MSE Score for training data: ' +
                str(grid_search.best_score_))
        f.write('\nGaussianRegressor With Parameters:' +
                str(grid_search.best_params_))
        f.write(
            '\nGaussian Regressor coefficient of determination R^2 on test data: '
            + str(grid_search.best_estimator_.score(X_test, y_test)))
        y_pred = grid_search.best_estimator_.predict(X_test)
        f.write('\nMSE for Gaussian Regressor on test set: ' +
                str(mean_absolute_error(y_test, y_pred)))
def train(self, input, target, *args, **kwargs):
    """Fit a fresh GaussianProcessRegressor and keep it on ``self.model``.

    The regressor uses ``self.kernel`` and ``self.alpha`` with a single
    optimizer restart. Two locally built kernels that were never used, and a
    block of commented-out grid-search experiments, have been removed; the
    fitted model is unchanged by that.

    Args:
        input: Training features passed to ``fit``.
        target: Training targets passed to ``fit``.
        *args: Accepted for interface compatibility; not used.
        **kwargs: Accepted for interface compatibility; not used.
    """
    self.model = GaussianProcessRegressor(kernel=self.kernel,
                                          n_restarts_optimizer=1,
                                          alpha=self.alpha)
    self.model.fit(input, target)
def train_gp_model(
    xtrain: Union[np.ndarray, pd.DataFrame],
    ytrain: Union[np.ndarray, pd.DataFrame],
    params,
) -> BaseEstimator:
    """Fit a GP regressor with a composite kernel and return it.

    The kernel is the sum of a Matern(nu=2.5), a RationalQuadratic and an RBF
    term (each scaled by its own ConstantKernel) plus white noise. The Matern
    and RBF terms start with one length scale per input column.

    Args:
        xtrain: Training features; ``xtrain.shape[1]`` sets the number of
            initial length scales.
        ytrain: Training targets.
        params: Mapping of keyword arguments for
            ``GaussianProcessRegressor``. An optional ``'verbose'`` entry
            controls timing output here and is not forwarded to the
            estimator. The mapping itself is not modified.

    Returns:
        The fitted ``GaussianProcessRegressor``.
    """
    # `verbose` belongs to this helper, not to the estimator. Forwarding it
    # made the constructor reject the arguments, while omitting it made the
    # final `params['verbose']` lookup fail, so no input could succeed.
    gp_params = dict(params)
    verbose = gp_params.pop('verbose', 0)

    # define kernel function
    init_length_scale = np.ones(xtrain.shape[1])
    kernel = (
        ConstantKernel() * Matern(nu=2.5, length_scale=init_length_scale)
        + ConstantKernel() * RationalQuadratic(alpha=10, length_scale=1.0)
        + ConstantKernel() * RBF(length_scale=init_length_scale)
        + WhiteKernel(noise_level=0.01)
    )

    # define GP model
    gp_model = GaussianProcessRegressor(kernel=kernel, **gp_params)

    # train GP Model
    t0 = time.time()
    gp_model.fit(xtrain, ytrain)
    t1 = time.time() - t0

    if verbose > 0:
        print(f"Training time: {t1:.3f} secs.")
    return gp_model
def fit_gaussian_process(X_train, y_train):
    """Fit a Matern-kernel GP regressor and return it.

    The Matern smoothness is ``nu=1.5`` when the string "_diff" occurs in
    ``keyword`` and ``nu=2.5`` otherwise. ``keyword`` is not a parameter; it
    is resolved from the surrounding scope at call time.

    Only the kernel that is actually used is constructed now. The RBF,
    Matern(nu=0.5), periodic and RationalQuadratic kernels that were built
    and then discarded have been removed.

    Args:
        X_train: Training features.
        y_train: Training targets.

    Returns:
        The fitted ``GaussianProcessRegressor``.
    """
    bound = (1e-012, 1000000.0)
    nu = 1.5 if "_diff" in keyword else 2.5
    gp_kernel = Matern(length_scale=1.0, length_scale_bounds=bound, nu=nu)

    model = GaussianProcessRegressor(kernel=gp_kernel,
                                     n_restarts_optimizer=1500)
    model.fit(X_train, y_train)
    return model
def test_scikit_gaussian_process_lhs_two(self):
    """Surrogate trained on an LHS sweep predicts a reference point within 5 %.

    A scikit-learn GP with a RationalQuadratic kernel is installed as the
    surrogate regressor of a Booth problem, trained through a sweep of
    ``train_step`` LHS samples, and compared with the direct evaluation at
    (2, -2).
    """
    problem = Booth()
    problem.set_init_values(initial_value=[0., 0.])
    problem.surrogate = SurrogateModelScikit(problem)

    # set custom regressor
    problem.surrogate.regressor = GaussianProcessRegressor(
        kernel=1.0 * RationalQuadratic(length_scale=1.0))

    # set threshold
    problem.surrogate.sigma_threshold = 0.01
    problem.surrogate.train_step = 100

    # sweep analysis (for training)
    generator = LHSGenerator(problem.parameters)
    generator.init(problem.surrogate.train_step)
    SweepAlgorithm(problem, generator=generator).run()

    reference = Individual([2.00, -2.00])
    # eval reference
    value_problem = problem.evaluate(reference)[0]
    # eval surrogate
    value_surrogate = problem.surrogate.predict(reference.vector)[0]

    abs_diff = math.fabs(value_problem - value_surrogate)
    percent = 100.0 * abs_diff / math.fabs(value_problem)
    problem.logger.info(
        "{}: surrogate.value: eval = {}, pred = {}, diff = {} ({} %)".format(
            problem.name, value_problem, value_surrogate, abs_diff, percent))
    self.assertLess(percent, 5.0)
def __init__(self, f, depot, first_xys, first_zs, budget, plotter=None, seedval=None):
    """Store the problem inputs and reset all bookkeeping state.

    Args:
        f: Stored on ``self.f``.
        depot: Stored on ``self.depot``.
        first_xys: Stored on ``self.first_xys``.
        first_zs: Stored on ``self.first_zs``.
        budget: Stored on ``self.budget``.
        plotter: Optional; stored on ``self.plotter``.
        seedval: Optional; stored on ``self.seed``.
    """
    # Caller-supplied inputs
    self.f = f
    self.depot = depot
    self.first_xys = first_xys
    self.first_zs = first_zs

    # Kernel in use. Variants tried earlier were a Matern(nu=2.5) kernel
    # (with and without an added WhiteKernel) and this same kernel plus a
    # WhiteKernel.
    self.kernel = RationalQuadratic(length_scale_bounds=(0.08, 100))

    # Tour state starts empty
    self.xys = []
    self.tour = []
    self.cost = 0

    self.plotter = plotter
    self.budget = budget

    # Timing accumulators
    self.model_time = 0
    self.tour_time = 0
    self.pred_time = 0

    # Flags
    self.plotvar = False
    self.plotpred = False
    self.possibly_no_more_room_left = False
    self.feasible = False

    self.fixed_tour = []
    self.fixed_xys = []
    self.seed = seedval
def test_kernel_rational_quadratic(self):
    """ONNX conversion of a default RationalQuadratic kernel matches sklearn.

    Two cases are checked on the float32 test matrix: the kernel evaluated
    on the input alone (five decimals), and the kernel evaluated against a
    fixed ``x_train`` equal to twice the test matrix (three decimals).
    """
    kernel = RationalQuadratic()
    input_spec = [('X', FloatTensorType([None, None]))]
    feeds = {'X': Xtest_.astype(np.float32)}

    # Case 1: K(X, X)
    converted = convert_kernel(kernel, 'X', output_names=['Y'],
                               dtype=np.float32,
                               op_version=_TARGET_OPSET_)
    onnx_model = converted.to_onnx(inputs=input_spec,
                                   target_opset=_TARGET_OPSET_)
    session = InferenceSession(onnx_model.SerializeToString())
    got = session.run(None, feeds)[0]
    assert_almost_equal(got, kernel(Xtest_), decimal=5)

    # Case 2: K(X, x_train) with x_train baked into the graph
    converted = convert_kernel(kernel, 'X', output_names=['Z'],
                               x_train=(Xtest_ * 2).astype(np.float32),
                               dtype=np.float32,
                               op_version=_TARGET_OPSET_)
    onnx_model = converted.to_onnx(inputs=input_spec)
    session = InferenceSession(onnx_model.SerializeToString())
    got = session.run(None, feeds)[0]
    assert_almost_equal(got, kernel(Xtest_, Xtest_ * 2), decimal=3)
def train(X, y, outdir, max_feat=30):
    """Run forward sequential feature selection around a GP regressor.

    The selector is fitted on ``X``/``y`` inside the experiment's training
    context, dumped to ``<outdir>/sfs.joblib`` and returned.

    Args:
        X: Feature matrix.
        y: Targets.
        outdir: Directory that receives ``sfs.joblib``.
        max_feat: Passed to the selector as ``k_features``.

    Returns:
        The fitted selector.
    """
    experiment = Experiment(project_name='color-ml')

    with experiment.train():
        rq_part = RationalQuadratic(length_scale=0.1,
                                    length_scale_bounds=(1e-4, 0.5))
        noise_part = WhiteKernel(0.01, (1e-3, 0.5e-1))
        regressor = GaussianProcessRegressor(kernel=rq_part + noise_part,
                                             n_restarts_optimizer=15,
                                             normalize_y=True)

        selector = SFS(
            regressor,
            k_features=max_feat,
            forward=True,
            floating=False,
            scoring='neg_mean_squared_error',
            cv=5,
            verbose=2,
            n_jobs=-1,
        )
        selector = selector.fit(X, y)

        target_path = os.path.join(outdir, 'sfs.joblib')
        joblib.dump(selector, target_path)
        return selector
def compute_posterior(t, y, u=np.linspace(0, 1, 10), kernel='rbf'):
    '''
    Compute posterior mean and covariance under a GP model.

    inputs:
    - times t, array; flattened and used as a single input column
    - observations y, array; flattened
    - inducing points u; reshaped to a single column for prediction
    - kernel: chosen family of kernels, one of
      ['rbf', 'Matern', 'RationalQuadratic', 'ExpSineSquared'].
      Any non-string value is handed to the regressor unchanged.

    outputs:
    - vector of posterior means and covariance matrix at inducing points

    raises:
    - ValueError if kernel is a string that names no known family
    '''
    # Resolve a family name exactly once. The previous chain of independent
    # `if` statements kept comparing the freshly built kernel object against
    # the remaining names, and an unknown name reached the regressor as a
    # plain string.
    if isinstance(kernel, str):
        if kernel == 'rbf':
            kernel = (1.0 * RBF(length_scale_bounds=(1e-3, 100.0))
                      + C(1.0, (1e-3, 1e3))
                      + WhiteKernel(noise_level=0.1,
                                    noise_level_bounds=(1e-10, 1e+1)))
        elif kernel == 'Matern':
            kernel = (1.0 * Matern(length_scale_bounds=(1e-3, 100.0))
                      + WhiteKernel(noise_level=0.05,
                                    noise_level_bounds=(1e-10, 1e+1))
                      + C(1.0, (1e-3, 1e3)))
        elif kernel == 'RationalQuadratic':
            kernel = (1.0 * RationalQuadratic(length_scale_bounds=(1e-1, 100.0))
                      + WhiteKernel(noise_level=1,
                                    noise_level_bounds=(1e-10, 1e+1))
                      + C(1.0, (1e-3, 1e3)))
        elif kernel == 'ExpSineSquared':
            kernel = (1.0 * ExpSineSquared(length_scale_bounds=(1e-1, 100.0))
                      + WhiteKernel(noise_level=1,
                                    noise_level_bounds=(1e-10, 1e+1))
                      + C(1.0, (1e-3, 1e3)))
        else:
            raise ValueError(
                "Unknown kernel family {!r}; expected one of 'rbf', "
                "'Matern', 'RationalQuadratic', 'ExpSineSquared'".format(
                    kernel))

    gp = GaussianProcessRegressor(kernel=kernel,
                                  normalize_y=True,
                                  n_restarts_optimizer=3)
    gp.fit(np.reshape(t.flatten(), (-1, 1)), y.flatten())
    mean, cov = gp.predict(np.reshape(u, (-1, 1)), return_cov=True)
    return mean, cov
def gpr_train3(bid, train_size):
    """Fit a four-term GP on one dataset, report timings and RMSE, draw a band.

    Data comes from ``get_data(bid, train_size)``. The regressor uses
    RBF + Matern + RationalQuadratic + DotProduct. Fit time, predict time
    and the test RMSE are printed, and a shaded band is drawn on the current
    matplotlib axes.

    Returns:
        Tuple ``(X, data, total, rmse, residual)`` where ``X`` is
        ``arange(len(data))``, ``total`` is the training targets followed by
        the predictions, and ``residual`` is ``test_Y - output``.
    """
    data, train_X, train_Y, test_X, test_Y = get_data(bid, train_size)

    # kernel = RBF() + DotProduct()
    kernel = RBF() + Matern() + RationalQuadratic() + DotProduct()
    reg = GaussianProcessRegressor(kernel=kernel,
                                   n_restarts_optimizer=10,
                                   alpha=0.1)

    fit_started = time.time()
    reg.fit(train_X, train_Y)
    print(f"train: {time.time() - fit_started}")

    predict_started = time.time()
    output, err = reg.predict(test_X, return_std=True)
    print(f"predict: {time.time() - predict_started}")

    rmse = np.sqrt(metrics.mean_squared_error(test_Y, output))
    print(bid + ": " + str(rmse))

    # 95% confidence interval (original author's label): the band is
    # total * (1 +/- 0.95 * err), with err fixed at 0.05 over the training
    # part.
    total = np.array([*train_Y, *output])
    err = np.append(np.full(train_size, 0.05), err)
    up = total * (1 + 0.95 * err)
    down = total * (1 - 0.95 * err)

    X = np.arange(data.shape[0])
    plt.fill_between(X, up, down, color='red', alpha=0.25)
    return X, data, total, rmse, test_Y - output
def reset(self, sample_point=2000, upper_bound=1, lower_bound=0):
    """Sample a new function from the configured kernel and store it.

    A kernel is built from ``self.funType`` ("MA", "Exp", "RQ" or "RBF"),
    scaled by ``self.kernel_var`` and using ``self.kernel_lengthscale``. One
    function is sampled on a grid that extends 0.1 beyond both bounds and
    wrapped in a cubic interpolator. ``self.maxVal`` and ``self.minVal`` are
    then taken from that interpolator on the un-padded grid.

    Args:
        sample_point: Number of grid points.
        upper_bound: Upper end of the evaluation interval.
        lower_bound: Lower end of the evaluation interval.

    Returns:
        The interpolating function, also stored as ``self.curFun``.

    Raises:
        ValueError: If ``self.funType`` is not one of the known codes.
    """
    padded_grid = np.linspace(lower_bound - 0.1, upper_bound + 0.1,
                              num=sample_point)[:, None]
    inner_grid = np.linspace(lower_bound, upper_bound,
                             num=sample_point)[:, None]

    # Specify the GP kernel (the smoothness of functions).
    # Smaller lengthscale => less smoothness.
    # Builders are lazy so only the selected kernel is constructed.
    builders = {
        "MA": lambda: self.kernel_var * Matern(self.kernel_lengthscale,
                                               nu=1.5),
        "Exp": lambda: self.kernel_var * ExpSineSquared(
            self.kernel_lengthscale, periodicity=0.5),
        "RQ": lambda: self.kernel_var * RationalQuadratic(
            self.kernel_lengthscale, alpha=0.1),
        "RBF": lambda: self.kernel_var * RBF(self.kernel_lengthscale),
    }
    build_kernel = builders.get(self.funType)
    if build_kernel is None:
        raise ValueError("Unknown fun_type!")
    self.kernel = build_kernel()

    # Sample true function values for all inputs in the padded grid
    sampled = self.sample_true_u_functions(padded_grid, self.kernel)
    self.curFun = interp1d(padded_grid.reshape(-1), sampled[0], kind='cubic')

    inner_values = self.curFun(inner_grid)
    self.maxVal = max(inner_values)
    self.minVal = min(inner_values)
    return self.curFun
def gpr_gridsearch():
    """Brute-force the weights of a five-term GP kernel on dataset 'B0005'.

    Each kernel term (RBF, Matern, ExpSineSquared, RationalQuadratic,
    DotProduct) is scaled by a constant weight. The first weight runs over
    ``arange(step, 1 + step, step)`` and the others over
    ``arange(0, 1 + step, step)``. Every combination is fitted and scored by
    test RMSE. All RMSEs, the elapsed time, the best RMSE and its weights
    are printed; nothing is returned.
    """
    data, train_X, train_Y, test_X, test_Y = get_data('B0005', 80)

    step = 1.0
    first_weights = np.arange(step, 1.0 + step, step)
    other_weights = np.arange(0, 1.0 + step, step)

    lowest_rmse = 100000
    best_label = ""
    started = time.time()

    for w1 in first_weights:
        print("w1: " + str(w1))
        for w2 in other_weights:
            for w3 in other_weights:
                for w4 in other_weights:
                    for w5 in other_weights:
                        kernel = (C(constant_value=w1) * RBF()
                                  + C(constant_value=w2) * Matern()
                                  + C(constant_value=w3) * ExpSineSquared()
                                  + C(constant_value=w4) * RationalQuadratic()
                                  + C(constant_value=w5) * DotProduct())
                        reg = GaussianProcessRegressor(
                            kernel=kernel,
                            n_restarts_optimizer=10,
                            alpha=0.1)
                        reg.fit(train_X, train_Y)
                        output, _ = reg.predict(test_X, return_std=True)
                        rmse = np.sqrt(
                            metrics.mean_squared_error(test_Y, output))

                        label = ", ".join(
                            str(w) for w in (w1, w2, w3, w4, w5))
                        print(label + ": " + str(rmse))
                        if rmse < lowest_rmse:
                            lowest_rmse = rmse
                            best_label = label

    print("gridsearch use: " + str(time.time() - started) + "s")
    print(lowest_rmse)
    print(best_label)
def construct_surrogate(the_size, the_dim, **kwargs):
    """Return an unfitted GP regressor to be used as a surrogate model.

    The kernel family is selected by the local constant ``W`` (currently
    "RBF"). The regressor uses ``alpha=0.1`` and 100 optimizer restarts.

    Args:
        the_size: Not used by this function.
        the_dim: Not used by this function.
        **kwargs: Not used by this function.

    Returns:
        A ``GaussianProcessRegressor`` configured with the selected kernel.

    Raises:
        ValueError: If ``W`` names no known kernel family. Before, that case
            fell through every branch and failed on an unbound ``kernel``.
    """
    W = "RBF"
    noise_amplitude = 0.1
    iterations = 100

    if W == "RBF":
        kernel = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
    elif W == "Matern12":
        kernel = 1.0 * Matern(
            length_scale=1.0, length_scale_bounds=(1e-2, 1e2), nu=0.5)
    elif W == "Matern32":
        kernel = 1.0 * Matern(
            length_scale=1.0, length_scale_bounds=(1e-2, 1e2), nu=1.5)
    elif W == "Matern52":
        kernel = 1.0 * Matern(
            length_scale=1.0, length_scale_bounds=(1e-2, 1e2), nu=2.5)
    elif W == "RationalQuadratic":
        kernel = 1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1)
    elif W == "ExpSineSquared":
        kernel = 1.0 * ExpSineSquared(length_scale=1.0,
                                      periodicity=3.0,
                                      length_scale_bounds=(0.1, 10.0),
                                      periodicity_bounds=(1.0, 10.0))
    elif W == "DotProduct":
        kernel = ConstantKernel(0.1, (0.01, 10.0)) * (DotProduct(
            sigma_0=1.0, sigma_0_bounds=(0.0, 10.0))**2)
    else:
        raise ValueError("Unknown kernel name: {!r}".format(W))

    return GaussianProcessRegressor(kernel=kernel,
                                    alpha=noise_amplitude,
                                    n_restarts_optimizer=iterations)
def fit_gpr(df, col: str, range_max: int):
    """Fit a GP regressor to one column, save it, and predict 0..range_max.

    Args:
        df: Data source handed to ``choose_data``.
        col: Column to model; must be 'New Cases' or 'New Deaths'.
        range_max: Last integer input (inclusive) to predict for.

    Returns:
        Tuple ``(x_values, predictions)``: ``x_values`` is
        ``linspace(0, range_max, range_max + 1)`` and ``predictions`` holds
        the absolute values of the model output at the integers
        0..range_max.

    Raises:
        ValueError: If ``col`` is not a supported column. Before, that case
            failed later on unbound ``alpha``/``normalize_y``.
    """
    # The following hyperparameters were decided using a grid search
    # cross-validation: (alpha, normalize_y, file the model is saved to).
    settings = {
        'New Cases': (1, True, 'cases.joblib'),
        'New Deaths': (0.5, True, 'deaths.joblib'),
    }
    if col not in settings:
        raise ValueError(
            "Unsupported column {!r}; expected one of {}".format(
                col, sorted(settings)))
    alpha, normalize_y, model_path = settings[col]

    kernel = ExpSineSquared() + RBF() + Matern() + RationalQuadratic()
    gpr = GaussianProcessRegressor(kernel=kernel,
                                   alpha=alpha,
                                   n_restarts_optimizer=10,
                                   normalize_y=normalize_y)

    # Fetch the data once instead of once per element.
    # Assumes choose_data is deterministic for a given (df, col).
    chosen = choose_data(df, col)
    X = chosen[0].reshape(-1, 1)
    y = chosen[1].reshape(-1, 1)
    gpr.fit(X, y)

    X_predict = np.arange(range_max + 1).reshape(-1, 1)
    plt_X_predict = np.linspace(0, range_max, range_max + 1)
    # Flattening gives one value per input whether predict returns a column
    # of shape (n, 1) or a flat vector of shape (n,).
    prediction = np.asarray(gpr.predict(X_predict)).reshape(-1)

    # save the model so we can load it later
    dump(gpr, model_path)

    # Cases/deaths must not be negative, so return absolute values.
    return np.array(plt_X_predict), np.abs(prediction)
def get_dict_str_kernel(seasonal_periods: int):
    """
    Map each candidate kernel's string form to the kernel object, so kernels
    recorded as documentation strings in offline optimisation runs can be
    looked up again.

    :param seasonal_periods: length of a seasonal period
    :return: dictionary mapping documentation string and kernel
    """
    wide_bounds = (1e-5, 1e5)
    season_bounds = (int(seasonal_periods * 0.8), int(seasonal_periods * 1.2))

    base_kernels = [
        ConstantKernel(constant_value=1000,
                       constant_value_bounds=wide_bounds),
        Matern(length_scale=1.0, length_scale_bounds=wide_bounds),
        ExpSineSquared(length_scale=1.0,
                       periodicity=seasonal_periods,
                       length_scale_bounds=wide_bounds,
                       periodicity_bounds=season_bounds),
        RBF(length_scale=1.0, length_scale_bounds=wide_bounds),
        RationalQuadratic(length_scale=1.0,
                          alpha=1.0,
                          length_scale_bounds=wide_bounds,
                          alpha_bounds=wide_bounds),
        WhiteKernel(noise_level=1.0, noise_level_bounds=wide_bounds),
    ]

    # extend_kernel_combinations fills `kernels` in place
    kernels = []
    extend_kernel_combinations(kernels=kernels, base_kernels=base_kernels)
    return {str(candidate): candidate for candidate in kernels}
def train_GaussianProcess(X_train, y_train):
    """Random-search a GP regressor, raising ``alpha`` until fitting works.

    A ``RandomizedSearchCV`` over five kernels and a random number of
    optimizer restarts is fitted. When the search raises, ``alpha`` is
    multiplied by ten and the search is retried.

    Retries are bounded and only ordinary exceptions are retried. The
    previous bare ``except`` inside ``while True`` also swallowed
    ``KeyboardInterrupt`` and looped forever on errors that a larger
    ``alpha`` cannot fix.

    Args:
        X_train: Training features.
        y_train: Training targets.

    Returns:
        The fitted ``RandomizedSearchCV`` object.

    Raises:
        Exception: The last fitting error, once ``alpha`` has reached the
            retry ceiling.
    """
    print('Training GaussianProcess ...')
    alpha = 1e-9
    # Ceiling for the escalation: about 15 retries from the starting value.
    max_alpha = 1e6
    while True:
        try:
            gaussian = GaussianProcessRegressor(normalize_y=True,
                                                random_state=0,
                                                optimizer=None,
                                                alpha=alpha)
            param_distributions = {
                'kernel': [
                    DotProduct(),
                    WhiteKernel(),
                    RBF(),
                    Matern(),
                    RationalQuadratic()
                ],
                'n_restarts_optimizer': scipy.stats.randint(0, 10),
                # 'alpha' : scipy.stats.uniform(1e-9, 1e-8)
            }
            randcv = sklearn.model_selection.RandomizedSearchCV(
                gaussian,
                param_distributions,
                n_iter=5,
                cv=3,
                n_jobs=-1,
                random_state=0)
            randcv.fit(X_train, y_train)
            return randcv
        except Exception:
            # Give up instead of escalating alpha without limit.
            if alpha >= max_alpha:
                raise
            alpha *= 10
def test_rational_quadratic_kernel():
    """Evaluating RationalQuadratic with a vector length_scale raises.

    The kernel is built with a two-element ``length_scale``; calling it on
    ``X`` must raise ``AttributeError`` with the isotropic-only message.
    """
    expected_message = (
        "RationalQuadratic kernel only supports isotropic version, "
        "please use a single scalar for length_scale"
    )
    anisotropic = RationalQuadratic(length_scale=[1.0, 1.0])
    with pytest.raises(AttributeError, match=expected_message):
        anisotropic(X)
def __init__(self, kernel_alias, seed=0, signal=1, **params):
    """Build the kernel named by ``kernel_alias`` and a GPR factory.

    Args:
        kernel_alias: One of "quad", "rbf", "matern", "expsine", "white".
        seed: Used as ``random_state`` of the regressor.
        signal: Stored on the instance.
        **params: Keyword arguments for the kernel constructor. The short
            keys "lsb", "ab" and "nlb" are renamed to
            "length_scale_bounds", "alpha_bounds" and "noise_level_bounds".
            "restarts" (default 10) is removed and used as the number of
            optimizer restarts.

    Raises:
        Exception: If ``kernel_alias`` is not a known alias.
    """
    self.kernel_alias = kernel_alias
    self.seed = seed
    self.signal = signal
    self.params = params

    # Expand abbreviated keyword names in place
    abbreviations = (
        ("lsb", "length_scale_bounds"),
        ("ab", "alpha_bounds"),
        ("nlb", "noise_level_bounds"),
    )
    for short_key, full_key in abbreviations:
        if short_key in self.params:
            self.params[full_key] = self.params.pop(short_key)

    self.restarts = self.params.pop("restarts", 10)

    alias = self.kernel_alias
    if alias == "quad":
        kernel_cls = RationalQuadratic
    elif alias == "rbf":
        kernel_cls = RBF
    elif alias == "matern":
        kernel_cls = Matern
    elif alias == "expsine":
        kernel_cls = ExpSineSquared
    elif alias == "white":
        kernel_cls = WhiteKernel
    else:
        raise Exception("Unknown kernel:", self.kernel_alias)
    self.kernel = kernel_cls(**self.params)

    # Factory: attributes are read when it is called, not when it is built.
    # (A variant adding WhiteKernel(noise_level_bounds=(1e-5, 1e-2)) to the
    # kernel was tried before.)
    self.skgpr_func = lambda: SkGPR(kernel=self.kernel,
                                    n_restarts_optimizer=self.restarts,
                                    copy_X_train=True,
                                    random_state=self.seed)
def get_case_a_case_b_transition(x, response):
    """Fit a GP to the responses and predict along the line y = x / 3.

    Pass in [x, y] values for the Gamma plane plot as ``x`` and the fatigue
    lives / nuggets as ``response``. Sixteen points are spaced evenly from
    the smallest first coordinate in ``x`` up to ``total_extents``, which is
    resolved from the surrounding scope.

    Returns:
        Tuple ``(points, predictions)``: ``points`` is the 16 x 2 array of
        query points and ``predictions`` the GP output there.
    """
    # IMPORTANT: alpha controls the amount of noise in the GPR model and may
    # require adjustment for proper fitting of curves on the gamma plane!
    model = GPR(kernel=1e-7 * RationalQuadratic(),
                n_restarts_optimizer=10,
                alpha=5e-5)

    smallest_x = np.min(x[:, 0])
    model.fit(x, response)

    first_coord = np.linspace(smallest_x, total_extents, 16)
    query_points = np.column_stack((first_coord, first_coord / 3))
    return query_points, model.predict(query_points)
def test_gpr_rbf_fitted_return_std_rational_quadratic(self):
    """A fitted RationalQuadratic GPR converts to ONNX with ``return_std``.

    The regressor is fitted on the module-level training data and converted
    with double-precision inputs. The model is dumped, and its outputs are
    checked against scikit-learn with ``return_std=True``.
    """
    regressor = GaussianProcessRegressor(kernel=RationalQuadratic(),
                                         alpha=1e-7,
                                         n_restarts_optimizer=15,
                                         normalize_y=True)
    regressor.fit(Xtrain_, Ytrain_)
    regressor.predict(Xtrain_, return_std=True)

    # return_cov=False, return_std=False
    options = {GaussianProcessRegressor: {"return_std": True}}
    input_types = [('X', DoubleTensorType([None, None]))]
    model_onnx = to_onnx(regressor,
                         initial_types=input_types,
                         options=options,
                         dtype=np.float64)
    self.assertTrue(model_onnx is not None)

    test_inputs = Xtest_.astype(np.float64)
    dump_data_and_model(
        test_inputs,
        regressor,
        model_onnx,
        basename="SklearnGaussianProcessRationalQuadraticStdDouble-Out0")
    self.check_outputs(
        regressor,
        model_onnx,
        Xtest_.astype(np.float64),
        predict_attributes=options[GaussianProcessRegressor])
def test_gpr_rbf_fitted_return_std_rational_quadratic_true(self):
    """RationalQuadratic GPR on synthetic regression data converts to ONNX.

    The regressor is fitted on a split of a two-feature ``make_regression``
    problem. If fitting raises ``AttributeError`` or ``TypeError`` the test
    returns early. Otherwise the model is converted with ``return_std``
    enabled, dumped, and compared with scikit-learn with optimisation
    disabled.
    """
    features, targets = make_regression(n_features=2, n_informative=2,
                                        random_state=2)
    X_train, X_test, y_train, _ = train_test_split(features, targets)

    regressor = GaussianProcessRegressor(kernel=RationalQuadratic(),
                                         alpha=1e-3,
                                         n_restarts_optimizer=25,
                                         normalize_y=True)
    try:
        regressor.fit(X_train, y_train)
    except (AttributeError, TypeError):
        # unstable bug fixed in scikit-learn 0.24
        return
    regressor.predict(X_train, return_std=True)

    # return_cov=False, return_std=False
    options = {GaussianProcessRegressor: {"return_std": True}}
    model_onnx = to_onnx(
        regressor,
        initial_types=[('X', DoubleTensorType([None, None]))],
        options=options,
        target_opset=TARGET_OPSET)
    self.assertTrue(model_onnx is not None)

    dump_data_and_model(
        X_test.astype(np.float64),
        regressor,
        model_onnx,
        basename="SklearnGaussianProcessRationalQuadraticStdDouble-Out0",
        disable_optimisation=True)
    self.check_outputs(
        regressor,
        model_onnx,
        X_test.astype(np.float64),
        predict_attributes=options[GaussianProcessRegressor],
        disable_optimisation=True)
def main():
    """Parse CLI options, build and validate a model, append its test scores.

    The parsed options are used to create the module-level ``Data`` object
    and the model. After validation the model is evaluated, and one summary
    line with RMSE, RSE and correlation is appended to the file named by
    ``--save``.
    """
    global Data

    default_kernels = [
        WhiteKernel() + DotProduct(),
        RBF(),
        Matern(),
        RationalQuadratic(),
    ]

    parser = argparse.ArgumentParser(
        description='sklearn Time series multi-output forecasting')
    parser.add_argument('--data_dir', default='data', type=str)
    parser.add_argument('--window', type=int, default=16)
    parser.add_argument('--horizon', type=int, default=4)
    parser.add_argument('--kernel_ls', default=default_kernels)
    parser.add_argument('--name', type=str, required=True)
    parser.add_argument('--save', type=str, required=True)
    params = parser.parse_args()

    Data = DataUtils(params)

    model = create_model(params)
    model.validate()
    rmse, rse, corr = model.evaluate()

    summary = (
        f"| {model.name} : {params.horizon} test rmse {rmse:5.4f} , test rse {rse:5.4f} , test corr {corr:5.4f} |\n"
    )
    with open(params.save, 'a+') as report:
        report.write(summary)
def __init__(self, fixed_noise=0.04):
    """
    Create an empty container: no data sets, no fitted processes, and no
    horizontal or vertical transformations yet.

    ``fixed_noise`` is squared and used as the level of a WhiteKernel whose
    second argument is "fixed", added to the default kernel.
    """
    # Data containers: each attribute gets its own empty list
    for attr in ("xdata", "xtransformed", "ydata", "ytransformed", "states"):
        setattr(self, attr, [])

    # Set the default Gaussian Process kernel
    self.kernel = (RationalQuadratic() * ConstantKernel()
                   + ConstantKernel()
                   + WhiteKernel()
                   + WhiteKernel(fixed_noise**2, "fixed"))
    self.gps = []

    # Horizontal (h*) then vertical (v*) transformation bookkeeping, all
    # starting empty
    for prefix in ("h", "v"):
        for suffix in ("transforms", "param_names", "params",
                       "uncertainties", "bounds", "shared"):
            setattr(self, prefix + suffix, [])
def interpolate_column(
        data,
        name,
        n_samples=500,
):
    """Interpolate one column to minute resolution with a GP regressor.

    The first ``n_samples`` values of ``data[name]`` are regressed on their
    timestamps, expressed as whole minutes since the epoch. The fitted model
    is evaluated on a minute-by-minute range from the first index entry to
    entry ``n_samples``, and negative predictions are clipped to zero.

    Args:
        data: Frame with a datetime index that contains the columns 'GHI'
            and ``name``.
        name: Column to interpolate.
        n_samples: Number of leading rows used for fitting.

    Returns:
        A ``pd.Series`` named after the column, indexed by the
        minute-resolution date range.
    """
    # Divide by the integer nanosecond count of one minute. Floor-dividing an
    # integer array by a Timedelta object is deprecated in pandas in favour
    # of this form. Assumes a nanosecond-resolution index - TODO confirm.
    one_minute = pd.Timedelta(1, unit='m').value

    min_since_epoch = data['GHI'].index[0:n_samples].view(
        'int64') // one_minute
    # `.values` replaces `as_matrix()`, which current pandas no longer has.
    col_data = data[name].values[0:n_samples]

    # create GP regressor
    kernel = ConstantKernel() + Matern(length_scale=2, nu=3 / 2) + Matern(
        length_scale=2, nu=3 / 2) + RationalQuadratic()
    gpr = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=5)
    gpr.fit(min_since_epoch.reshape(-1, 1), col_data)

    # create minute by minute index
    date_range = pd.date_range(start=data[name].index[0],
                               end=data[name].index[n_samples],
                               freq="T")
    date_range_min = date_range.view('int64') // one_minute
    interpolated_data = gpr.predict(date_range_min.reshape(-1, 1))

    # remove all values less than zero
    interpolated_data[interpolated_data < 0] = 0

    inter_series = pd.Series(interpolated_data,
                             index=date_range,
                             name="{}".format(name))
    return inter_series
def fit(split=None):
    """Fit a GP regressor on the module-level ``x``/``y`` data.

    Features are standardised with the training mean and standard deviation
    (plus 1e-3 in the denominator).

    Args:
        split: Optional pair ``(train_index, val_index)`` used to index
            ``x`` and ``y``. When falsy, all data is used for training.

    Returns:
        The fitted model when ``split`` is falsy; otherwise the tuple
        ``(model, validation_predictions)``.
    """
    global x, y

    if split:
        train_idx, val_idx = split[0], split[1]
        x_train, y_train = x[train_idx], y[train_idx]
        x_val, y_val = x[val_idx], y[val_idx]
    else:
        x_train, y_train = x, y

    # Standardise with statistics of the training portion only
    center = x_train.mean(axis=0)
    scale = x_train.std(axis=0) + 1e-3
    x_train = (x_train - center) / scale
    if split:
        x_val = (x_val - center) / scale

    kernel = (1.0 * RationalQuadratic(1.0, 1.0)
              + 1.0 * DotProduct(1.0)
              + 1.0 * WhiteKernel(1.0))
    model = GaussianProcessRegressor(kernel=kernel,
                                     alpha=1e-4,
                                     normalize_y=True)
    model.fit(x_train, y_train)

    if not split:
        return model
    return model, model.predict(x_val)
def to_sklearn(self, n_samples: int = 0, n_features: int = 0, **kwargs):
    """Build a scikit-learn ``GaussianProcessClassifier`` from this config.

    When ``self.kernel`` is one of the known alias strings it is replaced,
    on the instance, by a default-constructed kernel of the matching class.
    Any other value is passed to the classifier unchanged.

    Args:
        n_samples: Not used by this method.
        n_features: Not used by this method.
        **kwargs: Not used by this method.

    Returns:
        An unfitted ``GaussianProcessClassifier``.
    """
    from sklearn.gaussian_process import GaussianProcessClassifier

    kernel_class_names = {
        "constant": "ConstantKernel",
        "rbf": "RBF",
        "matern": "Matern",
        "rational_quadratic": "RationalQuadratic",
        "exp_sin_squared": "ExpSineSquared",
        "white": "WhiteKernel",
        "dot": "DotProduct",
    }

    alias = self.kernel
    # Kernel objects are not valid dictionary keys, so only strings are
    # looked up; the kernels module is imported only when needed.
    if isinstance(alias, str) and alias in kernel_class_names:
        from sklearn.gaussian_process import kernels as sk_kernels
        kernel_cls = getattr(sk_kernels, kernel_class_names[alias])
        self.kernel = kernel_cls()

    return GaussianProcessClassifier(
        kernel=self.kernel,
        optimizer=self.optimizer,
        n_restarts_optimizer=self.n_restarts_optimizer,
        max_iter_predict=self.max_iter_predict,
        multi_class=self.multi_class,
        random_state=self.random_state)