def cleaner_GP(x, y, **kwargs):
    from sklearn import gaussian_process
    from sklearn.gaussian_process.kernels import RBF
    from sklearn.gaussian_process.kernels import WhiteKernel
    from sklearn.gaussian_process.kernels import RationalQuadratic

    if isinstance(x, list):
        x = np.array(x)
    if isinstance(y, list):
        y = np.array(y)
    X = x.reshape(-1, 1)
    # create a zero mean process
    Y = y.reshape(-1, 1) - np.nanmean(y)

    # define the kernel based on kwargs
    if 'kernel' in kwargs.keys():
        print('kernel is defined by user')
        kernel = kwargs['kernel']
    elif 'kernel_lst' in kwargs.keys():
        print('kernel constituents given by user')
        kernel = WhiteKernel(noise_level=1)
        if 'RBF' in kwargs['kernel_lst']:
            kernel += RBF(length_scale=1)
        if 'RationalQuadratic' in kwargs['kernel_lst']:
            kernel += RationalQuadratic(alpha=1, length_scale=1)
    else:
        print('default kernel')
        kernel = WhiteKernel(noise_level=1) + 1 * RBF(length_scale=1)

    gp = gaussian_process.GaussianProcessRegressor(
        kernel=kernel, n_restarts_optimizer=10)
    gp.fit(X, Y)
    print(gp.kernel_)
    y_pred, sigma = gp.predict(X, return_std=True)
    uplim = y_pred + (2 * sigma).reshape(-1, 1)
    lowlim = y_pred - (2 * sigma).reshape(-1, 1)
    idx = [i for i in range(len(Y))
           if (Y[i] > uplim[i] or Y[i] < lowlim[i])]
    return idx
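# A minimal usage sketch for cleaner_GP (not from the original source; it
# assumes numpy is available as np at module level, as the function body
# does). One obvious outlier is injected into noisy sine data; the function
# returns the indices of points outside the 2-sigma band of the fitted GP.
import numpy as np

x_demo = np.linspace(0, 10, 100)
y_demo = np.sin(x_demo) + np.random.normal(scale=0.1, size=x_demo.shape)
y_demo[42] += 3.0  # inject an obvious outlier
outlier_idx = cleaner_GP(x_demo, y_demo, kernel_lst=['RBF'])
print(outlier_idx)  # expected to contain index 42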
def fit_goodness_svr(X, y, n):
    kernel = 2.0 * RBF(length_scale=10.0, length_scale_bounds=(1e-2, 1e3)) \
        + WhiteKernel(noise_level=1.0)
    gpr = GaussianProcessRegressor(kernel=kernel, normalize_y=True)
    scaler = StandardScaler().fit(X)
    even_KFold = even_split_train_test(X, y, n)
    error = []
    for training_data_X, training_data_y, test_data_X, test_data_y in even_KFold:
        gpr.fit(scaler.transform(training_data_X), training_data_y)
        y_gpr, y_std = gpr.predict(scaler.transform(test_data_X),
                                   return_std=True)
        tmp_err = (y_gpr - test_data_y).tolist()
        error.extend(tmp_err)
    return np.array(error)
def __init__(self, params=None, limit=None, model=None):
    """ Init """
    if model is None:
        kernel = PairwiseKernel(
            metric='laplacian') * DotProduct() + WhiteKernel(
                noise_level=5.0)
        self.model = GaussianProcessClassifier(kernel=kernel, n_jobs=-1)
    else:
        self.fitted = True
        self.model = model
    if limit is not None:
        self.limit = limit
def main():
    X, y = _init()
    kernel = WhiteKernel(noise_level=1e-10) \
        + ConstantKernel(2.0, (1e-3, 1e3)) \
        * RBF([.1, .1], [(1e-5, 1e2), (1e-5, 1e2)])
    gpr = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10)
    gpr.fit(X, y)
    x = np.random.rand(TEST_CASE_NUMBER, INPUT_DIM) * SIGNAL_LEVEL
    y_pred, sigma = gpr.predict(x, return_std=True)
    _print_test_error(x, y_pred, sigma)
    print(np.average(y))
    print(np.average(sigma))
def get_models():
    models = dict()

    # Neural Networks
    models['nnet'] = MLPRegressor(activation='relu',
                                  hidden_layer_sizes=(50, 50, 50),
                                  learning_rate='adaptive',
                                  learning_rate_init=0.1,
                                  max_iter=2000,
                                  solver='sgd',
                                  alpha=0.01,
                                  random_state=0,
                                  verbose=True)

    # Linear Regression
    # NOTE: booleans, not the strings 'True'/'False' (both strings are truthy)
    tuned_parameters_lr = [{'normalize': [True, False]}]
    clf_lr = GridSearchCV(LinearRegression(), tuned_parameters_lr,
                          scoring='neg_mean_absolute_error')
    models['lr'] = clf_lr

    # Decision Tree
    tuned_parameters_dtr = [{'min_samples_leaf': [5, 10, 50, 100],
                             'criterion': ['mse', 'friedman_mse', 'mae',
                                           'poisson'],
                             'splitter': ['best', 'random'],
                             'random_state': [0]}]
    clf_dtr = GridSearchCV(DecisionTreeRegressor(), tuned_parameters_dtr,
                           scoring='neg_mean_absolute_error')
    models['dtr'] = clf_dtr

    # Random Forest
    tuned_parameters_rf = [{'min_samples_leaf': [5, 10, 50, 100],
                            'n_estimators': [5, 10, 50, 100],
                            'criterion': ['mse', 'mae'],
                            'random_state': [0]}]
    clf_rf = GridSearchCV(RandomForestRegressor(), tuned_parameters_rf,
                          scoring='neg_mean_absolute_error')
    models['rf'] = clf_rf

    # SVR
    tuned_parameters_svm = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                             'C': [1, 10, 100, 1000]}]
    clf_svm = GridSearchCV(SVR(), tuned_parameters_svm,
                           scoring='neg_mean_absolute_error')
    models['svm'] = clf_svm

    # Bayesian Ridge
    tuned_parameters_bayes = [{'n_iter': [100, 200, 300, 400, 500]}]
    clf_bayes = GridSearchCV(BayesianRidge(), tuned_parameters_bayes,
                             scoring='neg_mean_absolute_error')
    models['bayes'] = clf_bayes

    # kNNeighbours
    tuned_parameters_knn = [{'n_neighbors': [1, 5, 10, 15, 20, 50],
                             'weights': ['uniform', 'distance'],
                             'algorithm': ['auto', 'ball_tree', 'kd_tree',
                                           'brute']}]
    clf_knn = GridSearchCV(KNeighborsRegressor(), tuned_parameters_knn,
                           scoring='neg_mean_absolute_error')
    models['knn'] = clf_knn

    # Gaussian Process
    # best params: {'kernel': WhiteKernel(noise_level=1) + RBF(length_scale=1)
    #               + DotProduct(sigma_0=1), 'random_state': 0}
    tuned_parameters_gp = [{'kernel': [WhiteKernel() + RBF() + DotProduct(),
                                       RBF() + DotProduct()],
                            'random_state': [0]}]
    clf_gp = GridSearchCV(GaussianProcessRegressor(), tuned_parameters_gp,
                          scoring='neg_mean_absolute_error')
    models['gp'] = clf_gp

    return models
def test_query_regression_std(self):
    # Get the data
    X = np.random.choice(np.linspace(0, 20, 1000), size=100,
                         replace=False).reshape(-1, 1)
    y = np.sin(X) + np.random.normal(scale=0.3, size=X.shape)

    # assembling initial training set
    train_idx, test_idx, label_idx, unlabel_idx = split(
        X=X, y=y,
        test_ratio=0.3,
        initial_label_rate=0.05,
        split_count=1,
        all_class=True)

    # defining the kernel for the Gaussian process
    ml_technique = GaussianProcessRegressor(
        kernel=RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e3))
        + WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+1)))

    experiment = HoldOutExperiment(
        client=self.__client,
        X=X,
        Y=y,
        scenario_type=PoolBasedSamplingScenario,
        train_idx=train_idx,
        test_idx=test_idx,
        label_idx=label_idx,
        unlabel_idx=unlabel_idx,
        ml_technique=ml_technique,
        performance_metrics=[Mse(squared=True)],
        query_strategy=QueryRegressionStd(),
        oracle=SimulatedOracle(labels=y),
        stopping_criteria=PercentOfUnlabel(value=70),
        self_partition=False
    )

    result = experiment.evaluate(verbose=True)
    regressor = result[0].ml_technique

    # plotting the initial estimation
    with plt.style.context('seaborn-white'):
        plt.figure(figsize=(14, 7))
        x = np.linspace(0, 20, 1000)
        pred, std = regressor.predict(x.reshape(-1, 1), return_std=True)
        plt.plot(x, pred)
        plt.fill_between(x, pred.reshape(-1, ) - std,
                         pred.reshape(-1, ) + std, alpha=0.2)
        plt.scatter(X, y, c='k')
        plt.title('Initial estimation')
        plt.show()
def regressor(X_train, Y_train):
    kernel = 1.0 * RBF(length_scale=0.01, length_scale_bounds=(1e-1, 1e2)) + (
        DotProduct()**3) * WhiteKernel(noise_level=2.e-8,
                                       noise_level_bounds=(1e-10, 1e-1))
    gp = GaussianProcessRegressor(kernel=kernel, alpha=0.,
                                  n_restarts_optimizer=15).fit(X_train, Y_train)
    print("kernel init: ", kernel)
    print("kernel init params: ", kernel.theta)
    print("kernel optimum: ", gp.kernel_)
    print("opt kernel params: ", gp.kernel_.theta)
    print("LML (opt): ", gp.log_marginal_likelihood())
    return gp
def make_model(X, y, optimize, lengthscale, variance, noise_variance):
    kernel = variance * RBF(length_scale=lengthscale) \
        + WhiteKernel(noise_level=noise_variance)
    if optimize:
        # default optimizer = "fmin_l_bfgs_b"
        model = GaussianProcessRegressor(kernel=kernel, alpha=0.0)
    else:
        model = GaussianProcessRegressor(kernel=kernel, alpha=0.0,
                                         optimizer=None)
    model.fit(X, y)
    return model
def build_approximation(current_points, current_target, current_error,
                        all_points, sigma=0.00001):
    """
    Train Gaussian Process approximation and make prediction.

    :param current_points: (2d numpy array) [number of points, dimension]
        coordinates of training points
    :param current_target: (1d numpy array) [number of points] training target
    :param current_error: (2d numpy array) [number of points, 2]
        confidence interval for target
    :param all_points: (2d numpy array) [number of points, dimension]
        coordinates of all points
    :param sigma: (float) small constant added to the diagonal
    :return: gp_mean, gp_std (to be used for inference and acquisition function)
    """
    # (diagonal) variance vector
    variance = (current_error[:, 1] - current_error[:, 0]) ** 2 + sigma
    gp = GaussianProcessRegressor(kernel=RBF() + WhiteKernel(),
                                  alpha=variance, normalize_y=True)
    gp = gp.fit(current_points, current_target)
    return gp.predict(all_points, return_std=True)
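# A minimal usage sketch for build_approximation (not from the original
# source; assumes GaussianProcessRegressor, RBF and WhiteKernel are imported
# at module level, as the function body requires): per-point confidence
# intervals are turned into the heteroscedastic alpha vector.
import numpy as np

pts = np.linspace(0, 1, 10).reshape(-1, 1)            # training coordinates
target = np.sin(2 * np.pi * pts).ravel()              # training target
err = np.column_stack([target - 0.1, target + 0.1])   # +/-0.1 interval
grid = np.linspace(0, 1, 200).reshape(-1, 1)          # prediction points
gp_mean, gp_std = build_approximation(pts, target, err, grid)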
def model_function(var, lscale, noise_level,
                   var_value_bounds=(1e-4, 1e-2),
                   length_scale_bounds=(3, 100),
                   noise_level_bounds=(1e-6, 1e-4)):
    kernel = C(var, constant_value_bounds=var_value_bounds) * \
        RBF(length_scale=lscale, length_scale_bounds=length_scale_bounds) \
        + WhiteKernel(noise_level=noise_level,
                      noise_level_bounds=noise_level_bounds)
    return kernel
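# Hypothetical use of model_function (values chosen for illustration only,
# and GaussianProcessRegressor is assumed to be in scope): the returned
# composite kernel is passed directly to the regressor, which optimizes the
# hyperparameters within the given bounds.
k = model_function(var=1e-3, lscale=10.0, noise_level=1e-5)
gp = GaussianProcessRegressor(kernel=k)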
def build_GP_model(self):
    N = self.observation_size
    GP_list = []
    noise = 0.01
    for i in range(N - 1):
        kern = 1.0 * RBF(length_scale=2.0, length_scale_bounds=(1e-2, 1e3)) \
            + WhiteKernel(noise_level=0.1, noise_level_bounds=(1e-10, 1e+1))
        # kern = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
        gp = GaussianProcessRegressor(kernel=kern, alpha=noise,
                                      n_restarts_optimizer=10)
        GP_list.append(gp)
    self.GP_model = GP_list
def initialize_models(self, verbose: int = 0) -> NoReturn:
    """Initializes the kernels for the GPs.

    Arguments
    ----------
    verbose : Verbosity level.
    """
    print("\nInitialize the following Gaussian Processes:")
    print("**************************************************************************")
    if verbose > 0:
        for key in self.models.keys():
            print(key)
            pretty_print_dict(self.parameters[key])
            print()
    print("**************************************************************************")
    for key in self.models.keys():
        print(key)
        p = self.parameters[key]
        if p["kernel"] == "rbf":
            if p["whitekernel"]:
                K = C(
                    constant_value=p["constant_value"],
                    constant_value_bounds=p["constant_value_bounds"],
                ) * RBF(
                    length_scale=p["length_scale"],
                    length_scale_bounds=p["length_scale_bounds"],
                ) + WhiteKernel(
                    noise_level=p["noise_level"],
                    noise_level_bounds=p["noise_level_bounds"],
                )
            else:
                K = C(
                    constant_value=p["constant_value"],
                    constant_value_bounds=p["constant_value_bounds"],
                ) * RBF(
                    length_scale=p["length_scale"],
                    length_scale_bounds=p["length_scale_bounds"],
                )
        else:
            raise NotImplementedError("Kernel {} not implemented".format(
                p["kernel"]))
        self.initial_kernels[key] = K
        print()
def sla_from_gaussian_process(l2, ssh_tiepoint_indices, matern_kernel=None,
                              white_noise_kernel=None):
    """
    Compute sea level anomaly by fitting lead tie points with a Gaussian
    process. This method uses an optimization process based on the assumption
    that the covariance decreases with distance (Matern kernel) and that the
    data is noisy (white noise kernel).

    :param l2:
    :param ssh_tiepoint_indices:
    :param matern_kernel:
    :param white_noise_kernel:
    :return:
    """
    # Step 1: Get the observed (noisy) sea surface elevations
    sla_raw = l2.elev[ssh_tiepoint_indices] - l2.mss[ssh_tiepoint_indices]
    x = np.arange(l2.n_records)
    y = np.array(sla_raw)

    # Step 2: Remove the mean value
    # -> SLA prediction will converge against mean SLA in the absence of
    #    ssh tie points
    mean_sla = float(np.nanmean(sla_raw))
    y -= mean_sla

    # Step 3: Prepare the input array for fitting
    x_fit = x[ssh_tiepoint_indices].reshape(-1, 1)
    y_fit = y.reshape(-1, 1)

    # Step 4: Establish the fitting kernel
    # The assumption here is that the covariance decreases with distance
    # (Matern kernel) and that the data is noisy (white noise kernel)
    if matern_kernel is None:
        logger.warning("SLAGaussianProcess: No input for matern kernel")
        matern_kernel = dict()
    if white_noise_kernel is None:
        logger.warning("SLAGaussianProcess: No input for white noise kernel")
        white_noise_kernel = dict()
    kernel = Matern(**matern_kernel) + WhiteKernel(**white_noise_kernel)

    # Step 5: Execute the Gaussian Process Regressor
    gp = gaussian_process.GaussianProcessRegressor(kernel=kernel)
    gp.fit(x_fit, y_fit)

    # Step 6: Predict sla for the entire track and re-add mean value.
    # The uncertainty value is also output of the prediction
    x_pred = x.reshape(-1, 1)
    sla, sla_unc = gp.predict(x_pred, return_std=True)
    sla = sla.squeeze() + mean_sla

    # Return the two parameters
    return sla, sla_unc
def sk_kernel(self, hypers_dict):
    amp = hypers_dict['amplitude_covar']
    lengthscales = np.diag(hypers_dict['precisionMatrix'])**-0.5
    noise_var = hypers_dict['noise_variance']

    se_ard = Ck(amp) * RBF(length_scale=lengthscales,
                           length_scale_bounds=(1e-6, 10))
    noise = WhiteKernel(noise_level=noise_var,
                        noise_level_bounds=(1e-9, 1))  # noise terms
    sk_kernel = se_ard
    if self.noiseQ:
        sk_kernel += noise

    t0 = time.time()
    gpr = GaussianProcessRegressor(kernel=sk_kernel, n_restarts_optimizer=5)
    print("Initial kernel: %s" % gpr.kernel)
    # self.ytrain = [y[0][0] for y in self.Y_obs]
    gpr.fit(self.X_obs, np.array(self.Y_obs).flatten())
    print('SK fit time is ', time.time() - t0)
    print("Learned kernel: %s" % gpr.kernel_)
    print("Log-marginal-likelihood: %.3f"
          % gpr.log_marginal_likelihood(gpr.kernel_.theta))
    # print(gpr.kernel_.get_params())

    if self.noiseQ:
        # RBF w/ noise
        sk_ls = gpr.kernel_.get_params()['k1__k2__length_scale']
        sk_amp = gpr.kernel_.get_params()['k1__k1__constant_value']
        sk_loglik = gpr.log_marginal_likelihood(gpr.kernel_.theta)
        sk_noise = gpr.kernel_.get_params()['k2__noise_level']
    else:
        # RBF w/o noise
        sk_ls = gpr.kernel_.get_params()['k2__length_scale']
        sk_amp = gpr.kernel_.get_params()['k1__constant_value']
        sk_loglik = gpr.log_marginal_likelihood(gpr.kernel_.theta)
        sk_noise = 0

    # make dict
    sk_hypers = {}
    sk_hypers['precisionMatrix'] = np.diag(1. / (sk_ls**2))
    sk_hypers['noise_variance'] = sk_noise
    sk_hypers['amplitude_covar'] = sk_amp

    return sk_loglik, sk_hypers
def str2ker(s):
    k1 = C(1.0) * RBF(length_scale=1)
    k2 = C(1.0) * RationalQuadratic(length_scale=1)
    k4 = DotProduct(sigma_0=1)
    k3 = C(1.0) * ExpSineSquared(length_scale=1, periodicity=1)
    k5 = WhiteKernel(1.0)
    kmap = {"s": k1, "r": k2, "p": k3, "l": k4}

    # if basic kernel
    if len(s) == 1:
        ker = kmap[s]
    else:
        # if composite kernel
        ker = []
        factor = kmap[s[0]]
        op = s[1]
        for i in range(2, len(s), 2):
            # if the operator is *, keep multiplying to continue
            # constructing the factor
            if op == '*':
                factor = factor * kmap[s[i]]
                # the end?
                if i == len(s) - 1:
                    if not ker:
                        ker = factor
                    else:
                        ker = ker + factor
                else:
                    op = s[i + 1]
            # if the operator is +, combine current factor with ker then
            # form a new factor
            else:
                if not ker:
                    ker = factor
                else:
                    ker = ker + factor
                factor = kmap[s[i]]
                # the end?
                if i == len(s) - 1:
                    ker = ker + factor
                else:
                    op = s[i + 1]

    ker = ker + k5
    return ker
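# Illustrative call of str2ker (not from the original source): the string
# "s*r+p" builds (SE * RationalQuadratic) + periodic kernel, and the
# trailing WhiteKernel noise term is always appended by the function.
composite = str2ker("s*r+p")
print(composite)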
def __init__(self, index, reward_threshold, collision_threshold, world_size,
             states, num_agents, collision_distance):
    self.index = index
    self.name = 'multi safe q agent'
    self.world_size = world_size
    self.states = states
    self.num_agents = num_agents
    self.rewards = []
    kernel = RBF(length_scale=world_size,
                 length_scale_bounds=[(1e1, 1e5), (1e1, 1e5), (1e1, 1e5)]) \
        + WhiteKernel(noise_level=1)
    self.reward_gp = GaussianProcessRegressor(kernel=kernel)
    self.reward_threshold = reward_threshold
    self.collision_threshold = collision_threshold
    self.collision_distance = collision_distance
    self.trajs = [[] for _ in range(num_agents)]
    self.my_states = []
    self.action_traj = []
    self.buffer = ReplayMemory(10000)
    self.gamma = 0.9
    self.beta = 1
    self.dimensions = [3, 50, 50, 7]
    self.dqn = MLP(self.dimensions).double()
    self.dqn_l = MLP(self.dimensions).double()
    self.dqn_u = MLP(self.dimensions).double()
    self.optimizer = optim.RMSprop(self.dqn.parameters())
    self.optimizer_l = optim.RMSprop(self.dqn_l.parameters())
    self.optimizer_u = optim.RMSprop(self.dqn_u.parameters())
    self.target = MLP(self.dimensions).double()
    self.target.load_state_dict(self.dqn.state_dict())
    self.target.eval()
    self.target_l = MLP(self.dimensions).double()
    self.target_l.load_state_dict(self.dqn_l.state_dict())
    self.target_l.eval()
    self.target_u = MLP(self.dimensions).double()
    self.target_u.load_state_dict(self.dqn_u.state_dict())
    self.target_u.eval()
    self.loss_fn = torch.nn.MSELoss(reduction='sum')
    self.lr = 1e-3
    self.epsilons = [0. for _ in range(num_agents)]
    self.tau_exploits = [1. for _ in range(num_agents)]
    self.tau_explores = [1. for _ in range(num_agents)]
    self.num_collisions = 0
    self.num_unsafe = 0
    self.eps = 0.1
    self.cum_rewards = 0
    self.target_usage = 0
def _gpr_filter(self):
    """Removes low-frequency global changes in body area to extract the
    respiration trace only."""
    # define GPR kernel
    kernel = 1.0 * RBF(length_scale=5.0, length_scale_bounds=(2, 20)) \
        + WhiteKernel(noise_level=50, noise_level_bounds=(10, 1e+3))

    # fit GPR model
    X = np.arange(self.resp_raw.shape[0]).reshape(-1, 1)
    gp = GaussianProcessRegressor(kernel=kernel, alpha=0.0).fit(X, self.resp_raw)

    # filter signal to extract respiration
    self.resp_trend, y_cov = gp.predict(X, return_cov=True)
    self.resp_trace = self.resp_raw - self.resp_trend
def train(self, config):
    input_size = self.features['train'].shape[1]
    alpha = 1e-9  # 1e-5
    # IMPORTANT: if no kernel is specified, a constant one will be used by
    # default. The constant kernel's hyperparameters will NOT be optimized!
    # kernel = 1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2))
    kernel = 0.01 * RBF(length_scale=[0.1] * input_size,
                        length_scale_bounds=(1e-2, 1e+2)) \
        + WhiteKernel(noise_level=alpha, noise_level_bounds=(1e-10, 1e0))
    regressor = GaussianProcessRegressor(kernel=kernel, normalize_y=False,
                                         n_restarts_optimizer=10)
    self.model = MultiOutputRegressor(regressor)
    self.model.fit(self.features['train'], self.targets['train'])
def __init__(self, beam_mask, N_samples=1000, N_pred=(100, 100)):
    self.beam_mask = beam_mask
    self.mask_ind = np.nonzero(beam_mask.flatten())
    self.N_samples = N_samples
    self.N_pred = N_pred
    kernel = 1**2 * Matern(length_scale=0.1,
                           length_scale_bounds=(1e-2, 10.0),
                           nu=1.5) + WhiteKernel()
    self.gp = GaussianProcessRegressor(kernel=kernel)
    self.x, self.y, self.XY = self._make_image_grid(beam_mask)
    self.x_pred, self.y_pred, self.XY_pred = self._make_image_grid(
        np.ones(N_pred))
    self._determine_pixel_weights()
def run(self):
    import numpy as np
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.gaussian_process.kernels import (
        RBF, WhiteKernel, RationalQuadratic, ExpSineSquared)

    X = np.array(self.train.data)
    Y = np.array(self.train.occupancy).flatten()
    kernel = RBF() + RBF() * ExpSineSquared() + RationalQuadratic() \
        + WhiteKernel()
    gp = GaussianProcessClassifier(kernel=kernel,
                                   optimizer='fmin_l_bfgs_b').fit(X, Y)
    predict_occupancy = gp.predict(np.array(self.test.data))
    return np.reshape(predict_occupancy, (-1, 1))
def __init__(self, params=None, model=None, limit=.5, noise_level=5):
    """ Init """
    logging.info('Using scikit GPClassifier')
    if model is None:
        kernel = PairwiseKernel(
            metric='laplacian') * DotProduct() + WhiteKernel(
                noise_level=noise_level)
        self.model = GaussianProcessClassifier(kernel=kernel, n_jobs=-1)
    else:
        self.fitted = True
        self.model = model
    if limit is not None:
        self.limit = limit
def make_estimator_strategy(name: str):
    """
    Makes and returns estimator and strategy

    :param name: name of estimator
    :return: tuple (estimator, query_strategy)
    """
    MODELS = {
        'random_forest': (RandomForestRegressor(n_jobs=-1),
                          random_forest_max_std),
        'gaussian_process': (GaussianProcessRegressor(
            kernel=RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e3))
            + WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+1))),
            gaussian_process_max_std)
    }
    return MODELS[name]
def gpr(x_train, y_train, x_pred):
    # WhiteKernel for noise estimation (alternatively set alpha in
    # GaussianProcessRegressor())
    # ConstantKernel for signal variance
    # RBF for length-scale
    kernel = RBF(0.1, (0.01, 10)) * ConstantKernel(1.0, (0.1, 100)) \
        + WhiteKernel(0.1, (0.01, 1))  # noise = 0.1
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=3)
    mean = numpy.mean(y_train)
    gp.fit(x_train, y_train - mean)
    y_pred, sigma = gp.predict(x_pred, return_std=True)
    y_pred += mean
    return y_pred, sigma
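# Illustrative call of the gpr helper above (not from the original source;
# assumes numpy and the sklearn kernel classes are imported as in the
# surrounding module). Subtracting the training mean before fitting acts as
# a simple constant mean function, re-added to the prediction afterwards.
x_train = numpy.linspace(0, 5, 30).reshape(-1, 1)
y_train = 3.0 + numpy.sin(x_train).ravel()
x_new = numpy.linspace(0, 5, 100).reshape(-1, 1)
y_hat, y_sigma = gpr(x_train, y_train, x_new)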
def gpr(res):
    from sklearn.gaussian_process import GaussianProcessRegressor
    from sklearn.gaussian_process.kernels import (
        RBF,
        WhiteKernel,
        ConstantKernel,
        Matern,
    )

    xs, ys, zs, data = res
    nlows = [5, 5, 1]
    for d in data:
        if d.shape[0] < 20:
            continue
        X = d[:, 0:1]
        X_ = np.arange(min(X), 300, 0.1).reshape(-1, 1)
        Y = d[:, 1:]
        for i in range(3):
            y = Y[:, i]
            k = (ConstantKernel()
                 + 10**2 * RBF(length_scale=10.0,
                               length_scale_bounds=(10.0, 100.0))
                 + WhiteKernel(noise_level_bounds=(nlows[i], 20.0)))
            gpr = GaussianProcessRegressor(kernel=k)
            gpr.fit(X, y)
            print(gpr.kernel_)
            y_pred, y_std = gpr.predict(X_, return_std=True)

            plt.subplot(3, 1, i + 1)
            plt.scatter(X, y, c="k", s=5)
            plt.plot(X_, y_pred)
            plt.fill_between(X_[:, 0], y_pred - y_std, y_pred + y_std,
                             alpha=0.5, color="k")
            plt.xlim(X_.min(), X_.max())
        plt.tight_layout()
        plt.draw()
        plt.waitforbuttonpress(-1)
        plt.clf()
def setup_pipeline(config):
    if config.optimisation['algorithm'] not in algos:
        raise ConfigException('optimisation algo must exist in algos dict')
    steps = []
    param_dict = {}
    if 'featuretransforms' in config.optimisation:
        config.featuretransform = config.optimisation['featuretransforms']
        if 'pca' in config.featuretransform:
            steps.append(('pca', pca))
            for k, v in config.featuretransform['pca'].items():
                param_dict['pca__' + k] = v
    if 'hyperparameters' in config.optimisation:
        steps.append((config.optimisation['algorithm'],
                      algos[config.optimisation['algorithm']]))
        for k, v in config.optimisation['hyperparameters'].items():
            if k == 'target_transform':
                v = [transforms.transforms[vv]() for vv in v]
            if k == 'kernel':
                # for scikit-learn kernels
                if isinstance(v, dict):
                    V = []
                    for kk, value in v.items():
                        value = OrderedDict(value)
                        values = [v for v in value.values()]
                        prod = product(*values)
                        keys = value.keys()
                        combinations = []
                        for p in prod:
                            d = {}
                            for kkk, pp in zip(keys, p):
                                d[kkk] = pp
                            combinations.append(d)
                        V += [kernels[kk](**c) + WhiteKernel()
                              for c in combinations]
                    v = V
            param_dict[config.optimisation['algorithm'] + '__' + k] = v

    pipe = Pipeline(steps=steps)
    estimator = GridSearchCV(pipe,
                             param_dict,
                             n_jobs=config.n_jobs,
                             iid=False,
                             pre_dispatch='2*n_jobs',
                             verbose=True,
                             cv=5,
                             )
    return estimator
def gp(train, test, t=132):
    X_train, X_test = sklearn_formatting(train, test)

    gp_kernel = 2**2 \
        + ExpSineSquared(1, 60000.0) \
        + ExpSineSquared(2, 120000.0) \
        + WhiteKernel(2.5)
    gpr = GaussianProcessRegressor(kernel=gp_kernel)
    gpr.fit(X_train, train.values)

    y_fit = gpr.predict(X_train, return_std=False)
    # predict a cycle
    y_pred = gpr.predict(X_test, return_std=False)

    rmse = error(test.values, y_pred)
    return y_fit, y_pred, rmse
def train_GP(X, y, scaler):
    """Returns a trained Gaussian Process given training data and scaler."""
    stdev = 20
    kernel = (1.0 * Matern(length_scale=5 * np.ones(X.shape[1]),
                           length_scale_bounds=(1e-1, 1e1),
                           nu=2.5)
              + WhiteKernel(noise_level=stdev,
                            noise_level_bounds=(1e-1, 2e1)))
    X_ = scaler.transform(X)
    GP = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10,
                                  normalize_y=True).fit(X_, y)
    return GP, scaler
def build_model(player_data):
    # build a player specific model
    X = player_data[FEATURES]
    y = player_data.event_type
    gp_opt = GaussianProcessClassifier(
        kernel=ConstantKernel() + RationalQuadratic(length_scale=1, alpha=1.5)
        + WhiteKernel(noise_level=1),
        n_restarts_optimizer=2)
    try:
        if y.nunique() == 3:
            gp_opt.fit(X, y)
    except ValueError:
        return
    return gp_opt
def __init__(self, dim, gp=None, n_restarts_optimizer=3):
    self.num_pts = 0
    self.dim = dim
    self.X = np.empty([0, dim])  # pylint: disable=invalid-name
    self.fX = np.empty([0, 1])
    self.updated = False

    if gp is None:
        # Use the SE kernel
        kernel = ConstantKernel(1, (1e-3, 1e3)) * RBF(1, (0.1, 100)) + \
            WhiteKernel(1e-3, (1e-6, 1e-2))
        self.model = GaussianProcessRegressor(
            kernel=kernel, n_restarts_optimizer=n_restarts_optimizer)
    else:
        self.model = gp
        if not isinstance(gp, GaussianProcessRegressor):
            raise TypeError("gp is not of type GaussianProcessRegressor")
def fit(self, long_term_length_scale=None,
        pre_periodic_term_length_scale=None,
        periodic_term_length_scale=None, periodicity=None,
        noise_level=None, do_plot=False, fig=None):
    data = self.data[['mjd', 'mag', 'err']]
    data = np.atleast_2d(data)
    time = data[:, 0] - data[0, 0]
    time = np.atleast_2d(time).T

    if self._gp is None:
        time_scale = data[-1, 0] - data[0, 0]
        data_scale = np.max(data[:, 1]) - np.min(data[:, 1])
        noise_std = np.median(data[:, 2])

        if long_term_length_scale is None:
            long_term_length_scale = 0.5 * time_scale
        if pre_periodic_term_length_scale is None:
            pre_periodic_term_length_scale = 0.5 * time_scale
        if periodic_term_length_scale is None:
            periodic_term_length_scale = 0.1 * time_scale
        if periodicity is None:
            periodicity = 0.1 * time_scale
        if noise_level is None:
            noise_level = noise_std

        k1 = data_scale ** 2 * RBF(length_scale=long_term_length_scale)
        k2 = 0.1 * data_scale \
            * RBF(length_scale=pre_periodic_term_length_scale) \
            * ExpSineSquared(length_scale=periodic_term_length_scale,
                             periodicity=periodicity)
        k3 = WhiteKernel(noise_level=noise_level ** 2,
                         noise_level_bounds=(1e-3, 1.))
        kernel = k1 + k2 + k3

        gp = GaussianProcessRegressor(kernel=kernel,
                                      alpha=(data[:, 2] / data[:, 1]) ** 2,
                                      normalize_y=True,
                                      n_restarts_optimizer=10)
        gp.fit(time, data[:, 1])
        self._gp = gp

    if do_plot:
        self.plot_fitted(fig=fig)