def maximize(self, iterations):
    self.start_time2 = time()
    for n in np.arange(iterations):
        X = np.array(self.x).reshape(-1, 1) if self.dim == 1 else np.array(self.x)
        Y = np.array(self.y).reshape(-1, 1)
        gpreg = GPRegression(X, Y, kernel=self.kernel, noise_var=self.noise_var)
        gpreg.optimize()  # MAP for kernel hyper-parameters
        vals, par = self.minimize_negative_acquisition(gpreg)
        if self.dim == 1:
            self.give_new_point(vals, par)
        else:
            self.x.append(par[vals.argmin()])
        self.y.append(self.objective(self.x[-1]))
        if self.log_info:
            print("%s step in BO; objective value: %.4f at %.4f; time: %.2f s."
                  % (len(self.x), self.y[-1], self.x[-1],
                     time() - self.start_time2 + self.end_time1 - self.start_time1))
        if n % 10 == 0:
            self.a += 1
            self.aquis_par.append(self.sample_aquis_param(self.a))
    self.end_time2 = time()
def bayesian_opt():
    # 1. ranges of the synth parameters
    syn1 = syn2 = syn3 = syn4 = syn5 = np.arange(158)
    syn6 = np.arange(6000)
    syn7 = np.arange(1000)
    syn8 = np.arange(700)

    # 2. synth parameter ranges into an 8D parameter space
    # parameter_space = ParameterSpace(
    #     [ContinuousParameter('x1', 0., 157.)])
    # parameter_space = ParameterSpace(
    #     [DiscreteParameter('x8', syn8)])
    parameter_space = ParameterSpace(
        [ContinuousParameter('x1', 0., 157.), ContinuousParameter('x2', 0., 157.),
         ContinuousParameter('x3', 0., 157.), ContinuousParameter('x4', 0., 157.),
         ContinuousParameter('x5', 0., 157.), ContinuousParameter('x6', 0., 5999.),
         ContinuousParameter('x7', 0., 999.), ContinuousParameter('x8', 0., 699.)])
    # parameter_space = ParameterSpace(
    #     [DiscreteParameter('x1', syn1), DiscreteParameter('x2', syn2), DiscreteParameter('x3', syn3),
    #      DiscreteParameter('x4', syn4), DiscreteParameter('x5', syn5), DiscreteParameter('x6', syn6),
    #      DiscreteParameter('x7', syn7), DiscreteParameter('x8', syn8)])

    # 3. collect random points
    design = RandomDesign(parameter_space)
    X = design.get_samples(num_data_points)  # X is a numpy array
    print("X=", X)

    # [is the below needed?]
    # UserFunction.evaluate(training_function, X)
    # I put UserFunctionWrapper in line 94

    # 4. define training_function as Y
    Y = training_function(X)

    # [is this needed?]
    # loop_state = create_loop_state(X, Y)

    # 5. train and wrap the model in Emukit
    model_gpy = GPRegression(X, Y, normalizer=True)
    model_emukit = GPyModelWrapper(model_gpy)
    expected_improvement = ExpectedImprovement(model=model_emukit)
    bayesopt_loop = BayesianOptimizationLoop(model=model_emukit,
                                             space=parameter_space,
                                             acquisition=expected_improvement,
                                             batch_size=5)
    max_iterations = 15
    bayesopt_loop.run_loop(training_function, max_iterations)
    model_gpy.plot()
    plt.show()
    results = bayesopt_loop.get_results()
    # bayesopt_loop.loop_state.X
    print("X: ", bayesopt_loop.loop_state.X)
    print("Y: ", bayesopt_loop.loop_state.Y)
    print("cost: ", bayesopt_loop.loop_state.cost)
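The snippet above assumes two globals that it never defines: num_data_points and training_function. A minimal sketch of illustrative stand-ins is shown below; the dummy objective and the value 5 are assumptions for demonstration only, not part of the original code.

# Hypothetical stand-ins so the function above can run end to end.
import numpy as np

num_data_points = 5

def training_function(X):
    # Dummy objective over the 8 synth parameters; returns one score per row.
    scales = np.array([157., 157., 157., 157., 157., 5999., 999., 699.])
    return np.sum((X / scales) ** 2, axis=1, keepdims=True)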
def optimize_s_sn_l(self, sn, s, l):
    assert isinstance(sn, float)
    assert l.shape == (self.fix_W.shape[1],)

    # Create a GP
    self.kernel.update_params(W=self.fix_W, s=s, l=l)
    gp_reg = GPRegression(self.X, self.Y.reshape(-1, 1), self.kernel, noise_var=sn)

    try:
        gp_reg.optimize(optimizer="lbfgs", max_iters=config['max_iter_parameter_optimization'])
    except Exception as e:
        print(e)
        print(gp_reg.kern.K(gp_reg.X))
        print("Error above!")

    # TODO: does this optimization work in the correct direction?
    new_variance = gp_reg.kern.inner_kernel.variance
    new_lengthscale = gp_reg.kern.inner_kernel.lengthscale
    new_sn = gp_reg['Gaussian_noise.variance']

    assert gp_reg.kern.inner_kernel.lengthscale is not None
    assert gp_reg.kern.inner_kernel.variance is not None
    # assert not np.isclose(np.asarray(new_lengthscale), np.zeros_like(new_lengthscale)).all(), new_lengthscale

    return float(new_variance), new_lengthscale.copy(), float(new_sn)
def populate_gp_model(self, observable, lecs, energy=None, rescale=False, fixvariance=0):
    """Creates a model based on given data and kernel.

    Args:
    observable - numpy array with the observable (one row per observable from each lec sample)
    lecs - numpy array with the lec parameters the fit should be done with regard to
           (lec 1 in column 1 and so on, sample 1 on row 1 and so on)
    energy - energy values
    """
    # Add a row with energies to the parameters used for the fit
    if energy is not None:
        lecs = np.r_[lecs, energy]  # np.r_ is indexed with brackets, not called
    if rescale:
        (lecs, observable) = self.rescale(lecs, observable)
    # NOTE: transpose() returns a new array; as written, these two calls have no effect
    lecs.transpose()
    observable.transpose()
    self.model = GPRegression(lecs, observable, self.kernel)

    self.model.Gaussian_noise.variance.unconstrain()
    self.model.Gaussian_noise.variance = fixvariance
    self.model.Gaussian_noise.variance.fix()
class MaximumLikelihoodGaussianProcess(object):
    """
    Gaussian process model whose hyperparameters are chosen by maximum likelihood.
    """
    # Can't have instantiation of model without supplying data
    def __init__(self, X, Y, kernel, max_feval):
        if not GPRegression:
            raise ImportError('No module named GPy')
        self.X = X
        self.Y = Y
        self.kernel = kernel
        self.model = GPRegression(X=self.X, Y=self.Y, kernel=self.kernel)
        self.max_feval = max_feval
        # TODO make this a variable.
        self.num_restarts = 20

    def fit(self):
        """
        Fits the model with random restarts.
        :return:
        """
        self.model.optimize_restarts(num_restarts=self.num_restarts, verbose=False)

    def predict(self, x):
        return self.model.predict(Xnew=x)
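A minimal usage sketch for the class above, assuming GPy and NumPy are installed; the toy data, the RBF kernel choice, and max_feval=200 are illustrative assumptions, not part of the original source.

import numpy as np
import GPy

# Toy 1-D regression problem
X_demo = np.random.uniform(-3., 3., (30, 1))
Y_demo = np.sin(X_demo) + 0.1 * np.random.randn(30, 1)

gp = MaximumLikelihoodGaussianProcess(X_demo, Y_demo, GPy.kern.RBF(input_dim=1), max_feval=200)
gp.fit()                                   # runs optimize_restarts under the hood
mean, var = gp.predict(np.array([[0.5]]))  # GPy's predict returns (mean, variance)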
def model_builder(X, Y, kernel):
    if X.shape[0] < 1000 and not sparse:
        tmp = GPRegression(X, Y, kernel=kernel)
    else:
        tmp = SparseGPRegression(X, Y, num_inducing=num_inducing, kernel=kernel)
    if name is not None:
        tmp.name = name
    return tmp
def _model_chooser(self):
    """ Initialize the model used for the optimization """
    kernel = Matern52(len(self.variables_list), variance=1., ARD=False)
    gpmodel = GPRegression(self.X, self.Y, kernel)
    gpmodel.optimize()
    self.model = GPyModelWrapper(gpmodel)
    if self.noiseless:
        gpmodel.Gaussian_noise.constrain_fixed(0.001)
        self.model = GPyModelWrapper(gpmodel)
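A standalone sketch of the same build-then-wrap-in-Emukit pattern used by the method above, assuming GPy and emukit are installed; the toy data is an assumption, and the noise constraint mirrors the noiseless branch of the original.

import numpy as np
import GPy
from GPy.models import GPRegression
from emukit.model_wrappers import GPyModelWrapper

X = np.random.rand(20, 3)
Y = np.sin(X.sum(axis=1, keepdims=True))
kernel = GPy.kern.Matern52(X.shape[1], variance=1., ARD=False)
gpmodel = GPRegression(X, Y, kernel)
gpmodel.optimize()
gpmodel.Gaussian_noise.constrain_fixed(0.001)  # near-noiseless setting, as in the noiseless branch above
model = GPyModelWrapper(gpmodel)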
class GP(Base):
    """A class for performing GP interpolation.

    GP interpolation (usually) works on the principle of finding the best
    unbiased predictor. This implementation uses GPy
    (https://github.com/SheffieldML/GPy).

    Parameters
    ----------
    kernel : GPy.kern.Kern, optional
        Covariance function used for the GP regression; defaults to an ARD
        RBF kernel over two input dimensions.
    """

    def __init__(
        self,
        kernel=RBF(2, ARD=True),
    ):
        super().__init__()
        self.kernel = kernel

    def _fit(self, X, y, n_restarts=5, verbose=False, random_state=None):
        """Fit method for GP interpolation.

        This function shouldn't be called directly.
        """
        np.random.seed(random_state)
        if len(y.shape) == 1:
            y = y.reshape(-1, 1)
        self.model = GPRegression(X, y, self.kernel)
        self.model.optimize_restarts(n_restarts, verbose=verbose)
        return self

    def _predict_grid(self, x1lim, x2lim):
        """Return the interpolated data on a grid.

        This method shouldn't be called directly.
        """
        lims = (*x1lim, *x2lim)
        x1min, x1max, x2min, x2max = lims
        x1 = np.linspace(x1min, x1max, self.resolution)
        x2 = np.linspace(x2min, x2max, self.resolution)
        X1, X2 = np.meshgrid(x1, x2)
        X = np.array([(i, j) for i, j in zip(X1.ravel(), X2.ravel())])
        predictions = self.model.predict(X)[0].reshape(len(x1), len(x2))
        return predictions.ravel()

    def _predict(self, X, return_variance=False):
        """Return the interpolated data pointwise.

        This method shouldn't be called directly.
        """
        predictions, variance = self.model.predict(X)
        if return_variance:
            return predictions.ravel(), variance
        else:
            return predictions.ravel()
def given_resample(self, data=[], dt=1):
    """
    Resample the dynamics function given a list of inferred voltage and state trajectories
    """
    # import pdb; pdb.set_trace()
    uu = self.Z[:, 0]
    V = self.Z[:, 1]

    # Evaluate dz/dt using true Kdr dynamics
    g = lambda x: x**4
    ginv = lambda u: u**(1. / 4)
    dg_dx = lambda x: 4 * x**3
    logistic = sigma
    logit = sigma_inv
    dlogit = lambda x: 1. / (x * (1.0 - x))
    u_to_x = lambda u: ginv(logistic(u))
    x_to_u = lambda x: logit(g(x))
    # uu = x_to_u(xx)

    # Compute dynamics du/dt
    alpha = lambda V: 0.01 * (V + 55.) / (1 - np.exp(-(V + 55.) / 10.))
    beta = lambda V: 0.125 * np.exp(-(V + 65.) / 80.)
    dx_dt = lambda x, V: alpha(V) * (1 - x) - beta(V) * x
    du_dt = lambda u, V: dlogit(g(u_to_x(u))) * dg_dx(u_to_x(u)) * dx_dt(u_to_x(u), V)

    X = self.Z
    Y = du_dt(uu, V)[:, None]

    # Set up the sparse GP regression model with the sampled inputs and outputs
    # gpr = SparseGPRegression(X, Y, self.kernel, Z=self.Z)
    # gpr.likelihood.variance = 0.01
    gpr = GPRegression(X, Y, self.kernel)

    # HACK: Optimize the hyperparameters
    # constrain all parameters to be positive
    # gpr.constrain_positive('')
    # optimize and plot
    # gpr.ensure_default_constraints()
    # gpr.optimize_restarts(num_restarts=10)
    # gpr.plot()
    # import pdb; pdb.set_trace()

    # HACK: Rather than using a truly nonparametric approach, just sample
    # the GP at the grid of inducing points and interpolate at the GP mean
    self.h = gpr.posterior_samples_f(self.Z, size=1)

    # HACK: Recreate the GP with the sampled function h
    self.gp = SparseGPRegression(self.Z, self.h, self.kernel, num_inducing=25)
def predict_y2(self):
    x = self.x_new
    y = self.y1
    m = GPRegression(x, y)
    m.optimize()
    # predictive_gradients returns (dmu/dX, dvar/dX); only the mean gradient is kept here
    mu, cov = (m.predictive_gradients(x)[0], None)
    self.y2 = mu[:, 0]
    self.cov2 = cov
def learn_flow(X, y, lengthscales, variance=1.0):
    dimensions = X.shape[1]
    # lengthscales = [l_scale for d in range(dimensions)]
    kernel = GPy.kern.RBF(dimensions, ARD=True, lengthscale=lengthscales,
                          variance=variance)  # GPy.kern.rbf was renamed RBF in current GPy
    m = GPRegression(X, y, kernel)
    m.optimize('bfgs', max_iters=1000)
    return m
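A hedged usage sketch for learn_flow above; the synthetic 2-D data and the lengthscale/variance values are illustrative assumptions.

import numpy as np
import GPy
from GPy.models import GPRegression

X_demo = np.random.uniform(0., 10., (50, 2))              # 50 locations in 2-D
y_demo = np.sin(X_demo[:, :1]) + 0.05 * np.random.randn(50, 1)
m = learn_flow(X_demo, y_demo, lengthscales=[2.0, 2.0], variance=0.5)
mean, var = m.predict(np.array([[5.0, 5.0]]))             # GPy's predict returns (mean, variance)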
def run_optimisation(current_range, freq_range, power_range):
    parameter_space = ParameterSpace([
        ContinuousParameter('current', current_range[0], current_range[1]),
        ContinuousParameter('freq', freq_range[0], freq_range[1]),
        ContinuousParameter('power', power_range[0], power_range[1])
    ])

    def function(X):
        current = X[:, 0]
        freq = X[:, 1]
        power = X[:, 2]
        out = np.zeros((len(current), 1))
        for g in range(len(current)):
            # Set JPA Current, Frequency & Power
            out[g, 0] = -get_SNR(plot=False)[-1]  # Negative as we want to maximise SNR
        return out

    num_data_points = 10
    design = RandomDesign(parameter_space)
    X = design.get_samples(num_data_points)
    Y = function(X)

    model_gpy = GPRegression(X, Y)
    model_gpy.optimize()
    model_emukit = GPyModelWrapper(model_gpy)

    exp_imprv = ExpectedImprovement(model=model_emukit)
    optimizer = GradientAcquisitionOptimizer(space=parameter_space)
    point_calc = SequentialPointCalculator(exp_imprv, optimizer)

    coords = []
    min = []

    bayesopt_loop = BayesianOptimizationLoop(model=model_emukit,
                                             space=parameter_space,
                                             acquisition=exp_imprv,
                                             batch_size=1)
    stopping_condition = FixedIterationsStoppingCondition(i_max=100)
    bayesopt_loop.run_loop(function, stopping_condition)  # the original passed an undefined `q` here

    coord_results = bayesopt_loop.get_results().minimum_location
    min_value = bayesopt_loop.get_results().minimum_value
    step_results = bayesopt_loop.get_results().best_found_value_per_iteration
    print(coord_results)
    print(min_value)
    return coord_results, abs(min_value)
def __init__(self, X, Y, k, Y_mean=0., Y_std=1., comm=None, verbosity=0, **kwargs):
    Parallelizer.__init__(self, comm=comm, verbosity=verbosity)
    GPRegression.__init__(self, X, Y, k, **kwargs)
    self.Y_mean = Y_mean
    self.Y_std = Y_std
def load_model_parameters(self, Ylearn, Xlearn, loadpath):
    """Loads a GPy model with hyperparameters from a .pickle file"""
    # NOTE: transpose() returns a new array; as written, these two calls have no effect
    Xlearn.transpose()
    Ylearn.transpose()
    with open(loadpath, 'rb') as f:  # pickle files must be opened in binary mode
        filecontents = pickle.load(f)
    if len(filecontents) == 6:
        params, kernel, traintags, LEC_LENGTH, lengthscale, multi_dim = filecontents
        rescale = False
    elif len(filecontents) == 7:
        params, kernel, traintags, LEC_LENGTH, lengthscale, multi_dim, rescale = filecontents
    print(params)
    print(LEC_LENGTH)
    self.set_gp_kernel(kernel=kernel, in_dim=LEC_LENGTH, lengthscale=lengthscale, multi_dim=multi_dim)
    if rescale:
        (Xlearn, Ylearn) = self.rescale(Xlearn, Ylearn)
    m_load = GPRegression(Xlearn, Ylearn, self.kernel, initialize=False)
    m_load.update_model(False)
    m_load.initialize_parameter()
    m_load[:] = params
    m_load.update_model(True)
    self.model = m_load
def __init__(self, x0, y0, cons=None, alpha=opt.ke_alpha, beta=opt.ke_beta,
             input_size=opt.ke_input_size, hidden_size=opt.ke_hidden_size,
             num_layers=opt.ke_num_layers, bidirectional=opt.ke_bidirectional,
             lr=opt.ke_lr, weight_decay=opt.ke_weight_decay):
    super(Kernel, self).__init__()
    self.alpha = alpha
    self.beta = beta
    self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                        num_layers=num_layers, bidirectional=bidirectional)
    self.lstm = self.lstm.to(opt.device)
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.bi = 2 if bidirectional else 1
    self.x = [x0]
    self.y = torch.tensor([y0], dtype=torch.float, device=opt.device, requires_grad=False)
    self.cons = [cons]
    inp, out = clean_x(self.x, self.cons)
    self.model = GPRegression(inp, out)
    self.model.Gaussian_noise.constrain_fixed(1e-6, warning=False)
    self.model.optimize()
    self.x_best = x0
    self.y_best = y0
    self.i_best = 0
    self.n = 1
    self.E = self.embedding(x0).view(1, -1)
    self.K = self.kernel(self.E[0], self.E[0]).view(1, 1)
    self.K_inv = torch.inverse(self.K + self.beta * torch.eye(self.n, device=opt.device))
    self.optimizer = optim.Adam(self.lstm.parameters(), lr=lr, weight_decay=weight_decay)
def rmse_rbf(x_train: np.ndarray, y_train: np.ndarray, x_test: np.ndarray, y_test: np.ndarray) -> float:
    """RMSE of a GPy RBF kernel.

    :param x_train: training inputs, shape (n_train, n_dims)
    :param y_train: training targets, shape (n_train, 1)
    :param x_test: test inputs, shape (n_test, n_dims)
    :param y_test: test targets, shape (n_test, 1)
    :return: root-mean-square error on the test set
    """
    model = GPRegression(x_train, y_train, kernel=RBF(input_dim=x_train.shape[1]))
    model.optimize()
    return compute_gpy_model_rmse(model, x_test, y_test)
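A small usage sketch for rmse_rbf, assuming the snippet's own imports (GPRegression, RBF) and its compute_gpy_model_rmse helper are available from the surrounding module; the toy 1-D data and the 80/20 split are illustrative assumptions.

import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(-3., 3., (100, 1))
y = np.sin(x) + 0.1 * rng.normal(size=(100, 1))
err = rmse_rbf(x[:80], y[:80], x[80:], y[80:])
print("held-out RMSE:", err)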
def _fit_model(self, X, Y):
    if max(Y) < 0:
        self.transformed = True
    else:
        self.transformed = False
    Y_trans = self._transform_score(Y)
    model = GPRegression(X, Y_trans, self.kernel)
    # Catch fitting error
    try:
        model.optimize_restarts(num_restarts=self.n_init, verbose=False)
        self.model = model
    except np.linalg.LinAlgError:  # public alias; np.linalg.linalg is private/removed in newer NumPy
        self.model = None
def _create_model(self, x: np.ndarray, y: np.ndarray):
    """Create model given input data X and output data Y.

    :param x: 2d array of indices of distance builder
    :param y: model fitness scores
    :return:
    """
    # Make sure input data consists only of positive integers.
    assert np.issubdtype(x.dtype, np.integer) and x.min() >= 0

    # Define kernel
    self.input_dim = x.shape[1]
    # TODO: figure out default kernel initialization
    if self.covariance is None:
        assert self.covariance is not None
        # kern = GPy.kern.RBF(self.input_dim, variance=1.)
    else:
        kern = self.covariance.raw_kernel
    self.covariance = None

    # Define model
    noise_var = y.var() * 0.01 if self.noise_var is None else self.noise_var
    normalize = x.size > 1  # only normalize if more than 1 observation.
    self.model = GPRegression(x, y, kern, noise_var=noise_var, normalizer=normalize)

    # Set hyperpriors
    if self.kernel_hyperpriors is not None:
        if 'GP' in self.kernel_hyperpriors:
            # Set likelihood hyperpriors.
            likelihood_hyperprior = self.kernel_hyperpriors['GP']
            set_priors(self.model.likelihood, likelihood_hyperprior, in_place=True)
        if 'SE' in self.kernel_hyperpriors:
            # Set kernel hyperpriors.
            se_hyperprior = self.kernel_hyperpriors['SE']
            set_priors(self.model.kern, se_hyperprior, in_place=True)

    # Restrict variance if exact evaluations of the objective.
    if self.exact_f_eval:
        self.model.Gaussian_noise.constrain_fixed(1e-6, warning=False)
    else:
        # --- We make sure we do not get ridiculously small residual noise variance
        if self.model.priors.size > 0:
            # FIXME: shouldn't need this case, but GPy doesn't have log Jacobian implemented for Logistic
            self.model.Gaussian_noise.constrain_positive(warning=False)
        else:
            self.model.Gaussian_noise.constrain_bounded(1e-9, 1e6, warning=False)
def gp_surrogate(self, Z=None, Y=None, kern_x=None, kern_p=None):
    self.set_training_data(Z, Y)
    assert self.Z is not None and self.Y is not None

    self.set_kernels(kern_x, kern_p)
    assert self.kern_x is not None and self.kern_p is not None

    R, J = binary_dimensions(self.Z, self.binary_variables)

    gps = []
    for e in range(self.num_outputs):
        gps.append([])
        for r in R:
            Jr = (J == r)
            if not np.any(Jr):
                gps[e].append(None)
                continue
            dim_xb = self.dim_x - self.dim_b
            dim = self.dim_x + self.dim_p
            kernx = self.kern_x(dim_xb, self.non_binary_variables, 'kernx')
            kernp = self.kern_p(self.dim_p, range(self.dim_x, dim), 'kernp')
            # Zr = self.Z[np.ix_(Jr, I)]
            Zr = self.Z[Jr]
            Yr = self.Y[np.ix_(Jr, [e])]
            gp = GPRegression(Zr, Yr, kernx * kernp)
            gps[e].append(gp)
    self.gps = gps
def predict(self, X):
    """
    Predicts the conditional expectation $E[Y \mid X=x]$ for all x in $X$.

    :param X: a numpy array of shape (num_predictions, num_dimensions)
    :return: a numpy array of shape (num_predictions,)
    """
    self._calculate_locregs()
    self.gpr_ = GPRegression(self.x_mesh_, np.atleast_2d(self.y_mesh_).T, self.gpr_kernel)
    self.gpr_.optimize(messages=False)
    # self.gpr_.optimize_restarts(num_restarts=10)
    y_pred, self.gp_var_ = self.gpr_.predict(X)
    self.gp_var_ = self.gp_var_.squeeze()
    return y_pred.squeeze()
def get_active_model(self):
    """ Get the active model. """
    Z = np.dot(self.X, self.kern.W)
    m = GPRegression(Z, self.Y, self.kern.inner_kernel)
    m.Gaussian_noise.variance = self.Gaussian_noise.variance
    return m
def fit(self):
    if self.model:
        self.noise = self.model.Gaussian_noise.variance[0]
        return None

    x_dim = self.x.shape[1]  # number of input dimensions, 1 if only time
    y_dim = self.y.shape[1]  # number of output dimensions, typically only 1 for log OD

    kern = buildKernel(x_dim, ARD=self.ARD)
    m = GPRegression(self.x, self.y, kern)

    if self.heteroscedastic:
        kern = addFixedKernel(kern, y_dim, self.error)
        m = GPRegression(self.x, self.y, kern)

    m.optimize()

    self.noise = m.Gaussian_noise.variance[0]  # should be negligible (<1e-10) for full model

    if self.heteroscedastic:
        m.kern = m.kern.parts[0]  # cannot predict with fixed kernel, so remove it

    self.model = m
def permute(self, varb=None):
    # get model input
    x = self.x.copy()
    y = self.y.copy()

    # shuffle target variable
    col = np.where(varb == self.x_keys)[0]  # which one?
    shuffled = np.random.choice(x[:, col].ravel(), size=x.shape[0], replace=False)
    x[:, col] = shuffled[:, np.newaxis]  # replace

    # same steps as fit; the below could be done more concisely, but I worry about minor differences
    x_dim = x.shape[1]
    y_dim = y.shape[1]

    kern = buildKernel(x_dim, ARD=self.ARD)
    mcopy = GPRegression(x, y, kern)

    if self.heteroscedastic:
        kern = addFixedKernel(kern, y_dim, self.error)
        mcopy = GPRegression(x, y, kern)

    mcopy.optimize()

    return mcopy.log_likelihood()
def fit(self, restarts=None, optimiser='lbfgsb', verbose=False, robust=False, **kwargs):
    if restarts is None:
        if self.restarts is None:
            raise ValueError('No restarts value specified')
    else:
        self.restarts = restarts

    self.model = GPRegression(self.X, self.Y, self.kernel_expression.to_kernel())
    with warnings.catch_warnings():  # Ignore known numerical warnings
        warnings.simplefilter('ignore')
        self.model.optimize_restarts(num_restarts=self.restarts, verbose=verbose,
                                     robust=robust, optimizer=optimiser, **kwargs)
    return self
def check_jacobian(self):
    try:
        import autograd.numpy as np, autograd as ag, GPy, matplotlib.pyplot as plt
        from GPy.models import GradientChecker, GPRegression
    except:
        raise self.skipTest("autograd not available to check gradients")

    def k(X, X2, alpha=1., lengthscale=None):
        if lengthscale is None:
            lengthscale = np.ones(X.shape[1])
        exp = 0.
        for q in range(X.shape[1]):
            exp += ((X[:, [q]] - X2[:, [q]].T) / lengthscale[q])**2
        # exp = np.sqrt(exp)
        return alpha * np.exp(-.5 * exp)

    dk = ag.elementwise_grad(
        lambda x, x2: k(x, x2, alpha=ke.variance.values, lengthscale=ke.lengthscale.values))
    dkdk = ag.elementwise_grad(dk, argnum=1)

    ke = GPy.kern.RBF(1, ARD=True)
    # ke.randomize()
    ke.variance = .2  # .randomize()
    ke.lengthscale[:] = .5
    ke.randomize()

    X = np.linspace(-1, 1, 1000)[:, None]
    X2 = np.array([[0.]]).T

    np.testing.assert_allclose(ke.gradients_X([[1.]], X, X), dk(X, X))
    np.testing.assert_allclose(ke.gradients_XX([[1.]], X, X).sum(0), dkdk(X, X))
    np.testing.assert_allclose(ke.gradients_X([[1.]], X, X2), dk(X, X2))
    np.testing.assert_allclose(ke.gradients_XX([[1.]], X, X2).sum(0), dkdk(X, X2))

    m = GPRegression(self.X, self.Y)

    def f(x):
        m.X[:] = x
        return m.log_likelihood()

    def df(x):
        m.X[:] = x
        return m.kern.gradients_X(m.grad_dict['dL_dK'], X)

    def ddf(x):
        m.X[:] = x
        return m.kern.gradients_XX(m.grad_dict['dL_dK'], X).sum(0)

    gc = GradientChecker(f, df, self.X)
    gc2 = GradientChecker(df, ddf, self.X)
    assert gc.checkgrad()
    assert gc2.checkgrad()
def test_set_hyperparameters():
    from GPy.models import GPRegression
    from GPy.kern import RBF as gRBF, Matern52

    expected_hyperparameters = {
        'length_scale': np.array([1]),
        'sigma_n': np.array([1]),
        'sigma_f': np.array([1])
    }

    sur = GPySurrogate()

    # Default RBF kernel
    sur.model = GPRegression(Xtrain, ytrain)
    sur._set_hyperparameters_from_model()
    assert sur.hyperparameters == expected_hyperparameters

    # Product kernel
    sur.model = GPRegression(Xtrain, ytrain, kernel=gRBF(1) * Matern52(1))
    sur._set_hyperparameters_from_model()
    assert sur.hyperparameters == expected_hyperparameters
class Stationary(Base):
    """
    Stationary-kernel GP (Matern32, Matern52, or RBF) for sensor placement
    """

    def __init__(self, n_restarts, kernel_name, verbose=True):
        super().__init__(verbose)
        self.__n_restarts = n_restarts
        self.__kernel_name = kernel_name

    def _Kernel(self, S1, S2=None):
        return self.__model.kern.K(S1, S2)

    def _fit(self, X, y, ECM=None):
        self._X = X
        self._y = y
        kern_dict = {
            'm32': Matern32(input_dim=self._X.shape[1],
                            active_dims=list(range(self._X.shape[1])),
                            ARD=True),
            'm52': Matern52(input_dim=self._X.shape[1],
                            active_dims=list(range(self._X.shape[1])),
                            ARD=True),
            'rbf': RBF(input_dim=self._X.shape[1],
                       active_dims=list(range(self._X.shape[1])),
                       ARD=True)
        }
        self.__model = GPRegression(X, y, kern_dict[self.__kernel_name])
        self.__model.optimize_restarts(self.__n_restarts, verbose=self._verbose)
        return self

    def _predict(self, X, return_cov=True):
        if not return_cov:
            return self.__model.predict(X)[0]
        return self.__model.predict(X, full_cov=True)
def _fit_model(self, X, Y):
    model = GPRegression(X, Y, self.kernel)
    model.optimize(messages=False, max_f_eval=self.max_feval)
    self.model = model
class EmpiricalStickBreakingGPModel(Model):
    """
    Compute the empirical probability given the counts, convert the empirical
    probability into a real-valued vector that can be modeled with a GP.
    """
    def __init__(self, K, kernel, D=1, alpha=1):
        self.alpha = alpha
        self.K = K
        self.D = D
        self.kernel = kernel

    def add_data(self, Z, X, optimize_hypers=True):
        assert Z.ndim == 2 and Z.shape[1] == self.D
        M = Z.shape[0]
        assert X.shape == (M, self.K)

        # Get the empirical probabilities (offset by alpha to ensure they are nonzero)
        pi_emp_train = (self.alpha + X).astype(float) / \
                       (self.alpha + X).sum(axis=1)[:, None]

        # Convert these to psi's
        self.Z = Z
        self.psi = np.array([pi_to_psi(pi) for pi in pi_emp_train])

        # Compute the mean value of psi
        self.mu = self.psi.mean(axis=0)
        self.psi -= self.mu

        # Create the GP Regression model
        from GPy.models import GPRegression
        self.model = GPRegression(Z, self.psi, self.kernel)

        # Optimize the kernel parameters
        if optimize_hypers:
            self.model.optimize(messages=True)

    def initialize_from_data(self, initialize_to_mle=False):
        "For consistency"
        pass

    def generate(self, keep=True, **kwargs):
        raise NotImplementedError

    def collapsed_predict(self, Z_test):
        psi_pred, psi_pred_var = self.model.predict(Z_test, full_cov=False)
        psi_pred += self.mu
        pi_pred = np.array([psi_to_pi(psi) for psi in psi_pred])
        return pi_pred, psi_pred, psi_pred_var

    def predict(self, Z_test):
        return self.collapsed_predict(Z_test)

    def predictive_log_likelihood(self, Z_test, X_test):
        pi_pred, _, _ = self.predict(Z_test)
        pll = 0
        pll += gammaln(X_test.sum(axis=1) + 1).sum() - gammaln(X_test + 1).sum()
        pll += np.nansum(X_test * np.log(pi_pred))
        return pll, pi_pred
class GaussianProcessRewardModel(RewardModel):
    """
    Models rewards with a Gaussian process regressor (GPy's GPRegression).

    The GP is updated online as samples are added. Hyperparameters for the GP
    are fit in batch after a threshold number of samples are collected and are
    then refined as more samples are added. This helps avoid highly expensive
    refinement, which has computational complexity of O(N^3) in the number of
    samples.

    Parameters:
    -----------
    min_samples: integer (default 10)
        The number of samples after which initial batch hyperparameter fitting
        is performed.
    batch_retries: integer (default 19)
        The number of random restarts for the initial hyperparameter fit.
    refine_ll_delta: numeric (default 1.0)
        The hyperparameters are refined after the average GP marginal
        log-likelihood decreases by this much since the last refinement.

    Other Keyword Parameters:
    -------------------------
    Passed through to GPy.models.GPRegression's __init__
    """

    def __init__(self, min_samples=10, batch_retries=19, enable_refine=True,
                 refine_period=0, refine_ll_delta=1.0, refine_retries=0,
                 kernel_type='rbf', verbose=False, **kwargs):
        self.min_samples = min_samples
        self.hp_batch_retries = batch_retries
        self.enable_refine = enable_refine
        self.hp_refine_ll_delta = float(refine_ll_delta)
        self.hp_refine_retries = refine_retries
        self.hp_refine_period = refine_period
        self.last_refine_iter = 0
        self.hp_init = False
        self.last_ll = None
        self.kwargs = kwargs
        self.verbose = bool(verbose)

        if kernel_type.lower() == 'rbf':
            self.kernel_class = RBF
        elif kernel_type.lower() == 'matern':
            self.kernel_class = Matern32
        else:
            raise ValueError('Unknown kernel_type: ' + kernel_type)

        self.kernel = None
        self.gp = None  # Init later
        self.inputs = []
        self.outputs = []

    def _initialize(self):
        x = np.asarray(self.inputs)
        y = np.asarray(self.outputs).reshape(-1, 1)
        self.kernel = self.kernel_class(input_dim=x.shape[1], ARD=True)
        self.gp = GPRegression(x, y, kernel=self.kernel, **self.kwargs)

    @property
    def num_samples(self):
        return len(self.inputs)

    def average_log_likelihood(self):
        # NOTE For some reason this returns the negative log-likelihood
        if self.gp is None or self.num_samples < self.min_samples:
            return None
        return -self.gp.log_likelihood() / self.num_samples

    def report_sample(self, x, reward):
        self.inputs.append(x)
        self.outputs.append(reward)

        if self.gp is None:
            self.batch_optimize()
        else:
            x = np.asarray(self.inputs)
            y = np.asarray(self.outputs).reshape(-1, 1)
            self.gp.set_XY(x, y)

        # Wait until we've initialized
        if not self.hp_init:
            return

        current_ll = self.average_log_likelihood()
        if self.verbose:
            rospy.loginfo('Prev LL: %f Curr LL: %f', self.last_ll, current_ll)
        self.check_refine(current_ll)

    def check_refine(self, current_ll):
        if not self.enable_refine:
            return

        if current_ll > self.last_ll:
            self.last_ll = current_ll

        # If the LL has decreased by refine_ll_delta
        delta_achieved = current_ll < self.last_ll - self.hp_refine_ll_delta
        # If it has been refine_period samples since last refinement
        period_achieved = self.num_samples > self.last_refine_iter + self.hp_refine_period
        if delta_achieved or period_achieved:
            self.batch_optimize(self.hp_refine_retries + 1)
            self.last_refine_iter = self.num_samples

    def batch_optimize(self, n_restarts=None):
        if self.num_samples < self.min_samples:
            return

        if n_restarts is None:
            n_restarts = self.hp_batch_retries + 1

        # NOTE Warm-restarting seems to get stuck in local optima, possibly from mean?
        # if self.gp is None:
        self._initialize()

        if self.verbose:
            rospy.loginfo('Batch optimizing with %d restarts...', n_restarts)

        self.gp.optimize_restarts(optimizer='bfgs', messages=False, num_restarts=n_restarts)

        if self.verbose:
            rospy.loginfo('Optimization complete. Model:\n%s\n Kernel:\n%s',
                          str(self.gp), str(self.kernel.lengthscale))

        self.hp_init = True
        self.last_ll = self.average_log_likelihood()

    def predict(self, x, return_std=False):
        if self.gp is None:
            # raise RuntimeError('Model is not fitted yet!')
            pred_mean = 0
            pred_std = float('inf')
        else:
            x = np.asarray(x)
            if len(x.shape) == 1:
                x = x.reshape(1, -1)
            pred_mean, pred_var = self.gp.predict_noiseless(x)
            # To catch negative variances
            if pred_var < 0:
                rospy.logwarn('Negative variance %f rounding to 0', pred_var)
                pred_var = 0
            pred_std = np.sqrt(pred_var)

        if return_std:
            return np.squeeze(pred_mean), np.squeeze(pred_std)
        else:
            return np.squeeze(pred_mean)

    def clear(self):
        self.inputs = []
        self.outputs = []
        self.kernel = None
        self.gp = None

    def fit(self, X, y):
        """Initialize the model from lists of inputs and corresponding rewards.

        Parameters
        ----------
        X : Iterable of inputs
        y : Iterable of corresponding rewards
        """
        if len(X) != len(y):
            raise RuntimeError('X and Y lengths must be the same!')

        self.inputs = list(X)
        self.outputs = list(y)
        self._initialize()
        self.batch_optimize(self.hp_batch_retries)

    @property
    def num_samples(self):
        return len(self.inputs)

    @property
    def model(self):
        return self.gp