class TripathyGP(ConfidenceBoundModel):
    """
    Base class for GP optimization.
    Handles common functionality.
    """

    def set_new_kernel(self, d, W=None, variance=None, lengthscale=None):
        self.kernel = TripathyMaternKernel(
            real_dim=self.domain.d,
            active_dim=d,
            W=W,
            variance=variance,
            lengthscale=lengthscale
        )

    def set_new_gp(self, noise_var=None):
        self.gp = GPRegression(
            input_dim=self.domain.d,
            kernel=self.kernel,
            noise_var=noise_var if noise_var else 2.,  # self.config.noise_var,
            calculate_gradients=self.config.calculate_gradients
        )

    def set_new_gp_and_kernel(self, d, W, variance, lengthscale, noise_var):
        self.set_new_kernel(d, W, variance, lengthscale)
        self.set_new_gp(noise_var)

    def __init__(self, domain, calculate_always=False):
        super(TripathyGP, self).__init__(domain)

        self.optimizer = TripathyOptimizer()

        # TODO: d is chosen to be an arbitrary value rn!
        self.set_new_gp_and_kernel(2, None, None, None, None)

        # number of data points
        self.t = 0
        self.kernel = self.kernel.copy()
        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol
        )  # we create a copy of the matrix in fortranarray, such that we can directly pass it to lapack dtrtrs without doing another copy
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()
        self._Y = np.empty(shape=(0, 1))
        self._beta = 2
        self._bias = self.config.bias

        self.calculate_always = calculate_always

    @property
    def beta(self):
        return self._beta

    @property
    def scale(self):
        if self.gp.kern.name == 'sum':
            return sum([part.variance for part in self.gp.kern.parts])
        else:
            return np.sqrt(self.gp.kern.variance)

    @property
    def bias(self):
        return self._bias

    def _get_gp(self):
        return GPRegression(self.domain.d,
                            self.kernel,
                            noise_var=self.config.noise_var,
                            calculate_gradients=self.config.calculate_gradients)

    def add_data(self, x, y):
        """
        Add a new function observation to the GPs.

        Parameters
        ----------
        x: 2d-array
        y: 2d-array
        """
        self.i = 1 if not hasattr(self, "i") else self.i + 1
        # print("Add data ", self.i)
        x = np.atleast_2d(x)
        y = np.atleast_2d(y)
        self.set_data(x, y, append=True)

    # TODO: check if this is called anyhow!
    def optimize(self):
        self._update_beta()

    def _update_cache(self):
        # if not self.config.calculate_gradients:
        self._woodbury_chol = np.asfortranarray(self.gp.posterior._woodbury_chol)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()

        self._update_beta()

    def _optimize_bias(self):
        self._bias = minimize(self._bias_loss, self._bias, method='L-BFGS-B')['x'].copy()
        self._set_bias(self._bias)
        logger.info(f"Updated bias to {self._bias}")

    def _bias_loss(self, c):
        # calculate mean and norm for new bias via a new woodbury_vector
        new_woodbury_vector, _ = dpotrs(self._woodbury_chol, self._Y - c, lower=1)
        K = self.gp.kern.K(self.gp.X)
        mean = np.dot(K, new_woodbury_vector)
        norm = new_woodbury_vector.T.dot(mean)
        # loss is least_squares_error + norm
        return np.asscalar(np.sum(np.square(mean + c - self._Y)) + norm)

    def _set_bias(self, c):
        self._bias = c
        self.gp.set_Y(self._Y - c)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()

    def _update_beta(self):
        logdet = self._get_logdet()
        logdet_priornoise = self._get_logdet_prior_noise()
        self._beta = np.sqrt(2 * np.log(1 / self.delta) + (logdet - logdet_priornoise)) + self._norm()

    def _optimize_var(self):
        # fix all parameters
        for p in self.gp.parameters:
            p.fix()

        if self.gp.kern.name == 'sum':
            for part in self.gp.kern.parts:
                part.variance.unfix()
        else:
            self.gp.kern.variance.unfix()
        self.gp.optimize()
        if self.gp.kern.name == 'sum':
            values = []
            for part in self.gp.kern.parts:
                values.append(np.asscalar(part.variance.values))
        else:
            values = np.asscalar(self.gp.kern.variance.values)

        logger.info(f"Updated prior variance to {values}")
        # unfix all parameters
        for p in self.gp.parameters:
            p.unfix()

    def _get_logdet(self):
        return 2. * np.sum(np.log(np.diag(self.gp.posterior._woodbury_chol)))

    def _get_logdet_prior_noise(self):
        return self.t * np.log(self.gp.likelihood.variance.values)

    def mean_var(self, x):
        """Recompute the confidence intervals from the GP.

        Parameters
        ----------
        context: ndarray
            Array that contains the context used to compute the sets
        """
        x = np.atleast_2d(x)
        if self.config.calculate_gradients or True:
            mean, var = self.gp.predict_noiseless(x)
        else:
            mean, var = self._raw_predict(x)

        return mean + self._bias, var

    def mean_var_grad(self, x):
        return self.gp.predictive_gradients(x)

    def var(self, x):
        return self.mean_var(x)[1]

    def mean(self, x):
        return self.mean_var(x)[0]

    def set_data(self, X, Y, append=True):
        if append:
            X = np.concatenate((self.gp.X, X))
            Y = np.concatenate((self.gp.Y, Y))

        # Do our optimization now
        if self.i % 100 == 99 or self.calculate_always:
            import time
            start_time = time.time()
            print("Adding data: ", self.i)

            # TODO: UNCOMMENT THE FOLLOWING LINE AGAIN!
            # This is just to check if tripathy conforms with the other version
            # W_hat, sn, l, s, d = self.optimizer.find_active_subspace(X, Y)
            d = 2
            W_hat = np.asarray([
                [-0.31894555, 0.78400512, 0.38970008, 0.06119476, 0.35776912],
                [-0.27150973, 0.066002, 0.42761931, -0.32079484, -0.79759551]
            ]).T
            s = 1.
            l = 1.5
            sn = 2.  # 0.01  # self.config.noise_var

            print("--- %s seconds ---" % (time.time() - start_time))

            # Overwrite GP and kernel values
            self.set_new_gp_and_kernel(d=d, W=W_hat, variance=s, lengthscale=l, noise_var=sn)

        # TODO: Should the following not come before the optimization?
        self.gp.set_XY(X, Y)
        self.t = X.shape[0]
        self._update_cache()

    def sample(self, X=None):
        class GPSampler:
            def __init__(self, X, Y, kernel, var):
                self.X = X
                self.Y = Y
                self.N = var * np.ones(shape=Y.shape)
                self.kernel = kernel
                self.m = GPy.models.GPHeteroscedasticRegression(self.X, self.Y, self.kernel)
                self.m['.*het_Gauss.variance'] = self.N

            def __call__(self, X):
                X = np.atleast_2d(X)
                sample = np.empty(shape=(X.shape[0], 1))

                # iteratively generate sample values for all x in x_test
                for i, x in enumerate(X):
                    sample[i] = self.m.posterior_samples_f(x.reshape((1, -1)), size=1)

                    # add observation as without noise
                    self.X = np.vstack((self.X, x))
                    self.Y = np.vstack((self.Y, sample[i]))
                    self.N = np.vstack((self.N, 0))

                    # recalculate model
                    self.m = GPy.models.GPHeteroscedasticRegression(self.X, self.Y, self.kernel)
                    self.m['.*het_Gauss.variance'] = self.N  # Set the noise parameters to the error in Y

                return sample

        return GPSampler(self.gp.X.copy(), self.gp.Y.copy(), self.kernel, self.gp.likelihood.variance)

    def _raw_predict(self, Xnew):
        Kx = self.kernel.K(self._X, Xnew)
        mu = np.dot(Kx.T, self._woodbury_vector)

        if len(mu.shape) == 1:
            mu = mu.reshape(-1, 1)

        Kxx = self.kernel.Kdiag(Xnew)
        tmp = lapack.dtrtrs(self._woodbury_chol, Kx, lower=1, trans=0, unitdiag=0)[0]
        var = (Kxx - np.square(tmp).sum(0))[:, None]
        return mu, var

    def _raw_predict_covar(self, Xnew, Xcond):
        Kx = self.kernel.K(self._X, np.vstack((Xnew, Xcond)))
        tmp = lapack.dtrtrs(self._woodbury_chol, Kx, lower=1, trans=0, unitdiag=0)[0]
        n = Xnew.shape[0]
        tmp1 = tmp[:, :n]
        tmp2 = tmp[:, n:]

        Kxx = self.kernel.K(Xnew, Xcond)
        var = Kxx - (tmp1.T).dot(tmp2)

        Kxx_new = self.kernel.Kdiag(Xnew)
        var_Xnew = (Kxx_new - np.square(tmp1).sum(0))[:, None]
        return var_Xnew, var

    def _norm(self):
        norm = self._woodbury_vector.T.dot(self.gp.kern.K(self.gp.X)).dot(self._woodbury_vector)
        return np.asscalar(np.sqrt(norm))

    def __getstate__(self):
        self_dict = self.__dict__.copy()
        del self_dict['gp']  # remove the gp from the state dict to allow pickling. calculations are done via the cached woodbury/cholesky
        return self_dict
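
# NOTE (editor): the following is a minimal, self-contained sketch of the math that
# _raw_predict above performs with the cached Cholesky factor (_woodbury_chol) and the
# cached weight vector (_woodbury_vector). It uses a hypothetical stand-alone Matern-3/2
# implementation and random data, so all names and numbers here are illustrative only;
# it is not part of the model code.
def _sketch_woodbury_prediction():
    import numpy as np
    from scipy.linalg import cholesky, cho_solve, solve_triangular

    def matern32(A, B, lengthscale=1.5, variance=1.0):
        # isotropic Matern-3/2, the same kernel family used by the models above
        r = np.sqrt(((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)) / lengthscale
        return variance * (1.0 + np.sqrt(3.0) * r) * np.exp(-np.sqrt(3.0) * r)

    rng = np.random.default_rng(0)
    X = rng.uniform(-1, 1, size=(20, 2))                       # training inputs
    Y = np.sin(X[:, :1]) + 0.1 * rng.standard_normal((20, 1))  # noisy observations
    noise_var = 0.01

    K = matern32(X, X) + noise_var * np.eye(len(X))
    L = cholesky(K, lower=True)          # plays the role of _woodbury_chol
    alpha = cho_solve((L, True), Y)      # plays the role of _woodbury_vector

    Xnew = rng.uniform(-1, 1, size=(5, 2))
    Kx = matern32(X, Xnew)
    mu = Kx.T @ alpha                          # posterior mean, as in _raw_predict
    tmp = solve_triangular(L, Kx, lower=True)  # L^{-1} Kx, what lapack.dtrtrs computes above
    var = (matern32(Xnew, Xnew).diagonal() - np.square(tmp).sum(0))[:, None]
    return mu, var
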
class BoringGP(ConfidenceBoundModel):
    """
    Base class for GP optimization.
    Handles common functionality.
    """

    def create_new_kernel(self, active_d, passive_d=0, W=None, variance=None, lengthscale=None):
        print("Creating a new kernel!")
        self.kernel = Matern32(input_dim=active_d,
                               variance=variance,
                               lengthscale=lengthscale,
                               ARD=True,
                               active_dims=np.arange(active_d),
                               name="active_subspace_kernel")
        for i in range(passive_d):
            cur_kernel = Matern32(input_dim=1,
                                  variance=variance,
                                  lengthscale=1.,
                                  ARD=True,
                                  active_dims=[active_d + i],
                                  name="passive_subspace_kernel_" + str(i))
            self.kernel += cur_kernel
        print("New resulting kernel is: ", self.kernel)

    def create_new_gp(self, noise_var=None):
        # Take over data from the old GP, if existent
        print("Creating a new gp!")
        self.gp = GPRegression(
            self.domain.d,
            self.kernel,
            noise_var=0.1,  # noise_var if noise_var is not None else self.config.noise_var,
            calculate_gradients=self.config.calculate_gradients)

    def create_new_gp_and_kernel(self, active_d, passive_d, W, variance, lengthscale, noise_var):
        self.create_new_kernel(active_d=active_d,
                               passive_d=passive_d,
                               W=W,
                               variance=variance,
                               lengthscale=lengthscale)
        self.create_new_gp(noise_var=noise_var)
        print("Got kernel: ")
        print(self.kernel)

    def __init__(self, domain, calculate_always=False):
        super(BoringGP, self).__init__(domain)
        print("Starting tripathy model!")

        self.gp = None
        self.active_d = None
        self.W_hat = None
        self.variance = None
        self.lengthscale = None
        self.noise_var = None

        self.create_new_gp_and_kernel(
            active_d=self.domain.d if self.active_d is None else self.active_d,
            passive_d=0,
            W=np.eye(self.domain.d) if self.active_d is None else self.W_hat,
            variance=1.0 if self.active_d is None else self.variance,
            lengthscale=1.5 if self.active_d is None else self.lengthscale,
            noise_var=None if self.active_d is None else self.noise_var,
        )

        # Create the datasaver GP
        placeholder_kernel = RBF(input_dim=self.domain.d)
        self.datasaver_gp = GPRegression(input_dim=self.domain.d,
                                         kernel=placeholder_kernel,
                                         noise_var=0.1,
                                         calculate_gradients=False)

        # number of data points
        self.t = 0
        self.i = 0

        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol
        )  # we create a copy of the matrix in fortranarray, such that we can directly pass it to lapack dtrtrs without doing another copy
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()
        self._Y = np.empty(shape=(0, 1))
        self._beta = 2
        self._bias = self.config.bias

        self.calculate_always = calculate_always

        self.optimizer = TripathyOptimizer()

    # Obligatory values
    @property
    def beta(self):
        return self._beta

    @property
    def scale(self):
        if self.gp.kern.name == 'sum':
            return sum([part.variance for part in self.gp.kern.parts])
        else:
            return np.sqrt(self.gp.kern.variance)

    @property
    def bias(self):
        return self._bias

    def _get_gp(self):
        return self.gp

    def add_data(self, x, y):
        """
        Add a new function observation to the GPs.

        Parameters
        ----------
        x: 2d-array
        y: 2d-array
        """
        x = np.atleast_2d(x)
        y = np.atleast_2d(y)
        assert x.shape[1] == self.domain.d, "Input dimension does not match the domain dimension!"

        self.i += 1
        self.set_data(x, y, append=True)

    def optimize(self):
        self._update_beta()

    def _update_cache(self):
        # if not self.config.calculate_gradients:
        self._woodbury_chol = np.asfortranarray(self.gp.posterior._woodbury_chol)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()  # TODO: should it be gp, or datasaver_gp?

        self._update_beta()

    def _update_beta(self):
        logdet = self._get_logdet()
        logdet_priornoise = self._get_logdet_prior_noise()
        self._beta = np.sqrt(2 * np.log(1 / self.delta) + (logdet - logdet_priornoise)) + self._norm()

    def _get_logdet(self):
        return 2. * np.sum(np.log(np.diag(self.gp.posterior._woodbury_chol)))

    def _get_logdet_prior_noise(self):
        return self.t * np.log(self.gp.likelihood.variance.values)

    def mean_var(self, x):
        """Recompute the confidence intervals from the GP.

        Parameters
        ----------
        context: ndarray
            Array that contains the context used to compute the sets
        """
        x = np.atleast_2d(x)

        # Need to project x onto the subspace first
        if self.W_hat is not None:
            x = np.dot(x, self.W_hat)

        if self.config.calculate_gradients and False:  # or True:
            mean, var = self.gp.predict_noiseless(x)
        else:
            mean, var = self._raw_predict(x)

        return mean + self._bias, var

    def var(self, x):
        return self.mean_var(x)[1]

    def mean(self, x):
        return self.mean_var(x)[0]

    # TODO: Implement the thing finder in here!
    def set_data(self, X, Y, append=True):
        if append:
            X = np.concatenate((self.datasaver_gp.X, X), axis=0)
            Y = np.concatenate((self.datasaver_gp.Y, Y), axis=0)

        self._set_datasaver_data(X, Y)

        if self.i % 500 == 100 or self.calculate_always:
            print("Adding datapoint: ", self.i)

            # print("Datasaver X is: ")
            # print(self.datasaver_gp.X)
            #
            # print("Datasaver Y is: ")
            # print(self.datasaver_gp.Y)
            #
            # print("That's it")
            # exit(0)

            self.A, self.noise_var, self.lengthscale, self.variance, self.active_d = self.optimizer.find_active_subspace(
                X, Y, load=False)

            gc.collect()

            passive_dimensions = max(self.domain.d - self.active_d, 0)
            passive_dimensions = min(passive_dimensions, 1)

            # Generate the subspace projection
            # Generate A^{bot} if there's more dimensions
            if passive_dimensions > 0:
                self.AT = generate_orthogonal_matrix_to_A(A=self.A, n=passive_dimensions)
                self.W_hat = np.concatenate((self.A, self.AT), axis=1)
            else:
                self.AT = None
                self.W_hat = self.A

            assert not np.isnan(self.W_hat).all(), ("The projection matrix contains nan's!", self.W_hat)
            assert self.W_hat.shape == (self.domain.d, self.active_d + passive_dimensions), (
                "Created wrong projection shape: ", self.W_hat.shape, self.active_d, passive_dimensions)

            print("Found parameters are: ")
            print("W: ", self.W_hat)
            print("noise_var: ", self.noise_var)
            print("lengthscale: ", self.lengthscale)
            print("variance: ", self.variance)

            # For the sake of creating a kernel with new dimensions!
            self.create_new_gp_and_kernel(active_d=self.active_d,
                                          passive_d=passive_dimensions,
                                          W=self.W_hat,
                                          variance=self.variance,
                                          lengthscale=self.lengthscale,
                                          noise_var=self.noise_var)

        if self.W_hat is None:
            self._set_data(X, Y)
        else:
            Z = np.dot(X, self.W_hat)
            self._set_data(Z, Y)

        # self.gp.optimize()

    def _set_datasaver_data(self, X, Y):
        self.datasaver_gp.set_XY(X, Y)

    def _set_data(self, X, Y):
        self.gp.set_XY(X, Y)
        self.t = X.shape[0]
        self._update_cache()

    def _raw_predict(self, Xnew):
        Kx = self.kernel.K(self._X, Xnew)
        mu = np.dot(Kx.T, self._woodbury_vector)

        if len(mu.shape) == 1:
            mu = mu.reshape(-1, 1)

        Kxx = self.kernel.Kdiag(Xnew)
        tmp = lapack.dtrtrs(self._woodbury_chol, Kx, lower=1, trans=0, unitdiag=0)[0]
        var = (Kxx - np.square(tmp).sum(0))[:, None]
        return mu, var

    def _norm(self):
        norm = self._woodbury_vector.T.dot(self.gp.kern.K(self.gp.X)).dot(self._woodbury_vector)
        return np.asscalar(np.sqrt(norm))

    def __getstate__(self):
        self_dict = self.__dict__.copy()
        del self_dict['gp']  # remove the gp from the state dict to allow pickling. calculations are done via the cached woodbury/cholesky
        return self_dict
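
# NOTE (editor): generate_orthogonal_matrix_to_A is defined elsewhere in this project.
# The sketch below is a hypothetical stand-in showing one standard way such an
# orthogonal complement could be built (via the full SVD), to illustrate how the
# passive directions A^{bot} that BoringGP concatenates onto A can be obtained.
# It is not part of the model code.
def _sketch_orthogonal_complement():
    import numpy as np

    def orthogonal_complement(A, n):
        # For A of shape (d, k) with full column rank, the left-singular vectors
        # beyond the first k span the orthogonal complement of the column space.
        d, k = A.shape
        U, _, _ = np.linalg.svd(A, full_matrices=True)
        return U[:, k:k + n]

    A = np.asarray([
        [-0.31894555, 0.78400512, 0.38970008, 0.06119476, 0.35776912],
        [-0.27150973, 0.066002, 0.42761931, -0.32079484, -0.79759551]
    ]).T                                     # (5, 2) active projection, as hard-coded above
    A_perp = orthogonal_complement(A, 1)     # (5, 1) single passive direction
    W = np.concatenate((A, A_perp), axis=1)  # (5, 3) combined projection, like W_hat
    assert np.allclose(A.T @ A_perp, 0.0, atol=1e-10)
    return W
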
class ClassicalActiveSubspaceGP(ConfidenceBoundModel):
    """
    Base class for GP optimization.
    Handles common functionality.
    """

    def set_new_kernel(self, d, W=None, variance=None, lengthscale=None):
        self.kernel = TripathyMaternKernel(real_dim=self.domain.d,
                                           active_dim=d,
                                           W=W,
                                           variance=variance,
                                           lengthscale=lengthscale)

    def set_new_gp(self, noise_var=None):
        self.gp = GPRegression(
            input_dim=self.domain.d,
            kernel=self.kernel,
            noise_var=noise_var if noise_var else 2.,  # TODO: replace with config value!
            calculate_gradients=True  # TODO: replace with config value!
        )

    def set_new_gp_and_kernel(self, d, W, variance, lengthscale, noise_var):
        self.set_new_kernel(d, W, variance, lengthscale)
        self.set_new_gp(noise_var)

    # # from .t_kernel import TripathyMaternKernel
    # TripathyMaternKernel.__module__ = "tripathy.src.t_kernel"

    def __init__(self, domain):
        super(ClassicalActiveSubspaceGP, self).__init__(domain)

        self.optimizer = TripathyOptimizer()

        # TODO: d is chosen to be an arbitrary value rn!
        # self.set_new_kernel(2, None, None)
        # self.set_new_gp(None)
        self.set_new_gp_and_kernel(2, None, None, None, None)

        # calling of the kernel
        # self.gp = self._get_gp()  # TODO: does this actually create a new gp?

        # number of data points
        self.t = 0
        self.kernel = self.kernel.copy()
        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol
        )  # we create a copy of the matrix in fortranarray, such that we can directly pass it to lapack dtrtrs without doing another copy
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()
        self._Y = np.empty(shape=(0, 1))
        self._beta = 2
        self._bias = self.config.bias

    @property
    def beta(self):
        return self._beta

    @property
    def scale(self):
        if self.gp.kern.name == 'sum':
            return sum([part.variance for part in self.gp.kern.parts])
        else:
            return np.sqrt(self.gp.kern.variance)

    @property
    def bias(self):
        return self._bias

    def _get_gp(self):
        return GPRegression(
            self.domain.d,
            self.kernel,
            noise_var=self.config.noise_var,
            calculate_gradients=self.config.calculate_gradients)

    def add_data(self, x, y):
        """
        Add a new function observation to the GPs.

        Parameters
        ----------
        x: 2d-array
        y: 2d-array
        """
        self.i = 1 if not hasattr(self, "i") else self.i + 1
        print("Add data ", self.i)
        x = np.atleast_2d(x)
        y = np.atleast_2d(y)
        self.set_data(x, y, append=True)

    # TODO: check if this is called anyhow!
    def optimize(self):
        # if self.config.optimize_bias:
        #     self._optimize_bias()
        # if self.config.optimize_var:
        #     self._optimize_var()
        # self.optimizer.find_active_subspace(self.X, self.Y)
        self._update_beta()

    def _update_cache(self):
        # if not self.config.calculate_gradients:
        self._woodbury_chol = np.asfortranarray(self.gp.posterior._woodbury_chol)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()

        self._update_beta()

    def _optimize_bias(self):
        self._bias = minimize(self._bias_loss, self._bias, method='L-BFGS-B')['x'].copy()
        self._set_bias(self._bias)
        logger.info(f"Updated bias to {self._bias}")

    def _bias_loss(self, c):
        # calculate mean and norm for new bias via a new woodbury_vector
        new_woodbury_vector, _ = dpotrs(self._woodbury_chol, self._Y - c, lower=1)
        K = self.gp.kern.K(self.gp.X)
        mean = np.dot(K, new_woodbury_vector)
        norm = new_woodbury_vector.T.dot(mean)
        # loss is least_squares_error + norm
        return np.asscalar(np.sum(np.square(mean + c - self._Y)) + norm)

    def _set_bias(self, c):
        self._bias = c
        self.gp.set_Y(self._Y - c)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()

    def _update_beta(self):
        logdet = self._get_logdet()
        logdet_priornoise = self._get_logdet_prior_noise()
        self._beta = np.sqrt(2 * np.log(1 / self.delta) + (logdet - logdet_priornoise)) + self._norm()

    def _optimize_var(self):
        # fix all parameters
        for p in self.gp.parameters:
            p.fix()

        if self.gp.kern.name == 'sum':
            for part in self.gp.kern.parts:
                part.variance.unfix()
        else:
            self.gp.kern.variance.unfix()
        self.gp.optimize()
        if self.gp.kern.name == 'sum':
            values = []
            for part in self.gp.kern.parts:
                values.append(np.asscalar(part.variance.values))
        else:
            values = np.asscalar(self.gp.kern.variance.values)

        logger.info(f"Updated prior variance to {values}")
        # unfix all parameters
        for p in self.gp.parameters:
            p.unfix()

    def _get_logdet(self):
        return 2. * np.sum(np.log(np.diag(self.gp.posterior._woodbury_chol)))

    def _get_logdet_prior_noise(self):
        return self.t * np.log(self.gp.likelihood.variance.values)

    def mean_var(self, x):
        """Recompute the confidence intervals from the GP.

        Parameters
        ----------
        context: ndarray
            Array that contains the context used to compute the sets
        """
        x = np.atleast_2d(x)
        if self.config.calculate_gradients:
            mean, var = self.gp.predict_noiseless(x)
        else:
            mean, var = self._raw_predict(x)

        return mean + self._bias, var

    def mean_var_grad(self, x):
        return self.gp.predictive_gradients(x)

    def var(self, x):
        return self.mean_var(x)[1]  # TODO: is this a bug?

    def predictive_var(self, X, X_cond, S_X, var_Xcond=None):
        X = np.atleast_2d(X)
        X_cond = np.atleast_2d(X_cond)
        var_X, KXX = self._raw_predict_covar(X, X_cond)

        if var_Xcond is None:
            var_Xcond = self.var(X_cond)

        return var_Xcond - KXX * KXX / (S_X * S_X + var_X)

    def mean(self, x):
        return self.mean_var(x)[0]

    def set_data(self, X, Y, append=True):
        if append:
            X = np.concatenate((self.gp.X, X))
            Y = np.concatenate((self.gp.Y, Y))

        # Do our optimization now
        if self.i % 3 == 0:
            import time
            start_time = time.time()
            W_hat, sn, l, s, d = self.optimizer.find_active_subspace(X, Y)
            print("--- %s seconds ---" % (time.time() - start_time))

            # Overwrite GP and kernel values
            # TODO: W_hat not used ----
            self.set_new_gp_and_kernel(d=d, W=W_hat, variance=s, lengthscale=l, noise_var=sn)

        self.gp.set_XY(X, Y)
        self.t = X.shape[0]
        self._update_cache()

    # TODO: merge all the following code with the current function!
    # print("Looking for optimal subspace!")
    # W_hat, sn, l, s, d = self.optimizer.find_active_subspace(X=X, Y=Y)
    # # print("Found optimal subspace")
    #
    # # Set the newly found hyperparameters everywhere
    # # Not found by pycharm bcs the kernel is an abstract object as of now
    # # self.kernel.update_params(W=W_hat, s=s, l=l)
    # # self.gp.kern.update_params(W=W_hat, s=s, l=l)
    #
    # # Create a new GP (bcs this is spaghetti code!)
    # self.set_new_kernel_and_gp(
    #     d=d,
    #     variance=s,
    #     lengthscale=l,
    #     noise_var=sn
    # )

    def sample(self, X=None):
        class GPSampler:
            def __init__(self, X, Y, kernel, var):
                self.X = X
                self.Y = Y
                self.N = var * np.ones(shape=Y.shape)
                self.kernel = kernel
                self.m = GPy.models.GPHeteroscedasticRegression(self.X, self.Y, self.kernel)
                self.m['.*het_Gauss.variance'] = self.N

            def __call__(self, X):
                X = np.atleast_2d(X)
                sample = np.empty(shape=(X.shape[0], 1))

                # iteratively generate sample values for all x in x_test
                for i, x in enumerate(X):
                    sample[i] = self.m.posterior_samples_f(x.reshape((1, -1)), size=1)

                    # add observation as without noise
                    self.X = np.vstack((self.X, x))
                    self.Y = np.vstack((self.Y, sample[i]))
                    self.N = np.vstack((self.N, 0))

                    # recalculate model
                    self.m = GPy.models.GPHeteroscedasticRegression(self.X, self.Y, self.kernel)
                    self.m['.*het_Gauss.variance'] = self.N  # Set the noise parameters to the error in Y

                return sample

        return GPSampler(self.gp.X.copy(), self.gp.Y.copy(), self.kernel, self.gp.likelihood.variance)

    def _raw_predict(self, Xnew):
        Kx = self.kernel.K(self._X, Xnew)
        mu = np.dot(Kx.T, self._woodbury_vector)

        if len(mu.shape) == 1:
            mu = mu.reshape(-1, 1)

        Kxx = self.kernel.Kdiag(Xnew)
        tmp = lapack.dtrtrs(self._woodbury_chol, Kx, lower=1, trans=0, unitdiag=0)[0]
        var = (Kxx - np.square(tmp).sum(0))[:, None]
        return mu, var

    def _raw_predict_covar(self, Xnew, Xcond):
        Kx = self.kernel.K(self._X, np.vstack((Xnew, Xcond)))
        tmp = lapack.dtrtrs(self._woodbury_chol, Kx, lower=1, trans=0, unitdiag=0)[0]
        n = Xnew.shape[0]
        tmp1 = tmp[:, :n]
        tmp2 = tmp[:, n:]

        Kxx = self.kernel.K(Xnew, Xcond)
        var = Kxx - (tmp1.T).dot(tmp2)

        Kxx_new = self.kernel.Kdiag(Xnew)
        var_Xnew = (Kxx_new - np.square(tmp1).sum(0))[:, None]
        return var_Xnew, var

    def _norm(self):
        norm = self._woodbury_vector.T.dot(self.gp.kern.K(self.gp.X)).dot(self._woodbury_vector)
        return np.asscalar(np.sqrt(norm))

    def __getstate__(self):
        self_dict = self.__dict__.copy()
        del self_dict['gp']  # remove the gp from the state dict to allow pickling. calculations are done via the cached woodbury/cholesky
        return self_dict
class TripathyGP(ConfidenceBoundModel):
    """
    Base class for GP optimization.
    Handles common functionality.
    """

    # JOHANNES: The following three functions are helpers that re-spawn the kernel
    # and the GP, since we will have to do that again later on.
    def create_new_kernel(self, active_d, variance, lengthscale):
        print("Creating a new kernel!")
        self.kernel = Matern32(
            input_dim=active_d,
            variance=variance,
            lengthscale=lengthscale,
            ARD=True,
            active_dims=np.arange(active_d),
            name="active_subspace_kernel"
        )
        print("Kernel is: ", self.kernel)

    def create_new_gp(self, active_d, noise_var):
        # Take over data from the old GP, if existent
        print("Creating a new gp!")
        self.gp = GPRegression(
            active_d,
            self.kernel,
            noise_var=noise_var,  # noise_var if noise_var is not None else self.config.noise_var,
            calculate_gradients=False  # self.config.calculate_gradients
        )

    def create_new_gp_and_kernel(self, active_d, variance, lengthscale, noise_var):
        self.create_new_kernel(
            active_d=active_d,
            variance=variance,
            lengthscale=lengthscale
        )
        self.create_new_gp(
            active_d=active_d,
            noise_var=noise_var
        )
        print("Got kernel: ")
        print(self.kernel)

    def __init__(self, domain, calculate_always=False):
        super(TripathyGP, self).__init__(domain)
        print("Starting tripathy model!")

        self.gp = None  # Just for completeness
        # self.active_d = None
        # self.W_hat = None
        # self.variance = None
        # self.lengthscale = None
        # self.noise_var = None

        # DEFAULT SETTINGS
        self.W_hat = np.eye(self.domain.d)
        # print(self.config.kernels[0][1])
        self.noise_var = 0.005
        self.lengthscale = 2.5
        self.variance = 1.0
        self.active_d = self.domain.d

        self.W_hat = np.asarray([
            [-0.31894555, 0.78400512, 0.38970008, 0.06119476, 0.35776912],
            [-0.27150973, 0.066002, 0.42761931, -0.32079484, -0.79759551]
        ])
        self.noise_var = 0.005
        self.lengthscale = 2.5
        self.variance = 1.0
        self.active_d = 2

        self.create_new_gp_and_kernel(
            active_d=self.active_d,
            variance=self.variance,
            lengthscale=self.lengthscale,
            noise_var=self.noise_var
        )

        # Create the datasaver GP
        placeholder_kernel = RBF(
            input_dim=self.domain.d
        )
        self.datasaver_gp = GPRegression(
            input_dim=self.domain.d,
            kernel=placeholder_kernel,
            noise_var=self.noise_var,
            calculate_gradients=False
        )

        # JOHANNES: The following operations were taken over from the febo GP.
        # number of data points
        self.t = 0
        self.i = 0

        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol
        )  # we create a copy of the matrix in fortranarray, such that we can directly pass it to lapack dtrtrs without doing another copy
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()
        self._Y = np.empty(shape=(0, 1))
        self._beta = 2
        self._bias = self.config.bias

        self.calculate_always = calculate_always

        self.optimizer = TripathyOptimizer()

        # self.set_data(self._X, self._Y)

    # Obligatory values
    @property
    def beta(self):
        return self._beta

    @property
    def scale(self):
        if self.gp.kern.name == 'sum':
            return sum([part.variance for part in self.gp.kern.parts])
        else:
            return np.sqrt(self.gp.kern.variance)

    @property
    def bias(self):
        return self._bias

    def _get_gp(self):
        return self.gp

    def add_data(self, x, y):
        """
        Add a new function observation to the GPs.

        Parameters
        ----------
        x: 2d-array
        y: 2d-array
        """
        x = np.atleast_2d(x)
        y = np.atleast_2d(y)
        assert x.shape[1] == self.domain.d, "Input dimension does not match the domain dimension!"

        self.i += 1
        self.set_data(x, y, append=True)

    def optimize(self):
        self._update_beta()

    def _update_cache(self):
        # if not self.config.calculate_gradients:
        self._woodbury_chol = np.asfortranarray(self.gp.posterior._woodbury_chol)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()  # TODO: should it be gp, or datasaver_gp?

        self._update_beta()

    def _update_beta(self):
        logdet = self._get_logdet()
        logdet_priornoise = self._get_logdet_prior_noise()
        self._beta = np.sqrt(2 * np.log(1 / self.delta) + (logdet - logdet_priornoise)) + self._norm()

    def _get_logdet(self):
        return 2. * np.sum(np.log(np.diag(self.gp.posterior._woodbury_chol)))

    def _get_logdet_prior_noise(self):
        return self.t * np.log(self.gp.likelihood.variance.values)

    def mean_var(self, x):
        """Recompute the confidence intervals from the GP.

        Parameters
        ----------
        context: ndarray
            Array that contains the context used to compute the sets
        """
        x = np.atleast_2d(x)
        x = np.dot(x, self.W_hat.T)
        assert x.shape[1] == self.active_d, (
            "The projected dimension does not equal the active dimension: ", (self.active_d, x.shape))

        if self.config.calculate_gradients and False:  # or True:
            mean, var = self.gp.predict_noiseless(x)
        else:
            mean, var = self._raw_predict(x)

        return mean + self._bias, var

    def var(self, x):
        return self.mean_var(x)[1]

    def mean(self, x):
        return self.mean_var(x)[0]

    # TODO: Implement the thing finder in here!
    def set_data(self, X, Y, append=True):
        if append:
            X = np.concatenate((self.datasaver_gp.X, X), axis=0)
            Y = np.concatenate((self.datasaver_gp.Y, Y), axis=0)

        self._set_datasaver_data(X, Y)

        if self.i % 500 == 1 or self.calculate_always:
            print("Adding datapoint: ", self.i)

            ####################
            # PRETRAINED VALUES
            ####################

            # CAMELBACK
            # self.W_hat = np.asarray(
            #     [[-0.33867927, -0.46107057],
            #      [0.45801778, 0.2080514],
            #      [0.26060095, 0.65276822],
            #      [0.56757381, -0.28423894],
            #      [0.53428755, -0.48706305]
            #      ]).T
            # self.noise_var = 0.005
            # self.lengthscale = np.asarray([1.5, 0.5])
            # self.variance = 44.0
            # self.active_d = 2

            #############
            # REAL VALUES
            #############

            if self.domain.d == 2:
                self.W_hat = np.asarray([
                    [-0.46375963, -0.88596106],
                    [-0.88596106, 0.46375963]
                ])
                self.noise_var = 0.005
                self.lengthscale = 2.5
                self.variance = 1.0
                self.active_d = 2
            elif self.domain.d == 3:
                # CAMELBACK-5D
                self.W_hat = np.asarray([
                    [-0.46554187, -0.36224966, 0.80749362],
                    [0.69737806, -0.711918, 0.08268378]
                ])
                self.noise_var = 0.005
                self.lengthscale = 2.5
                self.variance = 1.0
                self.active_d = 2
            elif self.domain.d == 4:
                # CAMELBACK-4D
                self.W_hat = np.asarray([
                    [-0.50445148, -0.40016722, -0.48737089, -0.58980041],
                    [-0.20042413, -0.65288502, -0.12700055, 0.71933454]
                ])
                self.noise_var = 0.005
                self.lengthscale = 2.5
                self.variance = 1.0
                self.active_d = 2
            elif self.domain.d == 5:
                # CAMELBACK-3D
                self.W_hat = np.asarray([
                    [-0.31894555, 0.78400512, 0.38970008, 0.06119476, 0.35776912],
                    [-0.27150973, 0.066002, 0.42761931, -0.32079484, -0.79759551]
                ])
                self.noise_var = 0.005
                self.lengthscale = 2.5
                self.variance = 1.0
                self.active_d = 2
            else:
                print("Something went terribly wrong!")
                exit(0)

            # self.W_hat, self.noise_var, self.lengthscale, self.variance, self.active_d = self.optimizer.find_active_subspace(
            #     X, Y, load=False)

            gc.collect()

            print("USING CAMELBACK FUNCTION IN HIGHER D ::: ", self.domain.d)

            print("Found parameters are: ")
            print("W: ", self.W_hat)
            print("noise_var: ", self.noise_var)
            print("lengthscale: ", self.lengthscale)
            print("variance: ", self.variance)

            # For the sake of creating a kernel with new dimensions!
            self.create_new_gp_and_kernel(
                active_d=self.active_d,
                variance=self.variance,
                lengthscale=self.lengthscale,
                noise_var=self.noise_var
            )

        if self.i % 500 == 299:
            print("TRIPATHY :: Likelihood of the current GP is: ", self.gp.log_likelihood())

        assert X.shape[1] == self.W_hat.shape[1], (X.shape, self.W_hat.shape)
        # print(X.shape, self.W_hat.shape)
        Z = np.dot(X, self.W_hat.T)
        assert Z.shape[1] == self.active_d, (
            "Projected Z does not conform to the active dimension", (Z.shape, self.active_d))
        self._set_data(Z, Y)

    def _set_datasaver_data(self, X, Y):
        self.datasaver_gp.set_XY(X, Y)

    def _set_data(self, X, Y):
        self.gp.set_XY(X, Y)
        self.t = X.shape[0]
        self._update_cache()

    def _raw_predict(self, Xnew):
        assert Xnew.shape[1] == self.active_d, "Somehow, the input was not projected"

        Kx = self.kernel.K(self._X, Xnew)
        mu = np.dot(Kx.T, self._woodbury_vector)

        if len(mu.shape) == 1:
            mu = mu.reshape(-1, 1)

        Kxx = self.kernel.Kdiag(Xnew)
        tmp = lapack.dtrtrs(self._woodbury_chol, Kx, lower=1, trans=0, unitdiag=0)[0]
        var = (Kxx - np.square(tmp).sum(0))[:, None]
        return mu, var

    def _norm(self):
        norm = self._woodbury_vector.T.dot(self.gp.kern.K(self.gp.X)).dot(self._woodbury_vector)
        return np.asscalar(np.sqrt(norm))

    def __getstate__(self):
        self_dict = self.__dict__.copy()
        del self_dict['gp']  # remove the gp from the state dict to allow pickling. calculations are done via the cached woodbury/cholesky
        return self_dict
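
# NOTE (editor): illustration (with random, hypothetical data) of the projection that
# mean_var and set_data above apply before querying the low-dimensional GP: the rows
# of W_hat span the active subspace, so d-dimensional inputs X are mapped to active
# coordinates via Z = X W_hat^T. Not part of the model code.
def _sketch_active_subspace_projection():
    import numpy as np

    W_hat = np.asarray([
        [-0.31894555, 0.78400512, 0.38970008, 0.06119476, 0.35776912],
        [-0.27150973, 0.066002, 0.42761931, -0.32079484, -0.79759551]
    ])                                       # shape (active_d, d) = (2, 5), as hard-coded above
    X = np.random.default_rng(2).uniform(-1, 1, size=(10, 5))
    Z = np.dot(X, W_hat.T)                   # shape (10, 2): what the internal GP is fitted on
    assert Z.shape == (X.shape[0], W_hat.shape[0])
    return Z
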
class BoringGP(ConfidenceBoundModel):
    """
    Base class for GP optimization.
    Handles common functionality.
    """

    def create_kernels(self, active_dimensions, passive_dimensions, first=False, k_variance=None, k_lengthscales=None):
        # Use the tripathy kernel here instead, because it includes W in its parameters
        active_kernel = Matern32(
            input_dim=active_dimensions,
            variance=1. if k_variance is None else k_variance,
            lengthscale=1.5 if k_lengthscales is None else k_lengthscales,  # 0.5,
            ARD=True,
            active_dims=np.arange(active_dimensions),
            name="active_subspace_kernel")

        self.kernel = active_kernel

        if first:  # TODO: need to change this back!
            # Now adding the additional kernels:
            for i in range(passive_dimensions):
                cur_kernel = RBF(
                    input_dim=1,
                    variance=2.,
                    lengthscale=0.5,  # 0.5,
                    ARD=False,
                    active_dims=[active_dimensions + i],
                    name="passive_subspace_kernel_dim_" + str(i))
                self.kernel += cur_kernel

        print("Got kernel: ")
        print(self.kernel)

    def create_gp(self):
        self.gp = GPRegression(input_dim=self.domain.d,
                               kernel=self.kernel,
                               noise_var=0.01,
                               calculate_gradients=False)

        # Let the GP take over datapoints from the datasaver!
        X = self.datasaver_gp.X
        Y = self.datasaver_gp.Y

        # Apply the Q transform if it was spawned already!
        if self.Q is not None:
            X = np.dot(X, self.Q)

        if self.Q is not None:
            assert X.shape[1] >= 2, ("Somehow, Q was not projected!", X.shape, 2)  # TODO: change this back to ==!

        self.gp.set_XY(X, Y)
        self._update_cache()

    def create_gp_and_kernels(self, active_dimensions, passive_dimensions, first=False, k_variance=None, k_lengthscales=None):
        self.create_kernels(active_dimensions,
                            passive_dimensions,
                            first=first,
                            k_variance=k_variance,
                            k_lengthscales=k_lengthscales)
        self.create_gp()

    # From here on, it's the usual functions
    def __init__(self, domain, always_calculate=False):
        super(BoringGP, self).__init__(domain)

        # passive projection matrix still needs to be created first!
        # print("WARNING: CONFIG MODE IS: ", config.DEV)

        self.burn_in_samples = 101  # 101 # 102
        self.recalculate_projection_every = 101
        self.active_projection_matrix = None
        self.passive_projection_matrix = None
        self.Q = None

        # some other parameters that are cached
        self.t = 0

        # Setting up the datasaver (infrastructure which allows us to save the data to be projected again and again)
        placeholder_kernel = RBF(input_dim=self.domain.d)
        self.datasaver_gp = GPRegression(input_dim=self.domain.d,
                                         kernel=placeholder_kernel,
                                         noise_var=0.01,
                                         calculate_gradients=False)

        # Create a new kernel and create a new GP
        self.create_gp_and_kernels(self.domain.d, 0, first=True)  # self.domain.d - 2

        # Some post-processing
        self.kernel = self.kernel.copy()
        self._woodbury_chol = np.asfortranarray(
            self.gp.posterior._woodbury_chol
        )  # we create a copy of the matrix in fortranarray, such that we can directly pass it to lapack dtrtrs without doing another copy
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()
        self._Y = np.empty(shape=(0, 1))
        self._bias = self.config.bias

        self.always_calculate = always_calculate

    @property
    def beta(self):
        return np.sqrt(np.log(self.datasaver_gp.X.shape[0]))

    @property
    def scale(self):
        if self.gp.kern.name == 'sum':
            return sum([part.variance for part in self.gp.kern.parts])
        else:
            return np.sqrt(self.gp.kern.variance)

    @property
    def bias(self):
        return self._bias

    def _get_gp(self):
        return self.gp  # GPRegression(self.domain.d, self.kernel, noise_var=self.config.noise_var, calculate_gradients=self.config.calculate_gradients)

    def add_data(self, x, y):
        """
        Add a new function observation to the GPs.

        Parameters
        ----------
        x: 2d-array
        y: 2d-array
        """
        self.i = 1 if not hasattr(self, "i") else self.i + 1
        # print("Add data ", self.i)

        x = np.atleast_2d(x)
        y = np.atleast_2d(y)
        self.set_data(x, y, append=True)

    # TODO: check if this is called anyhow!
    def optimize(self):
        self._update_beta()

    def _update_cache(self):
        # if not self.config.calculate_gradients:
        self._woodbury_chol = np.asfortranarray(self.gp.posterior._woodbury_chol)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()
        self._X = self.gp.X.copy()

        self._update_beta()

    def _optimize_bias(self):
        self._bias = minimize(self._bias_loss, self._bias, method='L-BFGS-B')['x'].copy()
        self._set_bias(self._bias)
        logger.info(f"Updated bias to {self._bias}")

    def _bias_loss(self, c):
        # calculate mean and norm for new bias via a new woodbury_vector
        new_woodbury_vector, _ = dpotrs(self._woodbury_chol, self._Y - c, lower=1)
        K = self.gp.kern.K(self.gp.X)
        mean = np.dot(K, new_woodbury_vector)
        norm = new_woodbury_vector.T.dot(mean)
        # loss is least_squares_error + norm
        return np.asscalar(np.sum(np.square(mean + c - self._Y)) + norm)

    def _set_bias(self, c):
        self._bias = c
        self.gp.set_Y(self._Y - c)
        self._woodbury_vector = self.gp.posterior._woodbury_vector.copy()

    def _update_beta(self):
        logdet = self._get_logdet()
        logdet_priornoise = self._get_logdet_prior_noise()
        self._beta = np.sqrt(2 * np.log(1 / self.delta) + (logdet - logdet_priornoise)) + self._norm()

    def _optimize_var(self):
        # fix all parameters
        for p in self.gp.parameters:
            p.fix()

        if self.gp.kern.name == 'sum':
            for part in self.gp.kern.parts:
                part.variance.unfix()
        else:
            self.gp.kern.variance.unfix()
        self.gp.optimize()
        if self.gp.kern.name == 'sum':
            values = []
            for part in self.gp.kern.parts:
                values.append(np.asscalar(part.variance.values))
        else:
            values = np.asscalar(self.gp.kern.variance.values)

        logger.info(f"Updated prior variance to {values}")
        # unfix all parameters
        for p in self.gp.parameters:
            p.unfix()

    def _get_logdet(self):
        return 2. * np.sum(np.log(np.diag(self.gp.posterior._woodbury_chol)))

    def _get_logdet_prior_noise(self):
        return self.t * np.log(self.gp.likelihood.variance.values)

    def mean_var(self, x):
        """Recompute the confidence intervals from the GP.

        Parameters
        ----------
        context: ndarray
            Array that contains the context used to compute the sets
        """
        x = np.atleast_2d(x)
        assert not np.isnan(x).all(), ("X is nan at some point!", x)

        if self.config.calculate_gradients or True:
            # In the other case, projection is done in a subfunction
            if self.Q is not None:
                x = np.dot(x, self.Q)
            mean, var = self.gp.predict_noiseless(x)
        else:
            mean, var = self._raw_predict(x)

        return mean + self._bias, var

    def mean_var_grad(self, x):
        # TODO: should this be here as well?
        # TODO: check that this is not actually used for new AND saved gp's!
        if self.Q is not None:
            x = np.dot(x, self.Q)
        return self.gp.predictive_gradients(x)

    def var(self, x):
        return self.mean_var(x)[1]

    def mean(self, x):
        return self.mean_var(x)[0]

    def set_data(self, X, Y, append=True):
        # First of all, save everything in the saver GP
        if append:
            X = np.concatenate((self.datasaver_gp.X, X), axis=0)
            Y = np.concatenate((self.datasaver_gp.Y, Y), axis=0)  # Should be axis=0

        self.datasaver_gp.set_XY(X, Y)

        # Now, save everything in the other GP but with a projected X value
        # # TODO: This is pretty wrong!
        X = self.datasaver_gp.X
        Y = self.datasaver_gp.Y

        # Do our optimization now
        if self.burn_in_samples == self.i or self.always_calculate:
            # (self.i >= self.burn_in_samples and self.i % self.recalculate_projection_every == 1) or
            import time
            start_time = time.time()
            # print("Adding data: ", self.i)

            optimizer = TripathyOptimizer()

            # TODO: the following part is commented out, so we can test if the function works well if we give it the real matrix!
            self.active_projection_matrix, sn, l, s, d = optimizer.find_active_subspace(X, Y)
            #
            # print("BORING sampled the following active matrix: ")
            # print(self.active_projection_matrix)
            #
            passive_dimensions = max(self.domain.d - d, 0)
            passive_dimensions = min(passive_dimensions, 2)
            # passive_dimensions = 1  # TODO: take out this part!
            #
            # passive_dimensions = 0
            #
            # Generate A^{bot} if there's more dimensions
            if passive_dimensions > 0:
                self.passive_projection_matrix = generate_orthogonal_matrix_to_A(
                    self.active_projection_matrix, passive_dimensions)
            else:
                self.passive_projection_matrix = None
            #
            # print("BORING sampled the following passive matrix: ")
            # print(self.passive_projection_matrix)

            # d = 2
            # self.active_projection_matrix = np.asarray([
            #     [-0.31894555, 0.78400512, 0.38970008, 0.06119476, 0.35776912],
            #     [-0.27150973, 0.066002, 0.42761931, -0.32079484, -0.79759551]
            # ]).T
            # s = 1.
            # l = 1.5
            # passive_dimensions = 0

            # Create Q by concatenating the active and passive projections
            if passive_dimensions > 0:
                self.Q = np.concatenate((self.active_projection_matrix, self.passive_projection_matrix), axis=1)
            else:
                self.Q = self.active_projection_matrix

            assert not np.isnan(self.Q).all(), ("The projection matrix contains nan's!", self.Q)

            # print("BORING sampled the following matrix: ")
            # print(self.Q)

            assert d == self.active_projection_matrix.shape[1]

            self.create_gp_and_kernels(active_dimensions=d,
                                       passive_dimensions=passive_dimensions,
                                       first=True,
                                       k_variance=s,
                                       k_lengthscales=l)

            # print("Projection matrix is: ", self.Q.shape)
            # print("Dimensions found are: ", d)
            # print("Active projection matrix is ", self.active_projection_matrix.shape)
            # print("How many datapoints do we have in the kernel?", self.gp.X.shape)
            # print("How many datapoints do we have in the kernel?", self.datasaver_gp.X.shape)

            print("--- %s seconds ---" % (time.time() - start_time))

        # if self.i > self.burn_in_samples:
        #     assert self.Q is not None, "After the burning in, self.Q is still None!"

        # Add the points to the newly shaped GP!
        if (self.i < self.burn_in_samples or self.Q is None) and (not self.always_calculate):
            # print("Still using the old method!")
            self.gp.set_XY(X, Y)
        else:
            # print("We use the dot product thingy from now on!")
            Z = np.dot(X, self.Q)
            # print("Old shape: ", X.shape)
            # print("New shape: ", Z.shape)
            self.gp.set_XY(Z, Y)

        # print("Added data: ", self.i)
        # print("Datasaver has shape: ", self.datasaver_gp.X.shape)
        # print("Another shape: ", self.gp.X.shape)

        self.t = X.shape[0]
        self._update_cache()

    def _raw_predict(self, Xnew):
        m, n = Xnew.shape

        # Need to project Xnew here?
        # if self.Q is not None:
        #     Xnew = np.dot(Xnew, self.Q)
        #     assert Xnew.shape[1] == self.Q.reshape(self.Q.shape[0], -1).shape[1], ("Shapes are wrong: ", Xnew.shape, self.Q.shape)
        # else:
        #     assert Xnew.shape[1] == self.domain.d, ("Shapes are wrong when we have no Q!", Xnew.shape, self.domain.d)

        if not hasattr(self.kernel, 'parts'):  # TODO: take this out?
            mu, var = self._raw_predict_given_kernel(Xnew, self.kernel)
            # print("Using the cool values!")
        else:
            mu = np.zeros((Xnew.shape[0], 1))
            var = np.zeros((Xnew.shape[0], 1))
            for kernel in self.kernel.parts:
                cur_mu, cur_var = self._raw_predict_given_kernel(Xnew, kernel)
                assert not np.isnan(cur_mu).all(), ("nan encountered for mean!", cur_mu)
                assert not np.isnan(cur_var).all(), ("nan encountered for var!", cur_var)
                mu += cur_mu
                var += cur_var

        assert not np.isnan(mu).all(), ("nan encountered for mean!", mu)
        assert not np.isnan(var).all(), ("nan encountered for var!", var)

        assert mu.shape == (m, 1), ("Shape of mean is different! ", mu.shape, (m, 1))
        assert var.shape == (m, 1), ("Shape of variance is different! ", var.shape, (m, 1))

        return mu, var

    def _raw_predict_given_kernel(self, Xnew, kernel):
        Kx = kernel.K(self._X, Xnew)
        mu = np.dot(Kx.T, self._woodbury_vector)

        if len(mu.shape) == 1:
            mu = mu.reshape(-1, 1)

        Kxx = kernel.Kdiag(Xnew)
        tmp = lapack.dtrtrs(self._woodbury_chol, Kx, lower=1, trans=0, unitdiag=0)[0]
        var = (Kxx - np.square(tmp).sum(0))[:, None]
        return mu, var

    def _norm(self):
        norm = self._woodbury_vector.T.dot(self.gp.kern.K(self.gp.X)).dot(self._woodbury_vector)
        return np.asscalar(np.sqrt(norm))

    def __getstate__(self):
        self_dict = self.__dict__.copy()
        del self_dict['gp']  # remove the gp from the state dict to allow pickling. calculations are done via the cached woodbury/cholesky
        return self_dict