def optimize_policy(self, maxiter=50, restarts=1):
    """
    Optimize policy parameters while keeping the GP dynamics model fixed.
    adapted from https://github.com/nrontsis/PILCO
    :param maxiter: max optimizer iterations per (re)start
    :param restarts: total number of optimisation runs (first run plus
        ``restarts - 1`` random restarts of the controller)
    """
    # Freeze the dynamics-model parameters so only the controller is optimised.
    mgpr_trainable_params = self.mgpr.trainable_parameters
    for param in mgpr_trainable_params:
        set_trainable(param, False)

    # Lazily create the optimizer once; the minimize call is identical in
    # both cases, so there is no need to duplicate it per branch.
    if not self.optimizer:
        self.optimizer = gpflow.optimizers.Scipy()
    self.optimizer.minimize(self.training_loss, self.trainable_variables,
                            options=dict(maxiter=maxiter))

    best_parameter_values = [param.numpy() for param in self.trainable_parameters]
    best_reward = self.compute_reward()

    # Remaining restarts: re-randomise the controller and keep the best result.
    for restart in range(restarts - 1):
        self.controller.randomize()
        self.optimizer.minimize(self.training_loss, self.trainable_variables,
                                options=dict(maxiter=maxiter))
        reward = self.compute_reward()
        if reward > best_reward:
            best_parameter_values = [param.numpy() for param in self.trainable_parameters]
            best_reward = reward

    # Restore the best parameters found across all runs.
    for i, param in enumerate(self.trainable_parameters):
        param.assign(best_parameter_values[i])
    # Unfreeze the dynamics model again.
    for param in mgpr_trainable_params:
        set_trainable(param, True)
def gpr_and_vgp(data, kernel, likelihood):
    """Build a GPR and a VGP model on the same data/kernel.

    The GPR gets the likelihood's noise variance; on the VGP only the
    variational parameters (q_mu, q_sqrt) are left trainable.
    """
    vgp = gpflow.models.VGP(data, kernel, likelihood)
    gpr = gpflow.models.GPR(data, kernel)
    gpr.likelihood.variance.assign(likelihood.variance)
    set_trainable(vgp, False)
    # Parameter.trainable is a read-only property in GPflow 2: use the
    # set_trainable() utility rather than attribute assignment.
    set_trainable(vgp.q_mu, True)
    set_trainable(vgp.q_sqrt, True)
    return gpr, vgp
def test_make_trainable(model):
    """`set_trainable()` must be able to turn previously-frozen
    parameters back into trainable ones."""
    set_trainable(model, False)
    assert not model.trainable_variables
    set_trainable(model, True)
    assert len(model.trainable_variables) == len(model.parameters)
def sgpr_and_svgp(data, inducing_variable, kernel, likelihood):
    """Build an SGPR and an SVGP model sharing kernel and inducing points.

    Only the variational parameters (q_mu, q_sqrt) of the SVGP are left
    trainable.
    """
    svgp = gpflow.models.SVGP(kernel, likelihood, inducing_variable)
    sgpr = gpflow.models.SGPR(data, kernel, inducing_variable=inducing_variable)
    sgpr.likelihood.variance.assign(Setup.likelihood_variance)
    set_trainable(svgp, False)
    # Parameter.trainable is a read-only property in GPflow 2: use the
    # set_trainable() utility rather than attribute assignment.
    set_trainable(svgp.q_mu, True)
    set_trainable(svgp.q_sqrt, True)
    return sgpr, svgp
def create_model():
    """Build an SVGP with a diagonal variational covariance and a frozen q_mu."""
    model = gpflow.models.SVGP(
        kernel=create_kernel(),
        likelihood=gpflow.likelihoods.Gaussian(variance_lower_bound=0.0),
        inducing_variable=Data.Z,
        q_diag=True,
    )
    # The variational mean is deliberately kept fixed.
    set_trainable(model.q_mu, False)
    return model
def train(self, x, y, sigma_n=None, sigma_f=1e-6):
    """Fit a product-of-SE-kernels GPR model to (x, y).

    NaN targets (and their inputs) are dropped, and y is standardised to
    zero mean / unit variance before fitting. Sets ``self.m`` (the GPflow
    model), ``self.sigma`` (learned noise std) and ``self.trained``.

    :param x: training inputs; reshaped to (n_samples, n_dims)
    :param y: training targets; must contain at least 2 values
    :param sigma_n: unused here; kept for interface compatibility
    :param sigma_f: unused here; kept for interface compatibility
    :raises RuntimeError: if fewer than 2 target values are given
    """
    if y.size < 2:
        raise RuntimeError('y.size must be at least 2')
    # Drop NaN targets and the corresponding input rows.
    notnan = np.logical_not(np.isnan(y))
    x = x.reshape(y.size, -1)[notnan]
    y = y[notnan]
    # Standardise targets to zero mean / unit variance.
    self.ymean = np.mean(y)
    self.yvar = np.var(y)
    self.yscale = np.sqrt(self.yvar)
    self.xtrain = x.reshape(y.size, -1).astype(np.float64)
    self.ytrain = ((y.reshape(y.size, 1) - self.ymean) / self.yscale).astype(np.float64)
    self.ndim = self.xtrain.shape[-1]

    # One SE kernel per input dimension, multiplied together.
    lengthscale_guess = np.empty(self.ndim)
    kern = []
    for k in range(self.ndim):
        # TODO: guess this in a finer fashion via FFT in all directions
        lengthscale_guess[k] = 0.3 * (np.max(self.xtrain[:, k]) - np.min(self.xtrain[:, k]))
        kern.append(
            gpflow.kernels.SquaredExponential(lengthscales=lengthscale_guess[k],
                                              variance=1.0,
                                              active_dims=[k]))
        if k == 0:
            # Guess a bit more broadly
            kern[k].variance.assign(3.0)
        else:
            # Need only one y scale
            set_trainable(kern[k].variance, False)
    kerns = gpflow.kernels.Product(kern)

    self.m = gpflow.models.GPR((self.xtrain, self.ytrain), kernel=kerns)
    self.m.likelihood.variance.assign(1e-2)  # Guess little noise

    # Optimize
    def objective_closure():
        return -self.m.log_marginal_likelihood()

    opt = gpflow.optimizers.Scipy()
    opt.minimize(objective_closure, self.m.trainable_variables)
    self.sigma = np.sqrt(self.m.likelihood.variance.value())
    self.trained = True
def test_non_trainable_model_objective(model):
    """A model with no trainable parameters must still yield an objective.

    Regression test for a bug in log_prior(): with no priors present,
    log_prior must be exactly zero so it adds nothing to the objective.
    """
    set_trainable(model, False)
    model.log_marginal_likelihood()
    assert model.log_prior() == 0.0
def gp_model(x_train, y_train, x_test, num_classes):
    """This function instantiates the gp model and gets the predictions
    from the model.

    :param x_train: The training dataset.
    :param y_train: The training dataset labels.
    :param x_test: The test dataset.
    :param num_classes: The number of classes in the dataset.
    :return: predictions, the predictions from the gp model.
    :return time_taken: The time taken to train the model."""
    data = (x_train, y_train)
    # Sum of three stationary kernels.
    kernel = gpflow.kernels.SquaredExponential() + gpflow.kernels.Matern12(
    ) + gpflow.kernels.Exponential()
    invlink = gpflow.likelihoods.RobustMax(num_classes)
    likelihood = gpflow.likelihoods.MultiClass(num_classes, invlink=invlink)
    # Every fifth training point is used as an inducing input.
    z = x_train[::5].copy()
    model = gpflow.models.SVGP(kernel=kernel,
                               likelihood=likelihood,
                               inducing_variable=z,
                               num_latent_gps=num_classes,
                               whiten=True,
                               q_diag=True)
    # Keep the inducing locations fixed during optimisation.
    set_trainable(model.inducing_variable, False)
    print('\nInitial parameters:')
    print_summary(model, fmt="notebook")
    start = time.time()
    opt = gpflow.optimizers.Scipy()
    opt.minimize(model.training_loss_closure(data),
                 model.trainable_variables,
                 options=dict(maxiter=ci_niter(1000)))
    print('\nParameters after optimization:')
    print_summary(model, fmt="notebook")
    end = time.time()
    time_taken = round(end - start, 2)
    print('Optimization took {:.2f} seconds'.format(time_taken))
    # predict_y returns (mean, var); keep only the predictive mean.
    predictions = model.predict_y(x_test)[0]
    return predictions, time_taken
def test_mixed_mok_with_Id_vs_independent_mok(): data = DataMixedKernelWithEye # Independent model k1 = mk.SharedIndependent( SquaredExponential(variance=0.5, lengthscale=1.2), data.L) f1 = InducingPoints(data.X[:data.M, ...]) model_1 = SVGP(k1, Gaussian(), f1, q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full) set_trainable(model_1, False) model_1.q_sqrt.trainable = True @tf.function(autograph=False) def closure1(): return -model_1.log_marginal_likelihood(Data.X, Data.Y) gpflow.optimizers.Scipy().minimize(closure1, variables=model_1.trainable_variables, method='BFGS') # Mixed Model kern_list = [ SquaredExponential(variance=0.5, lengthscale=1.2) for _ in range(data.L) ] k2 = mk.LinearCoregionalization(kern_list, data.W) f2 = InducingPoints(data.X[:data.M, ...]) model_2 = SVGP(k2, Gaussian(), f2, q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full) set_trainable(model_2, False) model_2.q_sqrt.trainable = True @tf.function(autograph=False) def closure2(): return -model_2.log_marginal_likelihood(Data.X, Data.Y) gpflow.optimizers.Scipy().minimize(closure2, variables=model_2.trainable_variables, method='BFGS') check_equality_predictions(Data.X, Data.Y, [model_1, model_2])
def build_model(data):
    """Wrap a GPR (with fixed likelihood noise) as the trieste OBJECTIVE model."""
    empirical_variance = tf.math.reduce_variance(data.observations)
    matern = gpflow.kernels.Matern52(variance=empirical_variance,
                                     lengthscales=0.2 * np.ones(2, ))
    gpr = gpflow.models.GPR(astuple(data), matern, noise_variance=1e-5)
    # Noise stays at the small fixed jitter set above.
    set_trainable(gpr.likelihood, False)
    model_spec = {
        "model": gpr,
        "optimizer": gpflow.optimizers.Scipy(),
        "optimizer_args": {"options": dict(maxiter=100)},
    }
    return {OBJECTIVE: trieste.models.create_model(model_spec)}
def optimizeModel(self):
    """Build and optimise a GPR model from the accumulated (X, Y) data.

    Sets ``self.gp``; noise variance is fixed when
    ``self.fixed_noise_variance`` is truthy.
    """
    k = gpflow.kernels.Matern52(self.kernel_variance, self.lengthscales)
    # Stack all stored batches into single design matrices.
    X = np.concatenate(self.X, 0)
    Y = np.concatenate(self.Y, 0)
    X = X.reshape((-1, self.input_dim))
    Y = Y.reshape((-1, 1))
    meanf = gpflow.mean_functions.Constant(self.mean_value)
    self.gp = gpflow.models.GPR(data=(X, Y), kernel=k, mean_function=meanf)
    self.gp.likelihood.variance.assign(self.noise_variance)
    #keep prior mean functions fixed
    #set_trainable(self.gp.mean_function.c, False)
    if(self.fixed_noise_variance):
        set_trainable(self.gp.likelihood.variance, False)
    opt = gpflow.optimizers.Scipy()
    opt_logs = opt.minimize(self.gp.training_loss,
                            self.gp.trainable_variables,
                            options=dict(maxiter=100))
    print_summary(self.gp)
def _init_layers(self, X, Y, Z, dims, kernels,
                 mean_function=Zero(),
                 Layer=SVGPIndependentLayer,
                 white=False):
    """Initialise DGP layers to have the same number of outputs as inputs,
    apart from the final layer.

    Intermediate layers get a fixed linear mean function: the identity when
    the width is unchanged, otherwise an SVD projection (narrowing) or a
    zero-padded identity (widening).

    NOTE(review): ``mean_function=Zero()`` is a mutable default shared across
    calls — confirm this is intentional.
    """
    layers = []
    X_running, Z_running = X.copy(), Z.copy()
    for i in range(len(kernels) - 1):
        dim_in, dim_out, kern = dims[i], dims[i + 1], kernels[i]
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:
                # Narrowing: project onto the top principal directions.
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:
                # Widening: identity padded with zero columns.
                W = np.concatenate(
                    [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            # The mean function is a fixed projection, not a trainable map.
            set_trainable(mf.A, False)
            set_trainable(mf.b, False)
        layers.append(Layer(kern, Z_running, dim_out, mf, white=white))
        if dim_in != dim_out:
            # Propagate inducing inputs and data through the projection so
            # the next layer sees inputs of the right width.
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)
    # Final layer uses the requested output dimension and mean function.
    layers.append(
        Layer(kernels[-1], Z_running, dims[-1], mean_function, white=white))
    return layers
def run_fold(X, Y, XT, YT):
    """Train a two-latent heteroscedastic SVGP on (X, Y) and return the
    negative log predictive density on the test fold (XT, YT)."""
    # Kernel: one Matern32 per latent process (mean and log-noise).
    kern_list = [
        gpflow.kernels.Matern32(variance=3., lengthscale=1.),
        gpflow.kernels.Matern32(variance=3., lengthscale=1.)
    ]
    kernel = gpflow.kernels.SeparateIndependent(kern_list)
    # Inducing points (we hack this through SVGP, because the SpearateIndependent support in plain
    # VGP was broken). We simply put an inducing point at every data point.
    Xu = X.copy()
    inducing_variables = gpflow.inducing_variables.mo_inducing_variables. \
        SharedIndependentInducingVariables(gpflow.inducing_variables.InducingPoints(Xu))
    # The model
    model = gpflow.models.SVGP(kernel=kernel,
                               likelihood=HeteroscedasticGaussian(),
                               inducing_variable=inducing_variables,
                               num_latent=2)
    # Set trainable (everything except the 'inducing' points, because we want the full model).
    set_trainable(model, True)
    set_trainable(model.inducing_variable.inducing_variable_shared.Z, False)
    # Optimize parameters
    o = gpflow.optimizers.Scipy()

    @tf.function(autograph=False)
    def objective():
        # Negative ELBO as the minimisation target.
        return -model.elbo((X, Y))

    o.minimize(objective, variables=model.trainable_variables)
    # Plot model
    plot(model)
    # Print model
    gpflow.utilities.print_summary(model, fmt='notebook')
    # Return NLPD
    return -tf.reduce_mean(model.predict_log_density((XT, YT))).numpy()
def build_dgp_model(data):
    """Build a two-layer deep GP wrapped for trieste, with fixed tiny
    likelihood noise and an Adam/Keras training configuration."""
    variance = tf.math.reduce_variance(data.observations)
    dgp = build_vanilla_deep_gp(data.query_points, num_layers=2, num_inducing=100)
    # Match the output-layer signal variance to the empirical data variance.
    dgp.f_layers[-1].kernel.kernel.variance.assign(variance)
    dgp.f_layers[-1].mean_function = gpflow.mean_functions.Constant()
    # Observation noise is pinned to a small jitter and excluded from training.
    dgp.likelihood_layer.likelihood.variance.assign(1e-5)
    set_trainable(dgp.likelihood_layer.likelihood.variance, False)
    epochs = 200
    batch_size = 100
    optimizer = tf.optimizers.Adam(0.01)
    # These are just arguments for the Keras `fit` method.
    fit_args = {
        "batch_size": batch_size,
        "epochs": epochs,
        "verbose": 0,
    }
    return DeepGaussianProcess(model=dgp, optimizer=optimizer, fit_args=fit_args)
def optimizeModel(self):
    """Build and optimise a multi-output coregionalised VGP over the
    accumulated (X, Y) data, with one Gaussian likelihood per output."""
    output_dim = self.output_dim
    rank = self.rank
    self.k = gpflow.kernels.Matern52(self.kernel_variance, self.lengthscales,
                                     active_dims=np.arange(self.input_dim).tolist())
    # Coregionalisation kernel acts on the output-index column of X.
    self.coreg = gpflow.kernels.Coregion(output_dim=output_dim, rank=rank,
                                         active_dims=[self.input_dim])
    k = self.k * self.coreg
    X = np.concatenate(self.X, 0)
    Y = np.concatenate(self.Y, 0)
    X = X.reshape((X.shape[0], -1))
    Y = Y.reshape((Y.shape[0], -1))
    meanf = gpflow.mean_functions.Constant(self.mean_value)
    # One Gaussian likelihood per output, switched on the output index.
    lik = gpflow.likelihoods.SwitchedLikelihood(
        [gpflow.likelihoods.Gaussian() for i in range(self.output_dim)])
    # now build the GP model as normal
    self.gp = gpflow.models.VGP((X, Y), kernel=k, likelihood=lik,
                                mean_function=meanf)
    #set_trainable(self.gp.mean_function.c, False)
    for i in range(self.output_dim):
        self.gp.likelihood.likelihoods[i].variance.assign(self.noise_variance[i])
    if(self.fixed_noise_variance):
        for i in range(self.output_dim):
            set_trainable(self.gp.likelihood.likelihoods[i].variance, False)
    gpflow.optimizers.Scipy().minimize(self.gp.training_loss,
                                       self.gp.trainable_variables,
                                       options=dict(maxiter=10000),
                                       method="L-BFGS-B")
    print_summary(self.gp)
def conv_model(conv_k, conv_f, data, num_class):
    """Build the convolutional SVGP classifier.

    Inducing patches and base-kernel hyperparameters are trainable; the
    patch weights are kept fixed. ``data`` is unused here but kept for
    interface parity with the RBF builder.
    """
    model = gpflow.models.SVGP(
        conv_k,
        gpflow.likelihoods.MultiClass(num_class),
        conv_f,
        num_latent_gps=num_class,
    )
    # set variance, lengthscale, weight as trainable parameters
    for parameter, flag in [
        (model.inducing_variable, True),
        (model.kernel.base_kernel.variance, True),
        (model.kernel.base_kernel.lengthscales, True),
        (model.kernel.weights, False),
    ]:
        set_trainable(parameter, flag)
    return model
def _create_kernel(self):
    """Creates a kernel from list of strings stored in _kernel_split.

    Builds a sum over product terms: kernel names in each inner list are
    multiplied together and the resulting products are summed.

    NOTE(review): block structure reconstructed from a whitespace-mangled
    source — verify nesting against the upstream original.
    """
    k = None
    for i, prod_kern in enumerate(self.kernel_split):
        sub_k = None
        for j, kern in enumerate(prod_kern):
            # NOTE(review): the index `i + j` into kernel_params looks
            # suspicious for multi-element inner lists — confirm the
            # intended parameter layout.
            new_k = getattr(gpflow.kernels, kern)(**self.kernel_params[i + j])
            if hasattr(new_k, 'lengthscales') and self.length_scale_prior:
                new_k.lengthscales.prior = tfp.distributions.InverseGamma(
                    to_default_float(1), to_default_float(1))
            if j == 0:
                # First factor of the product anchors the term.
                sub_k = new_k
                if self.variance_prior:
                    new_k.variance.prior = tfp.distributions.Gamma(
                        to_default_float(1), to_default_float(1))
            else:
                # Only one variance per product term is identifiable, so
                # later factors have theirs fixed before multiplying in.
                set_trainable(new_k.variance, False)
                sub_k *= new_k
        if i == 0:
            k = sub_k
        else:
            k += sub_k
    return k
def set_KL_X_trainable(model, param: bool = True):
    '''If false the model will make the parameter that contribute to the
    KL divergence of X non trainable.

    Handles both model flavours: explicit X-distribution parameters
    (X_data_mean / X_data_var) and encoder-based models.
    '''
    # GPLVM-style models expose the X distribution parameters directly.
    try:
        set_trainable(model.X_data_mean, param)
        set_trainable(model.X_data_var, param)
    except AttributeError:
        # Model has no explicit X distribution parameters.
        pass
    # Encoder-based models: toggle every encoder layer instead. A bare
    # `except:` here would mask real errors (including KeyboardInterrupt),
    # so only the expected missing-attribute case is silenced.
    try:
        for l in model.encoder._layers:
            l.trainable = param
            set_trainable(l, param)
    except AttributeError:
        # Model has no encoder.
        pass
def rbf_model(base_k, data, MAXITER, n, num_class):
    """Build a plain (non-convolutional) SVGP classifier whose inducing
    inputs are n random training points. ``MAXITER`` is unused here but
    kept for interface parity with the convolutional builder."""
    # Sample n distinct training points as inducing inputs.
    chosen = np.random.choice(data[0].shape[0], n, replace=False)
    inducing_points = data[0][chosen]
    model = gpflow.models.SVGP(
        base_k,
        gpflow.likelihoods.MultiClass(num_class),
        gpflow.inducing_variables.InducingPoints(inducing_points.copy()),
        num_latent_gps=num_class,
    )
    # Kernel hyperparameters and inducing locations are all trainable.
    for parameter in (model.kernel.variance,
                      model.kernel.lengthscales,
                      model.inducing_variable):
        set_trainable(parameter, True)
    return model
def FitModel(
    bConsider,
    GPt,
    GPy,
    globalBranching,
    priorConfidence=0.80,
    M=10,
    likvar=1.0,
    kerlen=2.0,
    kervar=5.0,
    fDebug=False,
    maxiter=100,
    fPredict=True,
    fixHyperparameters=False,
):
    """
    Fit BGP model
    :param bConsider: list of candidate branching points
    :param GPt: pseudotime
    :param GPy: gene expression. Should be 0 mean for best performance.
    :param globalBranching: cell labels
    :param priorConfidence: prior confidence on cell labels
    :param M: number of inducing points
    :param likvar: initial value for Gaussian noise variance
    :param kerlen: initial value for kernel length scale
    :param kervar: initial value for kernel variance
    :param fDebug: Print debugging information
    :param maxiter: maximum number of iterations for optimisation
    :param fPredict: compute predictive mean and variance
    :param fixHyperparameters: should kernel hyperparameters be kept fixed or optimised?
    :return: dictionary of log likelihood, GPflow model, Phi matrix, predictive set of points,
        mean and variance, hyperparameter values, posterior on branching time
    """
    assert isinstance(bConsider, list), "Candidate B must be list"
    assert GPt.ndim == 1
    assert GPy.ndim == 2
    assert (
        GPt.size == GPy.size
    ), "pseudotime and gene expression data must be the same size"
    assert (
        globalBranching.size == GPy.size
    ), "state space must be same size as number of cells"
    assert M >= 0, "at least 0 or more inducing points should be given"
    phiInitial, phiPrior = GetInitialConditionsAndPrior(
        globalBranching, priorConfidence, infPriorPhi=True
    )
    XExpanded, indices, _ = VBHelperFunctions.GetFunctionIndexListGeneral(GPt)
    # Earliest pseudotime at which both branches are observed: used as the
    # initial branching point.
    ptb = np.min([np.min(GPt[globalBranching == 2]), np.min(GPt[globalBranching == 3])])
    tree = bt.BinaryBranchingTree(0, 1, fDebug=False)
    tree.add(None, 1, np.ones((1, 1)) * ptb)  # B can be anything here
    (fm, _) = tree.GetFunctionBranchTensor()
    # Branching kernel plus a White kernel for numerical jitter.
    kb = bk.BranchKernelParam(
        gpflow.kernels.Matern32(1), fm, b=np.zeros((1, 1))
    ) + gpflow.kernels.White(1)
    kb.kernels[1].variance.assign(
        1e-6
    )  # controls the discontinuity magnitude, the gap at the branching point
    set_trainable(kb.kernels[1].variance, False)  # jitter for numerics
    if M == 0:
        # Dense (full) assignment GP.
        m = assigngp_dense.AssignGP(
            GPt,
            XExpanded,
            GPy,
            kb,
            indices,
            np.ones((1, 1)) * ptb,
            phiInitial=phiInitial,
            phiPrior=phiPrior,
        )
    else:
        # Sparse model: inducing points spread over pseudotime (col 0) and
        # cycling over function indices 1..3 (col 1).
        ZExpanded = np.ones((M, 2))
        ZExpanded[:, 0] = np.linspace(0, 1, M, endpoint=False)
        ZExpanded[:, 1] = np.array([i for j in range(M) for i in range(1, 4)])[:M]
        m = assigngp_denseSparse.AssignGPSparse(
            GPt,
            XExpanded,
            GPy,
            kb,
            indices,
            np.ones((1, 1)) * ptb,
            ZExpanded,
            phiInitial=phiInitial,
            phiPrior=phiPrior,
        )
    # Initialise hyperparameters
    m.likelihood.variance.assign(likvar)
    m.kernel.kernels[0].kern.lengthscales.assign(kerlen)
    m.kernel.kernels[0].kern.variance.assign(kervar)
    if fixHyperparameters:
        print("Fixing hyperparameters")
        set_trainable(m.kernel.kernels[0].kern.lengthscales, False)
        set_trainable(m.likelihood.variance, False)
        set_trainable(m.kernel.kernels[0].kern.variance, False)
    else:
        if fDebug:
            print("Adding prior logistic on length scale to avoid numerical problems")
        m.kernel.kernels[0].kern.lengthscales.prior = tfp.distributions.Normal(
            to_default_float(2.0), to_default_float(1.0)
        )
        m.kernel.kernels[0].kern.variance.prior = tfp.distributions.Normal(
            to_default_float(3.0), to_default_float(1.0)
        )
        m.likelihood.variance.prior = tfp.distributions.Normal(
            to_default_float(0.1), to_default_float(0.1)
        )
    # optimization: refit the model at every candidate branching point and
    # keep the log posterior for each.
    ll = np.zeros(len(bConsider))
    Phi_l = list()
    ttestl_l, mul_l, varl_l = list(), list(), list()
    hyps = list()
    for ib, b in enumerate(bConsider):
        m.UpdateBranchingPoint(np.ones((1, 1)) * b, phiInitial)
        try:
            opt = gpflow.optimizers.Scipy()
            opt.minimize(
                m.training_loss,
                variables=m.trainable_variables,
                options=dict(disp=True, maxiter=maxiter),
            )
            # remember winning hyperparameter
            hyps.append(
                {
                    "likvar": m.likelihood.variance.numpy(),
                    "kerlen": m.kernel.kernels[0].kern.lengthscales.numpy(),
                    "kervar": m.kernel.kernels[0].kern.variance.numpy(),
                }
            )
            ll[ib] = m.log_posterior_density()
        except Exception as ex:
            print(f"Unexpected error: {ex} {'-' * 60}\nCaused by model: {m} {'-' * 60}")
            ll[0] = np.nan
            # return model so can inspect model
            return {
                "loglik": ll,
                "model": m,
                "Phi": np.nan,
                "prediction": {"xtest": np.nan, "mu": np.nan, "var": np.nan},
                "hyperparameters": np.nan,
                "posteriorB": np.nan,
            }
        # prediction
        Phi = m.GetPhi()
        Phi_l.append(Phi)
        if fPredict:
            ttestl, mul, varl = VBHelperFunctions.predictBranchingModel(m)
            ttestl_l.append(ttestl), mul_l.append(mul), varl_l.append(varl)
        else:
            ttestl_l.append([]), mul_l.append([]), varl_l.append([])
    # Select the candidate branching point with the highest log posterior.
    iw = np.argmax(ll)
    postB = GetPosteriorB(ll, bConsider)
    if fDebug:
        print(
            "BGP Maximum at b=%.2f" % bConsider[iw],
            "CI= [%.2f, %.2f]" % (postB["B_CI"][0], postB["B_CI"][1]),
        )
    # NOTE(review): `"%s-%s" % str(a, b)` looks malformed — str() does not
    # take two positional values like this; verify the intended message.
    assert np.allclose(bConsider[iw], postB["Bmode"]), "%s-%s" % str(
        postB["B_CI"], bConsider[iw]
    )
    return {
        "loglik": ll,
        "Phi": Phi_l[iw],
        # 'model': m,
        "prediction": {"xtest": ttestl_l[iw], "mu": mul_l[iw], "var": varl_l[iw]},
        "hyperparameters": hyps[iw],
        "posteriorB": postB,
    }
# %% np.random.seed(0) X = np.random.rand(20,1)*10 Y = np.sin(X) + 0.9 * np.cos(X*1.6) + np.random.randn(*X.shape)* 0.4 Xtest = np.random.rand(10,1)*10 plt.plot(X, Y, 'kx', mew=2); # %% data = (tf.convert_to_tensor(X, dtype=default_float()), tf.convert_to_tensor(Y, dtype=default_float())) inducing_variable = tf.convert_to_tensor(X, dtype=default_float()) m1 = gpflow.models.GPR(data, kernel=gpflow.kernels.SquaredExponential()) m2 = gpflow.models.VGP(data, kernel=gpflow.kernels.SquaredExponential(), likelihood=gpflow.likelihoods.Gaussian()) m3 = gpflow.models.SVGP(gpflow.kernels.SquaredExponential(), gpflow.likelihoods.Gaussian(), inducing_variable, q_diag=False) set_trainable(m3.inducing_variable, False) m4 = gpflow.models.SVGP(gpflow.kernels.SquaredExponential(), gpflow.likelihoods.Gaussian(), inducing_variable, q_diag=False, whiten=True) set_trainable(m4.inducing_variable, False) m5 = gpflow.models.SGPR(data, kernel=gpflow.kernels.SquaredExponential(), inducing_variable=inducing_variable) set_trainable(m5.inducing_variable, False) m6 = gpflow.models.GPRFITC(data, kernel=gpflow.kernels.SquaredExponential(), inducing_variable=inducing_variable) set_trainable(m6.inducing_variable, False) models = [m1, m2, m3, m4, m5, m6] # %% [markdown] # Now, we optimize the models. For `GPR`, `SVGP`, and `GPRFITC`, this simply optimizes the hyperparameters (since the inducing points are fixed). For the variational models, this jointly maximises the lower bound to the marginal likelihood (Evidence Lower Bound, ELBO) with respect to the variational parameters and the kernel and likelihood hyperparameters.
# %% # datavar = np.mean(var_dTglob) xtrain = t.reshape([-1,1]).astype(np.float64) ytrain = dT1glob.reshape([-1,1]).astype(np.float64) k = gpflow.kernels.SquaredExponential(variance=1e2*np.var(ytrain), lengthscales=50) #meanf = gpflow.mean_functions.Constant() m = gpflow.models.GPR(data=(xtrain, ytrain), kernel=k, noise_variance=np.var(ytrain))#, mean_function=meanf) opt = gpflow.optimizers.Scipy() set_trainable(m.kernel.variance, False) #set_trainable(m.likelihood.variance, False) print_summary(m) def objective_closure(): return - m.log_marginal_likelihood() opt_logs = opt.minimize(objective_closure, m.trainable_variables, options=dict(maxiter=100)) print_summary(m) xpl = np.linspace(-20, 1020, 1041).reshape([-1, 1]).astype('float64') mean, var = m.predict_f(xpl, full_cov=False)
def test_separate_independent_mof():
    """
    Same test as above but we use different (i.e. separate) inducing inducing
    for each of the output dimensions.
    """
    np.random.seed(0)

    # Model 1 (INefficient): fully-coupled MP x MP variational covariance.
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P, Data.M * Data.P))[None, ...]  # 1 x MP x MP
    kernel_1 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), Data.P)
    inducing_variable_1 = InducingPoints(Data.X[:Data.M, ...])
    model_1 = SVGP(kernel_1, Gaussian(), inducing_variable_1,
                   q_mu=q_mu_1, q_sqrt=q_sqrt_1)
    set_trainable(model_1, False)
    # Only the variational parameters are optimised.
    model_1.q_sqrt.trainable = True
    model_1.q_mu.trainable = True

    @tf.function(autograph=False)
    def closure1():
        return -model_1.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure1,
                                       variables=model_1.trainable_variables,
                                       method='BFGS')

    # Model 2 (efficient): block-diagonal P x M x M covariance with
    # separate inducing variables per output.
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_2 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), Data.P)
    inducing_variable_list_2 = [
        InducingPoints(Data.X[:Data.M, ...]) for _ in range(Data.P)
    ]
    inducing_variable_2 = mf.SeparateIndependentInducingVariables(
        inducing_variable_list_2)
    model_2 = SVGP(kernel_2, Gaussian(), inducing_variable_2,
                   q_mu=q_mu_2, q_sqrt=q_sqrt_2)
    set_trainable(model_2, False)
    model_2.q_sqrt.trainable = True
    model_2.q_mu.trainable = True

    @tf.function(autograph=False)
    def closure2():
        return -model_2.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure2,
                                       variables=model_2.trainable_variables,
                                       method='BFGS')

    # Model 3 (Inefficient): an idenitical inducing variable is used P times,
    # and treated as a separate one.
    q_mu_3 = np.random.randn(Data.M, Data.P)
    q_sqrt_3 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kern_list = [
        SquaredExponential(variance=0.5, lengthscale=1.2) for _ in range(Data.P)
    ]
    kernel_3 = mk.SeparateIndependent(kern_list)
    inducing_variable_list_3 = [
        InducingPoints(Data.X[:Data.M, ...]) for _ in range(Data.P)
    ]
    inducing_variable_3 = mf.SeparateIndependentInducingVariables(
        inducing_variable_list_3)
    model_3 = SVGP(kernel_3, Gaussian(), inducing_variable_3,
                   q_mu=q_mu_3, q_sqrt=q_sqrt_3)
    set_trainable(model_3, False)
    model_3.q_sqrt.trainable = True
    model_3.q_mu.trainable = True

    @tf.function(autograph=False)
    def closure3():
        return -model_3.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure3,
                                       variables=model_3.trainable_variables,
                                       method='BFGS')

    # All three parameterisations must produce matching predictions.
    check_equality_predictions(Data.X, Data.Y, [model_1, model_2, model_3])
def test_separate_independent_mok():
    """
    We use different independent kernels for each of the output dimensions.
    We can achieve this in two ways:
        1) efficient: SeparateIndependentMok with Shared/SeparateIndependentMof
        2) inefficient: SeparateIndependentMok with InducingPoints
    However, both methods should return the same conditional, and after
    optimization return the same log likelihood.
    """
    # Model 1 (Inefficient): fully-coupled MP x MP variational covariance.
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P, Data.M * Data.P))[None, ...]  # 1 x MP x MP
    kern_list_1 = [
        SquaredExponential(variance=0.5, lengthscale=1.2) for _ in range(Data.P)
    ]
    kernel_1 = mk.SeparateIndependent(kern_list_1)
    inducing_variable_1 = InducingPoints(Data.X[:Data.M, ...])
    model_1 = SVGP(kernel_1, Gaussian(), inducing_variable_1,
                   num_latent=1, q_mu=q_mu_1, q_sqrt=q_sqrt_1)
    set_trainable(model_1, False)
    # Only the variational parameters are optimised.
    model_1.q_sqrt.trainable = True
    model_1.q_mu.trainable = True

    @tf.function(autograph=False)
    def closure1():
        return -model_1.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure1,
                                       variables=model_1.trainable_variables,
                                       method='BFGS')

    # Model 2 (efficient): block-diagonal P x M x M covariance with shared
    # inducing variables.
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kern_list_2 = [
        SquaredExponential(variance=0.5, lengthscale=1.2) for _ in range(Data.P)
    ]
    kernel_2 = mk.SeparateIndependent(kern_list_2)
    inducing_variable_2 = mf.SharedIndependentInducingVariables(
        InducingPoints(Data.X[:Data.M, ...]))
    model_2 = SVGP(kernel_2, Gaussian(), inducing_variable_2,
                   num_latent=Data.P, q_mu=q_mu_2, q_sqrt=q_sqrt_2)
    set_trainable(model_2, False)
    model_2.q_sqrt.trainable = True
    model_2.q_mu.trainable = True

    @tf.function(autograph=False)
    def closure2():
        return -model_2.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure2,
                                       variables=model_2.trainable_variables,
                                       method='BFGS')

    # Both parameterisations must produce matching predictions.
    check_equality_predictions(Data.X, Data.Y, [model_1, model_2])
def test_shared_independent_mok():
    """
    In this test we use the same kernel and the same inducing inducing
    for each of the outputs. The outputs are considered to be uncorrelated.
    This is how GPflow handled multiple outputs before the multioutput
    framework was added. We compare three models here:
        1) an ineffient one, where we use a SharedIndepedentMok with
           InducingPoints. This combination will uses a Kff of size
           N x P x N x P, Kfu if size N x P x M x P which is extremely
           inefficient as most of the elements are zero.
        2) efficient: SharedIndependentMok and SharedIndependentMof
           This combinations uses the most efficient form of matrices
        3) the old way, efficient way: using Kernel and InducingPoints
        Model 2) and 3) follow more or less the same code path.
    """
    np.random.seed(0)
    # Model 1
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)  # MP x 1
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P, Data.M * Data.P))[None, ...]  # 1 x MP x MP
    kernel_1 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), Data.P)
    inducing_variable = InducingPoints(Data.X[:Data.M, ...])
    model_1 = SVGP(kernel_1, Gaussian(), inducing_variable,
                   q_mu=q_mu_1, q_sqrt=q_sqrt_1,
                   num_latent=Data.Y.shape[-1])
    set_trainable(model_1, False)
    # Only the variational covariance is optimised.
    model_1.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure1():
        return -model_1.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure1,
                                       variables=model_1.trainable_variables,
                                       options=dict(maxiter=500),
                                       method='BFGS')

    # Model 2: plain single-output kernel used with multiple latents.
    q_mu_2 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_2 = SquaredExponential(variance=0.5, lengthscale=1.2)
    inducing_variable_2 = InducingPoints(Data.X[:Data.M, ...])
    model_2 = SVGP(kernel_2, Gaussian(), inducing_variable_2,
                   num_latent=Data.P, q_mu=q_mu_2, q_sqrt=q_sqrt_2)
    set_trainable(model_2, False)
    model_2.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure2():
        return -model_2.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure2,
                                       variables=model_2.trainable_variables,
                                       options=dict(maxiter=500),
                                       method='BFGS')

    # Model 3: shared-independent kernel with shared inducing variables.
    q_mu_3 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_3 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_3 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), Data.P)
    inducing_variable_3 = mf.SharedIndependentInducingVariables(
        InducingPoints(Data.X[:Data.M, ...]))
    model_3 = SVGP(kernel_3, Gaussian(), inducing_variable_3,
                   num_latent=Data.P, q_mu=q_mu_3, q_sqrt=q_sqrt_3)
    set_trainable(model_3, False)
    model_3.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure3():
        return -model_3.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure3,
                                       variables=model_3.trainable_variables,
                                       options=dict(maxiter=500),
                                       method='BFGS')

    # All three parameterisations must produce matching predictions.
    check_equality_predictions(Data.X, Data.Y, [model_1, model_2, model_3])
# Re-enable training of the second kernel's variance.
# NOTE(review): direct assignment to `.trainable` may not work on GPflow 2
# Parameters (read-only property) — `set_trainable()` is the supported
# route; confirm against the GPflow version in use.
m.kernel.kernels[1].variance.trainable = True
print_summary(m)

# %% [markdown]
# **NOTE:** If you want to recursively change the `trainable` status of an object that *contains* parameters, you **must** use the `set_trainable()` utility function.
#
# A module (e.g. a model, kernel, likelihood, ... instance) does not have a `trainable` attribute:

# %%
try:
    m.kernel.trainable
except AttributeError:
    print(f'{m.kernel.__class__.__name__} does not have a trainable attribute')

# %%
set_trainable(m.kernel, False)
print_summary(m)

# %% [markdown]
# ## Priors
#
# You can set priors in the same way as transforms and trainability, by using `tensorflow_probability` distribution objects. Let's set a Gamma prior on the variance of the Matern32 kernel.

# %%
k = gpflow.kernels.Matern32()
k.variance.prior = tfp.distributions.Gamma(to_default_float(2), to_default_float(3))
print_summary(k)

# %%
# Fit a GPR to the UV-band light curve, optionally with a fixed noise level.
m = gpflow.models.GPR(data=(time, uv_band_flux),
                      mean_function=Constant(np.mean(uv_band_flux)),
                      kernel=k,
                      noise_variance=1)

if fix_noise:
    # Fix a noise level to be the average experimental error observed in the dataset (0.037) for magnitudes
    # Noise level is 2.0364e-15 for the flux values.
    # Standardisation destroys this information so setting noise to be mean of standardised values divided by
    # the SNR in the orignal space.
    fixed_noise = np.mean(np.abs(uv_band_flux / snr))
    # Assign the value into the existing Parameter: plain attribute
    # assignment (`m.likelihood.variance = fixed_noise`) would replace the
    # Parameter object with a raw float.
    m.likelihood.variance.assign(fixed_noise)
    set_trainable(
        m.likelihood.variance, False
    )  # We don't want to optimise the noise level in this case.

opt = gpflow.optimizers.Scipy()
opt.minimize(objective_closure, m.trainable_variables, options=dict(maxiter=100))
print_summary(m)

# We specify the grid of time points on which we wish to predict the count rate
time_test = np.arange(54236, 58630, 1, dtype=np.float64).reshape(-1, 1)
mean, var = m.predict_y(time_test)
log_lik = m.log_marginal_likelihood()
invlink = gpflow.likelihoods.RobustMax(C) # Robustmax inverse link function likelihood = gpflow.likelihoods.MultiClass( 3, invlink=invlink) # Multiclass likelihood Z = X[::5].copy() # inducing inputs m = gpflow.models.SVGP( kernel=kernel, likelihood=likelihood, inducing_variable=Z, num_latent_gps=C, whiten=True, q_diag=True, ) # Only train the variational parameters set_trainable(m.kernel.kernels[1].variance, False) set_trainable(m.inducing_variable, False) print_summary(m, fmt="notebook") # %% [markdown] # #### Running inference # %% opt = gpflow.optimizers.Scipy() opt_logs = opt.minimize(m.training_loss_closure(data), m.trainable_variables, options=dict(maxiter=ci_niter(1000))) print_summary(m, fmt="notebook") # %%
# %% p = m.kernel.kernels[0].variance m.kernel.kernels[0].variance = gpflow.Parameter(p.numpy(), transform=tfp.bijectors.Exp()) # %% print_summary(m, fmt="notebook") # %% [markdown] # ## Changing whether a parameter will be trained in optimization # # Another helpful feature is the ability to fix parameters. To do this, simply set the `trainable` attribute to `False`; this is shown in the **trainable** column of the representation, and the corresponding variable is removed from the free state. # %% set_trainable(m.kernel.kernels[1].variance, False) print_summary(m) # %% m.trainable_parameters # %% [markdown] # To unfix a parameter, just set the `trainable` attribute to `True` again. # %% set_trainable(m.kernel.kernels[1].variance, True) print_summary(m) # %% [markdown] # **NOTE:** If you want to recursively change the `trainable` status of an object that *contains* parameters, you **must** use the `set_trainable()` utility function. #
# cannot copy this due to shape mismatch with different numbers of inducing points between models: del init_params['.inducing_variable.Z'] for M in fMs: Zinit = vfe.inducing_variable.Z.numpy()[:M, :] Zinit = np.vstack( (Zinit, X[np.random.permutation(len(X))[:(M - len(Zinit))], :].copy())) vfe = gpflow.models.SGPR((X, Y), gpflow.kernels.SquaredExponential(), inducing_variable=Zinit) # copy hyperparameters (omitting inducing_variable.Z) from optimized model: gpflow.utilities.multiple_assign(vfe, init_params) set_trainable(vfe.kernel, False) set_trainable(vfe.likelihood, False) objective = tf.function( autograph=False)(lambda: -vfe.log_marginal_likelihood()) gpflow.optimizers.Scipy().minimize(objective, vfe.trainable_variables, options=dict(disp=False, maxiter=ci_niter(1000))) fvfe_lml.append(vfe.log_likelihood().numpy()) fvupper_lml.append(vfe.upper_bound().numpy()) print("%i" % M, end=" ") # %% plt.plot(fMs, fvfe_lml, label="lower")