# Build the sparse variational GP on (X, Y), trained on minibatches of 100
# points with the inducing inputs initialised at Z.
model = SVGP(
    X,
    Y,
    kernel,
    likelihood,
    Z=Z,
    mean_function=mean,
    minibatch_size=100,
    num_latent=1,
    num_data=None,
    whiten=False,
)
model.compile()
run_with_adam(model, 1e-3, iterations, PrintAction(model, "Adam"))

# Author's note: predictions use stochastic sampling and produce an output
# of shape [num_samples, N, D].
# NOTE(review): this cannot be confirmed from this snippet -- verify.
ystar, varstar = model.predict_y(X)

# In[5]:

# First input column is the plotting abscissa; the shaded band spans one
# square root of the predicted variance either side of the prediction.
x_axis = X[:, 0]
pred_std = np.sqrt(varstar)
band_upper = np.squeeze(ystar + pred_std)
band_lower = np.squeeze(ystar - pred_std)

plt.figure(figsize=(4, 4))
plt.plot(x_axis, ystar, alpha=1, c='r', label='vanilla-inferred')
plt.fill_between(x_axis, band_upper, band_lower, alpha=0.5)
plt.plot(x_axis, Y[:, 0], c='b', alpha=0.5, label='data')
plt.plot(x_axis, func(X) + 5, ls='--', label='True offset by 5')
plt.legend()
# plt.title("Inferred solution on data")
plt.savefig("vanilla.png")
plt.show()
class TrainableSVGP():
    """Train and evaluate a GPflow ``SVGP`` model on minibatches.

    The model is optimised with Adam and, when ``natgrad_lr > 0``, the
    variational parameters (``q_mu``, ``q_sqrt``) are instead updated with
    GPflow's ``NaturalGradient`` optimiser.

    Args:
        kernel: GPflow kernel passed to ``SVGP``. When ``train_hyperparams``
            is false its ``lengthscales`` and ``variance`` are frozen.
        inducing_points: Initial inducing inputs; a copy is stored.
        batch_size: Minibatch size for training and prediction.
        num_iter: Number of optimisation steps.
        err_fn: Callable invoked as ``err_fn(Yval, preds)`` which returns a
            ``(error_value, error_name)`` pair.
        var_dist: ``"diag"`` or ``"full"`` variational covariance.
        classif: Number of classes. ``None`` or a non-positive value selects
            regression with a Gaussian likelihood.
        error_every: Validation error is reported every this many steps.
        train_hyperparams: Whether the inducing points and kernel
            hyperparameters are optimised.
        lr: Adam learning rate.
        natgrad_lr: Natural-gradient step size (``gamma``); natural gradients
            are disabled when this is not positive.
    """

    # A timing line is printed every this many optimisation steps.
    _PROGRESS_EVERY = 700
    # Upper bound on the number of data points held in the shuffle buffer;
    # a larger buffer runs out of memory.
    _MAX_SHUFFLE_POINTS = 1_000_000

    def __init__(self, kernel, inducing_points, batch_size, num_iter, err_fn, var_dist, classif=None, error_every=100, train_hyperparams: bool = True, lr: float = 0.001, natgrad_lr: float = 0.01):
        self.train_hyperparams = train_hyperparams
        self.lr = lr
        self.natgrad_lr = natgrad_lr
        self.kernel = kernel
        # Copy so that the caller's array is never aliased by this object.
        self.Z = inducing_points.copy()
        self.batch_size = batch_size
        self.num_iter = num_iter
        self.err_fn = err_fn
        self.error_every = error_every
        self.do_classif = classif is not None and classif > 0
        self.num_classes = 1
        if self.do_classif:
            self.num_classes = int(classif)
        # Populated by `fit`.
        self.model = None
        self.var_dist = var_dist

    @staticmethod
    def _shuffle_buffer_size(num_points, batch_size, max_points=1_000_000):
        """Return the shuffle-buffer size, in batches, clamped to at least 1.

        The buffer holds at most ``max_points`` data points worth of batches.
        The lower clamp matters when ``num_points < batch_size``: the integer
        division would give 0, which is not a valid shuffle buffer size.
        """
        return max(1, min(num_points // batch_size, max_points // batch_size))

    def _validation_error(self, Xval, Yval):
        """Predict on ``Xval`` and return ``err_fn(Yval, predictions)``."""
        preds = self.predict(Xval)
        return self.err_fn(Yval, preds)

    def fit(self, X, Y, Xval, Yval):
        """Build the SVGP model and optimise it for ``num_iter`` steps.

        Args:
            X: Training inputs; ``X.shape[0]`` is used as the dataset size.
            Y: Training targets. For more than two classes the arg-max along
                axis 1 is taken (presumably Y is one-hot encoded -- confirm
                with the caller). For two classes the labels are mapped with
                ``(Y + 1) / 2`` (presumably from -1/+1 to 0/1 -- confirm).
            Xval: Validation inputs.
            Yval: Validation targets, passed to ``err_fn``.

        Returns:
            ``self``.

        Raises:
            NotImplementedError: If ``var_dist`` is not ``"diag"`` or
                ``"full"``.
        """
        N = X.shape[0]

        if self.var_dist == "diag":
            q_diag = True
        elif self.var_dist == "full":
            q_diag = False
        else:
            raise NotImplementedError(
                "GPFlow cannot implement %s variational distribution" % (self.var_dist))

        if self.do_classif:
            if self.num_classes == 2:
                likelihood = gpflow.likelihoods.Bernoulli()
                num_latent = 1
            else:
                # Softmax better than Robustmax (apparently per the gpflow slack)
                likelihood = gpflow.likelihoods.Softmax(self.num_classes)
                num_latent = self.num_classes
                # The multiclass model needs class indices in a single
                # column rather than one column per class.
                Y = np.argmax(Y, 1).reshape((-1, 1)).astype(int)
        else:
            num_latent = 1
            likelihood = gpflow.likelihoods.Gaussian()

        self.model = SVGP(
            kernel=self.kernel,
            likelihood=likelihood,
            inducing_variable=self.Z,
            num_data=N,
            num_latent_gps=num_latent,
            whiten=False,
            q_diag=q_diag)

        # Setup training
        if not self.train_hyperparams:
            set_trainable(self.model.inducing_variable.Z, False)
            set_trainable(self.kernel.lengthscales, False)
            set_trainable(self.kernel.variance, False)

        use_natgrad = self.natgrad_lr > 0
        if use_natgrad:
            # The variational parameters are handled by the natural-gradient
            # optimiser, so they are hidden from Adam.
            set_trainable(self.model.q_mu, False)
            set_trainable(self.model.q_sqrt, False)
            variational_params = [(self.model.q_mu, self.model.q_sqrt)]

        # Create the optimizers
        adam_opt = tf.optimizers.Adam(self.lr)
        if use_natgrad:
            natgrad_opt = NaturalGradient(gamma=self.natgrad_lr)

        gpflow.utilities.print_summary(self.model)
        print("", flush=True)

        if self.num_classes == 2:
            Y = (Y + 1) / 2
            Yval = (Yval + 1) / 2

        # The generator is called with the batch size; each element it yields
        # is then shuffled within a bounded buffer and wrapped by `.batch(1)`.
        generator = partial(data_generator, X, Y)
        shuffle_size = self._shuffle_buffer_size(
            N, self.batch_size, self._MAX_SHUFFLE_POINTS)
        train_dataset = tf.data.Dataset.from_generator(
            generator,
            args=(self.batch_size, ),
            output_types=(tf.float32, tf.float32)) \
            .prefetch(self.batch_size * 10) \
            .repeat() \
            .shuffle(shuffle_size) \
            .batch(1)
        train_iter = iter(train_dataset)

        loss = self.model.training_loss_closure(train_iter)

        # Only optimisation time is accumulated; validation is excluded.
        t_elapsed = 0
        for step in range(self.num_iter):
            t_s = time.time()
            if use_natgrad:
                natgrad_opt.minimize(loss, var_list=variational_params)
            adam_opt.minimize(loss, var_list=self.model.trainable_variables)
            t_elapsed += time.time() - t_s

            if step % self._PROGRESS_EVERY == 0:
                print("Step %d -- Elapsed %.2fs" % (step, t_elapsed), flush=True)
            if (step + 1) % self.error_every == 0:
                val_err, err_name = self._validation_error(Xval, Yval)
                print(
                    f"Step {step + 1} - {t_elapsed:7.2f}s Elapsed - "
                    f"Validation {err_name} {val_err:7.5f}",
                    flush=True)

        val_err, err_name = self._validation_error(Xval, Yval)
        print(
            f"Finished optimization - {t_elapsed:7.2f}s Elapsed - "
            f"Validation {err_name} {val_err:7.5f}",
            flush=True)
        print("Final model is ")
        gpflow.utilities.print_summary(self.model)
        print("", flush=True)
        return self

    def predict(self, X):
        """Predict on ``X`` in chunks of ``batch_size`` rows.

        Returns the first output of the model's ``predict_y`` for every
        batch, concatenated along axis 0 as a NumPy array. For classification
        each batch is reshaped to ``(rows_in_batch, -1)``.

        Must be called after `fit`, which creates the model.
        """
        preds = []
        dset = tf.data.Dataset.from_tensor_slices((X, )).batch(self.batch_size)
        for X_batch in iter(dset):
            # Each dataset element is a 1-tuple holding the input batch.
            batch_preds = self.model.predict_y(X_batch[0])[0].numpy()
            if self.do_classif:
                batch_preds = batch_preds.reshape((X_batch[0].shape[0], -1))
            preds.append(batch_preds)
        preds = np.concatenate(preds, axis=0)
        return preds

    @property
    def inducing_points(self):
        """Inducing inputs of the fitted model as a NumPy array."""
        return self.model.inducing_variable.Z.numpy()

    def __str__(self):
        return ((
            "TrainableSVGP<kernel=%s, num_inducing_points=%d, batch_size=%d, "
            "num_iter=%d, lr=%f, natgrad_lr=%f, error_every=%d, train_hyperparams=%s, "
            "var_dist=%s, do_classif=%s, model=%s>") %
            (self.kernel, self.Z.shape[0], self.batch_size, self.num_iter, self.lr,
             self.natgrad_lr, self.error_every, self.train_hyperparams, self.var_dist,
             self.do_classif, self.model))