def __init__(self, variational_model: bool = True, do_monitor: bool = False):
    self.var = variational_model
    self.do_monitor = do_monitor
    if do_monitor:
        self.monitor_path = "train_log/fit"
        os.system("rm -rf train_log")

    if variational_model:
        self.opt = tf.optimizers.Adam()
        self.opt_var = NaturalGradient(gamma=0.1)
    else:
        self.opt = Scipy()
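# %% [markdown]
# The two branches above follow the usual GPflow pairing: a `NaturalGradient` optimizer for the variational parameters together with Adam for the remaining hyperparameters, or a single `Scipy` optimizer when the model is not variational. The sketch below illustrates roughly how such a pair (`self.opt` / `self.opt_var` as created above) is typically used; the `variational_step` helper and the assumption that `model` is an SVGP-style model with `q_mu`/`q_sqrt` parameters are illustrative only, not part of the original class.

# %%
from gpflow.utilities import set_trainable


def variational_step(model, data, adam_opt, natgrad_opt):
    """Hypothetical helper: one combined optimization step for the variational case."""
    loss = model.training_loss_closure(data)
    # Natural-gradient update of the variational parameters (q_mu, q_sqrt) ...
    natgrad_opt.minimize(loss, var_list=[(model.q_mu, model.q_sqrt)])
    # ... then keep them out of Adam's variable list and take an Adam step
    # on the remaining trainable (hyper)parameters.
    set_trainable(model.q_mu, False)
    set_trainable(model.q_sqrt, False)
    adam_opt.minimize(loss, var_list=model.trainable_variables)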
# * `max_cg_iters`. The maximum number of CG iterations.
# * `restart_cg_iters`. The frequency with which CG resets its internal state to the initial position using the current solution vector `v`.
# * `v_grad_optimization`. CGLB introduces an auxiliary parameter `v`, and by default the optimal `v` is found with CG. However, you can instead include `v` in the list of trainable model parameters.

# %%
cglb = CGLB(
    data,
    kernel=SquaredExponential(),
    noise_variance=noise,
    inducing_variable=iv,
    cg_tolerance=1.0,
    max_cg_iters=50,
    restart_cg_iters=50,
)
opt = Scipy()

# %% [markdown]
# We train the model as usual. The variables do not include the $ v $ auxiliary vector.

# %%
variables = cglb.trainable_variables
_ = opt.minimize(
    cglb.training_loss_closure(compile=False),
    variables,
    compile=False,
    options=dict(maxiter=100),
)

# %% [markdown]
# Below we compare prediction results for different CG tolerances. `cg_tolerance=None` means that no CG is run to tune the $ v $ vector, and `cg_tolerance=0.01` is a much lower value than the one used during model optimization.

# %% [markdown]
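# The snippet below is an illustrative sketch of that comparison rather than the notebook's own code: it assumes a hypothetical test grid `xtest`, and that the model's `predict_y` accepts a `cg_tolerance` keyword overriding the value set at construction time.

# %%
xtest = np.linspace(0.0, 1.0, 200).reshape(-1, 1)  # hypothetical test inputs

# cg_tolerance=None: no CG is run at prediction time; the stored v is reused as-is.
mean_no_cg, var_no_cg = cglb.predict_y(xtest, cg_tolerance=None)

# cg_tolerance=0.01: CG keeps refining v until the residual drops below the tolerance.
mean_cg, var_cg = cglb.predict_y(xtest, cg_tolerance=0.01)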
ax.legend()

plt.show()

# %%
vgp = VGPWrapper(
    kernel=kernel_cls(),
    index_points=X_grid,
    observation_index_points=X_train,
    observations=Y_train,
    vgp_cls=VGPOpperArchambeau,
    jitter=jitter,
)

# %%
optimizer = Scipy()
optimizer.minimize(vgp.variational_loss, variables=vgp._vgp.trainable_variables)

# %%
qf_loc = vgp.mean()
qf_scale = vgp.stddev()

# %%
# m = tf.matmul(vgp.kernel.K(X_train), vgp.q_alpha)

# %%
fig, ax = plt.subplots()

ax.plot(X_grid, r.logit(X_grid),
from gpflow.utilities import print_summary

print_summary(model)

# %% [markdown]
# The objective function for MDN instances is the `log_marginal_likelihood`, which we use for optimizing the parameters. GPflow ensures that only the variables stored in `Parameter` objects are optimized. For the MDN, the only parameters are the weights and the biases of the neural net.
#
# We use the `Scipy` optimizer, which is a wrapper around SciPy's L-BFGS optimization algorithm. Note that GPflow also supports other TensorFlow optimizers such as `Adam`, `Adagrad`, and `Adadelta`.

# %%
from gpflow.optimizers import Scipy
from gpflow.ci_utils import ci_niter

Scipy().minimize(
    tf.function(lambda: -model.log_marginal_likelihood(data)),
    variables=model.trainable_parameters,
    options=dict(maxiter=ci_niter(1500)),
)
print("Final Likelihood", model.log_marginal_likelihood(data).numpy())

# %% [markdown]
# To evaluate the validity of our model, we draw the posterior density. We also plot $\mu(x)$ of the optimized neural net. Remember that for every $x$ the neural net outputs $M$ means $\mu_m(x)$. These determine the locations of the Gaussians. We plot all $M$ means and use their corresponding mixture weights $\pi_m(x)$ to determine their size: larger dots have more impact in the Gaussian ensemble.

# %%
try:
    from mdn_plotting import plot
except ImportError:
    # VS Code's root directory is GPflow's top-level directory
    from doc.source.notebooks.tailor.mdn_plotting import plot

fig, axes = plt.subplots(1, 2, figsize=(12, 6))
from gpflow.utilities import print_summary

print_summary(model)

# %% [markdown]
# The objective function for MDN instances is the `maximum_log_likelihood_objective`, which we use for optimizing the parameters. GPflow ensures that only the variables stored in `Parameter` objects are optimized. For the MDN, the only parameters are the weights and the biases of the neural net.
#
# We use the `Scipy` optimizer, which is a wrapper around SciPy's L-BFGS optimization algorithm. Note that GPflow also supports other TensorFlow optimizers such as `Adam`, `Adagrad`, and `Adadelta`.

# %%
from gpflow.optimizers import Scipy
from gpflow.ci_utils import ci_niter

Scipy().minimize(
    model.training_loss_closure(data, compile=True),
    model.trainable_variables,
    options=dict(maxiter=ci_niter(1500)),
)
print("Final Likelihood", model.maximum_log_likelihood_objective(data).numpy())

# %% [markdown]
# To evaluate the validity of our model, we draw the posterior density. We also plot $\mu(x)$ of the optimized neural net. Remember that for every $x$ the neural net outputs $M$ means $\mu_m(x)$. These determine the locations of the Gaussians. We plot all $M$ means and use their corresponding mixture weights $\pi_m(x)$ to determine their size: larger dots have more impact in the Gaussian ensemble.

# %%
try:
    from mdn_plotting import plot
except ImportError:
    # VS Code's root directory is GPflow's top-level directory
    from doc.source.notebooks.tailor.mdn_plotting import plot
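# %% [markdown]
# The imported `plot` helper takes care of this figure. Purely to illustrate the sizing trick described above (this is not the actual `mdn_plotting.plot` implementation), a hypothetical helper could scatter the means with marker sizes proportional to the mixture weights, assuming `pi` and `mu` have already been evaluated on a grid of inputs:

# %%
def plot_mixture_means(ax, x_grid, pi, mu):
    """Hypothetical sketch: scatter the M mixture means, sized by their weights.

    `x_grid` has shape [N, 1]; `pi` and `mu` are assumed to be arrays of shape
    [N, M] holding the mixture weights and means evaluated on `x_grid`.
    """
    for m in range(mu.shape[1]):
        # Marker area proportional to pi_m(x): components with larger mixture
        # weight appear as larger dots and dominate the ensemble.
        ax.scatter(x_grid, mu[:, m], s=50 * pi[:, m], alpha=0.6)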