Example #1
    # Note: this snippet assumes `os`, `tensorflow as tf`, and
    # `from gpflow.optimizers import NaturalGradient, Scipy` are imported at module level.
    def __init__(self,
                 variational_model: bool = True,
                 do_monitor: bool = False):

        self.var = variational_model
        self.do_monitor = do_monitor
        if do_monitor:
            # Monitoring: remove any previous training logs so a fresh run starts clean.
            self.monitor_path = "train_log/fit"
            os.system("rm -rf train_log")

        if variational_model:
            # Variational model: Adam for the hyperparameters, natural gradients
            # for the variational parameters.
            self.opt = tf.optimizers.Adam()
            self.opt_var = NaturalGradient(gamma=0.1)
        else:
            # Non-variational model: a single L-BFGS (SciPy) optimizer suffices.
            self.opt = Scipy()
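# A minimal sketch (not part of the snippet above) of how these two optimizers are
# typically combined in GPflow: NaturalGradient updates the variational parameters
# (q_mu, q_sqrt) while Adam updates the remaining hyperparameters. The SVGP-style
# model `model` and the zero-argument loss closure `loss_fn` are hypothetical here.
import gpflow
import tensorflow as tf
from gpflow.optimizers import NaturalGradient

adam = tf.optimizers.Adam()
natgrad = NaturalGradient(gamma=0.1)

# Keep Adam away from the variational parameters; NaturalGradient handles them.
gpflow.set_trainable(model.q_mu, False)
gpflow.set_trainable(model.q_sqrt, False)

for _ in range(100):
    natgrad.minimize(loss_fn, var_list=[(model.q_mu, model.q_sqrt)])
    adam.minimize(loss_fn, var_list=model.trainable_variables)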
Example #2
# * `max_cg_iters`. The maximum number of CG iterations.
# * `restart_cg_iters`. The frequency with which CG resets its internal state to the initial position, using the current solution vector `v`.
# * `v_grad_optimization`. CGLB introduces an auxiliary parameter `v`; by default the optimal `v` is found with CG. However, you can instead include `v` in the list of trainable model parameters (sketched below).
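# A minimal sketch (not part of the original notebook) of the latter option, assuming the
# same `data`, `noise`, and `iv` used below: passing `v_grad_optimization=True` makes `v`
# a trainable parameter, so it is optimized by gradients instead of by CG.

# %%
cglb_v_trainable = CGLB(
    data,
    kernel=SquaredExponential(),
    noise_variance=noise,
    inducing_variable=iv,
    v_grad_optimization=True,  # `v` joins the trainable variables; CG is not used to set it
)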

# %%

cglb = CGLB(
    data,
    kernel=SquaredExponential(),
    noise_variance=noise,
    inducing_variable=iv,
    cg_tolerance=1.0,
    max_cg_iters=50,
    restart_cg_iters=50,
)
opt = Scipy()

# %% [markdown]
# We train the model as usual. The trainable variables do not include the auxiliary vector $v$.

# %%
variables = cglb.trainable_variables
_ = opt.minimize(cglb.training_loss_closure(compile=False),
                 variables,
                 compile=False,
                 options=dict(maxiter=100))

# %% [markdown]
# Below we compare prediction results for different CG tolerances. Setting `cg_tolerance=None` means that no CG is run to tune the $v$ vector, and `cg_tolerance=0.01` is a much lower value than the one used during model optimization.

# %% [markdown]
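# A minimal sketch of that comparison (not from the original notebook), assuming the CGLB
# prediction methods accept a `cg_tolerance` keyword as the text above implies, and using a
# hypothetical test grid `xnew`.

# %%
import numpy as np

xnew = np.linspace(0, 1, 100).reshape(-1, 1)  # hypothetical test inputs

# No CG refinement of `v` at prediction time.
mean_none, var_none = cglb.predict_y(xnew, cg_tolerance=None)

# Much tighter tolerance than the `cg_tolerance=1.0` used for training.
mean_tight, var_tight = cglb.predict_y(xnew, cg_tolerance=0.01)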
Example #3
ax.legend()

plt.show()

# %%
vgp = VGPWrapper(kernel=kernel_cls(),
                 index_points=X_grid,
                 observation_index_points=X_train,
                 observations=Y_train,
                 vgp_cls=VGPOpperArchambeau,
                 jitter=jitter)

# %%

optimizer = Scipy()
optimizer.minimize(vgp.variational_loss,
                   variables=vgp._vgp.trainable_variables)

# %%
# Approximate posterior mean and standard deviation at the index points.
qf_loc = vgp.mean()
qf_scale = vgp.stddev()
# %%

# m = tf.matmul(vgp.kernel.K(X_train), vgp.q_alpha)
# %%

fig, ax = plt.subplots()

ax.plot(X_grid,
        r.logit(X_grid),
Example #4
from gpflow.utilities import print_summary

print_summary(model)

# %% [markdown]
# The objective function for MDN instances is the `log_marginal_likelihood`, which we use for optimization of the parameters. GPflow ensures that only the variables stored in `Parameter` objects are optimized. For the MDN, the only parameters are the weights and the biases of the neural net.
#
# We use the `Scipy` optimizer, which is a wrapper around SciPy's L-BFGS optimization algorithm. Note that GPflow supports other TensorFlow optimizers such as `Adam`, `Adagrad`, and `Adadelta` as well.

# %%
from gpflow.optimizers import Scipy
from gpflow.ci_utils import ci_niter

Scipy().minimize(tf.function(lambda: -model.log_marginal_likelihood(data)),
                 variables=model.trainable_parameters,
                 options=dict(maxiter=ci_niter(1500)))

print("Final Likelihood", model.log_marginal_likelihood(data).numpy())

# %% [markdown]
# To evaluate the validity of our model, we draw the posterior density. We also plot $\mu(x)$ of the optimized neural net. Remember that for every $x$ the neural net outputs $M$ means $\mu_m(x)$. These determine the location of the Gaussians. We plot all $M$ means and use their corresponding mixture weight $\pi_m(X)$ to determine their size. Larger dots will have more impact in the Gaussian ensemble.

# %%
try:
    from mdn_plotting import plot
except ImportError:
    # VS Code's root directory is GPflow's top-level directory
    from doc.source.notebooks.tailor.mdn_plotting import plot

fig, axes = plt.subplots(1, 2, figsize=(12, 6))
from gpflow.utilities import print_summary

print_summary(model)

# %% [markdown]
# The objective function for MDN instances is the `maximum_log_likelihood_objective`, which we use for optimization of the parameters. GPflow ensures that only the variables stored in `Parameter` objects are optimized. For the MDN, the only parameters are the weights and the biases of the neural net.
#
# We use the `Scipy` optimizer, which is a wrapper around SciPy's L-BFGS optimization algorithm. Note that GPflow supports other TensorFlow optimizers such as `Adam`, `Adagrad`, and `Adadelta` as well.

# %%
from gpflow.optimizers import Scipy
from gpflow.ci_utils import ci_niter

Scipy().minimize(
    model.training_loss_closure(data, compile=True),
    model.trainable_variables,
    options=dict(maxiter=ci_niter(1500)),
)

print("Final Likelihood", model.maximum_log_likelihood_objective(data).numpy())

# %% [markdown]
# To evaluate the validity of our model, we draw the posterior density. We also plot $\mu(x)$ of the optimized neural net. Remember that for every $x$ the neural net outputs $M$ means $\mu_m(x)$. These determine the location of the Gaussians. We plot all $M$ means and use their corresponding mixture weight $\pi_m(X)$ to determine their size. Larger dots will have more impact in the Gaussian ensemble.

# %%
try:
    from mdn_plotting import plot
except ImportError:
    # VS Code's root directory is GPflow's top-level directory
    from doc.source.notebooks.tailor.mdn_plotting import plot