예제 #1
0
    def __init__(self,
                 variational_model: bool = True,
                 do_monitor: bool = False):
        """Pick the optimizers and, optionally, reset the monitoring directory.

        Args:
            variational_model: When true, ``self.opt`` is an Adam optimizer
                and ``self.opt_var`` a ``NaturalGradient(gamma=0.1)``
                optimizer. Otherwise ``self.opt`` is a ``Scipy`` optimizer
                and ``self.opt_var`` is not set.
            do_monitor: When true, ``self.monitor_path`` is set to
                ``"train_log/fit"`` and an existing ``train_log`` directory
                is deleted.
        """
        # Imported locally because the file's import block is not visible
        # from this excerpt.
        import shutil

        self.var = variational_model
        self.do_monitor = do_monitor
        if do_monitor:
            self.monitor_path = "train_log/fit"
            # Remove stale logs without spawning a shell; this also works on
            # platforms without `rm`. A missing directory is not an error.
            shutil.rmtree("train_log", ignore_errors=True)

        if variational_model:
            self.opt = tf.optimizers.Adam()
            self.opt_var = NaturalGradient(gamma=0.1)
        else:
            self.opt = Scipy()
예제 #2
0
# * `max_cg_iters`. The maximum number of CG iterations.
# * `restart_cg_iters`. The frequency with which the CG resets the internal state to the initial position using the current solution vector `v`.
# * `v_grad_optimization`. CGLB introduces an auxiliary parameter `v`, and by default the optimal `v` is found with the CG. However, you can instead include `v` in the list of trainable model parameters.

# %%

# Build the CGLB model with a squared-exponential kernel. The CG settings are
# given explicitly: tolerance 1.0, at most 50 CG iterations, and a restart
# interval of 50. `data`, `noise` and `iv` come from outside this excerpt.
cglb = CGLB(
    data,
    kernel=SquaredExponential(),
    noise_variance=noise,
    inducing_variable=iv,
    cg_tolerance=1.0,
    max_cg_iters=50,
    restart_cg_iters=50,
)
# SciPy-backed optimizer used by the training cell below.
opt = Scipy()

# %% [markdown]
# We train the model as usual. Variables do not include the $ v $ auxiliary vector.

# %%
# Optimize the model's trainable variables with `maxiter=100`. Compilation is
# switched off both for the loss closure and for the optimizer itself; the
# value returned by `minimize` is deliberately discarded.
variables = cglb.trainable_variables
_ = opt.minimize(cglb.training_loss_closure(compile=False),
                 variables,
                 compile=False,
                 options=dict(maxiter=100))

# %% [markdown]
# Below we compare prediction results for different CG tolerances. Setting `cg_tolerance=None` means that no CG is run to tune the $ v $ vector, and `cg_tolerance=0.01` is a much lower value than the one used during model optimization.

# %% [markdown]
예제 #3
0
ax.legend()

plt.show()

# %%
# Build the wrapper from a fresh kernel instance, the grid (`index_points`),
# the training inputs/observations, the variational GP class to wrap and the
# jitter value. All of these names are defined outside this excerpt.
vgp = VGPWrapper(kernel=kernel_cls(),
                 index_points=X_grid,
                 observation_index_points=X_train,
                 observations=Y_train,
                 vgp_cls=VGPOpperArchambeau,
                 jitter=jitter)

# %%

# Minimize the wrapper's variational loss with a SciPy optimizer.
# NOTE(review): the variables are taken from the private `_vgp` attribute of
# the wrapper; consider exposing them publicly on VGPWrapper.
optimizer = Scipy()
optimizer.minimize(vgp.variational_loss,
                   variables=vgp._vgp.trainable_variables)

# %%
# Mean and standard deviation reported by the wrapper after optimization.
qf_loc = vgp.mean()
qf_scale = vgp.stddev()
# %%

# m = tf.matmul(vgp.kernel.K(X_train), vgp.q_alpha)
# %%

fig, ax = plt.subplots()

ax.plot(X_grid,
        r.logit(X_grid),
예제 #4
0
# shortcuts
tfd = tfp.distributions

# sensible defaults
# Directory name for summaries; it is not referenced elsewhere in this excerpt.
SUMMARY_DIR = "logs/"
SEED = 8888

# NOTE(review): same value as SEED above; whether the two are meant to vary
# independently is not visible from here.
dataset_seed = 8888

num_features = 1

num_train = 100
num_test = 100

# Kernel class (not an instance) and the optimizer instance used by default.
kernel_cls = Matern52
optimizer = Scipy()

jitter = 1e-6

num_seeds = 10

# properties of the distribution
# Each entry maps a label to a callable that takes a distribution object.
# "mean" and "mode" are the unbound tfd.Distribution methods; "median" instead
# takes the 0.5 quantile of the object's inner `.distribution` attribute, so it
# only works on objects that expose that attribute.
props = {
    "mean": tfd.Distribution.mean,
    "mode": tfd.Distribution.mode,
    "median": lambda d: d.distribution.quantile(0.5),
    # "sample": tfd.Distribution.sample,  # single sample
}


def poly(x):
예제 #5
0
class Trainer:
    """Fit a model with Adam plus natural gradients, or with SciPy.

    With ``variational_model=True`` every step runs Adam on
    ``model.trainable_variables`` followed by a natural-gradient update of
    ``(model.q_mu, model.q_sqrt)``. Otherwise a single ``Scipy`` minimize
    call is made. When ``do_monitor`` is set, a TensorBoard monitor writing
    to ``train_log/fit`` is called after every step of the variational loop.
    """

    def __init__(self,
                 variational_model: bool = True,
                 do_monitor: bool = False):
        """Pick the optimizers and, optionally, reset the monitoring directory.

        Args:
            variational_model: Selects Adam + ``NaturalGradient(gamma=0.1)``
                when true, ``Scipy`` otherwise (``self.opt_var`` is then not
                set).
            do_monitor: When true, ``train_log`` is deleted and
                ``self.monitor_path`` is set to ``"train_log/fit"``.
        """
        # Imported locally because the file's import block is not visible
        # from this excerpt.
        import shutil

        self.var = variational_model
        self.do_monitor = do_monitor
        if do_monitor:
            self.monitor_path = "train_log/fit"
            # Remove stale logs without spawning a shell; this also works on
            # platforms without `rm`. A missing directory is not an error.
            shutil.rmtree("train_log", ignore_errors=True)

        if variational_model:
            self.opt = tf.optimizers.Adam()
            self.opt_var = NaturalGradient(gamma=0.1)
        else:
            self.opt = Scipy()

    def run(self, model, dataset, epoch: int = 10):
        """Train ``model`` on ``dataset``.

        Args:
            model: Object exposing ``inducing_variable``, ``q_mu``,
                ``q_sqrt``, ``trainable_variables`` and
                ``training_loss_closure``.
            dataset: In the variational path it must support ``len()`` and
                ``iter()``; in the SciPy path it must support ``unbatch()``.
            epoch: The variational loop runs ``len(dataset) * epoch`` steps.
        """
        # Freeze the inducing variable and the variational parameters so the
        # optimizer in `self.opt` leaves them alone; in the variational path
        # q_mu / q_sqrt are updated by the natural-gradient optimizer instead.
        # NOTE(review): in the SciPy path nothing updates q_mu / q_sqrt after
        # this point; confirm that is intended.
        set_trainable(model.inducing_variable, False)
        set_trainable(model.q_mu, False)
        set_trainable(model.q_sqrt, False)

        if self.do_monitor:
            self.create_monitor(model)

        if self.var:
            # Only the step-wise loop needs the step count, so `len(dataset)`
            # is not evaluated on the SciPy path.
            num_iter = len(dataset) * epoch
            # NOTE(review): a single iterator is consumed for all epochs, so
            # the dataset presumably has to repeat; verify against callers.
            train_iter = iter(dataset)
            training_loss = model.training_loss_closure(train_iter,
                                                        compile=True)
            for step in tf.range(num_iter):
                self.optimization_step(model, training_loss)
                # `self.monitor` only exists after create_monitor() has run.
                if self.do_monitor:
                    self.monitor(step)

        else:
            data = dataset.unbatch()
            self.opt.minimize(model.training_loss_closure(data),
                              variables=model.trainable_variables,
                              options={
                                  "disp": True,
                                  # Iteration limits are integers.
                                  "maxiter": 1000
                              })

    @tf.function
    def optimization_step(self, model, loss):
        """Run one Adam step and one natural-gradient step on ``loss``."""
        # The optimizer keyword is `var_list`; `par_list` is not accepted.
        self.opt.minimize(loss, var_list=model.trainable_variables)
        # NaturalGradient expects a list of (q_mu, q_sqrt) tuples, not a flat
        # list of parameters.
        self.opt_var.minimize(loss, var_list=[(model.q_mu, model.q_sqrt)])

    def create_monitor(self, model):
        """Create ``self.monitor``, logging ``model`` to ``self.monitor_path``.

        Requires the instance to have been built with ``do_monitor=True``,
        since that is where ``self.monitor_path`` is set.
        """
        model_task = ModelToTensorBoard(self.monitor_path, model)
        # `period` is an argument of the task group, not of Monitor itself.
        self.monitor = Monitor(MonitorTaskGroup([model_task], period=5))


# data_minibatch = (
#     tf.data.Dataset.from_tensor_slices(data)
#     .prefetch(autotune)
#     .repeat()
#     .shuffle(N)
#     .batch(batch_size)
# )

#nat grad loop
# gamma_start = 1e-2   # deliberately chosen to be too large for this example
# gamma_max = 1e-1   # same max value as before
# gamma_step = 1e-2  # this is much more aggressive increase

# gamma = tf.Variable(gamma_start, dtype=tf.float64)
# gamma_incremented = tf.where(tf.less(gamma, gamma_max), gamma + gamma_step, gamma_max)

# op_ng = NatGradOptimizer(gamma).make_optimize_tensor(model, var_list=[[model.q_mu, model.q_sqrt]])
# op_adam = AdamOptimizer(0.001).make_optimize_tensor(model)
# op_increment_gamma = tf.assign(gamma, gamma_incremented)

# gamma_fallback = 1e-1   # we'll reduce by this factor if there's a cholesky failure
# op_fallback_gamma = tf.assign(gamma, gamma * gamma_fallback)

# sess.run(tf.variables_initializer([gamma]))

# for it in range(1000):
#     try:
#         sess.run(op_ng)
#         sess.run(op_increment_gamma)
#     except tf.errors.InvalidArgumentError:
#         g = sess.run(gamma)
#         print('gamma = {} on iteration {} is too big! Falling back to {}'.format(it, g, g * gamma_fallback))
#         sess.run(op_fallback_gamma)

#     sess.run(op_adam)

#     if it % 100 == 0:
#         print('{} gamma={:.4f} ELBO={:.4f}'.format(it, *sess.run([gamma, model.likelihood_tensor])))
예제 #6
0
from gpflow.utilities import print_summary

# Show GPflow's summary for `model` (defined outside this excerpt).
print_summary(model)

# %% [markdown]
# The objective function for MDN instances is the `log_marginal_likelihood`, which we use for optimization of the parameters. GPflow ensures that only the variables stored in `Parameter` objects are optimized. For the MDN, the only parameters are the weights and the biases of the neural net.
#
# We use the `Scipy` optimizer, which is a wrapper around SciPy's L-BFGS optimization algorithm. Note that GPflow supports other TensorFlow optimizers such as `Adam`, `Adagrad`, and `Adadelta` as well.

# %%
from gpflow.optimizers import Scipy
from gpflow.ci_utils import ci_niter

# Minimize the negated log marginal likelihood (wrapped in tf.function) over
# the model's trainable parameters.
# NOTE(review): ci_niter(1500) presumably shrinks the iteration budget when
# running under CI; confirm against gpflow.ci_utils.
Scipy().minimize(tf.function(lambda: -model.log_marginal_likelihood(data)),
                 variables=model.trainable_parameters,
                 options=dict(maxiter=ci_niter(1500)))

# Report the objective value (not negated) after optimization.
print("Final Likelihood", model.log_marginal_likelihood(data).numpy())

# %% [markdown]
# To evaluate the validity of our model, we draw the posterior density. We also plot $\mu(x)$ of the optimized neural net. Remember that for every $x$ the neural net outputs $M$ means $\mu_m(x)$. These determine the location of the Gaussians. We plot all $M$ means and use their corresponding mixture weight $\pi_m(X)$ to determine their size. Larger dots will have more impact in the Gaussian ensemble.

# %%
# Import the plotting helper, falling back to the repository-rooted module
# path. Only a failed import triggers the fallback; any other error raised
# while importing the helper is surfaced instead of being swallowed.
try:
    from mdn_plotting import plot
except ImportError:
    # VS CODE's root directory is GPflow's top-level directory
    from doc.source.notebooks.tailor.mdn_plotting import plot

# One row, two columns of axes.
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
from gpflow.utilities import print_summary

# Show GPflow's summary for `model` (defined outside this excerpt).
print_summary(model)

# %% [markdown]
# The objective function for MDN instances is the `maximum_log_likelihood_objective`, which we use for optimization of the parameters. GPflow ensures that only the variables stored in `Parameter` objects are optimized. For the MDN, the only parameters are the weights and the biases of the neural net.
#
# We use the `Scipy` optimizer, which is a wrapper around SciPy's L-BFGS optimization algorithm. Note that GPflow supports other TensorFlow optimizers such as `Adam`, `Adagrad`, and `Adadelta` as well.

# %%
from gpflow.optimizers import Scipy
from gpflow.ci_utils import ci_niter

# Minimize the model's compiled training-loss closure over its trainable
# variables.
# NOTE(review): ci_niter(1500) presumably shrinks the iteration budget when
# running under CI; confirm against gpflow.ci_utils.
Scipy().minimize(
    model.training_loss_closure(data, compile=True),
    model.trainable_variables,
    options=dict(maxiter=ci_niter(1500)),
)

# Report the maximum-log-likelihood objective after optimization.
print("Final Likelihood", model.maximum_log_likelihood_objective(data).numpy())

# %% [markdown]
# To evaluate the validity of our model, we draw the posterior density. We also plot $\mu(x)$ of the optimized neural net. Remember that for every $x$ the neural net outputs $M$ means $\mu_m(x)$. These determine the location of the Gaussians. We plot all $M$ means and use their corresponding mixture weight $\pi_m(X)$ to determine their size. Larger dots will have more impact in the Gaussian ensemble.

# %%
# Import the plotting helper, falling back to the repository-rooted module
# path. Only a failed import triggers the fallback; any other error raised
# while importing the helper is surfaced instead of being swallowed.
try:
    from mdn_plotting import plot
except ImportError:
    # VS CODE's root directory is GPflow's top-level directory
    from doc.source.notebooks.tailor.mdn_plotting import plot