Example #1
def test_periodicity_group(capsys):
    """Test that groups are called at different periods."""

    task_a = ExecuteCallback(lambda: print("a", end=" "))
    task_b = ExecuteCallback(lambda: print("b", end=" "))
    task_X = ExecuteCallback(lambda: print("X", end=" "))

    group_often = MonitorTaskGroup([task_a, task_b], period=1)
    group_seldom = MonitorTaskGroup([task_X], period=3)
    monitor = Monitor(group_often, group_seldom)
    for i in range(7):
        monitor(i)

    out, _ = capsys.readouterr()
    expected = "a b X a b a b a b X a b a b a b X "
    assert out == expected

    # AutoGraph mode
    compiled_monitor = tf.function(monitor)
    for i in tf.range(7):
        compiled_monitor(i)

    # When using TF's range and compiling the monitor, the Python prints
    # only happen once, during tracing.
    out, _ = capsys.readouterr()
    assert out == "a b X "
Example #2
def test_MonitorTaskGroup_and_Monitor(task_or_tasks):
    group = MonitorTaskGroup(task_or_tasks, period=2)

    # Check that `tasks` is normalised to a list (the custom setter wraps a single task).
    assert isinstance(group.tasks, list)

    # Smoke test the __call__
    group(0)
    compiled_group = tf.function(group)
    compiled_group(0)

    # Smoke test the Monitor wrapper
    monitor = Monitor(group)
    monitor(0)
    compiled_monitor = tf.function(monitor)
    compiled_monitor(0)
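
The group's custom setter normalises a single task into a list, so both call forms above are valid. A minimal stand-alone sketch of the same behaviour, assuming only GPflow's public monitor API:

from gpflow.monitor import ExecuteCallback, Monitor, MonitorTaskGroup

# A single task and a list of tasks are both accepted; `tasks` is always a list.
single = MonitorTaskGroup(ExecuteCallback(lambda: None), period=2)
several = MonitorTaskGroup([ExecuteCallback(lambda: None) for _ in range(2)], period=2)
assert isinstance(single.tasks, list)
assert isinstance(several.tasks, list)

# Either group (or both) can then be wrapped in a Monitor and called with a step index.
Monitor(single, several)(0)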
Example #3
def monitored_training_loop(model,
                            training_loss,
                            epochs: int = 1,
                            num_batches_per_epoch: int = 1,
                            fast_tasks: gpf.monitor.MonitorTaskGroup = None,
                            slow_tasks: gpf.monitor.MonitorTaskGroup = None,
                            logging_epoch_freq: int = 100,
                            manager: tf.train.CheckpointManager = None):
    """Monitors (with images) Adam optimizer on model with training_loss.

    Monitoring is not inside tf.function so this method will be slower than
    monitored_training_tf_loop.

    :param model: The model to be trained.
    :param training_loss: A function that returns the training objective.
    :param epochs: The number of full data passes (epochs).
    :param num_batches_per_epoch: The number of batches per epoch
    :param fast_tasks: gpflow monitor fast tasks e.g.
        MonitorTaskGroup([ScalarToTensorBoard(log_dir, training_loss, "elbo")])
    :param slow_tasks: gpflow monitor slow tasks e.g. plotting images
    :param logging_epoch_freq: The epoch frequency that the training loss is printed.
    """
    optimizer = tf.optimizers.Adam()
    # checkpoint_path = "training_2/cp-{epoch:04d}.ckpt"
    # checkpoint_dir = os.path.dirname(checkpoint_path)

    @tf.function
    def tf_optimization_step():
        optimizer.minimize(training_loss, model.trainable_variables)

    monitor = Monitor(fast_tasks, slow_tasks)

    t = time.time()
    for epoch in range(epochs):
        for _ in range(num_batches_per_epoch):
            tf_optimization_step()
            # duration = t - time.time()
            # print("Iteration duration: ", duration)
            # t = time.time()
        monitor(epoch)
        epoch_id = epoch + 1
        if epoch_id % logging_epoch_freq == 0:
            tf.print(f"Epoch {epoch_id}: ELBO (train) {training_loss()}")
            if manager is not None:
                manager.save()
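
A hypothetical usage sketch for the loop above; `model`, `log_dir`, and the remaining arguments are placeholders that the original example assumes to exist elsewhere:

import gpflow as gpf

# `model` and `log_dir` are assumed to be defined elsewhere in the project.
training_loss = model.training_loss_closure(compile=True)
fast_tasks = gpf.monitor.MonitorTaskGroup(
    [gpf.monitor.ScalarToTensorBoard(log_dir, training_loss, "elbo")], period=1)
monitored_training_loop(model,
                        training_loss,
                        epochs=100,
                        num_batches_per_epoch=10,
                        fast_tasks=fast_tasks,
                        logging_epoch_freq=10)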
Example #4
def test_ExecuteCallback_arguments(capsys):
    def cb1(x=None, **_):
        assert x is not None
        print(x)

    def cb2(**_):
        print(2)

    def cb3(y=None, **_):
        assert y is not None
        print(y)

    group1 = MonitorTaskGroup([ExecuteCallback(cb1), ExecuteCallback(cb2)])
    group2 = MonitorTaskGroup(ExecuteCallback(cb3))
    monitor = Monitor(group1, group2)
    monitor(0, x=1, y=3)
    out, _ = capsys.readouterr()
    assert out == "1\n2\n3\n"
Example #5
    def create_monitor(self, model):

        model_task = ModelToTensorBoard(self.monitor_path, model)
        self.monitor = Monitor(MonitorTaskGroup([model_task], period=5))


# data_minibatch = (
#     tf.data.Dataset.from_tensor_slices(data)
#     .prefetch(autotune)
#     .repeat()
#     .shuffle(N)
#     .batch(batch_size)
# )

#nat grad loop
# gamma_start = 1e-2   # deliberately chosen to be too large for this example
# gamma_max = 1e-1   # same max value as before
# gamma_step = 1e-2  # this is much more aggressive increase

# gamma = tf.Variable(gamma_start, dtype=tf.float64)
# gamma_incremented = tf.where(tf.less(gamma, gamma_max), gamma + gamma_step, gamma_max)

# op_ng = NatGradOptimizer(gamma).make_optimize_tensor(model, var_list=[[model.q_mu, model.q_sqrt]])
# op_adam = AdamOptimizer(0.001).make_optimize_tensor(model)
# op_increment_gamma = tf.assign(gamma, gamma_incremented)

# gamma_fallback = 1e-1   # we'll reduce by this factor if there's a cholesky failure
# op_fallback_gamma = tf.assign(gamma, gamma * gamma_fallback)

# sess.run(tf.variables_initializer([gamma]))

# for it in range(1000):
#     try:
#         sess.run(op_ng)
#         sess.run(op_increment_gamma)
#     except tf.errors.InvalidArgumentError:
#         g = sess.run(gamma)
#         print('gamma = {} on iteration {} is too big! Falling back to {}'.format(it, g, g * gamma_fallback))
#         sess.run(op_fallback_gamma)

#     sess.run(op_adam)

#     if it % 100 == 0:
#         print('{} gamma={:.4f} ELBO={:.4f}'.format(it, *sess.run([gamma, model.likelihood_tensor])))
Example #6
def monitor(model, tmp_path):
    tmp_path = str(tmp_path)

    def lml_callback():
        return model.log_marginal_likelihood()

    def print_callback():
        print("foo")

    return Monitor(
        MonitorTaskGroup(
            [
                ModelToTensorBoard(tmp_path, model),
                ScalarToTensorBoard(tmp_path, lml_callback, "lml"),
            ],
            period=2,
        ),
        MonitorTaskGroup(ExecuteCallback(print_callback), period=1),
    )
Example #7
def monitored_training_tf_loop(model,
                               training_loss,
                               epochs: int = 1,
                               num_batches_per_epoch: int = 1,
                               fast_tasks: gpf.monitor.MonitorTaskGroup = None,
                               logging_epoch_freq: int = 100,
                               manager: tf.train.CheckpointManager = None):
    """Monitors Adam optimizer on model with training_loss.

    Both training and monitoring are inside tf.function (no image monitoring).
    This method only monitors the fast tasks as matplotlib code cannot be built
    in a TF graph.

    :param model: The model to be trained.
    :param training_loss: A function that returns the training objective.
    :param epochs: The number of full data passes (epochs).
    :param num_batches_per_epoch: The number of batches per epoch
    :param fast_tasks: gpflow monitor fast tasks e.g.
        MonitorTaskGroup([ScalarToTensorBoard(log_dir, training_loss, "elbo")])
    :param logging_epoch_freq: The epoch frequency that the training loss is printed.
    """
    optimizer = tf.optimizers.Adam()
    monitor = Monitor(fast_tasks)

    @tf.function
    def monitored_tf_opt_step(epoch):
        optimizer.minimize(training_loss, model.trainable_variables)
        monitor(epoch)

    # t = time.time()
    epochs = tf.constant(epochs)  # needs to be a tf.constant
    for epoch in tf.range(epochs):
        for _ in range(num_batches_per_epoch):
            monitored_tf_opt_step(epoch)
        epoch_id = epoch + 1
        if epoch_id % logging_epoch_freq == 0:
            tf.print(f"Epoch {epoch_id}: ELBO (train) {training_loss()}")
            if manager is not None:
                manager.save()
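
Because monitoring here runs inside the compiled optimization step, only fast TensorBoard-style tasks can be passed; a brief hypothetical call, reusing the same placeholder objects as in the sketch for monitored_training_loop above:

# `model`, `training_loss`, and `fast_tasks` are the hypothetical objects from the
# earlier sketch; matplotlib-based image tasks would fail inside the TF graph.
monitored_training_tf_loop(model,
                           training_loss,
                           epochs=100,
                           num_batches_per_epoch=10,
                           fast_tasks=fast_tasks,
                           logging_epoch_freq=10)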
Example #8
def test_scipy_monitor_called(model):
    task = DummyTask()
    monitor = Monitor(MonitorTaskGroup(task, period=1))
    opt = gpflow.optimizers.Scipy()
    opt.minimize(model.training_loss, model.trainable_variables, step_callback=monitor)
    assert task.current_step > 1
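
DummyTask is not shown in this example; a plausible minimal definition, assuming it subclasses gpflow.monitor.MonitorTask (whose base __call__ records the step before delegating to run):

from gpflow.monitor import MonitorTask


class DummyTask(MonitorTask):
    """No-op task: the MonitorTask base class stores the step it was last called with."""

    def run(self, **kwargs):
        pass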
Example #9
output_logdir = enumerated_logdir()

model_task = ModelToTensorBoard(output_logdir, model)
elbo_task = ScalarToTensorBoard(output_logdir, elbo_cb, "elbo")
print_task = ExecuteCallback(callback=print_cb)

# We group these tasks and specify a period of `100` steps for them
fast_tasks = MonitorTaskGroup([model_task, elbo_task, print_task], period=100)

# We also want to see the model's fit during the optimisation
image_task = ImageToTensorBoard(output_logdir, plot_model, "samples_image")

# We typically don't want to plot too frequently during optimisation,
# which is why we specify a larger period for this task.
slow_tasks = MonitorTaskGroup(image_task, period=500)
monitor = Monitor(fast_tasks, slow_tasks)


def monitored_training_loop(epochs: int):
    tf_optimization_step = tf.function(optimization_step)

    batches = iter(train_dataset)

    for epoch in range(epochs):
        for _ in range(ci_niter(num_batches_per_epoch)):
            batch = next(batches)
            tf_optimization_step(model, batch)

        epoch_id = epoch + 1
        monitor(epoch, epoch_id=epoch_id, data=data)
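
The callbacks referenced above (elbo_cb, print_cb, and plot_model) are defined elsewhere in the notebook; hypothetical versions that fit the keyword arguments forwarded by monitor(epoch, epoch_id=epoch_id, data=data) could look like this:

# Hypothetical callbacks; the keyword arguments come from the monitor call above,
# and `model` / `data` are assumed to exist in the notebook.
def elbo_cb(data=None, **_):
    return model.elbo(data)  # assumes an SVGP-style model with an `elbo` method


def print_cb(epoch_id=None, **_):
    print(f"Epoch {epoch_id} finished")


def plot_model(fig, ax):
    # ImageToTensorBoard passes a Matplotlib figure and axes to draw into.
    ...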
Example #10
# %% [markdown]
# We now group the tasks in a set of fast and slow tasks and pass them to the monitor.
# This allows us to execute the groups at a different frequency.

# %%
# Plotting tasks can be quite slow. We want to run them less frequently.
# We group them in a `MonitorTaskGroup` and set the period to 5.
slow_tasks = MonitorTaskGroup(image_task, period=5)

# The other tasks are fast. We run them at each iteration of the optimisation.
fast_tasks = MonitorTaskGroup([model_task, lml_task], period=1)

# Both groups are passed to the monitor.
# `slow_tasks` will be run five times less frequently than `fast_tasks`.
monitor = Monitor(fast_tasks, slow_tasks)

# %%
training_loss = model.training_loss_closure(
    compile=True)  # compile=True (default): compiles using tf.function
opt = tf.optimizers.Adam()

for step in range(optimisation_steps):
    opt.minimize(training_loss, model.trainable_variables)
    monitor(step)  # <-- run the monitoring

# %% [markdown]
# TensorBoard is accessible through the browser, after launching the server by running `tensorboard --logdir ${logdir}`.
# See the [TensorFlow documentation on TensorBoard](https://www.tensorflow.org/tensorboard/get_started) for more information.

Example #11
    def configure_tensorboard_monitor(self,
                                      scalar_period,
                                      imgs_period,
                                      nb_images=1,
                                      do_phase_space=None):
        if do_phase_space is None:
            do_phase_space = self.model.phase_space_dim == 2
        if self.experiment.tensorboard_dir is None or scalar_period < 1:
            return None

        def create_bloss_tasks(directory):
            bloss_names = [
                '-ly', '-lx', 'penalty_term', 'alpha_term', '-H', '+KL'
            ]
            bloss_tasks = []

            def create_lambda(i):
                return lambda train_bloss=None, **kwargs: train_bloss[i]

            for i, name in enumerate(bloss_names):
                bloss_tasks.append(
                    ScalarToTensorBoard(directory, create_lambda(i),
                                        'bloss/' + name))
            return bloss_tasks

        train_dir = os.path.join(self.experiment.tensorboard_dir, 'train')
        test_dir = os.path.join(self.experiment.tensorboard_dir, 'test')

        # diff_task = ModelToTensorBoard(train_dir, self.model.sde_model.diffusion)
        # drift_task = ModelToTensorBoard(train_dir, self.model.sde_model.drift_svgp)
        diff_task = []
        drift_task = []

        train_loss = ScalarToTensorBoard(
            train_dir, lambda train_loss=None, **kwargs: train_loss, 'loss')
        test_loss = ScalarToTensorBoard(
            test_dir,
            lambda epoch=None, kl_scheduler=None, **kwargs: self.test_loss(
                epoch, kl_scheduler),
            'loss')

        train_bloss_list = create_bloss_tasks(train_dir)

        # train_bloss_list = []  # TODO: remove or add

        generator = self.experiment.test_dataset if self.experiment.has_test else self.experiment.train_dataset
        y_inputs = []
        y_targets = []
        for y in generator.take(1):
            for y_input, y_target in self.tbptt_chunks_generator(y):
                break
        #         y_inputs.append(y_input)
        #         y_targets.append(y_target)
        # y_input = tf.concat(y_inputs, axis=1)
        # y_target = tf.concat(y_targets, axis=1)

        def calc_drift_error(**kwargs):
            samples, entropies, encoded_dist, q0_stats, states = draw_fast_samples(
                self.model, None, y_input)
            fx, var_fx = self.model.sde_model.drift_svgp.predict_f(
                tf.reshape(samples, (-1, samples.shape[-1])))
            fx = tf.reshape(fx, samples.shape)
            return tf.reduce_mean(
                tf.square(samples[..., 1:, :] - samples[..., :-1, :] -
                          fx[..., :-1, :]))

        drift_error = ScalarToTensorBoard(train_dir, calc_drift_error,
                                          'drift_error')
        beta_alpha = ScalarToTensorBoard(
            train_dir, lambda **kwargs: tf.reduce_mean(
                self.model.sde_model.diffusion.expected_diffusion()),
            'beta_div_alpha')
        if imgs_period > 0:
            print('Creating image callbacks')
            images_dir = os.path.join(self.experiment.tensorboard_dir,
                                      'images')

            nrows = 2 if self.model.phase_space_dim > 3 else 1
            encoded_samples = ImageToTensorBoard(
                images_dir,
                lambda f, a: plot_encoded_samples(f, a, self.model, y_input),
                'encoded_samples',
                fig_kw={'figsize': (12, 12)},
                subplots_kw={
                    'nrows': nrows,
                    'ncols': np.ceil(5 / 2).astype(int)
                })

            def plot_synth(fig, axes):
                plot_synthetic_samples(fig,
                                       axes,
                                       self.model,
                                       y_input,
                                       y_target,
                                       simulation_steps=y.shape[-2])

            nrows = 2 if do_phase_space else 1
            synthetic_samples = ImageToTensorBoard(
                images_dir,
                plot_synth,
                'synthetic_samples',
                fig_kw={'figsize': (12, 12)},
                subplots_kw={
                    'nrows': nrows,
                    'ncols': nb_images
                })

            def plot_dec(fig, axes):
                plot_decoder(fig, axes, self.model, y_input, y_target)

            nrows = 2 if self.experiment.batch_size > 1 else 1
            dec_images = ImageToTensorBoard(
                images_dir,
                plot_dec,
                'decoder',
                fig_kw={'figsize': (12, 12)},
                subplots_kw={
                    'nrows': nrows,
                    'ncols': min(self.experiment.batch_size // nrows, 2)
                })
            drift_images = ImageToTensorBoard(
                images_dir,
                lambda fig, axes: plot_drift_predictions(
                    fig, axes, self.model, y_input),
                'drift',
                fig_kw={'figsize': (12, 12)},
                subplots_kw={
                    'nrows': nrows,
                    'ncols': self.model.sde_model.dimension
                })

            monitor = Monitor(
                MonitorTaskGroup([train_loss, test_loss] + train_bloss_list,
                                 period=scalar_period),
                # MonitorTaskGroup([drift_error, beta_alpha], period=scalar_period),
                MonitorTaskGroup([
                    synthetic_samples, dec_images, encoded_samples,
                    drift_images
                ],
                                 period=imgs_period))
            print('done')
        else:
            monitor = Monitor(
                MonitorTaskGroup([train_loss, test_loss] + train_bloss_list,
                                 period=scalar_period),
                MonitorTaskGroup([drift_error, beta_alpha],
                                 period=scalar_period),
            )
        return monitor