def _test_checkpoint_roundtrip(self, use_global_step: bool, num_checkpoints: Optional[int] = 5):
    """
    Performs a saving/restoring roundtrip, either with or without using `global_step`.
    Note that if `global_step` is used, the save will create one checkpoint for each
    value of the global step.
    """
    with tempfile.TemporaryDirectory() as tmp_event_dir:
        # Create a variable and write several checkpoints.
        with session_context(tf.Graph()) as session:
            dummy_var = self._create_dummy_variable(session)
            monitor_context = mon.MonitorContext()
            monitor_context.session = session
            if use_global_step:
                monitor_context.global_step_tensor = mon.create_global_step(session)
            monitor_task = mon.CheckpointTask(tmp_event_dir)
            for i in range(num_checkpoints):
                session.run(dummy_var.assign(i))
                if use_global_step:
                    session.run(monitor_context.global_step_tensor.assign(10 * i))
                monitor_task(monitor_context)

        # Restore the session in a fresh graph and read the variables back.
        # Verify that the latest checkpoint was restored.
        with session_context(tf.Graph()) as session:
            dummy_var = self._create_dummy_variable(session)
            global_step_tensor = mon.create_global_step(session) if use_global_step else None
            mon.restore_session(session, tmp_event_dir)
            self.assertEqual(session.run(dummy_var), num_checkpoints - 1)
            if use_global_step:
                self.assertEqual(session.run(global_step_tensor), 10 * (num_checkpoints - 1))
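# The test above relies on two helpers that are not shown. Minimal sketches of
# what they might look like, assuming a plain TF1 graph/session setup (only the
# names come from the test; the bodies below are assumptions):
import contextlib

import tensorflow as tf


@contextlib.contextmanager
def session_context(graph):
    # Yield a session bound to `graph`, closing it on exit.
    with graph.as_default():
        session = tf.Session(graph=graph)
        try:
            yield session
        finally:
            session.close()


def _create_dummy_variable(self, session):  # method on the test class
    # A scalar variable whose value should survive the checkpoint roundtrip.
    # The name 'dummy' must match between graphs so restore can find it.
    dummy_var = tf.get_variable('dummy', shape=(), dtype=tf.float64,
                                initializer=tf.zeros_initializer())
    session.run(dummy_var.initializer)
    return dummy_var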
# build model
gpflow.reset_default_graph_and_session()
with gpflow.defer_build():
    kernel = gpflow.kernels.RBF(1, active_dims=[0]) + gpflow.kernels.RBF(1, active_dims=[1])
    feature = gpflow.features.InducingPoints(Z)
    model = gpflow.models.SVGP(
        X_aug, Y, kernel, NegativeBinomial(), feat=feature,
        minibatch_size=500, name=name)
model.compile()

# restore/create monitor session
lr = 0.01
monitor_tasks, session, global_step, file_writer = build_monitor(model, path)
optimiser = gpflow.train.AdamOptimizer(lr)

save_dir = './monitor-saves/' + path + model.name
if os.path.isdir(save_dir):
    try:
        mon.restore_session(session, save_dir)
    except ValueError:
        # No usable checkpoint found; start from scratch.
        pass
else:
    os.makedirs(save_dir)
model.anchor(session)

# optimize
with mon.Monitor(monitor_tasks, session, global_step, print_summary=True) as monitor:
    optimiser.minimize(model, step_callback=monitor, maxiter=1000, global_step=global_step)
file_writer.close()
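# build_monitor is not shown above. A plausible sketch, assuming it wires up
# the usual print/checkpoint/tensorboard tasks and returns them together with
# the session, global step and file writer; the task intervals and log
# directory are assumptions:
def build_monitor(model, path):
    session = model.enquire_session()
    global_step = mon.create_global_step(session)
    file_writer = mon.LogdirWriter('./model-tensorboard/' + path + model.name)

    print_task = mon.PrintTimingsTask().with_name('print') \
        .with_condition(mon.PeriodicIterationCondition(10))
    checkpoint_task = mon.CheckpointTask('./monitor-saves/' + path + model.name) \
        .with_name('checkpoint') \
        .with_condition(mon.PeriodicIterationCondition(10)) \
        .with_exit_condition(True)
    tensorboard_task = mon.ModelToTensorBoardTask(file_writer, model) \
        .with_name('tensorboard') \
        .with_condition(mon.PeriodicIterationCondition(10))

    monitor_tasks = [print_task, checkpoint_task, tensorboard_task]
    return monitor_tasks, session, global_step, file_writer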
# NOTE: the snippet originally began mid-chain; the heads of print_task and
# checkpoint_task below are reconstructed from context (printing_freq is a
# hypothetical name, mirroring saving_freq and tensorboard_freq).
print_task = mon.PrintTimingsTask().with_name('print') \
    .with_condition(mon.PeriodicIterationCondition(printing_freq))
checkpoint_task = mon.CheckpointTask(checkpoint_path).with_name('checkpoint') \
    .with_condition(mon.PeriodicIterationCondition(saving_freq)) \
    .with_exit_condition(True)

writer = mon.LogdirWriter(tensorboard_path)
tensorboard_task = mon.ModelToTensorBoardTask(writer, model) \
    .with_name('tensorboard') \
    .with_condition(mon.PeriodicIterationCondition(tensorboard_freq))

monitor_tasks = [print_task, tensorboard_task, checkpoint_task]

#################################### training

with mon.Monitor(monitor_tasks, sess, model.global_step, print_summary=True) as monitor:
    try:
        mon.restore_session(sess, checkpoint_path)
    except ValueError:
        pass

    iterations_to_go = max([ARGS.iterations - sess.run(model.global_step), 0])
    print('Already run {} iterations. Running {} iterations'.format(
        sess.run(model.global_step), iterations_to_go))
    for it in range(iterations_to_go):
        monitor()
        model.train_op(sess)

model.anchor(sess)

#################################### evaluation
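# The evaluation block is cut off above. A minimal sketch of what typically
# follows for these SVGP runs, matching the metrics used elsewhere in this
# file (Xs and Ys are hypothetical names for the held-out test arrays):
pY, pYv = model.predict_y(Xs)
rmse = np.mean((pY - Ys) ** 2.0) ** 0.5
nlpp = -np.mean(-0.5 * np.log(2 * np.pi * pYv) - 0.5 * (Ys - pY) ** 2.0 / pYv)
print('rmse: {:.4f}, nlpp: {:.4f}'.format(rmse, nlpp))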
def main():
    #################################### args
    ARGS = experiment_common.parse_arguments()
    print("\n", "ARGS:", "\n", ARGS, "\n")

    if ARGS.plot_freq is not None and not ARGS.dataset.startswith("demo"):
        raise ValueError("Plotting only supported for demo dataset.")

    random.seed(ARGS.seed)
    np.random.seed(ARGS.seed)
    tf.set_random_seed(ARGS.seed)

    #################################### paths
    file_name = experiment_common.get_file_name(ARGS)
    checkpoint_path, tensorboard_path = experiment_common.create_paths(
        file_name, ARGS.results_path
    )

    #################################### data
    data = experiment_common.get_data(ARGS)

    #################################### model
    model = build_model(ARGS, data.X_train, data.Y_train)

    #################################### init
    sess = model.enquire_session()
    model.init_op(sess)

    #################################### monitoring
    def _write_dict_to_csv(data: Dict, step: int):
        csvsavepath = os.path.join(checkpoint_path, f"{file_name}_{step}.csv")
        with open(csvsavepath, "w") as file:
            writer = csv.writer(file)
            for key, val in data.items():
                writer.writerow([key, val])
        print("CSV WRITTEN " + csvsavepath)

    #################################### training
    tensorboard_writer = LogdirWriter(tensorboard_path)
    checkpoint_task = _create_checkpoint_task(checkpoint_path)
    snr_log_task = _create_snr_log_task(ARGS, checkpoint_path, tensorboard_writer, checkpoint_task)
    tensorboard_task = _create_tensorboard_task(model, tensorboard_writer, ARGS.log_main_freq)
    monitor_tasks = [
        checkpoint_task,
        # snr_log_task,
        # PrintTimingsTask()
        # .with_name("print")
        # .with_condition(PeriodicIterationCondition(interval=100)),
        tensorboard_task,
    ]

    with Monitor(monitor_tasks, sess, model.global_step, print_summary=True) as monitor:
        try:
            restore_session(sess, checkpoint_path)
        except ValueError:
            pass

        initial_global_step = sess.run(model.global_step)
        iterations_to_go = max([ARGS.iterations - initial_global_step, 0])

        if initial_global_step == 0:
            # Log initial values. Bit dodgy.
            tensorboard_task.run(monitor._context)
            if ARGS.log_snr_freq is not None:
                snr_log_task.run(monitor._context)
            if ARGS.plot_freq is not None:
                demo_dataset.plot_data_and_predictions(
                    data, model, tensorboard_writer, sess, step=0
                )

        print(
            "Already run {} iterations. Running {} iterations".format(
                initial_global_step, iterations_to_go
            )
        )

        epoch_train_elbos = []
        epoch_train_dreg_objectives = []
        datapoints_since_last_epoch = 0
        batching_enabled = ARGS.minibatch_size is not None
        minibatch_size = ARGS.minibatch_size if batching_enabled else len(data.X_train)

        for it in range(iterations_to_go):
            monitor()

            if isinstance(model, DregModel) and hasattr(model, "train_op"):
                _, train_elbo, train_dreg_objective = sess.run(
                    [
                        model.train_op,
                        model.likelihood_tensor,
                        model.get_dreg_objective_for_encoder_params(),
                    ]
                )
                epoch_train_elbos.append(train_elbo)
                epoch_train_dreg_objectives.append(train_dreg_objective)
            else:
                model.train_func(sess)

            global_step = sess.run(model.global_step)
            datapoints_since_last_epoch += minibatch_size

            # If batching is disabled then we use the entire dataset each iteration,
            # so there is no point in recording separate epoch statistics.
            if batching_enabled and datapoints_since_last_epoch >= len(data.X_train):
                # We have passed over the entire dataset, so compute epoch stats.
                epoch_train_elbo = np.mean(np.stack(epoch_train_elbos, axis=0))
                tensorboard_writer.add_summary(
                    _create_scalar_summary("optimisation/epoch_train_elbo", epoch_train_elbo),
                    global_step,
                )
                epoch_train_dreg_objective = np.mean(np.stack(epoch_train_dreg_objectives, axis=0))
                tensorboard_writer.add_summary(
                    _create_scalar_summary(
                        "optimisation/epoch_train_dreg_objective", epoch_train_dreg_objective
                    ),
                    global_step,
                )
                datapoints_since_last_epoch = 0
                epoch_train_elbos = []
                epoch_train_dreg_objectives = []

            # `it` runs over range(iterations_to_go), so the final iteration
            # is iterations_to_go - 1.
            if ARGS.plot_freq is not None and (
                (global_step - 1) % ARGS.plot_freq == 0 or it == iterations_to_go - 1
            ):
                demo_dataset.plot_data_and_predictions(
                    data, model, tensorboard_writer, sess, global_step
                )

            if (global_step - 1) % ARGS.log_test_freq == 0 or it == iterations_to_go - 1:
                print("Iteration: {}".format(it))

                #################################### evaluation
                test_elbo = model.compute_log_likelihood(data.X_test)
                loglik, rmse, median_shapiro_W = metrics.compute_metrics(
                    model, data.X_test, data.Y_test, ARGS.num_predict_samples
                )
                res = {}
                res["test_loglik"] = loglik
                res["train_elbo"] = model.compute_log_likelihood(data.X_train)
                res["test_elbo"] = test_elbo
                res["test_shapiro_W_median"] = median_shapiro_W
                res["test_rmse"] = rmse
                res.update(ARGS.__dict__)
                print(res)
                _write_dict_to_csv(res, step=sess.run(model.global_step) - 1)

                tensorboard_writer.add_summary(
                    _create_scalar_summary("optimisation/test_elbo", test_elbo), global_step
                )
                tensorboard_writer.add_summary(
                    _create_scalar_summary("optimisation/test_loglik", loglik), global_step
                )

    model.anchor(sess)
    print(model.as_pandas_table())

    ####################################
    loglik, rmse, median_shapiro_W = metrics.compute_metrics(
        model, data.X_test, data.Y_test, ARGS.num_predict_samples
    )
    res = {}
    res["test_loglik"] = loglik
    res["train_elbo"] = model.compute_log_likelihood(data.X_train)
    res["test_elbo"] = model.compute_log_likelihood(data.X_test)
    res["test_shapiro_W_median"] = median_shapiro_W
    res["test_rmse"] = rmse
    res.update(ARGS.__dict__)
    print(res)

    ################################### save results as csv files for tighter bounds
    _write_dict_to_csv(res, step=sess.run(model.global_step))
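# The helpers used by main() above are not shown. Plausible sketches, assuming
# the gpflow.training.monitor API used elsewhere in this file; the checkpoint
# interval is an assumption, and _create_scalar_summary uses the standard TF1
# Summary proto:
def _create_scalar_summary(tag: str, value: float) -> tf.Summary:
    # Wrap a single scalar in a Summary so it can be written via add_summary.
    return tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])


def _create_checkpoint_task(checkpoint_path: str) -> CheckpointTask:
    return (CheckpointTask(checkpoint_path)
            .with_name("checkpoint")
            .with_condition(PeriodicIterationCondition(interval=1000))
            .with_exit_condition(True))


def _create_tensorboard_task(model, writer: LogdirWriter, log_main_freq: int):
    return (ModelToTensorBoardTask(writer, model)
            .with_name("tensorboard")
            .with_condition(PeriodicIterationCondition(interval=log_main_freq)))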
def ex1():
    fX_dim = 1
    M = 100
    X, Y, Xt, Yt = LoadData.load_ocean()

    # annoyingly only float32 and lower is supported by the conv layers
    f = lambda x: tf.cast(NN.cnn_fn(tf.cast(x, tf.float32), fX_dim), float_type)
    kern = NNComposedKernel(gpflow.kernels.Matern32(fX_dim), f)

    # build the model
    lik = gpflow.likelihoods.Gaussian()
    Z = kmeans2(X, M, minit='points')[0]
    model = NN_SVGP(X, Y, kern, lik, Z=Z, minibatch_size=200)

    session = model.enquire_session()
    global_step = mon.create_global_step(session)

    # print
    print_task = mon.PrintTimingsTask().with_name('print') \
        .with_condition(mon.PeriodicIterationCondition(10)) \
        .with_exit_condition(True)
    sleep_task = mon.SleepTask(0.01).with_name('sleep')
    saver_task = mon.CheckpointTask('./monitor-saves').with_name('saver') \
        .with_condition(mon.PeriodicIterationCondition(10)) \
        .with_exit_condition(True)

    file_writer = mon.LogdirWriter('./model-tensorboard')
    model_tboard_task = mon.ModelToTensorBoardTask(file_writer, model).with_name('model_tboard') \
        .with_condition(mon.PeriodicIterationCondition(10)) \
        .with_exit_condition(True)
    lml_tboard_task = mon.LmlToTensorBoardTask(file_writer, model).with_name('lml_tboard') \
        .with_condition(mon.PeriodicIterationCondition(100)) \
        .with_exit_condition(True)
    custom_tboard_task = CustomTensorBoardTask(file_writer, model, Xt, Yt).with_name('custom_tboard') \
        .with_condition(mon.PeriodicIterationCondition(100)) \
        .with_exit_condition(True)

    monitor_tasks = [print_task, model_tboard_task, lml_tboard_task,
                     custom_tboard_task, saver_task, sleep_task]

    if os.path.isdir('./monitor-saves'):
        mon.restore_session(session, './monitor-saves')

    # use gpflow wrappers to train. NB all session handling is done for us
    optimiser = gpflow.training.AdamOptimizer(0.001)
    with mon.Monitor(monitor_tasks, session, global_step, print_summary=True) as monitor:
        optimiser.minimize(model, step_callback=monitor, maxiter=30000, global_step=global_step)
    file_writer.close()

    print('LML after the optimisation: %f' % model.compute_log_likelihood())

    # predictions
    pY, pYv = model.predict_y(Xt)
    rmse = np.mean((pY - Yt) ** 2.0) ** 0.5
    nlpp = -np.mean(-0.5 * np.log(2 * np.pi * pYv) - 0.5 * (Yt - pY) ** 2.0 / pYv)
    print('rmse is {:.4f}, nlpp is {:.4f}'.format(rmse, nlpp))
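# CustomTensorBoardTask is defined outside this snippet. A sketch following the
# BaseTensorBoardTask pattern from gpflow.training.monitor; the summary tags and
# the placeholder dtype are assumptions:
class CustomTensorBoardTask(mon.BaseTensorBoardTask):
    def __init__(self, file_writer, model, Xt, Yt):
        super().__init__(file_writer, model)
        self.Xt = Xt
        self.Yt = Yt
        self._full_test_err = tf.placeholder(float_type, shape=())
        self._full_test_nlpp = tf.placeholder(float_type, shape=())
        self._summary = tf.summary.merge([
            tf.summary.scalar('test_rmse', self._full_test_err),
            tf.summary.scalar('test_nlpp', self._full_test_nlpp),
        ])

    def run(self, context, *args, **kwargs):
        # Compute held-out RMSE and NLPP and push them to TensorBoard.
        pY, pYv = self.model.predict_y(self.Xt)
        rmse = np.mean((pY - self.Yt) ** 2.0) ** 0.5
        nlpp = -np.mean(-0.5 * np.log(2 * np.pi * pYv)
                        - 0.5 * (self.Yt - pY) ** 2.0 / pYv)
        self._eval_summary(context, {self._full_test_err: rmse,
                                     self._full_test_nlpp: nlpp})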