def test_glow(self):
  """Runs one Glow training forward pass and checks the objective is sane."""
  with tf.Graph().as_default():
    hparams = glow.glow_hparams()
    hparams.depth = 15
    hparams.n_levels = 2
    # Large batch for data-dependent initialization; batch_size=1 keeps the
    # actual forward pass cheap.
    hparams.init_batch_size = 256
    hparams.batch_size = 1
    cifar_problem = problems.problem(
        'image_cifar10_plain_random_shift')
    hparams.problem = cifar_problem
    model = glow.Glow(hparams, tf.estimator.ModeKeys.TRAIN)
    train_dataset = cifar_problem.dataset(MODES.TRAIN)
    one_shot = train_dataset.make_one_shot_iterator()
    x_batch, y_batch = self.batch(one_shot)
    features = {'inputs': x_batch, 'targets': y_batch}
    _, obj_dict = model.body(features)
    objective = obj_dict['training']
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      # Run initialization.
      init_op = tf.get_collection('glow_init_op')
      sess.run(init_op)
      # Run forward pass.
      obj_np = sess.run(objective)
      mean_obj = np.mean(obj_np)
      # Check that one forward-propagation does not NaN, i.e
      # initialization etc works as expected.
      self.assertTrue(mean_obj > 0 and mean_obj < 10.0)
def test_encoder_decoder(self):
  """Tests encoder-decoder reversibility and per-level latent shapes.

  Bug fix: the final shape check used
  `self.assertTrue(x_inv_np.shape, (16, 8, 8, 64))`. `assertTrue`'s second
  positional argument is the failure *message*, so that line only asserted
  that the shape tuple was truthy and always passed. Replaced with
  `assertEqual`, which actually compares the shapes.
  """
  with tf.Graph().as_default():
    hparams = glow.glow_hparams()
    hparams.n_levels = 3
    hparams.depth = 2
    x = tf.random_uniform(shape=(16, 64, 64, 4), seed=0)
    x_inv, _, eps, z_levels, _ = glow_ops.encoder_decoder(
        "encoder_decoder", x, hparams, reverse=False)
    # Reverse pass reuses the same variable scope and the forward eps.
    x_inv_inv, _, z_inv_levels, _ = glow_ops.encoder_decoder(
        "encoder_decoder", x_inv, hparams, eps=eps, reverse=True)
    with tf.Session() as session:
      session.run(tf.global_variables_initializer())
      diff, x_inv_np, z_levels_np, z_inv_levels_np = session.run(
          [x - x_inv_inv, x_inv, z_levels, z_inv_levels])
      # n_levels - 1 intermediate latents are split off.
      self.assertLen(z_levels_np, 2)
      self.assertLen(z_inv_levels_np, 2)
      # (h_i, w_i, c_i) = (h_{i-1}/f, w_{i-1}/f, c_{i-1}*(2f)/2) where (f=2)
      self.assertEqual(z_levels_np[0].shape, (16, 32, 32, 8))
      self.assertEqual(z_levels_np[1].shape, (16, 16, 16, 16))
      self.assertEqual(z_inv_levels_np[0].shape, (16, 32, 32, 8))
      self.assertEqual(z_inv_levels_np[1].shape, (16, 16, 16, 16))
      self.assertEqual(x_inv_np.shape, (16, 8, 8, 64))
      self.assertTrue(np.allclose(diff, 0.0, atol=1e-2))
def test_latent_dist_encoder_lstm(self):
  """conv_lstm prior: at init (mu, sigma) == (latent, 1) and state updates."""
  with tf.Graph().as_default():
    rng = np.random.RandomState(0)
    # Initialize x, latent, state.
    x_rand = rng.randn(12, 32, 32, 16).astype(np.float32)
    latent_rand = rng.randn(12, 32, 32, 16).astype(np.float32)
    state_rand = rng.randn(12, 32, 32, 16).astype(np.float32)
    x_t = tf.convert_to_tensor(x_rand)
    latent_t = tf.convert_to_tensor(latent_rand)
    state_t = tf.convert_to_tensor(state_rand)
    init_state = tf.contrib.rnn.LSTMStateTuple(state_t, state_t)
    hparams = glow.glow_hparams()
    hparams.add_hparam("latent_dist_encoder", "conv_lstm")
    # latent_skip adds the latent directly to the prior mean.
    hparams.add_hparam("latent_skip", True)
    prior_dist, new_state = glow_ops.compute_prior(
        "lstm_prior", x_t, latent=latent_t, hparams=hparams, state=init_state,
        condition=True)
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      # Test initialization (mu, sigma) = (z, 1.0)
      ops = [prior_dist.loc, prior_dist.scale, new_state.h - init_state.h]
      mean, scale, diff_np = sess.run(ops)
      # With latent_skip and zero-initialized conv output, mean == latent.
      self.assertTrue(np.allclose(latent_rand - mean, 0.0))
      self.assertTrue(np.allclose(scale, 1.0))
      # State update.
      self.assertFalse(np.allclose(diff_np, 0.0))
def check_split_latent_conditioning(self, merge_std):
  """Checks glow_ops.split with pointwise latent conditioning.

  Args:
    merge_std: level_scale setting, also used as the variable-scope name
      for the split op.
  """
  with tf.Graph().as_default():
    rng = np.random.RandomState(0)
    x_rand = rng.randn(12, 32, 32, 32).astype(np.float32)
    latent_rand = rng.randn(12, 32, 32, 16).astype(np.float32)
    x_t = tf.convert_to_tensor(x_rand)
    latent_t = tf.convert_to_tensor(latent_rand)
    hparams = glow.glow_hparams()
    hparams.level_scale = merge_std
    hparams.add_hparam("latent_dist_encoder", "pointwise")
    # Test initialization.
    # x2 ~ N(scale * latent, 1.0) where initial scale is 1.0
    # Split is on the channel axis: second half of the channels is x2.
    exp_x2 = x_rand[:, :, :, 16:]
    exp_eps = x_rand[:, :, :, 16:] - latent_rand
    x_inv, _, eps, x2_t, _ = glow_ops.split(
        merge_std, x_t, cond_latents=latent_t, hparams=hparams,
        condition=True)
    # Test reversibility.
    x_inv_inv, _, _ = glow_ops.split(
        merge_std, x_inv, cond_latents=latent_t, eps=eps, reverse=True,
        hparams=hparams, condition=True)
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      actual_eps, actual_x2, diff_np = sess.run([eps, x2_t, x_inv_inv - x_t])
      self.assertTrue(np.allclose(diff_np, 0.0, atol=1e-5))
      self.assertTrue(np.allclose(actual_eps, exp_eps))
      self.assertTrue(np.allclose(exp_x2, actual_x2))
def test_encoder_decoder(self):
  """Tests encoder-decoder reversibility with data-dependent init.

  Bug fix: the final shape check used
  `self.assertTrue(x_inv_np.shape, (1, 8, 8, 64))`. `assertTrue`'s second
  positional argument is the failure *message*, so the tuple was never
  compared and the line always passed. Replaced with `assertEqual`.
  """
  with tf.Graph().as_default():
    hparams = glow.glow_hparams()
    hparams.n_levels = 3
    hparams.depth = 6
    rng = np.random.RandomState(0)
    x_np = rng.rand(1, 64, 64, 4)
    x_t = tf.convert_to_tensor(x_np, dtype=tf.float32)
    # init=True enables data-dependent initialization of actnorm variables.
    init_ops = [glow_ops.get_variable_ddi, glow_ops.actnorm]
    with arg_scope(init_ops, init=True):
      x_inv, _, eps, z_levels, _ = glow_ops.encoder_decoder(
          "encoder_decoder", x_t, hparams, reverse=False)
      x_inv_inv, _, z_inv_levels, _ = glow_ops.encoder_decoder(
          "encoder_decoder", x_inv, hparams, eps=eps, reverse=True)
    with tf.Session() as session:
      session.run(tf.global_variables_initializer())
      x_inv_np = session.run(x_inv)
      z_levels_np, z_inv_levels_np, x_inv_inv_np = session.run(
          [z_levels, z_inv_levels, x_inv_inv])
      diff = x_inv_inv_np - x_np
      # n_levels - 1 intermediate latents are split off.
      self.assertLen(z_levels_np, 2)
      self.assertLen(z_inv_levels_np, 2)
      # (h_i, w_i, c_i) = (h_{i-1}/f, w_{i-1}/f, c_{i-1}*(2f)/2) where (f=2)
      self.assertEqual(z_levels_np[0].shape, (1, 32, 32, 8))
      self.assertEqual(z_levels_np[1].shape, (1, 16, 16, 16))
      self.assertEqual(z_inv_levels_np[0].shape, (1, 32, 32, 8))
      self.assertEqual(z_inv_levels_np[1].shape, (1, 16, 16, 16))
      self.assertEqual(x_inv_np.shape, (1, 8, 8, 64))
      self.assertTrue(np.allclose(diff, 0.0, atol=1e-2))
def test_glow(self):
  """Runs one Glow training forward pass and checks the objective is sane."""
  with tf.Graph().as_default():
    hparams = glow.glow_hparams()
    hparams.depth = 15
    hparams.n_levels = 2
    # Large batch for data-dependent initialization; batch_size=1 keeps the
    # actual forward pass cheap.
    hparams.init_batch_size = 256
    hparams.batch_size = 1
    hparams.data_dir = ''
    cifar_problem = problems.problem('image_cifar10_plain_random_shift')
    hparams.problem = cifar_problem
    model = glow.Glow(hparams, tf.estimator.ModeKeys.TRAIN)
    train_dataset = cifar_problem.dataset(MODES.TRAIN)
    one_shot = train_dataset.make_one_shot_iterator()
    x_batch, y_batch = self.batch(one_shot)
    features = {'inputs': x_batch, 'targets': y_batch}
    _, obj_dict = model.body(features)
    objective = obj_dict['training']
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      # Run initialization.
      init_op = tf.get_collection('glow_init_op')
      sess.run(init_op)
      # Run forward pass.
      obj_np = sess.run(objective)
      mean_obj = np.mean(obj_np)
      # Check that one forward-propagation does not NaN, i.e
      # initialization etc works as expected.
      self.assertTrue(mean_obj > 0 and mean_obj < 10.0)
def check_revnet_reversibility(self, op, name):
  """Asserts that running `op` in reverse undoes the forward pass.

  Args:
    op: glow op taking (name, x, hparams, reverse) and returning a pair.
    name: variable-scope name shared by the forward and reverse calls.
  """
  with tf.Graph().as_default():
    hparams = glow.glow_hparams()
    hparams.depth = 2
    inputs = tf.random_uniform(shape=(16, 32, 32, 4), seed=0)
    # Forward pass, then invert it under the same variable scope.
    encoded, _ = op(name, inputs, hparams, reverse=False)
    decoded, _ = op(name, encoded, hparams, reverse=True)
    with tf.Session() as session:
      session.run(tf.global_variables_initializer())
      residual = session.run(inputs - decoded)
      self.assertTrue(np.allclose(residual, 0.0, atol=1e-2))
def test_encoder_decoder_practical_usage(self):
  """Tests the following sequence of operations.

  1. Define forward network with arg_scope(init=True).
  2. Run one-forward pass to do data-dependent initialization and save.
  3. Define forward and reverse network with arg_scope(init=False)
  4. Check that reverse(forward(x)) == x
  """
  hparams = glow.glow_hparams()
  hparams.n_levels = 2
  hparams.depth = 12
  # Graph 1: init=True triggers data-dependent initialization (actnorm).
  with tf.Graph().as_default():
    rng = np.random.RandomState(0)
    x_rand = np.asarray(rng.rand(1, 4, 4, 4), dtype=np.float32)
    x_t = tf.convert_to_tensor(x_rand)
    ops = [glow_ops.get_variable_ddi, glow_ops.actnorm]
    with arg_scope(ops, init=True):
      x_inv, _, _, _ = glow_ops.encoder_decoder("revnet", x_t, hparams,
                                                reverse=False)
    curr_dir = tempfile.mkdtemp()
    model_path = os.path.join(curr_dir, "model")
    with tf.Session() as session:
      saver = tf.train.Saver()
      session.run(tf.global_variables_initializer())
      # One forward pass performs the data-dependent init before saving.
      session.run(x_inv)
      saver.save(session, model_path)
  # Graph 2: restore the initialized variables and check reversibility.
  with tf.Graph().as_default():
    rng = np.random.RandomState(0)
    x_rand = np.asarray(rng.rand(1, 4, 4, 4), dtype=np.float32)
    x_t = tf.convert_to_tensor(x_rand)
    ops = [glow_ops.get_variable_ddi, glow_ops.actnorm]
    with arg_scope(ops, init=False):
      x_inv2, _, all_eps, _ = glow_ops.encoder_decoder("revnet", x_t, hparams,
                                                       reverse=False)
      x_inv_inv_, _ = glow_ops.encoder_decoder("revnet", x_inv2, hparams,
                                               eps=all_eps, reverse=True)
    with tf.Session() as session:
      saver = tf.train.Saver()
      saver.restore(session, model_path)
      x_inv_inv_np = session.run(x_inv_inv_)
      diff = np.abs(x_inv_inv_np - x_rand)
      self.assertTrue(np.allclose(diff, 0.0, atol=1e-3))
def test_revnet_reversibility(self, op, name, coupling):
  """Asserts that `op` with the given coupling is invertible.

  Args:
    op: glow op taking (name, x, hparams, reverse) and returning a pair.
    name: variable-scope name shared by the forward and reverse calls.
    coupling: coupling-layer variant to exercise.
  """
  with tf.Graph().as_default():
    hparams = glow.glow_hparams()
    hparams.depth = 2
    hparams.coupling = coupling
    inputs = tf.random_uniform(shape=(16, 32, 32, 4), seed=0)
    # Forward pass, then invert it under the same variable scope.
    encoded, _ = op(name, inputs, hparams, reverse=False)
    decoded, _ = op(name, encoded, hparams, reverse=True)
    with tf.Session() as session:
      session.run(tf.global_variables_initializer())
      residual = session.run(inputs - decoded)
      self.assertTrue(np.allclose(residual, 0.0, atol=1e-2))
def get_glow_hparams(self):
  """Returns glow hparams extended with latent-encoder test settings."""
  hparams = glow.glow_hparams()
  # Latent-conditioning configuration; latent_skip enables latent skip
  # connections.
  extra_hparams = [
      ("num_cond_latents", 1),
      ("latent_architecture", "glow_resnet"),
      ("model_input", False),
      ("latent_skip", True),
      ("latent_encoder_depth", 2),
      ("latent_encoder_width", 256),
      ("latent_pre_output_channels", 256),
      ("latent_dist_encoder", "conv_net"),
      ("latent_time_filter_size", 3),
  ]
  for key, value in extra_hparams:
    hparams.add_hparam(key, value)
  return hparams
def next_frame_glow_hparams():
  """Hparams for next_frame_glow."""
  hparams = glow.glow_hparams()
  # Possible modes are conditional and unconditional
  hparams.add_hparam("gen_mode", "conditional")
  hparams.add_hparam("learn_top_scale", False)
  hparams.add_hparam("condition_all_levels", True)
  # For each video, substitutes "num_input_frames + num_output_frames" with a
  # randomly sampled patch of length "num_train_frames" during training.
  # -1 indicates that the entire video is used for training.
  hparams.add_hparam("num_train_frames", -1)
  # The following are hparams that model the latent transitions.
  # Encoder that maps the latents to a Gaussian distribution.
  # This function is used to model the prior over z_{t}. Can be,
  # Pointwise -> point-wise multiplication of z_{t-1}.
  # conv_net -> one-layer convolution over z_{t-1} .. z_{t - num_cond_latents}
  # conv3d_net or conv_lstm
  hparams.add_hparam("latent_dist_encoder", "conv_net")
  # Number of latents used in the encoder above.
  hparams.add_hparam("num_cond_latents", 1)
  hparams.add_hparam("latent_architecture", "glow_resnet")
  hparams.add_hparam("latent_apply_dilations", False)
  hparams.add_hparam("latent_dilation_rates", [1, 3])
  # Use latent skip connections
  hparams.add_hparam("model_input", False)
  hparams.add_hparam("cond_first_frame", False)
  hparams.add_hparam("latent_skip", True)
  hparams.add_hparam("latent_encoder_depth", 2)
  hparams.add_hparam("latent_encoder_width", 512)
  hparams.add_hparam("latent_dropout", 0.0)
  hparams.add_hparam("latent_pre_output_channels", 512)
  hparams.add_hparam("latent_activation", "relu")
  hparams.add_hparam("latent_noise", 0.0)
  # Pretrains the glow encoder for "pretrain_steps" number of steps.
  # By default, don't pretrain and learn end-to-end
  hparams.add_hparam("pretrain_steps", -1)
  # Raw-video modalities: frames are consumed and produced as raw pixels.
  hparams.bottom = {
      "inputs": modalities.video_raw_bottom,
      "targets": modalities.video_raw_targets_bottom,
  }
  hparams.loss = {
      "targets": modalities.video_l1_raw_loss,
  }
  hparams.top = {
      "targets": modalities.video_raw_top,
  }
  # Large batch for data-dependent initialization only.
  hparams.init_batch_size = 256
  hparams.batch_size = 32
  # Possible options: are prev_frame, single_conv and normal
  hparams.top_prior = "single_conv"
  return hparams
def test_glow_inference(self):
  """Trains one init step, saves a checkpoint, then runs inference from it."""
  hparams = glow.glow_hparams()
  hparams.depth = 15
  hparams.n_levels = 2
  hparams.data_dir = ''
  curr_dir = tempfile.mkdtemp()
  # Training pipeline
  with tf.Graph().as_default():
    cifar_problem = problems.problem(
        'image_cifar10_plain_random_shift')
    hparams.problem = cifar_problem
    model = glow.Glow(hparams, tf_estimator.ModeKeys.TRAIN)
    train_dataset = cifar_problem.dataset(MODES.TRAIN)
    one_shot = train_dataset.make_one_shot_iterator()
    x_batch, y_batch = self.batch(one_shot)
    features = {'inputs': x_batch, 'targets': y_batch}
    model_path = os.path.join(curr_dir, 'model')
    model(features)
    with tf.Session() as session:
      saver = tf.train.Saver()
      session.run(tf.global_variables_initializer())
      # Data-dependent initialization before saving the checkpoint.
      init_op = tf.get_collection('glow_init_op')
      session.run(init_op)
      z = session.run([model.z])
      mean_z = np.mean(z)
      # The latent must be finite after initialization.
      is_undefined = np.isnan(mean_z) or np.isinf(mean_z)
      self.assertTrue(not is_undefined)
      saver.save(session, model_path)
  # Inference pipeline
  with tf.Graph().as_default():
    cifar_problem = problems.problem(
        'image_cifar10_plain_random_shift')
    hparams.problem = cifar_problem
    model = glow.Glow(hparams, tf_estimator.ModeKeys.PREDICT)
    test_dataset = cifar_problem.dataset(MODES.EVAL)
    one_shot = test_dataset.make_one_shot_iterator()
    x_batch, y_batch = self.batch(one_shot)
    features = {'inputs': x_batch, 'targets': y_batch}
    model_path = os.path.join(curr_dir, 'model')
    predictions = model.infer(features)
    with tf.Session() as session:
      saver = tf.train.Saver()
      saver.restore(session, model_path)
      predictions_np = session.run(predictions)
      # Sampled images must be valid 8-bit pixel values.
      self.assertTrue(np.all(predictions_np <= 255))
      self.assertTrue(np.all(predictions_np >= 0))
def get_glow_hparams(self):
  """Returns glow hparams extended with latent-encoder test settings."""
  hparams = glow.glow_hparams()
  # Latent-conditioning configuration; latent_skip enables latent skip
  # connections.
  extra_hparams = [
      ("num_cond_latents", 1),
      ("latent_architecture", "glow_resnet"),
      ("model_input", False),
      ("latent_apply_dilations", False),
      ("latent_skip", True),
      ("latent_encoder_depth", 2),
      ("latent_encoder_width", 256),
      ("latent_pre_output_channels", 256),
      ("latent_dist_encoder", "conv_net"),
      ("latent_time_filter_size", 3),
  ]
  for key, value in extra_hparams:
    hparams.add_hparam(key, value)
  return hparams
def test_glow_inference(self):
  """Trains one init step, saves a checkpoint, then runs inference from it."""
  hparams = glow.glow_hparams()
  hparams.depth = 15
  hparams.n_levels = 2
  hparams.data_dir = ''
  curr_dir = tempfile.mkdtemp()
  # Training pipeline
  with tf.Graph().as_default():
    cifar_problem = problems.problem('image_cifar10_plain_random_shift')
    hparams.problem = cifar_problem
    model = glow.Glow(hparams, tf.estimator.ModeKeys.TRAIN)
    train_dataset = cifar_problem.dataset(MODES.TRAIN)
    one_shot = train_dataset.make_one_shot_iterator()
    x_batch, y_batch = self.batch(one_shot)
    features = {'inputs': x_batch, 'targets': y_batch}
    model_path = os.path.join(curr_dir, 'model')
    model(features)
    with tf.Session() as session:
      saver = tf.train.Saver()
      session.run(tf.global_variables_initializer())
      # Data-dependent initialization before saving the checkpoint.
      init_op = tf.get_collection('glow_init_op')
      session.run(init_op)
      z = session.run([model.z])
      mean_z = np.mean(z)
      # The latent must be finite after initialization.
      is_undefined = np.isnan(mean_z) or np.isinf(mean_z)
      self.assertTrue(not is_undefined)
      saver.save(session, model_path)
  # Inference pipeline
  with tf.Graph().as_default():
    cifar_problem = problems.problem('image_cifar10_plain_random_shift')
    hparams.problem = cifar_problem
    model = glow.Glow(hparams, tf.estimator.ModeKeys.PREDICT)
    test_dataset = cifar_problem.dataset(MODES.EVAL)
    one_shot = test_dataset.make_one_shot_iterator()
    x_batch, y_batch = self.batch(one_shot)
    features = {'inputs': x_batch, 'targets': y_batch}
    model_path = os.path.join(curr_dir, 'model')
    predictions = model.infer(features)
    with tf.Session() as session:
      saver = tf.train.Saver()
      saver.restore(session, model_path)
      predictions_np = session.run(predictions)
      # Sampled images must be valid 8-bit pixel values.
      self.assertTrue(np.all(predictions_np <= 255))
      self.assertTrue(np.all(predictions_np >= 0))
def test_encoder_decoder_practical_usage(self):
  """Tests the following sequence of operations.

  1. Define forward network with arg_scope(init=True).
  2. Run one-forward pass to do data-dependent initialization and save.
  3. Define forward and reverse network with arg_scope(init=False)
  4. Check that reverse(forward(x)) == x
  """
  hparams = glow.glow_hparams()
  hparams.n_levels = 2
  hparams.depth = 12
  # Graph 1: init=True triggers data-dependent initialization (actnorm).
  with tf.Graph().as_default():
    rng = np.random.RandomState(0)
    x_rand = np.asarray(rng.rand(1, 4, 4, 4), dtype=np.float32)
    x_t = tf.convert_to_tensor(x_rand)
    ops = [glow_ops.get_variable_ddi, glow_ops.actnorm]
    with arg_scope(ops, init=True):
      x_inv, _, _, _, _ = glow_ops.encoder_decoder(
          "revnet", x_t, hparams, reverse=False)
    curr_dir = tempfile.mkdtemp()
    model_path = os.path.join(curr_dir, "model")
    with tf.Session() as session:
      saver = tf.train.Saver()
      session.run(tf.global_variables_initializer())
      # One forward pass performs the data-dependent init before saving.
      session.run(x_inv)
      saver.save(session, model_path)
  # Graph 2: restore the initialized variables and check reversibility.
  with tf.Graph().as_default():
    rng = np.random.RandomState(0)
    x_rand = np.asarray(rng.rand(1, 4, 4, 4), dtype=np.float32)
    x_t = tf.convert_to_tensor(x_rand)
    ops = [glow_ops.get_variable_ddi, glow_ops.actnorm]
    with arg_scope(ops, init=False):
      x_inv2, _, all_eps, _, _ = glow_ops.encoder_decoder(
          "revnet", x_t, hparams, reverse=False)
      x_inv_inv_, _, _, _ = glow_ops.encoder_decoder(
          "revnet", x_inv2, hparams, eps=all_eps, reverse=True)
    with tf.Session() as session:
      saver = tf.train.Saver()
      saver.restore(session, model_path)
      x_inv_inv_np = session.run(x_inv_inv_)
      diff = np.abs(x_inv_inv_np - x_rand)
      self.assertTrue(np.allclose(diff, 0.0, atol=1e-3))
def get_glow_hparams(self):
  """Returns glow hparams extended with latent-encoder test settings."""
  hparams = glow.glow_hparams()
  # Latent-conditioning configuration; latent_skip enables latent skip
  # connections.
  extra_hparams = [
      ("mode", tf.estimator.ModeKeys.TRAIN),
      ("num_cond_latents", 1),
      ("latent_architecture", "glow_resnet"),
      ("model_input", False),
      ("latent_apply_dilations", False),
      ("latent_skip", True),
      ("latent_encoder_depth", 2),
      ("latent_encoder_width", 256),
      ("latent_pre_output_channels", 256),
      ("latent_dist_encoder", "conv_net"),
      ("latent_time_filter_size", 3),
      ("latent_activation", "relu"),
      ("latent_dropout", 0.0),
      ("latent_noise", 0.0),
  ]
  for key, value in extra_hparams:
    hparams.add_hparam(key, value)
  return hparams
def test_glow(self):
  """Default Glow hparams: one forward pass yields a finite objective."""
  with tf.Graph().as_default():
    hparams = glow.glow_hparams()
    model = glow.Glow(hparams, tf.estimator.ModeKeys.TRAIN)
    cifar_problem = problems.problem(
        'image_cifar10_plain_random_shift')
    train_dataset = cifar_problem.dataset(MODES.TRAIN)
    one_shot = train_dataset.make_one_shot_iterator()
    x_batch, y_batch = self.batch(one_shot)
    features = {'inputs': x_batch, 'targets': y_batch}
    _, obj_dict = model.body(features)
    objective = obj_dict['training']
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      obj_np = sess.run(objective)
      mean_obj = np.mean(obj_np)
      # Check that one forward-propagation does not NaN, i.e
      # initialization etc works as expected.
      is_undefined = np.isnan(mean_obj) or np.isinf(mean_obj)
      self.assertTrue(not is_undefined)
def test_encoder_decoder(self):
  """Tests encoder-decoder reversibility and output shape.

  Bug fix: the shape check used
  `self.assertTrue(x_inv_np.shape, (16, 8, 8, 64))`. `assertTrue`'s second
  positional argument is the failure *message*, so the tuple was never
  compared and the check always passed. With n_levels=2 on a
  (16, 64, 64, 4) input the encoder output is (16, 16, 16, 32): each level
  squeezes (h/2, w/2, c*4), non-top levels split half the channels into
  eps, and the top level squeezes without splitting — matching the
  per-level formula used by the sibling encoder-decoder tests. The stale
  (16, 8, 8, 64) tuple corresponds to n_levels=3 and was masked by the
  assertTrue bug; replaced with an actual `assertEqual`.
  """
  with tf.Graph().as_default():
    hparams = glow.glow_hparams()
    hparams.n_levels = 2
    hparams.depth = 2
    x = tf.random_uniform(shape=(16, 64, 64, 4), seed=0)
    x_inv, _, eps = glow_ops.encoder_decoder("encoder_decoder", x, hparams,
                                             reverse=False)
    # Reverse pass reuses the same variable scope and the forward eps.
    x_inv_inv, _ = glow_ops.encoder_decoder("encoder_decoder", x_inv, hparams,
                                            eps=eps, reverse=True)
    with tf.Session() as session:
      session.run(tf.global_variables_initializer())
      diff, x_inv_np = session.run([x - x_inv_inv, x_inv])
      self.assertEqual(x_inv_np.shape, (16, 16, 16, 32))
      self.assertTrue(np.allclose(diff, 0.0, atol=1e-3))