def create_loss():
    """Creates the loss to be optimized.

    Builds the training dataset and model through
    `create_dataset_and_model_fn`, evaluates the bound selected by
    `config.bound` and writes the per-sequence and per-timestep averages as
    the scalar summaries "train_ll_per_seq" and "train_ll_per_t".

    Returns:
      bound: A float Tensor containing the value of the bound that is being
        optimized.
      loss: A float Tensor that when differentiated yields the gradients
        to apply to the model. Should be optimized via gradient descent.

    Raises:
      ValueError: If `config.bound` is not one of "elbo", "iwae", "fivo" or
        "fivo-aux".
    """
    inputs, targets, lengths, model, _ = create_dataset_and_model_fn(
        config, split="train", shuffle=True, repeat=True)

    # Compute lower bounds on the log likelihood.
    if config.bound == "elbo":
        # The ELBO is evaluated through the IWAE bound with a single sample.
        ll_per_seq, _, _ = bounds.iwae(
            model, (inputs, targets), lengths,
            num_samples=1,
            parallel_iterations=config.parallel_iterations)
    elif config.bound == "iwae":
        ll_per_seq, _, _ = bounds.iwae(
            model, (inputs, targets), lengths,
            num_samples=config.num_samples,
            parallel_iterations=config.parallel_iterations)
    elif config.bound in ("fivo", "fivo-aux"):
        fivo_kwargs = dict(
            num_samples=config.num_samples,
            resampling_criterion=smc.ess_criterion,
            resampling_type=config.resampling_type,
            random_seed=config.random_seed,
            parallel_iterations=config.parallel_iterations)
        # The temperature is only forwarded when relaxed resampling is used.
        if config.resampling_type == "relaxed":
            fivo_kwargs["relaxed_resampling_temperature"] = (
                config.relaxed_resampling_temperature)
        ll_per_seq, _, _, _ = bounds.fivo(
            model, (inputs, targets), lengths, **fivo_kwargs)
    else:
        # Without this branch an unknown bound would only surface later as an
        # UnboundLocalError on `ll_per_seq`.
        raise ValueError(
            'Unsupported bound "{}"; expected one of "elbo", "iwae", "fivo" '
            'or "fivo-aux".'.format(config.bound))

    # Compute loss scaled by number of timesteps.
    ll_per_t = tf.reduce_mean(ll_per_seq / tf.to_float(lengths))
    ll_per_seq = tf.reduce_mean(ll_per_seq)

    tf.summary.scalar("train_ll_per_seq", ll_per_seq)
    tf.summary.scalar("train_ll_per_t", ll_per_t)

    # The loss is the negated bound, so minimizing it maximizes the bound.
    if config.normalize_by_seq_len:
        return ll_per_t, -ll_per_t
    else:
        return ll_per_seq, -ll_per_seq
def create_losses(model, observations, lengths):
    """Creates the loss to be optimized.

    Evaluates the bound selected by `config.bound`, compares it with the
    likelihood reported by `model.likelihood`, and writes the scalar
    summaries "train_ll_bound", "train_true_ll" and "bound_gap".

    Args:
      model: A Trainable GHMM model.
      observations: A set of observations.
      lengths: The lengths of each sequence in the observations.

    Returns:
      loss: A float Tensor that when differentiated yields the gradients
        to apply to the model. Should be optimized via gradient descent.
      bound: A float Tensor containing the value of the bound that is
        being optimized.
      true_ll: The true log-likelihood of the data under the model.
      bound_gap: The gap between the bound and the true log-likelihood.

    Raises:
      ValueError: If `config.bound` is not one of "elbo", "iwae" or "fivo".
    """
    # Compute lower bounds on the log likelihood.
    if config.bound == "elbo":
        # The ELBO is evaluated through the IWAE bound with a single sample.
        ll_per_seq, _, _ = bounds.iwae(
            model, observations, lengths,
            num_samples=1,
            parallel_iterations=config.parallel_iterations)
    elif config.bound == "iwae":
        ll_per_seq, _, _ = bounds.iwae(
            model, observations, lengths,
            num_samples=config.num_samples,
            parallel_iterations=config.parallel_iterations)
    elif config.bound == "fivo":
        fivo_kwargs = dict(
            num_samples=config.num_samples,
            resampling_criterion=smc.ess_criterion,
            resampling_type=config.resampling_type,
            random_seed=config.random_seed,
            parallel_iterations=config.parallel_iterations)
        # The temperature is only forwarded when relaxed resampling is used.
        if config.resampling_type == "relaxed":
            fivo_kwargs["relaxed_resampling_temperature"] = (
                config.relaxed_resampling_temperature)
        ll_per_seq, _, _, _ = bounds.fivo(
            model, observations, lengths, **fivo_kwargs)
    else:
        # Without this branch an unknown bound would only surface later as an
        # UnboundLocalError on `ll_per_seq`.
        raise ValueError(
            'Unsupported bound "{}"; expected one of "elbo", "iwae" or '
            '"fivo".'.format(config.bound))

    ll_per_t = tf.reduce_mean(ll_per_seq / tf.to_float(lengths))

    # Compute the data's true likelihood under the model and the bound gap.
    true_ll_per_seq = model.likelihood(tf.squeeze(observations))
    true_ll_per_t = tf.reduce_mean(true_ll_per_seq / tf.to_float(lengths))
    bound_gap = true_ll_per_seq - ll_per_seq
    bound_gap = tf.reduce_mean(bound_gap / tf.to_float(lengths))

    tf.summary.scalar("train_ll_bound", ll_per_t)
    tf.summary.scalar("train_true_ll", true_ll_per_t)
    tf.summary.scalar("bound_gap", bound_gap)

    # The loss is the negated per-timestep bound.
    return -ll_per_t, ll_per_t, true_ll_per_t, bound_gap
def test_fivo(self):
    """Checks `bounds.fivo` on a seeded VRNN against recorded golden values.

    The bound is built with 4 samples and default resampling; the estimated
    log-likelihood, the log weights and the resampling indicators must all
    match the stored references.
    """
    expected_log_p_hat = [-22.98902512, -14.21689224]
    # Reference log weights, a 7 x 2 x 4 array (last axis: num_samples=4).
    expected_weights = np.array([
        [[-3.66708851, -2.07074022, -4.91751671, -5.03293562],
         [-2.99690723, -3.17782736, -4.50084877, -3.48536515]],
        [[-2.67100811, -2.30541706, -2.34178066, -2.81751347],
         [-8.27518654, -6.71545124, -8.96198845, -7.05567837]],
        [[-5.65190411, -5.94563246, -6.55041981, -5.4783473],
         [-12.34527206, -11.54284477, -11.8667469, -9.69417381]],
        [[-8.71947861, -8.40143299, -8.54593086, -8.42822266],
         [-4.28782988, -4.50591278, -3.40847206, -2.63650274]],
        [[-12.7003831, -13.5039815, -12.3569726, -12.9489622],
         [-4.28782988, -4.50591278, -3.40847206, -2.63650274]],
        [[-16.4520301, -16.3611698, -15.0314846, -16.4197006],
         [-4.28782988, -4.50591278, -3.40847206, -2.63650274]],
        [[-20.7010765, -20.1379165, -19.0020351, -20.2395458],
         [-4.28782988, -4.50591278, -3.40847206, -2.63650274]],
    ])
    expected_resampled = np.array([
        [1., 0.],
        [0., 0.],
        [0., 1.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
    ])

    # Seed the graph before any op is created so the run is reproducible.
    tf.set_random_seed(1234)
    with self.test_session() as session:
        model, inputs, targets, lengths = create_vrnn(random_seed=1234)
        fivo_outputs = bounds.fivo(
            model, (inputs, targets), lengths,
            num_samples=4,
            random_seed=1234,
            parallel_iterations=1)
        session.run(tf.global_variables_initializer())
        log_p_hat, weights, resampled, _ = session.run(fivo_outputs)

        self.assertAllClose(expected_log_p_hat, log_p_hat)
        self.assertAllClose(expected_weights, weights)
        self.assertAllClose(expected_resampled, resampled)
def test_fivo_aux_relaxed(self):
    """Checks relaxed-resampling FIVO-AUX against recorded golden values.

    The VRNN is created with `use_tilt=True` and the bound is built with
    4 samples and `resampling_type="relaxed"`; the estimated
    log-likelihood, the log weights and the resampling indicators must all
    match the stored references.
    """
    expected_log_p_hat = [-23.1395, -14.271059]
    # Reference log weights, a 7 x 2 x 4 array (last axis: num_samples=4).
    expected_weights = np.array([
        [[-5.19826221, -3.55476403, -5.98663855, -6.08058834],
         [-6.31685925, -5.70243931, -7.07638931, -6.18138981]],
        [[-3.97986865, -3.58831525, -3.85753584, -3.5010016],
         [-11.38203049, -8.66213989, -11.23646641, -10.02024746]],
        [[-6.62269831, -6.36680222, -6.78096485, -5.80072498],
         [-3.55419445, -8.11326408, -3.48766923, -3.08593249]],
        [[-10.56472301, -10.16084099, -9.96741676, -8.5270071],
         [-6.04880285, -7.80853653, -4.72652149, -3.49711013]],
        [[-13.36585426, -16.08720398, -13.33416367, -13.1017189],
         [-0., -0., -0., -0.]],
        [[-17.54233551, -17.35167503, -16.79163361, -16.51471138],
         [0., -0., -0., -0.]],
        [[-19.74024963, -18.69452858, -17.76246452, -18.76182365],
         [0., -0., -0., -0.]],
    ])
    expected_resampled = np.array([
        [1., 0.],
        [0., 1.],
        [0., 0.],
        [0., 1.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
    ])

    # Seed the graph before any op is created so the run is reproducible.
    tf.set_random_seed(1234)
    with self.test_session() as session:
        model, inputs, targets, lengths = create_vrnn(
            random_seed=1234, use_tilt=True)
        fivo_outputs = bounds.fivo(
            model, (inputs, targets), lengths,
            num_samples=4,
            random_seed=1234,
            parallel_iterations=1,
            resampling_type="relaxed")
        session.run(tf.global_variables_initializer())
        log_p_hat, weights, resampled, _ = session.run(fivo_outputs)

        self.assertAllClose(expected_log_p_hat, log_p_hat)
        self.assertAllClose(expected_weights, weights)
        self.assertAllClose(expected_resampled, resampled)
def test_fivo_relaxed(self):
    """Checks relaxed-resampling FIVO against recorded golden values.

    The bound is built on a seeded VRNN with 4 samples and
    `resampling_type="relaxed"`; the estimated log-likelihood, the log
    weights and the resampling indicators must all match the stored
    references.
    """
    expected_log_p_hat = [-22.942394, -14.273882]
    # Reference log weights, a 7 x 2 x 4 array (last axis: num_samples=4).
    expected_weights = np.array([
        [[-3.66708851, -2.07074118, -4.91751575, -5.03293514],
         [-2.99690628, -3.17782831, -4.50084877, -3.48536515]],
        [[-2.84939098, -2.30087185, -2.35649204, -2.48417377],
         [-8.27518654, -6.71545172, -8.96199131, -7.05567837]],
        [[-5.92327023, -5.9433074, -6.5826683, -5.04259014],
         [-12.34527206, -11.54284668, -11.86675072, -9.69417477]],
        [[-8.95323944, -8.40061855, -8.52760506, -7.99130583],
         [-4.58102798, -4.56017351, -3.46283388, -2.65550804]],
        [[-12.87836456, -13.49628639, -12.31680107, -12.74228859],
         [-4.58102798, -4.56017351, -3.46283388, -2.65550804]],
        [[-16.78347397, -16.35150909, -14.98797417, -16.35162735],
         [-4.58102798, -4.56017351, -3.46283388, -2.65550804]],
        [[-20.81165886, -20.1307621, -18.92229652, -20.17458153],
         [-4.58102798, -4.56017351, -3.46283388, -2.65550804]],
    ])
    expected_resampled = np.array([
        [1., 0.],
        [0., 0.],
        [0., 1.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
    ])

    # Seed the graph before any op is created so the run is reproducible.
    tf.set_random_seed(1234)
    with self.test_session() as session:
        model, inputs, targets, lengths = create_vrnn(random_seed=1234)
        fivo_outputs = bounds.fivo(
            model, (inputs, targets), lengths,
            num_samples=4,
            random_seed=1234,
            parallel_iterations=1,
            resampling_type="relaxed")
        session.run(tf.global_variables_initializer())
        log_p_hat, weights, resampled, _ = session.run(fivo_outputs)

        self.assertAllClose(expected_log_p_hat, log_p_hat)
        self.assertAllClose(expected_weights, weights)
        self.assertAllClose(expected_resampled, resampled)
def create_bound(model, xs, lengths):
    """Creates the bound to be evaluated.

    Args:
      model: The model passed through to `bounds.iwae` / `bounds.fivo`.
      xs: The observations passed through to the bound.
      lengths: The lengths of each sequence; the bound is divided by them
        to obtain a per-timestep value.

    Returns:
      For "elbo" and "iwae": a tuple `(bound_per_t, log_weights)`.
      For "fivo": a tuple `(bound_per_t, log_weights, resampled)`.

    Raises:
      ValueError: If `config.bound` is not one of "elbo", "iwae" or "fivo".
    """
    if config.bound == "elbo":
        # The ELBO is evaluated through the IWAE bound with a single sample.
        ll_per_seq, log_weights, _ = bounds.iwae(
            model, xs, lengths,
            num_samples=1,
            parallel_iterations=config.parallel_iterations)
    elif config.bound == "iwae":
        ll_per_seq, log_weights, _ = bounds.iwae(
            model, xs, lengths,
            num_samples=config.num_samples,
            parallel_iterations=config.parallel_iterations)
    elif config.bound == "fivo":
        ll_per_seq, log_weights, resampled, _ = bounds.fivo(
            model, xs, lengths,
            num_samples=config.num_samples,
            resampling_criterion=smc.ess_criterion,
            resampling_type=config.resampling_type,
            random_seed=config.random_seed,
            parallel_iterations=config.parallel_iterations)
    else:
        # Without this branch an unknown bound would only surface later as an
        # UnboundLocalError on `ll_per_seq`.
        raise ValueError(
            'Unsupported bound "{}"; expected one of "elbo", "iwae" or '
            '"fivo".'.format(config.bound))

    # Compute bound scaled by number of timesteps.
    bound_per_t = ll_per_seq / tf.to_float(lengths)
    if config.bound == "fivo":
        # Only FIVO produces resampling indicators.
        return bound_per_t, log_weights, resampled
    else:
        return bound_per_t, log_weights
def test_fivo(self):
    """Compares `bounds.fivo` outputs on a seeded VRNN with golden values.

    Uses 4 samples and the default resampling type. The log-likelihood
    estimate, the log weights and the resampling indicators are each
    checked with `assertAllClose`.
    """
    golden_log_p_hat = [-22.98902512, -14.21689224]
    # Golden log weights, a 7 x 2 x 4 array (last axis: num_samples=4).
    golden_weights = np.array([
        [[-3.66708851, -2.07074022, -4.91751671, -5.03293562],
         [-2.99690723, -3.17782736, -4.50084877, -3.48536515]],
        [[-2.67100811, -2.30541706, -2.34178066, -2.81751347],
         [-8.27518654, -6.71545124, -8.96198845, -7.05567837]],
        [[-5.65190411, -5.94563246, -6.55041981, -5.4783473],
         [-12.34527206, -11.54284477, -11.8667469, -9.69417381]],
        [[-8.71947861, -8.40143299, -8.54593086, -8.42822266],
         [-4.28782988, -4.50591278, -3.40847206, -2.63650274]],
        [[-12.7003831, -13.5039815, -12.3569726, -12.9489622],
         [-4.28782988, -4.50591278, -3.40847206, -2.63650274]],
        [[-16.4520301, -16.3611698, -15.0314846, -16.4197006],
         [-4.28782988, -4.50591278, -3.40847206, -2.63650274]],
        [[-20.7010765, -20.1379165, -19.0020351, -20.2395458],
         [-4.28782988, -4.50591278, -3.40847206, -2.63650274]],
    ])
    golden_resampled = np.array([
        [1., 0.],
        [0., 0.],
        [0., 1.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
    ])

    # The graph-level seed has to be set before the model ops are built.
    tf.set_random_seed(1234)
    with self.test_session() as session:
        model, inputs, targets, lengths = create_vrnn(random_seed=1234)
        bound_tensors = bounds.fivo(
            model, (inputs, targets), lengths,
            num_samples=4,
            random_seed=1234,
            parallel_iterations=1)
        session.run(tf.global_variables_initializer())
        log_p_hat, weights, resampled, _ = session.run(bound_tensors)

        self.assertAllClose(golden_log_p_hat, log_p_hat)
        self.assertAllClose(golden_weights, weights)
        self.assertAllClose(golden_resampled, resampled)
def create_bound(model, xs, lengths):
    """Creates the bound to be evaluated.

    Args:
      model: The model passed through to `bounds.iwae` / `bounds.fivo`.
      xs: The observations passed through to the bound.
      lengths: The lengths of each sequence; the bound is divided by them
        to obtain a per-timestep value.

    Returns:
      For "elbo" and "iwae": a tuple `(bound_per_t, log_weights)`.
      For "fivo": a tuple `(bound_per_t, log_weights, resampled)`.

    Raises:
      ValueError: If `config.bound` is not one of "elbo", "iwae" or "fivo".
    """
    if config.bound == "fivo":
        ll_per_seq, log_weights, resampled, _ = bounds.fivo(
            model, xs, lengths,
            num_samples=config.num_samples,
            resampling_criterion=smc.ess_criterion,
            resampling_type=config.resampling_type,
            random_seed=config.random_seed,
            parallel_iterations=config.parallel_iterations)
        # Only FIVO produces resampling indicators, so it returns a 3-tuple.
        return ll_per_seq / tf.to_float(lengths), log_weights, resampled

    if config.bound == "elbo":
        # The ELBO is evaluated through the IWAE bound with a single sample.
        num_samples = 1
    elif config.bound == "iwae":
        num_samples = config.num_samples
    else:
        # Without this branch an unknown bound would only surface later as an
        # UnboundLocalError on `ll_per_seq`.
        raise ValueError(
            'Unsupported bound "{}"; expected one of "elbo", "iwae" or '
            '"fivo".'.format(config.bound))

    ll_per_seq, log_weights, _ = bounds.iwae(
        model, xs, lengths,
        num_samples=num_samples,
        parallel_iterations=config.parallel_iterations)

    # Compute bound scaled by number of timesteps.
    return ll_per_seq / tf.to_float(lengths), log_weights
def create_graph():
    """Builds the evaluation graph for the ELBO, IWAE and FIVO bounds.

    The dataset split named by `config.split` is read without shuffling or
    repeating, and all three bounds are evaluated on the same batch.

    Returns:
      lower_bounds: A tuple of float Tensors containing the values of the 3
        evidence lower bounds (ELBO, IWAE, FIVO), summed across the batch.
      total_batch_length: The total number of timesteps in the batch, summed
        across batch examples.
      batch_size: The batch size.
      global_step: The global step the checkpoint was loaded from.
    """
    step = tf.train.get_or_create_global_step()
    inputs, targets, lengths, model, _ = create_dataset_and_model_fn(
        config, split=config.split, shuffle=False, repeat=False)
    observations = (inputs, targets)

    # Per-sequence lower bounds on the log likelihood.
    # ELBO: the IWAE bound with a single sample.
    elbo_per_seq, _, _ = bounds.iwae(
        model, observations, lengths,
        num_samples=1,
        parallel_iterations=config.parallel_iterations)
    iwae_per_seq, _, _ = bounds.iwae(
        model, observations, lengths,
        num_samples=config.num_samples,
        parallel_iterations=config.parallel_iterations)
    # The resampling type should only be used for training, so it is not
    # forwarded here.
    fivo_per_seq, _, _, _ = bounds.fivo(
        model, observations, lengths,
        num_samples=config.num_samples,
        resampling_criterion=smc.ess_criterion,
        random_seed=config.random_seed,
        parallel_iterations=config.parallel_iterations)

    # Sum each bound over the batch.
    summed_bounds = (
        tf.reduce_sum(elbo_per_seq),
        tf.reduce_sum(iwae_per_seq),
        tf.reduce_sum(fivo_per_seq),
    )

    num_sequences = tf.shape(lengths)[0]
    num_timesteps = tf.reduce_sum(lengths)
    return (summed_bounds, num_timesteps, num_sequences, step)
def create_graph():
    """Creates the graph to sample from the model.

    First, the model is conditioned on a prefix by sampling a batch of data
    and trimming it to prefix_length. The configured bound is used to do the
    conditioning. Then the final state from the conditioning is used to
    sample from the model.

    Returns:
      samples: A Tensor of shape [sample_length, batch_size,
        num_samples, data_dimension] representing samples from the model.
      prefixes: A Tensor of shape [prefix_length, batch_size, data_dimension]
        representing the prefixes the model was conditioned on.

    Raises:
      ValueError: If `config.bound` is not one of "elbo", "iwae" or "fivo".
    """
    inputs, targets, lengths, model, mean = create_dataset_and_model_fn(
        config, split=config.split, shuffle=True, repeat=True)

    # Trim the batch to the prefix and treat every sequence as exactly
    # prefix_length long.
    input_prefixes = inputs[:config.prefix_length]
    target_prefixes = targets[:config.prefix_length]
    prefix_lengths = tf.ones_like(lengths) * config.prefix_length

    # Only the final state returned by the bound is needed for sampling.
    if config.bound == "elbo":
        # NOTE(review): this branch conditions with num_samples=1 while the
        # inputs below are tiled config.num_samples times -- confirm that the
        # resulting shapes agree when config.num_samples > 1.
        _, _, state = bounds.iwae(
            model, (input_prefixes, target_prefixes), prefix_lengths,
            num_samples=1)
    elif config.bound == "iwae":
        _, _, state = bounds.iwae(
            model, (input_prefixes, target_prefixes), prefix_lengths,
            num_samples=config.num_samples)
    elif config.bound == "fivo":
        _, _, _, state = bounds.fivo(
            model, (input_prefixes, target_prefixes), prefix_lengths,
            num_samples=config.num_samples,
            resampling_criterion=smc.ess_criterion,
            random_seed=config.random_seed)
    else:
        # Without this branch an unknown bound would only surface later as an
        # UnboundLocalError on `state`.
        raise ValueError(
            'Unsupported bound "{}"; expected one of "elbo", "iwae" or '
            '"fivo".'.format(config.bound))

    # Repeat the input that follows the prefix once per sample.
    sample_inputs = tf.tile(
        inputs[config.prefix_length], [config.num_samples, 1])
    samples = sample_from_model(model, state, sample_inputs, mean)
    return samples, target_prefixes
def test_fivo_relaxed(self):
    """Compares relaxed-resampling FIVO outputs with golden values.

    Uses a seeded VRNN, 4 samples and `resampling_type="relaxed"`. The
    log-likelihood estimate, the log weights and the resampling indicators
    are each checked with `assertAllClose`.
    """
    golden_log_p_hat = [-22.942394, -14.273882]
    # Golden log weights, a 7 x 2 x 4 array (last axis: num_samples=4).
    golden_weights = np.array([
        [[-3.66708851, -2.07074118, -4.91751575, -5.03293514],
         [-2.99690628, -3.17782831, -4.50084877, -3.48536515]],
        [[-2.84939098, -2.30087185, -2.35649204, -2.48417377],
         [-8.27518654, -6.71545172, -8.96199131, -7.05567837]],
        [[-5.92327023, -5.9433074, -6.5826683, -5.04259014],
         [-12.34527206, -11.54284668, -11.86675072, -9.69417477]],
        [[-8.95323944, -8.40061855, -8.52760506, -7.99130583],
         [-4.58102798, -4.56017351, -3.46283388, -2.65550804]],
        [[-12.87836456, -13.49628639, -12.31680107, -12.74228859],
         [-4.58102798, -4.56017351, -3.46283388, -2.65550804]],
        [[-16.78347397, -16.35150909, -14.98797417, -16.35162735],
         [-4.58102798, -4.56017351, -3.46283388, -2.65550804]],
        [[-20.81165886, -20.1307621, -18.92229652, -20.17458153],
         [-4.58102798, -4.56017351, -3.46283388, -2.65550804]],
    ])
    golden_resampled = np.array([
        [1., 0.],
        [0., 0.],
        [0., 1.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
    ])

    # The graph-level seed has to be set before the model ops are built.
    tf.set_random_seed(1234)
    with self.test_session() as session:
        model, inputs, targets, lengths = create_vrnn(random_seed=1234)
        bound_tensors = bounds.fivo(
            model, (inputs, targets), lengths,
            num_samples=4,
            random_seed=1234,
            parallel_iterations=1,
            resampling_type="relaxed")
        session.run(tf.global_variables_initializer())
        log_p_hat, weights, resampled, _ = session.run(bound_tensors)

        self.assertAllClose(golden_log_p_hat, log_p_hat)
        self.assertAllClose(golden_weights, weights)
        self.assertAllClose(golden_resampled, resampled)