def testDiscreteAutoregressiveFlowReverseGradients(self, loc_only):
    """Verifies gradients flow through `flow.reverse` to the MADE weights.

    Args:
        loc_only: Whether the autoregressive network predicts only a
            location shift (True) or both location and scale logits (False).
    """
    batch_size = 2
    length = 4
    vocab_size = 2
    if loc_only:
        units = vocab_size
        network_ = ed.layers.MADE(units, [16, 16])
        network = network_
    else:
        units = 2 * vocab_size
        network_ = ed.layers.MADE(units, [16, 16])
        # Add a large negative value to the first scale logit so the scale
        # component of the flow starts out effectively deterministic.
        mask = tf.reshape([0] * vocab_size + [-1e10] + [0] * (vocab_size - 1),
                          [1, 1, 2 * vocab_size])
        network = lambda inputs, **kwargs: mask + network_(inputs, **kwargs)
    with tf.GradientTape() as tape:
        base = ed.OneHotCategorical(
            logits=tf.random.normal([batch_size, length, vocab_size]))
        flow = ed.layers.DiscreteAutoregressiveFlow(network, 1.)
        flow_rv = flow(base)
        # np.random.randint's upper bound is exclusive, so draw from
        # [0, vocab_size) to cover every token id. (The previous
        # `vocab_size - 1` bound could only ever draw token 0 here.)
        features = np.random.randint(0, vocab_size,
                                     size=(batch_size, length))
        features = tf.one_hot(features, depth=vocab_size, dtype=tf.float32)
        loss = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=flow.reverse(features),
                logits=flow_rv.distribution.base.logits))
    grads = tape.gradient(loss, network_.weights)
    for grad in grads:
        self.assertIsNotNone(grad)
def testDiscreteAutoregressiveFlowSample(self, loc_only):
    """Samples through the flow and checks the output shape and value range.

    Graph-mode variant: variables are created lazily, so the output is
    materialized once before running the TF1 initializer.
    """
    batch_size = 5
    length = 2
    vocab_size = 2
    if loc_only:
        network = ed.layers.MADE(vocab_size, [])
    else:
        # Push the first scale logit to -1e10 so the scale component of the
        # flow starts out effectively deterministic.
        scale_mask = tf.reshape(
            [0] * vocab_size + [-1e10] + [0] * (vocab_size - 1),
            [1, 1, 2 * vocab_size])
        made = ed.layers.MADE(2 * vocab_size, [])

        def network(inputs, **kwargs):
            return scale_mask + made(inputs, **kwargs)

    flow = ed.layers.DiscreteAutoregressiveFlow(network, 1.)
    shared_logits = tf.tile(
        tf.random.normal([length, vocab_size])[tf.newaxis],
        [batch_size, 1, 1])
    base = ed.OneHotCategorical(logits=shared_logits, dtype=tf.float32)
    outputs = flow(base)
    # Touching .value forces the layer to build its tf.Variables so the
    # global initializer below can find them.
    _ = outputs.value
    self.evaluate(tf1.global_variables_initializer())
    sampled = self.evaluate(outputs)
    self.assertEqual(sampled.shape, (batch_size, length, vocab_size))
    self.assertAllGreaterEqual(sampled, 0)
    self.assertAllLessEqual(sampled, vocab_size - 1)
def testDiscreteAutoregressiveFlowRandomVariable(self, loc_only):
    """Checks dtype/shape/range of flow samples and log-prob invariance.

    The flow is a bijection on one-hot sequences, so `reverse(flow(x))`
    recovers `x` and the log-probability of a point is preserved.

    Args:
        loc_only: Whether the autoregressive network predicts only a
            location shift (True) or both location and scale logits (False).
    """
    batch_size = 2
    length = 4
    vocab_size = 5
    if loc_only:
        units = vocab_size
        network = ed.layers.MADE(units, [])
    else:
        units = 2 * vocab_size
        # Add a large negative value to the first scale logit so the scale
        # component of the flow starts out effectively deterministic.
        mask = tf.reshape([0] * vocab_size + [-1e10] + [0] * (vocab_size - 1),
                          [1, 1, 2 * vocab_size])
        network_ = ed.layers.MADE(units, [])
        network = lambda inputs, **kwargs: mask + network_(inputs, **kwargs)
    base = ed.OneHotCategorical(
        logits=tf.random.normal([batch_size, length, vocab_size]),
        dtype=tf.float32)
    flow = ed.layers.DiscreteAutoregressiveFlow(network, 1.)
    flow_rv = flow(base)
    self.assertEqual(flow_rv.dtype, tf.float32)
    self.assertEqual(flow_rv.shape, (batch_size, length, vocab_size))
    self.assertAllGreaterEqual(tf.convert_to_tensor(flow_rv), 0)
    self.assertAllLessEqual(tf.convert_to_tensor(flow_rv), vocab_size - 1)
    # np.random.randint's upper bound is exclusive: sample over the full
    # vocabulary [0, vocab_size) rather than leaving the last token
    # untested (the previous `vocab_size - 1` bound never drew token 4).
    inputs = np.random.randint(0, vocab_size, size=(batch_size, length))
    inputs = tf.one_hot(inputs, depth=vocab_size, dtype=tf.float32)
    outputs = flow(inputs)
    rev_outputs = flow.reverse(outputs)
    self.assertAllClose(inputs, rev_outputs)
    inputs_log_prob = base.distribution.log_prob(inputs)
    outputs_log_prob = flow_rv.distribution.log_prob(outputs)
    self.assertEqual(inputs_log_prob.shape, (batch_size, length))
    self.assertAllClose(inputs_log_prob, outputs_log_prob)
def testDiscreteAutoregressiveFlowSample(self, loc_only):
    """Samples through the flow (eager) and checks shape and value range."""
    batch_size = 5
    length = 2
    vocab_size = 2
    if loc_only:
        network = ed.layers.MADE(vocab_size, [])
    else:
        # Push the first scale logit to -1e10 so the scale component of the
        # flow starts out effectively deterministic.
        scale_mask = tf.reshape(
            [0] * vocab_size + [-1e10] + [0] * (vocab_size - 1),
            [1, 1, 2 * vocab_size])
        made = ed.layers.MADE(2 * vocab_size, [])

        def network(inputs, **kwargs):
            return scale_mask + made(inputs, **kwargs)

    flow = ed.layers.DiscreteAutoregressiveFlow(network, 1.)
    per_position = tf.random.normal([length, vocab_size])
    # Every batch element shares the same per-position logits.
    base = ed.OneHotCategorical(
        logits=tf.tile(per_position[tf.newaxis], [batch_size, 1, 1]),
        dtype=tf.float32)
    outputs = flow(base)
    self.assertEqual(outputs.shape, (batch_size, length, vocab_size))
    samples = tf.convert_to_tensor(outputs)
    self.assertAllGreaterEqual(samples, 0)
    self.assertAllLessEqual(samples, vocab_size - 1)