def testDiscreteAutoregressiveFlowSample(self, loc_only):
  """Samples through the flow and checks the sample's shape and value range.

  Args:
    loc_only: If true, the MADE network emits `vocab_size` units. Otherwise it
      emits `2 * vocab_size` units and a constant mask is added to its output.
  """
  batch_size = 5
  length = 2
  vocab_size = 2
  if loc_only:
    network = reversible.MADE(vocab_size, [])
  else:
    num_units = 2 * vocab_size
    # Adds -1e10 at output index `vocab_size` and zero everywhere else.
    # NOTE(review): presumably this suppresses one logit of the second half
    # of the outputs -- confirm against DiscreteAutoregressiveFlow.
    offsets = tf.reshape(
        [0] * vocab_size + [-1e10] + [0] * (vocab_size - 1),
        [1, 1, num_units])
    made = reversible.MADE(num_units, [])

    def network(inputs):
      return offsets + made(inputs)

  layer = reversible.DiscreteAutoregressiveFlow(network, 1.)
  # One set of random logits, repeated for every element of the batch.
  shared_logits = tf.random_normal([length, vocab_size])
  logits = tf.tile(shared_logits[tf.newaxis], [batch_size, 1, 1])
  base = tfp.edward2.OneHotCategorical(logits=logits, dtype=tf.float32)
  outputs = layer(base)
  _ = outputs.value  # need to do this to instantiate tf.variables
  self.evaluate(tf.global_variables_initializer())
  sample = self.evaluate(outputs)
  self.assertEqual(sample.shape, (batch_size, length, vocab_size))
  self.assertAllGreaterEqual(sample, 0)
  self.assertAllLessEqual(sample, vocab_size - 1)
def testDiscreteAutoregressiveFlowReverseGradients(self, loc_only):
  """Checks that the log-prob loss has a gradient for every trainable variable.

  Builds a flow over a one-hot categorical base, scores random one-hot
  features under the flow's distribution, and differentiates the negative
  summed log-probability with respect to `tf.trainable_variables()`.

  Args:
    loc_only: If true, the MADE network emits `vocab_size` units. Otherwise it
      emits `2 * vocab_size` units and a constant mask is added to its output.
  """
  batch_size = 2
  length = 4
  vocab_size = 2
  if loc_only:
    units = vocab_size
    network = reversible.MADE(units, [16, 16])
  else:
    units = 2 * vocab_size
    # Adds -1e10 at output index `vocab_size` and zero everywhere else.
    mask = tf.reshape([0] * vocab_size + [-1e10] + [0] * (vocab_size - 1),
                      [1, 1, 2 * vocab_size])
    network_ = reversible.MADE(units, [16, 16])
    network = lambda inputs: mask + network_(inputs)
  base = tfp.edward2.OneHotCategorical(
      logits=tf.random_normal([batch_size, length, vocab_size]))
  flow = reversible.DiscreteAutoregressiveFlow(network, 1.)
  flow_rv = flow(base)
  # `np.random.randint` excludes `high`; passing `vocab_size` lets every class
  # be drawn (with `vocab_size - 1` and two classes, only class 0 appeared).
  features = np.random.randint(0, vocab_size, size=(batch_size, length))
  features = tf.one_hot(features, depth=vocab_size, dtype=tf.float32)
  loss = -tf.reduce_sum(flow_rv.distribution.log_prob(features))
  grads = tf.gradients(loss, tf.trainable_variables())
  # Check for missing gradients before evaluating, so a disconnected variable
  # is reported by the assertion rather than by a failure inside `evaluate`.
  for grad in grads:
    self.assertIsNotNone(grad)
  self.evaluate(tf.global_variables_initializer())
  _ = self.evaluate(grads)
def testDiscreteAutoregressiveFlowInverse(self, loc_only):
  """Checks that the flow's forward and reverse passes undo each other.

  Both compositions, `reverse(forward(x))` and `forward(reverse(x))`, are
  compared against the original one-hot inputs.

  Args:
    loc_only: If true, the MADE network emits `vocab_size` units. Otherwise it
      emits `2 * vocab_size` units and a constant mask is added to its output.
  """
  batch_size = 2
  vocab_size = 79
  length = 5
  if loc_only:
    units = vocab_size
    network = reversible.MADE(units, [])
  else:
    units = 2 * vocab_size
    # Adds -1e10 at output index `vocab_size` and zero everywhere else.
    mask = tf.reshape([0] * vocab_size + [-1e10] + [0] * (vocab_size - 1),
                      [1, 1, 2 * vocab_size])
    network_ = reversible.MADE(units, [])
    network = lambda inputs: mask + network_(inputs)
  # `np.random.randint` excludes `high`; use `vocab_size` so the last class
  # can also be sampled.
  inputs = np.random.randint(0, vocab_size, size=(batch_size, length))
  inputs = tf.one_hot(inputs, depth=vocab_size, dtype=tf.float32)
  layer = reversible.DiscreteAutoregressiveFlow(network, 1.)
  rev_fwd_inputs = layer.reverse(layer(inputs))
  fwd_rev_inputs = layer(layer.reverse(inputs))
  self.evaluate(tf.global_variables_initializer())
  inputs_val, rev_fwd_inputs_val, fwd_rev_inputs_val = self.evaluate(
      [inputs, rev_fwd_inputs, fwd_rev_inputs])
  self.assertAllClose(inputs_val, rev_fwd_inputs_val)
  self.assertAllClose(inputs_val, fwd_rev_inputs_val)
def testMADERightToLeft(self):
  """Checks a right-to-left, bias-free MADE with hidden sizes [4, 3].

  Feeds all-zero inputs of shape (batch, length, channels) and verifies the
  number of weight tensors, the total parameter count, that the output at the
  last position is zero, and the output shape.
  """
  np.random.seed(1328)
  batch_size = 2
  length = 3
  channels = 5
  units = 1
  made = reversible.MADE(
      units,
      [4, 3],
      input_order='right-to-left',
      activation=tf.nn.relu,
      use_bias=False)
  outputs = made(tf.zeros([batch_size, length, channels]))
  weight_sizes = [np.prod(weight.shape) for weight in made.weights]
  total_params = sum(weight_sizes)
  # Disable lint error for open-source. pylint: disable=g-generic-assert
  self.assertEqual(len(made.weights), 3)
  # pylint: enable=g-generic-assert
  self.assertEqual(total_params, 3 * 5 * 4 + 4 * 3 + 3 * 3 * 1)
  self.evaluate(tf.global_variables_initializer())
  outputs_val = self.evaluate(outputs)
  expected_last = np.zeros((batch_size, units))
  self.assertAllEqual(outputs_val[:, -1, :], expected_last)
  self.assertEqual(outputs_val.shape, (batch_size, length, units))
def testDiscreteAutoregressiveFlowRandomVariable(self, loc_only):
  """Exercises the flow on both a random variable and a plain tensor.

  Verifies, in order: the dtype, shape and value range of the flowed random
  variable; that `reverse` undoes the forward pass on one-hot inputs; and that
  the base log-probability of the inputs matches the flowed distribution's
  log-probability of the corresponding outputs.

  Args:
    loc_only: If true, the MADE network emits `vocab_size` units; otherwise
      `2 * vocab_size` units.
  """
  batch_size = 2
  length = 4
  vocab_size = 5
  if loc_only:
    units = vocab_size
  else:
    units = 2 * vocab_size
  base = tfp.edward2.OneHotCategorical(
      logits=tf.random_normal([batch_size, length, vocab_size]),
      dtype=tf.float32)
  flow = reversible.DiscreteAutoregressiveFlow(
      reversible.MADE(units, [16, 16]), 1.)
  flow_rv = flow(base)
  self.assertEqual(flow_rv.dtype, tf.float32)

  self.evaluate(tf.global_variables_initializer())
  res = self.evaluate(flow_rv)
  self.assertEqual(res.shape, (batch_size, length, vocab_size))
  self.assertAllGreaterEqual(res, 0)
  self.assertAllLessEqual(res, vocab_size - 1)

  # `np.random.randint` excludes `high`; use `vocab_size` so the last class
  # can also be sampled.
  inputs = np.random.randint(0, vocab_size, size=(batch_size, length))
  inputs = tf.one_hot(inputs, depth=vocab_size, dtype=tf.float32)
  outputs = flow(inputs)
  rev_outputs = flow.reverse(outputs)
  inputs_val, rev_outputs_val = self.evaluate([inputs, rev_outputs])
  self.assertAllClose(inputs_val, rev_outputs_val)

  inputs_log_prob = base.distribution.log_prob(inputs)
  outputs_log_prob = flow_rv.distribution.log_prob(outputs)
  res1, res2 = self.evaluate([inputs_log_prob, outputs_log_prob])
  self.assertEqual(res1.shape, (batch_size, length))
  self.assertAllClose(res1, res2)
def testDiscreteAutoregressiveFlowCall(self, loc_only):
  """Checks the shape and value range of the flow applied to one-hot inputs.

  Args:
    loc_only: If true, the MADE network emits `vocab_size` units. Otherwise it
      emits `2 * vocab_size` units and a constant mask is added to its output.
  """
  batch_size = 3
  vocab_size = 79
  length = 5
  if loc_only:
    units = vocab_size
    network = reversible.MADE(units, [])
  else:
    units = 2 * vocab_size
    # Adds -1e10 at output index `vocab_size` and zero everywhere else.
    mask = tf.reshape([0] * vocab_size + [-1e10] + [0] * (vocab_size - 1),
                      [1, 1, 2 * vocab_size])
    network_ = reversible.MADE(units, [])
    network = lambda inputs: mask + network_(inputs)
  # `np.random.randint` excludes `high`; use `vocab_size` so the last class
  # can also be sampled.
  inputs = np.random.randint(0, vocab_size, size=(batch_size, length))
  inputs = tf.one_hot(inputs, depth=vocab_size, dtype=tf.float32)
  layer = reversible.DiscreteAutoregressiveFlow(network, 1.)
  outputs = layer(inputs)
  self.evaluate(tf.global_variables_initializer())
  outputs_val = self.evaluate(outputs)
  self.assertEqual(outputs_val.shape, (batch_size, length, vocab_size))
  self.assertAllGreaterEqual(outputs_val, 0)
  self.assertAllLessEqual(outputs_val, vocab_size - 1)
def testSinkhornAutoregressiveFlowCall(self):
  """Checks the shape and value range of the Sinkhorn flow on one-hot inputs.

  The wrapped MADE network emits `vocab_size ** 2` units per position.
  """
  batch_size = 3
  vocab_size = 79
  length = 5
  units = vocab_size ** 2
  # `np.random.randint` excludes `high`; use `vocab_size` so the last class
  # can also be sampled.
  inputs = np.random.randint(0, vocab_size, size=(batch_size, length))
  inputs = tf.one_hot(inputs, depth=vocab_size, dtype=tf.float32)
  layer = reversible.SinkhornAutoregressiveFlow(
      reversible.MADE(units, []), 1.)
  outputs = layer(inputs)
  self.evaluate(tf.global_variables_initializer())
  outputs_val = self.evaluate(outputs)
  self.assertEqual(outputs_val.shape, (batch_size, length, vocab_size))
  self.assertAllGreaterEqual(outputs_val, 0)
  self.assertAllLessEqual(outputs_val, vocab_size - 1)
def testDiscreteSinkhornFlowInverse(self):
  """Checks that the Sinkhorn flow's forward and reverse passes are inverses.

  Both compositions, `reverse(forward(x))` and `forward(reverse(x))`, must
  reproduce the one-hot inputs exactly.
  """
  batch_size = 2
  vocab_size = 79
  length = 5
  units = vocab_size ** 2
  # `np.random.randint` excludes `high`; use `vocab_size` so the last class
  # can also be sampled.
  inputs = np.random.randint(0, vocab_size, size=(batch_size, length))
  inputs = tf.one_hot(inputs, depth=vocab_size, dtype=tf.float32)
  layer = reversible.SinkhornAutoregressiveFlow(
      reversible.MADE(units, []), 1.)
  rev_fwd_inputs = layer.reverse(layer(inputs))
  fwd_rev_inputs = layer(layer.reverse(inputs))
  self.evaluate(tf.global_variables_initializer())
  inputs_val, rev_fwd_inputs_val, fwd_rev_inputs_val = self.evaluate(
      [inputs, rev_fwd_inputs, fwd_rev_inputs])
  self.assertAllEqual(inputs_val, rev_fwd_inputs_val)
  self.assertAllEqual(inputs_val, fwd_rev_inputs_val)
def testMADENoHidden(self):
  """Checks a left-to-right MADE with no hidden layers on zero inputs.

  Verifies the number of weight tensors, the total parameter count, that the
  first output column is zero, and that the output has `2 * length` columns.
  """
  np.random.seed(532)
  batch_size = 2
  length = 3
  network = reversible.MADE([], input_order='left-to-right')
  inputs = tf.zeros([batch_size, length])
  outputs = network(inputs)
  num_weights = sum([np.prod(weight.shape) for weight in network.weights])
  self.assertLen(network.weights, 2)
  self.assertEqual(num_weights, 3*3*2 + 3*2)
  self.evaluate(tf.global_variables_initializer())
  outputs_val = self.evaluate(outputs)
  # Compare against a NumPy array rather than a newly created graph tensor, so
  # the expected value needs no further evaluation.
  self.assertAllEqual(outputs_val[:, 0], np.zeros(batch_size))
  self.assertEqual(outputs_val.shape, (batch_size, 2 * length))
def testMADELeftToRight(self):
  """Checks a MADE with one hidden layer of size 4 and ReLU on zero inputs.

  Verifies the number of weight tensors, the total parameter count, that the
  first output column is zero, and that the output has `2 * length` columns.
  """
  np.random.seed(83243)
  batch_size = 2
  length = 3
  network = reversible.MADE([4], activation=tf.nn.relu)
  inputs = tf.zeros([batch_size, length])
  outputs = network(inputs)
  num_weights = sum([np.prod(weight.shape) for weight in network.weights])
  self.assertLen(network.weights, 4)
  self.assertEqual(num_weights, (3*4 + 4) + (4*3*2 + 3*2))
  self.evaluate(tf.global_variables_initializer())
  outputs_val = self.evaluate(outputs)
  # Compare against a NumPy array rather than a newly created graph tensor, so
  # the expected value needs no further evaluation.
  self.assertAllEqual(outputs_val[:, 0], np.zeros(batch_size))
  self.assertEqual(outputs_val.shape, (batch_size, 2 * length))
def testMADERightToLeft(self):
  """Checks a right-to-left, bias-free MADE with hidden sizes [4, 3].

  Verifies the number of weight tensors, the total parameter count, that the
  last output column is zero, and that the output has `2 * length` columns.
  """
  np.random.seed(1328)
  batch_size = 2
  length = 3
  network = reversible.MADE([4, 3],
                            input_order='right-to-left',
                            activation=tf.nn.relu,
                            use_bias=False)
  inputs = tf.zeros([batch_size, length])
  outputs = network(inputs)
  num_weights = sum([np.prod(weight.shape) for weight in network.weights])
  self.assertLen(network.weights, 3)
  self.assertEqual(num_weights, 3*4 + 4*3 + 3*3*2)
  self.evaluate(tf.global_variables_initializer())
  outputs_val = self.evaluate(outputs)
  # Compare against a NumPy array rather than a newly created graph tensor, so
  # the expected value needs no further evaluation.
  self.assertAllEqual(outputs_val[:, -1], np.zeros(batch_size))
  self.assertEqual(outputs_val.shape, (batch_size, 2 * length))
def testMADELeftToRight(self):
  """Checks a MADE with one hidden layer of size 4 and ReLU on zero inputs.

  Feeds all-zero inputs of shape (batch, length, channels) and verifies the
  number of weight tensors, the total parameter count, that the output at the
  first position is zero, and the output shape.
  """
  np.random.seed(83243)
  batch_size = 2
  length = 3
  channels = 1
  units = 5
  made = reversible.MADE(units, [4], activation=tf.nn.relu)
  outputs = made(tf.zeros([batch_size, length, channels]))
  weight_sizes = [np.prod(weight.shape) for weight in made.weights]
  total_params = sum(weight_sizes)
  self.assertEqual(len(made.weights), 4)
  self.assertEqual(total_params, (3 * 1 * 4 + 4) + (4 * 3 * 5 + 3 * 5))
  self.evaluate(tf.global_variables_initializer())
  outputs_val = self.evaluate(outputs)
  expected_first = np.zeros((batch_size, units))
  self.assertAllEqual(outputs_val[:, 0, :], expected_first)
  self.assertEqual(outputs_val.shape, (batch_size, length, units))
def testMADENoHidden(self):
  """Checks a left-to-right MADE with no hidden layers on zero inputs.

  Feeds all-zero inputs of shape (batch, length, channels) and verifies the
  number of weight tensors, the total parameter count, that the output at the
  first position is zero, and the output shape.
  """
  np.random.seed(532)
  batch_size = 2
  length = 3
  channels = 5
  units = 4
  made = reversible.MADE(units, [], input_order='left-to-right')
  outputs = made(tf.zeros([batch_size, length, channels]))
  weight_sizes = [np.prod(weight.shape) for weight in made.weights]
  total_params = sum(weight_sizes)
  self.assertEqual(len(made.weights), 2)
  self.assertEqual(total_params, 3 * 5 * 3 * 4 + 3 * 4)
  self.evaluate(tf.global_variables_initializer())
  outputs_val = self.evaluate(outputs)
  expected_first = np.zeros((batch_size, units))
  self.assertAllEqual(outputs_val[:, 0, :], expected_first)
  self.assertEqual(outputs_val.shape, (batch_size, length, units))
def testDiscreteAutoregressiveFlowReverseGradients(self, loc_only):
  """Checks that the log-prob loss has a gradient for every flow weight.

  Builds a flow over a one-hot categorical base, scores random one-hot
  features under the flow's distribution, and differentiates the negative
  summed log-probability with respect to `flow.layer.weights`.

  Args:
    loc_only: If true, the MADE network emits `vocab_size` units; otherwise
      `2 * vocab_size` units.
  """
  batch_size = 2
  length = 4
  vocab_size = 2
  if loc_only:
    units = vocab_size
  else:
    units = 2 * vocab_size
  base = tfp.edward2.OneHotCategorical(
      logits=tf.random_normal([batch_size, length, vocab_size]))
  flow = reversible.DiscreteAutoregressiveFlow(
      reversible.MADE(units, [16, 16]), 1.)
  flow_rv = flow(base)
  # `np.random.randint` excludes `high`; passing `vocab_size` lets every class
  # be drawn (with `vocab_size - 1` and two classes, only class 0 appeared).
  features = np.random.randint(0, vocab_size, size=(batch_size, length))
  features = tf.one_hot(features, depth=vocab_size, dtype=tf.float32)
  loss = -tf.reduce_sum(flow_rv.distribution.log_prob(features))
  grads = tf.gradients(loss, flow.layer.weights)
  # Check for missing gradients before evaluating, so a disconnected weight
  # is reported by the assertion rather than by a failure inside `evaluate`.
  for grad in grads:
    self.assertIsNotNone(grad)
  self.evaluate(tf.global_variables_initializer())
  _ = self.evaluate(grads)