def testNeuralGPU(self):
    """Build a NeuralGPU model in TRAIN mode and check the logits shape.

    Random integer inputs and targets are passed to ``model.model_fn``; the
    concatenated logits must have shape
    ``(batch_size, target_length, 1, 1, target_vocab_size)``.
    """
    hparams = common_hparams.basic_params1()
    batch_size = 3
    input_length = 5
    target_length = input_length
    input_vocab_size = 9
    target_vocab_size = 11
    p_hparams = problem_hparams.test_problem_hparams(
        input_vocab_size, target_vocab_size)

    # np.random.randint has an exclusive upper bound, so ids fall in
    # [0, vocab_size). It replaces the deprecated np.random.random_integers.
    inputs = np.random.randint(
        input_vocab_size, size=(batch_size, input_length, 1, 1))
    targets = np.random.randint(
        target_vocab_size, size=(batch_size, target_length, 1, 1))

    with self.test_session() as session:
        features = {
            "inputs": tf.constant(inputs, dtype=tf.int32),
            "targets": tf.constant(targets, dtype=tf.int32)
        }
        model = neural_gpu.NeuralGPU(
            hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
        # model_fn returns a pair; only the first element (the sharded
        # logits) is used here.
        sharded_logits, _ = model.model_fn(features)
        logits = tf.concat(sharded_logits, 0)
        session.run(tf.global_variables_initializer())
        res = session.run(logits)

    self.assertEqual(
        res.shape,
        (batch_size, target_length, 1, 1, target_vocab_size))
def testLSTMSeq2SeqAttention(self):
    """Run LSTMSeq2seqAttention in TRAIN mode and check the logits shape.

    Inputs have length 5 and targets length 6 (batch of 3); the concatenated
    logits must have shape ``(3, 6, 1, 1, vocab_size)``.
    """
    vocab_size = 9
    # Ids are drawn from [1, vocab_size - 1]; randint's upper bound is
    # exclusive. It replaces the deprecated np.random.random_integers.
    x = np.random.randint(1, vocab_size, size=(3, 5, 1, 1))
    y = np.random.randint(1, vocab_size, size=(3, 6, 1, 1))
    hparams = lstm.lstm_attention()
    p_hparams = problem_hparams.test_problem_hparams(
        vocab_size, vocab_size)

    x = tf.constant(x, dtype=tf.int32)
    # Overrides the static shape so the first two dimensions are unknown.
    # NOTE(review): this writes a private Tensor attribute; presumably it is
    # meant to exercise the dynamic batch/length path - confirm it still
    # works on the TensorFlow version in use.
    x._shape = tf.TensorShape([None, None, 1, 1])

    with self.test_session() as session:
        features = {
            "inputs": x,
            "targets": tf.constant(y, dtype=tf.int32),
        }
        model = lstm.LSTMSeq2seqAttention(
            hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
        sharded_logits, _ = model.model_fn(features)
        logits = tf.concat(sharded_logits, 0)
        session.run(tf.global_variables_initializer())
        res = session.run(logits)

    self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size))
def testTransformer(self):
    """Check TransformerRevnet logits shape and that gradients evaluate.

    Despite the method name, the model under test is
    ``transformer_revnet.TransformerRevnet``. The graph is built in TRAIN
    mode, gradients of the mean logit are taken with respect to the inputs
    and all global variables, and both logits and gradients are evaluated
    in one ``session.run`` call.
    """
    batch_size = 3
    input_length = 5
    target_length = 7
    vocab_size = 9
    hparams = transformer_revnet_test()
    p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size)
    hparams.problems = [p_hparams]

    # np.random.randint has an exclusive upper bound, so ids fall in
    # [0, vocab_size). It replaces the deprecated np.random.random_integers.
    inputs = np.random.randint(
        vocab_size, size=(batch_size, input_length, 1, 1))
    targets = np.random.randint(
        vocab_size, size=(batch_size, target_length, 1, 1))
    features = {
        "inputs": tf.constant(inputs, dtype=tf.int32),
        "targets": tf.constant(targets, dtype=tf.int32),
        "target_space_id": tf.constant(1, dtype=tf.int32),
    }

    model = transformer_revnet.TransformerRevnet(
        hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
    sharded_logits, _ = model.model_fn(features)
    logits = tf.concat(sharded_logits, 0)

    grads = tf.gradients(
        tf.reduce_mean(logits),
        [features["inputs"]] + tf.global_variables())
    # tf.gradients yields None for tensors with no gradient path; drop those
    # so that session.run receives only real tensors.
    grads = [g for g in grads if g is not None]

    with self.test_session() as session:
        session.run(tf.global_variables_initializer())
        # Gradient values are computed but not inspected; only the logits
        # shape is asserted.
        logits_val, _ = session.run([logits, grads])

    self.assertEqual(
        logits_val.shape,
        (batch_size, target_length, 1, 1, vocab_size))
def testByteNet(self):
    """Run ByteNet in TRAIN mode and check the logits shape.

    Inputs have length 5 and targets length 6 (batch of 3); the concatenated
    logits are expected to have shape ``(3, 50, 1, 1, vocab_size)``.
    """
    vocab_size = 9
    # Ids are drawn from [1, vocab_size - 1]; randint's upper bound is
    # exclusive. It replaces the deprecated np.random.random_integers.
    x = np.random.randint(1, vocab_size, size=(3, 5, 1, 1))
    y = np.random.randint(1, vocab_size, size=(3, 6, 1, 1))
    hparams = bytenet.bytenet_base()
    p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size)

    with self.test_session() as session:
        features = {
            "inputs": tf.constant(x, dtype=tf.int32),
            "targets": tf.constant(y, dtype=tf.int32),
        }
        model = bytenet.ByteNet(
            hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
        sharded_logits, _ = model.model_fn(features)
        logits = tf.concat(sharded_logits, 0)
        session.run(tf.global_variables_initializer())
        res = session.run(logits)

    # The expected length is 50 although the targets have length 6.
    # NOTE(review): presumably ByteNet pads sequences to a fixed length -
    # confirm against the bytenet model/hparams.
    self.assertEqual(res.shape, (3, 50, 1, 1, vocab_size))
def getModel(self, hparams, mode=tf.estimator.ModeKeys.TRAIN):
    """Create a small Transformer and a matching random feature dict.

    ``hparams`` is modified in place: hidden size, filter size, head count,
    pre/post-process dropout and ``problems`` are overwritten.

    Args:
      hparams: hyperparameter object to shrink and hand to the model.
      mode: currently unused; see the review note at the return statement.

    Returns:
      A ``(model, features)`` tuple where ``features`` holds ``"inputs"``,
      ``"targets"`` and ``"target_space_id"`` int32 constants.
    """
    hparams.hidden_size = 8
    hparams.filter_size = 32
    hparams.num_heads = 1
    hparams.layer_prepostprocess_dropout = 0.0

    p_hparams = problem_hparams.test_problem_hparams(
        VOCAB_SIZE, VOCAB_SIZE)
    hparams.problems = [p_hparams]

    # np.random.randint has an exclusive upper bound, so ids fall in
    # [0, VOCAB_SIZE). It replaces the deprecated np.random.random_integers.
    inputs = np.random.randint(
        VOCAB_SIZE, size=(BATCH_SIZE, INPUT_LENGTH, 1, 1))
    targets = np.random.randint(
        VOCAB_SIZE, size=(BATCH_SIZE, TARGET_LENGTH, 1, 1))
    features = {
        "inputs": tf.constant(inputs, dtype=tf.int32),
        "targets": tf.constant(targets, dtype=tf.int32),
        "target_space_id": tf.constant(1, dtype=tf.int32),
    }

    # NOTE(review): the model is always built in PREDICT mode and the `mode`
    # argument is ignored. This is left unchanged because callers outside
    # this view may depend on PREDICT; confirm before passing `mode` through.
    model = transformer.Transformer(
        hparams, tf.estimator.ModeKeys.PREDICT, p_hparams)
    return model, features