def testVqaAttentionBaseline(self):
  """Forward-pass smoke test for VqaAttentionBaseline.

  Builds random image/question/answer features, runs one training-mode
  forward pass, and checks the logits and training-loss shapes.
  """
  # Problem dimensions for the synthetic batch.
  bsize = 3
  img_dim = 448
  vocab = 100
  n_classes = 10
  q_len = 5
  a_len = 10

  # Images scaled into [-1, 1); question/answer ids as 4-D int tensors.
  images = 2 * np.random.rand(bsize, img_dim, img_dim, 3) - 1
  questions = np.random.randint(
      1, high=vocab, size=(bsize, q_len, 1, 1))
  answers = np.random.randint(
      n_classes + 1, size=(bsize, a_len, 1, 1))

  hparams = vqa_attention.vqa_attention_base()
  p_hparams = problem_hparams.test_problem_hparams(
      vocab, n_classes + 1, hparams)
  # Wire up per-feature modalities: image input, multi-label target,
  # and a symbol modality for the question tokens.
  p_hparams.modality["inputs"] = modalities.ModalityType.IMAGE
  p_hparams.modality["targets"] = modalities.ModalityType.MULTI_LABEL
  p_hparams.modality["question"] = modalities.ModalityType.SYMBOL
  p_hparams.vocab_size["question"] = vocab

  with self.test_session() as session:
    features = {
        "inputs": tf.constant(images, dtype=tf.float32),
        "question": tf.constant(questions, dtype=tf.int32),
        "targets": tf.constant(answers, dtype=tf.int32),
    }
    model = vqa_attention.VqaAttentionBaseline(
        hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
    logits, losses = model(features)
    session.run(tf.global_variables_initializer())
    logits_, losses_ = session.run([logits, losses])
    # Logits keep the (batch, 1, 1, 1, classes) layout; loss is scalar.
    self.assertEqual(logits_.shape, (bsize, 1, 1, 1, n_classes + 1))
    self.assertEqual(losses_["training"].shape, ())
def testVqaAttentionBaseline(self):
  """Forward-pass smoke test for VqaAttentionBaseline (legacy modality API).

  Builds random image/question/answer features, runs one training-mode
  forward pass, and checks the logits and training-loss shapes.
  """
  # NOTE(review): this method shares its name with another test method in
  # this file, so the later definition shadows the earlier one at class
  # creation time — confirm which variant should survive, or rename one.
  batch_size = 3
  image_size = 448
  vocab_size = 100
  num_classes = 10
  question_length = 5
  answer_length = 10

  # Images scaled into [-1, 1).
  x = 2 * np.random.rand(batch_size, image_size, image_size, 3) - 1
  # np.random.random_integers was deprecated in NumPy 1.11 and removed in
  # NumPy 1.25; np.random.randint with an exclusive high bound is the
  # drop-in replacement (random_integers(lo, hi) == randint(lo, hi + 1)).
  q = np.random.randint(
      1, high=vocab_size, size=(batch_size, question_length, 1, 1))
  a = np.random.randint(
      0, high=num_classes + 1, size=(batch_size, answer_length, 1, 1))

  hparams = vqa_attention.vqa_attention_base()
  p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size)
  # Legacy registry-based modality wiring: image input, symbol question,
  # multi-label class target.
  p_hparams.input_modality["inputs"] = (registry.Modalities.IMAGE, None)
  p_hparams.input_modality["question"] = (registry.Modalities.SYMBOL,
                                          vocab_size)
  p_hparams.target_modality = (registry.Modalities.CLASS_LABEL +
                               ":multi_label", num_classes + 1)

  with self.test_session() as session:
    features = {
        "inputs": tf.constant(x, dtype=tf.float32),
        "question": tf.constant(q, dtype=tf.int32),
        "targets": tf.constant(a, dtype=tf.int32),
    }
    model = vqa_attention.VqaAttentionBaseline(
        hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
    logits, losses = model(features)
    session.run(tf.global_variables_initializer())
    logits_, losses_ = session.run([logits, losses])
    # Logits keep the (batch, 1, 1, 1, classes) layout; loss is scalar.
    self.assertEqual(logits_.shape, (batch_size, 1, 1, 1, num_classes + 1))
    self.assertEqual(losses_["training"].shape, ())