def _test_resnet(self, img_size, output_size):
  """Runs a tiny ResNet forward on random data and checks the logit shape.

  Args:
    img_size: int, height and width of the random input images.
    output_size: tuple, expected spatial dims of the logits, inserted
      between the batch dim and the trailing (1, vocab_size) dims.
  """
  vocab_size = 9
  batch_size = 2
  # np.random.random_integers is deprecated (removed in modern NumPy);
  # randint's upper bound is exclusive, so these ranges match the original
  # inclusive ranges [0, 255] and [1, vocab_size - 1].
  x = np.random.randint(
      0, high=256, size=(batch_size, img_size, img_size, 3))
  y = np.random.randint(1, high=vocab_size, size=(batch_size, 1, 1, 1))
  hparams = resnet_tiny_cpu()
  p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size,
                                                   hparams)
  # NOTE(review): this uses the older input_modality/target_modality API,
  # while other tests in this file assign via p_hparams.modality — confirm
  # which API the installed tensor2tensor version expects.
  p_hparams.input_modality["inputs"] = modalities.ImageModality(hparams)
  p_hparams.target_modality = modalities.ClassLabelModality(
      hparams, vocab_size)
  with self.test_session() as session:
    features = {
        "inputs": tf.constant(x, dtype=tf.int32),
        "targets": tf.constant(y, dtype=tf.int32),
    }
    model = resnet.Resnet(hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
    logits, _ = model(features)
    session.run(tf.global_variables_initializer())
    res = session.run(logits)
    self.assertEqual(res.shape,
                     (batch_size,) + output_size + (1, vocab_size))
def _test_resnet(self, img_size, output_size):
  """Builds a ShakeShake model on random data and prints its total FLOPs.

  TODO(review): this redefines `_test_resnet` and silently shadows the
  earlier method of the same name in this class — rename one of them
  (e.g. `_test_shake_shake_flops`).

  Args:
    img_size: int, height and width of the random input image.
    output_size: unused; kept only for signature parity with the
      shadowed `_test_resnet`.
  """
  vocab_size = 1
  batch_size = 1
  # randint's upper bound is exclusive; these match the ranges of the
  # original (deprecated) np.random.random_integers calls.
  x = np.random.randint(
      0, high=256, size=(batch_size, img_size, img_size, 3))
  y = np.random.randint(1, high=vocab_size + 1, size=(batch_size, 1, 1, 1))
  hparams = resnet_32()
  p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size,
                                                   hparams)
  p_hparams.input_modality["inputs"] = modalities.ImageModality(hparams)
  p_hparams.target_modality = modalities.ClassLabelModality(
      hparams, vocab_size)
  run_meta = tf.RunMetadata()
  with self.test_session() as session:
    features = {
        "inputs": tf.constant(x, dtype=tf.int32),
        "targets": tf.constant(y, dtype=tf.int32),
    }
    model = shake_shake.ShakeShake(hparams, tf.estimator.ModeKeys.TRAIN,
                                   p_hparams)
    logits, _ = model(features)
    print(logits.get_shape())
    session.run(tf.global_variables_initializer())
    tf.get_variable_scope().set_initializer(
        optimize.get_variable_initializer(hparams))
    loss = tf.losses.sparse_softmax_cross_entropy(
        labels=tf.constant(0, dtype=tf.int32, shape=[1, 1, 1, 1, 1]),
        logits=logits)
    # Called for its graph side effect: adds the training op (and its
    # gradient subgraph) so the profiler counts backward-pass FLOPs too.
    optimize.optimize(loss, 0.1, hparams)
    session.run(loss)
    # Count total float operations in the constructed graph.
    opts = tf.profiler.ProfileOptionBuilder.float_operation()
    flops = tf.profiler.profile(
        tf.get_default_graph(), run_meta=run_meta, options=opts)
    print(flops.total_float_ops)
def _test_xception(self, img_size):
  """Runs a tiny Xception model forward and verifies the logit shape.

  Args:
    img_size: int, height and width of the random input images.
  """
  vocab_size = 9
  batch_size = 3
  image_shape = (batch_size, img_size, img_size, 3)
  images = np.random.randint(256, size=image_shape)
  labels = np.random.randint(
      1, high=vocab_size, size=(batch_size, 1, 1, 1))
  hparams = xception.xception_tiny()
  p_hparams = problem_hparams.test_problem_hparams(
      vocab_size, vocab_size, hparams)
  p_hparams.modality["inputs"] = modalities.ImageModality(hparams)
  p_hparams.modality["targets"] = modalities.ClassLabelModality(
      hparams, vocab_size)
  with self.test_session() as session:
    features = {
        "inputs": tf.constant(images, dtype=tf.int32),
        "targets": tf.constant(labels, dtype=tf.int32),
    }
    model = xception.Xception(
        hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
    logits, _ = model(features)
    session.run(tf.global_variables_initializer())
    logits_value = session.run(logits)
    expected = (batch_size, 1, 1, 1, vocab_size)
    self.assertEqual(logits_value.shape, expected)
def testVqaAttentionBaseline(self):
  """Smoke-tests VqaAttentionBaseline: logit shape and scalar training loss."""
  batch_size = 3
  image_size = 448
  vocab_size = 100
  num_classes = 10
  question_length = 5
  answer_length = 10
  # Random images scaled into [-1, 1).
  x = 2 * np.random.rand(batch_size, image_size, image_size, 3) - 1
  # np.random.random_integers is deprecated (removed in modern NumPy);
  # randint's high is exclusive, so these preserve the original inclusive
  # ranges: questions in [1, vocab_size - 1], answers in [0, num_classes].
  q = np.random.randint(
      1, high=vocab_size, size=(batch_size, question_length, 1, 1))
  a = np.random.randint(
      0, high=num_classes + 1, size=(batch_size, answer_length, 1, 1))
  hparams = vqa_attention.vqa_attention_base()
  p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size,
                                                   hparams)
  p_hparams.modality["inputs"] = modalities.ImageModality(hparams)
  p_hparams.modality["question"] = modalities.SymbolModality(
      hparams, vocab_size)
  p_hparams.modality["targets"] = modalities.MultiLabelModality(
      hparams, num_classes + 1)
  with self.test_session() as session:
    features = {
        "inputs": tf.constant(x, dtype=tf.float32),
        "question": tf.constant(q, dtype=tf.int32),
        "targets": tf.constant(a, dtype=tf.int32),
    }
    model = vqa_attention.VqaAttentionBaseline(
        hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
    logits, losses = model(features)
    session.run(tf.global_variables_initializer())
    logits_, losses_ = session.run([logits, losses])
    self.assertEqual(logits_.shape, (batch_size, 1, 1, 1, num_classes + 1))
    self.assertEqual(losses_["training"].shape, ())