def testTransformerWithEncoderDecoderAttentionLoss(self):
  """Supervised-attention loss should evaluate to a scalar.

  Builds the transformer with the supervised-attention hparams, feeds a
  random attention target of shape (batch, target_len, input_len), and
  checks that the resulting "attention_loss" entry in the extra-loss dict
  reduces to a rank-0 value.
  """
  model, features = get_model(
      transformer.transformer_supervised_attention())
  # Random target attention map the loss is computed against.
  attn_target = np.random.random_sample(
      size=(BATCH_SIZE, TARGET_LENGTH, INPUT_LENGTH))
  features["expected_attentions"] = tf.constant(
      attn_target, dtype=tf.float32)
  _, extra_loss = model(features)
  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    loss_value = sess.run(extra_loss["attention_loss"])
  # A scalar loss has an empty numpy shape.
  self.assertEqual(loss_value.shape, ())