def testRougeLMetricE2E(self):
    """Smoke-test `rouge.rouge_l_fscore` end to end inside a session.

    Random class indices are one-hot encoded to form the predictions, and
    a second, independent set of random indices with the same shape is
    used as the targets. The mean of the returned scores is evaluated in
    a test session; the test passes as long as graph construction and
    evaluation complete without raising. No assertion is made on the
    score value itself because the inputs are random.
    """
    vocab_size = 4
    batch_size = 12
    seq_length = 12
    # Predictions and targets are drawn with the same index shape and the
    # same vocabulary range. Deriving both from the constants above keeps
    # them in sync if any of the sizes is changed later.
    index_shape = (batch_size, seq_length, 1, 1)

    predictions = tf.one_hot(
        np.random.randint(vocab_size, size=index_shape),
        depth=vocab_size,
        dtype=tf.float32)
    targets = np.random.randint(vocab_size, size=index_shape)

    with self.test_session() as session:
        scores, _ = rouge.rouge_l_fscore(
            predictions, tf.constant(targets, dtype=tf.int32))
        mean_score = tf.reduce_mean(scores)
        session.run(tf.global_variables_initializer())
        # Evaluating the mean forces the whole metric graph to execute.
        session.run(mean_score)