def testAccuracy(self):
    """Unit test for compute_accuracy"""
    labels = [0, 3, 2, 2.]
    # Predicted label based on scores: [3, 3, 1, 2]
    scores = [[1.1, 2.1, 3.1, 4.1],
              [-1, -2, -3, 4],
              [-2, 2, -1, 0],
              [1, 2, 5, 3]]
    metric_name = 'accuracy'

    # Classification: predictions [3, 3, 1, 2] match labels at positions 1 and 3 -> accuracy 0.5
    metric = metrics.get_metric_fn(metric_name, task_type=TaskType.CLASSIFICATION, num_classes=4)()
    metric.reset_states()
    metric.update_state(tf.constant(labels), tf.constant(scores))
    acc = metric.result().numpy()
    self.assertEqual(metric.name, metric_name)
    self.assertEqual(acc, 0.5)

    # Binary classification: thresholded predictions [1, 0, 1, 1] match labels only at position 2 -> accuracy 0.25
    labels = [0, 1, 1, 0.]
    scores = [1, -1, 1, 1.]
    metric = metrics.get_metric_fn(metric_name, task_type=TaskType.BINARY_CLASSIFICATION, num_classes=1)()
    metric.reset_states()
    metric.update_state(tf.constant(labels), tf.constant(scores))
    acc = metric.result().numpy()
    self.assertEqual(metric.name, metric_name)
    self.assertEqual(acc, 0.25)
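# Illustrative cross-check (not part of the original suite): assuming the multi-class accuracy
# metric compares the argmax of the scores against the labels, the expected value of 0.5 above
# can be reproduced with the stock Keras metric.
def testAccuracyKerasCrossCheck(self):
    labels = [0, 3, 2, 2.]
    scores = [[1.1, 2.1, 3.1, 4.1], [-1, -2, -3, 4], [-2, 2, -1, 0], [1, 2, 5, 3]]
    keras_acc = tf.keras.metrics.SparseCategoricalAccuracy()
    keras_acc.update_state(tf.constant(labels), tf.constant(scores))
    # Argmax predictions [3, 3, 1, 2] match labels at positions 1 and 3
    self.assertEqual(keras_acc.result().numpy(), 0.5)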
def _testComputePrecision(self, scores, labels, expected_precision_at_1, expected_precision_at_2):
    """Tests compute_precision() given input data"""
    metric_name_1 = 'precision@1'
    metric_at_1 = metrics.get_metric_fn(metric_name_1, task_type=TaskType.RANKING, num_classes=None)()
    metric_at_1.update_state(labels, scores)
    preat1_tfr = metric_at_1.result().numpy()
    self.assertEqual(metric_at_1.name, metric_name_1)

    metric_name_2 = 'precision@2'
    metric_at_2 = metrics.get_metric_fn(metric_name_2, task_type=TaskType.RANKING, num_classes=None)()
    metric_at_2.update_state(labels, scores)
    preat2_tfr = metric_at_2.result().numpy()
    self.assertEqual(metric_at_2.name, metric_name_2)

    self.assertAllEqual(expected_precision_at_1, preat1_tfr)
    self.assertAllEqual(expected_precision_at_2, preat2_tfr)
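# Illustrative usage sketch for _testComputePrecision (hypothetical data, not from the original
# suite). The expected values are hand-computed assuming precision@k is the fraction of relevant
# documents among the top-k scored documents, averaged over queries.
def testComputePrecisionExample(self):
    scores = [[1., 2., 3.], [3., 1., 2.]]
    labels = [[0., 0., 1.], [0., 1., 0.]]
    # precision@1: query 1 top doc is relevant (1.0), query 2 top doc is not (0.0) -> mean 0.5
    # precision@2: query 1 has 1 of 2 relevant (0.5), query 2 has 0 of 2 (0.0)    -> mean 0.25
    self._testComputePrecision(scores, labels, 0.5, 0.25)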
def _testNdcg(self, scores, labels, scores_for_test_utils, labels_for_test_utils, topk):
    """Tests compute_ndcg() given input data"""
    metric_name = f'ndcg@{topk}'
    metric = metrics.get_metric_fn(metric_name, task_type=TaskType.RANKING, num_classes=None)()
    metric.update_state(labels, scores)
    ndcg_tfr = metric.result().numpy()

    # Reference value from the power-2 gain NDCG implementation in testing_utils
    ndcg_p2 = testing_utils.compute_ndcg_power2(scores_for_test_utils, labels_for_test_utils, topk=topk)

    self.assertEqual(metric.name, metric_name)
    self.assertAlmostEqual(ndcg_p2, ndcg_tfr, places=self.places)
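# Reference formula assumed for the power-2 NDCG used above (standard formulation; the exact
# behaviour is defined by testing_utils.compute_ndcg_power2):
#   DCG@k  = sum_{i=1..k} (2**label_i - 1) / log2(i + 1), with documents sorted by score
#   NDCG@k = DCG@k / IDCG@k, where IDCG@k is the DCG of the ideal (label-sorted) ordering
# Worked example for one query with labels [0, 2], where the label-0 document is ranked first:
#   DCG@2  = (2**0 - 1) / log2(2) + (2**2 - 1) / log2(3) = 0 + 3 / 1.585 ~= 1.893
#   IDCG@2 = (2**2 - 1) / log2(2) + 0 = 3, so NDCG@2 ~= 0.631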
def _testAuc(self, prob, labels, expected_auc):
    """Unit test for AUC.

    The expected AUC is obtained using roc_auc_score() from sklearn.
    """
    for task_type in [TaskType.BINARY_CLASSIFICATION, TaskType.RANKING]:
        metric_name = 'auc'
        metric = metrics.get_metric_fn(metric_name, task_type=task_type, num_classes=1)()
        metric.reset_states()
        metric.update_state(tf.constant(labels.reshape([-1, 1])), tf.constant(prob.reshape([-1, 1])))
        auc = metric.result().numpy()
        self.assertEqual(metric.name, metric_name)
        self.assertAlmostEqual(auc, expected_auc, places=self.places)
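# Illustrative sketch (hypothetical data, not part of the original suite) of how an expected AUC
# can be derived with sklearn's roc_auc_score, as referenced in the docstring above. Assumes
# sklearn and numpy are available in the test environment.
def testAucAgainstSklearn(self):
    import numpy as np
    from sklearn.metrics import roc_auc_score
    labels = np.array([0., 1., 1., 0.])
    prob = np.array([0.2, 0.8, 0.4, 0.6])
    # 3 of the 4 positive/negative pairs are correctly ordered -> expected AUC = 0.75
    self._testAuc(prob, labels, roc_auc_score(labels, prob))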
def testMrr(self):
    """Tests compute_mrr()"""
    scores = [[1, 2, 3, 4],
              [5, 1, 2, 6]]
    labels = [[0, 0, 1, label_padding],
              [1, 0, 0, 0]]
    topk = 4

    metric_name = f'mrr@{topk}'
    metric = metrics.get_metric_fn(metric_name, task_type=TaskType.RANKING, num_classes=None)()
    metric.update_state(labels, scores)
    mrr = metric.result().numpy()

    # Query 1: the padded document is ignored, so the relevant document ranks first (RR = 1.0).
    # Query 2: the relevant document ranks second (RR = 0.5). MRR = (1.0 + 0.5) / 2 = 0.75.
    self.assertEqual(metric.name, metric_name)
    self.assertAllEqual(mrr, 0.75)
def testConfusionMatrix(self):
    """Tests compute_confusion_matrix"""
    labels = [[0], [3], [2], [2], [3], [3]]
    # Predicted labels based on scores: [3, 3, 1, 2, 3, 3]
    scores = [[1.1, 2.1, 3.1, 4.1],
              [-1, -2, -3, 4],
              [-2, 2, -1, 0],
              [1, 2, 5, 3],
              [2, 3, 4, 5],
              [4, 5, 6, 7]]
    # Expected confusion matrix (rows: true labels, columns: predicted labels)
    expected_cm = [[0, 0, 0, 1],
                   [0, 0, 0, 0],
                   [0, 1, 1, 0],
                   [0, 0, 0, 3]]
    metric_name = 'confusion_matrix'
    metric = metrics.get_metric_fn(metric_name, num_classes=4, task_type=TaskType.CLASSIFICATION)()
    metric.update_state(tf.constant(labels), tf.constant(scores))
    cm = metric.result().numpy()
    self.assertEqual(metric.name, metric_name)
    self.assertAllEqual(cm, expected_cm)
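# Illustrative cross-check (not part of the original suite): assuming the metric takes the argmax
# of the scores as the predicted label, the expected confusion matrix above can be reproduced
# with tf.math.confusion_matrix.
def testConfusionMatrixCrossCheck(self):
    labels = [0, 3, 2, 2, 3, 3]
    scores = [[1.1, 2.1, 3.1, 4.1], [-1, -2, -3, 4], [-2, 2, -1, 0], [1, 2, 5, 3], [2, 3, 4, 5], [4, 5, 6, 7]]
    predictions = tf.argmax(tf.constant(scores), axis=-1)
    cm = tf.math.confusion_matrix(labels, predictions, num_classes=4).numpy()
    self.assertAllEqual(cm, [[0, 0, 0, 1], [0, 0, 0, 0], [0, 1, 1, 0], [0, 0, 0, 3]])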