class TestAUCROCStatistics(unittest.TestCase):
    """Cross-check ROC and AUCROC results against TPR/FPR of the same classifier.

    Every test relies on the fixture built in ``setUp``: an
    ``OrangeClassifier('kNNLearner', k=1)`` trained on 16 labelled samples and
    a shuffled list of 16 labelled test samples.
    """

    def setUp(self):
        """Create the sample factory, both sample lists and a trained classifier."""
        self.meta = simple_meta_attrs(['-', '+'])
        # Sample factory: the values are [index, class value]; the last value
        # is flagged as the class.
        self.cs = lambda i, v: Sample([i, self.meta[1].set_value(v)], self.meta, last_is_class=True)
        self.classifier = OrangeClassifier('kNNLearner', k=1)
        test_labels = '+++-++-+-+--+---'
        # Floor division keeps the repeat count an int under true division
        # (Python 3 / ``from __future__ import division``) as well.
        half = len(test_labels) // 2
        train_labels = ('+' * half) + ('-' * half)
        self.test_samples = [self.cs(i, v) for i, v in enumerate(test_labels)]
        self.train_samples = [self.cs(i, v) for i, v in enumerate(train_labels)]
        random.shuffle(self.test_samples)
        self.classifier.train(self.train_samples)

    def _rates(self):
        """Return ``(tpr, fpr)`` of the classifier on the test samples for class '+'."""
        tpr = TPR(self.classifier, self.test_samples, '+')
        fpr = FPR(self.classifier, self.test_samples, '+')
        return tpr, fpr

    def _test_roc_eq(self):
        """Assert that the second ROC point equals ``(fpr, tpr)``."""
        roc = ROC(self.classifier, self.test_samples, '+')
        middle = roc[1]
        tpr, fpr = self._rates()
        self.assertEqual(middle[1], tpr)
        self.assertEqual(middle[0], fpr)

    def test_ROC(self):
        """ROC check on the classifier trained in ``setUp``."""
        self._test_roc_eq()

    def _test_auc_eq(self):
        """Assert that AUCROC matches the area derived from TPR and FPR."""
        tpr, fpr = self._rates()
        auc = AUCROC(self.classifier, self.test_samples)
        # Triangle of base fpr and height tpr, plus a trapezoid of width
        # (1 - fpr) with parallel sides tpr and 1.  The float divisor avoids
        # integer truncation on Python 2 should the rates ever be ints.
        expected_area = fpr * tpr / 2.0 + (1 - fpr) * (tpr + 1) / 2.0
        # Same area after simplification; kept as a sanity check of the math.
        expected_area_v2 = (1 + tpr - fpr) / 2.0
        # These values come out of float arithmetic, so compare approximately.
        self.assertAlmostEqual(expected_area, expected_area_v2)
        self.assertAlmostEqual(auc, expected_area)

    def test_AUC(self):
        """AUC check on the classifier trained in ``setUp``."""
        self._test_auc_eq()

    def test_multiplerandom_test(self):
        """Repeat the ROC and AUC checks after retraining on random labels."""
        N = len(self.test_samples)

        def gen_test_case():
            # N samples, each labelled '+' or '-' by a random draw.
            schema = ''.join(['+' if random.random() >= 0.5 else '-' for _ in range(N)])
            return [self.cs(i, v) for i, v in enumerate(schema)]

        for _ in range(200):
            self.classifier.train(gen_test_case())
            self._test_roc_eq()
            self._test_auc_eq()

    def test_avg_auc_roc_with_splited_cv(self):
        """The averaged AUC must lie between the smallest and largest per-split AUC."""
        sets = split_data_cv(self.test_samples)

        def auc_for_split(train, test):
            self.classifier.train(train)
            return AUCROC(self.classifier, test)

        aucs = [auc_for_split(train, test) for train, test in sets]
        min_auc = min(aucs)
        max_auc = max(aucs)
        avg_auc = aucroc_avg_classifier_performance(self.classifier, sets)
        # NOTE(review): element 0 is presumably the mean AUC -- confirm against
        # aucroc_avg_classifier_performance.
        self.assertTrue(min_auc <= avg_auc[0] <= max_auc)
# Example #2
# 0
class TestAUCROCStatistics(unittest.TestCase):
    """Verify that ROC and AUCROC agree with TPR/FPR for one classifier.

    The fixture from ``setUp`` provides ``self.classifier`` (an
    ``OrangeClassifier('kNNLearner', k=1)``), ``self.train_samples`` and a
    shuffled ``self.test_samples``; each list holds 16 labelled samples.
    """

    def setUp(self):
        """Build the sample factory, the sample lists and train the classifier."""
        self.meta = simple_meta_attrs(['-', '+'])
        # Factory for one sample: values are [index, class value] and the
        # last value is marked as the class.
        self.cs = lambda i, v: Sample(
            [i, self.meta[1].set_value(v)], self.meta, last_is_class=True)
        self.classifier = OrangeClassifier('kNNLearner', k=1)
        test_labels = '+++-++-+-+--+---'
        # ``//`` rather than ``/``: string repetition needs an int, and ``/``
        # yields a float under true division (Python 3).
        half = len(test_labels) // 2
        train_labels = ('+' * half) + ('-' * half)
        self.test_samples = [
            self.cs(i, v) for i, v in enumerate(test_labels)
        ]
        self.train_samples = [
            self.cs(i, v) for i, v in enumerate(train_labels)
        ]
        random.shuffle(self.test_samples)
        self.classifier.train(self.train_samples)

    def _rates(self):
        """Return ``(tpr, fpr)`` for class '+' on the test samples."""
        tpr = TPR(self.classifier, self.test_samples, '+')
        fpr = FPR(self.classifier, self.test_samples, '+')
        return tpr, fpr

    def _test_roc_eq(self):
        """Check that ``roc[1]`` is the point ``(fpr, tpr)``."""
        roc = ROC(self.classifier, self.test_samples, '+')
        middle = roc[1]
        tpr, fpr = self._rates()
        self.assertEqual(middle[1], tpr)
        self.assertEqual(middle[0], fpr)

    def test_ROC(self):
        """Run the ROC check with the classifier trained in ``setUp``."""
        self._test_roc_eq()

    def _test_auc_eq(self):
        """Check that AUCROC equals the area computed from TPR and FPR."""
        tpr, fpr = self._rates()
        auc = AUCROC(self.classifier, self.test_samples)
        # Area of a triangle (base fpr, height tpr) plus a trapezoid (width
        # 1 - fpr, parallel sides tpr and 1).  Dividing by 2.0 prevents
        # integer truncation on Python 2 if the rates happen to be ints.
        expected_area = fpr * tpr / 2.0 + (1 - fpr) * (tpr + 1) / 2.0
        # Simplified form of the same area, kept to double-check the math.
        expected_area_v2 = (1 + tpr - fpr) / 2.0
        # Results of float arithmetic are compared approximately, not exactly.
        self.assertAlmostEqual(expected_area, expected_area_v2)
        self.assertAlmostEqual(auc, expected_area)

    def test_AUC(self):
        """Run the AUC check with the classifier trained in ``setUp``."""
        self._test_auc_eq()

    def test_multiplerandom_test(self):
        """Retrain on randomly labelled samples 200 times, re-checking each time."""
        N = len(self.test_samples)

        def gen_test_case():
            # One random '+'/'-' label per sample, N samples in total.
            schema = ''.join(
                ['+' if random.random() >= 0.5 else '-' for _ in range(N)])
            return [self.cs(i, v) for i, v in enumerate(schema)]

        for _ in range(200):
            self.classifier.train(gen_test_case())
            self._test_roc_eq()
            self._test_auc_eq()

    def test_avg_auc_roc_with_splited_cv(self):
        """The averaged AUC must fall within the range of the per-split AUCs."""
        sets = split_data_cv(self.test_samples)

        def auc_for_split(train, test):
            self.classifier.train(train)
            return AUCROC(self.classifier, test)

        aucs = [auc_for_split(train, test) for train, test in sets]
        min_auc = min(aucs)
        max_auc = max(aucs)
        avg_auc = aucroc_avg_classifier_performance(self.classifier, sets)
        # NOTE(review): index 0 is presumably the mean AUC -- verify against
        # aucroc_avg_classifier_performance.
        self.assertTrue(min_auc <= avg_auc[0] <= max_auc)