예제 #1
0
 def setUp(self):
     """Create the classifier specs and the kNN / tree classifiers under test.

     Every entry of ``self.classifiers`` is a ``(learner_name, args, kwargs)``
     triple describing how an ``OrangeClassifier`` should be constructed.
     """
     self.classifiers = [
         ("BayesLearner", [], {}),
         ("TreeLearner", [], {}),
         ("kNNLearner", [], {"k": 1}),
         ("kNNLearner", [], {"k": 3}),
         ("TreeLearner", [], {}),
     ]

     def build(index):
         # Forward the positional and keyword arguments as well as the name;
         # passing only the name silently dropped "k", which made knn1 and
         # knn3 indistinguishable.
         name, args, kwargs = self.classifiers[index]
         return OrangeClassifier(name, *args, **kwargs)

     self.knn1 = build(2)
     self.knn3 = build(3)
     self.tree = build(4)
예제 #2
0
 def setUp(self):
     """Build shuffled test samples and train a k=1 kNN classifier.

     The training labels are half '+' followed by half '-', with the same
     total length as the fixed test label pattern.
     """
     self.meta = simple_meta_attrs(['-', '+'])

     def make_sample(i, v):
         # Two-value sample; last_is_class=True marks the label as the class.
         return Sample(
             [i, self.meta[1].set_value(v)], self.meta, last_is_class=True)

     self.cs = make_sample
     self.classifier = OrangeClassifier('kNNLearner', k=1)

     test_labels = '+++-++-+-+--+---'
     # Floor division: a string repeat count must be an int, and "/" yields
     # a float under true division (Python 3 / __future__ division).
     half = len(test_labels) // 2
     train_labels = ('+' * half) + ('-' * half)

     # Test samples are built before training samples, as in the original.
     self.test_samples = [self.cs(i, v) for i, v in enumerate(test_labels)]
     self.train_samples = [self.cs(i, v) for i, v in enumerate(train_labels)]

     random.shuffle(self.test_samples)
     self.classifier.train(self.train_samples)
예제 #3
0
 def setUp(self):
     """Prepare the fixture: shuffled test samples and a trained 1-NN classifier.

     Training labels consist of N // 2 '+' followed by N // 2 '-', where N is
     the length of the hard-coded test label string.
     """
     self.meta = simple_meta_attrs(['-', '+'])

     def sample_from(i, v):
         # Wrap index ``i`` and label ``v`` (converted through the second
         # meta attribute) into a Sample whose last value is the class.
         return Sample([i, self.meta[1].set_value(v)], self.meta, last_is_class=True)

     self.cs = sample_from
     self.classifier = OrangeClassifier('kNNLearner', k=1)

     test_pattern = '+++-++-+-+--+---'
     # "//" keeps the repeat count an int regardless of the division mode;
     # "N / 2" becomes a float under true division and str * float fails.
     half_len = len(test_pattern) // 2
     train_pattern = '+' * half_len + '-' * half_len

     built = []
     for pattern in (test_pattern, train_pattern):
         built.append([self.cs(i, v) for i, v in enumerate(pattern)])
     self.test_samples, self.train_samples = built

     random.shuffle(self.test_samples)
     self.classifier.train(self.train_samples)
예제 #4
0
 def setUp(self):
     """Create classifier specs, the classifiers under test and a training set.

     ``self.classifiers`` holds ``(learner_name, args, kwargs)`` triples.
     ``self.train_set`` contains six three-valued samples: those whose first
     value is 0 carry class 0 and those whose first value is 3 carry class 1.
     """
     self.classifiers = [
         ("BayesLearner", [], {}),
         ("TreeLearner", [], {}),
         ("kNNLearner", [], {"k": 1}),
         ("kNNLearner", [], {"k": 3}),
         ("TreeLearner", [], {}),
     ]

     def build(index):
         # Pass args/kwargs along with the name so that the "k" setting of
         # each kNN spec is actually applied (it was previously ignored).
         name, args, kwargs = self.classifiers[index]
         return OrangeClassifier(name, *args, **kwargs)

     self.knn1 = build(2)
     self.knn3 = build(3)
     self.tree = build(4)

     self.cls_meta = NominalAttribute([0, 1])
     # range() works on both Python 2 and 3; xrange exists only on Python 2.
     self.meta = [NumericAttribute() for _ in range(3)]

     rows = [
         ([0, 0, 0], 0),
         ([0, 1, 0], 0),
         ([0, 0, 1], 0),
         ([3, 0, 0], 1),
         ([3, 1, 0], 1),
         ([3, 0, 1], 1),
     ]
     self.train_set = [
         Sample(values, self.meta, label, self.cls_meta)
         for values, label in rows
     ]
예제 #5
0
class TestAUCROCStatistics(unittest.TestCase):
    """Check ROC and AUCROC results against values derived from TPR and FPR."""

    def setUp(self):
        """Build shuffled test samples and train a k=1 kNN classifier."""
        self.meta = simple_meta_attrs(['-', '+'])

        def make_sample(i, v):
            # Two-value sample; last_is_class=True marks the label as class.
            return Sample([i, self.meta[1].set_value(v)], self.meta, last_is_class=True)

        self.cs = make_sample
        self.classifier = OrangeClassifier('kNNLearner', k=1)

        test_labels = '+++-++-+-+--+---'
        # Floor division keeps the repeat count an int under true division
        # (str * float raises TypeError).
        half = len(test_labels) // 2
        train_labels = ('+' * half) + ('-' * half)

        # Test samples are created before training samples, as before.
        self.test_samples = [self.cs(i, v) for i, v in enumerate(test_labels)]
        self.train_samples = [self.cs(i, v) for i, v in enumerate(train_labels)]

        random.shuffle(self.test_samples)
        self.classifier.train(self.train_samples)

    def _rates(self):
        """Return ``(tpr, fpr)`` for the current classifier on the test samples."""
        tpr = TPR(self.classifier, self.test_samples, '+')
        fpr = FPR(self.classifier, self.test_samples, '+')
        return tpr, fpr

    def _test_roc_eq(self):
        """Assert that the second ROC point equals ``(fpr, tpr)``."""
        roc = ROC(self.classifier, self.test_samples, '+')
        middle = roc[1]
        tpr, fpr = self._rates()
        self.assertEqual(middle[1], tpr)
        self.assertEqual(middle[0], fpr)

    def test_ROC(self):
        self._test_roc_eq()

    def _test_auc_eq(self):
        """Assert that AUCROC matches the area computed from TPR and FPR."""
        tpr, fpr = self._rates()
        auc = AUCROC(self.classifier, self.test_samples)
        # Triangle (base fpr, height tpr) plus trapezoid (width 1 - fpr,
        # sides tpr and 1). Float divisors avoid integer truncation on
        # Python 2 should the rates ever be ints.
        expected_area = fpr * tpr / 2.0 + (1 - fpr) * (tpr + 1) / 2.0
        expected_area_v2 = (1 + tpr - fpr) / 2.0
        # Sanity check of the algebra; the two forms are evaluated
        # differently, so compare with a tolerance rather than exactly.
        self.assertAlmostEqual(expected_area, expected_area_v2)
        self.assertAlmostEqual(auc, expected_area)

    def test_AUC(self):
        self._test_auc_eq()

    def test_multiplerandom_test(self):
        """Re-train on 200 random label patterns and re-check ROC and AUC."""
        N = len(self.test_samples)

        def gen_test_case():
            schema = ''.join('+' if random.random() >= 0.5 else '-' for _ in range(N))
            return [self.cs(i, v) for i, v in enumerate(schema)]

        for _ in range(200):
            train = gen_test_case()
            self.classifier.train(train)
            self._test_roc_eq()
            self._test_auc_eq()

    def test_avg_auc_roc_with_splited_cv(self):
        """The averaged AUC must lie between the per-split minimum and maximum."""
        sets = split_data_cv(self.test_samples)

        def auc_for_split(train, test):
            self.classifier.train(train)
            return AUCROC(self.classifier, test)

        aucs = [auc_for_split(train, test) for train, test in sets]
        max_auc, min_auc = max(aucs), min(aucs)
        avg_auc = aucroc_avg_classifier_performance(self.classifier, sets)
        # NOTE(review): avg_auc[0] is presumably the mean AUC - confirm
        # against aucroc_avg_classifier_performance.
        self.assertTrue(min_auc <= avg_auc[0] <= max_auc)
예제 #6
0
    def test_classifier_creation(self):
        """Each spec must yield a wrapper around the matching orange learner type."""
        for spec in self.classifiers:
            learner_name, positional, keyword = spec
            wrapped = OrangeClassifier(learner_name, *positional, **keyword)
            expected_type = getattr(orange, learner_name)
            actual_type = type(wrapped.classifier)
            self.assertEqual(expected_type, actual_type)
예제 #7
0
class TestAUCROCStatistics(unittest.TestCase):
    """Tests relating ROC / AUCROC output to the classifier's TPR and FPR."""

    def setUp(self):
        """Create the sample fixture and train a 1-NN classifier on it."""
        self.meta = simple_meta_attrs(['-', '+'])

        def sample_from(i, v):
            # Index ``i`` plus label ``v`` (converted via the second meta
            # attribute); last_is_class=True marks the label as the class.
            return Sample(
                [i, self.meta[1].set_value(v)], self.meta, last_is_class=True)

        self.cs = sample_from
        self.classifier = OrangeClassifier('kNNLearner', k=1)

        test_pattern = '+++-++-+-+--+---'
        # "//" yields an int in every division mode; "N / 2" is a float
        # under true division and cannot be used as a string repeat count.
        half_len = len(test_pattern) // 2
        train_pattern = '+' * half_len + '-' * half_len

        built = []
        for pattern in (test_pattern, train_pattern):
            built.append([self.cs(i, v) for i, v in enumerate(pattern)])
        self.test_samples, self.train_samples = built

        random.shuffle(self.test_samples)
        self.classifier.train(self.train_samples)

    def _test_roc_eq(self):
        """Check that ``roc[1]`` holds ``(fpr, tpr)`` for the '+' class."""
        roc = ROC(self.classifier, self.test_samples, '+')
        middle = roc[1]
        tpr, fpr = (rate(self.classifier, self.test_samples, '+')
                    for rate in (TPR, FPR))
        self.assertEqual(middle[1], tpr)
        self.assertEqual(middle[0], fpr)

    def test_ROC(self):
        self._test_roc_eq()

    def _test_auc_eq(self):
        """Check AUCROC against the closed-form area built from TPR and FPR."""
        tpr, fpr = (rate(self.classifier, self.test_samples, '+')
                    for rate in (TPR, FPR))
        auc = AUCROC(self.classifier, self.test_samples)
        # Explicit float divisors: with int rates, "/ 2" would truncate on
        # Python 2 and disagree with the second form below.
        expected_area = fpr * tpr / 2.0 + (1 - fpr) * (tpr + 1) / 2.0
        expected_area_v2 = (1 + tpr - fpr) / 2.0
        # The two expressions are algebraically equal but rounded
        # differently, hence the tolerance-based comparison.
        self.assertAlmostEqual(expected_area, expected_area_v2)
        self.assertAlmostEqual(auc, expected_area)

    def test_AUC(self):
        self._test_auc_eq()

    def test_multiplerandom_test(self):
        """Train on 200 random label patterns, checking ROC and AUC each time."""
        N = len(self.test_samples)

        def gen_test_case():
            # range() instead of xrange so this also runs on Python 3.
            schema = ''.join(
                '+' if random.random() >= 0.5 else '-' for _ in range(N))
            return [self.cs(i, v) for i, v in enumerate(schema)]

        for _ in range(200):
            train = gen_test_case()
            self.classifier.train(train)
            self._test_roc_eq()
            self._test_auc_eq()

    def test_avg_auc_roc_with_splited_cv(self):
        """Averaged AUC must fall within the range of the per-split AUCs."""
        sets = split_data_cv(self.test_samples)

        def split_auc(train, test):
            self.classifier.train(train)
            return AUCROC(self.classifier, test)

        aucs = [split_auc(train, test) for train, test in sets]
        max_auc = max(aucs)
        min_auc = min(aucs)
        avg_auc = aucroc_avg_classifier_performance(self.classifier, sets)
        # NOTE(review): the first element of avg_auc is assumed to be the
        # average itself - verify against the helper's return value.
        self.assertTrue(min_auc <= avg_auc[0] <= max_auc)