def setUp(self):
    """Declare the learner configurations and build the classifiers under test.

    ``self.classifiers`` holds ``(learner_name, args, kwargs)`` triples; the
    ``knn1``, ``knn3`` and ``tree`` fixtures are built from entries 2, 3 and 4.
    """
    self.classifiers = [
        ("BayesLearner", [], {}),
        ("TreeLearner", [], {}),
        ("kNNLearner", [], {"k": 1}),
        ("kNNLearner", [], {"k": 3}),
        ("TreeLearner", [], {}),
    ]

    def build(index):
        # Forward the configured args/kwargs as well as the name: passing
        # only the learner name drops ``k`` and makes knn1 and knn3 identical.
        name, args, kwargs = self.classifiers[index]
        return OrangeClassifier(name, *args, **kwargs)

    self.knn1 = build(2)
    self.knn3 = build(3)
    self.tree = build(4)
def setUp(self):
    """Create labelled samples and train a k=1 kNN classifier on them.

    The test labels come from a fixed 16-character pattern and are shuffled;
    the training labels are ``'+'`` for the first half of the indices and
    ``'-'`` for the second half.
    """
    self.meta = simple_meta_attrs(['-', '+'])

    def make_sample(i, v):
        # Two-value sample: the index ``i`` followed by the label ``v``;
        # ``last_is_class=True`` is passed so the label is the class value.
        return Sample(
            [i, self.meta[1].set_value(v)], self.meta, last_is_class=True)

    self.cs = make_sample
    self.classifier = OrangeClassifier('kNNLearner', k=1)

    test_labels = '+++-++-+-+--+---'
    # Floor division keeps the repetition count an int even when ``/`` is
    # true division (``from __future__ import division`` or Python 3).
    half = len(test_labels) // 2
    train_labels = ('+' * half) + ('-' * half)

    self.test_samples = [self.cs(i, v) for i, v in enumerate(test_labels)]
    self.train_samples = [self.cs(i, v) for i, v in enumerate(train_labels)]

    random.shuffle(self.test_samples)
    self.classifier.train(self.train_samples)
def setUp(self):
    """Prepare the shared fixture: samples plus a trained k=1 kNN classifier.

    ``self.cs(i, v)`` builds a sample from an index and a ``'+'``/``'-'``
    label. Test samples follow a fixed label pattern and are shuffled;
    training samples are half ``'+'`` followed by half ``'-'``.
    """
    self.meta = simple_meta_attrs(['-', '+'])
    self.cs = lambda i, v: Sample(
        [i, self.meta[1].set_value(v)], self.meta, last_is_class=True)
    self.classifier = OrangeClassifier('kNNLearner', k=1)

    test_pattern = '+++-++-+-+--+---'
    # ``//`` instead of ``/``: a str can only be repeated by an int, and
    # ``/`` yields a float under true division.
    per_class = len(test_pattern) // 2
    train_pattern = '+' * per_class + '-' * per_class

    # Same order as before: test samples are built first, then training ones.
    self.test_samples, self.train_samples = [
        [self.cs(i, v) for i, v in enumerate(pattern)]
        for pattern in (test_pattern, train_pattern)
    ]

    random.shuffle(self.test_samples)
    self.classifier.train(self.train_samples)
def setUp(self):
    """Build the classifier fixtures and a small six-sample training set.

    ``self.classifiers`` holds ``(learner_name, args, kwargs)`` triples; the
    ``knn1``, ``knn3`` and ``tree`` fixtures are built from entries 2, 3 and 4.
    """
    self.classifiers = [
        ("BayesLearner", [], {}),
        ("TreeLearner", [], {}),
        ("kNNLearner", [], {"k": 1}),
        ("kNNLearner", [], {"k": 3}),
        ("TreeLearner", [], {}),
    ]

    def build(index):
        # Forward the configured args/kwargs as well as the name: passing
        # only the learner name drops ``k`` and makes knn1 and knn3 identical.
        name, args, kwargs = self.classifiers[index]
        return OrangeClassifier(name, *args, **kwargs)

    self.knn1 = build(2)
    self.knn3 = build(3)
    self.tree = build(4)

    self.cls_meta = NominalAttribute([0, 1])
    self.meta = [NumericAttribute() for _ in xrange(3)]
    # Samples whose first value is 0 carry 0 as the third argument and those
    # whose first value is 3 carry 1 (presumably the class label, paired
    # with cls_meta — confirm against Sample's signature).
    self.train_set = [
        Sample([0, 0, 0], self.meta, 0, self.cls_meta),
        Sample([0, 1, 0], self.meta, 0, self.cls_meta),
        Sample([0, 0, 1], self.meta, 0, self.cls_meta),
        Sample([3, 0, 0], self.meta, 1, self.cls_meta),
        Sample([3, 1, 0], self.meta, 1, self.cls_meta),
        Sample([3, 0, 1], self.meta, 1, self.cls_meta),
    ]
class TestAUCROCStatistics(unittest.TestCase):
    """Checks ROC and AUC values against figures derived from TPR and FPR."""

    def setUp(self):
        """Create labelled samples and train a k=1 kNN classifier on them."""
        self.meta = simple_meta_attrs(['-', '+'])
        self.cs = lambda i, v: Sample(
            [i, self.meta[1].set_value(v)], self.meta, last_is_class=True)
        self.classifier = OrangeClassifier('kNNLearner', k=1)

        test_samples = '+++-++-+-+--+---'
        # Floor division keeps the repetition count an int even when ``/``
        # is true division (``from __future__ import division`` / Python 3).
        half = len(test_samples) // 2
        train_samples = ('+' * half) + ('-' * half)

        self.test_samples = [
            self.cs(i, v) for i, v in enumerate(test_samples)]
        self.train_samples = [
            self.cs(i, v) for i, v in enumerate(train_samples)]

        random.shuffle(self.test_samples)
        self.classifier.train(self.train_samples)

    def _rates(self):
        """Return ``(tpr, fpr)`` of the classifier on the test samples."""
        tpr = TPR(self.classifier, self.test_samples, '+')
        fpr = FPR(self.classifier, self.test_samples, '+')
        return tpr, fpr

    def _test_roc_eq(self):
        """Assert that the second ROC point is ``(fpr, tpr)``."""
        roc = ROC(self.classifier, self.test_samples, '+')
        middle = roc[1]
        tpr, fpr = self._rates()
        self.assertEqual(middle[1], tpr)
        self.assertEqual(middle[0], fpr)

    def test_ROC(self):
        self._test_roc_eq()

    def _test_auc_eq(self):
        """Assert that AUCROC equals the area computed from TPR and FPR."""
        tpr, fpr = self._rates()
        auc = AUCROC(self.classifier, self.test_samples)
        # Divide by 2. everywhere so integer rates cannot truncate the area.
        expected_area = fpr * tpr / 2. + (1 - fpr) * (tpr + 1) / 2.
        expected_area_v2 = (1 + tpr - fpr) / 2.
        # Sanity check that the two closed forms agree; floats are compared
        # approximately because rounding may differ between the expressions.
        self.assertAlmostEqual(expected_area, expected_area_v2)
        self.assertAlmostEqual(auc, expected_area)

    def test_AUC(self):
        self._test_auc_eq()

    def test_multiplerandom_test(self):
        """Re-train on 200 random labelings and re-check ROC and AUC."""
        N = len(self.test_samples)

        def gen_test_case():
            schema = ''.join(
                ['+' if random.random() >= 0.5 else '-' for _ in xrange(N)])
            return [self.cs(i, v) for i, v in enumerate(schema)]

        for _ in xrange(200):
            train = gen_test_case()
            self.classifier.train(train)
            self._test_roc_eq()
            self._test_auc_eq()

    def test_avg_auc_roc_with_splited_cv(self):
        """The averaged AUC must lie between the per-split min and max AUC."""
        sets = split_data_cv(self.test_samples)

        def auc_for_split(train, test):
            self.classifier.train(train)
            return AUCROC(self.classifier, test)

        aucs = [auc_for_split(train, test) for train, test in sets]
        max_auc, min_auc = max(aucs), min(aucs)
        avg_auc = aucroc_avg_classifier_performance(self.classifier, sets)
        self.assertTrue(min_auc <= avg_auc[0] <= max_auc)
def test_classifier_creation(self):
    """Every configured learner is wrapped as the matching ``orange`` type."""
    for learner_name, positional, keywords in self.classifiers:
        wrapper = OrangeClassifier(learner_name, *positional, **keywords)
        # The wrapped object's type must be the class of the same name
        # looked up on the ``orange`` module.
        expected_type = getattr(orange, learner_name)
        actual_type = type(wrapper.classifier)
        self.assertEqual(expected_type, actual_type)
class TestAUCROCStatistics(unittest.TestCase):
    """ROC/AUC consistency tests built around a k=1 kNN classifier."""

    def setUp(self):
        """Prepare shuffled test samples and a classifier trained on a
        half-``'+'``, half-``'-'`` training set."""
        self.meta = simple_meta_attrs(['-', '+'])

        def make_sample(i, v):
            return Sample(
                [i, self.meta[1].set_value(v)],
                self.meta,
                last_is_class=True)

        self.cs = make_sample
        self.classifier = OrangeClassifier('kNNLearner', k=1)

        test_pattern = '+++-++-+-+--+---'
        # ``//`` instead of ``/``: a str can only be repeated by an int, and
        # ``/`` yields a float under true division.
        per_class = len(test_pattern) // 2
        train_pattern = '+' * per_class + '-' * per_class

        # Test samples are built first, then training ones.
        self.test_samples, self.train_samples = [
            [self.cs(i, v) for i, v in enumerate(pattern)]
            for pattern in (test_pattern, train_pattern)
        ]

        random.shuffle(self.test_samples)
        self.classifier.train(self.train_samples)

    def _test_roc_eq(self):
        """Assert that ``ROC(...)[1]`` is the ``(fpr, tpr)`` point."""
        roc = ROC(self.classifier, self.test_samples, '+')
        middle = roc[1]
        tpr, fpr = [
            rate(self.classifier, self.test_samples, '+')
            for rate in (TPR, FPR)
        ]
        self.assertEqual(middle[1], tpr)
        self.assertEqual(middle[0], fpr)

    def test_ROC(self):
        self._test_roc_eq()

    def _test_auc_eq(self):
        """Assert that AUCROC matches the area derived from TPR and FPR."""
        tpr, fpr = [
            rate(self.classifier, self.test_samples, '+')
            for rate in (TPR, FPR)
        ]
        auc = AUCROC(self.classifier, self.test_samples)
        # Float divisor throughout: ``/ 2`` would truncate under Python 2
        # integer division if the rates were ever ints.
        expected_area = fpr * tpr / 2. + (1 - fpr) * (tpr + 1) / 2.
        expected_area_v2 = (1 + tpr - fpr) / 2.
        # The two closed forms are algebraically equal; compare with a
        # tolerance because their floating-point rounding may differ.
        self.assertAlmostEqual(expected_area, expected_area_v2)
        self.assertAlmostEqual(auc, expected_area)

    def test_AUC(self):
        self._test_auc_eq()

    def test_multiplerandom_test(self):
        """Repeat the ROC and AUC checks after training on random labels."""
        N = len(self.test_samples)

        def gen_test_case():
            schema = ''.join(
                '+' if random.random() >= 0.5 else '-' for _ in xrange(N))
            return [self.cs(i, v) for i, v in enumerate(schema)]

        for _ in xrange(200):
            self.classifier.train(gen_test_case())
            self._test_roc_eq()
            self._test_auc_eq()

    def test_avg_auc_roc_with_splited_cv(self):
        """Averaged AUC over the splits is bounded by the per-split AUCs."""
        sets = split_data_cv(self.test_samples)

        aucs = []
        for train, test in sets:
            self.classifier.train(train)
            aucs.append(AUCROC(self.classifier, test))

        max_auc = max(aucs)
        min_auc = min(aucs)
        avg_auc = aucroc_avg_classifier_performance(self.classifier, sets)
        self.assertTrue(min_auc <= avg_auc[0] <= max_auc)