def test_avg_auc_roc_with_splited_cv(self):
    """Check the averaged AUC-ROC lies within the per-split AUC range.

    The classifier is trained and scored on every (train, test) pair
    returned by ``split_data_cv``; the first element of the result of
    ``aucroc_avg_classifier_performance`` (presumably the mean AUC --
    confirm against that helper) must not fall outside the smallest and
    largest individual scores.
    """
    sets = split_data_cv(self.test_samples)

    # Score each split separately: fit on the training part, then
    # evaluate on the held-out part.
    aucs = []
    for train_part, test_part in sets:
        self.classifier.train(train_part)
        aucs.append(AUCROC(self.classifier, test_part))

    max_auc = max(aucs)
    min_auc = min(aucs)

    avg_auc = aucroc_avg_classifier_performance(self.classifier, sets)
    self.assertTrue(min_auc <= avg_auc[0] <= max_auc)
def test_split_data_cv(self):
    """Check the shape of the splits produced by ``split_data_cv``.

    For 100 randomly chosen fold counts between 2 and ``N // 3``, every
    (train, test) pair must satisfy:

    * no test sample also appears in the training part;
    * the test part holds ``N // folds`` or ``N // folds + 1`` samples;
    * train and test together account for all ``N`` samples.
    """
    N = 100
    for _ in range(100):
        samples = list(range(N))
        # Floor division is spelled out so the bounds stay integral even
        # when true division is in effect (Python 3 or
        # ``from __future__ import division``).
        folds = random.randint(2, N // 3)
        fold_size = N // folds
        for train, test in split_data_cv(samples, folds):
            for ts in test:
                self.assertTrue(ts not in train)
            self.assertTrue(fold_size <= len(test) <= fold_size + 1)
            self.assertEqual(N, len(test) + len(train))