def test_split_classifier(self):
    """Compare a trained SplitClassifier against an equivalent CrossValidation.

    A ``SplitClassifier`` wrapping ``SameSignClassifier`` is trained on
    ``self.data_bin_1``.  A clone of it is then run through
    ``CrossValidation`` (``NFoldPartitioner`` with ``mean_sample``
    post-processing) on the same dataset.  The test asserts that both
    report equal error, equal training error and equal confusion matrices,
    that the split classifier reaches 100 percent correct, that it holds
    ``len(ds.UC)`` confusion sets and sub-classifiers, and that its
    predictions on the dataset samples equal the dataset targets.
    """
    dataset = self.data_bin_1
    split_clf = SplitClassifier(
        clf=SameSignClassifier(),
        enable_ca=['stats', 'training_stats', 'feature_ids'])
    split_clf.train(dataset)

    # Errors collected by the split classifier itself during training.
    split_error = split_clf.ca.stats.error
    split_train_error = split_clf.ca.training_stats.error

    # Reference values: cross-validate a clone on the very same dataset.
    cross_val = CrossValidation(
        split_clf.clone(),
        NFoldPartitioner(),
        postproc=mean_sample(),
        enable_ca=['stats', 'training_stats'])
    cv_error = cross_val(dataset).samples.squeeze()
    cv_train_error = cross_val.ca.training_stats.error

    error_msg = ("We should get the same error using split classifier as"
                 " using CrossValidation. Got %s and %s"
                 % (split_error, cv_error))
    self.assertEqual(split_error, cv_error, msg=error_msg)

    train_error_msg = (
        "We should get the same training error using split classifier as"
        " using CrossValidation. Got %s and %s"
        % (split_train_error, cv_train_error))
    self.assertEqual(split_train_error, cv_train_error, msg=train_error_msg)

    self.assertEqual(
        split_clf.ca.stats.percent_correct,
        100,
        msg="Dummy clf should train perfectly")

    # CV and SplitClassifier should get the same confusion matrices
    assert_array_equal(split_clf.ca.stats.matrix, cross_val.ca.stats.matrix)

    self.assertEqual(
        len(split_clf.ca.stats.sets),
        len(dataset.UC),
        msg="Should have 1 confusion per each split")
    self.assertEqual(
        len(split_clf.clfs),
        len(dataset.UC),
        msg="Should have number of classifiers equal # of epochs")
    self.assertEqual(
        split_clf.predict(dataset.samples),
        list(dataset.targets),
        msg="Should classify correctly")

    # Historical note: feature_ids used to be a list of lists, and since
    # no feature-selecting classifier is used here, every feature was
    # expected to be utilized.  That no longer holds -- for
    # BoostedClassifier feature_ids is now the union of all features used
    # across the slave classifiers, which keeps the semantics clear.  The
    # assertions below are therefore disabled:
    # self.assertEqual(len(clf.feature_ids), len(ds.uniquechunks))
    # self.assertTrue(np.array([len(ids)==ds.nfeatures
    #                         for ids in clf.feature_ids]).all())

    # Only verify that a summary can be produced at all; its content is
    # not inspected.
    summary = split_clf.summary()
def test_split_classifier(self):
    """Verify SplitClassifier statistics match those of CrossValidation.

    Steps performed on ``self.data_bin_1``:

    1. train a ``SplitClassifier`` around ``SameSignClassifier`` with the
       ``stats``, ``training_stats`` and ``feature_ids`` conditional
       attributes enabled;
    2. cross-validate a clone of that classifier using
       ``NFoldPartitioner`` and ``mean_sample`` post-processing;
    3. assert that error, training error and confusion matrix agree
       between the two, and that the split classifier is 100 percent
       correct, has ``len(ds.UC)`` confusion sets and sub-classifiers,
       and predicts the dataset targets from the dataset samples.

    NOTE(review): a method with this same name is defined immediately
    before this one in the visible source; if both belong to the same
    class, this later definition replaces the earlier one -- confirm
    whether the duplication is intended.
    """
    data = self.data_bin_1

    wanted_ca = ['stats', 'training_stats', 'feature_ids']
    splitter = SplitClassifier(clf=SameSignClassifier(), enable_ca=wanted_ca)
    splitter.train(data)
    err = splitter.ca.stats.error
    train_err = splitter.ca.training_stats.error

    # Run the same procedure through CrossValidation on a cloned classifier
    cloned = splitter.clone()
    xval = CrossValidation(cloned, NFoldPartitioner(),
                           postproc=mean_sample(),
                           enable_ca=['stats', 'training_stats'])
    xval_result = xval(data)
    xval_err = xval_result.samples.squeeze()
    xval_train_err = xval.ca.training_stats.error

    self.assertEqual(
        err, xval_err,
        msg="We should get the same error using split classifier as"
            " using CrossValidation. Got %s and %s" % (err, xval_err))
    self.assertEqual(
        train_err, xval_train_err,
        msg="We should get the same training error using split classifier as"
            " using CrossValidation. Got %s and %s"
            % (train_err, xval_train_err))
    self.assertEqual(splitter.ca.stats.percent_correct, 100,
                     msg="Dummy clf should train perfectly")

    # CV and SplitClassifier should get the same confusion matrices
    assert_array_equal(splitter.ca.stats.matrix, xval.ca.stats.matrix)

    self.assertEqual(len(splitter.ca.stats.sets), len(data.UC),
                     msg="Should have 1 confusion per each split")
    self.assertEqual(len(splitter.clfs), len(data.UC),
                     msg="Should have number of classifiers equal # of epochs")
    self.assertEqual(splitter.predict(data.samples), list(data.targets),
                     msg="Should classify correctly")

    # feature_ids was once expected to be a list of lists covering all
    # features, because no feature-selecting classifier is involved.
    # NOT ANYMORE -- for BoostedClassifier it is now the union of all
    # features used across slave classifiers, which makes the semantics
    # clear.  Disabled checks kept for reference:
    # self.assertEqual(len(clf.feature_ids), len(ds.uniquechunks))
    # self.assertTrue(np.array([len(ids)==ds.nfeatures
    #                         for ids in clf.feature_ids]).all())

    # Smoke check only: make sure summary() can be called.
    summary = splitter.summary()