def test_split_classifier(self):
    """SplitClassifier must agree with an explicit cross-validation.

    Trains a SplitClassifier on the binary dataset and checks that its
    (training) error matches CrossValidatedTransferError run with a clone
    of the same base classifier.
    """
    dataset = self.data_bin_1
    split_clf = SplitClassifier(
        clf=SameSignClassifier(),
        splitter=NFoldSplitter(1),
        enable_ca=['confusion', 'training_confusion', 'feature_ids'])
    split_clf.train(dataset)            # train the beast
    error = split_clf.ca.confusion.error
    tr_error = split_clf.ca.training_confusion.error

    # equivalent explicit cross-validation with a clone of the classifier
    cloned = split_clf.clone()
    cv = CrossValidatedTransferError(
        TransferError(cloned),
        NFoldSplitter(),
        postproc=mean_sample(),
        enable_ca=['confusion', 'training_confusion'])
    cverror = cv(dataset).samples.squeeze()
    tr_cverror = cv.ca.training_confusion.error

    self.failUnlessEqual(
        error, cverror,
        msg="We should get the same error using split classifier as"
            " using CrossValidatedTransferError. Got %s and %s"
            % (error, cverror))
    self.failUnlessEqual(
        tr_error, tr_cverror,
        msg="We should get the same training error using split classifier as"
            " using CrossValidatedTransferError. Got %s and %s"
            % (tr_error, tr_cverror))
    self.failUnlessEqual(
        split_clf.ca.confusion.percent_correct, 100,
        msg="Dummy clf should train perfectly")
    # one confusion set and one slave classifier per chunk/split
    self.failUnlessEqual(
        len(split_clf.ca.confusion.sets), len(dataset.UC),
        msg="Should have 1 confusion per each split")
    self.failUnlessEqual(
        len(split_clf.clfs), len(dataset.UC),
        msg="Should have number of classifiers equal # of epochs")
    self.failUnlessEqual(
        split_clf.predict(dataset.samples), list(dataset.targets),
        msg="Should classify correctly")

    # feature_ids must be list of lists, and since it is not
    # feature-selecting classifier used - we expect all features
    # to be utilized
    #  NOT ANYMORE -- for BoostedClassifier we have now union of all
    #  used features across slave classifiers. That makes
    #  semantics clear. If you need to get deeper -- use upcoming
    #  harvesting facility ;-)
    # self.failUnlessEqual(len(clf.feature_ids), len(ds.uniquechunks))
    # self.failUnless(np.array([len(ids)==ds.nfeatures
    #                           for ids in clf.feature_ids]).all())

    # Just check if we get it at all ;-)
    summary = split_clf.summary()
def test_split_classifier_extended(self, clf_):
    """SplitClassifier vs. CrossValidation for an arbitrary slave classifier.

    Uses the modern CrossValidation/NFoldPartitioner API; errors are compared
    approximately since the slave classifier need not be deterministic
    across identical refits.
    """
    clone = clf_.clone()
    # pick a dataset matching the capabilities of the classifier under test
    ds = datasets['uni2%s' % self._get_clf_ds(clone)]
    clf = SplitClassifier(clf=clf_,                 # SameSignClassifier(),
                          enable_ca=['stats', 'feature_ids'])
    clf.train(ds)                       # train the beast
    error = clf.ca.stats.error

    cv = CrossValidation(clone, NFoldPartitioner(), postproc=mean_sample(),
                         enable_ca=['stats', 'training_stats'])
    cverror = cv(ds).samples.squeeze()

    self.failUnless(
        abs(error - cverror) < 0.01,
        msg="We should get the same error using split classifier as"
            " using CrossValidation. Got %s and %s"
            % (error, cverror))
    if cfg.getboolean('tests', 'labile', default='yes'):
        self.failUnless(error < 0.25,
                        msg="clf should generalize more or less fine. "
                            "Got error %s" % error)
    # one confusion set and one slave classifier per chunk/split
    self.failUnlessEqual(len(clf.ca.stats.sets), len(ds.UC),
                         msg="Should have 1 confusion per each split")
    self.failUnlessEqual(len(clf.clfs), len(ds.UC),
                         msg="Should have number of classifiers equal # of epochs")
def test_split_classifier_extended(self, clf_):
    """SplitClassifier vs. CrossValidatedTransferError for a given classifier.

    Same comparison as the basic split-classifier test, but on a richer
    dataset and with an approximate error match.
    """
    clone = clf_.clone()
    ds = datasets['uni2medium']         # self.data_bin_1
    clf = SplitClassifier(
        clf=clf_,                       # SameSignClassifier(),
        splitter=NFoldSplitter(1),
        enable_ca=['confusion', 'feature_ids'])
    clf.train(ds)                       # train the beast
    error = clf.ca.confusion.error

    cv = CrossValidatedTransferError(
        TransferError(clone),
        NFoldSplitter(),
        postproc=mean_sample(),
        enable_ca=['confusion', 'training_confusion'])
    cv_error = cv(ds).samples.squeeze()

    self.failUnless(
        abs(error - cv_error) < 0.01,
        msg="We should get the same error using split classifier as"
            " using CrossValidatedTransferError. Got %s and %s"
            % (error, cv_error))
    if cfg.getboolean('tests', 'labile', default='yes'):
        self.failUnless(error < 0.25,
                        msg="clf should generalize more or less fine. "
                            "Got error %s" % error)
    # one confusion set and one slave classifier per chunk/split
    self.failUnlessEqual(len(clf.ca.confusion.sets), len(ds.UC),
                         msg="Should have 1 confusion per each split")
    self.failUnlessEqual(
        len(clf.clfs), len(ds.UC),
        msg="Should have number of classifiers equal # of epochs")
def testSplitClassifierExtended(self, clf_):
    """SplitClassifier vs. CrossValidatedTransferError (legacy states API).

    Older variant using ``enable_states`` and direct state attribute access
    (no ``.ca`` collection).
    """
    clone = clf_.clone()
    ds = datasets['uni2medium']         # self.data_bin_1
    clf = SplitClassifier(clf=clf_,     # SameSignClassifier(),
                          splitter=NFoldSplitter(1),
                          enable_states=['confusion', 'feature_ids'])
    clf.train(ds)                       # train the beast
    error = clf.confusion.error

    cv = CrossValidatedTransferError(
        TransferError(clone),
        NFoldSplitter(),
        enable_states=['confusion', 'training_confusion'])
    cv_error = cv(ds)

    self.failUnless(
        abs(error - cv_error) < 0.01,
        msg="We should get the same error using split classifier as"
            " using CrossValidatedTransferError. Got %s and %s"
            % (error, cv_error))
    if cfg.getboolean('tests', 'labile', default='yes'):
        self.failUnless(error < 0.25,
                        msg="clf should generalize more or less fine. "
                            "Got error %s" % error)
    # one confusion set and one slave classifier per chunk
    self.failUnlessEqual(len(clf.confusion.sets), len(ds.uniquechunks),
                         msg="Should have 1 confusion per each split")
    self.failUnlessEqual(len(clf.clfs), len(ds.uniquechunks),
                         msg="Should have number of classifiers equal # of epochs")
def test_split_classifier(self):
    """Check SplitClassifier against CrossValidatedTransferError.

    Both approaches run the same N-fold scheme with the same base
    classifier, so their errors and training errors must match exactly.
    """
    data = self.data_bin_1
    clf = SplitClassifier(clf=SameSignClassifier(),
                          splitter=NFoldSplitter(1),
                          enable_ca=['confusion', 'training_confusion',
                                     'feature_ids'])
    clf.train(data)                     # train the beast
    error = clf.ca.confusion.error
    tr_error = clf.ca.training_confusion.error

    clf_copy = clf.clone()
    cv = CrossValidatedTransferError(
        TransferError(clf_copy),
        NFoldSplitter(),
        postproc=mean_sample(),
        enable_ca=['confusion', 'training_confusion'])
    cverror = cv(data).samples.squeeze()
    tr_cverror = cv.ca.training_confusion.error

    self.failUnlessEqual(
        error, cverror,
        msg="We should get the same error using split classifier as"
            " using CrossValidatedTransferError. Got %s and %s"
            % (error, cverror))
    self.failUnlessEqual(
        tr_error, tr_cverror,
        msg="We should get the same training error using split classifier as"
            " using CrossValidatedTransferError. Got %s and %s"
            % (tr_error, tr_cverror))
    self.failUnlessEqual(clf.ca.confusion.percent_correct, 100,
                         msg="Dummy clf should train perfectly")
    # expect one confusion set / one slave classifier per chunk
    self.failUnlessEqual(len(clf.ca.confusion.sets), len(data.UC),
                         msg="Should have 1 confusion per each split")
    self.failUnlessEqual(len(clf.clfs), len(data.UC),
                         msg="Should have number of classifiers equal # of epochs")
    self.failUnlessEqual(clf.predict(data.samples), list(data.targets),
                         msg="Should classify correctly")

    # feature_ids must be list of lists, and since it is not
    # feature-selecting classifier used - we expect all features
    # to be utilized
    #  NOT ANYMORE -- for BoostedClassifier we have now union of all
    #  used features across slave classifiers. That makes
    #  semantics clear. If you need to get deeper -- use upcoming
    #  harvesting facility ;-)
    # self.failUnlessEqual(len(clf.feature_ids), len(ds.uniquechunks))
    # self.failUnless(np.array([len(ids)==ds.nfeatures
    #                           for ids in clf.feature_ids]).all())

    # Just check if we get it at all ;-)
    summary = clf.summary()
def testRegressions(self, regr):
    """Simple tests on regressions
    """
    ds = datasets['chirp_linear']

    cve = CrossValidatedTransferError(
        TransferError(regr, CorrErrorFx()),
        splitter=NFoldSplitter(),
        enable_states=['training_confusion', 'confusion'])
    corr = cve(ds)
    # returned error must be the CCe entry of the confusion statistics
    self.failUnless(corr == cve.confusion.stats['CCe'])

    splitregr = SplitClassifier(
        regr, splitter=OddEvenSplitter(),
        enable_states=['training_confusion', 'confusion'])
    splitregr.train(ds)
    split_corr = splitregr.confusion.stats['CCe']
    split_corr_tr = splitregr.training_confusion.stats['CCe']

    # verify each confusion/error pair the two approaches produced
    for confusion, error in (
            (cve.confusion, corr),
            (splitregr.confusion, split_corr),
            (splitregr.training_confusion, split_corr_tr),
            ):
        # TODO: test confusion statistics
        #  Part of it for now -- CCe
        for conf in confusion.summaries:
            stats = conf.stats
            self.failUnless(stats['CCe'] < 0.5)
            self.failUnlessEqual(stats['CCe'], stats['Summary CCe'])

        # both short and long string renderings must be non-trivial
        s0 = confusion.asstring(short=True)
        s1 = confusion.asstring(short=False)
        for s in [s0, s1]:
            self.failUnless(len(s) > 10,
                            msg="We should get some string representation "
                                "of regression summary. Got %s" % s)

        self.failUnless(error < 0.2,
                        msg="Regressions should perform well on a simple "
                            "dataset. Got correlation error of %s " % error)

        # Test access to summary statistics
        # YOH: lets start making testing more reliable.
        #      p-value for such accident to have is verrrry tiny,
        #      so if regression works -- it better has at least 0.5 ;)
        #      otherwise fix it! ;)
        # if cfg.getboolean('tests', 'labile', default='yes'):
        self.failUnless(confusion.stats['CCe'] < 0.5)

    # just to check if it works fine
    split_predictions = splitregr.predict(ds.samples)
def test_harvesting(self):
    """Basic testing of harvesting based on SplitClassifier
    """
    dataset = self.data_bin_1
    clf = SplitClassifier(clf=SameSignClassifier(),
                          enable_ca=['stats', 'training_stats'],
                          harvest_attribs=['clf.ca.training_time'],
                          descr="DESCR")
    clf.train(dataset)                  # train the beast
    # Number of harvested items should be equal to number of chunks
    self.failUnlessEqual(len(clf.ca.harvested['clf.ca.training_time']),
                         len(dataset.UC))
    # if we can blame multiple inheritance and ClassWithCollections.__init__
    self.failUnlessEqual(clf.descr, "DESCR")
def test_harvesting(self):
    """Basic testing of harvesting based on SplitClassifier
    """
    dataset = self.data_bin_1
    clf = SplitClassifier(
        clf=SameSignClassifier(),
        splitter=NFoldSplitter(1),
        enable_ca=['confusion', 'training_confusion', 'feature_ids'],
        harvest_attribs=['clf.ca.feature_ids',
                         'clf.ca.training_time'],
        descr="DESCR")
    clf.train(dataset)                  # train the beast
    # Number of harvested items should be equal to number of chunks
    self.failUnlessEqual(
        len(clf.ca.harvested['clf.ca.feature_ids']), len(dataset.UC))
    # if we can blame multiple inheritance and ClassWithCollections.__init__
    self.failUnlessEqual(clf.descr, "DESCR")
def testHarvesting(self):
    """Basic testing of harvesting based on SplitClassifier
    """
    dataset = self.data_bin_1
    clf = SplitClassifier(clf=SameSignClassifier(),
                          splitter=NFoldSplitter(1),
                          enable_states=['confusion', 'training_confusion',
                                         'feature_ids'],
                          harvest_attribs=['clf.feature_ids',
                                           'clf.training_time'],
                          descr="DESCR")
    clf.train(dataset)                  # train the beast
    # Number of harvested items should be equal to number of chunks
    self.failUnlessEqual(len(clf.harvested['clf.feature_ids']),
                         len(dataset.uniquechunks))
    # if we can blame multiple inheritance and ClassWithCollections.__init__
    self.failUnlessEqual(clf.descr, "DESCR")
def test_regressions(self, regr):
    """Simple tests on regressions
    """
    ds = datasets['chirp_linear']
    # we want numeric labels to maintain the previous behavior, especially
    # since we deal with regressions here
    ds.sa.targets = AttributeMap().to_numeric(ds.targets)

    cve = CrossValidatedTransferError(
        TransferError(regr),
        splitter=NFoldSplitter(),
        postproc=mean_sample(),
        enable_ca=['training_confusion', 'confusion'])
    # check the default
    self.failUnless(isinstance(cve.transerror.errorfx, CorrErrorFx))
    corr = np.asscalar(cve(ds).samples)
    # Our CorrErrorFx should never return NaN
    self.failUnless(not np.isnan(corr))
    self.failUnless(corr == cve.ca.confusion.stats['CCe'])

    splitregr = SplitClassifier(
        regr, splitter=OddEvenSplitter(),
        enable_ca=['training_confusion', 'confusion'])
    splitregr.train(ds)
    split_corr = splitregr.ca.confusion.stats['CCe']
    split_corr_tr = splitregr.ca.training_confusion.stats['CCe']

    # verify each confusion/error pair the two approaches produced
    for confusion, error in (
            (cve.ca.confusion, corr),
            (splitregr.ca.confusion, split_corr),
            (splitregr.ca.training_confusion, split_corr_tr),
            ):
        # TODO: test confusion statistics
        #  Part of it for now -- CCe
        for conf in confusion.summaries:
            stats = conf.stats
            if cfg.getboolean('tests', 'labile', default='yes'):
                self.failUnless(stats['CCe'] < 0.5)
            self.failUnlessEqual(stats['CCe'], stats['Summary CCe'])

        # both short and long string renderings must be non-trivial
        s0 = confusion.as_string(short=True)
        s1 = confusion.as_string(short=False)
        for s in [s0, s1]:
            self.failUnless(len(s) > 10,
                            msg="We should get some string representation "
                                "of regression summary. Got %s" % s)

        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(error < 0.2,
                            msg="Regressions should perform well on a simple "
                                "dataset. Got correlation error of %s " % error)

        # Test access to summary statistics
        # YOH: lets start making testing more reliable.
        #      p-value for such accident to have is verrrry tiny,
        #      so if regression works -- it better has at least 0.5 ;)
        #      otherwise fix it! ;)
        # YOH: not now -- issues with libsvr in SG and linear kernel
        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(confusion.stats['CCe'] < 0.5)

    # just to check if it works fine
    split_predictions = splitregr.predict(ds.samples)
def test_regressions(self, regr):
    """Simple tests on regressions
    """
    ds = datasets['chirp_linear']
    # we want numeric labels to maintain the previous behavior, especially
    # since we deal with regressions here
    ds.sa.targets = AttributeMap().to_numeric(ds.targets)

    cve = CrossValidatedTransferError(
        TransferError(regr),
        splitter=NFoldSplitter(),
        postproc=mean_sample(),
        enable_ca=['training_confusion', 'confusion'])
    # check the default
    self.failUnless(isinstance(cve.transerror.errorfx, CorrErrorFx))
    corr = np.asscalar(cve(ds).samples)
    # Our CorrErrorFx should never return NaN
    self.failUnless(not np.isnan(corr))
    self.failUnless(corr == cve.ca.confusion.stats['CCe'])

    splitregr = SplitClassifier(
        regr, splitter=OddEvenSplitter(),
        enable_ca=['training_confusion', 'confusion'])
    splitregr.train(ds)
    split_corr = splitregr.ca.confusion.stats['CCe']
    split_corr_tr = splitregr.ca.training_confusion.stats['CCe']

    # walk over every confusion/error pair produced above
    for confusion, error in (
            (cve.ca.confusion, corr),
            (splitregr.ca.confusion, split_corr),
            (splitregr.ca.training_confusion, split_corr_tr),
            ):
        # TODO: test confusion statistics
        #  Part of it for now -- CCe
        for conf in confusion.summaries:
            stats = conf.stats
            if cfg.getboolean('tests', 'labile', default='yes'):
                self.failUnless(stats['CCe'] < 0.5)
            self.failUnlessEqual(stats['CCe'], stats['Summary CCe'])

        # short and long string renderings should both be non-trivial
        s0 = confusion.as_string(short=True)
        s1 = confusion.as_string(short=False)
        for s in [s0, s1]:
            self.failUnless(len(s) > 10,
                            msg="We should get some string representation "
                                "of regression summary. Got %s" % s)

        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(
                error < 0.2,
                msg="Regressions should perform well on a simple "
                    "dataset. Got correlation error of %s " % error)

        # Test access to summary statistics
        # YOH: lets start making testing more reliable.
        #      p-value for such accident to have is verrrry tiny,
        #      so if regression works -- it better has at least 0.5 ;)
        #      otherwise fix it! ;)
        # YOH: not now -- issues with libsvr in SG and linear kernel
        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(confusion.stats['CCe'] < 0.5)

    # just to check if it works fine
    split_predictions = splitregr.predict(ds.samples)