def test_split_classifier(self):
    ds = self.data_bin_1
    clf = SplitClassifier(clf=SameSignClassifier(),
                          splitter=NFoldSplitter(1),
                          enable_ca=['confusion', 'training_confusion',
                                     'feature_ids'])
    clf.train(ds)                               # train the beast
    error = clf.ca.confusion.error
    tr_error = clf.ca.training_confusion.error

    clf2 = clf.clone()
    cv = CrossValidatedTransferError(
        TransferError(clf2),
        NFoldSplitter(),
        postproc=mean_sample(),
        enable_ca=['confusion', 'training_confusion'])
    cverror = cv(ds).samples.squeeze()
    tr_cverror = cv.ca.training_confusion.error

    self.failUnlessEqual(
        error, cverror,
        msg="We should get the same error using split classifier as"
            " using CrossValidatedTransferError. Got %s and %s"
            % (error, cverror))
    self.failUnlessEqual(
        tr_error, tr_cverror,
        msg="We should get the same training error using split classifier as"
            " using CrossValidatedTransferError. Got %s and %s"
            % (tr_error, tr_cverror))
    self.failUnlessEqual(clf.ca.confusion.percent_correct, 100,
                         msg="Dummy clf should train perfectly")
    self.failUnlessEqual(len(clf.ca.confusion.sets), len(ds.UC),
                         msg="Should have 1 confusion per each split")
    self.failUnlessEqual(len(clf.clfs), len(ds.UC),
                         msg="Should have number of classifiers equal # of epochs")
    self.failUnlessEqual(clf.predict(ds.samples), list(ds.targets),
                         msg="Should classify correctly")

    # feature_ids must be list of lists, and since it is not
    # feature-selecting classifier used - we expect all features
    # to be utilized
    #  NOT ANYMORE -- for BoostedClassifier we have now union of all
    #  used features across slave classifiers. That makes
    #  semantics clear. If you need to get deeper -- use upcoming
    #  harvesting facility ;-)
    # self.failUnlessEqual(len(clf.feature_ids), len(ds.uniquechunks))
    # self.failUnless(np.array([len(ids)==ds.nfeatures
    #                           for ids in clf.feature_ids]).all())

    # Just check if we get it at all ;-)
    summary = clf.summary()
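# A minimal usage sketch, not one of the original tests (no 'test_' prefix, so
# a runner will not collect it): it restates, without assertions, the workflow
# the test above verifies -- SplitClassifier trains one slave classifier per
# NFoldSplitter split and pools their confusions, which is why its error must
# match the externally driven CrossValidatedTransferError.  Fixture names
# (data_bin_1, SameSignClassifier) are reused from this test class.
def _sketch_split_classifier_usage(self):
    ds = self.data_bin_1
    clf = SplitClassifier(clf=SameSignClassifier(),
                          splitter=NFoldSplitter(1),
                          enable_ca=['confusion'])
    clf.train(ds)                  # one slave classifier per left-out chunk
    return clf.ca.confusion.error  # error pooled across all splits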
def testRegressions(self, regr):
    """Simple tests on regressions
    """
    ds = datasets['chirp_linear']

    cve = CrossValidatedTransferError(
        TransferError(regr, CorrErrorFx()),
        splitter=NFoldSplitter(),
        enable_states=['training_confusion', 'confusion'])
    corr = cve(ds)

    self.failUnless(corr == cve.confusion.stats['CCe'])

    splitregr = SplitClassifier(regr, splitter=OddEvenSplitter(),
                                enable_states=['training_confusion',
                                               'confusion'])
    splitregr.train(ds)
    split_corr = splitregr.confusion.stats['CCe']
    split_corr_tr = splitregr.training_confusion.stats['CCe']

    for confusion, error in ((cve.confusion, corr),
                             (splitregr.confusion, split_corr),
                             (splitregr.training_confusion, split_corr_tr),
                             ):
        #TODO: test confusion statistics
        # Part of it for now -- CCe
        for conf in confusion.summaries:
            stats = conf.stats
            self.failUnless(stats['CCe'] < 0.5)
            self.failUnlessEqual(stats['CCe'], stats['Summary CCe'])

        s0 = confusion.asstring(short=True)
        s1 = confusion.asstring(short=False)

        for s in [s0, s1]:
            self.failUnless(len(s) > 10,
                            msg="We should get some string representation "
                                "of regression summary. Got %s" % s)

        self.failUnless(error < 0.2,
                        msg="Regressions should perform well on a simple "
                            "dataset. Got correlation error of %s " % error)

        # Test access to summary statistics
        # YOH: lets start making testing more reliable.
        #      p-value for such accident to have is verrrry tiny,
        #      so if regression works -- it better has at least 0.5 ;)
        #      otherwise fix it! ;)
        #if cfg.getboolean('tests', 'labile', default='yes'):
        self.failUnless(confusion.stats['CCe'] < 0.5)

    split_predictions = splitregr.predict(ds.samples)  # just to check if it works fine
def test_regressions(self, regr):
    """Simple tests on regressions
    """
    ds = datasets['chirp_linear']
    # we want numeric labels to maintain the previous behavior, especially
    # since we deal with regressions here
    ds.sa.targets = AttributeMap().to_numeric(ds.targets)

    cve = CrossValidatedTransferError(
        TransferError(regr),
        splitter=NFoldSplitter(),
        postproc=mean_sample(),
        enable_ca=['training_confusion', 'confusion'])
    # check the default
    self.failUnless(isinstance(cve.transerror.errorfx, CorrErrorFx))
    corr = np.asscalar(cve(ds).samples)

    # Our CorrErrorFx should never return NaN
    self.failUnless(not np.isnan(corr))
    self.failUnless(corr == cve.ca.confusion.stats['CCe'])

    splitregr = SplitClassifier(
        regr, splitter=OddEvenSplitter(),
        enable_ca=['training_confusion', 'confusion'])
    splitregr.train(ds)
    split_corr = splitregr.ca.confusion.stats['CCe']
    split_corr_tr = splitregr.ca.training_confusion.stats['CCe']

    for confusion, error in (
        (cve.ca.confusion, corr),
        (splitregr.ca.confusion, split_corr),
        (splitregr.ca.training_confusion, split_corr_tr),
        ):
        #TODO: test confusion statistics
        # Part of it for now -- CCe
        for conf in confusion.summaries:
            stats = conf.stats
            if cfg.getboolean('tests', 'labile', default='yes'):
                self.failUnless(stats['CCe'] < 0.5)
            self.failUnlessEqual(stats['CCe'], stats['Summary CCe'])

        s0 = confusion.as_string(short=True)
        s1 = confusion.as_string(short=False)

        for s in [s0, s1]:
            self.failUnless(len(s) > 10,
                            msg="We should get some string representation "
                                "of regression summary. Got %s" % s)

        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(error < 0.2,
                            msg="Regressions should perform well on a simple "
                                "dataset. Got correlation error of %s " % error)

        # Test access to summary statistics
        # YOH: lets start making testing more reliable.
        #      p-value for such accident to have is verrrry tiny,
        #      so if regression works -- it better has at least 0.5 ;)
        #      otherwise fix it! ;)
        # YOH: not now -- issues with libsvr in SG and linear kernel
        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(confusion.stats['CCe'] < 0.5)

    # just to check if it works fine
    split_predictions = splitregr.predict(ds.samples)
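# Sketch of the error convention the regression tests above rely on, assuming
# the usual definition of CorrErrorFx (one minus the Pearson correlation
# between predictions and targets); the test asserts that this value and the
# 'CCe' summary statistic agree, so 0.0 indicates a perfect fit and values
# near 1.0 indicate essentially uncorrelated predictions.  `predictions` and
# `targets` are hypothetical 1-d arrays, not fixtures from this test class.
def _sketch_correlation_error(self, predictions, targets):
    return 1.0 - np.corrcoef(predictions, targets)[0, 1]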