def testAnalyzerWithSplitClassifier(self, clf):
    """Check the sensitivity analyzer obtained from a SplitClassifier.

    `clf` is wrapped into a SplitClassifier whose sensitivity analyzer is
    requested with the Absolute transformer.  The analyzer is run on
    ``self.dataset`` and the resulting map must have one entry per
    feature.  When the 'labile' tests are enabled in the configuration,
    the confusion matrices of the classifier and the features selected
    from the map are verified as well.
    """
    # Assuming many defaults, building the split classifier is this simple
    split_clf = SplitClassifier(
        clf=clf, enable_states=['training_confusion', 'confusion'])
    analyzer = split_clf.getSensitivityAnalyzer(
        transformer=Absolute, enable_states=["sensitivities"])

    # Transformer and combiner have to be reachable on the analyzer
    self.failUnless(analyzer.transformer is Absolute)
    self.failUnless(analyzer.combiner is FirstAxisMean)

    # The analyzer works on the splits; look at the resulting sensitivities
    sens_map = analyzer(self.dataset)
    self.failUnlessEqual(len(sens_map), self.dataset.nfeatures)

    if cfg.getboolean('tests', 'labile', default='yes'):
        matrices = [analyzer.clf.training_confusion] \
                   + analyzer.clf.confusion.matrices
        for matrix in matrices:
            self.failUnless(
                matrix.percentCorrect > 75,
                msg="We must have trained on each one more or "
                    "less correctly. Got %f%% correct on %d labels"
                    % (matrix.percentCorrect,
                       len(self.dataset.uniquelabels)))

    # Currently only referenced by the disabled check right below
    errors = [matrix.percentCorrect
              for matrix in analyzer.clf.confusion.matrices]

    # XXX
    # That is too much to ask if the dataset is easy - thus
    # disabled for now
    #self.failUnless(N.min(errors) != N.max(errors),
    #                msg="Splits should have slightly but different " \
    #                    "generalization")

    # Go through the sensitivities and see whether the right features
    # get selected.
    # XXX yoh: checking of each map separately is disabled since
    #     BoostedClassifierSensitivityAnalyzer and
    #     ProxyClassifierSensitivityAnalyzer
    #     do not yet offer a way to provide transformers, thus the internal
    #     call to getSensitivityAnalyzer in their _call is not parametrized
    is_meta = 'meta' in clf._clf_internals
    if is_meta and len(sens_map.nonzero()[0]) < 2:
        # Some meta classifiers (5% of ANOVA) are too harsh ;-)
        return

    for current_map in [sens_map]:  # + sana.combined_analyzer.sensitivities:
        tail_size = (self.dataset.nfeatures
                     - len(self.dataset.nonbogus_features))
        selected = FixedNElementTailSelector(tail_size)(current_map)
        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnlessEqual(
                list(selected),
                list(self.dataset.nonbogus_features),
                msg="At the end we should have selected the right features")
def __testFSPipelineWithAnalyzerWithSplitClassifier(self, basic_clf):
    """Run RFE driven by the sensitivity analyzer of a SplitClassifier.

    The feature selector is a pipeline of FractionTailSelector(0.5)
    followed by FixedNElementTailSelector(1); the RFE instance is applied
    to ``self.dataset``.  Nothing is asserted on the outcome.

    NOTE(review): the name does not start with 'test', so the default
    unittest loader presumably does not collect this method.  Also,
    `trans_error` is not defined inside this method -- unless it exists in
    an enclosing scope, constructing the RFE raises NameError.  Confirm
    before enabling.
    """
    #basic_clf = LinearNuSVMC()
    multiclass_clf = MulticlassClassifier(clf=basic_clf)
    #svm_weigths = LinearSVMWeights(svm)

    # Proper RFE: sensitivities have to be aggregated across multiple
    # splits, and due to multi-class they need to be aggregated somehow
    # as well.  Transfer error here should be the 'leave-1-out' error of
    # the split classifier itself.
    split_clf = SplitClassifier(clf=basic_clf)
    analyzer = split_clf.getSensitivityAnalyzer(
        enable_states=["sensitivities"])
    rfe = RFE(
        sensitivity_analyzer=analyzer,
        transfer_error=trans_error,
        feature_selector=FeatureSelectionPipeline(
            [FractionTailSelector(0.5),
             FixedNElementTailSelector(1)]),
        train_clf=True)

    # The analyzer works on the splits and RFE uses its sensitivities
    selected_features = rfe(self.dataset)