def test_feature_selection_classifier_with_regression(self):
    """A regression classifier wrapped into FeatureSelectionClassifier
    must keep ca.estimates distinct from ca.predictions."""
    from mvpa.featsel.base import SensitivityBasedFeatureSelection
    from mvpa.featsel.helpers import FixedNElementTailSelector

    if sample_clf_reg is None:
        # no regression classifier available, so nothing to test
        return

    # analyzer assigning the lowest weight to the lowest-index feature
    analyzer = SillySensitivityAnalyzer()
    # discard the single least sensitive feature
    selection = SensitivityBasedFeatureSelection(
        analyzer, FixedNElementTailSelector(1, mode='discard'))

    # Historically the regression path determined predictions twice from
    # estimates and then stored predictions back as estimates -- the
    # final assertion guards against that.
    ds = dataset_wizard(samples=np.random.randn(4, 10),
                        targets=[-1, -1, 1, 1])
    fclf = FeatureSelectionClassifier(sample_clf_reg, selection)
    fclf.train(ds)
    _ = fclf.predict(ds.samples)
    mismatch = (np.array(fclf.ca.estimates) - fclf.ca.predictions).sum()
    self.failIf(mismatch == 0,
                msg="Values were set to the predictions in %s."
                    % sample_clf_reg)
def test_feature_selection_classifier_with_regression(self):
    """Verify that wrapping a regression classifier does not overwrite
    its estimates with its predictions."""
    from mvpa.featsel.base import SensitivityBasedFeatureSelection
    from mvpa.featsel.helpers import FixedNElementTailSelector

    # none regression was found, so nothing to test
    if sample_clf_reg is None:
        return

    # sensitivity analyzer giving lowest weight to the 0th feature,
    # of which the single weakest feature gets discarded
    fsel = SensitivityBasedFeatureSelection(
        SillySensitivityAnalyzer(),
        FixedNElementTailSelector(1, mode='discard'))

    # regression-based classifier: the failure mode being tested is
    # predictions being written back into the estimates attribute
    training = dataset_wizard(samples=np.random.randn(4, 10),
                              targets=[-1, -1, 1, 1])
    wrapped = FeatureSelectionClassifier(sample_clf_reg, fsel)
    wrapped.train(training)
    _ = wrapped.predict(training.samples)
    delta = (np.array(wrapped.ca.estimates)
             - wrapped.ca.predictions).sum()
    self.failIf(delta == 0,
                msg="Values were set to the predictions in %s."
                    % sample_clf_reg)
def some_svms():
    """Return several FeatureSelectionClassifiers based on SVMs with
    different numbers of features and/or sensitivity measures.

    Returns
    -------
    list
        Four classifiers: ANOVA-selected 500 features, SVM-sensitivity
        500 features, a plain SVM, and SVM-sensitivity top 5%.
    """
    # linear SVM restricted to the 500 most significant ANOVA features
    clfr1 = FeatureSelectionClassifier(
        SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FixedNElementTailSelector(500, mode='select', tail='upper')),
        descr="LinSVM on 500 (ANOVA)")
    # linear SVM restricted to the 500 features with largest |SVM weight|
    clfr2 = FeatureSelectionClassifier(
        SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        SensitivityBasedFeatureSelection(
            SVM().getSensitivityAnalyzer(transformer=Absolute),
            FixedNElementTailSelector(500, mode='select', tail='upper')),
        descr="LinSVM on 500 (SVM)")
    # plain SVM on all features as a baseline
    clfr3 = SVM()
    # linear SVM on the top 5% of features by absolute SVM sensitivity
    clfr4 = FeatureSelectionClassifier(
        SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        SensitivityBasedFeatureSelection(
            SVM().getSensitivityAnalyzer(transformer=Absolute),
            FractionTailSelector(0.05, mode='select', tail='upper'),
            ),
        descr="LinSVM on 5 % (SVM)")
    # BUG FIX: clfr4 was constructed but never returned -- the original
    # list contained clfr3 twice ([clfr1, clfr2, clfr3, clfr3])
    return [clfr1, clfr2, clfr3, clfr4]
def test_feature_selection_classifier(self):
    """Exercise FeatureSelectionClassifier with two complementary
    sensitivity-based feature selections."""
    from mvpa.featsel.base import SensitivityBasedFeatureSelection
    from mvpa.featsel.helpers import FixedNElementTailSelector

    # gives lowest weight to the feature with lowest index
    fwd_ana = SillySensitivityAnalyzer()
    # gives lowest weight to the feature with highest index
    rev_ana = SillySensitivityAnalyzer(mult=-1)

    # each selection discards the single least sensitive feature
    sel_fwd = SensitivityBasedFeatureSelection(
        fwd_ana, FixedNElementTailSelector(1, mode='discard'))
    sel_rev = SensitivityBasedFeatureSelection(
        rev_ana, FixedNElementTailSelector(1))

    samples = np.array([[0, 0, -1],
                        [1, 0, 1],
                        [-1, -1, 1],
                        [-1, 0, 1],
                        [1, -1, 1]])
    testdata3 = dataset_wizard(samples=samples, targets=1)
    # dummy train data so proper mapper gets created
    traindata = dataset_wizard(
        samples=np.array([[0, 0, -1], [1, 0, 1]]), targets=[1, 2])

    # expected predictions for each selection direction
    res110 = [1, 1, 1, -1, -1]
    res011 = [-1, 1, -1, 1, -1]

    # first classifier -- 0th feature should be discarded
    clf011 = FeatureSelectionClassifier(self.clf_sign, sel_fwd,
                                        enable_ca=['feature_ids'])
    self.clf_sign.ca.change_temporarily(enable_ca=['estimates'])
    clf011.train(traindata)
    self.failUnlessEqual(clf011.predict(testdata3.samples), res011)
    # silly check that values get assigned in the 'ProxyClassifier'
    self.failUnless(len(clf011.ca.estimates) == len(res110),
                    msg="We need to pass values into ProxyClassifier")
    self.clf_sign.ca.reset_changed_temporarily()

    # feature selection classifier had to be trained on 2 features
    self.failUnlessEqual(len(clf011.ca.feature_ids), 2)

    # second classifier -- last feature should be discarded
    clf011 = FeatureSelectionClassifier(self.clf_sign, sel_rev)
    clf011.train(traindata)
    self.failUnlessEqual(clf011.predict(testdata3.samples), res110)
def __test_matthias_question(self):
    """Disabled test: RFE-driven FeatureSelectionClassifier inside a
    cross-validation with a permutation-based null distribution."""
    rfe_clf = LinearCSVMC(C=1)
    rfesvm_split = SplitClassifier(rfe_clf)

    # recursive feature elimination driven by split-wise sensitivities,
    # stopping once the confusion-based error drops below 20%
    rfe = RFE(
        sensitivity_analyzer=rfesvm_split.get_sensitivity_analyzer(
            combiner=first_axis_mean,
            transformer=np.abs),
        transfer_error=ConfusionBasedError(
            rfesvm_split, confusion_state="confusion"),
        stopping_criterion=FixedErrorThresholdStopCrit(0.20),
        feature_selector=FractionTailSelector(
            0.2, mode='discard', tail='lower'),
        update_sensitivity=True)
    clf = FeatureSelectionClassifier(clf=LinearCSVMC(C=1),
                                     feature_selection=rfe)

    # Monte-Carlo null distribution from label permutations
    no_permutations = 1000
    cv = CrossValidatedTransferError(
        TransferError(clf),
        NFoldSplitter(cvtype=1),
        null_dist=MCNullDist(permutations=no_permutations, tail='left'),
        enable_ca=['confusion'])

    error = cv(datasets['uni2small'])
    self.failUnless(error < 0.4)
    self.failUnless(cv.ca.null_prob < 0.05)
def test_james_problem(self):
    """Regression test: a FeatureSelectionClassifier using RFE must be
    usable inside CrossValidatedTransferError without raising."""
    percent = 80
    dataset = datasets['uni2small']
    rfesvm_split = LinearCSVMC()
    # keep the top `percent` of features by SVM sensitivity,
    # re-estimating the sensitivity at every RFE step
    fs = RFE(sensitivity_analyzer=rfesvm_split.get_sensitivity_analyzer(),
             transfer_error=TransferError(rfesvm_split),
             feature_selector=FractionTailSelector(
                 percent / 100.0, mode='select', tail='upper'),
             update_sensitivity=True)

    clf = FeatureSelectionClassifier(
        clf=LinearCSVMC(),
        # on features selected via RFE
        feature_selection=fs)
    # update sensitivity at each step (since we're not using the
    # same CLF as sensitivity analyzer)
    clf.ca.enable('feature_ids')

    cv = CrossValidatedTransferError(
        TransferError(clf),
        NFoldSplitter(cvtype=1),
        postproc=mean_sample(),
        enable_ca=['confusion'],
        expose_testdataset=True)
    try:
        error = cv(dataset).samples.squeeze()
    # BUG FIX: was `except Exception, e` -- Python-2-only syntax that is
    # a SyntaxError on Python 3; `as` works on 2.6+ and 3.x
    except Exception as e:
        self.fail('CrossValidation cannot handle classifier with RFE '
                  'feature selection. Got exception: %s' % (e,))
def test_mapped_classifier_sensitivity_analyzer(self, clf):
    """Test sensitivity of the mapped classifier"""
    # wrap the classifier so it trains on the upper half of features
    # ranked by a one-way ANOVA
    fclf = FeatureSelectionClassifier(
        clf,
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FractionTailSelector(0.5, mode='select', tail='upper')),
        enable_ca=['training_stats'])
    analyzer = fclf.get_sensitivity_analyzer(
        postproc=sumofabs_sample(),
        enable_ca=["sensitivities"])
    # and lets look at all sensitivities
    dataset = datasets['uni2medium']
    # one sensitivity sample spanning every feature is expected
    result = analyzer(dataset)
    self.failUnlessEqual(result.shape, (1, dataset.nfeatures))
def testMappedClassifierSensitivityAnalyzer(self, clf):
    """Test sensitivity of the mapped classifier"""
    # feature-selection wrapper: keep the upper half of features
    # ranked by one-way ANOVA
    fclf = FeatureSelectionClassifier(
        clf,
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FractionTailSelector(0.5, mode='select', tail='upper')),
        enable_states=['training_confusion'])
    analyzer = fclf.getSensitivityAnalyzer(
        transformer=Absolute,
        enable_states=["sensitivities"])
    dataset = datasets['uni2medium']
    # the sensitivity map must cover every feature of the dataset
    sens_map = analyzer(dataset)
    self.failUnlessEqual(len(sens_map), dataset.nfeatures)
def test_mapped_classifier_sensitivity_analyzer(self, clf):
    """Test sensitivity of the mapped classifier"""
    # select the upper half of features by one-way ANOVA
    selector = FractionTailSelector(0.5, mode='select', tail='upper')
    fsel = SensitivityBasedFeatureSelection(OneWayAnova(), selector)
    mclf = FeatureSelectionClassifier(clf, fsel,
                                      enable_ca=['training_confusion'])
    sana = mclf.get_sensitivity_analyzer(postproc=sumofabs_sample(),
                                         enable_ca=["sensitivities"])
    dataset = datasets['uni2medium']
    # a single sensitivity sample covering all features is expected
    sens = sana(dataset)
    self.failUnlessEqual(sens.shape, (1, dataset.nfeatures))
def test_feature_selection_classifier(self):
    """Exercise FeatureSelectionClassifier with forward and reversed
    sensitivity-based feature selections."""
    from mvpa.featsel.base import SensitivityBasedFeatureSelection
    from mvpa.featsel.helpers import FixedNElementTailSelector

    # lowest weight to the lowest-index feature
    ana_fwd = SillySensitivityAnalyzer()
    # lowest weight to the highest-index feature
    ana_rev = SillySensitivityAnalyzer(mult=-1)

    # matching selections, each discarding one feature
    fsel = SensitivityBasedFeatureSelection(
        ana_fwd, FixedNElementTailSelector(1, mode='discard'))
    fsel_rev = SensitivityBasedFeatureSelection(
        ana_rev, FixedNElementTailSelector(1))

    data = np.array([[0, 0, -1],
                     [1, 0, 1],
                     [-1, -1, 1],
                     [-1, 0, 1],
                     [1, -1, 1]])
    testdata3 = dataset_wizard(samples=data, targets=1)
    # dummy train data so proper mapper gets created
    traindata = dataset_wizard(
        samples=np.array([[0, 0, -1], [1, 0, 1]]), targets=[1, 2])

    # expected predictions per selection direction
    res110 = [1, 1, 1, -1, -1]
    res011 = [-1, 1, -1, 1, -1]

    # first classifier -- 0th feature should be discarded
    clf011 = FeatureSelectionClassifier(self.clf_sign, fsel,
                                        enable_ca=['feature_ids'])
    self.clf_sign.ca.change_temporarily(enable_ca=['estimates'])
    clf011.train(traindata)
    self.failUnlessEqual(clf011.predict(testdata3.samples), res011)
    # silly check that values get assigned in the 'ProxyClassifier'
    self.failUnless(len(clf011.ca.estimates) == len(res110),
                    msg="We need to pass values into ProxyClassifier")
    self.clf_sign.ca.reset_changed_temporarily()

    # feature selection classifier had to be trained on 2 features
    self.failUnlessEqual(clf011.mapper._oshape, (2,))

    # reversed classifier -- last feature should be discarded
    clf011 = FeatureSelectionClassifier(self.clf_sign, fsel_rev)
    clf011.train(traindata)
    self.failUnlessEqual(clf011.predict(testdata3.samples), res110)
# glmnet from R via RPy if externals.exists('glmnet'): from mvpa.clfs.glmnet import GLMNET_C, GLMNET_R clfswh += GLMNET_C(descr="GLMNET_C()") regrswh += GLMNET_R(descr="GLMNET_R()") # kNN clfswh += kNN(k=5, descr="kNN(k=5)") clfswh += kNN(k=5, voting='majority', descr="kNN(k=5, voting='majority')") clfswh += \ FeatureSelectionClassifier( kNN(), SensitivityBasedFeatureSelection( SMLRWeights(SMLR(lm=1.0, implementation="C"), postproc=maxofabs_sample()), RangeElementSelector(mode='select')), descr="kNN on SMLR(lm=1) non-0") clfswh += \ FeatureSelectionClassifier( kNN(), SensitivityBasedFeatureSelection( OneWayAnova(), FractionTailSelector(0.05, mode='select', tail='upper')), descr="kNN on 5%(ANOVA)") clfswh += \ FeatureSelectionClassifier( kNN(),
def svms_for_CombinedClassifier():
    """For my iEEG study, I use a CombinedClassifier. The components
    are defined here.

    Returns
    -------
    list
        Eight FeatureSelectionClassifiers: linear SVMs restricted to
        500/300/200/100 features selected either by one-way ANOVA or
        by absolute SVM sensitivity.
    """
    def _anova_svm(nfeatures, descr):
        # linear SVM on `nfeatures` features ranked by one-way ANOVA
        return FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                OneWayAnova(),
                FixedNElementTailSelector(nfeatures, mode='select',
                                          tail='upper')),
            descr=descr)

    def _svmsens_svm(nfeatures, descr):
        # linear SVM on `nfeatures` features ranked by |SVM weight|
        return FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                SVM(descr="libsvm.LinSVM(C=def)",
                    probability=1).getSensitivityAnalyzer(
                        transformer=mvpa.misc.transformers.Absolute),
                FixedNElementTailSelector(nfeatures, mode='select',
                                          tail='upper')),
            descr=descr)

    clfrs = []
    clfrs.append(_anova_svm(500, "LinSVM on 500 (Anova)"))
    clfrs.append(_anova_svm(300, "LinSVM on 300 (Anova)"))
    clfrs.append(_anova_svm(200, "LinSVM on 200 (Anova)"))
    # BUG FIX: both "on 100" classifiers previously selected 500
    # features (copy-paste error); they now select 100 as their
    # descriptions state, completing the 500/300/200/100 series
    clfrs.append(_anova_svm(100, "LinSVM on 100 (Anova)"))
    clfrs.append(_svmsens_svm(500, "LinSVM on 500 (SVM)"))
    clfrs.append(_svmsens_svm(300, "LinSVM on 300 (SVM)"))
    clfrs.append(_svmsens_svm(200, "LinSVM on 200 (SVM)"))
    clfrs.append(_svmsens_svm(100, "LinSVM on 100 (SVM)"))
    return clfrs