def test_SplitRFE(self): # just a smoke test ATM from mvpa2.clfs.svm import LinearCSVMC from mvpa2.clfs.meta import MappedClassifier from mvpa2.misc.data_generators import normal_feature_dataset #import mvpa2.featsel.rfe #reload(mvpa2.featsel.rfe) from mvpa2.featsel.rfe import RFE, SplitRFE from mvpa2.generators.partition import NFoldPartitioner from mvpa2.featsel.helpers import FractionTailSelector from mvpa2.testing import ok_, assert_equal clf = LinearCSVMC(C=1) dataset = normal_feature_dataset(perlabel=20, nlabels=2, nfeatures=30, snr=1., nonbogus_features=[1, 5]) # flip one of the meaningful features around to see # if we are still getting proper selection dataset.samples[:, dataset.a.nonbogus_features[1]] *= -1 # 4 partitions should be enough for testing partitioner = NFoldPartitioner(count=4) rfeclf = MappedClassifier( clf, SplitRFE(clf, partitioner, fselector=FractionTailSelector(0.2, mode='discard', tail='lower'))) r0 = repr(rfeclf) ok_(rfeclf.mapper.nfeatures_min == 0) rfeclf.train(dataset) ok_(rfeclf.mapper.nfeatures_min > 0) predictions = rfeclf(dataset).samples # at least 1 of the nonbogus-features should be chosen ok_( len( set(dataset.a.nonbogus_features).intersection( rfeclf.mapper.slicearg)) > 0) # check repr to have all needed pieces r = repr(rfeclf) s = str(rfeclf) ok_(('partitioner=NFoldP' in r) or ('partitioner=mvpa2.generators.partition.NFoldPartitioner' in r)) ok_('lrn=' in r) ok_(not 'slicearg=' in r) assert_equal(r, r0)
clfswh += \ FeatureSelectionClassifier( linearSVMC.clone(), SensitivityBasedFeatureSelection( SMLRWeights(SMLR(lm=0.1, implementation="C"), postproc=maxofabs_sample()), RangeElementSelector(mode='select')), descr="LinSVM on SMLR(lm=0.1) non-0") _rfeclf = linearSVMC.clone() clfswh += \ FeatureSelectionClassifier( _rfeclf, SplitRFE( _rfeclf, OddEvenPartitioner(), fselector=FractionTailSelector( 0.2, mode='discard', tail='lower')), descr="LinSVM with nested-CV RFE") _clfs_splitrfe_svm_anova = \ FeatureSelectionClassifier( linearSVMC.clone(), SplitRFE( linearSVMC.clone(), OddEvenPartitioner(), fselector=FractionTailSelector( 0.2, mode='discard', tail='lower'), fmeasure=OneWayAnova()), descr="LinSVM with nested-CV Anova RFE") clfswh += _clfs_splitrfe_svm_anova
clfswh += \ FeatureSelectionClassifier( linearSVMC.clone(), SensitivityBasedFeatureSelection( SMLRWeights(SMLR(lm=0.1, implementation="C"), postproc=maxofabs_sample()), RangeElementSelector(mode='select')), descr="LinSVM on SMLR(lm=0.1) non-0") _rfeclf = linearSVMC.clone() clfswh += \ FeatureSelectionClassifier( _rfeclf, SplitRFE( _rfeclf, OddEvenPartitioner(), fselector=FractionTailSelector( 0.2, mode='discard', tail='lower')), descr="LinSVM with nested-CV RFE") clfswh += \ FeatureSelectionClassifier( linearSVMC.clone(), SensitivityBasedFeatureSelection( SMLRWeights(SMLR(lm=1.0, implementation="C"), postproc=maxofabs_sample()), RangeElementSelector(mode='select')), descr="LinSVM on SMLR(lm=1) non-0") # "Interesting" classifiers clfswh += \
def test_SplitRFE(self, fmeasure): # just a smoke test ATM from mvpa2.clfs.svm import LinearCSVMC from mvpa2.clfs.meta import MappedClassifier from mvpa2.misc.data_generators import normal_feature_dataset #import mvpa2.featsel.rfe #reload(mvpa2.featsel.rfe) from mvpa2.featsel.rfe import RFE, SplitRFE from mvpa2.generators.partition import NFoldPartitioner from mvpa2.featsel.helpers import FractionTailSelector from mvpa2.testing import ok_, assert_equal clf = LinearCSVMC(C=1) dataset = normal_feature_dataset(perlabel=20, nlabels=2, nfeatures=11, snr=1., nonbogus_features=[1, 5]) # flip one of the meaningful features around to see # if we are still getting proper selection dataset.samples[:, dataset.a.nonbogus_features[1]] *= -1 # 3 partitions should be enough for testing partitioner = NFoldPartitioner(count=3) rfeclf = MappedClassifier( clf, SplitRFE( clf, partitioner, fselector=FractionTailSelector(0.5, mode='discard', tail='lower'), fmeasure=fmeasure, # need to update only when using clf's sens anal update_sensitivity=fmeasure is None)) r0 = repr(rfeclf) ok_(rfeclf.mapper.nfeatures_min == 0) rfeclf.train(dataset) ok_(rfeclf.mapper.nfeatures_min > 0) predictions = rfeclf(dataset).samples # at least 1 of the nonbogus-features should be chosen ok_( len( set(dataset.a.nonbogus_features).intersection( rfeclf.mapper.slicearg)) > 0) # check repr to have all needed pieces r = repr(rfeclf) s = str(rfeclf) ok_(('partitioner=NFoldP' in r) or ('partitioner=mvpa2.generators.partition.NFoldPartitioner' in r)) ok_('lrn=' in r) ok_(not 'slicearg=' in r) assert_equal(r, r0) if externals.exists('joblib'): rfeclf.mapper.nproc = -1 # compare results against the one ran in parallel _slicearg = rfeclf.mapper.slicearg _predictions = predictions rfeclf.train(dataset) predictions = rfeclf(dataset).samples assert_array_equal(predictions, _predictions) assert_array_equal(_slicearg, rfeclf.mapper.slicearg) # Test that we can collect stats from cas within cross-validation sensitivities = [] nested_errors = [] nested_nfeatures = [] def store_me(data, node, result): sens = node.measure.get_sensitivity_analyzer( force_train=False)(data) sensitivities.append(sens) nested_errors.append(node.measure.mapper.ca.nested_errors) nested_nfeatures.append(node.measure.mapper.ca.nested_nfeatures) cv = CrossValidation(rfeclf, NFoldPartitioner(count=1), callback=store_me, enable_ca=['stats']) _ = cv(dataset) # just to make sure we collected them assert_equal(len(sensitivities), 1) assert_equal(len(nested_errors), 1) assert_equal(len(nested_nfeatures), 1)