def test_remove_invariant_as_a_mapper():
    """A std-based feature selection must agree with remove_invariant_features.

    Two features of a copy of the 'uni2large' dataset are overwritten with a
    constant.  The dataset is then passed through a
    SensitivityBasedFeatureSelection whose sensitivity is the per-feature
    standard deviation, and the outcome is compared against
    remove_invariant_features() applied to the very same dataset.
    """
    from mvpa2.featsel.helpers import RangeElementSelector
    from mvpa2.featsel.base import StaticFeatureSelection, \
        SensitivityBasedFeatureSelection
    from mvpa2.testing.datasets import datasets
    from mvpa2.datasets.miscfx import remove_invariant_features

    # Work on a copy so the shared testing dataset is left untouched.
    data = datasets['uni2large'].copy()
    data.a['mapper'] = StaticFeatureSelection(np.arange(data.nfeatures))
    # Remember the original position of every feature so that we can tell
    # afterwards which ones survived the selection.
    data.fa['index'] = np.arange(data.nfeatures)
    # Make two features constant across samples, i.e. give them zero std.
    constant_columns = [1, 8]
    data.samples[:, constant_columns] = 10

    def featurewise_std(samples):
        # Spread of every feature (column) across the samples.
        return np.std(samples, axis=0)

    selection = SensitivityBasedFeatureSelection(
        featurewise_std,
        RangeElementSelector(lower=0, inclusive=False),
        train_analyzer=False,
        auto_train=True,
    )
    selected = selection(data)

    # Validate that we are getting the same results as
    # remove_invariant_features
    reference = remove_invariant_features(data)
    assert_array_equal(selected.samples, reference.samples)
    assert_array_equal(selected.fa.index, reference.fa.index)

    # With features 1 and 8 gone, output position 1 must hold original
    # feature 2 and output position 8 must hold original feature 10.
    for position, original_id in ((1, 2), (8, 10)):
        assert_equal(selected.fa.index[position], original_id)
def test_union_feature_selection(self):
    """Combine two feature selections by union and by intersection.

    The union must not contain more features than the input dataset, and
    the intersection must contain strictly fewer features than the union.
    """
    # first method: the 5% of features with the highest F-scores
    anova_selection = SensitivityBasedFeatureSelection(
        OneWayAnova(),
        FractionTailSelector(0.05, mode='select', tail='upper'))
    # second method: features with non-zero SMLR weights
    smlr_selection = SensitivityBasedFeatureSelection(
        SMLRWeights(SMLR(lm=1, implementation="C"),
                    postproc=sumofabs_sample()),
        RangeElementSelector(mode='select'))
    selections = [anova_selection, smlr_selection]

    union_fs = CombinedFeatureSelection(selections, method='union')
    union_ds = union_fs(self.dataset)
    self.assertTrue(union_fs.method == 'union')
    # check output dataset
    self.assertTrue(union_ds.nfeatures <= self.dataset.nfeatures)

    # again for intersection
    intersection_fs = CombinedFeatureSelection(selections,
                                               method='intersection')
    intersection_ds = intersection_fs(self.dataset)
    assert_true(intersection_ds.nfeatures < union_ds.nfeatures)
def test_feature_selector(self):
    """Test feature selector

    Runs FractionTailSelector, FixedNElementTailSelector and
    RangeElementSelector on one small vector and checks the element
    indices they return.

    Index arrays are compared with ``np.array_equal`` rather than
    ``(a == b).all()``: the latter broadcasts, so an empty or length-1
    result could compare "equal" to a target of a different length and
    let the test pass vacuously.
    """
    # remove 10% weakest
    selector = FractionTailSelector(0.1)
    #                 0   1   2  3   4    5  6  7  8   9
    data = np.array([3.5, 10, 7, 5, -0.4, 0, 0, 2, 10, 9])
    # ascending order of the elements: [4, 5, 6, 7, 0, 3, 2, 9, 1, 8]
    target10 = np.array([0, 1, 2, 3, 5, 6, 7, 8, 9])
    target30 = np.array([0, 1, 2, 3, 7, 8, 9])

    # nothing was selected yet, so the conditional attribute is not set
    self.assertRaises(UnknownStateError,
                      selector.ca.__getattribute__, 'ndiscarded')
    self.assertTrue(np.array_equal(selector(data), target10))
    selector.felements = 0.30      # discard 30%
    self.assertEqual(selector.felements, 0.3)
    self.assertTrue(np.array_equal(selector(data), target30))
    self.assertEqual(selector.ca.ndiscarded, 3)  # 3 elements were discarded

    selector = FixedNElementTailSelector(1)
    #                 0   1   2  3   4    5  6  7  8   9
    data = np.array([3.5, 10, 7, 5, -0.4, 0, 0, 2, 10, 9])
    self.assertTrue(np.array_equal(selector(data), target10))

    selector.nelements = 3
    self.assertEqual(selector.nelements, 3)
    self.assertTrue(np.array_equal(selector(data), target30))
    self.assertEqual(selector.ca.ndiscarded, 3)

    # test range selector: (constructor arguments, expected indices)
    range_cases = [
        # simple range 'above'
        (dict(lower=0), [0, 1, 2, 3, 7, 8, 9]),
        (dict(lower=0, inclusive=True), [0, 1, 2, 3, 5, 6, 7, 8, 9]),
        (dict(lower=0, mode='discard', inclusive=True), [4]),
        # simple range 'below'
        (dict(upper=2), [4, 5, 6]),
        (dict(upper=2, inclusive=True), [4, 5, 6, 7]),
        (dict(upper=2, mode='discard', inclusive=True),
         [0, 1, 2, 3, 8, 9]),
        # ranges
        (dict(lower=2, upper=9), [0, 2, 3]),
        (dict(lower=2, upper=9, inclusive=True), [0, 2, 3, 7, 9]),
    ]
    for kwargs, expected in range_cases:
        selected = RangeElementSelector(**kwargs)(data)
        self.assertTrue(
            np.array_equal(selected, expected),
            "RangeElementSelector(%r) selected %r, expected %r"
            % (kwargs, selected, expected))

    # discarding the inclusive range with swapped bounds must give the same
    # indices as selecting the exclusive range with the bounds in order
    self.assertTrue(np.array_equal(
        RangeElementSelector(upper=2, lower=9, mode='discard',
                             inclusive=True)(data),
        RangeElementSelector(lower=2, upper=9,
                             inclusive=False)(data)))

    # non-0 elements -- should be equivalent to np.nonzero()[0]
    self.assertTrue(np.array_equal(RangeElementSelector()(data),
                                   np.nonzero(data)[0]))
regrswh += [_lasso_lars_ic] clfswh += [ RegressionAsClassifier(_lasso_lars_ic, descr='skl.LassoLarsIC_C()') ] # kNN clfswh += kNN(k=5, descr="kNN(k=5)") clfswh += kNN(k=5, voting='majority', descr="kNN(k=5, voting='majority')") clfswh += \ FeatureSelectionClassifier( kNN(), SensitivityBasedFeatureSelection( SMLRWeights(SMLR(lm=1.0, implementation="C"), postproc=maxofabs_sample()), RangeElementSelector(mode='select')), descr="kNN on SMLR(lm=1) non-0") clfswh += \ FeatureSelectionClassifier( kNN(), SensitivityBasedFeatureSelection( OneWayAnova(), FractionTailSelector(0.05, mode='select', tail='upper')), descr="kNN on 5%(ANOVA)") clfswh += \ FeatureSelectionClassifier( kNN(), SensitivityBasedFeatureSelection( OneWayAnova(),