def test_null_dist_prob(self, null):
    """Testing null dist probability"""
    if not isinstance(null, NullDist):
        return
    ds = datasets['uni2small']
    null.fit(OneWayAnova(), ds)

    # Check for reasonable output: p-values for non-bogus features
    # should be significant, while bogus (0) ones should not.
    prob = null.p([20, 0, 0, 0, 0, np.nan])

    # XXX this is labile! it also needs checking since the F-scores
    # of the MCNullDists using normal distribution are apparently not
    # distributed that way, hence the test often (if not always) fails.
    labile = cfg.getboolean('tests', 'labile', default='yes')
    if labile:
        self.failUnless(np.abs(prob[0]) < 0.05,
                        msg="Expected small p, got %g" % prob[0])
    if labile:
        self.failUnless((np.abs(prob[1:]) > 0.05).all(),
                        msg="Bogus features should have insignificant p."
                            " Got %s" % (np.abs(prob[1:]), ))

    # output has to have matching shape
    if not isinstance(null, FixedNullDist):
        # Fixed dist is univariate ATM so it doesn't care
        # about dimensionality and gives 1 output value
        self.failUnlessRaises(ValueError, null.p, [5, 3, 4])
def some_svms():
    """Return FeatureSelectionClassifiers based on SVMs with different
    numbers of features and/or sensitivity measures.

    Returns
    -------
    list
        Four classifiers: linear SVM on 500 ANOVA-selected features,
        linear SVM on 500 SVM-sensitivity-selected features, a plain SVM,
        and a linear SVM on the top 5% SVM-sensitivity-selected features.
    """
    clfr1 = FeatureSelectionClassifier(
        SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FixedNElementTailSelector(500, mode='select', tail='upper')),
        descr="LinSVM on 500 (ANOVA)")
    clfr2 = FeatureSelectionClassifier(
        SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        SensitivityBasedFeatureSelection(
            SVM().getSensitivityAnalyzer(transformer=Absolute),
            FixedNElementTailSelector(500, mode='select', tail='upper')),
        descr="LinSVM on 500 (SVM)")
    clfr3 = SVM()
    clfr4 = FeatureSelectionClassifier(
        SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        SensitivityBasedFeatureSelection(
            SVM().getSensitivityAnalyzer(transformer=Absolute),
            FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="LinSVM on 5 % (SVM)")
    # BUG FIX: the original returned [clfr1, clfr2, clfr3, clfr3] --
    # clfr3 twice -- while clfr4 was constructed but never returned.
    return [clfr1, clfr2, clfr3, clfr4]
def test_anova(self):
    """Additional aspects of OnewayAnova"""
    oa = OneWayAnova()
    oa_custom = OneWayAnova(targets_attr='custom')

    ds = datasets['uni4large']
    ds_custom = Dataset(ds.samples, sa={'custom': ds.targets})

    res = oa(ds)
    # the custom-attribute analyzer must refuse a dataset lacking it
    self.failUnlessRaises(KeyError, oa_custom, ds)
    res_custom = oa_custom(ds_custom)
    # both attribute routes must yield the same F-scores
    self.failUnless(np.allclose(res.samples, res_custom.samples))

    # we should get the same results on subsequent runs
    self.failUnless(np.allclose(res.samples, oa(ds).samples))
    self.failUnless(np.allclose(res_custom.samples,
                                oa_custom(ds_custom).samples))
def test_features01():
    from mvpa.testing.datasets import datasets
    from mvpa.measures.anova import OneWayAnova

    # TODO: might be worth creating appropriate factory
    # help in mappers/fx
    scale_to_unit_max = FxMapper('features', lambda x: x / x.max(),
                                 attrfx=None)
    aov = OneWayAnova(postproc=scale_to_unit_max)
    f = aov(datasets['uni2small'])
    # after per-feature scaling the maximum is exactly 1.0,
    # but not every value collapses to it
    ok_((f.samples != 1.0).any())
    ok_(f.samples.max() == 1.0)
def test_mapped_classifier_sensitivity_analyzer(self, clf):
    """Test sensitivity of the mapped classifier"""
    # Assuming many defaults it is as simple as
    fsel = SensitivityBasedFeatureSelection(
        OneWayAnova(),
        FractionTailSelector(0.5, mode='select', tail='upper'))
    mclf = FeatureSelectionClassifier(clf, fsel,
                                      enable_ca=['training_confusion'])

    sana = mclf.get_sensitivity_analyzer(postproc=sumofabs_sample(),
                                         enable_ca=["sensitivities"])
    # and lets look at all sensitivities
    dataset = datasets['uni2medium']
    # and we get sensitivity analyzer which works on splits
    sens = sana(dataset)
    self.failUnlessEqual(sens.shape, (1, dataset.nfeatures))
def test_union_feature_selection(self):
    # two methods: 5% highest F-scores, non-zero SMLR weights
    anova_fs = SensitivityBasedFeatureSelection(
        OneWayAnova(),
        FractionTailSelector(0.05, mode='select', tail='upper'))
    smlr_fs = SensitivityBasedFeatureSelection(
        SMLRWeights(SMLR(lm=1, implementation="C"),
                    postproc=sumofabs_sample()),
        RangeElementSelector(mode='select'))
    fss = [anova_fs, smlr_fs]

    fs = CombinedFeatureSelection(
        fss, combiner='union',
        enable_ca=['selected_ids', 'selections_ids'])

    od = fs(self.dataset)
    self.failUnless(fs.combiner == 'union')
    self.failUnless(len(fs.ca.selections_ids))
    self.failUnless(len(fs.ca.selections_ids) <= self.dataset.nfeatures)
    # should store one set per method
    self.failUnless(len(fs.ca.selections_ids) == len(fss))
    # no individual selection can be larger than the union
    for sel in fs.ca.selections_ids:
        self.failUnless(len(sel) <= len(fs.ca.selected_ids))
    # check output dataset
    self.failUnless(od.nfeatures == len(fs.ca.selected_ids))
    # each output column must be the corresponding selected input feature
    for col, fid in enumerate(fs.ca.selected_ids):
        self.failUnless(
            (od.samples[:, col] == self.dataset.samples[:, fid]).all())

    # again for intersection
    fs = CombinedFeatureSelection(
        fss, combiner='intersection',
        enable_ca=['selected_ids', 'selections_ids'])
    # simply run it for now -- can't think of additional tests
    od = fs(self.dataset)
def test_anova(self):
    """Do some extended testing of OneWayAnova

    in particular -- compound estimation
    """
    m = OneWayAnova()               # default must be not compound ?
    mc = CompoundOneWayAnova()
    ds = datasets['uni2medium']

    # For 2 labels it must be identical for both and equal to
    # simple OneWayAnova
    a, ac = m(ds), mc(ds)

    self.failUnless(a.shape == (1, ds.nfeatures))
    self.failUnless(ac.shape == (len(ds.UT), ds.nfeatures))

    assert_array_equal(ac[0], ac[1])
    assert_array_equal(a, ac[1])

    # check for p-value attrs
    if externals.exists('scipy'):
        assert_true('fprob' in a.fa.keys())
        assert_equal(len(ac.fa), len(ac))

    ds = datasets['uni4large']
    ac = mc(ds)

    if cfg.getboolean('tests', 'labile', default='yes'):
        # All non-bogus features must be high for a corresponding feature
        nonbogus = np.array(ds.a.nonbogus_features)
        self.failUnless(
            (ac.samples[np.arange(4), nonbogus] >= 1).all())
        # All features should have slightly but different CompoundAnova
        # values. I really doubt that there will be a case when this
        # test would fail just to being 'labile'
        self.failUnless(np.max(np.std(ac, axis=1)) > 0,
                        msg='In compound anova, we should get different'
                            ' results for different labels. Got %s' % ac)
# plain kNN with majority voting
clfswh += kNN(k=5, voting='majority', descr="kNN(k=5, voting='majority')")

# kNN restricted to features with non-zero SMLR weights
clfswh += FeatureSelectionClassifier(
    kNN(),
    SensitivityBasedFeatureSelection(
        SMLRWeights(SMLR(lm=1.0, implementation="C"),
                    postproc=maxofabs_sample()),
        RangeElementSelector(mode='select')),
    descr="kNN on SMLR(lm=1) non-0")

# kNN restricted to the 5% highest ANOVA F-scores
clfswh += FeatureSelectionClassifier(
    kNN(),
    SensitivityBasedFeatureSelection(
        OneWayAnova(),
        FractionTailSelector(0.05, mode='select', tail='upper')),
    descr="kNN on 5%(ANOVA)")

# kNN restricted to the 50 highest ANOVA F-scores
clfswh += FeatureSelectionClassifier(
    kNN(),
    SensitivityBasedFeatureSelection(
        OneWayAnova(),
        FixedNElementTailSelector(50, mode='select', tail='upper')),
    descr="kNN on 50(ANOVA)")

# GNB
clfswh += GNB(descr="GNB()")
clfswh += GNB(common_variance=True, descr="GNB(common_variance=True)")
clfswh += GNB(prior='uniform', descr="GNB(prior='uniform')")
def svms_for_CombinedClassifier():
    """For my iEEG study, I use a CombinedClassifier. The components
    are defined here.

    Returns
    -------
    list
        Eight FeatureSelectionClassifiers: linear SVMs on the top
        500/300/200/100 features selected by ANOVA, followed by linear
        SVMs on the top 500/300/200/100 features selected by SVM
        sensitivity.
    """
    def _linsvm():
        # base classifier shared by all entries
        return SVM(descr="libsvm.LinSVM(C=def)", probability=1)

    def _anova_clf(nfeatures):
        # linear SVM on the top-`nfeatures` ANOVA-ranked features
        return FeatureSelectionClassifier(
            _linsvm(),
            SensitivityBasedFeatureSelection(
                OneWayAnova(),
                FixedNElementTailSelector(nfeatures, mode='select',
                                          tail='upper')),
            descr="LinSVM on %d (Anova)" % nfeatures)

    def _svmsens_clf(nfeatures):
        # linear SVM on the top-`nfeatures` features ranked by the
        # absolute SVM sensitivities
        return FeatureSelectionClassifier(
            _linsvm(),
            SensitivityBasedFeatureSelection(
                _linsvm().getSensitivityAnalyzer(
                    transformer=mvpa.misc.transformers.Absolute),
                FixedNElementTailSelector(nfeatures, mode='select',
                                          tail='upper')),
            descr="LinSVM on %d (SVM)" % nfeatures)

    # BUG FIX: the original passed 500 to the selectors of the entries
    # labeled "LinSVM on 100 (Anova)" and "LinSVM on 100 (SVM)" -- a
    # copy-paste error breaking the 500/300/200/100 progression and
    # exactly duplicating the 500-feature entries.
    nfeatures_list = (500, 300, 200, 100)
    clfrs = [_anova_clf(n) for n in nfeatures_list]
    clfrs += [_svmsens_clf(n) for n in nfeatures_list]
    return clfrs