Example #1
def some_svms():
    """Returns a couple of FeatureSelectionClassifiers
    based on SVMs with different numbers of features and/or
    sensitivity measure"""
    clfr1 = FeatureSelectionClassifier(SVM(descr="libsvm.LinSVM(C=def)",
                                           probability=1),
                                       SensitivityBasedFeatureSelection(
                                           OneWayAnova(),
                                           FixedNElementTailSelector(
                                               500,
                                               mode='select',
                                               tail='upper')),
                                       descr="LinSVM on 500 (ANOVA)")
    clfr2 = FeatureSelectionClassifier(
        SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        SensitivityBasedFeatureSelection(
            SVM().getSensitivityAnalyzer(transformer=Absolute),
            FixedNElementTailSelector(500, mode='select', tail='upper')),
        descr="LinSVM on 500 (SVM)")
    clfr3 = SVM()  # plain SVM without feature selection (all features)
    clfr4 = FeatureSelectionClassifier(
        SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        SensitivityBasedFeatureSelection(
            SVM().getSensitivityAnalyzer(transformer=Absolute),
            FractionTailSelector(0.05, mode='select', tail='upper'),
        ),
        descr="LinSVM on 5 % (SVM)")
    return [clfr1, clfr2, clfr3, clfr4]
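Each returned classifier behaves like any other PyMVPA classifier: the feature selection is carried out transparently as part of training. A minimal usage sketch, assuming a hypothetical labeled dataset ds:

for clf in some_svms():
    clf.train(ds)                 # feature selection and SVM training happen together
    predictions = clf.predict(ds.samples)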
Example #2
    def test_sensitivity_based_feature_selection(self, clf):

        # sensitivity analyser and transfer error quantifier use the SAME clf!
        sens_ana = clf.get_sensitivity_analyzer(postproc=maxofabs_sample())

        # number of features to remove
        Nremove = 2

        # because the clf is already trained when computing the sensitivity
        # map, prevent retraining for transfer error calculation
        # use the absolute values of the SVM weights as the sensitivity
        fe = SensitivityBasedFeatureSelection(sens_ana,
                feature_selector=FixedNElementTailSelector(Nremove),
                enable_ca=["sensitivity", "selected_ids"])

        data = self.get_data()

        data_nfeatures = data.nfeatures

        fe.train(data)
        resds = fe(data)

        # fail if orig datasets are changed
        self.failUnless(data.nfeatures == data_nfeatures)

        # check that exactly Nremove features got removed
        self.failUnlessEqual(data.nfeatures, resds.nfeatures+Nremove,
            msg="Exactly %d features should have been removed" % Nremove)

        self.failUnlessEqual(fe.ca.sensitivity.nfeatures, data_nfeatures,
            msg="Sensitivity has to have the same number of features as the original dataset")
Example #3
    def test_feature_selection_classifier(self):
        from mvpa.featsel.base import \
             SensitivityBasedFeatureSelection
        from mvpa.featsel.helpers import \
             FixedNElementTailSelector

        # should give lowest weight to the feature with lowest index
        sens_ana = SillySensitivityAnalyzer()
        # should give lowest weight to the feature with highest index
        sens_ana_rev = SillySensitivityAnalyzer(mult=-1)

        # corresponding feature selections
        feat_sel = SensitivityBasedFeatureSelection(
            sens_ana, FixedNElementTailSelector(1, mode='discard'))

        feat_sel_rev = SensitivityBasedFeatureSelection(
            sens_ana_rev, FixedNElementTailSelector(1))

        samples = np.array([[0, 0, -1], [1, 0, 1], [-1, -1, 1], [-1, 0, 1],
                            [1, -1, 1]])

        testdata3 = dataset_wizard(samples=samples, targets=1)
        # dummy train data so proper mapper gets created
        traindata = dataset_wizard(samples=np.array([[0, 0, -1], [1, 0, 1]]),
                                   targets=[1, 2])

        # expected predictions
        res110 = [1, 1, 1, -1, -1]
        res011 = [-1, 1, -1, 1, -1]

        # first classifier -- 0th feature should be discarded
        clf011 = FeatureSelectionClassifier(self.clf_sign,
                                            feat_sel,
                                            enable_ca=['feature_ids'])

        self.clf_sign.ca.change_temporarily(enable_ca=['estimates'])
        clf011.train(traindata)

        self.failUnlessEqual(clf011.predict(testdata3.samples), res011)
        # basic check that estimates get assigned within the ProxyClassifier
        self.failUnless(len(clf011.ca.estimates) == len(res110),
                        msg="We need to pass values into ProxyClassifier")
        self.clf_sign.ca.reset_changed_temporarily()

        self.failUnlessEqual(len(clf011.ca.feature_ids), 2,
            msg="Feature selection classifier had to be trained on 2 features")

        # second classifier -- last feature should be discarded
        clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel_rev)
        clf011.train(traindata)
        self.failUnlessEqual(clf011.predict(testdata3.samples), res110)
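SillySensitivityAnalyzer is a small helper from the test-support code and is not shown in these snippets. Conceptually it returns a centered index ramp, optionally flipped by mult, so which feature ends up in the discarded tail is fully predictable. A rough sketch, assuming the FeaturewiseDatasetMeasure base class of this PyMVPA generation (the real helper may differ in detail):

import numpy as np
from mvpa.datasets.base import Dataset
from mvpa.measures.base import FeaturewiseDatasetMeasure

class SillySensitivityAnalyzerSketch(FeaturewiseDatasetMeasure):
    """Toy sensitivity: feature i gets weight mult * (i - nfeatures // 2)."""
    def __init__(self, mult=1, **kwargs):
        FeaturewiseDatasetMeasure.__init__(self, **kwargs)
        self._mult = mult

    def _call(self, dataset):
        sens = self._mult * (np.arange(dataset.nfeatures)
                             - int(dataset.nfeatures / 2))
        return Dataset(sens[np.newaxis])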
Example #4
    def test_feature_selection_classifier_with_regression(self):
        from mvpa.featsel.base import \
             SensitivityBasedFeatureSelection
        from mvpa.featsel.helpers import \
             FixedNElementTailSelector
        if sample_clf_reg is None:
            # no regression classifier was found, so there is nothing to test
            return
        # should give lowest weight to the feature with lowest index
        sens_ana = SillySensitivityAnalyzer()

        # corresponding feature selections
        feat_sel = SensitivityBasedFeatureSelection(
            sens_ana, FixedNElementTailSelector(1, mode='discard'))

        # now test with a regression-based classifier. The problem is
        # that predictions are derived from the estimates twice, and the
        # estimates are then reset from the results, so the second time
        # around the estimates simply become the predictions. The final
        # outcome is that the stored estimates are actually predictions...
        dat = dataset_wizard(samples=np.random.randn(4, 10),
                             targets=[-1, -1, 1, 1])
        clf_reg = FeatureSelectionClassifier(sample_clf_reg, feat_sel)
        clf_reg.train(dat)
        _ = clf_reg.predict(dat.samples)
        self.failIf(
            (np.array(clf_reg.ca.estimates) -
             clf_reg.ca.predictions).sum() == 0,
            msg="Values were set to the predictions in %s." % sample_clf_reg)
Example #5
    def test_feature_selection_pipeline(self):
        sens_ana = SillySensitivityAnalyzer()

        wdata = self.get_data()
        wdata_nfeatures = wdata.nfeatures
        tdata = self.get_data_t()
        tdata_nfeatures = tdata.nfeatures

        # test silly one first ;-)
        self.failUnlessEqual(sens_ana(wdata).samples[0,0], -int(wdata_nfeatures/2))

        # OLD: first remove 25% == 6, and then 4, total removing 10
        # NOW: test should be independent of the numerical number of features
        feature_selections = [SensitivityBasedFeatureSelection(
                                sens_ana,
                                FractionTailSelector(0.25)),
                              SensitivityBasedFeatureSelection(
                                sens_ana,
                                FixedNElementTailSelector(4))
                              ]

        # create a FeatureSelection pipeline
        feat_sel_pipeline = FeatureSelectionPipeline(
            feature_selections=feature_selections,
            enable_ca=['nfeatures', 'selected_ids'])

        sdata, stdata = feat_sel_pipeline(wdata, tdata)

        self.failUnlessEqual(len(feat_sel_pipeline.feature_selections),
                             len(feature_selections),
                             msg="Test the property feature_selections")

        desired_nfeatures = int(np.ceil(wdata_nfeatures*0.75))
        self.failUnlessEqual(feat_sel_pipeline.ca.nfeatures,
                             [wdata_nfeatures, desired_nfeatures],
                             msg="Test if nfeatures get assigned properly."
                             " Got %s!=%s" % (feat_sel_pipeline.ca.nfeatures,
                                              [wdata_nfeatures, desired_nfeatures]))

        self.failUnlessEqual(list(feat_sel_pipeline.ca.selected_ids),
                             range(int(wdata_nfeatures*0.25)+4, wdata_nfeatures))
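For illustration, the bookkeeping asserted above can be traced by hand for a hypothetical n of 20 features (the selectors' default behaviour of discarding the lower tail is implied by the assertions):

import numpy as np

n = 20
after_step1 = int(np.ceil(n * 0.75))   # FractionTailSelector(0.25) discards 25% -> 15
after_step2 = after_step1 - 4          # FixedNElementTailSelector(4) discards 4 more -> 11
# with the monotonically increasing "silly" sensitivity, the survivors are the
# upper tail: range(int(n * 0.25) + 4, n), i.e. feature ids 9..19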
Example #6
    def test_union_feature_selection(self):
        # two methods: 5% highest F-scores, non-zero SMLR weights
        fss = [
            SensitivityBasedFeatureSelection(
                OneWayAnova(),
                FractionTailSelector(0.05, mode='select', tail='upper')),
            SensitivityBasedFeatureSelection(
                SMLRWeights(SMLR(lm=1, implementation="C"),
                            postproc=sumofabs_sample()),
                RangeElementSelector(mode='select'))
        ]

        fs = CombinedFeatureSelection(
            fss,
            combiner='union',
            enable_ca=['selected_ids', 'selections_ids'])

        od = fs(self.dataset)

        self.failUnless(fs.combiner == 'union')
        self.failUnless(len(fs.ca.selections_ids))
        self.failUnless(len(fs.ca.selections_ids) <= self.dataset.nfeatures)
        # should store one set per method
        self.failUnless(len(fs.ca.selections_ids) == len(fss))
        # no individual selection can be larger than the union
        for s in fs.ca.selections_ids:
            self.failUnless(len(s) <= len(fs.ca.selected_ids))
        # check output dataset
        self.failUnless(od.nfeatures == len(fs.ca.selected_ids))
        for i, id in enumerate(fs.ca.selected_ids):
            self.failUnless(
                (od.samples[:, i] == self.dataset.samples[:, id]).all())

        # again for intersection
        fs = CombinedFeatureSelection(
            fss,
            combiner='intersection',
            enable_ca=['selected_ids', 'selections_ids'])
        # simply run it for now -- can't think of additional tests
        od = fs(self.dataset)
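The 'union' and 'intersection' combiners amount to set operations over the per-method id lists stored in selections_ids. A plain-Python sketch of the idea (not the actual implementation):

ids_union = set()
ids_intersection = None
for ids in fs.ca.selections_ids:
    ids_union |= set(ids)
    ids_intersection = set(ids) if ids_intersection is None \
                       else ids_intersection & set(ids)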
Example #7
    def test_sensitivity_based_feature_selection(self, clf):

        # sensitivity analyser and transfer error quantifier use the SAME clf!
        sens_ana = clf.get_sensitivity_analyzer(postproc=maxofabs_sample())

        # number of features to remove
        Nremove = 2

        # because the clf is already trained when computing the sensitivity
        # map, prevent retraining for transfer error calculation
        # use the absolute values of the SVM weights as the sensitivity
        fe = SensitivityBasedFeatureSelection(sens_ana,
                feature_selector=FixedNElementTailSelector(Nremove),
                enable_ca=["sensitivity", "selected_ids"])

        wdata = self.get_data()
        tdata = self.get_data_t()
        # XXX for now convert to numeric labels, but should better be taken
        # care of during clf refactoring
        am = AttributeMap()
        wdata.targets = am.to_numeric(wdata.targets)
        tdata.targets = am.to_numeric(tdata.targets)

        wdata_nfeatures = wdata.nfeatures
        tdata_nfeatures = tdata.nfeatures

        sdata, stdata = fe(wdata, tdata)

        # fail if orig datasets are changed
        self.failUnless(wdata.nfeatures == wdata_nfeatures)
        self.failUnless(tdata.nfeatures == tdata_nfeatures)

        # check that exactly Nremove features got removed
        self.failUnlessEqual(wdata.nfeatures, sdata.nfeatures+Nremove,
            msg="Exactly %d features should have been removed from the training data" % Nremove)

        self.failUnlessEqual(tdata.nfeatures, stdata.nfeatures+Nremove,
            msg="Exactly %d features should have been removed from the testing data as well" % Nremove)

        self.failUnlessEqual(fe.ca.sensitivity.nfeatures, wdata_nfeatures,
            msg="Sensitivity has to have the same number of features as the original dataset")

        self.failUnlessEqual(len(fe.ca.selected_ids), sdata.nfeatures,
            msg="# of selected features must be equal the one in the result dataset")
Example #8
    def test_mapped_classifier_sensitivity_analyzer(self, clf):
        """Test sensitivity of the mapped classifier
        """
        # Assuming many defaults it is as simple as
        mclf = FeatureSelectionClassifier(clf,
                                          SensitivityBasedFeatureSelection(
                                              OneWayAnova(),
                                              FractionTailSelector(
                                                  0.5,
                                                  mode='select',
                                                  tail='upper')),
                                          enable_ca=['training_confusion'])

        sana = mclf.get_sensitivity_analyzer(postproc=sumofabs_sample(),
                                             enable_ca=["sensitivities"])
        # and let's look at all sensitivities

        dataset = datasets['uni2medium']
        # compute the sensitivities on the dataset
        sens = sana(dataset)
        self.failUnlessEqual(sens.shape, (1, dataset.nfeatures))
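Note that the sensitivities returned by the meta-classifier's analyzer are mapped back to the full feature space, which is what the shape check above verifies. That makes post-hoc inspection straightforward; a small, purely illustrative NumPy sketch using the sens result from the test:

import numpy as np

ranking = np.argsort(np.abs(sens.samples[0]))[::-1]   # most sensitive features first
top10 = ranking[:10]                                   # candidate feature ids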
Example #9
# glmnet from R via RPy
if externals.exists('glmnet'):
    from mvpa.clfs.glmnet import GLMNET_C, GLMNET_R
    clfswh += GLMNET_C(descr="GLMNET_C()")
    regrswh += GLMNET_R(descr="GLMNET_R()")

# kNN
clfswh += kNN(k=5, descr="kNN(k=5)")
clfswh += kNN(k=5, voting='majority', descr="kNN(k=5, voting='majority')")

clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           SMLRWeights(SMLR(lm=1.0, implementation="C"),
                       postproc=maxofabs_sample()),
           RangeElementSelector(mode='select')),
        descr="kNN on SMLR(lm=1) non-0")

clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="kNN on 5%(ANOVA)")

clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
Example #10
def svms_for_CombinedClassifier():
    """For my iEEG study, I use a CombinedClassifier. The components are defined here"""
    clfrs = []
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                #SVM(descr = "libsvm.LinSVM(C=def)", probability = 1).getSensitivityAnalyzer(transformer=mvpa.misc.transformers.Absolute),
                OneWayAnova(),
                FixedNElementTailSelector(500, mode='select', tail='upper')),
            descr="LinSVM on 500 (Anova)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                #SVM(descr = "libsvm.LinSVM(C=def)", probability = 1).getSensitivityAnalyzer(transformer=mvpa.misc.transformers.Absolute),
                OneWayAnova(),
                FixedNElementTailSelector(300, mode='select', tail='upper')),
            descr="LinSVM on 300 (Anova)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                #SVM(descr = "libsvm.LinSVM(C=def)", probability = 1).getSensitivityAnalyzer(transformer=mvpa.misc.transformers.Absolute),
                OneWayAnova(),
                FixedNElementTailSelector(200, mode='select', tail='upper')),
            descr="LinSVM on 200 (Anova)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                #SVM(descr = "libsvm.LinSVM(C=def)", probability = 1).getSensitivityAnalyzer(transformer=mvpa.misc.transformers.Absolute),
                OneWayAnova(),
                FixedNElementTailSelector(100, mode='select', tail='upper')),
            descr="LinSVM on 100 (Anova)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                SVM(descr="libsvm.LinSVM(C=def)",
                    probability=1).getSensitivityAnalyzer(
                        transformer=mvpa.misc.transformers.Absolute),
                #OneWayAnova(),
                FixedNElementTailSelector(500, mode='select', tail='upper')),
            descr="LinSVM on 500 (SVM)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                SVM(descr="libsvm.LinSVM(C=def)",
                    probability=1).getSensitivityAnalyzer(
                        transformer=mvpa.misc.transformers.Absolute),
                #OneWayAnova(),
                FixedNElementTailSelector(300, mode='select', tail='upper')),
            descr="LinSVM on 300 (SVM)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                SVM(descr="libsvm.LinSVM(C=def)",
                    probability=1).getSensitivityAnalyzer(
                        transformer=mvpa.misc.transformers.Absolute),
                #OneWayAnova(),
                FixedNElementTailSelector(200, mode='select', tail='upper')),
            descr="LinSVM on 200 (SVM)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                SVM(descr="libsvm.LinSVM(C=def)",
                    probability=1).getSensitivityAnalyzer(
                        transformer=mvpa.misc.transformers.Absolute),
                #OneWayAnova(),
                FixedNElementTailSelector(100, mode='select', tail='upper')),
            descr="LinSVM on 100 (SVM)"))
    return clfrs
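The docstring above mentions that these components feed a CombinedClassifier. A rough sketch of how they might be assembled, assuming the 0.4-era meta-classifier API (CombinedClassifier and the MaximalVote combiner from mvpa.clfs.meta; exact imports and defaults may differ between PyMVPA releases):

from mvpa.clfs.meta import CombinedClassifier, MaximalVote

combined = CombinedClassifier(clfs=svms_for_CombinedClassifier(),
                              combiner=MaximalVote(),
                              descr="Combined LinSVMs (Anova/SVM selections)")
# combined.train(dataset); predictions = combined.predict(dataset.samples)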