Example #1
    def test_feature_selection_classifier_with_regression(self):
        from mvpa.featsel.base import \
             SensitivityBasedFeatureSelection
        from mvpa.featsel.helpers import \
             FixedNElementTailSelector
        if sample_clf_reg is None:
            # no regression-capable classifier was found, so nothing to test
            return
        # should give lowest weight to the feature with lowest index
        sens_ana = SillySensitivityAnalyzer()

        # corresponding feature selections
        feat_sel = SensitivityBasedFeatureSelection(sens_ana,
            FixedNElementTailSelector(1, mode='discard'))

        # now test with a regression-based classifier. The problem is
        # that predictions are derived from the values twice, and the
        # values are then overwritten with those results, so the second
        # time around the values get set to the predictions. The final
        # outcome is that the values are actually predictions...
        dat = dataset_wizard(samples=np.random.randn(4, 10),
                             targets=[-1, -1, 1, 1])
        clf_reg = FeatureSelectionClassifier(sample_clf_reg, feat_sel)
        clf_reg.train(dat)
        _ = clf_reg.predict(dat.samples)
        self.failIf((np.array(clf_reg.ca.estimates)
                     - clf_reg.ca.predictions).sum() == 0,
                    msg="Values were set to the predictions in %s." %
                    sample_clf_reg)
Example #2
    def test_feature_selection_classifier_with_regression(self):
        from mvpa.featsel.base import \
             SensitivityBasedFeatureSelection
        from mvpa.featsel.helpers import \
             FixedNElementTailSelector
        if sample_clf_reg is None:
            # no regression-capable classifier was found, so nothing to test
            return
        # should give lowest weight to the feature with lowest index
        sens_ana = SillySensitivityAnalyzer()

        # corresponding feature selections
        feat_sel = SensitivityBasedFeatureSelection(
            sens_ana, FixedNElementTailSelector(1, mode='discard'))

        # now test with a regression-based classifier. The problem is
        # that predictions are derived from the values twice, and the
        # values are then overwritten with those results, so the second
        # time around the values get set to the predictions. The final
        # outcome is that the values are actually predictions...
        dat = dataset_wizard(samples=np.random.randn(4, 10),
                             targets=[-1, -1, 1, 1])
        clf_reg = FeatureSelectionClassifier(sample_clf_reg, feat_sel)
        clf_reg.train(dat)
        _ = clf_reg.predict(dat.samples)
        self.failIf(
            (np.array(clf_reg.ca.estimates) -
             clf_reg.ca.predictions).sum() == 0,
            msg="Values were set to the predictions in %s." % sample_clf_reg)
Example #3
def some_svms():
    """Returns a couple of FeatureSelectionClassifiers
    based on SVMs with different numbers of features and/or
    sensitivity measure"""
    clfr1 = FeatureSelectionClassifier(SVM(descr="libsvm.LinSVM(C=def)",
                                           probability=1),
                                       SensitivityBasedFeatureSelection(
                                           OneWayAnova(),
                                           FixedNElementTailSelector(
                                               500,
                                               mode='select',
                                               tail='upper')),
                                       descr="LinSVM on 500 (ANOVA)")
    clfr2 = FeatureSelectionClassifier(
        SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        SensitivityBasedFeatureSelection(
            SVM().getSensitivityAnalyzer(transformer=Absolute),
            FixedNElementTailSelector(500, mode='select', tail='upper')),
        descr="LinSVM on 500 (SVM)")
    clfr3 = SVM()
    clfr4 = FeatureSelectionClassifier(
        SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        SensitivityBasedFeatureSelection(
            SVM().getSensitivityAnalyzer(transformer=Absolute),
            FractionTailSelector(0.05, mode='select', tail='upper'),
        ),
        descr="LinSVM on 5 % (SVM)")
    return [clfr1, clfr2, clfr3, clfr4]
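A hedged usage sketch for some_svms() above (random data fabricated purely for illustration; assumes the pre-0.5 mvpa API implied by getSensitivityAnalyzer, where a labeled dataset is built as Dataset(samples=..., labels=...)):

import numpy as np
from mvpa.datasets import Dataset

# 20 samples x 1000 features, so the 500-element selectors have room to work
ds = Dataset(samples=np.random.randn(20, 1000), labels=[0, 1] * 10)
for clfr in some_svms():
    clfr.train(ds)
    print(clfr.descr)
    predictions = clfr.predict(ds.samples)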
Example #4
    def test_feature_selection_classifier(self):
        from mvpa.featsel.base import \
             SensitivityBasedFeatureSelection
        from mvpa.featsel.helpers import \
             FixedNElementTailSelector

        # should give lowest weight to the feature with lowest index
        sens_ana = SillySensitivityAnalyzer()
        # should give lowest weight to the feature with highest index
        sens_ana_rev = SillySensitivityAnalyzer(mult=-1)

        # corresponding feature selections
        feat_sel = SensitivityBasedFeatureSelection(
            sens_ana, FixedNElementTailSelector(1, mode='discard'))

        feat_sel_rev = SensitivityBasedFeatureSelection(
            sens_ana_rev, FixedNElementTailSelector(1))

        samples = np.array([[0, 0, -1], [1, 0, 1], [-1, -1, 1], [-1, 0, 1],
                            [1, -1, 1]])

        testdata3 = dataset_wizard(samples=samples, targets=1)
        # dummy train data so proper mapper gets created
        traindata = dataset_wizard(samples=np.array([[0, 0, -1], [1, 0, 1]]),
                                   targets=[1, 2])

        # targets
        res110 = [1, 1, 1, -1, -1]
        res011 = [-1, 1, -1, 1, -1]

        # first classifier -- 0th feature should be discarded
        clf011 = FeatureSelectionClassifier(self.clf_sign,
                                            feat_sel,
                                            enable_ca=['feature_ids'])

        self.clf_sign.ca.change_temporarily(enable_ca=['estimates'])
        clf011.train(traindata)

        self.failUnlessEqual(clf011.predict(testdata3.samples), res011)
        # just a silly check that values get assigned in the 'ProxyClassifier'
        self.failUnless(len(clf011.ca.estimates) == len(res110),
                        msg="We need to pass values into ProxyClassifier")
        self.clf_sign.ca.reset_changed_temporarily()

        self.failUnlessEqual(
            len(clf011.ca.feature_ids), 2,
            msg="Feature selection classifier had to be trained on 2 features")

        # second classifier -- last feature should be discarded
        clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel_rev)
        clf011.train(traindata)
        self.failUnlessEqual(clf011.predict(testdata3.samples), res110)
Example #5
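    # NB: the leading double underscore keeps unittest from collecting this
    # method (test runners only pick up names starting with 'test'), so the
    # checks below are effectively disabled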
    def __test_matthias_question(self):
        rfe_clf = LinearCSVMC(C=1)

        rfesvm_split = SplitClassifier(rfe_clf)
        clf = FeatureSelectionClassifier(
            clf=LinearCSVMC(C=1),
            feature_selection=RFE(
                sensitivity_analyzer=rfesvm_split.get_sensitivity_analyzer(
                    combiner=first_axis_mean,
                    transformer=np.abs),
                transfer_error=ConfusionBasedError(
                    rfesvm_split,
                    confusion_state="confusion"),
                stopping_criterion=FixedErrorThresholdStopCrit(0.20),
                feature_selector=FractionTailSelector(
                    0.2, mode='discard', tail='lower'),
                update_sensitivity=True))

        splitter = NFoldSplitter(cvtype=1)
        no_permutations = 1000

        cv = CrossValidatedTransferError(
            TransferError(clf),
            splitter,
            null_dist=MCNullDist(permutations=no_permutations,
                                 tail='left'),
            enable_ca=['confusion'])
        error = cv(datasets['uni2small'])
        self.failUnless(error < 0.4)
        self.failUnless(cv.ca.null_prob < 0.05)
Example #6
    def test_james_problem(self):
        percent = 80
        dataset = datasets['uni2small']
        rfesvm_split = LinearCSVMC()
        fs = RFE(
            sensitivity_analyzer=rfesvm_split.get_sensitivity_analyzer(),
            transfer_error=TransferError(rfesvm_split),
            feature_selector=FractionTailSelector(
                percent / 100.0, mode='select', tail='upper'),
            # update sensitivity at each step (since we're not using the
            # same CLF as sensitivity analyzer)
            update_sensitivity=True)

        clf = FeatureSelectionClassifier(
            clf=LinearCSVMC(),
            # on features selected via RFE
            feature_selection=fs)
        clf.ca.enable('feature_ids')

        cv = CrossValidatedTransferError(
            TransferError(clf),
            NFoldSplitter(cvtype=1),
            postproc=mean_sample(),
            enable_ca=['confusion'],
            expose_testdataset=True)
        #cv = SplitClassifier(clf)
        try:
            error = cv(dataset).samples.squeeze()
        except Exception, e:
            self.fail('CrossValidation cannot handle classifier with RFE '
                      'feature selection. Got exception: %s' % (e,))
Example #7
    def test_mapped_classifier_sensitivity_analyzer(self, clf):
        """Test sensitivity of the mapped classifier
        """
        # Assuming many defaults it is as simple as
        mclf = FeatureSelectionClassifier(
            clf,
            SensitivityBasedFeatureSelection(
                OneWayAnova(),
                FractionTailSelector(0.5, mode='select', tail='upper')),
            enable_ca=['training_stats'])

        sana = mclf.get_sensitivity_analyzer(postproc=sumofabs_sample(),
                                             enable_ca=["sensitivities"])
        # and let's look at all sensitivities
        dataset = datasets['uni2medium']
        # compute the sensitivities on the dataset
        sens = sana(dataset)
        self.failUnlessEqual(sens.shape, (1, dataset.nfeatures))
Example #8
    def testMappedClassifierSensitivityAnalyzer(self, clf):
        """Test sensitivity of the mapped classifier
        """
        # Assuming many defaults it is as simple as
        mclf = FeatureSelectionClassifier(
            clf,
            SensitivityBasedFeatureSelection(
                OneWayAnova(),
                FractionTailSelector(0.5, mode='select', tail='upper')),
            enable_states=['training_confusion'])

        sana = mclf.getSensitivityAnalyzer(transformer=Absolute,
                                           enable_states=["sensitivities"])
        # and let's look at all sensitivities

        dataset = datasets['uni2medium']
        # compute the sensitivities on the dataset
        map_ = sana(dataset)
        self.failUnlessEqual(len(map_), dataset.nfeatures)
Example #9
    def test_mapped_classifier_sensitivity_analyzer(self, clf):
        """Test sensitivity of the mapped classifier
        """
        # Assuming many defaults it is as simple as
        mclf = FeatureSelectionClassifier(clf,
                                          SensitivityBasedFeatureSelection(
                                              OneWayAnova(),
                                              FractionTailSelector(
                                                  0.5,
                                                  mode='select',
                                                  tail='upper')),
                                          enable_ca=['training_confusion'])

        sana = mclf.get_sensitivity_analyzer(postproc=sumofabs_sample(),
                                             enable_ca=["sensitivities"])
        # and let's look at all sensitivities

        dataset = datasets['uni2medium']
        # compute the sensitivities on the dataset
        sens = sana(dataset)
        self.failUnlessEqual(sens.shape, (1, dataset.nfeatures))
Example #10
    def test_feature_selection_classifier(self):
        from mvpa.featsel.base import \
             SensitivityBasedFeatureSelection
        from mvpa.featsel.helpers import \
             FixedNElementTailSelector

        # should give lowest weight to the feature with lowest index
        sens_ana = SillySensitivityAnalyzer()
        # should give lowest weight to the feature with highest index
        sens_ana_rev = SillySensitivityAnalyzer(mult=-1)

        # corresponding feature selections
        feat_sel = SensitivityBasedFeatureSelection(sens_ana,
            FixedNElementTailSelector(1, mode='discard'))

        feat_sel_rev = SensitivityBasedFeatureSelection(sens_ana_rev,
            FixedNElementTailSelector(1))

        samples = np.array([[0, 0, -1], [1, 0, 1], [-1, -1, 1],
                            [-1, 0, 1], [1, -1, 1]])

        testdata3 = dataset_wizard(samples=samples, targets=1)
        # dummy train data so proper mapper gets created
        traindata = dataset_wizard(samples=np.array([[0, 0, -1], [1, 0, 1]]),
                                   targets=[1, 2])

        # targets
        res110 = [1, 1, 1, -1, -1]
        res011 = [-1, 1, -1, 1, -1]

        # first classifier -- 0th feature should be discarded
        clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel,
                    enable_ca=['feature_ids'])

        self.clf_sign.ca.change_temporarily(enable_ca=['estimates'])
        clf011.train(traindata)

        self.failUnlessEqual(clf011.predict(testdata3.samples), res011)
        # just a silly check that values get assigned in the 'ProxyClassifier'
        self.failUnless(len(clf011.ca.estimates) == len(res110),
                        msg="We need to pass values into ProxyClassifier")
        self.clf_sign.ca.reset_changed_temporarily()

        self.failUnlessEqual(
            clf011.mapper._oshape, (2,),
            msg="Feature selection classifier had to be trained on 2 features")

        # second classifier -- last feature should be discarded
        clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel_rev)
        clf011.train(traindata)
        self.failUnlessEqual(clf011.predict(testdata3.samples), res110)
Example #11
# glmnet from R via RPy
if externals.exists('glmnet'):
    from mvpa.clfs.glmnet import GLMNET_C, GLMNET_R
    clfswh += GLMNET_C(descr="GLMNET_C()")
    regrswh += GLMNET_R(descr="GLMNET_R()")

# kNN
clfswh += kNN(k=5, descr="kNN(k=5)")
clfswh += kNN(k=5, voting='majority', descr="kNN(k=5, voting='majority')")

clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           SMLRWeights(SMLR(lm=1.0, implementation="C"),
                       postproc=maxofabs_sample()),
           RangeElementSelector(mode='select')),
        descr="kNN on SMLR(lm=1) non-0")

clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="kNN on 5%(ANOVA)")

clfswh += \
    FeatureSelectionClassifier(
        kNN(),
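The clfswh object extended throughout this snippet is PyMVPA's classifier warehouse. As a hedged sketch of how entries are typically pulled back out (assuming the warehouse's tag-based indexing and a 'knn' tag on the classifiers above):

from mvpa.clfs.warehouse import clfswh

# iterate over every warehouse entry tagged as kNN-based
for clf in clfswh['knn']:
    print(clf.descr)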
Example #12
def svms_for_CombinedClassifier():
    """For my iEEG study, I use a CombinedClassifier. The components are defined here"""
    clfrs = []
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                #SVM(descr = "libsvm.LinSVM(C=def)", probability = 1).getSensitivityAnalyzer(transformer=mvpa.misc.transformers.Absolute),
                OneWayAnova(),
                FixedNElementTailSelector(500, mode='select', tail='upper')),
            descr="LinSVM on 500 (Anova)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                #SVM(descr = "libsvm.LinSVM(C=def)", probability = 1).getSensitivityAnalyzer(transformer=mvpa.misc.transformers.Absolute),
                OneWayAnova(),
                FixedNElementTailSelector(300, mode='select', tail='upper')),
            descr="LinSVM on 300 (Anova)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                #SVM(descr = "libsvm.LinSVM(C=def)", probability = 1).getSensitivityAnalyzer(transformer=mvpa.misc.transformers.Absolute),
                OneWayAnova(),
                FixedNElementTailSelector(200, mode='select', tail='upper')),
            descr="LinSVM on 200 (Anova)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                #SVM(descr = "libsvm.LinSVM(C=def)", probability = 1).getSensitivityAnalyzer(transformer=mvpa.misc.transformers.Absolute),
                OneWayAnova(),
                FixedNElementTailSelector(100, mode='select', tail='upper')),
            descr="LinSVM on 100 (Anova)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                SVM(descr="libsvm.LinSVM(C=def)",
                    probability=1).getSensitivityAnalyzer(
                        transformer=mvpa.misc.transformers.Absolute),
                #OneWayAnova(),
                FixedNElementTailSelector(500, mode='select', tail='upper')),
            descr="LinSVM on 500 (SVM)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                SVM(descr="libsvm.LinSVM(C=def)",
                    probability=1).getSensitivityAnalyzer(
                        transformer=mvpa.misc.transformers.Absolute),
                #OneWayAnova(),
                FixedNElementTailSelector(300, mode='select', tail='upper')),
            descr="LinSVM on 300 (SVM)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                SVM(descr="libsvm.LinSVM(C=def)",
                    probability=1).getSensitivityAnalyzer(
                        transformer=mvpa.misc.transformers.Absolute),
                #OneWayAnova(),
                FixedNElementTailSelector(200, mode='select', tail='upper')),
            descr="LinSVM on 200 (SVM)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                SVM(descr="libsvm.LinSVM(C=def)",
                    probability=1).getSensitivityAnalyzer(
                        transformer=mvpa.misc.transformers.Absolute),
                #OneWayAnova(),
                FixedNElementTailSelector(100, mode='select', tail='upper')),
            descr="LinSVM on 100 (SVM)"))
    return clfrs
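The docstring above mentions a CombinedClassifier built from these components. A hedged sketch of that final composition step (assuming PyMVPA's mvpa.clfs.meta.CombinedClassifier with its default combiner):

from mvpa.clfs.meta import CombinedClassifier

# bundle the eight feature-selecting SVMs into one meta-classifier
combined = CombinedClassifier(clfs=svms_for_CombinedClassifier(),
                              descr="CombinedClassifier for iEEG")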