Example #1
    def test_null_dist_prob(self, null):
        """Testing null dist probability"""
        if not isinstance(null, NullDist):
            return
        ds = datasets['uni2small']

        null.fit(OneWayAnova(), ds)

        # check for reasonable output:
        # p-values for non-bogus features should be significantly different,
        # while those for bogus (0) features should not be
        prob = null.p([20, 0, 0, 0, 0, np.nan])
        # XXX this is labile! it also needs checking since the F-scores
        # of the MCNullDists using normal distribution are apparently not
        # distributed that way, hence the test often (if not always) fails.
        if cfg.getboolean('tests', 'labile', default='yes'):
            self.failUnless(np.abs(prob[0]) < 0.05,
                            msg="Expected small p, got %g" % prob[0])
            self.failUnless((np.abs(prob[1:]) > 0.05).all(),
                            msg="Bogus features should have insignificant p."
                            " Got %s" % (np.abs(prob[1:]), ))

        # has to have matching shape
        if not isinstance(null, FixedNullDist):
            # Fixed dist is univariate ATM so it doesn't care
            # about dimensionality and gives 1 output value
            self.failUnlessRaises(ValueError, null.p, [5, 3, 4])
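For reference, a minimal sketch of constructing such a null distribution by hand. The module path and the FixedNullDist signature (a frozen scipy distribution as first argument) are assumptions here; verify against your PyMVPA version.

import scipy.stats as stats
from mvpa.clfs.stats import FixedNullDist      # module path assumed
from mvpa.measures.anova import OneWayAnova
from mvpa.testing.datasets import datasets

# Wrap a frozen normal distribution; p-values then come from its CDF.
null = FixedNullDist(stats.norm(0, 1))         # constructor signature assumed
null.fit(OneWayAnova(), datasets['uni2small']) # trivial for a fixed dist
prob = null.p([20.0])                          # p-value for a single score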
Example #2
def some_svms():
    """Returns a couple of FeatureSelectionClassifiers
    based on SVMs with different numbers of features and/or
    sensitivity measure"""
    clfr1 = FeatureSelectionClassifier(SVM(descr="libsvm.LinSVM(C=def)",
                                           probability=1),
                                       SensitivityBasedFeatureSelection(
                                           OneWayAnova(),
                                           FixedNElementTailSelector(
                                               500,
                                               mode='select',
                                               tail='upper')),
                                       descr="LinSVM on 500 (ANOVA)")
    clfr2 = FeatureSelectionClassifier(
        SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        SensitivityBasedFeatureSelection(
            SVM().getSensitivityAnalyzer(transformer=Absolute),
            FixedNElementTailSelector(500, mode='select', tail='upper')),
        descr="LinSVM on 500 (SVM)")
    clfr3 = SVM()
    clfr4 = FeatureSelectionClassifier(
        SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        SensitivityBasedFeatureSelection(
            SVM().getSensitivityAnalyzer(transformer=Absolute),
            FractionTailSelector(0.05, mode='select', tail='upper'),
        ),
        descr="LinSVM on 5 % (SVM)")
    return [clfr1, clfr2, clfr3, clfr4]
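A hedged usage sketch for the classifiers returned above; training_ds is a hypothetical labeled PyMVPA Dataset standing in for real data.

# Train each classifier and predict on its own training samples
# (illustrative only; in practice you would cross-validate).
for clf in some_svms():
    clf.train(training_ds)                     # training_ds: hypothetical Dataset
    predictions = clf.predict(training_ds.samples)
    print(clf.descr, predictions[:5])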
Example #3
    def test_anova(self):
        """Additional aspects of OnewayAnova
        """
        oa = OneWayAnova()
        oa_custom = OneWayAnova(targets_attr='custom')

        ds = datasets['uni4large']
        ds_custom = Dataset(ds.samples, sa={'custom': ds.targets})

        r = oa(ds)
        self.failUnlessRaises(KeyError, oa_custom, ds)
        r_custom = oa_custom(ds_custom)

        self.failUnless(np.allclose(r.samples, r_custom.samples))

        # we should get the same results on subsequent runs
        r2 = oa(ds)
        r_custom2 = oa_custom(ds_custom)
        self.failUnless(np.allclose(r.samples, r2.samples))
        self.failUnless(np.allclose(r_custom.samples, r_custom2.samples))
Example #4
def test_features01():
    from mvpa.testing.datasets import datasets
    from mvpa.measures.anova import OneWayAnova
    # TODO: might be worth creating an appropriate factory
    #       helper in mappers/fx
    aov = OneWayAnova(
        postproc=FxMapper('features',
                          lambda x: x / x.max(),
                          attrfx=None))
    f = aov(datasets['uni2small'])
    ok_((f.samples != 1.0).any())
    ok_(f.samples.max() == 1.0)
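The postproc above rescales the F-scores so the maximum becomes 1.0. A minimal sketch of the same normalization done by hand on the raw measure output (equivalent here because the result holds a single sample row):

from mvpa.testing.datasets import datasets
from mvpa.measures.anova import OneWayAnova

raw = OneWayAnova()(datasets['uni2small'])  # F-scores, shape (1, nfeatures)
scaled = raw.samples / raw.samples.max()    # same effect as the FxMapper above
assert scaled.max() == 1.0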
Example #5
    def test_mapped_classifier_sensitivity_analyzer(self, clf):
        """Test sensitivity of the mapped classifier
        """
        # Assuming many defaults it is as simple as
        mclf = FeatureSelectionClassifier(clf,
                                          SensitivityBasedFeatureSelection(
                                              OneWayAnova(),
                                              FractionTailSelector(
                                                  0.5,
                                                  mode='select',
                                                  tail='upper')),
                                          enable_ca=['training_confusion'])

        sana = mclf.get_sensitivity_analyzer(postproc=sumofabs_sample(),
                                             enable_ca=["sensitivities"])
        # and let's look at all sensitivities

        dataset = datasets['uni2medium']
        # and we get a sensitivity analyzer which works on splits
        sens = sana(dataset)
        self.failUnlessEqual(sens.shape, (1, dataset.nfeatures))
Example #6
    def test_union_feature_selection(self):
        # two methods: 5% highest F-scores, non-zero SMLR weights
        fss = [
            SensitivityBasedFeatureSelection(
                OneWayAnova(),
                FractionTailSelector(0.05, mode='select', tail='upper')),
            SensitivityBasedFeatureSelection(
                SMLRWeights(SMLR(lm=1, implementation="C"),
                            postproc=sumofabs_sample()),
                RangeElementSelector(mode='select'))
        ]

        fs = CombinedFeatureSelection(
            fss,
            combiner='union',
            enable_ca=['selected_ids', 'selections_ids'])

        od = fs(self.dataset)

        self.failUnless(fs.combiner == 'union')
        self.failUnless(len(fs.ca.selections_ids))
        self.failUnless(len(fs.ca.selections_ids) <= self.dataset.nfeatures)
        # should store one set of ids per method
        self.failUnless(len(fs.ca.selections_ids) == len(fss))
        # no individual selection can be larger than the union
        for s in fs.ca.selections_ids:
            self.failUnless(len(s) <= len(fs.ca.selected_ids))
        # check output dataset
        self.failUnless(od.nfeatures == len(fs.ca.selected_ids))
        for i, id in enumerate(fs.ca.selected_ids):
            self.failUnless(
                (od.samples[:, i] == self.dataset.samples[:, id]).all())

        # again for intersection
        fs = CombinedFeatureSelection(
            fss,
            combiner='intersection',
            enable_ca=['selected_ids', 'selections_ids'])
        # simply run it for now -- can't think of additional tests
        od = fs(self.dataset)
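To make the two combiners concrete, an illustrative plain-Python analogue with invented feature ids:

# 'union' keeps features selected by either method;
# 'intersection' keeps only features selected by every method.
anova_ids = {0, 3, 7}                       # invented example selections
smlr_ids = {3, 7, 9}
assert anova_ids | smlr_ids == {0, 3, 7, 9}
assert anova_ids & smlr_ids == {3, 7}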
Example #7
    def test_anova(self):
        """Do some extended testing of OneWayAnova

        in particular -- compound estimation
        """

        m = OneWayAnova()  # the default must not be compound
        mc = CompoundOneWayAnova()
        ds = datasets['uni2medium']

        # For 2 labels the compound result must be identical for both
        # labels and equal to the simple OneWayAnova
        a, ac = m(ds), mc(ds)

        self.failUnless(a.shape == (1, ds.nfeatures))
        self.failUnless(ac.shape == (len(ds.UT), ds.nfeatures))

        assert_array_equal(ac[0], ac[1])
        assert_array_equal(a, ac[1])

        # check for p-value attrs
        if externals.exists('scipy'):
            assert_true('fprob' in a.fa.keys())
            assert_equal(len(ac.fa), len(ac))

        ds = datasets['uni4large']
        ac = mc(ds)
        if cfg.getboolean('tests', 'labile', default='yes'):
            # All non-bogus features must be high for a corresponding feature
            self.failUnless(
                (ac.samples[np.arange(4),
                            np.array(ds.a.nonbogus_features)] >= 1).all())
        # All features should have slightly different CompoundAnova
        # values. I really doubt that there will be a case when this
        # test would fail merely due to being 'labile'
        self.failUnless(np.max(np.std(ac, axis=1)) > 0,
                        msg='In compound anova, we should get different'
                        ' results for different labels. Got %s' % ac)
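In short: OneWayAnova yields a single row of F-scores, while CompoundOneWayAnova yields one row per unique target (for two labels both rows coincide with the plain ANOVA, as asserted above). A minimal sketch:

from mvpa.measures.anova import OneWayAnova, CompoundOneWayAnova
from mvpa.testing.datasets import datasets

ds = datasets['uni4large']
a = OneWayAnova()(ds)                       # shape: (1, ds.nfeatures)
ac = CompoundOneWayAnova()(ds)              # shape: (len(ds.UT), ds.nfeatures)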
Example #8
clfswh += kNN(k=5, voting='majority', descr="kNN(k=5, voting='majority')")

clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
            SMLRWeights(SMLR(lm=1.0, implementation="C"),
                        postproc=maxofabs_sample()),
            RangeElementSelector(mode='select')),
        descr="kNN on SMLR(lm=1) non-0")

clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="kNN on 5%(ANOVA)")

clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FixedNElementTailSelector(50, mode='select', tail='upper')),
        descr="kNN on 50(ANOVA)")

# GNB
clfswh += GNB(descr="GNB()")
clfswh += GNB(common_variance=True, descr="GNB(common_variance=True)")
clfswh += GNB(prior='uniform', descr="GNB(prior='uniform')")
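A hedged sketch of pulling classifiers back out of the warehouse by tag; the tag name used here ('knn') is an assumption and depends on the PyMVPA version.

# clfswh is PyMVPA's classifier warehouse; indexing with tags
# returns the registered classifiers matching them.
for clf in clfswh['knn']:                   # 'knn' tag assumed
    print(clf.descr)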
Example #9
def svms_for_CombinedClassifier():
    """For my iEEG study, I use a CombinedClassifier. The components are defined here"""
    clfrs = []
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                #SVM(descr = "libsvm.LinSVM(C=def)", probability = 1).getSensitivityAnalyzer(transformer=mvpa.misc.transformers.Absolute),
                OneWayAnova(),
                FixedNElementTailSelector(500, mode='select', tail='upper')),
            descr="LinSVM on 500 (Anova)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                #SVM(descr = "libsvm.LinSVM(C=def)", probability = 1).getSensitivityAnalyzer(transformer=mvpa.misc.transformers.Absolute),
                OneWayAnova(),
                FixedNElementTailSelector(300, mode='select', tail='upper')),
            descr="LinSVM on 300 (Anova)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                #SVM(descr = "libsvm.LinSVM(C=def)", probability = 1).getSensitivityAnalyzer(transformer=mvpa.misc.transformers.Absolute),
                OneWayAnova(),
                FixedNElementTailSelector(200, mode='select', tail='upper')),
            descr="LinSVM on 200 (Anova)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                #SVM(descr = "libsvm.LinSVM(C=def)", probability = 1).getSensitivityAnalyzer(transformer=mvpa.misc.transformers.Absolute),
                OneWayAnova(),
                FixedNElementTailSelector(100, mode='select', tail='upper')),
            descr="LinSVM on 100 (Anova)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                SVM(descr="libsvm.LinSVM(C=def)",
                    probability=1).getSensitivityAnalyzer(
                        transformer=mvpa.misc.transformers.Absolute),
                #OneWayAnova(),
                FixedNElementTailSelector(500, mode='select', tail='upper')),
            descr="LinSVM on 500 (SVM)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                SVM(descr="libsvm.LinSVM(C=def)",
                    probability=1).getSensitivityAnalyzer(
                        transformer=mvpa.misc.transformers.Absolute),
                #OneWayAnova(),
                FixedNElementTailSelector(300, mode='select', tail='upper')),
            descr="LinSVM on 300 (SVM)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                SVM(descr="libsvm.LinSVM(C=def)",
                    probability=1).getSensitivityAnalyzer(
                        transformer=mvpa.misc.transformers.Absolute),
                #OneWayAnova(),
                FixedNElementTailSelector(200, mode='select', tail='upper')),
            descr="LinSVM on 200 (SVM)"))
    clfrs.append(
        FeatureSelectionClassifier(
            SVM(descr="libsvm.LinSVM(C=def)", probability=1),
            SensitivityBasedFeatureSelection(
                SVM(descr="libsvm.LinSVM(C=def)",
                    probability=1).getSensitivityAnalyzer(
                        transformer=mvpa.misc.transformers.Absolute),
                #OneWayAnova(),
                FixedNElementTailSelector(100, mode='select', tail='upper')),
            descr="LinSVM on 100 (SVM)"))
    return clfrs
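A hedged sketch of feeding these into a CombinedClassifier; the import path and the clfs keyword are assumptions, and training_ds/test_ds are hypothetical Datasets.

from mvpa.clfs.meta import CombinedClassifier  # import path assumed

cclf = CombinedClassifier(clfs=svms_for_CombinedClassifier())
cclf.train(training_ds)                     # hypothetical labeled Dataset
predictions = cclf.predict(test_ds.samples) # hypothetical test Dataset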