def test_split_featurewise_dataset_measure(self):
        ds = datasets['uni3small']
        sana = SplitFeaturewiseDatasetMeasure(
            analyzer=SMLR(fit_all_weights=True).get_sensitivity_analyzer(),
            splitter=NFoldSplitter(),
        )

        sens = sana(ds)
        # a sensitivity for each chunk and each label combination
        assert_equal(sens.shape, (len(ds.sa['chunks'].unique) *
                                  len(ds.sa['targets'].unique), ds.nfeatures))

        # Let's try a more complex example with 'boosting'
        ds = datasets['uni3medium']
        ds.init_origids('samples')
        sana = SplitFeaturewiseDatasetMeasure(
            analyzer=SMLR(fit_all_weights=True).get_sensitivity_analyzer(),
            splitter=NoneSplitter(npertarget=0.25,
                                  mode='first',
                                  nrunspersplit=2),
            enable_ca=['splits', 'sensitivities'])
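        # npertarget=0.25 draws 25% of the samples per target into each
        # split, and nrunspersplit=2 repeats the draw twice -- hence the
        # two splits of ds.nsamples / 4 samples checked below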
        sens = sana(ds)

        assert_equal(sens.shape,
                     (2 * len(ds.sa['targets'].unique), ds.nfeatures))
        splits = sana.ca.splits
        self.failUnlessEqual(len(splits), 2)
        self.failUnless(
            np.all([s[0].nsamples == ds.nsamples / 4 for s in splits]))
        # should have used different samples
        self.failUnless(
            np.any([splits[0][0].sa.origids != splits[1][0].sa.origids]))
        # and should have got different sensitivities
        self.failUnless(np.any(sens[0] != sens[1]))
Example #2
    def test_smlr_sensitivities(self):
        data = normal_feature_dataset(perlabel=10, nlabels=2, nfeatures=4)

        # use SMLR on a binary problem, but without fitting all weights
        clf = SMLR(fit_all_weights=False)
        clf.train(data)

        # now ask for the sensitivities WITHOUT having to pass the dataset
        # again
        sens = clf.get_sensitivity_analyzer(force_training=False)()
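        # with fit_all_weights=False the last class serves as reference,
        # so SMLR keeps only nlabels - 1 weight vectors -- hence the
        # shape checked below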
        self.failUnless(sens.shape == (len(data.UT) - 1, data.nfeatures))
Example #3
    def test_smlr_state(self):
        data = datasets['dumb']

        clf = SMLR()

        clf.train(data)

        clf.ca.enable('estimates')
        clf.ca.enable('predictions')
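        # the conditional attributes enabled above get populated during
        # the predict() call below, so enabling after train() suffices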

        p = np.asarray(clf.predict(data.samples))

        self.failUnless((p == clf.ca.predictions).all())
        self.failUnless(
            np.array(clf.ca.estimates).shape[0] == np.array(p).shape[0])
Example #4
    def test_smlr(self):
        data = datasets['dumb']

        clf = SMLR()

        clf.train(data)

        # prediction has to be perfect
        #
        # XXX yoh: who said that?? ;-)
        #
        # There is always a tradeoff between learning and
        # generalization errors, so... but in this case the problem is
        # more interesting: the absent bias term makes the data here
        # unlearnable -- there is no solution which would pass through
        # (0,0)
        predictions = clf.predict(data.samples)
        self.failUnless((predictions == data.targets).all())
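
    # A hedged aside (not part of the original test): without a bias
    # term a linear decision function f(x) = w * x satisfies f(0) = 0,
    # so a labeling that needs a non-zero threshold cannot be fit
    # exactly. Minimal sketch with hypothetical 1-D data:
    #
    #     X = np.array([1.0, 2.0, 3.0])  # samples all on one side of 0
    #     y = np.array([-1, 1, 1])       # needs a cut between 1 and 2
    #     for w in (-1.0, 1.0):
    #         # sign(w * X) is constant over X > 0, so accuracy < 1.0
    #         print(w, (np.sign(w * X) == y).mean())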
Example #5
    def test_union_feature_selection(self):
        # two methods: 5% highest F-scores, non-zero SMLR weights
        fss = [
            SensitivityBasedFeatureSelection(
                OneWayAnova(),
                FractionTailSelector(0.05, mode='select', tail='upper')),
            SensitivityBasedFeatureSelection(
                SMLRWeights(SMLR(lm=1, implementation="C"),
                            postproc=sumofabs_sample()),
                RangeElementSelector(mode='select'))
        ]

        fs = CombinedFeatureSelection(
            fss,
            combiner='union',
            enable_ca=['selected_ids', 'selections_ids'])

        od = fs(self.dataset)

        self.failUnless(fs.combiner == 'union')
        self.failUnless(len(fs.ca.selections_ids))
        self.failUnless(len(fs.ca.selections_ids) <= self.dataset.nfeatures)
        # should store one set per method
        self.failUnless(len(fs.ca.selections_ids) == len(fss))
        # no individual selection can be larger than the union
        for s in fs.ca.selections_ids:
            self.failUnless(len(s) <= len(fs.ca.selected_ids))
        # check output dataset
        self.failUnless(od.nfeatures == len(fs.ca.selected_ids))
        for i, id in enumerate(fs.ca.selected_ids):
            self.failUnless(
                (od.samples[:, i] == self.dataset.samples[:, id]).all())

        # again for intersection
        fs = CombinedFeatureSelection(
            fss,
            combiner='intersection',
            enable_ca=['selected_ids', 'selections_ids'])
        # simply run it for now; one extra sanity check is sketched below
        od = fs(self.dataset)
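
        # A sketch of one extra check (assuming 'selections_ids' is
        # populated the same way for the intersection run): every
        # feature surviving the intersection must appear in each
        # individual method's selection
        inter_ids = set(fs.ca.selected_ids)
        for s in fs.ca.selections_ids:
            self.failUnless(inter_ids.issubset(set(s)))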
Example #6
        """Registered items
        """
        return self.__items


clfswh = Warehouse(known_tags=_KNOWN_INTERNALS)  # classifiers
regrswh = Warehouse(known_tags=_KNOWN_INTERNALS)  # regressions

# NB:
#  - Nu-classifiers are turned off since for the haxby DS the default nu
#    is an infeasible one
#  - Python's SMLR is turned off for the duration of development
#    since it is slow and its results should match the C version's
#
clfswh += [
    SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)"),
    SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)"),
    #SMLR(lm=10.0, implementation="C", descr="SMLR(lm=10.0)"),
    #SMLR(lm=100.0, implementation="C", descr="SMLR(lm=100.0)"),
    #SMLR(implementation="Python", descr="SMLR(Python)")
]

clfswh += \
     [ MulticlassClassifier(clfswh['smlr'][0],
                            descr='Pairs+maxvote multiclass on ' + \
                            clfswh['smlr'][0].descr) ]
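
# A brief usage sketch (not from the original module): the warehouse
# supports tag-based lookup, returning every registered classifier
# matching all of the given tags, e.g. the 'smlr' query used above:
#
#     for clf in clfswh['smlr']:
#         print(clf.descr)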

if externals.exists('libsvm'):
    from mvpa.clfs import libsvmc as libsvm
    clfswh._known_tags.union_update(libsvm.SVM._KNOWN_IMPLEMENTATIONS.keys())
    clfswh += [
Example #7
    def __init__(self, mult=1, **kwargs):  # signature assumed from the body below
        FeaturewiseDatasetMeasure.__init__(self, **kwargs)
        self.__mult = mult

    def _call(self, dataset):
        """Train linear SVM on `dataset` and extract weights from classifier.
        """
        sens = self.__mult * (np.arange(dataset.nfeatures) -
                              int(dataset.nfeatures / 2))
        return Dataset(sens[np.newaxis])
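
# A quick illustration (hypothetical parameters): for a 5-feature
# dataset and mult=1 the measure above always returns the fixed ramp
#
#     1 * (np.arange(5) - int(5 / 2))  ->  [-2, -1, 0, 1, 2]
#
# independent of the data, which makes it a cheap, deterministic
# stand-in for a real sensitivity analyzer in tests.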


# Sample universal classifiers (linear and non-linear) which should be
# used whenever it doesn't matter which classifier it is for testing
# some higher-level creations -- chosen so it is the fastest universal
# one. Also it should not punch state.py in the face the way
# kNN does...
sample_clf_lin = SMLR(lm=0.1)  #sg.svm.LinearCSVMC(svm_impl='libsvm')

#if externals.exists('shogun'):
#    sample_clf_nl = sg.SVM(kernel_type='RBF', svm_impl='libsvm')
#else:
# the classical choice, which was used for a while
# and is surprisingly not bad at all for the unittests
sample_clf_nl = kNN(k=5)

# and also a regression-based classifier
r = clfswh['linear', 'regression_based', 'has_sensitivity']
if len(r) > 0:
    sample_clf_reg = r[0]
else:
    sample_clf_reg = None
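
# A minimal usage sketch for the sample classifiers above (assuming a
# PyMVPA dataset `ds`, cf. the train/predict calls in the tests):
#
#     sample_clf_lin.train(ds)
#     predictions = sample_clf_lin.predict(ds.samples)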