Example #1
def test_gnb_overflow():
    # https://github.com/PyMVPA/PyMVPA/issues/581
    gnb = GNB(
        enable_ca='estimates',
        #logprob=True,  # implemented only for True ATM
        normalize=True,
        # uncomment if interested to trigger:
        # guard_overflows=False,
    )

    # Having lots of features could trigger under/overflows
    ds = normal_feature_dataset(perlabel=4,
                                nlabels=2,
                                nfeatures=100000,
                                nchunks=2,
                                snr=5,
                                nonbogus_features=[0, 1])

    ds_train = ds[ds.chunks == ds.UC[0]]
    ds_test = ds[ds.chunks == ds.UC[1]]

    gnb.train(ds_train)
    res = gnb.predict(ds_test)
    res_est = gnb.ca.estimates

    probs = np.exp(res_est) if gnb.params.logprob else res_est

    assert np.all(np.isfinite(res_est))
    assert np.all(np.isfinite(probs))
    assert_equal(sorted(np.unique(probs)),
                 [0, 1])  # quantized into 0, 1 given this many samples
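The overflow this test guards against comes from exponentiating per-class log-probabilities that were accumulated over ~100000 features. Below is a minimal NumPy sketch of the log-sum-exp style normalization that keeps such estimates finite; it is a generic illustration of the idea only, not PyMVPA's internal implementation, and the helper name is made up.

import numpy as np

def normalize_logprobs(logp):
    # Subtract the per-sample maximum before exponentiating so the largest
    # term becomes exp(0) = 1; nothing under/overflows into NaNs, and the
    # result can be renormalized to sum to 1 per sample.
    shifted = logp - logp.max(axis=1, keepdims=True)
    probs = np.exp(shifted)
    return probs / probs.sum(axis=1, keepdims=True)

# two samples, two classes, with hugely negative summed log-likelihoods
logp = np.array([[-90000., -90050.],
                 [-120010., -120000.]])
print(normalize_logprobs(logp))  # finite; rows sum to 1 and are ~[1, 0] and ~[0, 1]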
Example #2
def test_gnb_sensitivities():
    gnb = GNB(common_variance=True)
    ds = normal_feature_dataset(perlabel=4,
                                nlabels=3,
                                nfeatures=5,
                                nchunks=4,
                                snr=10,
                                nonbogus_features=[0, 1, 2]
                                )

    s = gnb.get_sensitivity_analyzer()(ds)
    assert_in('targets', s.sa)
    assert_equal(s.shape, (((len(ds.uniquetargets) * (len(ds.uniquetargets) - 1))/2), ds.nfeatures))
    # test zero variance case
    # set variance of feature to zero
    ds.samples[:,3]=0.3
    s_zerovar = gnb.get_sensitivity_analyzer()
    sens = s_zerovar(ds)
    assert_true(all(sens.samples[:, 3] == 0))

    # test whether tagging and untagging works
    assert 'has_sensitivity' in gnb.__tags__
    gnb.untrain()
    assert 'has_sensitivity' not in gnb.__tags__

    # test whether content of sensitivities makes rough sense,
    # e.g. for the pair involving L0 (to which the first feature is relevant)
    # its sensitivity should be larger than that of the bogus last feature
    assert_true(abs(sens.samples[0, 0]) > abs(sens.samples[0, 4]))
Example #3
    def test_gnbsearchlight_matchaccuracy(self):
        # was not able to deal with custom errorfx collapsing samples
        # after 55e147e0bd30fbf4edede3faef3a15c6c65b33ea
        ds = datasets['3dmedium'].copy()
        ds.fa['voxel_indices'] = ds.fa.myspace
        sl_err = sphere_gnbsearchlight(GNB(),
                                       NFoldPartitioner(cvtype=1),
                                       radius=0)
        sl_acc = sphere_gnbsearchlight(GNB(),
                                       NFoldPartitioner(cvtype=1),
                                       radius=0,
                                       errorfx=mean_match_accuracy)
        assert_array_almost_equal(sl_err(ds), 1.0 - sl_acc(ds).samples)
Example #4
    def test_cached_qe_gnbsearchlight(self):
        ds1 = datasets['3dsmall'].copy(deep=True)
        qe = IndexQueryEngine(myspace=Sphere(2))
        cached_qe = CachedQueryEngine(qe)
        gnb_sl = GNBSearchlight(GNB(), NFoldPartitioner(), qe=cached_qe)
        res = gnb_sl(ds1)
        assert_false(cached_qe.ids is None)
Example #5
def test_chained_crossvalidation_searchlight():
    from mvpa2.clfs.gnb import GNB
    from mvpa2.clfs.meta import MappedClassifier
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.mappers.base import ChainMapper
    from mvpa2.mappers.base import Mapper
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.testing.datasets import datasets

    dataset = datasets['3dlarge'].copy()
    dataset.fa['voxel_indices'] = dataset.fa.myspace
    sample_clf = GNB()              # fast and deterministic

    class ZScoreFeaturesMapper(Mapper):
        """Very basic mapper which would take care about standardizing
        all features within each sample separately
        """
        def _forward_data(self, data):
            return (data - np.mean(data, axis=1)[:, None])/np.std(data, axis=1)[:, None]

    # only do partial to save time
    sl_kwargs = dict(radius=2, center_ids=[3, 50])
    clf_mapped = MappedClassifier(sample_clf, ZScoreFeaturesMapper())
    cv = CrossValidation(clf_mapped, NFoldPartitioner())
    sl = sphere_searchlight(cv, **sl_kwargs)
    results_mapped = sl(dataset)

    cv_chained = ChainMapper([ZScoreFeaturesMapper(auto_train=True),
                              CrossValidation(sample_clf, NFoldPartitioner())])
    sl_chained = sphere_searchlight(cv_chained, **sl_kwargs)
    results_chained = sl_chained(dataset)

    assert_array_equal(results_mapped, results_chained)
Example #6
    def test_gnbsearghlight_exclude_partition(self):
        # just a smoke test with a custom partitioner
        ds1 = datasets['3dsmall'].copy(deep=True)
        gnb_sl = GNBSearchlight(GNB(),
                                generator=CustomPartitioner([([0], [1])]),
                                qe=IndexQueryEngine(myspace=Sphere(2)),
                                errorfx=None)
        res = gnb_sl(ds1)
Example #7
    def test_gnb(self):
        gnb = GNB()
        gnb_nc = GNB(common_variance=False)
        gnb_n = GNB(normalize=True)
        gnb_n_nc = GNB(normalize=True, common_variance=False)
        gnb_lin = GNB(common_variance=True)

        ds = datasets['uni2medium']

        # Generic silly coverage just to assure that it works in all
        # possible scenarios:
        bools = (True, False)
        # There should be better way... heh
        for cv in bools:                # common_variance?
          for prior in ('uniform', 'laplacian_smoothing', 'ratio'):
            tp = None                   # predictions -- all above should
                                        # result in the same predictions
            for n in bools:             # normalized?
              for ls in bools:          # logspace?
                for es in ((), ('estimates')):
                    gnb_ = GNB(common_variance=cv,
                               prior=prior,
                               normalize=n,
                               logprob=ls,
                               enable_ca=es)
                    tm = TransferMeasure(gnb_, Splitter('train'))
                    predictions = tm(ds).samples[:,0]
                    if tp is None:
                        tp = predictions
                    assert_array_equal(predictions, tp)
                    # if normalized -- check if estimates are such
                    if n and 'estimates' in es:
                        v = gnb_.ca.estimates
                        if ls:          # in log space -- take exp ;)
                            v = np.exp(v)
                        d1 = np.sum(v, axis=1) - 1.0
                        self.assertTrue(np.max(np.abs(d1)) < 1e-5)
                    # smoke test to see whether invocation of sensitivity analyser blows
                    # if gnb classifier isn't linear, and to see whether it doesn't blow
                    # when it is linear.
                    if cv:
                        assert 'has_sensitivity' in gnb_.__tags__
                        gnb_.get_sensitivity_analyzer()
                    if not cv:
                        with self.assertRaises(NotImplementedError):
                            gnb_.get_sensitivity_analyzer()
Example #8
    def test_partial_searchlight_with_full_report(self):
        ds = self.dataset.copy()
        center_ids = np.zeros(ds.nfeatures, dtype='bool')
        center_ids[[3, 50]] = True
        ds.fa['center_ids'] = center_ids
        # compute N-1 cross-validation for each sphere
        cv = CrossValidation(GNB(), NFoldPartitioner())
        # construct a diameter 1 (or just radius 0) searchlight;
        # one time give center ids as a list, the other takes them from the
        # dataset itself
        sls = (
            sphere_searchlight(cv, radius=0, center_ids=[3, 50]),
            sphere_searchlight(None, radius=0, center_ids=[3, 50]),
            sphere_searchlight(cv, radius=0, center_ids='center_ids'),
        )
        for sl in sls:
            # assure that we could set cv post constructor
            if sl.datameasure is None:
                sl.datameasure = cv
            # run searchlight
            results = sl(ds)
            # only two spheres but error for all CV-folds
            self.assertEqual(results.shape, (len(self.dataset.UC), 2))
            # Test if results hold if we "set" a "new" datameasure
            sl.datameasure = CrossValidation(GNB(), NFoldPartitioner())
            results2 = sl(ds)
            assert_array_almost_equal(results, results2)

        # test if we graciously puke if center_ids are out of bounds
        dataset0 = ds[:, :50]  # so we have no 50th feature
        self.assertRaises(IndexError, sls[0], dataset0)
        # but it should be fine on the one that gets the ids from the dataset
        # itself
        results = sl(dataset0)
        assert_equal(results.nfeatures, 1)
        # check whether roi_seeds are correct
        sl = sphere_searchlight(lambda x: np.vstack(
            (x.fa.roi_seed, x.samples)),
                                radius=1,
                                add_center_fa=True,
                                center_ids=[12])
        res = sl(ds)
        assert_array_equal(
            res.samples[1:, res.samples[0].astype('bool')].squeeze(),
            ds.samples[:, 12])
Example #9
def test_searchlight_errors_per_trial():
    # To make sure that searchlight can return error/accuracy per trial
    from mvpa2.clfs.gnb import GNB
    from mvpa2.generators.partition import OddEvenPartitioner
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.measures.gnbsearchlight import sphere_gnbsearchlight
    from mvpa2.testing.datasets import datasets
    from mvpa2.misc.errorfx import prediction_target_matches

    dataset = datasets['3dsmall'].copy()
    # randomly permute samples so we break any random correspondence
    # to strengthen tests below
    sample_idx = np.arange(len(dataset))
    dataset = dataset[np.random.permutation(sample_idx)]

    dataset.sa.targets = ['L%d' % l for l in dataset.sa.targets]
    dataset.fa['voxel_indices'] = dataset.fa.myspace
    sample_clf = GNB()              # fast and deterministic

    part = OddEvenPartitioner()
    # only do partial to save time
    cv = CrossValidation(sample_clf, part, errorfx=None) #prediction_target_matches)
    # Just to compare error
    cv_error = CrossValidation(sample_clf, part)

    # Large searchlight radius so we get entire ROI, 2 centers just to make sure
    # that all stacking works correctly
    sl = sphere_searchlight(cv, radius=10, center_ids=[0, 1])
    results = sl(dataset)

    sl_gnb = sphere_gnbsearchlight(sample_clf, part, radius=10, errorfx=None,
                                   center_ids=[0, 1])
    results_gnbsl = sl_gnb(dataset)

    # inspect both results
    # verify that partitioning was done correctly
    partitions = list(part.generate(dataset))
    for res in (results, results_gnbsl):
        assert('targets' in res.sa.keys())  # should carry targets
        assert('cvfolds' in res.sa.keys())  # should carry cvfolds
        for ipart in xrange(len(partitions)):
            assert_array_equal(dataset[partitions[ipart].sa.partitions == 2].targets,
                               res.sa.targets[res.sa.cvfolds == ipart])

    assert_datasets_equal(results, results_gnbsl)

    # one "accuracy" per each trial
    assert_equal(results.shape, (len(dataset), 2))
    # with accuracies the same in both searchlights since the same
    # features were selected in both cases due to the large radii
    errors_dataset = cv(dataset)
    assert_array_equal(errors_dataset.samples[:, 0], results.samples[:, 0])
    assert_array_equal(errors_dataset.samples[:, 0], results.samples[:, 1])
    # and the error matches (up to precision) the one obtained with the default error function
    assert_array_almost_equal(np.mean(results.targets[:, None] != results.samples, axis=0)[0],
                              np.mean(cv_error(dataset)))
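The final assertion works because, with errorfx=None, every searchlight column carries per-trial predictions, so averaging mismatches over trials reproduces the scalar mean-mismatch error. A tiny self-contained illustration with made-up labels and two hypothetical searchlight centers:

import numpy as np

targets = np.array(['L0', 'L1', 'L0', 'L1'])
# per-trial predictions from two (hypothetical) searchlight centers
predictions = np.array([['L0', 'L0'],
                        ['L1', 'L1'],
                        ['L0', 'L1'],
                        ['L1', 'L1']])
# fraction of mismatching trials per center -- what the default error would report
print(np.mean(targets[:, None] != predictions, axis=0))  # -> [0.   0.25]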
Example #10
    def test_nblocks(self):
        skip_if_no_external('pprocess')
        # just a basic test to see that we are getting the same
        # results with different nblocks
        ds = datasets['3dsmall'].copy(deep=True)[:, :13]
        ds.fa['voxel_indices'] = ds.fa.myspace
        cv = CrossValidation(GNB(), OddEvenPartitioner())
        res1 = sphere_searchlight(cv, radius=1, nproc=2)(ds)
        res2 = sphere_searchlight(cv, radius=1, nproc=2, nblocks=5)(ds)
        assert_array_equal(res1, res2)
Example #11
    def test_splitter_gnbsearghlight(self):
        ds1 = datasets['3dsmall'].copy(deep=True)

        gnb_sl = GNBSearchlight(GNB(),
                                generator=CustomPartitioner([([0], [1])]),
                                qe=IndexQueryEngine(myspace=Sphere(2)),
                                splitter=Splitter(attr='partitions',
                                                  attr_values=[1, 2]),
                                errorfx=None)
        res = gnb_sl(ds1)
        assert_equal(res.nsamples, (ds1.chunks == 1).sum())
Example #12
    def test_gnbsearchlight_3partitions_and_splitter(self):
        ds = self.dataset[:, :20]
        # custom partitioner which provides 3 partitions
        part = CustomPartitioner([([2], [3], [1])])
        gnb_sl = sphere_gnbsearchlight(GNB(), part)
        res_gnb_sl = gnb_sl(ds)

        # compare results to full blown searchlight
        sl = sphere_searchlight(CrossValidation(GNB(), part))
        res_sl = sl(ds)

        assert_datasets_equal(res_gnb_sl, res_sl)

        # and theoretically for this simple single cross-validation we could
        # just use Splitter
        splitter = Splitter('chunks', [2, 3])
        # we have to pass an explicit None here since the generator can no
        # longer be given as a keyword argument
        gnb_sl_ = sphere_gnbsearchlight(GNB(), None, splitter=splitter)
        res_gnb_sl_ = gnb_sl_(ds)
        assert_datasets_equal(res_gnb_sl, res_gnb_sl_)
Example #13
def test_gnb_sensitivities():
    gnb = GNB(common_variance=True)
    ds = normal_feature_dataset(perlabel=4,
                                nlabels=3,
                                nfeatures=5,
                                nchunks=4,
                                snr=10,
                                nonbogus_features=[0, 1, 2])

    s = gnb.get_sensitivity_analyzer()(ds)
    assert_in('targets', s.sa)
    assert_equal(s.shape, (((len(ds.uniquetargets) *
                             (len(ds.uniquetargets) - 1)) / 2), ds.nfeatures))
    # test zero variance case
    # set variance of feature to zero
    ds.samples[:, 3] = 0.3
    s_zerovar = gnb.get_sensitivity_analyzer()
    sens = s_zerovar(ds)
    assert_true(all(sens.samples[:, 3] == 0))

    # test whether tagging and untagging works
    assert 'has_sensitivity' in gnb.__tags__
    gnb.untrain()
    assert 'has_sensitivity' not in gnb.__tags__

    # test whether content of sensitivities makes rough sense,
    # e.g. for the pair involving L0 (to which the first feature is relevant)
    # its sensitivity should be larger than that of the bogus last feature
    assert_true(abs(sens.samples[0, 0]) > abs(sens.samples[0, 4]))
Example #14
def _test_gnb_overflow_haxby():  # pragma: no cover
    # example from https://github.com/PyMVPA/PyMVPA/issues/581
    # a heavier version of the above test
    import os
    import numpy as np

    from mvpa2.datasets.sources.native import load_tutorial_data
    from mvpa2.clfs.gnb import GNB
    from mvpa2.measures.base import CrossValidation
    from mvpa2.generators.partition import HalfPartitioner
    from mvpa2.mappers.zscore import zscore
    from mvpa2.mappers.detrend import poly_detrend
    from mvpa2.datasets.miscfx import remove_invariant_features
    from mvpa2.testing.datasets import *

    datapath = '/usr/share/data/pymvpa2-tutorial/'
    haxby = load_tutorial_data(datapath,
                               roi='vt',
                               add_fa={
                                   'vt_thr_glm':
                                   os.path.join(datapath, 'haxby2001',
                                                'sub001', 'masks', 'orig',
                                                'vt.nii.gz')
                               })
    # poly_detrend(haxby, polyord=1, chunks_attr='chunks')
    haxby = haxby[np.array(
        [
            l in ['rest', 'scrambled']  # 'house', 'face']
            for l in haxby.targets
        ],
        dtype='bool')]
    #zscore(haxby, chunks_attr='chunks', param_est=('targets', ['rest']),
    #       dtype='float32')
    # haxby = haxby[haxby.sa.targets != 'rest']
    haxby = remove_invariant_features(haxby)

    clf = GNB(enable_ca='estimates', logprob=True, normalize=True)

    #clf.train(haxby)
    #clf.predict(haxby)
    # estimates would be a bit "overfit" if we judged by train/predict on the same data

    cv = CrossValidation(clf,
                         HalfPartitioner(attr='chunks'),
                         postproc=None,
                         enable_ca=['stats'])

    cv_results = cv(haxby)
    res1_est = clf.ca.estimates
    print "Estimates:\n", res1_est
    print "Exp(estimates):\n", np.round(np.exp(res1_est), 3)
    assert np.all(np.isfinite(res1_est))
Example #15
def test_gnb_sensitivities():
    gnb = GNB(common_variance=True)
    ds = normal_feature_dataset(perlabel=4,
                                nlabels=3,
                                nfeatures=5,
                                nchunks=4,
                                snr=20,
                                nonbogus_features=[0, 1, 2]
                                )

    s = gnb.get_sensitivity_analyzer()(ds)
    assert_in('targets', s.sa)
    assert_equal(s.shape, (((len(ds.uniquetargets) * (len(ds.uniquetargets) - 1))/2), ds.nfeatures))
    # test zero variance case
    # set variance of feature to zero
    ds.samples[:, 3] = 0.3
    s_zerovar = gnb.get_sensitivity_analyzer()
    sens = s_zerovar(ds)
    assert_equal(sens.T.dtype, 'O')  # we store pairs
    assert_equal(sens.T[0], ('L0', 'L1'))
    assert_true(all(sens.samples[:, 3] == 0))

    gnb.untrain()

    # test whether content of sensitivities makes rough sense.
    # The first feature has information only about L0, so it would be of
    # no use for L1 -vs- L2 classification; hence we go through each pair
    # and make sure that signs etc. are all correct for each pair.
    # This in principle should be a generic test for multiclass sensitivities
    abssens = abs(sens.samples)
    for (t1, t2), t1t2sens in zip(sens.T, sens.samples):
        # go from the literal label L1 to index 1, L0 to 0 -- corresponds to the informative feature
        i1 = int(t1[1])
        i2 = int(t2[1])
        assert t1t2sens[i1] < 0
        assert t1t2sens[i2] > 0
        assert t1t2sens[i2] > t1t2sens[4]
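The sign convention checked above (negative weight on the feature informative about the first class of a pair, positive on the second, near-zero on bogus features) is what one expects from a linear, common-variance Gaussian classifier, whose pairwise weights go like the difference of class means scaled by the variance. A rough standalone sketch of that idea follows; the helper is hypothetical and not claimed to be PyMVPA's exact sensitivity formula.

import numpy as np
from itertools import combinations

def pairwise_mean_diff_sens(X, y):
    # For each class pair (c1, c2), weight every feature by the difference
    # of class means divided by the per-feature variance.
    classes = sorted(np.unique(y))
    var = X.var(axis=0)
    means = dict((c, X[y == c].mean(axis=0)) for c in classes)
    pairs = list(combinations(classes, 2))
    sens = np.array([(means[c2] - means[c1]) / var for c1, c2 in pairs])
    return pairs, sens

# Features 0 and 1 carry the signal for classes 'L0' and 'L1'; feature 2 is noise.
rng = np.random.RandomState(0)
X = rng.normal(size=(40, 3))
y = np.repeat(['L0', 'L1'], 20)
X[y == 'L0', 0] += 3
X[y == 'L1', 1] += 3
pairs, sens = pairwise_mean_diff_sens(X, y)
print(pairs[0], sens[0])  # ('L0', 'L1'): negative on feature 0, positive on feature 1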
Example #16
    def test_gnb(self):
        gnb = GNB()
        gnb_nc = GNB(common_variance=False)
        gnb_n = GNB(normalize=True)
        gnb_n_nc = GNB(normalize=True, common_variance=False)

        ds = datasets['uni2medium']

        # Generic silly coverage just to assure that it works in all
        # possible scenarios:
        bools = (True, False)
        # There should be better way... heh
        for cv in bools:  # common_variance?
            for prior in ('uniform', 'laplacian_smoothing', 'ratio'):
                tp = None  # predictions -- all above should
                # result in the same predictions
                for n in bools:  # normalized?
                    for ls in bools:  # logspace?
                        for es in ((), ('estimates')):
                            gnb_ = GNB(common_variance=cv,
                                       prior=prior,
                                       normalize=n,
                                       logprob=ls,
                                       enable_ca=es)
                            tm = TransferMeasure(gnb_, Splitter('train'))
                            predictions = tm(ds).samples[:, 0]
                            if tp is None:
                                tp = predictions
                            assert_array_equal(predictions, tp)
                            # if normalized -- check if estimates are such
                            if n and 'estimates' in es:
                                v = gnb_.ca.estimates
                                if ls:  # in log space -- take exp ;)
                                    v = np.exp(v)
                                d1 = np.sum(v, axis=1) - 1.0
                                self.assertTrue(np.max(np.abs(d1)) < 1e-5)
Example #17
    def test_confusionmatrix_nulldist(self):
        from mvpa2.clfs.gnb import GNB
        from mvpa2.clfs.transerror import ConfusionMatrixError
        from mvpa2.misc.data_generators import normal_feature_dataset
        for snr in [
                0.,
                2.,
        ]:
            ds = normal_feature_dataset(snr=snr,
                                        perlabel=42,
                                        nchunks=3,
                                        nonbogus_features=[0, 1],
                                        nfeatures=2)

            clf = GNB()
            num_perm = 50
            permutator = AttributePermutator('targets',
                                             limit='chunks',
                                             count=num_perm)
            cv = CrossValidation(
                clf,
                NFoldPartitioner(),
                errorfx=ConfusionMatrixError(labels=ds.sa['targets'].unique),
                postproc=mean_sample(),
                null_dist=MCNullDist(
                    permutator,
                    tail='right',  # because we now look at accuracy not error
                    enable_ca=['dist_samples']),
                enable_ca=['stats'])
            cmatrix = cv(ds)
            #print "Result:\n", cmatrix.samples
            cvnp = cv.ca.null_prob.samples
            #print cvnp
            self.assertEqual(cvnp.shape, (2, 2))
            if cfg.getboolean('tests', 'labile', default='yes'):
                if snr == 0.:
                    # all p should be high since no signal
                    assert_array_less(0.05, cvnp)
                else:
                    # diagonal p is low -- we have signal after all
                    assert_array_less(np.diag(cvnp), 0.05)
                    # off diagonals are high p since for them we would
                    # need to look at the other tail
                    assert_array_less(
                        0.9, cvnp[(np.array([0, 1]), np.array([1, 0]))])
Example #18
def test_confusion_as_node():
    from mvpa2.misc.data_generators import normal_feature_dataset
    from mvpa2.clfs.gnb import GNB
    from mvpa2.clfs.transerror import Confusion
    ds = normal_feature_dataset(snr=2.0,
                                perlabel=42,
                                nchunks=3,
                                nonbogus_features=[0, 1],
                                nfeatures=2)
    clf = GNB()
    cv = CrossValidation(clf,
                         NFoldPartitioner(),
                         errorfx=None,
                         postproc=Confusion(labels=ds.UT),
                         enable_ca=['stats'])
    res = cv(ds)
    # needs to be identical to CA
    assert_array_equal(res.samples, cv.ca.stats.matrix)
    assert_array_equal(res.sa.predictions, ds.UT)
    assert_array_equal(res.fa.targets, ds.UT)

    skip_if_no_external('scipy')

    from mvpa2.clfs.transerror import BayesConfusionHypothesis
    from mvpa2.base.node import ChainNode
    # same again, but this time with Bayesian hypothesis testing at the end
    cv = CrossValidation(clf,
                         NFoldPartitioner(),
                         errorfx=None,
                         postproc=ChainNode((Confusion(labels=ds.UT),
                                             BayesConfusionHypothesis())))
    res = cv(ds)
    # only two possible hypothesis with two classes
    assert_equals(len(res), 2)
    # the first hypothesis is that we can't discriminate anything
    assert_equal(len(res.sa.hypothesis[0]), 1)
    assert_equal(len(res.sa.hypothesis[0][0]), 2)
    # and the hypothesis is actually less likely than the other one
    # (both classes can be distinguished)
    assert (np.e**res.samples[0, 0] < np.e**res.samples[1, 0])
Example #19
def test_gnb_sensitivities(logprob):
    gnb = GNB(common_variance=True, logprob=logprob)
    ds = normal_feature_dataset(perlabel=4,
                                nlabels=3,
                                nfeatures=5,
                                nchunks=4,
                                snr=20,
                                nonbogus_features=[0, 1, 2])

    s = gnb.get_sensitivity_analyzer()(ds)
    assert_in('targets', s.sa)
    assert_equal(s.shape, (((len(ds.uniquetargets) *
                             (len(ds.uniquetargets) - 1)) / 2), ds.nfeatures))
    # test zero variance case
    # set variance of feature to zero
    ds.samples[:, 3] = 0.3
    s_zerovar = gnb.get_sensitivity_analyzer()
    sens = s_zerovar(ds)
    assert_equal(sens.T.dtype, 'O')  # we store pairs
    assert_equal(sens.T[0], ('L0', 'L1'))
    assert_true(all(sens.samples[:, 3] == 0))

    gnb.untrain()

    # test whether content of sensitivities makes rough sense.
    # The first feature has information only about L0, so it would be of
    # no use for L1 -vs- L2 classification; hence we go through each pair
    # and make sure that signs etc. are all correct for each pair.
    # This in principle should be a generic test for multiclass sensitivities
    abssens = abs(sens.samples)
    for (t1, t2), t1t2sens in zip(sens.T, sens.samples):
        # go from the literal label L1 to index 1, L0 to 0 -- corresponds to the informative feature
        i1 = int(t1[1])
        i2 = int(t2[1])
        assert t1t2sens[i1] < 0
        assert t1t2sens[i2] > 0
        assert t1t2sens[i2] > t1t2sens[4]
Example #20
    def test_voxel_selection(self):
        '''Compare surface and volume based searchlight'''
        '''
        Tests to see whether results are identical for surface-based
        searchlight (just one plane; Euclidean distance) and volume-based
        searchlight.

        Note that the current value is a float; if it were int, it would
        specify the number of voxels in each searchlight'''

        radius = 10.
        '''Define input filenames'''
        epi_fn = os.path.join(pymvpa_dataroot, 'bold.nii.gz')
        maskfn = os.path.join(pymvpa_dataroot, 'mask.nii.gz')
        '''
        Use the EPI datafile to define a surface.
        The surface has as many nodes as there are voxels
        and is parallel to the volume 'slice'
        '''
        vg = volgeom.from_any(maskfn, mask_volume=True)

        aff = vg.affine
        nx, ny, nz = vg.shape[:3]
        '''Plane goes in x and y direction, so we take these vectors
        from the affine transformation matrix of the volume'''
        plane = surf.generate_plane(aff[:3, 3], aff[:3, 0], aff[:3, 1], nx, ny)
        '''
        Simulate pial and white matter as just above and below
        the central plane
        '''
        normal_vec = aff[:3, 2]
        outer = plane + normal_vec
        inner = plane + -normal_vec
        '''
        Combine volume and surface information
        '''
        vsm = volsurf.VolSurfMaximalMapping(vg, outer, inner)
        '''
        Run voxel selection with specified radius (in mm), using
        Euclidean distance measure
        '''
        surf_voxsel = surf_voxel_selection.voxel_selection(vsm,
                                                           radius,
                                                           distance_metric='e')
        '''Define the measure'''

        # run_slow=True would give an actual cross-validation with meaningful
        # accuracies. Because this is a unit-test only the number of voxels
        # in each searchlight is tested.
        run_slow = False

        if run_slow:
            meas = CrossValidation(GNB(),
                                   OddEvenPartitioner(),
                                   errorfx=lambda p, t: np.mean(p == t))
            postproc = mean_sample()
        else:
            meas = _Voxel_Count_Measure()
            postproc = lambda x: x
        '''
        Surface analysis: define the query engine, cross validation,
        and searchlight
        '''
        surf_qe = SurfaceVerticesQueryEngine(surf_voxsel)
        surf_sl = Searchlight(meas, queryengine=surf_qe, postproc=postproc)
        '''
        new (Sep 2012): also test 'simple' queryengine wrapper function
        '''

        surf_qe2 = disc_surface_queryengine(radius,
                                            maskfn,
                                            inner,
                                            outer,
                                            plane,
                                            volume_mask=True,
                                            distance_metric='euclidean')
        surf_sl2 = Searchlight(meas, queryengine=surf_qe2, postproc=postproc)
        '''
        Same for the volume analysis
        '''
        element_sizes = tuple(map(abs, (aff[0, 0], aff[1, 1], aff[2, 2])))
        sph = Sphere(radius, element_sizes=element_sizes)
        kwa = {'voxel_indices': sph}

        vol_qe = IndexQueryEngine(**kwa)
        vol_sl = Searchlight(meas, queryengine=vol_qe, postproc=postproc)
        '''The following steps are similar to start_easy.py'''
        attr = SampleAttributes(
            os.path.join(pymvpa_dataroot, 'attributes_literal.txt'))

        mask = surf_voxsel.get_mask()

        dataset = fmri_dataset(samples=os.path.join(pymvpa_dataroot,
                                                    'bold.nii.gz'),
                               targets=attr.targets,
                               chunks=attr.chunks,
                               mask=mask)

        if run_slow:
            # do chunkswise linear detrending on dataset

            poly_detrend(dataset, polyord=1, chunks_attr='chunks')

            # zscore dataset relative to baseline ('rest') mean
            zscore(dataset,
                   chunks_attr='chunks',
                   param_est=('targets', ['rest']))

        # select class face and house for this demo analysis
        # would work with full datasets (just a little slower)
        dataset = dataset[np.array(
            [l in ['face', 'house'] for l in dataset.sa.targets],
            dtype='bool')]
        '''Apply searchlight to datasets'''
        surf_dset = surf_sl(dataset)
        surf_dset2 = surf_sl2(dataset)
        vol_dset = vol_sl(dataset)

        surf_data = surf_dset.samples
        surf_data2 = surf_dset2.samples
        vol_data = vol_dset.samples

        assert_array_equal(surf_data, surf_data2)
        assert_array_equal(surf_data, vol_data)
Example #21
def test_gnbsearchlight_permutations():
    import mvpa2
    from mvpa2.base.node import ChainNode
    from mvpa2.clfs.gnb import GNB
    from mvpa2.generators.base import  Repeater
    from mvpa2.generators.partition import NFoldPartitioner, OddEvenPartitioner
    #import mvpa2.generators.permutation
    #reload(mvpa2.generators.permutation)
    from mvpa2.generators.permutation import AttributePermutator
    from mvpa2.testing.datasets import datasets
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.gnbsearchlight import sphere_gnbsearchlight
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.mappers.fx import mean_sample
    from mvpa2.misc.errorfx import mean_mismatch_error
    from mvpa2.clfs.stats import MCNullDist
    from mvpa2.testing.tools import assert_raises, ok_, assert_array_less

    # mvpa2.debug.active = ['APERM', 'SLC'] #, 'REPM']
    # mvpa2.debug.metrics += ['pid']
    count = 10
    nproc = 1 + int(mvpa2.externals.exists('pprocess'))
    ds = datasets['3dsmall'].copy()
    ds.fa['voxel_indices'] = ds.fa.myspace

    slkwargs = dict(radius=3, space='voxel_indices',  enable_ca=['roi_sizes'],
                    center_ids=[1, 10, 70, 100])

    mvpa2.seed(mvpa2._random_seed)
    clf  = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')

    repeater   = Repeater(count=count)
    permutator = AttributePermutator('targets', limit={'partitions': 1}, count=1)

    null_sl = sphere_gnbsearchlight(clf, ChainNode([splt, permutator], space=splt.get_space()),
                                    postproc=mean_sample(), errorfx=mean_mismatch_error,
                                    **slkwargs)

    distr_est = MCNullDist(repeater, tail='left', measure=null_sl,
                           enable_ca=['dist_samples'])
    sl = sphere_gnbsearchlight(clf, splt,
                               reuse_neighbors=True,
                               null_dist=distr_est, postproc=mean_sample(),
                               errorfx=mean_mismatch_error,
                               **slkwargs)
    if __debug__:                         # assert is done only without -O mode
        assert_raises(NotImplementedError, sl, ds)

    # "ad-hoc searchlights can't handle yet varying targets across partitions"
    if False:
        # after above limitation is removed -- enable
        sl_map = sl(ds)
        sl_null_prob = sl.ca.null_prob.samples.copy()

    mvpa2.seed(mvpa2._random_seed)
    ### 'normal' Searchlight
    clf  = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')
    repeater   = Repeater(count=count)
    permutator = AttributePermutator('targets', limit={'partitions': 1}, count=1)
    # rng=np.random.RandomState(0)) # to trigger failure since the same np.random state
    # would be reused across all pprocesses
    null_cv = CrossValidation(clf, ChainNode([splt, permutator], space=splt.get_space()),
                              postproc=mean_sample())
    null_sl_normal = sphere_searchlight(null_cv, nproc=nproc, **slkwargs)
    distr_est_normal = MCNullDist(repeater, tail='left', measure=null_sl_normal,
                           enable_ca=['dist_samples'])

    cv = CrossValidation(clf, splt, errorfx=mean_mismatch_error,
                         enable_ca=['stats'], postproc=mean_sample() )
    sl = sphere_searchlight(cv, nproc=nproc, null_dist=distr_est_normal, **slkwargs)
    sl_map_normal = sl(ds)
    sl_null_prob_normal = sl.ca.null_prob.samples.copy()

    # For every feature we should get some variance in estimates.  In
    # case of failure they are all really close to each other (up to
    # numerical precision), so the variance will be close to 0
    assert_array_less(-np.var(distr_est_normal.ca.dist_samples.samples[0],
                              axis=1), -1e-5)
    for s in distr_est_normal.ca.dist_samples.samples[0]:
        ok_(len(np.unique(s)) > 1)
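MCNullDist above builds an empirical null distribution from the permuted searchlight maps; the resulting p-value per feature is essentially the fraction of permuted error estimates at least as extreme as the observed one (left tail, since these are errors). A generic sketch of that computation under those assumptions -- not MCNullDist's actual implementation, and the function name is made up:

import numpy as np

def permutation_pvalue(observed, null_samples, tail='left'):
    # null_samples: (n_features, n_permutations); observed: (n_features,)
    null_samples = np.asarray(null_samples)
    observed = np.asarray(observed)[:, None]
    if tail == 'left':
        hits = (null_samples <= observed).sum(axis=1)
    else:
        hits = (null_samples >= observed).sum(axis=1)
    # add-one smoothing so that an empirical p-value is never exactly zero
    return (hits + 1.0) / (null_samples.shape[1] + 1.0)

null = [[0.5, 0.6, 0.4, 0.55, 0.45, 0.5, 0.6, 0.5, 0.4, 0.5],
        [0.5, 0.6, 0.4, 0.55, 0.45, 0.5, 0.6, 0.5, 0.4, 0.5]]
# first feature's observed error beats every permutation, second one does not
print(permutation_pvalue([0.1, 0.5], null))  # -> [0.0909...  0.7272...]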
Example #22
        kNN(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="kNN on 5%(ANOVA)")

clfswh += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FixedNElementTailSelector(50, mode='select', tail='upper')),
        descr="kNN on 50(ANOVA)")

# GNB
clfswh += GNB(descr="GNB()")
clfswh += GNB(common_variance=True, descr="GNB(common_variance=True)")
clfswh += GNB(prior='uniform', descr="GNB(prior='uniform')")
clfswh += \
    FeatureSelectionClassifier(
        GNB(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="GNB on 5%(ANOVA)")

# GPR
if externals.exists('scipy'):
    from mvpa2.clfs.gpr import GPR

    regrswh += GPR(kernel=LinearKernel(), descr="GPR(kernel='linear')")
Example #23
class SearchlightTests(unittest.TestCase):
    def setUp(self):
        self.dataset = datasets['3dlarge']
        # give the feature coord a more common name, matching the default of
        # the searchlight
        self.dataset.fa['voxel_indices'] = self.dataset.fa.myspace
        self._tested_pprocess = False

    # https://github.com/PyMVPA/PyMVPA/issues/67
    # https://github.com/PyMVPA/PyMVPA/issues/69
    def test_gnbsearchlight_doc(self):
        # Test whether we excluded nproc from the docstrings
        ok_(not 'nproc' in GNBSearchlight.__init__.__doc__)
        ok_(not 'nproc' in GNBSearchlight.__doc__)
        ok_(not 'nproc' in sphere_gnbsearchlight.__doc__)
        # but present elsewhere
        ok_('nproc' in sphere_searchlight.__doc__)
        ok_('nproc' in Searchlight.__init__.__doc__)

    # https://github.com/PyMVPA/PyMVPA/issues/106
    def test_searchlights_doc_qe(self):
        # queryengine should not be provided to sphere_* helpers
        for sl in (sphere_searchlight, sphere_gnbsearchlight,
                   sphere_m1nnsearchlight):
            for kw in ('queryengine', 'qe'):
                ok_(not kw in sl.__doc__,
                    msg='There should be no %r in %s.__doc__' % (kw, sl))

        # queryengine should be provided in corresponding classes __doc__s
        for sl in (Searchlight, GNBSearchlight, M1NNSearchlight):
            for kw in ('queryengine', ):
                ok_(kw in sl.__init__.__doc__,
                    msg='There should be %r in %s.__init__.__doc__' % (kw, sl))
            for kw in ('qe', ):
                ok_(not kw in sl.__init__.__doc__,
                    msg='There should be no %r in %s.__init__.__doc__' %
                    (kw, sl))

    #def _test_searchlights(self, ds, sls, roi_ids, result_all):  # pragma: no cover

    @sweepargs(
        lrn_sllrn_SL_partitioner=[
            (
                GNB(common_variance=v, descr='GNB'),
                None,
                sphere_gnbsearchlight,
                NFoldPartitioner(cvtype=1),
                0.  # correction for the error range
            ) for v in (True, False)
        ] +
        # Mean 1 NN searchlights
        [
            (ChainMapper(
                [mean_group_sample(['targets', 'partitions']),
                 kNN(1)],
                space='targets',
                descr='M1NN'), kNN(1), sphere_m1nnsearchlight,
             NFoldPartitioner(0.5, selection_strategy='random',
                              count=20), 0.05),
            # the same but with NFold(1) partitioner since it still should work
            (ChainMapper(
                [mean_group_sample(['targets', 'partitions']),
                 kNN(1)],
                space='targets',
                descr='NF-M1NN'), kNN(1), sphere_m1nnsearchlight,
             NFoldPartitioner(1), 0.05),
        ])
    @sweepargs(do_roi=(False, True))
    @sweepargs(results_backend=('native', 'hdf5'))
    @reseed_rng()
    def test_spatial_searchlight(self,
                                 lrn_sllrn_SL_partitioner,
                                 do_roi=False,
                                 results_backend='native'):
        """Tests both generic and ad-hoc searchlights (e.g. GNBSearchlight)
        Test of and adhoc searchlight anyways requires a ground-truth
        comparison to the generic version, so we are doing sweepargs here
        """
        lrn, sllrn, SL, partitioner, correction = lrn_sllrn_SL_partitioner
        ## if results_backend == 'hdf5' and not common_variance:
        ##     # no need for full combination of all possible arguments here
        ##     return

        if __debug__ and 'ENFORCE_CA_ENABLED' in debug.active \
           and  isinstance(lrn, ChainMapper):
            raise SkipTest("Known to fail while trying to enable "
                           "training_stats for the ChainMapper (M1NN here)")

        # e.g. for M1NN we need plain kNN(1) for m1nnsl, but to imitate m1nn
        #      "learner" we must use a chainmapper atm
        if sllrn is None:
            sllrn = lrn
        ds = datasets['3dsmall'].copy()
        # Let's test multiclass here, so boost # of labels
        ds[6:18].T += 2
        ds.fa['voxel_indices'] = ds.fa.myspace

        # To assure that users do not run into incorrect operation due to overflows
        ds.samples += 5000
        ds.samples *= 1000
        ds.samples = ds.samples.astype(np.int16)

        # compute N-1 cross-validation for each sphere
        # YOH: unfortunately sample_clf_lin is not guaranteed
        #      to provide exactly the same results due to inherent
        #      iterative process.  Therefore lets use something quick
        #      and pure Python
        cv = CrossValidation(lrn, partitioner)

        skwargs = dict(
            radius=1,
            enable_ca=['roi_sizes', 'raw_results', 'roi_feature_ids'])

        if do_roi:
            # select some random set of features
            nroi = rnd.randint(1, ds.nfeatures)
            # and let's compute the full one as well so we have a reference,
            # which will itself be excluded from comparisons, but whose values
            # will be compared for the selected roi_ids
            sl_all = SL(sllrn, partitioner, **skwargs)
            result_all = sl_all(ds)
            # select random features
            roi_ids = rnd.permutation(range(ds.nfeatures))[:nroi]
            skwargs['center_ids'] = roi_ids
        else:
            nroi = ds.nfeatures
            roi_ids = np.arange(nroi)
            result_all = None

        if results_backend == 'hdf5':
            skip_if_no_external('h5py')

        sls = [
            sphere_searchlight(cv, results_backend=results_backend, **skwargs),
            #GNBSearchlight(gnb, NFoldPartitioner(cvtype=1))
            SL(sllrn, partitioner, indexsum='fancy', **skwargs)
        ]

        if externals.exists('scipy'):
            sls += [SL(sllrn, partitioner, indexsum='sparse', **skwargs)]

        # Test nproc just once
        if externals.exists('pprocess') and not self._tested_pprocess:
            sls += [sphere_searchlight(cv, nproc=2, **skwargs)]
            self._tested_pprocess = True

        # Provide the dataset and all those searchlights for testing
        #self._test_searchlights(ds, sls, roi_ids, result_all)
        #nroi = len(roi_ids)
        #do_roi = nroi != ds.nfeatures
        all_results = []
        for sl in sls:
            # run searchlight
            mvpa2.seed()  # reseed rng again for m1nnsl
            results = sl(ds)
            all_results.append(results)
            #print `sl`
            # check for correct number of spheres
            self.assertTrue(results.nfeatures == nroi)
            # and measures (one per xfold)
            if partitioner.cvtype == 1:
                self.assertTrue(len(results) == len(ds.UC))
            elif partitioner.cvtype == 0.5:
                # here we had 4 unique chunks, so 6 combinations
                # even though 20 max was specified for NFold
                self.assertTrue(len(results) == 6)
            else:
                raise RuntimeError("Unknown yet type of partitioner to check")
            # check for chance-level performance across all spheres
            # makes sense only if number of features was big enough
            # to get some stable estimate of mean
            if not do_roi or nroi > 20:
                # correction here is for M1NN class which has wider distribution
                self.assertTrue(0.67 - correction < results.samples.mean() <
                                0.85 + correction,
                                msg="Out of range mean result: "
                                "lrn: %s  sllrn: %s  NROI: %d  MEAN: %.3f" % (
                                    lrn,
                                    sllrn,
                                    nroi,
                                    results.samples.mean(),
                                ))

            mean_errors = results.samples.mean(axis=0)
            # that we do get different errors ;)
            if nroi > 3:
                self.assertTrue(len(np.unique(mean_errors)) > 3)

            # check reasonable sphere sizes
            self.assertTrue(len(sl.ca.roi_sizes) == nroi)
            self.assertTrue(len(sl.ca.roi_feature_ids) == nroi)
            for i, fids in enumerate(sl.ca.roi_feature_ids):
                self.assertTrue(len(fids) == sl.ca.roi_sizes[i])
            if do_roi:
                # for roi we should relax conditions a bit
                self.assertTrue(max(sl.ca.roi_sizes) <= 7)
                self.assertTrue(min(sl.ca.roi_sizes) >= 4)
            else:
                self.assertTrue(max(sl.ca.roi_sizes) == 7)
                self.assertTrue(min(sl.ca.roi_sizes) == 4)

            # check base-class state
            self.assertEqual(sl.ca.raw_results.nfeatures, nroi)

            # Test if we got results correctly for 'selected' roi ids
            if do_roi:
                assert_array_equal(result_all[:, roi_ids], results)

        if len(all_results) > 1:
            # if we had multiple searchlights, we can check whether they all
            # gave the same result (they should have)
            aresults = np.array([a.samples for a in all_results])
            dresults = np.abs(aresults - aresults.mean(axis=0))
            dmax = np.max(dresults)
            self.assertTrue(dmax <= 1e-13)

        # Test the searchlight's reuse of neighbors
        for indexsum in ['fancy'] + (externals.exists('scipy') and ['sparse']
                                     or []):
            sl = SL(sllrn,
                    partitioner,
                    indexsum=indexsum,
                    reuse_neighbors=True,
                    **skwargs)
            mvpa2.seed()
            result1 = sl(ds)
            mvpa2.seed()
            result2 = sl(ds)  # must be faster
            assert_array_equal(result1, result2)

    def test_adhocsearchlight_perm_testing(self):
        # just a smoke test pretty much
        ds = datasets['3dmedium'].copy()
        #ds.samples += np.random.normal(size=ds.samples.shape)*10
        mvpa2.seed()
        ds.fa['voxel_indices'] = ds.fa.myspace
        from mvpa2.mappers.fx import mean_sample
        from mvpa2.clfs.stats import MCNullDist
        permutator = AttributePermutator('targets', count=8, limit='chunks')
        distr_est = MCNullDist(permutator,
                               tail='left',
                               enable_ca=['dist_samples'])
        slargs = (kNN(1),
                  NFoldPartitioner(0.5, selection_strategy='random', count=9))
        slkwargs = dict(radius=1, postproc=mean_sample())

        sl_nodistr = sphere_m1nnsearchlight(*slargs, **slkwargs)
        skip_if_no_external('scipy')  # needed for null_t
        sl = sphere_m1nnsearchlight(*slargs,
                                    null_dist=distr_est,
                                    enable_ca=['null_t'],
                                    reuse_neighbors=True,
                                    **slkwargs)
        mvpa2.seed()
        res_nodistr = sl_nodistr(ds)
        mvpa2.seed()
        res = sl(ds)
        # verify that we at least got the same main result
        # ah (yoh) -- null dist is estimated before the main
        # estimate so we can't guarantee correspondence :-/
        # assert_array_equal(res_nodistr, res)
        # only resemblance (TODO, may be we want to get/setstate
        # for rng before null_dist.fit?)

        # and dimensions correspond
        assert_array_equal(distr_est.ca.dist_samples.shape,
                           (1, ds.nfeatures, 8))
        assert_array_equal(sl.ca.null_t.samples.shape, (1, ds.nfeatures))

    def test_partial_searchlight_with_full_report(self):
        ds = self.dataset.copy()
        center_ids = np.zeros(ds.nfeatures, dtype='bool')
        center_ids[[3, 50]] = True
        ds.fa['center_ids'] = center_ids
        # compute N-1 cross-validation for each sphere
        cv = CrossValidation(GNB(), NFoldPartitioner())
        # construct a diameter 1 (or just radius 0) searchlight;
        # one time give center ids as a list, the other takes them from the
        # dataset itself
        sls = (
            sphere_searchlight(cv, radius=0, center_ids=[3, 50]),
            sphere_searchlight(None, radius=0, center_ids=[3, 50]),
            sphere_searchlight(cv, radius=0, center_ids='center_ids'),
        )
        for sl in sls:
            # assure that we could set cv post constructor
            if sl.datameasure is None:
                sl.datameasure = cv
            # run searchlight
            results = sl(ds)
            # only two spheres but error for all CV-folds
            self.assertEqual(results.shape, (len(self.dataset.UC), 2))
            # Test if results hold if we "set" a "new" datameasure
            sl.datameasure = CrossValidation(GNB(), NFoldPartitioner())
            results2 = sl(ds)
            assert_array_almost_equal(results, results2)

        # test if we graciously puke if center_ids are out of bounds
        dataset0 = ds[:, :50]  # so we have no 50th feature
        self.assertRaises(IndexError, sls[0], dataset0)
        # but it should be fine on the one that gets the ids from the dataset
        # itself
        results = sl(dataset0)
        assert_equal(results.nfeatures, 1)
        # check whether roi_seeds are correct
        sl = sphere_searchlight(lambda x: np.vstack(
            (x.fa.roi_seed, x.samples)),
                                radius=1,
                                add_center_fa=True,
                                center_ids=[12])
        res = sl(ds)
        assert_array_equal(
            res.samples[1:, res.samples[0].astype('bool')].squeeze(),
            ds.samples[:, 12])

    def test_partial_searchlight_with_confusion_matrix(self):
        ds = self.dataset
        from mvpa2.clfs.stats import MCNullDist
        from mvpa2.mappers.fx import mean_sample, sum_sample

        # compute N-1 cross-validation for each sphere
        cm = ConfusionMatrix(labels=ds.UT)
        cv = CrossValidation(
            sample_clf_lin,
            NFoldPartitioner(),
            # we have to assure that the matrix does not get flattened by
            # first vstack in cv and then hstack in searchlight --
            # thus 2 leading dimensions
            # TODO: RF? make searchlight/crossval smarter?
            errorfx=lambda *a: cm(*a)[None, None, :])
        # construct a diameter 2 (or just radius 1) searchlight
        sl = sphere_searchlight(cv, radius=1, center_ids=[3, 5, 50])

        # our regular searchlight -- to compare results
        cv_gross = CrossValidation(sample_clf_lin, NFoldPartitioner())
        sl_gross = sphere_searchlight(cv_gross,
                                      radius=1,
                                      center_ids=[3, 5, 50])

        # run searchlights
        res = sl(ds)
        res_gross = sl_gross(ds)

        # only two spheres but error for all CV-folds and complete confusion matrix
        assert_equal(res.shape, (len(ds.UC), 3, len(ds.UT), len(ds.UT)))
        assert_equal(res_gross.shape, (len(ds.UC), 3))

        # briefly inspect the confusion matrices
        mat = res.samples
        # since input dataset is probably balanced (otherwise adjust
        # to be per label): sum within columns (thus axis=-2) should
        # be identical to per-class/chunk number of samples
        samples_per_classchunk = len(ds) / (len(ds.UT) * len(ds.UC))
        ok_(np.all(np.sum(mat, axis=-2) == samples_per_classchunk))
        # and if we compute accuracies manually -- they should
        # correspond to the one from sl_gross
        assert_array_almost_equal(
            res_gross.samples,
            # from accuracies to errors
            1 - (mat[..., 0, 0] + mat[..., 1, 1]).astype(float) /
            (2 * samples_per_classchunk))

        # and now for those who remained seated -- let's perform H0 MC
        # testing of this searchlight... just a silly one with minimal
        # number of permutations
        no_permutations = 10
        permutator = AttributePermutator('targets', count=no_permutations)

        # once again -- need explicit leading dimension to avoid
        # vstacking during cross-validation
        cv.postproc = lambda x: sum_sample()(x)[None, :]

        sl = sphere_searchlight(cv,
                                radius=1,
                                center_ids=[3, 5, 50],
                                null_dist=MCNullDist(
                                    permutator,
                                    tail='right',
                                    enable_ca=['dist_samples']))
        res_perm = sl(ds)
        # XXX all of the res_perm, sl.ca.null_prob and
        #     sl.null_dist.ca.dist_samples carry a degenerate leading
        #     dimension which was probably due to introduced new axis
        #     above within cv.postproc
        assert_equal(res_perm.shape, (1, 3, 2, 2))
        assert_equal(sl.null_dist.ca.dist_samples.shape,
                     res_perm.shape + (no_permutations, ))
        assert_equal(sl.ca.null_prob.shape, res_perm.shape)
        # just to make sure ;)
        ok_(np.all(sl.ca.null_prob.samples >= 0))
        ok_(np.all(sl.ca.null_prob.samples <= 1))

        # we should have got sums of hits across the splits
        assert_array_equal(np.sum(mat, axis=0), res_perm.samples[0])

    def test_chi_square_searchlight(self):
        # only do partial to save time

        # Can't yet do this since test_searchlight isn't yet "under nose"
        #skip_if_no_external('scipy')
        if not externals.exists('scipy'):
            return

        from mvpa2.misc.stats import chisquare

        cv = CrossValidation(sample_clf_lin,
                             NFoldPartitioner(),
                             enable_ca=['stats'])

        def getconfusion(data):
            cv(data)
            return chisquare(cv.ca.stats.matrix)[0]

        sl = sphere_searchlight(getconfusion, radius=0, center_ids=[3, 50])

        # run searchlight
        results = sl(self.dataset)
        self.assertTrue(results.nfeatures == 2)

    def test_1d_multispace_searchlight(self):
        ds = Dataset([np.arange(6)])
        ds.fa['coord1'] = np.repeat(np.arange(3), 2)
        # add a second space to the dataset
        ds.fa['coord2'] = np.tile(np.arange(2), 3)
        measure = lambda x: "+".join([str(x) for x in x.samples[0]])
        # simply select each feature once
        res = Searchlight(measure,
                          IndexQueryEngine(coord1=Sphere(0), coord2=Sphere(0)),
                          nproc=1)(ds)
        assert_array_equal(res.samples, [['0', '1', '2', '3', '4', '5']])
        res = Searchlight(measure,
                          IndexQueryEngine(coord1=Sphere(0), coord2=Sphere(1)),
                          nproc=1)(ds)
        assert_array_equal(res.samples,
                           [['0+1', '0+1', '2+3', '2+3', '4+5', '4+5']])
        res = Searchlight(measure,
                          IndexQueryEngine(coord1=Sphere(1), coord2=Sphere(0)),
                          nproc=1)(ds)
        assert_array_equal(res.samples,
                           [['0+2', '1+3', '0+2+4', '1+3+5', '2+4', '3+5']])

    #@sweepargs(regr=regrswh[:])
    @reseed_rng()
    def test_regression_with_additional_sa(self):
        regr = regrswh[:][0]
        ds = datasets['3dsmall'].copy()
        ds.fa['voxel_indices'] = ds.fa.myspace

        # Create a new sample attribute which will be used along with
        # every searchlight
        ds.sa['beh'] = np.random.normal(size=(ds.nsamples, 2))

        # and now for fun -- let's create custom linear regression
        # targets out of some random feature and beh, linearly combined
        rfeature = np.random.randint(ds.nfeatures)
        ds.sa.targets = np.dot(
            np.hstack((ds.sa.beh, ds.samples[:, rfeature:rfeature + 1])),
            np.array([0.3, 0.2, 0.3]))

        class CrossValidationWithBeh(CrossValidation):
            """An adapter for regular CV which would hstack
               sa.beh to the searchlighting ds"""
            def _call(self, ds):
                return CrossValidation._call(
                    self, Dataset(np.hstack((ds, ds.sa.beh)), sa=ds.sa))

        cvbeh = CrossValidationWithBeh(regr,
                                       OddEvenPartitioner(),
                                       errorfx=corr_error)
        # regular cv
        cv = CrossValidation(regr, OddEvenPartitioner(), errorfx=corr_error)

        slbeh = sphere_searchlight(cvbeh, radius=1)
        slmapbeh = slbeh(ds)
        sl = sphere_searchlight(cv, radius=1)
        slmap = sl(ds)

        assert_equal(slmap.shape, (2, ds.nfeatures))
        # The SL which had access to beh should surely have got better
        # results, especially in the vicinity of the chosen feature...
        features = sl.queryengine.query_byid(rfeature)
        assert_array_lequal(slmapbeh.samples[:, features],
                            slmap.samples[:, features])

        # elsewhere they should tend to be better, but that is not guaranteed

    @labile(5, 1)
    def test_usecase_concordancesl(self):
        import numpy as np
        from mvpa2.base.dataset import vstack
        from mvpa2.mappers.fx import mean_sample

        # Take our sample 3d dataset
        ds1 = datasets['3dsmall'].copy(deep=True)
        ds1.fa['voxel_indices'] = ds1.fa.myspace
        ds1.sa['subject'] = [1]  # not really necessary -- but let's keep it for clarity
        ds1 = mean_sample()(ds1)  # so we get just a single representative sample

        def corr12(ds):
            corr = np.corrcoef(ds.samples)
            assert (corr.shape == (2, 2))  # for paranoid ones
            return corr[0, 1]

        for nsc, thr, thr_mean in ((0, 1.0, 1.0),
                                   (0.1, 0.3, 0.8)):  # just a bit of noise
            ds2 = ds1.copy(deep=True)  # make a copy for the 2nd subject
            ds2.sa['subject'] = [2]
            ds2.samples += nsc * np.random.normal(size=ds1.shape)

            # make sure that both have the same voxel indices
            assert (np.all(ds1.fa.voxel_indices == ds2.fa.voxel_indices))
            ds_both = vstack((ds1, ds2))  # join 2 images into a single dataset
            # with .sa.subject distinguishing both
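            # each searchlight sphere (radius 2) then yields the correlation
            # between the two subjects' local patterns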

            sl = sphere_searchlight(corr12, radius=2)
            slmap = sl(ds_both)
            ok_(np.all(slmap.samples >= thr))
            ok_(np.mean(slmap.samples) >= thr)

    def test_swaroop_case(self):
        """Test hdf5 backend to pass results on Swaroop's usecase
        """
        skip_if_no_external('h5py')
        from mvpa2.measures.base import Measure

        class sw_measure(Measure):
            def __init__(self):
                Measure.__init__(self, auto_train=True)

            def _call(self, dataset):
                # For performance measurements -- increase the shape to 50-200.
                # np.sum here is just to get some meaningful value into
                # the returned arrays
                #return np.ones(shape=(2, 2))*np.sum(dataset)
                return Dataset(
                    np.array([{'d': np.ones(shape=(5, 5)) * np.sum(dataset)}],
                             dtype=object))

        results = []
        ds = datasets['3dsmall'].copy(deep=True)
        ds.fa['voxel_indices'] = ds.fa.myspace

        our_custom_prefix = tempfile.mktemp()
        for backend in ['native'] + \
                (['hdf5'] if externals.exists('h5py') else []):
            sl = sphere_searchlight(sw_measure(),
                                    radius=1,
                                    tmp_prefix=our_custom_prefix,
                                    results_backend=backend)
            t0 = time.time()
            results.append(np.asanyarray(sl(ds)))
            # print "Done for backend %s in %d sec" % (backend, time.time() - t0)
        # Because of Swaroop's ad-hoc use case (who could have recommended such
        # a construct?), and absent a working assert_objectarray_equal,
        # let's compare manually
        #assert_objectarray_equal(*results)
        if not externals.exists('h5py'):
            self.assertRaises(RuntimeError,
                              sphere_searchlight,
                              sw_measure(),
                              results_backend='hdf5')
            raise SkipTest('h5py required for test of backend="hdf5"')
        assert_equal(results[0].shape, results[1].shape)
        results = [r.flatten() for r in results]
        for x, y in zip(*results):
            assert_equal(x.keys(), y.keys())
            assert_array_equal(x['d'], y['d'])
        # verify that no junk is left behind
        tempfiles = glob.glob(our_custom_prefix + '*')
        assert_equal(len(tempfiles), 0)

    def test_nblocks(self):
        skip_if_no_external('pprocess')
        # just a basic test to see that we are getting the same
        # results with different nblocks
        ds = datasets['3dsmall'].copy(deep=True)[:, :13]
        ds.fa['voxel_indices'] = ds.fa.myspace
        cv = CrossValidation(GNB(), OddEvenPartitioner())
        res1 = sphere_searchlight(cv, radius=1, nproc=2)(ds)
        res2 = sphere_searchlight(cv, radius=1, nproc=2, nblocks=5)(ds)
        assert_array_equal(res1, res2)

    def test_custom_results_fx_logic(self):
        # results_fx was introduced for the blow-up-the-memory Swaroop use case,
        # where keeping all intermediate results of the dark-magic SL
        # hyperalignment is not feasible.  So it is desirable to split the
        # searchlight computation into more blocks while composing the
        # target result "on-the-fly" from the results available so far.
        #
        # Implementation relies on using generators feeding the
        # results_fx with fresh results whenever those become
        # available.
        #
        # This test/example's "measure" creates files which should be
        # handled (and here removed) by the results_fx function, to check
        # that we indeed get the desired high number of blocks while
        # having only a limited nproc.
        skip_if_no_external('pprocess')

        tfile = tempfile.mktemp('mvpa', 'test-sl')

        ds = datasets['3dsmall'].copy()[:, :25]  # smaller copy
        ds.fa['voxel_indices'] = ds.fa.myspace
        ds.fa['feature_id'] = np.arange(ds.nfeatures)

        nproc = 3  # it is not about computing -- so we can start more
        # processes than we possibly have CPUs, just to test
        nblocks = nproc * 7
        # figure out max number of features to be given to any proc_block
        # yoh: not sure why the +1 was needed here... but it makes the estimate
        # more robust and still does what was demanded, so be it
        max_block = int(ceil(ds.nfeatures / float(nblocks)) + 1)

        def print_(s, *args):
            """For local debugging"""
            #print s, args
            pass

        def results_fx(sl=None, dataset=None, roi_ids=None, results=None):
            """It will "process" the results by removing those files
               generated inside the measure
            """
            res = []
            print_("READY")
            for x in results:
                ok_(isinstance(x, list))
                res.append(x)
                print_("R: ", x)
                for r in x:
                    # This can happen if those .ca's were requested to be enabled
                    # -- then _proc_block would automagically wrap the
                    # results in a dataset... Originally detected by
                    # running with MVPA_DEBUG=.* which triggered
                    # enabling all ca's
                    if is_datasetlike(r):
                        r = np.asscalar(r.samples)
                    os.unlink(r)  # remove generated file
                print_("WAITING")

            results_ds = hstack(sum(res, []))
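            # (sum(res, []) flattens the per-block result lists; hstack then
            # joins the per-ROI results feature-wise into a single dataset)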

            # store the center ids as a feature attribute since we use
            # them for testing
            results_ds.fa['center_ids'] = roi_ids
            return results_ds

        def results_postproc_fx(results):
            for ds in results:
                ds.fa['test_postproc'] = np.atleast_1d(ds.a.roi_center_ids**2)
            return results

        def measure(ds):
            """The "measure" will check if a run with the same "index" from
               previous block has been processed by now
            """
            f = '%s+%03d' % (tfile, ds.fa.feature_id[0] % (max_block * nproc))
            print_("FID:%d f:%s" % (ds.fa.feature_id[0], f))

            # allow up to a few seconds for the file to
            # disappear -- i.e. for its result from the previous "block"
            # to be processed
            t0 = time.time()
            while os.path.exists(f) and time.time() - t0 < 4.:
                time.sleep(0.5)  # so it does take time to compute the measure
                pass
            if os.path.exists(f):
                print_("ERROR: ", f)
                raise AssertionError(
                    "File %s must have been processed by now" % f)
            open(f, 'w').write('XXX')  # signal that we have computed this measure
            print_("RES: %s" % f)
            return f

        sl = sphere_searchlight(measure,
                                radius=0,
                                nproc=nproc,
                                nblocks=nblocks,
                                results_postproc_fx=results_postproc_fx,
                                results_fx=results_fx,
                                center_ids=np.arange(ds.nfeatures))

        assert_equal(len(glob.glob(tfile + '*')), 0)  # so no junk around
        try:
            res = sl(ds)
            assert_equal(res.nfeatures, ds.nfeatures)
            # verify that we did have results_postproc_fx called
            assert_array_equal(res.fa.test_postproc,
                               np.power(res.fa.center_ids, 2))
        finally:
            # remove those generated left-over files
            for f in glob.glob(tfile + '*'):
                os.unlink(f)
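        # Illustrative sketch (not from the original test): a minimal results_fx
        # only needs to consume the generator of per-block result lists and
        # combine them, e.g. roughly
        #
        #     def minimal_results_fx(sl=None, dataset=None, roi_ids=None,
        #                            results=None):
        #         collected = []
        #         for block in results:   # blocks arrive as they complete
        #             collected.extend(block)
        #         return hstack(collected)
        #
        # which could then be passed as results_fx to sphere_searchlight as above.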
Example #24
0
        assert_equal(set(training_stats.keys()),
                     set([('L0', 'L1'), ('L0', 'L2'), ('L1', 'L2')]))
        for pair, cm in training_stats.items():
            assert_array_equal(cm.labels, ds.UT)
            # we should have no predictions for absent label
            assert_array_equal(cm.matrix[~np.in1d(ds.UT, pair)], 0)
            # while altogether all samples were processed once
            assert_array_equal(cm.stats['P'], len(ds))
            # and the number of sets should equal the number of chunks here
            assert_equal(len(cm.sets), len(ds.UC))


# Sweep through some representative interesting classifiers
@sweepargs(clf=[
    LinearCSVMC(C=1),
    GNB(common_variance=True),
])
def test_multiclass_without_combiner_sens(clf):
    ds = datasets['uni3small'].copy()
    # do the clone since later we will compare sensitivities and need it
    # independently trained etc
    mclf = MulticlassClassifier(clf.clone(), combiner=None)

    # We have lots of sandwiching
    #    Multiclass.clfs -> [BinaryClassifier] -> clf
    # where BinaryClassifier's estimates are binarized.
    # Let's also check that we are getting sensitivities correctly.
    # With the addition of MulticlassClassifierSensitivityAnalyzer we managed to break
    # it and no tests picked it up, so here we will test that sensitivities
    # are computed and labeled correctly
Example #25
0
def test_confusion_as_node():
    from mvpa2.misc.data_generators import normal_feature_dataset
    from mvpa2.clfs.gnb import GNB
    from mvpa2.clfs.transerror import Confusion
    ds = normal_feature_dataset(snr=2.0, perlabel=42, nchunks=3,
                                nonbogus_features=[0,1], nfeatures=2)
    clf = GNB()
    cv = CrossValidation(
        clf, NFoldPartitioner(),
        errorfx=None,
        postproc=Confusion(labels=ds.UT),
        enable_ca=['stats'])
    res = cv(ds)
    # needs to be identical to CA
    assert_array_equal(res.samples, cv.ca.stats.matrix)
    assert_array_equal(res.sa.predictions, ds.UT)
    assert_array_equal(res.fa.targets, ds.UT)

    skip_if_no_external('scipy')

    from mvpa2.clfs.transerror import BayesConfusionHypothesis
    from mvpa2.base.node import ChainNode
    # same again, but this time with Bayesian hypothesis testing at the end
    cv = CrossValidation(
        clf, NFoldPartitioner(),
        errorfx=None,
        postproc=ChainNode([Confusion(labels=ds.UT),
                            BayesConfusionHypothesis()]))
    res = cv(ds)
    # only two possible hypothesis with two classes
    assert_equals(len(res), 2)
    # the first hypothesis is the "can't discriminate anything" one
    assert_equal(len(res.sa.hypothesis[0]), 1)
    assert_equal(len(res.sa.hypothesis[0][0]), 2)
    # and this hypothesis is actually less likely than the other one
    # (that both classes can be distinguished)
    assert(np.e**res.samples[0,0] < np.e**res.samples[1,0])

    # Let's see how well it would work within the searchlight when we also
    # would like to store the hypotheses per each voxel
    # A somewhat ad-hoc solution for an answer posted on the mailing list
    #
    # run a 1d searchlight of radius 0; for that just provide a .fa with coordinates
    ds.fa['voxel_indices'] = [[0], [1]]
    # and a custom Node which would collect .sa.hypothesis to store alongside
    # the posterior probabilities
    from mvpa2.base.node import Node
    from mvpa2.measures.searchlight import sphere_searchlight

    class KeepBothPosteriorAndHypothesis(Node):
        def _call(self, ds):
            out = np.zeros(1, dtype=object)
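            # pack posteriors and hypothesis labels into a single object cell,
            # so the searchlight gets exactly one "sample" per ROI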
            out[0] = (ds.samples, ds.sa.hypothesis)
            return out
    cv.postproc.append(KeepBothPosteriorAndHypothesis())
    sl = sphere_searchlight(cv, radius=0, nproc=1)
    res = sl(ds)

    assert_equal(res.shape, (1, 2))
    assert_equal(len(res.samples[0,0]), 2)
    assert_equal(res.samples[0,0][0].shape, (2, 2))   # posteriors per 1st SL
    assert_equal(len(res.samples[0,0][1]), 2)   # 2 hypotheses
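    # Illustrative follow-up (sketch): each cell of res.samples holds the
    # (posteriors, hypotheses) tuple for one searchlight center, e.g.
    #     posteriors, hypotheses = res.samples[0, 0]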
Example #26
0
    def test_gnb(self):
        gnb = GNB()
        gnb_nc = GNB(common_variance=False)
        gnb_n = GNB(normalize=True)
        gnb_n_nc = GNB(normalize=True, common_variance=False)
        gnb_lin = GNB(common_variance=True)

        ds = datasets['uni2medium']

        # Store probabilities for further comparison
        probabilities = {}

        # Generic silly coverage just to assure that it works in all
        # possible scenarios:
        bools = (True, False)
        # There should be a better way... heh
        for cv in bools:  # common_variance?
            for prior in ('uniform', 'laplacian_smoothing', 'ratio'):
                tp = None  # predictions -- all above should
                # result in the same predictions
                for n in bools:  # normalized?
                    for ls in bools:  # logspace?
                        for es in ((), ('estimates',)):
                            gnb_ = GNB(common_variance=cv,
                                       prior=prior,
                                       normalize=n,
                                       logprob=ls,
                                       enable_ca=es)
                            tm = TransferMeasure(gnb_, Splitter('train'))
                            predictions = tm(ds).samples[:, 0]
                            if tp is None:
                                tp = predictions
                            assert_array_equal(predictions, tp)
                            # if normalized -- check if estimates are such
                            if n and 'estimates' in es:
                                v = gnb_.ca.estimates
                                if ls:  # in log space -- take exp ;)
                                    v = np.exp(v)
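                                # per-sample class probabilities should sum to ~1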
                                d1 = np.sum(v, axis=1) - 1.0
                                self.assertTrue(np.max(np.abs(d1)) < 1e-5)
                                probabilities[repr(gnb_)] = v
                            # smoke test: invoking the sensitivity analyzer should
                            # work when the GNB classifier is linear (common variance)
                            # and should raise NotImplementedError when it isn't.
                            if cv:
                                assert 'has_sensitivity' in gnb_.__tags__
                                gnb_.get_sensitivity_analyzer()
                            if not cv:
                                with self.assertRaises(NotImplementedError):
                                    gnb_.get_sensitivity_analyzer()

        # Verify that probabilities are identical when we use logprob or not
        assert_array_almost_equal(
            probabilities[
                "GNB(space='targets', normalize=True, logprob=False)"],
            probabilities["GNB(space='targets', normalize=True)"])
        assert_array_almost_equal(
            probabilities[
                "GNB(space='targets', normalize=True, logprob=False, prior='uniform')"],
            probabilities[
                "GNB(space='targets', normalize=True, prior='uniform')"])