Example #1
    def test_partial_searchlight_with_confusion_matrix(self):
        ds = self.dataset
        from mvpa2.clfs.stats import MCNullDist
        from mvpa2.mappers.fx import mean_sample, sum_sample

        # compute N-1 cross-validation for each sphere
        cm = ConfusionMatrix(labels=ds.UT)
        cv = CrossValidation(
            sample_clf_lin,
            NFoldPartitioner(),
            # we have to ensure that the matrix does not get flattened by
            # the first vstack in cv and then the hstack in searchlight --
            # thus 2 leading dimensions
            # TODO: RF? make searchlight/crossval smarter?
            errorfx=lambda *a: cm(*a)[None, None, :])
        # construct a diameter 2 (or just radius 1) searchlight
        sl = sphere_searchlight(cv, radius=1, center_ids=[3, 5, 50])

        # our regular searchlight -- to compare results
        cv_gross = CrossValidation(sample_clf_lin, NFoldPartitioner())
        sl_gross = sphere_searchlight(cv_gross,
                                      radius=1,
                                      center_ids=[3, 5, 50])

        # run searchlights
        res = sl(ds)
        res_gross = sl_gross(ds)

        # only three spheres, but errors for all CV-folds and the complete confusion matrix
        assert_equal(res.shape, (len(ds.UC), 3, len(ds.UT), len(ds.UT)))
        assert_equal(res_gross.shape, (len(ds.UC), 3))

        # briefly inspect the confusion matrices
        mat = res.samples
        # since the input dataset is presumably balanced (otherwise adjust
        # to be per label): sums within columns (thus axis=-2) should be
        # identical to the per-class/chunk number of samples
        samples_per_classchunk = len(ds) // (len(ds.UT) * len(ds.UC))
        ok_(np.all(np.sum(mat, axis=-2) == samples_per_classchunk))
        # and if we compute accuracies manually -- they should
        # correspond to those from sl_gross
        assert_array_almost_equal(
            res_gross.samples,
            # from accuracies to errors
            1 - (mat[..., 0, 0] + mat[..., 1, 1]).astype(float) /
            (2 * samples_per_classchunk))

        # and now for those who remained seated -- let's perform H0 MC
        # testing of this searchlight... just a silly one with a minimal
        # number of permutations
        no_permutations = 10
        permutator = AttributePermutator('targets', count=no_permutations)

        # once again -- need explicit leading dimension to avoid
        # vstacking during cross-validation
        cv.postproc = lambda x: sum_sample()(x)[None, :]

        sl = sphere_searchlight(cv,
                                radius=1,
                                center_ids=[3, 5, 50],
                                null_dist=MCNullDist(
                                    permutator,
                                    tail='right',
                                    enable_ca=['dist_samples']))
        res_perm = sl(ds)
        # XXX all of the res_perm, sl.ca.null_prob and
        #     sl.null_dist.ca.dist_samples carry a degenerate leading
        #     dimension which was probably due to introduced new axis
        #     above within cv.postproc
        assert_equal(res_perm.shape, (1, 3, 2, 2))
        assert_equal(sl.null_dist.ca.dist_samples.shape,
                     res_perm.shape + (no_permutations, ))
        assert_equal(sl.ca.null_prob.shape, res_perm.shape)
        # just to make sure ;)
        ok_(np.all(sl.ca.null_prob.samples >= 0))
        ok_(np.all(sl.ca.null_prob.samples <= 1))

        # we should have got sums of hits across the splits
        assert_array_equal(np.sum(mat, axis=0), res_perm.samples[0])
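The trick this test relies on deserves a note: the errorfx returns the entire confusion matrix rather than a scalar error, padded with two singleton axes so that neither the vstack inside CrossValidation nor the hstack inside the searchlight flattens it. A minimal sketch of that wrapper, with a placeholder label set:

from mvpa2.clfs.transerror import ConfusionMatrix

cm = ConfusionMatrix(labels=['a', 'b'])  # hypothetical labels; the test uses ds.UT

def matrix_errorfx(*args):
    # cm(*args) yields a len(labels) x len(labels) count matrix; the two
    # leading singleton axes survive the stacking steps unflattened
    return cm(*args)[None, None, :]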
Example #2
def test_gnbsearchlight_permutations():
    import mvpa2
    from mvpa2.base.node import ChainNode
    from mvpa2.clfs.gnb import GNB
    from mvpa2.generators.base import Repeater
    from mvpa2.generators.partition import NFoldPartitioner, OddEvenPartitioner
    from mvpa2.generators.permutation import AttributePermutator
    from mvpa2.testing.datasets import datasets
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.gnbsearchlight import sphere_gnbsearchlight
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.mappers.fx import mean_sample
    from mvpa2.misc.errorfx import mean_mismatch_error
    from mvpa2.clfs.stats import MCNullDist
    from mvpa2.testing.tools import assert_raises, ok_, assert_array_less

    # mvpa2.debug.active = ['APERM', 'SLC'] #, 'REPM']
    # mvpa2.debug.metrics += ['pid']
    count = 10
    nproc = 1 + int(mvpa2.externals.exists('pprocess'))
    ds = datasets['3dsmall'].copy()
    ds.fa['voxel_indices'] = ds.fa.myspace

    slkwargs = dict(radius=3,
                    space='voxel_indices',
                    enable_ca=['roi_sizes'],
                    center_ids=[1, 10, 70, 100])

    mvpa2.seed(mvpa2._random_seed)
    clf = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')

    repeater = Repeater(count=count)
    permutator = AttributePermutator('targets',
                                     limit={'partitions': 1},
                                     count=1)

    null_sl = sphere_gnbsearchlight(clf,
                                    ChainNode([splt, permutator],
                                              space=splt.get_space()),
                                    postproc=mean_sample(),
                                    errorfx=mean_mismatch_error,
                                    **slkwargs)

    distr_est = MCNullDist(repeater,
                           tail='left',
                           measure=null_sl,
                           enable_ca=['dist_samples'])
    sl = sphere_gnbsearchlight(clf,
                               splt,
                               reuse_neighbors=True,
                               null_dist=distr_est,
                               postproc=mean_sample(),
                               errorfx=mean_mismatch_error,
                               **slkwargs)
    if __debug__:  # this check is active only when not running with -O
        assert_raises(NotImplementedError, sl, ds)

    # "ad-hoc searchlights can't handle yet varying targets across partitions"
    if False:
        # after above limitation is removed -- enable
        sl_map = sl(ds)
        sl_null_prob = sl.ca.null_prob.samples.copy()

    mvpa2.seed(mvpa2._random_seed)
    ### 'normal' Searchlight
    clf = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')
    repeater = Repeater(count=count)
    permutator = AttributePermutator('targets',
                                     limit={'partitions': 1},
                                     count=1)
    # NB: passing rng=np.random.RandomState(0) to the permutator would trigger
    # a failure, since the same np.random state would be reused across all pprocesses
    null_cv = CrossValidation(clf,
                              ChainNode([splt, permutator],
                                        space=splt.get_space()),
                              postproc=mean_sample())
    null_sl_normal = sphere_searchlight(null_cv, nproc=nproc, **slkwargs)
    distr_est_normal = MCNullDist(repeater,
                                  tail='left',
                                  measure=null_sl_normal,
                                  enable_ca=['dist_samples'])

    cv = CrossValidation(clf,
                         splt,
                         errorfx=mean_mismatch_error,
                         enable_ca=['stats'],
                         postproc=mean_sample())
    sl = sphere_searchlight(cv,
                            nproc=nproc,
                            null_dist=distr_est_normal,
                            **slkwargs)
    sl_map_normal = sl(ds)
    sl_null_prob_normal = sl.ca.null_prob.samples.copy()

    # For every feature we should get some variance in the estimates. In
    # case of failure they are all really close to each other (up to
    # numerical precision), so the variance will be close to 0
    assert_array_less(
        -np.var(distr_est_normal.ca.dist_samples.samples[0], axis=1), -1e-5)
    for s in distr_est_normal.ca.dist_samples.samples[0]:
        ok_(len(np.unique(s)) > 1)
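Both searchlight variants above share the same null-distribution scaffolding: the partitioner runs first, the permutator then shuffles targets only within the training partition (limit={'partitions': 1}), and a Repeater drives the Monte-Carlo resamplings that MCNullDist collects. A condensed sketch of just that wiring, under the same imports as the test:

from mvpa2.base.node import ChainNode
from mvpa2.clfs.gnb import GNB
from mvpa2.clfs.stats import MCNullDist
from mvpa2.generators.base import Repeater
from mvpa2.generators.partition import NFoldPartitioner
from mvpa2.generators.permutation import AttributePermutator
from mvpa2.mappers.fx import mean_sample
from mvpa2.measures.base import CrossValidation
from mvpa2.misc.errorfx import mean_mismatch_error

splt = NFoldPartitioner()
# shuffle 'targets' once per fold, and only within the training partition
permutator = AttributePermutator('targets', limit={'partitions': 1}, count=1)
null_cv = CrossValidation(GNB(),
                          ChainNode([splt, permutator], space=splt.get_space()),
                          errorfx=mean_mismatch_error,
                          postproc=mean_sample())
# collect 10 permutation estimates; tail='left' because lower error is better
distr_est = MCNullDist(Repeater(count=10), tail='left',
                       measure=null_cv, enable_ca=['dist_samples'])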
Example #3
from mvpa2.testing.datasets import datasets

from mvpa2 import cfg
from mvpa2.base import externals
from mvpa2.clfs.stats import MCNullDist, FixedNullDist, NullDist
from mvpa2.generators.permutation import AttributePermutator
from mvpa2.datasets import Dataset
from mvpa2.measures.anova import OneWayAnova, CompoundOneWayAnova
from mvpa2.misc.fx import double_gamma_hrf, single_gamma_hrf
from mvpa2.measures.corrcoef import pearson_correlation

# Prepare a few distributions to test
#kwargs = {'permutations':10, 'tail':'any'}
permutator = AttributePermutator('targets', count=30)
nulldist_sweep = [
    MCNullDist(permutator, tail='any'),
    MCNullDist(permutator, tail='right')
]

if externals.exists('scipy'):
    from mvpa2.support.stats import scipy
    from scipy.stats import f_oneway
    from mvpa2.clfs.stats import rv_semifrozen
    nulldist_sweep += [
        MCNullDist(permutator, scipy.stats.norm, tail='any'),
        MCNullDist(permutator, scipy.stats.norm, tail='right'),
        MCNullDist(permutator,
                   rv_semifrozen(scipy.stats.norm, loc=0),
                   tail='right'),
        MCNullDist(permutator, scipy.stats.expon, tail='right'),
        FixedNullDist(scipy.stats.norm(0, 10.0), tail='any'),
    ]
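These estimators are typically handed to a measure via its null_dist argument and queried afterwards through the null_prob conditional attribute. A hedged sketch of such a loop, reusing the OneWayAnova import above (the toy dataset choice is an assumption):

import numpy as np

ds = datasets['uni2small']  # bundled toy dataset with 'targets' and 'chunks'
for null_dist in nulldist_sweep:
    anova = OneWayAnova(null_dist=null_dist, enable_ca=['null_prob'])
    fscores = anova(ds)
    # p-values are stored alongside the measure's result
    assert np.all(anova.ca.null_prob.samples <= 1.0)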
Example #4
def do_searchlight(glm_dataset, radius, output_basename,
                   with_null_prob=False, clf=LinearCSVMC(space='condition')):
    if glob(output_basename + "*"):
        print("sl already ran")
        return
    splt = ChainNode([NFoldPartitioner(),
                      Balancer(attr='condition', count=1,
                               limit='partitions', apply_selection=True)],
                     space='partitions')
    #splt = NFoldPartitioner()
    cv = CrossValidation(clf, splt,
                         errorfx=mean_match_accuracy,
                         enable_ca=['stats'], postproc=mean_sample())
    distr_est = []
    if with_null_prob:
        permutator = AttributePermutator('condition', count=100,
                                         limit='chunks')
        distr_est = MCNullDist(permutator, tail='left',
                               enable_ca=['dist_samples'])
        """
        repeater   = Repeater(count=100)
        permutator = AttributePermutator('condition', limit={'partitions': 1}, count=1)
        null_cv = CrossValidation(clf, ChainNode([splt, permutator],space=splt.get_space()),
                      postproc=mean_sample())
        null_sl = sphere_searchlight(null_cv, radius=radius, space='voxel_indices',
                         enable_ca=['roi_sizes'])
        distr_est = MCNullDist(repeater,tail='left', measure=null_sl,
                       enable_ca=['dist_samples'])

        """
        # run the searchlight with the permutation-based null distribution
        sl = sphere_searchlight(cv, radius=radius, space='voxel_indices',
                                null_dist=distr_est,
                                enable_ca=['roi_sizes', 'roi_feature_ids']
                                # ,result_fx=_fill_in_scattered_results  # average across all spheres
                                )
    else:
        kwa = {'voxel_indices': KNNNeighbourhood(radius,
                                                 glm_dataset.fa['voxel_indices'])}
        qe = IndexQueryEngine(**kwa)
        # init the searchlight with the query engine
        sl = Searchlight(cv, queryengine=qe, roi_ids=None,
                         enable_ca=['roi_sizes', 'roi_feature_ids']
                         # ,results_fx=_fill_in_scattered_results  # average across all spheres
                         )
    # ds = glm_dataset.copy(deep=False,
    #		       sa=['condition','chunks'],
    #		       fa=['voxel_indices'],
    #		       a=['mapper'])
    from datetime import datetime
    print("starting sl {}".format(datetime.now()))
    sl_map = sl(glm_dataset)
    print("finished sl {}".format(datetime.now()))
    import pickle
    pickle.dump(sl_map, open("{}_sl_map.p".format(output_basename), "wb"))
#    pickle.dump(sl.ca.roi_feature_ids, open("{}_sl_feature_ids.p".format(output_basename), "wb"))
#    print len(sl.ca.roi_feature_ids[0])
    acc_results = map2nifti(sl_map, imghdr=glm_dataset.a.imghdr)
    acc_nii_filename = '{}-acc.nii.gz'.format(output_basename)
    acc_results.to_filename(acc_nii_filename)
    # convert accuracies to errors (err = 1 - acc)
    sl_map.samples *= -1
    sl_map.samples += 1
    niftiresults = map2nifti(sl_map, imghdr=glm_dataset.a.imghdr)
    niftiresults.to_filename('{}-err.nii.gz'.format(output_basename))
    # TODO: check p-value map
    if with_null_prob:
        nullt_results = map2nifti(sl_map, data=sl.ca.null_t,
                                  imghdr=glm_dataset.a.imghdr)
        nullt_results.to_filename('{}-t.nii.gz'.format(output_basename))
        nullprob_results = map2nifti(sl_map, data=sl.ca.null_prob,
                                     imghdr=glm_dataset.a.imghdr)
        nullprob_results.to_filename('{}-prob.nii.gz'.format(output_basename))
        nullprob_results = map2nifti(sl_map, data=distr_est.cdf(sl_map.samples),
                                     imghdr=glm_dataset.a.imghdr)
        nullprob_results.to_filename('{}-cdf.nii.gz'.format(output_basename))
    return sl_map
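A hypothetical invocation of the helper above; the dataset, file names, and attribute assignments are assumptions standing in for a real NIfTI-backed GLM dataset:

import numpy as np
from mvpa2.datasets.mri import fmri_dataset

# hypothetical inputs -- substitute your own beta images and mask
ds = fmri_dataset('betas.nii.gz', mask='brain_mask.nii.gz')
ds.sa['condition'] = ['face', 'house'] * (len(ds) // 2)   # per-volume labels
ds.sa['chunks'] = np.repeat(np.arange(len(ds) // 8), 8)   # e.g. 8 volumes per run
sl_map = do_searchlight(ds, radius=3, output_basename='sub01_sl',
                        with_null_prob=True)
# writes sub01_sl-acc.nii.gz and sub01_sl-err.nii.gz, plus the null-testing
# maps sub01_sl-t.nii.gz, sub01_sl-prob.nii.gz and sub01_sl-cdf.nii.gz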
Example #5
def get_crossvalidation_instance(learner,
                                 partitioner,
                                 errorfx,
                                 sampling_repetitions=1,
                                 learner_space='targets',
                                 balance_training=None,
                                 permutations=0,
                                 avg_datafold_results=True,
                                 prob_tail='left'):
    import copy
    from mvpa2.base.node import ChainNode
    from mvpa2.measures.base import CrossValidation
    if balance_training is not None:
        # balance training data
        try:
            amount = int(balance_training)
        except ValueError:
            try:
                amount = float(balance_training)
            except ValueError:
                amount = balance_training
        from mvpa2.generators.resampling import Balancer
        balancer = Balancer(amount=amount,
                            attr=learner_space,
                            count=sampling_repetitions,
                            limit={partitioner.get_space(): 1},
                            apply_selection=True,
                            include_offlimit=True)
    else:
        balancer = None
    # set learner space
    learner.set_space(learner_space)
    # setup generator for data folding -- put in a chain node for easy
    # amending
    gennode = ChainNode([partitioner], space=partitioner.get_space())
    if avg_datafold_results:
        from mvpa2.mappers.fx import mean_sample
        postproc = mean_sample()
    else:
        postproc = None
    if balancer is not None:
        # enable balancing step for each partitioning step
        gennode.append(balancer)
    if permutations > 0:
        from mvpa2.generators.base import Repeater
        from mvpa2.generators.permutation import AttributePermutator
        from mvpa2.clfs.stats import MCNullDist
        # how often do we want to shuffle the data
        repeater = Repeater(count=permutations)
        # permute the training part of a dataset exactly ONCE
        permutator = AttributePermutator(learner_space,
                                         limit={partitioner.get_space(): 1},
                                         count=1)
        # CV with null-distribution estimation that permutes the training data for
        # each fold independently
        perm_gen_node = copy.deepcopy(gennode)
        perm_gen_node.append(permutator)
        null_cv = CrossValidation(learner,
                                  perm_gen_node,
                                  postproc=postproc,
                                  errorfx=errorfx)
        # Monte Carlo distribution estimator
        distr_est = MCNullDist(repeater,
                               tail=prob_tail,
                               measure=null_cv,
                               enable_ca=['dist_samples'])
        # pass the p-values as feature attributes on to the results
        pass_attr = [('ca.null_prob', 'fa', 1)]
    else:
        distr_est = None
        pass_attr = None
    # final CV node
    cv = CrossValidation(learner,
                         gennode,
                         errorfx=errorfx,
                         null_dist=distr_est,
                         postproc=postproc,
                         enable_ca=['stats', 'null_prob'],
                         pass_attr=pass_attr)
    return cv
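A hedged usage sketch, wiring the factory above to stock PyMVPA parts; the classifier choice and toy dataset are assumptions:

from mvpa2.clfs.smlr import SMLR
from mvpa2.generators.partition import NFoldPartitioner
from mvpa2.misc.errorfx import mean_mismatch_error
from mvpa2.testing.datasets import datasets

ds = datasets['uni2small']  # bundled toy dataset with 'targets' and 'chunks'
cv = get_crossvalidation_instance(SMLR(),
                                  NFoldPartitioner(),
                                  mean_mismatch_error,
                                  permutations=50,
                                  prob_tail='left')
res = cv(ds)
print(cv.ca.null_prob.samples)  # permutation-based p-value(s)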
Example #6
def do_searchlight(glm_dataset, radius, output_basename, with_null_prob=False):
    clf = LinearCSVMC(space='condition')
    # clf = RbfCSVMC(C=5.0)
    splt = NFoldPartitioner()
    cv = CrossValidation(clf,
                         splt,
                         errorfx=mean_match_accuracy,
                         enable_ca=['stats'],
                         postproc=mean_sample())
    distr_est = []
    if with_null_prob:
        permutator = AttributePermutator('condition',
                                         count=100,
                                         limit='chunks')
        distr_est = MCNullDist(permutator,
                               tail='left',
                               enable_ca=['dist_samples'])
        """
        repeater   = Repeater(count=100)
        permutator = AttributePermutator('condition', limit={'partitions': 1}, count=1)
        null_cv = CrossValidation(clf, ChainNode([splt, permutator], space=splt.get_space()),
                                  postproc=mean_sample())
        null_sl = sphere_searchlight(null_cv, radius=radius, space='voxel_indices',
                                     enable_ca=['roi_sizes'])
        distr_est = MCNullDist(repeater, tail='left', measure=null_sl,
                               enable_ca=['dist_samples'])
        """
        sl = sphere_searchlight(cv,
                                radius=radius,
                                space='voxel_indices',
                                null_dist=distr_est,
                                enable_ca=['roi_sizes', 'roi_feature_ids'])
    else:
        sl = sphere_searchlight(cv,
                                radius=radius,
                                space='voxel_indices',
                                enable_ca=['roi_sizes', 'roi_feature_ids'])
    #ds = glm_dataset.copy(deep=False,
    #		       sa=['condition','chunks'],
    #		       fa=['voxel_indices'],
    #		       a=['mapper'])
    #debug.active += ["SLC"]
    sl_map = sl(glm_dataset)
    accresults = map2nifti(sl_map, imghdr=glm_dataset.a.imghdr)
    accresults.to_filename('{}-acc.nii.gz'.format(output_basename))
    # convert accuracies to errors (err = 1 - acc)
    sl_map.samples *= -1
    sl_map.samples += 1
    niftiresults = map2nifti(sl_map, imghdr=glm_dataset.a.imghdr)
    niftiresults.to_filename('{}-err.nii.gz'.format(output_basename))
    # TODO: save p-value map
    if with_null_prob:
        nullt_results = map2nifti(sl_map,
                                  data=sl.ca.null_t,
                                  imghdr=glm_dataset.a.imghdr)
        nullt_results.to_filename('{}-t.nii.gz'.format(output_basename))
        nullprob_results = map2nifti(sl_map,
                                     data=sl.ca.null_prob,
                                     imghdr=glm_dataset.a.imghdr)
        nullprob_results.to_filename('{}-prob.nii.gz'.format(output_basename))
        nullprob_results = map2nifti(sl_map,
                                     data=distr_est.cdf(sl_map.samples),
                                     imghdr=glm_dataset.a.imghdr)
        nullprob_results.to_filename('{}-cdf.nii.gz'.format(output_basename))
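For completeness, a hedged sketch of how the null-probability map written above might be thresholded afterwards; the basename and the nibabel usage are assumptions, not part of the snippet:

import nibabel as nib
import numpy as np

# hypothetical basename matching a previous do_searchlight() run
prob_img = nib.load('sub01_sl-prob.nii.gz')
prob = prob_img.get_fdata()
# with tail='left' on errors, small null_prob marks above-chance accuracy
sig_mask = (prob > 0) & (prob < 0.05)
print('significant voxels:', int(np.sum(sig_mask)))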