def test_partial_searchlight_with_confusion_matrix(self):
    ds = self.dataset
    from mvpa2.clfs.stats import MCNullDist
    from mvpa2.mappers.fx import mean_sample, sum_sample

    # compute N-1 cross-validation for each sphere
    cm = ConfusionMatrix(labels=ds.UT)
    cv = CrossValidation(
        sample_clf_lin, NFoldPartitioner(),
        # we have to assure that the matrix does not get flattened by
        # the first vstack in cv and then hstack in searchlight --
        # thus 2 leading dimensions
        # TODO: RF? make searchlight/crossval smarter?
        errorfx=lambda *a: cm(*a)[None, None, :])
    # construct diameter 2 (or just radius 1) searchlight
    sl = sphere_searchlight(cv, radius=1, center_ids=[3, 5, 50])

    # our regular searchlight -- to compare results
    cv_gross = CrossValidation(sample_clf_lin, NFoldPartitioner())
    sl_gross = sphere_searchlight(cv_gross, radius=1, center_ids=[3, 5, 50])

    # run searchlights
    res = sl(ds)
    res_gross = sl_gross(ds)

    # only three spheres, but errors for all CV-folds and the complete
    # confusion matrix
    assert_equal(res.shape, (len(ds.UC), 3, len(ds.UT), len(ds.UT)))
    assert_equal(res_gross.shape, (len(ds.UC), 3))

    # briefly inspect the confusion matrices
    mat = res.samples
    # since the input dataset is probably balanced (otherwise adjust
    # to be per label): sums within columns (thus axis=-2) should
    # be identical to the per-class/chunk number of samples
    samples_per_classchunk = len(ds) // (len(ds.UT) * len(ds.UC))
    ok_(np.all(np.sum(mat, axis=-2) == samples_per_classchunk))
    # and if we compute accuracies manually -- they should
    # correspond to the ones from sl_gross
    assert_array_almost_equal(
        res_gross.samples,
        # from accuracies to errors
        1 - (mat[..., 0, 0] + mat[..., 1, 1]).astype(float)
            / (2 * samples_per_classchunk))

    # and now for those who remained seated -- let's perform H0 MC
    # testing of this searchlight... just a silly one with a minimal
    # number of permutations
    no_permutations = 10
    permutator = AttributePermutator('targets', count=no_permutations)

    # once again -- we need an explicit leading dimension to avoid
    # vstacking during cross-validation
    cv.postproc = lambda x: sum_sample()(x)[None, :]
    sl = sphere_searchlight(cv, radius=1, center_ids=[3, 5, 50],
                            null_dist=MCNullDist(permutator, tail='right',
                                                 enable_ca=['dist_samples']))
    res_perm = sl(ds)
    # XXX all of res_perm, sl.ca.null_prob and
    # sl.null_dist.ca.dist_samples carry a degenerate leading
    # dimension, probably due to the new axis introduced above
    # within cv.postproc
    assert_equal(res_perm.shape, (1, 3, 2, 2))
    assert_equal(sl.null_dist.ca.dist_samples.shape,
                 res_perm.shape + (no_permutations,))
    assert_equal(sl.ca.null_prob.shape, res_perm.shape)
    # just to make sure ;)
    ok_(np.all(sl.ca.null_prob.samples >= 0))
    ok_(np.all(sl.ca.null_prob.samples <= 1))

    # we should have got sums of hits across the splits
    assert_array_equal(np.sum(mat, axis=0), res_perm.samples[0])
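# Illustration (added, not part of the original test): why the errorfx
# above prepends two singleton axes. CrossValidation vstacks per-fold
# results along axis 0 and the searchlight hstacks per-sphere results
# along axis 1, so a KxK confusion matrix needs two leading dimensions
# to survive both stackings intact. A minimal NumPy sketch:
def _demo_confusion_matrix_stacking():
    import numpy as np
    cm_fold = np.zeros((2, 2))[None, None, :]  # one fold, one sphere: (1, 1, 2, 2)
    folds = np.vstack([cm_fold] * 6)           # stack 6 CV-folds: (6, 1, 2, 2)
    spheres = np.hstack([folds] * 3)           # stack 3 spheres: (6, 3, 2, 2)
    assert spheres.shape == (6, 3, 2, 2)       # matches res.shape in the test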
def test_gnbsearchlight_permutations():
    import numpy as np
    import mvpa2
    from mvpa2.base.node import ChainNode
    from mvpa2.clfs.gnb import GNB
    from mvpa2.generators.base import Repeater
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.generators.permutation import AttributePermutator
    from mvpa2.testing.datasets import datasets
    from mvpa2.measures.base import CrossValidation
    from mvpa2.measures.gnbsearchlight import sphere_gnbsearchlight
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.mappers.fx import mean_sample
    from mvpa2.misc.errorfx import mean_mismatch_error
    from mvpa2.clfs.stats import MCNullDist
    from mvpa2.testing.tools import assert_raises, ok_, assert_array_less

    # mvpa2.debug.active = ['APERM', 'SLC']  #, 'REPM']
    # mvpa2.debug.metrics += ['pid']
    count = 10
    nproc = 1 + int(mvpa2.externals.exists('pprocess'))
    ds = datasets['3dsmall'].copy()
    ds.fa['voxel_indices'] = ds.fa.myspace

    slkwargs = dict(radius=3, space='voxel_indices',
                    enable_ca=['roi_sizes'],
                    center_ids=[1, 10, 70, 100])

    mvpa2.seed(mvpa2._random_seed)
    clf = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')

    repeater = Repeater(count=count)
    permutator = AttributePermutator('targets', limit={'partitions': 1},
                                     count=1)

    null_sl = sphere_gnbsearchlight(
        clf, ChainNode([splt, permutator], space=splt.get_space()),
        postproc=mean_sample(), errorfx=mean_mismatch_error,
        **slkwargs)

    distr_est = MCNullDist(repeater, tail='left', measure=null_sl,
                           enable_ca=['dist_samples'])
    sl = sphere_gnbsearchlight(clf, splt,
                               reuse_neighbors=True,
                               null_dist=distr_est,
                               postproc=mean_sample(),
                               errorfx=mean_mismatch_error,
                               **slkwargs)
    if __debug__:
        # this check runs only without python -O
        assert_raises(NotImplementedError, sl, ds)

    # ad-hoc searchlights can't yet handle varying targets across partitions
    if False:
        # enable once the above limitation is removed
        sl_map = sl(ds)
        sl_null_prob = sl.ca.null_prob.samples.copy()

    mvpa2.seed(mvpa2._random_seed)
    ### 'normal' Searchlight
    clf = GNB()
    splt = NFoldPartitioner(cvtype=2, attr='chunks')
    repeater = Repeater(count=count)
    permutator = AttributePermutator('targets', limit={'partitions': 1},
                                     count=1)
    # passing rng=np.random.RandomState(0) above would trigger a failure,
    # since the same np.random state would be reused across all pprocesses
    null_cv = CrossValidation(
        clf, ChainNode([splt, permutator], space=splt.get_space()),
        postproc=mean_sample())
    null_sl_normal = sphere_searchlight(null_cv, nproc=nproc, **slkwargs)

    distr_est_normal = MCNullDist(repeater, tail='left',
                                  measure=null_sl_normal,
                                  enable_ca=['dist_samples'])
    cv = CrossValidation(clf, splt, errorfx=mean_mismatch_error,
                         enable_ca=['stats'], postproc=mean_sample())
    sl = sphere_searchlight(cv, nproc=nproc, null_dist=distr_est_normal,
                            **slkwargs)
    sl_map_normal = sl(ds)
    sl_null_prob_normal = sl.ca.null_prob.samples.copy()

    # For every feature we should get some variance in the estimates.
    # In case of failure they are all really close to each other (up to
    # numerical precision), so the variance will be close to 0
    assert_array_less(
        -np.var(distr_est_normal.ca.dist_samples.samples[0], axis=1),
        -1e-5)
    for s in distr_est_normal.ca.dist_samples.samples[0]:
        ok_(len(np.unique(s)) > 1)
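# Note (added for illustration): limit={'partitions': 1} above restricts
# the shuffling to the training portion of each split -- NFoldPartitioner
# marks training samples with partitions == 1 and testing samples with
# partitions == 2 -- so every Monte Carlo sample retrains on permuted
# targets while testing on intact ones. A minimal sketch using a bundled
# testing dataset:
def _demo_training_only_permutation():
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.generators.permutation import AttributePermutator
    from mvpa2.testing.datasets import datasets
    ds = datasets['uni2small']
    pds = next(NFoldPartitioner().generate(ds))      # one partitioned dataset
    perm = AttributePermutator('targets', limit={'partitions': 1}, count=1)
    ppds = next(perm.generate(pds))                  # training targets shuffled
    testing = pds.sa.partitions == 2
    # the testing portion keeps its original targets
    assert (ppds.targets[testing] == pds.targets[testing]).all()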
from mvpa2.testing.datasets import datasets
from mvpa2 import cfg
from mvpa2.base import externals
from mvpa2.clfs.stats import MCNullDist, FixedNullDist, NullDist
from mvpa2.generators.permutation import AttributePermutator
from mvpa2.datasets import Dataset
from mvpa2.measures.anova import OneWayAnova, CompoundOneWayAnova
from mvpa2.misc.fx import double_gamma_hrf, single_gamma_hrf
from mvpa2.measures.corrcoef import pearson_correlation

# Prepare a few distributions to test
#kwargs = {'permutations': 10, 'tail': 'any'}
permutator = AttributePermutator('targets', count=30)
nulldist_sweep = [MCNullDist(permutator, tail='any'),
                  MCNullDist(permutator, tail='right')]

if externals.exists('scipy'):
    from mvpa2.support.stats import scipy
    from scipy.stats import f_oneway
    from mvpa2.clfs.stats import rv_semifrozen
    nulldist_sweep += [
        MCNullDist(permutator, scipy.stats.norm, tail='any'),
        MCNullDist(permutator, scipy.stats.norm, tail='right'),
        MCNullDist(permutator, rv_semifrozen(scipy.stats.norm, loc=0),
                   tail='right'),
        MCNullDist(permutator, scipy.stats.expon, tail='right'),
        FixedNullDist(scipy.stats.norm(0, 10.0), tail='any'),
    ]
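# Usage sketch (added; not part of the original module): each entry of
# nulldist_sweep can serve as the null_dist of a featurewise measure such
# as OneWayAnova, yielding per-feature H0 probabilities. 'uni2small' is
# assumed to be one of the bundled testing datasets.
def _demo_nulldist_sweep():
    for nd in nulldist_sweep:
        anova = OneWayAnova(null_dist=nd, enable_ca=['null_prob'])
        f_scores = anova(datasets['uni2small'])  # featurewise F-scores
        p_values = anova.ca.null_prob            # per-feature H0 probability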
def do_searchlight(glm_dataset, radius, output_basename,
                   with_null_prob=False, clf=LinearCSVMC(space='condition')):
    if glob(output_basename + "*"):
        print("sl already ran")
        return
    splt = ChainNode([NFoldPartitioner(),
                      Balancer(attr='condition', count=1,
                               limit='partitions', apply_selection=True)],
                     space='partitions')
    #splt = NFoldPartitioner()
    cv = CrossValidation(clf, splt,
                         errorfx=mean_match_accuracy,
                         enable_ca=['stats'],
                         postproc=mean_sample())
    distr_est = []
    if with_null_prob:
        permutator = AttributePermutator('condition', count=100,
                                         limit='chunks')
        distr_est = MCNullDist(permutator, tail='left',
                               enable_ca=['dist_samples'])
        """
        Alternative null-distribution setup (disabled): permute only the
        training portion of each fold
        repeater = Repeater(count=100)
        permutator = AttributePermutator('condition',
                                         limit={'partitions': 1}, count=1)
        null_cv = CrossValidation(
            clf, ChainNode([splt, permutator], space=splt.get_space()),
            postproc=mean_sample())
        null_sl = sphere_searchlight(null_cv, radius=radius,
                                     space='voxel_indices',
                                     enable_ca=['roi_sizes'])
        distr_est = MCNullDist(repeater, tail='left', measure=null_sl,
                               enable_ca=['dist_samples'])
        """
        sl = sphere_searchlight(cv, radius=radius, space='voxel_indices',
                                null_dist=distr_est,
                                enable_ca=['roi_sizes', 'roi_feature_ids'])
    else:
        kwa = {'voxel_indices': KNNNeighbourhood(
            radius, glm_dataset.fa['voxel_indices'])}
        qe = IndexQueryEngine(**kwa)
        # init the searchlight with the query engine
        sl = Searchlight(cv, queryengine=qe, roi_ids=None,
                         enable_ca=['roi_sizes', 'roi_feature_ids'])
        #sl = sphere_searchlight(cv, radius=radius, space='voxel_indices')
    #ds = glm_dataset.copy(deep=False, sa=['condition', 'chunks'],
    #                      fa=['voxel_indices'], a=['mapper'])
    from datetime import datetime
    print("starting sl {}".format(datetime.now()))
    sl_map = sl(glm_dataset)
    print("finished sl {}".format(datetime.now()))
    import pickle
    pickle.dump(sl_map, open("{}_sl_map.p".format(output_basename), "wb"))
    #pickle.dump(sl.ca.roi_feature_ids,
    #            open("{}_sl_feature_ids.p".format(output_basename), "wb"))

    acc_results = map2nifti(sl_map, imghdr=glm_dataset.a.imghdr)
    acc_nii_filename = '{}-acc.nii.gz'.format(output_basename)
    acc_results.to_filename(acc_nii_filename)
    # turn accuracies into errors in place: err = 1 - acc
    sl_map.samples *= -1
    sl_map.samples += 1
    niftiresults = map2nifti(sl_map, imghdr=glm_dataset.a.imghdr)
    niftiresults.to_filename('{}-err.nii.gz'.format(output_basename))
    # TODO: check the p-value map
    if with_null_prob:
        nullt_results = map2nifti(sl_map, data=sl.ca.null_t,
                                  imghdr=glm_dataset.a.imghdr)
        nullt_results.to_filename('{}-t.nii.gz'.format(output_basename))
        nullprob_results = map2nifti(sl_map, data=sl.ca.null_prob,
                                     imghdr=glm_dataset.a.imghdr)
        nullprob_results.to_filename('{}-prob.nii.gz'.format(output_basename))
        nullprob_results = map2nifti(sl_map, data=distr_est.cdf(sl_map.samples),
                                     imghdr=glm_dataset.a.imghdr)
        nullprob_results.to_filename('{}-cdf.nii.gz'.format(output_basename))
    return sl_map
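# Usage sketch (added; names are illustrative): glm_ds stands for a PyMVPA
# dataset carrying 'condition' and 'chunks' sample attributes plus
# 'voxel_indices' feature attributes, e.g. as produced by fmri_dataset
# followed by GLM modeling.
#
#   sl_map = do_searchlight(glm_ds, radius=3, output_basename='sub01',
#                           with_null_prob=True)
#
# This writes sub01-acc.nii.gz and sub01-err.nii.gz, and with the null
# distribution enabled also sub01-t.nii.gz, sub01-prob.nii.gz and
# sub01-cdf.nii.gz.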
def get_crossvalidation_instance(learner, partitioner, errorfx,
                                 sampling_repetitions=1,
                                 learner_space='targets',
                                 balance_training=None,
                                 permutations=0,
                                 avg_datafold_results=True,
                                 prob_tail='left'):
    import copy
    from mvpa2.base.node import ChainNode
    from mvpa2.measures.base import CrossValidation
    if balance_training is not None:
        # balance training data
        try:
            amount = int(balance_training)
        except ValueError:
            try:
                amount = float(balance_training)
            except ValueError:
                amount = balance_training
        from mvpa2.generators.resampling import Balancer
        balancer = Balancer(amount=amount, attr=learner_space,
                            count=sampling_repetitions,
                            limit={partitioner.get_space(): 1},
                            apply_selection=True,
                            include_offlimit=True)
    else:
        balancer = None
    # set learner space
    learner.set_space(learner_space)
    # setup generator for data folding -- put in a chain node for easy
    # amending
    gennode = ChainNode([partitioner], space=partitioner.get_space())
    if avg_datafold_results:
        from mvpa2.mappers.fx import mean_sample
        postproc = mean_sample()
    else:
        postproc = None
    if balancer is not None:
        # enable balancing step for each partitioning step
        gennode.append(balancer)
    if permutations > 0:
        from mvpa2.generators.base import Repeater
        from mvpa2.generators.permutation import AttributePermutator
        from mvpa2.clfs.stats import MCNullDist
        # how often do we want to shuffle the data
        repeater = Repeater(count=permutations)
        # permute the training part of a dataset exactly ONCE
        permutator = AttributePermutator(learner_space,
                                         limit={partitioner.get_space(): 1},
                                         count=1)
        # CV with null-distribution estimation that permutes the training
        # data for each fold independently
        perm_gen_node = copy.deepcopy(gennode)
        perm_gen_node.append(permutator)
        null_cv = CrossValidation(learner, perm_gen_node, postproc=postproc,
                                  errorfx=errorfx)
        # Monte Carlo distribution estimator
        distr_est = MCNullDist(repeater, tail=prob_tail, measure=null_cv,
                               enable_ca=['dist_samples'])
        # pass the p-values as feature attributes on to the results
        pass_attr = [('ca.null_prob', 'fa', 1)]
    else:
        distr_est = None
        pass_attr = None
    # final CV node
    cv = CrossValidation(learner, gennode, errorfx=errorfx,
                         null_dist=distr_est, postproc=postproc,
                         enable_ca=['stats', 'null_prob'],
                         pass_attr=pass_attr)
    return cv
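# Minimal usage sketch (added for illustration): wiring the factory above
# to a linear SVM with leave-one-chunk-out folding and 100 permutations.
# The bundled testing dataset 'uni2small' stands in for real data; an SVM
# backend (libsvm or shogun) is assumed to be available.
def _demo_get_crossvalidation_instance():
    from mvpa2.clfs.svm import LinearCSVMC
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.misc.errorfx import mean_mismatch_error
    from mvpa2.testing.datasets import datasets
    cv = get_crossvalidation_instance(LinearCSVMC(), NFoldPartitioner(),
                                      mean_mismatch_error, permutations=100)
    err = cv(datasets['uni2small'])  # fold-averaged misclassification error
    return err, cv.ca.null_prob      # Monte Carlo p-values under H0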
def do_searchlight(glm_dataset, radius, output_basename, with_null_prob=False):
    clf = LinearCSVMC(space='condition')
    # clf = RbfCSVMC(C=5.0)
    splt = NFoldPartitioner()
    cv = CrossValidation(clf, splt,
                         errorfx=mean_match_accuracy,
                         enable_ca=['stats'],
                         postproc=mean_sample())
    distr_est = []
    if with_null_prob:
        permutator = AttributePermutator('condition', count=100,
                                         limit='chunks')
        distr_est = MCNullDist(permutator, tail='left',
                               enable_ca=['dist_samples'])
        """
        Alternative null-distribution setup (disabled): permute only the
        training portion of each fold
        repeater = Repeater(count=100)
        permutator = AttributePermutator('condition',
                                         limit={'partitions': 1}, count=1)
        null_cv = CrossValidation(
            clf, ChainNode([splt, permutator], space=splt.get_space()),
            postproc=mean_sample())
        null_sl = sphere_searchlight(null_cv, radius=radius,
                                     space='voxel_indices',
                                     enable_ca=['roi_sizes'])
        distr_est = MCNullDist(repeater, tail='left', measure=null_sl,
                               enable_ca=['dist_samples'])
        """
        sl = sphere_searchlight(cv, radius=radius, space='voxel_indices',
                                null_dist=distr_est,
                                enable_ca=['roi_sizes', 'roi_feature_ids'])
    else:
        sl = sphere_searchlight(cv, radius=radius, space='voxel_indices',
                                enable_ca=['roi_sizes', 'roi_feature_ids'])
    #ds = glm_dataset.copy(deep=False, sa=['condition', 'chunks'],
    #                      fa=['voxel_indices'], a=['mapper'])
    #debug.active += ["SLC"]
    sl_map = sl(glm_dataset)

    acc_results = map2nifti(sl_map, imghdr=glm_dataset.a.imghdr)
    acc_results.to_filename('{}-acc.nii.gz'.format(output_basename))
    # turn accuracies into errors in place: err = 1 - acc
    sl_map.samples *= -1
    sl_map.samples += 1
    niftiresults = map2nifti(sl_map, imghdr=glm_dataset.a.imghdr)
    niftiresults.to_filename('{}-err.nii.gz'.format(output_basename))
    # TODO: save the p-value map
    if with_null_prob:
        nullt_results = map2nifti(sl_map, data=sl.ca.null_t,
                                  imghdr=glm_dataset.a.imghdr)
        nullt_results.to_filename('{}-t.nii.gz'.format(output_basename))
        nullprob_results = map2nifti(sl_map, data=sl.ca.null_prob,
                                     imghdr=glm_dataset.a.imghdr)
        nullprob_results.to_filename('{}-prob.nii.gz'.format(output_basename))
        nullprob_results = map2nifti(sl_map, data=distr_est.cdf(sl_map.samples),
                                     imghdr=glm_dataset.a.imghdr)
        nullprob_results.to_filename('{}-cdf.nii.gz'.format(output_basename))