def _test_mcasey20120222():  # pragma: no cover
    # http://lists.alioth.debian.org/pipermail/pkg-exppsy-pymvpa/2012q1/002034.html
    # This one is conditioned on allowing # of samples to be changed
    # by the mapper provided to MappedClassifier.  See
    # https://github.com/yarikoptic/PyMVPA/tree/_tent/allow_ch_nsamples
    import numpy as np
    from mvpa2.datasets.base import dataset_wizard
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.mappers.base import ChainMapper
    from mvpa2.mappers.svd import SVDMapper
    from mvpa2.mappers.fx import mean_group_sample
    from mvpa2.clfs.svm import LinearCSVMC
    from mvpa2.clfs.meta import MappedClassifier
    from mvpa2.measures.base import CrossValidation

    mapper = ChainMapper([mean_group_sample(['targets', 'chunks']),
                          SVDMapper()])
    clf = MappedClassifier(LinearCSVMC(), mapper)
    cvte = CrossValidation(clf, NFoldPartitioner(),
                           enable_ca=['repetition_results', 'stats'])
    ds = dataset_wizard(
        samples=np.arange(32).reshape((8, -1)),
        targets=[1, 1, 2, 2, 1, 1, 2, 2],
        chunks=[1, 1, 1, 1, 2, 2, 2, 2])
    errors = cvte(ds)
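# A minimal sketch (assuming a stock PyMVPA install) of what the first link
# in the chain above does: mean_group_sample(['targets', 'chunks']) averages
# samples within each unique (target, chunk) combination, so the 8-sample
# dataset collapses to 4 group means before SVDMapper ever sees it.
import numpy as np
from mvpa2.datasets.base import dataset_wizard
from mvpa2.mappers.fx import mean_group_sample

ds = dataset_wizard(samples=np.arange(32).reshape((8, -1)),
                    targets=[1, 1, 2, 2, 1, 1, 2, 2],
                    chunks=[1, 1, 1, 1, 2, 2, 2, 2])
# unique (targets, chunks) pairs: (1,1), (2,1), (1,2), (2,2)
dsm = mean_group_sample(['targets', 'chunks'])(ds)
assert dsm.nsamples == 4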
def main(subject, study_dir, mask, suffix='_stim2'):
    from mvpa2.mappers.zscore import zscore
    from mvpa2.mappers.fx import mean_group_sample
    from wikisim import mvpa

    # load subject data
    sp = su.SubjPath(subject, study_dir)
    vols = task.prex_vols(sp.path('behav', 'log'))

    # load fmri data
    ds = mvpa.load_prex_beta(sp, suffix, mask, verbose=1)

    # zscore
    ds.sa['run'] = vols.run.values
    zscore(ds, chunks_attr='run')

    # average over item presentations
    ds.sa['itemno'] = vols.itemno.to_numpy()
    m = mean_group_sample(['itemno'])
    dsm = ds.get_mapped(m)
    m_vols = vols.groupby('itemno', as_index=False).mean()

    # save data samples and corresponding volume information
    res_dir = os.path.join(sp.study_dir, 'batch', 'glm', 'prex' + suffix,
                           'roi', mask)
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    mat_file = os.path.join(res_dir, f'pattern_{subject}.txt')
    tab_file = os.path.join(res_dir, f'pattern_{subject}.csv')
    np.savetxt(mat_file, dsm.samples)
    m_vols.to_csv(tab_file)
def get_fake_data(nsubjects=20, noise_level=0.2, nbogus_classes=0):
    orig_ds = mean_group_sample(['targets'])(testing_datasets['uni3large'])
    # and creating an additional target which is a composition of the other
    # two, so it should be closer to them than to the left-out L2
    classes_data = [
        orig_ds.samples,
        orig_ds[0].samples + orig_ds[1].samples,
        orig_ds[1].samples + 4 * orig_ds[2].samples
    ]
    classes_targets = list(orig_ds.T) + ['L0+1', 'L1+4*2']
    if nbogus_classes:
        classes_data.append(
            np.zeros((nbogus_classes, classes_data[0].shape[1]), dtype=float))
        classes_targets += ['B%02d' % i for i in xrange(nbogus_classes)]
    proto_ds = dataset_wizard(np.vstack(classes_data), targets=classes_targets)
    ntargets = len(proto_ds.UT)
    dss = []
    for i in xrange(nsubjects):
        R = get_random_rotation(proto_ds.nfeatures)
        ds = dataset_wizard(np.dot(proto_ds.samples, R), targets=proto_ds.T)
        #ds = dataset_wizard(proto_ds.samples, targets=proto_ds.T)
        ds.sa['subjects'] = [i]
        # And select a varying number of features
        ds = ds[:, :np.random.randint(10, ds.nfeatures)]
        # Add some noise
        ds.samples += np.random.normal(size=ds.shape) * noise_level
        dss.append(ds)
    return dss
def test_PDistTargetSimilaritySearchlight():
    # Test ability to use PDistTargetSimilarity in a searchlight
    from mvpa2.testing.datasets import datasets
    from mvpa2.mappers.fx import mean_group_sample
    from mvpa2.mappers.shape import TransposeMapper
    from mvpa2.measures.searchlight import sphere_searchlight
    ds = datasets['3dsmall'][:, :3]
    ds.fa['voxel_indices'] = ds.fa.myspace
    # use chunks values (4 of them) for targets
    ds.sa['targets'] = ds.sa.chunks
    ds = mean_group_sample(['chunks'])(ds)
    tdsm = np.arange(6)
    # We can run on the full dataset
    tdcm1 = PDistTargetSimilarity(tdsm)
    a1 = tdcm1(ds)
    assert_array_equal(a1.fa.metrics, ['rho', 'p'])

    tdcm1_rho = PDistTargetSimilarity(tdsm, corrcoef_only=True)
    sl_rho = sphere_searchlight(tdcm1_rho)(ds)
    assert_array_equal(sl_rho.shape, (1, ds.nfeatures))

    # now with both, but we need to transpose datasets
    tdcm1_both = PDistTargetSimilarity(tdsm, postproc=TransposeMapper())
    sl_both = sphere_searchlight(tdcm1_both)(ds)
    assert_array_equal(sl_both.shape, (2, ds.nfeatures))
    assert_array_equal(sl_both.sa.metrics, ['rho', 'p'])
    # rho must be exactly the same
    assert_array_equal(sl_both.samples[0], sl_rho.samples[0])
    # just because we are here and we can
    # Actually here for some reason assert_array_lequal gave me trouble
    assert_true(np.all(sl_both.samples[1] <= 1.0))
    assert_true(np.all(0 <= sl_both.samples[1]))
def _prepare_ds(self, ds):
    if self.params.sattr is not None:
        mgs = mean_group_sample(attrs=self.params.sattr)
        ds_ = mgs(ds)
    else:
        ds_ = ds.copy()
    return ds_
def _test_gideon_weird_case(self):
    """'The utter collapse' -- communicated by Peter J. Kohler

    Desire to collapse all samples per each category in training and
    testing sets, thus resulting in only a single sample/category per
    training and per testing.  As it is now, CrossValidation on a
    MappedClassifier would not work.

    Observations: the chance distribution obviously gets wide, but also
    gets skewed to anti-learning on nfolds like 4.
    """
    from mvpa2.mappers.fx import mean_group_sample
    from mvpa2.clfs.knn import kNN
    clf = kNN()
    ds = datasets['uni2large'].copy()
    ds = ds[ds.sa.chunks < 9]
    accs = []
    for i in xrange(10):        # # of random samples
        ds.samples = np.random.randn(*ds.shape)
        if False:
            # this would have been a native way IF we allowed change of
            # the number of samples
            clf2 = MappedClassifier(
                clf=kNN(),  # clf,
                mapper=mean_group_sample(['targets', 'partitions']))
            cv = CrossValidation(clf2, NFoldPartitioner(4), postproc=None,
                                 enable_ca=['stats'])
            print cv(ds)
        else:
            from mvpa2.clfs.transerror import ConfusionMatrix
            partitioner = NFoldPartitioner(6)
            meaner = mean_group_sample(['targets', 'partitions'])
            cm = ConfusionMatrix()
            te = TransferMeasure(
                clf, Splitter('partitions'),
                postproc=BinaryFxNode(mean_mismatch_error, 'targets'),
                enable_ca=['stats'])
            for part in partitioner.generate(ds):
                ds_meaned = meaner(part)
                error = np.asscalar(te(ds_meaned))
                cm += te.ca.stats
            print i, cm.stats['ACC']
            accs.append(cm.stats['ACC'])
def get_dissim_roi(subnr):
    ds = h5load(fns.betafn(subnr))
    ds = ds[:, mask_]
    ds = ds[ds.sa.condition != 'self']
    zscore(ds, chunks_attr='chunks')
    ds = mean_group_sample(['condition'])(ds)

    names = []
    dissims = []
    for roi, (center, ids) in rois.iteritems():
        names.append(roi)
        sample_roi = ds.samples[:, ids]
        dissim_roi = pdist(sample_roi, 'correlation')
        dissims.append(dissim_roi)
    dss = dataset_wizard(dissims, targets=names)
    return dss
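# A reminder about the pdist call above (standard SciPy behavior, not
# specific to this code): pdist(X, 'correlation') returns the condensed
# upper triangle, so each ROI contributes a vector of n*(n-1)/2
# dissimilarities for n condition patterns.
import numpy as np
from scipy.spatial.distance import pdist, squareform

n_conditions, n_voxels = 6, 120
patterns = np.random.randn(n_conditions, n_voxels)
dissim = pdist(patterns, 'correlation')    # condensed form
assert dissim.shape == (n_conditions * (n_conditions - 1) // 2,)
rdm = squareform(dissim)                   # full symmetric 6x6 RDM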
def _call(self, dataset):
    data = dataset.samples
    if self.center_data:
        data = data - np.mean(data, 0)

    # compute comparison sample
    mgs = mean_group_sample(['targets'])(dataset)
    comp_sample_data = mgs[mgs.sa['targets'] == self.comparison_sample]

    # omit all samples from the comparison_sample target condition
    dataset = dataset[dataset.sa.targets != self.comparison_sample]

    # calculate sample attribute of distance between each sample and the
    # comparison_sample (corr coef and p value)
    dataset.sa['sample_comp_dist_r'] = [
        pearsonr(s.samples[0], comp_sample_data.samples[0])[0]
        for s in dataset]
    dataset.sa['sample_comp_dist_p'] = [
        pearsonr(s.samples[0], comp_sample_data.samples[0])[1]
        for s in dataset]

    rho, p = pearsonr(dataset.sa['sample_comp_dist_r'],
                      dataset.sa[self.sample_covariable])
    if self.corrcoef_only:
        return Dataset(np.array([rho, ]))
    else:
        return Dataset(np.array([rho, p]))
def _call(self, dataset):
    data = dataset.samples
    if self.center_data:
        data = data - np.mean(data, 0)

    # compute comparison samples (one mean pattern per target)
    comp_samps = mean_group_sample(['targets'])(dataset)

    # omit all samples from comparison_sample target conditions
    for om in self.targs_comps.values():
        dataset = dataset[dataset.sa.targets != om]

    # calculate sample attribute of distance between each sample and its
    # comparison sample (corr coef and p value)
    dataset.sa['sample_comp_dist_r'] = [
        pearsonr(s.samples[0],
                 comp_samps[comp_samps.sa.targets ==
                            self.targs_comps[s.sa.targets[0]]].samples[0])[0]
        for s in dataset]
    dataset.sa['sample_comp_dist_p'] = [
        pearsonr(s.samples[0],
                 comp_samps[comp_samps.sa.targets ==
                            self.targs_comps[s.sa.targets[0]]].samples[0])[1]
        for s in dataset]

    # calculate final correlation
    rho, p = pearsonr(dataset.sa['sample_comp_dist_r'],
                      dataset.sa[self.sample_covariable])
    if self.corrcoef_only:
        return Dataset(np.array([rho, ]))
    else:
        return Dataset(np.array([rho, p]))
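# A standalone sketch (hypothetical variable names, plain numpy/scipy) of
# the core computation in the two _call methods above: correlate each trial
# pattern with a condition-mean "comparison" pattern, then relate those
# per-trial similarities to a behavioral covariate.
import numpy as np
from scipy.stats import pearsonr

trials = np.random.randn(20, 50)     # 20 trials x 50 voxels
comparison = np.random.randn(50)     # mean pattern of the comparison condition
covariable = np.random.randn(20)     # one behavioral value per trial

sims = np.array([pearsonr(t, comparison)[0] for t in trials])
rho, p = pearsonr(sims, covariable)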
def _call(self, dataset):
    # compute one mean (comparison) sample per target
    ds = mean_group_sample(['targets'])(dataset)

    # get neural dissimilarity b/w pairs of targets
    pairsim = dict(
        (pair[0] + '-' + pair[1],
         1 - pearsonr(ds[ds.sa.targets == pair[0]].samples[0],
                      ds[ds.sa.targets == pair[1]].samples[0])[0])
        for pair in self.pairs)

    # order DMs...
    pairs_dsm_o = OrderedDict(sorted(self.pairs_dsm.items())).values()
    pairsim_o = OrderedDict(sorted(pairsim.items())).values()

    # RSA
    if self.comparison_metric == 'spearman':
        res = np.arctanh(
            pearsonr(rankdata(pairs_dsm_o), rankdata(pairsim_o))[0])
    elif self.comparison_metric == 'pearson':
        res = np.arctanh(pearsonr(pairs_dsm_o, pairsim_o)[0])
    elif self.comparison_metric == 'euclidean':
        res = pdist(np.vstack([pairs_dsm_o, pairsim_o]))
        # flip and shift so that larger values mean greater similarity
        res = np.round((-1 * res) + 2)
    return Dataset(np.array([res, ]))
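# A side note on the 'spearman' branch above: Pearson correlation of
# rank-transformed vectors equals Spearman's rho (a standard SciPy identity,
# not specific to this code), and np.arctanh applies the Fisher z-transform.
import numpy as np
from scipy.stats import pearsonr, spearmanr, rankdata

a, b = np.random.rand(12), np.random.rand(12)
rho_via_ranks = pearsonr(rankdata(a), rankdata(b))[0]
assert np.isclose(rho_via_ranks, spearmanr(a, b)[0])
z = np.arctanh(rho_via_ranks)   # Fisher z-transform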
def label_examples(mri_data, beha_pkldat):
    # extract volume time-stamps from fMRI dataset (pymvpa2 Dataset)
    vol_times = mri_data.sa.time_coords

    # extract stimulus information from psychopy files (pandas DataFrame)
    onsets = beha_pkldat['TrialOnset'].values
    if 'trials_1.thisTrialN' in beha_pkldat:
        trials = beha_pkldat['trials_1.thisTrialN'].values
    else:
        trials = beha_pkldat['trials_2.thisTrialN'].values
    memory_status = beha_pkldat['condition'].values

    # label_trials labels the relevant TRs with trial number and
    # memory_status name
    mri_data.sa['trials'] = label_trials(onsets, trials, vol_times)
    mri_data.sa['targets'] = label_trials(onsets, memory_status, vol_times)

    # remove volumes that are of no interest to us
    mri_data = mri_data[mri_data.sa.targets != '_no-use_']

    # use the mean of each trial's volumes as examples
    mri_data = mri_data.get_mapped(
        mean_group_sample(['targets', 'trials'], order='occurrence'))

    # IMPORTANT CHECK
    print mri_data.sa.targets
    print mri_data.sa.trials
    print vol_times
    print mri_data.shape
    print mri_data.summary()
    return mri_data
def test_gideon_weird_case(self):
    """Test if MappedClassifier could handle a mapper altering number of samples

    'The utter collapse' -- communicated by Peter J. Kohler

    Desire to collapse all samples per each category in training and
    testing sets, thus resulting in only a single sample/category per
    training and per testing.

    It is a peculiar scenario which pinpoints the problem that so far
    mappers were assumed not to change the number of samples.
    """
    from mvpa2.mappers.fx import mean_group_sample
    from mvpa2.clfs.knn import kNN
    from mvpa2.mappers.base import ChainMapper
    ds = datasets['uni2large'].copy()
    #ds = ds[ds.sa.chunks < 9]
    accs = []
    k = 1                       # for kNN
    nf = 1                      # for NFoldPartitioner
    for i in xrange(1):         # # of random runs
        ds.samples = np.random.randn(*ds.shape)
        #
        # There are 3 ways to accomplish the needed goal
        #

        # 0. Hard way: overcome the problem by manually
        #    pre-splitting/meaning in a loop
        from mvpa2.clfs.transerror import ConfusionMatrix
        partitioner = NFoldPartitioner(nf)
        meaner = mean_group_sample(['targets', 'partitions'])
        cm = ConfusionMatrix()
        te = TransferMeasure(kNN(k), Splitter('partitions'),
                             postproc=BinaryFxNode(mean_mismatch_error,
                                                   'targets'),
                             enable_ca=['stats'])
        errors = []
        for part in partitioner.generate(ds):
            ds_meaned = meaner(part)
            errors.append(np.asscalar(te(ds_meaned)))
            cm += te.ca.stats
        #print i, cm.stats['ACC']
        accs.append(cm.stats['ACC'])

        if False:
            # not yet working -- see _tent/allow_ch_nsamples
            # branch for an attempt to make it work
            # 1. This is the "native way" IF we allow change of the number
            #    of samples via _call to be done by MappedClassifier
            #    while operating solely on the mapped dataset
            clf2 = MappedClassifier(
                clf=kNN(k),  # clf,
                mapper=mean_group_sample(['targets', 'partitions']))
            cv = CrossValidation(clf2, NFoldPartitioner(nf), postproc=None,
                                 enable_ca=['stats'])
            # meaning all should be ok since we should have balanced
            # sets across all chunks here
            errors_native = cv(ds)
            self.assertEqual(
                np.max(np.abs(errors_native.samples[:, 0] - errors)), 0)

        # 2. Work without fixes to MappedClassifier allowing
        #    change of # of samples
        #
        # CrossValidation will operate on a chain mapper which would
        # perform the necessary meaning first before dealing with kNN.
        # Cons: .stats would not be exposed since ChainMapper doesn't
        # expose them (yet)
        if __debug__ and 'ENFORCE_CA_ENABLED' in debug.active:
            raise SkipTest("Known to fail while trying to enable "
                           "training_stats for the ChainMapper")
        cv2 = CrossValidation(
            ChainMapper([mean_group_sample(['targets', 'partitions']),
                         kNN(k)],
                        space='targets'),
            NFoldPartitioner(nf), postproc=None)
        errors_native2 = cv2(ds)
        self.assertEqual(
            np.max(np.abs(errors_native2.samples[:, 0] - errors)), 0)

        # All of the ways should provide the same results
        #print i, np.max(np.abs(errors_native.samples[:,0] - errors)), \
        #      np.max(np.abs(errors_native2.samples[:,0] - errors))

    if False:
        # just to investigate the distribution if we have enough iterations
        import pylab as pl
        uaccs = np.unique(accs)
        step = np.asscalar(np.unique(np.round(uaccs[1:] - uaccs[:-1], 4)))
        bins = np.linspace(0., 1., np.round(1. / step + 1))
        xx = pl.hist(accs, bins=bins, align='left')
        pl.xlim((0. - step / 2, 1. + step / 2))
def __init__(self, attributes, **kwargs):
    self.node = mean_group_sample(attributes)
    Transformer.__init__(self, name='sample_averager', **kwargs)
def timesegments_classification(dss,
                                hyper=None,
                                part1=HalfPartitioner(),
                                part2=NFoldPartitioner(attr='subjects'),
                                window_size=6,
                                overlapping_windows=True,
                                distance='correlation',
                                do_zscore=True):
    """Time-segment classification across subjects using Hyperalignment

    Parameters
    ----------
    dss : list of datasets
       Datasets to benchmark on.  Usually a single dataset per subject.
    hyper : Hyperalignment-like, optional
       Beast which if called on a list of datasets should spit out trained
       mappers.  If not specified, `IdentityMapper`s will be used
    part1 : Partitioner, optional
       Partitioner to split data for hyperalignment "cross-validation"
    part2 : Partitioner, optional
       Partitioner for CV within the hyperalignment test split
    window_size : int, optional
       How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
       Strategy for how to create and classify "samples" for classification.
       If True -- `window_size` samples from each time point (but trailing
       ones) constitute a sample, and upon "predict" `window_size` of
       samples around each test point is not considered.  If False --
       samples are just taken (with training and testing splits) at
       `window_size` step from one to another.
    do_zscore : bool, optional
       Perform zscoring (overall, not per-chunk) for each dataset upon
       partitioning with part1
    ...
    """
    # Generate outer-most partitioning ()
    parts = [copy.deepcopy(part1).generate(ds) for ds in dss]

    iter = 1
    errors = []

    while True:
        try:
            dss_partitioned = [p.next() for p in parts]
        except StopIteration:
            # we are done -- no more partitions
            break
        if __debug__:
            debug("BM", "Iteration %d", iter)

        dss_train, dss_test = zip(*[list(Splitter("partitions").generate(ds))
                                    for ds in dss_partitioned])

        # TODO:  allow for doing feature selection
        if do_zscore:
            for ds in dss_train + dss_test:
                zscore(ds, chunks_attr=None)

        if hyper is not None:
            # since otherwise it would remember the previous loop dataset
            # as the "commonspace" -- do hyperalignment on a copy in each
            # loop iteration
            hyper_ = copy.deepcopy(hyper)
            mappers = hyper_(dss_train)
        else:
            mappers = [IdentityMapper() for ds in dss_train]

        dss_test_aligned = [mapper.forward(ds)
                            for mapper, ds in zip(mappers, dss_test)]

        # assign .sa.subjects to those datasets
        for i, ds in enumerate(dss_test_aligned):
            # part2.attr is by default "subjects"
            ds.sa[part2.attr] = [i]

        dss_test_bc = []
        for ds in dss_test_aligned:
            if overlapping_windows:
                startpoints = range(len(ds) - window_size + 1)
            else:
                startpoints = _get_nonoverlapping_startpoints(len(ds),
                                                              window_size)
            bm = BoxcarMapper(startpoints, window_size)
            bm.train(ds)
            ds_ = bm.forward(ds)
            ds_.sa['startpoints'] = startpoints

            # reassign subjects so they are not arrays
            def assign_unique(ds, sa):
                ds.sa[sa] = [np.asscalar(np.unique(x))
                             for x in ds.sa[sa].value]
            assign_unique(ds_, part2.attr)

            fm = FlattenMapper()
            fm.train(ds_)
            dss_test_bc.append(ds_.get_mapped(fm))

        ds_test = vstack(dss_test_bc)
        # Perform classification across subjects comparing against mean
        # spatio-temporal pattern of other subjects
        errors_across_subjects = []
        for ds_test_part in part2.generate(ds_test):
            ds_train_, ds_test_ = list(
                Splitter("partitions").generate(ds_test_part))
            # average across subjects to get a representative pattern
            # per timepoint
            ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
            assert(ds_train_.shape == ds_test_.shape)

            if distance == 'correlation':
                # TODO: redo more efficiently since now we are creating a
                # full corrcoef matrix.  Also we might better just take a
                # name for the pdist measure but then implement them
                # efficiently (i.e. without hstacking both pieces
                # together first)
                dist = 1 - np.corrcoef(
                    ds_train_, ds_test_)[len(ds_test_):, :len(ds_test_)]
            else:
                raise NotImplementedError

            if overlapping_windows:
                dist = wipe_out_offdiag(dist, window_size)

            winners = np.argmin(dist, axis=1)
            error = np.mean(winners != np.arange(len(winners)))
            errors_across_subjects.append(error)

        errors.append(errors_across_subjects)
        iter += 1

    errors = np.array(errors)
    if __debug__:
        debug("BM",
              "Finished with %s array of errors. Mean error %.2f"
              % (errors.shape, np.mean(errors)))
    return errors
def group_sample_loser_measure(attrs=('targets',)):
    '''takes loser after meaning over attrs'''
    return ChainNode((mean_group_sample(attrs), sample_loser_measure()))
else:
    print('partitioning ...')
    idxs_train, idxs_test = utils.get_train_test_splits(
        dataset, label_map, n_splits)
    if utils.check_train_test_splits(idxs_test):
        idxs_train, idxs_test = utils.get_train_test_splits(
            dataset, label_map, n_splits)

for word2vec_name, word2vec_features in zip(word2vec_names, word2vec_vecs):
    r_squares, scores = [], []
    for fold, (idx_train, idx_test) in tqdm(
            enumerate(zip(idxs_train, idxs_test))):
        if average:
            tr = dataset[idx_train].get_mapped(
                mean_group_sample(['chunks', 'id'], order='occurrence'))
        else:
            tr = dataset[idx_train]
        te = dataset[idx_test].get_mapped(
            mean_group_sample(['chunks', 'id'], order='occurrence'))
        # scaler = utils.build_model_dictionary(n_jobs=4)['RandomForest + Linear-SVM']
        # scaler.steps.pop(-1)
        features_tr = np.array([word2vec_features[word.lower()]
                                for word in tr.sa.words])
        BOLD_tr = tr.samples.astype('float32')
        # label_tr = np.array([label_map[item] for item in tr.sa.targets])
        features_te = np.array([
    pl.imshow(mtx, interpolation='nearest')
    pl.xticks(range(len(mtx)), labels, rotation=-45)
    pl.yticks(range(len(mtx)), labels)
    pl.title(title)
    pl.clim((0, 1))
    pl.colorbar()

"""
As a start, we want to inspect the dissimilarity structure of the
stimulation conditions in the entire ROI.  For this purpose, we average
all samples of each condition into a single exemplar, using an
FxMapper() instance.
"""

# compute a dataset with the mean samples for all conditions
from mvpa2.mappers.fx import mean_group_sample
mtgs = mean_group_sample(['targets'])
mtds = mtgs(ds)

"""
After these preparations we can use the PDist() measure to compute the
desired distance matrix -- by default using correlation distance as a
metric.  The ``square`` argument will cause a full square matrix to be
produced, instead of a leaner upper-triangular matrix in vector form.
"""

# basic ROI RSA -- dissimilarity matrix for the entire ROI
from mvpa2.measures import rsa
dsm = rsa.PDist(square=True)
res = dsm(mtds)
plot_mtx(res, mtds.sa.targets, 'ROI pattern correlation distances')
    return dss[0].a.mapper.reverse(ds)

import pylab as pl
pl.clf()
DS = dsvstack(dss)
# Sample plots
for s in [0, 1]:
    ds2 = get2d(dss[0])
    for r in [0, 1]:
        pl.subplot(3, 3, 1 + r + s * 3)
        pl.imshow(ds2[ds2.sa.chunks == r].samples[0],
                  interpolation='nearest')
        pl.ylabel('subj%d' % s)
        pl.xlabel('run1')
    pl.subplot(3, 3, 3 + s * 3)
    pl.imshow(get2d(mean_group_sample(['dissimilarity'])(dss[0]).samples)[0],
              interpolation='nearest')
    pl.xlabel('mean')

ds = dsvstack(dss)
ds.a['mapper'] = dss[0].a.mapper
ds_mean = mean_group_sample(['dissimilarity', 'chunks'])(ds)
for r in [0, 1]:
    ds_mean_run0 = ds.a.mapper.reverse(ds_mean[ds_mean.sa.chunks == r])
    pl.subplot(3, 3, 1 + r + 2 * 3)
    pl.imshow(ds_mean_run0.samples[0], interpolation='nearest')
    pl.ylabel('mean(subj)')
    pl.xlabel('run%d' % r)

ds_global_mean = mean_group_sample(['dissimilarity'])(ds)
pl.subplot(3, 3, 3 + 2 * 3)
pl.imshow(get2d(ds_global_mean).samples[0], interpolation='nearest')
def test_hrf_modeling():
    skip_if_no_external('nibabel')
    skip_if_no_external('nipy')  # ATM relies on NiPy's GLM implementation
    ds = load_example_fmri_dataset('25mm')  #literal=True)
    # TODO: simulate short dataset with known properties and use it
    #       for testing
    events = find_events(targets=ds.sa.targets, chunks=ds.sa.chunks)
    tr = ds.a.imghdr['pixdim'][4]
    for ev in events:
        for a in ('onset', 'duration'):
            ev[a] = ev[a] * tr
    evds = eventrelated_dataset(ds, events,
                                time_attr='time_coords',
                                condition_attr='targets',
                                design_kwargs=dict(drift_model='blank'),
                                glmfit_kwargs=dict(model='ols'),
                                model='hrf')
    # same voxels
    assert_equal(ds.nfeatures, evds.nfeatures)
    assert_array_equal(ds.fa.voxel_indices, evds.fa.voxel_indices)
    # one sample for each condition, plus constant
    assert_equal(sorted(ds.sa['targets'].unique), sorted(evds.sa.targets))
    assert_equal(evds.a.add_regs.sa.regressor_names[0], 'constant')
    # with centered data
    zscore(ds)
    evds_demean = eventrelated_dataset(ds, events,
                                       time_attr='time_coords',
                                       condition_attr='targets',
                                       design_kwargs=dict(drift_model='blank'),
                                       glmfit_kwargs=dict(model='ols'),
                                       model='hrf')
    # after demeaning the constant should consume a lot less
    assert(evds.a.add_regs[0].samples.mean()
           > evds_demean.a.add_regs[0].samples.mean())
    # from eyeballing the sensitivity example -- would be better to test
    # this on the tutorial data
    assert(evds_demean[evds.sa.targets == 'shoe'].samples.max()
           > evds_demean[evds.sa.targets == 'bottle'].samples.max())
    # HRF models
    assert('regressors' in evds.sa)
    assert('regressors' in evds.a.add_regs.sa)
    assert_equal(evds.sa.regressors.shape[1], len(ds))

    # custom regressors
    evds_regrs = eventrelated_dataset(ds, events,
                                      time_attr='time_coords',
                                      condition_attr='targets',
                                      regr_attrs=['time_indices'],
                                      design_kwargs=dict(drift_model='blank'),
                                      glmfit_kwargs=dict(model='ols'),
                                      model='hrf')
    # verify that nothing screwed up time_coords
    assert_equal(ds.sa.time_coords[0], 0)
    assert_equal(len(evds_regrs), len(evds))
    # one more output sample in .a.add_regs
    assert_equal(len(evds_regrs.a.add_regs) - 1, len(evds.a.add_regs))
    # comes last before constant
    assert_equal('time_indices', evds_regrs.a.add_regs.sa.regressor_names[-2])
    # order of main regressors is unchanged
    assert_array_equal(evds.sa.targets, evds_regrs.sa.targets)

    # custom regressors from external sources
    evds_regrs = eventrelated_dataset(
        ds, events,
        time_attr='time_coords',
        condition_attr='targets',
        regr_attrs=['time_coords'],
        design_kwargs=dict(drift_model='blank',
                           add_regs=np.linspace(1, -1, len(ds))[None].T,
                           add_reg_names=['negative_trend']),
        glmfit_kwargs=dict(model='ols'),
        model='hrf')
    assert_equal(len(evds_regrs), len(evds))
    # but we got two more in additional regressors
    assert_equal(len(evds_regrs.a.add_regs) - 2, len(evds.a.add_regs))
    # they come last before constant
    assert_array_equal(['negative_trend', 'time_coords', 'constant'],
                       evds_regrs.a.add_regs.sa.regressor_names)
    # order is otherwise unchanged
    assert_array_equal(evds.sa.targets, evds_regrs.sa.targets)

    # HRF models with estimating per each chunk
    assert_equal(ds.sa.time_coords[0], 0)
    evds_regrs = eventrelated_dataset(ds, events,
                                      time_attr='time_coords',
                                      condition_attr=['targets', 'chunks'],
                                      regr_attrs=['time_indices'],
                                      design_kwargs=dict(drift_model='blank'),
                                      glmfit_kwargs=dict(model='ols'),
                                      model='hrf')
    assert_true('add_regs' in evds_regrs.a)
    assert_true('time_indices' in evds_regrs.a.add_regs.sa.regressor_names)
    assert_equal(len(ds.UC) * len(ds.UT), len(evds_regrs))
    assert_equal(len(evds_regrs.UC) * len(evds_regrs.UT), len(evds_regrs))

    from mvpa2.mappers.fx import mean_group_sample
    evds_regrs_meaned = mean_group_sample(['targets'])(evds_regrs)
    assert_array_equal(evds_regrs_meaned.T, evds.T)  # targets should be the same
def plot_mtx(mtx, labels, title):
    pl.figure()
    pl.imshow(mtx, interpolation='nearest')
    pl.xticks(range(len(mtx)), labels, rotation=-45)
    pl.yticks(range(len(mtx)), labels)
    pl.title(title)
    pl.clim((0, 2))
    pl.colorbar()

"""
As a start, we want to inspect the dissimilarity structure of the
stimulation conditions in the entire ROI.  For this purpose, we average
all samples of each condition into a single exemplar, using an
FxMapper() instance.
"""

# compute a dataset with the mean samples for all conditions
from mvpa2.mappers.fx import mean_group_sample
mtgs = mean_group_sample(['targets'])
mtds = mtgs(ds)

"""
After these preparations we can use the PDist() measure to compute the
desired distance matrix -- by default using correlation distance as a
metric.  The ``square`` argument will cause a full square matrix to be
produced, instead of a leaner upper-triangular matrix in vector form.
"""

# basic ROI RSA -- dissimilarity matrix for the entire ROI
from mvpa2.measures import rsa
dsm = rsa.PDist(square=True)
res = dsm(mtds)
plot_mtx(res, mtds.sa.targets, 'ROI pattern correlation distances')

"""
Inspecting the figure we can see that there is not much structure in the
matrix, except for the face and the house conditions being slightly more
dissimilar than the others.

Now, let's take a look at the variation of the similarity structure
through the brain.  We can plug the PDist() measure into a searchlight
to quickly scan the brain and harvest this information.
"""

# same as above, but done in a searchlight fashion
# In[ ]:

zscore(fds, param_est=('targets', ['rest']))

# ### Remove the volumes assigned to baseline

# In[ ]:

fds = fds[fds.sa.targets != 'rest']

# ### Average the volumes

# In[ ]:

from mvpa2.mappers.fx import mean_group_sample
mtgs = mean_group_sample(['targets'])
mtds = mtgs(fds)

# ### Measure the distances between categories

# In[ ]:

dsm = rsa.PDist(square=True)
res = dsm(mtds)

# ### Show the results in a figure

# In[ ]:

plot_mtx(res, mtds.sa.targets, 'ROI pattern correlation distances')
def timesegments_classification(dss,
                                window_size=6,
                                overlapping_windows=True,
                                distance='correlation',
                                do_zscore=True):
    """Time-segment classification across subjects using Hyperalignment

    Parameters
    ----------
    dss : list of datasets
       Datasets to benchmark on.  Usually a single dataset per subject.
    window_size : int, optional
       How many temporal points to consider for a classification sample
    overlapping_windows : bool, optional
       Strategy for how to create and classify "samples" for classification.
       If True -- `window_size` samples from each time point (but trailing
       ones) constitute a sample, and upon "predict" `window_size` of
       samples around each test point is not considered.  If False --
       samples are just taken (with training and testing splits) at
       `window_size` step from one to another.
    do_zscore : bool, optional
       Perform zscoring (overall, not per-chunk) for each dataset
    """
    part2 = NFoldPartitioner(attr='subjects')
    # Check if input list contains Datasets, ndarrays
    dss = [Dataset(ds) if not type(ds) == Dataset else ds for ds in dss]
    # TODO:  allow for doing feature selection
    if do_zscore:
        for ds in dss:
            zscore(ds, chunks_attr=None)

    # assign .sa.subjects to those datasets
    for i, ds in enumerate(dss):
        # part2.attr is by default "subjects"
        ds.sa[part2.attr] = [i]

    dss_test_bc = []
    for ds in dss:
        if overlapping_windows:
            startpoints = range(len(ds) - window_size + 1)
        else:
            startpoints = _get_nonoverlapping_startpoints(len(ds),
                                                          window_size)
        bm = BoxcarMapper(startpoints, window_size)
        bm.train(ds)
        ds_ = bm.forward(ds)
        ds_.sa['startpoints'] = startpoints

        # reassign subjects so they are not arrays
        def assign_unique(ds, sa):
            ds.sa[sa] = [np.asscalar(np.unique(x)) for x in ds.sa[sa].value]
        assign_unique(ds_, part2.attr)

        fm = FlattenMapper()
        fm.train(ds_)
        dss_test_bc.append(ds_.get_mapped(fm))

    ds_test = vstack(dss_test_bc)
    # Perform classification across subjects comparing against mean
    # spatio-temporal pattern of other subjects
    errors_across_subjects = []
    for ds_test_part in part2.generate(ds_test):
        ds_train_, ds_test_ = list(
            Splitter("partitions").generate(ds_test_part))
        # average across subjects to get a representative pattern
        # per timepoint
        ds_train_ = mean_group_sample(['startpoints'])(ds_train_)
        assert(ds_train_.shape == ds_test_.shape)

        if distance == 'correlation':
            # TODO: redo more efficiently since now we are creating a full
            # corrcoef matrix.  Also we might better just take a name for
            # the pdist measure but then implement them efficiently
            # (i.e. without hstacking both pieces together first)
            dist = 1 - np.corrcoef(
                ds_train_, ds_test_)[len(ds_test_):, :len(ds_test_)]
        else:
            raise NotImplementedError

        if overlapping_windows:
            dist = wipe_out_offdiag(dist, window_size)

        winners = np.argmin(dist, axis=1)
        error = np.mean(winners != np.arange(len(winners)))
        errors_across_subjects.append(error)

    errors_across_subjects = np.asarray(errors_across_subjects)
    if __debug__:
        debug("BM",
              "Finished with %s array of errors. Mean error %.2f"
              % (errors_across_subjects.shape,
                 np.mean(errors_across_subjects)))
    return errors_across_subjects
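# A minimal usage sketch for the function above, under assumed inputs
# (synthetic random time series; real use would pass per-subject datasets
# already aligned to a common feature space):
import numpy as np
from mvpa2.datasets import Dataset

dss = [Dataset(np.random.randn(40, 50)) for _ in range(3)]  # 3 "subjects"
errors = timesegments_classification(dss, window_size=6)
print(errors.mean())   # errors should be near chance for random data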
fn = prefix + '*' + suffix
files = sorted(glob.glob(fn))
for x in range(len(files)):
    if x < 5:
        chunks = [x + 1] * 20
    else:
        chunks = [x - 5 + 1] * 20
    d = mv.gifti_dataset(files[x], chunks=chunks, targets=conditions)
    d.sa['conditions'] = conditions
    if ds is None:
        ds = d
    else:
        ds = mv.vstack((ds, d))
ds.fa['node_indices'] = range(ds.shape[1])
ds.samples = zscore(ds.samples, axis=1)

mtgs = mean_group_sample(['conditions'])
mtds = mtgs(ds)
slres = sl(mtds)
slres.samples = np.nan_to_num(slres.samples)
all_slres.append(slres.samples)

# all_slres has all (190, 40962) RDMs for each subject;
# now we need ISCs:
# (12, 190, 40962) -> list of 40962 items of shape (12, 190)
all_slres = np.array(all_slres)
all_slres = np.swapaxes(all_slres, 0, 2)

results = []
for sl_data in all_slres:
    # now I have a 190 by 12 matrix
def main(subject, study_dir, mask, feature_mask, models, category, res_name,
         suffix='_stim_fix2', radius=3, n_perm=1000, n_proc=None):
    from mvpa2.mappers.zscore import zscore
    from mvpa2.mappers.fx import mean_group_sample
    from mvpa2.measures.searchlight import sphere_searchlight
    from mvpa2.datasets.mri import map2nifti
    from wikisim import mvpa

    # load subject data
    sp = su.SubjPath(subject, study_dir)
    vols = task.prex_vols(sp.path('behav', 'log'))

    # load fmri data
    ds = mvpa.load_prex_beta(sp, suffix, mask,
                             feature_mask=feature_mask, verbose=1)

    # zscore
    ds.sa['run'] = vols.run.values
    zscore(ds, chunks_attr='run')

    # average over item presentations
    ds.sa['itemno'] = vols.itemno.to_numpy()
    m = mean_group_sample(['itemno'])
    dsm = ds.get_mapped(m)

    # get items of interest
    if category == 'face':
        cond = [1, 2]
    elif category == 'scene':
        cond = [3, 4]
    else:
        raise ValueError(f'Invalid category code: {category}')
    include = vols.groupby('itemno').first()['cond'].isin(cond)

    # get models of interest
    model_dir = os.path.join(study_dir, 'batch', 'models3')
    model_names = models.split('-')
    model_rdms_dict = model.load_category_rdms(model_dir, category,
                                               model_names)
    model_rdms = [model_rdms_dict[name] for name in model_names]

    # set up searchlight
    m = mvpa.ItemPartialRSA(model_rdms, n_perm)
    sl = sphere_searchlight(m, radius=radius, nproc=n_proc)
    sl_map = sl(dsm[include])
    nifti_include = map2nifti(ds, sl_map[-1])
    for i, name in enumerate(model_names):
        # save zstat map
        res_dir = sp.path('rsa', f'{res_name}_{name}')
        if not os.path.exists(res_dir):
            os.makedirs(res_dir)
        filepath = os.path.join(res_dir, 'zstat.nii.gz')
        nifti = map2nifti(ds, sl_map[i])
        nifti.to_filename(filepath)

        # save mask of included voxels
        include_file = os.path.join(res_dir, 'included.nii.gz')
        nifti_include.to_filename(include_file)
def get_dsm_roi_secondorder_xval2(ds, rois, zscore_ds=True,
                                  part=OddEvenPartitioner(),
                                  cond_chunk='condition'):
    """Obtain second-order dissimilarities between ROIs.

    This version cross-validates at the second level, thus the resulting
    dsms are not symmetrical.

    Arguments
    ---------
    ds : dataset
    rois : dict
        each item in the dictionary must be a tuple where the 0th element
        is the center of the roi, and the 1st element is a list of ids
    zscore_ds : bool
        whether the dataset should be zscored (per chunk) first
    part : partitioner
    cond_chunk : str
        across which sample attribute to perform mean group sample

    Returns
    -------
    dataset containing second level dsm
    """
    #ds = h5load(fns.betafn(subnr))
    #ds = ds[:, mask_]
    #ds = ds[ds.sa.condition != 'self']
    if zscore_ds:
        zscore(ds, chunks_attr='chunks')
    # set up odd-even partition
    #part = OddEvenPartitioner()

    mgs = mean_group_sample([cond_chunk])
    dissims_folds = []
    for ds_ in part.generate(ds):
        ds_1 = ds_[ds_.sa.partitions == 1]
        ds_2 = ds_[ds_.sa.partitions == 2]

        ds_1 = mgs(ds_1)
        ds_2 = mgs(ds_2)
        assert(ds_1.samples.shape == ds_2.samples.shape)

        # first generate first-order rdms for each fold
        names = []
        centers = []
        dissims_1 = []
        dissims_2 = []
        for roi, (center, ids) in rois.iteritems():
            names.append(roi)
            centers.append(center)

            sample1_roi = ds_1.samples[:, ids]
            sample2_roi = ds_2.samples[:, ids]

            dissim1_roi = pdist(sample1_roi, 'correlation')
            dissim2_roi = pdist(sample2_roi, 'correlation')

            dissims_1.append(dissim1_roi)
            dissims_2.append(dissim2_roi)

        dss1 = np.array(dissims_1)
        dss2 = np.array(dissims_2)

        # now compute second-order rdm correlating across folds
        dissim_2ndorder = 1. - corrcoefxy(dss1.T, dss2.T)
        dissim_2ndorder = dataset_wizard(dissim_2ndorder, targets=names)
        dissim_2ndorder.sa['centers'] = centers
        # also add fa information about roi
        dissim_2ndorder.fa['roi'] = names
        dissims_folds.append(dissim_2ndorder)

    # average across folds
    dissims = dissims_folds[0]
    for d in dissims_folds[1:]:
        dissims.samples += d.samples
    dissims.samples /= len(dissims_folds)
    return dissims
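# A hedged usage sketch for get_dsm_roi_secondorder_xval2; the rois layout
# follows the docstring (center coordinate, list of feature ids), while the
# dataset and attribute values are assumptions:
rois = {
    'roiA': ((10, 20, 30), [0, 1, 2, 3]),    # (center, feature ids)
    'roiB': ((40, 50, 60), [4, 5, 6, 7]),
}
# ds must carry sa.chunks and the cond_chunk attribute ('condition' here)
second_order = get_dsm_roi_secondorder_xval2(ds, rois, zscore_ds=True)
print(second_order)   # ROI x ROI second-level dissimilarity dataset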
def get_dsm_roi_xval1(ds, rois, zscore_ds=True, part=OddEvenPartitioner(),
                      cond_chunk='condition'):
    """Obtain second-order dissimilarities between ROIs.

    This version cross-validates at the first level, thus the resulting
    dsms are symmetrical.

    Arguments
    ---------
    ds : dataset
    rois : dict
        each item in the dictionary must be a tuple where the 0th element
        is the center of the roi, and the 1st element is a list of ids
    zscore_ds : bool
        whether the dataset should be zscored (per chunk) first
    part : partitioner
    cond_chunk : str
        across which sample attribute to perform mean group sample

    Returns
    -------
    dataset containing second level dsm
    """
    #ds = h5load(fns.betafn(subnr))
    #ds = ds[:, mask_]
    #ds = ds[ds.sa.condition != 'self']
    if zscore_ds:
        zscore(ds, chunks_attr='chunks')
    # set up odd-even partition
    #part = OddEvenPartitioner()

    mgs = mean_group_sample([cond_chunk])
    dissims_folds = []
    for ds_ in part.generate(ds):
        ds_1 = ds_[ds_.sa.partitions == 1]
        ds_2 = ds_[ds_.sa.partitions == 2]

        ds_1 = mgs(ds_1)
        ds_2 = mgs(ds_2)
        assert(ds_1.samples.shape == ds_2.samples.shape)

        # first generate first-order rdms cross-validated across folds
        names = []
        centers = []
        dissims = []
        for roi, (center, ids) in rois.iteritems():
            names.append(roi)
            centers.append(center)

            sample1_roi = ds_1.samples[:, ids]
            sample2_roi = ds_2.samples[:, ids]

            dissim_roi = 1. - corrcoefxy(sample1_roi.T, sample2_roi.T)
            nsamples = ds_1.nsamples
            assert(dissim_roi.shape == (nsamples, nsamples))

            # now the RDM is not symmetrical anymore
            dissims.append(dissim_roi.flatten())
        dissims_folds.append(np.array(dissims))

    # average across folds
    dissims_folds = np.array(dissims_folds).mean(axis=0)
    assert(dissims_folds.shape == (len(names), nsamples ** 2))

    # now compute second level (distances)
    distance_roi = dist.pdist(dissims_folds, metric='correlation')

    dissims_folds = dataset_wizard(dist.squareform(distance_roi),
                                   targets=names)
    dissims_folds.fa['roi'] = names
    dissims_folds.sa['centers'] = centers
    return dissims_folds
class SearchlightTests(unittest.TestCase): def setUp(self): self.dataset = datasets['3dlarge'] # give the feature coord a more common name, matching the default of # the searchlight self.dataset.fa['voxel_indices'] = self.dataset.fa.myspace self._tested_pprocess = False # https://github.com/PyMVPA/PyMVPA/issues/67 # https://github.com/PyMVPA/PyMVPA/issues/69 def test_gnbsearchlight_doc(self): # Test either we excluded nproc from the docstrings ok_(not 'nproc' in GNBSearchlight.__init__.__doc__) ok_(not 'nproc' in GNBSearchlight.__doc__) ok_(not 'nproc' in sphere_gnbsearchlight.__doc__) # but present elsewhere ok_('nproc' in sphere_searchlight.__doc__) ok_('nproc' in Searchlight.__init__.__doc__) # https://github.com/PyMVPA/PyMVPA/issues/106 def test_searchlights_doc_qe(self): # queryengine should not be provided to sphere_* helpers for sl in (sphere_searchlight, sphere_gnbsearchlight, sphere_m1nnsearchlight): for kw in ('queryengine', 'qe'): ok_(not kw in sl.__doc__, msg='There should be no %r in %s.__doc__' % (kw, sl)) # queryengine should be provided in corresponding classes __doc__s for sl in (Searchlight, GNBSearchlight, M1NNSearchlight): for kw in ('queryengine', ): ok_(kw in sl.__init__.__doc__, msg='There should be %r in %s.__init__.__doc__' % (kw, sl)) for kw in ('qe', ): ok_(not kw in sl.__init__.__doc__, msg='There should be no %r in %s.__init__.__doc__' % (kw, sl)) #def _test_searchlights(self, ds, sls, roi_ids, result_all): # pragma: no cover @sweepargs( lrn_sllrn_SL_partitioner=[ ( GNB(common_variance=v, descr='GNB'), None, sphere_gnbsearchlight, NFoldPartitioner(cvtype=1), 0. # correction for the error range ) for v in (True, False) ] + # Mean 1 NN searchlights [ (ChainMapper( [mean_group_sample(['targets', 'partitions']), kNN(1)], space='targets', descr='M1NN'), kNN(1), sphere_m1nnsearchlight, NFoldPartitioner(0.5, selection_strategy='random', count=20), 0.05), # the same but with NFold(1) partitioner since it still should work (ChainMapper( [mean_group_sample(['targets', 'partitions']), kNN(1)], space='targets', descr='NF-M1NN'), kNN(1), sphere_m1nnsearchlight, NFoldPartitioner(1), 0.05), ]) @sweepargs(do_roi=(False, True)) @sweepargs(results_backend=('native', 'hdf5')) @reseed_rng() def test_spatial_searchlight(self, lrn_sllrn_SL_partitioner, do_roi=False, results_backend='native'): """Tests both generic and ad-hoc searchlights (e.g. GNBSearchlight) Test of and adhoc searchlight anyways requires a ground-truth comparison to the generic version, so we are doing sweepargs here """ lrn, sllrn, SL, partitioner, correction = lrn_sllrn_SL_partitioner ## if results_backend == 'hdf5' and not common_variance: ## # no need for full combination of all possible arguments here ## return if __debug__ and 'ENFORCE_CA_ENABLED' in debug.active \ and isinstance(lrn, ChainMapper): raise SkipTest("Known to fail while trying to enable " "training_stats for the ChainMapper (M1NN here)") # e.g. for M1NN we need plain kNN(1) for m1nnsl, but to imitate m1nn # "learner" we must use a chainmapper atm if sllrn is None: sllrn = lrn ds = datasets['3dsmall'].copy() # Let's test multiclass here, so boost # of labels ds[6:18].T += 2 ds.fa['voxel_indices'] = ds.fa.myspace # To assure that users do not run into incorrect operation due to overflows ds.samples += 5000 ds.samples *= 1000 ds.samples = ds.samples.astype(np.int16) # compute N-1 cross-validation for each sphere # YOH: unfortunately sample_clf_lin is not guaranteed # to provide exactly the same results due to inherent # iterative process. 
Therefore lets use something quick # and pure Python cv = CrossValidation(lrn, partitioner) skwargs = dict( radius=1, enable_ca=['roi_sizes', 'raw_results', 'roi_feature_ids']) if do_roi: # select some random set of features nroi = rnd.randint(1, ds.nfeatures) # and lets compute the full one as well once again so we have a reference # which will be excluded itself from comparisons but values will be compared # for selected roi_id sl_all = SL(sllrn, partitioner, **skwargs) result_all = sl_all(ds) # select random features roi_ids = rnd.permutation(range(ds.nfeatures))[:nroi] skwargs['center_ids'] = roi_ids else: nroi = ds.nfeatures roi_ids = np.arange(nroi) result_all = None if results_backend == 'hdf5': skip_if_no_external('h5py') sls = [ sphere_searchlight(cv, results_backend=results_backend, **skwargs), #GNBSearchlight(gnb, NFoldPartitioner(cvtype=1)) SL(sllrn, partitioner, indexsum='fancy', **skwargs) ] if externals.exists('scipy'): sls += [SL(sllrn, partitioner, indexsum='sparse', **skwargs)] # Test nproc just once if externals.exists('pprocess') and not self._tested_pprocess: sls += [sphere_searchlight(cv, nproc=2, **skwargs)] self._tested_pprocess = True # Provide the dataset and all those searchlights for testing #self._test_searchlights(ds, sls, roi_ids, result_all) #nroi = len(roi_ids) #do_roi = nroi != ds.nfeatures all_results = [] for sl in sls: # run searchlight mvpa2.seed() # reseed rng again for m1nnsl results = sl(ds) all_results.append(results) #print `sl` # check for correct number of spheres self.assertTrue(results.nfeatures == nroi) # and measures (one per xfold) if partitioner.cvtype == 1: self.assertTrue(len(results) == len(ds.UC)) elif partitioner.cvtype == 0.5: # here we had 4 unique chunks, so 6 combinations # even though 20 max was specified for NFold self.assertTrue(len(results) == 6) else: raise RuntimeError("Unknown yet type of partitioner to check") # check for chance-level performance across all spheres # makes sense only if number of features was big enough # to get some stable estimate of mean if not do_roi or nroi > 20: # correction here is for M1NN class which has wider distribution self.assertTrue(0.67 - correction < results.samples.mean() < 0.85 + correction, msg="Out of range mean result: " "lrn: %s sllrn: %s NROI: %d MEAN: %.3f" % ( lrn, sllrn, nroi, results.samples.mean(), )) mean_errors = results.samples.mean(axis=0) # that we do get different errors ;) self.assertTrue(len(np.unique(mean_errors) > 3)) # check resonable sphere sizes self.assertTrue(len(sl.ca.roi_sizes) == nroi) self.assertTrue(len(sl.ca.roi_feature_ids) == nroi) for i, fids in enumerate(sl.ca.roi_feature_ids): self.assertTrue(len(fids) == sl.ca.roi_sizes[i]) if do_roi: # for roi we should relax conditions a bit self.assertTrue(max(sl.ca.roi_sizes) <= 7) self.assertTrue(min(sl.ca.roi_sizes) >= 4) else: self.assertTrue(max(sl.ca.roi_sizes) == 7) self.assertTrue(min(sl.ca.roi_sizes) == 4) # check base-class state self.assertEqual(sl.ca.raw_results.nfeatures, nroi) # Test if we got results correctly for 'selected' roi ids if do_roi: assert_array_equal(result_all[:, roi_ids], results) if len(all_results) > 1: # if we had multiple searchlights, we can check either they all # gave the same result (they should have) aresults = np.array([a.samples for a in all_results]) dresults = np.abs(aresults - aresults.mean(axis=0)) dmax = np.max(dresults) self.assertTrue(dmax <= 1e-13) # Test the searchlight's reuse of neighbors for indexsum in ['fancy'] + (externals.exists('scipy') and ['sparse'] or []): 
sl = SL(sllrn, partitioner, indexsum='fancy', reuse_neighbors=True, **skwargs) mvpa2.seed() result1 = sl(ds) mvpa2.seed() result2 = sl(ds) # must be faster assert_array_equal(result1, result2) def test_adhocsearchlight_perm_testing(self): # just a smoke test pretty much ds = datasets['3dmedium'].copy() #ds.samples += np.random.normal(size=ds.samples.shape)*10 mvpa2.seed() ds.fa['voxel_indices'] = ds.fa.myspace from mvpa2.mappers.fx import mean_sample from mvpa2.clfs.stats import MCNullDist permutator = AttributePermutator('targets', count=8, limit='chunks') distr_est = MCNullDist(permutator, tail='left', enable_ca=['dist_samples']) slargs = (kNN(1), NFoldPartitioner(0.5, selection_strategy='random', count=9)) slkwargs = dict(radius=1, postproc=mean_sample()) sl_nodistr = sphere_m1nnsearchlight(*slargs, **slkwargs) skip_if_no_external('scipy') # needed for null_t sl = sphere_m1nnsearchlight(*slargs, null_dist=distr_est, enable_ca=['null_t'], reuse_neighbors=True, **slkwargs) mvpa2.seed() res_nodistr = sl_nodistr(ds) mvpa2.seed() res = sl(ds) # verify that we at least got the same main result # ah (yoh) -- null dist is estimated before the main # estimate so we can't guarantee correspondence :-/ # assert_array_equal(res_nodistr, res) # only resemblance (TODO, may be we want to get/setstate # for rng before null_dist.fit?) # and dimensions correspond assert_array_equal(distr_est.ca.dist_samples.shape, (1, ds.nfeatures, 8)) assert_array_equal(sl.ca.null_t.samples.shape, (1, ds.nfeatures)) def test_partial_searchlight_with_full_report(self): ds = self.dataset.copy() center_ids = np.zeros(ds.nfeatures, dtype='bool') center_ids[[3, 50]] = True ds.fa['center_ids'] = center_ids # compute N-1 cross-validation for each sphere cv = CrossValidation(GNB(), NFoldPartitioner()) # contruct diameter 1 (or just radius 0) searchlight # one time give center ids as a list, the other one takes it from the # dataset itself sls = ( sphere_searchlight(cv, radius=0, center_ids=[3, 50]), sphere_searchlight(None, radius=0, center_ids=[3, 50]), sphere_searchlight(cv, radius=0, center_ids='center_ids'), ) for sl in sls: # assure that we could set cv post constructor if sl.datameasure is None: sl.datameasure = cv # run searchlight results = sl(ds) # only two spheres but error for all CV-folds self.assertEqual(results.shape, (len(self.dataset.UC), 2)) # Test if results hold if we "set" a "new" datameasure sl.datameasure = CrossValidation(GNB(), NFoldPartitioner()) results2 = sl(ds) assert_array_almost_equal(results, results2) # test if we graciously puke if center_ids are out of bounds dataset0 = ds[:, :50] # so we have no 50th feature self.assertRaises(IndexError, sls[0], dataset0) # but it should be fine on the one that gets the ids from the dataset # itself results = sl(dataset0) assert_equal(results.nfeatures, 1) # check whether roi_seeds are correct sl = sphere_searchlight(lambda x: np.vstack( (x.fa.roi_seed, x.samples)), radius=1, add_center_fa=True, center_ids=[12]) res = sl(ds) assert_array_equal( res.samples[1:, res.samples[0].astype('bool')].squeeze(), ds.samples[:, 12]) def test_partial_searchlight_with_confusion_matrix(self): ds = self.dataset from mvpa2.clfs.stats import MCNullDist from mvpa2.mappers.fx import mean_sample, sum_sample # compute N-1 cross-validation for each sphere cm = ConfusionMatrix(labels=ds.UT) cv = CrossValidation( sample_clf_lin, NFoldPartitioner(), # we have to assure that matrix does not get flatted by # first vstack in cv and then hstack in searchlight -- # thus 2 leading dimensions # 
            errorfx=lambda *a: cm(*a)[None, None, :])
        # construct diameter 2 (or just radius 1) searchlight
        sl = sphere_searchlight(cv, radius=1, center_ids=[3, 5, 50])

        # our regular searchlight -- to compare results
        cv_gross = CrossValidation(sample_clf_lin, NFoldPartitioner())
        sl_gross = sphere_searchlight(cv_gross, radius=1,
                                      center_ids=[3, 5, 50])

        # run searchlights
        res = sl(ds)
        res_gross = sl_gross(ds)

        # only two spheres but error for all CV-folds and complete
        # confusion matrix
        assert_equal(res.shape, (len(ds.UC), 3, len(ds.UT), len(ds.UT)))
        assert_equal(res_gross.shape, (len(ds.UC), 3))

        # briefly inspect the confusion matrices
        mat = res.samples
        # since the input dataset is probably balanced (otherwise adjust
        # to be per label): sums within columns (thus axis=-2) should
        # be identical to the per-class/chunk number of samples
        samples_per_classchunk = len(ds) // (len(ds.UT) * len(ds.UC))
        ok_(np.all(np.sum(mat, axis=-2) == samples_per_classchunk))
        # and if we compute accuracies manually -- they should
        # correspond to the ones from sl_gross
        assert_array_almost_equal(
            res_gross.samples,
            # from accuracies to errors
            1 - (mat[..., 0, 0] + mat[..., 1, 1]).astype(float)
            / (2 * samples_per_classchunk))

        # and now for those who remained seated -- let's perform H0 MC
        # testing of this searchlight... just a silly one with a minimal
        # number of permutations
        no_permutations = 10
        permutator = AttributePermutator('targets', count=no_permutations)

        # once again -- need an explicit leading dimension to avoid
        # vstacking during cross-validation
        cv.postproc = lambda x: sum_sample()(x)[None, :]

        sl = sphere_searchlight(cv, radius=1, center_ids=[3, 5, 50],
                                null_dist=MCNullDist(
                                    permutator, tail='right',
                                    enable_ca=['dist_samples']))
        res_perm = sl(ds)
        # XXX all of res_perm, sl.ca.null_prob and
        # sl.null_dist.ca.dist_samples carry a degenerate leading
        # dimension which was probably due to the new axis introduced
        # above within cv.postproc
        assert_equal(res_perm.shape, (1, 3, 2, 2))
        assert_equal(sl.null_dist.ca.dist_samples.shape,
                     res_perm.shape + (no_permutations,))
        assert_equal(sl.ca.null_prob.shape, res_perm.shape)
        # just to make sure ;)
        ok_(np.all(sl.ca.null_prob.samples >= 0))
        ok_(np.all(sl.ca.null_prob.samples <= 1))

        # we should have got sums of hits across the splits
        assert_array_equal(np.sum(mat, axis=0), res_perm.samples[0])

    def test_chi_square_searchlight(self):
        # only do partial to save time

        # Can't yet do this since test_searchlight isn't yet "under nose"
        #skip_if_no_external('scipy')
        if not externals.exists('scipy'):
            return

        from mvpa2.misc.stats import chisquare

        cv = CrossValidation(sample_clf_lin, NFoldPartitioner(),
                             enable_ca=['stats'])

        def getconfusion(data):
            cv(data)
            return chisquare(cv.ca.stats.matrix)[0]

        sl = sphere_searchlight(getconfusion, radius=0,
                                center_ids=[3, 50])

        # run searchlight
        results = sl(self.dataset)
        self.assertTrue(results.nfeatures == 2)

    def test_1d_multispace_searchlight(self):
        ds = Dataset([np.arange(6)])
        ds.fa['coord1'] = np.repeat(np.arange(3), 2)
        # add a second space to the dataset
        ds.fa['coord2'] = np.tile(np.arange(2), 3)

        measure = lambda x: "+".join([str(x) for x in x.samples[0]])

        # simply select each feature once
        res = Searchlight(measure,
                          IndexQueryEngine(coord1=Sphere(0),
                                           coord2=Sphere(0)),
                          nproc=1)(ds)
        assert_array_equal(res.samples, [['0', '1', '2', '3', '4', '5']])

        res = Searchlight(measure,
                          IndexQueryEngine(coord1=Sphere(0),
                                           coord2=Sphere(1)),
                          nproc=1)(ds)
        assert_array_equal(res.samples,
                           [['0+1', '0+1', '2+3', '2+3', '4+5', '4+5']])
        res = Searchlight(measure,
                          IndexQueryEngine(coord1=Sphere(1),
                                           coord2=Sphere(0)),
                          nproc=1)(ds)
        assert_array_equal(res.samples,
                           [['0+2', '1+3', '0+2+4', '1+3+5', '2+4', '3+5']])

    #@sweepargs(regr=regrswh[:])
    @reseed_rng()
    def test_regression_with_additional_sa(self):
        regr = regrswh[:][0]
        ds = datasets['3dsmall'].copy()
        ds.fa['voxel_indices'] = ds.fa.myspace

        # Create a new sample attribute which will be used along with
        # every searchlight
        ds.sa['beh'] = np.random.normal(size=(ds.nsamples, 2))

        # and now for fun -- let's create custom linear regression
        # targets out of some random feature and beh linearly combined
        rfeature = np.random.randint(ds.nfeatures)
        ds.sa.targets = np.dot(
            np.hstack((ds.sa.beh,
                       ds.samples[:, rfeature:rfeature + 1])),
            np.array([0.3, 0.2, 0.3]))

        class CrossValidationWithBeh(CrossValidation):
            """An adapter for regular CV which would hstack sa.beh to the
            searchlighting ds"""
            def _call(self, ds):
                return CrossValidation._call(
                    self,
                    Dataset(np.hstack((ds, ds.sa.beh)), sa=ds.sa))

        cvbeh = CrossValidationWithBeh(regr, OddEvenPartitioner(),
                                       errorfx=corr_error)
        # regular cv
        cv = CrossValidation(regr, OddEvenPartitioner(),
                             errorfx=corr_error)

        slbeh = sphere_searchlight(cvbeh, radius=1)
        slmapbeh = slbeh(ds)
        sl = sphere_searchlight(cv, radius=1)
        slmap = sl(ds)

        assert_equal(slmap.shape, (2, ds.nfeatures))
        # SL which had access to beh should for sure have got better
        # results, especially in the vicinity of the chosen feature...
        features = sl.queryengine.query_byid(rfeature)
        assert_array_lequal(slmapbeh.samples[:, features],
                            slmap.samples[:, features])
        # elsewhere they should tend to be better, but that is not guaranteed

    @labile(5, 1)
    def test_usecase_concordancesl(self):
        import numpy as np
        from mvpa2.base.dataset import vstack
        from mvpa2.mappers.fx import mean_sample

        # Take our sample 3d dataset
        ds1 = datasets['3dsmall'].copy(deep=True)
        ds1.fa['voxel_indices'] = ds1.fa.myspace
        ds1.sa['subject'] = [1]   # not really necessary -- but let's for clarity
        ds1 = mean_sample()(ds1)  # so we get just a single representative sample

        def corr12(ds):
            corr = np.corrcoef(ds.samples)
            assert(corr.shape == (2, 2))  # for paranoid ones
            return corr[0, 1]

        for nsc, thr, thr_mean in ((0, 1.0, 1.0),
                                   (0.1, 0.3, 0.8)):  # just a bit of noise
            ds2 = ds1.copy(deep=True)   # make a copy for the 2nd subject
            ds2.sa['subject'] = [2]
            ds2.samples += nsc * np.random.normal(size=ds1.shape)

            # make sure that both have the same voxel indices
            assert(np.all(ds1.fa.voxel_indices == ds2.fa.voxel_indices))
            ds_both = vstack((ds1, ds2))  # join 2 images into a single dataset
                                          # with .sa.subject distinguishing both

            sl = sphere_searchlight(corr12, radius=2)
            slmap = sl(ds_both)
            ok_(np.all(slmap.samples >= thr))
            ok_(np.mean(slmap.samples) >= thr_mean)

    def test_swaroop_case(self):
        """Test hdf5 backend to pass results on Swaroop's usecase
        """
        skip_if_no_external('h5py')
        from mvpa2.measures.base import Measure

        class sw_measure(Measure):
            def __init__(self):
                Measure.__init__(self, auto_train=True)

            def _call(self, dataset):
                # For performance measures -- increase to 50-200
                # np.sum here is just to get some meaningful value in them
                #return np.ones(shape=(2, 2))*np.sum(dataset)
                return Dataset(
                    np.array([{'d': np.ones(shape=(5, 5)) * np.sum(dataset)}],
                             dtype=object))

        results = []
        ds = datasets['3dsmall'].copy(deep=True)
        ds.fa['voxel_indices'] = ds.fa.myspace

        our_custom_prefix = tempfile.mktemp()
        for backend in ['native'] + \
                (externals.exists('h5py') and ['hdf5'] or []):
            sl = sphere_searchlight(sw_measure(),
                                    radius=1,
                                    tmp_prefix=our_custom_prefix,
                                    results_backend=backend)
            t0 = time.time()
            results.append(np.asanyarray(sl(ds)))
            # print "Done for backend %s in %d sec" % (backend, time.time() - t0)
        # because of swaroop's ad-hoc (who only could recommend such
        # a construct?) use case, and absent a fancy working
        # assert_objectarray_equal, let's compare manually
        #assert_objectarray_equal(*results)
        if not externals.exists('h5py'):
            self.assertRaises(RuntimeError,
                              sphere_searchlight,
                              sw_measure(),
                              results_backend='hdf5')
            raise SkipTest('h5py required for test of backend="hdf5"')
        assert_equal(results[0].shape, results[1].shape)
        results = [r.flatten() for r in results]
        for x, y in zip(*results):
            assert_equal(x.keys(), y.keys())
            assert_array_equal(x['d'], y['d'])
        # verify that no junk is left behind
        tempfiles = glob.glob(our_custom_prefix + '*')
        assert_equal(len(tempfiles), 0)

    def test_nblocks(self):
        skip_if_no_external('pprocess')
        # just a basic test to see that we are getting the same
        # results with different nblocks
        ds = datasets['3dsmall'].copy(deep=True)[:, :13]
        ds.fa['voxel_indices'] = ds.fa.myspace
        cv = CrossValidation(GNB(), OddEvenPartitioner())
        res1 = sphere_searchlight(cv, radius=1, nproc=2)(ds)
        res2 = sphere_searchlight(cv, radius=1, nproc=2, nblocks=5)(ds)
        assert_array_equal(res1, res2)

    def test_custom_results_fx_logic(self):
        # results_fx was introduced for the blow-up-the-memory-Swaroop
        # use case where keeping all intermediate results of the dark-magic
        # SL hyperalignment is not feasible.  So it is desirable to split
        # the searchlight computation into more blocks while composing the
        # target result "on-the-fly" from the results available so far.
        #
        # The implementation relies on generators feeding results_fx with
        # fresh results whenever those become available.
        #
        # This test/example's "measure" creates files which should be
        # handled (here: removed) by the results_fx function, to check
        # that we indeed get the desired high number of blocks with only
        # a limited nproc.
        skip_if_no_external('pprocess')

        tfile = tempfile.mktemp('mvpa', 'test-sl')

        ds = datasets['3dsmall'].copy()[:, :25]  # smaller copy
        ds.fa['voxel_indices'] = ds.fa.myspace
        ds.fa['feature_id'] = np.arange(ds.nfeatures)

        nproc = 3  # it is not about computing, so we can start more
        # processes than we possibly have CPUs, just to test
        nblocks = nproc * 7
        # figure out the max number of features given to any proc_block
        # yoh: not sure why I had to +1 here... but now it became more
        # robust and still seems to be doing what was demanded, so be it
        max_block = int(ceil(ds.nfeatures / float(nblocks)) + 1)

        def print_(s, *args):
            """For local debugging"""
            #print s, args
            pass

        def results_fx(sl=None, dataset=None, roi_ids=None, results=None):
            """It will "process" the results by removing those files
            generated inside the measure
            """
            res = []
            print_("READY")
            for x in results:
                ok_(isinstance(x, list))
                res.append(x)
                print_("R: ", x)
                for r in x:
                    # Can happen if we requested those .ca's enabled
                    # -- then automagically _proc_block would wrap
                    # results in a dataset... Originally detected by
                    # running with MVPA_DEBUG=.* which triggered
                    # enabling all ca's
                    if is_datasetlike(r):
                        r = np.asscalar(r.samples)
                    os.unlink(r)        # remove the generated file
                print_("WAITING")

            results_ds = hstack(sum(res, []))

            # store the center ids as a feature attribute since we use
            # them for testing
            results_ds.fa['center_ids'] = roi_ids
            return results_ds

        def results_postproc_fx(results):
            for ds in results:
                ds.fa['test_postproc'] = np.atleast_1d(ds.a.roi_center_ids ** 2)
            return results

        def measure(ds):
            """The "measure" checks if a run with the same "index"
            from the previous block has been processed by now
            """
            f = '%s+%03d' % (tfile, ds.fa.feature_id[0] % (max_block * nproc))
            print_("FID:%d f:%s" % (ds.fa.feature_id[0], f))
            # allow up to a few seconds for the file to disappear, i.e.
            # for its result from the previous "block" to get processed
            t0 = time.time()
            while os.path.exists(f) and time.time() - t0 < 4.:
                time.sleep(0.5)  # so it does take time to compute the measure
            if os.path.exists(f):
                print_("ERROR: ", f)
                raise AssertionError("File %s must have been processed by now"
                                     % f)
            open(f, 'w').write('XXX')  # signal that we have computed this measure
            print_("RES: %s" % f)
            return f

        sl = sphere_searchlight(measure,
                                radius=0,
                                nproc=nproc,
                                nblocks=nblocks,
                                results_postproc_fx=results_postproc_fx,
                                results_fx=results_fx,
                                center_ids=np.arange(ds.nfeatures))
        assert_equal(len(glob.glob(tfile + '*')), 0)  # so no junk around

        try:
            res = sl(ds)
            assert_equal(res.nfeatures, ds.nfeatures)
            # verify that results_postproc_fx did get called
            assert_array_equal(res.fa.test_postproc,
                               np.power(res.fa.center_ids, 2))
        finally:
            # remove the generated left-over files
            for f in glob.glob(tfile + '*'):
                os.unlink(f)
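# The test above exercises the results_fx protocol: sphere_searchlight
# calls results_fx with the searchlight instance, the input dataset, the
# roi_ids, and a *generator* yielding one list of results per computed
# block, and expects a single assembled dataset back.  Below is a minimal,
# hypothetical sketch of a conforming results_fx (names are illustrative,
# not from the original test suite):

def _example_results_fx(sl=None, dataset=None, roi_ids=None, results=None):
    """A minimal results_fx: drain the generator and hstack everything."""
    from mvpa2.base.dataset import hstack
    collected = []
    for block_results in results:     # blocks arrive as soon as computed
        collected.extend(block_results)
    out = hstack(collected)           # features back into a single dataset
    out.fa['center_ids'] = roi_ids    # keep the roi centers for bookkeeping
    return out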
def get_dsm_roi_xval1_firstlev(ds, rois, zscore_ds=True,
                               part=OddEvenPartitioner(),
                               cond_chunk='condition', fisher=False):
    """Obtain second-order dissimilarities between ROIs.

    This version cross-validates at the first level and returns only the
    first-level RDMs, without computing the distances between ROIs.

    Parameters
    ----------
    ds : dataset
    rois : dict
        each item in the dictionary must be a tuple where the 0th element
        is the center of the roi, and the 1st element is a list of ids
    zscore_ds : bool
        whether to zscore the data within each fold (leave False if the
        dataset is already zscored)
    part : partitioner
    cond_chunk : str
        sample attribute across which to perform mean group sample
    fisher : bool
        whether to fisher-transform the correlations before averaging
        across folds

    Returns
    -------
    dataset containing the first-level dsm of shape (nrois, ncond**2)
    """
    #ds = h5load(fns.betafn(subnr))
    #ds = ds[:, mask_]
    #ds = ds[ds.sa.condition != 'self']
    # set up oddeven partition
    #part = OddEvenPartitioner()
    mgs = mean_group_sample([cond_chunk])
    dissims_folds = []
    folds = 1
    for ds_ in part.generate(ds):
        print("Running fold {0}".format(folds))
        ds_1 = ds_[ds_.sa.partitions == 1]
        ds_2 = ds_[ds_.sa.partitions == 2]

        ds_1 = mgs(ds_1)
        ds_2 = mgs(ds_2)

        if ds_1.nsamples >= 4 and zscore_ds:
            zscore(ds_1, chunks_attr='chunks')
            zscore(ds_2, chunks_attr='chunks')
        assert(ds_1.samples.shape == ds_2.samples.shape)

        # first generate first-order rdms cross-validated across folds
        names = []
        centers = []
        dissims = []
        for roi, (center, ids) in rois.iteritems():
            names.append(roi)
            centers.append(center)
            sample1_roi = ds_1.samples[:, ids]
            sample2_roi = ds_2.samples[:, ids]
            dissim_roi = corrcoefxy(sample1_roi.T, sample2_roi.T,
                                    fisher=fisher)
            nsamples = ds_1.nsamples
            assert(dissim_roi.shape == (nsamples, nsamples))
            # now the RDM is not symmetrical anymore, so keep it whole
            dissims.append(dissim_roi.flatten())
        dissims_folds.append(np.array(dissims))
        folds += 1

    # average across folds
    dissims_folds = np.array(dissims_folds).mean(axis=0)
    assert(dissims_folds.shape == (len(names), nsamples ** 2))
    if fisher:
        # undo the fisher transform after averaging
        dissims_folds = np.tanh(dissims_folds)
    dissims_folds = dataset_wizard(dissims_folds, targets=names)
    dissims_folds.sa['centers'] = centers
    return dissims_folds
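# A self-contained usage sketch of the function above on synthetic data.
# All names, shapes, and ROI definitions here are hypothetical; real use
# would pass beta estimates and anatomically defined ROIs.

def _example_dsm_roi_usage():
    import numpy as np
    from mvpa2.datasets.base import dataset_wizard

    nconds, nchunks, nfeatures = 4, 4, 40
    ds = dataset_wizard(
        np.random.normal(size=(nconds * nchunks, nfeatures)),
        targets=['c%d' % c for c in range(nconds)] * nchunks,
        chunks=np.repeat(np.arange(nchunks), nconds))
    ds.sa['condition'] = ds.sa.targets
    # two fake ROIs: name -> (center feature id, list of feature ids)
    rois = {'roi1': (0, range(0, 20)),
            'roi2': (20, range(20, 40))}
    dsm = get_dsm_roi_xval1_firstlev(ds, rois, zscore_ds=False)
    assert dsm.shape == (len(rois), nconds ** 2)  # (nrois, ncond**2)
    return dsm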
def group_sample_loser_measure(attrs=('targets',)):
    '''Take the "loser" after averaging samples over attrs'''
    return ChainNode((mean_group_sample(attrs),
                      sample_loser_measure()))
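# sample_loser_measure() is defined elsewhere in this module.  For
# illustration only, here is a self-contained analogue of the chain above,
# with an argmin across the group-averaged samples standing in for the
# "loser" pick (a hypothetical sketch, not the original measure):

def _example_group_loser():
    import numpy as np
    from mvpa2.base.node import ChainNode
    from mvpa2.datasets.base import dataset_wizard
    from mvpa2.mappers.fx import mean_group_sample, FxMapper

    ds = dataset_wizard(np.random.normal(size=(8, 5)),
                        targets=[1, 1, 2, 2, 1, 1, 2, 2],
                        chunks=[1, 1, 1, 1, 2, 2, 2, 2])
    loser = ChainNode((mean_group_sample(['targets']),
                       FxMapper('samples', np.argmin)))
    # one sample holding, per feature, the index of the target whose
    # averaged response was smallest
    return loser(ds)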
                   roi_neighborhood=Sphere(6),
                   nruns=3,
                   nsubjects=2,
                   noise_subject_n=1,
                   noise_subject_std=5,
                   noise_subject_smooth=5,
                   noise_independent_std=4,
                   noise_independent_smooth=1.5,
                   noise_common_n=1,
                   noise_common_std=3)

# just a little helper
def get2d(ds):
    return dss[0].a.mapper.reverse(ds)

import pylab as pl
pl.clf()

# Sample plots
for s in [0, 1]:
    ds2 = get2d(dss[s])
    for r in [0, 1]:
        pl.subplot(3, 3, 1 + r + s * 3)
        pl.imshow(ds2[ds2.sa.chunks == r].samples[0],
                  interpolation='nearest')
        pl.ylabel('subj%d' % s)
        pl.xlabel('run%d' % r)
    pl.subplot(3, 3, 3 + s * 3)
    pl.imshow(get2d(mean_group_sample(['dissimilarity'])(dss[s]).samples)[0],
              interpolation='nearest')
    pl.xlabel('mean')

ds = dsvstack(dss)
# the stacked dataset loses the mapper -- reattach one subject's mapper
ds.a['mapper'] = dss[0].a.mapper
ds_mean = mean_group_sample(['dissimilarity', 'chunks'])(ds)
for r in [0, 1]:
    ds_mean_run = ds.a.mapper.reverse(ds_mean[ds_mean.sa.chunks == r])
    pl.subplot(3, 3, 1 + r + 2 * 3)
    pl.imshow(ds_mean_run.samples[0], interpolation='nearest')
    pl.ylabel('mean(subj)')
    pl.xlabel('run%d' % r)

ds_global_mean = mean_group_sample(['dissimilarity'])(ds)
pl.subplot(3, 3, 3 + 2 * 3)
pl.imshow(get2d(ds_global_mean).samples[0], interpolation='nearest')
pl.xlabel('mean')

pl.show()
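# For reference, a minimal sketch (synthetic data; the attribute name
# matches the plotting code above) of the grouping semantics relied upon:
# mean_group_sample averages all samples sharing a combination of the
# listed attributes, yielding one sample per combination.

def _example_grouping():
    import numpy as np
    from mvpa2.datasets.base import dataset_wizard
    from mvpa2.mappers.fx import mean_group_sample

    ds = dataset_wizard(np.arange(12.).reshape(6, 2),
                        targets=['d1', 'd1', 'd2', 'd2', 'd1', 'd2'],
                        chunks=[0, 0, 0, 1, 1, 1])
    ds.sa['dissimilarity'] = ds.sa.targets
    # 2 dissimilarity labels x 2 chunks -> 4 averaged samples
    assert mean_group_sample(['dissimilarity', 'chunks'])(ds).shape == (4, 2)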