def arg2neighbor(arg):
    # [attr:[shape:]]params
    comp = arg.split(':')
    if not len(comp):
        # need at least a radius
        raise ValueError("incomplete neighborhood specification")
    if len(comp) == 1:
        # [file|sphere radius]
        attr = 'voxel_indices'
        arg = comp[0]
        if os.path.isfile(arg) and arg.endswith('.py'):
            neighbor = script2obj(arg)
        else:
            from mvpa2.misc.neighborhood import Sphere
            neighbor = Sphere(int(arg))
    elif len(comp) == 2:
        # attr:[file|sphere radius]
        attr = comp[0]
        arg = comp[1]
        if os.path.isfile(arg) and arg.endswith('.py'):
            neighbor = script2obj(arg)
        else:
            from mvpa2.misc.neighborhood import Sphere
            neighbor = Sphere(int(arg))
    elif len(comp) > 2:
        attr = comp[0]
        shape = comp[1]
        params = [float(c) for c in comp[2:]]
        import mvpa2.misc.neighborhood as neighb
        neighbor = getattr(neighb, shape)(*params)
    return attr, neighbor

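# Hedged usage sketch for arg2neighbor() above; the expected results are
# read off the parsing logic (Sphere and HollowSphere live in
# mvpa2.misc.neighborhood).
attr, nb = arg2neighbor('3')          # -> ('voxel_indices', Sphere(3))
attr, nb = arg2neighbor('myspace:2')  # -> ('myspace', Sphere(2))
# explicit shape with float params, resolved via getattr on the module
attr, nb = arg2neighbor('voxel_indices:HollowSphere:2:1')
# -> ('voxel_indices', HollowSphere(2.0, 1.0))
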
def test_cached_qe_gnbsearchlight(self):
    ds1 = datasets['3dsmall'].copy(deep=True)
    qe = IndexQueryEngine(myspace=Sphere(2))
    cached_qe = CachedQueryEngine(qe)
    gnb_sl = GNBSearchlight(GNB(), NFoldPartitioner(), qe=cached_qe)
    res = gnb_sl(ds1)
    assert_false(cached_qe.ids is None)

def _get_trained_queryengines(self, datasets, queryengine, radius, ref_ds):
    """Helper to return trained query engine(s): either a list with a
    single engine, or one engine per dataset.

    If queryengine is None, an IndexQueryEngine based on radius is created.
    """
    ndatasets = len(datasets)
    if queryengine:
        if isinstance(queryengine, (list, tuple)):
            queryengines = queryengine
            if len(queryengines) != ndatasets:
                raise ValueError(
                    "%d query engines were specified although %d datasets "
                    "were provided" % (len(queryengines), ndatasets))
            _shpaldebug("Training provided query engines")
            for qe, ds in zip(queryengines, datasets):
                qe.train(ds)
        else:
            queryengine.train(datasets[ref_ds])
            queryengines = [queryengine]
    else:
        _shpaldebug(
            'No custom query engines were provided. Setting up the '
            'volumetric query engine on voxel_indices.')
        queryengine = IndexQueryEngine(voxel_indices=Sphere(radius))
        queryengine.train(datasets[ref_ds])
        queryengines = [queryengine]
    return queryengines

def test_gnbsearchlight_exclude_partition(self):
    # just a smoke test with a custom partitioner
    ds1 = datasets['3dsmall'].copy(deep=True)
    gnb_sl = GNBSearchlight(GNB(),
                            generator=CustomPartitioner([([0], [1])]),
                            qe=IndexQueryEngine(myspace=Sphere(2)),
                            errorfx=None)
    res = gnb_sl(ds1)

def test_simple_sim1_clean_per_subject():
    # no noise -- all must be clear
    dissims = [[0.9], [0.8], [0.5], [0.3]]
    args = (64, 64), dissims
    kwargs = dict(roi_neighborhood=Sphere(6), nruns=3, nsubjects=2)

    # clean case
    signal_clean, cluster_truth, dss = simple_sim1(
        *args,
        noise_subject_std=0, noise_independent_std=0, noise_common_std=0,
        **kwargs)
    # trailing 1 since we have only one dissimilarity value per ROI
    assert_equal(signal_clean.shape, (64, 64, 1))
    # all dss should be identical to signal_clean
    for ds in dss:
        for samples in ds[0].a.mapper.reverse(ds).samples:
            assert_array_almost_equal(signal_clean[..., 0], samples)

    # now let's generate common noise
    signal_clean, cluster_truth, dss = simple_sim1(
        *args,
        noise_subject_std=0, noise_independent_std=0, noise_common_std=100,
        **kwargs)
    # corr coeffs should be really high across all the runs and subjects
    all_subj_runs = np.corrcoef(np.vstack(dss))
    assert_true(
        np.all(
            np.abs(all_subj_runs[np.triu_indices(len(all_subj_runs))]) > 0.7))
    # but low to signal_clean
    assert_true(
        np.all(
            np.abs(np.corrcoef(signal_clean.flatten(),
                               np.vstack(dss))[0, 1:]) < 0.3))

    # now let's generate per-subject common noise
    signal_clean, cluster_truth, dss = simple_sim1(
        *args,
        noise_subject_std=100, noise_independent_std=0, noise_common_std=0,
        **kwargs)
    # corr coeffs should be really high across all the runs within each
    # subject but otherwise have low correlation
    all_subj_runs = np.corrcoef(np.vstack(dss))
    # TODO: fix up
    # assert_true(np.all(np.abs(
    #     all_subj_runs[np.triu_indices(len(all_subj_runs))]) > 0.8))
    # and low to signal_clean
    assert_true(
        np.all(
            np.abs(np.corrcoef(signal_clean.flatten(),
                               np.vstack(dss))[0, 1:]) < 0.3))

def test_splitter_gnbsearchlight(self):
    ds1 = datasets['3dsmall'].copy(deep=True)
    gnb_sl = GNBSearchlight(GNB(),
                            generator=CustomPartitioner([([0], [1])]),
                            qe=IndexQueryEngine(myspace=Sphere(2)),
                            splitter=Splitter(attr='partitions',
                                              attr_values=[1, 2]),
                            errorfx=None)
    res = gnb_sl(ds1)
    assert_equal(res.nsamples, (ds1.chunks == 1).sum())

def sphere_searchlight(datameasure, radius=1, center_ids=None,
                       space='voxel_indices', **kwargs):
    """Creates a `Searchlight` to run a scalar `Measure` on all possible
    spheres of a certain size within a dataset.

    The idea for a searchlight algorithm stems from a paper by
    :ref:`Kriegeskorte et al. (2006) <KGB06>`.

    Parameters
    ----------
    datameasure : callable
      Any object that takes a :class:`~mvpa2.datasets.base.Dataset` and
      returns some measure when called.
    radius : int
      All features within this radius around the center will be part of a
      sphere. Radius is in grid-indices, i.e. ``1`` corresponds to all
      immediate neighbors, regardless of the physical distance.
    center_ids : list of int
      List of feature ids (not coordinates) that shall serve as sphere
      centers. Alternatively, this can be the name of a feature attribute
      of the input dataset, whose non-zero values determine the feature
      ids. By default all features will be used (it is passed as the
      ``roi_ids`` argument of Searchlight).
    space : str
      Name of a feature attribute of the input dataset that defines the
      spatial coordinates of all features.
    **kwargs
      In addition this class supports all keyword arguments of its
      base-class :class:`~mvpa2.measures.base.Measure`.

    Notes
    -----
    If `Searchlight` is used as a `SensitivityAnalyzer` one has to make
    sure that the specified scalar `Measure` returns large (absolute)
    values for high sensitivities and small (absolute) values for low
    sensitivities. Especially when using error functions, low values
    usually imply high performance and therefore high sensitivity.
    This would in turn result in sensitivity maps that have low (absolute)
    values indicating high sensitivities, which conflicts with the
    intended behavior of a `SensitivityAnalyzer`.
    """
    # build a matching query engine from the arguments
    kwa = {space: Sphere(radius)}
    qe = IndexQueryEngine(**kwa)
    # init the searchlight with the queryengine
    return Searchlight(datameasure, queryengine=qe, roi_ids=center_ids,
                       **kwargs)

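# A minimal usage sketch for sphere_searchlight(), assuming `ds` is a
# labeled, chunked dataset (e.g. from fmri_dataset) and following
# mvpa2.suite naming; cross-validation error is computed in every sphere.
from mvpa2.suite import CrossValidation, GNB, NFoldPartitioner, mean_sample

cv = CrossValidation(GNB(), NFoldPartitioner())
sl = sphere_searchlight(cv, radius=3, space='voxel_indices',
                        postproc=mean_sample())
# res = sl(ds)  # one mean error value per sphere center
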
def sphere_gnbsearchlight(gnb, generator, radius=1, center_ids=None,
                          space='voxel_indices', *args, **kwargs):
    """Creates a `GNBSearchlight` to assess :term:`cross-validation`
    classification performance of GNB on all possible spheres of a
    certain size within a dataset.

    The idea of taking advantage of the naiveness of GNB for the sake of
    quick searchlight-ing stems from Francisco Pereira (paper under
    review).

    Parameters
    ----------
    radius : float
      All features within this radius around the center will be part of a
      sphere.
    center_ids : list of int
      List of feature ids (not coordinates) that shall serve as sphere
      centers. By default all features will be used (it is passed as the
      ``roi_ids`` argument of Searchlight).
    space : str
      Name of a feature attribute of the input dataset that defines the
      spatial coordinates of all features.
    **kwargs
      In addition this class supports all keyword arguments of
      :class:`~mvpa2.measures.gnbsearchlight.GNBSearchlight`.

    Notes
    -----
    If any `BaseSearchlight` is used as a `SensitivityAnalyzer` one has to
    make sure that the specified scalar `Measure` returns large (absolute)
    values for high sensitivities and small (absolute) values for low
    sensitivities. Especially when using error functions, low values
    usually imply high performance and therefore high sensitivity.
    This would in turn result in sensitivity maps that have low (absolute)
    values indicating high sensitivities, which conflicts with the
    intended behavior of a `SensitivityAnalyzer`.
    """
    # build a matching query engine from the arguments
    kwa = {space: Sphere(radius)}
    qe = IndexQueryEngine(**kwa)
    # init the searchlight with the queryengine
    return GNBSearchlight(gnb, generator, qe, roi_ids=center_ids,
                          *args, **kwargs)

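# Analogous hedged sketch for sphere_gnbsearchlight(): same idea as above,
# but the GNB classifier and the partition generator are passed explicitly,
# mirroring the GNBSearchlight tests elsewhere in the suite.
from mvpa2.suite import GNB, NFoldPartitioner

gnb_sl = sphere_gnbsearchlight(GNB(), NFoldPartitioner(), radius=2)
# res = gnb_sl(ds)  # `ds` assumed: dataset with fa.voxel_indices, sa.chunks
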
def test_add_center_fa(self):
    # just a smoke test pretty much
    ds = datasets['3dsmall'].copy()

    # check that we do not mark anything as center whenever there is none
    def check_no_center(ds):
        assert (not np.any(ds.fa.center))
        return 1.0

    # or just a single center in our case
    def check_center(ds):
        assert (np.sum(ds.fa.center) == 1)
        return 1.0

    for n, check in [(HollowSphere(1, 0), check_no_center),
                     (Sphere(0), check_center),
                     (Sphere(1), check_center)]:
        Searchlight(check, IndexQueryEngine(myspace=n),
                    add_center_fa='center')(ds)
    # and no changes to original ds data, etc
    assert_array_equal(datasets['3dsmall'].fa.keys(), ds.fa.keys())
    assert_array_equal(datasets['3dsmall'].samples, ds.samples)

def test_1d_multispace_searchlight(self):
    ds = Dataset([np.arange(6)])
    ds.fa['coord1'] = np.repeat(np.arange(3), 2)
    # add a second space to the dataset
    ds.fa['coord2'] = np.tile(np.arange(2), 3)
    measure = lambda x: "+".join([str(x) for x in x.samples[0]])
    # simply select each feature once
    res = Searchlight(measure,
                      IndexQueryEngine(coord1=Sphere(0), coord2=Sphere(0)),
                      nproc=1)(ds)
    assert_array_equal(res.samples, [['0', '1', '2', '3', '4', '5']])
    res = Searchlight(measure,
                      IndexQueryEngine(coord1=Sphere(0), coord2=Sphere(1)),
                      nproc=1)(ds)
    assert_array_equal(res.samples,
                       [['0+1', '0+1', '2+3', '2+3', '4+5', '4+5']])
    res = Searchlight(measure,
                      IndexQueryEngine(coord1=Sphere(1), coord2=Sphere(0)),
                      nproc=1)(ds)
    assert_array_equal(res.samples,
                       [['0+2', '1+3', '0+2+4', '1+3+5', '2+4', '3+5']])

def test_searchlight_hyperalignment(self):
    skip_if_no_external('scipy')
    skip_if_no_external('h5py')
    ds_orig = datasets['3dsmall'].copy()[:, :15]
    ds_orig.fa['voxel_indices'] = ds_orig.fa.myspace
    space = 'voxel_indices'
    # total number of datasets for the analysis
    nds = 5
    zscore(ds_orig, chunks_attr=None)
    dss = [ds_orig]
    # create a few distorted datasets to match the desired number of datasets
    # not sure if this truly mimics the real data, but at least we can test
    # the implementation
    while len(dss) < nds - 1:
        sd = local_random_affine_transformations(
            ds_orig,
            scatter_neighborhoods(Sphere(1), ds_orig.fa[space].value,
                                  deterministic=True)[1],
            Sphere(2),
            space=space,
            scale_fac=1.0,
            shift_fac=0.0)
        # sometimes the function above returns a dataset with nans or infs
        # -- we don't want that
        if np.sum(np.isnan(sd.samples) + np.isinf(sd.samples)) == 0 \
                and np.all(sd.samples.std(0)):
            dss.append(sd)
    ds_orig_noisy = ds_orig.copy()
    ds_orig_noisy.samples += 0.1 * np.random.random(size=ds_orig_noisy.shape)
    dss.append(ds_orig_noisy)
    _ = [zscore(sd, chunks_attr=None) for sd in dss[1:]]
    # we should have some distortion
    for ds in dss[1:]:
        assert_false(np.all(ds_orig.samples == ds.samples))
    # testing checks
    slhyp = SearchlightHyperalignment(ref_ds=1, exclude_from_model=[1])
    self.assertRaises(ValueError, slhyp, dss[:3])
    slhyp = SearchlightHyperalignment(ref_ds=3)
    self.assertRaises(ValueError, slhyp, dss[:3])
    # explicit test of exclude_from_model
    slhyp = SearchlightHyperalignment(ref_ds=2, exclude_from_model=[1],
                                      featsel=0.7)
    projs1 = slhyp(dss)
    aligned1 = [proj.forward(ds) for proj, ds in zip(projs1, dss)]
    samples = dss[1].samples.copy()
    dss[1].samples += 0.1 * np.random.random(size=dss[1].shape)
    projs2 = slhyp(dss)
    aligned2 = [proj.forward(ds) for proj, ds in zip(projs1, dss)]
    for i in [0, 2, 3, 4]:
        assert_array_almost_equal(projs1[i].proj.todense(),
                                  projs2[i].proj.todense())
        assert_array_almost_equal(aligned1[i].samples, aligned2[i].samples)
    assert_false(
        np.all(projs1[1].proj.todense() == projs1[2].proj.todense()))
    assert_false(np.all(aligned1[1].samples == aligned2[1].samples))
    dss[1].samples = samples
    # store projections for each mapper separately
    projs = list()
    # run the algorithm with all combinations of the two major parameters
    # for projection calculation
    for kwargs in [{'combine_neighbormappers': True,
                    'nproc': 1 + int(externals.exists('pprocess'))},
                   {'combine_neighbormappers': True, 'dtype': 'float64',
                    'compute_recon': True},
                   {'combine_neighbormappers': True,
                    'exclude_from_model': [2, 4]},
                   {'combine_neighbormappers': False},
                   {'combine_neighbormappers': False,
                    'mask_node_ids': np.arange(dss[0].nfeatures).tolist()},
                   {'combine_neighbormappers': True, 'sparse_radius': 1},
                   {'combine_neighbormappers': True, 'nblocks': 2}]:
        slhyp = SearchlightHyperalignment(radius=2, **kwargs)
        mappers = slhyp(dss)
        # one mapper per input ds
        assert_equal(len(mappers), nds)
        projs.append(mappers)
    # some checks
    for midx in range(nds):
        # making sure the mask_node_ids option works as expected
        assert_array_almost_equal(projs[3][midx].proj.todense(),
                                  projs[4][midx].proj.todense())
        # recon check
        assert_array_almost_equal(projs[0][midx].proj.todense(),
                                  projs[1][midx].recon.T.todense(),
                                  decimal=5)
        assert_equal(projs[1][midx].proj.dtype, 'float64')
        assert_equal(projs[0][midx].proj.dtype, 'float32')
    # making sure the projections make sense
    for proj in projs:
        # no .max() on sparse matrices on older scipy (e.g. on precise),
        # so convert to an array first
        max_weight = proj[0].proj.toarray().max(1).squeeze()
        diag_weight = proj[0].proj.diagonal()
        # check that the diagonal is the max weight, in almost all rows,
        # for the reference subject
        assert (np.sum(max_weight == diag_weight)
                / float(len(diag_weight)) >= 0.80)
        # and not true for other subjects
        for i in range(1, nds - 1):
            assert (np.sum(proj[i].proj.toarray().max(1).squeeze()
                           == proj[i].proj.diagonal())
                    / float(proj[i].proj.shape[0]) < 0.80)
        # check that projection weights match across duplicate datasets
        max_weight = proj[-1].proj.toarray().max(1).squeeze()
        diag_weight = proj[-1].proj.diagonal()
        # the diagonal should again be the max weight in almost all rows
        assert (np.sum(max_weight == diag_weight)
                / float(len(diag_weight)) >= 0.80)
    # project data
    dss_hyper = [hm.forward(sd) for hm, sd in zip(projs[0], dss)]
    _ = [zscore(sd, chunks_attr=None) for sd in dss_hyper]
    ndcss = []
    nf = ds_orig.nfeatures
    for ds_hyper in dss_hyper:
        ndcs = np.diag(np.corrcoef(ds_hyper.samples.T,
                                   ds_orig.samples.T)[nf:, :nf], k=0)
        ndcss += [ndcs]
    assert_true(np.median(ndcss[0]) > 0.9)
    # a noisy copy of the original dataset should be similar to the
    # original after hyperalignment
    assert_true(np.median(ndcss[-1]) > 0.9)
    assert_true(np.all([np.median(ndcs) > 0.2 for ndcs in ndcss[1:-2]]))

if __name__ == '__main__':
    a_clean, cluster_truth, dss = simple_sim1(
        (64, 64), [[1], [0.8], [0.5], [0.3]],
        roi_neighborhood=Sphere(6),
        nruns=3, nsubjects=2,
        noise_subject_n=1, noise_subject_std=5, noise_subject_smooth=5,
        noise_independent_std=4, noise_independent_smooth=1.5,
        noise_common_n=1, noise_common_std=3)

    # just a little helper
    def get2d(ds):
        return dss[0].a.mapper.reverse(ds)

    import pylab as pl

def test_voxel_selection(self):
    '''Compare surface and volume based searchlight'''
    '''
    Tests to see whether results are identical for surface-based
    searchlight (just one plane; Euclidean distance) and volume-based
    searchlight.

    Note that the current value is a float; if it were int, it would
    specify the number of voxels in each searchlight'''

    radius = 10.

    '''Define input filenames'''
    epi_fn = os.path.join(pymvpa_dataroot, 'bold.nii.gz')
    maskfn = os.path.join(pymvpa_dataroot, 'mask.nii.gz')

    '''
    Use the EPI datafile to define a surface.
    The surface has as many nodes as there are voxels
    and is parallel to the volume 'slice'
    '''
    vg = volgeom.from_any(maskfn, mask_volume=True)

    aff = vg.affine
    nx, ny, nz = vg.shape[:3]

    '''Plane goes in x and y direction, so we take these vectors
    from the affine transformation matrix of the volume'''
    plane = surf.generate_plane(aff[:3, 3], aff[:3, 0], aff[:3, 1], nx, ny)

    '''
    Simulate pial and white matter as just above and below
    the central plane
    '''
    normal_vec = aff[:3, 2]
    outer = plane + normal_vec
    inner = plane + -normal_vec

    '''
    Combine volume and surface information
    '''
    vsm = volsurf.VolSurfMaximalMapping(vg, outer, inner)

    '''
    Run voxel selection with specified radius (in mm), using
    Euclidean distance measure
    '''
    surf_voxsel = surf_voxel_selection.voxel_selection(
        vsm, radius, distance_metric='e')

    '''Define the measure'''
    # run_slow=True would give an actual cross-validation with meaningful
    # accuracies. Because this is a unit-test only the number of voxels
    # in each searchlight is tested.
    run_slow = False

    if run_slow:
        meas = CrossValidation(GNB(), OddEvenPartitioner(),
                               errorfx=lambda p, t: np.mean(p == t))
        postproc = mean_sample
    else:
        meas = _Voxel_Count_Measure()
        postproc = lambda x: x

    '''
    Surface analysis: define the query engine, cross validation,
    and searchlight
    '''
    surf_qe = SurfaceVerticesQueryEngine(surf_voxsel)
    surf_sl = Searchlight(meas, queryengine=surf_qe, postproc=postproc)

    '''
    new (Sep 2012): also test 'simple' queryengine wrapper function
    '''
    surf_qe2 = disc_surface_queryengine(radius, maskfn, inner, outer,
                                        plane, volume_mask=True,
                                        distance_metric='euclidean')
    surf_sl2 = Searchlight(meas, queryengine=surf_qe2, postproc=postproc)

    '''
    Same for the volume analysis
    '''
    element_sizes = tuple(map(abs, (aff[0, 0], aff[1, 1], aff[2, 2])))
    sph = Sphere(radius, element_sizes=element_sizes)
    kwa = {'voxel_indices': sph}

    vol_qe = IndexQueryEngine(**kwa)
    vol_sl = Searchlight(meas, queryengine=vol_qe, postproc=postproc)

    '''The following steps are similar to start_easy.py'''
    attr = SampleAttributes(
        os.path.join(pymvpa_dataroot, 'attributes_literal.txt'))

    mask = surf_voxsel.get_mask()

    dataset = fmri_dataset(
        samples=os.path.join(pymvpa_dataroot, 'bold.nii.gz'),
        targets=attr.targets, chunks=attr.chunks, mask=mask)

    if run_slow:
        # do chunkswise linear detrending on dataset
        poly_detrend(dataset, polyord=1, chunks_attr='chunks')

        # zscore dataset relative to baseline ('rest') mean
        zscore(dataset, chunks_attr='chunks',
               param_est=('targets', ['rest']))

    # select class face and house for this demo analysis
    # would work with full datasets (just a little slower)
    dataset = dataset[np.array(
        [l in ['face', 'house'] for l in dataset.sa.targets], dtype='bool')]

    '''Apply searchlight to datasets'''
    surf_dset = surf_sl(dataset)
    surf_dset2 = surf_sl2(dataset)
    vol_dset = vol_sl(dataset)

    surf_data = surf_dset.samples
    surf_data2 = surf_dset2.samples
    vol_data = vol_dset.samples

    assert_array_equal(surf_data, surf_data2)
    assert_array_equal(surf_data, vol_data)

def simple_sim1(
        shape, dissims,
        rois_arrangement='circle',
        roi_neighborhood=Sphere(5),
        nruns=1, nsubjects=1,
        # noise components -- we just add normal for now also with
        # spatial smoothing to possibly create difference in noise
        # characteristics across different kinds
        #
        # "Instrumental noise" -- generic nuisance
        noise_independent_std=0.4, noise_independent_smooth=3.,
        # "Intrinsic signal", specific per each subject (due to
        # motion, whatever) -- might be fun for someone to cluster,
        # but irrelevant for us
        noise_subject_n=1, noise_subject_std=0.4, noise_subject_smooth=1.5,
        # "Intrinsic common signal" -- probably generalizes across
        # subjects and fun for someone studying veins to get those
        # reproducible clusters. It will be mixed in also with
        # different weights per each run.
        # Again -- might be fun for someone to cluster, but not for us
        # since it would not be representative of the original signal
        noise_common_n=1, noise_common_std=0.4, noise_common_smooth=2.):
    """Simulate "data" containing similarity matrices with 3 noise
    components for multiple subjects

    Noise components are:

    - random normal noise, also spatially smoothed (should probably have
      a smaller sigma for smoothing than the intrinsic noise)

    - intrinsic noise, composed from a set of random fields generated by
      random normal noise with subsequent spatial filtering, which are
      then mixed into each run's data with random weights. They simulate
      subject-specific intrinsic signals such as artifacts due to motion
      and possible subject-specific physiological processes

    - intrinsic noise common across subjects (e.g. all of them have
      similar blood distribution networks and other physiological
      parameters, and some intrinsic networks, which although similar in
      space would have different mix-in coefficients across
      subjects/runs)

    Theoretically, decomposition methods (such as ICA, PCA, etc) should
    help to identify such common noise components and filter them out.
    Also, methods which iteratively remove non-informative projections
    (such as GLMdenoise) should be effective in identifying those mix-ins.

    TODO: now mix-in happens with purely normal random weights; ideally
    we should color those as well
    """
    ndissims = len(dissims)

    # first we fisher-transform so we can add normal noise
    # check first that we don't have extreme values that might give infinity
    dissims = np.array(dissims)
    dissims = 1. - dissims
    dissims[dissims == 1] = 0.99
    dissims[dissims == -1] = -0.99
    # fisher
    dissims = np.arctanh(dissims)

    # generate target clean "picture"
    d = np.asanyarray(dissims[0])
    signal_clean = np.zeros(shape + (len(vector_form(d)),))

    # generate ground truth for clustering
    cluster_truth = np.zeros(shape, dtype='int')

    if rois_arrangement == 'circle':
        radius = min(shape[:2]) / 4.
        center = np.array((radius * 2,) * len(shape)).astype(int)
        # arrange at quarter distance from center
        for i, dissim in enumerate(dissims):
            dissim = vector_form(dissim)
            # that is kinda boring -- the same dissimilarity to each
            # voxel???
            #
            # TODO: come up with a better arrangement/idea, e.g. to
            # generate an MVPA pattern which would satisfy the
            # dissimilarity (not exactly but at least close). That
            # would make more sense
            roi_center = center.copy()
            roi_center[0] += int(radius * np.cos(2 * np.pi * i / ndissims))
            roi_center[1] += int(radius * np.sin(2 * np.pi * i / ndissims))
            for coords in roi_neighborhood(roi_center):
                acoords = np.asanyarray(coords)
                if np.all(acoords >= [0] * len(coords)) and \
                        np.all(acoords < signal_clean.shape[:len(coords)]):
                    signal_clean.__setitem__(coords, dissim)
                    cluster_truth.__setitem__(coords, i + 1)
    else:
        raise ValueError("I know only circle")

    # generated randomly and will be mixed into subjects with different weights
    # TODO: static across runs within subject?? if so -- would be no different
    #       from having RSAs?
    common_noises = get_intrinsic_noises(signal_clean.shape,
                                         std=noise_common_std,
                                         sigma=noise_common_smooth,
                                         n=noise_common_n)
    assert common_noises[0].ndim == 3, "There should be no time comp"

    # now let's generate per-subject and per-run data by adding some noise(s)
    # all_signals = []
    dss = []
    for isubject in xrange(nsubjects):
        # Interesting noise, simulating some underlying process which has
        # nothing to do with the original design/similarity but has
        # spatial structure which repeats through runs with random weights
        # (consider it to be a principal component).
        # Generated randomly for each subject separately, but with common
        # structure across runs
        subj_specific_noises = get_intrinsic_noises(
            signal_clean.shape,
            std=noise_subject_std,
            sigma=noise_subject_smooth,
            n=noise_subject_n)
        assert subj_specific_noises[0].ndim == 3, \
            "There should be no time comp"
        # subject_signals = []
        dss_subject = []
        subj_common_noises = [noise * np.random.normal()
                              for noise in common_noises]
        subj_specific_mixins = generate_mixins(nruns)
        subj_common_mixins = generate_mixins(nruns)
        for run in range(nruns):
            signal_run = signal_clean.copy()
            for noise in subj_specific_noises:
                signal_run += noise * subj_specific_mixins[run]
            for noise in subj_common_noises:
                signal_run += noise * subj_common_mixins[run]
            # Instrumental noise -- the most banal: generic noise with
            # no common structure across subjects/runs
            signal_run += filter_each_2d(
                np.random.normal(size=signal_clean.shape)
                * noise_independent_std,
                noise_independent_smooth)
            # go back to correlations with inverse of fisher
            signal_run = np.tanh(signal_run)
            # rollaxis to bring similarities into leading dimension
            ds = Dataset(np.rollaxis(signal_run, 2, 0))
            ds.sa['chunks'] = [run]
            ds.sa['dissimilarity'] = np.arange(len(dissim))  # Lame one for now
            ds_flat = ds.get_mapped(
                FlattenMapper(shape=ds.shape[1:], space='pixel_indices'))
            dss_subject.append(ds_flat)
            # subject_signals.append(signal_run)
        # all_signals.append(subject_signals)
        ds = dsvstack(dss_subject)
        ds.a['mapper'] = dss_subject[0].a.mapper  # .a not transferred by vstack
        dss.append(ds)

    assert (len(dss) == nsubjects)
    assert (len(dss[0]) == nruns * len(dissim))

    return np.tanh(signal_clean), cluster_truth, dss

N_BLOCKS = 128
cnx_tx = 489
toutdir = os.path.join(
    basedir, 'transformation_matrices',
    'iterative_cha_olp4cbp_mappers' + '_' + 'subs-' + str(nsubs)
    + '_' + 'radius1-10_radius2-' + str(HYPERALIGNMENT_RADIUS) + '.hdf5.gz')
print(toutdir)

# load nifti as a pymvpa dataset and then use that as ref_ds in the
# queryengine definition; mask with the data in the brainmask so only
# 170k voxels (the size of the connectomes) are included
ref_ds = fmri_dataset(os.path.join(helperfiles, 'newbrainmask.nii'),
                      mask=os.path.join(helperfiles, 'newbrainmask.nii'))
print('Size of brain mask:')
print(str(len(ref_ds.fa.voxel_indices)))

# set searchlight sphere radius
sl_radius = HYPERALIGNMENT_RADIUS

# create query engine
qe = IndexQueryEngine(voxel_indices=Sphere(sl_radius))
qe.train(ref_ds)

# load all subjects
nfiles = glob.glob(os.path.join(chamats, '*commonspace_subs*'))
print('Loading participant data from: ')
print(chamats)
mysubs = nfiles[0:nsubs]

# import connectomes into a pymvpa dataset, zscore, then add chunks and
# voxel indices, and append to a list of datasets
dss = []
for sub in range(len(mysubs)):
    ds = mv.Dataset(np.load(mysubs[sub]))
    ds.fa['voxel_indices'] = range(ds.shape[1])
    # ds.sa['chunks'] = np.repeat(i, cnx_tx)
    mv.zscore(ds, chunks_attr=None)
    dss.append(ds)

def get_masked_array(self, nt=None, dilate=None):
    '''Provides a masked numpy array

    Parameters
    ----------
    nt: int or None
        Number of timepoints (or samples). Each feature has the same
        value (1 if in the mask, 0 otherwise) for each sample. If nt is
        None, then the output is 3D; otherwise it is 4D with 'nt' values
        in the last dimension.
    dilate: callable or int or None
        Specification of mask dilation. If a callable, it should be a
        neighborhood function (like Sphere(..)) that can map a single
        voxel coordinate (represented as a triple of indices) to a list
        of voxel coordinates that define the neighborhood of that
        coordinate. For example, Sphere(3) can be used to dilate the
        original mask by 3 voxels. If an int, then it uses Sphere(dilate)
        to dilate the mask. If set to None the mask is not dilated.

    Returns
    -------
    msk: numpy.ndarray
        an array with values of 1 for voxels inside the mask and values
        of 0 elsewhere. If the instance has no mask, then all values
        are 1.
    '''
    data_vec = np.zeros((self.nvoxels,), dtype=np.float32)

    if self.mask is None:
        data_vec[:] = 1
    else:
        data_vec[self.mask] = 1

    # see if the mask has to be dilated.
    # if all voxels are already in the mask this can be omitted
    if dilate is not None and self.nvoxels_mask != self.nvoxels:
        if type(dilate) is int:
            dilate = Sphere(dilate)

        # offsets
        deltas = dilate((0, 0, 0))

        # positions of nonzero voxels
        data_ijks = self.lin2ijk(np.nonzero(data_vec)[0])

        # helper function
        def add_tuple(x, y):
            return (x[0] + y[0], x[1] + y[1], x[2] + y[2])

        # gather all subindices here
        dilate_ijk = set()

        # all combinations of offsets and positions of voxels in the mask
        for delta in deltas:
            if delta != (0, 0, 0):
                for data_ijk in data_ijks:
                    pos = add_tuple(delta, data_ijk)
                    dilate_ijk.add(pos)

        if dilate_ijk:
            dilate_lin = self._ijk2lin_unmasked(list(dilate_ijk))
            lin_mask = self.contains_lin(dilate_lin, apply_mask=False)
            data_vec[dilate_lin[lin_mask]] = 1

    sh = self.shape
    data_t1 = np.reshape(data_vec, sh[:3])

    if nt is not None:
        sh = (sh[0], sh[1], sh[2], nt)
        data = np.zeros(sh, data_vec.dtype)
        for t in xrange(nt):
            data[:, :, :, t] = data_t1
        return data
    else:
        return data_t1

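# Hedged sketch of the dilate parameter of get_masked_array(): an int is
# promoted to Sphere(dilate), while any neighborhood callable is used
# directly. Assumes `vg` is an instance of the enclosing volume-geometry
# class with a mask set.
msk = vg.get_masked_array()                    # 3D, undilated
msk_dil2 = vg.get_masked_array(dilate=2)       # same as dilate=Sphere(2)
msk4d = vg.get_masked_array(nt=10, dilate=Sphere(3))  # 4D, 10 timepoints
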
def __call__(self, datasets):
    """Estimate mappers for each dataset using searchlight-based
    hyperalignment.

    Parameters
    ----------
    datasets : list or tuple of datasets

    Returns
    -------
    A list of trained StaticProjectionMappers of the same length as
    datasets
    """
    # Perform some checks first before modifying internal state
    params = self.params
    ndatasets = len(datasets)

    if len(datasets) <= 1:
        raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                         "operate on. Got: %d" % ndatasets)

    if params.ref_ds in params.exclude_from_model:
        raise ValueError("Requested reference dataset %i is also "
                         "in the exclude list." % params.ref_ds)

    if params.ref_ds >= ndatasets:
        raise ValueError("Requested reference dataset %i is out of "
                         "bounds. We have only %i datasets provided"
                         % (params.ref_ds, ndatasets))

    # The rest of the checks are just warnings
    self.ndatasets = ndatasets

    _shpaldebug("SearchlightHyperalignment %s for %i datasets"
                % (self, self.ndatasets))

    selected = [_ for _ in range(ndatasets)
                if _ not in params.exclude_from_model]
    ref_ds_train = selected.index(params.ref_ds)
    params.hyperalignment.params.ref_ds = ref_ds_train
    warning('Using %dth dataset as the reference dataset (%dth after '
            'excluding datasets)' % (params.ref_ds, ref_ds_train))
    if len(params.exclude_from_model) > 0:
        warning("These datasets will not participate in building common "
                "model: %s" % params.exclude_from_model)

    if __debug__:
        # verify that datasets were zscored prior to the alignment, since
        # it is an assumed/required preprocessing step
        for ids, ds in enumerate(datasets):
            for f, fname, tval in ((np.mean, 'means', 0),
                                   (np.std, 'stds', 1)):
                vals = f(ds, axis=0)
                vals_comp = np.abs(vals - tval) > 1e-5
                if np.any(vals_comp):
                    warning(
                        '%d %s are too different (max diff=%g) from %d in '
                        'dataset %d to come from a zscored dataset. '
                        'Please zscore datasets first for correct '
                        'operation (unless it was intentional)'
                        % (np.sum(vals_comp), fname,
                           np.max(np.abs(vals)), tval, ids))

    # Setting up SearchlightHyperalignment
    # we need to know which original features were comprising the
    # individual SL ROIs
    _shpaldebug('Initializing FeatureSelectionHyperalignment.')
    hmeasure = FeatureSelectionHyperalignment(
        ref_ds=params.ref_ds,
        featsel=params.featsel,
        hyperalignment=params.hyperalignment,
        full_matrix=params.combine_neighbormappers,
        use_same_features=params.use_same_features,
        exclude_from_model=params.exclude_from_model,
        dtype=params.dtype)

    # Performing SL processing manually
    _shpaldebug("Setting up for searchlights")
    if params.nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            params.nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            params.nproc = 1

    # XXX I think this class should already accept a single dataset only.
    # It should have a ``space`` setting that names a sample attribute
    # that can be used to identify individual/original datasets.
    # Taking a single dataset as argument would be cleaner, because the
    # algorithm relies on the assumption that there is a coarse feature
    # alignment, i.e. the SL ROIs cover roughly the same area
    queryengines = self._get_trained_queryengines(
        datasets, params.queryengine, params.radius, params.ref_ds)
    # For surface-nodes-to-voxels queryengines, roi_seed hardly makes sense
    qe = queryengines[(0 if len(queryengines) == 1 else params.ref_ds)]
    if isinstance(qe, SurfaceVerticesQueryEngine):
        self.force_roi_seed = False
        if not self.params.combine_neighbormappers:
            raise NotImplementedError(
                "Mapping from voxels to surface nodes is not "
                "implemented yet. Try setting combine_neighbormappers "
                "to True.")
    self.nfeatures = datasets[params.ref_ds].nfeatures
    _shpaldebug("Performing Hyperalignment in searchlights")
    # Setting up centers for running SL Hyperalignment
    if params.sparse_radius is None:
        roi_ids = self._get_verified_ids(queryengines) \
            if params.mask_node_ids is None \
            else params.mask_node_ids
    else:
        if params.queryengine is not None:
            raise NotImplementedError(
                "using sparse_radius whenever a custom queryengine is "
                "provided is not yet supported.")
        _shpaldebug("Setting up sparse neighborhood")
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        if params.mask_node_ids is None:
            scoords, sidx = scatter_neighborhoods(
                Sphere(params.sparse_radius),
                datasets[params.ref_ds].fa.voxel_indices,
                deterministic=True)
            roi_ids = sidx
        else:
            scoords, sidx = scatter_neighborhoods(
                Sphere(params.sparse_radius),
                datasets[params.ref_ds].fa.voxel_indices[
                    params.mask_node_ids],
                deterministic=True)
            roi_ids = [params.mask_node_ids[sid] for sid in sidx]

    # Initialize projections
    _shpaldebug('Initializing projection matrices')
    self.projections = [
        csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
        for isub in range(self.ndatasets)]

    # compute
    if params.nproc is not None and params.nproc > 1:
        # split all target ROI centers into `nproc` equally sized blocks
        nproc_needed = min(len(roi_ids), params.nproc)
        params.nblocks = nproc_needed \
            if params.nblocks is None else params.nblocks
        params.nblocks = min(len(roi_ids), params.nblocks)
        node_blocks = np.array_split(roi_ids, params.nblocks)
        # the next block sets up the infrastructure for parallel
        # computing; this can easily be changed into a ParallelPython
        # loop, if we decide to have a PP job server in PyMVPA
        import pprocess
        p_results = pprocess.Map(limit=nproc_needed)
        if __debug__:
            debug('SLC', "Starting off %s child processes for nblocks=%i"
                  % (nproc_needed, params.nblocks))
        compute = p_results.manage(pprocess.MakeParallel(self._proc_block))
        seed = mvpa2.get_random_seed()
        for iblock, block in enumerate(node_blocks):
            # should we maybe deepcopy the measure to have a unique and
            # independent one per process?
            compute(block, datasets, copy.copy(hmeasure), queryengines,
                    seed=seed, iblock=iblock)
    else:
        # otherwise collect the results in a 1-item list
        _shpaldebug('Using 1 process to compute mappers.')
        if params.nblocks is None:
            params.nblocks = 1
        params.nblocks = min(len(roi_ids), params.nblocks)
        node_blocks = np.array_split(roi_ids, params.nblocks)
        p_results = [self._proc_block(block, datasets, hmeasure,
                                      queryengines)
                     for block in node_blocks]
    results_ds = self.__handle_all_results(p_results)
    # Dummy iterator for, you know, iteration
    list(results_ds)

    _shpaldebug('Wrapping projection matrices into '
                'StaticProjectionMappers')
    self.projections = [
        StaticProjectionMapper(proj=proj, recon=proj.T)
        if params.compute_recon
        else StaticProjectionMapper(proj=proj)
        for proj in self.projections]
    return self.projections

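# Hedged end-to-end sketch of __call__() above, following its usage in the
# test suite: one StaticProjectionMapper is returned per input dataset,
# which can then forward-map that dataset into the common space. `dss` is
# assumed to be a list of zscored datasets with fa.voxel_indices.
slhyp = SearchlightHyperalignment(radius=2, combine_neighbormappers=True)
mappers = slhyp(dss)
aligned = [m.forward(ds) for m, ds in zip(mappers, dss)]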