def test_scattered_neighborhoods():
    radius = 1
    sphere = ne.Sphere(radius)
    coords = range(50)
    scoords, sidx = ne.scatter_neighborhoods(sphere, coords,
                                             deterministic=False)
    # for this specific case of 1d coordinates the coords and idx should be
    # identical
    assert_array_equal(scoords, sidx)
    # minimal difference of successive coordinates must be larger than the
    # radius of the spheres. Test only works for 1d coords and sorted return
    # values
    assert (np.diff(scoords).min() > radius)
    # now the same for the case where a particular coordinate appears multiple
    # times
    coords = range(10) + range(10)
    scoords, sidx = ne.scatter_neighborhoods(sphere, coords,
                                             deterministic=False)
    sidx = sorted(sidx)
    assert_array_equal(scoords, sidx[:5])
    assert_array_equal(scoords, [i - 10 for i in sidx[5:]])
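
# --- illustrative sketch, not part of the original test module --------------
# The test above only exercises scatter_neighborhoods() on 1-d coordinates; a
# minimal, hedged sketch of the 3-d case it is normally used for in the code
# further below. It only relies on names already used in this section
# (Sphere and scatter_neighborhoods from mvpa2.misc.neighborhood); the
# function name itself is hypothetical.
def _example_scatter_neighborhoods_3d():
    import numpy as np
    from mvpa2.misc.neighborhood import Sphere, scatter_neighborhoods
    # a small 4x4x4 grid of voxel coordinates
    coords = np.array([(x, y, z)
                       for x in range(4) for y in range(4) for z in range(4)])
    # pick seed locations so that spheres of radius 2 around them cover the
    # grid with little overlap; deterministic=True makes the pick reproducible
    scoords, sidx = scatter_neighborhoods(Sphere(2), coords,
                                          deterministic=True)
    # scoords are the selected coordinates, sidx their indices into `coords`
    return scoords, sidx
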
def test_searchlight_hyperalignment(self):
    skip_if_no_external('scipy')
    skip_if_no_external('h5py')
    ds_orig = datasets['3dsmall'].copy()[:, :15]
    ds_orig.fa['voxel_indices'] = ds_orig.fa.myspace
    space = 'voxel_indices'
    # total number of datasets for the analysis
    nds = 5
    zscore(ds_orig, chunks_attr=None)
    dss = [ds_orig]
    # create a few distorted datasets to match the desired number of datasets
    # not sure if this truly mimics the real data, but at least we can test
    # the implementation
    while len(dss) < nds - 1:
        sd = local_random_affine_transformations(
            ds_orig,
            scatter_neighborhoods(Sphere(1), ds_orig.fa[space].value,
                                  deterministic=True)[1],
            Sphere(2),
            space=space,
            scale_fac=1.0, shift_fac=0.0)
        # sometimes the above function returns a dataset with nans/infs;
        # we don't want that
        if np.sum(np.isnan(sd.samples) + np.isinf(sd.samples)) == 0 \
                and np.all(sd.samples.std(0)):
            dss.append(sd)
    ds_orig_noisy = ds_orig.copy()
    ds_orig_noisy.samples += 0.1 * np.random.random(size=ds_orig_noisy.shape)
    dss.append(ds_orig_noisy)
    _ = [zscore(sd, chunks_attr=None) for sd in dss[1:]]
    # we should have some distortion
    for ds in dss[1:]:
        assert_false(np.all(ds_orig.samples == ds.samples))
    # testing checks
    slhyp = SearchlightHyperalignment(ref_ds=1, exclude_from_model=[1])
    self.assertRaises(ValueError, slhyp, dss[:3])
    slhyp = SearchlightHyperalignment(ref_ds=3)
    self.assertRaises(ValueError, slhyp, dss[:3])
    # store projections for each mapper separately
    projs = list()
    # run the algorithm with all combinations of the two major parameters
    # for projection calculation
    for kwargs in [{'combine_neighbormappers': True, 'nproc': 2},
                   {'combine_neighbormappers': True, 'dtype': 'float64',
                    'compute_recon': True},
                   {'combine_neighbormappers': True,
                    'exclude_from_model': [2, 4]},
                   {'combine_neighbormappers': False},
                   {'combine_neighbormappers': False,
                    'mask_node_ids': np.arange(dss[0].nfeatures).tolist()},
                   {'combine_neighbormappers': True, 'sparse_radius': 1},
                   {'combine_neighbormappers': True, 'nblocks': 2}]:
        slhyp = SearchlightHyperalignment(radius=2, **kwargs)
        mappers = slhyp(dss)
        # one mapper per input ds
        assert_equal(len(mappers), nds)
        projs.append(mappers)
    # some checks
    for midx in range(nds):
        # making sure the mask_node_ids option works as expected
        assert_array_almost_equal(projs[3][midx].proj.todense(),
                                  projs[4][midx].proj.todense())
        # recon check
        assert_array_almost_equal(projs[0][midx].proj.todense(),
                                  projs[1][midx].recon.T.todense(),
                                  decimal=5)
        assert_equal(projs[1][midx].proj.dtype, 'float64')
        assert_equal(projs[0][midx].proj.dtype, 'float32')
    # making sure the projections make sense
    for proj in projs:
        # no .max on sparse matrices on older scipy (e.g. on precise),
        # so convert to an array first
        max_weight = proj[0].proj.toarray().max(0).squeeze()
        diag_weight = proj[0].proj.diagonal()
        # Check to make sure diagonal is the max weight, in almost all rows
        # for the reference subject
        assert (np.sum(max_weight == diag_weight)
                / float(len(diag_weight)) > 0.90)
        # and not true for other subjects
        for i in range(1, nds - 1):
            assert (np.sum(proj[i].proj.toarray().max(0).squeeze()
                           == proj[i].proj.diagonal())
                    / float(proj[i].proj.shape[0]) < 0.80)
        # Check to make sure projection weights match across duplicate datasets
        max_weight = proj[-1].proj.toarray().max(0).squeeze()
        diag_weight = proj[-1].proj.diagonal()
        # Check to make sure diagonal is the max weight, in almost all rows
        # for the reference subject
        assert (np.sum(max_weight == diag_weight)
                / float(len(diag_weight)) > 0.90)
    # project data
    dss_hyper = [hm.forward(sd) for hm, sd in zip(projs[0], dss)]
    _ = [zscore(sd, chunks_attr=None) for sd in dss_hyper]
    ndcss = []
    nf = ds_orig.nfeatures
    for ds_hyper in dss_hyper:
        ndcs = np.diag(np.corrcoef(ds_hyper.samples.T,
                                   ds_orig.samples.T)[nf:, :nf], k=0)
        ndcss += [ndcs]
    assert_true(np.median(ndcss[0]) > 0.9)
    # noisy copy of original dataset should be similar to original after
    # hyperalignment
    assert_true(np.median(ndcss[-1]) > 0.9)
    assert_true(np.all([np.median(ndcs) > 0.2 for ndcs in ndcss[1:-2]]))
def __call__(self, datasets):
    """Estimate mappers for each dataset using searchlight-based
    hyperalignment.

    Parameters
    ----------
    datasets : list or tuple of datasets

    Returns
    -------
    A list of trained StaticProjectionMappers of the same length as datasets
    """
    # Perform some checks first before modifying internal state
    params = self.params
    ndatasets = len(datasets)

    if len(datasets) <= 1:
        raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                         "operate on. Got: %d" % self.ndatasets)

    if params.ref_ds in params.exclude_from_model:
        raise ValueError("Requested reference dataset %i is also "
                         "in the exclude list." % params.ref_ds)

    if params.ref_ds >= ndatasets:
        raise ValueError("Requested reference dataset %i is out of "
                         "bounds. We have only %i datasets provided"
                         % (params.ref_ds, self.ndatasets))

    # The rest of the checks are just warnings
    self.ndatasets = ndatasets

    _shpaldebug("SearchlightHyperalignment %s for %i datasets"
                % (self, self.ndatasets))

    selected = [_ for _ in range(ndatasets)
                if _ not in params.exclude_from_model]
    ref_ds_train = selected.index(params.ref_ds)
    params.hyperalignment.params.ref_ds = ref_ds_train
    warning('Using %dth dataset as the reference dataset (%dth after '
            'excluding datasets)' % (params.ref_ds, ref_ds_train))
    if len(params.exclude_from_model) > 0:
        warning("These datasets will not participate in building common "
                "model: %s" % params.exclude_from_model)

    if __debug__:
        # verify that datasets were zscored prior to the alignment since it is
        # an assumed/required preprocessing step
        for ids, ds in enumerate(datasets):
            for f, fname, tval in ((np.mean, 'means', 0),
                                   (np.std, 'stds', 1)):
                vals = f(ds, axis=0)
                vals_comp = np.abs(vals - tval) > 1e-5
                if np.any(vals_comp):
                    warning(
                        '%d %s are too different (max diff=%g) from %d in '
                        'dataset %d to come from a zscored dataset. '
                        'Please zscore datasets first for correct operation '
                        '(unless it was intentional)'
                        % (np.sum(vals_comp), fname,
                           np.max(np.abs(vals)), tval, ids))

    # Setting up SearchlightHyperalignment
    # we need to know which original features were comprising the
    # individual SL ROIs
    _shpaldebug('Initializing FeatureSelectionHyperalignment.')
    hmeasure = FeatureSelectionHyperalignment(
        ref_ds=params.ref_ds,
        featsel=params.featsel,
        hyperalignment=params.hyperalignment,
        full_matrix=params.combine_neighbormappers,
        use_same_features=params.use_same_features,
        exclude_from_model=params.exclude_from_model,
        dtype=params.dtype)

    # Performing SL processing manually
    _shpaldebug("Setting up for searchlights")
    if params.nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            params.nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            params.nproc = 1

    # XXX I think this class should already accept a single dataset only.
    # It should have a ``space`` setting that names a sample attribute that
    # can be used to identify individual/original datasets.
    # Taking a single dataset as argument would be cleaner, because the
    # algorithm relies on the assumption that there is a coarse feature
    # alignment, i.e. the SL ROIs cover roughly the same area
    queryengines = self._get_trained_queryengines(
        datasets, params.queryengine, params.radius, params.ref_ds)
    # For surface nodes to voxels queryengines, roi_seed hardly makes sense
    qe = queryengines[(0 if len(queryengines) == 1 else params.ref_ds)]
    if isinstance(qe, SurfaceVerticesQueryEngine):
        self.force_roi_seed = False
        if not self.params.combine_neighbormappers:
            raise NotImplementedError(
                "Mapping from voxels to surface nodes is not "
                "implemented yet. Try setting combine_neighbormappers "
                "to True.")
    self.nfeatures = datasets[params.ref_ds].nfeatures
    _shpaldebug("Performing Hyperalignment in searchlights")
    # Setting up centers for running SL Hyperalignment
    if params.sparse_radius is None:
        roi_ids = self._get_verified_ids(queryengines) \
            if params.mask_node_ids is None \
            else params.mask_node_ids
    else:
        if params.queryengine is not None:
            raise NotImplementedError(
                "using sparse_radius whenever a custom queryengine is "
                "provided is not yet supported.")
        _shpaldebug("Setting up sparse neighborhood")
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        if params.mask_node_ids is None:
            scoords, sidx = scatter_neighborhoods(
                Sphere(params.sparse_radius),
                datasets[params.ref_ds].fa.voxel_indices,
                deterministic=True)
            roi_ids = sidx
        else:
            scoords, sidx = scatter_neighborhoods(
                Sphere(params.sparse_radius),
                datasets[params.ref_ds].fa.voxel_indices[
                    params.mask_node_ids],
                deterministic=True)
            roi_ids = [params.mask_node_ids[sid] for sid in sidx]

    # Initialize projections
    _shpaldebug('Initializing projection matrices')
    self.projections = [
        csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
        for isub in range(self.ndatasets)]

    # compute
    if params.nproc is not None and params.nproc > 1:
        # split all target ROI centers into `nproc` equally sized blocks
        nproc_needed = min(len(roi_ids), params.nproc)
        params.nblocks = nproc_needed \
            if params.nblocks is None else params.nblocks
        params.nblocks = min(len(roi_ids), params.nblocks)
        node_blocks = np.array_split(roi_ids, params.nblocks)
        # the next block sets up the infrastructure for parallel computing
        # this can easily be changed into a ParallelPython loop, if we
        # decide to have a PP job server in PyMVPA
        import pprocess
        p_results = pprocess.Map(limit=nproc_needed)
        if __debug__:
            debug('SLC', "Starting off %s child processes for nblocks=%i"
                  % (nproc_needed, params.nblocks))
        compute = p_results.manage(pprocess.MakeParallel(self._proc_block))
        seed = mvpa2.get_random_seed()
        for iblock, block in enumerate(node_blocks):
            # should we maybe deepcopy the measure to have a unique and
            # independent one per process?
            compute(block, datasets, copy.copy(hmeasure), queryengines,
                    seed=seed, iblock=iblock)
    else:
        # otherwise collect the results in a 1-item list
        _shpaldebug('Using 1 process to compute mappers.')
        if params.nblocks is None:
            params.nblocks = 1
        params.nblocks = min(len(roi_ids), params.nblocks)
        node_blocks = np.array_split(roi_ids, params.nblocks)
        p_results = [self._proc_block(block, datasets, hmeasure, queryengines)
                     for block in node_blocks]
    results_ds = self.__handle_all_results(p_results)
    # Dummy iterator for, you know, iteration
    list(results_ds)

    _shpaldebug('Wrapping projection matrices into StaticProjectionMappers')
    self.projections = [
        StaticProjectionMapper(proj=proj, recon=proj.T)
        if params.compute_recon
        else StaticProjectionMapper(proj=proj)
        for proj in self.projections]
    return self.projections
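
# --- illustrative sketch, not part of the original module -------------------
# A minimal, hedged example of how the __call__ defined above is typically
# driven, mirroring the tests in this section: z-score every dataset, fit the
# searchlight hyperalignment, then forward-project each dataset with its
# returned StaticProjectionMapper. The import paths are assumed from PyMVPA's
# layout; `dss` is assumed to be a list of feature-matched datasets carrying
# an ``fa.voxel_indices`` attribute. The function name is hypothetical.
def _example_searchlight_hyperalignment_usage(dss):
    from mvpa2.mappers.zscore import zscore
    from mvpa2.algorithms.searchlight_hyperalignment import \
        SearchlightHyperalignment
    # z-scoring is the preprocessing step the __debug__ check above warns about
    for ds in dss:
        zscore(ds, chunks_attr=None)
    slhyp = SearchlightHyperalignment(radius=2, combine_neighbormappers=True)
    mappers = slhyp(dss)  # one trained StaticProjectionMapper per dataset
    # project every dataset into the common space
    return [m.forward(ds) for m, ds in zip(mappers, dss)]
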
def run(args):
    if os.path.isfile(args.payload) and args.payload.endswith('.py'):
        measure = script2obj(args.payload)
    elif args.payload == 'cv':
        if args.cv_learner is None or args.cv_partitioner is None:
            raise ValueError('cross-validation payload requires '
                             '--learner and --partitioner')
        # get CV instance
        measure = get_crossvalidation_instance(
            args.cv_learner, args.cv_partitioner, args.cv_errorfx,
            args.cv_sampling_repetitions, args.cv_learner_space,
            args.cv_balance_training, args.cv_permutations,
            args.cv_avg_datafold_results, args.cv_prob_tail)
    else:
        raise RuntimeError("this should not happen")
    ds = arg2ds(args.data)
    if args.ds_preproc_fx is not None:
        ds = args.ds_preproc_fx(ds)
    # setup neighborhood
    # XXX add big switch to allow for setting up surface-based neighborhoods
    from mvpa2.misc.neighborhood import IndexQueryEngine
    qe = IndexQueryEngine(**dict(args.neighbors))
    # determine ROIs
    rids = None  # all by default
    aggregate_fx = args.aggregate_fx
    if args.roi_attr is not None:
        # first figure out which roi features should be processed
        if len(args.roi_attr) == 1 and args.roi_attr[0] in ds.fa.keys():
            # name of an attribute -> pull non-zeroes
            rids = ds.fa[args.roi_attr[0]].value.nonzero()[0]
        else:
            # an expression?
            from .cmd_select import _eval_attr_expr
            rids = _eval_attr_expr(args.roi_attr, ds.fa).nonzero()[0]
    seed_ids = None
    if args.scatter_rois is not None:
        # scatter_neighborhoods among available ids if it was requested
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        attr, nb = args.scatter_rois
        coords = ds.fa[attr].value
        if rids is not None:
            # select only those which were chosen by ROI
            coords = coords[rids]
        _, seed_ids = scatter_neighborhoods(nb, coords)
        if aggregate_fx is None:
            # no custom one given -> use default "fill in" function
            aggregate_fx = _fill_in_scattered_results
            if args.enable_ca is None:
                args.enable_ca = ['roi_feature_ids']
            elif 'roi_feature_ids' not in args.enable_ca:
                args.enable_ca += ['roi_feature_ids']
    if seed_ids is None:
        roi_ids = rids
    else:
        if rids is not None:
            # we had to sub-select by scattering among available rids
            # so we would need to get original ids
            roi_ids = rids[seed_ids]
        else:
            # scattering happened on the entire feature-set
            roi_ids = seed_ids

    verbose(3, 'Attempting %i ROI analyses'
               % ((roi_ids is None) and ds.nfeatures or len(roi_ids)))

    from mvpa2.measures.searchlight import Searchlight

    sl = Searchlight(measure,
                     queryengine=qe,
                     roi_ids=roi_ids,
                     nproc=args.nproc,
                     results_backend=args.multiproc_backend,
                     results_fx=aggregate_fx,
                     enable_ca=args.enable_ca,
                     disable_ca=args.disable_ca)
    # XXX support me too!
    # add_center_fa
    # tmp_prefix
    # nblocks
    # null_dist

    # run
    res = sl(ds)
    if (seed_ids is not None) and ('mapper' in res.a):
        # strip the last mapper link in the chain, which would be the seed ID
        # selection
        res.a['mapper'] = res.a.mapper[:-1]

    # XXX create more output
    # and store
    ds2hdf5(res, args.output, compression=args.hdf5_compression)
    return res
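
# --- illustrative sketch, not part of the original command module -----------
# When ROIs are scattered, run() above falls back to a "fill in" aggregation
# function (_fill_in_scattered_results). The hypothetical helper below only
# sketches the underlying idea with plain numpy and is NOT the PyMVPA
# implementation: each seed ROI's result value is broadcast to all features
# that made up that ROI (as tracked via the 'roi_feature_ids' conditional
# attribute enabled above), so a sparse set of searchlight results becomes a
# dense feature map again. All argument names here are assumptions.
def _example_fill_in_scattered_results(roi_values, roi_feature_ids, nfeatures):
    import numpy as np
    out = np.zeros(nfeatures)
    for value, feature_ids in zip(roi_values, roi_feature_ids):
        # write this ROI's result into every feature belonging to the ROI
        out[feature_ids] = value
    return out
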
def __call__(self, datasets):
    """Estimate mappers for each dataset using searchlight-based
    hyperalignment.

    Parameters
    ----------
    datasets : list or tuple of datasets

    Returns
    -------
    A list of trained StaticProjectionMappers of the same length as datasets
    """
    # Perform some checks first before modifying internal state
    params = self.params
    ndatasets = len(datasets)

    if len(datasets) <= 1:
        raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                         "operate on. Got: %d" % self.ndatasets)

    if params.ref_ds in params.exclude_from_model:
        raise ValueError("Requested reference dataset %i is also "
                         "in the exclude list." % params.ref_ds)

    if params.ref_ds >= ndatasets:
        raise ValueError("Requested reference dataset %i is out of "
                         "bounds. We have only %i datasets provided"
                         % (params.ref_ds, self.ndatasets))

    # The rest of the checks are just warnings
    self.ndatasets = ndatasets

    _shpaldebug("SearchlightHyperalignment %s for %i datasets"
                % (self, self.ndatasets))

    if params.ref_ds != params.hyperalignment.params.ref_ds:
        warning('Supplied ref_ds & hyperalignment instance ref_ds:%d differ.'
                % params.hyperalignment.params.ref_ds)
        warning('Using default hyperalignment instance with ref_ds: %d'
                % params.ref_ds)
        params.hyperalignment = Hyperalignment(ref_ds=params.ref_ds)
    if len(params.exclude_from_model) > 0:
        warning("These datasets will not participate in building common "
                "model: %s" % params.exclude_from_model)

    if __debug__:
        # verify that datasets were zscored prior to the alignment since it is
        # an assumed/required preprocessing step
        for ids, ds in enumerate(datasets):
            for f, fname, tval in ((np.mean, 'means', 0),
                                   (np.std, 'stds', 1)):
                vals = f(ds, axis=0)
                vals_comp = np.abs(vals - tval) > 1e-5
                if np.any(vals_comp):
                    warning('%d %s are too different (max diff=%g) from %d in '
                            'dataset %d to come from a zscored dataset. '
                            'Please zscore datasets first for correct operation '
                            '(unless it was intentional)'
                            % (np.sum(vals_comp), fname,
                               np.max(np.abs(vals)), tval, ids))

    # Setting up SearchlightHyperalignment
    # we need to know which original features were comprising the
    # individual SL ROIs
    _shpaldebug('Initializing FeatureSelectionHyperalignment.')
    hmeasure = FeatureSelectionHyperalignment(
        featsel=params.featsel,
        hyperalignment=params.hyperalignment,
        full_matrix=params.combine_neighbormappers,
        use_same_features=params.use_same_features,
        exclude_from_model=params.exclude_from_model,
        dtype=params.dtype)

    # Performing SL processing manually
    _shpaldebug("Setting up for searchlights")
    if params.nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            params.nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            params.nproc = 1

    # XXX I think this class should already accept a single dataset only.
    # It should have a ``space`` setting that names a sample attribute that
    # can be used to identify individual/original datasets.
    # Taking a single dataset as argument would be cleaner, because the
    # algorithm relies on the assumption that there is a coarse feature
    # alignment, i.e. the SL ROIs cover roughly the same area
    queryengines = self._get_trained_queryengines(
        datasets, params.queryengine, params.radius, params.ref_ds)
    # For surface nodes to voxels queryengines, roi_seed hardly makes sense
    if isinstance(queryengines[params.ref_ds], SurfaceVerticesQueryEngine):
        self.force_roi_seed = False
        if not self.params.combine_neighbormappers:
            raise NotImplementedError(
                "Mapping from voxels to surface nodes is not "
                "implemented yet. Try setting combine_neighbormappers "
                "to True.")
    self.nfeatures = datasets[params.ref_ds].nfeatures
    _shpaldebug("Performing Hyperalignment in searchlights")
    # Setting up centers for running SL Hyperalignment
    if params.sparse_radius is None:
        roi_ids = self._get_verified_ids(queryengines) \
            if params.mask_node_ids is None \
            else params.mask_node_ids
    else:
        if params.queryengine is not None:
            raise NotImplementedError(
                "using sparse_radius whenever a custom queryengine is "
                "provided is not yet supported.")
        _shpaldebug("Setting up sparse neighborhood")
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        if params.mask_node_ids is None:
            scoords, sidx = scatter_neighborhoods(
                Sphere(params.sparse_radius),
                datasets[params.ref_ds].fa.voxel_indices,
                deterministic=True)
            roi_ids = sidx
        else:
            scoords, sidx = scatter_neighborhoods(
                Sphere(params.sparse_radius),
                datasets[params.ref_ds].fa.voxel_indices[
                    params.mask_node_ids],
                deterministic=True)
            roi_ids = [params.mask_node_ids[sid] for sid in sidx]

    # Initialize projections
    _shpaldebug('Initializing projection matrices')
    self.projections = [
        csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
        for isub in range(self.ndatasets)]

    # compute
    if params.nproc is not None and params.nproc > 1:
        # split all target ROI centers into `nproc` equally sized blocks
        nproc_needed = min(len(roi_ids), params.nproc)
        params.nblocks = nproc_needed \
            if params.nblocks is None else params.nblocks
        params.nblocks = min(len(roi_ids), params.nblocks)
        node_blocks = np.array_split(roi_ids, params.nblocks)
        # the next block sets up the infrastructure for parallel computing
        # this can easily be changed into a ParallelPython loop, if we
        # decide to have a PP job server in PyMVPA
        import pprocess
        p_results = pprocess.Map(limit=nproc_needed)
        if __debug__:
            debug('SLC', "Starting off %s child processes for nblocks=%i"
                  % (nproc_needed, params.nblocks))
        compute = p_results.manage(
            pprocess.MakeParallel(self._proc_block))
        seed = mvpa2.get_random_seed()
        for iblock, block in enumerate(node_blocks):
            # should we maybe deepcopy the measure to have a unique and
            # independent one per process?
            compute(block, datasets, copy.copy(hmeasure), queryengines,
                    seed=seed, iblock=iblock)
    else:
        # otherwise collect the results in a 1-item list
        _shpaldebug('Using 1 process to compute mappers.')
        if params.nblocks is None:
            params.nblocks = 1
        params.nblocks = min(len(roi_ids), params.nblocks)
        node_blocks = np.array_split(roi_ids, params.nblocks)
        p_results = [self._proc_block(block, datasets, hmeasure, queryengines)
                     for block in node_blocks]
    results_ds = self.__handle_all_results(p_results)
    # Dummy iterator for, you know, iteration
    list(results_ds)

    _shpaldebug('Wrapping projection matrices into StaticProjectionMappers')
    self.projections = [
        StaticProjectionMapper(proj=proj, recon=proj.T)
        if params.compute_recon
        else StaticProjectionMapper(proj=proj)
        for proj in self.projections]
    return self.projections
def test_searchlight_hyperalignment(self):
    skip_if_no_external('scipy')
    skip_if_no_external('h5py')
    ds_orig = datasets['3dsmall'].copy()[:, :15]
    ds_orig.fa['voxel_indices'] = ds_orig.fa.myspace
    space = 'voxel_indices'
    # total number of datasets for the analysis
    nds = 5
    zscore(ds_orig, chunks_attr=None)
    dss = [ds_orig]
    # create a few distorted datasets to match the desired number of datasets
    # not sure if this truly mimics the real data, but at least we can test
    # the implementation
    while len(dss) < nds - 1:
        sd = local_random_affine_transformations(
            ds_orig,
            scatter_neighborhoods(Sphere(1), ds_orig.fa[space].value,
                                  deterministic=True)[1],
            Sphere(2),
            space=space,
            scale_fac=1.0, shift_fac=0.0)
        # sometimes the above function returns a dataset with nans/infs;
        # we don't want that
        if np.sum(np.isnan(sd.samples) + np.isinf(sd.samples)) == 0 \
                and np.all(sd.samples.std(0)):
            dss.append(sd)
    ds_orig_noisy = ds_orig.copy()
    ds_orig_noisy.samples += 0.1 * np.random.random(size=ds_orig_noisy.shape)
    dss.append(ds_orig_noisy)
    _ = [zscore(sd, chunks_attr=None) for sd in dss[1:]]
    # we should have some distortion
    for ds in dss[1:]:
        assert_false(np.all(ds_orig.samples == ds.samples))
    # testing checks
    slhyp = SearchlightHyperalignment(ref_ds=1, exclude_from_model=[1])
    self.assertRaises(ValueError, slhyp, dss[:3])
    slhyp = SearchlightHyperalignment(ref_ds=3)
    self.assertRaises(ValueError, slhyp, dss[:3])
    # explicit test of exclude_from_model
    slhyp = SearchlightHyperalignment(ref_ds=2, exclude_from_model=[1],
                                      featsel=0.7)
    projs1 = slhyp(dss)
    aligned1 = [proj.forward(ds) for proj, ds in zip(projs1, dss)]
    samples = dss[1].samples.copy()
    dss[1].samples += 0.1 * np.random.random(size=dss[1].shape)
    projs2 = slhyp(dss)
    aligned2 = [proj.forward(ds) for proj, ds in zip(projs1, dss)]
    for i in [0, 2, 3, 4]:
        assert_array_almost_equal(projs1[i].proj.todense(),
                                  projs2[i].proj.todense())
        assert_array_almost_equal(aligned1[i].samples, aligned2[i].samples)
    assert_false(
        np.all(projs1[1].proj.todense() == projs1[2].proj.todense()))
    assert_false(np.all(aligned1[1].samples == aligned2[1].samples))
    dss[1].samples = samples
    # store projections for each mapper separately
    projs = list()
    # run the algorithm with all combinations of the two major parameters
    # for projection calculation
    for kwargs in [{'combine_neighbormappers': True,
                    'nproc': 1 + int(externals.exists('pprocess'))},
                   {'combine_neighbormappers': True, 'dtype': 'float64',
                    'compute_recon': True},
                   {'combine_neighbormappers': True,
                    'exclude_from_model': [2, 4]},
                   {'combine_neighbormappers': False},
                   {'combine_neighbormappers': False,
                    'mask_node_ids': np.arange(dss[0].nfeatures).tolist()},
                   {'combine_neighbormappers': True, 'sparse_radius': 1},
                   {'combine_neighbormappers': True, 'nblocks': 2}]:
        slhyp = SearchlightHyperalignment(radius=2, **kwargs)
        mappers = slhyp(dss)
        # one mapper per input ds
        assert_equal(len(mappers), nds)
        projs.append(mappers)
    # some checks
    for midx in range(nds):
        # making sure the mask_node_ids option works as expected
        assert_array_almost_equal(projs[3][midx].proj.todense(),
                                  projs[4][midx].proj.todense())
        # recon check
        assert_array_almost_equal(projs[0][midx].proj.todense(),
                                  projs[1][midx].recon.T.todense(),
                                  decimal=5)
        assert_equal(projs[1][midx].proj.dtype, 'float64')
        assert_equal(projs[0][midx].proj.dtype, 'float32')
    # making sure the projections make sense
    for proj in projs:
        # no .max on sparse matrices on older scipy (e.g. on precise),
        # so convert to an array first
        max_weight = proj[0].proj.toarray().max(1).squeeze()
        diag_weight = proj[0].proj.diagonal()
        # Check to make sure diagonal is the max weight, in almost all rows
        # for the reference subject
        assert (np.sum(max_weight == diag_weight)
                / float(len(diag_weight)) >= 0.80)
        # and not true for other subjects
        for i in range(1, nds - 1):
            assert (np.sum(proj[i].proj.toarray().max(1).squeeze()
                           == proj[i].proj.diagonal())
                    / float(proj[i].proj.shape[0]) < 0.80)
        # Check to make sure projection weights match across duplicate datasets
        max_weight = proj[-1].proj.toarray().max(1).squeeze()
        diag_weight = proj[-1].proj.diagonal()
        # Check to make sure diagonal is the max weight, in almost all rows
        # for the reference subject
        assert (np.sum(max_weight == diag_weight)
                / float(len(diag_weight)) >= 0.80)
    # project data
    dss_hyper = [hm.forward(sd) for hm, sd in zip(projs[0], dss)]
    _ = [zscore(sd, chunks_attr=None) for sd in dss_hyper]
    ndcss = []
    nf = ds_orig.nfeatures
    for ds_hyper in dss_hyper:
        ndcs = np.diag(np.corrcoef(ds_hyper.samples.T,
                                   ds_orig.samples.T)[nf:, :nf], k=0)
        ndcss += [ndcs]
    assert_true(np.median(ndcss[0]) > 0.9)
    # noisy copy of original dataset should be similar to original after
    # hyperalignment
    assert_true(np.median(ndcss[-1]) > 0.9)
    assert_true(np.all([np.median(ndcs) > 0.2 for ndcs in ndcss[1:-2]]))