def _sl_call(self, dataset, roi_ids, nproc):
    """Classical generic searchlight implementation
    """
    assert(self.results_backend in ('native', 'hdf5'))
    # compute
    if nproc is not None and nproc > 1:
        # split all target ROI centers into `nproc` equally sized blocks
        nproc_needed = min(len(roi_ids), nproc)
        nblocks = nproc_needed \
            if self.nblocks is None else self.nblocks
        roi_blocks = np.array_split(roi_ids, nblocks)

        # the next block sets up the infrastructure for parallel computing
        # this can easily be changed into a ParallelPython loop, if we
        # decide to have a PP job server in PyMVPA
        import pprocess
        p_results = pprocess.Map(limit=nproc_needed)
        if __debug__:
            debug('SLC',
                  "Starting off %s child processes for nblocks=%i"
                  % (nproc_needed, nblocks))
        compute = p_results.manage(
            pprocess.MakeParallel(self._proc_block))
        for iblock, block in enumerate(roi_blocks):
            # should we maybe deepcopy the measure to have a unique and
            # independent one per process?
            seed = mvpa2.get_random_seed()
            compute(block, dataset, copy.copy(self.__datameasure),
                    seed=seed, iblock=iblock)
    else:
        # otherwise collect the results in a 1-item list
        p_results = [
            self._proc_block(roi_ids, dataset, self.__datameasure)]

    # Finally collect and possibly process results
    # p_results here is either a generator from pprocess.Map or a list.
    # In case of a generator it allows processing results as they become
    # available
    result_ds = self.results_fx(
        sl=self,
        dataset=dataset,
        roi_ids=roi_ids,
        results=self.__handle_all_results(p_results))

    # Assure having a dataset (for paranoid ones)
    if not is_datasetlike(result_ds):
        try:
            result_a = np.atleast_1d(result_ds)
        except ValueError as e:
            if 'setting an array element with a sequence' in str(e):
                # try forcing an object array.  Happens with
                # test_custom_results_fx_logic on numpy 1.4.1 on Debian
                # squeeze
                result_a = np.array(result_ds, dtype=object)
            else:
                raise
        result_ds = Dataset(result_a)
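# --- Usage sketch (illustrative; not part of the implementation above) -----
# A minimal example of how the parallel branch of ``_sl_call`` is typically
# reached, assuming PyMVPA's ``sphere_searchlight`` factory and a
# cross-validation measure; ``ds`` stands for a hypothetical, already
# preprocessed (e.g. zscored) fMRI dataset.
#
#     from mvpa2.suite import (sphere_searchlight, CrossValidation,
#                              LinearCSVMC, NFoldPartitioner)
#
#     cv = CrossValidation(LinearCSVMC(), NFoldPartitioner())
#     # nproc > 1 makes _sl_call split roi_ids into blocks and fan them out
#     # over pprocess child processes; nproc=1 (or None) takes the
#     # single-process branch instead.
#     sl = sphere_searchlight(cv, radius=3, nproc=4)
#     res = sl(ds)
# ----------------------------------------------------------------------------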
def __call__(self, datasets):
    """Estimate mappers for each dataset using searchlight-based
    hyperalignment.

    Parameters
    ----------
    datasets : list or tuple of datasets

    Returns
    -------
    A list of trained StaticProjectionMappers of the same length as datasets
    """
    # Perform some checks first before modifying internal state
    params = self.params
    ndatasets = len(datasets)

    if len(datasets) <= 1:
        raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                         "operate on. Got: %d" % ndatasets)

    if params.ref_ds in params.exclude_from_model:
        raise ValueError("Requested reference dataset %i is also "
                         "in the exclude list." % params.ref_ds)

    if params.ref_ds >= ndatasets:
        raise ValueError("Requested reference dataset %i is out of "
                         "bounds. We have only %i datasets provided"
                         % (params.ref_ds, ndatasets))

    # The rest of the checks are just warnings
    self.ndatasets = ndatasets

    _shpaldebug("SearchlightHyperalignment %s for %i datasets"
                % (self, self.ndatasets))

    selected = [_ for _ in range(ndatasets)
                if _ not in params.exclude_from_model]
    ref_ds_train = selected.index(params.ref_ds)
    params.hyperalignment.params.ref_ds = ref_ds_train
    warning('Using %dth dataset as the reference dataset (%dth after '
            'excluding datasets)' % (params.ref_ds, ref_ds_train))

    if len(params.exclude_from_model) > 0:
        warning("These datasets will not participate in building common "
                "model: %s" % params.exclude_from_model)

    if __debug__:
        # verify that datasets were zscored prior to the alignment, since it
        # is an assumed/required preprocessing step
        for ids, ds in enumerate(datasets):
            for f, fname, tval in ((np.mean, 'means', 0),
                                   (np.std, 'stds', 1)):
                vals = f(ds, axis=0)
                vals_comp = np.abs(vals - tval) > 1e-5
                if np.any(vals_comp):
                    warning('%d %s are too different (max diff=%g) from %d '
                            'in dataset %d to come from a zscored dataset. '
                            'Please zscore datasets first for correct '
                            'operation (unless it was intentional)'
                            % (np.sum(vals_comp), fname,
                               np.max(np.abs(vals)), tval, ids))

    # Setting up SearchlightHyperalignment
    # we need to know which original features were comprising the
    # individual SL ROIs
    _shpaldebug('Initializing FeatureSelectionHyperalignment.')
    hmeasure = FeatureSelectionHyperalignment(
        ref_ds=params.ref_ds,
        featsel=params.featsel,
        hyperalignment=params.hyperalignment,
        full_matrix=params.combine_neighbormappers,
        use_same_features=params.use_same_features,
        exclude_from_model=params.exclude_from_model,
        dtype=params.dtype)

    # Performing SL processing manually
    _shpaldebug("Setting up for searchlights")
    if params.nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            params.nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            params.nproc = 1

    # XXX I think this class should already accept a single dataset only.
    # It should have a ``space`` setting that names a sample attribute that
    # can be used to identify individual/original datasets.
    # Taking a single dataset as argument would be cleaner, because the
    # algorithm relies on the assumption that there is a coarse feature
    # alignment, i.e. the SL ROIs cover roughly the same area
    queryengines = self._get_trained_queryengines(
        datasets, params.queryengine, params.radius, params.ref_ds)
    # For surface-nodes-to-voxels queryengines, roi_seed hardly makes sense
    qe = queryengines[(0 if len(queryengines) == 1 else params.ref_ds)]
    if isinstance(qe, SurfaceVerticesQueryEngine):
        self.force_roi_seed = False
        if not self.params.combine_neighbormappers:
            raise NotImplementedError(
                "Mapping from voxels to surface nodes is not implemented "
                "yet. Try setting combine_neighbormappers to True.")
    self.nfeatures = datasets[params.ref_ds].nfeatures
    _shpaldebug("Performing Hyperalignment in searchlights")
    # Setting up centers for running SL Hyperalignment
    if params.sparse_radius is None:
        roi_ids = self._get_verified_ids(queryengines) \
            if params.mask_node_ids is None \
            else params.mask_node_ids
    else:
        if params.queryengine is not None:
            raise NotImplementedError(
                "using sparse_radius when a custom queryengine is "
                "provided is not yet supported.")
        _shpaldebug("Setting up sparse neighborhood")
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        if params.mask_node_ids is None:
            scoords, sidx = scatter_neighborhoods(
                Sphere(params.sparse_radius),
                datasets[params.ref_ds].fa.voxel_indices,
                deterministic=True)
            roi_ids = sidx
        else:
            scoords, sidx = scatter_neighborhoods(
                Sphere(params.sparse_radius),
                datasets[params.ref_ds].fa.voxel_indices[params.mask_node_ids],
                deterministic=True)
            roi_ids = [params.mask_node_ids[sid] for sid in sidx]

    # Initialize projections
    _shpaldebug('Initializing projection matrices')
    self.projections = [
        csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
        for isub in range(self.ndatasets)]

    # compute
    if params.nproc is not None and params.nproc > 1:
        # split all target ROI centers into `nproc` equally sized blocks
        nproc_needed = min(len(roi_ids), params.nproc)
        params.nblocks = nproc_needed \
            if params.nblocks is None else params.nblocks
        params.nblocks = min(len(roi_ids), params.nblocks)
        node_blocks = np.array_split(roi_ids, params.nblocks)

        # the next block sets up the infrastructure for parallel computing
        # this can easily be changed into a ParallelPython loop, if we
        # decide to have a PP job server in PyMVPA
        import pprocess
        p_results = pprocess.Map(limit=nproc_needed)
        if __debug__:
            debug('SLC',
                  "Starting off %s child processes for nblocks=%i"
                  % (nproc_needed, params.nblocks))
        compute = p_results.manage(
            pprocess.MakeParallel(self._proc_block))
        seed = mvpa2.get_random_seed()
        for iblock, block in enumerate(node_blocks):
            # should we maybe deepcopy the measure to have a unique and
            # independent one per process?
            compute(block, datasets, copy.copy(hmeasure), queryengines,
                    seed=seed, iblock=iblock)
    else:
        # otherwise collect the results in a 1-item list
        _shpaldebug('Using 1 process to compute mappers.')
        if params.nblocks is None:
            params.nblocks = 1
        params.nblocks = min(len(roi_ids), params.nblocks)
        node_blocks = np.array_split(roi_ids, params.nblocks)
        p_results = [self._proc_block(block, datasets, hmeasure, queryengines)
                     for block in node_blocks]

    results_ds = self.__handle_all_results(p_results)
    # Dummy iterator for, you know, iteration
    list(results_ds)

    _shpaldebug('Wrapping projection matrices into StaticProjectionMappers')
    self.projections = [
        StaticProjectionMapper(proj=proj, recon=proj.T)
        if params.compute_recon else StaticProjectionMapper(proj=proj)
        for proj in self.projections]
    return self.projections
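# --- Sketch: sparse searchlight centers (illustrative) ----------------------
# What the ``sparse_radius`` branch above does, on a toy coordinate grid that
# stands in for ``datasets[ref_ds].fa.voxel_indices`` (values are arbitrary).
#
#     import numpy as np
#     from mvpa2.misc.neighborhood import Sphere, scatter_neighborhoods
#
#     coords = np.array([(x, y, z)
#                        for x in range(4)
#                        for y in range(4)
#                        for z in range(4)])
#     scoords, sidx = scatter_neighborhoods(Sphere(2), coords,
#                                           deterministic=True)
#     # ``sidx`` plays the role of ``roi_ids``: only these sparse centers get
#     # their own searchlight, while every coordinate is still covered by at
#     # least one sphere.
# ----------------------------------------------------------------------------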
def __call__(self, datasets):
    """Estimate mappers for each dataset using searchlight-based
    hyperalignment.

    Parameters
    ----------
    datasets : list or tuple of datasets

    Returns
    -------
    A list of trained StaticProjectionMappers of the same length as datasets
    """
    # Perform some checks first before modifying internal state
    params = self.params
    ndatasets = len(datasets)

    if len(datasets) <= 1:
        raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                         "operate on. Got: %d" % ndatasets)

    if params.ref_ds in params.exclude_from_model:
        raise ValueError("Requested reference dataset %i is also "
                         "in the exclude list." % params.ref_ds)

    if params.ref_ds >= ndatasets:
        raise ValueError("Requested reference dataset %i is out of "
                         "bounds. We have only %i datasets provided"
                         % (params.ref_ds, ndatasets))

    # The rest of the checks are just warnings
    self.ndatasets = ndatasets

    _shpaldebug("SearchlightHyperalignment %s for %i datasets"
                % (self, self.ndatasets))

    if params.ref_ds != params.hyperalignment.params.ref_ds:
        warning('Supplied ref_ds & hyperalignment instance ref_ds: %d differ.'
                % params.hyperalignment.params.ref_ds)
        warning('Using default hyperalignment instance with ref_ds: %d'
                % params.ref_ds)
        params.hyperalignment = Hyperalignment(ref_ds=params.ref_ds)

    if len(params.exclude_from_model) > 0:
        warning("These datasets will not participate in building common "
                "model: %s" % params.exclude_from_model)

    if __debug__:
        # verify that datasets were zscored prior to the alignment, since it
        # is an assumed/required preprocessing step
        for ids, ds in enumerate(datasets):
            for f, fname, tval in ((np.mean, 'means', 0),
                                   (np.std, 'stds', 1)):
                vals = f(ds, axis=0)
                vals_comp = np.abs(vals - tval) > 1e-5
                if np.any(vals_comp):
                    warning('%d %s are too different (max diff=%g) from %d '
                            'in dataset %d to come from a zscored dataset. '
                            'Please zscore datasets first for correct '
                            'operation (unless it was intentional)'
                            % (np.sum(vals_comp), fname,
                               np.max(np.abs(vals)), tval, ids))

    # Setting up SearchlightHyperalignment
    # we need to know which original features were comprising the
    # individual SL ROIs
    _shpaldebug('Initializing FeatureSelectionHyperalignment.')
    hmeasure = FeatureSelectionHyperalignment(
        featsel=params.featsel,
        hyperalignment=params.hyperalignment,
        full_matrix=params.combine_neighbormappers,
        use_same_features=params.use_same_features,
        exclude_from_model=params.exclude_from_model,
        dtype=params.dtype)

    # Performing SL processing manually
    _shpaldebug("Setting up for searchlights")
    if params.nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            params.nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            params.nproc = 1

    # XXX I think this class should already accept a single dataset only.
    # It should have a ``space`` setting that names a sample attribute that
    # can be used to identify individual/original datasets.
    # Taking a single dataset as argument would be cleaner, because the
    # algorithm relies on the assumption that there is a coarse feature
    # alignment, i.e. the SL ROIs cover roughly the same area
    queryengines = self._get_trained_queryengines(
        datasets, params.queryengine, params.radius, params.ref_ds)
    # For surface-nodes-to-voxels queryengines, roi_seed hardly makes sense
    if isinstance(queryengines[params.ref_ds], SurfaceVerticesQueryEngine):
        self.force_roi_seed = False
        if not self.params.combine_neighbormappers:
            raise NotImplementedError(
                "Mapping from voxels to surface nodes is not implemented "
                "yet. Try setting combine_neighbormappers to True.")
    self.nfeatures = datasets[params.ref_ds].nfeatures
    _shpaldebug("Performing Hyperalignment in searchlights")
    # Setting up centers for running SL Hyperalignment
    if params.sparse_radius is None:
        roi_ids = self._get_verified_ids(queryengines) \
            if params.mask_node_ids is None \
            else params.mask_node_ids
    else:
        if params.queryengine is not None:
            raise NotImplementedError(
                "using sparse_radius when a custom queryengine is "
                "provided is not yet supported.")
        _shpaldebug("Setting up sparse neighborhood")
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        if params.mask_node_ids is None:
            scoords, sidx = scatter_neighborhoods(
                Sphere(params.sparse_radius),
                datasets[params.ref_ds].fa.voxel_indices,
                deterministic=True)
            roi_ids = sidx
        else:
            scoords, sidx = scatter_neighborhoods(
                Sphere(params.sparse_radius),
                datasets[params.ref_ds].fa.voxel_indices[params.mask_node_ids],
                deterministic=True)
            roi_ids = [params.mask_node_ids[sid] for sid in sidx]

    # Initialize projections
    _shpaldebug('Initializing projection matrices')
    self.projections = [
        csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
        for isub in range(self.ndatasets)]

    # compute
    if params.nproc is not None and params.nproc > 1:
        # split all target ROI centers into `nproc` equally sized blocks
        nproc_needed = min(len(roi_ids), params.nproc)
        params.nblocks = nproc_needed \
            if params.nblocks is None else params.nblocks
        params.nblocks = min(len(roi_ids), params.nblocks)
        node_blocks = np.array_split(roi_ids, params.nblocks)

        # the next block sets up the infrastructure for parallel computing
        # this can easily be changed into a ParallelPython loop, if we
        # decide to have a PP job server in PyMVPA
        import pprocess
        p_results = pprocess.Map(limit=nproc_needed)
        if __debug__:
            debug('SLC',
                  "Starting off %s child processes for nblocks=%i"
                  % (nproc_needed, params.nblocks))
        compute = p_results.manage(
            pprocess.MakeParallel(self._proc_block))
        seed = mvpa2.get_random_seed()
        for iblock, block in enumerate(node_blocks):
            # should we maybe deepcopy the measure to have a unique and
            # independent one per process?
            compute(block, datasets, copy.copy(hmeasure), queryengines,
                    seed=seed, iblock=iblock)
    else:
        # otherwise collect the results in a 1-item list
        _shpaldebug('Using 1 process to compute mappers.')
        if params.nblocks is None:
            params.nblocks = 1
        params.nblocks = min(len(roi_ids), params.nblocks)
        node_blocks = np.array_split(roi_ids, params.nblocks)
        p_results = [self._proc_block(block, datasets, hmeasure, queryengines)
                     for block in node_blocks]

    results_ds = self.__handle_all_results(p_results)
    # Dummy iterator for, you know, iteration
    list(results_ds)

    _shpaldebug('Wrapping projection matrices into StaticProjectionMappers')
    self.projections = [
        StaticProjectionMapper(proj=proj, recon=proj.T)
        if params.compute_recon else StaticProjectionMapper(proj=proj)
        for proj in self.projections]
    return self.projections
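# --- Usage sketch (illustrative) ---------------------------------------------
# How the ``__call__`` above is typically invoked.  ``ds1``, ``ds2``, ``ds3``
# stand for hypothetical, already zscored datasets sharing a common feature
# space; parameter values are arbitrary.
#
#     from mvpa2.algorithms.searchlight_hyperalignment import \
#         SearchlightHyperalignment
#
#     slhyper = SearchlightHyperalignment(radius=2, sparse_radius=3, nproc=4)
#     mappers = slhyper([ds1, ds2, ds3])
#     # one trained StaticProjectionMapper per dataset; project a dataset
#     # into the common model space with its mapper
#     ds2_common = mappers[1].forward(ds2)
# ------------------------------------------------------------------------------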