# module-level imports assumed by this excerpt
import ctypes
import multiprocessing as mp
import multiprocessing.heap  # used to reset the shared-memory heap below

import numpy as np
import pprocess


def cast_unsuitable_regions_by_label_MT(multi_level_mask, thresholds_list,
                                        parameter_dict):
    '''cast small region as noise and big region as vessel'''
    nodule_mask = np.zeros_like(multi_level_mask, np.int8)
    # walk the labels from highest to lowest
    multi_image_labels = list(range(int(np.max(multi_level_mask))))
    multi_image_labels.reverse()
    loop_times = len(multi_image_labels)

    # TODO map-style parallelization
    shared_array_s = mp.Array(ctypes.c_int8,
                              loop_times * np.size(nodule_mask))
    shared_array = np.frombuffer(shared_array_s.get_obj(),
                                 dtype=np.int8).reshape(
                                     (loop_times,) + nodule_mask.shape)

    num_of_proc = pprocess.get_number_of_cores()
    results = pprocess.Map(limit=num_of_proc // 2)  # integer worker count
    # put_result_into_shared_memory is defined elsewhere in this module
    para_func = results.manage(
        pprocess.MakeParallel(put_result_into_shared_memory))
    for i in range(loop_times):
        one_label = multi_image_labels[i]
        para_func(shared_array, multi_level_mask, thresholds_list,
                  parameter_dict, one_label)
    results.finish()

    # fold the per-label results into a single mask
    for num_of_loop in range(shared_array.shape[0]):
        nodule_mask = np.logical_or(nodule_mask,
                                    shared_array[num_of_loop, ...])

    # release the shared-memory arena explicitly
    datastate = shared_array_s.get_obj()._wrapper._state
    arenaobj = datastate[0][0]
    arenaobj.buffer.close()
    mp.heap.BufferWrapper._heap = mp.heap.Heap()
    return nodule_mask
def filter_regions(multi_level_mask, thresholds_list, parameter_dict,
                   one_label):
    one_threshold = thresholds_list[one_label]
    mask = (multi_level_mask > one_label)
    min_size = parameter_dict['small_vol_threshold']
    label_image, bounding_box_slices = lyBWareaopen(mask, min_size)

    # region_result[0] stands for the background label 0
    region_result = [False]

    # single thread
    # for label_number in range(1, np.max(label_image) + 1):
    #     single_result = region_task(label_number, label_image,
    #                                 bounding_box_slices, one_threshold,
    #                                 parameter_dict)
    #     region_result.append(single_result)

    # multi thread
    results = pprocess.Map(limit=pprocess.get_number_of_cores())
    calc = results.manage(pprocess.MakeParallel(region_task))
    for label_number in range(1, np.max(label_image) + 1):
        calc(label_number, label_image, bounding_box_slices, one_threshold,
             parameter_dict)
    for i, result in enumerate(results):
        region_result.append(result)

    # np.bool was removed in recent NumPy; the builtin bool is equivalent here
    region_result = np.array(region_result, bool)
    tuild_result = np.logical_not(region_result)
    # use the per-label verdict as a lookup table indexed by the label image
    label_image[tuild_result[label_image]] = 0
    return (label_image > 0)
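# The two functions above both follow the same pprocess fan-out pattern:
# build a Map with a worker limit, wrap the worker with MakeParallel, submit
# one call per work item, then iterate the Map (or call finish()) to collect
# results in submission order. A minimal, self-contained sketch of that
# pattern; the worker `square` and its inputs are illustrative, not part of
# the original code:
import pprocess


def square(x):
    # trivial stand-in for region_task / put_result_into_shared_memory
    return x * x


def parallel_squares(values):
    # one worker per core, as in the functions above
    results = pprocess.Map(limit=pprocess.get_number_of_cores())
    calc = results.manage(pprocess.MakeParallel(square))
    for v in values:
        calc(v)              # queue the call; returns immediately
    return list(results)     # iterating the Map blocks until workers finish

# parallel_squares(range(8)) -> [0, 1, 4, 9, 16, 25, 36, 49]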
def _call(self, dataset):
    """Perform the ROI search.
    """
    # local binding
    nproc = self.nproc

    if nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            nproc = 1

    # train the queryengine
    self._queryengine.train(dataset)

    # decide whether to run on all possible center coords or just a provided
    # subset
    if isinstance(self.__roi_ids, str):
        roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
    elif self.__roi_ids is not None:
        roi_ids = self.__roi_ids
        # safeguard against stupidity
        if __debug__:
            if max(roi_ids) >= dataset.nfeatures:
                raise IndexError(
                    "Maximal center_id found is %s whenever given "
                    "dataset has only %d features"
                    % (max(roi_ids), dataset.nfeatures))
    else:
        roi_ids = np.arange(dataset.nfeatures)

    # pass to subclass
    results, roi_sizes = self._sl_call(dataset, roi_ids, nproc)

    if roi_sizes is not None:
        self.ca.roi_sizes = roi_sizes

    if 'mapper' in dataset.a:
        # since we know the space we can stick the original mapper into the
        # results as well
        if self.__roi_ids is None:
            results.a['mapper'] = copy.copy(dataset.a.mapper)
        else:
            # there is an additional selection step that needs to be
            # expressed by another mapper
            mapper = copy.copy(dataset.a.mapper)
            mapper.append(StaticFeatureSelection(roi_ids,
                                                 dshape=dataset.shape[1:]))
            results.a['mapper'] = mapper

    # charge state
    self.ca.raw_results = results

    # return raw results, base-class will take care of transformations
    return results
def _call(self, dataset):
    """Perform the ROI search.
    """
    # local binding
    nproc = self.nproc

    if nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            nproc = 1

    # train the queryengine
    self._queryengine.train(dataset)

    # decide whether to run on all possible center coords or just a provided
    # subset
    if isinstance(self.__roi_ids, str):
        roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
    elif self.__roi_ids is not None:
        roi_ids = self.__roi_ids
        # safeguard against stupidity
        if __debug__:
            if max(roi_ids) >= dataset.nfeatures:
                raise IndexError(
                    "Maximal center_id found is %s whenever given "
                    "dataset has only %d features"
                    % (max(roi_ids), dataset.nfeatures))
    else:
        roi_ids = np.arange(dataset.nfeatures)

    # pass to subclass
    results = self._sl_call(dataset, roi_ids, nproc)

    if 'mapper' in dataset.a:
        # since we know the space we can stick the original mapper into the
        # results as well
        if self.__roi_ids is None:
            results.a['mapper'] = copy.copy(dataset.a.mapper)
        else:
            # there is an additional selection step that needs to be
            # expressed by another mapper
            mapper = copy.copy(dataset.a.mapper)
            mapper.append(StaticFeatureSelection(roi_ids,
                                                 dshape=dataset.shape[1:]))
            results.a['mapper'] = mapper

    # charge state
    self.ca.raw_results = results

    # return raw results, base-class will take care of transformations
    return results
def _call(self, dataset):
    """Perform the ROI search.
    """
    # local binding
    nproc = self.nproc

    if nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            nproc = 1

    # train the queryengine
    self._queryengine.train(dataset)

    # decide whether to run on all possible center coords or just a provided
    # subset
    if isinstance(self.__roi_ids, str):
        roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
    elif self.__roi_ids is not None:
        roi_ids = self.__roi_ids
        # safeguard against stupidity
        if __debug__:
            qe_ids = self._queryengine.ids  # known to qe
            if not set(qe_ids).issuperset(roi_ids):
                raise IndexError(
                    "Some roi_ids are not known to the query engine %s: %s"
                    % (self._queryengine, set(roi_ids).difference(qe_ids)))
    else:
        roi_ids = self._queryengine.ids

    # pass to subclass
    results = self._sl_call(dataset, roi_ids, nproc)

    # charge state
    self.ca.raw_results = results

    # return raw results, base-class will take care of transformations
    return results
def _call(self, dataset):
    """Perform the ROI search.
    """
    # local binding
    nproc = self.__nproc
    if nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            nproc = 1

    # train the queryengine
    self.__qe.train(dataset)

    # decide whether to run on all possible center coords or just a provided
    # subset
    if self.__center_ids is not None:
        roi_ids = self.__center_ids
        # safeguard against stupidity
        if __debug__:
            if max(roi_ids) >= dataset.nfeatures:
                raise IndexError(
                    "Maximal center_id found is %s whenever given "
                    "dataset has only %d features"
                    % (max(roi_ids), dataset.nfeatures))
    else:
        roi_ids = np.arange(dataset.nfeatures)

    # compute
    if nproc > 1:
        # split all target ROIs centers into `nproc` equally sized blocks
        roi_blocks = np.array_split(roi_ids, nproc)

        # the next block sets up the infrastructure for parallel computing
        # this can easily be changed into a ParallelPython loop, if we
        # decide to have a PP job server in PyMVPA
        import pprocess
        p_results = pprocess.Map(limit=nproc)
        compute = p_results.manage(
            pprocess.MakeParallel(self._proc_block))
        for block in roi_blocks:
            # should we maybe deepcopy the measure to have a unique and
            # independent one per process?
            compute(block, dataset, copy.copy(self.__datameasure))

        # collect results
        results = []
        if self.ca.is_enabled('roisizes'):
            roisizes = []
        else:
            roisizes = None

        for r, rsizes in p_results:
            results += r
            if roisizes is not None:
                roisizes += rsizes
    else:
        # otherwise collect the results in a list
        results, roisizes = \
            self._proc_block(roi_ids, dataset, self.__datameasure)

    if roisizes is not None:
        self.ca.roisizes = roisizes

    if __debug__:
        debug('SLC', '')

    # but be careful: this call also serves as conversion from parallel maps
    # to regular lists!
    # this uses the Dataset-hstack
    results = hstack(results)

    if 'mapper' in dataset.a:
        # since we know the space we can stick the original mapper into the
        # results as well
        if self.__center_ids is None:
            results.a['mapper'] = copy.copy(dataset.a.mapper)
        else:
            # there is an additional selection step that needs to be
            # expressed by another mapper
            mapper = copy.copy(dataset.a.mapper)
            mapper.append(FeatureSliceMapper(self.__center_ids,
                                             dshape=dataset.shape[1:]))
            results.a['mapper'] = mapper

    # charge state
    self.ca.raw_results = results

    # return raw results, base-class will take care of transformations
    return results
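# The parallel branch above deals the ROI centers out with np.array_split,
# which (unlike np.split) tolerates block counts that do not divide the
# number of centers evenly. A small illustration; the values are made up:
import numpy as np

roi_ids = np.arange(10)
blocks = np.array_split(roi_ids, 4)
# -> [array([0, 1, 2]), array([3, 4, 5]), array([6, 7]), array([8, 9])]
# each block then becomes one _proc_block() call in a child process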
def voxel_selection(vol_surf_mapping, radius, source_surf=None,
                    source_surf_nodes=None, distance_metric='dijkstra',
                    eta_step=10, nproc=None, outside_node_margin=None,
                    results_backend=None, tmp_prefix='tmpvoxsel'):
    """
    Voxel selection for multiple center nodes on the surface

    Parameters
    ----------
    vol_surf_mapping: volsurf.VolSurfMapping
        Contains gray and white matter surface, and volume geometry
    radius: int or float
        Size of searchlight. If an integer, then it indicates the number of
        voxels. If a float, then it indicates the radius of the disc
    source_surf: surf.Surface or None
        Surface used to compute distance between nodes. If omitted, it is
        the average of the gray and white surfaces.
    source_surf_nodes: list of int or numpy array or None
        Indices of nodes in source_surf that serve as searchlight center.
        By default every node serves as a searchlight center.
    distance_metric: str
        Distance metric between nodes. 'euclidean' or 'dijkstra' (default)
    eta_step: int
        Report progress every eta_step (default: 10).
    nproc: int or None
        Number of parallel threads. None means as many threads as the
        system supports. The pprocess module is required for parallel
        threads; if it cannot be used, then a single thread is used.
    outside_node_margin: float or True or None (default)
        By default nodes outside the volume are skipped; using this
        parameter allows for a margin. If this value is a float (possibly
        np.inf), then all nodes within outside_node_margin Dijkstra
        distance from any node within the volume are still assigned
        associated voxels. If outside_node_margin is True, then a node is
        always assigned voxels regardless of its position in the volume.
    results_backend : 'native' or 'hdf5' or None (default)
        Specifies the way results are provided back from a processing block
        in case of nproc > 1. 'native' is pickling/unpickling of results by
        pprocess, while 'hdf5' would use h5save/h5load functionality.
        'hdf5' might be more time and memory efficient in some cases.
        If None, then 'hdf5' is used if available, else 'native'.
    tmp_prefix : str, optional
        If specified -- serves as a prefix for temporary files storage if
        results_backend == 'hdf5'. Thus can specify the directory to use
        (trailing file path separator is not added automagically).

    Returns
    -------
    sel: volume_mask_dict.VolumeMaskDictionary
        Voxel selection results that associate, with each node, the
        indices of the surrounding voxels.
    """
    # construct the intermediate surface, which is used
    # to measure distances
    intermediate_surf = (vol_surf_mapping.pial_surface * .5) + \
                        (vol_surf_mapping.white_surface * .5)

    if source_surf is None:
        source_surf = intermediate_surf
    else:
        source_surf = surf.from_any(source_surf)

    if _debug():
        debug('SVS', "Generated high-res intermediate surface: "
                     "%d nodes, %d faces"
              % (intermediate_surf.nvertices, intermediate_surf.nfaces))
        debug('SVS', "Mapping source to high-res surface:"
                     " %d nodes, %d faces"
              % (source_surf.nvertices, source_surf.nfaces))

    if distance_metric[0].lower() == 'e' and outside_node_margin:
        # euclidean distance: identity mapping
        # this is *slow*
        n = source_surf.nvertices
        xyz = source_surf.vertices
        src2intermediate = dict((i, tuple(xyz[i])) for i in xrange(n))
    else:
        # find a mapping from nodes in source_surf to those in
        # intermediate surface
        src2intermediate = source_surf.map_to_high_resolution_surf(
            intermediate_surf)

    # if no sources are given, then visit all nodes
    if source_surf_nodes is None:
        source_surf_nodes = np.arange(source_surf.nvertices)

    n = len(source_surf_nodes)

    if _debug():
        debug('SVS', "Performing surface-based voxel selection"
                     " for %d centers" % n)

    # visit in random order, for better ETA estimate
    visitorder = list(np.random.permutation(len(source_surf_nodes)))

    # construct mapping from nodes to enclosing voxels
    n2v = vol_surf_mapping.get_node2voxels_mapping()

    if __debug__:
        debug('SVS', "Generated mapping from nodes"
                     " to intersecting voxels")

    # build voxel selector
    voxel_selector = VoxelSelector(radius, intermediate_surf, n2v,
                                   distance_metric,
                                   outside_node_margin=outside_node_margin)

    if _debug():
        debug('SVS', "Instantiated voxel selector (radius %r)" % radius)

    # structure to keep output data. Initialize with None, then
    # make a sparse_attributes instance when we know what the attributes are
    node2volume_attributes = None

    attribute_mapper = voxel_selector.disc_voxel_indices_and_attributes

    srcs_order = [source_surf_nodes[node] for node in visitorder]

    src_trg_nodes = [(src, src2intermediate[src]) for src in srcs_order]

    if nproc is not None and nproc > 1 and not externals.exists('pprocess'):
        raise RuntimeError("The 'pprocess' module is required for "
                           "multiprocess searchlights. Please either "
                           "install python-pprocess, or reduce `nproc` "
                           "to 1 (got nproc=%i) or set to default None"
                           % nproc)

    if nproc is None:
        if externals.exists('pprocess'):
            try:
                import pprocess
                nproc = pprocess.get_number_of_cores() or 1
                if _debug():
                    debug("SVS", 'Using pprocess with %d cores' % nproc)
            except:
                if _debug():
                    debug("SVS", 'pprocess not available')

        if nproc is None:
            # importing pprocess failed - so use a single core
            nproc = 1
            debug("SVS", 'Using %d cores - pprocess not available' % nproc)

    # get the voxel selection parameters
    parameter_dict = vol_surf_mapping.get_parameter_dict()
    parameter_dict.update(dict(radius=radius,
                               outside_node_margin=outside_node_margin,
                               distance_metric=distance_metric),
                          source_nvertices=source_surf.nvertices)

    init_output = lambda: volume_mask_dict.VolumeMaskDictionary(
        vol_surf_mapping.volgeom,
        intermediate_surf,
        meta=parameter_dict)

    if nproc > 1:
        if results_backend == 'hdf5':
            externals.exists('h5py', raise_=True)
        elif results_backend is None:
            if externals.exists('h5py') and \
                    externals.versions['hdf5'] >= '1.8.7':
                results_backend = 'hdf5'
            else:
                results_backend = 'native'

        if _debug():
            debug('SVS', "Using '%s' backend" % (results_backend,))

        if results_backend not in ('native', 'hdf5'):
            raise ValueError('Illegal results backend %r' % results_backend)

        import pprocess
        n_srcs = len(src_trg_nodes)
        blocks = np.array_split(np.arange(n_srcs), nproc)

        results = pprocess.Map(limit=nproc)
        reducer = results.manage(pprocess.MakeParallel(_reduce_mapper))

        if __debug__:
            debug('SVS', "Starting %d child processes" % (len(blocks),))

        for i, block in enumerate(blocks):
            empty_dict = init_output()

            src_trg = []
            for idx in block:
                src_trg.append(src_trg_nodes[idx])

            if _debug():
                debug('SVS', "  starting block %d/%d: %d centers"
                      % (i + 1, nproc, len(src_trg)), cr=True)

            reducer(empty_dict, attribute_mapper, src_trg,
                    eta_step=eta_step, proc_id='%d' % (i + 1,),
                    results_backend=results_backend, tmp_prefix=tmp_prefix)

        if _debug():
            debug('SVS', '')
            debug('SVS', 'Started all %d child processes' % (len(blocks)))
            tstart = time.time()

        node2volume_attributes = None
        for i, result in enumerate(results):
            if result is None:
                continue

            if results_backend == 'hdf5':
                result_fn = result
                result = h5load(result_fn)
                os.remove(result_fn)

            if node2volume_attributes is None:
                # first time we have actual results.
                # Use as a starting point
                node2volume_attributes = result
                if _debug():
                    debug('SVS', '')
                    debug('SVS', "Merging results from %d child "
                                 "processes using '%s' backend"
                          % (len(blocks), results_backend))
            else:
                # merge new with current data
                node2volume_attributes.merge(result)
                if _debug():
                    debug('SVS', "  merged result block %d/%d"
                          % (i + 1, nproc), cr=True)

        if _debug():
            telapsed = time.time() - tstart
            debug('SVS', "")
            debug('SVS', 'Merged results from %d child processes - took %s'
                  % (len(blocks), seconds2prettystring(telapsed)))

    else:
        empty_dict = init_output()
        node2volume_attributes = _reduce_mapper(empty_dict,
                                                attribute_mapper,
                                                src_trg_nodes,
                                                eta_step=eta_step)
        debug('SVS', "")

    if _debug():
        if node2volume_attributes is None:
            msgs = ["Voxel selection completed: none of %d nodes have "
                    "voxels associated" % len(visitorder)]
        else:
            nvox_selected = np.sum(node2volume_attributes.get_mask() != 0)
            vg = vol_surf_mapping.volgeom

            msgs = ["Voxel selection completed: %d / %d nodes have "
                    "voxels associated"
                    % (len(node2volume_attributes.keys()), len(visitorder)),
                    "Selected %d / %d voxels (%.0f%%) in the mask at "
                    "least once"
                    % (nvox_selected, vg.nvoxels_mask,
                       100. * nvox_selected / vg.nvoxels_mask)]

        for msg in msgs:
            debug("SVS", msg)

    if node2volume_attributes is None:
        warning('No voxels associated with any of %d nodes'
                % len(visitorder))

    return node2volume_attributes
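# A hedged usage sketch for the function above: `vol_surf_mapping` is a
# placeholder for whatever volsurf.VolSurfMapping the caller has already
# built; only the voxel_selection() call signature and the two accessors
# (keys(), get_mask()) are taken from the code and docstring above.
def select_voxels_around_nodes(vol_surf_mapping, nproc=None):
    # radius as an int means "100 voxels per searchlight disc"
    sel = voxel_selection(vol_surf_mapping, radius=100,
                          distance_metric='dijkstra', nproc=nproc)
    if sel is None:
        raise ValueError('no nodes received voxels')
    centers_with_voxels = sel.keys()   # center nodes that got voxels
    union_mask = sel.get_mask()        # voxels selected at least once
    return centers_with_voxels, union_mask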
def voxel_selection(vol_surf_mapping, radius, source_surf=None,
                    source_surf_nodes=None, distance_metric='dijkstra',
                    eta_step=10, nproc=None, outside_node_margin=None,
                    results_backend=None, tmp_prefix='tmpvoxsel'):
    """
    Voxel selection for multiple center nodes on the surface

    Parameters
    ----------
    vol_surf_mapping: volsurf.VolSurfMapping
        Contains gray and white matter surface, and volume geometry
    radius: int or float
        Size of searchlight. If an integer, then it indicates the number of
        voxels. If a float, then it indicates the radius of the disc
    source_surf: surf.Surface or None
        Surface used to compute distance between nodes. If omitted, it is
        the average of the gray and white surfaces.
    source_surf_nodes: list of int or numpy array or None
        Indices of nodes in source_surf that serve as searchlight center.
        By default every node serves as a searchlight center.
    distance_metric: str
        Distance metric between nodes. 'euclidean' or 'dijkstra' (default)
    eta_step: int
        Report progress every eta_step (default: 10).
    nproc: int or None
        Number of parallel threads. None means as many threads as the
        system supports. The pprocess module is required for parallel
        threads; if it cannot be used, then a single thread is used.
    outside_node_margin: float or True or None (default)
        By default nodes outside the volume are skipped; using this
        parameter allows for a margin. If this value is a float (possibly
        np.inf), then all nodes within outside_node_margin Dijkstra
        distance from any node within the volume are still assigned
        associated voxels. If outside_node_margin is True, then a node is
        always assigned voxels regardless of its position in the volume.
    results_backend : 'native' or 'hdf5' or None (default)
        Specifies the way results are provided back from a processing block
        in case of nproc > 1. 'native' is pickling/unpickling of results by
        pprocess, while 'hdf5' would use h5save/h5load functionality.
        'hdf5' might be more time and memory efficient in some cases.
        If None, then 'hdf5' is used if available, else 'native'.
    tmp_prefix : str, optional
        If specified -- serves as a prefix for temporary files storage if
        results_backend == 'hdf5'. Thus can specify the directory to use
        (trailing file path separator is not added automagically).

    Returns
    -------
    sel: volume_mask_dict.VolumeMaskDictionary
        Voxel selection results that associate, with each node, the
        indices of the surrounding voxels.
    """
    # construct the intermediate surface, which is used
    # to measure distances
    intermediate_surf = (vol_surf_mapping.pial_surface * .5) + \
                        (vol_surf_mapping.white_surface * .5)

    if source_surf is None:
        source_surf = intermediate_surf
    else:
        source_surf = surf.from_any(source_surf)

    if _debug():
        debug('SVS', "Generated high-res intermediate surface: "
                     "%d nodes, %d faces"
              % (intermediate_surf.nvertices, intermediate_surf.nfaces))
        debug('SVS', "Mapping source to high-res surface:"
                     " %d nodes, %d faces"
              % (source_surf.nvertices, source_surf.nfaces))

    if distance_metric[0].lower() == 'e' and outside_node_margin:
        # euclidean distance: identity mapping
        # this is *slow*
        n = source_surf.nvertices
        xyz = source_surf.vertices
        src2intermediate = dict((i, tuple(xyz[i])) for i in range(n))
    else:
        # find a mapping from nodes in source_surf to those in
        # intermediate surface
        src2intermediate = source_surf.map_to_high_resolution_surf(
            intermediate_surf)

    # if no sources are given, then visit all nodes
    if source_surf_nodes is None:
        source_surf_nodes = np.arange(source_surf.nvertices)

    n = len(source_surf_nodes)

    if _debug():
        debug('SVS', "Performing surface-based voxel selection"
                     " for %d centers" % n)

    # visit in random order, for better ETA estimate
    visitorder = list(np.random.permutation(len(source_surf_nodes)))

    # construct mapping from nodes to enclosing voxels
    n2v = vol_surf_mapping.get_node2voxels_mapping()

    if __debug__:
        debug('SVS', "Generated mapping from nodes"
                     " to intersecting voxels")

    # build voxel selector
    voxel_selector = VoxelSelector(radius, intermediate_surf, n2v,
                                   distance_metric,
                                   outside_node_margin=outside_node_margin)

    if _debug():
        debug('SVS', "Instantiated voxel selector (radius %r)" % radius)

    # structure to keep output data. Initialize with None, then
    # make a sparse_attributes instance when we know what the attributes are
    node2volume_attributes = None

    attribute_mapper = voxel_selector.disc_voxel_indices_and_attributes

    srcs_order = [source_surf_nodes[node] for node in visitorder]

    src_trg_nodes = [(src, src2intermediate[src]) for src in srcs_order]

    if nproc is not None and nproc > 1 and not externals.exists('pprocess'):
        raise RuntimeError("The 'pprocess' module is required for "
                           "multiprocess searchlights. Please either "
                           "install python-pprocess, or reduce `nproc` "
                           "to 1 (got nproc=%i) or set to default None"
                           % nproc)

    if nproc is None:
        if externals.exists('pprocess'):
            try:
                import pprocess
                nproc = pprocess.get_number_of_cores() or 1
                if _debug():
                    debug("SVS", 'Using pprocess with %d cores' % nproc)
            except:
                if _debug():
                    debug("SVS", 'pprocess not available')

        if nproc is None:
            # importing pprocess failed - so use a single core
            nproc = 1
            debug("SVS", 'Using %d cores - pprocess not available' % nproc)

    # get the voxel selection parameters
    parameter_dict = vol_surf_mapping.get_parameter_dict()
    parameter_dict.update(dict(radius=radius,
                               outside_node_margin=outside_node_margin,
                               distance_metric=distance_metric),
                          source_nvertices=source_surf.nvertices)

    init_output = lambda: volume_mask_dict.VolumeMaskDictionary(
        vol_surf_mapping.volgeom,
        intermediate_surf,
        meta=parameter_dict)

    if nproc > 1:
        if results_backend == 'hdf5':
            externals.exists('h5py', raise_=True)
        elif results_backend is None:
            if externals.exists('h5py') and \
                    externals.versions['hdf5'] >= '1.8.7':
                results_backend = 'hdf5'
            else:
                results_backend = 'native'

        if _debug():
            debug('SVS', "Using '%s' backend" % (results_backend,))

        if results_backend not in ('native', 'hdf5'):
            raise ValueError('Illegal results backend %r' % results_backend)

        import pprocess
        n_srcs = len(src_trg_nodes)
        blocks = np.array_split(np.arange(n_srcs), nproc)

        results = pprocess.Map(limit=nproc)
        reducer = results.manage(pprocess.MakeParallel(_reduce_mapper))

        if __debug__:
            debug('SVS', "Starting %d child processes" % (len(blocks),))

        for i, block in enumerate(blocks):
            empty_dict = init_output()

            src_trg = []
            for idx in block:
                src_trg.append(src_trg_nodes[idx])

            if _debug():
                debug('SVS', "  starting block %d/%d: %d centers"
                      % (i + 1, nproc, len(src_trg)), cr=True)

            reducer(empty_dict, attribute_mapper, src_trg,
                    eta_step=eta_step, proc_id='%d' % (i + 1,),
                    results_backend=results_backend, tmp_prefix=tmp_prefix)

        if _debug():
            debug('SVS', '')
            debug('SVS', 'Started all %d child processes' % (len(blocks)))
            tstart = time.time()

        node2volume_attributes = None
        for i, result in enumerate(results):
            if result is None:
                continue

            if results_backend == 'hdf5':
                result_fn = result
                result = h5load(result_fn)
                os.remove(result_fn)

            if node2volume_attributes is None:
                # first time we have actual results.
                # Use as a starting point
                node2volume_attributes = result
                if _debug():
                    debug('SVS', '')
                    debug('SVS', "Merging results from %d child "
                                 "processes using '%s' backend"
                          % (len(blocks), results_backend))
            else:
                # merge new with current data
                node2volume_attributes.merge(result)
                if _debug():
                    debug('SVS', "  merged result block %d/%d"
                          % (i + 1, nproc), cr=True)

        if _debug():
            telapsed = time.time() - tstart
            debug('SVS', "")
            debug('SVS', 'Merged results from %d child processes - took %s'
                  % (len(blocks), seconds2prettystring(telapsed)))

    else:
        empty_dict = init_output()
        node2volume_attributes = _reduce_mapper(empty_dict,
                                                attribute_mapper,
                                                src_trg_nodes,
                                                eta_step=eta_step)
        debug('SVS', "")

    if _debug():
        if node2volume_attributes is None:
            msgs = ["Voxel selection completed: none of %d nodes have "
                    "voxels associated" % len(visitorder)]
        else:
            nvox_selected = np.sum(node2volume_attributes.get_mask() != 0)
            vg = vol_surf_mapping.volgeom

            msgs = ["Voxel selection completed: %d / %d nodes have "
                    "voxels associated"
                    % (len(node2volume_attributes.keys()), len(visitorder)),
                    "Selected %d / %d voxels (%.0f%%) in the mask at "
                    "least once"
                    % (nvox_selected, vg.nvoxels_mask,
                       100. * nvox_selected / vg.nvoxels_mask)]

        for msg in msgs:
            debug("SVS", msg)

    if node2volume_attributes is None:
        warning('No voxels associated with any of %d nodes'
                % len(visitorder))

    return node2volume_attributes
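# The docstring above describes the 'hdf5' results backend: a child process
# writes its partial result to a temporary file and only the filename
# travels back through pprocess; the parent then loads and deletes it, as in
# the merging loop above. A minimal sketch of that round trip (the helper
# names, temp-file handling, and payload are illustrative, not part of the
# original code):
import os
import tempfile

from mvpa2.base.hdf5 import h5save, h5load


def _child_side(partial_result, tmp_prefix='tmpvoxsel'):
    fd, fn = tempfile.mkstemp(prefix=tmp_prefix, suffix='.hdf5')
    os.close(fd)
    h5save(fn, partial_result)   # hand off a large object without pickling
    return fn                    # only the small filename is returned


def _parent_side(fn):
    result = h5load(fn)
    os.remove(fn)                # same cleanup as in the merging loop above
    return result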
def __call__(self, datasets):
    """Estimate mappers for each dataset using searchlight-based
    hyperalignment.

    Parameters
    ----------
    datasets : list or tuple of datasets

    Returns
    -------
    A list of trained StaticProjectionMappers of the same length as datasets
    """
    # Perform some checks first before modifying internal state
    params = self.params
    ndatasets = len(datasets)

    if len(datasets) <= 1:
        raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                         "operate on. Got: %d" % ndatasets)
    if params.ref_ds in params.exclude_from_model:
        raise ValueError("Requested reference dataset %i is also "
                         "in the exclude list." % params.ref_ds)
    if params.ref_ds >= ndatasets:
        raise ValueError("Requested reference dataset %i is out of "
                         "bounds. We have only %i datasets provided"
                         % (params.ref_ds, ndatasets))

    # The rest of the checks are just warnings
    self.ndatasets = ndatasets
    _shpaldebug("SearchlightHyperalignment %s for %i datasets"
                % (self, self.ndatasets))

    selected = [_ for _ in range(ndatasets)
                if _ not in params.exclude_from_model]
    ref_ds_train = selected.index(params.ref_ds)
    params.hyperalignment.params.ref_ds = ref_ds_train
    warning('Using %dth dataset as the reference dataset (%dth after '
            'excluding datasets)' % (params.ref_ds, ref_ds_train))
    if len(params.exclude_from_model) > 0:
        warning("These datasets will not participate in building common "
                "model: %s" % params.exclude_from_model)

    if __debug__:
        # verify that datasets were zscored prior the alignment since it is
        # assumed/required preprocessing step
        for ids, ds in enumerate(datasets):
            for f, fname, tval in ((np.mean, 'means', 0),
                                   (np.std, 'stds', 1)):
                vals = f(ds, axis=0)
                vals_comp = np.abs(vals - tval) > 1e-5
                if np.any(vals_comp):
                    warning('%d %s are too different (max diff=%g) from %d '
                            'in dataset %d to come from a zscored dataset. '
                            'Please zscore datasets first for correct '
                            'operation (unless it was intentional)'
                            % (np.sum(vals_comp), fname,
                               np.max(np.abs(vals)), tval, ids))

    # Setting up SearchlightHyperalignment
    # we need to know which original features were comprising the
    # individual SL ROIs
    _shpaldebug('Initializing FeatureSelectionHyperalignment.')
    hmeasure = FeatureSelectionHyperalignment(
        ref_ds=params.ref_ds,
        featsel=params.featsel,
        hyperalignment=params.hyperalignment,
        full_matrix=params.combine_neighbormappers,
        use_same_features=params.use_same_features,
        exclude_from_model=params.exclude_from_model,
        dtype=params.dtype)

    # Performing SL processing manually
    _shpaldebug("Setting up for searchlights")
    if params.nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            params.nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            params.nproc = 1

    # XXX I think this class should already accept a single dataset only.
    # It should have a ``space`` setting that names a sample attribute that
    # can be used to identify individual/original datasets.
    # Taking a single dataset as argument would be cleaner, because the
    # algorithm relies on the assumption that there is a coarse feature
    # alignment, i.e. the SL ROIs cover roughly the same area
    queryengines = self._get_trained_queryengines(
        datasets, params.queryengine, params.radius, params.ref_ds)
    # For surface nodes to voxels queryengines, roi_seed hardly makes sense
    qe = queryengines[(0 if len(queryengines) == 1 else params.ref_ds)]
    if isinstance(qe, SurfaceVerticesQueryEngine):
        self.force_roi_seed = False
        if not self.params.combine_neighbormappers:
            raise NotImplementedError(
                "Mapping from voxels to surface nodes is not "
                "implemented yet. Try setting combine_neighbormappers "
                "to True.")
    self.nfeatures = datasets[params.ref_ds].nfeatures
    _shpaldebug("Performing Hyperalignment in searchlights")
    # Setting up centers for running SL Hyperalignment
    if params.sparse_radius is None:
        roi_ids = self._get_verified_ids(queryengines) \
            if params.mask_node_ids is None \
            else params.mask_node_ids
    else:
        if params.queryengine is not None:
            raise NotImplementedError(
                "using sparse_radius whenever custom queryengine is "
                "provided is not yet supported.")
        _shpaldebug("Setting up sparse neighborhood")
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        if params.mask_node_ids is None:
            scoords, sidx = scatter_neighborhoods(
                Sphere(params.sparse_radius),
                datasets[params.ref_ds].fa.voxel_indices,
                deterministic=True)
            roi_ids = sidx
        else:
            scoords, sidx = scatter_neighborhoods(
                Sphere(params.sparse_radius),
                datasets[params.ref_ds].fa.voxel_indices[
                    params.mask_node_ids],
                deterministic=True)
            roi_ids = [params.mask_node_ids[sid] for sid in sidx]

    # Initialize projections
    _shpaldebug('Initializing projection matrices')
    self.projections = [
        csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
        for isub in range(self.ndatasets)]

    # compute
    if params.nproc is not None and params.nproc > 1:
        # split all target ROIs centers into `nproc` equally sized blocks
        nproc_needed = min(len(roi_ids), params.nproc)
        params.nblocks = nproc_needed \
            if params.nblocks is None else params.nblocks
        params.nblocks = min(len(roi_ids), params.nblocks)
        node_blocks = np.array_split(roi_ids, params.nblocks)
        # the next block sets up the infrastructure for parallel computing
        # this can easily be changed into a ParallelPython loop, if we
        # decide to have a PP job server in PyMVPA
        import pprocess
        p_results = pprocess.Map(limit=nproc_needed)
        if __debug__:
            debug('SLC', "Starting off %s child processes for nblocks=%i"
                  % (nproc_needed, params.nblocks))
        compute = p_results.manage(pprocess.MakeParallel(self._proc_block))
        seed = mvpa2.get_random_seed()
        for iblock, block in enumerate(node_blocks):
            # should we maybe deepcopy the measure to have a unique and
            # independent one per process?
            compute(block, datasets, copy.copy(hmeasure), queryengines,
                    seed=seed, iblock=iblock)
    else:
        # otherwise collect the results in an 1-item list
        _shpaldebug('Using 1 process to compute mappers.')
        if params.nblocks is None:
            params.nblocks = 1
        params.nblocks = min(len(roi_ids), params.nblocks)
        node_blocks = np.array_split(roi_ids, params.nblocks)
        p_results = [self._proc_block(block, datasets, hmeasure,
                                      queryengines)
                     for block in node_blocks]
    results_ds = self.__handle_all_results(p_results)
    # Dummy iterator for, you know, iteration
    list(results_ds)

    _shpaldebug('Wrapping projection matrices into StaticProjectionMappers')
    self.projections = [
        StaticProjectionMapper(proj=proj, recon=proj.T)
        if params.compute_recon else StaticProjectionMapper(proj=proj)
        for proj in self.projections]
    return self.projections
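# A hedged usage sketch for __call__ above: the list of zscored `datasets`
# is assumed to come from the caller, and the constructor keywords (radius,
# ref_ds, nproc) are the parameters referenced through self.params in the
# code above; treat the exact call as a sketch, not a verified recipe.
slhyper = SearchlightHyperalignment(radius=3, ref_ds=0, nproc=4)
mappers = slhyper(datasets)      # one StaticProjectionMapper per dataset
aligned = [m.forward(ds) for m, ds in zip(mappers, datasets)]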
def _call(self, dataset):
    """Perform the ROI search.
    """
    # local binding
    nproc = self.nproc

    if nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            nproc = 1

    # train the queryengine
    self._queryengine.train(dataset)

    # decide whether to run on all possible center coords or just a provided
    # subset
    if isinstance(self.__roi_ids, str):
        roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
    elif self.__roi_ids is not None:
        roi_ids = self.__roi_ids
        # safeguard against stupidity
        if __debug__:
            qe_ids = self._queryengine.ids  # known to qe
            if not set(qe_ids).issuperset(roi_ids):
                raise IndexError(
                    "Some roi_ids are not known to the query engine %s: %s"
                    % (self._queryengine, set(roi_ids).difference(qe_ids)))
    else:
        roi_ids = self._queryengine.ids

    # pass to subclass
    results = self._sl_call(dataset, roi_ids, nproc)

    if 'mapper' in dataset.a:
        # since we know the space we can stick the original mapper into the
        # results as well
        if self.__roi_ids is None:
            results.a['mapper'] = copy.copy(dataset.a.mapper)
        else:
            # there is an additional selection step that needs to be
            # expressed by another mapper
            mapper = copy.copy(dataset.a.mapper)

            # NNO if the original mapper has no append (because it's not a
            # chainmapper, for example), we make our own chainmapper.
            #
            # The original code was:
            # mapper.append(StaticFeatureSelection(roi_ids,
            #                                      dshape=dataset.shape[1:]))
            feat_sel_mapper = StaticFeatureSelection(
                roi_ids, dshape=dataset.shape[1:])
            if 'append' in dir(mapper):
                mapper.append(feat_sel_mapper)
            else:
                mapper = ChainMapper([dataset.a.mapper, feat_sel_mapper])

            results.a['mapper'] = mapper

    # charge state
    self.ca.raw_results = results

    # return raw results, base-class will take care of transformations
    return results
def _call(self, dataset):
    """Perform the ROI search.
    """
    # local binding
    nproc = self.nproc

    if nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            nproc = 1

    # train the queryengine
    self._queryengine.train(dataset)

    # decide whether to run on all possible center coords or just a provided
    # subset
    if isinstance(self.__roi_ids, str):
        roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
    elif self.__roi_ids is not None:
        roi_ids = self.__roi_ids
        # safeguard against stupidity
        if __debug__:
            qe_ids = self._queryengine.ids  # known to qe
            if not set(qe_ids).issuperset(roi_ids):
                raise IndexError(
                    "Some roi_ids are not known to the query engine %s: %s"
                    % (self._queryengine, set(roi_ids).difference(qe_ids)))
    else:
        roi_ids = self._queryengine.ids

    # pass to subclass
    results = self._sl_call(dataset, roi_ids, nproc)

    if 'mapper' in dataset.a:
        # since we know the space we can stick the original mapper into the
        # results as well
        if self.__roi_ids is None:
            results.a['mapper'] = copy.copy(dataset.a.mapper)
        else:
            # there is an additional selection step that needs to be
            # expressed by another mapper
            mapper = copy.copy(dataset.a.mapper)

            # NNO if the original mapper has no append (because it's not a
            # chainmapper, for example), we make our own chainmapper.
            #
            # The original code was:
            # mapper.append(StaticFeatureSelection(roi_ids,
            #                                      dshape=dataset.shape[1:]))
            feat_sel_mapper = StaticFeatureSelection(
                roi_ids, dshape=dataset.shape[1:])
            if 'append' in dir(mapper):
                mapper.append(feat_sel_mapper)
            else:
                mapper = ChainMapper([dataset.a.mapper, feat_sel_mapper])

            results.a['mapper'] = mapper

    # charge state
    self.ca.raw_results = results

    # store the center ids as a feature attribute
    results.fa['center_ids'] = roi_ids

    # return raw results, base-class will take care of transformations
    return results
def __call__(self, datasets):
    """Estimate mappers for each dataset using searchlight-based
    hyperalignment.

    Parameters
    ----------
    datasets : list or tuple of datasets

    Returns
    -------
    A list of trained StaticProjectionMappers of the same length as datasets
    """
    # Perform some checks first before modifying internal state
    params = self.params
    ndatasets = len(datasets)

    if len(datasets) <= 1:
        raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                         "operate on. Got: %d" % ndatasets)
    if params.ref_ds in params.exclude_from_model:
        raise ValueError("Requested reference dataset %i is also "
                         "in the exclude list." % params.ref_ds)
    if params.ref_ds >= ndatasets:
        raise ValueError("Requested reference dataset %i is out of "
                         "bounds. We have only %i datasets provided"
                         % (params.ref_ds, ndatasets))

    # The rest of the checks are just warnings
    self.ndatasets = ndatasets
    _shpaldebug("SearchlightHyperalignment %s for %i datasets"
                % (self, self.ndatasets))

    if params.ref_ds != params.hyperalignment.params.ref_ds:
        warning('Supplied ref_ds & hyperalignment instance ref_ds:%d '
                'differ.' % params.hyperalignment.params.ref_ds)
        warning('Using default hyperalignment instance with ref_ds: %d'
                % params.ref_ds)
        params.hyperalignment = Hyperalignment(ref_ds=params.ref_ds)
    if len(params.exclude_from_model) > 0:
        warning("These datasets will not participate in building common "
                "model: %s" % params.exclude_from_model)

    if __debug__:
        # verify that datasets were zscored prior the alignment since it is
        # assumed/required preprocessing step
        for ids, ds in enumerate(datasets):
            for f, fname, tval in ((np.mean, 'means', 0),
                                   (np.std, 'stds', 1)):
                vals = f(ds, axis=0)
                vals_comp = np.abs(vals - tval) > 1e-5
                if np.any(vals_comp):
                    warning('%d %s are too different (max diff=%g) from %d '
                            'in dataset %d to come from a zscored dataset. '
                            'Please zscore datasets first for correct '
                            'operation (unless it was intentional)'
                            % (np.sum(vals_comp), fname,
                               np.max(np.abs(vals)), tval, ids))

    # Setting up SearchlightHyperalignment
    # we need to know which original features were comprising the
    # individual SL ROIs
    _shpaldebug('Initializing FeatureSelectionHyperalignment.')
    hmeasure = FeatureSelectionHyperalignment(
        featsel=params.featsel,
        hyperalignment=params.hyperalignment,
        full_matrix=params.combine_neighbormappers,
        use_same_features=params.use_same_features,
        exclude_from_model=params.exclude_from_model,
        dtype=params.dtype)

    # Performing SL processing manually
    _shpaldebug("Setting up for searchlights")
    if params.nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            params.nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            params.nproc = 1

    # XXX I think this class should already accept a single dataset only.
    # It should have a ``space`` setting that names a sample attribute that
    # can be used to identify individual/original datasets.
    # Taking a single dataset as argument would be cleaner, because the
    # algorithm relies on the assumption that there is a coarse feature
    # alignment, i.e. the SL ROIs cover roughly the same area
    queryengines = self._get_trained_queryengines(
        datasets, params.queryengine, params.radius, params.ref_ds)
    # For surface nodes to voxels queryengines, roi_seed hardly makes sense
    if isinstance(queryengines[params.ref_ds], SurfaceVerticesQueryEngine):
        self.force_roi_seed = False
        if not self.params.combine_neighbormappers:
            raise NotImplementedError(
                "Mapping from voxels to surface nodes is not "
                "implemented yet. Try setting combine_neighbormappers "
                "to True.")
    self.nfeatures = datasets[params.ref_ds].nfeatures
    _shpaldebug("Performing Hyperalignment in searchlights")
    # Setting up centers for running SL Hyperalignment
    if params.sparse_radius is None:
        roi_ids = self._get_verified_ids(queryengines) \
            if params.mask_node_ids is None \
            else params.mask_node_ids
    else:
        if params.queryengine is not None:
            raise NotImplementedError(
                "using sparse_radius whenever custom queryengine is "
                "provided is not yet supported.")
        _shpaldebug("Setting up sparse neighborhood")
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        if params.mask_node_ids is None:
            scoords, sidx = scatter_neighborhoods(
                Sphere(params.sparse_radius),
                datasets[params.ref_ds].fa.voxel_indices,
                deterministic=True)
            roi_ids = sidx
        else:
            scoords, sidx = scatter_neighborhoods(
                Sphere(params.sparse_radius),
                datasets[params.ref_ds].fa.voxel_indices[
                    params.mask_node_ids],
                deterministic=True)
            roi_ids = [params.mask_node_ids[sid] for sid in sidx]

    # Initialize projections
    _shpaldebug('Initializing projection matrices')
    self.projections = [
        csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
        for isub in range(self.ndatasets)]

    # compute
    if params.nproc is not None and params.nproc > 1:
        # split all target ROIs centers into `nproc` equally sized blocks
        nproc_needed = min(len(roi_ids), params.nproc)
        params.nblocks = nproc_needed \
            if params.nblocks is None else params.nblocks
        params.nblocks = min(len(roi_ids), params.nblocks)
        node_blocks = np.array_split(roi_ids, params.nblocks)
        # the next block sets up the infrastructure for parallel computing
        # this can easily be changed into a ParallelPython loop, if we
        # decide to have a PP job server in PyMVPA
        import pprocess
        p_results = pprocess.Map(limit=nproc_needed)
        if __debug__:
            debug('SLC', "Starting off %s child processes for nblocks=%i"
                  % (nproc_needed, params.nblocks))
        compute = p_results.manage(
            pprocess.MakeParallel(self._proc_block))
        seed = mvpa2.get_random_seed()
        for iblock, block in enumerate(node_blocks):
            # should we maybe deepcopy the measure to have a unique and
            # independent one per process?
            compute(block, datasets, copy.copy(hmeasure), queryengines,
                    seed=seed, iblock=iblock)
    else:
        # otherwise collect the results in an 1-item list
        _shpaldebug('Using 1 process to compute mappers.')
        if params.nblocks is None:
            params.nblocks = 1
        params.nblocks = min(len(roi_ids), params.nblocks)
        node_blocks = np.array_split(roi_ids, params.nblocks)
        p_results = [self._proc_block(block, datasets, hmeasure,
                                      queryengines)
                     for block in node_blocks]
    results_ds = self.__handle_all_results(p_results)
    # Dummy iterator for, you know, iteration
    list(results_ds)

    _shpaldebug('Wrapping projection matrices into StaticProjectionMappers')
    self.projections = [
        StaticProjectionMapper(proj=proj, recon=proj.T)
        if params.compute_recon else StaticProjectionMapper(proj=proj)
        for proj in self.projections]
    return self.projections
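# __call__ above warns unless every dataset is already zscored per feature
# (mean 0, std 1 across samples). A hedged pre-processing sketch using
# PyMVPA's in-place zscore helper; the import path and the chunks_attr=None
# choice (normalize across all samples, ignoring chunks) are assumptions:
from mvpa2.mappers.zscore import zscore

for ds in datasets:
    zscore(ds, chunks_attr=None)   # normalize each feature across samples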