def _call(self, ds):
    """Run the node on every dataset produced by the generator and
    stack the per-repetition results into a single dataset.

    Parameters
    ----------
    ds : Dataset
        Input dataset handed to the configured generator.

    Returns
    -------
    Dataset
        All per-repetition results concatenated along samples or
        features, depending on the configured concatenation mode.

    Raises
    ------
    ValueError
        If the concatenation mode is neither 'samples' nor 'features'.
    """
    # local binding
    generator = self._generator
    node = self._node
    ca = self.ca
    space = self.get_space()
    concat_as = self._concat_as

    # warn early if stats harvesting was requested but the node cannot
    # deliver them
    if ca.is_enabled("stats") and (not node.ca.has_key("stats")
                                   or not node.ca.is_enabled("stats")):
        warning("'stats' conditional attribute was enabled, but "
                "the assigned node '%s' either doesn't support it, "
                "or it is disabled" % node)

    # precharge conditional attributes
    ca.datasets = []

    # run the node on all generated datasets
    results = []
    for i, sds in enumerate(generator.generate(ds)):
        if ca.is_enabled("datasets"):
            # store dataset in ca
            ca.datasets.append(sds)
        # run the beast
        result = node(sds)
        # callback
        if self._callback is not None:
            self._callback(data=sds, node=node, result=result)
        # subclass postprocessing
        result = self._repetition_postcall(sds, node, result)
        if space:
            # XXX maybe try to get something more informative from the
            # processing node (e.g. in 0.5 it used to be 'chunks'->'chunks'
            # to indicate what was trained and what was tested. Now it is
            # more tricky, because `node` could be anything
            result.set_attr(space, (i,))
        # store
        results.append(result)

        if ca.is_enabled("stats") and node.ca.has_key("stats") \
           and node.ca.is_enabled("stats"):
            if not ca.is_set('stats'):
                # create empty stats container of matching type
                ca.stats = node.ca['stats'].value.__class__()
            # harvest summary stats; mutate the stored value object
            # in place via __iadd__ so the collectable is not rebound
            ca['stats'].value.__iadd__(node.ca['stats'].value)

    # charge condition attribute
    self.ca.repetition_results = results

    # stack all results into a single Dataset
    if concat_as == 'samples':
        results = vstack(results)
    elif concat_as == 'features':
        results = hstack(results)
    else:
        # fixed typo in original message ("Unkown")
        raise ValueError("Unknown concatenation mode '%s'" % concat_as)
    # no need to store the raw results, since the Measure class will
    # automatically store them in a CA
    return results
def _sl_call(self, dataset, roi_ids, nproc):
    """Classical generic searchlight implementation
    """
    # compute
    if nproc > 1:
        # never use more worker processes than there are ROI centers
        n_workers = min(len(roi_ids), nproc)
        # partition all target ROI center ids into one chunk per worker
        chunks = np.array_split(roi_ids, n_workers)

        # set up the infrastructure for parallel computing; this could
        # easily become a ParallelPython loop, if we decide to have a
        # PP job server in PyMVPA
        import pprocess
        p_results = pprocess.Map(limit=n_workers)
        if __debug__:
            debug('SLC',
                  "Starting off child processes for nproc=%i" % n_workers)
        compute = p_results.manage(pprocess.MakeParallel(self._proc_block))
        # should we maybe deepcopy the measure to have a unique and
        # independent one per process?
        for chunk in chunks:
            compute(chunk, dataset, copy.copy(self.__datameasure))

        # gather the per-worker outputs
        roi_sizes = [] if self.ca.is_enabled('roi_sizes') else None
        results = []
        for res, sizes in p_results:
            results += res
            if roi_sizes is not None:
                roi_sizes += sizes
    else:
        # serial operation: collect the results in a plain list
        results, roi_sizes = self._proc_block(roi_ids, dataset,
                                              self.__datameasure)

    if __debug__ and 'SLC' in debug.active:
        # just newline
        debug('SLC', '')
        # report the shape of the first result (if any)
        resshape = len(results) and np.asanyarray(results[0]).shape or 'N/A'
        debug('SLC', ' hstacking %d results of shape %s'
              % (len(results), resshape))

    # but be careful: this call also serves as conversion from parallel
    # maps to regular lists!
    # this uses the Dataset-hstack
    results = hstack(results)
    if __debug__:
        debug('SLC', " hstacked shape %s" % (results.shape,))
    return results, roi_sizes
def _sl_call(self, dataset, roi_ids, nproc):
    """Classical generic searchlight implementation

    Parameters
    ----------
    dataset : Dataset
        Dataset to run the measure on.
    roi_ids : sequence
        Feature ids of all ROI centers to be processed.
    nproc : int
        Requested number of parallel processes.

    Returns
    -------
    tuple
        ``(results, roi_sizes)`` where ``results`` is the hstacked
        per-ROI output and ``roi_sizes`` is a list of ROI sizes, or
        None if the 'roi_sizes' conditional attribute is disabled.
    """
    # compute
    if nproc > 1:
        # do not start more workers than there are ROIs to process --
        # otherwise np.array_split() would yield empty blocks and
        # useless child processes would be spawned
        nproc_needed = min(len(roi_ids), nproc)
        # split all target ROI centers into equally sized blocks
        roi_blocks = np.array_split(roi_ids, nproc_needed)

        # the next block sets up the infrastructure for parallel computing
        # this can easily be changed into a ParallelPython loop, if we
        # decide to have a PP job server in PyMVPA
        import pprocess
        p_results = pprocess.Map(limit=nproc_needed)
        if __debug__:
            debug("SLC", "Starting off child processes for nproc=%i"
                  % nproc_needed)
        compute = p_results.manage(pprocess.MakeParallel(self._proc_block))
        for block in roi_blocks:
            # should we maybe deepcopy the measure to have a unique and
            # independent one per process?
            compute(block, dataset, copy.copy(self.__datameasure))

        # collect results
        results = []
        if self.ca.is_enabled("roi_sizes"):
            roi_sizes = []
        else:
            roi_sizes = None

        for r, rsizes in p_results:
            results += r
            if roi_sizes is not None:
                roi_sizes += rsizes
    else:
        # otherwise collect the results in a list
        results, roi_sizes = self._proc_block(roi_ids, dataset,
                                              self.__datameasure)

    if __debug__:
        debug("SLC", "")
    # but be careful: this call also serves as conversion from parallel
    # maps to regular lists!
    # this uses the Dataset-hstack
    results = hstack(results)
    return results, roi_sizes
def _call(self, dataset):
    """Perform the ROI search.

    Parameters
    ----------
    dataset : Dataset
        Dataset to run the searchlight on.

    Returns
    -------
    Dataset
        hstacked per-ROI results; a matching mapper is attached when
        the input dataset carried one.

    Raises
    ------
    IndexError
        If a provided center id exceeds the number of features in the
        dataset (only checked in __debug__ mode).
    """
    # local binding
    nproc = self.__nproc
    if nproc is None and externals.exists('pprocess'):
        import pprocess
        try:
            nproc = pprocess.get_number_of_cores() or 1
        except AttributeError:
            warning("pprocess version %s has no API to figure out maximal "
                    "number of cores. Using 1"
                    % externals.versions['pprocess'])
            nproc = 1
    # train the queryengine
    self.__qe.train(dataset)

    # decide whether to run on all possible center coords or just a
    # provided subset
    if self.__center_ids is not None:
        roi_ids = self.__center_ids
        # safeguard against stupidity
        if __debug__:
            # compute the maximum only once
            max_roi_id = max(roi_ids)
            if max_roi_id >= dataset.nfeatures:
                # py2/py3-compatible raise form (original used the
                # py2-only statement syntax)
                raise IndexError(
                    "Maximal center_id found is %s whenever given "
                    "dataset has only %d features"
                    % (max_roi_id, dataset.nfeatures))
    else:
        roi_ids = np.arange(dataset.nfeatures)

    # compute; nproc may still be None when pprocess is unavailable --
    # treat that explicitly as serial operation (py2 happened to compare
    # None > 1 as False, py3 would raise TypeError)
    if nproc is not None and nproc > 1:
        # never spawn more workers than there are ROIs, otherwise
        # np.array_split() would produce empty blocks
        nproc_needed = min(len(roi_ids), nproc)
        # split all target ROI centers into equally sized blocks
        roi_blocks = np.array_split(roi_ids, nproc_needed)

        # the next block sets up the infrastructure for parallel computing
        # this can easily be changed into a ParallelPython loop, if we
        # decide to have a PP job server in PyMVPA
        import pprocess
        p_results = pprocess.Map(limit=nproc_needed)
        compute = p_results.manage(
                    pprocess.MakeParallel(self._proc_block))
        for block in roi_blocks:
            # should we maybe deepcopy the measure to have a unique and
            # independent one per process?
            compute(block, dataset, copy.copy(self.__datameasure))

        # collect results
        results = []
        if self.ca.is_enabled('roisizes'):
            roisizes = []
        else:
            roisizes = None

        for r, rsizes in p_results:
            results += r
            if roisizes is not None:
                roisizes += rsizes
    else:
        # otherwise collect the results in a list
        results, roisizes = \
                self._proc_block(roi_ids, dataset, self.__datameasure)

    if roisizes is not None:
        self.ca.roisizes = roisizes

    if __debug__:
        debug('SLC', '')
    # but be careful: this call also serves as conversion from parallel
    # maps to regular lists!
    # this uses the Dataset-hstack
    results = hstack(results)

    if 'mapper' in dataset.a:
        # since we know the space we can stick the original mapper into
        # the results as well
        if self.__center_ids is None:
            results.a['mapper'] = copy.copy(dataset.a.mapper)
        else:
            # there is an additional selection step that needs to be
            # expressed by another mapper
            mapper = copy.copy(dataset.a.mapper)
            mapper.append(FeatureSliceMapper(self.__center_ids,
                                             dshape=dataset.shape[1:]))
            results.a['mapper'] = mapper
    # charge state
    self.ca.raw_results = results
    # return raw results, base-class will take care of transformations
    return results