Example #1
File: base.py Project: esc/PyMVPA
    def _call(self, ds):
        # local binding
        generator = self._generator
        node = self._node
        ca = self.ca
        space = self.get_space()
        concat_as = self._concat_as

        if self.ca.is_enabled("stats") and ("stats" not in node.ca or
                                            not node.ca.is_enabled("stats")):
            warning("'stats' conditional attribute was enabled, but "
                    "the assigned node '%s' either doesn't support it, "
                    "or it is disabled" % node)
        # precharge conditional attributes
        ca.datasets = []

        # run the node on all generated datasets
        results = []
        for i, sds in enumerate(generator.generate(ds)):
            if ca.is_enabled("datasets"):
                # store dataset in ca
                ca.datasets.append(sds)
            # run the beast
            result = node(sds)
            # callback
            if self._callback is not None:
                self._callback(data=sds, node=node, result=result)
            # subclass postprocessing
            result = self._repetition_postcall(sds, node, result)
            if space:
                # XXX maybe try to get something more informative from the
                # processing node (e.g. in 0.5 it used to be 'chunks'->'chunks'
                # to indicate what was trained and what was tested. Now it is
                # more tricky, because `node` could be anything
                result.set_attr(space, (i,))
            # store
            results.append(result)

            if ca.is_enabled("stats") and "stats" in node.ca \
               and node.ca.is_enabled("stats"):
                if not ca.is_set('stats'):
                    # create empty stats container of matching type
                    ca.stats = node.ca['stats'].value.__class__()
                # harvest summary stats
                ca['stats'].value.__iadd__(node.ca['stats'].value)

        # charge conditional attribute
        self.ca.repetition_results = results

        # stack all results into a single Dataset
        if concat_as == 'samples':
            results = vstack(results)
        elif concat_as == 'features':
            results = hstack(results)
        else:
            raise ValueError("Unkown concatenation mode '%s'" % concat_as)
        # no need to store the raw results, since the Measure class will
        # automatically store them in a CA
        return results
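A minimal numpy sketch of the `concat_as` branch above: per-repetition results are stacked either along samples (vstack) or along features (hstack). Plain numpy arrays stand in for PyMVPA Datasets here, and the function and variable names are illustrative only.

import numpy as np

def stack_repetition_results(results, concat_as='samples'):
    # mirrors the two concatenation modes used in _call() above
    if concat_as == 'samples':
        return np.vstack(results)    # one row block per repetition
    elif concat_as == 'features':
        return np.hstack(results)    # one column block per repetition
    raise ValueError("Unknown concatenation mode '%s'" % concat_as)

reps = [np.array([[0.71, 0.68]]), np.array([[0.64, 0.73]])]
print(stack_repetition_results(reps, 'samples').shape)   # (2, 2)
print(stack_repetition_results(reps, 'features').shape)  # (1, 4)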
Example #2
    def _sl_call(self, dataset, roi_ids, nproc):
        """Classical generic searchlight implementation
        """
        # compute
        if nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            nproc_needed = min(len(roi_ids), nproc)
            roi_blocks = np.array_split(roi_ids, nproc_needed)

            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc_needed)
            if __debug__:
                debug('SLC', "Starting off child processes for nproc=%i"
                      % nproc_needed)
            compute = p_results.manage(
                        pprocess.MakeParallel(self._proc_block))
            for block in roi_blocks:
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                compute(block, dataset, copy.copy(self.__datameasure))

            # collect results
            results = []
            if self.ca.is_enabled('roi_sizes'):
                roi_sizes = []
            else:
                roi_sizes = None

            for r, rsizes in p_results:
                results += r
                if roi_sizes is not None:
                    roi_sizes += rsizes
        else:
            # otherwise collect the results in a list
            results, roi_sizes = \
                    self._proc_block(roi_ids, dataset, self.__datameasure)

        if __debug__ and 'SLC' in debug.active:
            debug('SLC', '')            # just newline
            resshape = np.asanyarray(results[0]).shape if len(results) else 'N/A'
            debug('SLC', ' hstacking %d results of shape %s'
                  % (len(results), resshape))

        # but be careful: this call also serves as conversion from parallel maps
        # to regular lists!
        # this uses the Dataset-hstack
        results = hstack(results)

        if __debug__:
            debug('SLC', " hstacked shape %s" % (results.shape,))

        return results, roi_sizes
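Note the `nproc_needed = min(len(roi_ids), nproc)` clamp: `np.array_split` will happily return empty blocks when asked for more splits than there are ROI ids, which would spawn idle child processes. A standalone illustration with hypothetical numbers:

import numpy as np

roi_ids = np.arange(10)              # hypothetical ROI center ids
nproc = 4
nproc_needed = min(len(roi_ids), nproc)

# near-equal blocks; none empty as long as nproc_needed <= len(roi_ids)
blocks = np.array_split(roi_ids, nproc_needed)
print([b.tolist() for b in blocks])
# [[0, 1, 2], [3, 4, 5], [6, 7], [8, 9]]

# without the clamp, surplus workers would receive empty blocks
print([b.tolist() for b in np.array_split(np.arange(2), 4)])
# [[0], [1], [], []]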
Example #3
    def _sl_call(self, dataset, roi_ids, nproc):
        """Classical generic searchlight implementation
        """
        # compute
        if nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            roi_blocks = np.array_split(roi_ids, nproc)

            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess

            p_results = pprocess.Map(limit=nproc)
            if __debug__:
                debug("SLC", "Starting off child processes for nproc=%i" % nproc)
            compute = p_results.manage(pprocess.MakeParallel(self._proc_block))
            for block in roi_blocks:
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                compute(block, dataset, copy.copy(self.__datameasure))

            # collect results
            results = []
            if self.ca.is_enabled("roi_sizes"):
                roi_sizes = []
            else:
                roi_sizes = None

            for r, rsizes in p_results:
                results += r
                if roi_sizes is not None:
                    roi_sizes += rsizes
        else:
            # otherwise collect the results in a list
            results, roi_sizes = self._proc_block(roi_ids, dataset, self.__datameasure)

        if __debug__:
            debug("SLC", "")

        # but be careful: this call also serves as conversion from parallel maps
        # to regular lists!
        # this uses the Dataset-hstack
        results = hstack(results)

        return results, roi_sizes
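This variant lacks the `nproc_needed` clamp and the extra shape debugging of Example #2, but the scatter/gather pattern is the same. A rough, self-contained sketch of that pattern using the standard library's `multiprocessing` instead of `pprocess` (a stand-in, not the PyMVPA implementation; `proc_block` is a hypothetical worker returning `(results, roi_sizes)` per block):

import numpy as np
from multiprocessing import Pool

def proc_block(block):
    # hypothetical per-block work: one score and one ROI size per center id
    results = [float(i) for i in block]
    roi_sizes = [1 + int(i) % 3 for i in block]
    return results, roi_sizes

if __name__ == '__main__':
    roi_ids = np.arange(20)
    blocks = np.array_split(roi_ids, min(len(roi_ids), 4))
    with Pool(processes=4) as pool:
        per_block = pool.map(proc_block, blocks)
    # flatten exactly like the `for r, rsizes in p_results` loop above
    results, roi_sizes = [], []
    for r, rsizes in per_block:
        results += r
        roi_sizes += rsizes
    print(len(results), len(roi_sizes))   # 20 20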
Example #4
    def _call(self, dataset):
        """Perform the ROI search.
        """
        # local binding
        nproc = self.__nproc

        if nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1" % externals.versions['pprocess'])
                nproc = 1
        # train the queryengine
        self.__qe.train(dataset)

        # decide whether to run on all possible center coords or just a provided
        # subset
        if self.__center_ids is not None:
            roi_ids = self.__center_ids
            # safeguard against stupidity
            if __debug__:
                if max(roi_ids) >= dataset.nfeatures:
                    raise IndexError(
                          "Maximal center_id found is %s while the given "
                          "dataset has only %d features"
                          % (max(roi_ids), dataset.nfeatures))
        else:
            roi_ids = np.arange(dataset.nfeatures)

        # compute
        if nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            roi_blocks = np.array_split(roi_ids, nproc)

            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc)
            compute = p_results.manage(
                        pprocess.MakeParallel(self._proc_block))
            for block in roi_blocks:
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                compute(block, dataset, copy.copy(self.__datameasure))

            # collect results
            results = []
            if self.ca.is_enabled('roisizes'):
                roisizes = []
            else:
                roisizes = None

            for r, rsizes in p_results:
                results += r
                if roisizes is not None:
                    roisizes += rsizes
        else:
            # otherwise collect the results in a list
            results, roisizes = \
                    self._proc_block(roi_ids, dataset, self.__datameasure)

        if roisizes is not None:
            self.ca.roisizes = roisizes

        if __debug__:
            debug('SLC', '')

        # but be careful: this call also serves as conversion from parallel maps
        # to regular lists!
        # this uses the Dataset-hstack
        results = hstack(results)

        if 'mapper' in dataset.a:
            # since we know the space we can stick the original mapper into the
            # results as well
            if self.__center_ids is None:
                results.a['mapper'] = copy.copy(dataset.a.mapper)
            else:
                # there is an additional selection step that needs to be
                # expressed by another mapper
                mapper = copy.copy(dataset.a.mapper)
                mapper.append(FeatureSliceMapper(self.__center_ids,
                                                 dshape=dataset.shape[1:]))
                results.a['mapper'] = mapper

        # charge state
        self.ca.raw_results = results

        # return raw results, base-class will take care of transformations
        return results
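A small sketch of the nproc auto-detection fallback at the top of this `_call()`, with the standard library's `multiprocessing.cpu_count()` standing in for `pprocess.get_number_of_cores()` (illustrative only; `detect_nproc` is a hypothetical helper, not part of PyMVPA):

def detect_nproc(requested=None):
    # an explicit request always wins, mirroring the `nproc is None` check above
    if requested is not None:
        return requested
    try:
        import multiprocessing
        return multiprocessing.cpu_count() or 1
    except (ImportError, NotImplementedError):
        # mirror the warn-and-fall-back-to-1 behaviour of _call()
        return 1

print(detect_nproc())     # number of detected cores
print(detect_nproc(2))    # 2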