Example #1
    def _sl_call(self, dataset, roi_ids, nproc):
        """Classical generic searchlight implementation
        """
        assert (self.results_backend in ('native', 'hdf5'))
        # compute
        if nproc is not None and nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            nproc_needed = min(len(roi_ids), nproc)
            nblocks = nproc_needed \
                      if self.nblocks is None else self.nblocks
            roi_blocks = np.array_split(roi_ids, nblocks)

            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc_needed)
            if __debug__:
                debug(
                    'SLC', "Starting off %s child processes for nblocks=%i" %
                    (nproc_needed, nblocks))
            compute = p_results.manage(pprocess.MakeParallel(self._proc_block))
            for iblock, block in enumerate(roi_blocks):
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                seed = mvpa2.get_random_seed()
                compute(block,
                        dataset,
                        copy.copy(self.__datameasure),
                        seed=seed,
                        iblock=iblock)
        else:
            # otherwise collect the results in a 1-item list
            p_results = [
                self._proc_block(roi_ids, dataset, self.__datameasure)
            ]

        # Finally collect and possibly process results
        # p_results here is either a generator from pprocess.Map or a list.
        # With a generator, results can be processed as they become available.
        result_ds = self.results_fx(
            sl=self,
            dataset=dataset,
            roi_ids=roi_ids,
            results=self.__handle_all_results(p_results))

        # Assure having a dataset (for paranoid ones)
        if not is_datasetlike(result_ds):
            try:
                result_a = np.atleast_1d(result_ds)
            except ValueError as e:
                if 'setting an array element with a sequence' in str(e):
                    # try forcing object array.  Happens with
                    # test_custom_results_fx_logic on numpy 1.4.1 on Debian
                    # squeeze
                    result_a = np.array(result_ds, dtype=object)
                else:
                    raise
            result_ds = Dataset(result_a)
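
The pattern in this example does not depend on pprocess itself: split the ROI centers into roughly equal blocks with np.array_split, hand each block to a worker process, and collect the per-block results. Below is a minimal, self-contained sketch of the same split-and-map idea using only numpy and the standard-library multiprocessing module; process_block and parallel_searchlight are illustrative names, not PyMVPA API.

    import numpy as np
    from multiprocessing import Pool

    def process_block(block):
        # stand-in for self._proc_block: compute one value per ROI center
        return [int(center) ** 2 for center in block]

    def parallel_searchlight(roi_ids, nproc=4, nblocks=None):
        # never start more worker processes than there are ROI centers
        nproc_needed = min(len(roi_ids), nproc)
        nblocks = nproc_needed if nblocks is None else nblocks
        # split all target ROI centers into `nblocks` roughly equal blocks
        roi_blocks = np.array_split(np.asarray(roi_ids), nblocks)
        with Pool(processes=nproc_needed) as pool:
            # one result list per block; flatten them back into a single list
            per_block = pool.map(process_block, roi_blocks)
        return [res for block_res in per_block for res in block_res]

    if __name__ == '__main__':
        print(parallel_searchlight(list(range(10)), nproc=3))

pprocess.Map plays the role of the Pool here, but it is iterable, so results can be consumed as they become available (the generator case mentioned in the comments above), whereas pool.map blocks until every block has finished.
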
Example #2
    def _sl_call(self, dataset, roi_ids, nproc):
        """Classical generic searchlight implementation
        """
        assert(self.results_backend in ('native', 'hdf5'))
        # compute
        if nproc is not None and nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            nproc_needed = min(len(roi_ids), nproc)
            nblocks = nproc_needed \
                      if self.nblocks is None else self.nblocks
            roi_blocks = np.array_split(roi_ids, nblocks)

            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc_needed)
            if __debug__:
                debug('SLC', "Starting off %s child processes for nblocks=%i"
                      % (nproc_needed, nblocks))
            compute = p_results.manage(
                        pprocess.MakeParallel(self._proc_block))
            for iblock, block in enumerate(roi_blocks):
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                seed = mvpa2.get_random_seed()
                compute(block, dataset, copy.copy(self.__datameasure),
                        seed=seed, iblock=iblock)
        else:
            # otherwise collect the results in a 1-item list
            p_results = [
                    self._proc_block(roi_ids, dataset, self.__datameasure)]

        # Finally collect and possibly process results
        # p_results here is either a generator from pprocess.Map or a list.
        # With a generator, results can be processed as they become available.
        result_ds = self.results_fx(sl=self,
                                    dataset=dataset,
                                    roi_ids=roi_ids,
                                    results=self.__handle_all_results(p_results))

        # Assure having a dataset (for paranoid ones)
        if not is_datasetlike(result_ds):
            try:
                result_a = np.atleast_1d(result_ds)
            except ValueError as e:
                if 'setting an array element with a sequence' in str(e):
                    # try forcing object array.  Happens with
                    # test_custom_results_fx_logic on numpy 1.4.1 on Debian
                    # squeeze
                    result_a = np.array(result_ds, dtype=object)
                else:
                    raise
            result_ds = Dataset(result_a)
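
The closing try/except in both examples guards against data measures that return ragged results: depending on the numpy version, np.atleast_1d may raise "setting an array element with a sequence" when the per-ROI results have mismatched shapes, and the code then forces an object array before wrapping it in a Dataset. A plain-numpy illustration of that fallback (to_array is a hypothetical helper, not PyMVPA code):

    import numpy as np

    def to_array(result):
        """Coerce a result to at least a 1-d array, forcing dtype=object
        when its elements have mismatched shapes."""
        try:
            return np.atleast_1d(result)
        except ValueError as e:
            if 'setting an array element with a sequence' in str(e):
                return np.array(result, dtype=object)
            raise

    print(to_array(5).shape)      # (1,): a scalar becomes a 1-d array
    print(to_array([1, 2, 3]))    # regular 1-d integer array
    # ragged input ends up as an object array: directly on older numpy
    # (with a deprecation warning) or via the ValueError fallback on newer numpy
    print(to_array([[1, 2], [3]]).dtype)
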
Example #3
    def __call__(self, datasets):
        """Estimate mappers for each dataset using searchlight-based
        hyperalignment.

        Parameters
        ----------
          datasets : list or tuple of datasets

        Returns
        -------
        A list of trained StaticProjectionMappers of the same length as datasets
        """

        # Perform some checks first before modifying internal state
        params = self.params
        ndatasets = len(datasets)

        if len(datasets) <= 1:
            raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                             "operate on. Got: %d" % self.ndatasets)

        if params.ref_ds in params.exclude_from_model:
            raise ValueError("Requested reference dataset %i is also "
                             "in the exclude list." % params.ref_ds)

        if params.ref_ds >= ndatasets:
            raise ValueError("Requested reference dataset %i is out of "
                             "bounds. We have only %i datasets provided" %
                             (params.ref_ds, ndatasets))

        # The rest of the checks are just warnings
        self.ndatasets = ndatasets

        _shpaldebug("SearchlightHyperalignment %s for %i datasets" %
                    (self, self.ndatasets))

        selected = [
            _ for _ in range(ndatasets) if _ not in params.exclude_from_model
        ]
        ref_ds_train = selected.index(params.ref_ds)
        params.hyperalignment.params.ref_ds = ref_ds_train
        warning('Using %dth dataset as the reference dataset (%dth after '
                'excluding datasets)' % (params.ref_ds, ref_ds_train))
        if len(params.exclude_from_model) > 0:
            warning("These datasets will not participate in building common "
                    "model: %s" % params.exclude_from_model)

        if __debug__:
            # verify that datasets were zscored prior to the alignment, since
            # that is an assumed/required preprocessing step
            for ids, ds in enumerate(datasets):
                for f, fname, tval in ((np.mean, 'means', 0),
                                       (np.std, 'stds', 1)):
                    vals = f(ds, axis=0)
                    vals_comp = np.abs(vals - tval) > 1e-5
                    if np.any(vals_comp):
                        warning(
                            '%d %s are too different (max diff=%g) from %d in '
                            'dataset %d to come from a zscored dataset. '
                            'Please zscore datasets first for correct operation '
                            '(unless it was intentional)' %
                            (np.sum(vals_comp), fname,
                             np.max(np.abs(vals)), tval, ids))

        # Setting up SearchlightHyperalignment
        # we need to know which original features were comprising the
        # individual SL ROIs
        _shpaldebug('Initializing FeatureSelectionHyperalignment.')
        hmeasure = FeatureSelectionHyperalignment(
            ref_ds=params.ref_ds,
            featsel=params.featsel,
            hyperalignment=params.hyperalignment,
            full_matrix=params.combine_neighbormappers,
            use_same_features=params.use_same_features,
            exclude_from_model=params.exclude_from_model,
            dtype=params.dtype)

        # Performing SL processing manually
        _shpaldebug("Setting up for searchlights")
        if params.nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                params.nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1" %
                        externals.versions['pprocess'])
                params.nproc = 1

        # XXX I think this class should already accept a single dataset only.
        # It should have a ``space`` setting that names a sample attribute that
        # can be used to identify individual/original datasets.
        # Taking a single dataset as argument would be cleaner, because the
        # algorithm relies on the assumption that there is a coarse feature
        # alignment, i.e. the SL ROIs cover roughly the same area
        queryengines = self._get_trained_queryengines(datasets,
                                                      params.queryengine,
                                                      params.radius,
                                                      params.ref_ds)
        # For surface-node-to-voxel queryengines, roi_seed hardly makes sense
        qe = queryengines[(0 if len(queryengines) == 1 else params.ref_ds)]
        if isinstance(qe, SurfaceVerticesQueryEngine):
            self.force_roi_seed = False
            if not self.params.combine_neighbormappers:
                raise NotImplementedError(
                    "Mapping from voxels to surface nodes is not "
                    "implemented yet. Try setting combine_neighbormappers "
                    "to True.")
        self.nfeatures = datasets[params.ref_ds].nfeatures
        _shpaldebug("Performing Hyperalignment in searchlights")
        # Setting up centers for running SL Hyperalignment
        if params.sparse_radius is None:
            roi_ids = self._get_verified_ids(queryengines) \
                if params.mask_node_ids is None \
                else params.mask_node_ids
        else:
            if params.queryengine is not None:
                raise NotImplementedError(
                    "using sparse_radius whenever custom queryengine is "
                    "provided is not yet supported.")
            _shpaldebug("Setting up sparse neighborhood")
            from mvpa2.misc.neighborhood import scatter_neighborhoods
            if params.mask_node_ids is None:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices,
                    deterministic=True)
                roi_ids = sidx
            else:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices[
                        params.mask_node_ids],
                    deterministic=True)
                roi_ids = [params.mask_node_ids[sid] for sid in sidx]

        # Initialize projections
        _shpaldebug('Initializing projection matrices')
        self.projections = [
            csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
            for isub in range(self.ndatasets)
        ]

        # compute
        if params.nproc is not None and params.nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            nproc_needed = min(len(roi_ids), params.nproc)
            params.nblocks = nproc_needed \
                if params.nblocks is None else params.nblocks
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc_needed)
            if __debug__:
                debug(
                    'SLC', "Starting off %s child processes for nblocks=%i" %
                    (nproc_needed, params.nblocks))
            compute = p_results.manage(pprocess.MakeParallel(self._proc_block))
            seed = mvpa2.get_random_seed()
            for iblock, block in enumerate(node_blocks):
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                compute(block,
                        datasets,
                        copy.copy(hmeasure),
                        queryengines,
                        seed=seed,
                        iblock=iblock)
        else:
            # otherwise compute the blocks sequentially and collect the
            # results in a plain list
            _shpaldebug('Using 1 process to compute mappers.')
            if params.nblocks is None:
                params.nblocks = 1
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            p_results = [
                self._proc_block(block, datasets, hmeasure, queryengines)
                for block in node_blocks
            ]
        results_ds = self.__handle_all_results(p_results)
        # Dummy iterator for, you know, iteration
        list(results_ds)

        _shpaldebug(
            'Wrapping projection matrices into StaticProjectionMappers')
        self.projections = [
            StaticProjectionMapper(proj=proj, recon=proj.T)
            if params.compute_recon else StaticProjectionMapper(proj=proj)
            for proj in self.projections
        ]
        return self.projections
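
The __debug__-only loop near the top of this example checks that each dataset's feature-wise means are close to 0 and standard deviations close to 1, i.e. that the data were zscored before alignment. The same check works on plain numpy arrays; check_zscored below is a hypothetical helper that reuses the 1e-5 tolerance from the code above.

    import numpy as np

    def check_zscored(data, tol=1e-5):
        # report feature-wise deviations from mean 0 and std 1
        problems = []
        for f, fname, tval in ((np.mean, 'means', 0), (np.std, 'stds', 1)):
            vals = f(data, axis=0)
            bad = np.abs(vals - tval) > tol
            if np.any(bad):
                problems.append('%d %s deviate from %d (max |val|=%g)'
                                % (np.sum(bad), fname, tval,
                                   np.max(np.abs(vals))))
        return problems

    rng = np.random.RandomState(0)
    raw = rng.normal(loc=3.0, scale=2.0, size=(20, 5))
    print(check_zscored(raw))       # flags both means and stds
    zscored = (raw - raw.mean(axis=0)) / raw.std(axis=0)
    print(check_zscored(zscored))   # []: the zscored data pass
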
Example #4
    def __call__(self, datasets):
        """Estimate mappers for each dataset using searchlight-based
        hyperalignment.

        Parameters
        ----------
          datasets : list or tuple of datasets

        Returns
        -------
        A list of trained StaticProjectionMappers of the same length as datasets
        """

        # Perform some checks first before modifying internal state
        params = self.params
        ndatasets = len(datasets)

        if len(datasets) <= 1:
            raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                             "operate on. Got: %d" % self.ndatasets)

        if params.ref_ds in params.exclude_from_model:
            raise ValueError("Requested reference dataset %i is also "
                             "in the exclude list." % params.ref_ds)

        if params.ref_ds >= ndatasets:
            raise ValueError("Requested reference dataset %i is out of "
                             "bounds. We have only %i datasets provided"
                             % (params.ref_ds, ndatasets))

        # The rest of the checks are just warnings
        self.ndatasets = ndatasets

        _shpaldebug("SearchlightHyperalignment %s for %i datasets"
                    % (self, self.ndatasets))

        if params.ref_ds != params.hyperalignment.params.ref_ds:
            warning('Supplied ref_ds & hyperalignment instance ref_ds:%d differ.'
                    % params.hyperalignment.params.ref_ds)
            warning('Using default hyperalignment instance with ref_ds: %d' % params.ref_ds)
            params.hyperalignment = Hyperalignment(ref_ds=params.ref_ds)
        if len(params.exclude_from_model) > 0:
            warning("These datasets will not participate in building common "
                    "model: %s" % params.exclude_from_model)

        if __debug__:
            # verify that datasets were zscored prior to the alignment, since
            # that is an assumed/required preprocessing step
            for ids, ds in enumerate(datasets):
                for f, fname, tval in ((np.mean, 'means', 0),
                                       (np.std, 'stds', 1)):
                    vals = f(ds, axis=0)
                    vals_comp = np.abs(vals - tval) > 1e-5
                    if np.any(vals_comp):
                        warning('%d %s are too different (max diff=%g) from %d in '
                                'dataset %d to come from a zscored dataset. '
                                'Please zscore datasets first for correct operation '
                                '(unless it was intentional)'
                                % (np.sum(vals_comp), fname,
                                   np.max(np.abs(vals)), tval, ids))

        # Setting up SearchlightHyperalignment
        # we need to know which original features were comprising the
        # individual SL ROIs
        _shpaldebug('Initializing FeatureSelectionHyperalignment.')
        hmeasure = FeatureSelectionHyperalignment(
            featsel=params.featsel,
            hyperalignment=params.hyperalignment,
            full_matrix=params.combine_neighbormappers,
            use_same_features=params.use_same_features,
            exclude_from_model=params.exclude_from_model,
            dtype=params.dtype)

        # Performing SL processing manually
        _shpaldebug("Setting up for searchlights")
        if params.nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                params.nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1"
                        % externals.versions['pprocess'])
                params.nproc = 1

        # XXX I think this class should already accept a single dataset only.
        # It should have a ``space`` setting that names a sample attribute that
        # can be used to identify individual/original datasets.
        # Taking a single dataset as argument would be cleaner, because the
        # algorithm relies on the assumption that there is a coarse feature
        # alignment, i.e. the SL ROIs cover roughly the same area
        queryengines = self._get_trained_queryengines(
            datasets, params.queryengine, params.radius, params.ref_ds)
        # For surface-node-to-voxel queryengines, roi_seed hardly makes sense
        if isinstance(queryengines[params.ref_ds], SurfaceVerticesQueryEngine):
            self.force_roi_seed = False
            if not self.params.combine_neighbormappers:
                raise NotImplementedError("Mapping from voxels to surface nodes is not "
                        "implmented yet. Try setting combine_neighbormappers to True.")
        self.nfeatures = datasets[params.ref_ds].nfeatures
        _shpaldebug("Performing Hyperalignment in searchlights")
        # Setting up centers for running SL Hyperalignment
        if params.sparse_radius is None:
            roi_ids = self._get_verified_ids(queryengines) \
                if params.mask_node_ids is None \
                else params.mask_node_ids
        else:
            if params.queryengine is not None:
                raise NotImplementedError(
                    "using sparse_radius whenever custom queryengine is "
                    "provided is not yet supported.")
            _shpaldebug("Setting up sparse neighborhood")
            from mvpa2.misc.neighborhood import scatter_neighborhoods
            if params.mask_node_ids is None:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices,
                    deterministic=True)
                roi_ids = sidx
            else:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices[params.mask_node_ids],
                    deterministic=True)
                roi_ids = [params.mask_node_ids[sid] for sid in sidx]

        # Initialize projections
        _shpaldebug('Initializing projection matrices')
        self.projections = [
            csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
            for isub in range(self.ndatasets)]

        # compute
        if params.nproc is not None and params.nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            nproc_needed = min(len(roi_ids), params.nproc)
            params.nblocks = nproc_needed \
                if params.nblocks is None else params.nblocks
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc_needed)
            if __debug__:
                debug('SLC', "Starting off %s child processes for nblocks=%i"
                      % (nproc_needed, params.nblocks))
            compute = p_results.manage(
                        pprocess.MakeParallel(self._proc_block))
            seed = mvpa2.get_random_seed()
            for iblock, block in enumerate(node_blocks):
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                compute(block, datasets, copy.copy(hmeasure), queryengines,
                        seed=seed, iblock=iblock)
        else:
            # otherwise compute the blocks sequentially and collect the
            # results in a plain list
            _shpaldebug('Using 1 process to compute mappers.')
            if params.nblocks is None:
                params.nblocks = 1
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            p_results = [self._proc_block(block, datasets, hmeasure, queryengines)
                         for block in node_blocks]
        results_ds = self.__handle_all_results(p_results)
        # Dummy iterator for, you know, iteration
        list(results_ds)

        _shpaldebug('Wrapping projection matrices into StaticProjectionMappers')
        self.projections = [
            StaticProjectionMapper(proj=proj, recon=proj.T) if params.compute_recon
            else StaticProjectionMapper(proj=proj)
            for proj in self.projections]
        return self.projections
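
Both __call__ variants accumulate one (nfeatures x nfeatures) scipy csc_matrix per subject and finally wrap each of them in a StaticProjectionMapper, passing recon=proj.T when compute_recon is requested. The toy scipy-only sketch below shows what such a sparse projection looks like and how it maps a samples x features array; the block values and shapes are made up for illustration and are not what the hyperalignment measure would actually produce.

    import numpy as np
    from scipy.sparse import csc_matrix

    nfeatures = 4
    # start from an all-zero sparse projection, as in the initialization above;
    # convert to LIL format so that blocks can be assigned conveniently
    proj = csc_matrix((nfeatures, nfeatures), dtype='float32').tolil()
    # pretend two searchlight ROIs contributed local 2x2 alignment blocks
    proj[0:2, 0:2] = np.array([[0.8, 0.2], [0.1, 0.9]])
    proj[2:4, 2:4] = np.eye(2)
    proj = proj.tocsc()

    data = np.arange(12, dtype='float32').reshape(3, nfeatures)  # samples x features
    aligned = data.dot(proj.toarray())      # forward projection keeps samples as rows
    recon = aligned.dot(proj.toarray().T)   # transpose serves as the reverse mapping
    print(aligned.shape, recon.shape)       # (3, 4) (3, 4)
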