Ejemplo n.º 1
def cast_unsuitable_regions_by_label_MT(multi_level_mask, thresholds_list, parameter_dict):
    '''Cast small regions as noise and big regions as vessel, one label per worker.

    One parallel call to ``put_result_into_shared_memory`` is issued for every
    label in ``range(int(np.max(multi_level_mask)))``.  Every call receives the
    same shared int8 buffer of shape ``(n_labels,) + multi_level_mask.shape``;
    the helper is presumably responsible for writing its per-label result into
    that buffer (TODO confirm which slice it writes).  The per-label slices
    are finally OR-ed together.

    Parameters
    ----------
    multi_level_mask : ndarray
        Label image; its maximum determines how many labels are processed.
    thresholds_list, parameter_dict
        Passed through unchanged to ``put_result_into_shared_memory``.

    Returns
    -------
    ndarray
        Boolean array shaped like ``multi_level_mask`` (an all-zero int8 array
        when there is no label to process).
    '''
    nodule_mask = np.zeros_like(multi_level_mask, np.int8)
    loop_times = int(np.max(multi_level_mask))

    # TODO map style parallelisation
    shared_array_s = mp.Array(ctypes.c_int8, loop_times * np.size(nodule_mask))
    shared_array = np.frombuffer(shared_array_s.get_obj(), dtype=np.int8).reshape(
        (loop_times,) + nodule_mask.shape)

    # Use half of the available cores.  Integer division keeps the limit an
    # int on Python 3, and the lower bound of 1 avoids a zero limit on
    # single-core machines or when the core count cannot be determined.
    num_of_proc = pprocess.get_number_of_cores() or 1
    results = pprocess.Map(limit=max(1, num_of_proc // 2))
    para_func = results.manage(pprocess.MakeParallel(put_result_into_shared_memory))

    for one_label in range(loop_times):
        para_func(shared_array, multi_level_mask, thresholds_list, parameter_dict, one_label)

    # Drain the map so that every worker has finished before the shared
    # buffer is read; the returned values themselves are not needed.
    for _ in results:
        pass

    if loop_times:
        # OR of all per-label slices (same result as OR-ing them one by one).
        nodule_mask = shared_array.any(axis=0)

    # HACK: swap in a fresh allocator heap, presumably so the arena backing
    # ``shared_array_s`` can be released once it goes out of scope.  This
    # relies on private multiprocessing internals -- TODO confirm it is
    # still required.
    mp.heap.BufferWrapper._heap = mp.heap.Heap()

    return nodule_mask
Ejemplo n.º 2
def filter_regions(multi_level_mask, thresholds_list, parameter_dict, one_label):
    """Return a boolean mask of the regions above ``one_label`` that pass ``region_task``.

    The mask ``multi_level_mask > one_label`` is labelled by ``lyBWareaopen``
    (called with ``parameter_dict['small_vol_threshold']`` as minimum size),
    every labelled region is evaluated in parallel by ``region_task``, and
    regions whose result is falsy are zeroed out.

    Parameters
    ----------
    multi_level_mask : ndarray
        Multi-level label image.
    thresholds_list : sequence
        Indexed by ``one_label`` to obtain the threshold given to ``region_task``.
    parameter_dict : dict
        Must contain ``'small_vol_threshold'``; also passed to ``region_task``.
    one_label : int
        Level to process.

    Returns
    -------
    ndarray of bool
        True where a surviving region is located.
    """
    one_threshold = thresholds_list[one_label]
    mask = (multi_level_mask > one_label)
    min_size = parameter_dict['small_vol_threshold']
    label_image, bounding_box_slices = lyBWareaopen(mask, min_size)

    # One parallel task per labelled region (labels start at 1).
    results = pprocess.Map(limit=pprocess.get_number_of_cores())
    calc = results.manage(pprocess.MakeParallel(region_task))
    for label_number in range(1, np.max(label_image) + 1):
        calc(label_number, label_image, bounding_box_slices, one_threshold, parameter_dict)

    # Entry 0 stands for the background label and is never kept.  The
    # remaining entries are indexed by label, which assumes the map yields
    # results in call order -- TODO confirm against the pprocess docs.
    region_result = [False]
    region_result.extend(results)

    # ``np.bool`` was removed from NumPy; the builtin ``bool`` is equivalent.
    keep_region = np.array(region_result, bool)
    rejected = np.logical_not(keep_region)
    # Look up the verdict per voxel via its label and clear rejected voxels.
    label_image[rejected[label_image]] = 0

    return (label_image > 0)
Ejemplo n.º 3
    def _call(self, dataset):
        """Perform the ROI search.

        Determines the number of processes and the ROI center ids, delegates
        the actual computation to ``self._sl_call`` and stores the ROI sizes
        and raw results in the conditional attributes.

        Parameters
        ----------
        dataset
            Dataset to run on; its ``nfeatures``, ``fa``, ``a`` and ``shape``
            are used here.

        Returns
        -------
        The raw results returned by ``self._sl_call``, with a mapper attached
        if ``dataset`` has one.

        Raises
        ------
        IndexError
            Under ``__debug__``, if a provided ROI id is not a valid feature
            index of ``dataset``.
        """
        # local binding
        nproc = self.nproc

        if nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1"
                        % externals.versions['pprocess'])
                nproc = 1
        # train the queryengine
        # NOTE(review): no training call is visible at this point in the
        # excerpt -- confirm whether one belongs here.

        # decide whether to run on all possible center coords or just a provided
        # subset
        if isinstance(self.__roi_ids, str):
            # name of a feature attribute: its non-zero entries are the centers
            roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
        elif self.__roi_ids is not None:
            roi_ids = self.__roi_ids
            # safeguard against stupidity
            if __debug__:
                # guard on emptiness: max() of an empty sequence would raise
                if len(roi_ids) and max(roi_ids) >= dataset.nfeatures:
                    raise IndexError(
                        "Maximal center_id found is %s whenever given "
                        "dataset has only %d features"
                        % (max(roi_ids), dataset.nfeatures))
        else:
            roi_ids = np.arange(dataset.nfeatures)

        # pass to subclass
        results, roi_sizes = self._sl_call(dataset, roi_ids, nproc)

        if roi_sizes is not None:
            self.ca.roi_sizes = roi_sizes

        if 'mapper' in dataset.a:
            # since we know the space we can stick the original mapper into the
            # results as well
            if self.__roi_ids is None:
                results.a['mapper'] = copy.copy(dataset.a.mapper)
            else:
                # there is an additional selection step that needs to be
                # expressed by another mapper
                # NOTE(review): the append below is restored to match the
                # sibling implementation of this method -- confirm.
                mapper = copy.copy(dataset.a.mapper)
                mapper.append(
                    StaticFeatureSelection(roi_ids, dshape=dataset.shape[1:]))
                results.a['mapper'] = mapper

        # charge state
        self.ca.raw_results = results

        # return raw results, base-class will take care of transformations
        return results
Ejemplo n.º 4
    def _call(self, dataset):
        """Perform the ROI search.

        Determines the number of processes and the ROI center ids, delegates
        the actual computation to ``self._sl_call`` and stores the raw results
        in the conditional attributes.

        Parameters
        ----------
        dataset
            Dataset to run on; its ``nfeatures``, ``fa``, ``a`` and ``shape``
            are used here.

        Returns
        -------
        The raw results returned by ``self._sl_call``, with a mapper attached
        if ``dataset`` has one.

        Raises
        ------
        IndexError
            Under ``__debug__``, if a provided ROI id is not a valid feature
            index of ``dataset``.
        """
        # local binding
        nproc = self.nproc

        if nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1" %
                        externals.versions['pprocess'])
                nproc = 1
        # train the queryengine
        # NOTE(review): no training call is visible at this point in the
        # excerpt -- confirm whether one belongs here.

        # decide whether to run on all possible center coords or just a provided
        # subset
        if isinstance(self.__roi_ids, str):
            # name of a feature attribute: its non-zero entries are the centers
            roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
        elif self.__roi_ids is not None:
            roi_ids = self.__roi_ids
            # safeguard against stupidity
            if __debug__:
                # guard on emptiness: max() of an empty sequence would raise
                if len(roi_ids) and max(roi_ids) >= dataset.nfeatures:
                    raise IndexError(
                        "Maximal center_id found is %s whenever given "
                        "dataset has only %d features"
                        % (max(roi_ids), dataset.nfeatures))
        else:
            roi_ids = np.arange(dataset.nfeatures)

        # pass to subclass
        results = self._sl_call(dataset, roi_ids, nproc)

        if 'mapper' in dataset.a:
            # since we know the space we can stick the original mapper into the
            # results as well
            if self.__roi_ids is None:
                results.a['mapper'] = copy.copy(dataset.a.mapper)
            else:
                # there is an additional selection step that needs to be
                # expressed by another mapper
                mapper = copy.copy(dataset.a.mapper)
                mapper.append(
                    StaticFeatureSelection(roi_ids, dshape=dataset.shape[1:]))
                results.a['mapper'] = mapper

        # charge state
        self.ca.raw_results = results

        # return raw results, base-class will take care of transformations
        return results
Ejemplo n.º 5
    def _call(self, dataset):
        """Perform the ROI search.

        Determines the number of processes and the ROI ids (validated against
        the ids known to ``self._queryengine``), delegates the computation to
        ``self._sl_call`` and stores the raw results in the conditional
        attributes.

        Parameters
        ----------
        dataset
            Dataset to run on.

        Returns
        -------
        The raw results returned by ``self._sl_call``.

        Raises
        ------
        IndexError
            Under ``__debug__``, if some provided ROI ids are not among the
            ids of the query engine.
        """
        # local binding
        nproc = self.nproc

        if nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1"
                        % externals.versions['pprocess'])
                nproc = 1
        # train the queryengine
        # NOTE(review): no training call is visible at this point in the
        # excerpt -- confirm whether one belongs here.

        # decide whether to run on all possible center coords or just a provided
        # subset
        if isinstance(self.__roi_ids, str):
            # name of a feature attribute: its non-zero entries are the centers
            roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
        elif self.__roi_ids is not None:
            roi_ids = self.__roi_ids
            # safeguard against stupidity
            if __debug__:
                qe_ids = self._queryengine.ids  # known to qe
                if not set(qe_ids).issuperset(roi_ids):
                    # NOTE(review): the second format argument was cut off in
                    # the excerpt; reporting the unknown ids is assumed here.
                    raise IndexError(
                        "Some roi_ids are not known to the query engine %s: %s"
                        % (self._queryengine,
                           set(roi_ids).difference(qe_ids)))
        else:
            roi_ids = self._queryengine.ids

        # pass to subclass
        results = self._sl_call(dataset, roi_ids, nproc)
        # charge state
        self.ca.raw_results = results
        # return raw results, base-class will take care of transformations
        return results
Ejemplo n.º 6
    def _call(self, dataset):
        """Perform the ROI search.

        Determines the number of processes and the ROI center ids, runs
        ``self._proc_block`` either in ``nproc`` parallel pprocess workers
        (one block of center ids each) or once in this process, stacks the
        per-ROI results with ``hstack`` and stores them in the conditional
        attributes.

        Parameters
        ----------
        dataset
            Dataset to run on; its ``nfeatures`` and ``a`` are used here.

        Returns
        -------
        The stacked results, with a mapper attached if ``dataset`` has one.

        Raises
        ------
        IndexError
            Under ``__debug__``, if a provided center id is not a valid
            feature index of ``dataset``.
        """
        # local binding
        nproc = self.__nproc

        if nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1" % externals.versions['pprocess'])
                nproc = 1
        # train the queryengine
        # NOTE(review): no training call is visible at this point in the
        # excerpt -- confirm whether one belongs here.

        # decide whether to run on all possible center coords or just a provided
        # subset
        if self.__center_ids is not None:
            roi_ids = self.__center_ids
            # safeguard against stupidity
            if __debug__:
                # guard on emptiness: max() of an empty sequence would raise
                if len(roi_ids) and max(roi_ids) >= dataset.nfeatures:
                    raise IndexError(
                        "Maximal center_id found is %s whenever given "
                        "dataset has only %d features"
                        % (max(roi_ids), dataset.nfeatures))
        else:
            roi_ids = np.arange(dataset.nfeatures)

        # compute
        # nproc stays None when pprocess is unavailable; comparing None with
        # an int fails on Python 3, hence the explicit check.
        if nproc is not None and nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            roi_blocks = np.array_split(roi_ids, nproc)

            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc)
            compute = p_results.manage(
                pprocess.MakeParallel(self._proc_block))
            for block in roi_blocks:
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                compute(block, dataset, copy.copy(self.__datameasure))

            # collect results
            results = []
            if self.ca.is_enabled('roisizes'):
                roisizes = []
            else:
                roisizes = None

            for r, rsizes in p_results:
                results += r
                if roisizes is not None:
                    roisizes += rsizes
        else:
            # otherwise collect the results in a list
            results, roisizes = \
                    self._proc_block(roi_ids, dataset, self.__datameasure)

        if roisizes is not None:
            self.ca.roisizes = roisizes

        if __debug__:
            debug('SLC', '')

        # but be careful: this call also serves as conversion from parallel maps
        # to regular lists!
        # this uses the Dataset-hstack
        results = hstack(results)

        if 'mapper' in dataset.a:
            # since we know the space we can stick the original mapper into the
            # results as well
            if self.__center_ids is None:
                results.a['mapper'] = copy.copy(dataset.a.mapper)
            else:
                # there is an additional selection step that needs to be
                # expressed by another mapper
                # NOTE(review): the statement that appends the selection
                # mapper for ``roi_ids`` is missing from this excerpt and is
                # deliberately not guessed here -- restore it from the
                # original file.
                mapper = copy.copy(dataset.a.mapper)
                results.a['mapper'] = mapper

        # charge state
        self.ca.raw_results = results

        # return raw results, base-class will take care of transformations
        return results
Ejemplo n.º 7
def voxel_selection(vol_surf_mapping, radius, source_surf=None,
                    source_surf_nodes=None, distance_metric='dijkstra',
                    eta_step=10, nproc=None, outside_node_margin=None,
                    results_backend=None, tmp_prefix='tmpvoxsel'):
    """
    Voxel selection for multiple center nodes on the surface

    Parameters
    ----------
    vol_surf_mapping: volsurf.VolSurfMapping
        Contains gray and white matter surface, and volume geometry
    radius: int or float
        Size of searchlight. If an integer, then it indicates the number of
        voxels. If a float, then it indicates the radius of the disc
    source_surf: surf.Surface or None
        Surface used to compute distance between nodes. If omitted, it is
        the average of the gray and white surfaces.
    source_surf_nodes: list of int or numpy array or None
        Indices of nodes in source_surf that serve as searchlight center.
        By default every node serves as a searchlight center.
    distance_metric: str
        Distance metric between nodes. 'euclidean' or 'dijkstra' (default)
    eta_step: int
        Report progress every eta_step (default: 10).
    nproc: int or None
        Number of parallel threads. None means as many threads as the
        system supports. The pprocess is required for parallel threads; if
        it cannot be used, then a single thread is used.
    outside_node_margin: float or True or None (default)
        By default nodes outside the volume are skipped; using this
        parameter allows for a margin. If this value is a float (possibly
        np.inf), then all nodes within outside_node_margin Dijkstra
        distance from any node within the volume are still assigned
        associated voxels. If outside_node_margin is True, then a node is
        always assigned voxels regardless of its position in the volume.
    results_backend : 'native' or 'hdf5' or None (default).
        Specifies the way results are provided back from a processing block
        in case of nproc > 1. 'native' is pickling/unpickling of results by
        pprocess, while 'hdf5' would use h5save/h5load functionality.
        'hdf5' might be more time and memory efficient in some cases.
        If None, then 'hdf5' is used if available, else 'native'.
    tmp_prefix : str, optional
        If specified -- serves as a prefix for temporary files storage
        if results_backend == 'hdf5'.  Thus can specify the directory to use
        (trailing file path separator is not added automagically).

    Returns
    -------
    sel: volume_mask_dict.VolumeMaskDictionary
        Voxel selection results, that associates, with each node, the indices
        of the surrounding voxels. May be None, in which case a warning is
        issued.

    Raises
    ------
    RuntimeError
        If nproc > 1 is requested but pprocess is not available.
    ValueError
        If results_backend is not one of the supported values.
    """
    # NOTE(review): parts of this function (the full signature, several
    # else-branches and call continuations) were restored from context and
    # from the documented parameters -- confirm against the original module.

    # construct the intermediate surface, which is used
    # to measure distances
    intermediate_surf = (vol_surf_mapping.pial_surface * .5) + \
                        (vol_surf_mapping.white_surface * .5)

    if source_surf is None:
        source_surf = intermediate_surf
    else:
        source_surf = surf.from_any(source_surf)

    if _debug():
        debug(
            'SVS', "Generated high-res intermediate surface: "
            "%d nodes, %d faces" %
            (intermediate_surf.nvertices, intermediate_surf.nfaces))
        debug(
            'SVS', "Mapping source to high-res surface:"
            " %d nodes, %d faces" %
            (source_surf.nvertices, source_surf.nfaces))

    if distance_metric[0].lower() == 'e' and outside_node_margin:
        # euclidean distance: identity mapping
        # this is *slow*
        n = source_surf.nvertices
        xyz = source_surf.vertices
        # range() rather than xrange() so this also runs on Python 3
        src2intermediate = dict((i, tuple(xyz[i])) for i in range(n))
    else:
        # find a mapping from nodes in source_surf to those in
        # intermediate surface
        src2intermediate = source_surf.map_to_high_resolution_surf(
            intermediate_surf)

    # if no sources are given, then visit all nodes
    if source_surf_nodes is None:
        source_surf_nodes = np.arange(source_surf.nvertices)

    n = len(source_surf_nodes)

    if _debug():
        debug('SVS', "Performing surface-based voxel selection"
              " for %d centers" % n)

    # visit in random order, for better ETA estimate
    visitorder = list(np.random.permutation(len(source_surf_nodes)))

    # construct mapping from nodes to enclosing voxels
    n2v = vol_surf_mapping.get_node2voxels_mapping()

    if __debug__:
        debug('SVS', "Generated mapping from nodes" " to intersecting voxels")

    # build voxel selector
    # NOTE(review): the trailing arguments were cut off in the excerpt;
    # passing the metric and the margin is assumed -- confirm.
    voxel_selector = VoxelSelector(radius, intermediate_surf, n2v,
                                   distance_metric,
                                   outside_node_margin=outside_node_margin)

    if _debug():
        debug('SVS', "Instantiated voxel selector (radius %r)" % radius)

    # structure to keep output data. Initialize with None, then
    # make a sparse_attributes instance when we know what the attributes are
    node2volume_attributes = None

    attribute_mapper = voxel_selector.disc_voxel_indices_and_attributes

    srcs_order = [source_surf_nodes[node] for node in visitorder]
    src_trg_nodes = [(src, src2intermediate[src]) for src in srcs_order]

    if nproc is not None and nproc > 1 and not externals.exists('pprocess'):
        raise RuntimeError("The 'pprocess' module is required for "
                           "multiprocess searchlights. Please either "
                           "install python-pprocess, or reduce `nproc` "
                           "to 1 (got nproc=%i) or set to default None" %
                           nproc)

    if nproc is None:
        if externals.exists('pprocess'):
            try:
                import pprocess
                nproc = pprocess.get_number_of_cores() or 1
                if _debug():
                    debug("SVS", 'Using pprocess with %d cores' % nproc)
            except Exception:
                # best effort: fall back to a single core below
                if _debug():
                    debug("SVS", 'pprocess not available')

        if nproc is None:
            # importing pprocess failed - so use a single core
            nproc = 1
            debug("SVS", 'Using %d cores - pprocess not available' % nproc)

    # get the voxel selection parameters
    parameter_dict = vol_surf_mapping.get_parameter_dict()

    init_output = lambda: volume_mask_dict.VolumeMaskDictionary(
        vol_surf_mapping.volgeom, intermediate_surf, meta=parameter_dict)

    if nproc > 1:
        if results_backend == 'hdf5':
            externals.exists('h5py', raise_=True)
        elif results_backend is None:
            if externals.exists(
                    'h5py') and externals.versions['hdf5'] >= '1.8.7':
                results_backend = 'hdf5'
            else:
                results_backend = 'native'
        if _debug():
            debug('SVS', "Using '%s' backend" % (results_backend, ))

        if results_backend not in ('native', 'hdf5'):
            raise ValueError('Illegal results backend %r' % results_backend)

        import os
        import pprocess
        n_srcs = len(src_trg_nodes)
        blocks = np.array_split(np.arange(n_srcs), nproc)

        results = pprocess.Map(limit=nproc)
        reducer = results.manage(pprocess.MakeParallel(_reduce_mapper))

        if __debug__:
            # format arguments are handed over separately here; this assumes
            # debug() applies them to the message -- TODO confirm
            debug('SVS', "Starting %d child processes", (len(blocks), ))

        for i, block in enumerate(blocks):
            empty_dict = init_output()

            # (source, target) node pairs handled by this child process
            src_trg = [src_trg_nodes[idx] for idx in block]

            if _debug():
                debug('SVS',
                      "  starting block %d/%d: %d centers" %
                      (i + 1, nproc, len(src_trg)),
                      cr=True)

            reducer(empty_dict, attribute_mapper, src_trg,
                    eta_step=eta_step,
                    proc_id='%d' % (i + 1, ),
                    results_backend=results_backend,
                    tmp_prefix=tmp_prefix)

        # taken unconditionally so the elapsed-time report below cannot hit
        # an undefined name if debugging gets enabled in between
        tstart = time.time()
        if _debug():
            debug('SVS', '')
            debug('SVS', 'Started all %d child processes' % (len(blocks)))

        node2volume_attributes = None
        for i, result in enumerate(results):
            if result is None:
                continue

            if results_backend == 'hdf5':
                # the child handed back a file name instead of the data
                result_fn = result
                result = h5load(result_fn)
                # the temporary file is not needed once it has been loaded
                os.remove(result_fn)

            if node2volume_attributes is None:
                # first time we have actual results.
                # Use as a starting point
                node2volume_attributes = result
                if _debug():
                    debug('SVS', '')
                    debug(
                        'SVS', "Merging results from %d child "
                        "processes using '%s' backend" %
                        (len(blocks), results_backend))
            else:
                # merge new with current data
                # NOTE(review): the merge call was cut off in the excerpt;
                # a ``merge`` method on the result object is assumed.
                node2volume_attributes.merge(result)
            if _debug():
                debug('SVS',
                      "  merged result block %d/%d" % (i + 1, nproc),
                      cr=True)

        if _debug():
            telapsed = time.time() - tstart
            debug('SVS', "")
            debug(
                'SVS', 'Merged results from %d child processed - '
                'took %s' % (len(blocks), seconds2prettystring(telapsed)))

    else:
        empty_dict = init_output()
        node2volume_attributes = _reduce_mapper(empty_dict,
                                                attribute_mapper,
                                                src_trg_nodes,
                                                eta_step=eta_step)
        debug('SVS', "")

    if _debug():
        if node2volume_attributes is None:
            msgs = [
                "Voxel selection completed: none of %d nodes have "
                "voxels associated" % len(visitorder)
            ]
        else:
            nvox_selected = np.sum(node2volume_attributes.get_mask() != 0)
            vg = vol_surf_mapping.volgeom

            msgs = [
                "Voxel selection completed: %d / %d nodes have "
                "voxels associated" %
                (len(node2volume_attributes.keys()), len(visitorder)),
                "Selected %d / %d  voxels (%.0f%%) in the mask at least once" %
                (nvox_selected, vg.nvoxels_mask,
                 100. * nvox_selected / vg.nvoxels_mask)
            ]

        for msg in msgs:
            debug("SVS", msg)

    if node2volume_attributes is None:
        warning('No voxels associated with any of %d nodes' % len(visitorder))
    return node2volume_attributes
Ejemplo n.º 8
def voxel_selection(vol_surf_mapping, radius, source_surf=None, source_surf_nodes=None,
                    distance_metric='dijkstra',
                    eta_step=10, nproc=None,
                    outside_node_margin=None,
                    results_backend=None, tmp_prefix='tmpvoxsel'):
    """
    Voxel selection for multiple center nodes on the surface

    Parameters
    ----------
    vol_surf_mapping: volsurf.VolSurfMapping
        Contains gray and white matter surface, and volume geometry
    radius: int or float
        Size of searchlight. If an integer, then it indicates the number of
        voxels. If a float, then it indicates the radius of the disc
    source_surf: surf.Surface or None
        Surface used to compute distance between nodes. If omitted, it is
        the average of the gray and white surfaces.
    source_surf_nodes: list of int or numpy array or None
        Indices of nodes in source_surf that serve as searchlight center.
        By default every node serves as a searchlight center.
    distance_metric: str
        Distance metric between nodes. 'euclidean' or 'dijkstra' (default)
    eta_step: int
        Report progress every eta_step (default: 10).
    nproc: int or None
        Number of parallel threads. None means as many threads as the
        system supports. The pprocess is required for parallel threads; if
        it cannot be used, then a single thread is used.
    outside_node_margin: float or True or None (default)
        By default nodes outside the volume are skipped; using this
        parameter allows for a margin. If this value is a float (possibly
        np.inf), then all nodes within outside_node_margin Dijkstra
        distance from any node within the volume are still assigned
        associated voxels. If outside_node_margin is True, then a node is
        always assigned voxels regardless of its position in the volume.
    results_backend : 'native' or 'hdf5' or None (default).
        Specifies the way results are provided back from a processing block
        in case of nproc > 1. 'native' is pickling/unpickling of results by
        pprocess, while 'hdf5' would use h5save/h5load functionality.
        'hdf5' might be more time and memory efficient in some cases.
        If None, then 'hdf5' is used if available, else 'native'.
    tmp_prefix : str, optional
        If specified -- serves as a prefix for temporary files storage
        if results_backend == 'hdf5'.  Thus can specify the directory to use
        (trailing file path separator is not added automagically).

    Returns
    -------
    sel: volume_mask_dict.VolumeMaskDictionary
        Voxel selection results, that associates, with each node, the indices
        of the surrounding voxels. May be None, in which case a warning is
        issued.

    Raises
    ------
    RuntimeError
        If nproc > 1 is requested but pprocess is not available.
    ValueError
        If results_backend is not one of the supported values.
    """
    # NOTE(review): parts of this function (two signature lines, several
    # else-branches and call continuations) were restored from context and
    # from the documented parameters -- confirm against the original module.

    # construct the intermediate surface, which is used
    # to measure distances
    intermediate_surf = (vol_surf_mapping.pial_surface * .5) + \
                        (vol_surf_mapping.white_surface * .5)

    if source_surf is None:
        source_surf = intermediate_surf
    else:
        source_surf = surf.from_any(source_surf)

    if _debug():
        debug('SVS', "Generated high-res intermediate surface: "
              "%d nodes, %d faces" %
              (intermediate_surf.nvertices, intermediate_surf.nfaces))
        debug('SVS', "Mapping source to high-res surface:"
              " %d nodes, %d faces" %
              (source_surf.nvertices, source_surf.nfaces))

    if distance_metric[0].lower() == 'e' and outside_node_margin:
        # euclidean distance: identity mapping
        # this is *slow*
        n = source_surf.nvertices
        xyz = source_surf.vertices
        src2intermediate = dict((i, tuple(xyz[i])) for i in range(n))
    else:
        # find a mapping from nodes in source_surf to those in
        # intermediate surface
        src2intermediate = source_surf.map_to_high_resolution_surf(
            intermediate_surf)

    # if no sources are given, then visit all nodes
    if source_surf_nodes is None:
        source_surf_nodes = np.arange(source_surf.nvertices)

    n = len(source_surf_nodes)

    if _debug():
        debug('SVS',
              "Performing surface-based voxel selection"
              " for %d centers" % n)

    # visit in random order, for better ETA estimate
    visitorder = list(np.random.permutation(len(source_surf_nodes)))

    # construct mapping from nodes to enclosing voxels
    n2v = vol_surf_mapping.get_node2voxels_mapping()

    if __debug__:
        debug('SVS', "Generated mapping from nodes"
              " to intersecting voxels")

    # build voxel selector
    # NOTE(review): the trailing arguments were cut off in the excerpt;
    # passing the metric and the margin is assumed -- confirm.
    voxel_selector = VoxelSelector(radius, intermediate_surf, n2v,
                                   distance_metric,
                                   outside_node_margin=outside_node_margin)

    if _debug():
        debug('SVS', "Instantiated voxel selector (radius %r)" % radius)

    # structure to keep output data. Initialize with None, then
    # make a sparse_attributes instance when we know what the attributes are
    node2volume_attributes = None

    attribute_mapper = voxel_selector.disc_voxel_indices_and_attributes

    srcs_order = [source_surf_nodes[node] for node in visitorder]
    src_trg_nodes = [(src, src2intermediate[src]) for src in srcs_order]

    if nproc is not None and nproc > 1 and not externals.exists('pprocess'):
        raise RuntimeError("The 'pprocess' module is required for "
                           "multiprocess searchlights. Please either "
                           "install python-pprocess, or reduce `nproc` "
                           "to 1 (got nproc=%i) or set to default None"
                           % nproc)

    if nproc is None:
        if externals.exists('pprocess'):
            try:
                import pprocess
                nproc = pprocess.get_number_of_cores() or 1
                if _debug():
                    debug("SVS", 'Using pprocess with %d cores' % nproc)
            except Exception:
                # best effort: fall back to a single core below
                if _debug():
                    debug("SVS", 'pprocess not available')

        if nproc is None:
            # importing pprocess failed - so use a single core
            nproc = 1
            debug("SVS", 'Using %d cores - pprocess not available' % nproc)

    # get the voxel selection parameters
    parameter_dict = vol_surf_mapping.get_parameter_dict()

    # NOTE(review): the arguments of this constructor were cut off in the
    # excerpt; they are restored from the sibling implementation -- confirm.
    init_output = lambda: volume_mask_dict.VolumeMaskDictionary(
        vol_surf_mapping.volgeom, intermediate_surf, meta=parameter_dict)

    if nproc > 1:
        if results_backend == 'hdf5':
            externals.exists('h5py', raise_=True)
        elif results_backend is None:
            if externals.exists('h5py') and externals.versions['hdf5'] >= '1.8.7':
                results_backend = 'hdf5'
            else:
                results_backend = 'native'
        if _debug():
            debug('SVS', "Using '%s' backend" % (results_backend,))

        if results_backend not in ('native', 'hdf5'):
            raise ValueError('Illegal results backend %r' % results_backend)

        import os
        import pprocess
        n_srcs = len(src_trg_nodes)
        blocks = np.array_split(np.arange(n_srcs), nproc)

        results = pprocess.Map(limit=nproc)
        reducer = results.manage(pprocess.MakeParallel(_reduce_mapper))

        if __debug__:
            # format arguments are handed over separately here; this assumes
            # debug() applies them to the message -- TODO confirm
            debug('SVS', "Starting %d child processes", (len(blocks),))

        for i, block in enumerate(blocks):
            empty_dict = init_output()

            # (source, target) node pairs handled by this child process
            src_trg = [src_trg_nodes[idx] for idx in block]

            if _debug():
                debug('SVS', "  starting block %d/%d: %d centers" %
                            (i + 1, nproc, len(src_trg)), cr=True)

            reducer(empty_dict, attribute_mapper, src_trg,
                    eta_step=eta_step, proc_id='%d' % (i + 1,),
                    results_backend=results_backend, tmp_prefix=tmp_prefix)

        # taken unconditionally so the elapsed-time report below cannot hit
        # an undefined name if debugging gets enabled in between
        tstart = time.time()
        if _debug():
            debug('SVS', '')
            debug('SVS', 'Started all %d child processes' % (len(blocks)))

        node2volume_attributes = None
        for i, result in enumerate(results):
            if result is None:
                continue

            if results_backend == 'hdf5':
                # the child handed back a file name instead of the data
                result_fn = result
                result = h5load(result_fn)
                # the temporary file is not needed once it has been loaded
                os.remove(result_fn)

            if node2volume_attributes is None:
                # first time we have actual results.
                # Use as a starting point
                node2volume_attributes = result
                if _debug():
                    debug('SVS', '')
                    debug('SVS', "Merging results from %d child "
                                 "processes using '%s' backend" %
                                 (len(blocks), results_backend))
            else:
                # merge new with current data
                # NOTE(review): the merge call was cut off in the excerpt;
                # a ``merge`` method on the result object is assumed.
                node2volume_attributes.merge(result)
            if _debug():
                debug('SVS', "  merged result block %d/%d" % (i + 1, nproc),
                      cr=True)

        if _debug():
            telapsed = time.time() - tstart
            debug('SVS', "")
            debug('SVS', 'Merged results from %d child processed - '
                         'took %s' %
                         (len(blocks), seconds2prettystring(telapsed)))

    else:
        empty_dict = init_output()
        node2volume_attributes = _reduce_mapper(empty_dict,
                                                attribute_mapper,
                                                src_trg_nodes,
                                                eta_step=eta_step)
        debug('SVS', "")

    if _debug():
        if node2volume_attributes is None:
            msgs = ["Voxel selection completed: none of %d nodes have "
                    "voxels associated" % len(visitorder)]
        else:
            nvox_selected = np.sum(node2volume_attributes.get_mask() != 0)
            vg = vol_surf_mapping.volgeom

            msgs = ["Voxel selection completed: %d / %d nodes have "
                    "voxels associated" %
                    (len(node2volume_attributes.keys()), len(visitorder)),
                    "Selected %d / %d  voxels (%.0f%%) in the mask at least once" %
                    (nvox_selected, vg.nvoxels_mask,
                     100. * nvox_selected / vg.nvoxels_mask)]

        for msg in msgs:
            debug("SVS", msg)

    if node2volume_attributes is None:
        warning('No voxels associated with any of %d nodes' %
                len(visitorder))
    return node2volume_attributes
    def __call__(self, datasets):
        """Estimate mappers for each dataset using searchlight-based
        hyperalignment.

        Parameters
        ----------
          datasets : list or tuple of datasets

        Returns
        -------
        A list of trained StaticProjectionMappers of the same length as datasets

        Raises
        ------
        ValueError
          If fewer than two datasets are given, if ``params.ref_ds`` is listed
          in ``params.exclude_from_model``, or if it is out of bounds.
        NotImplementedError
          If the selected query engine is a SurfaceVerticesQueryEngine while
          ``combine_neighbormappers`` is off, or if ``sparse_radius`` is used
          together with a custom query engine.

        Notes
        -----
        Besides returning the mappers, this call stores state on the instance
        (``ndatasets``, ``nfeatures``, ``projections`` and possibly
        ``force_roi_seed``), overwrites ``params.hyperalignment.params.ref_ds``
        and adjusts ``params.nproc`` / ``params.nblocks``.
        """
        # Perform some checks first before modifying internal state
        params = self.params
        ndatasets = len(datasets)

        # Report the local count: self.ndatasets is only assigned after the
        # checks, so formatting it here would show a stale value.
        if ndatasets <= 1:
            raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                             "operate on. Got: %d" % ndatasets)

        if params.ref_ds in params.exclude_from_model:
            raise ValueError("Requested reference dataset %i is also "
                             "in the exclude list." % params.ref_ds)

        if params.ref_ds >= ndatasets:
            raise ValueError("Requested reference dataset %i is out of "
                             "bounds. We have only %i datasets provided" %
                             (params.ref_ds, ndatasets))

        # The rest of the checks are just warnings
        self.ndatasets = ndatasets

        _shpaldebug("SearchlightHyperalignment %s for %i datasets" %
                    (self, self.ndatasets))

        # Position of the reference dataset among the datasets that actually
        # take part in building the common model
        selected = [
            _ for _ in range(ndatasets) if _ not in params.exclude_from_model
        ]
        ref_ds_train = selected.index(params.ref_ds)
        params.hyperalignment.params.ref_ds = ref_ds_train
        warning('Using %dth dataset as the reference dataset (%dth after '
                'excluding datasets)' % (params.ref_ds, ref_ds_train))
        if len(params.exclude_from_model) > 0:
            warning("These datasets will not participate in building common "
                    "model: %s" % params.exclude_from_model)

        if __debug__:
            # verify that datasets were zscored prior the alignment since it is
            # assumed/required preprocessing step
            for ids, ds in enumerate(datasets):
                for f, fname, tval in ((np.mean, 'means', 0),
                                       (np.std, 'stds', 1)):
                    vals = f(ds, axis=0)
                    vals_comp = np.abs(vals - tval) > 1e-5
                    if np.any(vals_comp):
                        warning(
                            '%d %s are too different (max diff=%g) from %d in '
                            'dataset %d to come from a zscored dataset. '
                            'Please zscore datasets first for correct operation '
                            '(unless if was intentional)' %
                            (np.sum(vals_comp), fname, np.max(
                                np.abs(vals)), tval, ids))

        # Setting up SearchlightHyperalignment
        # we need to know which original features where comprising the
        # individual SL ROIs
        _shpaldebug('Initializing FeatureSelectionHyperalignment.')
        # NOTE(review): the argument list of this call was missing from the
        # block and has been reconstructed -- confirm the keywords against the
        # FeatureSelectionHyperalignment signature.
        hmeasure = FeatureSelectionHyperalignment(
            ref_ds=params.ref_ds,
            featsel=params.featsel,
            hyperalignment=params.hyperalignment,
            full_matrix=params.combine_neighbormappers,
            use_same_features=params.use_same_features,
            exclude_from_model=params.exclude_from_model,
            dtype=params.dtype)

        # Performing SL processing manually
        _shpaldebug("Setting up for searchlights")
        if params.nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                params.nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1" %
                        externals.versions['pprocess'])
                params.nproc = 1

        # XXX I think this class should already accept a single dataset only.
        # It should have a ``space`` setting that names a sample attribute that
        # can be used to identify individual/original datasets.
        # Taking a single dataset as argument would be cleaner, because the
        # algorithm relies on the assumption that there is a coarse feature
        # alignment, i.e. the SL ROIs cover roughly the same area
        queryengines = self._get_trained_queryengines(
            datasets, params.queryengine, params.radius, params.ref_ds)
        # For surface nodes to voxels queryengines, roi_seed hardly makes sense
        qe = queryengines[(0 if len(queryengines) == 1 else params.ref_ds)]
        if isinstance(qe, SurfaceVerticesQueryEngine):
            self.force_roi_seed = False
            if not self.params.combine_neighbormappers:
                raise NotImplementedError(
                    "Mapping from voxels to surface nodes is not "
                    "implmented yet. Try setting combine_neighbormappers to True."
                )
        self.nfeatures = datasets[params.ref_ds].nfeatures
        _shpaldebug("Performing Hyperalignment in searchlights")
        # Setting up centers for running SL Hyperalignment
        if params.sparse_radius is None:
            roi_ids = self._get_verified_ids(queryengines) \
                if params.mask_node_ids is None \
                else params.mask_node_ids
        else:
            if params.queryengine is not None:
                raise NotImplementedError(
                    "using sparse_radius whenever custom queryengine is "
                    "provided is not yet supported.")
            _shpaldebug("Setting up sparse neighborhood")
            # Sphere is imported locally as well so that the reconstructed
            # calls below do not depend on a module-level import
            from mvpa2.misc.neighborhood import scatter_neighborhoods, Sphere
            # NOTE(review): the scatter_neighborhoods arguments were missing
            # from the block and have been reconstructed (sphere of
            # sparse_radius over the reference dataset's voxel indices) --
            # confirm before relying on this branch.
            if params.mask_node_ids is None:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices,
                    deterministic=True)
                roi_ids = sidx
            else:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices[
                        params.mask_node_ids],
                    deterministic=True)
                # sidx indexes into the masked subset; map back to node ids
                roi_ids = [params.mask_node_ids[sid] for sid in sidx]

        # Initialize projections
        _shpaldebug('Initializing projection matrices')
        self.projections = [
            csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
            for isub in range(self.ndatasets)
        ]

        # compute
        if params.nproc is not None and params.nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            nproc_needed = min(len(roi_ids), params.nproc)
            params.nblocks = nproc_needed \
                if params.nblocks is None else params.nblocks
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc_needed)
            if __debug__:
                debug(
                    'SLC', "Starting off %s child processes for nblocks=%i" %
                    (nproc_needed, params.nblocks))
            compute = p_results.manage(pprocess.MakeParallel(self._proc_block))
            seed = mvpa2.get_random_seed()
            for iblock, block in enumerate(node_blocks):
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                compute(block, datasets, copy.copy(hmeasure), queryengines,
                        seed=seed, iblock=iblock)
        else:
            # otherwise collect the results in an 1-item list
            _shpaldebug('Using 1 process to compute mappers.')
            if params.nblocks is None:
                params.nblocks = 1
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            p_results = [
                self._proc_block(block, datasets, hmeasure, queryengines)
                for block in node_blocks
            ]
        results_ds = self.__handle_all_results(p_results)
        # Dummy iterator for, you know, iteration
        # NOTE(review): the statement consuming results_ds was missing from
        # the block; presumably __handle_all_results is lazy and has to be
        # drained for the per-block results to be processed -- confirm.
        list(results_ds)

        _shpaldebug(
            'Wrapping projection matrices into StaticProjectionMappers')
        self.projections = [
            StaticProjectionMapper(proj=proj, recon=proj.T)
            if params.compute_recon else StaticProjectionMapper(proj=proj)
            for proj in self.projections
        ]
        return self.projections
Ejemplo n.º 10
    def _call(self, dataset):
        """Perform the ROI search.

        Parameters
        ----------
        dataset
          Dataset handed to the query engine and to ``_sl_call``.  If it
          carries a ``mapper`` dataset attribute, a matching mapper is
          attached to the results.

        Returns
        -------
        The raw results produced by ``self._sl_call``; they are also stored
        in ``self.ca.raw_results``.

        Raises
        ------
        IndexError
          In debug mode, if explicitly provided ROI ids are not all known to
          the query engine.
        """
        # local binding
        nproc = self.nproc

        if nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1"
                        % externals.versions['pprocess'])
                nproc = 1
        # train the queryengine
        # NOTE(review): the statement for this step was missing from the
        # block; reconstructed from the comment above -- confirm.
        self._queryengine.train(dataset)

        # decide whether to run on all possible center coords or just a provided
        # subset
        if isinstance(self.__roi_ids, str):
            # a string names a feature attribute; its nonzero entries are the
            # centers
            roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
        elif self.__roi_ids is not None:
            roi_ids = self.__roi_ids
            # safeguard against stupidity
            if __debug__:
                qe_ids = self._queryengine.ids  # known to qe
                if not set(qe_ids).issuperset(roi_ids):
                    # second format argument lists the offending ids
                    raise IndexError(
                          "Some roi_ids are not known to the query engine %s: %s"
                          % (self._queryengine,
                             set(roi_ids).difference(qe_ids)))
        else:
            roi_ids = self._queryengine.ids

        # pass to subclass
        results = self._sl_call(dataset, roi_ids, nproc)

        if 'mapper' in dataset.a:
            # since we know the space we can stick the original mapper into the
            # results as well
            if self.__roi_ids is None:
                results.a['mapper'] = copy.copy(dataset.a.mapper)
            else:
                # there is an additional selection step that needs to be
                # expressed by another mapper
                mapper = copy.copy(dataset.a.mapper)

                # NNO if the original mapper has no append (because it's not a
                # chainmapper, for example), we make our own chainmapper.
                # The original code was:
                # mapper.append(StaticFeatureSelection(roi_ids,
                #                                     dshape=dataset.shape[1:]))
                feat_sel_mapper = StaticFeatureSelection(
                    roi_ids, dshape=dataset.shape[1:])
                if 'append' in dir(mapper):
                    mapper.append(feat_sel_mapper)
                else:
                    mapper = ChainMapper([dataset.a.mapper,
                                          feat_sel_mapper])

                results.a['mapper'] = mapper

        # charge state
        self.ca.raw_results = results
        # return raw results, base-class will take care of transformations
        return results
Ejemplo n.º 11
    def _call(self, dataset):
        """Perform the ROI search.

        Parameters
        ----------
        dataset
          Dataset handed to the query engine and to ``_sl_call``.  If it
          carries a ``mapper`` dataset attribute, a matching mapper is
          attached to the results.

        Returns
        -------
        The raw results produced by ``self._sl_call``, with the ROI center
        ids stored in the ``center_ids`` feature attribute; the results are
        also kept in ``self.ca.raw_results``.

        Raises
        ------
        IndexError
          In debug mode, if explicitly provided ROI ids are not all known to
          the query engine.
        """
        # local binding
        nproc = self.nproc

        if nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1"
                        % externals.versions['pprocess'])
                nproc = 1
        # train the queryengine
        # NOTE(review): the statement for this step was missing from the
        # block; reconstructed from the comment above -- confirm.
        self._queryengine.train(dataset)

        # decide whether to run on all possible center coords or just a provided
        # subset
        if isinstance(self.__roi_ids, str):
            # a string names a feature attribute; its nonzero entries are the
            # centers
            roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
        elif self.__roi_ids is not None:
            roi_ids = self.__roi_ids
            # safeguard against stupidity
            if __debug__:
                qe_ids = self._queryengine.ids  # known to qe
                if not set(qe_ids).issuperset(roi_ids):
                    # second format argument lists the offending ids
                    raise IndexError(
                          "Some roi_ids are not known to the query engine %s: %s"
                          % (self._queryengine,
                             set(roi_ids).difference(qe_ids)))
        else:
            roi_ids = self._queryengine.ids

        # pass to subclass
        results = self._sl_call(dataset, roi_ids, nproc)

        if 'mapper' in dataset.a:
            # since we know the space we can stick the original mapper into the
            # results as well
            if self.__roi_ids is None:
                results.a['mapper'] = copy.copy(dataset.a.mapper)
            else:
                # there is an additional selection step that needs to be
                # expressed by another mapper
                mapper = copy.copy(dataset.a.mapper)

                # NNO if the original mapper has no append (because it's not a
                # chainmapper, for example), we make our own chainmapper.
                # The original code was:
                # mapper.append(StaticFeatureSelection(roi_ids,
                #                                     dshape=dataset.shape[1:]))
                feat_sel_mapper = StaticFeatureSelection(
                    roi_ids, dshape=dataset.shape[1:])
                if 'append' in dir(mapper):
                    mapper.append(feat_sel_mapper)
                else:
                    mapper = ChainMapper([dataset.a.mapper,
                                          feat_sel_mapper])

                results.a['mapper'] = mapper

        # charge state
        self.ca.raw_results = results

        # store the center ids as a feature attribute
        results.fa['center_ids'] = roi_ids

        # return raw results, base-class will take care of transformations
        return results
Ejemplo n.º 12
    def __call__(self, datasets):
        """Estimate mappers for each dataset using searchlight-based
        hyperalignment.

        Parameters
        ----------
          datasets : list or tuple of datasets

        Returns
        -------
        A list of trained StaticProjectionMappers of the same length as datasets

        Raises
        ------
        ValueError
          If fewer than two datasets are given, if ``params.ref_ds`` is listed
          in ``params.exclude_from_model``, or if it is out of bounds.
        NotImplementedError
          If the selected query engine is a SurfaceVerticesQueryEngine while
          ``combine_neighbormappers`` is off, or if ``sparse_radius`` is used
          together with a custom query engine.

        Notes
        -----
        Besides returning the mappers, this call stores state on the instance
        (``ndatasets``, ``nfeatures``, ``projections`` and possibly
        ``force_roi_seed``), may replace ``params.hyperalignment`` and adjusts
        ``params.nproc`` / ``params.nblocks``.
        """
        # Perform some checks first before modifying internal state
        params = self.params
        ndatasets = len(datasets)

        # Report the local count: self.ndatasets is only assigned after the
        # checks, so formatting it here would show a stale value.
        if ndatasets <= 1:
            raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                             "operate on. Got: %d" % ndatasets)

        if params.ref_ds in params.exclude_from_model:
            raise ValueError("Requested reference dataset %i is also "
                             "in the exclude list." % params.ref_ds)

        if params.ref_ds >= ndatasets:
            raise ValueError("Requested reference dataset %i is out of "
                             "bounds. We have only %i datasets provided"
                             % (params.ref_ds, ndatasets))

        # The rest of the checks are just warnings
        self.ndatasets = ndatasets

        _shpaldebug("SearchlightHyperalignment %s for %i datasets"
                    % (self, self.ndatasets))

        # A hyperalignment instance configured for another reference dataset
        # is replaced by a default one using the requested ref_ds
        if params.ref_ds != params.hyperalignment.params.ref_ds:
            warning('Supplied ref_ds & hyperalignment instance ref_ds:%d differ.'
                    % params.hyperalignment.params.ref_ds)
            warning('Using default hyperalignment instance with ref_ds: %d' % params.ref_ds)
            params.hyperalignment = Hyperalignment(ref_ds=params.ref_ds)
        if len(params.exclude_from_model) > 0:
            warning("These datasets will not participate in building common "
                    "model: %s" % params.exclude_from_model)

        if __debug__:
            # verify that datasets were zscored prior the alignment since it is
            # assumed/required preprocessing step
            for ids, ds in enumerate(datasets):
                for f, fname, tval in ((np.mean, 'means', 0),
                                       (np.std, 'stds', 1)):
                    vals = f(ds, axis=0)
                    vals_comp = np.abs(vals - tval) > 1e-5
                    if np.any(vals_comp):
                        warning('%d %s are too different (max diff=%g) from %d in '
                                'dataset %d to come from a zscored dataset. '
                                'Please zscore datasets first for correct operation '
                                '(unless if was intentional)'
                                % (np.sum(vals_comp), fname,
                                   np.max(np.abs(vals)), tval, ids))

        # Setting up SearchlightHyperalignment
        # we need to know which original features where comprising the
        # individual SL ROIs
        _shpaldebug('Initializing FeatureSelectionHyperalignment.')
        # NOTE(review): the argument list of this call was missing from the
        # block and has been reconstructed -- confirm the keywords against the
        # FeatureSelectionHyperalignment signature.
        hmeasure = FeatureSelectionHyperalignment(
            ref_ds=params.ref_ds,
            featsel=params.featsel,
            hyperalignment=params.hyperalignment,
            full_matrix=params.combine_neighbormappers,
            use_same_features=params.use_same_features,
            exclude_from_model=params.exclude_from_model,
            dtype=params.dtype)

        # Performing SL processing manually
        _shpaldebug("Setting up for searchlights")
        if params.nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                params.nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1"
                        % externals.versions['pprocess'])
                params.nproc = 1

        # XXX I think this class should already accept a single dataset only.
        # It should have a ``space`` setting that names a sample attribute that
        # can be used to identify individual/original datasets.
        # Taking a single dataset as argument would be cleaner, because the
        # algorithm relies on the assumption that there is a coarse feature
        # alignment, i.e. the SL ROIs cover roughly the same area
        queryengines = self._get_trained_queryengines(
            datasets, params.queryengine, params.radius, params.ref_ds)
        # For surface nodes to voxels queryengines, roi_seed hardly makes sense
        # When a single query engine is shared by all datasets, it sits at
        # index 0 regardless of ref_ds, so do not index by ref_ds blindly.
        qe = queryengines[(0 if len(queryengines) == 1 else params.ref_ds)]
        if isinstance(qe, SurfaceVerticesQueryEngine):
            self.force_roi_seed = False
            if not self.params.combine_neighbormappers:
                raise NotImplementedError("Mapping from voxels to surface nodes is not "
                        "implmented yet. Try setting combine_neighbormappers to True.")
        self.nfeatures = datasets[params.ref_ds].nfeatures
        _shpaldebug("Performing Hyperalignment in searchlights")
        # Setting up centers for running SL Hyperalignment
        if params.sparse_radius is None:
            roi_ids = self._get_verified_ids(queryengines) \
                if params.mask_node_ids is None \
                else params.mask_node_ids
        else:
            if params.queryengine is not None:
                raise NotImplementedError(
                    "using sparse_radius whenever custom queryengine is "
                    "provided is not yet supported.")
            _shpaldebug("Setting up sparse neighborhood")
            # Sphere is imported locally as well so that the reconstructed
            # calls below do not depend on a module-level import
            from mvpa2.misc.neighborhood import scatter_neighborhoods, Sphere
            # NOTE(review): the scatter_neighborhoods arguments were missing
            # from the block and have been reconstructed (sphere of
            # sparse_radius over the reference dataset's voxel indices) --
            # confirm before relying on this branch.
            if params.mask_node_ids is None:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices,
                    deterministic=True)
                roi_ids = sidx
            else:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices[
                        params.mask_node_ids],
                    deterministic=True)
                # sidx indexes into the masked subset; map back to node ids
                roi_ids = [params.mask_node_ids[sid] for sid in sidx]

        # Initialize projections
        _shpaldebug('Initializing projection matrices')
        self.projections = [
            csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
            for isub in range(self.ndatasets)]

        # compute
        if params.nproc is not None and params.nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            nproc_needed = min(len(roi_ids), params.nproc)
            params.nblocks = nproc_needed \
                if params.nblocks is None else params.nblocks
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc_needed)
            if __debug__:
                debug('SLC', "Starting off %s child processes for nblocks=%i"
                      % (nproc_needed, params.nblocks))
            compute = p_results.manage(
                pprocess.MakeParallel(self._proc_block))
            seed = mvpa2.get_random_seed()
            for iblock, block in enumerate(node_blocks):
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                compute(block, datasets, copy.copy(hmeasure), queryengines,
                        seed=seed, iblock=iblock)
        else:
            # otherwise collect the results in an 1-item list
            _shpaldebug('Using 1 process to compute mappers.')
            if params.nblocks is None:
                params.nblocks = 1
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            p_results = [self._proc_block(block, datasets, hmeasure, queryengines)
                         for block in node_blocks]
        results_ds = self.__handle_all_results(p_results)
        # Dummy iterator for, you know, iteration
        # NOTE(review): the statement consuming results_ds was missing from
        # the block; presumably __handle_all_results is lazy and has to be
        # drained for the per-block results to be processed -- confirm.
        list(results_ds)

        _shpaldebug('Wrapping projection matrices into StaticProjectionMappers')
        self.projections = [
            StaticProjectionMapper(proj=proj, recon=proj.T) if params.compute_recon
            else StaticProjectionMapper(proj=proj)
            for proj in self.projections]
        return self.projections