Example #1
def cast_unsuitable_regions_by_label_MT(multi_level_mask, thresholds_list, parameter_dict):
    '''cast small region as noise and big region as vessel'''
    nodule_mask = np.zeros_like(multi_level_mask, np.int8)
    multi_image_labels = range(int(np.max(multi_level_mask)))
    multi_image_labels.reverse()
    loop_times = len(multi_image_labels)
    # TODO: map-style parallelization
    shared_array_s = mp.Array(ctypes.c_int8, loop_times * np.size(nodule_mask))
    shared_array = np.frombuffer(shared_array_s.get_obj(), dtype=np.int8).reshape((loop_times,) + nodule_mask.shape)

    num_of_proc = pprocess.get_number_of_cores()
    results = pprocess.Map(limit=num_of_proc / 2)
    para_func = results.manage(pprocess.MakeParallel(put_result_into_shared_memory))

    for i in range(loop_times):
        one_label = multi_image_labels[i]
        para_func(shared_array, multi_level_mask, thresholds_list, parameter_dict, one_label)
    results.finish()

    for num_of_loop in range(shared_array.shape[0]):
        nodule_mask = np.logical_or(nodule_mask, shared_array[num_of_loop, ...])

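    # Workaround to release the shared memory right away: close the mmap arena
    # backing the shared array and replace multiprocessing's module-level heap
    # so the closed arena is never handed out again (relies on CPython internals).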
    datastate = shared_array_s.get_obj()._wrapper._state
    arenaobj = datastate[0][0]
    arenaobj.buffer.close()
    mp.heap.BufferWrapper._heap = mp.heap.Heap()

    return nodule_mask
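All of these examples use the same pprocess idiom: build a Map with a worker limit, wrap the target callable with MakeParallel and manage(), queue the calls, then iterate the Map (or call finish()) to collect the results in submission order. A minimal, self-contained sketch of that idiom follows; square() is a made-up worker used only for illustration.

import pprocess

def square(x):
    # runs in a child process; the return value is shipped back to the parent
    return x * x

nproc = pprocess.get_number_of_cores() or 1
results = pprocess.Map(limit=nproc)                  # at most nproc children at once
calc = results.manage(pprocess.MakeParallel(square))

for x in range(10):
    calc(x)                                          # queue work; returns immediately

print(list(results))                                 # iterating the Map yields results in submission order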
Example #2
def filter_regions(multi_level_mask, thresholds_list, parameter_dict, one_label):
    one_threshold = thresholds_list[one_label]
    mask = (multi_level_mask > one_label)
    min_size = parameter_dict['small_vol_threshold']
    label_image, bounding_box_slices = lyBWareaopen(mask, min_size)

    region_result = [False]
    '''single thread'''
    # for label_number in range(1, np.max(label_image) + 1):
    #     single_result = region_task(label_number, label_image, bounding_box_slices, one_threshold, parameter_dict)
    #     region_result.append(single_result)

    '''multi thread'''
    results = pprocess.Map(limit=pprocess.get_number_of_cores())
    calc = results.manage(pprocess.MakeParallel(region_task))

    for label_number in range(1, np.max(label_image) + 1):
        calc(label_number, label_image, bounding_box_slices, one_threshold, parameter_dict)

    for i, result in enumerate(results):
        region_result.append(result)

    region_result = np.array(region_result, np.bool)
    tuild_result = np.logical_not(region_result)
    label_image[tuild_result[label_image]] = 0

    return (label_image > 0)
Example #3
    def _call(self, dataset):
        """Perform the ROI search.
        """
        # local binding
        nproc = self.nproc

        if nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1"
                        % externals.versions['pprocess'])
                nproc = 1
        # train the queryengine
        self._queryengine.train(dataset)

        # decide whether to run on all possible center coords or just a provided
        # subset
        if isinstance(self.__roi_ids, str):
            roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
        elif self.__roi_ids is not None:
            roi_ids = self.__roi_ids
            # safeguard against stupidity
            if __debug__:
                if max(roi_ids) >= dataset.nfeatures:
                    raise IndexError, \
                          "Maximal center_id found is %s whenever given " \
                          "dataset has only %d features" \
                          % (max(roi_ids), dataset.nfeatures)
        else:
            roi_ids = np.arange(dataset.nfeatures)

        # pass to subclass
        results, roi_sizes = self._sl_call(dataset, roi_ids, nproc)

        if not roi_sizes is None:
            self.ca.roi_sizes = roi_sizes

        if 'mapper' in dataset.a:
            # since we know the space we can stick the original mapper into the
            # results as well
            if self.__roi_ids is None:
                results.a['mapper'] = copy.copy(dataset.a.mapper)
            else:
                # there is an additional selection step that needs to be
                # expressed by another mapper
                mapper = copy.copy(dataset.a.mapper)
                mapper.append(StaticFeatureSelection(roi_ids,
                                                     dshape=dataset.shape[1:]))
                results.a['mapper'] = mapper

        # charge state
        self.ca.raw_results = results

        # return raw results, base-class will take care of transformations
        return results
Example #4
    def _call(self, dataset):
        """Perform the ROI search.
        """
        # local binding
        nproc = self.nproc

        if nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1" %
                        externals.versions['pprocess'])
                nproc = 1
        # train the queryengine
        self._queryengine.train(dataset)

        # decide whether to run on all possible center coords or just a provided
        # subset
        if isinstance(self.__roi_ids, str):
            roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
        elif self.__roi_ids is not None:
            roi_ids = self.__roi_ids
            # safeguard against stupidity
            if __debug__:
                if max(roi_ids) >= dataset.nfeatures:
                    raise IndexError, \
                          "Maximal center_id found is %s whenever given " \
                          "dataset has only %d features" \
                          % (max(roi_ids), dataset.nfeatures)
        else:
            roi_ids = np.arange(dataset.nfeatures)

        # pass to subclass
        results = self._sl_call(dataset, roi_ids, nproc)

        if 'mapper' in dataset.a:
            # since we know the space we can stick the original mapper into the
            # results as well
            if self.__roi_ids is None:
                results.a['mapper'] = copy.copy(dataset.a.mapper)
            else:
                # there is an additional selection step that needs to be
                # expressed by another mapper
                mapper = copy.copy(dataset.a.mapper)
                mapper.append(
                    StaticFeatureSelection(roi_ids, dshape=dataset.shape[1:]))
                results.a['mapper'] = mapper

        # charge state
        self.ca.raw_results = results

        # return raw results, base-class will take care of transformations
        return results
Example #5
    def _call(self, dataset):
        """Perform the ROI search.
        """
        # local binding
        nproc = self.nproc

        if nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1"
                        % externals.versions['pprocess'])
                nproc = 1
        # train the queryengine
        self._queryengine.train(dataset)

        # decide whether to run on all possible center coords or just a provided
        # subset
        if isinstance(self.__roi_ids, str):
            roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
        elif self.__roi_ids is not None:
            roi_ids = self.__roi_ids
            # safeguard against stupidity
            if __debug__:
                qe_ids = self._queryengine.ids # known to qe
                if not set(qe_ids).issuperset(roi_ids):
                    raise IndexError(
                          "Some roi_ids are not known to the query engine %s: %s"
                          % (self._queryengine,
                             set(roi_ids).difference(qe_ids)))
        else:
            roi_ids = self._queryengine.ids

        # pass to subclass
        results = self._sl_call(dataset, roi_ids, nproc)
        # charge state
        self.ca.raw_results = results
        # return raw results, base-class will take care of transformations
        return results
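Examples #3 through #5 (and several snippets below) repeat the same probe for the number of available cores. As a standalone, hedged sketch, that fallback could be factored into a small helper; the name _guess_nproc is hypothetical and not part of the code above.

def _guess_nproc(default=1):
    # Ask pprocess for the core count; old pprocess releases have no
    # get_number_of_cores(), and pprocess may be missing entirely, so
    # fall back to a single worker in either case.
    try:
        import pprocess
        return pprocess.get_number_of_cores() or default
    except (ImportError, AttributeError):
        return default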
Example #6
    def _call(self, dataset):
        """Perform the ROI search.
        """
        # local binding
        nproc = self.__nproc

        if nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1" % externals.versions['pprocess'])
                nproc = 1
        # train the queryengine
        self.__qe.train(dataset)

        # decide whether to run on all possible center coords or just a provided
        # subset
        if self.__center_ids is not None:
            roi_ids = self.__center_ids
            # safeguard against stupidity
            if __debug__:
                if max(roi_ids) >= dataset.nfeatures:
                    raise IndexError, \
                          "Maximal center_id found is %s whenever given " \
                          "dataset has only %d features" \
                          % (max(roi_ids), dataset.nfeatures)
        else:
            roi_ids = np.arange(dataset.nfeatures)

        # compute
        if nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            roi_blocks = np.array_split(roi_ids, nproc)

            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc)
            compute = p_results.manage(
                        pprocess.MakeParallel(self._proc_block))
            for block in roi_blocks:
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                compute(block, dataset, copy.copy(self.__datameasure))

            # collect results
            results = []
            if self.ca.is_enabled('roisizes'):
                roisizes = []
            else:
                roisizes = None

            for r, rsizes in p_results:
                results += r
                if not roisizes is None:
                    roisizes += rsizes
        else:
            # otherwise collect the results in a list
            results, roisizes = \
                    self._proc_block(roi_ids, dataset, self.__datameasure)

        if not roisizes is None:
            self.ca.roisizes = roisizes

        if __debug__:
            debug('SLC', '')

        # but be careful: this call also serves as conversion from parallel maps
        # to regular lists!
        # this uses the Dataset-hstack
        results = hstack(results)

        if 'mapper' in dataset.a:
            # since we know the space we can stick the original mapper into the
            # results as well
            if self.__center_ids is None:
                results.a['mapper'] = copy.copy(dataset.a.mapper)
            else:
                # there is an additional selection step that needs to be
                # expressed by another mapper
                mapper = copy.copy(dataset.a.mapper)
                mapper.append(FeatureSliceMapper(self.__center_ids,
                                                 dshape=dataset.shape[1:]))
                results.a['mapper'] = mapper

        # charge state
        self.ca.raw_results = results

        # return raw results, base-class will take care of transformations
        return results
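Example #6 shows the block-splitting strategy used by the searchlight code: instead of one pprocess call per ROI, the center ids are chopped into nproc roughly equal blocks with np.array_split, and each child processes one whole block, which keeps per-call overhead small. A stripped-down sketch of that strategy follows; process_block() is a stand-in for self._proc_block and is not part of the original code.

import numpy as np
import pprocess

def process_block(block):
    # placeholder worker: handle one chunk of ROI center ids in a child process
    return [int(i) * 2 for i in block]

roi_ids = np.arange(100)
nproc = pprocess.get_number_of_cores() or 1

blocks = np.array_split(roi_ids, nproc)              # one chunk per child process
p_results = pprocess.Map(limit=nproc)
compute = p_results.manage(pprocess.MakeParallel(process_block))
for block in blocks:
    compute(block)

merged = []
for partial in p_results:                            # collect per-block results in order
    merged += partial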
Example #7
def voxel_selection(vol_surf_mapping,
                    radius,
                    source_surf=None,
                    source_surf_nodes=None,
                    distance_metric='dijkstra',
                    eta_step=10,
                    nproc=None,
                    outside_node_margin=None,
                    results_backend=None,
                    tmp_prefix='tmpvoxsel'):
    """
    Voxel selection for multiple center nodes on the surface

    Parameters
    ----------
    vol_surf_mapping: volsurf.VolSurfMapping
        Contains gray and white matter surface, and volume geometry
    radius: int or float
        Size of searchlight. If an integer, then it indicates the number of
        voxels. If a float, then it indicates the radius of the disc
    source_surf: surf.Surface or None
        Surface used to compute distance between nodes. If omitted, it is
        the average of the gray and white surfaces.
    source_surf_nodes: list of int or numpy array or None
        Indices of nodes in source_surf that serve as searchlight center.
        By default every node serves as a searchlight center.
    distance_metric: str
        Distance metric between nodes. 'euclidean' or 'dijkstra' (default)
    eta_step: int
        Report progress every eta_step (default: 10).
    nproc: int or None
        Number of parallel threads. None means as many threads as the
        system supports. The pprocess module is required for parallel threads; if
        it cannot be used, then a single thread is used.
    outside_node_margin: float or True or None (default)
        By default nodes outside the volume are skipped; using this
        parameter allows for a margin. If this value is a float (possibly
        np.inf), then all nodes within outside_node_margin Dijkstra
        distance from any node within the volume are still assigned
        associated voxels. If outside_node_margin is True, then a node is
        always assigned voxels regardless of its position in the volume.
    results_backend : 'native' or 'hdf5' or None (default).
        Specifies the way results are provided back from a processing block
        in case of nproc > 1. 'native' is pickling/unpickling of results by
        pprocess, while 'hdf5' would use h5save/h5load functionality.
        'hdf5' might be more time and memory efficient in some cases.
        If None, then 'hdf5' is used if available, else 'native'.
    tmp_prefix : str, optional
        If specified -- serves as a prefix for temporary files storage
        if results_backend == 'hdf5'.  Thus can specify the directory to use
        (trailing file path separator is not added automagically).

    Returns
    -------
    sel: volume_mask_dict.VolumeMaskDictionary
        Voxel selection results, associating with each node the indices
        of the surrounding voxels.
    """

    # construct the intermediate surface, which is used
    # to measure distances
    intermediate_surf = (vol_surf_mapping.pial_surface * .5) + \
                        (vol_surf_mapping.white_surface * .5)

    if source_surf is None:
        source_surf = intermediate_surf
    else:
        source_surf = surf.from_any(source_surf)

    if _debug():
        debug(
            'SVS', "Generated high-res intermediate surface: "
            "%d nodes, %d faces" %
            (intermediate_surf.nvertices, intermediate_surf.nfaces))
        debug(
            'SVS', "Mapping source to high-res surface:"
            " %d nodes, %d faces" %
            (source_surf.nvertices, source_surf.nfaces))

    if distance_metric[0].lower() == 'e' and outside_node_margin:
        # euclidean distance: identity mapping
        # this is *slow*
        n = source_surf.nvertices
        xyz = source_surf.vertices
        src2intermediate = dict((i, tuple(xyz[i])) for i in xrange(n))
    else:
        # find a mapping from nodes in source_surf to those in
        # intermediate surface
        src2intermediate = source_surf.map_to_high_resolution_surf(\
                                                        intermediate_surf)

    # if no sources are given, then visit all nodes
    if source_surf_nodes is None:
        source_surf_nodes = np.arange(source_surf.nvertices)

    n = len(source_surf_nodes)

    if _debug():
        debug('SVS', "Performing surface-based voxel selection"
              " for %d centers" % n)

    # visit in random order, for a better ETA estimate
    visitorder = list(np.random.permutation(len(source_surf_nodes)))

    # construct mapping from nodes to enclosing voxels
    n2v = vol_surf_mapping.get_node2voxels_mapping()

    if __debug__:
        debug('SVS', "Generated mapping from nodes" " to intersecting voxels")

    # build voxel selector
    voxel_selector = VoxelSelector(radius,
                                   intermediate_surf,
                                   n2v,
                                   distance_metric,
                                   outside_node_margin=outside_node_margin)

    if _debug():
        debug('SVS', "Instantiated voxel selector (radius %r)" % radius)

    # structure to keep output data. Initialize with None, then
    # make a sparse_attributes instance when we know what the attributes are
    node2volume_attributes = None

    attribute_mapper = voxel_selector.disc_voxel_indices_and_attributes

    srcs_order = [source_surf_nodes[node] for node in visitorder]
    src_trg_nodes = [(src, src2intermediate[src]) for src in srcs_order]

    if nproc is not None and nproc > 1 and not externals.exists('pprocess'):
        raise RuntimeError("The 'pprocess' module is required for "
                           "multiprocess searchlights. Please either "
                           "install python-pprocess, or reduce `nproc` "
                           "to 1 (got nproc=%i) or set to default None" %
                           nproc)

    if nproc is None:
        if externals.exists('pprocess'):
            try:
                import pprocess
                nproc = pprocess.get_number_of_cores() or 1
                if _debug():
                    debug("SVS", 'Using pprocess with %d cores' % nproc)
            except:
                if _debug():
                    debug("SVS", 'pprocess not available')

        if nproc is None:
            # importing pprocess failed - so use a single core
            nproc = 1
            debug("SVS", 'Using %d cores - pprocess not available' % nproc)

    # get the voxel selection parameters
    parameter_dict = vol_surf_mapping.get_parameter_dict()
    parameter_dict.update(dict(radius=radius,
                               outside_node_margin=outside_node_margin,
                               distance_metric=distance_metric),
                          source_nvertices=source_surf.nvertices)

    init_output = lambda: volume_mask_dict.VolumeMaskDictionary(
        vol_surf_mapping.volgeom, intermediate_surf, meta=parameter_dict)

    if nproc > 1:
        if results_backend == 'hdf5':
            externals.exists('h5py', raise_=True)
        elif results_backend is None:
            if externals.exists(
                    'h5py') and externals.versions['hdf5'] >= '1.8.7':
                results_backend = 'hdf5'
            else:
                results_backend = 'native'
        if _debug():
            debug('SVS', "Using '%s' backend" % (results_backend, ))

        if not results_backend in ('native', 'hdf5'):
            raise ValueError('Illegal results backend %r' % results_backend)

        import pprocess
        n_srcs = len(src_trg_nodes)
        blocks = np.array_split(np.arange(n_srcs), nproc)

        results = pprocess.Map(limit=nproc)
        reducer = results.manage(pprocess.MakeParallel(_reduce_mapper))

        if __debug__:
            debug('SVS', "Starting %d child processes" % (len(blocks), ))

        for i, block in enumerate(blocks):
            empty_dict = init_output()

            src_trg = []
            for idx in block:
                src_trg.append(src_trg_nodes[idx])

            if _debug():
                debug('SVS',
                      "  starting block %d/%d: %d centers" %
                      (i + 1, nproc, len(src_trg)),
                      cr=True)

            reducer(empty_dict,
                    attribute_mapper,
                    src_trg,
                    eta_step=eta_step,
                    proc_id='%d' % (i + 1, ),
                    results_backend=results_backend,
                    tmp_prefix=tmp_prefix)
        if _debug():
            debug('SVS', '')
            debug('SVS', 'Started all %d child processes' % (len(blocks)))
            tstart = time.time()

        node2volume_attributes = None
        for i, result in enumerate(results):
            if result is None:
                continue

            if results_backend == 'hdf5':
                result_fn = result
                result = h5load(result_fn)
                os.remove(result_fn)

            if node2volume_attributes is None:
                # first time we have actual results.
                # Use as a starting point
                node2volume_attributes = result
                if _debug():
                    debug('SVS', '')
                    debug(
                        'SVS', "Merging results from %d child "
                        "processes using '%s' backend" %
                        (len(blocks), results_backend))
            else:
                # merge new with current data
                node2volume_attributes.merge(result)
            if _debug():
                debug('SVS',
                      "  merged result block %d/%d" % (i + 1, nproc),
                      cr=True)

        if _debug():
            telapsed = time.time() - tstart
            debug('SVS', "")
            debug(
                'SVS', 'Merged results from %d child processes - '
                'took %s' % (len(blocks), seconds2prettystring(telapsed)))

    else:
        empty_dict = init_output()
        node2volume_attributes = _reduce_mapper(empty_dict,
                                                attribute_mapper,
                                                src_trg_nodes,
                                                eta_step=eta_step)
        debug('SVS', "")

    if _debug():
        if node2volume_attributes is None:
            msgs = [
                "Voxel selection completed: none of %d nodes have "
                "voxels associated" % len(visitorder)
            ]
        else:
            nvox_selected = np.sum(node2volume_attributes.get_mask() != 0)
            vg = vol_surf_mapping.volgeom

            msgs = [
                "Voxel selection completed: %d / %d nodes have "
                "voxels associated" %
                (len(node2volume_attributes.keys()), len(visitorder)),
                "Selected %d / %d  voxels (%.0f%%) in the mask at least once" %
                (nvox_selected, vg.nvoxels_mask,
                 100. * nvox_selected / vg.nvoxels_mask)
            ]

        for msg in msgs:
            debug("SVS", msg)

    if node2volume_attributes is None:
        warning('No voxels associated with any of %d nodes' % len(visitorder))
    return node2volume_attributes
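A hypothetical call of the voxel_selection() function above, only to illustrate the parameters described in its docstring; `mapping` stands for an already-built volsurf.VolSurfMapping and is not constructed here.

# hedged usage sketch -- `mapping` is assumed to be a volsurf.VolSurfMapping
sel = voxel_selection(mapping,
                      radius=100,                    # int radius: 100 voxels per searchlight disc
                      distance_metric='dijkstra',
                      nproc=None,                    # use all cores when pprocess is available
                      results_backend=None)          # prefer 'hdf5' when h5py is usable
print('%d center nodes received voxels' % len(sel.keys()))
mask = sel.get_mask()                                # volume mask of every selected voxel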
Example #8
def voxel_selection(vol_surf_mapping, radius, source_surf=None, source_surf_nodes=None,
                    distance_metric='dijkstra',
                    eta_step=10, nproc=None,
                    outside_node_margin=None,
                    results_backend=None, tmp_prefix='tmpvoxsel'):

    """
    Voxel selection for multiple center nodes on the surface

    Parameters
    ----------
    vol_surf_mapping: volsurf.VolSurfMapping
        Contains gray and white matter surface, and volume geometry
    radius: int or float
        Size of searchlight. If an integer, then it indicates the number of
        voxels. If a float, then it indicates the radius of the disc
    source_surf: surf.Surface or None
        Surface used to compute distance between nodes. If omitted, it is
        the average of the gray and white surfaces.
    source_surf_nodes: list of int or numpy array or None
        Indices of nodes in source_surf that serve as searchlight center.
        By default every node serves as a searchlight center.
    distance_metric: str
        Distance metric between nodes. 'euclidean' or 'dijkstra' (default)
    eta_step: int
        Report progress every eta_step (default: 10).
    nproc: int or None
        Number of parallel threads. None means as many threads as the
        system supports. The pprocess module is required for parallel threads; if
        it cannot be used, then a single thread is used.
    outside_node_margin: float or True or None (default)
        By default nodes outside the volume are skipped; using this
        parameter allows for a margin. If this value is a float (possibly
        np.inf), then all nodes within outside_node_margin Dijkstra
        distance from any node within the volume are still assigned
        associated voxels. If outside_node_margin is True, then a node is
        always assigned voxels regardless of its position in the volume.
    results_backend : 'native' or 'hdf5' or None (default).
        Specifies the way results are provided back from a processing block
        in case of nproc > 1. 'native' is pickling/unpickling of results by
        pprocess, while 'hdf5' would use h5save/h5load functionality.
        'hdf5' might be more time and memory efficient in some cases.
        If None, then 'hdf5' is used if available, else 'native'.
    tmp_prefix : str, optional
        If specified -- serves as a prefix for temporary files storage
        if results_backend == 'hdf5'.  Thus can specify the directory to use
        (trailing file path separator is not added automagically).

    Returns
    -------
    sel: volume_mask_dict.VolumeMaskDictionary
        Voxel selection results, associating with each node the indices
        of the surrounding voxels.
    """

    # construct the intermediate surface, which is used
    # to measure distances
    intermediate_surf = (vol_surf_mapping.pial_surface * .5) + \
                        (vol_surf_mapping.white_surface * .5)

    if source_surf is None:
        source_surf = intermediate_surf
    else:
        source_surf = surf.from_any(source_surf)

    if _debug():
        debug('SVS', "Generated high-res intermediate surface: "
              "%d nodes, %d faces" %
              (intermediate_surf.nvertices, intermediate_surf.nfaces))
        debug('SVS', "Mapping source to high-res surface:"
              " %d nodes, %d faces" %
              (source_surf.nvertices, source_surf.nfaces))


    if distance_metric[0].lower() == 'e' and outside_node_margin:
        # euclidean distance: identity mapping
        # this is *slow*
        n = source_surf.nvertices
        xyz = source_surf.vertices
        src2intermediate = dict((i, tuple(xyz[i])) for i in range(n))
    else:
        # find a mapping from nodes in source_surf to those in
        # intermediate surface
        src2intermediate = source_surf.map_to_high_resolution_surf(\
                                                        intermediate_surf)

    # if no sources are given, then visit all nodes
    if source_surf_nodes is None:
        source_surf_nodes = np.arange(source_surf.nvertices)

    n = len(source_surf_nodes)

    if _debug():
        debug('SVS',
              "Performing surface-based voxel selection"
              " for %d centers" % n)

    # visit in random order, for a better ETA estimate
    visitorder = list(np.random.permutation(len(source_surf_nodes)))

    # construct mapping from nodes to enclosing voxels
    n2v = vol_surf_mapping.get_node2voxels_mapping()

    if __debug__:
        debug('SVS', "Generated mapping from nodes"
              " to intersecting voxels")

    # build voxel selector
    voxel_selector = VoxelSelector(radius, intermediate_surf, n2v,
                                   distance_metric,
                                   outside_node_margin=outside_node_margin)

    if _debug():
        debug('SVS', "Instantiated voxel selector (radius %r)" % radius)


    # structure to keep output data. Initialize with None, then
    # make a sparse_attributes instance when we know what the attributes are
    node2volume_attributes = None

    attribute_mapper = voxel_selector.disc_voxel_indices_and_attributes

    srcs_order = [source_surf_nodes[node] for node in visitorder]
    src_trg_nodes = [(src, src2intermediate[src]) for src in srcs_order]

    if nproc is not None and nproc > 1 and not externals.exists('pprocess'):
        raise RuntimeError("The 'pprocess' module is required for "
                           "multiprocess searchlights. Please either "
                           "install python-pprocess, or reduce `nproc` "
                           "to 1 (got nproc=%i) or set to default None"
                           % nproc)

    if nproc is None:
        if externals.exists('pprocess'):
            try:
                import pprocess
                nproc = pprocess.get_number_of_cores() or 1
                if _debug() :
                    debug("SVS", 'Using pprocess with %d cores' % nproc)
            except:
                if _debug():
                    debug("SVS", 'pprocess not available')

        if nproc is None:
            # importing pprocess failed - so use a single core
            nproc = 1
            debug("SVS", 'Using %d cores - pprocess not available' % nproc)

    # get the voxel selection parameters
    parameter_dict = vol_surf_mapping.get_parameter_dict()
    parameter_dict.update(dict(radius=radius,
                               outside_node_margin=outside_node_margin,
                               distance_metric=distance_metric),
                               source_nvertices=source_surf.nvertices)


    init_output = lambda: volume_mask_dict.VolumeMaskDictionary(
                                    vol_surf_mapping.volgeom,
                                    intermediate_surf,
                                    meta=parameter_dict)

    if nproc > 1:
        if results_backend == 'hdf5':
            externals.exists('h5py', raise_=True)
        elif results_backend is None:
            if externals.exists('h5py') and externals.versions['hdf5'] >= '1.8.7':
                results_backend = 'hdf5'
            else:
                results_backend = 'native'
        if _debug():
            debug('SVS', "Using '%s' backend" % (results_backend,))

        if not results_backend in ('native', 'hdf5'):
            raise ValueError('Illegal results backend %r' % results_backend)

        import pprocess
        n_srcs = len(src_trg_nodes)
        blocks = np.array_split(np.arange(n_srcs), nproc)

        results = pprocess.Map(limit=nproc)
        reducer = results.manage(pprocess.MakeParallel(_reduce_mapper))

        if __debug__:
            debug('SVS', "Starting %d child processes" % (len(blocks),))

        for i, block in enumerate(blocks):
            empty_dict = init_output()

            src_trg = []
            for idx in block:
                src_trg.append(src_trg_nodes[idx])

            if _debug():
                debug('SVS', "  starting block %d/%d: %d centers" %
                            (i + 1, nproc, len(src_trg)), cr=True)

            reducer(empty_dict, attribute_mapper, src_trg,
                    eta_step=eta_step, proc_id='%d' % (i + 1,),
                    results_backend=results_backend, tmp_prefix=tmp_prefix)
        if _debug():
            debug('SVS', '')
            debug('SVS', 'Started all %d child processes' % (len(blocks)))
            tstart = time.time()

        node2volume_attributes = None
        for i, result in enumerate(results):
            if result is None:
                continue

            if results_backend == 'hdf5':
                result_fn = result
                result = h5load(result_fn)
                os.remove(result_fn)

            if node2volume_attributes is None:
                # first time we have actual results.
                # Use as a starting point
                node2volume_attributes = result
                if _debug():
                    debug('SVS', '')
                    debug('SVS', "Merging results from %d child "
                                 "processes using '%s' backend" %
                                 (len(blocks), results_backend))
            else:
                # merge new with current data
                node2volume_attributes.merge(result)
            if _debug():
                debug('SVS', "  merged result block %d/%d" % (i + 1, nproc),
                                cr=True)

        if _debug():
            telapsed = time.time() - tstart
            debug('SVS', "")
            debug('SVS', 'Merged results from %d child processes - '
                         'took %s' %
                         (len(blocks), seconds2prettystring(telapsed)))

    else:
        empty_dict = init_output()
        node2volume_attributes = _reduce_mapper(empty_dict,
                                                attribute_mapper,
                                                src_trg_nodes,
                                                eta_step=eta_step)
        debug('SVS', "")

    if _debug():
        if node2volume_attributes is None:
            msgs = ["Voxel selection completed: none of %d nodes have "
                    "voxels associated" % len(visitorder)]
        else:
            nvox_selected = np.sum(node2volume_attributes.get_mask() != 0)
            vg = vol_surf_mapping.volgeom

            msgs = ["Voxel selection completed: %d / %d nodes have "
                    "voxels associated" %
                    (len(node2volume_attributes.keys()), len(visitorder)),
                    "Selected %d / %d  voxels (%.0f%%) in the mask at least once" %
                    (nvox_selected, vg.nvoxels_mask,
                     100. * nvox_selected / vg.nvoxels_mask)]

        for msg in msgs:
            debug("SVS", msg)


    if node2volume_attributes is None:
        warning('No voxels associated with any of %d nodes' %
                        len(visitorder))
    return node2volume_attributes
Example #9
    def __call__(self, datasets):
        """Estimate mappers for each dataset using searchlight-based
        hyperalignment.

        Parameters
        ----------
          datasets : list or tuple of datasets

        Returns
        -------
        A list of trained StaticProjectionMappers of the same length as datasets
        """

        # Perform some checks first before modifying internal state
        params = self.params
        ndatasets = len(datasets)

        if len(datasets) <= 1:
            raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                             "operate on. Got: %d" % self.ndatasets)

        if params.ref_ds in params.exclude_from_model:
            raise ValueError("Requested reference dataset %i is also "
                             "in the exclude list." % params.ref_ds)

        if params.ref_ds >= ndatasets:
            raise ValueError("Requested reference dataset %i is out of "
                             "bounds. We have only %i datasets provided" %
                             (params.ref_ds, self.ndatasets))

        # The rest of the checks are just warnings
        self.ndatasets = ndatasets

        _shpaldebug("SearchlightHyperalignment %s for %i datasets" %
                    (self, self.ndatasets))

        selected = [
            _ for _ in range(ndatasets) if _ not in params.exclude_from_model
        ]
        ref_ds_train = selected.index(params.ref_ds)
        params.hyperalignment.params.ref_ds = ref_ds_train
        warning('Using %dth dataset as the reference dataset (%dth after '
                'excluding datasets)' % (params.ref_ds, ref_ds_train))
        if len(params.exclude_from_model) > 0:
            warning("These datasets will not participate in building common "
                    "model: %s" % params.exclude_from_model)

        if __debug__:
            # verify that datasets were zscored prior the alignment since it is
            # assumed/required preprocessing step
            for ids, ds in enumerate(datasets):
                for f, fname, tval in ((np.mean, 'means', 0), (np.std, 'stds',
                                                               1)):
                    vals = f(ds, axis=0)
                    vals_comp = np.abs(vals - tval) > 1e-5
                    if np.any(vals_comp):
                        warning(
                            '%d %s are too different (max diff=%g) from %d in '
                            'dataset %d to come from a zscored dataset. '
                            'Please zscore datasets first for correct operation '
                            '(unless it was intentional)' %
                            (np.sum(vals_comp), fname, np.max(
                                np.abs(vals)), tval, ids))

        # Setting up SearchlightHyperalignment
        # we need to know which original features were comprising the
        # individual SL ROIs
        _shpaldebug('Initializing FeatureSelectionHyperalignment.')
        hmeasure = FeatureSelectionHyperalignment(
            ref_ds=params.ref_ds,
            featsel=params.featsel,
            hyperalignment=params.hyperalignment,
            full_matrix=params.combine_neighbormappers,
            use_same_features=params.use_same_features,
            exclude_from_model=params.exclude_from_model,
            dtype=params.dtype)

        # Performing SL processing manually
        _shpaldebug("Setting up for searchlights")
        if params.nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                params.nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1" %
                        externals.versions['pprocess'])
                params.nproc = 1

        # XXX I think this class should already accept a single dataset only.
        # It should have a ``space`` setting that names a sample attribute that
        # can be used to identify individual/original datasets.
        # Taking a single dataset as argument would be cleaner, because the
        # algorithm relies on the assumption that there is a coarse feature
        # alignment, i.e. the SL ROIs cover roughly the same area
        queryengines = self._get_trained_queryengines(datasets,
                                                      params.queryengine,
                                                      params.radius,
                                                      params.ref_ds)
        # For surface nodes to voxels queryengines, roi_seed hardly makes sense
        qe = queryengines[(0 if len(queryengines) == 1 else params.ref_ds)]
        if isinstance(qe, SurfaceVerticesQueryEngine):
            self.force_roi_seed = False
            if not self.params.combine_neighbormappers:
                raise NotImplementedError(
                    "Mapping from voxels to surface nodes is not "
                    "implemented yet. Try setting combine_neighbormappers to True."
                )
        self.nfeatures = datasets[params.ref_ds].nfeatures
        _shpaldebug("Performing Hyperalignment in searchlights")
        # Setting up centers for running SL Hyperalignment
        if params.sparse_radius is None:
            roi_ids = self._get_verified_ids(queryengines) \
                if params.mask_node_ids is None \
                else params.mask_node_ids
        else:
            if params.queryengine is not None:
                raise NotImplementedError(
                    "using sparse_radius whenever custom queryengine is "
                    "provided is not yet supported.")
            _shpaldebug("Setting up sparse neighborhood")
            from mvpa2.misc.neighborhood import scatter_neighborhoods
            if params.mask_node_ids is None:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices,
                    deterministic=True)
                roi_ids = sidx
            else:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices[
                        params.mask_node_ids],
                    deterministic=True)
                roi_ids = [params.mask_node_ids[sid] for sid in sidx]

        # Initialize projections
        _shpaldebug('Initializing projection matrices')
        self.projections = [
            csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
            for isub in range(self.ndatasets)
        ]

        # compute
        if params.nproc is not None and params.nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            nproc_needed = min(len(roi_ids), params.nproc)
            params.nblocks = nproc_needed \
                if params.nblocks is None else params.nblocks
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc_needed)
            if __debug__:
                debug(
                    'SLC', "Starting off %s child processes for nblocks=%i" %
                    (nproc_needed, params.nblocks))
            compute = p_results.manage(pprocess.MakeParallel(self._proc_block))
            seed = mvpa2.get_random_seed()
            for iblock, block in enumerate(node_blocks):
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                compute(block,
                        datasets,
                        copy.copy(hmeasure),
                        queryengines,
                        seed=seed,
                        iblock=iblock)
        else:
            # otherwise collect the results in an 1-item list
            _shpaldebug('Using 1 process to compute mappers.')
            if params.nblocks is None:
                params.nblocks = 1
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            p_results = [
                self._proc_block(block, datasets, hmeasure, queryengines)
                for block in node_blocks
            ]
        results_ds = self.__handle_all_results(p_results)
        # Dummy iterator for, you know, iteration
        list(results_ds)

        _shpaldebug(
            'Wrapping projection matrices into StaticProjectionMappers')
        self.projections = [
            StaticProjectionMapper(proj=proj, recon=proj.T)
            if params.compute_recon else StaticProjectionMapper(proj=proj)
            for proj in self.projections
        ]
        return self.projections
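The method above is the __call__ of SearchlightHyperalignment, so it is invoked with the list of (zscored) datasets directly. A hedged usage sketch follows; the constructor arguments are inferred from the params referenced in the code, and the `datasets` list is assumed to exist.

# hypothetical usage -- `datasets` is a list of zscored mvpa2 datasets, one per
# subject, with corresponding features across subjects
slhyper = SearchlightHyperalignment(radius=3, ref_ds=0, nproc=None)
mappers = slhyper(datasets)                  # one trained StaticProjectionMapper per dataset
aligned = [m.forward(ds) for m, ds in zip(mappers, datasets)]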
Example #10
    def _call(self, dataset):
        """Perform the ROI search.
        """
        # local binding
        nproc = self.nproc

        if nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1"
                        % externals.versions['pprocess'])
                nproc = 1
        # train the queryengine
        self._queryengine.train(dataset)

        # decide whether to run on all possible center coords or just a provided
        # subset
        if isinstance(self.__roi_ids, str):
            roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
        elif self.__roi_ids is not None:
            roi_ids = self.__roi_ids
            # safeguard against stupidity
            if __debug__:
                qe_ids = self._queryengine.ids # known to qe
                if not set(qe_ids).issuperset(roi_ids):
                    raise IndexError(
                          "Some roi_ids are not known to the query engine %s: %s"
                          % (self._queryengine,
                             set(roi_ids).difference(qe_ids)))
        else:
            roi_ids = self._queryengine.ids

        # pass to subclass
        results = self._sl_call(dataset, roi_ids, nproc)

        if 'mapper' in dataset.a:
            # since we know the space we can stick the original mapper into the
            # results as well
            if self.__roi_ids is None:
                results.a['mapper'] = copy.copy(dataset.a.mapper)
            else:
                # there is an additional selection step that needs to be
                # expressed by another mapper
                mapper = copy.copy(dataset.a.mapper)

                # NNO if the original mapper has no append (because it's not a
                # chainmapper, for example), we make our own chainmapper.
                #
                # The original code was:
                # mapper.append(StaticFeatureSelection(roi_ids,
                #                                     dshape=dataset.shape[1:]))
                feat_sel_mapper = StaticFeatureSelection(roi_ids,
                                                     dshape=dataset.shape[1:])
                if 'append' in dir(mapper):
                    mapper.append(feat_sel_mapper)
                else:
                    mapper = ChainMapper([dataset.a.mapper,
                                          feat_sel_mapper])

                results.a['mapper'] = mapper

        # charge state
        self.ca.raw_results = results
        # return raw results, base-class will take care of transformations
        return results
Example #11
    def _call(self, dataset):
        """Perform the ROI search.
        """
        # local binding
        nproc = self.nproc

        if nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1"
                        % externals.versions['pprocess'])
                nproc = 1
        # train the queryengine
        self._queryengine.train(dataset)

        # decide whether to run on all possible center coords or just a provided
        # subset
        if isinstance(self.__roi_ids, str):
            roi_ids = dataset.fa[self.__roi_ids].value.nonzero()[0]
        elif self.__roi_ids is not None:
            roi_ids = self.__roi_ids
            # safeguard against stupidity
            if __debug__:
                qe_ids = self._queryengine.ids # known to qe
                if not set(qe_ids).issuperset(roi_ids):
                    raise IndexError(
                          "Some roi_ids are not known to the query engine %s: %s"
                          % (self._queryengine,
                             set(roi_ids).difference(qe_ids)))
        else:
            roi_ids = self._queryengine.ids

        # pass to subclass
        results = self._sl_call(dataset, roi_ids, nproc)

        if 'mapper' in dataset.a:
            # since we know the space we can stick the original mapper into the
            # results as well
            if self.__roi_ids is None:
                results.a['mapper'] = copy.copy(dataset.a.mapper)
            else:
                # there is an additional selection step that needs to be
                # expressed by another mapper
                mapper = copy.copy(dataset.a.mapper)

                # NNO if the original mapper has no append (because it's not a
                # chainmapper, for example), we make our own chainmapper.
                #
                # The original code was:
                # mapper.append(StaticFeatureSelection(roi_ids,
                #                                     dshape=dataset.shape[1:])) 
                feat_sel_mapper = StaticFeatureSelection(roi_ids,
                                                     dshape=dataset.shape[1:])
                if 'append' in dir(mapper):
                    mapper.append(feat_sel_mapper)
                else:
                    mapper = ChainMapper([dataset.a.mapper,
                                          feat_sel_mapper])

                results.a['mapper'] = mapper

        # charge state
        self.ca.raw_results = results

        # store the center ids as a feature attribute
        results.fa['center_ids'] = roi_ids


        # return raw results, base-class will take care of transformations
        return results
Example #12
    def __call__(self, datasets):
        """Estimate mappers for each dataset using searchlight-based
        hyperalignment.

        Parameters
        ----------
          datasets : list or tuple of datasets

        Returns
        -------
        A list of trained StaticProjectionMappers of the same length as datasets
        """

        # Perform some checks first before modifying internal state
        params = self.params
        ndatasets = len(datasets)

        if len(datasets) <= 1:
            raise ValueError("SearchlightHyperalignment needs > 1 dataset to "
                             "operate on. Got: %d" % self.ndatasets)

        if params.ref_ds in params.exclude_from_model:
            raise ValueError("Requested reference dataset %i is also "
                             "in the exclude list." % params.ref_ds)

        if params.ref_ds >= ndatasets:
            raise ValueError("Requested reference dataset %i is out of "
                             "bounds. We have only %i datasets provided"
                             % (params.ref_ds, self.ndatasets))

        # The rest of the checks are just warnings
        self.ndatasets = ndatasets

        _shpaldebug("SearchlightHyperalignment %s for %i datasets"
                    % (self, self.ndatasets))

        if params.ref_ds != params.hyperalignment.params.ref_ds:
            warning('Supplied ref_ds & hyperalignment instance ref_ds:%d differ.'
                    % params.hyperalignment.params.ref_ds)
            warning('Using default hyperalignment instance with ref_ds: %d' % params.ref_ds)
            params.hyperalignment = Hyperalignment(ref_ds=params.ref_ds)
        if len(params.exclude_from_model) > 0:
            warning("These datasets will not participate in building common "
                    "model: %s" % params.exclude_from_model)

        if __debug__:
            # verify that datasets were zscored prior the alignment since it is
            # assumed/required preprocessing step
            for ids, ds in enumerate(datasets):
                for f, fname, tval in ((np.mean, 'means', 0),
                                       (np.std, 'stds', 1)):
                    vals = f(ds, axis=0)
                    vals_comp = np.abs(vals - tval) > 1e-5
                    if np.any(vals_comp):
                        warning('%d %s are too different (max diff=%g) from %d in '
                                'dataset %d to come from a zscored dataset. '
                                'Please zscore datasets first for correct operation '
                                '(unless it was intentional)'
                                % (np.sum(vals_comp), fname,
                                   np.max(np.abs(vals)), tval, ids))

        # Setting up SearchlightHyperalignment
        # we need to know which original features were comprising the
        # individual SL ROIs
        _shpaldebug('Initializing FeatureSelectionHyperalignment.')
        hmeasure = FeatureSelectionHyperalignment(
            featsel=params.featsel,
            hyperalignment=params.hyperalignment,
            full_matrix=params.combine_neighbormappers,
            use_same_features=params.use_same_features,
            exclude_from_model=params.exclude_from_model,
            dtype=params.dtype)

        # Performing SL processing manually
        _shpaldebug("Setting up for searchlights")
        if params.nproc is None and externals.exists('pprocess'):
            import pprocess
            try:
                params.nproc = pprocess.get_number_of_cores() or 1
            except AttributeError:
                warning("pprocess version %s has no API to figure out maximal "
                        "number of cores. Using 1"
                        % externals.versions['pprocess'])
                params.nproc = 1

        # XXX I think this class should already accept a single dataset only.
        # It should have a ``space`` setting that names a sample attribute that
        # can be used to identify individual/original datasets.
        # Taking a single dataset as argument would be cleaner, because the
        # algorithm relies on the assumption that there is a coarse feature
        # alignment, i.e. the SL ROIs cover roughly the same area
        queryengines = self._get_trained_queryengines(
            datasets, params.queryengine, params.radius, params.ref_ds)
        # For surface nodes to voxels queryengines, roi_seed hardly makes sense
        if isinstance(queryengines[params.ref_ds], SurfaceVerticesQueryEngine):
            self.force_roi_seed = False
            if not self.params.combine_neighbormappers:
                raise NotImplementedError("Mapping from voxels to surface nodes is not "
                        "implemented yet. Try setting combine_neighbormappers to True.")
        self.nfeatures = datasets[params.ref_ds].nfeatures
        _shpaldebug("Performing Hyperalignment in searchlights")
        # Setting up centers for running SL Hyperalignment
        if params.sparse_radius is None:
            roi_ids = self._get_verified_ids(queryengines) \
                if params.mask_node_ids is None \
                else params.mask_node_ids
        else:
            if params.queryengine is not None:
                raise NotImplementedError(
                    "using sparse_radius whenever custom queryengine is "
                    "provided is not yet supported.")
            _shpaldebug("Setting up sparse neighborhood")
            from mvpa2.misc.neighborhood import scatter_neighborhoods
            if params.mask_node_ids is None:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices,
                    deterministic=True)
                roi_ids = sidx
            else:
                scoords, sidx = scatter_neighborhoods(
                    Sphere(params.sparse_radius),
                    datasets[params.ref_ds].fa.voxel_indices[params.mask_node_ids],
                    deterministic=True)
                roi_ids = [params.mask_node_ids[sid] for sid in sidx]

        # Initialize projections
        _shpaldebug('Initializing projection matrices')
        self.projections = [
            csc_matrix((self.nfeatures, self.nfeatures), dtype=params.dtype)
            for isub in range(self.ndatasets)]

        # compute
        if params.nproc is not None and params.nproc > 1:
            # split all target ROIs centers into `nproc` equally sized blocks
            nproc_needed = min(len(roi_ids), params.nproc)
            params.nblocks = nproc_needed \
                if params.nblocks is None else params.nblocks
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            # the next block sets up the infrastructure for parallel computing
            # this can easily be changed into a ParallelPython loop, if we
            # decide to have a PP job server in PyMVPA
            import pprocess
            p_results = pprocess.Map(limit=nproc_needed)
            if __debug__:
                debug('SLC', "Starting off %s child processes for nblocks=%i"
                      % (nproc_needed, params.nblocks))
            compute = p_results.manage(
                        pprocess.MakeParallel(self._proc_block))
            seed = mvpa2.get_random_seed()
            for iblock, block in enumerate(node_blocks):
                # should we maybe deepcopy the measure to have a unique and
                # independent one per process?
                compute(block, datasets, copy.copy(hmeasure), queryengines,
                        seed=seed, iblock=iblock)
        else:
            # otherwise collect the results in an 1-item list
            _shpaldebug('Using 1 process to compute mappers.')
            if params.nblocks is None:
                params.nblocks = 1
            params.nblocks = min(len(roi_ids), params.nblocks)
            node_blocks = np.array_split(roi_ids, params.nblocks)
            p_results = [self._proc_block(block, datasets, hmeasure, queryengines)
                         for block in node_blocks]
        results_ds = self.__handle_all_results(p_results)
        # Dummy iterator for, you know, iteration
        list(results_ds)

        _shpaldebug('Wrapping projection matrices into StaticProjectionMappers')
        self.projections = [
            StaticProjectionMapper(proj=proj, recon=proj.T) if params.compute_recon
            else StaticProjectionMapper(proj=proj)
            for proj in self.projections]
        return self.projections