Ejemplo n.º 1
0
def extract_multiedge(subject_id_list,
                      input_dir,
                      base_feature_list=cfg.default_features_multi_edge,
                      weight_method_list=cfg.default_weight_method,
                      summary_stats=cfg.multi_edge_summary_func_default,
                      num_bins=cfg.default_num_bins,
                      edge_range_dict=cfg.edge_range_predefined,
                      atlas=cfg.default_atlas,
                      smoothing_param=cfg.default_smoothing_param,
                      node_size=cfg.default_node_size,
                      out_dir=None,
                      return_results=False,
                      overwrite_results=False,
                      num_procs=cfg.default_num_procs):
    """
    Extracts weighted networks (matrix of pair-wise ROI distances) based on multiple gray matter features based on Freesurfer processing.

    Parameters
    ----------
    subject_id_list : str or list
         must be path to a file containing subject IDs, or a list of subject IDs
    input_dir : str
        Path to the input directory where features can be read.
        For example, this can be Freesurfer's SUBJECTS_DIR, where output processing is stored.
        Or another directory with a structure that graynet can parse.
    base_feature_list : list
        Set of features that drive the different edges between the pair of ROIs.

        For example, if you choose thickness and pial_curv, each pair of ROIs will have two edges.

        This multi-edge network can be turned into a single network based on averaging weights from different individual networks.

    weight_method : string(s), optional
        Type of distance (or metric) to compute between the pair of histograms.

        It must be one of the following methods:

        - 'chebyshev'
        - 'chebyshev_neg'
        - 'chi_square'
        - 'correlate'
        - 'correlate_1'
        - 'cosine'
        - 'cosine_1'
        - 'cosine_2'
        - 'cosine_alt'
        - 'euclidean'
        - 'fidelity_based'
        - 'histogram_intersection'
        - 'histogram_intersection_1'
        - 'jensen_shannon'
        - 'kullback_leibler'
        - 'manhattan'
        - 'minowski'
        - 'noelle_1'
        - 'noelle_2'
        - 'noelle_3'
        - 'noelle_4'
        - 'noelle_5'
        - 'relative_bin_deviation'
        - 'relative_deviation'

        Note only the following are *metrics*:

        - 'manhattan'
        - 'minowski'
        - 'euclidean'
        - 'noelle_2'
        - 'noelle_4'
        - 'noelle_5'

        The following are *semi- or quasi-metrics*:

        - 'kullback_leibler'
        - 'jensen_shannon'
        - 'chi_square'
        - 'chebyshev'
        - 'cosine_1'
        - 'chebyshev_neg'
        - 'correlate_1'
        - 'histogram_intersection_1'
        - 'relative_deviation'
        - 'relative_bin_deviation'
        - 'noelle_1'
        - 'noelle_3'

        The following are  classified to be similarity functions:

        - 'histogram_intersection'
        - 'correlate'
        - 'cosine'
        - 'cosine_2'
        - 'cosine_alt'
        - 'fidelity_based'

        *Default* choice: 'manhattan'.

    summary_stats : list of str
        A string, or list of strings, each representing a method (like 'median', 'prod' or 'max'),
        to compute a summay statistic from the array of multiple weights computed.

        This must be available as a member of numpy or scipy.stats.

    num_bins : int
        Number of histogram bins to use when computing pair-wise weights based on histogram distance. Default : 25

    edge_range_dict : tuple or list
        The range of edges (two finite values) within which to build the histogram e.g. ``--edge_range 0 5``.
        This can be helpful (and important) to ensure correspondence across multiple invocations of graynet (e.g. for different subjects), in terms of range across all bins as well as individual bin edges.

        Default :

            - ( 0.0, 5.0) for ``freesurfer_thickness`` and
            - (-0.3, 0.3) for ``freesurfer_curv``.

    atlas : str
        Name of the atlas whose parcellation to be used.
        Choices for cortical parcellation: ['fsaverage', 'glasser2016'], which are primary cortical.
        Volumetric whole-brain atlases will be added soon.

    smoothing_param : scalar
        Smoothing parameter, which could be fwhm for Freesurfer cortical features,
        or another relevant for the chosen base_feature_list.
        Default: assumed as fwhm=10mm for the default feature choice 'thickness'

    node_size : scalar, optional
        Parameter to indicate the size of the ROIs, subparcels or patches, depending on type of atlas or feature.
        This feature is not implemented yet, just a placeholder and to enable default computation.

    out_dir : str, optional
        Path to output directory to store results.
        Default: None, results are returned, but not saved to disk.
        If this is None, return_results must be true.

    return_results : bool
        Flag to indicate whether to return the results to be returned.
        This flag helps to reduce the memory requirements, when the number of nodes in a parcellation or
        the number of subjects or weight methods are large, as it doesn't retain results for all combinations,
        when running from commmand line interface (or HPC). Default: False
        If this is False, out_dir must be specified to save the results to disk.

    overwrite_results : bool
        Flag to request overwriting of existing results, in case of reruns/failed jobs. By default, if the expected output file exists and is of non-zero size, its computation is skipped (assuming the file is complete, usable and not corrupted).

    num_procs : int
        Number of parallel processes to use to speed up computation.

    Returns
    -------
    edge_weights_all : dict, None
        If return_results is True, this will be a dictionary keyed in by a tuple: (weight method, subject_ID)
        The value of each edge_weights_all[(weight method, subject_ID)] is
        a numpy array of length p = k*(k-1)/2, with k = number of nodes in the atlas parcellation.
        If return_results is False, this will be None, which is the default.
    """

    # All the checks must happen here, as this is key function in the API
    check_params_multiedge(base_feature_list, input_dir, atlas,
                           smoothing_param, node_size, out_dir, return_results)
    atlas = check_atlas(atlas)

    subject_id_list, num_subjects, max_id_width, nd_id = check_subjects(
        subject_id_list)

    num_bins = check_num_bins(num_bins)
    edge_range_dict = check_edge_range_dict(edge_range_dict, base_feature_list)
    weight_method_list, num_weights, max_wtname_width, nd_wm = check_weights(
        weight_method_list)

    # validating the choice and getting a callable
    summary_stats, summary_stat_names, _, _, _ = check_stat_methods(
        summary_stats)

    num_procs = check_num_procs(num_procs)
    pretty_print_options = (max_id_width, nd_id, num_weights, max_wtname_width,
                            nd_wm)

    # roi_labels, ctx_annot = parcellate.freesurfer_roi_labels(atlas)
    # uniq_rois, roi_size, num_nodes = roi_info(roi_labels)
    uniq_rois, centroids, roi_labels = parcellate.roi_labels_centroids(atlas)

    print('\nProcessing {} features resampled to {} atlas,'
          ' smoothed at {} with node size {}'.format(base_feature_list, atlas,
                                                     smoothing_param,
                                                     node_size))

    if not return_results:
        if out_dir is None:
            raise ValueError(
                'When return_results=False, out_dir must be specified to be able to save the results.'
            )
        if not pexists(out_dir):
            os.mkdir(out_dir)

    partial_func_extract = partial(
        per_subject_multi_edge, input_dir, base_feature_list, roi_labels,
        centroids, weight_method_list, summary_stats, summary_stat_names,
        atlas, smoothing_param, node_size, num_bins, edge_range_dict, out_dir,
        return_results, overwrite_results, pretty_print_options)
    if num_procs > 1:
        chunk_size = int(np.ceil(num_subjects / num_procs))
        with Manager():
            with Pool(processes=num_procs) as pool:
                edge_weights_list_dicts = pool.map(partial_func_extract,
                                                   subject_id_list, chunk_size)
    else:
        # reverting to sequential processing
        edge_weights_list_dicts = [
            partial_func_extract(subject=sub_id) for sub_id in subject_id_list
        ]

    if return_results:
        edge_weights_all = dict()
        for combo in edge_weights_list_dicts:
            # each element from output of parallel loop is a dict keyed in by {subject, weight)
            edge_weights_all.update(combo)
    else:
        edge_weights_all = None

    print('\ngraynet computation done.')
    return edge_weights_all
    'freesurfer_curv': (-0.3, +0.3),
    'freesurfer_sulc': (-1.5, +1.5),
    'freesurfer_area': (0.0, 1.5)
}

expt_prefix = 'thk_curv_sulc_area_nbins25'
# You can choose only one or multiple, but keep them enclosed as a list or array.
histogram_dist = np.array([
    'chebyshev', 'chi_square', 'correlate', 'cosine', 'euclidean',
    'histogram_intersection', 'jensen_shannon', 'manhattan', 'minowski',
    'relative_deviation'
])
summary_stat = 'prod'
file_ext = 'multigraph_graynet.graphml'

atlas_rois, centroids, vertex_labels = parcellate.roi_labels_centroids(atlas)


def get_weights_order(graph, nodes=atlas_rois):
    "returns weights in the order of nodes requested"

    # order is not guaranteed below
    edge_dict = nx.get_edge_attributes(graph, 'weight')
    # so ordering it here, to ensure correspondence across subjects
    weights = [
        graph[x][y]['weight'] for x in nodes for y in nodes
        if (x, y) in edge_dict
    ]

    return np.array(weights)
Ejemplo n.º 3
0
def roiwise_stats_indiv(subject_id_list,
                        input_dir,
                        base_feature=cfg.default_feature_single_edge,
                        chosen_roi_stats=cfg.default_roi_statistic,
                        atlas=cfg.default_atlas,
                        smoothing_param=cfg.default_smoothing_param,
                        node_size=cfg.default_node_size,
                        out_dir=None,
                        return_results=False):
    """
    Computes the chosen summary statistics within each ROI.
    These summary stats (such as median) can serve as a baseline for network-level values produced by graynet.

    Options for summary statistics include 'median', 'entropy', 'kurtosis' and
    any other appropriate summary statistics listed under scipy.stats:
    https://docs.scipy.org/doc/scipy/reference/stats.html#statistical-functions

    Parameters
    ----------
    subject_id_list : str or list
        must be path to a file containing subject IDs, or a list of subject IDs

    input_dir : str
        Path to the input directory where features can be read.
        For example, this can be Freesurfer's SUBJECTS_DIR, where output processing is stored.
        Or another directory with a structure that graynet can parse.

    base_feature : str
        Specific type of feature to read for each subject from the input directory.

    chosen_roi_stats : list of str or callable
        If requested, graynet will compute chosen summary statistics (such as median) within each ROI of the chosen parcellation (and network weight computation is skipped).
        Default: 'median'. Supported summary statistics include 'median', 'mode', 'mean', 'std', 'gmean', 'hmean', 'variation',
        'entropy', 'skew' and 'kurtosis'.

        Other appropriate summary statistics listed under scipy.stats could used
        by passing in a callable with their parameters encapsulated:
        https://docs.scipy.org/doc/scipy/reference/stats.html#statistical-functions
        For example, if you would like to compute 3rd k-statistic, you could construct a callable and passing ``third_kstat`` as in the argument:

        .. code-block:: python

            third_kstat  = lambda array: scipy.stats.kstat(array, n = 3)
            roi_medians = roiwise_stats_indiv(subject_id_list, fs_dir, base_feature, chosen_measure = third_kstat,
                atlas, fwhm, out_dir=None, return_results=True)

        Other possible options could trimmed mean estimator with 5% outliers removed or 3rd k-statistic:
        .. code-block:: python
            trimmed_mean = lambda array: scipy.stats.trim_mean(array, proportiontocut = 0.05)
            third_kstat  = lambda array: scipy.stats.kstat(array, n = 3)

        Notes: 'hmean' requires all values be positive.

    atlas : str
        Name of the atlas whose parcellation to be used.
        Available choices for cortical parcellation: ['fsaverage', 'glasser2016'].
        Volumetric whole-brain atlases will be added soon.

    smoothing_param : scalar
        Smoothing parameter, which could be fwhm for Freesurfer cortical features,
        or another relevant for the chosen base_feature.
        Default: assumed as fwhm=10mm for the default feature choice 'thickness'

    node_size : scalar, optional
        Parameter to indicate the size of the ROIs, subparcels or patches, depending on type of atlas or feature.
        Not implemented.

    out_dir : str, optional
        Path to output directory to store results.
        Default: None, results are returned, but not saved to disk.
        If this is None, return_results must be true.

    return_results : bool
        Flag to indicating whether to keep the results to be returned to caller method.
        Helps to save memory (as it doesn't retain results all subjects and weight combinations),
        when running from command line interface (or HPC). Default: False
        If this is False, out_dir must be specified to save the results to disk.

    Returns
    -------
    roi_stats_all : dict, None
        If return_results is True, this will be a dictionary keyed in by subject_ID
        The value of each key roi_summary_all[subject] is
        a numpy array of length k, with k = number of nodes in the atlas parcellation.
        If return_results is False, this will be None, which is the default.
    """

    check_params_single_edge(base_feature, input_dir, atlas, smoothing_param,
                             node_size, out_dir, return_results)
    subject_id_list, num_subjects, max_id_width, nd_id = check_subjects(
        subject_id_list)
    stat_func_list, stat_func_names, num_stats, max_stat_width, nd_st = check_stat_methods(
        chosen_roi_stats)

    # roi_labels, ctx_annot = parcellate.freesurfer_roi_labels(atlas)
    # uniq_rois, roi_size, num_nodes = roi_info(roi_labels)
    uniq_rois, centroids, roi_labels = parcellate.roi_labels_centroids(atlas)

    print('\nProcessing {} features resampled to {} atlas,'
          ' smoothed at {} with node size {}'.format(base_feature, atlas,
                                                     smoothing_param,
                                                     node_size))

    if return_results:
        roi_stats_all = dict()
    else:
        roi_stats_all = None
        if out_dir is None:
            raise ValueError(
                'When return_results=False, out_dir must be specified to be able to save the results.'
            )
        if not pexists(out_dir):
            os.mkdir(out_dir)

    for sub_idx, subject in enumerate(subject_id_list):

        try:
            features = import_features(input_dir, [
                subject,
            ],
                                       base_feature,
                                       atlas=atlas,
                                       fwhm=smoothing_param)
        except:
            raise IOError(
                'Unable to read {} features for {}\n Skipping it.'.format(
                    base_feature, subject))

        data, rois = mask_background_roi(features[subject], roi_labels,
                                         cfg.null_roi_name)
        for ss, stat_func in enumerate(stat_func_list):
            sys.stdout.write(
                '\nProcessing id {sid:{id_width}} ({sidnum:{nd_id}}/{numsub:{nd_id}}) -- '
                'statistic {stname:{stat_name_width}} ({statnum:{nd_st}}/{numst:{nd_st}})'
                ' :'.format(sid=subject,
                            sidnum=sub_idx + 1,
                            numsub=num_subjects,
                            stname=stat_func_names[ss],
                            statnum=ss + 1,
                            numst=num_stats,
                            id_width=max_id_width,
                            stat_name_width=max_stat_width,
                            nd_id=nd_id,
                            nd_st=nd_st))

            try:
                roi_stats = calc_roi_statistics(data, rois, uniq_rois,
                                                stat_func)
                expt_id_no_network = stamp_experiment(base_feature,
                                                      stat_func_names[ss],
                                                      atlas, smoothing_param,
                                                      node_size)
                save_summary_stats(roi_stats, uniq_rois, stat_func_names[ss],
                                   out_dir, subject, expt_id_no_network)
                sys.stdout.write('Done.')
            except KeyboardInterrupt:
                print('Exiting on keyborad interrupt! \n'
                      'Abandoning the remaining processing for {} stats:\n'
                      '{}.'.format(num_stats - ss, stat_func_names[ss:]))
                sys.exit(1)
            except:
                traceback.print_exc()
                logging.debug(
                    'Error : unable to compute roi-wise {} for {}. Skipping it.'
                    .format(stat_func_names[ss], subject))

        if return_results:
            roi_stats_all[subject] = roi_stats

    return roi_stats_all
Ejemplo n.º 4
0
def extract(subject_id_list,
            input_dir,
            base_feature=cfg.default_feature_single_edge,
            weight_method_list=cfg.default_weight_method,
            num_bins=cfg.default_num_bins,
            edge_range=cfg.default_edge_range,
            atlas=cfg.default_atlas,
            smoothing_param=cfg.default_smoothing_param,
            node_size=cfg.default_node_size,
            out_dir=None,
            return_results=False,
            num_procs=cfg.default_num_procs):
    """
    Extracts weighted networks (matrix of pair-wise ROI distances) from gray matter features based on Freesurfer processing.

    Parameters
    ----------
    subject_id_list : str or list
         must be path to a file containing subject IDs, or a list of subject IDs
    input_dir : str
        Path to the input directory where features can be read.
        For example, this can be Freesurfer's SUBJECTS_DIR, where output processing is stored.
        Or another directory with a structure that graynet can parse.
    base_feature : str
        Specific type of feature to read for each subject from the input directory.

    weight_method : string(s), optional
        Type of distance (or metric) to compute between the pair of histograms.

        It must be one of the following methods:

        - 'chebyshev'
        - 'chebyshev_neg'
        - 'chi_square'
        - 'correlate'
        - 'correlate_1'
        - 'cosine'
        - 'cosine_1'
        - 'cosine_2'
        - 'cosine_alt'
        - 'euclidean'
        - 'fidelity_based'
        - 'histogram_intersection'
        - 'histogram_intersection_1'
        - 'jensen_shannon'
        - 'kullback_leibler'
        - 'manhattan'
        - 'minowski'
        - 'noelle_1'
        - 'noelle_2'
        - 'noelle_3'
        - 'noelle_4'
        - 'noelle_5'
        - 'relative_bin_deviation'
        - 'relative_deviation'

        Note only the following are *metrics*:

        - 'manhattan'
        - 'minowski'
        - 'euclidean'
        - 'noelle_2'
        - 'noelle_4'
        - 'noelle_5'

        The following are *semi- or quasi-metrics*:

        - 'kullback_leibler'
        - 'jensen_shannon'
        - 'chi_square'
        - 'chebyshev'
        - 'cosine_1'
        - 'chebyshev_neg'
        - 'correlate_1'
        - 'histogram_intersection_1'
        - 'relative_deviation'
        - 'relative_bin_deviation'
        - 'noelle_1'
        - 'noelle_3'

        The following are  classified to be similarity functions:

        - 'histogram_intersection'
        - 'correlate'
        - 'cosine'
        - 'cosine_2'
        - 'cosine_alt'
        - 'fidelity_based'

        *Default* choice: 'manhattan'.

    num_bins : int
        Number of histogram bins to use when computing pair-wise weights based on histogram distance. Default : 25

    edge_range : tuple or list
        The range of edges (two finite values) within which to build the histogram e.g. ``--edge_range 0 5``.
        This can be helpful (and important) to ensure correspondence across multiple invocations of graynet (e.g. for different subjects), in terms of range across all bins as well as individual bin edges.

        Default :

            - ( 0.0, 5.0) for ``freesurfer_thickness`` and
            - (-0.3, 0.3) for ``freesurfer_curv``.

    atlas : str
        Name of the atlas whose parcellation to be used.
        Choices for cortical parcellation: ['fsaverage', 'glasser2016'], which are primary cortical.
        Volumetric whole-brain atlases will be added soon.

    smoothing_param : scalar
        Smoothing parameter, which could be fwhm for Freesurfer cortical features,
        or another relevant for the chosen base_feature.
        Default: assumed as fwhm=10mm for the default feature choice 'thickness'

    node_size : scalar, optional
        Parameter to indicate the size of the ROIs, subparcels or patches, depending on type of atlas or feature.
        This feature is not implemented yet, just a placeholder and to enable default computation.

    out_dir : str, optional
        Path to output directory to store results.
        Default: None, results are returned, but not saved to disk.
        If this is None, return_results must be true.

    return_results : bool
        Flag to indicate whether to return the results to be returned.
        This flag helps to reduce the memory requirements, when the number of nodes in a parcellation or
        the number of subjects or weight methods are large, as it doesn't retain results for all combinations,
        when running from commmand line interface (or HPC). Default: False
        If this is False, out_dir must be specified to save the results to disk.

    num_procs : int
        Number of parallel processes to use to speed up computation.

    Returns
    -------
    edge_weights_all : dict, None
        If return_results is True, this will be a dictionary keyed in by a tuple: (weight method, subject_ID)
        The value of each edge_weights_all[(weight method, subject_ID)] is
        a numpy array of length p = k*(k-1)/2, with k = number of nodes in the atlas parcellation.
        If return_results is False, this will be None, which is the default.
    """

    # All the checks must happen here, as this is key function in the API
    check_params_single_edge(base_feature, input_dir, atlas, smoothing_param,
                             node_size, out_dir, return_results)
    atlas, atlas_name = check_atlas(atlas)

    subject_id_list, num_subjects, \
        max_id_width, nd_id = check_subjects(subject_id_list)

    num_bins, edge_range = check_weight_params(num_bins, edge_range)
    weight_method_list, num_weights, \
        max_wtname_width, nd_wm = check_weights(weight_method_list)

    num_procs = check_num_procs(num_procs)
    pretty_print_options = (max_id_width, nd_id, num_weights, max_wtname_width,
                            nd_wm)

    # roi_labels, ctx_annot = freesurfer_roi_labels(atlas)
    # uniq_rois, roi_size, num_nodes = roi_info(roi_labels)

    print('\nProcessing {} features'.format(base_feature))

    if not return_results:
        if out_dir is None:
            raise ValueError(
                'When return_results=False, out_dir must be specified '
                'to be able to save the results.')
        if not out_dir.exists():
            out_dir.mkdir(exist_ok=True, parents=True)

    if base_feature in cfg.features_cortical:
        uniq_rois, centroids, roi_labels = roi_labels_centroids(
            atlas, node_size)
        partial_func_extract = partial(extract_per_subject_cortical, input_dir,
                                       base_feature, roi_labels, centroids,
                                       weight_method_list, atlas, atlas_name,
                                       smoothing_param, node_size, num_bins,
                                       edge_range, out_dir, return_results,
                                       pretty_print_options)
    elif base_feature in cfg.features_volumetric:
        uniq_rois, centroids, roi_labels = volumetric_roi_info(atlas)
        partial_func_extract = partial(extract_per_subject_volumetric,
                                       input_dir, base_feature, roi_labels,
                                       centroids, weight_method_list, atlas,
                                       atlas_name, smoothing_param, node_size,
                                       num_bins, edge_range, out_dir,
                                       return_results, pretty_print_options)
    else:
        raise NotImplementedError('Chosen feature {} is not recognized as '
                                  'either cortical or volumetric! Choose one'
                                  'from the following options: {}'
                                  ''.format(cfg.base_feature_list))

    chunk_size = int(np.ceil(num_subjects / num_procs))
    with Manager():
        with Pool(processes=num_procs) as pool:
            edge_weights_list_dicts = pool.map(partial_func_extract,
                                               subject_id_list, chunk_size)

    if return_results:
        edge_weights_all = dict()
        for combo in edge_weights_list_dicts:
            # each element from output of parallel loop is a dict keyed in
            #   by {subject, weight)
            edge_weights_all.update(combo)
    else:
        edge_weights_all = None

    print('\ngraynet computation done.')
    return edge_weights_all