Example #1
    for base_feature in features_freesurfer:

        id_list, classes = get_metadata(meta_file)
        class_set = list(set(classes.values()))
        class_set.sort()
        labels = {sub: class_set.index(cls) for sub, cls in classes.items()}

        out_path = pjoin(
            vis_out_dir,
            'raw_features_{}_{}.MLDataset.pkl'.format(base_feature,
                                                      '_'.join(class_set)))

        try:
            ds = MLDataset(filepath=out_path)
        except Exception:  # e.g. cached dataset not found: rebuild it below
            traceback.print_exc()
            id_data = import_features(freesurfer_dir,
                                      id_list,
                                      base_feature,
                                      atlas=atlas,
                                      fwhm=fwhm)
            ds = MLDataset(data=id_data, labels=labels, classes=classes)
            ds.save(out_path)

        data, lbl, ids = ds.data_and_labels()
        print('{} {}\n min : {:.4f}\n max : {:.4f}'.format(
            dataset_name, base_feature, np.min(data), np.max(data)))
        for perc in [1, 5, 95, 99]:
            print('{:3d}% : {:10.4f}'.format(perc, np.percentile(data, perc)))
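
The percentile printout above is a natural way to pick a robust edge_range for the histogram-distance weights that hiwenet.extract() consumes in the later examples. A minimal sketch, assuming the same data and base_feature from the loop above (this hand-off is not part of the original snippet):

        # robust range from the 1st and 99th percentiles of the pooled values
        edge_range = (np.percentile(data, 1), np.percentile(data, 99))
        print('suggested edge_range for {}: ({:.4f}, {:.4f})'
              ''.format(base_feature, *edge_range))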
Example #2
def per_subject_multi_edge(
    input_dir,
    base_feature_list,
    roi_labels,
    centroids,
    weight_method_list,
    summary_stats,
    summary_stat_names,
    atlas,
    atlas_name,
    smoothing_param,
    node_size,
    num_bins,
    edge_range_dict,
    out_dir,
    return_results,
    overwrite_results,
    pretty_print_options,
    subject=None
):  # purposefully leaving it last to enable partial function creation
    """
    Extracts a given set of weights for one subject.
    """

    if subject is None:
        return

    if return_results:
        edge_weights_all = dict()
    else:
        edge_weights_all = None

    max_id_width, nd_id, num_weights, max_wtname_width, nd_wm = pretty_print_options

    for ww, weight_method in enumerate(weight_method_list):

        expt_id_multi = stamp_expt_multiedge(base_feature_list, atlas_name,
                                             smoothing_param, node_size,
                                             weight_method)
        out_path_multigraph = make_output_path_graph(
            out_dir, subject, [expt_id_multi, 'multigraph'])
        # skipping the computation if the file exists already
        if not overwrite_results and isfile(
                out_path_multigraph) and getsize(out_path_multigraph) > 0:
            print('\nMultigraph exists already at\n\t{}\n'
                  ' skipping its computation!'.format(out_path_multigraph))
            multigraph = None  # signal to re-read
        else:
            multigraph = nx.MultiGraph()

            for base_feature in base_feature_list:
                # # TODO refactor
                # unigraph, weight_vec = compute_unigraph(input_dir, subject, base_feature, weight_method, roi_labels,
                #                                         atlas, smoothing_param, node_size, centroids,
                #                                         num_bins, edge_range_dict,
                #                                         out_dir, overwrite_results, pretty_print_options)
                # if return_results:
                #     edge_weights_all[(weight_method, base_feature, subject)] = weight_vec

                try:
                    features = import_features(input_dir,
                                               [subject],
                                               base_feature,
                                               fwhm=smoothing_param,
                                               atlas=atlas)

                except Exception:
                    traceback.print_exc()
                    warnings.warn(
                        'Unable to read {} features'
                        ' for {}\n Skipping it.'.format(base_feature, subject),
                        UserWarning)
                    return

                data, rois = mask_background_roi(features[subject], roi_labels,
                                                 cfg.null_roi_name)

                # unique stamp for each subject and weight
                expt_id_single = stamp_expt_weight(base_feature, atlas_name,
                                                   smoothing_param, node_size,
                                                   weight_method)
                sys.stdout.write(
                    '\nProcessing id {:{id_width}} --'
                    ' weight {:{wtname_width}} ({:{nd_wm}}/{:{nd_wm}})'
                    ' :'.format(subject,
                                weight_method,
                                ww + 1,
                                num_weights,
                                nd_id=nd_id,
                                nd_wm=nd_wm,
                                id_width=max_id_width,
                                wtname_width=max_wtname_width))

                # actual computation of pair-wise features
                try:
                    unigraph = hiwenet.extract(
                        data,
                        rois,
                        weight_method=weight_method,
                        num_bins=num_bins,
                        edge_range=edge_range_dict[base_feature],
                        return_networkx_graph=True)

                    # retrieving edge weights
                    weight_vec = np.array(
                        list(
                            nx.get_edge_attributes(unigraph,
                                                   'weight').values()))
                    warn_nan(weight_vec)
                    if return_results:
                        edge_weights_all[(weight_method, base_feature,
                                          subject)] = weight_vec

                except (RuntimeError, RuntimeWarning) as runexc:
                    print(runexc)
                    # skip this feature: unigraph was not created
                    continue
                except KeyboardInterrupt:
                    print('Exiting on keyboard interrupt! \n'
                          'Abandoning the remaining processing ')
                    sys.exit(1)
                except Exception:
                    print('Unable to extract {} weights for {} for {}'.format(
                        weight_method, base_feature, subject))
                    traceback.print_exc()
                    # skip this feature: unigraph was not created
                    continue

                print('Done.')

                # TODO consider extracting some network features upon user request.

                add_nodal_positions(unigraph, centroids)
                save_per_subject_graph(unigraph, out_dir, subject,
                                       expt_id_single)

                # adding edges/weights from each feature to a multigraph
                # this also encodes the sources
                for u, v in unigraph.edges():
                    multigraph.add_edge(u,
                                        v,
                                        weight=unigraph[u][v]['weight'],
                                        base_feature=base_feature)

            # adding position info to nodes (for visualization later)
            add_nodal_positions(multigraph, centroids)
            save_graph(multigraph, out_path_multigraph, 'multi-edge')

        for stat_func, stat_name in zip(summary_stats, summary_stat_names):
            # creating single graph with a summary edge weight (like median)
            out_path_summary = make_output_path_graph(
                out_dir, subject, [expt_id_multi, stat_name, 'multigraph'])
            if not overwrite_results and isfile(
                    out_path_summary) and getsize(out_path_summary) > 0:
                print(
                    'Summary {} of multigraph exists already at\n\t{}\n skipping its computation!'
                    .format(stat_name, out_path_summary))
            else:
                if multigraph is None:
                    multigraph = nx.read_graphml(out_path_multigraph)

                try:
                    summary_multigraph = summarize_multigraph(
                        multigraph, stat_func)
                    add_nodal_positions(summary_multigraph, centroids)
                    save_graph(summary_multigraph, out_path_summary,
                               '{} summary'.format(stat_name))
                except Exception:
                    print(
                        'Summary {} could not be computed - skipping!'.format(
                            stat_name))
                    traceback.print_exc()

    return edge_weights_all
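
The trailing subject=None parameter (see the comment on the signature above) exists so a caller can freeze every other argument with functools.partial and map the resulting one-argument function over subjects. A minimal sketch of that pattern, assuming a subject_id_list and the same variable names as the signature; the parallel dispatch itself is an assumption, not shown in the original:

from functools import partial
from multiprocessing import Pool

# freeze all shared arguments once; only `subject` remains to be supplied
run_one = partial(per_subject_multi_edge,
                  input_dir, base_feature_list, roi_labels, centroids,
                  weight_method_list, summary_stats, summary_stat_names,
                  atlas, atlas_name, smoothing_param, node_size,
                  num_bins, edge_range_dict, out_dir,
                  return_results, overwrite_results, pretty_print_options)

# each call run_one(subject) fills the last positional parameter
with Pool(processes=4) as pool:
    all_weights = pool.map(run_one, subject_id_list)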
Example #3
def extract_per_subject_cortical(input_dir,
                                 base_feature,
                                 roi_labels,
                                 centroids,
                                 weight_method_list,
                                 atlas_spec,
                                 atlas_name,
                                 smoothing_param,
                                 node_size,
                                 num_bins,
                                 edge_range,
                                 out_dir,
                                 return_results,
                                 pretty_print_options,
                                 subject=None):
    # purposefully leaving subject parameter last to enable partial function creation
    """
    Extracts a given set of weights for one subject.

    Parameters
    ----------
    subject
    input_dir
    base_feature
    roi_labels
    weight_method_list
    atlas_spec
    smoothing_param
    node_size
    num_bins
    edge_range
    out_dir
    return_results
    pretty_print_options

    Returns
    -------

    """

    if subject is None:
        return

    print('')

    try:
        features = import_features(input_dir,
                                   [subject],
                                   base_feature,
                                   fwhm=smoothing_param,
                                   atlas=atlas_spec)
    except Exception:
        traceback.print_exc()
        warnings.warn(
            'Unable to read {} features for {}\n Skipping it.'.format(
                base_feature, subject), UserWarning)
        return

    data, rois = mask_background_roi(features[subject], roi_labels,
                                     cfg.null_roi_name)

    max_id_width, nd_id, num_weights, max_wtname_width, nd_wm = pretty_print_options

    if return_results:
        edge_weights_all = dict()
    else:
        edge_weights_all = None

    for ww, weight_method in enumerate(weight_method_list):
        # unique stamp for each subject and weight
        expt_id = stamp_expt_weight(base_feature, atlas_name, smoothing_param,
                                    node_size, weight_method)
        sys.stdout.write(
            '\nProcessing {sid:{id_width}} -- weight {wm:{wtname_width}} '
            '({wc:{nd_wm}}/{nw:{nd_wm}}) :\n'
            ''.format(sid=subject,
                      wm=weight_method,
                      wc=ww + 1,
                      nw=num_weights,
                      nd_id=nd_id,
                      nd_wm=nd_wm,
                      id_width=max_id_width,
                      wtname_width=max_wtname_width))

        # actual computation of pair-wise features
        try:
            graph = hiwenet.extract(data,
                                    rois,
                                    weight_method=weight_method,
                                    num_bins=num_bins,
                                    edge_range=edge_range,
                                    return_networkx_graph=True)

            # retrieving edge weights
            weight_vec = np.array(
                list(nx.get_edge_attributes(graph, 'weight').values()))
            warn_nan(weight_vec)
            # weight_vec = get_triu_handle_inf_nan(edge_weights)

            # adding position info to nodes (for visualization later)
            for roi in centroids:
                graph.nodes[roi]['x'] = float(centroids[roi][0])
                graph.nodes[roi]['y'] = float(centroids[roi][1])
                graph.nodes[roi]['z'] = float(centroids[roi][2])

            if return_results:
                edge_weights_all[(weight_method, subject)] = weight_vec

            # saving to disk
            try:
                # save(weight_vec, out_dir, subject, expt_id)
                save_per_subject_graph(graph, out_dir, subject, expt_id)
            except Exception as exc:
                raise IOError(
                    'Unable to save the network or vectorized weights '
                    'to:\n{}'.format(out_dir)) from exc

        except (RuntimeError, RuntimeWarning) as runexc:
            print(runexc)
        except KeyboardInterrupt:
            print('Exiting on keyboard interrupt! \n'
                  'Abandoning the remaining processing for {} weights:\n'
                  '{}.'.format(num_weights - ww, weight_method_list[ww:]))
            sys.exit(1)
        except Exception:
            print('Unable to extract {} features for {}'.format(
                weight_method, subject))
            traceback.print_exc()

    return edge_weights_all
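
Both per-subject functions unpack pretty_print_options into the same five values. Its construction is not shown in these snippets; a minimal sketch of how a caller could assemble it, assuming check_subjects() as used in the next example and a hypothetical id_file path:

subject_id_list, num_subjects, max_id_width, nd_id = check_subjects(id_file)
num_weights = len(weight_method_list)
max_wtname_width = max(len(wm) for wm in weight_method_list)
nd_wm = len(str(num_weights))  # digit width for the running weight counter
pretty_print_options = (max_id_width, nd_id, num_weights,
                        max_wtname_width, nd_wm)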
Example #4
def roiwise_stats_indiv(subject_id_list,
                        input_dir,
                        base_feature=cfg.default_feature_single_edge,
                        chosen_roi_stats=cfg.default_roi_statistic,
                        atlas=cfg.default_atlas,
                        smoothing_param=cfg.default_smoothing_param,
                        node_size=cfg.default_node_size,
                        out_dir=None,
                        return_results=False):
    """
    Computes the chosen summary statistics within each ROI.
    These summary stats (such as median) can serve as a baseline for network-level values produced by graynet.

    Options for summary statistics include 'median', 'entropy', 'kurtosis' and
    any other appropriate summary statistics listed under scipy.stats:
    https://docs.scipy.org/doc/scipy/reference/stats.html#statistical-functions

    Parameters
    ----------
    subject_id_list : str or list
        must be path to a file containing subject IDs, or a list of subject IDs

    input_dir : str
        Path to the input directory where features can be read.
        For example, this can be Freesurfer's SUBJECTS_DIR, where its processing outputs are stored.
        Or another directory with a structure that graynet can parse.

    base_feature : str
        Specific type of feature to read for each subject from the input directory.

    chosen_roi_stats : list of str or callable
        If requested, graynet will compute chosen summary statistics (such as median) within each ROI of the chosen parcellation (and network weight computation is skipped).
        Default: 'median'. Supported summary statistics include 'median', 'mode', 'mean', 'std', 'gmean', 'hmean', 'variation',
        'entropy', 'skew' and 'kurtosis'.

        Other appropriate summary statistics listed under scipy.stats could be used
        by passing in a callable with its parameters encapsulated:
        https://docs.scipy.org/doc/scipy/reference/stats.html#statistical-functions
        For example, to compute the 3rd k-statistic, construct a callable and pass ``third_kstat`` as the argument:

        .. code-block:: python

            third_kstat = lambda array: scipy.stats.kstat(array, n=3)
            roi_medians = roiwise_stats_indiv(subject_id_list, fs_dir, base_feature,
                                              chosen_roi_stats=third_kstat,
                                              atlas=atlas, smoothing_param=fwhm,
                                              out_dir=None, return_results=True)

        Other possible options could be a trimmed mean estimator with 5% of outliers
        removed, or the 3rd k-statistic:

        .. code-block:: python

            trimmed_mean = lambda array: scipy.stats.trim_mean(array, proportiontocut=0.05)
            third_kstat  = lambda array: scipy.stats.kstat(array, n=3)

        Note: 'hmean' requires all values to be positive.

    atlas : str
        Name of the atlas whose parcellation is to be used.
        Available choices for cortical parcellation: ['fsaverage', 'glasser2016'].
        Volumetric whole-brain atlases will be added soon.

    smoothing_param : scalar
        Smoothing parameter, which could be fwhm for Freesurfer cortical features,
        or another parameter relevant to the chosen base_feature.
        Default: assumed as fwhm=10mm for the default feature choice 'thickness'

    node_size : scalar, optional
        Parameter to indicate the size of the ROIs, subparcels or patches, depending on the type of atlas or feature.
        Not implemented.

    out_dir : str, optional
        Path to output directory to store results.
        Default: None, results are returned, but not saved to disk.
        If this is None, return_results must be true.

    return_results : bool
        Flag indicating whether to keep the results to be returned to the caller.
        Setting this to False helps save memory (results are not retained across
        all subjects and weight combinations), e.g. when running from the
        command line interface (or on HPC). Default: False.
        If this is False, out_dir must be specified to save the results to disk.

    Returns
    -------
    roi_stats_all : dict, None
        If return_results is True, this will be a dictionary keyed by subject ID.
        The value of each key roi_stats_all[subject] is
        a numpy array of length k, with k = number of nodes in the atlas parcellation.
        If return_results is False, this will be None, which is the default.
    """

    check_params_single_edge(base_feature, input_dir, atlas, smoothing_param,
                             node_size, out_dir, return_results)
    subject_id_list, num_subjects, max_id_width, nd_id = check_subjects(
        subject_id_list)
    stat_func_list, stat_func_names, num_stats, \
        max_stat_width, nd_st = check_stat_methods(chosen_roi_stats)

    if base_feature in cfg.features_cortical:
        uniq_rois, centroids, roi_labels = roi_labels_centroids(atlas)
        null_roi_to_be_ignored = cfg.null_roi_name
    elif base_feature in cfg.features_volumetric:
        uniq_rois, centroids, roi_labels = volumetric_roi_info(atlas)
        null_roi_to_be_ignored = cfg.null_roi_index
    else:
        raise ValueError('Unrecognized type of base_feature! Must be one of {}'
                         ''.format(cfg.base_feature_list))

    print('\nProcessing {} features resampled to {} atlas,'
          ' smoothed at {} with node size {}'.format(base_feature, atlas,
                                                     smoothing_param,
                                                     node_size))

    if return_results:
        roi_stats_all = dict()
    else:
        roi_stats_all = None
        if out_dir is None:
            raise ValueError(
                'When return_results=False, out_dir must be specified '
                'to be able to save the results.')
        out_dir = Path(out_dir)  # ensure a pathlib.Path for exists()/mkdir()
        if not out_dir.exists():
            out_dir.mkdir(exist_ok=True, parents=True)

    for sub_idx, subject in enumerate(subject_id_list):

        try:
            features = import_features(input_dir,
                                       [subject],
                                       base_feature,
                                       atlas=atlas,
                                       fwhm=smoothing_param)
        except Exception as exc:
            raise IOError('Unable to read {} features for {}'
                          ''.format(base_feature, subject)) from exc

        data, rois = mask_background_roi(features[subject], roi_labels,
                                         null_roi_to_be_ignored)

        for ss, stat_func in enumerate(stat_func_list):
            sys.stdout.write('\nProcessing id {sid:{id_width}} '
                             '({sidnum:{nd_id}}/{numsub:{nd_id}}) -- '
                             'statistic {stname:{stat_name_width}} '
                             '({statnum:{nd_st}}/{numst:{nd_st}})'
                             ' :'.format(sid=subject,
                                         sidnum=sub_idx + 1,
                                         numsub=num_subjects,
                                         stname=stat_func_names[ss],
                                         statnum=ss + 1,
                                         numst=num_stats,
                                         id_width=max_id_width,
                                         stat_name_width=max_stat_width,
                                         nd_id=nd_id,
                                         nd_st=nd_st))

            try:
                roi_stats = calc_roi_statistics(data, rois, uniq_rois,
                                                stat_func)
                expt_id_no_network = stamp_experiment(base_feature,
                                                      stat_func_names[ss],
                                                      atlas, smoothing_param,
                                                      node_size)
                save_summary_stats(roi_stats, uniq_rois, stat_func_names[ss],
                                   out_dir, subject, expt_id_no_network)
                sys.stdout.write('Done.')
            except KeyboardInterrupt:
                print('Exiting on keyboard interrupt! \n'
                      'Abandoning the remaining processing for {} stats:\n'
                      '{}.'.format(num_stats - ss, stat_func_names[ss:]))
                sys.exit(1)
            except Exception:
                traceback.print_exc()
                logging.debug('Error : unable to compute roi-wise {} for {}.'
                              ' Skipping it.'.format(stat_func_names[ss],
                                                     subject))

        if return_results:
            roi_stats_all[subject] = roi_stats

    return roi_stats_all
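
A short usage sketch for the function above, built from its own docstring; subject_id_file and fs_dir are hypothetical placeholders for your own paths:

from scipy import stats

# trimmed mean with 5% of outliers removed, as suggested in the docstring
trimmed_mean = lambda arr: stats.trim_mean(arr, proportiontocut=0.05)
roi_stats = roiwise_stats_indiv(subject_id_file, fs_dir,
                                chosen_roi_stats=trimmed_mean,
                                atlas='fsaverage', smoothing_param=10,
                                out_dir=None, return_results=True)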
Example #5
# ---------------------------------#---------------------------------#-------------

from os.path import join as pjoin

import numpy as np

from graynet.utils import import_features, check_subjects

trim_percentile = 5

in_dir_vbm = '/Users/Reddy/dev/graynet/example_data/volumetric_CAT12'
sub_list, _, _, _ = check_subjects(pjoin(in_dir_vbm, 'sub_id_list.txt'))

base_feature = 'spm_cat_gmdensity'

features = import_features(in_dir_vbm, sub_list, base_feature)
print('\nThe min/max per subject (after trimming {}% outliers):'.format(
    trim_percentile))

min_all = np.inf
max_all = -np.inf
for sid, data in features.items():
    vec = np.array(data).flatten()
    min_sub = np.percentile(vec, trim_percentile)
    max_sub = np.percentile(vec, 100 - trim_percentile)

    print('{} {} {}'.format(sid, min_sub, max_sub))
    if min_sub < min_all:
        min_all = min_sub

    if max_sub > max_all:
        max_all = max_sub
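
The pooled range accumulated above matches the shape that the edge_range argument expects in the earlier hiwenet.extract() calls. A hedged follow-up sketch, reusing min_all and max_all from this script; the dict form keyed by feature follows Example #2:

print('\nPooled range across subjects: {} {}'.format(min_all, max_all))

# hypothetical hand-off: use the trimmed pooled range as this feature's edge_range
edge_range_dict = {base_feature: (min_all, max_all)}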