def extract_multiedge(subject_id_list,
                      input_dir,
                      base_feature_list=cfg.default_features_multi_edge,
                      weight_method_list=cfg.default_weight_method,
                      summary_stats=cfg.multi_edge_summary_func_default,
                      num_bins=cfg.default_num_bins,
                      edge_range_dict=cfg.edge_range_predefined,
                      atlas=cfg.default_atlas,
                      smoothing_param=cfg.default_smoothing_param,
                      node_size=cfg.default_node_size,
                      out_dir=None,
                      return_results=False,
                      overwrite_results=False,
                      num_procs=cfg.default_num_procs):
    """
    Extracts weighted networks (matrices of pair-wise ROI distances) from
    multiple gray matter features based on Freesurfer processing.

    Parameters
    ----------
    subject_id_list : str or list
        must be path to a file containing subject IDs, or a list of subject IDs

    input_dir : str
        Path to the input directory where features can be read.
        For example, this can be Freesurfer's SUBJECTS_DIR, where output
        processing is stored.
        Or another directory with a structure that graynet can parse.

    base_feature_list : list
        Set of features that drive the different edges between each pair of
        ROIs. For example, if you choose thickness and pial_curv, each pair of
        ROIs will have two edges. This multi-edge network can be turned into a
        single network by averaging the weights from the different individual
        networks.

    weight_method_list : list of str, optional
        Type of distance (or metric) to compute between the pair of histograms.
        It must be one of the following methods:

        - 'chebyshev'
        - 'chebyshev_neg'
        - 'chi_square'
        - 'correlate'
        - 'correlate_1'
        - 'cosine'
        - 'cosine_1'
        - 'cosine_2'
        - 'cosine_alt'
        - 'euclidean'
        - 'fidelity_based'
        - 'histogram_intersection'
        - 'histogram_intersection_1'
        - 'jensen_shannon'
        - 'kullback_leibler'
        - 'manhattan'
        - 'minowski'
        - 'noelle_1'
        - 'noelle_2'
        - 'noelle_3'
        - 'noelle_4'
        - 'noelle_5'
        - 'relative_bin_deviation'
        - 'relative_deviation'

        Note that only the following are *metrics*:

        - 'manhattan'
        - 'minowski'
        - 'euclidean'
        - 'noelle_2'
        - 'noelle_4'
        - 'noelle_5'

        The following are *semi- or quasi-metrics*:

        - 'kullback_leibler'
        - 'jensen_shannon'
        - 'chi_square'
        - 'chebyshev'
        - 'cosine_1'
        - 'chebyshev_neg'
        - 'correlate_1'
        - 'histogram_intersection_1'
        - 'relative_deviation'
        - 'relative_bin_deviation'
        - 'noelle_1'
        - 'noelle_3'

        The following are classified as *similarity functions*:

        - 'histogram_intersection'
        - 'correlate'
        - 'cosine'
        - 'cosine_2'
        - 'cosine_alt'
        - 'fidelity_based'

        *Default* choice: 'manhattan'.

    summary_stats : str or list of str
        A string, or list of strings, each representing a method (like
        'median', 'prod' or 'max') to compute a summary statistic from the
        array of multiple weights computed. Each must be available as a member
        of numpy or scipy.stats.

    num_bins : int
        Number of histogram bins to use when computing pair-wise weights based
        on histogram distance. Default: 25

    edge_range_dict : dict
        Dict mapping each feature to the range of edges (two finite values)
        within which to build the histogram, e.g. ``--edge_range 0 5`` on the
        command line. This can be helpful (and important) to ensure
        correspondence across multiple invocations of graynet (e.g. for
        different subjects), in terms of the range across all bins as well as
        the individual bin edges.

        Default :

        - ( 0.0, 5.0) for ``freesurfer_thickness`` and
        - (-0.3, 0.3) for ``freesurfer_curv``.

    atlas : str
        Name of the atlas whose parcellation is to be used.
        Choices for cortical parcellation: ['fsaverage', 'glasser2016'], which
        are primarily cortical. Volumetric whole-brain atlases will be added
        soon.

    smoothing_param : scalar
        Smoothing parameter, which could be fwhm for Freesurfer cortical
        features, or another parameter relevant to the chosen
        base_feature_list.
        Default: assumed as fwhm=10mm for the default feature choice 'thickness'

    node_size : scalar, optional
        Parameter to indicate the size of the ROIs, subparcels or patches,
        depending on the type of atlas or feature. This feature is not
        implemented yet; it is just a placeholder to enable default computation.

    out_dir : str, optional
        Path to output directory to store results.
        Default: None, in which case results are returned, but not saved to disk.
        If this is None, return_results must be True.

    return_results : bool
        Flag to indicate whether to return the results.
        This flag helps to reduce memory requirements when the number of nodes
        in a parcellation, or the number of subjects or weight methods, is
        large, as it avoids retaining results for all combinations when
        running from the command line interface (or on HPC).
        Default: False.
        If this is False, out_dir must be specified to save the results to disk.

    overwrite_results : bool
        Flag to request overwriting of existing results, in case of reruns or
        failed jobs. By default, if the expected output file exists and is of
        non-zero size, its computation is skipped (assuming the file is
        complete, usable and not corrupted).

    num_procs : int
        Number of parallel processes to use to speed up computation.

    Returns
    -------
    edge_weights_all : dict, None
        If return_results is True, this will be a dictionary keyed in by a
        tuple: (weight method, subject_ID).
        The value of each edge_weights_all[(weight method, subject_ID)] is a
        numpy array of length p = k*(k-1)/2, with k = number of nodes in the
        atlas parcellation.
        If return_results is False, this will be None, which is the default.
    """

    # All the checks must happen here, as this is a key function in the API
    check_params_multiedge(base_feature_list, input_dir, atlas, smoothing_param,
                           node_size, out_dir, return_results)
    atlas = check_atlas(atlas)

    subject_id_list, num_subjects, max_id_width, nd_id = check_subjects(
            subject_id_list)

    num_bins = check_num_bins(num_bins)
    edge_range_dict = check_edge_range_dict(edge_range_dict, base_feature_list)
    weight_method_list, num_weights, max_wtname_width, nd_wm = check_weights(
            weight_method_list)

    # validating the choice and getting a callable
    summary_stats, summary_stat_names, _, _, _ = check_stat_methods(
            summary_stats)

    num_procs = check_num_procs(num_procs)
    pretty_print_options = (max_id_width, nd_id, num_weights,
                            max_wtname_width, nd_wm)

    # roi_labels, ctx_annot = parcellate.freesurfer_roi_labels(atlas)
    # uniq_rois, roi_size, num_nodes = roi_info(roi_labels)
    uniq_rois, centroids, roi_labels = parcellate.roi_labels_centroids(atlas)

    print('\nProcessing {} features resampled to {} atlas,'
          ' smoothed at {} with node size {}'.format(base_feature_list, atlas,
                                                     smoothing_param, node_size))

    if not return_results:
        if out_dir is None:
            raise ValueError(
                    'When return_results=False, out_dir must be specified '
                    'to be able to save the results.'
                    )
        if not pexists(out_dir):
            os.mkdir(out_dir)

    partial_func_extract = partial(per_subject_multi_edge, input_dir,
                                   base_feature_list, roi_labels, centroids,
                                   weight_method_list, summary_stats,
                                   summary_stat_names, atlas, smoothing_param,
                                   node_size, num_bins, edge_range_dict,
                                   out_dir, return_results, overwrite_results,
                                   pretty_print_options)

    if num_procs > 1:
        chunk_size = int(np.ceil(num_subjects / num_procs))
        with Manager():
            with Pool(processes=num_procs) as pool:
                edge_weights_list_dicts = pool.map(partial_func_extract,
                                                   subject_id_list, chunk_size)
    else:
        # reverting to sequential processing
        edge_weights_list_dicts = [partial_func_extract(subject=sub_id)
                                   for sub_id in subject_id_list]

    if return_results:
        edge_weights_all = dict()
        for combo in edge_weights_list_dicts:
            # each element from the output of the parallel loop is a dict
            # keyed by (weight, subject)
            edge_weights_all.update(combo)
    else:
        edge_weights_all = None

    print('\ngraynet computation done.')
    return edge_weights_all
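
# The sketch below illustrates a typical call to extract_multiedge() defined
# above. It is a minimal, hypothetical example: the subject IDs, SUBJECTS_DIR
# path, and the feature/weight/range choices are placeholders that simply
# mirror the defaults documented in the docstring, not a prescribed workflow.
def _example_extract_multiedge():
    """Hypothetical usage sketch; not part of the graynet API."""

    edge_weights = extract_multiedge(
            subject_id_list=['sub01', 'sub02'],  # hypothetical IDs
            input_dir='/data/freesurfer',        # hypothetical SUBJECTS_DIR
            base_feature_list=['freesurfer_thickness', 'freesurfer_curv'],
            weight_method_list=['manhattan', ],
            summary_stats=['prod', ],
            num_bins=25,
            edge_range_dict={'freesurfer_thickness': (0.0, 5.0),
                             'freesurfer_curv'     : (-0.3, 0.3)},
            atlas='fsaverage',
            smoothing_param=10,
            out_dir=None,  # results returned, not saved to disk
            return_results=True)

    # keyed by (weight method, subject ID); each value holds k*(k-1)/2 weights
    for (weight, subject), weight_vec in edge_weights.items():
        print('{} {} : {} edge weights'.format(weight, subject, weight_vec.size))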
# edge ranges per feature; the freesurfer_thickness entry matches the
# default range documented for that feature
edge_range_dict = {'freesurfer_thickness': ( 0.0, 5.0),
                   'freesurfer_curv'     : (-0.3, +0.3),
                   'freesurfer_sulc'     : (-1.5, +1.5),
                   'freesurfer_area'     : ( 0.0, 1.5)}

expt_prefix = 'thk_curv_sulc_area_nbins25'

# You can choose only one or multiple, but keep them enclosed as a list or array.
histogram_dist = np.array(['chebyshev', 'chi_square', 'correlate', 'cosine',
                           'euclidean', 'histogram_intersection',
                           'jensen_shannon', 'manhattan', 'minowski',
                           'relative_deviation'])

summary_stat = 'prod'
file_ext = 'multigraph_graynet.graphml'

atlas_rois, centroids, vertex_labels = parcellate.roi_labels_centroids(atlas)


def get_weights_order(graph, nodes=atlas_rois):
    """Returns weights in the order of nodes requested."""

    # edge order is not guaranteed below
    edge_dict = nx.get_edge_attributes(graph, 'weight')
    # so ordering it here, to ensure correspondence across subjects
    weights = [graph[x][y]['weight']
               for x in nodes for y in nodes
               if (x, y) in edge_dict]

    return np.array(weights)
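
# A minimal sketch of how get_weights_order() can assemble a subject-by-edge
# matrix with a consistent edge ordering across subjects. The graphml paths
# are hypothetical placeholders; nx.read_graphml() is the standard networkx
# reader for the .graphml outputs referenced by file_ext above, and node
# names in those files are assumed to match the entries of atlas_rois.
def build_subject_matrix(graphml_paths, nodes=atlas_rois):
    """Hypothetical helper: stacks ordered edge weights, one row per subject."""

    rows = list()
    for path in graphml_paths:
        graph = nx.read_graphml(path)
        rows.append(get_weights_order(graph, nodes))

    # rows have equal length, which the fixed node ordering guarantees
    return np.vstack(rows)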
def roiwise_stats_indiv(subject_id_list,
                        input_dir,
                        base_feature=cfg.default_feature_single_edge,
                        chosen_roi_stats=cfg.default_roi_statistic,
                        atlas=cfg.default_atlas,
                        smoothing_param=cfg.default_smoothing_param,
                        node_size=cfg.default_node_size,
                        out_dir=None,
                        return_results=False):
    """
    Computes the chosen summary statistics within each ROI.

    These summary stats (such as median) can serve as a baseline for
    network-level values produced by graynet.

    Options for summary statistics include 'median', 'entropy', 'kurtosis' and
    any other appropriate summary statistics listed under scipy.stats:
    https://docs.scipy.org/doc/scipy/reference/stats.html#statistical-functions

    Parameters
    ----------
    subject_id_list : str or list
        must be path to a file containing subject IDs, or a list of subject IDs

    input_dir : str
        Path to the input directory where features can be read.
        For example, this can be Freesurfer's SUBJECTS_DIR, where output
        processing is stored.
        Or another directory with a structure that graynet can parse.

    base_feature : str
        Specific type of feature to read for each subject from the input
        directory.

    chosen_roi_stats : list of str or callable
        If requested, graynet will compute the chosen summary statistics (such
        as median) within each ROI of the chosen parcellation (and network
        weight computation is skipped).
        Default: 'median'. Supported summary statistics include 'median',
        'mode', 'mean', 'std', 'gmean', 'hmean', 'variation', 'entropy',
        'skew' and 'kurtosis'.
        Other appropriate summary statistics listed under scipy.stats can be
        used by passing in a callable with its parameters encapsulated:
        https://docs.scipy.org/doc/scipy/reference/stats.html#statistical-functions
        For example, if you would like to compute the 3rd k-statistic, you
        could construct a callable ``third_kstat`` and pass it in as the
        argument:

        .. code-block:: python

            third_kstat = lambda array: scipy.stats.kstat(array, n=3)
            roi_kstats = roiwise_stats_indiv(subject_id_list, fs_dir,
                                             base_feature,
                                             chosen_roi_stats=third_kstat,
                                             atlas=atlas,
                                             smoothing_param=fwhm,
                                             out_dir=None,
                                             return_results=True)

        Other possible options could be a trimmed mean estimator with 5% of
        outliers removed, or the 3rd k-statistic:

        .. code-block:: python

            trimmed_mean = lambda array: scipy.stats.trim_mean(array,
                                                               proportiontocut=0.05)
            third_kstat = lambda array: scipy.stats.kstat(array, n=3)

        Note: 'hmean' requires all values to be positive.

    atlas : str
        Name of the atlas whose parcellation is to be used.
        Available choices for cortical parcellation: ['fsaverage', 'glasser2016'].
        Volumetric whole-brain atlases will be added soon.

    smoothing_param : scalar
        Smoothing parameter, which could be fwhm for Freesurfer cortical
        features, or another parameter relevant to the chosen base_feature.
        Default: assumed as fwhm=10mm for the default feature choice 'thickness'

    node_size : scalar, optional
        Parameter to indicate the size of the ROIs, subparcels or patches,
        depending on the type of atlas or feature. Not implemented.

    out_dir : str, optional
        Path to output directory to store results.
        Default: None, in which case results are returned, but not saved to disk.
        If this is None, return_results must be True.

    return_results : bool
        Flag indicating whether to keep and return the results to the caller.
        Helps to save memory (as it avoids retaining results for all subjects
        and statistics) when running from the command line interface (or on HPC).
        Default: False.
        If this is False, out_dir must be specified to save the results to disk.
    Returns
    -------
    roi_stats_all : dict, None
        If return_results is True, this will be a dictionary keyed in by
        subject_ID. The value of each roi_stats_all[subject] is a numpy array
        of length k, with k = number of nodes in the atlas parcellation.
        If return_results is False, this will be None, which is the default.
    """

    check_params_single_edge(base_feature, input_dir, atlas, smoothing_param,
                             node_size, out_dir, return_results)
    subject_id_list, num_subjects, max_id_width, nd_id = check_subjects(
            subject_id_list)
    stat_func_list, stat_func_names, num_stats, max_stat_width, nd_st = \
        check_stat_methods(chosen_roi_stats)

    # roi_labels, ctx_annot = parcellate.freesurfer_roi_labels(atlas)
    # uniq_rois, roi_size, num_nodes = roi_info(roi_labels)
    uniq_rois, centroids, roi_labels = parcellate.roi_labels_centroids(atlas)

    print('\nProcessing {} features resampled to {} atlas,'
          ' smoothed at {} with node size {}'.format(base_feature, atlas,
                                                     smoothing_param, node_size))

    if return_results:
        roi_stats_all = dict()
    else:
        roi_stats_all = None
        if out_dir is None:
            raise ValueError('When return_results=False, out_dir must be '
                             'specified to be able to save the results.')
        if not pexists(out_dir):
            os.mkdir(out_dir)

    for sub_idx, subject in enumerate(subject_id_list):

        try:
            features = import_features(input_dir, [subject, ], base_feature,
                                       atlas=atlas, fwhm=smoothing_param)
        except Exception as exc:
            raise IOError('Unable to read {} features for {}.'
                          ''.format(base_feature, subject)) from exc

        data, rois = mask_background_roi(features[subject], roi_labels,
                                         cfg.null_roi_name)

        for ss, stat_func in enumerate(stat_func_list):
            sys.stdout.write(
                    '\nProcessing id {sid:{id_width}} '
                    '({sidnum:{nd_id}}/{numsub:{nd_id}}) -- '
                    'statistic {stname:{stat_name_width}} '
                    '({statnum:{nd_st}}/{numst:{nd_st}}) :'
                    ''.format(sid=subject, sidnum=sub_idx + 1,
                              numsub=num_subjects,
                              stname=stat_func_names[ss],
                              statnum=ss + 1, numst=num_stats,
                              id_width=max_id_width,
                              stat_name_width=max_stat_width,
                              nd_id=nd_id, nd_st=nd_st))

            try:
                roi_stats = calc_roi_statistics(data, rois, uniq_rois, stat_func)
                expt_id_no_network = stamp_experiment(base_feature,
                                                      stat_func_names[ss],
                                                      atlas, smoothing_param,
                                                      node_size)
                save_summary_stats(roi_stats, uniq_rois, stat_func_names[ss],
                                   out_dir, subject, expt_id_no_network)
                sys.stdout.write('Done.')
            except KeyboardInterrupt:
                print('Exiting on keyboard interrupt! \n'
                      'Abandoning the remaining processing for {} stats:\n'
                      '{}.'.format(num_stats - ss, stat_func_names[ss:]))
                sys.exit(1)
            except Exception:
                traceback.print_exc()
                logging.debug('Error : unable to compute roi-wise {} for {}. '
                              'Skipping it.'.format(stat_func_names[ss], subject))

        if return_results:
            roi_stats_all[subject] = roi_stats

    return roi_stats_all
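
# The sketch below demonstrates the custom-callable route described in the
# docstring above: a trimmed mean with 5% of outliers removed. The subject
# list and input directory are hypothetical placeholders.
def _example_roiwise_trimmed_mean():
    """Hypothetical usage sketch; not part of the graynet API."""

    import scipy.stats

    trimmed_mean = lambda array: scipy.stats.trim_mean(array,
                                                       proportiontocut=0.05)

    roi_stats = roiwise_stats_indiv(subject_id_list='subjects.txt',
                                    input_dir='/data/freesurfer',
                                    base_feature='freesurfer_thickness',
                                    chosen_roi_stats=trimmed_mean,
                                    atlas='fsaverage',
                                    smoothing_param=10,
                                    out_dir=None,
                                    return_results=True)

    # keyed by subject ID; each value is an array of length num. ROIs
    return roi_stats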
def extract(subject_id_list,
            input_dir,
            base_feature=cfg.default_feature_single_edge,
            weight_method_list=cfg.default_weight_method,
            num_bins=cfg.default_num_bins,
            edge_range=cfg.default_edge_range,
            atlas=cfg.default_atlas,
            smoothing_param=cfg.default_smoothing_param,
            node_size=cfg.default_node_size,
            out_dir=None,
            return_results=False,
            num_procs=cfg.default_num_procs):
    """
    Extracts weighted networks (matrix of pair-wise ROI distances) from gray
    matter features based on Freesurfer processing.

    Parameters
    ----------
    subject_id_list : str or list
        must be path to a file containing subject IDs, or a list of subject IDs

    input_dir : str
        Path to the input directory where features can be read.
        For example, this can be Freesurfer's SUBJECTS_DIR, where output
        processing is stored.
        Or another directory with a structure that graynet can parse.

    base_feature : str
        Specific type of feature to read for each subject from the input
        directory.

    weight_method_list : list of str, optional
        Type of distance (or metric) to compute between the pair of histograms.
        It must be one of the following methods:

        - 'chebyshev'
        - 'chebyshev_neg'
        - 'chi_square'
        - 'correlate'
        - 'correlate_1'
        - 'cosine'
        - 'cosine_1'
        - 'cosine_2'
        - 'cosine_alt'
        - 'euclidean'
        - 'fidelity_based'
        - 'histogram_intersection'
        - 'histogram_intersection_1'
        - 'jensen_shannon'
        - 'kullback_leibler'
        - 'manhattan'
        - 'minowski'
        - 'noelle_1'
        - 'noelle_2'
        - 'noelle_3'
        - 'noelle_4'
        - 'noelle_5'
        - 'relative_bin_deviation'
        - 'relative_deviation'

        Note that only the following are *metrics*:

        - 'manhattan'
        - 'minowski'
        - 'euclidean'
        - 'noelle_2'
        - 'noelle_4'
        - 'noelle_5'

        The following are *semi- or quasi-metrics*:

        - 'kullback_leibler'
        - 'jensen_shannon'
        - 'chi_square'
        - 'chebyshev'
        - 'cosine_1'
        - 'chebyshev_neg'
        - 'correlate_1'
        - 'histogram_intersection_1'
        - 'relative_deviation'
        - 'relative_bin_deviation'
        - 'noelle_1'
        - 'noelle_3'

        The following are classified as *similarity functions*:

        - 'histogram_intersection'
        - 'correlate'
        - 'cosine'
        - 'cosine_2'
        - 'cosine_alt'
        - 'fidelity_based'

        *Default* choice: 'manhattan'.

    num_bins : int
        Number of histogram bins to use when computing pair-wise weights based
        on histogram distance. Default: 25

    edge_range : tuple or list
        The range of edges (two finite values) within which to build the
        histogram, e.g. ``--edge_range 0 5``. This can be helpful (and
        important) to ensure correspondence across multiple invocations of
        graynet (e.g. for different subjects), in terms of the range across
        all bins as well as the individual bin edges.

        Default :

        - ( 0.0, 5.0) for ``freesurfer_thickness`` and
        - (-0.3, 0.3) for ``freesurfer_curv``.

    atlas : str
        Name of the atlas whose parcellation is to be used.
        Choices for cortical parcellation: ['fsaverage', 'glasser2016'], which
        are primarily cortical. Volumetric whole-brain atlases will be added
        soon.

    smoothing_param : scalar
        Smoothing parameter, which could be fwhm for Freesurfer cortical
        features, or another parameter relevant to the chosen base_feature.
        Default: assumed as fwhm=10mm for the default feature choice 'thickness'

    node_size : scalar, optional
        Parameter to indicate the size of the ROIs, subparcels or patches,
        depending on the type of atlas or feature. This feature is not
        implemented yet; it is just a placeholder to enable default computation.

    out_dir : str, optional
        Path to output directory to store results.
        Default: None, in which case results are returned, but not saved to disk.
        If this is None, return_results must be True.

    return_results : bool
        Flag to indicate whether to return the results.
        This flag helps to reduce memory requirements when the number of nodes
        in a parcellation, or the number of subjects or weight methods, is
        large, as it avoids retaining results for all combinations when
        running from the command line interface (or on HPC).
        Default: False.
        If this is False, out_dir must be specified to save the results to disk.

    num_procs : int
        Number of parallel processes to use to speed up computation.

    Returns
    -------
    edge_weights_all : dict, None
        If return_results is True, this will be a dictionary keyed in by a
        tuple: (weight method, subject_ID).
        The value of each edge_weights_all[(weight method, subject_ID)] is a
        numpy array of length p = k*(k-1)/2, with k = number of nodes in the
        atlas parcellation.
        If return_results is False, this will be None, which is the default.
    """

    # All the checks must happen here, as this is a key function in the API
    check_params_single_edge(base_feature, input_dir, atlas, smoothing_param,
                             node_size, out_dir, return_results)
    atlas, atlas_name = check_atlas(atlas)

    subject_id_list, num_subjects, \
        max_id_width, nd_id = check_subjects(subject_id_list)

    num_bins, edge_range = check_weight_params(num_bins, edge_range)
    weight_method_list, num_weights, \
        max_wtname_width, nd_wm = check_weights(weight_method_list)

    num_procs = check_num_procs(num_procs)
    pretty_print_options = (max_id_width, nd_id, num_weights,
                            max_wtname_width, nd_wm)

    # roi_labels, ctx_annot = freesurfer_roi_labels(atlas)
    # uniq_rois, roi_size, num_nodes = roi_info(roi_labels)

    print('\nProcessing {} features'.format(base_feature))

    if not return_results:
        if out_dir is None:
            raise ValueError('When return_results=False, out_dir must be '
                             'specified to be able to save the results.')
        if not out_dir.exists():
            out_dir.mkdir(exist_ok=True, parents=True)

    if base_feature in cfg.features_cortical:
        uniq_rois, centroids, roi_labels = roi_labels_centroids(atlas, node_size)
        partial_func_extract = partial(extract_per_subject_cortical, input_dir,
                                       base_feature, roi_labels, centroids,
                                       weight_method_list, atlas, atlas_name,
                                       smoothing_param, node_size, num_bins,
                                       edge_range, out_dir, return_results,
                                       pretty_print_options)
    elif base_feature in cfg.features_volumetric:
        uniq_rois, centroids, roi_labels = volumetric_roi_info(atlas)
        partial_func_extract = partial(extract_per_subject_volumetric, input_dir,
                                       base_feature, roi_labels, centroids,
                                       weight_method_list, atlas, atlas_name,
                                       smoothing_param, node_size, num_bins,
                                       edge_range, out_dir, return_results,
                                       pretty_print_options)
    else:
        raise NotImplementedError('Chosen feature {} is not recognized as '
                                  'either cortical or volumetric! Choose one '
                                  'from the following options: {}'
                                  ''.format(base_feature, cfg.base_feature_list))

    chunk_size = int(np.ceil(num_subjects / num_procs))
    with Manager():
        with Pool(processes=num_procs) as pool:
            edge_weights_list_dicts = pool.map(partial_func_extract,
                                               subject_id_list, chunk_size)

    if return_results:
        edge_weights_all = dict()
        for combo in edge_weights_list_dicts:
            # each element from the output of the parallel loop is a dict
            # keyed by (weight, subject)
            edge_weights_all.update(combo)
    else:
        edge_weights_all = None

    print('\ngraynet computation done.')
    return edge_weights_all
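
# A minimal usage sketch of extract() above, with a single cortical feature
# and one histogram distance. The paths and subject IDs are hypothetical;
# note that the body above treats out_dir as a pathlib.Path when saving to
# disk, which is why this sketch keeps results in memory instead.
def _example_extract():
    """Hypothetical usage sketch; not part of the graynet API."""

    edge_weights = extract(subject_id_list=['sub01', 'sub02'],
                           input_dir='/data/freesurfer',
                           base_feature='freesurfer_thickness',
                           weight_method_list=['manhattan', ],
                           num_bins=25,
                           edge_range=(0.0, 5.0),
                           atlas='fsaverage',
                           smoothing_param=10,
                           return_results=True)

    # keyed by (weight method, subject ID); each value holds k*(k-1)/2 weights
    for (weight, subject), weight_vec in edge_weights.items():
        print('{} {} : {} edge weights'.format(weight, subject, weight_vec.size))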