def roiwise_stats_indiv(subject_id_list,
                        input_dir,
                        base_feature=cfg.default_feature_single_edge,
                        chosen_roi_stats=cfg.default_roi_statistic,
                        atlas=cfg.default_atlas,
                        smoothing_param=cfg.default_smoothing_param,
                        node_size=cfg.default_node_size,
                        out_dir=None,
                        return_results=False):
    """
    Computes the chosen summary statistics within each ROI.

    These summary stats (such as median) can serve as a baseline for the
    network-level values produced by graynet.

    Options for summary statistics include 'median', 'entropy', 'kurtosis' and
    any other appropriate summary statistic listed under scipy.stats:
    https://docs.scipy.org/doc/scipy/reference/stats.html#statistical-functions

    Parameters
    ----------
    subject_id_list : str or list
        Path to a file containing subject IDs, or a list of subject IDs.

    input_dir : str
        Path to the input directory where features can be read. For example,
        this can be Freesurfer's SUBJECTS_DIR, where the output of its
        processing is stored, or another directory with a structure that
        graynet can parse.

    base_feature : str
        Specific type of feature to read for each subject from the input
        directory.

    chosen_roi_stats : list of str or callable
        If requested, graynet will compute the chosen summary statistics (such
        as median) within each ROI of the chosen parcellation (and network
        weight computation is skipped). Default: 'median'. Supported summary
        statistics include 'median', 'mode', 'mean', 'std', 'gmean', 'hmean',
        'variation', 'entropy', 'skew' and 'kurtosis'. Other appropriate
        summary statistics listed under scipy.stats can be used by passing in
        a callable with their parameters encapsulated:
        https://docs.scipy.org/doc/scipy/reference/stats.html#statistical-functions

        For example, to compute the 3rd k-statistic, you could construct a
        callable and pass it in via this argument:

        .. code-block:: python

            third_kstat = lambda array: scipy.stats.kstat(array, n=3)
            roi_kstats = roiwise_stats_indiv(subject_id_list, fs_dir,
                                             base_feature,
                                             chosen_roi_stats=third_kstat,
                                             atlas=atlas,
                                             smoothing_param=fwhm,
                                             out_dir=None,
                                             return_results=True)

        Another possible option is a trimmed mean estimator, with 5% of the
        outliers removed:

        .. code-block:: python

            trimmed_mean = lambda array: scipy.stats.trim_mean(
                array, proportiontocut=0.05)

        Note: 'hmean' requires all values to be positive.

    atlas : str
        Name of the atlas whose parcellation is to be used. Available choices
        for cortical parcellation: ['fsaverage', 'glasser2016']. Volumetric
        whole-brain atlases will be added soon.

    smoothing_param : scalar
        Smoothing parameter, which could be FWHM for Freesurfer cortical
        features, or another parameter relevant to the chosen base_feature.
        Default: assumed to be fwhm=10mm for the default feature choice
        'thickness'.

    node_size : scalar, optional
        Parameter to indicate the size of the ROIs, subparcels or patches,
        depending on the type of atlas or feature. Not implemented.

    out_dir : str, optional
        Path to the output directory to store results.
        Default: None; results are returned, but not saved to disk.
        If this is None, return_results must be True.

    return_results : bool
        Flag indicating whether to return the results to the caller. Setting
        it to False helps to save memory (as results are not retained for all
        subjects and weight combinations), e.g. when running from the command
        line interface (or on an HPC). Default: False.
        If this is False, out_dir must be specified to save the results to
        disk.

    Returns
    -------
    roi_stats_all : dict, None
        If return_results is True, this will be a dictionary keyed in by
        subject_id. The value for each subject, roi_stats_all[subject], is a
        numpy array of length k, with k = number of nodes in the atlas
        parcellation. If return_results is False, this will be None, which is
        the default.

    """

    check_params_single_edge(base_feature, input_dir, atlas, smoothing_param,
                             node_size, out_dir, return_results)
    subject_id_list, num_subjects, max_id_width, nd_id = check_subjects(
        subject_id_list)
    stat_func_list, stat_func_names, num_stats, max_stat_width, nd_st = \
        check_stat_methods(chosen_roi_stats)

    # roi_labels, ctx_annot = parcellate.freesurfer_roi_labels(atlas)
    # uniq_rois, roi_size, num_nodes = roi_info(roi_labels)
    uniq_rois, centroids, roi_labels = parcellate.roi_labels_centroids(atlas)

    print('\nProcessing {} features resampled to {} atlas,'
          ' smoothed at {} with node size {}'
          ''.format(base_feature, atlas, smoothing_param, node_size))

    if return_results:
        roi_stats_all = dict()
    else:
        roi_stats_all = None

    if out_dir is None:
        if not return_results:
            raise ValueError('When return_results=False, out_dir must be '
                             'specified to be able to save the results.')
    elif not pexists(out_dir):
        os.makedirs(out_dir)

    for sub_idx, subject in enumerate(subject_id_list):

        try:
            features = import_features(input_dir, [subject, ], base_feature,
                                       atlas=atlas, fwhm=smoothing_param)
        except Exception as exc:
            raise IOError('Unable to read {} features for {}'
                          ''.format(base_feature, subject)) from exc

        data, rois = mask_background_roi(features[subject], roi_labels,
                                         cfg.null_roi_name)

        for ss, stat_func in enumerate(stat_func_list):
            sys.stdout.write(
                '\nProcessing id {sid:{id_width}}'
                ' ({sidnum:{nd_id}}/{numsub:{nd_id}}) -- '
                'statistic {stname:{stat_name_width}}'
                ' ({statnum:{nd_st}}/{numst:{nd_st}}) :'
                ''.format(sid=subject, sidnum=sub_idx + 1, numsub=num_subjects,
                          stname=stat_func_names[ss], statnum=ss + 1,
                          numst=num_stats, id_width=max_id_width,
                          stat_name_width=max_stat_width,
                          nd_id=nd_id, nd_st=nd_st))

            try:
                roi_stats = calc_roi_statistics(data, rois, uniq_rois,
                                                stat_func)
                expt_id_no_network = stamp_experiment(base_feature,
                                                      stat_func_names[ss],
                                                      atlas, smoothing_param,
                                                      node_size)
                # saving is skipped when out_dir is None (results returned only)
                if out_dir is not None:
                    save_summary_stats(roi_stats, uniq_rois,
                                       stat_func_names[ss], out_dir, subject,
                                       expt_id_no_network)
                sys.stdout.write('Done.')
            except KeyboardInterrupt:
                print('Exiting on keyboard interrupt! \n'
                      'Abandoning the remaining processing for {} stats:\n'
                      '{}.'.format(num_stats - ss, stat_func_names[ss:]))
                sys.exit(1)
            except Exception:
                traceback.print_exc()
                logging.debug('Error : unable to compute roi-wise {} for {}.'
                              ' Skipping it.'.format(stat_func_names[ss],
                                                     subject))

        if return_results:
            roi_stats_all[subject] = roi_stats

    return roi_stats_all
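
# A minimal usage sketch for roiwise_stats_indiv (the paths below are
# hypothetical, chosen only for illustration): compute ROI-wise median and
# kurtosis on the default feature, saving to disk and returning the results.
#
#   roi_stats = roiwise_stats_indiv(subject_id_list='subject_ids.txt',
#                                   input_dir='/data/freesurfer_subjects',
#                                   chosen_roi_stats=['median', 'kurtosis'],
#                                   atlas='fsaverage', smoothing_param=10,
#                                   out_dir='/tmp/graynet_roi_stats',
#                                   return_results=True)
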
def per_subject_multi_edge(input_dir,
                           base_feature_list,
                           roi_labels,
                           centroids,
                           weight_method_list,
                           summary_stats,
                           summary_stat_names,
                           atlas,
                           smoothing_param,
                           node_size,
                           num_bins,
                           edge_range_dict,
                           out_dir,
                           return_results,
                           overwrite_results,
                           pretty_print_options,
                           # purposefully left last to enable partial function creation
                           subject=None):
    """Extracts a given set of weights for one subject."""

    if subject is None:
        return

    if return_results:
        edge_weights_all = dict()
    else:
        edge_weights_all = None

    max_id_width, nd_id, num_weights, max_wtname_width, nd_wm = \
        pretty_print_options

    for ww, weight_method in enumerate(weight_method_list):

        expt_id_multi = stamp_expt_multiedge(base_feature_list, atlas,
                                             smoothing_param, node_size,
                                             weight_method)
        out_path_multigraph = make_output_path_graph(
            out_dir, subject, [expt_id_multi, 'multigraph'])
        # skipping the computation if the file exists already
        if not overwrite_results and isfile(out_path_multigraph) \
                and getsize(out_path_multigraph) > 0:
            print('\nMultigraph exists already at\n\t{}\n'
                  ' skipping its computation!'.format(out_path_multigraph))
            multigraph = None  # signal to re-read it from disk when needed
        else:
            multigraph = nx.MultiGraph()

            for base_feature in base_feature_list:
                # TODO refactor the block below into a compute_unigraph():
                # unigraph, weight_vec = compute_unigraph(
                #     input_dir, subject, base_feature, weight_method,
                #     roi_labels, atlas, smoothing_param, node_size, centroids,
                #     num_bins, edge_range_dict, out_dir, overwrite_results,
                #     pretty_print_options)
                # if return_results:
                #     edge_weights_all[(weight_method, base_feature,
                #                       subject)] = weight_vec
                try:
                    features = single_edge.import_features(
                        input_dir, [subject, ], base_feature,
                        fwhm=smoothing_param, atlas=atlas)
                except Exception:
                    traceback.print_exc()
                    warnings.warn('Unable to read {} features for {}\n'
                                  ' Skipping it.'.format(base_feature,
                                                         subject),
                                  UserWarning)
                    return

                data, rois = mask_background_roi(features[subject],
                                                 roi_labels,
                                                 cfg.null_roi_name)

                # unique stamp for each subject and weight
                expt_id_single = stamp_expt_weight(base_feature, atlas,
                                                   smoothing_param, node_size,
                                                   weight_method)
                sys.stdout.write(
                    '\nProcessing id {:{id_width}} --'
                    ' weight {:{wtname_width}} ({:{nd_wm}}/{:{nd_wm}})'
                    ' :'.format(subject, weight_method, ww + 1, num_weights,
                                nd_id=nd_id, nd_wm=nd_wm,
                                id_width=max_id_width,
                                wtname_width=max_wtname_width))

                # actual computation of pair-wise features
                try:
                    unigraph = hiwenet.extract(
                        data, rois, weight_method=weight_method,
                        num_bins=num_bins,
                        edge_range=edge_range_dict[base_feature],
                        return_networkx_graph=True)

                    # retrieving edge weights
                    weight_vec = np.array(list(
                        nx.get_edge_attributes(unigraph, 'weight').values()))
                    warn_nan(weight_vec)
                    if return_results:
                        edge_weights_all[(weight_method, base_feature,
                                          subject)] = weight_vec
                except (RuntimeError, RuntimeWarning) as runexc:
                    print(runexc)
                    continue  # unigraph was not computed; skip this feature
                except KeyboardInterrupt:
                    print('Exiting on keyboard interrupt! \n'
                          'Abandoning the remaining processing ')
                    sys.exit(1)
                except Exception:
                    print('Unable to extract {} weights for {} for {}'
                          ''.format(weight_method, base_feature, subject))
                    traceback.print_exc()
                    continue  # unigraph was not computed; skip this feature

                print('Done.')
                # TODO consider extracting some network features upon user
                #   request

                add_nodal_positions(unigraph, centroids)
                single_edge.save_graph(unigraph, out_dir, subject,
                                       expt_id_single)

                # adding edges/weights from each feature to the multigraph;
                # this also encodes their sources
                for u, v in unigraph.edges():
                    multigraph.add_edge(u, v,
                                        weight=unigraph[u][v]['weight'],
                                        base_feature=base_feature)

            # adding position info to nodes (for visualization later)
            add_nodal_positions(multigraph, centroids)
            save_graph(multigraph, out_path_multigraph, 'multi-edge')

        for stat_func, stat_name in zip(summary_stats, summary_stat_names):
            # creating a single graph with a summary edge weight (like median)
            out_path_summary = make_output_path_graph(
                out_dir, subject, [expt_id_multi, stat_name, 'multigraph'])
            if not overwrite_results and isfile(out_path_summary) \
                    and getsize(out_path_summary) > 0:
                print('Summary {} of multigraph exists already at\n\t{}\n'
                      ' skipping its computation!'.format(stat_name,
                                                          out_path_summary))
            else:
                if multigraph is None:
                    multigraph = nx.read_graphml(out_path_multigraph)

                try:
                    summary_multigraph = summarize_multigraph(multigraph,
                                                              stat_func)
                    add_nodal_positions(summary_multigraph, centroids)
                    save_graph(summary_multigraph, out_path_summary,
                               '{} summary'.format(stat_name))
                except Exception:
                    print('Summary {} could not be computed - skipping!'
                          ''.format(stat_name))
                    traceback.print_exc()

    return edge_weights_all
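
# Why `subject` is the last parameter above: it lets callers bind every other
# argument with functools.partial and map the resulting single-argument
# callable over a subject list, e.g. in parallel. A minimal sketch (the bound
# names are assumed to exist in the caller's scope; pool size is arbitrary):
#
#   from functools import partial
#   from multiprocessing import Pool
#
#   per_subject = partial(per_subject_multi_edge, input_dir, base_feature_list,
#                         roi_labels, centroids, weight_method_list,
#                         summary_stats, summary_stat_names, atlas,
#                         smoothing_param, node_size, num_bins, edge_range_dict,
#                         out_dir, return_results, overwrite_results,
#                         pretty_print_options)
#   with Pool(processes=4) as pool:
#       results = pool.map(per_subject, subject_id_list)
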
def extract_per_subject(input_dir,
                        base_feature,
                        roi_labels,
                        centroids,
                        weight_method_list,
                        atlas,
                        smoothing_param,
                        node_size,
                        num_bins,
                        edge_range,
                        out_dir,
                        return_results,
                        pretty_print_options,
                        # purposefully left last to enable partial function creation
                        subject=None):
    """
    Extracts a given set of weights for one subject.

    Parameters
    ----------
    subject
    input_dir
    base_feature
    roi_labels
    centroids
    weight_method_list
    atlas
    smoothing_param
    node_size
    num_bins
    edge_range
    out_dir
    return_results
    pretty_print_options

    Returns
    -------
    edge_weights_all : dict or None
        Dictionary of edge-weight vectors keyed by (weight_method, subject)
        if return_results is True; None otherwise.

    """

    if subject is None:
        return

    try:
        features = import_features(input_dir, [subject, ], base_feature,
                                   fwhm=smoothing_param, atlas=atlas)
    except Exception:
        traceback.print_exc()
        warnings.warn('Unable to read {} features for {}\n'
                      ' Skipping it.'.format(base_feature, subject),
                      UserWarning)
        return

    data, rois = mask_background_roi(features[subject], roi_labels,
                                     cfg.null_roi_name)

    max_id_width, nd_id, num_weights, max_wtname_width, nd_wm = \
        pretty_print_options

    if return_results:
        edge_weights_all = dict()
    else:
        edge_weights_all = None

    for ww, weight_method in enumerate(weight_method_list):
        # unique stamp for each subject and weight
        expt_id = stamp_expt_weight(base_feature, atlas, smoothing_param,
                                    node_size, weight_method)
        sys.stdout.write(
            '\nProcessing id {:{id_width}} --'
            ' weight {:{wtname_width}} ({:{nd_wm}}/{:{nd_wm}})'
            ' :'.format(subject, weight_method, ww + 1, num_weights,
                        nd_id=nd_id, nd_wm=nd_wm, id_width=max_id_width,
                        wtname_width=max_wtname_width))

        # actual computation of pair-wise features
        try:
            graph = hiwenet.extract(data, rois,
                                    weight_method=weight_method,
                                    num_bins=num_bins,
                                    edge_range=edge_range,
                                    return_networkx_graph=True)

            # retrieving edge weights
            weight_vec = np.array(list(
                nx.get_edge_attributes(graph, 'weight').values()))
            warn_nan(weight_vec)
            # weight_vec = get_triu_handle_inf_nan(edge_weights)

            # adding position info to nodes (for visualization later);
            # note: graph.nodes is the accessor in networkx >= 2.0
            # (Graph.node was removed in networkx 2.4)
            for roi in centroids:
                graph.nodes[roi]['x'] = float(centroids[roi][0])
                graph.nodes[roi]['y'] = float(centroids[roi][1])
                graph.nodes[roi]['z'] = float(centroids[roi][2])

            if return_results:
                edge_weights_all[(weight_method, subject)] = weight_vec

            # saving to disk
            try:
                save(weight_vec, out_dir, subject, expt_id)
                save_graph(graph, out_dir, subject, expt_id)
            except Exception:
                raise IOError('Unable to save the network/vectorized weights'
                              ' to:\n{}'.format(out_dir))

        except (RuntimeError, RuntimeWarning) as runexc:
            print(runexc)
        except KeyboardInterrupt:
            print('Exiting on keyboard interrupt! \n'
                  'Abandoning the remaining processing for {} weights:\n'
                  '{}.'.format(num_weights - ww, weight_method_list[ww:]))
            sys.exit(1)
        except Exception:
            print('Unable to extract {} features for {}'
                  ''.format(weight_method, subject))
            traceback.print_exc()

        sys.stdout.write('Done.')

    return edge_weights_all
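
# A minimal sketch of consuming saved outputs (the file name below is
# hypothetical; actual names are built by make_output_path_graph and
# stamp_expt_weight). Multigraphs are stored in GraphML, as
# per_subject_multi_edge re-reads them with nx.read_graphml, so the edge
# weights can be recovered for downstream analysis:
#
#   import networkx as nx
#   g = nx.read_graphml('/tmp/graynet_out/sub01_multigraph.graphml')
#   weights = [d['weight'] for _, _, d in g.edges(data=True)]
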