def test_collect_pandas_df_make(plot_switch, sql_out, nc_collect,
                                create_summary, graph_num):
    """Smoke-test netstats.collect_pandas_df_make across input-list shapes:
    a non-csv file (-1), an empty list (0), one csv (1), and two csvs."""
    base_dir = str(Path(__file__).parent / "examples")
    thr02 = (f"{base_dir}/topology/metrics_sub-0021001_modality-dwi_"
             f"nodetype-parc_model-csa_thrtype-PROP_thr-0.2.csv")

    if graph_num == -1:
        # Deliberately a NIfTI parcellation file, not a metrics csv.
        csv_paths = [f"{base_dir}/miscellaneous/002_parcels_Default.nii.gz"]
    elif graph_num == 0:
        csv_paths = []
    elif graph_num == 1:
        csv_paths = [thr02]
    else:
        csv_paths = [
            thr02,
            f"{base_dir}/topology/metrics_sub-0021001_modality-dwi_"
            f"nodetype-parc_model-csa_thrtype-PROP_thr-0.3.csv",
        ]

    combination_complete = netstats.collect_pandas_df_make(
        csv_paths, '002', None, plot_switch, nc_collect=nc_collect,
        create_summary=create_summary, sql_out=sql_out)
    assert combination_complete is True
def test_collect_pandas_df_make(plot_switch, sql_out, nc_collect,
                                create_summary, graph_num):
    """Smoke-test netstats.collect_pandas_df_make across input-list shapes.

    BUG FIX: the original body hard-coded ``plot_switch = False``, silently
    overriding the parametrized ``plot_switch`` fixture so the plotting code
    path was never actually exercised. The override is removed.

    NOTE(review): this test shares its name with an earlier definition in the
    same module; pytest collects only the later one — confirm the earlier
    duplicate should be removed or renamed.
    """
    base_dir = str(Path(__file__).parent / "examples")
    network = None
    ID = '002'
    if graph_num == -1:
        # This should raise an error but doesn't.
        net_mets_csv_list = [
            f"{base_dir}/miscellaneous/002_parcels_Default.nii.gz"
        ]
    elif graph_num == 0:
        net_mets_csv_list = []
    elif graph_num == 1:
        net_mets_csv_list = [
            f"{base_dir}/netmetrics/0021001_modality-dwi_nodetype-parc_"
            f"est-csa_thrtype-PROP_thr-0.2_net_mets.csv"
        ]
    else:
        # This was breaking.
        net_mets_csv_list = [
            f"{base_dir}/netmetrics/0021001_modality-dwi_nodetype-parc_"
            f"est-csa_thrtype-PROP_thr-0.2_net_mets.csv",
            f"{base_dir}/netmetrics/0021001_modality-dwi_nodetype-parc_"
            f"est-csa_thrtype-PROP_thr-0.3_net_mets.csv"
        ]
    combination_complete = netstats.collect_pandas_df_make(
        net_mets_csv_list, ID, network, plot_switch,
        nc_collect=nc_collect, create_summary=create_summary,
        sql_out=sql_out)
    assert combination_complete is True
def recover_missing(bad_col, bad_cols_dict, rerun_dict, modality, working_path,
                    drop_cols, frame):
    """
    Attempt to back-fill missing values for one metric column (`bad_col`) of
    `frame` from per-subject AUC csv outputs on disk, re-triggering
    `collect_pandas_df_make` wherever topology outputs are absent or
    unreadable.

    Parameters
    ----------
    bad_col : str
        Name of the metric column with missing data. The first two
        underscore-delimited tokens are treated as the atlas name.
    bad_cols_dict : dict
        Maps each bad column name to the list of row labels
        (``<sub>_<ses>...``) with missing values for that column.
    rerun_dict : dict
        Nested inventory ``{sub: {ses: {modality: {atlas: [cols]}}}}`` of
        data that still needs (re)generation; updated in place.
    modality : str
        Modality key used in the derivatives directory layout.
    working_path : str
        Root of the per-subject derivatives tree.
    drop_cols : list
        Substrings marking columns to drop rather than recover.
    frame : pandas.DataFrame
        Metrics table indexed by ``<sub>_<ses>`` labels; cells are filled
        in place where recovery succeeds.

    Returns
    -------
    rerun_dict : dict
        The (possibly extended) missingness inventory.
    rerun : bool
        True if at least one graph-analysis re-run was triggered because an
        auc directory was absent.

    NOTE(review): `frame` is rebound locally by ``frame.drop(...)`` but the
    rebound object is never returned, so those column drops are lost to the
    caller — confirm whether dropped columns are meant to propagate.
    """
    import glob
    import os
    # First two underscore-delimited tokens of the column name identify the
    # atlas subdirectory on disk.
    atlas = bad_col.split('_')[0] + '_' + bad_col.split('_')[1]
    rerun = False
    for lab in bad_cols_dict[bad_col]:
        # Row labels look like "<sub>_<ses>...".
        sub = lab.split('_')[0]
        ses = lab.split('_')[1]
        # Ensure the nested inventory path sub -> ses -> modality -> atlas
        # exists before potentially appending to it below.
        if sub not in rerun_dict.keys():
            rerun_dict[sub] = {}
        if ses not in rerun_dict[sub].keys():
            rerun_dict[sub][ses] = {}
        if modality not in rerun_dict[sub][ses].keys():
            rerun_dict[sub][ses][modality] = {}
        if atlas not in rerun_dict[sub][ses][modality].keys():
            rerun_dict[sub][ses][modality][atlas] = []
        # Strip the atlas prefix and any "_thrtype..." suffix to get the
        # substring used to match auc output filenames.
        search_str = bad_col.replace(f"{atlas}_", '').split('_thrtype')[0]
        if not os.path.isdir(f"{working_path}/{sub}/{ses}/"
                             f"{modality}/{atlas}/topology/auc"):
            if not os.path.isdir(
                    f"{working_path}/{sub}/{ses}/{modality}/{atlas}/topology"):
                # No topology outputs at all — nothing to regenerate from.
                print(f"Missing graph analysis for {sub}, {ses} for "
                      f"{atlas}...")
            else:
                # topology exists but auc does not: regenerate the summary
                # csv's and flag that a re-run happened.
                from pynets.stats.netstats import collect_pandas_df_make
                collect_pandas_df_make(
                    glob.glob(f"{working_path}/{sub}/{ses}/"
                              f"{modality}/{atlas}/"
                              f"topology/*_neat.csv"),
                    f"{sub}_{ses}", None, False)
                rerun = True
        # All auc outputs whose filename contains the metric search string.
        outs = [
            i for i in glob.glob(f"{working_path}/{sub}/{ses}/"
                                 f"{modality}/{atlas}/topology/auc/*")
            if search_str in i
        ]
        if len(outs) == 1:
            # Fill in gaps (for things that get dropped during earlier
            # stages)
            try:
                # Assumes the auc csv's are gzip-compressed — TODO confirm.
                df_tmp = pd.read_csv(outs[0], chunksize=100000,
                                     compression="gzip", encoding="utf-8",
                                     engine='python').read()
            except:
                try:
                    # Retry with the C parser before giving up on this label.
                    df_tmp = pd.read_csv(outs[0], chunksize=100000,
                                         compression="gzip",
                                         encoding="utf-8",
                                         engine='c').read()
                except:
                    print(f"Cannot load {outs[0]}")
                    continue
            if not df_tmp.empty:
                # Drop (rather than recover) columns flagged by drop_cols.
                for drop in drop_cols:
                    if drop in bad_col:
                        print(f"Removing column: {drop}")
                        frame = frame.drop(columns=bad_col)
                if bad_col not in frame.columns:
                    continue
                from pynets.stats.netstats import \
                    collect_pandas_df_make
                collect_pandas_df_make(
                    glob.glob(f"{working_path}/{sub}/{ses}/"
                              f"{modality}/{atlas}/topology/*_neat.csv"),
                    f"{sub}_{ses}", None, False)
                try:
                    # Match the recovered column by the text after "auc_";
                    # bare except also swallows the IndexError raised when
                    # "auc_" is not in bad_col — NOTE(review): confirm
                    # intended.
                    frame.loc[lab, bad_col] = \
                        df_tmp.filter(regex=bad_col.split(
                            'auc_')[1:][0]).values.tolist()[0][0]
                    print(f"Recovered missing data from {sub}, {ses} for "
                          f"{bad_col}...")
                except:
                    # Could not extract the value — trigger a regeneration
                    # and move on to the next label.
                    from pynets.stats.netstats import \
                        collect_pandas_df_make
                    collect_pandas_df_make(
                        glob.glob(f"{working_path}/{sub}/{ses}/"
                                  f"{modality}/{atlas}/topology/*_neat.csv"),
                        f"{sub}_{ses}", None, False)
                    continue
                del df_tmp
            else:
                # Empty auc csv: regenerate and record the column as still
                # missing for this subject/session.
                from pynets.stats.netstats import collect_pandas_df_make
                collect_pandas_df_make(
                    glob.glob(f"{working_path}/{sub}/{ses}/"
                              f"{modality}/{atlas}/topology/*_neat.csv"),
                    f"{sub}_{ses}", None, False)
                rerun_dict[sub][ses][modality][atlas].append(bad_col)
                continue
        elif len(outs) > 1:
            # Multiple candidate files: try each until one yields a value.
            for out in outs:
                try:
                    df_tmp = pd.read_csv(out, chunksize=100000,
                                         compression="gzip",
                                         encoding="utf-8",
                                         engine='python').read()
                except:
                    try:
                        df_tmp = pd.read_csv(out, chunksize=100000,
                                             compression="gzip",
                                             encoding="utf-8",
                                             engine='c').read()
                    except:
                        print(f"Cannot load {out}")
                        continue
                if not df_tmp.empty:
                    print(f"Recovered missing data from {sub}, {ses} for "
                          f"{bad_col}...")
                    for drop in drop_cols:
                        if drop in bad_col:
                            print(f"Removing column: {drop}")
                            frame = frame.drop(columns=bad_col)
                    try:
                        frame.loc[lab, bad_col] = df_tmp.filter(
                            regex=bad_col.split(
                                'auc_')[1:][0]).values.tolist()[0][0]
                    except:
                        from pynets.stats.netstats import \
                            collect_pandas_df_make
                        collect_pandas_df_make(
                            glob.glob(
                                f"{working_path}/{sub}/{ses}/"
                                f"{modality}/{atlas}/topology/*_neat.csv"),
                            f"{sub}_{ses}", None, False)
                        continue
                    del df_tmp
        else:
            # Add to missingness inventory if not found
            rerun_dict[sub][ses][modality][atlas].append(bad_col)
            from pynets.stats.netstats import \
                collect_pandas_df_make
            collect_pandas_df_make(
                glob.glob(f"{working_path}/{sub}/{ses}/"
                          f"{modality}/{atlas}/topology/*_neat.csv"),
                f"{sub}_{ses}", None, False)
    return rerun_dict, rerun
def collect_pandas_df(network, ID, net_mets_csv_list, plot_switch, multi_nets,
                      multimodal):
    """
    API for summarizing independent lists of pickled pandas dataframes of
    graph metrics for each modality, RSN, and roi.

    Parameters
    ----------
    network : str
        Resting-state network based on Yeo-7 and Yeo-17 naming
        (e.g. 'Default') used to filter nodes in the study of brain
        subgraphs.
    ID : str
        A subject id or other unique identifier.
    net_mets_csv_list : list
        List of file paths to pickled pandas dataframes as themselves.
    plot_switch : bool
        Activate summary plotting (histograms, ROC curves, etc.)
    multi_nets : list
        List of Yeo RSN's specified in workflow(s).
    multimodal : bool
        Indicates whether multiple modalities of input data have been
        specified.

    Returns
    -------
    combination_complete : bool
        If True, then collect_pandas_df completed successfully.
    """
    from pathlib import Path
    import yaml
    from pynets.core.utils import flatten
    from pynets.stats.netstats import collect_pandas_df_make

    # Available functional and structural connectivity models
    with open(f"{str(Path(__file__).parent.parent)}/runconfig.yaml",
              'r') as stream:
        # BUG FIX: yaml.load() with no Loader argument is deprecated since
        # PyYAML 5.1 and raises TypeError in PyYAML >= 6. FullLoader keeps
        # the original (full, non-arbitrary-object) semantics.
        hardcoded_params = yaml.load(stream, Loader=yaml.FullLoader)
    try:
        func_models = hardcoded_params['available_models']['func_models']
    except KeyError:
        # NOTE(review): as in the original, a missing key is only printed;
        # func_models stays unbound and multimodal runs will NameError.
        print('ERROR: available functional models not sucessfully extracted from runconfig.yaml')
    try:
        struct_models = hardcoded_params['available_models']['struct_models']
    except KeyError:
        print('ERROR: available structural models not sucessfully extracted from runconfig.yaml')

    def _model_of(csv_path):
        # The connectivity-model token follows 'mets_' in the csv filename.
        return csv_path.split('mets_')[1].split('_')[0]

    def _summarize(csv_list, net):
        # Summarize one list of metric csv's; when multimodal, split the
        # list by connectivity model and require both summaries to succeed.
        if multimodal is True:
            csv_list_dwi = list(set(
                [i for i in csv_list if _model_of(i) in struct_models]))
            complete_dwi = collect_pandas_df_make(csv_list_dwi, ID, net,
                                                  plot_switch)
            csv_list_func = list(set(
                [i for i in csv_list if _model_of(i) in func_models]))
            complete_func = collect_pandas_df_make(csv_list_func, ID, net,
                                                   plot_switch)
            return complete_dwi is True and complete_func is True
        return collect_pandas_df_make(csv_list, ID, net, plot_switch)

    net_mets_csv_list = list(flatten(net_mets_csv_list))

    if multi_nets is not None:
        # ROBUSTNESS: an empty multi_nets previously left
        # combination_complete unbound, causing NameError on return.
        combination_complete = True
        # NOTE(review): as in the original, only the status of the LAST
        # network in multi_nets is returned — earlier statuses are
        # overwritten each iteration. Confirm whether all() aggregation was
        # intended.
        for net in multi_nets:
            per_net_csvs = list(set(
                [i for i in net_mets_csv_list if net in i]))
            combination_complete = _summarize(per_net_csvs, net)
    else:
        combination_complete = _summarize(net_mets_csv_list, network)

    return combination_complete